//===---------- ARM.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "ABIInfo.h"
#include "CGBuiltin.h"
#include "CGDebugInfo.h"
#include "TargetInfo.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

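// Map an AArch64 MSVC-compatible builtin (the BI_BitScan* and BI_Interlocked*
// family) to the shared CodeGenFunction::MSVCIntrin code that emits it, or
// std::nullopt if the builtin has no MSVC intrinsic equivalent handled by the
// common path.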
static std::optional<CodeGenFunction::MSVCIntrin>
translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::AArch64::BI_BitScanForward:
  case clang::AArch64::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::AArch64::BI_BitScanReverse:
  case clang::AArch64::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::AArch64::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::AArch64::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::AArch64::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::AArch64::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::AArch64::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::AArch64::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::AArch64::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::AArch64::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd_acq:
  case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd_rel:
  case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd_nf:
  case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::AArch64::BI_InterlockedExchange8_acq:
  case clang::AArch64::BI_InterlockedExchange16_acq:
  case clang::AArch64::BI_InterlockedExchange_acq:
  case clang::AArch64::BI_InterlockedExchange64_acq:
  case clang::AArch64::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::AArch64::BI_InterlockedExchange8_rel:
  case clang::AArch64::BI_InterlockedExchange16_rel:
  case clang::AArch64::BI_InterlockedExchange_rel:
  case clang::AArch64::BI_InterlockedExchange64_rel:
  case clang::AArch64::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::AArch64::BI_InterlockedExchange8_nf:
  case clang::AArch64::BI_InterlockedExchange16_nf:
  case clang::AArch64::BI_InterlockedExchange_nf:
  case clang::AArch64::BI_InterlockedExchange64_nf:
  case clang::AArch64::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange8_acq:
  case clang::AArch64::BI_InterlockedCompareExchange16_acq:
  case clang::AArch64::BI_InterlockedCompareExchange_acq:
  case clang::AArch64::BI_InterlockedCompareExchange64_acq:
  case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::AArch64::BI_InterlockedCompareExchange8_rel:
  case clang::AArch64::BI_InterlockedCompareExchange16_rel:
  case clang::AArch64::BI_InterlockedCompareExchange_rel:
  case clang::AArch64::BI_InterlockedCompareExchange64_rel:
  case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::AArch64::BI_InterlockedCompareExchange8_nf:
  case clang::AArch64::BI_InterlockedCompareExchange16_nf:
  case clang::AArch64::BI_InterlockedCompareExchange_nf:
  case clang::AArch64::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128:
    return MSVCIntrin::_InterlockedCompareExchange128;
  case clang::AArch64::BI_InterlockedCompareExchange128_acq:
    return MSVCIntrin::_InterlockedCompareExchange128_acq;
  case clang::AArch64::BI_InterlockedCompareExchange128_nf:
    return MSVCIntrin::_InterlockedCompareExchange128_nf;
  case clang::AArch64::BI_InterlockedCompareExchange128_rel:
    return MSVCIntrin::_InterlockedCompareExchange128_rel;
  case clang::AArch64::BI_InterlockedOr8_acq:
  case clang::AArch64::BI_InterlockedOr16_acq:
  case clang::AArch64::BI_InterlockedOr_acq:
  case clang::AArch64::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::AArch64::BI_InterlockedOr8_rel:
  case clang::AArch64::BI_InterlockedOr16_rel:
  case clang::AArch64::BI_InterlockedOr_rel:
  case clang::AArch64::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::AArch64::BI_InterlockedOr8_nf:
  case clang::AArch64::BI_InterlockedOr16_nf:
  case clang::AArch64::BI_InterlockedOr_nf:
  case clang::AArch64::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::AArch64::BI_InterlockedXor8_acq:
  case clang::AArch64::BI_InterlockedXor16_acq:
  case clang::AArch64::BI_InterlockedXor_acq:
  case clang::AArch64::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::AArch64::BI_InterlockedXor8_rel:
  case clang::AArch64::BI_InterlockedXor16_rel:
  case clang::AArch64::BI_InterlockedXor_rel:
  case clang::AArch64::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::AArch64::BI_InterlockedXor8_nf:
  case clang::AArch64::BI_InterlockedXor16_nf:
  case clang::AArch64::BI_InterlockedXor_nf:
  case clang::AArch64::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::AArch64::BI_InterlockedAnd8_acq:
  case clang::AArch64::BI_InterlockedAnd16_acq:
  case clang::AArch64::BI_InterlockedAnd_acq:
  case clang::AArch64::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::AArch64::BI_InterlockedAnd8_rel:
  case clang::AArch64::BI_InterlockedAnd16_rel:
  case clang::AArch64::BI_InterlockedAnd_rel:
  case clang::AArch64::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::AArch64::BI_InterlockedAnd8_nf:
  case clang::AArch64::BI_InterlockedAnd16_nf:
  case clang::AArch64::BI_InterlockedAnd_nf:
  case clang::AArch64::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::AArch64::BI_InterlockedIncrement16_acq:
  case clang::AArch64::BI_InterlockedIncrement_acq:
  case clang::AArch64::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::AArch64::BI_InterlockedIncrement16_rel:
  case clang::AArch64::BI_InterlockedIncrement_rel:
  case clang::AArch64::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::AArch64::BI_InterlockedIncrement16_nf:
  case clang::AArch64::BI_InterlockedIncrement_nf:
  case clang::AArch64::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::AArch64::BI_InterlockedDecrement16_acq:
  case clang::AArch64::BI_InterlockedDecrement_acq:
  case clang::AArch64::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::AArch64::BI_InterlockedDecrement16_rel:
  case clang::AArch64::BI_InterlockedDecrement_rel:
  case clang::AArch64::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::AArch64::BI_InterlockedDecrement16_nf:
  case clang::AArch64::BI_InterlockedDecrement_nf:
  case clang::AArch64::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}

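// The AArch32 counterpart of translateAarch64ToMsvcIntrin: map an ARM
// MSVC-compatible builtin to the shared MSVCIntrin code, or std::nullopt.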
static std::optional<CodeGenFunction::MSVCIntrin>
translateArmToMsvcIntrin(unsigned BuiltinID) {
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
  switch (BuiltinID) {
  default:
    return std::nullopt;
  case clang::ARM::BI_BitScanForward:
  case clang::ARM::BI_BitScanForward64:
    return MSVCIntrin::_BitScanForward;
  case clang::ARM::BI_BitScanReverse:
  case clang::ARM::BI_BitScanReverse64:
    return MSVCIntrin::_BitScanReverse;
  case clang::ARM::BI_InterlockedAnd64:
    return MSVCIntrin::_InterlockedAnd;
  case clang::ARM::BI_InterlockedExchange64:
    return MSVCIntrin::_InterlockedExchange;
  case clang::ARM::BI_InterlockedExchangeAdd64:
    return MSVCIntrin::_InterlockedExchangeAdd;
  case clang::ARM::BI_InterlockedExchangeSub64:
    return MSVCIntrin::_InterlockedExchangeSub;
  case clang::ARM::BI_InterlockedOr64:
    return MSVCIntrin::_InterlockedOr;
  case clang::ARM::BI_InterlockedXor64:
    return MSVCIntrin::_InterlockedXor;
  case clang::ARM::BI_InterlockedDecrement64:
    return MSVCIntrin::_InterlockedDecrement;
  case clang::ARM::BI_InterlockedIncrement64:
    return MSVCIntrin::_InterlockedIncrement;
  case clang::ARM::BI_InterlockedExchangeAdd8_acq:
  case clang::ARM::BI_InterlockedExchangeAdd16_acq:
  case clang::ARM::BI_InterlockedExchangeAdd_acq:
  case clang::ARM::BI_InterlockedExchangeAdd64_acq:
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
  case clang::ARM::BI_InterlockedExchangeAdd8_rel:
  case clang::ARM::BI_InterlockedExchangeAdd16_rel:
  case clang::ARM::BI_InterlockedExchangeAdd_rel:
  case clang::ARM::BI_InterlockedExchangeAdd64_rel:
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
  case clang::ARM::BI_InterlockedExchangeAdd8_nf:
  case clang::ARM::BI_InterlockedExchangeAdd16_nf:
  case clang::ARM::BI_InterlockedExchangeAdd_nf:
  case clang::ARM::BI_InterlockedExchangeAdd64_nf:
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
  case clang::ARM::BI_InterlockedExchange8_acq:
  case clang::ARM::BI_InterlockedExchange16_acq:
  case clang::ARM::BI_InterlockedExchange_acq:
  case clang::ARM::BI_InterlockedExchange64_acq:
  case clang::ARM::BI_InterlockedExchangePointer_acq:
    return MSVCIntrin::_InterlockedExchange_acq;
  case clang::ARM::BI_InterlockedExchange8_rel:
  case clang::ARM::BI_InterlockedExchange16_rel:
  case clang::ARM::BI_InterlockedExchange_rel:
  case clang::ARM::BI_InterlockedExchange64_rel:
  case clang::ARM::BI_InterlockedExchangePointer_rel:
    return MSVCIntrin::_InterlockedExchange_rel;
  case clang::ARM::BI_InterlockedExchange8_nf:
  case clang::ARM::BI_InterlockedExchange16_nf:
  case clang::ARM::BI_InterlockedExchange_nf:
  case clang::ARM::BI_InterlockedExchange64_nf:
  case clang::ARM::BI_InterlockedExchangePointer_nf:
    return MSVCIntrin::_InterlockedExchange_nf;
  case clang::ARM::BI_InterlockedCompareExchange8_acq:
  case clang::ARM::BI_InterlockedCompareExchange16_acq:
  case clang::ARM::BI_InterlockedCompareExchange_acq:
  case clang::ARM::BI_InterlockedCompareExchange64_acq:
  case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
    return MSVCIntrin::_InterlockedCompareExchange_acq;
  case clang::ARM::BI_InterlockedCompareExchange8_rel:
  case clang::ARM::BI_InterlockedCompareExchange16_rel:
  case clang::ARM::BI_InterlockedCompareExchange_rel:
  case clang::ARM::BI_InterlockedCompareExchange64_rel:
  case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
    return MSVCIntrin::_InterlockedCompareExchange_rel;
  case clang::ARM::BI_InterlockedCompareExchange8_nf:
  case clang::ARM::BI_InterlockedCompareExchange16_nf:
  case clang::ARM::BI_InterlockedCompareExchange_nf:
  case clang::ARM::BI_InterlockedCompareExchange64_nf:
    return MSVCIntrin::_InterlockedCompareExchange_nf;
  case clang::ARM::BI_InterlockedOr8_acq:
  case clang::ARM::BI_InterlockedOr16_acq:
  case clang::ARM::BI_InterlockedOr_acq:
  case clang::ARM::BI_InterlockedOr64_acq:
    return MSVCIntrin::_InterlockedOr_acq;
  case clang::ARM::BI_InterlockedOr8_rel:
  case clang::ARM::BI_InterlockedOr16_rel:
  case clang::ARM::BI_InterlockedOr_rel:
  case clang::ARM::BI_InterlockedOr64_rel:
    return MSVCIntrin::_InterlockedOr_rel;
  case clang::ARM::BI_InterlockedOr8_nf:
  case clang::ARM::BI_InterlockedOr16_nf:
  case clang::ARM::BI_InterlockedOr_nf:
  case clang::ARM::BI_InterlockedOr64_nf:
    return MSVCIntrin::_InterlockedOr_nf;
  case clang::ARM::BI_InterlockedXor8_acq:
  case clang::ARM::BI_InterlockedXor16_acq:
  case clang::ARM::BI_InterlockedXor_acq:
  case clang::ARM::BI_InterlockedXor64_acq:
    return MSVCIntrin::_InterlockedXor_acq;
  case clang::ARM::BI_InterlockedXor8_rel:
  case clang::ARM::BI_InterlockedXor16_rel:
  case clang::ARM::BI_InterlockedXor_rel:
  case clang::ARM::BI_InterlockedXor64_rel:
    return MSVCIntrin::_InterlockedXor_rel;
  case clang::ARM::BI_InterlockedXor8_nf:
  case clang::ARM::BI_InterlockedXor16_nf:
  case clang::ARM::BI_InterlockedXor_nf:
  case clang::ARM::BI_InterlockedXor64_nf:
    return MSVCIntrin::_InterlockedXor_nf;
  case clang::ARM::BI_InterlockedAnd8_acq:
  case clang::ARM::BI_InterlockedAnd16_acq:
  case clang::ARM::BI_InterlockedAnd_acq:
  case clang::ARM::BI_InterlockedAnd64_acq:
    return MSVCIntrin::_InterlockedAnd_acq;
  case clang::ARM::BI_InterlockedAnd8_rel:
  case clang::ARM::BI_InterlockedAnd16_rel:
  case clang::ARM::BI_InterlockedAnd_rel:
  case clang::ARM::BI_InterlockedAnd64_rel:
    return MSVCIntrin::_InterlockedAnd_rel;
  case clang::ARM::BI_InterlockedAnd8_nf:
  case clang::ARM::BI_InterlockedAnd16_nf:
  case clang::ARM::BI_InterlockedAnd_nf:
  case clang::ARM::BI_InterlockedAnd64_nf:
    return MSVCIntrin::_InterlockedAnd_nf;
  case clang::ARM::BI_InterlockedIncrement16_acq:
  case clang::ARM::BI_InterlockedIncrement_acq:
  case clang::ARM::BI_InterlockedIncrement64_acq:
    return MSVCIntrin::_InterlockedIncrement_acq;
  case clang::ARM::BI_InterlockedIncrement16_rel:
  case clang::ARM::BI_InterlockedIncrement_rel:
  case clang::ARM::BI_InterlockedIncrement64_rel:
    return MSVCIntrin::_InterlockedIncrement_rel;
  case clang::ARM::BI_InterlockedIncrement16_nf:
  case clang::ARM::BI_InterlockedIncrement_nf:
  case clang::ARM::BI_InterlockedIncrement64_nf:
    return MSVCIntrin::_InterlockedIncrement_nf;
  case clang::ARM::BI_InterlockedDecrement16_acq:
  case clang::ARM::BI_InterlockedDecrement_acq:
  case clang::ARM::BI_InterlockedDecrement64_acq:
    return MSVCIntrin::_InterlockedDecrement_acq;
  case clang::ARM::BI_InterlockedDecrement16_rel:
  case clang::ARM::BI_InterlockedDecrement_rel:
  case clang::ARM::BI_InterlockedDecrement64_rel:
    return MSVCIntrin::_InterlockedDecrement_rel;
  case clang::ARM::BI_InterlockedDecrement16_nf:
  case clang::ARM::BI_InterlockedDecrement_nf:
  case clang::ARM::BI_InterlockedDecrement64_nf:
    return MSVCIntrin::_InterlockedDecrement_nf;
  }
  llvm_unreachable("must return from switch");
}

// Emit an intrinsic where all operands are of the same type as the result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
                                                unsigned IntrinsicID,
                                                unsigned ConstrainedIntrinsicID,
                                                llvm::Type *Ty,
                                                ArrayRef<Value *> Args) {
  Function *F;
  if (CGF.Builder.getIsFPConstrained())
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
  else
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);

  if (CGF.Builder.getIsFPConstrained())
    return CGF.Builder.CreateConstrainedFPCall(F, Args);

  return CGF.Builder.CreateCall(F, Args);
}

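// Build the LLVM fixed vector type for a NEON builtin from its NeonTypeFlags:
// the element type picks the scalar, isQuad() doubles the element count, and
// V1Ty forces a single-element vector. Half and bfloat elements degrade to
// i16 vectors when the target lacks native fp16/bf16 support.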
static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags TypeFlags,
                                          bool HasFastHalfType = true,
                                          bool V1Ty = false,
                                          bool AllowBFloatArgsAndRet = true) {
  int IsQuad = TypeFlags.isQuad();
  switch (TypeFlags.getEltType()) {
  case NeonTypeFlags::Int8:
  case NeonTypeFlags::Poly8:
  case NeonTypeFlags::MFloat8:
    return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  case NeonTypeFlags::Int16:
  case NeonTypeFlags::Poly16:
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::BFloat16:
    if (AllowBFloatArgsAndRet)
      return llvm::FixedVectorType::get(CGF->BFloatTy,
                                        V1Ty ? 1 : (4 << IsQuad));
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Float16:
    if (HasFastHalfType)
      return llvm::FixedVectorType::get(CGF->HalfTy,
                                        V1Ty ? 1 : (4 << IsQuad));
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Int64:
  case NeonTypeFlags::Poly64:
    return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  case NeonTypeFlags::Poly128:
    // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
    // much of the i128/f128 API is missing, so we use v16i8 to represent
    // poly128 and rely on pattern matching.
    return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
  case NeonTypeFlags::Float32:
    return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Float64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  }
  llvm_unreachable("Unknown vector element type!");
}

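// Given integer NeonTypeFlags, return the floating-point vector type with the
// same lane width and lane count (i16 -> f16, i32 -> f32, i64 -> f64).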
static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags IntTypeFlags) {
  int IsQuad = IntTypeFlags.isQuad();
  switch (IntTypeFlags.getEltType()) {
  case NeonTypeFlags::Int16:
    return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
  case NeonTypeFlags::Int64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
  default:
    llvm_unreachable("Type can't be converted to floating-point!");
  }
}

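// Splat lane C of V across Count lanes: the splatted constant serves as the
// shufflevector mask, so every result element reads the same source lane.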
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
                                      const ElementCount &Count) {
  Value *SV = llvm::ConstantVector::getSplat(Count, C);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
  return EmitNeonSplat(V, C, EC);
}

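// Emit a call to the intrinsic F, first bitcasting each operand to the
// parameter type F expects. If `shift` is non-zero, operand number `shift` is
// instead turned into a splatted shift-amount constant (negated when
// `rightshift` is set). Metadata parameters of constrained FP intrinsics are
// skipped.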
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
                                     const char *name,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    if (F->isConstrainedFPIntrinsic())
      if (ai->getType()->isMetadataTy())
        continue;
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
  }

  if (F->isConstrainedFPIntrinsic())
    return Builder.CreateConstrainedFPCall(F, Ops, name);
  return Builder.CreateCall(F, Ops, name);
}

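// FP8 NEON builtins carry the FPMR register value as their trailing operand:
// pop it off and write it via llvm.aarch64.set.fpmr before emitting the
// intrinsic proper with the remaining operands.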
Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
                                        ArrayRef<llvm::Type *> Tys,
                                        SmallVectorImpl<Value *> &Ops,
                                        const CallExpr *E, const char *name) {
  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
                     Ops.pop_back_val());
  return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
}

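// For the FP8 FDOT/FMLA lane variants below, ExtendLaneArg means the lane
// operand arrives as a 64-bit vector and must first be widened to a full
// 16 x i8 register by inserting it at element 0 of a poison vector.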
llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {

  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
                       Ops[1]->getType()};
  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
    unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
    SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {

  if (ExtendLaneArg) {
    auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
                                        uint64_t(0));
  }
  const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
                             RetTy->getPrimitiveSizeInBits();
  return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
                         Ops, E, name);
}

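// Build the constant shift-amount vector used by EmitNeonCall: the scalar
// immediate is splatted over Ty, negated first for right shifts, since the
// NEON shift intrinsics encode a right shift as a negative amount.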
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
  int SV = cast<ConstantInt>(V)->getSExtValue();
  return ConstantInt::getSigned(Ty, neg ? -SV : SV);
}

Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
                                           llvm::Type *Ty1, bool Extract,
                                           SmallVectorImpl<llvm::Value *> &Ops,
                                           const CallExpr *E,
                                           const char *name) {
  llvm::Type *Tys[] = {Ty0, Ty1};
  if (Extract) {
    // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part
    // of the vector.
    Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
    Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], uint64_t(0));
  }
  return EmitFP8NeonCall(IID, Tys, Ops, E, name);
}

// Right-shift a vector by a constant.
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
                                          llvm::Type *Ty, bool usgn,
                                          const char *name) {
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);

  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  int EltSize = VTy->getScalarSizeInBits();

  Vec = Builder.CreateBitCast(Vec, Ty);

  // lshr/ashr are undefined when the shift amount is equal to the vector
  // element size.
  if (ShiftAmt == EltSize) {
    if (usgn) {
      // Right-shifting an unsigned value by its size yields 0.
      return llvm::ConstantAggregateZero::get(VTy);
    } else {
      // Right-shifting a signed value by its size is equivalent
      // to a shift of size-1.
      --ShiftAmt;
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
    }
  }

  Shift = EmitNeonShiftVector(Shift, Ty, false);
  if (usgn)
    return Builder.CreateLShr(Vec, Shift, name);
  return Builder.CreateAShr(Vec, Shift, name);
}

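// Type-modifier flags for the intrinsic tables below. They describe how the
// overloaded types of an LLVM intrinsic are assembled from a NEON builtin's
// return and argument types.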
enum {
  AddRetType = (1 << 0),
  Add1ArgType = (1 << 1),
  Add2ArgTypes = (1 << 2),

  VectorizeRetType = (1 << 3),
  VectorizeArgTypes = (1 << 4),

  InventFloatType = (1 << 5),
  UnsignedAlts = (1 << 6),

  Use64BitVectors = (1 << 7),
  Use128BitVectors = (1 << 8),

  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};

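// One row of the builtin-to-intrinsic tables: a name hint for IR value names,
// the builtin ID, the primary LLVM intrinsic, an alternate intrinsic (e.g.
// the signed variant when UnsignedAlts is set), and type-modifier flags from
// the enum above. The operator< overloads exist so the tables can be
// binary-searched; entries must therefore stay sorted by BuiltinID.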
namespace {
struct ARMVectorIntrinsicInfo {
  const char *NameHint;
  unsigned BuiltinID;
  unsigned LLVMIntrinsic;
  unsigned AltLLVMIntrinsic;
  uint64_t TypeModifier;

  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  bool operator<(const ARMVectorIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};
} // end anonymous namespace

#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, 0, TypeModifier }

#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
    TypeModifier }

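// As an illustration of the maps below,
//   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts)
// expands to roughly:
//   { "vabd_v", NEON::BI__builtin_neon_vabd_v, Intrinsic::arm_neon_vabdu,
//     Intrinsic::arm_neon_vabds, Add1ArgType | UnsignedAlts }
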
// clang-format off
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
  NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
  NEONMAP0(splat_lane_v),
  NEONMAP0(splat_laneq_v),
  NEONMAP0(splatq_lane_v),
  NEONMAP0(splatq_laneq_v),
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vadd_v),
  NEONMAP0(vaddhn_v),
  NEONMAP0(vaddq_v),
  NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
  NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP0(vceqz_v),
  NEONMAP0(vceqzq_v),
  NEONMAP0(vcgez_v),
  NEONMAP0(vcgezq_v),
  NEONMAP0(vcgtz_v),
  NEONMAP0(vcgtzq_v),
  NEONMAP0(vclez_v),
  NEONMAP0(vclezq_v),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP0(vcltz_v),
  NEONMAP0(vcltzq_v),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_s16),
  NEONMAP0(vcvt_f16_u16),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s16_f16),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u16_f16),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
  NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f16_s16),
  NEONMAP0(vcvtq_f16_u16),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s16_f16),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u16_f16),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP1(vdot_s32, arm_neon_sdot, 0),
  NEONMAP1(vdot_u32, arm_neon_udot, 0),
  NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
  NEONMAP1(vdotq_u32, arm_neon_udot, 0),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, trunc, Add1ArgType),
  NEONMAP1(vrnda_v, round, Add1ArgType),
  NEONMAP1(vrndaq_v, round, Add1ArgType),
  NEONMAP0(vrndi_v),
  NEONMAP0(vrndiq_v),
  NEONMAP1(vrndm_v, floor, Add1ArgType),
  NEONMAP1(vrndmq_v, floor, Add1ArgType),
  NEONMAP1(vrndn_v, roundeven, Add1ArgType),
  NEONMAP1(vrndnq_v, roundeven, Add1ArgType),
  NEONMAP1(vrndp_v, ceil, Add1ArgType),
  NEONMAP1(vrndpq_v, ceil, Add1ArgType),
  NEONMAP1(vrndq_v, trunc, Add1ArgType),
  NEONMAP1(vrndx_v, rint, Add1ArgType),
  NEONMAP1(vrndxq_v, rint, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};

static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP0(splat_lane_v),
  NEONMAP0(splat_laneq_v),
  NEONMAP0(splatq_lane_v),
  NEONMAP0(splatq_laneq_v),
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vadd_v),
  NEONMAP0(vaddhn_v),
  NEONMAP0(vaddq_v),
  NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
  NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
  NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
  NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
  NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
  NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP0(vceqz_v),
  NEONMAP0(vceqzq_v),
  NEONMAP0(vcgez_v),
  NEONMAP0(vcgezq_v),
  NEONMAP0(vcgtz_v),
  NEONMAP0(vcgtzq_v),
  NEONMAP0(vclez_v),
  NEONMAP0(vclezq_v),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP0(vcltz_v),
  NEONMAP0(vcltzq_v),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
  NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
  NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
  NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_s16),
  NEONMAP0(vcvt_f16_u16),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f16_s16),
  NEONMAP0(vcvtq_f16_u16),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
  NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
  NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
  NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
  NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
  NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
  NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
  NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
  NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
  NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
  NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
  NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
  NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
  NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
  NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
  NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
  NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
  NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
  NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
  NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
  NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
  NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
  NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
  NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
  NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
  NEONMAP0(vrndi_v),
  NEONMAP0(vrndiq_v),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
  NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
  NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
  NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
  NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
  NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
  NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
  NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
  NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
  NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
  NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
  NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
  NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
  NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
  NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
  NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
};

static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s16, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_s32, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_s8, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_u16, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_u32, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddv_u8, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s16, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_s32, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_s64, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_s8, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_u16, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_u32, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_u64, vector_reduce_add, Add1ArgType),
  NEONMAP1(vaddvq_u8, vector_reduce_add, Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s32_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u32_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s64_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u64_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_s32_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_u32_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP0(vcvth_bf16_f32),
  NEONMAP1(vcvtmd_s32_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u32_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s64_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u64_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s32_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u32_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s64_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u64_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s32_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u32_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s64_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u64_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_s64_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_u64_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_s16, vector_reduce_smax, Add1ArgType),
  NEONMAP1(vmaxv_s32, vector_reduce_smax, Add1ArgType),
  NEONMAP1(vmaxv_s8, vector_reduce_smax, Add1ArgType),
  NEONMAP1(vmaxv_u16, vector_reduce_umax, Add1ArgType),
  NEONMAP1(vmaxv_u32, vector_reduce_umax, Add1ArgType),
  NEONMAP1(vmaxv_u8, vector_reduce_umax, Add1ArgType),
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_s16, vector_reduce_smax, Add1ArgType),
  NEONMAP1(vmaxvq_s32, vector_reduce_smax, Add1ArgType),
  NEONMAP1(vmaxvq_s8, vector_reduce_smax, Add1ArgType),
  NEONMAP1(vmaxvq_u16, vector_reduce_umax, Add1ArgType),
  NEONMAP1(vmaxvq_u32, vector_reduce_umax, Add1ArgType),
  NEONMAP1(vmaxvq_u8, vector_reduce_umax, Add1ArgType),
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_s16, vector_reduce_smin, Add1ArgType),
  NEONMAP1(vminv_s32, vector_reduce_smin, Add1ArgType),
  NEONMAP1(vminv_s8, vector_reduce_smin, Add1ArgType),
  NEONMAP1(vminv_u16, vector_reduce_umin, Add1ArgType),
  NEONMAP1(vminv_u32, vector_reduce_umin, Add1ArgType),
  NEONMAP1(vminv_u8, vector_reduce_umin, Add1ArgType),
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_s16, vector_reduce_smin, Add1ArgType),
  NEONMAP1(vminvq_s32, vector_reduce_smin, Add1ArgType),
  NEONMAP1(vminvq_s8, vector_reduce_smin, Add1ArgType),
  NEONMAP1(vminvq_u16, vector_reduce_umin, Add1ArgType),
  NEONMAP1(vminvq_u32, vector_reduce_umin, Add1ArgType),
  NEONMAP1(vminvq_u8, vector_reduce_umin, Add1ArgType),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vpaddd_s64, vector_reduce_add, Add1ArgType),
  NEONMAP1(vpaddd_u64, vector_reduce_add, Add1ArgType),
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
  NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
  // FP16 scalar intrinsics go here.
  NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
};
// clang-format on

// Some intrinsics are equivalent for codegen.
static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
  { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
  { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
  { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
  { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
  { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
  { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
  { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
  { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
  { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
  { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
  { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
  { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
  { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
  { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
  { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
  { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
  { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
  { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
  { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
  { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
  { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
  { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
  { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
  { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
  { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
  { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
  { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
  { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
  { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
  { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
  { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
  { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
  { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
  { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
  { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
  { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
  { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
  { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
  { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
  { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
  { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
  { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
  { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
  { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
  { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
  { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
  { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
  { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
  { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
  { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
  { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
  { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
  { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
  { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
  { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
  { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
  { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
  { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
  { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
  { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
  { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
  { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
  { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
  { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
  { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
  { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
  { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
  { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
  { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
  { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
  { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
  { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
  { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
  { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
  { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
  { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
  { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
  { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
  { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
  { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
  { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
  { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
  { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
  { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
  { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
  { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
  { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
  { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
  { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
  { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
  { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
  { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
  { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
  { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
  { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
  { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
  { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
  { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
  { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
  { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
  { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
  { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
  { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
  { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
  { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
  { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
  { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
  { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
  { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
  { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
  { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
  { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
  { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
  { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
  { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
  { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
  { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
  { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
  { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
  { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
  { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
  { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
  // The mangling rules cause us to have one ID for each type for
  // vldap1(q)_lane and vstl1(q)_lane, but codegen is equivalent for all of
  // them. Choose an arbitrary one to be handled as the canonical variation.
  { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
};

#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2

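// Note: SVEMAP2 (and SMEMAP2 below) leave the LLVM intrinsic ID zero; those
// builtins are lowered by dedicated codegen rather than a direct intrinsic
// call.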
#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
  {                                                                            \
    #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
        TypeModifier                                                           \
  }

#define SVEMAP2(NameBase, TypeModifier)                                        \
  { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
#define GET_SVE_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sve_builtin_cg.inc"
#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
#undef GET_SVE_LLVM_INTRINSIC_MAP
};

#undef SVEMAP1
#undef SVEMAP2

#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
  {                                                                            \
    #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
        TypeModifier                                                           \
  }

#define SMEMAP2(NameBase, TypeModifier)                                        \
  { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
#define GET_SME_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sme_builtin_cg.inc"
#undef GET_SME_LLVM_INTRINSIC_MAP
};

#undef SMEMAP1
#undef SMEMAP2

static bool NEONSIMDIntrinsicsProvenSorted = false;

static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
static bool AArch64SVEIntrinsicsProvenSorted = false;
static bool AArch64SMEIntrinsicsProvenSorted = false;

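// Binary-search IntrinsicMap for BuiltinID. The map must be sorted by
// BuiltinID; in asserts builds that precondition is verified once per map and
// cached in MapProvenSorted.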
static const ARMVectorIntrinsicInfo *
findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
                            unsigned BuiltinID, bool &MapProvenSorted) {

#ifndef NDEBUG
  if (!MapProvenSorted) {
    assert(llvm::is_sorted(IntrinsicMap));
    MapProvenSorted = true;
  }
#endif

  const ARMVectorIntrinsicInfo *Builtin =
      llvm::lower_bound(IntrinsicMap, BuiltinID);

  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
    return Builtin;

  return nullptr;
}

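// Assemble the overload type list for a NEON LLVM intrinsic from the
// TypeModifier flags: AddRetType contributes the call's return type,
// Add1ArgType/Add2ArgTypes contribute ArgType once or twice, and the
// Use64BitVectors/Use128BitVectors and Vectorize* flags first promote scalar
// types to 64- or 128-bit vectors.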
Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
                                                   unsigned Modifier,
                                                   llvm::Type *ArgType,
                                                   const CallExpr *E) {
  int VectorSize = 0;
  if (Modifier & Use64BitVectors)
    VectorSize = 64;
  else if (Modifier & Use128BitVectors)
    VectorSize = 128;

  // Return type.
  SmallVector<llvm::Type *, 3> Tys;
  if (Modifier & AddRetType) {
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    if (Modifier & VectorizeRetType)
      Ty = llvm::FixedVectorType::get(
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);

    Tys.push_back(Ty);
  }

  // Arguments.
  if (Modifier & VectorizeArgTypes) {
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
    ArgType = llvm::FixedVectorType::get(ArgType, Elts);
  }

  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgType);

  if (Modifier & Add2ArgTypes)
    Tys.push_back(ArgType);

  if (Modifier & InventFloatType)
    Tys.push_back(FloatTy);

  return CGM.getIntrinsic(IntrinsicID, Tys);
}

//===----------------------------------------------------------------------===//
// Emit-helpers
//===----------------------------------------------------------------------===//
static Value *EmitCommonNeonSISDBuiltinExpr(
    CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
    SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
  unsigned BuiltinID = SISDInfo.BuiltinID;
  unsigned int Int = SISDInfo.LLVMIntrinsic;
  unsigned Modifier = SISDInfo.TypeModifier;
  const char *s = SISDInfo.NameHint;

  switch (BuiltinID) {
  case NEON::BI__builtin_neon_vcled_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcales_f32:
  case NEON::BI__builtin_neon_vcaled_f64:
  case NEON::BI__builtin_neon_vcalts_f32:
  case NEON::BI__builtin_neon_vcaltd_f64:
    // Only one direction of comparisons actually exists: cmle is really cmge
    // with swapped operands. The table gives us the right intrinsic, but we
    // still need to do the swap.
    std::swap(Ops[0], Ops[1]);
    break;
  }

  assert(Int && "Generic code assumes a valid intrinsic");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  const Expr *Arg = E->getArg(0);
  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);

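  // The intrinsic may expect vector operands even though the builtin is
  // scalar; promote any operand whose width does not match its parameter by
  // truncating it to the element type and inserting it into lane 0 of a
  // poison vector.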
  int j = 0;
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    llvm::Type *ArgTy = ai->getType();
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
        ArgTy->getPrimitiveSizeInBits())
      continue;

    assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
    // The constant argument to an _n_ intrinsic always has Int32Ty, so
    // truncate it before inserting.
    Ops[j] = CGF.Builder.CreateTruncOrBitCast(
        Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
    Ops[j] =
        CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
  }

  Value *Result = CGF.EmitNeonCall(F, Ops, s);
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
      Result->getType()->getPrimitiveSizeInBits().getFixedValue())
    return CGF.Builder.CreateExtractElement(Result, C0);

  return CGF.Builder.CreateBitCast(Result, ResultType, s);
}

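// Common NEON lowering shared by the ARM and AArch64 emitters; the caller
// supplies the already target-specific LLVMIntrinsic/AltLLVMIntrinsic IDs
// from its intrinsic table.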
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
    llvm::Triple::ArchType Arch) {
  // Get the last argument, which specifies the vector type.
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  std::optional<llvm::APSInt> NeonTypeConst =
      Arg->getIntegerConstantExpr(getContext());
  if (!NeonTypeConst)
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst->getZExtValue());
  const bool Usgn = Type.isUnsigned();
  const bool Quad = Type.isQuad();
  const bool Floating = Type.isFloatingPoint();
  const bool HasFastHalfType = getTarget().hasFastHalfType();
  const bool AllowBFloatArgsAndRet =
      getTargetHooks().getABIInfo().allowBFloatArgsAndRet();

  llvm::FixedVectorType *VTy =
      GetNeonType(this, Type, HasFastHalfType, false, AllowBFloatArgsAndRet);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  auto getAlignmentValue32 = [&](Address addr) -> Value * {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

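  // With UnsignedAlts the table's primary intrinsic is the unsigned variant
  // and AltLLVMIntrinsic is the signed one, so switch to the alternate when
  // the type flags say this builtin is signed.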
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_splat_lane_v:
  case NEON::BI__builtin_neon_splat_laneq_v:
  case NEON::BI__builtin_neon_splatq_lane_v:
  case NEON::BI__builtin_neon_splatq_laneq_v: {
    auto NumElements = VTy->getElementCount();
    if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
      NumElements = NumElements * 2;
    if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
      NumElements = NumElements.divideCoefficientBy(2);

    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
  }
  case NEON::BI__builtin_neon_vpadd_v:
  case NEON::BI__builtin_neon_vpaddq_v:
    // We don't allow fp/int overloading of intrinsics.
    if (VTy->getElementType()->isFloatingPointTy() &&
        Int == Intrinsic::aarch64_neon_addp)
      Int = Intrinsic::aarch64_neon_faddp;
    break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
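  // Polynomial vadd: addition over GF(2) is bitwise XOR, performed on the
  // operands reinterpreted as vectors of i8.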
  case NEON::BI__builtin_neon_vadd_v:
  case NEON::BI__builtin_neon_vaddq_v: {
    llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
    return Builder.CreateBitCast(Ops[0], Ty);
  }
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    llvm::Type *Ty;
    switch (VTy->getScalarSizeInBits()) {
    default: llvm_unreachable("unexpected type");
    case 32:
      Ty = FloatTy;
      break;
    case 64:
      Ty = DoubleTy;
      break;
    case 16:
      Ty = HalfTy;
      break;
    }
    auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vceqz_v:
  case NEON::BI__builtin_neon_vceqzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vcgez_v:
  case NEON::BI__builtin_neon_vcgezq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
        "vcgez");
  case NEON::BI__builtin_neon_vclez_v:
  case NEON::BI__builtin_neon_vclezq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
        "vclez");
  case NEON::BI__builtin_neon_vcgtz_v:
  case NEON::BI__builtin_neon_vcgtzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
        "vcgtz");
  case NEON::BI__builtin_neon_vcltz_v:
  case NEON::BI__builtin_neon_vcltzq_v:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
        "vcltz");
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate a target-independent intrinsic, which needs a second
    // argument saying whether clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
                     HasFastHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f16_s16:
  case NEON::BI__builtin_neon_vcvt_f16_u16:
  case NEON::BI__builtin_neon_vcvtq_f16_s16:
  case NEON::BI__builtin_neon_vcvtq_f16_u16:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
                     HasFastHalfType);
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f16_s16:
  case NEON::BI__builtin_neon_vcvt_n_f16_u16:
  case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
  case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s16_f16:
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u16_f16:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvt_s16_f16:
  case NEON::BI__builtin_neon_vcvt_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s16_f16:
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u16_f16:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vcvtx_f32_v: {
    llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
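  // vext: concatenate the two sources and take a window of lanes starting at
  // the immediate, expressed as a single shufflevector.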
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<int, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i + CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld1_x2_v:
  case NEON::BI__builtin_neon_vld1q_x2_v:
  case NEON::BI__builtin_neon_vld1_x3_v:
  case NEON::BI__builtin_neon_vld1q_x3_v:
  case NEON::BI__builtin_neon_vld1_x4_v:
  case NEON::BI__builtin_neon_vld1q_x4_v: {
    llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v:
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v:
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v:
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = PoisonValue::get(Ty);
    PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
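  // vmovl/vmovn: lane widening and narrowing are a plain ext/trunc once the
  // operand is bitcast to the matching half- or double-width vector type.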
  case NEON::BI__builtin_neon_vmovl_v: {
    llvm::FixedVectorType *DTy =
        llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    llvm::FixedVectorType *QTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
2141 case NEON::BI__builtin_neon_vmull_v:
2142 // FIXME: the integer vmull operations could be emitted in terms of pure
2143 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
2144 // hoisting the exts outside loops. Until global ISel comes along that can
2145 // see through such movement this leads to bad CodeGen. So we need an
2146 // intrinsic for now.
2147 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
2148 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
2149 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys: Ty), Ops, name: "vmull");
2150 case NEON::BI__builtin_neon_vpadal_v:
2151 case NEON::BI__builtin_neon_vpadalq_v: {
2152 // The source operand type has twice as many elements of half the size.
2153 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2154 llvm::Type *EltTy =
2155 llvm::IntegerType::get(C&: getLLVMContext(), NumBits: EltBits / 2);
2156 auto *NarrowTy =
2157 llvm::FixedVectorType::get(ElementType: EltTy, NumElts: VTy->getNumElements() * 2);
2158 llvm::Type *Tys[2] = { Ty, NarrowTy };
2159 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: NameHint);
2160 }
2161 case NEON::BI__builtin_neon_vpaddl_v:
2162 case NEON::BI__builtin_neon_vpaddlq_v: {
2163 // The source operand type has twice as many elements of half the size.
2164 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
2165 llvm::Type *EltTy = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: EltBits / 2);
2166 auto *NarrowTy =
2167 llvm::FixedVectorType::get(ElementType: EltTy, NumElts: VTy->getNumElements() * 2);
2168 llvm::Type *Tys[2] = { Ty, NarrowTy };
2169 return EmitNeonCall(F: CGM.getIntrinsic(IID: Int, Tys), Ops, name: "vpaddl");
2170 }
2171 case NEON::BI__builtin_neon_vqdmlal_v:
2172 case NEON::BI__builtin_neon_vqdmlsl_v: {
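    // Emit the saturating doubling multiply first, then feed its result into
    // the saturating accumulate (or subtract) intrinsic.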
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqdmulhq_lane_v:
  case NEON::BI__builtin_neon_vqdmulh_lane_v:
  case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
  case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
    auto *RTy = cast<llvm::FixedVectorType>(Ty);
    if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
        BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
      RTy = llvm::FixedVectorType::get(RTy->getElementType(),
                                       RTy->getNumElements() * 2);
    llvm::Type *Tys[2] = {
        RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ false))};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
  case NEON::BI__builtin_neon_vqdmulh_laneq_v:
  case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
  case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
    llvm::Type *Tys[2] = {
        Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                            /*isQuad*/ true))};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
  case NEON::BI__builtin_neon_vrndi_v:
  case NEON::BI__builtin_neon_vrndiq_v:
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_nearbyint
              : Intrinsic::nearbyint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
  case NEON::BI__builtin_neon_vsha512hq_u64:
  case NEON::BI__builtin_neon_vsha512h2q_u64:
  case NEON::BI__builtin_neon_vsha512su0q_u64:
  case NEON::BI__builtin_neon_vsha512su1q_u64: {
    Function *F = CGM.getIntrinsic(Int);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
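    // Shift on the wide source type, then truncate to the narrow result.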
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsm3partw1q_u32:
  case NEON::BI__builtin_neon_vsm3partw2q_u32:
  case NEON::BI__builtin_neon_vsm3ss1q_u32:
  case NEON::BI__builtin_neon_vsm4ekeyq_u32:
  case NEON::BI__builtin_neon_vsm4eq_u32: {
    Function *F = CGM.getIntrinsic(Int);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vsm3tt1aq_u32:
  case NEON::BI__builtin_neon_vsm3tt1bq_u32:
  case NEON::BI__builtin_neon_vsm3tt2aq_u32:
  case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
    Function *F = CGM.getIntrinsic(Int);
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vst1_x2_v:
  case NEON::BI__builtin_neon_vst1q_x2_v:
  case NEON::BI__builtin_neon_vst1_x3_v:
  case NEON::BI__builtin_neon_vst1q_x3_v:
  case NEON::BI__builtin_neon_vst1_x4_v:
  case NEON::BI__builtin_neon_vst1q_x4_v: {
    // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
    // in AArch64 it comes last. We may want to standardize on one or the
    // other.
    if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
        Arch == llvm::Triple::aarch64_32) {
      llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
      std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
      return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
    }
    llvm::Type *Tys[2] = {DefaultPtrTy, VTy};
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::FixedVectorType *SrcTy =
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
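    // vtrn returns two transposed vectors through the sret pointer in Ops[0]:
    // build each result with a shufflevector and store it to its slot.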
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
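    // vtst: AND the operands, compare against zero, and sign-extend the i1
    // lanes so each result lane is all-ones or all-zeroes.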
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vxarq_u64: {
    Function *F = CGM.getIntrinsic(Int);
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    return EmitNeonCall(F, Ops, "");
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vdot_s32:
  case NEON::BI__builtin_neon_vdot_u32:
  case NEON::BI__builtin_neon_vdotq_s32:
  case NEON::BI__builtin_neon_vdotq_u32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
  }
  case NEON::BI__builtin_neon_vfmlal_low_f16:
  case NEON::BI__builtin_neon_vfmlalq_low_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
  }
  case NEON::BI__builtin_neon_vfmlsl_low_f16:
  case NEON::BI__builtin_neon_vfmlslq_low_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
  }
  case NEON::BI__builtin_neon_vfmlal_high_f16:
  case NEON::BI__builtin_neon_vfmlalq_high_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
  }
  case NEON::BI__builtin_neon_vfmlsl_high_f16:
  case NEON::BI__builtin_neon_vfmlslq_high_f16: {
    auto *InputTy =
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
  }
  case NEON::BI__builtin_neon_vmmlaq_s32:
  case NEON::BI__builtin_neon_vmmlaq_u32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
  }
  case NEON::BI__builtin_neon_vusmmlaq_s32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
  }
  case NEON::BI__builtin_neon_vusdot_s32:
  case NEON::BI__builtin_neon_vusdotq_s32: {
    auto *InputTy =
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
  }
  case NEON::BI__builtin_neon_vbfdot_f32:
  case NEON::BI__builtin_neon_vbfdotq_f32: {
    llvm::Type *InputTy =
        llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
    llvm::Type *Tys[2] = { Ty, InputTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
  }
  case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
    llvm::Type *Tys[1] = { Ty };
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvtfp2bf");
  }
  }

  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);

  Value *Result = EmitNeonCall(F, Ops, NameHint);
  llvm::Type *ResultType = ConvertType(E->getType());
  // Cast the one-element vector result of the AArch64 intrinsic back to the
  // scalar type expected by the builtin.
  return Builder.CreateBitCast(Result, ResultType, NameHint);
}

Value *
CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
                                               const CmpInst::Predicate Pred,
                                               const Twine &Name) {

  if (isa<FixedVectorType>(Ty)) {
    // Vector types are cast to i8 vectors. Recover the original type.
    Op = Builder.CreateBitCast(Op, Ty);
  }

  Constant *zero = Constant::getNullValue(Op->getType());

  if (CmpInst::isFPPredicate(Pred)) {
    if (Pred == CmpInst::FCMP_OEQ)
      Op = Builder.CreateFCmp(Pred, Op, zero);
    else
      Op = Builder.CreateFCmpS(Pred, Op, zero);
  } else {
    Op = Builder.CreateICmp(Pred, Op, zero);
  }

  llvm::Type *ResTy = Ty;
  if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
    ResTy = FixedVectorType::get(
        IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
        VTy->getNumElements());

  return Builder.CreateSExt(Op, ResTy, Name);
}

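/// Pack a list of 64-bit NEON table operands into 128-bit table registers by
/// concatenating adjacent pairs, zero-filling an odd tail, appending the index
/// operand, and emitting the requested table-lookup intrinsic.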
static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                                 Value *ExtOp, Value *IndexOp,
                                 llvm::Type *ResTy, unsigned IntID,
                                 const char *Name) {
  SmallVector<Value *, 2> TblOps;
  if (ExtOp)
    TblOps.push_back(ExtOp);

  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
  SmallVector<int, 16> Indices;
  auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
    Indices.push_back(2*i);
    Indices.push_back(2*i+1);
  }

  int PairPos = 0, End = Ops.size() - 1;
  while (PairPos < End) {
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
                                                     Ops[PairPos+1], Indices,
                                                     Name));
    PairPos += 2;
  }

  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
  // of the last 128-bit lookup table with zeroes.
  if (PairPos == End) {
    Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
                                                     ZeroTbl, Indices, Name));
  }

  Function *TblF;
  TblOps.push_back(IndexOp);
  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);

  return CGF.EmitNeonCall(TblF, TblOps, Name);
}

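// Map the ARM hint builtins (__builtin_arm_nop, __yield, __wfe, __wfi, __sev,
// __sevl and friends) to the immediate operand of the llvm.arm.hint
// intrinsic; return nullptr if BuiltinID is not a hint builtin.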
Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
  unsigned Value;
  switch (BuiltinID) {
  default:
    return nullptr;
  case clang::ARM::BI__builtin_arm_nop:
    Value = 0;
    break;
  case clang::ARM::BI__builtin_arm_yield:
  case clang::ARM::BI__yield:
    Value = 1;
    break;
  case clang::ARM::BI__builtin_arm_wfe:
  case clang::ARM::BI__wfe:
    Value = 2;
    break;
  case clang::ARM::BI__builtin_arm_wfi:
  case clang::ARM::BI__wfi:
    Value = 3;
    break;
  case clang::ARM::BI__builtin_arm_sev:
  case clang::ARM::BI__sev:
    Value = 4;
    break;
  case clang::ARM::BI__builtin_arm_sevl:
  case clang::ARM::BI__sevl:
    Value = 5;
    break;
  }

  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
                            llvm::ConstantInt::get(Int32Ty, Value));
}

enum SpecialRegisterAccessKind {
  NormalRead,
  VolatileRead,
  Write,
};

// Generates the IR for the read/write special register builtin.
// ValueType is the type of the value that is to be written or read;
// RegisterType is the type of the register being written to or read from.
static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
                                         const CallExpr *E,
                                         llvm::Type *RegisterType,
                                         llvm::Type *ValueType,
                                         SpecialRegisterAccessKind AccessKind,
                                         StringRef SysReg = "") {
  // The write and read register intrinsics only support 32-, 64- and 128-bit
  // operations.
  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
          RegisterType->isIntegerTy(128)) &&
         "Unsupported size for register.");

  CodeGen::CGBuilderTy &Builder = CGF.Builder;
  CodeGen::CodeGenModule &CGM = CGF.CGM;
  LLVMContext &Context = CGM.getLLVMContext();

  if (SysReg.empty()) {
    const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
    SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
  }

  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

  llvm::Type *Types[] = { RegisterType };

  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) &&
         "Can't fit 64-bit value in 32-bit register");

  if (AccessKind != Write) {
    assert(AccessKind == NormalRead || AccessKind == VolatileRead);
    llvm::Function *F = CGM.getIntrinsic(
        AccessKind == VolatileRead ? Intrinsic::read_volatile_register
                                   : Intrinsic::read_register,
        Types);
    llvm::Value *Call = Builder.CreateCall(F, Metadata);

    if (MixedTypes)
      // Read into a 64-bit register, then truncate the result to 32 bits.
      return Builder.CreateTrunc(Call, ValueType);

    if (ValueType->isPointerTy())
      // Have an i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
      return Builder.CreateIntToPtr(Call, ValueType);

    return Call;
  }

  llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
  if (MixedTypes) {
    // Extend the 32-bit write value to 64 bits to pass to the intrinsic.
    ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
    return Builder.CreateCall(F, { Metadata, ArgValue });
  }

  if (ValueType->isPointerTy()) {
    // Have a VoidPtrTy ArgValue but need to pass an i32/i64.
    ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
    return Builder.CreateCall(F, { Metadata, ArgValue });
  }

  return Builder.CreateCall(F, { Metadata, ArgValue });
}

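// Emit a call to the AArch64 range prefetch intrinsic. For the _x form of the
// builtin, the constant length, count, stride, and reuse-distance arguments
// are packed into a single 64-bit metadata operand.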
static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
                                       const CallExpr *E) {
  CodeGen::CGBuilderTy &Builder = CGF.Builder;
  CodeGen::CodeGenModule &CGM = CGF.CGM;
  SmallVector<llvm::Value *, 4> Ops;

  auto getIntArg = [&](unsigned ArgNo) {
    Expr::EvalResult Result;
    if (!E->getArg(ArgNo)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Expected constant argument to range prefetch.");
    return Result.Val.getInt().getExtValue();
  };

  Ops.push_back(CGF.EmitScalarExpr(E->getArg(0))); /*Addr*/
  Ops.push_back(CGF.EmitScalarExpr(E->getArg(1))); /*Access Kind*/
  Ops.push_back(CGF.EmitScalarExpr(E->getArg(2))); /*Policy*/

  if (BuiltinID == clang::AArch64::BI__builtin_arm_range_prefetch_x) {
    auto Length = getIntArg(3);
    auto Count = getIntArg(4) - 1;
    auto Stride = getIntArg(5);
    auto Distance = getIntArg(6);

    // Map ReuseDistance given in bytes to four bits representing decreasing
    // powers of two in the range 512MiB (0b0001) to 32KiB (0b1111). Values
    // are rounded up to the nearest power of 2, starting at 32KiB. Any value
    // over the maximum is represented by 0 (distance not known).
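    // For example, a requested distance of 64KiB has Log2_32_Ceil == 16 and
    // encodes as 30 - 16 == 14; anything above 512MiB encodes as 0.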
    if (Distance > 0) {
      Distance = llvm::Log2_32_Ceil(Distance);
      if (Distance < 15)
        Distance = 15;
      else if (Distance > 29)
        Distance = 0;
      else
        Distance = 30 - Distance;
    }

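    // Pack the operands into the 64-bit metadata word:
    // [63:60] ReuseDistance, [59:38] Stride, [37:22] Count - 1, [21:0] Length.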
    uint64_t Mask22 = (1ULL << 22) - 1;
    uint64_t Mask16 = (1ULL << 16) - 1;
    uint64_t Metadata = (Distance << 60) | ((Stride & Mask22) << 38) |
                        ((Count & Mask16) << 22) | (Length & Mask22);

    Ops.push_back(llvm::ConstantInt::get(Builder.getInt64Ty(), Metadata));
  } else
    Ops.push_back(CGF.EmitScalarExpr(E->getArg(3)));

  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_range_prefetch),
                            Ops);
}

/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
/// argument that specifies the vector type. The additional argument is meant
/// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
/// should be kept consistent with the logic in Sema.
/// TODO: Make this return false for SISD builtins.
static bool HasExtraNeonArgument(unsigned BuiltinID) {
  // Required by the headers included below, but not used in this particular
  // function.
  [[maybe_unused]] int PtrArgNum = -1;
  [[maybe_unused]] bool HasConstPtr = false;

  // The mask encodes the type. We don't care about the actual value. Instead,
  // we just check whether it's been set.
  uint64_t mask = 0;
  switch (BuiltinID) {
#define GET_NEON_OVERLOAD_CHECK
#include "clang/Basic/arm_fp16.inc"
#include "clang/Basic/arm_neon.inc"
#undef GET_NEON_OVERLOAD_CHECK
  // Non-NEON builtins for controlling VFP that take an extra argument
  // discriminating the type.
  case ARM::BI__builtin_arm_vcvtr_f:
  case ARM::BI__builtin_arm_vcvtr_d:
    mask = 1;
  }

  if (mask)
    return true;

  return false;
}

Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E,
                                           ReturnValueSlot ReturnValue,
                                           llvm::Triple::ArchType Arch) {
  if (auto Hint = GetValueForARMHint(BuiltinID))
    return Hint;

  if (BuiltinID == clang::ARM::BI__emit) {
    bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
    llvm::FunctionType *FTy =
        llvm::FunctionType::get(VoidTy, /*Variadic=*/false);

    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::APSInt Value = Result.Val.getInt();
    uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();

    llvm::InlineAsm *Emit =
        IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
                                 /*hasSideEffects=*/true)
                : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
                                 /*hasSideEffects=*/true);

    return Builder.CreateCall(Emit);
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
    Value *Option = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *RW = EmitScalarExpr(E->getArg(1));
    Value *IsData = EmitScalarExpr(E->getArg(2));

    // Locality is not supported on the ARM target.
    Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);

    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
      BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
    Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
    if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
      Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
    return Res;
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
  }
  if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
                              "cls");
  }

  if (BuiltinID == clang::ARM::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    const FunctionDecl *FD = E->getDirectCallee();
    Value *Ops[2];
    for (unsigned i = 0; i < 2; i++)
      Ops[i] = EmitScalarExpr(E->getArg(i));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
      BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_mcrr:
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
      break;
    case clang::ARM::BI__builtin_arm_mcrr2:
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
      break;
    }

    // The MCRR{2} instruction has 5 operands, but the intrinsic has only 4:
    // Rt and Rt2 are represented as a single unsigned 64-bit integer in the
    // intrinsic definition, although internally they are two 32-bit integers.
    Value *Coproc = EmitScalarExpr(E->getArg(0));
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
    Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
    Value *CRm = EmitScalarExpr(E->getArg(3));

    Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
    Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
    Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
    Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);

    return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
      BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_mrrc:
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
      break;
    case clang::ARM::BI__builtin_arm_mrrc2:
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
      break;
    }

    Value *Coproc = EmitScalarExpr(E->getArg(0));
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
    Value *CRm = EmitScalarExpr(E->getArg(2));
    Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});

    // The intrinsic returns an unsigned 64-bit integer represented as two
    // 32-bit integers, so reassemble them into a single i64.
    Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
    Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
    Rt = Builder.CreateZExt(Rt, Int64Ty);
    Rt1 = Builder.CreateZExt(Rt1, Int64Ty);

    Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
    RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", /*HasNUW=*/true);
    RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);

    return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
      ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
        BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
       getContext().getTypeSize(E->getType()) == 64) ||
      BuiltinID == clang::ARM::BI__ldrexd) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case clang::ARM::BI__builtin_arm_ldaex:
      F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
      break;
    case clang::ARM::BI__builtin_arm_ldrexd:
    case clang::ARM::BI__builtin_arm_ldrex:
    case clang::ARM::BI__ldrexd:
      F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
      break;
    }

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");

    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    Val0 = Builder.CreateZExt(Val0, Int64Ty);
    Val1 = Builder.CreateZExt(Val1, Int64Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
      BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));

    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);
    llvm::Type *IntTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));

    Function *F = CGM.getIntrinsic(
        BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
                                                       : Intrinsic::arm_ldrex,
        DefaultPtrTy);
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
    Val->addParamAttr(
        0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));

    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);
    else {
      llvm::Type *IntResTy = llvm::IntegerType::get(
          getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
      return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
                                   RealResTy);
    }
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
      ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
        BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
       getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
    Function *F = CGM.getIntrinsic(
        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
                                                       : Intrinsic::arm_strexd);
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);

    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    Value *Val = EmitScalarExpr(E->getArg(0));
    Builder.CreateStore(Val, Tmp);

    Address LdPtr = Tmp.withElementType(STy);
    Val = Builder.CreateLoad(LdPtr);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = EmitScalarExpr(E->getArg(1));
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
      BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));

    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
    else {
      llvm::Type *IntTy = llvm::IntegerType::get(
          getLLVMContext(),
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
    }

    Function *F = CGM.getIntrinsic(
        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
                                                       : Intrinsic::arm_strex,
        StoreAddr->getType());

    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
    CI->addParamAttr(
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
    return CI;
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
    return Builder.CreateCall(F);
  }

  // CRC32
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::ARM::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::arm_crc32b; break;
  case clang::ARM::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
  case clang::ARM::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::arm_crc32h; break;
  case clang::ARM::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
  case clang::ARM::BI__builtin_arm_crc32w:
  case clang::ARM::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::arm_crc32w; break;
  case clang::ARM::BI__builtin_arm_crc32cw:
  case clang::ARM::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));

    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
    // intrinsics, hence we need different codegen for these cases.
    if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
        BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
      Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
      Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
      Value *Arg1b = Builder.CreateLShr(Arg1, C1);
      Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);

      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
      Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
      return Builder.CreateCall(F, {Res, Arg1b});
    } else {
      Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);

      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
      return Builder.CreateCall(F, {Arg0, Arg1});
    }
  }

  if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
      BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
      BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
      BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {

    SpecialRegisterAccessKind AccessKind = Write;
    if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
        BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
        BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
      AccessKind = VolatileRead;

    bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
                            BuiltinID == clang::ARM::BI__builtin_arm_wsrp;

    bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
                   BuiltinID == clang::ARM::BI__builtin_arm_wsr64;

    llvm::Type *ValueType;
    llvm::Type *RegisterType;
    if (IsPointerBuiltin) {
      ValueType = VoidPtrTy;
      RegisterType = Int32Ty;
    } else if (Is64Bit) {
      ValueType = RegisterType = Int64Ty;
    } else {
      ValueType = RegisterType = Int32Ty;
    }

    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
                                      AccessKind);
  }

  if (BuiltinID == ARM::BI__builtin_sponentry) {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }

  // Handle MSVC intrinsics before argument evaluation to prevent double
  // evaluation.
  if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);

  // Deal with MVE builtins.
  if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
    return Result;
  // Handle CDE builtins.
  if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
    return Result;

  // Some intrinsics are equivalent; if so, use the base intrinsic ID.
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
    return P.first == BuiltinID;
  });
  if (It != end(NEONEquivalentIntrinsicMap))
    BuiltinID = It->second;

  // Find out if any arguments are required to be integer constant
  // expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  Address PtrOp0 = Address::invalid();
  Address PtrOp1 = Address::invalid();
  SmallVector<Value*, 4> Ops;
  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
  for (unsigned i = 0, e = NumArgs; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld1_v:
      case NEON::BI__builtin_neon_vld1q_v:
      case NEON::BI__builtin_neon_vld1q_lane_v:
      case NEON::BI__builtin_neon_vld1_lane_v:
      case NEON::BI__builtin_neon_vld1_dup_v:
      case NEON::BI__builtin_neon_vld1q_dup_v:
      case NEON::BI__builtin_neon_vst1_v:
      case NEON::BI__builtin_neon_vst1q_v:
      case NEON::BI__builtin_neon_vst1q_lane_v:
      case NEON::BI__builtin_neon_vst1_lane_v:
      case NEON::BI__builtin_neon_vst2_v:
      case NEON::BI__builtin_neon_vst2q_v:
      case NEON::BI__builtin_neon_vst2_lane_v:
      case NEON::BI__builtin_neon_vst2q_lane_v:
      case NEON::BI__builtin_neon_vst3_v:
      case NEON::BI__builtin_neon_vst3q_v:
      case NEON::BI__builtin_neon_vst3_lane_v:
      case NEON::BI__builtin_neon_vst3q_lane_v:
      case NEON::BI__builtin_neon_vst4_v:
      case NEON::BI__builtin_neon_vst4q_v:
      case NEON::BI__builtin_neon_vst4_lane_v:
      case NEON::BI__builtin_neon_vst4q_lane_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(PtrOp0.emitRawPointer(*this));
        continue;
      }
    }
    if (i == 1) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld2_v:
      case NEON::BI__builtin_neon_vld2q_v:
      case NEON::BI__builtin_neon_vld3_v:
      case NEON::BI__builtin_neon_vld3q_v:
      case NEON::BI__builtin_neon_vld4_v:
      case NEON::BI__builtin_neon_vld4q_v:
      case NEON::BI__builtin_neon_vld2_lane_v:
      case NEON::BI__builtin_neon_vld2q_lane_v:
      case NEON::BI__builtin_neon_vld3_lane_v:
      case NEON::BI__builtin_neon_vld3q_lane_v:
      case NEON::BI__builtin_neon_vld4_lane_v:
      case NEON::BI__builtin_neon_vld4q_lane_v:
      case NEON::BI__builtin_neon_vld2_dup_v:
      case NEON::BI__builtin_neon_vld2q_dup_v:
      case NEON::BI__builtin_neon_vld3_dup_v:
      case NEON::BI__builtin_neon_vld3q_dup_v:
      case NEON::BI__builtin_neon_vld4_dup_v:
      case NEON::BI__builtin_neon_vld4q_dup_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
        Ops.push_back(PtrOp1.emitRawPointer(*this));
        continue;
      }
    }

    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
  }

  switch (BuiltinID) {
  default: break;

  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vget_lane_bf16:
  case NEON::BI__builtin_neon_vget_lane_f32:
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
  case NEON::BI__builtin_neon_vgetq_lane_f32:
  case NEON::BI__builtin_neon_vduph_lane_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");

  case NEON::BI__builtin_neon_vrndns_f32: {
    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Tys[] = {Arg->getType()};
    Function *F = CGM.getIntrinsic(Intrinsic::roundeven, Tys);
    return Builder.CreateCall(F, {Arg}, "vrndn");
  }

  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_bf16:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");

  case NEON::BI__builtin_neon_vsha1h_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1cq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1pq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1mq_u32:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
                        "vsha1h");

  case NEON::BI__builtin_neon_vcvth_bf16_f32: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
                        "vcvtbfp2bf");
  }

  // The ARM _MoveToCoprocessor builtins put the input register value as
  // the first argument, but the LLVM intrinsic expects it as the third one.
  case clang::ARM::BI_MoveToCoprocessor:
  case clang::ARM::BI_MoveToCoprocessor2: {
    Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
                                       ? Intrinsic::arm_mcr
                                       : Intrinsic::arm_mcr2);
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
                                  Ops[3], Ops[4], Ops[5]});
  }
  }

  // Get the last argument, which specifies the vector type.
  assert(HasExtraArg);
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
  std::optional<llvm::APSInt> Result =
      Arg->getIntegerConstantExpr(getContext());
  if (!Result)
    return nullptr;

  if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
      BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
    // Determine the overloaded type of this builtin.
    llvm::Type *Ty;
    if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
      Ty = FloatTy;
    else
      Ty = DoubleTy;

    // Determine whether this is an unsigned conversion or not.
    bool usgn = Result->getZExtValue() == 1;
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;

    // Call the appropriate intrinsic.
    Function *F = CGM.getIntrinsic(Int, Ty);
    return Builder.CreateCall(F, Ops, "vcvtr");
  }

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type = Result->getZExtValue();
  bool usgn = Type.isUnsigned();
  bool rightShift = false;

  llvm::FixedVectorType *VTy =
      GetNeonType(this, Type, getTarget().hasFastHalfType(), false,
                  getTarget().hasBFloat16Type());
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Many NEON builtins have identical semantics and uses in ARM and
  // AArch64. Emit these in a single function.
  auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
      IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
  if (Builtin)
    return EmitCommonNeonBuiltinExpr(
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
        Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);

  unsigned Int;
  switch (BuiltinID) {
  default: return nullptr;
  case NEON::BI__builtin_neon_vld1q_lane_v:
    // Handle 64-bit integer elements as a special case. Use shuffles of
    // one-element vectors to avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      // Extract the other lane.
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      // Load the value as a one-element vector.
      Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
      llvm::Type *Tys[] = {Ty, Int8PtrTy};
      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
      Value *Align = getAlignmentValue32(PtrOp0);
      Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
      // Combine them.
      int Indices[] = {1 - Lane, Lane};
      return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
    }
    [[fallthrough]];
  case NEON::BI__builtin_neon_vld1_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
    Value *Ld = Builder.CreateLoad(PtrOp0);
    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int =
        usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 1, true);
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
                        Ops, "vqrshrun_n", 1, true);
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 1, true);
  case NEON::BI__builtin_neon_vqshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
                        Ops, "vqshrun_n", 1, true);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
                        Ops, "vrecpe");
  case NEON::BI__builtin_neon_vrshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
                        Ops, "vrshrn_n", 1, true);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v:
    rightShift = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v:
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
                        Ops, "vsli_n");
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vst1q_lane_v:
    // Handle 64-bit integer elements as a special case. Use a shuffle to get
    // a one-element vector and avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      Ops[2] = getAlignmentValue32(PtrOp0);
      llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
                                                 Tys), Ops);
    }
    [[fallthrough]];
  case NEON::BI__builtin_neon_vst1_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    return Builder.CreateStore(Ops[1],
                               PtrOp0.withElementType(Ops[1]->getType()));
  }
  case NEON::BI__builtin_neon_vtbl1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
                        Ops, "vtbl1");
  case NEON::BI__builtin_neon_vtbl2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
                        Ops, "vtbl2");
  case NEON::BI__builtin_neon_vtbl3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
                        Ops, "vtbl3");
  case NEON::BI__builtin_neon_vtbl4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
                        Ops, "vtbl4");
  case NEON::BI__builtin_neon_vtbx1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
                        Ops, "vtbx1");
  case NEON::BI__builtin_neon_vtbx2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
                        Ops, "vtbx2");
  case NEON::BI__builtin_neon_vtbx3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
                        Ops, "vtbx3");
  case NEON::BI__builtin_neon_vtbx4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
                        Ops, "vtbx4");
  }
}
3426
3427template<typename Integer>
3428static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
3429 return E->getIntegerConstantExpr(Ctx: Context)->getExtValue();
3430}
3431
3432static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
3433 llvm::Type *T, bool Unsigned) {
3434 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
3435 // which finds it convenient to specify signed/unsigned as a boolean flag.
3436 return Unsigned ? Builder.CreateZExt(V, DestTy: T) : Builder.CreateSExt(V, DestTy: T);
3437}
3438
3439static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
3440 uint32_t Shift, bool Unsigned) {
3441 // MVE helper function for integer shift right. This must handle signed vs
3442 // unsigned, and also deal specially with the case where the shift count is
3443 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
3444 // undefined behavior, but in MVE it's legal, so we must convert it to code
3445 // that is not undefined in IR.
3446 unsigned LaneBits = cast<llvm::VectorType>(Val: V->getType())
3447 ->getElementType()
3448 ->getPrimitiveSizeInBits();
3449 if (Shift == LaneBits) {
3450 // An unsigned shift of the full lane size always generates zero, so we can
3451 // simply emit a zero vector. A signed shift of the full lane size does the
3452 // same thing as shifting by one bit fewer.
3453 if (Unsigned)
3454 return llvm::Constant::getNullValue(Ty: V->getType());
3455 else
3456 --Shift;
3457 }
3458 return Unsigned ? Builder.CreateLShr(LHS: V, RHS: Shift) : Builder.CreateAShr(LHS: V, RHS: Shift);
3459}
3460
3461static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
3462 // MVE-specific helper function for a vector splat, which infers the element
3463 // count of the output vector by knowing that MVE vectors are all 128 bits
3464 // wide.
3465 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
3466 return Builder.CreateVectorSplat(NumElts: Elements, V);
3467}
3468
3469static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
3470 CodeGenFunction *CGF,
3471 llvm::Value *V,
3472 llvm::Type *DestType) {
3473 // Convert one MVE vector type into another by reinterpreting its in-register
3474 // format.
3475 //
  // On a little-endian target this is identical to a bitcast (which
  // reinterprets the memory format). On a big-endian target the two are not
  // necessarily the same, because the register and memory formats map to
  // each other differently depending on the lane size.
3480 //
3481 // We generate a bitcast whenever we can (if we're little-endian, or if the
3482 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
3483 // that performs the different kind of reinterpretation.
3484 if (CGF->getTarget().isBigEndian() &&
3485 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
3486 return Builder.CreateCall(
3487 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vreinterpretq,
3488 Tys: {DestType, V->getType()}),
3489 Args: V);
3490 } else {
3491 return Builder.CreateBitCast(V, DestTy: DestType);
3492 }
3493}
3494
3495static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
3496 // Make a shufflevector that extracts every other element of a vector (evens
3497 // or odds, as desired).
3498 SmallVector<int, 16> Indices;
3499 unsigned InputElements =
3500 cast<llvm::FixedVectorType>(Val: V->getType())->getNumElements();
3501 for (unsigned i = 0; i < InputElements; i += 2)
3502 Indices.push_back(Elt: i + Odd);
3503 return Builder.CreateShuffleVector(V, Mask: Indices);
3504}
3505
3506static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
3507 llvm::Value *V1) {
3508 // Make a shufflevector that interleaves two vectors element by element.
3509 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
3510 SmallVector<int, 16> Indices;
3511 unsigned InputElements =
3512 cast<llvm::FixedVectorType>(Val: V0->getType())->getNumElements();
3513 for (unsigned i = 0; i < InputElements; i++) {
3514 Indices.push_back(Elt: i);
3515 Indices.push_back(Elt: i + InputElements);
3516 }
3517 return Builder.CreateShuffleVector(V1: V0, V2: V1, Mask: Indices);
3518}
3519
3520template<unsigned HighBit, unsigned OtherBits>
3521static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
3522 // MVE-specific helper function to make a vector splat of a constant such as
3523 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
3524 llvm::Type *T = cast<llvm::VectorType>(Val: VT)->getElementType();
3525 unsigned LaneBits = T->getPrimitiveSizeInBits();
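  // Set the top bit of the lane if HighBit is 1, then optionally set every
  // bit below it, so <1,0> gives INT_MIN, <0,1> gives INT_MAX and <1,1>
  // gives UINT_MAX.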
3526 uint32_t Value = HighBit << (LaneBits - 1);
3527 if (OtherBits)
3528 Value |= (1UL << (LaneBits - 1)) - 1;
3529 llvm::Value *Lane = llvm::ConstantInt::get(Ty: T, V: Value);
3530 return ARMMVEVectorSplat(Builder, V: Lane);
3531}
3532
3533static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
3534 llvm::Value *V,
3535 unsigned ReverseWidth) {
3536 // MVE-specific helper function which reverses the elements of a
3537 // vector within every (ReverseWidth)-bit collection of lanes.
3538 SmallVector<int, 16> Indices;
3539 unsigned LaneSize = V->getType()->getScalarSizeInBits();
3540 unsigned Elements = 128 / LaneSize;
3541 unsigned Mask = ReverseWidth / LaneSize - 1;
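  // XOR-ing each lane index with Mask reverses the order of the lanes within
  // every group of (ReverseWidth / LaneSize) elements.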
3542 for (unsigned i = 0; i < Elements; i++)
3543 Indices.push_back(Elt: i ^ Mask);
3544 return Builder.CreateShuffleVector(V, Mask: Indices);
3545}
3546
3547static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
3548 CodeGenFunction *CGF, llvm::Value *V,
3549 llvm::Type *Ty) {
3550 return Builder.CreateCall(
3551 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vcvt_fp_int, Tys: {Ty, V->getType()}),
3552 Args: {V, llvm::ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0)});
3553}
3554
3555static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
3556 CodeGenFunction *CGF, llvm::Value *V,
3557 llvm::Type *Ty) {
3558 return Builder.CreateCall(
3559 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vcvt_fp_int, Tys: {Ty, V->getType()}),
3560 Args: {V, llvm::ConstantInt::get(Ty: Builder.getInt32Ty(), V: 1)});
3561}
3562
3563static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
3564 CodeGenFunction *CGF, llvm::Value *V,
3565 llvm::Type *Ty) {
3566 return Builder.CreateCall(
3567 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vcvt_int_fp, Tys: {Ty, V->getType()}),
3568 Args: {V, llvm::ConstantInt::get(Ty: Builder.getInt32Ty(), V: 0)});
3569}
3570
3571static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
3572 CodeGenFunction *CGF, llvm::Value *V,
3573 llvm::Type *Ty) {
3574 return Builder.CreateCall(
3575 Callee: CGF->CGM.getIntrinsic(IID: Intrinsic::arm_mve_vcvt_int_fp, Tys: {Ty, V->getType()}),
3576 Args: {V, llvm::ConstantInt::get(Ty: Builder.getInt32Ty(), V: 1)});
3577}
3578
3579Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
3580 const CallExpr *E,
3581 ReturnValueSlot ReturnValue,
3582 llvm::Triple::ArchType Arch) {
3583 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
3584 Intrinsic::ID IRIntr;
3585 unsigned NumVectors;
3586
3587 // Code autogenerated by Tablegen will handle all the simple builtins.
3588 switch (BuiltinID) {
3589 #include "clang/Basic/arm_mve_builtin_cg.inc"
3590
3591 // If we didn't match an MVE builtin id at all, go back to the
3592 // main EmitARMBuiltinExpr.
3593 default:
3594 return nullptr;
3595 }
3596
3597 // Anything that breaks from that switch is an MVE builtin that
3598 // needs handwritten code to generate.
3599
3600 switch (CustomCodeGenType) {
3601
3602 case CustomCodeGen::VLD24: {
3603 llvm::SmallVector<Value *, 4> Ops;
3604 llvm::SmallVector<llvm::Type *, 4> Tys;
3605
3606 auto MvecCType = E->getType();
3607 auto MvecLType = ConvertType(T: MvecCType);
3608 assert(MvecLType->isStructTy() &&
3609 "Return type for vld[24]q should be a struct");
3610 assert(MvecLType->getStructNumElements() == 1 &&
3611 "Return-type struct for vld[24]q should have one element");
3612 auto MvecLTypeInner = MvecLType->getStructElementType(N: 0);
3613 assert(MvecLTypeInner->isArrayTy() &&
3614 "Return-type struct for vld[24]q should contain an array");
3615 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
3616 "Array member of return-type struct vld[24]q has wrong length");
3617 auto VecLType = MvecLTypeInner->getArrayElementType();
3618
3619 Tys.push_back(Elt: VecLType);
3620
3621 auto Addr = E->getArg(Arg: 0);
3622 Ops.push_back(Elt: EmitScalarExpr(E: Addr));
3623 Tys.push_back(Elt: ConvertType(T: Addr->getType()));
3624
3625 Function *F = CGM.getIntrinsic(IID: IRIntr, Tys: ArrayRef(Tys));
3626 Value *LoadResult = Builder.CreateCall(Callee: F, Args: Ops);
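    // The intrinsic returns its NumVectors results as members of a flat
    // struct; repack them into the array-in-struct layout of the ACLE
    // multi-vector type.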
3627 Value *MvecOut = PoisonValue::get(T: MvecLType);
3628 for (unsigned i = 0; i < NumVectors; ++i) {
3629 Value *Vec = Builder.CreateExtractValue(Agg: LoadResult, Idxs: i);
3630 MvecOut = Builder.CreateInsertValue(Agg: MvecOut, Val: Vec, Idxs: {0, i});
3631 }
3632
3633 if (ReturnValue.isNull())
3634 return MvecOut;
3635 else
3636 return Builder.CreateStore(Val: MvecOut, Addr: ReturnValue.getAddress());
3637 }
3638
3639 case CustomCodeGen::VST24: {
3640 llvm::SmallVector<Value *, 4> Ops;
3641 llvm::SmallVector<llvm::Type *, 4> Tys;
3642
3643 auto Addr = E->getArg(Arg: 0);
3644 Ops.push_back(Elt: EmitScalarExpr(E: Addr));
3645 Tys.push_back(Elt: ConvertType(T: Addr->getType()));
3646
3647 auto MvecCType = E->getArg(Arg: 1)->getType();
3648 auto MvecLType = ConvertType(T: MvecCType);
    assert(MvecLType->isStructTy() &&
           "Data type for vst[24]q should be a struct");
    assert(MvecLType->getStructNumElements() == 1 &&
           "Data-type struct for vst[24]q should have one element");
    auto MvecLTypeInner = MvecLType->getStructElementType(N: 0);
    assert(MvecLTypeInner->isArrayTy() &&
           "Data-type struct for vst[24]q should contain an array");
    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
           "Array member of data-type struct for vst[24]q has wrong length");
3657 auto VecLType = MvecLTypeInner->getArrayElementType();
3658
3659 Tys.push_back(Elt: VecLType);
3660
3661 AggValueSlot MvecSlot = CreateAggTemp(T: MvecCType);
3662 EmitAggExpr(E: E->getArg(Arg: 1), AS: MvecSlot);
3663 auto Mvec = Builder.CreateLoad(Addr: MvecSlot.getAddress());
3664 for (unsigned i = 0; i < NumVectors; i++)
3665 Ops.push_back(Elt: Builder.CreateExtractValue(Agg: Mvec, Idxs: {0, i}));
3666
3667 Function *F = CGM.getIntrinsic(IID: IRIntr, Tys: ArrayRef(Tys));
3668 Value *ToReturn = nullptr;
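    // vst[24]q expands to one intrinsic call per vector, with the trailing
    // immediate operand selecting which of the NumVectors registers to store.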
3669 for (unsigned i = 0; i < NumVectors; i++) {
3670 Ops.push_back(Elt: llvm::ConstantInt::get(Ty: Int32Ty, V: i));
3671 ToReturn = Builder.CreateCall(Callee: F, Args: Ops);
3672 Ops.pop_back();
3673 }
3674 return ToReturn;
3675 }
3676 }
3677 llvm_unreachable("unknown custom codegen type.");
3678}
3679
3680Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
3681 const CallExpr *E,
3682 ReturnValueSlot ReturnValue,
3683 llvm::Triple::ArchType Arch) {
3684 switch (BuiltinID) {
3685 default:
3686 return nullptr;
3687#include "clang/Basic/arm_cde_builtin_cg.inc"
3688 }
3689}
3690
3691static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3692 const CallExpr *E,
3693 SmallVectorImpl<Value *> &Ops,
3694 llvm::Triple::ArchType Arch) {
3695 unsigned int Int = 0;
3696 const char *s = nullptr;
3697
3698 switch (BuiltinID) {
3699 default:
3700 return nullptr;
3701 case NEON::BI__builtin_neon_vtbl1_v:
3702 case NEON::BI__builtin_neon_vqtbl1_v:
3703 case NEON::BI__builtin_neon_vqtbl1q_v:
3704 case NEON::BI__builtin_neon_vtbl2_v:
3705 case NEON::BI__builtin_neon_vqtbl2_v:
3706 case NEON::BI__builtin_neon_vqtbl2q_v:
3707 case NEON::BI__builtin_neon_vtbl3_v:
3708 case NEON::BI__builtin_neon_vqtbl3_v:
3709 case NEON::BI__builtin_neon_vqtbl3q_v:
3710 case NEON::BI__builtin_neon_vtbl4_v:
3711 case NEON::BI__builtin_neon_vqtbl4_v:
3712 case NEON::BI__builtin_neon_vqtbl4q_v:
3713 break;
3714 case NEON::BI__builtin_neon_vtbx1_v:
3715 case NEON::BI__builtin_neon_vqtbx1_v:
3716 case NEON::BI__builtin_neon_vqtbx1q_v:
3717 case NEON::BI__builtin_neon_vtbx2_v:
3718 case NEON::BI__builtin_neon_vqtbx2_v:
3719 case NEON::BI__builtin_neon_vqtbx2q_v:
3720 case NEON::BI__builtin_neon_vtbx3_v:
3721 case NEON::BI__builtin_neon_vqtbx3_v:
3722 case NEON::BI__builtin_neon_vqtbx3q_v:
3723 case NEON::BI__builtin_neon_vtbx4_v:
3724 case NEON::BI__builtin_neon_vqtbx4_v:
3725 case NEON::BI__builtin_neon_vqtbx4q_v:
3726 break;
3727 }
3728
3729 assert(E->getNumArgs() >= 3);
3730
3731 // Get the last argument, which specifies the vector type.
3732 const Expr *Arg = E->getArg(Arg: E->getNumArgs() - 1);
3733 std::optional<llvm::APSInt> Result =
3734 Arg->getIntegerConstantExpr(Ctx: CGF.getContext());
3735 if (!Result)
3736 return nullptr;
3737
3738 // Determine the type of this overloaded NEON intrinsic.
3739 NeonTypeFlags Type = Result->getZExtValue();
3740 llvm::FixedVectorType *Ty = GetNeonType(CGF: &CGF, TypeFlags: Type);
3741 if (!Ty)
3742 return nullptr;
3743
3744 CodeGen::CGBuilderTy &Builder = CGF.Builder;
3745
  // AArch64 scalar builtins are not overloaded: they do not have an extra
  // argument that specifies the vector type, so each case must be handled
  // individually.
3748 switch (BuiltinID) {
3749 case NEON::BI__builtin_neon_vtbl1_v: {
3750 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 1), ExtOp: nullptr, IndexOp: Ops[1],
3751 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl1, Name: "vtbl1");
3752 }
3753 case NEON::BI__builtin_neon_vtbl2_v: {
3754 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 2), ExtOp: nullptr, IndexOp: Ops[2],
3755 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl1, Name: "vtbl1");
3756 }
3757 case NEON::BI__builtin_neon_vtbl3_v: {
3758 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 3), ExtOp: nullptr, IndexOp: Ops[3],
3759 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl2, Name: "vtbl2");
3760 }
3761 case NEON::BI__builtin_neon_vtbl4_v: {
3762 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 0, M: 4), ExtOp: nullptr, IndexOp: Ops[4],
3763 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbl2, Name: "vtbl2");
3764 }
3765 case NEON::BI__builtin_neon_vtbx1_v: {
3766 Value *TblRes =
3767 packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 1), ExtOp: nullptr, IndexOp: Ops[2], ResTy: Ty,
3768 IntID: Intrinsic::aarch64_neon_tbl1, Name: "vtbl1");
3769
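    // tbl1 zeroes any lane whose index is out of range, but vtbx1 must leave
    // such lanes equal to the destination operand, so blend the two results
    // with a lane-wise "index >= 8" mask.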
3770 llvm::Constant *EightV = ConstantInt::get(Ty, V: 8);
3771 Value *CmpRes = Builder.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Ops[2], RHS: EightV);
3772 CmpRes = Builder.CreateSExt(V: CmpRes, DestTy: Ty);
3773
3774 Value *EltsFromInput = Builder.CreateAnd(LHS: CmpRes, RHS: Ops[0]);
3775 Value *EltsFromTbl = Builder.CreateAnd(LHS: Builder.CreateNot(V: CmpRes), RHS: TblRes);
3776 return Builder.CreateOr(LHS: EltsFromInput, RHS: EltsFromTbl, Name: "vtbx");
3777 }
3778 case NEON::BI__builtin_neon_vtbx2_v: {
3779 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 2), ExtOp: Ops[0], IndexOp: Ops[3],
3780 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbx1, Name: "vtbx1");
3781 }
3782 case NEON::BI__builtin_neon_vtbx3_v: {
3783 Value *TblRes =
3784 packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 3), ExtOp: nullptr, IndexOp: Ops[4], ResTy: Ty,
3785 IntID: Intrinsic::aarch64_neon_tbl2, Name: "vtbl2");
3786
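    // As for vtbx1, lanes whose index is past the end of the 24-entry table
    // must come from the destination operand rather than the zeroed tbl
    // result.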
3787 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, V: 24);
3788 Value *CmpRes = Builder.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Ops[4],
3789 RHS: TwentyFourV);
3790 CmpRes = Builder.CreateSExt(V: CmpRes, DestTy: Ty);
3791
3792 Value *EltsFromInput = Builder.CreateAnd(LHS: CmpRes, RHS: Ops[0]);
3793 Value *EltsFromTbl = Builder.CreateAnd(LHS: Builder.CreateNot(V: CmpRes), RHS: TblRes);
3794 return Builder.CreateOr(LHS: EltsFromInput, RHS: EltsFromTbl, Name: "vtbx");
3795 }
3796 case NEON::BI__builtin_neon_vtbx4_v: {
3797 return packTBLDVectorList(CGF, Ops: ArrayRef(Ops).slice(N: 1, M: 4), ExtOp: Ops[0], IndexOp: Ops[5],
3798 ResTy: Ty, IntID: Intrinsic::aarch64_neon_tbx2, Name: "vtbx2");
3799 }
3800 case NEON::BI__builtin_neon_vqtbl1_v:
3801 case NEON::BI__builtin_neon_vqtbl1q_v:
3802 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
  case NEON::BI__builtin_neon_vqtbl2_v:
  case NEON::BI__builtin_neon_vqtbl2q_v:
    Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
  case NEON::BI__builtin_neon_vqtbl3_v:
  case NEON::BI__builtin_neon_vqtbl3q_v:
    Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
  case NEON::BI__builtin_neon_vqtbl4_v:
  case NEON::BI__builtin_neon_vqtbl4q_v:
    Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
  case NEON::BI__builtin_neon_vqtbx1_v:
  case NEON::BI__builtin_neon_vqtbx1q_v:
    Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
  case NEON::BI__builtin_neon_vqtbx2_v:
  case NEON::BI__builtin_neon_vqtbx2q_v:
    Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
  case NEON::BI__builtin_neon_vqtbx3_v:
  case NEON::BI__builtin_neon_vqtbx3q_v:
    Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
  case NEON::BI__builtin_neon_vqtbx4_v:
  case NEON::BI__builtin_neon_vqtbx4q_v:
    Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
  }
3826
3827 if (!Int)
3828 return nullptr;
3829
3830 Function *F = CGF.CGM.getIntrinsic(IID: Int, Tys: Ty);
3831 return CGF.EmitNeonCall(F, Ops, name: s);
3832}
3833
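// Wrap a 16-bit scalar in a <4 x i16> vector, placing the value in lane 0,
// so it can be fed to NEON intrinsics that only take vector operands.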
3834Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
3835 auto *VTy = llvm::FixedVectorType::get(ElementType: Int16Ty, NumElts: 4);
3836 Op = Builder.CreateBitCast(V: Op, DestTy: Int16Ty);
3837 Value *V = PoisonValue::get(T: VTy);
3838 llvm::Constant *CI = ConstantInt::get(Ty: SizeTy, V: 0);
3839 Op = Builder.CreateInsertElement(Vec: V, NewElt: Op, Idx: CI);
3840 return Op;
3841}
3842
3843/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
3844/// access builtin. Only required if it can't be inferred from the base pointer
3845/// operand.
3846llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
3847 switch (TypeFlags.getMemEltType()) {
3848 case SVETypeFlags::MemEltTyDefault:
3849 return getEltType(TypeFlags);
3850 case SVETypeFlags::MemEltTyInt8:
3851 return Builder.getInt8Ty();
3852 case SVETypeFlags::MemEltTyInt16:
3853 return Builder.getInt16Ty();
3854 case SVETypeFlags::MemEltTyInt32:
3855 return Builder.getInt32Ty();
3856 case SVETypeFlags::MemEltTyInt64:
3857 return Builder.getInt64Ty();
3858 }
3859 llvm_unreachable("Unknown MemEltType");
3860}
3861
3862llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
3863 switch (TypeFlags.getEltType()) {
3864 default:
3865 llvm_unreachable("Invalid SVETypeFlag!");
3866
3867 case SVETypeFlags::EltTyMFloat8:
3868 case SVETypeFlags::EltTyInt8:
3869 return Builder.getInt8Ty();
3870 case SVETypeFlags::EltTyInt16:
3871 return Builder.getInt16Ty();
3872 case SVETypeFlags::EltTyInt32:
3873 return Builder.getInt32Ty();
3874 case SVETypeFlags::EltTyInt64:
3875 return Builder.getInt64Ty();
3876 case SVETypeFlags::EltTyInt128:
3877 return Builder.getInt128Ty();
3878
3879 case SVETypeFlags::EltTyFloat16:
3880 return Builder.getHalfTy();
3881 case SVETypeFlags::EltTyFloat32:
3882 return Builder.getFloatTy();
3883 case SVETypeFlags::EltTyFloat64:
3884 return Builder.getDoubleTy();
3885
3886 case SVETypeFlags::EltTyBFloat16:
3887 return Builder.getBFloatTy();
3888
3889 case SVETypeFlags::EltTyBool8:
3890 case SVETypeFlags::EltTyBool16:
3891 case SVETypeFlags::EltTyBool32:
3892 case SVETypeFlags::EltTyBool64:
3893 return Builder.getInt1Ty();
3894 }
3895}
3896
3897// Return the llvm predicate vector type corresponding to the specified element
3898// TypeFlags.
3899llvm::ScalableVectorType *
3900CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
3901 switch (TypeFlags.getEltType()) {
3902 default: llvm_unreachable("Unhandled SVETypeFlag!");
3903
3904 case SVETypeFlags::EltTyInt8:
3905 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3906 case SVETypeFlags::EltTyInt16:
3907 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3908 case SVETypeFlags::EltTyInt32:
3909 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3910 case SVETypeFlags::EltTyInt64:
3911 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3912
3913 case SVETypeFlags::EltTyBFloat16:
3914 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3915 case SVETypeFlags::EltTyFloat16:
3916 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3917 case SVETypeFlags::EltTyFloat32:
3918 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3919 case SVETypeFlags::EltTyFloat64:
3920 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3921
3922 case SVETypeFlags::EltTyBool8:
3923 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3924 case SVETypeFlags::EltTyBool16:
3925 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3926 case SVETypeFlags::EltTyBool32:
3927 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3928 case SVETypeFlags::EltTyBool64:
3929 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3930 }
3931}
3932
3933// Return the llvm vector type corresponding to the specified element TypeFlags.
3934llvm::ScalableVectorType *
3935CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
3936 switch (TypeFlags.getEltType()) {
3937 default:
3938 llvm_unreachable("Invalid SVETypeFlag!");
3939
3940 case SVETypeFlags::EltTyInt8:
3941 return llvm::ScalableVectorType::get(ElementType: Builder.getInt8Ty(), MinNumElts: 16);
3942 case SVETypeFlags::EltTyInt16:
3943 return llvm::ScalableVectorType::get(ElementType: Builder.getInt16Ty(), MinNumElts: 8);
3944 case SVETypeFlags::EltTyInt32:
3945 return llvm::ScalableVectorType::get(ElementType: Builder.getInt32Ty(), MinNumElts: 4);
3946 case SVETypeFlags::EltTyInt64:
3947 return llvm::ScalableVectorType::get(ElementType: Builder.getInt64Ty(), MinNumElts: 2);
3948
3949 case SVETypeFlags::EltTyMFloat8:
3950 return llvm::ScalableVectorType::get(ElementType: Builder.getInt8Ty(), MinNumElts: 16);
3951 case SVETypeFlags::EltTyFloat16:
3952 return llvm::ScalableVectorType::get(ElementType: Builder.getHalfTy(), MinNumElts: 8);
3953 case SVETypeFlags::EltTyBFloat16:
3954 return llvm::ScalableVectorType::get(ElementType: Builder.getBFloatTy(), MinNumElts: 8);
3955 case SVETypeFlags::EltTyFloat32:
3956 return llvm::ScalableVectorType::get(ElementType: Builder.getFloatTy(), MinNumElts: 4);
3957 case SVETypeFlags::EltTyFloat64:
3958 return llvm::ScalableVectorType::get(ElementType: Builder.getDoubleTy(), MinNumElts: 2);
3959
3960 case SVETypeFlags::EltTyBool8:
3961 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 16);
3962 case SVETypeFlags::EltTyBool16:
3963 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 8);
3964 case SVETypeFlags::EltTyBool32:
3965 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 4);
3966 case SVETypeFlags::EltTyBool64:
3967 return llvm::ScalableVectorType::get(ElementType: Builder.getInt1Ty(), MinNumElts: 2);
3968 }
3969}
3970
3971llvm::Value *
3972CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
3973 Function *Ptrue =
3974 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_ptrue, Tys: getSVEPredType(TypeFlags));
3975 return Builder.CreateCall(Callee: Ptrue, Args: {Builder.getInt32(/*SV_ALL*/ C: 31)});
3976}
3977
3978constexpr unsigned SVEBitsPerBlock = 128;
3979
3980static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
3981 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
3982 return llvm::ScalableVectorType::get(ElementType: EltTy, MinNumElts: NumElts);
3983}
3984
3985// Reinterpret the input predicate so that it can be used to correctly isolate
3986// the elements of the specified datatype.
3987Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
3988 llvm::ScalableVectorType *VTy) {
3989
3990 if (isa<TargetExtType>(Val: Pred->getType()) &&
3991 cast<TargetExtType>(Val: Pred->getType())->getName() == "aarch64.svcount")
3992 return Pred;
3993
3994 auto *RTy = llvm::VectorType::get(ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: 1), Other: VTy);
3995 if (Pred->getType() == RTy)
3996 return Pred;
3997
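  // svbool_t is always <vscale x 16 x i1>. Narrowing to a coarser predicate
  // type goes through convert_from_svbool (overloaded on the result type),
  // while widening a narrow predicate back to svbool goes through
  // convert_to_svbool (overloaded on the source type).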
3998 unsigned IntID;
3999 llvm::Type *IntrinsicTy;
4000 switch (VTy->getMinNumElements()) {
4001 default:
4002 llvm_unreachable("unsupported element count!");
4003 case 1:
4004 case 2:
4005 case 4:
4006 case 8:
4007 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
4008 IntrinsicTy = RTy;
4009 break;
4010 case 16:
4011 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
4012 IntrinsicTy = Pred->getType();
4013 break;
4014 }
4015
4016 Function *F = CGM.getIntrinsic(IID: IntID, Tys: IntrinsicTy);
4017 Value *C = Builder.CreateCall(Callee: F, Args: Pred);
4018 assert(C->getType() == RTy && "Unexpected return type!");
4019 return C;
4020}
4021
4022Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
4023 llvm::StructType *Ty) {
4024 if (PredTuple->getType() == Ty)
4025 return PredTuple;
4026
4027 Value *Ret = llvm::PoisonValue::get(T: Ty);
4028 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
4029 Value *Pred = Builder.CreateExtractValue(Agg: PredTuple, Idxs: I);
4030 Pred = EmitSVEPredicateCast(
4031 Pred, VTy: cast<llvm::ScalableVectorType>(Val: Ty->getTypeAtIndex(N: I)));
4032 Ret = Builder.CreateInsertValue(Agg: Ret, Val: Pred, Idxs: I);
4033 }
4034
4035 return Ret;
4036}
4037
4038Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
4039 SmallVectorImpl<Value *> &Ops,
4040 unsigned IntID) {
4041 auto *ResultTy = getSVEType(TypeFlags);
4042 auto *OverloadedTy =
4043 llvm::ScalableVectorType::get(ElementType: SVEBuiltinMemEltTy(TypeFlags), SVTy: ResultTy);
4044
4045 Function *F = nullptr;
4046 if (Ops[1]->getType()->isVectorTy())
4047 // This is the "vector base, scalar offset" case. In order to uniquely
4048 // map this built-in to an LLVM IR intrinsic, we need both the return type
4049 // and the type of the vector base.
4050 F = CGM.getIntrinsic(IID: IntID, Tys: {OverloadedTy, Ops[1]->getType()});
4051 else
    // This is the "scalar base, vector offset" case. The type of the offset
4053 // is encoded in the name of the intrinsic. We only need to specify the
4054 // return type in order to uniquely map this built-in to an LLVM IR
4055 // intrinsic.
4056 F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
4057
  // At the ACLE level there's only one predicate type, svbool_t, which is
  // mapped to <vscale x 16 x i1>. However, this might be incompatible with
  // the actual type being loaded. For example, when loading doubles (i64) the
  // predicate should be <vscale x 2 x i1> instead. At the IR level the type
  // of the predicate and the data being loaded must match. Cast to the type
  // expected by the intrinsic. The intrinsic itself should be defined in a
  // way that enforces relations between parameter types.
4065 Ops[0] = EmitSVEPredicateCast(
4066 Pred: Ops[0], VTy: cast<llvm::ScalableVectorType>(Val: F->getArg(i: 0)->getType()));
4067
4068 // Pass 0 when the offset is missing. This can only be applied when using
4069 // the "vector base" addressing mode for which ACLE allows no offset. The
4070 // corresponding LLVM IR always requires an offset.
4071 if (Ops.size() == 2) {
4072 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4073 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
4074 }
4075
4076 // For "vector base, scalar index" scale the index so that it becomes a
4077 // scalar offset.
4078 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
4079 unsigned BytesPerElt =
4080 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4081 Ops[2] = Builder.CreateShl(LHS: Ops[2], RHS: Log2_32(Value: BytesPerElt));
4082 }
4083
4084 Value *Call = Builder.CreateCall(Callee: F, Args: Ops);
4085
4086 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
4087 // other cases it's folded into a nop.
4088 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(V: Call, DestTy: ResultTy)
4089 : Builder.CreateSExt(V: Call, DestTy: ResultTy);
4090}
4091
4092Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
4093 SmallVectorImpl<Value *> &Ops,
4094 unsigned IntID) {
4095 auto *SrcDataTy = getSVEType(TypeFlags);
4096 auto *OverloadedTy =
4097 llvm::ScalableVectorType::get(ElementType: SVEBuiltinMemEltTy(TypeFlags), SVTy: SrcDataTy);
4098
4099 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
4100 // it's the first argument. Move it accordingly.
4101 Ops.insert(I: Ops.begin(), Elt: Ops.pop_back_val());
4102
4103 Function *F = nullptr;
4104 if (Ops[2]->getType()->isVectorTy())
4105 // This is the "vector base, scalar offset" case. In order to uniquely
4106 // map this built-in to an LLVM IR intrinsic, we need both the return type
4107 // and the type of the vector base.
4108 F = CGM.getIntrinsic(IID: IntID, Tys: {OverloadedTy, Ops[2]->getType()});
4109 else
    // This is the "scalar base, vector offset" case. The type of the offset
4111 // is encoded in the name of the intrinsic. We only need to specify the
4112 // return type in order to uniquely map this built-in to an LLVM IR
4113 // intrinsic.
4114 F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
4115
4116 // Pass 0 when the offset is missing. This can only be applied when using
4117 // the "vector base" addressing mode for which ACLE allows no offset. The
4118 // corresponding LLVM IR always requires an offset.
4119 if (Ops.size() == 3) {
4120 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
4121 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
4122 }
4123
4124 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
4125 // folded into a nop.
4126 Ops[0] = Builder.CreateTrunc(V: Ops[0], DestTy: OverloadedTy);
4127
  // At the ACLE level there's only one predicate type, svbool_t, which is
  // mapped to <vscale x 16 x i1>. However, this might be incompatible with
  // the actual type being stored. For example, when storing doubles (i64) the
  // predicate should be <vscale x 2 x i1> instead. At the IR level the type
  // of the predicate and the data being stored must match. Cast to the type
  // expected by the intrinsic. The intrinsic itself should be defined in a
  // way that enforces relations between parameter types.
4135 Ops[1] = EmitSVEPredicateCast(
4136 Pred: Ops[1], VTy: cast<llvm::ScalableVectorType>(Val: F->getArg(i: 1)->getType()));
4137
4138 // For "vector base, scalar index" scale the index so that it becomes a
4139 // scalar offset.
4140 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
4141 unsigned BytesPerElt =
4142 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
4143 Ops[3] = Builder.CreateShl(LHS: Ops[3], RHS: Log2_32(Value: BytesPerElt));
4144 }
4145
4146 return Builder.CreateCall(Callee: F, Args: Ops);
4147}
4148
4149Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
4150 SmallVectorImpl<Value *> &Ops,
4151 unsigned IntID) {
  // The gather prefetches are overloaded on the vector input - this can be
  // either the vector of base addresses or the vector of offsets.
4154 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Val: Ops[1]->getType());
4155 if (!OverloadedTy)
4156 OverloadedTy = cast<llvm::ScalableVectorType>(Val: Ops[2]->getType());
4157
4158 // Cast the predicate from svbool_t to the right number of elements.
4159 Ops[0] = EmitSVEPredicateCast(Pred: Ops[0], VTy: OverloadedTy);
4160
  // Vector-base addressing modes: "vector base + immediate" or "vector base
  // + scalar index".
4162 if (Ops[1]->getType()->isVectorTy()) {
4163 if (Ops.size() == 3) {
4164 // Pass 0 for 'vector+imm' when the index is omitted.
4165 Ops.push_back(Elt: ConstantInt::get(Ty: Int64Ty, V: 0));
4166
4167 // The sv_prfop is the last operand in the builtin and IR intrinsic.
4168 std::swap(a&: Ops[2], b&: Ops[3]);
4169 } else {
4170 // Index needs to be passed as scaled offset.
4171 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4172 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
4173 if (BytesPerElt > 1)
4174 Ops[2] = Builder.CreateShl(LHS: Ops[2], RHS: Log2_32(Value: BytesPerElt));
4175 }
4176 }
4177
4178 Function *F = CGM.getIntrinsic(IID: IntID, Tys: OverloadedTy);
4179 return Builder.CreateCall(Callee: F, Args: Ops);
4180}
4181
4182Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
4183 SmallVectorImpl<Value*> &Ops,
4184 unsigned IntID) {
4185 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4186 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy);
4187 Value *BasePtr = Ops[1];
4188
4189 // Does the load have an offset?
4190 if (Ops.size() > 2)
4191 BasePtr = Builder.CreateGEP(Ty: VTy, Ptr: BasePtr, IdxList: Ops[2]);
4192
4193 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {VTy});
4194 return Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr});
4195}
4196
4197Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
4198 SmallVectorImpl<Value*> &Ops,
4199 unsigned IntID) {
4200 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
4201
4202 unsigned N;
4203 switch (IntID) {
4204 case Intrinsic::aarch64_sve_st2:
4205 case Intrinsic::aarch64_sve_st1_pn_x2:
4206 case Intrinsic::aarch64_sve_stnt1_pn_x2:
4207 case Intrinsic::aarch64_sve_st2q:
4208 N = 2;
4209 break;
4210 case Intrinsic::aarch64_sve_st3:
4211 case Intrinsic::aarch64_sve_st3q:
4212 N = 3;
4213 break;
4214 case Intrinsic::aarch64_sve_st4:
4215 case Intrinsic::aarch64_sve_st1_pn_x4:
4216 case Intrinsic::aarch64_sve_stnt1_pn_x4:
4217 case Intrinsic::aarch64_sve_st4q:
4218 N = 4;
4219 break;
4220 default:
4221 llvm_unreachable("unknown intrinsic!");
4222 }
4223
4224 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy);
4225 Value *BasePtr = Ops[1];
4226
4227 // Does the store have an offset?
4228 if (Ops.size() > (2 + N))
4229 BasePtr = Builder.CreateGEP(Ty: VTy, Ptr: BasePtr, IdxList: Ops[2]);
4230
4231 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
4232 // need to break up the tuple vector.
4233 SmallVector<llvm::Value*, 5> Operands;
4234 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
4235 Operands.push_back(Elt: Ops[I]);
4236 Operands.append(IL: {Predicate, BasePtr});
4237 Function *F = CGM.getIntrinsic(IID: IntID, Tys: { VTy });
4238
4239 return Builder.CreateCall(Callee: F, Args: Operands);
4240}
4241
4242// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
4243// svpmullt_pair intrinsics, with the exception that their results are bitcast
4244// to a wider type.
4245Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
4246 SmallVectorImpl<Value *> &Ops,
4247 unsigned BuiltinID) {
4248 // Splat scalar operand to vector (intrinsics with _n infix)
4249 if (TypeFlags.hasSplatOperand()) {
4250 unsigned OpNo = TypeFlags.getSplatOperand();
4251 Ops[OpNo] = EmitSVEDupX(Scalar: Ops[OpNo]);
4252 }
4253
4254 // The pair-wise function has a narrower overloaded type.
4255 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: Ops[0]->getType());
4256 Value *Call = Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1]});
4257
4258 // Now bitcast to the wider result type.
4259 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
4260 return EmitSVEReinterpret(Val: Call, Ty);
4261}
4262
4263Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
4264 ArrayRef<Value *> Ops, unsigned BuiltinID) {
4265 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
4266 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: OverloadedTy);
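  // The mov-long builtins are emitted as a shift-left-long by zero bits.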
4267 return Builder.CreateCall(Callee: F, Args: {Ops[0], Builder.getInt32(C: 0)});
4268}
4269
4270Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
4271 SmallVectorImpl<Value *> &Ops,
4272 unsigned BuiltinID) {
4273 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
4274 auto *VectorTy = getSVEVectorForElementType(EltTy: MemEltTy);
4275 auto *MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
4276
4277 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: MemoryTy);
4278 Value *BasePtr = Ops[1];
4279
  // Apply the index operand to the base pointer if it was not omitted.
4281 if (Ops.size() > 3)
4282 BasePtr = Builder.CreateGEP(Ty: MemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
4283
4284 Value *PrfOp = Ops.back();
4285
4286 Function *F = CGM.getIntrinsic(IID: BuiltinID, Tys: Predicate->getType());
4287 return Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr, PrfOp});
4288}
4289
4290Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
4291 llvm::Type *ReturnTy,
4292 SmallVectorImpl<Value *> &Ops,
4293 unsigned IntrinsicID,
4294 bool IsZExtReturn) {
4295 QualType LangPTy = E->getArg(Arg: 1)->getType();
4296 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4297 T: LangPTy->castAs<PointerType>()->getPointeeType());
4298
  // Mfloat8 types are represented as vectors, so extra work is needed to
  // extract the scalar element type.
4301 if (MemEltTy->isVectorTy()) {
4302 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4303 "Only <1 x i8> expected");
4304 MemEltTy = cast<llvm::VectorType>(Val: MemEltTy)->getElementType();
4305 }
4306
4307 // The vector type that is returned may be different from the
4308 // eventual type loaded from memory.
4309 auto VectorTy = cast<llvm::ScalableVectorType>(Val: ReturnTy);
4310 llvm::ScalableVectorType *MemoryTy = nullptr;
4311 llvm::ScalableVectorType *PredTy = nullptr;
4312 bool IsQuadLoad = false;
4313 switch (IntrinsicID) {
4314 case Intrinsic::aarch64_sve_ld1uwq:
4315 case Intrinsic::aarch64_sve_ld1udq:
4316 MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, MinNumElts: 1);
4317 PredTy = llvm::ScalableVectorType::get(
4318 ElementType: llvm::Type::getInt1Ty(C&: getLLVMContext()), MinNumElts: 1);
4319 IsQuadLoad = true;
4320 break;
4321 default:
4322 MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
4323 PredTy = MemoryTy;
4324 break;
4325 }
4326
4327 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: PredTy);
4328 Value *BasePtr = Ops[1];
4329
4330 // Does the load have an offset?
4331 if (Ops.size() > 2)
4332 BasePtr = Builder.CreateGEP(Ty: MemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
4333
4334 Function *F = CGM.getIntrinsic(IID: IntrinsicID, Tys: IsQuadLoad ? VectorTy : MemoryTy);
4335 auto *Load =
4336 cast<llvm::Instruction>(Val: Builder.CreateCall(Callee: F, Args: {Predicate, BasePtr}));
4337 auto TBAAInfo = CGM.getTBAAAccessInfo(AccessType: LangPTy->getPointeeType());
4338 CGM.DecorateInstructionWithTBAA(Inst: Load, TBAAInfo);
4339
4340 if (IsQuadLoad)
4341 return Load;
4342
4343 return IsZExtReturn ? Builder.CreateZExt(V: Load, DestTy: VectorTy)
4344 : Builder.CreateSExt(V: Load, DestTy: VectorTy);
4345}
4346
4347Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
4348 SmallVectorImpl<Value *> &Ops,
4349 unsigned IntrinsicID) {
4350 QualType LangPTy = E->getArg(Arg: 1)->getType();
4351 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
4352 T: LangPTy->castAs<PointerType>()->getPointeeType());
4353
  // Mfloat8 types are represented as vectors, so extra work is needed to
  // extract the scalar element type.
4356 if (MemEltTy->isVectorTy()) {
4357 assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) &&
4358 "Only <1 x i8> expected");
4359 MemEltTy = cast<llvm::VectorType>(Val: MemEltTy)->getElementType();
4360 }
4361
4362 // The vector type that is stored may be different from the
4363 // eventual type stored to memory.
4364 auto VectorTy = cast<llvm::ScalableVectorType>(Val: Ops.back()->getType());
4365 auto MemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, SVTy: VectorTy);
4366
4367 auto PredTy = MemoryTy;
4368 auto AddrMemoryTy = MemoryTy;
4369 bool IsQuadStore = false;
4370
4371 switch (IntrinsicID) {
4372 case Intrinsic::aarch64_sve_st1wq:
4373 case Intrinsic::aarch64_sve_st1dq:
4374 AddrMemoryTy = llvm::ScalableVectorType::get(ElementType: MemEltTy, MinNumElts: 1);
4375 PredTy =
4376 llvm::ScalableVectorType::get(ElementType: IntegerType::get(C&: getLLVMContext(), NumBits: 1), MinNumElts: 1);
4377 IsQuadStore = true;
4378 break;
4379 default:
4380 break;
4381 }
4382 Value *Predicate = EmitSVEPredicateCast(Pred: Ops[0], VTy: PredTy);
4383 Value *BasePtr = Ops[1];
4384
4385 // Does the store have an offset?
4386 if (Ops.size() == 4)
4387 BasePtr = Builder.CreateGEP(Ty: AddrMemoryTy, Ptr: BasePtr, IdxList: Ops[2]);
4388
4389 // Last value is always the data
4390 Value *Val =
4391 IsQuadStore ? Ops.back() : Builder.CreateTrunc(V: Ops.back(), DestTy: MemoryTy);
4392
4393 Function *F =
4394 CGM.getIntrinsic(IID: IntrinsicID, Tys: IsQuadStore ? VectorTy : MemoryTy);
4395 auto *Store =
4396 cast<llvm::Instruction>(Val: Builder.CreateCall(Callee: F, Args: {Val, Predicate, BasePtr}));
4397 auto TBAAInfo = CGM.getTBAAAccessInfo(AccessType: LangPTy->getPointeeType());
4398 CGM.DecorateInstructionWithTBAA(Inst: Store, TBAAInfo);
4399 return Store;
4400}
4401
4402Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
4403 SmallVectorImpl<Value *> &Ops,
4404 unsigned IntID) {
4405 Ops[2] = EmitSVEPredicateCast(
4406 Pred: Ops[2], VTy: getSVEVectorForElementType(EltTy: SVEBuiltinMemEltTy(TypeFlags)));
4407
4408 SmallVector<Value *> NewOps;
4409 NewOps.push_back(Elt: Ops[2]);
4410
4411 llvm::Value *BasePtr = Ops[3];
4412 llvm::Value *RealSlice = Ops[1];
  // If the intrinsic contains the vnum parameter, multiply it by the vector
  // size in bytes.
4415 if (Ops.size() == 5) {
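    // cntsd gives the streaming vector length in doublewords; scale it by 8
    // to get the length in bytes before multiplying with vnum.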
4416 Function *StreamingVectorLength =
4417 CGM.getIntrinsic(IID: Intrinsic::aarch64_sme_cntsd);
4418 llvm::Value *StreamingVectorLengthCall =
4419 Builder.CreateMul(LHS: Builder.CreateCall(Callee: StreamingVectorLength),
4420 RHS: llvm::ConstantInt::get(Ty: Int64Ty, V: 8), Name: "svl",
4421 /* HasNUW */ true, /* HasNSW */ true);
4422 llvm::Value *Mulvl =
4423 Builder.CreateMul(LHS: StreamingVectorLengthCall, RHS: Ops[4], Name: "mulvl");
4424 // The type of the ptr parameter is void *, so use Int8Ty here.
4425 BasePtr = Builder.CreateGEP(Ty: Int8Ty, Ptr: Ops[3], IdxList: Mulvl);
4426 RealSlice = Builder.CreateZExt(V: RealSlice, DestTy: Int64Ty);
4427 RealSlice = Builder.CreateAdd(LHS: RealSlice, RHS: Ops[4]);
4428 RealSlice = Builder.CreateTrunc(V: RealSlice, DestTy: Int32Ty);
4429 }
4430 NewOps.push_back(Elt: BasePtr);
4431 NewOps.push_back(Elt: Ops[0]);
4432 NewOps.push_back(Elt: RealSlice);
4433 Function *F = CGM.getIntrinsic(IID: IntID);
4434 return Builder.CreateCall(Callee: F, Args: NewOps);
4435}
4436
4437Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
4438 SmallVectorImpl<Value *> &Ops,
4439 unsigned IntID) {
4440 auto *VecTy = getSVEType(TypeFlags);
4441 Function *F = CGM.getIntrinsic(IID: IntID, Tys: VecTy);
4442 if (TypeFlags.isReadZA())
4443 Ops[1] = EmitSVEPredicateCast(Pred: Ops[1], VTy: VecTy);
4444 else if (TypeFlags.isWriteZA())
4445 Ops[2] = EmitSVEPredicateCast(Pred: Ops[2], VTy: VecTy);
4446 return Builder.CreateCall(Callee: F, Args: Ops);
4447}
4448
4449Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
4450 SmallVectorImpl<Value *> &Ops,
4451 unsigned IntID) {
  // The svzero_za() intrinsic zeros the entire za tile and has no parameters.
4453 if (Ops.size() == 0)
4454 Ops.push_back(Elt: llvm::ConstantInt::get(Ty: Int32Ty, V: 255));
4455 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {});
4456 return Builder.CreateCall(Callee: F, Args: Ops);
4457}
4458
4459Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
4460 SmallVectorImpl<Value *> &Ops,
4461 unsigned IntID) {
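  // The optional vnum operand defaults to an offset of zero.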
4462 if (Ops.size() == 2)
4463 Ops.push_back(Elt: Builder.getInt32(C: 0));
4464 else
4465 Ops[2] = Builder.CreateIntCast(V: Ops[2], DestTy: Int32Ty, isSigned: true);
4466 Function *F = CGM.getIntrinsic(IID: IntID, Tys: {});
4467 return Builder.CreateCall(Callee: F, Args: Ops);
4468}
4469
// Emit a splat of Scalar across all lanes of the given SVE vector type.
// Note: this now uses IRBuilder::CreateVectorSplat directly rather than the
// sve dup.x intrinsic that was previously used to limit the scalable IR
// generated for the ACLE.
4472Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
4473 return Builder.CreateVectorSplat(
4474 EC: cast<llvm::VectorType>(Val: Ty)->getElementCount(), V: Scalar);
4475}
4476
4477Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
4478 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
4479#ifndef NDEBUG
4480 auto *VecTy = cast<llvm::VectorType>(Ty);
4481 ElementCount EC = VecTy->getElementCount();
4482 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
4483 "Only <1 x i8> expected");
4484#endif
4485 Scalar = Builder.CreateExtractElement(Vec: Scalar, Idx: uint64_t(0));
4486 }
4487 return EmitSVEDupX(Scalar, Ty: getSVEVectorForElementType(EltTy: Scalar->getType()));
4488}
4489
4490Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
4491 // FIXME: For big endian this needs an additional REV, or needs a separate
4492 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
4493 // instruction is defined as 'bitwise' equivalent from memory point of
4494 // view (when storing/reloading), whereas the svreinterpret builtin
4495 // implements bitwise equivalent cast from register point of view.
4496 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
4497
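  // Tuples of vectors are reinterpreted member-wise.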
4498 if (auto *StructTy = dyn_cast<StructType>(Val: Ty)) {
4499 Value *Tuple = llvm::PoisonValue::get(T: Ty);
4500
4501 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
4502 Value *In = Builder.CreateExtractValue(Agg: Val, Idxs: I);
4503 Value *Out = Builder.CreateBitCast(V: In, DestTy: StructTy->getTypeAtIndex(N: I));
4504 Tuple = Builder.CreateInsertValue(Agg: Tuple, Val: Out, Idxs: I);
4505 }
4506
4507 return Tuple;
4508 }
4509
4510 return Builder.CreateBitCast(V: Val, DestTy: Ty);
4511}
4512
4513static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4514 SmallVectorImpl<Value *> &Ops) {
4515 auto *SplatZero = Constant::getNullValue(Ty);
4516 Ops.insert(I: Ops.begin(), Elt: SplatZero);
4517}
4518
4519static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
4520 SmallVectorImpl<Value *> &Ops) {
4521 auto *SplatUndef = UndefValue::get(T: Ty);
4522 Ops.insert(I: Ops.begin(), Elt: SplatUndef);
4523}
4524
4525SmallVector<llvm::Type *, 2>
4526CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
4527 llvm::Type *ResultType,
4528 ArrayRef<Value *> Ops) {
4529 if (TypeFlags.isOverloadNone())
4530 return {};
4531
4532 llvm::Type *DefaultType = getSVEType(TypeFlags);
4533
4534 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
4535 return {DefaultType, Ops[1]->getType()};
4536
4537 if (TypeFlags.isOverloadWhileRW())
4538 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
4539
4540 if (TypeFlags.isOverloadFirstandLast())
4541 return {Ops[0]->getType(), Ops.back()->getType()};
4542
4543 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
4544 ResultType->isVectorTy())
4545 return {ResultType, Ops[1]->getType()};
4546
4547 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
4548 return {DefaultType};
4549}
4550
4551Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
4552 ArrayRef<Value *> Ops) {
  assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
         "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");
4555 unsigned Idx = cast<ConstantInt>(Val: Ops[1])->getZExtValue();
4556
4557 if (TypeFlags.isTupleSet())
4558 return Builder.CreateInsertValue(Agg: Ops[0], Val: Ops[2], Idxs: Idx);
4559 return Builder.CreateExtractValue(Agg: Ops[0], Idxs: Idx);
4560}
4561
4562Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
4563 llvm::Type *Ty,
4564 ArrayRef<Value *> Ops) {
  assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
4566
4567 Value *Tuple = llvm::PoisonValue::get(T: Ty);
4568 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
4569 Tuple = Builder.CreateInsertValue(Agg: Tuple, Val: Ops[Idx], Idxs: Idx);
4570
4571 return Tuple;
4572}
4573
4574void CodeGenFunction::GetAArch64SVEProcessedOperands(
4575 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
4576 SVETypeFlags TypeFlags) {
4577 // Find out if any arguments are required to be integer constant expressions.
4578 unsigned ICEArguments = 0;
4579 ASTContext::GetBuiltinTypeError Error;
4580 getContext().GetBuiltinType(ID: BuiltinID, Error, IntegerConstantArgs: &ICEArguments);
4581 assert(Error == ASTContext::GE_None && "Should not codegen an error");
4582
4583 // Tuple set/get only requires one insert/extract vector, which is
4584 // created by EmitSVETupleSetOrGet.
4585 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
4586
4587 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
4588 bool IsICE = ICEArguments & (1 << i);
4589 Value *Arg = EmitScalarExpr(E: E->getArg(Arg: i));
4590
4591 if (IsICE) {
4592 // If this is required to be a constant, constant fold it so that we know
4593 // that the generated intrinsic gets a ConstantInt.
4594 std::optional<llvm::APSInt> Result =
4595 E->getArg(Arg: i)->getIntegerConstantExpr(Ctx: getContext());
4596 assert(Result && "Expected argument to be a constant");
4597
      // Immediates for SVE llvm intrinsics are always 32-bit. We can safely
      // truncate because the immediate has been range checked and no valid
      // immediate requires more than a handful of bits.
4601 *Result = Result->extOrTrunc(width: 32);
4602 Ops.push_back(Elt: llvm::ConstantInt::get(Context&: getLLVMContext(), V: *Result));
4603 continue;
4604 }
4605
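    // Tuple arguments arrive as structs of scalable vectors; flatten them so
    // that each member becomes a separate operand of the intrinsic.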
4606 if (isa<StructType>(Val: Arg->getType()) && !IsTupleGetOrSet) {
4607 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
4608 Ops.push_back(Elt: Builder.CreateExtractValue(Agg: Arg, Idxs: I));
4609
4610 continue;
4611 }
4612
4613 Ops.push_back(Elt: Arg);
4614 }
4615}
4616
4617Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
4618 const CallExpr *E) {
4619 llvm::Type *Ty = ConvertType(T: E->getType());
4620 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
4621 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
4622 Value *Val = EmitScalarExpr(E: E->getArg(Arg: 0));
4623 return EmitSVEReinterpret(Val, Ty);
4624 }
4625
4626 auto *Builtin = findARMVectorIntrinsicInMap(IntrinsicMap: AArch64SVEIntrinsicMap, BuiltinID,
4627 MapProvenSorted&: AArch64SVEIntrinsicsProvenSorted);
4628
4629 llvm::SmallVector<Value *, 4> Ops;
4630 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4631 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
4632
4633 if (TypeFlags.isLoad())
4634 return EmitSVEMaskedLoad(E, ReturnTy: Ty, Ops, IntrinsicID: Builtin->LLVMIntrinsic,
4635 IsZExtReturn: TypeFlags.isZExtReturn());
4636 if (TypeFlags.isStore())
4637 return EmitSVEMaskedStore(E, Ops, IntrinsicID: Builtin->LLVMIntrinsic);
4638 if (TypeFlags.isGatherLoad())
4639 return EmitSVEGatherLoad(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4640 if (TypeFlags.isScatterStore())
4641 return EmitSVEScatterStore(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4642 if (TypeFlags.isPrefetch())
4643 return EmitSVEPrefetchLoad(TypeFlags, Ops, BuiltinID: Builtin->LLVMIntrinsic);
4644 if (TypeFlags.isGatherPrefetch())
4645 return EmitSVEGatherPrefetch(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4646 if (TypeFlags.isStructLoad())
4647 return EmitSVEStructLoad(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4648 if (TypeFlags.isStructStore())
4649 return EmitSVEStructStore(TypeFlags, Ops, IntID: Builtin->LLVMIntrinsic);
4650 if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
4651 return EmitSVETupleSetOrGet(TypeFlags, Ops);
4652 if (TypeFlags.isTupleCreate())
4653 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
4654 if (TypeFlags.isUndef())
4655 return UndefValue::get(T: Ty);
4656
4657 // Handle built-ins for which there is a corresponding LLVM Intrinsic.
4658 // -------------------------------------------------------------------
4659 if (Builtin->LLVMIntrinsic != 0) {
4660 // Emit set FPMR for intrinsics that require it
4661 if (TypeFlags.setsFPMR())
4662 Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::aarch64_set_fpmr),
4663 Args: Ops.pop_back_val());
4664 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
4665 InsertExplicitZeroOperand(Builder, Ty, Ops);
4666
4667 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
4668 InsertExplicitUndefOperand(Builder, Ty, Ops);
4669
4670 // Some ACLE builtins leave out the argument to specify the predicate
4671 // pattern, which is expected to be expanded to an SV_ALL pattern.
4672 if (TypeFlags.isAppendSVALL())
4673 Ops.push_back(Elt: Builder.getInt32(/*SV_ALL*/ C: 31));
4674 if (TypeFlags.isInsertOp1SVALL())
4675 Ops.insert(I: &Ops[1], Elt: Builder.getInt32(/*SV_ALL*/ C: 31));
4676
4677 // Predicates must match the main datatype.
4678 for (Value *&Op : Ops)
4679 if (auto PredTy = dyn_cast<llvm::VectorType>(Val: Op->getType()))
4680 if (PredTy->getElementType()->isIntegerTy(Bitwidth: 1))
4681 Op = EmitSVEPredicateCast(Pred: Op, VTy: getSVEType(TypeFlags));
4682
4683 // Splat scalar operand to vector (intrinsics with _n infix)
4684 if (TypeFlags.hasSplatOperand()) {
4685 unsigned OpNo = TypeFlags.getSplatOperand();
4686 Ops[OpNo] = EmitSVEDupX(Scalar: Ops[OpNo]);
4687 }
4688
4689 if (TypeFlags.isReverseCompare())
4690 std::swap(a&: Ops[1], b&: Ops[2]);
4691 else if (TypeFlags.isReverseUSDOT())
4692 std::swap(a&: Ops[1], b&: Ops[2]);
4693 else if (TypeFlags.isReverseMergeAnyBinOp() &&
4694 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4695 std::swap(a&: Ops[1], b&: Ops[2]);
4696 else if (TypeFlags.isReverseMergeAnyAccOp() &&
4697 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
4698 std::swap(a&: Ops[1], b&: Ops[3]);
4699
4700 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
4701 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
4702 llvm::Type *OpndTy = Ops[1]->getType();
4703 auto *SplatZero = Constant::getNullValue(Ty: OpndTy);
4704 Ops[1] = Builder.CreateSelect(C: Ops[0], True: Ops[1], False: SplatZero);
4705 }
4706
4707 Function *F = CGM.getIntrinsic(IID: Builtin->LLVMIntrinsic,
4708 Tys: getSVEOverloadTypes(TypeFlags, ResultType: Ty, Ops));
4709 Value *Call = Builder.CreateCall(Callee: F, Args: Ops);
4710
4711 if (Call->getType() == Ty)
4712 return Call;
4713
4714 // Predicate results must be converted to svbool_t.
4715 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Val: Ty))
4716 return EmitSVEPredicateCast(Pred: Call, VTy: PredTy);
4717 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Val: Ty))
4718 return EmitSVEPredicateTupleCast(PredTuple: Call, Ty: PredTupleTy);
4719
4720 llvm_unreachable("unsupported element count!");
4721 }
4722
4723 switch (BuiltinID) {
4724 default:
4725 return nullptr;
4726
4727 case SVE::BI__builtin_sve_svreinterpret_b: {
4728 auto SVCountTy =
4729 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
4730 Function *CastFromSVCountF =
4731 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_convert_to_svbool, Tys: SVCountTy);
4732 return Builder.CreateCall(Callee: CastFromSVCountF, Args: Ops[0]);
4733 }
4734 case SVE::BI__builtin_sve_svreinterpret_c: {
4735 auto SVCountTy =
4736 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
4737 Function *CastToSVCountF =
4738 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_convert_from_svbool, Tys: SVCountTy);
4739 return Builder.CreateCall(Callee: CastToSVCountF, Args: Ops[0]);
4740 }
4741
4742 case SVE::BI__builtin_sve_svpsel_lane_b8:
4743 case SVE::BI__builtin_sve_svpsel_lane_b16:
4744 case SVE::BI__builtin_sve_svpsel_lane_b32:
4745 case SVE::BI__builtin_sve_svpsel_lane_b64:
4746 case SVE::BI__builtin_sve_svpsel_lane_c8:
4747 case SVE::BI__builtin_sve_svpsel_lane_c16:
4748 case SVE::BI__builtin_sve_svpsel_lane_c32:
4749 case SVE::BI__builtin_sve_svpsel_lane_c64: {
4750 bool IsSVCount = isa<TargetExtType>(Val: Ops[0]->getType());
4751 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
4752 "aarch64.svcount")) &&
4753 "Unexpected TargetExtType");
4754 auto SVCountTy =
4755 llvm::TargetExtType::get(Context&: getLLVMContext(), Name: "aarch64.svcount");
4756 Function *CastFromSVCountF =
4757 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_convert_to_svbool, Tys: SVCountTy);
4758 Function *CastToSVCountF =
4759 CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_convert_from_svbool, Tys: SVCountTy);
4760
4761 auto OverloadedTy = getSVEType(TypeFlags: SVETypeFlags(Builtin->TypeModifier));
4762 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_psel, Tys: OverloadedTy);
4763 llvm::Value *Ops0 =
4764 IsSVCount ? Builder.CreateCall(Callee: CastFromSVCountF, Args: Ops[0]) : Ops[0];
4765 llvm::Value *Ops1 = EmitSVEPredicateCast(Pred: Ops[1], VTy: OverloadedTy);
4766 llvm::Value *PSel = Builder.CreateCall(Callee: F, Args: {Ops0, Ops1, Ops[2]});
4767 return IsSVCount ? Builder.CreateCall(Callee: CastToSVCountF, Args: PSel) : PSel;
4768 }
4769 case SVE::BI__builtin_sve_svmov_b_z: {
4770 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
4771 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4772 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4773 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_and_z, Tys: OverloadedTy);
4774 return Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1], Ops[1]});
4775 }
4776
4777 case SVE::BI__builtin_sve_svnot_b_z: {
4778 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
4779 SVETypeFlags TypeFlags(Builtin->TypeModifier);
4780 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
4781 Function *F = CGM.getIntrinsic(IID: Intrinsic::aarch64_sve_eor_z, Tys: OverloadedTy);
4782 return Builder.CreateCall(Callee: F, Args: {Ops[0], Ops[1], Ops[0]});
4783 }
4784
4785 case SVE::BI__builtin_sve_svmovlb_u16:
4786 case SVE::BI__builtin_sve_svmovlb_u32:
4787 case SVE::BI__builtin_sve_svmovlb_u64:
4788 return EmitSVEMovl(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_ushllb);
4789
4790 case SVE::BI__builtin_sve_svmovlb_s16:
4791 case SVE::BI__builtin_sve_svmovlb_s32:
4792 case SVE::BI__builtin_sve_svmovlb_s64:
4793 return EmitSVEMovl(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_sshllb);
4794
4795 case SVE::BI__builtin_sve_svmovlt_u16:
4796 case SVE::BI__builtin_sve_svmovlt_u32:
4797 case SVE::BI__builtin_sve_svmovlt_u64:
4798 return EmitSVEMovl(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_ushllt);
4799
4800 case SVE::BI__builtin_sve_svmovlt_s16:
4801 case SVE::BI__builtin_sve_svmovlt_s32:
4802 case SVE::BI__builtin_sve_svmovlt_s64:
4803 return EmitSVEMovl(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_sshllt);
4804
4805 case SVE::BI__builtin_sve_svpmullt_u16:
4806 case SVE::BI__builtin_sve_svpmullt_u64:
4807 case SVE::BI__builtin_sve_svpmullt_n_u16:
4808 case SVE::BI__builtin_sve_svpmullt_n_u64:
4809 return EmitSVEPMull(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_pmullt_pair);
4810
4811 case SVE::BI__builtin_sve_svpmullb_u16:
4812 case SVE::BI__builtin_sve_svpmullb_u64:
4813 case SVE::BI__builtin_sve_svpmullb_n_u16:
4814 case SVE::BI__builtin_sve_svpmullb_n_u64:
4815 return EmitSVEPMull(TypeFlags, Ops, BuiltinID: Intrinsic::aarch64_sve_pmullb_pair);
4816
4817 case SVE::BI__builtin_sve_svdup_n_b8:
4818 case SVE::BI__builtin_sve_svdup_n_b16:
4819 case SVE::BI__builtin_sve_svdup_n_b32:
4820 case SVE::BI__builtin_sve_svdup_n_b64: {
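    // svdup_n_b* splats the i1 result of comparing the scalar operand
    // against zero across every lane of the predicate vector.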
    Value *CmpNE =
        Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
    llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
    Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
    return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
  }

  case SVE::BI__builtin_sve_svdupq_n_b8:
  case SVE::BI__builtin_sve_svdupq_n_b16:
  case SVE::BI__builtin_sve_svdupq_n_b32:
  case SVE::BI__builtin_sve_svdupq_n_b64:
  case SVE::BI__builtin_sve_svdupq_n_u8:
  case SVE::BI__builtin_sve_svdupq_n_s8:
  case SVE::BI__builtin_sve_svdupq_n_u64:
  case SVE::BI__builtin_sve_svdupq_n_f64:
  case SVE::BI__builtin_sve_svdupq_n_s64:
  case SVE::BI__builtin_sve_svdupq_n_u16:
  case SVE::BI__builtin_sve_svdupq_n_f16:
  case SVE::BI__builtin_sve_svdupq_n_bf16:
  case SVE::BI__builtin_sve_svdupq_n_s16:
  case SVE::BI__builtin_sve_svdupq_n_u32:
  case SVE::BI__builtin_sve_svdupq_n_f32:
  case SVE::BI__builtin_sve_svdupq_n_s32: {
    // These builtins are implemented by building a fixed-length vector from
    // the scalar operands, inserting it into the low 128 bits of a scalable
    // vector, and replicating that quadword across the whole vector with
    // dupq_lane.
    unsigned NumOpnds = Ops.size();

    bool IsBoolTy =
        cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);

    // For svdupq_n_b* the element type is an integer of width 128/NumOpnds,
    // so that the compare can use the width that is natural for the expected
    // number of predicate lanes.
    llvm::Type *EltTy = Ops[0]->getType();
    if (IsBoolTy)
      EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);

    SmallVector<llvm::Value *, 16> VecOps;
    for (unsigned I = 0; I < NumOpnds; ++I)
      VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
    Value *Vec = BuildVector(VecOps);

    llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
    Value *InsertSubVec = Builder.CreateInsertVector(
        OverloadedTy, PoisonValue::get(OverloadedTy), Vec, uint64_t(0));

    Function *F =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
    Value *DupQLane =
        Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});

    if (!IsBoolTy)
      return DupQLane;

    SVETypeFlags TypeFlags(Builtin->TypeModifier);
    Value *Pred = EmitSVEAllTruePred(TypeFlags);

    // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
    F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
                                       : Intrinsic::aarch64_sve_cmpne_wide,
                         OverloadedTy);
    Value *Call = Builder.CreateCall(
        F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
    return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
  }

  case SVE::BI__builtin_sve_svpfalse_b:
    return ConstantInt::getFalse(Ty);

  case SVE::BI__builtin_sve_svpfalse_c: {
    auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
    Function *CastToSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
    return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
  }

  case SVE::BI__builtin_sve_svlen_bf16:
  case SVE::BI__builtin_sve_svlen_f16:
  case SVE::BI__builtin_sve_svlen_f32:
  case SVE::BI__builtin_sve_svlen_f64:
  case SVE::BI__builtin_sve_svlen_s8:
  case SVE::BI__builtin_sve_svlen_s16:
  case SVE::BI__builtin_sve_svlen_s32:
  case SVE::BI__builtin_sve_svlen_s64:
  case SVE::BI__builtin_sve_svlen_u8:
  case SVE::BI__builtin_sve_svlen_u16:
  case SVE::BI__builtin_sve_svlen_u32:
  case SVE::BI__builtin_sve_svlen_u64: {
    SVETypeFlags TF(Builtin->TypeModifier);
    return Builder.CreateElementCount(Ty, getSVEType(TF)->getElementCount());
  }

  case SVE::BI__builtin_sve_svtbl2_u8:
  case SVE::BI__builtin_sve_svtbl2_s8:
  case SVE::BI__builtin_sve_svtbl2_u16:
  case SVE::BI__builtin_sve_svtbl2_s16:
  case SVE::BI__builtin_sve_svtbl2_u32:
  case SVE::BI__builtin_sve_svtbl2_s32:
  case SVE::BI__builtin_sve_svtbl2_u64:
  case SVE::BI__builtin_sve_svtbl2_s64:
  case SVE::BI__builtin_sve_svtbl2_f16:
  case SVE::BI__builtin_sve_svtbl2_bf16:
  case SVE::BI__builtin_sve_svtbl2_f32:
  case SVE::BI__builtin_sve_svtbl2_f64: {
    SVETypeFlags TF(Builtin->TypeModifier);
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, getSVEType(TF));
    return Builder.CreateCall(F, Ops);
  }

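  // svset_neonq inserts a 128-bit NEON vector into the low quadword of an
  // SVE vector, svget_neonq extracts that quadword, and svdup_neonq
  // broadcasts it to every quadword of the SVE vector via dupq_lane.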
  case SVE::BI__builtin_sve_svset_neonq_s8:
  case SVE::BI__builtin_sve_svset_neonq_s16:
  case SVE::BI__builtin_sve_svset_neonq_s32:
  case SVE::BI__builtin_sve_svset_neonq_s64:
  case SVE::BI__builtin_sve_svset_neonq_u8:
  case SVE::BI__builtin_sve_svset_neonq_u16:
  case SVE::BI__builtin_sve_svset_neonq_u32:
  case SVE::BI__builtin_sve_svset_neonq_u64:
  case SVE::BI__builtin_sve_svset_neonq_f16:
  case SVE::BI__builtin_sve_svset_neonq_f32:
  case SVE::BI__builtin_sve_svset_neonq_f64:
  case SVE::BI__builtin_sve_svset_neonq_bf16: {
    return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], uint64_t(0));
  }

  case SVE::BI__builtin_sve_svget_neonq_s8:
  case SVE::BI__builtin_sve_svget_neonq_s16:
  case SVE::BI__builtin_sve_svget_neonq_s32:
  case SVE::BI__builtin_sve_svget_neonq_s64:
  case SVE::BI__builtin_sve_svget_neonq_u8:
  case SVE::BI__builtin_sve_svget_neonq_u16:
  case SVE::BI__builtin_sve_svget_neonq_u32:
  case SVE::BI__builtin_sve_svget_neonq_u64:
  case SVE::BI__builtin_sve_svget_neonq_f16:
  case SVE::BI__builtin_sve_svget_neonq_f32:
  case SVE::BI__builtin_sve_svget_neonq_f64:
  case SVE::BI__builtin_sve_svget_neonq_bf16: {
    return Builder.CreateExtractVector(Ty, Ops[0], uint64_t(0));
  }

  case SVE::BI__builtin_sve_svdup_neonq_s8:
  case SVE::BI__builtin_sve_svdup_neonq_s16:
  case SVE::BI__builtin_sve_svdup_neonq_s32:
  case SVE::BI__builtin_sve_svdup_neonq_s64:
  case SVE::BI__builtin_sve_svdup_neonq_u8:
  case SVE::BI__builtin_sve_svdup_neonq_u16:
  case SVE::BI__builtin_sve_svdup_neonq_u32:
  case SVE::BI__builtin_sve_svdup_neonq_u64:
  case SVE::BI__builtin_sve_svdup_neonq_f16:
  case SVE::BI__builtin_sve_svdup_neonq_f32:
  case SVE::BI__builtin_sve_svdup_neonq_f64:
  case SVE::BI__builtin_sve_svdup_neonq_bf16: {
    Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
                                               uint64_t(0));
    return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
                                   {Insert, Builder.getInt64(0)});
  }
  }

  // Should not happen.
  return nullptr;
}

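// The SME2 sudot/sumla builtins describe the same operation as usdot/usmla
// with the signed and unsigned multi-vector operands in the opposite order,
// so they can share intrinsics once the two operand groups (of MultiVec
// registers each) are swapped into the order the intrinsic expects.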
static void swapCommutativeSMEOperands(unsigned BuiltinID,
                                       SmallVectorImpl<Value *> &Ops) {
  unsigned MultiVec;
  switch (BuiltinID) {
  default:
    return;
  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
    MultiVec = 1;
    break;
  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
  case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
    MultiVec = 2;
    break;
  case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
    MultiVec = 4;
    break;
  }

  if (MultiVec > 0)
    for (unsigned I = 0; I < MultiVec; ++I)
      std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
}

Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
                                                  const CallExpr *E) {
  auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
                                              AArch64SMEIntrinsicsProvenSorted);

  llvm::SmallVector<Value *, 4> Ops;
  SVETypeFlags TypeFlags(Builtin->TypeModifier);
  GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);

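  // Loads/stores, ZA reads/writes, ZA zeroing, and ZA spill/fill (ldr/str)
  // each have a dedicated emission helper; everything else falls through to
  // the generic intrinsic call at the bottom of this function.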
  if (TypeFlags.isLoad() || TypeFlags.isStore())
    return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
    return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
      BuiltinID == SME::BI__builtin_sme_svzero_za)
    return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
      BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
      BuiltinID == SME::BI__builtin_sme_svldr_za ||
      BuiltinID == SME::BI__builtin_sme_svstr_za)
    return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);

  // Set the FPMR register for intrinsics that require it.
  if (TypeFlags.setsFPMR())
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
                       Ops.pop_back_val());
  // Handle builtins which require their multi-vector operands to be swapped.
  swapCommutativeSMEOperands(BuiltinID, Ops);

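  // svcntsb/svcntsh/svcntsw have no intrinsic of their own; they are lowered
  // as the streaming vector length in doublewords (cntsd) scaled by the
  // number of bytes/halfwords/words per doubleword.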
  auto isCntsBuiltin = [&]() {
    switch (BuiltinID) {
    default:
      return 0;
    case SME::BI__builtin_sme_svcntsb:
      return 8;
    case SME::BI__builtin_sme_svcntsh:
      return 4;
    case SME::BI__builtin_sme_svcntsw:
      return 2;
    }
  };

  if (auto Mul = isCntsBuiltin()) {
    llvm::Value *Cntd =
        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
    return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
                             "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
  }

  // Should not happen!
  if (Builtin->LLVMIntrinsic == 0)
    return nullptr;

  // Predicates must match the main datatype.
  for (Value *&Op : Ops)
    if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
      if (PredTy->getElementType()->isIntegerTy(1))
        Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));

  Function *F =
      TypeFlags.isOverloadNone()
          ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
          : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});

  return Builder.CreateCall(F, Ops);
}

/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
/// return it as an i8 pointer.
static Value *readX18AsPtr(CodeGenFunction &CGF) {
  LLVMContext &Context = CGF.CGM.getLLVMContext();
  llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
  llvm::Function *F =
      CGF.CGM.getIntrinsic(Intrinsic::read_register, {CGF.Int64Ty});
  llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
  return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
}

Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E,
                                               llvm::Triple::ArchType Arch) {
  if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
      BuiltinID <= clang::AArch64::LastSVEBuiltin)
    return EmitAArch64SVEBuiltinExpr(BuiltinID, E);

  if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
      BuiltinID <= clang::AArch64::LastSMEBuiltin)
    return EmitAArch64SMEBuiltinExpr(BuiltinID, E);

  if (BuiltinID == Builtin::BI__builtin_cpu_supports)
    return EmitAArch64CpuSupports(E);

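  // The hint-space builtins (nop/yield/wfe/wfi/sev/sevl) all share the
  // aarch64.hint intrinsic and differ only in the hint immediate.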
  unsigned HintID = static_cast<unsigned>(-1);
  switch (BuiltinID) {
  default: break;
  case clang::AArch64::BI__builtin_arm_nop:
    HintID = 0;
    break;
  case clang::AArch64::BI__builtin_arm_yield:
  case clang::AArch64::BI__yield:
    HintID = 1;
    break;
  case clang::AArch64::BI__builtin_arm_wfe:
  case clang::AArch64::BI__wfe:
    HintID = 2;
    break;
  case clang::AArch64::BI__builtin_arm_wfi:
  case clang::AArch64::BI__wfi:
    HintID = 3;
    break;
  case clang::AArch64::BI__builtin_arm_sev:
  case clang::AArch64::BI__sev:
    HintID = 4;
    break;
  case clang::AArch64::BI__builtin_arm_sevl:
  case clang::AArch64::BI__sevl:
    HintID = 5;
    break;
  }

  if (HintID != static_cast<unsigned>(-1)) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
    // Create call to __arm_sme_state and store the results to the two
    // pointers.
    CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
        llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
                                false),
        "__arm_sme_state"));
    auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
                                                "aarch64_pstate_sm_compatible");
    CI->setAttributes(Attrs);
    CI->setCallingConv(
        llvm::CallingConv::
            AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
    Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
                        EmitPointerWithAlignment(E->getArg(0)));
    return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
                               EmitPointerWithAlignment(E->getArg(1)));
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
    assert((getContext().getTypeSize(E->getType()) == 32) &&
           "rbit of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
    assert((getContext().getTypeSize(E->getType()) == 64) &&
           "rbit of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
      BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
    Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
    if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
      Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
    return Res;
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
                              "cls");
  }
  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
                              "cls");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
                              Arg, "frint32z");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
                              Arg, "frint64z");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
                              Arg, "frint32x");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *Ty = Arg->getType();
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
                              Arg, "frint64x");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
    assert((getContext().getTypeSize(E->getType()) == 32) &&
           "__jcvt of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
    llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
    llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));

    if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
      // Load from the address via an LLVM intrinsic, receiving a
      // tuple of 8 i64 words, and store each one to ValPtr.
      Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
      llvm::Value *Val = Builder.CreateCall(F, MemAddr);
      llvm::Value *ToRet;
      for (size_t i = 0; i < 8; i++) {
        llvm::Value *ValOffsetPtr =
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
        Address Addr =
            Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
        ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
      }
      return ToRet;
    }

    // Load 8 i64 words from ValPtr, and store them to the address
    // via an LLVM intrinsic.
    SmallVector<llvm::Value *, 9> Args;
    Args.push_back(MemAddr);
    for (size_t i = 0; i < 8; i++) {
      llvm::Value *ValOffsetPtr =
          Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
      Address Addr = Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
      Args.push_back(Builder.CreateLoad(Addr));
    }

    auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
                     ? Intrinsic::aarch64_st64b
                 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
                     ? Intrinsic::aarch64_st64bv
                     : Intrinsic::aarch64_st64bv0);
    Function *F = CGM.getIntrinsic(Intr);
    return Builder.CreateCall(F, Args);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {

    auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
                     ? Intrinsic::aarch64_rndr
                     : Intrinsic::aarch64_rndrrs);
    Function *F = CGM.getIntrinsic(Intr);
    llvm::Value *Val = Builder.CreateCall(F);
    Value *RandomValue = Builder.CreateExtractValue(Val, 0);
    Value *Status = Builder.CreateExtractValue(Val, 1);

    Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
    Builder.CreateStore(RandomValue, MemAddress);
    Status = Builder.CreateZExt(Status, Int32Ty);
    return Status;
  }

  if (BuiltinID == clang::AArch64::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    const FunctionDecl *FD = E->getDirectCallee();
    Value *Ops[2];
    for (unsigned i = 0; i < 2; i++)
      Ops[i] = EmitScalarExpr(E->getArg(i));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }

  if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
       BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
      getContext().getTypeSize(E->getType()) == 128) {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
                             ? Intrinsic::aarch64_ldaxp
                             : Intrinsic::aarch64_ldxp);

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");

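    // ldxp/ldaxp return the two halves of the 128-bit value as separate i64s,
    // with struct element 1 holding the high half; widen both halves to i128
    // and recombine them as (hi << 64) | lo.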
    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
    Val0 = Builder.CreateZExt(Val0, Int128Ty);
    Val1 = Builder.CreateZExt(Val1, Int128Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
             BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));

    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);
    llvm::Type *IntTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));

    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
                             ? Intrinsic::aarch64_ldaxr
                             : Intrinsic::aarch64_ldxr,
                         DefaultPtrTy);
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
    Val->addParamAttr(
        0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));

    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);

    llvm::Type *IntResTy = llvm::IntegerType::get(
        getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
    return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
                                 RealResTy);
  }

  if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
       BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
      getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
                             ? Intrinsic::aarch64_stlxp
                             : Intrinsic::aarch64_stxp);
    llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);

    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);

    Tmp = Tmp.withElementType(STy);
    llvm::Value *Val = Builder.CreateLoad(Tmp);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = EmitScalarExpr(E->getArg(1));
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
      BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy =
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));

    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
    else {
      llvm::Type *IntTy = llvm::IntegerType::get(
          getLLVMContext(),
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
    }

    Function *F =
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
                             ? Intrinsic::aarch64_stlxr
                             : Intrinsic::aarch64_stxr,
                         StoreAddr->getType());
    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
    CI->addParamAttr(
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
    return CI;
  }

  if (BuiltinID == clang::AArch64::BI__getReg) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::APSInt Value = Result.Val.getInt();
    LLVMContext &Context = CGM.getLLVMContext();
    std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);

    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::read_register, {Int64Ty});
    return Builder.CreateCall(F, Metadata);
  }

  if (BuiltinID == clang::AArch64::BI__break) {
    Expr::EvalResult Result;
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    llvm::Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
    return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::SingleThread);

  // CRC32
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::AArch64::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
  case clang::AArch64::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
  case clang::AArch64::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
  case clang::AArch64::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
  case clang::AArch64::BI__builtin_arm_crc32w:
    CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
  case clang::AArch64::BI__builtin_arm_crc32cw:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
  case clang::AArch64::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
  case clang::AArch64::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
    Function *F = CGM.getIntrinsic(CRCIntrinsicID);

    llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
    Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);

    return Builder.CreateCall(F, {Arg0, Arg1});
  }

  // Memory Operations (MOPS)
  if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
    Value *Dst = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *Size = EmitScalarExpr(E->getArg(2));
    Val = Builder.CreateTrunc(Val, Int8Ty);
    Size = Builder.CreateIntCast(Size, Int64Ty, false);
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
  }

  if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch ||
      BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
    return EmitRangePrefetchBuiltin(*this, BuiltinID, E);

  // Memory Tagging Extensions (MTE) Intrinsics
  Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case clang::AArch64::BI__builtin_arm_irg:
    MTEIntrinsicID = Intrinsic::aarch64_irg; break;
  case clang::AArch64::BI__builtin_arm_addg:
    MTEIntrinsicID = Intrinsic::aarch64_addg; break;
  case clang::AArch64::BI__builtin_arm_gmi:
    MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
  case clang::AArch64::BI__builtin_arm_ldg:
    MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
  case clang::AArch64::BI__builtin_arm_stg:
    MTEIntrinsicID = Intrinsic::aarch64_stg; break;
  case clang::AArch64::BI__builtin_arm_subp:
    MTEIntrinsicID = Intrinsic::aarch64_subp; break;
  }

  if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
    if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *Mask = EmitScalarExpr(E->getArg(1));

      Mask = Builder.CreateZExt(Mask, Int64Ty);
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {Pointer, Mask});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *TagOffset = EmitScalarExpr(E->getArg(1));

      TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {Pointer, TagOffset});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *ExcludedMask = EmitScalarExpr(E->getArg(1));

      ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
    }
    // Although it is possible to supply a different return address (first
    // arg) to this intrinsic, for now we set the return address to be the
    // same as the input address.
    if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {TagAddress, TagAddress});
    }
    // Although it is possible to supply a different tag (to set) to this
    // intrinsic (as the first arg), for now we supply the tag that is in the
    // input address argument (the common use case).
    if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
                                {TagAddress, TagAddress});
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
      Value *PointerA = EmitScalarExpr(E->getArg(0));
      Value *PointerB = EmitScalarExpr(E->getArg(1));
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
    }
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
      BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {

    SpecialRegisterAccessKind AccessKind = Write;
    if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
        BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
      AccessKind = VolatileRead;

    bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
                            BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;

    bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
                   BuiltinID == clang::AArch64::BI__builtin_arm_wsr;

    bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
                    BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;

    llvm::Type *ValueType;
    llvm::Type *RegisterType = Int64Ty;
    if (Is32Bit) {
      ValueType = Int32Ty;
    } else if (Is128Bit) {
      llvm::Type *Int128Ty =
          llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
      ValueType = Int128Ty;
      RegisterType = Int128Ty;
    } else if (IsPointerBuiltin) {
      ValueType = VoidPtrTy;
    } else {
      ValueType = Int64Ty;
    }

    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
                                      AccessKind);
  }

  if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
      BuiltinID == clang::AArch64::BI_WriteStatusReg ||
      BuiltinID == clang::AArch64::BI__sys) {
    LLVMContext &Context = CGM.getLLVMContext();

    unsigned SysReg =
        E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();

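    // Build the "op0:op1:CRn:CRm:op2" string form of the system register
    // expected by the read/write_register intrinsics. _ReadStatusReg and
    // _WriteStatusReg carry the low bit of op0 at bit 14 of the immediate
    // (so op0 is 2 or 3), while __sys always targets op0 == 1.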
    std::string SysRegStr;
    unsigned SysRegOp0 = (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
                          BuiltinID == clang::AArch64::BI_WriteStatusReg)
                             ? ((1 << 1) | ((SysReg >> 14) & 1))
                             : 1;
    llvm::raw_string_ostream(SysRegStr)
        << SysRegOp0 << ":" << ((SysReg >> 11) & 7) << ":"
        << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":"
        << (SysReg & 7);

    llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);

    llvm::Type *RegisterType = Int64Ty;
    llvm::Type *Types[] = { RegisterType };

    if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::read_register, Types);

      return Builder.CreateCall(F, Metadata);
    }

    llvm::Function *F = CGM.getIntrinsic(Intrinsic::write_register, Types);
    llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
    llvm::Value *Result = Builder.CreateCall(F, {Metadata, ArgValue});
    if (BuiltinID == clang::AArch64::BI__sys) {
      // Return 0 for convenience, even though MSVC returns some other
      // undefined value.
      Result = ConstantInt::get(Builder.getInt32Ty(), 0);
    }
    return Result;
  }

  if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
    llvm::Function *F =
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }

  if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
    return Builder.CreateCall(F);
  }

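  // __mulh/__umulh return the high 64 bits of a 64x64-bit multiply: widen
  // both operands to i128, multiply, and take the top half of the product.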
  if (BuiltinID == clang::AArch64::BI__mulh ||
      BuiltinID == clang::AArch64::BI__umulh) {
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);

    bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
    Value *LHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
    Value *RHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);

    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);

    return HigherBits;
  }

  if (BuiltinID == AArch64::BI__writex18byte ||
      BuiltinID == AArch64::BI__writex18word ||
      BuiltinID == AArch64::BI__writex18dword ||
      BuiltinID == AArch64::BI__writex18qword) {
    // Process the args first.
    Value *OffsetArg = EmitScalarExpr(E->getArg(0));
    Value *DataArg = EmitScalarExpr(E->getArg(1));

    // Read x18 as i8*.
    llvm::Value *X18 = readX18AsPtr(*this);

    // Store val at x18 + offset.
    Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    StoreInst *Store =
        Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
    return Store;
  }

  if (BuiltinID == AArch64::BI__readx18byte ||
      BuiltinID == AArch64::BI__readx18word ||
      BuiltinID == AArch64::BI__readx18dword ||
      BuiltinID == AArch64::BI__readx18qword) {
    // Process the args first.
    Value *OffsetArg = EmitScalarExpr(E->getArg(0));

    // Read x18 as i8*.
    llvm::Value *X18 = readX18AsPtr(*this);

    // Load x18 + offset.
    Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    llvm::Type *IntTy = ConvertType(E->getType());
    LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
    return Load;
  }

  if (BuiltinID == AArch64::BI__addx18byte ||
      BuiltinID == AArch64::BI__addx18word ||
      BuiltinID == AArch64::BI__addx18dword ||
      BuiltinID == AArch64::BI__addx18qword ||
      BuiltinID == AArch64::BI__incx18byte ||
      BuiltinID == AArch64::BI__incx18word ||
      BuiltinID == AArch64::BI__incx18dword ||
      BuiltinID == AArch64::BI__incx18qword) {
    llvm::Type *IntTy;
    bool isIncrement;
    switch (BuiltinID) {
    case AArch64::BI__incx18byte:
      IntTy = Int8Ty;
      isIncrement = true;
      break;
    case AArch64::BI__incx18word:
      IntTy = Int16Ty;
      isIncrement = true;
      break;
    case AArch64::BI__incx18dword:
      IntTy = Int32Ty;
      isIncrement = true;
      break;
    case AArch64::BI__incx18qword:
      IntTy = Int64Ty;
      isIncrement = true;
      break;
    default:
      IntTy = ConvertType(E->getArg(1)->getType());
      isIncrement = false;
      break;
    }
    // Process the args first.
    Value *OffsetArg = EmitScalarExpr(E->getArg(0));
    Value *ValToAdd =
        isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));

    // Read x18 as i8*.
    llvm::Value *X18 = readX18AsPtr(*this);

    // Load x18 + offset.
    Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
    LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());

    // Add values.
    Value *AddResult = Builder.CreateAdd(Load, ValToAdd);

    // Store val at x18 + offset.
    StoreInst *Store =
        Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
    return Store;
  }

  if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
      BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
      BuiltinID == AArch64::BI_CopyInt32FromFloat ||
      BuiltinID == AArch64::BI_CopyInt64FromDouble) {
    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *RetTy = ConvertType(E->getType());
    return Builder.CreateBitCast(Arg, RetTy);
  }

  if (BuiltinID == AArch64::BI_CountLeadingOnes ||
      BuiltinID == AArch64::BI_CountLeadingOnes64 ||
      BuiltinID == AArch64::BI_CountLeadingZeros ||
      BuiltinID == AArch64::BI_CountLeadingZeros64) {
    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = Arg->getType();

    if (BuiltinID == AArch64::BI_CountLeadingOnes ||
        BuiltinID == AArch64::BI_CountLeadingOnes64)
      Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));

    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
    Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});

    if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
        BuiltinID == AArch64::BI_CountLeadingZeros64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }

  if (BuiltinID == AArch64::BI_CountLeadingSigns ||
      BuiltinID == AArch64::BI_CountLeadingSigns64) {
    Value *Arg = EmitScalarExpr(E->getArg(0));

    Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
                      ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
                      : CGM.getIntrinsic(Intrinsic::aarch64_cls64);

    Value *Result = Builder.CreateCall(F, Arg, "cls");
    if (BuiltinID == AArch64::BI_CountLeadingSigns64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }

  if (BuiltinID == AArch64::BI_CountOneBits ||
      BuiltinID == AArch64::BI_CountOneBits64) {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    Value *Result = Builder.CreateCall(F, ArgValue);
    if (BuiltinID == AArch64::BI_CountOneBits64)
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
    return Result;
  }

  if (BuiltinID == AArch64::BI__prefetch) {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
    Value *Locality = ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
  }

  if (BuiltinID == AArch64::BI__hlt) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
    Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});

    // Return 0 for convenience, even though MSVC returns some other undefined
    // value.
    return ConstantInt::get(Builder.getInt32Ty(), 0);
  }

  if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
    return Builder.CreateFPTrunc(
        Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
                              Builder.getFloatTy()),
        Builder.getBFloatTy());

  // Handle MSVC intrinsics before argument evaluation to prevent double
  // evaluation.
  if (std::optional<MSVCIntrin> MsvcIntId =
          translateAarch64ToMsvcIntrin(BuiltinID))
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);

  // Some intrinsics are equivalent; if so, use the base intrinsic ID.
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
    return P.first == BuiltinID;
  });
  if (It != end(NEONEquivalentIntrinsicMap))
    BuiltinID = It->second;

  // Check whether this is an SISD builtin.
  auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
      SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
  bool IsSISD = (Builtin != nullptr);

  // Find out if any arguments are required to be integer constant
  // expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  llvm::SmallVector<Value *, 4> Ops;
  Address PtrOp0 = Address::invalid();
  // Note the assumption that SISD intrinsics do not contain extra arguments.
  // TODO: Fold this into a single function call instead of, effectively, two
  // separate checks.
  bool HasExtraArg = !IsSISD && HasExtraNeonArgument(BuiltinID);
  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
  for (unsigned i = 0, e = NumArgs; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld1_v:
      case NEON::BI__builtin_neon_vld1q_v:
      case NEON::BI__builtin_neon_vld1_dup_v:
      case NEON::BI__builtin_neon_vld1q_dup_v:
      case NEON::BI__builtin_neon_vld1_lane_v:
      case NEON::BI__builtin_neon_vld1q_lane_v:
      case NEON::BI__builtin_neon_vst1_v:
      case NEON::BI__builtin_neon_vst1q_v:
      case NEON::BI__builtin_neon_vst1_lane_v:
      case NEON::BI__builtin_neon_vst1q_lane_v:
      case NEON::BI__builtin_neon_vldap1_lane_s64:
      case NEON::BI__builtin_neon_vldap1q_lane_s64:
      case NEON::BI__builtin_neon_vstl1_lane_s64:
      case NEON::BI__builtin_neon_vstl1q_lane_s64:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(PtrOp0.emitRawPointer(*this));
        continue;
      }
    }
    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
  }

  if (Builtin) {
    Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
    assert(Result && "SISD intrinsic should have been handled");
    return Result;
  }

  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  NeonTypeFlags Type(0);
  if (std::optional<llvm::APSInt> Result =
          Arg->getIntegerConstantExpr(getContext()))
    // Determine the type of this overloaded NEON intrinsic.
    Type = NeonTypeFlags(Result->getZExtValue());

  bool usgn = Type.isUnsigned();
  bool quad = Type.isQuad();
  unsigned Int;

  // Not all intrinsics handled by the common case work for AArch64 yet, so
  // only defer to common code if it's been added to our special map.
  Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
                                        AArch64SIMDIntrinsicsProvenSorted);

  if (Builtin)
    return EmitCommonNeonBuiltinExpr(
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
        Builtin->NameHint, Builtin->TypeModifier, E, Ops,
        /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);

  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
    return V;

  // Handle non-overloaded intrinsics first.
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabsh_f16:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddq_p128: {
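    // Addition of 128-bit polynomials over GF(2) is just an XOR of the two
    // operands.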
    llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
    return Builder.CreateBitCast(Ops[0], Int128Ty);
  }
  case NEON::BI__builtin_neon_vldrq_p128: {
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    return Builder.CreateAlignedLoad(Int128Ty, Ptr,
                                     CharUnits::fromQuantity(16));
  }
  case NEON::BI__builtin_neon_vstrq_p128: {
    Value *Ptr = Ops[0];
    return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
  }
  case NEON::BI__builtin_neon_vcvts_f32_u32:
  case NEON::BI__builtin_neon_vcvtd_f64_u64:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvts_f32_s32:
  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
    bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
    llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
    llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvth_f16_u16:
  case NEON::BI__builtin_neon_vcvth_f16_u32:
  case NEON::BI__builtin_neon_vcvth_f16_u64:
    usgn = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvth_f16_s16:
  case NEON::BI__builtin_neon_vcvth_f16_s32:
  case NEON::BI__builtin_neon_vcvth_f16_s64: {
    llvm::Type *FTy = HalfTy;
    llvm::Type *InTy;
    if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
      InTy = Int64Ty;
    else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
      InTy = Int32Ty;
    else
      InTy = Int16Ty;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vcvtah_u16_f16:
  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
  case NEON::BI__builtin_neon_vcvtph_u16_f16:
  case NEON::BI__builtin_neon_vcvth_u16_f16:
  case NEON::BI__builtin_neon_vcvtah_s16_f16:
  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
  case NEON::BI__builtin_neon_vcvtph_s16_f16:
  case NEON::BI__builtin_neon_vcvth_s16_f16: {
    llvm::Type *InTy = Int16Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvtah_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtau; break;
    case NEON::BI__builtin_neon_vcvtmh_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtmu; break;
    case NEON::BI__builtin_neon_vcvtnh_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtnu; break;
    case NEON::BI__builtin_neon_vcvtph_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtpu; break;
    case NEON::BI__builtin_neon_vcvth_u16_f16:
      Int = Intrinsic::aarch64_neon_fcvtzu; break;
    case NEON::BI__builtin_neon_vcvtah_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtas; break;
    case NEON::BI__builtin_neon_vcvtmh_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtms; break;
    case NEON::BI__builtin_neon_vcvtnh_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtns; break;
    case NEON::BI__builtin_neon_vcvtph_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtps; break;
    case NEON::BI__builtin_neon_vcvth_s16_f16:
      Int = Intrinsic::aarch64_neon_fcvtzs; break;
    }
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
  }
  case NEON::BI__builtin_neon_vcaleh_f16:
  case NEON::BI__builtin_neon_vcalth_f16:
  case NEON::BI__builtin_neon_vcageh_f16:
  case NEON::BI__builtin_neon_vcagth_f16: {
    llvm::Type *InTy = Int32Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcageh_f16:
      Int = Intrinsic::aarch64_neon_facge; break;
    case NEON::BI__builtin_neon_vcagth_f16:
      Int = Intrinsic::aarch64_neon_facgt; break;
    case NEON::BI__builtin_neon_vcaleh_f16:
      Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
    case NEON::BI__builtin_neon_vcalth_f16:
      Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
    }
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
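  // The fixed-point conversion intrinsics are only overloaded for i32 at
  // half precision: the f16-to-fixed forms convert to i32 and truncate to
  // i16, while the fixed-to-f16 forms first widen their i16 operand to i32.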
  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
  case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
    llvm::Type *InTy = Int32Ty;
    llvm::Type *FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvth_n_s16_f16:
      Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
    case NEON::BI__builtin_neon_vcvth_n_u16_f16:
      Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
    }
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
  case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
    llvm::Type *FTy = HalfTy;
    llvm::Type *InTy = Int32Ty;
    llvm::Type *Tys[2] = {FTy, InTy};
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvth_n_f16_s16:
      Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
      Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
      break;
    case NEON::BI__builtin_neon_vcvth_n_f16_u16:
      Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
      Ops[0] = Builder.CreateZExt(Ops[0], InTy);
      break;
    }
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
  }
  case NEON::BI__builtin_neon_vpaddd_s64: {
    // TODO: Isn't this handled by
    // EmitCommonNeonSISDBuiltinExpr?
    auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
    // The vector is v2i64, so make sure it's bitcast to that.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2i64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
    // Pairwise addition of a v2i64 into a scalar i64.
    return Builder.CreateAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpaddd_f64: {
    auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
    // The vector is v2f64, so make sure it's bitcast to that.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
    // Pairwise addition of a v2f64 into a scalar f64.
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpadds_f32: {
    auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
    // The vector is v2f32, so make sure it's bitcast to that.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f32");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
    // Pairwise addition of a v2f32 into a scalar f32.
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  }
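  // Scalar compare-against-zero builtins: emit a compare with the matching
  // integer or ordered-FP predicate and sign-extend the i1 result into an
  // all-ones/all-zeros mask of the return type.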
  case NEON::BI__builtin_neon_vceqzd_s64:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vceqzd_f64:
  case NEON::BI__builtin_neon_vceqzs_f32:
  case NEON::BI__builtin_neon_vceqzh_f16:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OEQ, "vceqz");
  case NEON::BI__builtin_neon_vcgezd_s64:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SGE, "vcgez");
  case NEON::BI__builtin_neon_vcgezd_f64:
  case NEON::BI__builtin_neon_vcgezs_f32:
  case NEON::BI__builtin_neon_vcgezh_f16:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OGE, "vcgez");
  case NEON::BI__builtin_neon_vclezd_s64:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SLE, "vclez");
  case NEON::BI__builtin_neon_vclezd_f64:
  case NEON::BI__builtin_neon_vclezs_f32:
  case NEON::BI__builtin_neon_vclezh_f16:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OLE, "vclez");
  case NEON::BI__builtin_neon_vcgtzd_s64:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SGT, "vcgtz");
  case NEON::BI__builtin_neon_vcgtzd_f64:
  case NEON::BI__builtin_neon_vcgtzs_f32:
  case NEON::BI__builtin_neon_vcgtzh_f16:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OGT, "vcgtz");
  case NEON::BI__builtin_neon_vcltzd_s64:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::ICMP_SLT, "vcltz");

  case NEON::BI__builtin_neon_vcltzd_f64:
  case NEON::BI__builtin_neon_vcltzs_f32:
  case NEON::BI__builtin_neon_vcltzh_f16:
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OLT, "vcltz");

  case NEON::BI__builtin_neon_vceqzd_u64: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[0] =
        Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
  }
  case NEON::BI__builtin_neon_vceqd_f64:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcged_f64:
  case NEON::BI__builtin_neon_vcgtd_f64: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    // Ops only holds the first argument at this point; emit the second one
    // before the casts, as the f32/f16 blocks below do.
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqs_f32:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcges_f32:
  case NEON::BI__builtin_neon_vcgts_f32: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqh_f16:
  case NEON::BI__builtin_neon_vcleh_f16:
  case NEON::BI__builtin_neon_vclth_f16:
  case NEON::BI__builtin_neon_vcgeh_f16:
  case NEON::BI__builtin_neon_vcgth_f16: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
    if (P == llvm::FCmpInst::FCMP_OEQ)
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
  }
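  // Scalar 64-bit integer comparisons: pick the signed or unsigned predicate
  // for the builtin, compare, and sign-extend the i1 result to an i64 mask.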
  case NEON::BI__builtin_neon_vceqd_s64:
  case NEON::BI__builtin_neon_vceqd_u64:
  case NEON::BI__builtin_neon_vcgtd_s64:
  case NEON::BI__builtin_neon_vcgtd_u64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vcged_u64:
  case NEON::BI__builtin_neon_vcged_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcled_s64: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqd_s64:
    case NEON::BI__builtin_neon_vceqd_u64: P = llvm::ICmpInst::ICMP_EQ; break;
    case NEON::BI__builtin_neon_vcgtd_s64: P = llvm::ICmpInst::ICMP_SGT; break;
    case NEON::BI__builtin_neon_vcgtd_u64: P = llvm::ICmpInst::ICMP_UGT; break;
    case NEON::BI__builtin_neon_vcltd_s64: P = llvm::ICmpInst::ICMP_SLT; break;
    case NEON::BI__builtin_neon_vcltd_u64: P = llvm::ICmpInst::ICMP_ULT; break;
    case NEON::BI__builtin_neon_vcged_u64: P = llvm::ICmpInst::ICMP_UGE; break;
    case NEON::BI__builtin_neon_vcged_s64: P = llvm::ICmpInst::ICMP_SGE; break;
    case NEON::BI__builtin_neon_vcled_u64: P = llvm::ICmpInst::ICMP_ULE; break;
    case NEON::BI__builtin_neon_vcled_s64: P = llvm::ICmpInst::ICMP_SLE; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
  }
  case NEON::BI__builtin_neon_vnegd_s64:
    return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
  case NEON::BI__builtin_neon_vnegh_f16:
    return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
  case NEON::BI__builtin_neon_vtstd_s64:
  case NEON::BI__builtin_neon_vtstd_u64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                llvm::Constant::getNullValue(Int64Ty));
    return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
  }
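  // vset_lane: insert the scalar operand into the selected lane of the
  // vector operand.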
  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_bf16:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vset_lane_f64:
    // The vector type needs a cast for the v1f64 variant.
    Ops[1] =
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vset_lane_mf8:
  case NEON::BI__builtin_neon_vsetq_lane_mf8:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    // The mf8 scalar argument needs a cast to i8 before insertion.
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext()));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vsetq_lane_f64:
    // The vector type needs a cast for the v2f64 variant.
    Ops[1] =
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");

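  // vget_lane/vdup_lane: bitcast the input to the vector type implied by the
  // builtin, then extract the requested lane as a scalar.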
  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vdupb_lane_i8:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vdupb_laneq_i8:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_mf8:
  case NEON::BI__builtin_neon_vdupb_lane_mf8:
  case NEON::BI__builtin_neon_vgetq_lane_mf8:
  case NEON::BI__builtin_neon_vdupb_laneq_mf8:
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vduph_lane_i16:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vduph_laneq_i16:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vdups_lane_i32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vdups_lane_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vdups_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vdups_laneq_i32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vdupd_lane_i64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vdupd_lane_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vdupd_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vdupd_laneq_i64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vget_lane_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_f32:
  case NEON::BI__builtin_neon_vdups_laneq_f32:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vgetq_lane_f64:
  case NEON::BI__builtin_neon_vdupd_laneq_f64:
    Ops[0] =
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vaddh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
  case NEON::BI__builtin_neon_vsubh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
  case NEON::BI__builtin_neon_vmulh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
  case NEON::BI__builtin_neon_vdivh_f16:
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
  case NEON::BI__builtin_neon_vfmah_f16:
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
        {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
  case NEON::BI__builtin_neon_vfmsh_f16: {
    Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
        {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
  }
  case NEON::BI__builtin_neon_vaddd_s64:
  case NEON::BI__builtin_neon_vaddd_u64:
    return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
  case NEON::BI__builtin_neon_vsubd_s64:
  case NEON::BI__builtin_neon_vsubd_u64:
    return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
  case NEON::BI__builtin_neon_vqdmlalh_s16:
  case NEON::BI__builtin_neon_vqdmlslh_s16: {
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
    ProductOps.push_back(vectorWrapScalar16(Ops[2]));
    auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
    Ops[1] = EmitNeonCall(
        CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), ProductOps,
        "vqdmlXl");
    Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");

    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
                            ? Intrinsic::aarch64_neon_sqadd
                            : Intrinsic::aarch64_neon_sqsub;
    // Drop the 2nd multiplication argument before the accumulation.
    Ops.pop_back();
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
  }
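  // Scalar shift builtins: the AArch64 shift intrinsics take an i64 shift
  // amount, and rounding right shifts are expressed as rounding left shifts
  // by a negated amount.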
  case NEON::BI__builtin_neon_vqshlud_n_s64: {
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(
        CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), Ops,
        "vqshlu_n");
  }
  case NEON::BI__builtin_neon_vqshld_n_u64:
  case NEON::BI__builtin_neon_vqshld_n_s64: {
    Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
              ? Intrinsic::aarch64_neon_uqshl
              : Intrinsic::aarch64_neon_sqshl;
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
  }
  case NEON::BI__builtin_neon_vrshrd_n_u64:
  case NEON::BI__builtin_neon_vrshrd_n_s64: {
    Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
              ? Intrinsic::aarch64_neon_urshl
              : Intrinsic::aarch64_neon_srshl;
    int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
    Ops[1] = ConstantInt::get(Int64Ty, -SV);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
  }
  case NEON::BI__builtin_neon_vrsrad_n_u64:
  case NEON::BI__builtin_neon_vrsrad_n_s64: {
    Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
              ? Intrinsic::aarch64_neon_urshl
              : Intrinsic::aarch64_neon_srshl;
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[2] = Builder.CreateNeg(Ops[2]);
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
                                {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
    return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
  }
  case NEON::BI__builtin_neon_vshld_n_s64:
  case NEON::BI__builtin_neon_vshld_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateShl(
        Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateAShr(
        Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0.
    if (ShiftAmt == 64)
      return ConstantInt::get(Int64Ty, 0);
    return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
                              "shrd_n");
  }
  case NEON::BI__builtin_neon_vsrad_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    Ops[1] = Builder.CreateAShr(
        Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vsrad_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0.
    // As Op + 0 = Op, return Ops[0] directly.
    if (ShiftAmt == 64)
      return Ops[0];
    Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
                                "shrd_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
    Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
    ProductOps.push_back(vectorWrapScalar16(Ops[2]));
    auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
    Ops[1] = EmitNeonCall(
        CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), ProductOps,
        "vqdmlXl");
    Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
    // Drop lane-selection and the corresponding vector argument (these have
    // already been used).
    Ops.pop_back_n(2);

    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
                       BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
                          ? Intrinsic::aarch64_neon_sqadd
                          : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqdmlals_s32:
  case NEON::BI__builtin_neon_vqdmlsls_s32: {
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(Ops[1]);
    ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                     ProductOps, "vqdmlXl");

    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
                            ? Intrinsic::aarch64_neon_sqadd
                            : Intrinsic::aarch64_neon_sqsub;
    // Drop the 2nd multiplication argument before the accumulation.
    Ops.pop_back();
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
    Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(Ops[1]);
    ProductOps.push_back(Ops[2]);
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                     ProductOps, "vqdmlXl");
    // Drop lane-selection and the corresponding vector argument (these have
    // already been used).
    Ops.pop_back_n(2);

    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
                       BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
                          ? Intrinsic::aarch64_neon_sqadd
                          : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vget_lane_bf16:
  case NEON::BI__builtin_neon_vduph_lane_bf16:
  case NEON::BI__builtin_neon_vduph_lane_f16: {
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  }
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
  case NEON::BI__builtin_neon_vduph_laneq_f16: {
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  }
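  // bf16 narrowing conversions: truncate f32 lanes to bf16, zero-filling or
  // preserving the untouched half of the 128-bit destination as appropriate.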
  case NEON::BI__builtin_neon_vcvt_bf16_f32: {
    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
    return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
  }
  case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
    SmallVector<int, 16> ConcatMask(8);
    std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
    llvm::Value *Trunc =
        Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
    return Builder.CreateShuffleVector(
        Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
  }
  case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
    SmallVector<int, 16> ConcatMask(8);
    std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
    SmallVector<int, 16> LoMask(4);
    std::iota(LoMask.begin(), LoMask.end(), 0);
    llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
    llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
    llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
    llvm::Value *Inactive = Builder.CreateShuffleVector(
        Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
    llvm::Value *Trunc =
        Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
    return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
  }

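  // MSVC _InterlockedAdd[64] and its ordering variants: emit an atomicrmw
  // add with the requested memory ordering. The MSVC builtins return the new
  // value, so Val is added back to the old value the atomicrmw yields.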
  case clang::AArch64::BI_InterlockedAdd:
  case clang::AArch64::BI_InterlockedAdd_acq:
  case clang::AArch64::BI_InterlockedAdd_rel:
  case clang::AArch64::BI_InterlockedAdd_nf:
  case clang::AArch64::BI_InterlockedAdd64:
  case clang::AArch64::BI_InterlockedAdd64_acq:
  case clang::AArch64::BI_InterlockedAdd64_rel:
  case clang::AArch64::BI_InterlockedAdd64_nf: {
    Address DestAddr = CheckAtomicAlignment(*this, E);
    Value *Val = Ops[1];
    llvm::AtomicOrdering Ordering;
    switch (BuiltinID) {
    case clang::AArch64::BI_InterlockedAdd:
    case clang::AArch64::BI_InterlockedAdd64:
      Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
      break;
    case clang::AArch64::BI_InterlockedAdd_acq:
    case clang::AArch64::BI_InterlockedAdd64_acq:
      Ordering = llvm::AtomicOrdering::Acquire;
      break;
    case clang::AArch64::BI_InterlockedAdd_rel:
    case clang::AArch64::BI_InterlockedAdd64_rel:
      Ordering = llvm::AtomicOrdering::Release;
      break;
    case clang::AArch64::BI_InterlockedAdd_nf:
    case clang::AArch64::BI_InterlockedAdd64_nf:
      Ordering = llvm::AtomicOrdering::Monotonic;
      break;
    default:
      llvm_unreachable("missing builtin ID in switch!");
    }
    AtomicRMWInst *RMWI =
        Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering);
    return Builder.CreateAdd(RMWI, Val);
  }
  }

  llvm::FixedVectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  bool ExtractLow = false;
  bool ExtendLaneArg = false;
  switch (BuiltinID) {
  default: return nullptr;
  case NEON::BI__builtin_neon_vbsl_v:
  case NEON::BI__builtin_neon_vbslq_v: {
    llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
    Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
    Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");

    Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
    Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
    Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
    return Builder.CreateBitCast(Ops[0], Ty);
  }
  case NEON::BI__builtin_neon_vfma_lane_v:
  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
    // The ARM builtins (and instructions) have the addend as the first
    // operand, but the 'fma' intrinsics have it last. Swap it around here.
    Value *Addend = Ops[0];
    Value *Multiplicand = Ops[1];
    Value *LaneSource = Ops[2];
    Ops[0] = Multiplicand;
    Ops[1] = LaneSource;
    Ops[2] = Addend;

    // Now adjust things to handle the lane access.
    auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
                         ? llvm::FixedVectorType::get(VTy->getElementType(),
                                                      VTy->getNumElements() / 2)
                         : VTy;
    llvm::Constant *cst = cast<Constant>(Ops[3]);
    Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
    Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
    Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");

    Ops.pop_back();
    Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
                                       : Intrinsic::fma;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
  }
  case NEON::BI__builtin_neon_vfma_laneq_v: {
    auto *VTy = cast<llvm::FixedVectorType>(Ty);
    // v1f64 fma should be mapped to Neon scalar f64 fma.
    if (VTy && VTy->getElementType() == DoubleTy) {
      Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
      Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
      llvm::FixedVectorType *VTy =
          GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
      Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
      Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
      Value *Result;
      Result = emitCallMaybeConstrainedFPBuiltin(
          *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
          DoubleTy, {Ops[1], Ops[2], Ops[0]});
      return Builder.CreateBitCast(Result, Ty);
    }
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
                                           VTy->getNumElements() * 2);
    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
    Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
                                               cast<ConstantInt>(Ops[3]));
    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");

    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[2], Ops[1], Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[2], Ops[1], Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmah_lane_f16:
  case NEON::BI__builtin_neon_vfmas_lane_f32:
  case NEON::BI__builtin_neon_vfmah_laneq_f16:
  case NEON::BI__builtin_neon_vfmas_laneq_f32:
  case NEON::BI__builtin_neon_vfmad_lane_f64:
  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(3)));
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
    return emitCallMaybeConstrainedFPBuiltin(
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
        {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
    if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vmax_v:
  case NEON::BI__builtin_neon_vmaxq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
  case NEON::BI__builtin_neon_vmaxh_f16: {
    Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
  }
  case NEON::BI__builtin_neon_vmin_v:
  case NEON::BI__builtin_neon_vminq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
  case NEON::BI__builtin_neon_vminh_f16: {
    Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
  }
  case NEON::BI__builtin_neon_vabd_v:
  case NEON::BI__builtin_neon_vabdq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    unsigned ArgElts = VTy->getNumElements();
    llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
    unsigned BitWidth = EltTy->getBitWidth();
    auto *ArgTy = llvm::FixedVectorType::get(
        llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
    llvm::Type* Tys[2] = { VTy, ArgTy };
    Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
    SmallVector<llvm::Value*, 1> TmpOps;
    TmpOps.push_back(Ops[1]);
    Function *F = CGM.getIntrinsic(Int, Tys);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
    llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
    return Builder.CreateAdd(tmp, addend);
  }
  case NEON::BI__builtin_neon_vpmin_v:
  case NEON::BI__builtin_neon_vpminq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
  case NEON::BI__builtin_neon_vpmax_v:
  case NEON::BI__builtin_neon_vpmaxq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
  case NEON::BI__builtin_neon_vminnm_v:
  case NEON::BI__builtin_neon_vminnmq_v:
    Int = Intrinsic::aarch64_neon_fminnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
  case NEON::BI__builtin_neon_vminnmh_f16:
    Int = Intrinsic::aarch64_neon_fminnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
  case NEON::BI__builtin_neon_vmaxnm_v:
  case NEON::BI__builtin_neon_vmaxnmq_v:
    Int = Intrinsic::aarch64_neon_fmaxnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
  case NEON::BI__builtin_neon_vmaxnmh_f16:
    Int = Intrinsic::aarch64_neon_fmaxnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
  case NEON::BI__builtin_neon_vrecpss_f32: {
    return EmitNeonCall(
        CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), Ops,
        "vrecps");
  }
  case NEON::BI__builtin_neon_vrecpsd_f64:
    return EmitNeonCall(
        CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), Ops,
        "vrecps");
  case NEON::BI__builtin_neon_vrecpsh_f16:
    return EmitNeonCall(
        CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy), Ops,
        "vrecps");
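  // Narrowing shifts map directly onto the corresponding AArch64
  // (rounding/saturating) shift-right-narrow intrinsics.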
  case NEON::BI__builtin_neon_vqshrun_n_v:
    Int = Intrinsic::aarch64_neon_sqshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    Int = Intrinsic::aarch64_neon_sqrshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
  case NEON::BI__builtin_neon_vrshrn_n_v:
    Int = Intrinsic::aarch64_neon_rshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
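  // FP rounding builtins lower to the matching generic LLVM rounding
  // intrinsic, switching to the constrained variant under strict FP.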
  case NEON::BI__builtin_neon_vrndah_f16: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_round
              : Intrinsic::round;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
  }
  case NEON::BI__builtin_neon_vrnda_v:
  case NEON::BI__builtin_neon_vrndaq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_round
              : Intrinsic::round;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
  }
  case NEON::BI__builtin_neon_vrndih_f16: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_nearbyint
              : Intrinsic::nearbyint;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
  }
  case NEON::BI__builtin_neon_vrndmh_f16: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_floor
              : Intrinsic::floor;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
  }
  case NEON::BI__builtin_neon_vrndm_v:
  case NEON::BI__builtin_neon_vrndmq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_floor
              : Intrinsic::floor;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
  }
  case NEON::BI__builtin_neon_vrndnh_f16: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_roundeven
              : Intrinsic::roundeven;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndn_v:
  case NEON::BI__builtin_neon_vrndnq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_roundeven
              : Intrinsic::roundeven;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndns_f32: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_roundeven
              : Intrinsic::roundeven;
    return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndph_f16: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_ceil
              : Intrinsic::ceil;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
  }
  case NEON::BI__builtin_neon_vrndp_v:
  case NEON::BI__builtin_neon_vrndpq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_ceil
              : Intrinsic::ceil;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
  }
  case NEON::BI__builtin_neon_vrndxh_f16: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_rint
              : Intrinsic::rint;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
  }
  case NEON::BI__builtin_neon_vrndx_v:
  case NEON::BI__builtin_neon_vrndxq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_rint
              : Intrinsic::rint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
  }
  case NEON::BI__builtin_neon_vrndh_f16: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_trunc
              : Intrinsic::trunc;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
  }
  case NEON::BI__builtin_neon_vrnd32x_f32:
  case NEON::BI__builtin_neon_vrnd32xq_f32:
  case NEON::BI__builtin_neon_vrnd32x_f64:
  case NEON::BI__builtin_neon_vrnd32xq_f64: {
    Int = Intrinsic::aarch64_neon_frint32x;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
  }
  case NEON::BI__builtin_neon_vrnd32z_f32:
  case NEON::BI__builtin_neon_vrnd32zq_f32:
  case NEON::BI__builtin_neon_vrnd32z_f64:
  case NEON::BI__builtin_neon_vrnd32zq_f64: {
    Int = Intrinsic::aarch64_neon_frint32z;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
  }
  case NEON::BI__builtin_neon_vrnd64x_f32:
  case NEON::BI__builtin_neon_vrnd64xq_f32:
  case NEON::BI__builtin_neon_vrnd64x_f64:
  case NEON::BI__builtin_neon_vrnd64xq_f64: {
    Int = Intrinsic::aarch64_neon_frint64x;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
  }
  case NEON::BI__builtin_neon_vrnd64z_f32:
  case NEON::BI__builtin_neon_vrnd64zq_f32:
  case NEON::BI__builtin_neon_vrnd64z_f64:
  case NEON::BI__builtin_neon_vrnd64zq_f64: {
    Int = Intrinsic::aarch64_neon_frint64z;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
  }
  case NEON::BI__builtin_neon_vrnd_v:
  case NEON::BI__builtin_neon_vrndq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_trunc
              : Intrinsic::trunc;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
  }
  case NEON::BI__builtin_neon_vcvt_f64_v:
  case NEON::BI__builtin_neon_vcvtq_f64_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f64_f32: {
    assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
           "unexpected vcvt_f64_f32 builtin");
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));

    return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvt_f32_f64: {
    assert(Type.getEltType() == NeonTypeFlags::Float32 &&
           "unexpected vcvt_f32_f64 builtin");
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));

    return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
  }
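  // FP-to-integer conversions: vcvt[q] uses round-toward-zero, while the
  // vcvta/m/n/p forms select the rounding mode via the matching aarch64
  // fcvt* intrinsic.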
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvt_s16_f16:
  case NEON::BI__builtin_neon_vcvt_u16_f16:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s16_f16:
  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
    Int =
        usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
    llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
  }
  case NEON::BI__builtin_neon_vcvta_s16_f16:
  case NEON::BI__builtin_neon_vcvta_u16_f16:
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
  }
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
  }
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
  }
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
  }
  case NEON::BI__builtin_neon_vmulx_v:
  case NEON::BI__builtin_neon_vmulxq_v: {
    Int = Intrinsic::aarch64_neon_fmulx;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
  }
  case NEON::BI__builtin_neon_vmulxh_lane_f16:
  case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
    // vmulx_lane should be mapped to Neon scalar mulx after
    // extracting the scalar element.
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
    Ops.pop_back();
    Int = Intrinsic::aarch64_neon_fmulx;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
  }
  case NEON::BI__builtin_neon_vmul_lane_v:
  case NEON::BI__builtin_neon_vmul_laneq_v: {
    // v1f64 vmul_lane should be mapped to Neon scalar mul lane.
    bool Quad = false;
    if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
      Quad = true;
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    llvm::FixedVectorType *VTy =
        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
    Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
    return Builder.CreateBitCast(Result, Ty);
  }
  case NEON::BI__builtin_neon_vpmaxnm_v:
  case NEON::BI__builtin_neon_vpmaxnmq_v: {
    Int = Intrinsic::aarch64_neon_fmaxnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
  }
  case NEON::BI__builtin_neon_vpminnm_v:
  case NEON::BI__builtin_neon_vpminnmq_v: {
    Int = Intrinsic::aarch64_neon_fminnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
  }
  case NEON::BI__builtin_neon_vsqrth_f16: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_sqrt
              : Intrinsic::sqrt;
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
  }
  case NEON::BI__builtin_neon_vsqrt_v:
  case NEON::BI__builtin_neon_vsqrtq_v: {
    Int = Builder.getIsFPConstrained()
              ? Intrinsic::experimental_constrained_sqrt
              : Intrinsic::sqrt;
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
  }
  case NEON::BI__builtin_neon_vrbit_v:
  case NEON::BI__builtin_neon_vrbitq_v: {
    Int = Intrinsic::bitreverse;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
  }
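  // f16 horizontal reductions: fmaxv/fminv and the fmaxnmv/fminnmv variants
  // over 4- or 8-element half vectors.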
  case NEON::BI__builtin_neon_vmaxv_f16: {
    Int = Intrinsic::aarch64_neon_fmaxv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vmaxvq_f16: {
    Int = Intrinsic::aarch64_neon_fmaxv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminv_f16: {
    Int = Intrinsic::aarch64_neon_fminv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminvq_f16: {
    Int = Intrinsic::aarch64_neon_fminv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vmaxnmv_f16: {
    Int = Intrinsic::aarch64_neon_fmaxnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vmaxnmvq_f16: {
    Int = Intrinsic::aarch64_neon_fmaxnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminnmv_f16: {
    Int = Intrinsic::aarch64_neon_fminnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vminnmvq_f16: {
    Int = Intrinsic::aarch64_neon_fminnmv;
    Ty = HalfTy;
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
    return Builder.CreateTrunc(Ops[0], HalfTy);
  }
  case NEON::BI__builtin_neon_vmul_n_f64: {
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Value *RHS = Builder.CreateBitCast(Ops[1], DoubleTy);
    return Builder.CreateFMul(Ops[0], RHS);
  }
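  // Widening add-across-vector (vaddlv/vaddlvq). The aarch64.neon.[su]addlv
  // intrinsics produce an i32 result; for 8-bit element inputs that result
  // is truncated back to the builtin's declared i16 return type.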
  case NEON::BI__builtin_neon_vaddlv_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = {Ty, VTy};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlv_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = {Ty, VTy};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = {Ty, VTy};
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = {Ty, VTy};
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
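  // Shift-and-insert: vsri_n/vsli_n map directly onto the dedicated
  // aarch64.neon.vsri/vsli intrinsics, with the shift amount passed through
  // as the last operand.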
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v: {
    Int = Intrinsic::aarch64_neon_vsri;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsri_n");
  }
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v: {
    Int = Intrinsic::aarch64_neon_vsli;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsli_n");
  }
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v: {
    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
    SmallVector<llvm::Value*,2> TmpOps;
    TmpOps.push_back(Ops[1]);
    TmpOps.push_back(Ops[2]);
    Function* F = CGM.getIntrinsic(Int, Ty);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return Builder.CreateAdd(Ops[0], tmp);
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
  }
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
  case NEON::BI__builtin_neon_vld1_lane_v:
  case NEON::BI__builtin_neon_vld1q_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
                                       PtrOp0.getAlignment());
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vldap1_lane_s64:
  case NEON::BI__builtin_neon_vldap1q_lane_s64: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    llvm::LoadInst *LI = Builder.CreateAlignedLoad(
        VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
    LI->setAtomic(llvm::AtomicOrdering::Acquire);
    Ops[0] = LI;
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = PoisonValue::get(Ty);
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
                                       PtrOp0.getAlignment());
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vst1_lane_v:
  case NEON::BI__builtin_neon_vst1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
  case NEON::BI__builtin_neon_vstl1_lane_s64:
  case NEON::BI__builtin_neon_vstl1q_lane_s64: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    llvm::StoreInst *SI =
        Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
    SI->setAtomic(llvm::AtomicOrdering::Release);
    return SI;
  }
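  // Structured loads (vld2/vld3/vld4 and their _dup forms). The ldN
  // intrinsics return an aggregate of N vectors, which is stored through the
  // return-slot pointer held in Ops[0].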
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v: {
    llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v: {
    llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v: {
    llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v: {
    llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    llvm::Type *Tys[2] = {VTy, DefaultPtrTy};
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
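  // Lane-wise structured loads. std::rotate moves the source pointer from
  // Ops[1] to the back of the operand list so the call matches the ldNlane
  // intrinsic's (vectors..., lane, pointer) signature; Ops[0] keeps the
  // return slot that the aggregate result is stored to.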
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
                        Ops, "");
  }
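  // vtrn/vuzp/vzip produce a pair of vectors: each half is computed with a
  // single shufflevector of the two inputs and stored to consecutive slots
  // of the result pointer in Ops[0].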
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<int, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vqtbl1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
                        Ops, "vtbl1");
  }
  case NEON::BI__builtin_neon_vqtbl2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
                        Ops, "vtbl2");
  }
  case NEON::BI__builtin_neon_vqtbl3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
                        Ops, "vtbl3");
  }
  case NEON::BI__builtin_neon_vqtbl4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
                        Ops, "vtbl4");
  }
  case NEON::BI__builtin_neon_vqtbx1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
                        Ops, "vtbx1");
  }
  case NEON::BI__builtin_neon_vqtbx2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
                        Ops, "vtbx2");
  }
  case NEON::BI__builtin_neon_vqtbx3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
                        Ops, "vtbx3");
  }
  case NEON::BI__builtin_neon_vqtbx4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
                        Ops, "vtbx4");
  }
  case NEON::BI__builtin_neon_vsqadd_v:
  case NEON::BI__builtin_neon_vsqaddq_v: {
    Int = Intrinsic::aarch64_neon_usqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
  }
  case NEON::BI__builtin_neon_vuqadd_v:
  case NEON::BI__builtin_neon_vuqaddq_v: {
    Int = Intrinsic::aarch64_neon_suqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
  }

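  // Lookup-table builtins (LUTI2/LUTI4). The vluti2 intrinsics are
  // overloaded on both the result vector type and the type of the first
  // (table) operand; the vluti4q forms are overloaded on the result type
  // only.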
  case NEON::BI__builtin_neon_vluti2_laneq_mf8:
  case NEON::BI__builtin_neon_vluti2_laneq_bf16:
  case NEON::BI__builtin_neon_vluti2_laneq_f16:
  case NEON::BI__builtin_neon_vluti2_laneq_p16:
  case NEON::BI__builtin_neon_vluti2_laneq_p8:
  case NEON::BI__builtin_neon_vluti2_laneq_s16:
  case NEON::BI__builtin_neon_vluti2_laneq_s8:
  case NEON::BI__builtin_neon_vluti2_laneq_u16:
  case NEON::BI__builtin_neon_vluti2_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_laneq;
    llvm::Type *Tys[2];
    Tys[0] = Ty;
    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ false));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
  }
  case NEON::BI__builtin_neon_vluti2q_laneq_mf8:
  case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
  case NEON::BI__builtin_neon_vluti2q_laneq_f16:
  case NEON::BI__builtin_neon_vluti2q_laneq_p16:
  case NEON::BI__builtin_neon_vluti2q_laneq_p8:
  case NEON::BI__builtin_neon_vluti2q_laneq_s16:
  case NEON::BI__builtin_neon_vluti2q_laneq_s8:
  case NEON::BI__builtin_neon_vluti2q_laneq_u16:
  case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_laneq;
    llvm::Type *Tys[2];
    Tys[0] = Ty;
    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ true));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
  }
  case NEON::BI__builtin_neon_vluti2_lane_mf8:
  case NEON::BI__builtin_neon_vluti2_lane_bf16:
  case NEON::BI__builtin_neon_vluti2_lane_f16:
  case NEON::BI__builtin_neon_vluti2_lane_p16:
  case NEON::BI__builtin_neon_vluti2_lane_p8:
  case NEON::BI__builtin_neon_vluti2_lane_s16:
  case NEON::BI__builtin_neon_vluti2_lane_s8:
  case NEON::BI__builtin_neon_vluti2_lane_u16:
  case NEON::BI__builtin_neon_vluti2_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_lane;
    llvm::Type *Tys[2];
    Tys[0] = Ty;
    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ false));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
  }
  case NEON::BI__builtin_neon_vluti2q_lane_mf8:
  case NEON::BI__builtin_neon_vluti2q_lane_bf16:
  case NEON::BI__builtin_neon_vluti2q_lane_f16:
  case NEON::BI__builtin_neon_vluti2q_lane_p16:
  case NEON::BI__builtin_neon_vluti2q_lane_p8:
  case NEON::BI__builtin_neon_vluti2q_lane_s16:
  case NEON::BI__builtin_neon_vluti2q_lane_s8:
  case NEON::BI__builtin_neon_vluti2q_lane_u16:
  case NEON::BI__builtin_neon_vluti2q_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti2_lane;
    llvm::Type *Tys[2];
    Tys[0] = Ty;
    Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
                                             /*isQuad*/ true));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
  }
  case NEON::BI__builtin_neon_vluti4q_lane_mf8:
  case NEON::BI__builtin_neon_vluti4q_lane_p8:
  case NEON::BI__builtin_neon_vluti4q_lane_s8:
  case NEON::BI__builtin_neon_vluti4q_lane_u8: {
    Int = Intrinsic::aarch64_neon_vluti4q_lane;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
  }
  case NEON::BI__builtin_neon_vluti4q_laneq_mf8:
  case NEON::BI__builtin_neon_vluti4q_laneq_p8:
  case NEON::BI__builtin_neon_vluti4q_laneq_s8:
  case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
    Int = Intrinsic::aarch64_neon_vluti4q_laneq;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
  }
  case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
  case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
    Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
  }
  case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
  case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
    Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
  }
  case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
                           {llvm::FixedVectorType::get(HalfTy, 8),
                            llvm::FixedVectorType::get(Int8Ty, 16)},
                           Ops, E, "fmmla");
  case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla,
                           {llvm::FixedVectorType::get(FloatTy, 4),
                            llvm::FixedVectorType::get(Int8Ty, 16)},
                           Ops, E, "fmmla");
  case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
                              llvm::FixedVectorType::get(BFloatTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
  case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
                              llvm::FixedVectorType::get(BFloatTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
  case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
                              llvm::FixedVectorType::get(HalfTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
  case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
    ExtractLow = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
                              llvm::FixedVectorType::get(HalfTy, 8),
                              Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
  case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
                              llvm::FixedVectorType::get(Int8Ty, 8),
                              Ops[0]->getType(), false, Ops, E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
                              llvm::FixedVectorType::get(Int8Ty, 8),
                              llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
                              E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
                              llvm::FixedVectorType::get(Int8Ty, 16),
                              llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
                              E, "vfcvtn");
  case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
    llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
    Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
                                        uint64_t(0));
    return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
                              Ops[1]->getType(), false, Ops, E, "vfcvtn2");
  }

  case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
    return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
                               Ops, E, "fdot2");
  case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
                               ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
  case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
    return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
                               FloatTy, Ops, E, "fdot4");
  case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
  case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");

  case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
                           {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
                           "vmlal");
  case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
                           {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
                           "vmlal");
  case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
    return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
                           {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
                           "vmlall");
  case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
                               ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
  case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
                               ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
  case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
    ExtendLaneArg = true;
    [[fallthrough]];
  case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
    return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
                               ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
  case NEON::BI__builtin_neon_vamin_f16:
  case NEON::BI__builtin_neon_vaminq_f16:
  case NEON::BI__builtin_neon_vamin_f32:
  case NEON::BI__builtin_neon_vaminq_f32:
  case NEON::BI__builtin_neon_vaminq_f64: {
    Int = Intrinsic::aarch64_neon_famin;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
  }
  case NEON::BI__builtin_neon_vamax_f16:
  case NEON::BI__builtin_neon_vamaxq_f16:
  case NEON::BI__builtin_neon_vamax_f32:
  case NEON::BI__builtin_neon_vamaxq_f32:
  case NEON::BI__builtin_neon_vamaxq_f64: {
    Int = Intrinsic::aarch64_neon_famax;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
  }
  case NEON::BI__builtin_neon_vscale_f16:
  case NEON::BI__builtin_neon_vscaleq_f16:
  case NEON::BI__builtin_neon_vscale_f32:
  case NEON::BI__builtin_neon_vscaleq_f32:
  case NEON::BI__builtin_neon_vscaleq_f64: {
    Int = Intrinsic::aarch64_neon_fp8_fscale;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
  }
  }
}

Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
          BuiltinID == BPF::BI__builtin_btf_type_id ||
          BuiltinID == BPF::BI__builtin_preserve_type_info ||
          BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
         "unexpected BPF builtin");

  // A sequence number, injected into IR builtin functions, to prevent CSE
  // when the only difference between otherwise identical calls is the
  // debuginfo metadata attached to them.
  static uint32_t BuiltinSeqNum;

  switch (BuiltinID) {
  default:
    llvm_unreachable("Unexpected BPF builtin");
  case BPF::BI__builtin_preserve_field_info: {
    const Expr *Arg = E->getArg(0);
    bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;

    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(),
                "using __builtin_preserve_field_info() without -g");
      return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
                        : EmitLValue(Arg).emitRawPointer(*this);
    }

    // Enable underlying preserve_*_access_index() generation.
    bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
    IsInPreservedAIRegion = true;
    Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
                                  : EmitLValue(Arg).emitRawPointer(*this);
    IsInPreservedAIRegion = OldIsInPreservedAIRegion;

    ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());

    // Build the IR for the preserve_field_info intrinsic.
    llvm::Function *FnGetFieldInfo = Intrinsic::getOrInsertDeclaration(
        &CGM.getModule(), Intrinsic::bpf_preserve_field_info,
        {FieldAddr->getType()});
    return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
  }
  case BPF::BI__builtin_btf_type_id:
  case BPF::BI__builtin_preserve_type_info: {
    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
      return nullptr;
    }

    const Expr *Arg0 = E->getArg(0);
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
        Arg0->getType(), Arg0->getExprLoc());

    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);

    llvm::Function *FnDecl;
    if (BuiltinID == BPF::BI__builtin_btf_type_id)
      FnDecl = Intrinsic::getOrInsertDeclaration(
          &CGM.getModule(), Intrinsic::bpf_btf_type_id, {});
    else
      FnDecl = Intrinsic::getOrInsertDeclaration(
          &CGM.getModule(), Intrinsic::bpf_preserve_type_info, {});
    CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
    return Fn;
  }
  case BPF::BI__builtin_preserve_enum_value: {
    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
      return nullptr;
    }

    const Expr *Arg0 = E->getArg(0);
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
        Arg0->getType(), Arg0->getExprLoc());

    // Find the enumerator.
    const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
    const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
    const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
    const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());

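    // Encode the enumerator as a "Name:Value" string; e.g. for
    // `enum { E = 10 };`, EnumStr below is "E:10". The BPF backend uses
    // this string to compute the relocated enum value.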
    auto InitVal = Enumerator->getInitVal();
    std::string InitValStr;
    if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
      InitValStr = std::to_string(InitVal.getSExtValue());
    else
      InitValStr = std::to_string(InitVal.getZExtValue());
    std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
    Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);

    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);

    llvm::Function *IntrinsicFn = Intrinsic::getOrInsertDeclaration(
        &CGM.getModule(), Intrinsic::bpf_preserve_enum_value, {});
    CallInst *Fn =
        Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
    return Fn;
  }
  }
}

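// Build a vector from a power-of-two number of scalar operands. If every
// operand is a Constant the result folds to a ConstantVector; otherwise a
// chain of insertelement instructions is emitted, starting from poison.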
llvm::Value *CodeGenFunction::
BuildVector(ArrayRef<llvm::Value*> Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    SmallVector<llvm::Constant*, 16> CstOps;
    for (llvm::Value *Op : Ops)
      CstOps.push_back(cast<Constant>(Op));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result = llvm::PoisonValue::get(
      llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));

  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));

  return Result;
}

Value *CodeGenFunction::EmitAArch64CpuInit() {
  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
  llvm::FunctionCallee Func =
      CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
  cast<llvm::GlobalValue>(Func.getCallee())
      ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
  return Builder.CreateCall(Func);
}

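// Lower the AArch64 form of __builtin_cpu_supports("feat1+feat2+..."): parse
// the '+'-separated FMV feature names, then AND the corresponding bit mask
// against the bitfield the runtime publishes in __aarch64_cpu_features
// (see the overload below).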
Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
  const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
  StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
  llvm::SmallVector<StringRef, 8> Features;
  ArgStr.split(Features, "+");
  for (auto &Feature : Features) {
    Feature = Feature.trim();
    if (!llvm::AArch64::parseFMVExtension(Feature))
      return Builder.getFalse();
    if (Feature != "default")
      Features.push_back(Feature);
  }
  return EmitAArch64CpuSupports(Features);
}

llvm::Value *
CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
  llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
  Value *Result = Builder.getTrue();
  if (FeaturesMask != 0) {
    // Get features from the structure in the runtime library:
    // struct {
    //   unsigned long long features;
    // } __aarch64_cpu_features;
    llvm::Type *STy = llvm::StructType::get(Int64Ty);
    llvm::Constant *AArch64CPUFeatures =
        CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
    cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
    llvm::Value *CpuFeatures = Builder.CreateGEP(
        STy, AArch64CPUFeatures,
        {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
    Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
                                                CharUnits::fromQuantity(8));
    Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
    Value *Bitset = Builder.CreateAnd(Features, Mask);
    Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
    Result = Builder.CreateAnd(Result, Cmp);
  }
  return Result;
}
