//===--- InterpBuiltin.cpp - Interpreter for the constexpr VM ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "../ExprConstShared.h"
#include "Boolean.h"
#include "EvalEmitter.h"
#include "InterpBuiltinBitCast.h"
#include "InterpHelpers.h"
#include "PrimType.h"
#include "Program.h"
#include "clang/AST/InferAlloc.h"
#include "clang/AST/OSLog.h"
#include "clang/AST/RecordLayout.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/AllocToken.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SipHash.h"

namespace clang {
namespace interp {

[[maybe_unused]] static bool isNoopBuiltin(unsigned ID) {
  switch (ID) {
  case Builtin::BIas_const:
  case Builtin::BIforward:
  case Builtin::BIforward_like:
  case Builtin::BImove:
  case Builtin::BImove_if_noexcept:
  case Builtin::BIaddressof:
  case Builtin::BI__addressof:
  case Builtin::BI__builtin_addressof:
  case Builtin::BI__builtin_launder:
    return true;
  default:
    return false;
  }
}

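/// Discards a value of primitive type \p T from the stack.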
static void discard(InterpStack &Stk, PrimType T) {
  TYPE_SWITCH(T, { Stk.discard<T>(); });
}

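/// Pops a value of \p E's integral type from the stack and returns it as a
/// uint64_t.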
static uint64_t popToUInt64(const InterpState &S, const Expr *E) {
  INT_TYPE_SWITCH(*S.getContext().classify(E->getType()),
                  return static_cast<uint64_t>(S.Stk.pop<T>()));
}

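/// Pops a value of primitive type \p T from \p Stk and converts it to an
/// APSInt.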
static APSInt popToAPSInt(InterpStack &Stk, PrimType T) {
  INT_TYPE_SWITCH(T, return Stk.pop<T>().toAPSInt());
}

static APSInt popToAPSInt(InterpState &S, const Expr *E) {
  return popToAPSInt(S.Stk, *S.getContext().classify(E->getType()));
}
static APSInt popToAPSInt(InterpState &S, QualType T) {
  return popToAPSInt(S.Stk, *S.getContext().classify(T));
}

/// Check for common reasons a pointer can't be read from, which
/// are usually not diagnosed in a builtin function.
static bool isReadable(const Pointer &P) {
  if (P.isDummy())
    return false;
  if (!P.isBlockPointer())
    return false;
  if (!P.isLive())
    return false;
  if (P.isOnePastEnd())
    return false;
  return true;
}

/// Pushes \p Val on the stack as the type given by \p QT.
static void pushInteger(InterpState &S, const APSInt &Val, QualType QT) {
  assert(QT->isSignedIntegerOrEnumerationType() ||
         QT->isUnsignedIntegerOrEnumerationType());
  OptPrimType T = S.getContext().classify(QT);
  assert(T);
  unsigned BitWidth = S.getASTContext().getIntWidth(QT);

  if (T == PT_IntAPS) {
    auto Result = S.allocAP<IntegralAP<true>>(BitWidth);
    Result.copy(Val);
    S.Stk.push<IntegralAP<true>>(Result);
    return;
  }

  if (T == PT_IntAP) {
    auto Result = S.allocAP<IntegralAP<false>>(BitWidth);
    Result.copy(Val);
    S.Stk.push<IntegralAP<false>>(Result);
    return;
  }

  if (QT->isSignedIntegerOrEnumerationType()) {
    int64_t V = Val.getSExtValue();
    INT_TYPE_SWITCH(*T, { S.Stk.push<T>(T::from(V, BitWidth)); });
  } else {
    assert(QT->isUnsignedIntegerOrEnumerationType());
    uint64_t V = Val.getZExtValue();
    INT_TYPE_SWITCH(*T, { S.Stk.push<T>(T::from(V, BitWidth)); });
  }
}

template <typename T>
static void pushInteger(InterpState &S, T Val, QualType QT) {
  if constexpr (std::is_same_v<T, APInt>)
    pushInteger(S, APSInt(Val, !std::is_signed_v<T>), QT);
  else if constexpr (std::is_same_v<T, APSInt>)
    pushInteger(S, Val, QT);
  else
    pushInteger(S,
                APSInt(APInt(sizeof(T) * 8, static_cast<uint64_t>(Val),
                             std::is_signed_v<T>),
                       !std::is_signed_v<T>),
                QT);
}

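/// Stores \p Value into \p Dest, interpreting it as a value of primitive
/// type \p ValueT.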
static void assignInteger(InterpState &S, const Pointer &Dest, PrimType ValueT,
                          const APSInt &Value) {
  if (ValueT == PT_IntAPS) {
    Dest.deref<IntegralAP<true>>() =
        S.allocAP<IntegralAP<true>>(Value.getBitWidth());
    Dest.deref<IntegralAP<true>>().copy(Value);
  } else if (ValueT == PT_IntAP) {
    Dest.deref<IntegralAP<false>>() =
        S.allocAP<IntegralAP<false>>(Value.getBitWidth());
    Dest.deref<IntegralAP<false>>().copy(Value);
  } else {
    INT_TYPE_SWITCH_NO_BOOL(
        ValueT, { Dest.deref<T>() = T::from(static_cast<T>(Value)); });
  }
}

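/// Returns the type of the values \p P refers to: the pointee type for
/// pointers, the element type for arrays, otherwise the field type itself.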
static QualType getElemType(const Pointer &P) {
  const Descriptor *Desc = P.getFieldDesc();
  QualType T = Desc->getType();
  if (Desc->isPrimitive())
    return T;
  if (T->isPointerType())
    return T->castAs<PointerType>()->getPointeeType();
  if (Desc->isArray())
    return Desc->getElemQualType();
  if (const auto *AT = T->getAsArrayTypeUnsafe())
    return AT->getElementType();
  return T;
}

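/// Diagnoses the use of a non-constexpr library builtin, e.g. strcmp()
/// instead of __builtin_strcmp().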
static void diagnoseNonConstexprBuiltin(InterpState &S, CodePtr OpPC,
                                        unsigned ID) {
  if (!S.diagnosing())
    return;

  auto Loc = S.Current->getSource(OpPC);
  if (S.getLangOpts().CPlusPlus11)
    S.CCEDiag(Loc, diag::note_constexpr_invalid_function)
        << /*isConstexpr=*/0 << /*isConstructor=*/0
        << S.getASTContext().BuiltinInfo.getQuotedName(ID);
  else
    S.CCEDiag(Loc, diag::note_invalid_subexpr_in_const_expr);
}

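/// Packs a boolean vector into an APSInt, one bit per element.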
static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
  assert(Val.getFieldDesc()->isPrimitiveArray() &&
         Val.getFieldDesc()->getElemQualType()->isBooleanType() &&
         "Not a boolean vector");
  unsigned NumElems = Val.getNumElems();

  // Each element is one bit, so create an integer with NumElems bits.
  llvm::APSInt Result(NumElems, 0);
  for (unsigned I = 0; I != NumElems; ++I) {
    if (Val.elem<bool>(I))
      Result.setBit(I);
  }

  return Result;
}

// Strict double -> float conversion used for X86 PD2PS/cvtsd2ss intrinsics.
// Reject NaN/Inf/Subnormal inputs and any lossy/inexact conversions.
static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst,
                                       InterpState &S, const Expr *DiagExpr) {
  if (Src.isInfinity()) {
    if (S.diagnosing())
      S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 0;
    return false;
  }
  if (Src.isNaN()) {
    if (S.diagnosing())
      S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 1;
    return false;
  }
  APFloat Val = Src;
  bool LosesInfo = false;
  APFloat::opStatus Status = Val.convert(
      APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
  if (LosesInfo || Val.isDenormal()) {
    if (S.diagnosing())
      S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic_strict);
    return false;
  }
  if (Status != APFloat::opOK) {
    if (S.diagnosing())
      S.CCEDiag(DiagExpr, diag::note_invalid_subexpr_in_const_expr);
    return false;
  }
  Dst.copy(Val);
  return true;
}

static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
                                                  const InterpFrame *Frame,
                                                  const CallExpr *Call) {
  unsigned Depth = S.Current->getDepth();
  auto isStdCall = [](const FunctionDecl *F) -> bool {
    return F && F->isInStdNamespace() && F->getIdentifier() &&
           F->getIdentifier()->isStr("is_constant_evaluated");
  };
  const InterpFrame *Caller = Frame->Caller;
  // The current frame is the one for __builtin_is_constant_evaluated.
  // The one above that is potentially the one for
  // std::is_constant_evaluated().
  if (S.inConstantContext() && !S.checkingPotentialConstantExpression() &&
      S.getEvalStatus().Diag &&
      (Depth == 0 || (Depth == 1 && isStdCall(Frame->getCallee())))) {
    if (Caller && isStdCall(Frame->getCallee())) {
      const Expr *E = Caller->getExpr(Caller->getRetPC());
      S.report(E->getExprLoc(),
               diag::warn_is_constant_evaluated_always_true_constexpr)
          << "std::is_constant_evaluated" << E->getSourceRange();
    } else {
      S.report(Call->getExprLoc(),
               diag::warn_is_constant_evaluated_always_true_constexpr)
          << "__builtin_is_constant_evaluated" << Call->getSourceRange();
    }
  }

  S.Stk.push<Boolean>(Boolean::from(S.inConstantContext()));
  return true;
}

// __builtin_assume
// __assume (MS extension)
static bool interp__builtin_assume(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call) {
  // Nothing to be done here since the argument is NOT evaluated.
  assert(Call->getNumArgs() == 1);
  return true;
}

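/// strcmp/strncmp/wcscmp/wcsncmp and their __builtin_ counterparts.
/// For the 'n' variants, at most \c Limit elements are compared.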
static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call, unsigned ID) {
  uint64_t Limit = ~static_cast<uint64_t>(0);
  if (ID == Builtin::BIstrncmp || ID == Builtin::BI__builtin_strncmp ||
      ID == Builtin::BIwcsncmp || ID == Builtin::BI__builtin_wcsncmp)
    Limit = popToUInt64(S, Call->getArg(2));

  const Pointer &B = S.Stk.pop<Pointer>();
  const Pointer &A = S.Stk.pop<Pointer>();
  if (ID == Builtin::BIstrcmp || ID == Builtin::BIstrncmp ||
      ID == Builtin::BIwcscmp || ID == Builtin::BIwcsncmp)
    diagnoseNonConstexprBuiltin(S, OpPC, ID);

  if (Limit == 0) {
    pushInteger(S, 0, Call->getType());
    return true;
  }

  if (!CheckLive(S, OpPC, A, AK_Read) || !CheckLive(S, OpPC, B, AK_Read))
    return false;

  if (A.isDummy() || B.isDummy())
    return false;
  if (!A.isBlockPointer() || !B.isBlockPointer())
    return false;
  if (!A.getFieldDesc()->isPrimitiveArray() ||
      !B.getFieldDesc()->isPrimitiveArray())
    return false;

  bool IsWide = ID == Builtin::BIwcscmp || ID == Builtin::BIwcsncmp ||
                ID == Builtin::BI__builtin_wcscmp ||
                ID == Builtin::BI__builtin_wcsncmp;
  assert(A.getFieldDesc()->isPrimitiveArray());
  assert(B.getFieldDesc()->isPrimitiveArray());

  // Different element types shouldn't happen, but with casts they can.
  if (!S.getASTContext().hasSameUnqualifiedType(getElemType(A),
                                                getElemType(B)))
    return false;

  PrimType ElemT = *S.getContext().classify(getElemType(A));

  auto returnResult = [&](int V) -> bool {
    pushInteger(S, V, Call->getType());
    return true;
  };

  unsigned IndexA = A.getIndex();
  unsigned IndexB = B.getIndex();
  uint64_t Steps = 0;
  for (;; ++IndexA, ++IndexB, ++Steps) {
    if (Steps >= Limit)
      break;
    const Pointer &PA = A.atIndex(IndexA);
    const Pointer &PB = B.atIndex(IndexB);
    if (!CheckRange(S, OpPC, PA, AK_Read) ||
        !CheckRange(S, OpPC, PB, AK_Read)) {
      return false;
    }

    if (IsWide) {
      INT_TYPE_SWITCH(ElemT, {
        T CA = PA.deref<T>();
        T CB = PB.deref<T>();
        if (CA > CB)
          return returnResult(1);
        if (CA < CB)
          return returnResult(-1);
        if (CA.isZero() || CB.isZero())
          return returnResult(0);
      });
      continue;
    }

    uint8_t CA = PA.deref<uint8_t>();
    uint8_t CB = PB.deref<uint8_t>();

    if (CA > CB)
      return returnResult(1);
    if (CA < CB)
      return returnResult(-1);
    if (CA == 0 || CB == 0)
      return returnResult(0);
  }

  return returnResult(0);
}

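/// strlen/wcslen and their __builtin_ counterparts: counts elements until
/// the terminating null element.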
static bool interp__builtin_strlen(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call, unsigned ID) {
  const Pointer &StrPtr = S.Stk.pop<Pointer>().expand();

  if (ID == Builtin::BIstrlen || ID == Builtin::BIwcslen)
    diagnoseNonConstexprBuiltin(S, OpPC, ID);

  if (!CheckArray(S, OpPC, StrPtr))
    return false;

  if (!CheckLive(S, OpPC, StrPtr, AK_Read))
    return false;

  if (!StrPtr.isBlockPointer())
    return false;

  if (!CheckDummy(S, OpPC, StrPtr.block(), AK_Read))
    return false;

  if (!StrPtr.getFieldDesc()->isPrimitiveArray())
    return false;

  assert(StrPtr.getFieldDesc()->isPrimitiveArray());
  unsigned ElemSize = StrPtr.getFieldDesc()->getElemSize();
  if (ElemSize != 1 && ElemSize != 2 && ElemSize != 4)
    return Invalid(S, OpPC);

  if (ID == Builtin::BI__builtin_wcslen || ID == Builtin::BIwcslen) {
    const ASTContext &AC = S.getASTContext();
    unsigned WCharSize = AC.getTypeSizeInChars(AC.getWCharType()).getQuantity();
    if (ElemSize != WCharSize)
      return false;
  }

  size_t Len = 0;
  for (size_t I = StrPtr.getIndex();; ++I, ++Len) {
    const Pointer &ElemPtr = StrPtr.atIndex(I);

    if (!CheckRange(S, OpPC, ElemPtr, AK_Read))
      return false;

    uint32_t Val;
    switch (ElemSize) {
    case 1:
      Val = ElemPtr.deref<uint8_t>();
      break;
    case 2:
      Val = ElemPtr.deref<uint16_t>();
      break;
    case 4:
      Val = ElemPtr.deref<uint32_t>();
      break;
    default:
      llvm_unreachable("Unsupported char size");
    }
    if (Val == 0)
      break;
  }

  pushInteger(S, Len, Call->getType());

  return true;
}

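/// __builtin_nan/nans (and their f/l variants): constructs a quiet or
/// signaling NaN whose payload is parsed from the string argument.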
static bool interp__builtin_nan(InterpState &S, CodePtr OpPC,
                                const InterpFrame *Frame, const CallExpr *Call,
                                bool Signaling) {
  const Pointer &Arg = S.Stk.pop<Pointer>();

  if (!CheckLoad(S, OpPC, Arg))
    return false;

  if (!Arg.getFieldDesc()->isPrimitiveArray())
    return Invalid(S, OpPC);

  // Convert the given string to an integer using StringRef's API.
  llvm::APInt Fill;
  std::string Str;
  unsigned ArgLength = Arg.getNumElems();
  bool FoundZero = false;
  for (unsigned I = 0; I != ArgLength; ++I) {
    if (!Arg.isElementInitialized(I))
      return false;

    if (Arg.elem<int8_t>(I) == 0) {
      FoundZero = true;
      break;
    }
    Str += Arg.elem<char>(I);
  }

  // If we didn't find a NUL byte, diagnose as a one-past-the-end read.
  if (!FoundZero)
    return CheckRange(S, OpPC, Arg.atIndex(ArgLength), AK_Read);

  // Treat empty strings as if they were zero.
  if (Str.empty())
    Fill = llvm::APInt(32, 0);
  else if (StringRef(Str).getAsInteger(0, Fill))
    return false;

  const llvm::fltSemantics &TargetSemantics =
      S.getASTContext().getFloatTypeSemantics(
          Call->getDirectCallee()->getReturnType());

  Floating Result = S.allocFloat(TargetSemantics);
  if (S.getASTContext().getTargetInfo().isNan2008()) {
    if (Signaling)
      Result.copy(
          llvm::APFloat::getSNaN(TargetSemantics, /*Negative=*/false, &Fill));
    else
      Result.copy(
          llvm::APFloat::getQNaN(TargetSemantics, /*Negative=*/false, &Fill));
  } else {
    // Prior to IEEE 754-2008, architectures were allowed to choose whether
    // the first bit of their significand was set for qNaN or sNaN. MIPS chose
    // a different encoding to what became a standard in 2008, and for pre-
    // 2008 revisions, MIPS interpreted sNaN-2008 as qNaN and qNaN-2008 as
    // sNaN. This is now known as "legacy NaN" encoding.
    if (Signaling)
      Result.copy(
          llvm::APFloat::getQNaN(TargetSemantics, /*Negative=*/false, &Fill));
    else
      Result.copy(
          llvm::APFloat::getSNaN(TargetSemantics, /*Negative=*/false, &Fill));
  }

  S.Stk.push<Floating>(Result);
  return true;
}

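/// __builtin_inf and friends: pushes positive infinity in the float
/// semantics of the return type.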
static bool interp__builtin_inf(InterpState &S, CodePtr OpPC,
                                const InterpFrame *Frame,
                                const CallExpr *Call) {
  const llvm::fltSemantics &TargetSemantics =
      S.getASTContext().getFloatTypeSemantics(
          Call->getDirectCallee()->getReturnType());

  Floating Result = S.allocFloat(TargetSemantics);
  Result.copy(APFloat::getInf(TargetSemantics));
  S.Stk.push<Floating>(Result);
  return true;
}

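/// __builtin_copysign: the magnitude of the first argument with the sign of
/// the second.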
static bool interp__builtin_copysign(InterpState &S, CodePtr OpPC,
                                     const InterpFrame *Frame) {
  const Floating &Arg2 = S.Stk.pop<Floating>();
  const Floating &Arg1 = S.Stk.pop<Floating>();
  Floating Result = S.allocFloat(Arg1.getSemantics());

  APFloat Copy = Arg1.getAPFloat();
  Copy.copySign(Arg2.getAPFloat());
  Result.copy(Copy);
  S.Stk.push<Floating>(Result);

  return true;
}

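/// __builtin_fmin-style builtins; if \p IsNumBuiltin is set, uses
/// llvm::minimumnum semantics instead of minnum.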
static bool interp__builtin_fmin(InterpState &S, CodePtr OpPC,
                                 const InterpFrame *Frame, bool IsNumBuiltin) {
  const Floating &RHS = S.Stk.pop<Floating>();
  const Floating &LHS = S.Stk.pop<Floating>();
  Floating Result = S.allocFloat(LHS.getSemantics());

  if (IsNumBuiltin)
    Result.copy(llvm::minimumnum(LHS.getAPFloat(), RHS.getAPFloat()));
  else
    Result.copy(minnum(LHS.getAPFloat(), RHS.getAPFloat()));
  S.Stk.push<Floating>(Result);
  return true;
}

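/// __builtin_fmax-style builtins; if \p IsNumBuiltin is set, uses
/// llvm::maximumnum semantics instead of maxnum.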
static bool interp__builtin_fmax(InterpState &S, CodePtr OpPC,
                                 const InterpFrame *Frame, bool IsNumBuiltin) {
  const Floating &RHS = S.Stk.pop<Floating>();
  const Floating &LHS = S.Stk.pop<Floating>();
  Floating Result = S.allocFloat(LHS.getSemantics());

  if (IsNumBuiltin)
    Result.copy(llvm::maximumnum(LHS.getAPFloat(), RHS.getAPFloat()));
  else
    Result.copy(maxnum(LHS.getAPFloat(), RHS.getAPFloat()));
  S.Stk.push<Floating>(Result);
  return true;
}

/// Defined as __builtin_isnan(...), to accommodate the fact that it can
/// take a float, double, long double, etc.
/// But for us, that's all a Floating anyway.
static bool interp__builtin_isnan(InterpState &S, CodePtr OpPC,
                                  const InterpFrame *Frame,
                                  const CallExpr *Call) {
  const Floating &Arg = S.Stk.pop<Floating>();

  pushInteger(S, Arg.isNan(), Call->getType());
  return true;
}

static bool interp__builtin_issignaling(InterpState &S, CodePtr OpPC,
                                        const InterpFrame *Frame,
                                        const CallExpr *Call) {
  const Floating &Arg = S.Stk.pop<Floating>();

  pushInteger(S, Arg.isSignaling(), Call->getType());
  return true;
}

static bool interp__builtin_isinf(InterpState &S, CodePtr OpPC,
                                  const InterpFrame *Frame, bool CheckSign,
                                  const CallExpr *Call) {
  const Floating &Arg = S.Stk.pop<Floating>();
  APFloat F = Arg.getAPFloat();
  bool IsInf = F.isInfinity();

  if (CheckSign)
    pushInteger(S, IsInf ? (F.isNegative() ? -1 : 1) : 0, Call->getType());
  else
    pushInteger(S, IsInf, Call->getType());
  return true;
}

static bool interp__builtin_isfinite(InterpState &S, CodePtr OpPC,
                                     const InterpFrame *Frame,
                                     const CallExpr *Call) {
  const Floating &Arg = S.Stk.pop<Floating>();

  pushInteger(S, Arg.isFinite(), Call->getType());
  return true;
}

static bool interp__builtin_isnormal(InterpState &S, CodePtr OpPC,
                                     const InterpFrame *Frame,
                                     const CallExpr *Call) {
  const Floating &Arg = S.Stk.pop<Floating>();

  pushInteger(S, Arg.isNormal(), Call->getType());
  return true;
}

static bool interp__builtin_issubnormal(InterpState &S, CodePtr OpPC,
                                        const InterpFrame *Frame,
                                        const CallExpr *Call) {
  const Floating &Arg = S.Stk.pop<Floating>();

  pushInteger(S, Arg.isDenormal(), Call->getType());
  return true;
}

static bool interp__builtin_iszero(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call) {
  const Floating &Arg = S.Stk.pop<Floating>();

  pushInteger(S, Arg.isZero(), Call->getType());
  return true;
}

static bool interp__builtin_signbit(InterpState &S, CodePtr OpPC,
                                    const InterpFrame *Frame,
                                    const CallExpr *Call) {
  const Floating &Arg = S.Stk.pop<Floating>();

  pushInteger(S, Arg.isNegative(), Call->getType());
  return true;
}

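/// __builtin_isgreater/isgreaterequal/isless/islessequal/islessgreater and
/// __builtin_isunordered: quiet floating-point comparisons pushing an int.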
static bool interp_floating_comparison(InterpState &S, CodePtr OpPC,
                                       const CallExpr *Call, unsigned ID) {
  const Floating &RHS = S.Stk.pop<Floating>();
  const Floating &LHS = S.Stk.pop<Floating>();

  pushInteger(
      S,
      [&] {
        switch (ID) {
        case Builtin::BI__builtin_isgreater:
          return LHS > RHS;
        case Builtin::BI__builtin_isgreaterequal:
          return LHS >= RHS;
        case Builtin::BI__builtin_isless:
          return LHS < RHS;
        case Builtin::BI__builtin_islessequal:
          return LHS <= RHS;
        case Builtin::BI__builtin_islessgreater: {
          ComparisonCategoryResult Cmp = LHS.compare(RHS);
          return Cmp == ComparisonCategoryResult::Less ||
                 Cmp == ComparisonCategoryResult::Greater;
        }
        case Builtin::BI__builtin_isunordered:
          return LHS.compare(RHS) == ComparisonCategoryResult::Unordered;
        default:
          llvm_unreachable("Unexpected builtin ID: Should be a floating point "
                           "comparison function");
        }
      }(),
      Call->getType());
  return true;
}

/// First parameter to __builtin_isfpclass is the floating value, the
/// second one is an integral value.
static bool interp__builtin_isfpclass(InterpState &S, CodePtr OpPC,
                                      const InterpFrame *Frame,
                                      const CallExpr *Call) {
  APSInt FPClassArg = popToAPSInt(S, Call->getArg(1));
  const Floating &F = S.Stk.pop<Floating>();

  int32_t Result = static_cast<int32_t>(
      (F.classify() & std::move(FPClassArg)).getZExtValue());
  pushInteger(S, Result, Call->getType());

  return true;
}

/// Five int values followed by one floating value.
/// __builtin_fpclassify(int, int, int, int, int, float)
static bool interp__builtin_fpclassify(InterpState &S, CodePtr OpPC,
                                       const InterpFrame *Frame,
                                       const CallExpr *Call) {
  const Floating &Val = S.Stk.pop<Floating>();

  // The last argument is first on the stack, so pop the five int arguments
  // in reverse order.
  PrimType IntT = *S.getContext().classify(Call->getArg(0));
  APSInt Values[5];
  for (unsigned I = 0; I != 5; ++I)
    Values[4 - I] = popToAPSInt(S.Stk, IntT);

  unsigned Index;
  switch (Val.getCategory()) {
  case APFloat::fcNaN:
    Index = 0;
    break;
  case APFloat::fcInfinity:
    Index = 1;
    break;
  case APFloat::fcNormal:
    Index = Val.isDenormal() ? 3 : 2;
    break;
  case APFloat::fcZero:
    Index = 4;
    break;
  }

  assert(Index <= 4);

  pushInteger(S, Values[Index], Call->getType());
  return true;
}

static inline Floating abs(InterpState &S, const Floating &In) {
  if (!In.isNegative())
    return In;

  Floating Output = S.allocFloat(In.getSemantics());
  APFloat New = In.getAPFloat();
  New.changeSign();
  Output.copy(New);
  return Output;
}

// The C standard says "fabs raises no floating-point exceptions,
// even if x is a signaling NaN. The returned value is independent of
// the current rounding direction mode." Therefore constant folding can
// proceed without regard to the floating point settings.
// Reference, WG14 N2478 F.10.4.3
static bool interp__builtin_fabs(InterpState &S, CodePtr OpPC,
                                 const InterpFrame *Frame) {
  const Floating &Val = S.Stk.pop<Floating>();
  S.Stk.push<Floating>(abs(S, Val));
  return true;
}

static bool interp__builtin_abs(InterpState &S, CodePtr OpPC,
                                const InterpFrame *Frame,
                                const CallExpr *Call) {
  APSInt Val = popToAPSInt(S, Call->getArg(0));
  if (Val == APSInt(APInt::getSignedMinValue(Val.getBitWidth()),
                    /*IsUnsigned=*/false))
    return false;
  if (Val.isNegative())
    Val.negate();
  pushInteger(S, Val, Call->getType());
  return true;
}

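/// __builtin_popcount and friends; boolean vectors are packed into an
/// integer first.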
static bool interp__builtin_popcount(InterpState &S, CodePtr OpPC,
                                     const InterpFrame *Frame,
                                     const CallExpr *Call) {
  APSInt Val;
  if (Call->getArg(0)->getType()->isExtVectorBoolType()) {
    const Pointer &Arg = S.Stk.pop<Pointer>();
    Val = convertBoolVectorToInt(Arg);
  } else {
    Val = popToAPSInt(S, Call->getArg(0));
  }
  pushInteger(S, Val.popcount(), Call->getType());
  return true;
}

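/// __builtin_ia32_crc32* intrinsics: updates the CRC32C value with
/// \p DataBytes bytes of data, processing them LSB-first with the
/// bit-reflected iSCSI polynomial.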
static bool interp__builtin_ia32_crc32(InterpState &S, CodePtr OpPC,
                                       const InterpFrame *Frame,
                                       const CallExpr *Call,
                                       unsigned DataBytes) {
  uint64_t DataVal = popToUInt64(S, Call->getArg(1));
  uint64_t CRCVal = popToUInt64(S, Call->getArg(0));

  // CRC32C polynomial (iSCSI polynomial, bit-reversed).
  static const uint32_t CRC32C_POLY = 0x82F63B78;

  // Process each byte.
  uint32_t Result = static_cast<uint32_t>(CRCVal);
  for (unsigned I = 0; I != DataBytes; ++I) {
    uint8_t Byte = static_cast<uint8_t>((DataVal >> (I * 8)) & 0xFF);
    Result ^= Byte;
    for (int J = 0; J != 8; ++J) {
      Result = (Result >> 1) ^ ((Result & 1) ? CRC32C_POLY : 0);
    }
  }

  pushInteger(S, Result, Call->getType());
  return true;
}

static bool interp__builtin_classify_type(InterpState &S, CodePtr OpPC,
                                          const InterpFrame *Frame,
                                          const CallExpr *Call) {
  // This is an unevaluated call, so there are no arguments on the stack.
  assert(Call->getNumArgs() == 1);
  const Expr *Arg = Call->getArg(0);

  GCCTypeClass ResultClass =
      EvaluateBuiltinClassifyType(Arg->getType(), S.getLangOpts());
  int32_t ReturnVal = static_cast<int32_t>(ResultClass);
  pushInteger(S, ReturnVal, Call->getType());
  return true;
}

// __builtin_expect(long, long)
// __builtin_expect_with_probability(long, long, double)
static bool interp__builtin_expect(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call) {
  // The return value is simply the value of the first parameter.
  // We ignore the probability.
  unsigned NumArgs = Call->getNumArgs();
  assert(NumArgs == 2 || NumArgs == 3);

  PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType());
  if (NumArgs == 3)
    S.Stk.discard<Floating>();
  discard(S.Stk, ArgT);

  APSInt Val = popToAPSInt(S.Stk, ArgT);
  pushInteger(S, Val, Call->getType());
  return true;
}

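/// __builtin_addressof: the lvalue is already on the stack as a pointer, so
/// there is nothing to do except verify the type in debug builds.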
static bool interp__builtin_addressof(InterpState &S, CodePtr OpPC,
                                      const InterpFrame *Frame,
                                      const CallExpr *Call) {
#ifndef NDEBUG
  assert(Call->getArg(0)->isLValue());
  PrimType PtrT = S.getContext().classify(Call->getArg(0)).value_or(PT_Ptr);
  assert(PtrT == PT_Ptr &&
         "Unsupported pointer type passed to __builtin_addressof()");
#endif
  return true;
}

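/// std::move/std::forward and friends: the argument value stays on the
/// stack; the call is only valid if the callee is constexpr.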
static bool interp__builtin_move(InterpState &S, CodePtr OpPC,
                                 const InterpFrame *Frame,
                                 const CallExpr *Call) {
  return Call->getDirectCallee()->isConstexpr();
}

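/// __builtin_eh_return_data_regno: returns the target-specific EH data
/// register number for the given argument.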
static bool interp__builtin_eh_return_data_regno(InterpState &S, CodePtr OpPC,
                                                 const InterpFrame *Frame,
                                                 const CallExpr *Call) {
  APSInt Arg = popToAPSInt(S, Call->getArg(0));

  int Result = S.getASTContext().getTargetInfo().getEHDataRegisterNumber(
      Arg.getZExtValue());
  pushInteger(S, Result, Call->getType());
  return true;
}

// Two integral values followed by a pointer (lhs, rhs, resultOut).
static bool interp__builtin_overflowop(InterpState &S, CodePtr OpPC,
                                       const CallExpr *Call,
                                       unsigned BuiltinOp) {
  const Pointer &ResultPtr = S.Stk.pop<Pointer>();
  if (ResultPtr.isDummy() || !ResultPtr.isBlockPointer())
    return false;

  PrimType RHST = *S.getContext().classify(Call->getArg(1)->getType());
  PrimType LHST = *S.getContext().classify(Call->getArg(0)->getType());
  APSInt RHS = popToAPSInt(S.Stk, RHST);
  APSInt LHS = popToAPSInt(S.Stk, LHST);
  QualType ResultType = Call->getArg(2)->getType()->getPointeeType();
  PrimType ResultT = *S.getContext().classify(ResultType);
  bool Overflow;

  APSInt Result;
  if (BuiltinOp == Builtin::BI__builtin_add_overflow ||
      BuiltinOp == Builtin::BI__builtin_sub_overflow ||
      BuiltinOp == Builtin::BI__builtin_mul_overflow) {
    // For the generic builtins, find the largest int size involved and
    // perform the arithmetic at that width.
    bool IsSigned = LHS.isSigned() || RHS.isSigned() ||
                    ResultType->isSignedIntegerOrEnumerationType();
    bool AllSigned = LHS.isSigned() && RHS.isSigned() &&
                     ResultType->isSignedIntegerOrEnumerationType();
    uint64_t LHSSize = LHS.getBitWidth();
    uint64_t RHSSize = RHS.getBitWidth();
    uint64_t ResultSize = S.getASTContext().getTypeSize(ResultType);
    uint64_t MaxBits = std::max(std::max(LHSSize, RHSSize), ResultSize);

    // Add an additional bit if the signedness isn't uniformly agreed to. We
    // could do this ONLY if there is a signed and an unsigned that both have
    // MaxBits, but the code to check that is pretty nasty. The issue will be
    // caught in the shrink-to-result later anyway.
    if (IsSigned && !AllSigned)
      ++MaxBits;

    LHS = APSInt(LHS.extOrTrunc(MaxBits), !IsSigned);
    RHS = APSInt(RHS.extOrTrunc(MaxBits), !IsSigned);
    Result = APSInt(MaxBits, !IsSigned);
  }

  switch (BuiltinOp) {
  default:
    llvm_unreachable("Invalid value for BuiltinOp");
  case Builtin::BI__builtin_add_overflow:
  case Builtin::BI__builtin_sadd_overflow:
  case Builtin::BI__builtin_saddl_overflow:
  case Builtin::BI__builtin_saddll_overflow:
  case Builtin::BI__builtin_uadd_overflow:
  case Builtin::BI__builtin_uaddl_overflow:
  case Builtin::BI__builtin_uaddll_overflow:
    Result = LHS.isSigned() ? LHS.sadd_ov(RHS, Overflow)
                            : LHS.uadd_ov(RHS, Overflow);
    break;
  case Builtin::BI__builtin_sub_overflow:
  case Builtin::BI__builtin_ssub_overflow:
  case Builtin::BI__builtin_ssubl_overflow:
  case Builtin::BI__builtin_ssubll_overflow:
  case Builtin::BI__builtin_usub_overflow:
  case Builtin::BI__builtin_usubl_overflow:
  case Builtin::BI__builtin_usubll_overflow:
    Result = LHS.isSigned() ? LHS.ssub_ov(RHS, Overflow)
                            : LHS.usub_ov(RHS, Overflow);
    break;
  case Builtin::BI__builtin_mul_overflow:
  case Builtin::BI__builtin_smul_overflow:
  case Builtin::BI__builtin_smull_overflow:
  case Builtin::BI__builtin_smulll_overflow:
  case Builtin::BI__builtin_umul_overflow:
  case Builtin::BI__builtin_umull_overflow:
  case Builtin::BI__builtin_umulll_overflow:
    Result = LHS.isSigned() ? LHS.smul_ov(RHS, Overflow)
                            : LHS.umul_ov(RHS, Overflow);
    break;
  }

  // In the case where multiple sizes are allowed, truncate and see if
  // the values are the same.
  if (BuiltinOp == Builtin::BI__builtin_add_overflow ||
      BuiltinOp == Builtin::BI__builtin_sub_overflow ||
      BuiltinOp == Builtin::BI__builtin_mul_overflow) {
    // APSInt doesn't have a TruncOrSelf, so we use extOrTrunc instead,
    // since it will give us the behavior of a TruncOrSelf in the case where
    // its parameter <= its size. We previously set Result to be at least the
    // type-size of the result, so getTypeSize(ResultType) <=
    // Result.getBitWidth() here.
    APSInt Temp = Result.extOrTrunc(S.getASTContext().getTypeSize(ResultType));
    Temp.setIsSigned(ResultType->isSignedIntegerOrEnumerationType());

    if (!APSInt::isSameValue(Temp, Result))
      Overflow = true;
    Result = std::move(Temp);
  }

  // Write Result to ResultPtr and put Overflow on the stack.
  assignInteger(S, ResultPtr, ResultT, Result);
  if (ResultPtr.canBeInitialized())
    ResultPtr.initialize();

  assert(Call->getDirectCallee()->getReturnType()->isBooleanType());
  S.Stk.push<Boolean>(Overflow);
  return true;
}

/// Three integral values followed by a pointer (lhs, rhs, carry, carryOut).
static bool interp__builtin_carryop(InterpState &S, CodePtr OpPC,
                                    const InterpFrame *Frame,
                                    const CallExpr *Call, unsigned BuiltinOp) {
  const Pointer &CarryOutPtr = S.Stk.pop<Pointer>();
  PrimType LHST = *S.getContext().classify(Call->getArg(0)->getType());
  PrimType RHST = *S.getContext().classify(Call->getArg(1)->getType());
  APSInt CarryIn = popToAPSInt(S.Stk, LHST);
  APSInt RHS = popToAPSInt(S.Stk, RHST);
  APSInt LHS = popToAPSInt(S.Stk, LHST);

  if (CarryOutPtr.isDummy() || !CarryOutPtr.isBlockPointer())
    return false;

  APSInt CarryOut;

  APSInt Result;
  // Copy the number of bits and sign.
  Result = LHS;
  CarryOut = LHS;

  bool FirstOverflowed = false;
  bool SecondOverflowed = false;
  switch (BuiltinOp) {
  default:
    llvm_unreachable("Invalid value for BuiltinOp");
  case Builtin::BI__builtin_addcb:
  case Builtin::BI__builtin_addcs:
  case Builtin::BI__builtin_addc:
  case Builtin::BI__builtin_addcl:
  case Builtin::BI__builtin_addcll:
    Result =
        LHS.uadd_ov(RHS, FirstOverflowed).uadd_ov(CarryIn, SecondOverflowed);
    break;
  case Builtin::BI__builtin_subcb:
  case Builtin::BI__builtin_subcs:
  case Builtin::BI__builtin_subc:
  case Builtin::BI__builtin_subcl:
  case Builtin::BI__builtin_subcll:
    Result =
        LHS.usub_ov(RHS, FirstOverflowed).usub_ov(CarryIn, SecondOverflowed);
    break;
  }
  // It is possible for both overflows to happen, but CGBuiltin uses an OR so
  // this is consistent.
  CarryOut = (uint64_t)(FirstOverflowed | SecondOverflowed);

  QualType CarryOutType = Call->getArg(3)->getType()->getPointeeType();
  PrimType CarryOutT = *S.getContext().classify(CarryOutType);
  assignInteger(S, CarryOutPtr, CarryOutT, CarryOut);
  CarryOutPtr.initialize();

  assert(Call->getType() == Call->getArg(0)->getType());
  pushInteger(S, Result, Call->getType());
  return true;
}

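/// __builtin_clz and friends, the MS-style __lzcnt* intrinsics, and the
/// generic __builtin_clzg with its optional fallback argument.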
static bool interp__builtin_clz(InterpState &S, CodePtr OpPC,
                                const InterpFrame *Frame, const CallExpr *Call,
                                unsigned BuiltinOp) {
  std::optional<APSInt> Fallback;
  if (BuiltinOp == Builtin::BI__builtin_clzg && Call->getNumArgs() == 2)
    Fallback = popToAPSInt(S, Call->getArg(1));

  APSInt Val;
  if (Call->getArg(0)->getType()->isExtVectorBoolType()) {
    const Pointer &Arg = S.Stk.pop<Pointer>();
    Val = convertBoolVectorToInt(Arg);
  } else {
    Val = popToAPSInt(S, Call->getArg(0));
  }

  // When the argument is 0, the result of GCC builtins is undefined, whereas
  // for Microsoft intrinsics, the result is the bit-width of the argument.
  bool ZeroIsUndefined = BuiltinOp != Builtin::BI__lzcnt16 &&
                         BuiltinOp != Builtin::BI__lzcnt &&
                         BuiltinOp != Builtin::BI__lzcnt64;

  if (Val == 0) {
    if (Fallback) {
      pushInteger(S, *Fallback, Call->getType());
      return true;
    }

    if (ZeroIsUndefined)
      return false;
  }

  pushInteger(S, Val.countl_zero(), Call->getType());
  return true;
}

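/// __builtin_ctz and friends, including the generic __builtin_ctzg with its
/// optional fallback argument.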
static bool interp__builtin_ctz(InterpState &S, CodePtr OpPC,
                                const InterpFrame *Frame, const CallExpr *Call,
                                unsigned BuiltinID) {
  std::optional<APSInt> Fallback;
  if (BuiltinID == Builtin::BI__builtin_ctzg && Call->getNumArgs() == 2)
    Fallback = popToAPSInt(S, Call->getArg(1));

  APSInt Val;
  if (Call->getArg(0)->getType()->isExtVectorBoolType()) {
    const Pointer &Arg = S.Stk.pop<Pointer>();
    Val = convertBoolVectorToInt(Arg);
  } else {
    Val = popToAPSInt(S, Call->getArg(0));
  }

  if (Val == 0) {
    if (Fallback) {
      pushInteger(S, *Fallback, Call->getType());
      return true;
    }
    return false;
  }

  pushInteger(S, Val.countr_zero(), Call->getType());
  return true;
}

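/// __builtin_bswap16/32/64: reverses the bytes of the argument.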
static bool interp__builtin_bswap(InterpState &S, CodePtr OpPC,
                                  const InterpFrame *Frame,
                                  const CallExpr *Call) {
  const APSInt &Val = popToAPSInt(S, Call->getArg(0));
  if (Val.getBitWidth() == 8 || Val.getBitWidth() == 1)
    pushInteger(S, Val, Call->getType());
  else
    pushInteger(S, Val.byteSwap(), Call->getType());
  return true;
}

/// bool __atomic_always_lock_free(size_t, void const volatile*)
/// bool __atomic_is_lock_free(size_t, void const volatile*)
static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC,
                                             const InterpFrame *Frame,
                                             const CallExpr *Call,
                                             unsigned BuiltinOp) {
  auto returnBool = [&S](bool Value) -> bool {
    S.Stk.push<Boolean>(Value);
    return true;
  };

  const Pointer &Ptr = S.Stk.pop<Pointer>();
  uint64_t SizeVal = popToUInt64(S, Call->getArg(0));

  // For __atomic_is_lock_free(sizeof(_Atomic(T))), if the size is a power
  // of two less than or equal to the maximum inline atomic width, we know it
  // is lock-free. If the size isn't a power of two, or greater than the
  // maximum alignment where we promote atomics, we know it is not lock-free
  // (at least not in the sense of atomic_is_lock_free). Otherwise,
  // the answer can only be determined at runtime; for example, 16-byte
  // atomics have lock-free implementations on some, but not all,
  // x86-64 processors.

  // Check power-of-two.
  CharUnits Size = CharUnits::fromQuantity(SizeVal);
  if (Size.isPowerOfTwo()) {
    // Check against inlining width.
    unsigned InlineWidthBits =
        S.getASTContext().getTargetInfo().getMaxAtomicInlineWidth();
    if (Size <= S.getASTContext().toCharUnitsFromBits(InlineWidthBits)) {
      // OK, we will inline appropriately-aligned operations of this size,
      // and _Atomic(T) is appropriately-aligned.
      if (Size == CharUnits::One())
        return returnBool(true);

      // Same for null pointers.
      assert(BuiltinOp != Builtin::BI__c11_atomic_is_lock_free);
      if (Ptr.isZero())
        return returnBool(true);

      if (Ptr.isIntegralPointer()) {
        uint64_t IntVal = Ptr.getIntegerRepresentation();
        if (APSInt(APInt(64, IntVal, false), true).isAligned(Size.getAsAlign()))
          return returnBool(true);
      }

      const Expr *PtrArg = Call->getArg(1);
      // Otherwise, check the type's alignment against Size.
      if (const auto *ICE = dyn_cast<ImplicitCastExpr>(PtrArg)) {
        // Drop the potential implicit-cast to 'const volatile void*', getting
        // the underlying type.
        if (ICE->getCastKind() == CK_BitCast)
          PtrArg = ICE->getSubExpr();
      }

      if (const auto *PtrTy = PtrArg->getType()->getAs<PointerType>()) {
        QualType PointeeType = PtrTy->getPointeeType();
        if (!PointeeType->isIncompleteType() &&
            S.getASTContext().getTypeAlignInChars(PointeeType) >= Size) {
          // OK, we will inline operations on this object.
          return returnBool(true);
        }
      }
    }
  }

  if (BuiltinOp == Builtin::BI__atomic_always_lock_free)
    return returnBool(false);

  return Invalid(S, OpPC);
}

/// bool __c11_atomic_is_lock_free(size_t)
static bool interp__builtin_c11_atomic_is_lock_free(InterpState &S,
                                                    CodePtr OpPC,
                                                    const InterpFrame *Frame,
                                                    const CallExpr *Call) {
  uint64_t SizeVal = popToUInt64(S, Call->getArg(0));

  CharUnits Size = CharUnits::fromQuantity(SizeVal);
  if (Size.isPowerOfTwo()) {
    // Check against inlining width.
    unsigned InlineWidthBits =
        S.getASTContext().getTargetInfo().getMaxAtomicInlineWidth();
    if (Size <= S.getASTContext().toCharUnitsFromBits(InlineWidthBits)) {
      S.Stk.push<Boolean>(true);
      return true;
    }
  }

  return false;
}

/// __builtin_complex(float A, float B);
static bool interp__builtin_complex(InterpState &S, CodePtr OpPC,
                                    const InterpFrame *Frame,
                                    const CallExpr *Call) {
  const Floating &Arg2 = S.Stk.pop<Floating>();
  const Floating &Arg1 = S.Stk.pop<Floating>();
  Pointer &Result = S.Stk.peek<Pointer>();

  Result.elem<Floating>(0) = Arg1;
  Result.elem<Floating>(1) = Arg2;
  Result.initializeAllElements();

  return true;
}

/// __builtin_is_aligned()
/// __builtin_align_up()
/// __builtin_align_down()
/// The first parameter is either an integer or a pointer.
/// The second parameter is the requested alignment as an integer.
static bool interp__builtin_is_aligned_up_down(InterpState &S, CodePtr OpPC,
                                               const InterpFrame *Frame,
                                               const CallExpr *Call,
                                               unsigned BuiltinOp) {
  const APSInt &Alignment = popToAPSInt(S, Call->getArg(1));

  if (Alignment < 0 || !Alignment.isPowerOf2()) {
    S.FFDiag(Call, diag::note_constexpr_invalid_alignment) << Alignment;
    return false;
  }
  unsigned SrcWidth = S.getASTContext().getIntWidth(Call->getArg(0)->getType());
  APSInt MaxValue(APInt::getOneBitSet(SrcWidth, SrcWidth - 1));
  if (APSInt::compareValues(Alignment, MaxValue) > 0) {
    S.FFDiag(Call, diag::note_constexpr_alignment_too_big)
        << MaxValue << Call->getArg(0)->getType() << Alignment;
    return false;
  }

  // The first parameter is either an integer or a pointer.
  PrimType FirstArgT = *S.Ctx.classify(Call->getArg(0));

  if (isIntegerType(FirstArgT)) {
    const APSInt &Src = popToAPSInt(S.Stk, FirstArgT);
    APInt AlignMinusOne = Alignment.extOrTrunc(Src.getBitWidth()) - 1;
    if (BuiltinOp == Builtin::BI__builtin_align_up) {
      APSInt AlignedVal =
          APSInt((Src + AlignMinusOne) & ~AlignMinusOne, Src.isUnsigned());
      pushInteger(S, AlignedVal, Call->getType());
    } else if (BuiltinOp == Builtin::BI__builtin_align_down) {
      APSInt AlignedVal = APSInt(Src & ~AlignMinusOne, Src.isUnsigned());
      pushInteger(S, AlignedVal, Call->getType());
    } else {
      assert(*S.Ctx.classify(Call->getType()) == PT_Bool);
      S.Stk.push<Boolean>((Src & AlignMinusOne) == 0);
    }
    return true;
  }
  assert(FirstArgT == PT_Ptr);
  const Pointer &Ptr = S.Stk.pop<Pointer>();
  if (!Ptr.isBlockPointer())
    return false;

  const ValueDecl *PtrDecl = Ptr.getDeclDesc()->asValueDecl();
  // We need a pointer for a declaration here.
  if (!PtrDecl) {
    if (BuiltinOp == Builtin::BI__builtin_is_aligned)
      S.FFDiag(Call->getArg(0), diag::note_constexpr_alignment_compute)
          << Alignment;
    else
      S.FFDiag(Call->getArg(0), diag::note_constexpr_alignment_adjust)
          << Alignment;
    return false;
  }

  // For one-past-end pointers, we can't call getIndex() since it asserts.
  // Use getNumElems() instead, which gives the correct index for past-end
  // pointers.
  unsigned PtrOffset =
      Ptr.isElementPastEnd() ? Ptr.getNumElems() : Ptr.getIndex();
  CharUnits BaseAlignment = S.getASTContext().getDeclAlign(PtrDecl);
  CharUnits PtrAlign =
      BaseAlignment.alignmentAtOffset(CharUnits::fromQuantity(PtrOffset));

  if (BuiltinOp == Builtin::BI__builtin_is_aligned) {
    if (PtrAlign.getQuantity() >= Alignment) {
      S.Stk.push<Boolean>(true);
      return true;
    }
    // If the alignment is not known to be sufficient, some cases could still
    // be aligned at run time. However, if the requested alignment is less
    // than or equal to the base alignment and the offset is not aligned, we
    // know that the run-time value can never be aligned.
    if (BaseAlignment.getQuantity() >= Alignment &&
        PtrAlign.getQuantity() < Alignment) {
      S.Stk.push<Boolean>(false);
      return true;
    }

    S.FFDiag(Call->getArg(0), diag::note_constexpr_alignment_compute)
        << Alignment;
    return false;
  }

  assert(BuiltinOp == Builtin::BI__builtin_align_down ||
         BuiltinOp == Builtin::BI__builtin_align_up);

  // For align_up/align_down, we can return the same value if the alignment
  // is known to be greater than or equal to the requested value.
  if (PtrAlign.getQuantity() >= Alignment) {
    S.Stk.push<Pointer>(Ptr);
    return true;
  }

  // The alignment could be greater than the minimum at run-time, so we cannot
  // infer much about the resulting pointer value. One case is possible:
  // For `_Alignas(32) char buf[N]; __builtin_align_down(&buf[idx], 32)` we
  // can infer the correct index if the requested alignment is smaller than
  // the base alignment, so we can perform the computation on the offset.
  if (BaseAlignment.getQuantity() >= Alignment) {
    assert(Alignment.getBitWidth() <= 64 &&
           "Cannot handle > 64-bit address-space");
    uint64_t Alignment64 = Alignment.getZExtValue();
    CharUnits NewOffset =
        CharUnits::fromQuantity(BuiltinOp == Builtin::BI__builtin_align_down
                                    ? llvm::alignDown(PtrOffset, Alignment64)
                                    : llvm::alignTo(PtrOffset, Alignment64));

    S.Stk.push<Pointer>(Ptr.atIndex(NewOffset.getQuantity()));
    return true;
  }

  // Otherwise, we cannot constant-evaluate the result.
  S.FFDiag(Call->getArg(0), diag::note_constexpr_alignment_adjust) << Alignment;
  return false;
}

/// __builtin_assume_aligned(Ptr, Alignment[, ExtraOffset])
static bool interp__builtin_assume_aligned(InterpState &S, CodePtr OpPC,
                                           const InterpFrame *Frame,
                                           const CallExpr *Call) {
  assert(Call->getNumArgs() == 2 || Call->getNumArgs() == 3);

  std::optional<APSInt> ExtraOffset;
  if (Call->getNumArgs() == 3)
    ExtraOffset = popToAPSInt(S.Stk, *S.Ctx.classify(Call->getArg(2)));

  APSInt Alignment = popToAPSInt(S.Stk, *S.Ctx.classify(Call->getArg(1)));
  const Pointer &Ptr = S.Stk.pop<Pointer>();

  CharUnits Align = CharUnits::fromQuantity(Alignment.getZExtValue());

  // If there is a base object, then it must have the correct alignment.
  if (Ptr.isBlockPointer()) {
    CharUnits BaseAlignment;
    if (const auto *VD = Ptr.getDeclDesc()->asValueDecl())
      BaseAlignment = S.getASTContext().getDeclAlign(VD);
    else if (const auto *E = Ptr.getDeclDesc()->asExpr())
      BaseAlignment = GetAlignOfExpr(S.getASTContext(), E, UETT_AlignOf);

    if (BaseAlignment < Align) {
      S.CCEDiag(Call->getArg(0),
                diag::note_constexpr_baa_insufficient_alignment)
          << 0 << BaseAlignment.getQuantity() << Align.getQuantity();
      return false;
    }
  }

  APValue AV = Ptr.toAPValue(S.getASTContext());
  CharUnits AVOffset = AV.getLValueOffset();
  if (ExtraOffset)
    AVOffset -= CharUnits::fromQuantity(ExtraOffset->getZExtValue());
  if (AVOffset.alignTo(Align) != AVOffset) {
    if (Ptr.isBlockPointer())
      S.CCEDiag(Call->getArg(0),
                diag::note_constexpr_baa_insufficient_alignment)
          << 1 << AVOffset.getQuantity() << Align.getQuantity();
    else
      S.CCEDiag(Call->getArg(0),
                diag::note_constexpr_baa_value_insufficient_alignment)
          << AVOffset.getQuantity() << Align.getQuantity();
    return false;
  }

  S.Stk.push<Pointer>(Ptr);
  return true;
}

/// (CarryIn, LHS, RHS, Result)
static bool interp__builtin_ia32_addcarry_subborrow(InterpState &S,
                                                    CodePtr OpPC,
                                                    const InterpFrame *Frame,
                                                    const CallExpr *Call,
                                                    unsigned BuiltinOp) {
  if (Call->getNumArgs() != 4 || !Call->getArg(0)->getType()->isIntegerType() ||
      !Call->getArg(1)->getType()->isIntegerType() ||
      !Call->getArg(2)->getType()->isIntegerType())
    return false;

  const Pointer &CarryOutPtr = S.Stk.pop<Pointer>();

  APSInt RHS = popToAPSInt(S, Call->getArg(2));
  APSInt LHS = popToAPSInt(S, Call->getArg(1));
  APSInt CarryIn = popToAPSInt(S, Call->getArg(0));

  bool IsAdd = BuiltinOp == clang::X86::BI__builtin_ia32_addcarryx_u32 ||
               BuiltinOp == clang::X86::BI__builtin_ia32_addcarryx_u64;

  unsigned BitWidth = LHS.getBitWidth();
  unsigned CarryInBit = CarryIn.ugt(0) ? 1 : 0;
  APInt ExResult =
      IsAdd ? (LHS.zext(BitWidth + 1) + (RHS.zext(BitWidth + 1) + CarryInBit))
            : (LHS.zext(BitWidth + 1) - (RHS.zext(BitWidth + 1) + CarryInBit));

  APInt Result = ExResult.extractBits(BitWidth, 0);
  APSInt CarryOut =
      APSInt(ExResult.extractBits(1, BitWidth), /*IsUnsigned=*/true);

  QualType CarryOutType = Call->getArg(3)->getType()->getPointeeType();
  PrimType CarryOutT = *S.getContext().classify(CarryOutType);
  assignInteger(S, CarryOutPtr, CarryOutT, APSInt(std::move(Result), true));

  pushInteger(S, CarryOut, Call->getType());

  return true;
}

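/// __builtin_os_log_format_buffer_size: the number of bytes os_log needs for
/// the described format call.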
static bool interp__builtin_os_log_format_buffer_size(InterpState &S,
                                                      CodePtr OpPC,
                                                      const InterpFrame *Frame,
                                                      const CallExpr *Call) {
  analyze_os_log::OSLogBufferLayout Layout;
  analyze_os_log::computeOSLogBufferLayout(S.getASTContext(), Call, Layout);
  pushInteger(S, Layout.size().getQuantity(), Call->getType());
  return true;
}

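/// __builtin_ptrauth_string_discriminator: stable SipHash of the given
/// string literal.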
static bool
interp__builtin_ptrauth_string_discriminator(InterpState &S, CodePtr OpPC,
                                             const InterpFrame *Frame,
                                             const CallExpr *Call) {
  const auto &Ptr = S.Stk.pop<Pointer>();
  assert(Ptr.getFieldDesc()->isPrimitiveArray());

  // This should be created for a StringLiteral, so it should always hold at
  // least one array element.
  assert(Ptr.getFieldDesc()->getNumElems() >= 1);
  StringRef R(&Ptr.deref<char>(), Ptr.getFieldDesc()->getNumElems() - 1);
  uint64_t Result = getPointerAuthStableSipHash(R);
  pushInteger(S, Result, Call->getType());
  return true;
}

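/// __builtin_infer_alloc_token: computes the allocation token for the
/// allocation type inferred from the call; the arguments themselves are
/// discarded unread.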
static bool interp__builtin_infer_alloc_token(InterpState &S, CodePtr OpPC,
                                              const InterpFrame *Frame,
                                              const CallExpr *Call) {
  const ASTContext &ASTCtx = S.getASTContext();
  uint64_t BitWidth = ASTCtx.getTypeSize(ASTCtx.getSizeType());
  auto Mode =
      ASTCtx.getLangOpts().AllocTokenMode.value_or(llvm::DefaultAllocTokenMode);
  auto MaxTokensOpt = ASTCtx.getLangOpts().AllocTokenMax;
  uint64_t MaxTokens =
      MaxTokensOpt.value_or(0) ? *MaxTokensOpt : (~0ULL >> (64 - BitWidth));

  // We do not read any of the arguments; discard them.
  for (int I = Call->getNumArgs() - 1; I >= 0; --I)
    discard(S.Stk, S.getContext().classify(Call->getArg(I)).value_or(PT_Ptr));

  // Note: Type inference from a surrounding cast is not supported in
  // constexpr evaluation.
  QualType AllocType = infer_alloc::inferPossibleType(Call, ASTCtx, nullptr);
  if (AllocType.isNull()) {
    S.CCEDiag(Call,
              diag::note_constexpr_infer_alloc_token_type_inference_failed);
    return false;
  }

  auto ATMD = infer_alloc::getAllocTokenMetadata(AllocType, ASTCtx);
  if (!ATMD) {
    S.CCEDiag(Call, diag::note_constexpr_infer_alloc_token_no_metadata);
    return false;
  }

  auto MaybeToken = llvm::getAllocToken(Mode, *ATMD, MaxTokens);
  if (!MaybeToken) {
    S.CCEDiag(Call, diag::note_constexpr_infer_alloc_token_stateful_mode);
    return false;
  }

  pushInteger(S, llvm::APInt(BitWidth, *MaybeToken), ASTCtx.getSizeType());
  return true;
}

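/// __builtin_operator_new: only valid when called from
/// std::allocator<T>::allocate(); allocates storage for the element type of
/// the enclosing allocate() call.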
static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC,
                                         const InterpFrame *Frame,
                                         const CallExpr *Call) {
  // A call to __builtin_operator_new is only valid within
  // std::allocator<T>::allocate. Walk up the call stack to find the
  // appropriate caller and get the element type from it.
  auto [NewCall, ElemType] = S.getStdAllocatorCaller("allocate");

  if (ElemType.isNull()) {
    S.FFDiag(Call, S.getLangOpts().CPlusPlus20
                       ? diag::note_constexpr_new_untyped
                       : diag::note_constexpr_new);
    return false;
  }
  assert(NewCall);

  if (ElemType->isIncompleteType() || ElemType->isFunctionType()) {
    S.FFDiag(Call, diag::note_constexpr_new_not_complete_object_type)
        << (ElemType->isIncompleteType() ? 0 : 1) << ElemType;
    return false;
  }

  // We only care about the first parameter (the size), so discard all the
  // others.
  {
    unsigned NumArgs = Call->getNumArgs();
    assert(NumArgs >= 1);

    // The std::nothrow_t arg never gets put on the stack.
    if (Call->getArg(NumArgs - 1)->getType()->isNothrowT())
      --NumArgs;
    auto Args = ArrayRef(Call->getArgs(), NumArgs);
    // The first arg is needed.
    Args = Args.drop_front();

    // Discard the rest.
    for (const Expr *Arg : Args)
      discard(S.Stk, *S.getContext().classify(Arg));
  }

  APSInt Bytes = popToAPSInt(S, Call->getArg(0));
  CharUnits ElemSize = S.getASTContext().getTypeSizeInChars(ElemType);
  assert(!ElemSize.isZero());
  // Divide the number of bytes by sizeof(ElemType), so we get the number of
  // elements we should allocate.
  APInt NumElems, Remainder;
  APInt ElemSizeAP(Bytes.getBitWidth(), ElemSize.getQuantity());
  APInt::udivrem(Bytes, ElemSizeAP, NumElems, Remainder);
  if (Remainder != 0) {
    // This likely indicates a bug in the implementation of 'std::allocator'.
    S.FFDiag(Call, diag::note_constexpr_operator_new_bad_size)
        << Bytes << APSInt(ElemSizeAP, true) << ElemType;
    return false;
  }

  // NB: The same check we're using in CheckArraySize()
  if (NumElems.getActiveBits() >
          ConstantArrayType::getMaxSizeBits(S.getASTContext()) ||
      NumElems.ugt(Descriptor::MaxArrayElemBytes / ElemSize.getQuantity())) {
    // FIXME: NoThrow check?
    const SourceInfo &Loc = S.Current->getSource(OpPC);
    S.FFDiag(Loc, diag::note_constexpr_new_too_large)
        << NumElems.getZExtValue();
    return false;
  }

  if (!CheckArraySize(S, OpPC, NumElems.getZExtValue()))
    return false;

  bool IsArray = NumElems.ugt(1);
  OptPrimType ElemT = S.getContext().classify(ElemType);
  DynamicAllocator &Allocator = S.getAllocator();
  if (ElemT) {
    Block *B =
        Allocator.allocate(NewCall, *ElemT, NumElems.getZExtValue(),
                           S.Ctx.getEvalID(), DynamicAllocator::Form::Operator);
    assert(B);
    S.Stk.push<Pointer>(Pointer(B).atIndex(0));
    return true;
  }

  assert(!ElemT);

  // Composite arrays.
  if (IsArray) {
    const Descriptor *Desc =
        S.P.createDescriptor(NewCall, ElemType.getTypePtr(), std::nullopt);
    Block *B =
        Allocator.allocate(Desc, NumElems.getZExtValue(), S.Ctx.getEvalID(),
                           DynamicAllocator::Form::Operator);
    assert(B);
    S.Stk.push<Pointer>(Pointer(B).atIndex(0).narrow());
    return true;
  }

  // Records. Still allocate them as single-element arrays.
  QualType AllocType = S.getASTContext().getConstantArrayType(
      ElemType, NumElems, nullptr, ArraySizeModifier::Normal, 0);

  const Descriptor *Desc = S.P.createDescriptor(NewCall, AllocType.getTypePtr(),
                                                Descriptor::InlineDescMD);
  Block *B = Allocator.allocate(Desc, S.getContext().getEvalID(),
                                DynamicAllocator::Form::Operator);
  assert(B);
  S.Stk.push<Pointer>(Pointer(B).atIndex(0).narrow());
  return true;
}

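/// __builtin_operator_delete: only valid when called from
/// std::allocator<T>::deallocate(); deallocates a prior dynamic allocation.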
1564static bool interp__builtin_operator_delete(InterpState &S, CodePtr OpPC,
1565 const InterpFrame *Frame,
1566 const CallExpr *Call) {
1567 const Expr *Source = nullptr;
1568 const Block *BlockToDelete = nullptr;
1569
1570 if (S.checkingPotentialConstantExpression()) {
1571 S.Stk.discard<Pointer>();
1572 return false;
1573 }
1574
1575 // This is permitted only within a call to std::allocator<T>::deallocate.
1576 if (!S.getStdAllocatorCaller(Name: "deallocate")) {
1577 S.FFDiag(E: Call);
1578 S.Stk.discard<Pointer>();
1579 return true;
1580 }
1581
1582 {
1583 const Pointer &Ptr = S.Stk.pop<Pointer>();
1584
1585 if (Ptr.isZero()) {
1586 S.CCEDiag(E: Call, DiagId: diag::note_constexpr_deallocate_null);
1587 return true;
1588 }
1589
1590 Source = Ptr.getDeclDesc()->asExpr();
1591 BlockToDelete = Ptr.block();
1592
1593 if (!BlockToDelete->isDynamic()) {
1594 S.FFDiag(E: Call, DiagId: diag::note_constexpr_delete_not_heap_alloc)
1595 << Ptr.toDiagnosticString(Ctx: S.getASTContext());
1596 if (const auto *D = Ptr.getFieldDesc()->asDecl())
1597 S.Note(Loc: D->getLocation(), DiagId: diag::note_declared_at);
1598 }
1599 }
1600 assert(BlockToDelete);
1601
1602 DynamicAllocator &Allocator = S.getAllocator();
1603 const Descriptor *BlockDesc = BlockToDelete->getDescriptor();
1604 std::optional<DynamicAllocator::Form> AllocForm =
1605 Allocator.getAllocationForm(Source);
1606
1607 if (!Allocator.deallocate(Source, BlockToDelete, S)) {
    // Nothing has been deallocated, so this must be a double delete.
1609 const SourceInfo &Loc = S.Current->getSource(PC: OpPC);
1610 S.FFDiag(SI: Loc, DiagId: diag::note_constexpr_double_delete);
1611 return false;
1612 }
1613 assert(AllocForm);
1614
1615 return CheckNewDeleteForms(
1616 S, OpPC, AllocForm: *AllocForm, DeleteForm: DynamicAllocator::Form::Operator, D: BlockDesc, NewExpr: Source);
1617}
1618
1619static bool interp__builtin_arithmetic_fence(InterpState &S, CodePtr OpPC,
1620 const InterpFrame *Frame,
1621 const CallExpr *Call) {
1622 const Floating &Arg0 = S.Stk.pop<Floating>();
1623 S.Stk.push<Floating>(Args: Arg0);
1624 return true;
1625}
1626
1627static bool interp__builtin_vector_reduce(InterpState &S, CodePtr OpPC,
1628 const CallExpr *Call, unsigned ID) {
1629 const Pointer &Arg = S.Stk.pop<Pointer>();
1630 assert(Arg.getFieldDesc()->isPrimitiveArray());
1631
1632 QualType ElemType = Arg.getFieldDesc()->getElemQualType();
1633 assert(Call->getType() == ElemType);
1634 PrimType ElemT = *S.getContext().classify(T: ElemType);
1635 unsigned NumElems = Arg.getNumElems();
1636
1637 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
1638 T Result = Arg.elem<T>(0);
1639 unsigned BitWidth = Result.bitWidth();
1640 for (unsigned I = 1; I != NumElems; ++I) {
1641 T Elem = Arg.elem<T>(I);
1642 T PrevResult = Result;
1643
1644 if (ID == Builtin::BI__builtin_reduce_add) {
1645 if (T::add(Result, Elem, BitWidth, &Result)) {
1646 unsigned OverflowBits = BitWidth + 1;
1647 (void)handleOverflow(S, OpPC,
1648 (PrevResult.toAPSInt(OverflowBits) +
1649 Elem.toAPSInt(OverflowBits)));
1650 return false;
1651 }
1652 } else if (ID == Builtin::BI__builtin_reduce_mul) {
1653 if (T::mul(Result, Elem, BitWidth, &Result)) {
1654 unsigned OverflowBits = BitWidth * 2;
1655 (void)handleOverflow(S, OpPC,
1656 (PrevResult.toAPSInt(OverflowBits) *
1657 Elem.toAPSInt(OverflowBits)));
1658 return false;
1659 }
1660
1661 } else if (ID == Builtin::BI__builtin_reduce_and) {
1662 (void)T::bitAnd(Result, Elem, BitWidth, &Result);
1663 } else if (ID == Builtin::BI__builtin_reduce_or) {
1664 (void)T::bitOr(Result, Elem, BitWidth, &Result);
1665 } else if (ID == Builtin::BI__builtin_reduce_xor) {
1666 (void)T::bitXor(Result, Elem, BitWidth, &Result);
1667 } else if (ID == Builtin::BI__builtin_reduce_min) {
1668 if (Elem < Result)
1669 Result = Elem;
1670 } else if (ID == Builtin::BI__builtin_reduce_max) {
1671 if (Elem > Result)
1672 Result = Elem;
1673 } else {
1674 llvm_unreachable("Unhandled vector reduce builtin");
1675 }
1676 }
1677 pushInteger(S, Result.toAPSInt(), Call->getType());
1678 });
1679
1680 return true;
1681}
1682
1683static bool interp__builtin_elementwise_abs(InterpState &S, CodePtr OpPC,
1684 const InterpFrame *Frame,
1685 const CallExpr *Call,
1686 unsigned BuiltinID) {
1687 assert(Call->getNumArgs() == 1);
1688 QualType Ty = Call->getArg(Arg: 0)->getType();
1689 if (Ty->isIntegerType()) {
1690 APSInt Val = popToAPSInt(S, E: Call->getArg(Arg: 0));
1691 pushInteger(S, Val: Val.abs(), QT: Call->getType());
1692 return true;
1693 }
1694
1695 if (Ty->isFloatingType()) {
1696 Floating Val = S.Stk.pop<Floating>();
1697 Floating Result = abs(S, In: Val);
1698 S.Stk.push<Floating>(Args&: Result);
1699 return true;
1700 }
1701
1702 // Otherwise, the argument must be a vector.
1703 assert(Call->getArg(0)->getType()->isVectorType());
1704 const Pointer &Arg = S.Stk.pop<Pointer>();
1705 assert(Arg.getFieldDesc()->isPrimitiveArray());
1706 const Pointer &Dst = S.Stk.peek<Pointer>();
1707 assert(Dst.getFieldDesc()->isPrimitiveArray());
1708 assert(Arg.getFieldDesc()->getNumElems() ==
1709 Dst.getFieldDesc()->getNumElems());
1710
1711 QualType ElemType = Arg.getFieldDesc()->getElemQualType();
1712 PrimType ElemT = *S.getContext().classify(T: ElemType);
1713 unsigned NumElems = Arg.getNumElems();
  // We can have either a vector of integers or a vector of floating-point
  // values here.
1715 for (unsigned I = 0; I != NumElems; ++I) {
1716 if (ElemType->isIntegerType()) {
1717 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
1718 Dst.elem<T>(I) = T::from(static_cast<T>(
1719 APSInt(Arg.elem<T>(I).toAPSInt().abs(),
1720 ElemType->isUnsignedIntegerOrEnumerationType())));
1721 });
1722 } else {
1723 Floating Val = Arg.elem<Floating>(I);
1724 Dst.elem<Floating>(I) = abs(S, In: Val);
1725 }
1726 }
1727 Dst.initializeAllElements();
1728
1729 return true;
1730}
1731
/// Can be called with an integer or a vector as the first parameter; an
/// optional second argument supplies the result for inputs that are zero.
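/// E.g., __builtin_elementwise_clzg(1u) is 31 for a 32-bit unsigned int, and
/// __builtin_elementwise_ctzg(0u, 32u) is 32 via the fallback argument.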
1733static bool interp__builtin_elementwise_countzeroes(InterpState &S,
1734 CodePtr OpPC,
1735 const InterpFrame *Frame,
1736 const CallExpr *Call,
1737 unsigned BuiltinID) {
1738 bool HasZeroArg = Call->getNumArgs() == 2;
1739 bool IsCTTZ = BuiltinID == Builtin::BI__builtin_elementwise_ctzg;
1740 assert(Call->getNumArgs() == 1 || HasZeroArg);
1741 if (Call->getArg(Arg: 0)->getType()->isIntegerType()) {
1742 PrimType ArgT = *S.getContext().classify(T: Call->getArg(Arg: 0)->getType());
1743 APSInt Val = popToAPSInt(Stk&: S.Stk, T: ArgT);
1744 std::optional<APSInt> ZeroVal;
1745 if (HasZeroArg) {
1746 ZeroVal = Val;
1747 Val = popToAPSInt(Stk&: S.Stk, T: ArgT);
1748 }
1749
1750 if (Val.isZero()) {
1751 if (ZeroVal) {
1752 pushInteger(S, Val: *ZeroVal, QT: Call->getType());
1753 return true;
1754 }
      // If we haven't been provided the second argument, the result is
      // undefined.
1757 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1758 DiagId: diag::note_constexpr_countzeroes_zero)
1759 << /*IsTrailing=*/IsCTTZ;
1760 return false;
1761 }
1762
1763 if (BuiltinID == Builtin::BI__builtin_elementwise_clzg) {
1764 pushInteger(S, Val: Val.countLeadingZeros(), QT: Call->getType());
1765 } else {
1766 pushInteger(S, Val: Val.countTrailingZeros(), QT: Call->getType());
1767 }
1768 return true;
1769 }
1770 // Otherwise, the argument must be a vector.
1771 const ASTContext &ASTCtx = S.getASTContext();
1772 Pointer ZeroArg;
1773 if (HasZeroArg) {
1774 assert(Call->getArg(1)->getType()->isVectorType() &&
1775 ASTCtx.hasSameUnqualifiedType(Call->getArg(0)->getType(),
1776 Call->getArg(1)->getType()));
1777 (void)ASTCtx;
1778 ZeroArg = S.Stk.pop<Pointer>();
1779 assert(ZeroArg.getFieldDesc()->isPrimitiveArray());
1780 }
1781 assert(Call->getArg(0)->getType()->isVectorType());
1782 const Pointer &Arg = S.Stk.pop<Pointer>();
1783 assert(Arg.getFieldDesc()->isPrimitiveArray());
1784 const Pointer &Dst = S.Stk.peek<Pointer>();
1785 assert(Dst.getFieldDesc()->isPrimitiveArray());
1786 assert(Arg.getFieldDesc()->getNumElems() ==
1787 Dst.getFieldDesc()->getNumElems());
1788
1789 QualType ElemType = Arg.getFieldDesc()->getElemQualType();
1790 PrimType ElemT = *S.getContext().classify(T: ElemType);
1791 unsigned NumElems = Arg.getNumElems();
1792
1793 // FIXME: Reading from uninitialized vector elements?
1794 for (unsigned I = 0; I != NumElems; ++I) {
1795 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
1796 APInt EltVal = Arg.atIndex(I).deref<T>().toAPSInt();
1797 if (EltVal.isZero()) {
1798 if (HasZeroArg) {
1799 Dst.atIndex(I).deref<T>() = ZeroArg.atIndex(I).deref<T>();
1800 } else {
          // If we haven't been provided the second argument, the result is
          // undefined.
1803 S.FFDiag(S.Current->getSource(OpPC),
1804 diag::note_constexpr_countzeroes_zero)
1805 << /*IsTrailing=*/IsCTTZ;
1806 return false;
1807 }
1808 } else if (IsCTTZ) {
1809 Dst.atIndex(I).deref<T>() = T::from(EltVal.countTrailingZeros());
1810 } else {
1811 Dst.atIndex(I).deref<T>() = T::from(EltVal.countLeadingZeros());
1812 }
1813 Dst.atIndex(I).initialize();
1814 });
1815 }
1816
1817 return true;
1818}
1819
1820static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC,
1821 const InterpFrame *Frame,
1822 const CallExpr *Call, unsigned ID) {
1823 assert(Call->getNumArgs() == 3);
1824 const ASTContext &ASTCtx = S.getASTContext();
1825 uint64_t Size = popToUInt64(S, E: Call->getArg(Arg: 2));
1826 Pointer SrcPtr = S.Stk.pop<Pointer>().expand();
1827 Pointer DestPtr = S.Stk.pop<Pointer>().expand();
1828
1829 if (ID == Builtin::BImemcpy || ID == Builtin::BImemmove)
1830 diagnoseNonConstexprBuiltin(S, OpPC, ID);
1831
1832 bool Move =
1833 (ID == Builtin::BI__builtin_memmove || ID == Builtin::BImemmove ||
1834 ID == Builtin::BI__builtin_wmemmove || ID == Builtin::BIwmemmove);
1835 bool WChar = ID == Builtin::BIwmemcpy || ID == Builtin::BIwmemmove ||
1836 ID == Builtin::BI__builtin_wmemcpy ||
1837 ID == Builtin::BI__builtin_wmemmove;
1838
1839 // If the size is zero, we treat this as always being a valid no-op.
1840 if (Size == 0) {
1841 S.Stk.push<Pointer>(Args&: DestPtr);
1842 return true;
1843 }
1844
1845 if (SrcPtr.isZero() || DestPtr.isZero()) {
1846 Pointer DiagPtr = (SrcPtr.isZero() ? SrcPtr : DestPtr);
1847 S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_null)
1848 << /*IsMove=*/Move << /*IsWchar=*/WChar << !SrcPtr.isZero()
1849 << DiagPtr.toDiagnosticString(Ctx: ASTCtx);
1850 return false;
1851 }
1852
1853 // Diagnose integral src/dest pointers specially.
1854 if (SrcPtr.isIntegralPointer() || DestPtr.isIntegralPointer()) {
1855 std::string DiagVal = "(void *)";
1856 DiagVal += SrcPtr.isIntegralPointer()
1857 ? std::to_string(val: SrcPtr.getIntegerRepresentation())
1858 : std::to_string(val: DestPtr.getIntegerRepresentation());
1859 S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_null)
1860 << Move << WChar << DestPtr.isIntegralPointer() << DiagVal;
1861 return false;
1862 }
1863
1864 if (!isReadable(P: DestPtr) || !isReadable(P: SrcPtr))
1865 return false;
1866
1867 if (DestPtr.getType()->isIncompleteType()) {
1868 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1869 DiagId: diag::note_constexpr_memcpy_incomplete_type)
1870 << Move << DestPtr.getType();
1871 return false;
1872 }
1873 if (SrcPtr.getType()->isIncompleteType()) {
1874 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1875 DiagId: diag::note_constexpr_memcpy_incomplete_type)
1876 << Move << SrcPtr.getType();
1877 return false;
1878 }
1879
1880 QualType DestElemType = getElemType(P: DestPtr);
1881 if (DestElemType->isIncompleteType()) {
1882 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1883 DiagId: diag::note_constexpr_memcpy_incomplete_type)
1884 << Move << DestElemType;
1885 return false;
1886 }
1887
1888 size_t RemainingDestElems;
1889 if (DestPtr.getFieldDesc()->isArray()) {
1890 RemainingDestElems = DestPtr.isUnknownSizeArray()
1891 ? 0
1892 : (DestPtr.getNumElems() - DestPtr.getIndex());
1893 } else {
1894 RemainingDestElems = 1;
1895 }
1896 unsigned DestElemSize = ASTCtx.getTypeSizeInChars(T: DestElemType).getQuantity();
1897
1898 if (WChar) {
1899 uint64_t WCharSize =
1900 ASTCtx.getTypeSizeInChars(T: ASTCtx.getWCharType()).getQuantity();
1901 Size *= WCharSize;
1902 }
1903
1904 if (Size % DestElemSize != 0) {
1905 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1906 DiagId: diag::note_constexpr_memcpy_unsupported)
1907 << Move << WChar << 0 << DestElemType << Size << DestElemSize;
1908 return false;
1909 }
1910
1911 QualType SrcElemType = getElemType(P: SrcPtr);
1912 size_t RemainingSrcElems;
1913 if (SrcPtr.getFieldDesc()->isArray()) {
1914 RemainingSrcElems = SrcPtr.isUnknownSizeArray()
1915 ? 0
1916 : (SrcPtr.getNumElems() - SrcPtr.getIndex());
1917 } else {
1918 RemainingSrcElems = 1;
1919 }
1920 unsigned SrcElemSize = ASTCtx.getTypeSizeInChars(T: SrcElemType).getQuantity();
1921
1922 if (!ASTCtx.hasSameUnqualifiedType(T1: DestElemType, T2: SrcElemType)) {
1923 S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_type_pun)
1924 << Move << SrcElemType << DestElemType;
1925 return false;
1926 }
1927
1928 if (!DestElemType.isTriviallyCopyableType(Context: ASTCtx)) {
1929 S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_nontrivial)
1930 << Move << DestElemType;
1931 return false;
1932 }
1933
1934 // Check if we have enough elements to read from and write to.
1935 size_t RemainingDestBytes = RemainingDestElems * DestElemSize;
1936 size_t RemainingSrcBytes = RemainingSrcElems * SrcElemSize;
1937 if (Size > RemainingDestBytes || Size > RemainingSrcBytes) {
1938 APInt N = APInt(64, Size / DestElemSize);
1939 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1940 DiagId: diag::note_constexpr_memcpy_unsupported)
1941 << Move << WChar << (Size > RemainingSrcBytes ? 1 : 2) << DestElemType
1942 << toString(I: N, Radix: 10, /*Signed=*/false);
1943 return false;
1944 }
1945
1946 // Check for overlapping memory regions.
1947 if (!Move && Pointer::pointToSameBlock(A: SrcPtr, B: DestPtr)) {
1948 // Remove base casts.
1949 Pointer SrcP = SrcPtr.stripBaseCasts();
1950 Pointer DestP = DestPtr.stripBaseCasts();
1951
1952 unsigned SrcIndex = SrcP.expand().getIndex() * SrcP.elemSize();
1953 unsigned DstIndex = DestP.expand().getIndex() * DestP.elemSize();
1954
1955 if ((SrcIndex <= DstIndex && (SrcIndex + Size) > DstIndex) ||
1956 (DstIndex <= SrcIndex && (DstIndex + Size) > SrcIndex)) {
1957 S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_overlap)
1958 << /*IsWChar=*/false;
1959 return false;
1960 }
1961 }
1962
1963 assert(Size % DestElemSize == 0);
1964 if (!DoMemcpy(S, OpPC, SrcPtr, DestPtr, Size: Bytes(Size).toBits()))
1965 return false;
1966
1967 S.Stk.push<Pointer>(Args&: DestPtr);
1968 return true;
1969}
1970
1971/// Determine if T is a character type for which we guarantee that
1972/// sizeof(T) == 1.
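/// This holds for char, signed char, unsigned char, and char8_t, but not for
/// wchar_t, char16_t, or char32_t.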
1973static bool isOneByteCharacterType(QualType T) {
1974 return T->isCharType() || T->isChar8Type();
1975}
1976
1977static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC,
1978 const InterpFrame *Frame,
1979 const CallExpr *Call, unsigned ID) {
1980 assert(Call->getNumArgs() == 3);
1981 uint64_t Size = popToUInt64(S, E: Call->getArg(Arg: 2));
1982 const Pointer &PtrB = S.Stk.pop<Pointer>();
1983 const Pointer &PtrA = S.Stk.pop<Pointer>();
1984
1985 if (ID == Builtin::BImemcmp || ID == Builtin::BIbcmp ||
1986 ID == Builtin::BIwmemcmp)
1987 diagnoseNonConstexprBuiltin(S, OpPC, ID);
1988
1989 if (Size == 0) {
1990 pushInteger(S, Val: 0, QT: Call->getType());
1991 return true;
1992 }
1993
1994 if (!PtrA.isBlockPointer() || !PtrB.isBlockPointer())
1995 return false;
1996
1997 bool IsWide =
1998 (ID == Builtin::BIwmemcmp || ID == Builtin::BI__builtin_wmemcmp);
1999
2000 const ASTContext &ASTCtx = S.getASTContext();
2001 QualType ElemTypeA = getElemType(P: PtrA);
2002 QualType ElemTypeB = getElemType(P: PtrB);
  // FIXME: This is an arbitrary limitation that the current constant
  // interpreter has. We could remove this.
2005 if (!IsWide && (!isOneByteCharacterType(T: ElemTypeA) ||
2006 !isOneByteCharacterType(T: ElemTypeB))) {
2007 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
2008 DiagId: diag::note_constexpr_memcmp_unsupported)
2009 << ASTCtx.BuiltinInfo.getQuotedName(ID) << PtrA.getType()
2010 << PtrB.getType();
2011 return false;
2012 }
2013
2014 if (!CheckLoad(S, OpPC, Ptr: PtrA, AK: AK_Read) || !CheckLoad(S, OpPC, Ptr: PtrB, AK: AK_Read))
2015 return false;
2016
2017 // Now, read both pointers to a buffer and compare those.
2018 BitcastBuffer BufferA(
2019 Bits(ASTCtx.getTypeSize(T: ElemTypeA) * PtrA.getNumElems()));
2020 readPointerToBuffer(Ctx: S.getContext(), FromPtr: PtrA, Buffer&: BufferA, ReturnOnUninit: false);
2021 // FIXME: The swapping here is UNDOING something we do when reading the
2022 // data into the buffer.
2023 if (ASTCtx.getTargetInfo().isBigEndian())
2024 swapBytes(M: BufferA.Data.get(), N: BufferA.byteSize().getQuantity());
2025
2026 BitcastBuffer BufferB(
2027 Bits(ASTCtx.getTypeSize(T: ElemTypeB) * PtrB.getNumElems()));
2028 readPointerToBuffer(Ctx: S.getContext(), FromPtr: PtrB, Buffer&: BufferB, ReturnOnUninit: false);
2029 // FIXME: The swapping here is UNDOING something we do when reading the
2030 // data into the buffer.
2031 if (ASTCtx.getTargetInfo().isBigEndian())
2032 swapBytes(M: BufferB.Data.get(), N: BufferB.byteSize().getQuantity());
2033
2034 size_t MinBufferSize = std::min(a: BufferA.byteSize().getQuantity(),
2035 b: BufferB.byteSize().getQuantity());
2036
2037 unsigned ElemSize = 1;
2038 if (IsWide)
2039 ElemSize = ASTCtx.getTypeSizeInChars(T: ASTCtx.getWCharType()).getQuantity();
2040 // The Size given for the wide variants is in wide-char units. Convert it
2041 // to bytes.
2042 size_t ByteSize = Size * ElemSize;
2043 size_t CmpSize = std::min(a: MinBufferSize, b: ByteSize);
2044
2045 for (size_t I = 0; I != CmpSize; I += ElemSize) {
2046 if (IsWide) {
2047 INT_TYPE_SWITCH(*S.getContext().classify(ASTCtx.getWCharType()), {
2048 T A = *reinterpret_cast<T *>(BufferA.atByte(I));
2049 T B = *reinterpret_cast<T *>(BufferB.atByte(I));
2050 if (A < B) {
2051 pushInteger(S, -1, Call->getType());
2052 return true;
2053 }
2054 if (A > B) {
2055 pushInteger(S, 1, Call->getType());
2056 return true;
2057 }
2058 });
2059 } else {
2060 std::byte A = BufferA.deref<std::byte>(Offset: Bytes(I));
2061 std::byte B = BufferB.deref<std::byte>(Offset: Bytes(I));
2062
2063 if (A < B) {
2064 pushInteger(S, Val: -1, QT: Call->getType());
2065 return true;
2066 }
2067 if (A > B) {
2068 pushInteger(S, Val: 1, QT: Call->getType());
2069 return true;
2070 }
2071 }
2072 }
2073
2074 // We compared CmpSize bytes above. If the limiting factor was the Size
2075 // passed, we're done and the result is equality (0).
2076 if (ByteSize <= CmpSize) {
2077 pushInteger(S, Val: 0, QT: Call->getType());
2078 return true;
2079 }
2080
2081 // However, if we read all the available bytes but were instructed to read
2082 // even more, diagnose this as a "read of dereferenced one-past-the-end
2083 // pointer". This is what would happen if we called CheckLoad() on every array
2084 // element.
2085 S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_access_past_end)
2086 << AK_Read << S.Current->getRange(PC: OpPC);
2087 return false;
2088}
2089
2090// __builtin_memchr(ptr, int, int)
2091// __builtin_strchr(ptr, int)
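// E.g., in a constant context, __builtin_memchr("abc", 'b', 3) yields a
// pointer to the 'b', and __builtin_strchr("abc", 'x') yields a null pointer.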
2092static bool interp__builtin_memchr(InterpState &S, CodePtr OpPC,
2093 const CallExpr *Call, unsigned ID) {
2094 if (ID == Builtin::BImemchr || ID == Builtin::BIwcschr ||
2095 ID == Builtin::BIstrchr || ID == Builtin::BIwmemchr)
2096 diagnoseNonConstexprBuiltin(S, OpPC, ID);
2097
2098 std::optional<APSInt> MaxLength;
2099 if (Call->getNumArgs() == 3)
2100 MaxLength = popToAPSInt(S, E: Call->getArg(Arg: 2));
2101
2102 APSInt Desired = popToAPSInt(S, E: Call->getArg(Arg: 1));
2103 const Pointer &Ptr = S.Stk.pop<Pointer>();
2104
2105 if (MaxLength && MaxLength->isZero()) {
2106 S.Stk.push<Pointer>();
2107 return true;
2108 }
2109
2110 if (Ptr.isDummy()) {
2111 if (Ptr.getType()->isIncompleteType())
2112 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
2113 DiagId: diag::note_constexpr_ltor_incomplete_type)
2114 << Ptr.getType();
2115 return false;
2116 }
2117
2118 // Null is only okay if the given size is 0.
2119 if (Ptr.isZero()) {
2120 S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_access_null)
2121 << AK_Read;
2122 return false;
2123 }
2124
2125 if (!Ptr.isBlockPointer())
2126 return false;
2127
2128 QualType ElemTy = Ptr.getFieldDesc()->isArray()
2129 ? Ptr.getFieldDesc()->getElemQualType()
2130 : Ptr.getFieldDesc()->getType();
2131 bool IsRawByte = ID == Builtin::BImemchr || ID == Builtin::BI__builtin_memchr;
2132
2133 // Give up on byte-oriented matching against multibyte elements.
2134 if (IsRawByte && !isOneByteCharacterType(T: ElemTy)) {
2135 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
2136 DiagId: diag::note_constexpr_memchr_unsupported)
2137 << S.getASTContext().BuiltinInfo.getQuotedName(ID) << ElemTy;
2138 return false;
2139 }
2140
2141 if (!isReadable(P: Ptr))
2142 return false;
2143
2144 if (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr) {
2145 int64_t DesiredTrunc;
2146 if (S.getASTContext().CharTy->isSignedIntegerType())
2147 DesiredTrunc =
2148 Desired.trunc(width: S.getASTContext().getCharWidth()).getSExtValue();
2149 else
2150 DesiredTrunc =
2151 Desired.trunc(width: S.getASTContext().getCharWidth()).getZExtValue();
2152 // strchr compares directly to the passed integer, and therefore
2153 // always fails if given an int that is not a char.
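    // E.g., __builtin_strchr("abc", 'a' + 256) yields a null pointer even
    // though the truncated value would match 'a'.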
2154 if (Desired != DesiredTrunc) {
2155 S.Stk.push<Pointer>();
2156 return true;
2157 }
2158 }
2159
2160 uint64_t DesiredVal;
2161 if (ID == Builtin::BIwmemchr || ID == Builtin::BI__builtin_wmemchr ||
2162 ID == Builtin::BIwcschr || ID == Builtin::BI__builtin_wcschr) {
2163 // wcschr and wmemchr are given a wchar_t to look for. Just use it.
2164 DesiredVal = Desired.getZExtValue();
2165 } else {
2166 DesiredVal = Desired.trunc(width: S.getASTContext().getCharWidth()).getZExtValue();
2167 }
2168
2169 bool StopAtZero =
2170 (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr ||
2171 ID == Builtin::BIwcschr || ID == Builtin::BI__builtin_wcschr);
2172
2173 PrimType ElemT =
2174 IsRawByte ? PT_Sint8 : *S.getContext().classify(T: getElemType(P: Ptr));
2175
2176 size_t Index = Ptr.getIndex();
2177 size_t Step = 0;
2178 for (;;) {
2179 const Pointer &ElemPtr =
2180 (Index + Step) > 0 ? Ptr.atIndex(Idx: Index + Step) : Ptr;
2181
2182 if (!CheckLoad(S, OpPC, Ptr: ElemPtr))
2183 return false;
2184
2185 uint64_t V;
2186 INT_TYPE_SWITCH_NO_BOOL(
2187 ElemT, { V = static_cast<uint64_t>(ElemPtr.deref<T>().toUnsigned()); });
2188
2189 if (V == DesiredVal) {
2190 S.Stk.push<Pointer>(Args: ElemPtr);
2191 return true;
2192 }
2193
2194 if (StopAtZero && V == 0)
2195 break;
2196
2197 ++Step;
2198 if (MaxLength && Step == MaxLength->getZExtValue())
2199 break;
2200 }
2201
2202 S.Stk.push<Pointer>();
2203 return true;
2204}
2205
2206static std::optional<unsigned> computeFullDescSize(const ASTContext &ASTCtx,
2207 const Descriptor *Desc) {
2208 if (Desc->isPrimitive())
2209 return ASTCtx.getTypeSizeInChars(T: Desc->getType()).getQuantity();
2210 if (Desc->isArray())
2211 return ASTCtx.getTypeSizeInChars(T: Desc->getElemQualType()).getQuantity() *
2212 Desc->getNumElems();
2213 if (Desc->isRecord()) {
2214 // Can't use Descriptor::getType() as that may return a pointer type. Look
2215 // at the decl directly.
2216 return ASTCtx
2217 .getTypeSizeInChars(
2218 T: ASTCtx.getCanonicalTagType(TD: Desc->ElemRecord->getDecl()))
2219 .getQuantity();
2220 }
2221
2222 return std::nullopt;
2223}
2224
2225/// Compute the byte offset of \p Ptr in the full declaration.
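/// E.g., for `int arr[2][3]`, a pointer to `arr[1][2]` yields
/// 1 * sizeof(int[3]) + 2 * sizeof(int) bytes (20, assuming a 4-byte int).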
2226static unsigned computePointerOffset(const ASTContext &ASTCtx,
2227 const Pointer &Ptr) {
2228 unsigned Result = 0;
2229
2230 Pointer P = Ptr;
2231 while (P.isField() || P.isArrayElement()) {
2232 P = P.expand();
2233 const Descriptor *D = P.getFieldDesc();
2234
2235 if (P.isArrayElement()) {
2236 unsigned ElemSize =
2237 ASTCtx.getTypeSizeInChars(T: D->getElemQualType()).getQuantity();
2238 if (P.isOnePastEnd())
2239 Result += ElemSize * P.getNumElems();
2240 else
2241 Result += ElemSize * P.getIndex();
2242 P = P.expand().getArray();
2243 } else if (P.isBaseClass()) {
2244 const auto *RD = cast<CXXRecordDecl>(Val: D->asDecl());
2245 bool IsVirtual = Ptr.isVirtualBaseClass();
2246 P = P.getBase();
2247 const Record *BaseRecord = P.getRecord();
2248
2249 const ASTRecordLayout &Layout =
2250 ASTCtx.getASTRecordLayout(D: cast<CXXRecordDecl>(Val: BaseRecord->getDecl()));
2251 if (IsVirtual)
2252 Result += Layout.getVBaseClassOffset(VBase: RD).getQuantity();
2253 else
2254 Result += Layout.getBaseClassOffset(Base: RD).getQuantity();
2255 } else if (P.isField()) {
2256 const FieldDecl *FD = P.getField();
2257 const ASTRecordLayout &Layout =
2258 ASTCtx.getASTRecordLayout(D: FD->getParent());
2259 unsigned FieldIndex = FD->getFieldIndex();
2260 uint64_t FieldOffset =
2261 ASTCtx.toCharUnitsFromBits(BitSize: Layout.getFieldOffset(FieldNo: FieldIndex))
2262 .getQuantity();
2263 Result += FieldOffset;
2264 P = P.getBase();
2265 } else
2266 llvm_unreachable("Unhandled descriptor type");
2267 }
2268
2269 return Result;
2270}
2271
2272/// Does Ptr point to the last subobject?
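/// E.g., for `struct S { int a; int b; };`, a pointer to `b` points to the
/// last subobject, while a pointer to `a` does not.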
2273static bool pointsToLastObject(const Pointer &Ptr) {
2274 Pointer P = Ptr;
  while (!P.isRoot()) {
    if (P.isArrayElement()) {
2278 P = P.expand().getArray();
2279 continue;
2280 }
2281 if (P.isBaseClass()) {
2282 if (P.getRecord()->getNumFields() > 0)
2283 return false;
2284 P = P.getBase();
2285 continue;
2286 }
2287
2288 Pointer Base = P.getBase();
2289 if (const Record *R = Base.getRecord()) {
2290 assert(P.getField());
2291 if (P.getField()->getFieldIndex() != R->getNumFields() - 1)
2292 return false;
2293 }
2294 P = Base;
2295 }
2296
2297 return true;
2298}
2299
2300/// Does Ptr point to the last object AND to a flexible array member?
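/// E.g., for `struct S { int n; int fam[]; };`, a pointer to `fam` in the
/// last member position qualifies, subject to the -fstrict-flex-arrays level.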
2301static bool isUserWritingOffTheEnd(const ASTContext &Ctx, const Pointer &Ptr,
2302 bool InvalidBase) {
2303 auto isFlexibleArrayMember = [&](const Descriptor *FieldDesc) {
2304 using FAMKind = LangOptions::StrictFlexArraysLevelKind;
2305 FAMKind StrictFlexArraysLevel =
2306 Ctx.getLangOpts().getStrictFlexArraysLevel();
2307
2308 if (StrictFlexArraysLevel == FAMKind::Default)
2309 return true;
2310
2311 unsigned NumElems = FieldDesc->getNumElems();
2312 if (NumElems == 0 && StrictFlexArraysLevel != FAMKind::IncompleteOnly)
2313 return true;
2314
2315 if (NumElems == 1 && StrictFlexArraysLevel == FAMKind::OneZeroOrIncomplete)
2316 return true;
2317 return false;
2318 };
2319
2320 const Descriptor *FieldDesc = Ptr.getFieldDesc();
2321 if (!FieldDesc->isArray())
2322 return false;
2323
2324 return InvalidBase && pointsToLastObject(Ptr) &&
2325 isFlexibleArrayMember(FieldDesc);
2326}
2327
2328UnsignedOrNone evaluateBuiltinObjectSize(const ASTContext &ASTCtx,
2329 unsigned Kind, Pointer &Ptr) {
2330 if (Ptr.isZero() || !Ptr.isBlockPointer())
2331 return std::nullopt;
2332
2333 if (Ptr.isDummy() && Ptr.getType()->isPointerType())
2334 return std::nullopt;
2335
2336 bool InvalidBase = false;
2337
2338 if (Ptr.isDummy()) {
2339 if (const VarDecl *VD = Ptr.getDeclDesc()->asVarDecl();
2340 VD && VD->getType()->isPointerType())
2341 InvalidBase = true;
2342 }
2343
2344 // According to the GCC documentation, we want the size of the subobject
2345 // denoted by the pointer. But that's not quite right -- what we actually
2346 // want is the size of the immediately-enclosing array, if there is one.
2347 if (Ptr.isArrayElement())
2348 Ptr = Ptr.expand();
2349
2350 bool DetermineForCompleteObject = Ptr.getFieldDesc() == Ptr.getDeclDesc();
2351 const Descriptor *DeclDesc = Ptr.getDeclDesc();
2352 assert(DeclDesc);
2353
2354 bool UseFieldDesc = (Kind & 1u);
2355 bool ReportMinimum = (Kind & 2u);
2356 if (!UseFieldDesc || DetermineForCompleteObject) {
2357 // Can't read beyond the pointer decl desc.
2358 if (!ReportMinimum && DeclDesc->getType()->isPointerType())
2359 return std::nullopt;
2360
2361 if (InvalidBase)
2362 return std::nullopt;
2363 } else {
2364 if (isUserWritingOffTheEnd(Ctx: ASTCtx, Ptr, InvalidBase)) {
      // If we cannot determine the size of the initial allocation, then we
      // can't give an accurate upper bound. However, we are still able to
      // give conservative lower bounds for Type=3.
2368 if (Kind == 1)
2369 return std::nullopt;
2370 }
2371 }
2372
2373 // The "closest surrounding subobject" is NOT a base class,
2374 // so strip the base class casts.
2375 if (UseFieldDesc && Ptr.isBaseClass())
2376 Ptr = Ptr.stripBaseCasts();
2377
2378 const Descriptor *Desc = UseFieldDesc ? Ptr.getFieldDesc() : DeclDesc;
2379 assert(Desc);
2380
2381 std::optional<unsigned> FullSize = computeFullDescSize(ASTCtx, Desc);
2382 if (!FullSize)
2383 return std::nullopt;
2384
2385 unsigned ByteOffset;
2386 if (UseFieldDesc) {
2387 if (Ptr.isBaseClass()) {
2388 assert(computePointerOffset(ASTCtx, Ptr.getBase()) <=
2389 computePointerOffset(ASTCtx, Ptr));
      ByteOffset = computePointerOffset(ASTCtx, Ptr) -
                   computePointerOffset(ASTCtx, Ptr: Ptr.getBase());
2392 } else {
2393 if (Ptr.inArray())
2394 ByteOffset =
2395 computePointerOffset(ASTCtx, Ptr) -
2396 computePointerOffset(ASTCtx, Ptr: Ptr.expand().atIndex(Idx: 0).narrow());
2397 else
2398 ByteOffset = 0;
2399 }
2400 } else
2401 ByteOffset = computePointerOffset(ASTCtx, Ptr);
2402
2403 assert(ByteOffset <= *FullSize);
2404 return *FullSize - ByteOffset;
2405}
2406
2407static bool interp__builtin_object_size(InterpState &S, CodePtr OpPC,
2408 const InterpFrame *Frame,
2409 const CallExpr *Call) {
2410 const ASTContext &ASTCtx = S.getASTContext();
2411 // From the GCC docs:
2412 // Kind is an integer constant from 0 to 3. If the least significant bit is
2413 // clear, objects are whole variables. If it is set, a closest surrounding
2414 // subobject is considered the object a pointer points to. The second bit
2415 // determines if maximum or minimum of remaining bytes is computed.
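  // E.g., given `struct S { int a; char buf[4]; int b; } s;` (assuming a
  // 4-byte int and no padding), `__builtin_object_size(&s.buf[1], 0)` is 7
  // (to the end of `s`), while `__builtin_object_size(&s.buf[1], 1)` is 3
  // (to the end of `buf`).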
2416 unsigned Kind = popToUInt64(S, E: Call->getArg(Arg: 1));
2417 assert(Kind <= 3 && "unexpected kind");
2418 Pointer Ptr = S.Stk.pop<Pointer>();
2419
2420 if (Call->getArg(Arg: 0)->HasSideEffects(Ctx: ASTCtx)) {
2421 // "If there are any side effects in them, it returns (size_t) -1
2422 // for type 0 or 1 and (size_t) 0 for type 2 or 3."
2423 pushInteger(S, Val: Kind <= 1 ? -1 : 0, QT: Call->getType());
2424 return true;
2425 }
2426
2427 if (auto Result = evaluateBuiltinObjectSize(ASTCtx, Kind, Ptr)) {
2428 pushInteger(S, Val: *Result, QT: Call->getType());
2429 return true;
2430 }
2431 return false;
2432}
2433
2434static bool interp__builtin_is_within_lifetime(InterpState &S, CodePtr OpPC,
2435 const CallExpr *Call) {
2436
2437 if (!S.inConstantContext())
2438 return false;
2439
2440 const Pointer &Ptr = S.Stk.pop<Pointer>();
2441
2442 auto Error = [&](int Diag) {
2443 bool CalledFromStd = false;
2444 const auto *Callee = S.Current->getCallee();
2445 if (Callee && Callee->isInStdNamespace()) {
2446 const IdentifierInfo *Identifier = Callee->getIdentifier();
2447 CalledFromStd = Identifier && Identifier->isStr(Str: "is_within_lifetime");
2448 }
2449 S.CCEDiag(SI: CalledFromStd
2450 ? S.Current->Caller->getSource(PC: S.Current->getRetPC())
2451 : S.Current->getSource(PC: OpPC),
2452 DiagId: diag::err_invalid_is_within_lifetime)
2453 << (CalledFromStd ? "std::is_within_lifetime"
2454 : "__builtin_is_within_lifetime")
2455 << Diag;
2456 return false;
2457 };
2458
2459 if (Ptr.isZero())
2460 return Error(0);
2461 if (Ptr.isOnePastEnd())
2462 return Error(1);
2463
2464 bool Result = Ptr.getLifetime() != Lifetime::Ended;
2465 if (!Ptr.isActive()) {
2466 Result = false;
2467 } else {
2468 if (!CheckLive(S, OpPC, Ptr, AK: AK_Read))
2469 return false;
2470 if (!CheckMutable(S, OpPC, Ptr))
2471 return false;
2472 if (!CheckDummy(S, OpPC, B: Ptr.block(), AK: AK_Read))
2473 return false;
2474 }
2475
2476 // Check if we're currently running an initializer.
2477 if (llvm::is_contained(Range&: S.InitializingBlocks, Element: Ptr.block()))
2478 return Error(2);
2479 if (S.EvaluatingDecl && Ptr.getDeclDesc()->asVarDecl() == S.EvaluatingDecl)
2480 return Error(2);
2481
2482 pushInteger(S, Val: Result, QT: Call->getType());
2483 return true;
2484}
2485
2486static bool interp__builtin_elementwise_int_unaryop(
2487 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2488 llvm::function_ref<APInt(const APSInt &)> Fn) {
2489 assert(Call->getNumArgs() == 1);
2490
2491 // Single integer case.
2492 if (!Call->getArg(Arg: 0)->getType()->isVectorType()) {
2493 assert(Call->getType()->isIntegerType());
2494 APSInt Src = popToAPSInt(S, E: Call->getArg(Arg: 0));
2495 APInt Result = Fn(Src);
2496 pushInteger(S, Val: APSInt(std::move(Result), !Src.isSigned()), QT: Call->getType());
2497 return true;
2498 }
2499
2500 // Vector case.
2501 const Pointer &Arg = S.Stk.pop<Pointer>();
2502 assert(Arg.getFieldDesc()->isPrimitiveArray());
2503 const Pointer &Dst = S.Stk.peek<Pointer>();
2504 assert(Dst.getFieldDesc()->isPrimitiveArray());
2505 assert(Arg.getFieldDesc()->getNumElems() ==
2506 Dst.getFieldDesc()->getNumElems());
2507
2508 QualType ElemType = Arg.getFieldDesc()->getElemQualType();
2509 PrimType ElemT = *S.getContext().classify(T: ElemType);
2510 unsigned NumElems = Arg.getNumElems();
2511 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2512
2513 for (unsigned I = 0; I != NumElems; ++I) {
2514 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2515 APSInt Src = Arg.elem<T>(I).toAPSInt();
2516 APInt Result = Fn(Src);
2517 Dst.elem<T>(I) = static_cast<T>(APSInt(std::move(Result), DestUnsigned));
2518 });
2519 }
2520 Dst.initializeAllElements();
2521
2522 return true;
2523}
2524
2525static bool interp__builtin_elementwise_fp_binop(
2526 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2527 llvm::function_ref<std::optional<APFloat>(
2528 const APFloat &, const APFloat &, std::optional<APSInt> RoundingMode)>
2529 Fn,
2530 bool IsScalar = false) {
2531 assert((Call->getNumArgs() == 2) || (Call->getNumArgs() == 3));
2532 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2533 assert(VT->getElementType()->isFloatingType());
2534 unsigned NumElems = VT->getNumElements();
2535
2536 // Vector case.
2537 assert(Call->getArg(0)->getType()->isVectorType() &&
2538 Call->getArg(1)->getType()->isVectorType());
2539 assert(VT->getElementType() ==
2540 Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
2541 assert(VT->getNumElements() ==
2542 Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements());
2543
2544 std::optional<APSInt> RoundingMode = std::nullopt;
2545 if (Call->getNumArgs() == 3)
2546 RoundingMode = popToAPSInt(S, E: Call->getArg(Arg: 2));
2547
2548 const Pointer &BPtr = S.Stk.pop<Pointer>();
2549 const Pointer &APtr = S.Stk.pop<Pointer>();
2550 const Pointer &Dst = S.Stk.peek<Pointer>();
2551 for (unsigned ElemIdx = 0; ElemIdx != NumElems; ++ElemIdx) {
2552 using T = PrimConv<PT_Float>::T;
2553 if (IsScalar && ElemIdx > 0) {
2554 Dst.elem<T>(I: ElemIdx) = APtr.elem<T>(I: ElemIdx);
2555 continue;
2556 }
2557 APFloat ElemA = APtr.elem<T>(I: ElemIdx).getAPFloat();
2558 APFloat ElemB = BPtr.elem<T>(I: ElemIdx).getAPFloat();
2559 std::optional<APFloat> Result = Fn(ElemA, ElemB, RoundingMode);
2560 if (!Result)
2561 return false;
2562 Dst.elem<T>(I: ElemIdx) = static_cast<T>(*Result);
2563 }
2564
2565 Dst.initializeAllElements();
2566
2567 return true;
2568}
2569
2570static bool interp__builtin_scalar_fp_round_mask_binop(
2571 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2572 llvm::function_ref<std::optional<APFloat>(const APFloat &, const APFloat &,
2573 std::optional<APSInt>)>
2574 Fn) {
2575 assert(Call->getNumArgs() == 5);
2576 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2577 unsigned NumElems = VT->getNumElements();
2578
2579 APSInt RoundingMode = popToAPSInt(S, E: Call->getArg(Arg: 4));
2580 uint64_t MaskVal = popToUInt64(S, E: Call->getArg(Arg: 3));
2581 const Pointer &SrcPtr = S.Stk.pop<Pointer>();
2582 const Pointer &BPtr = S.Stk.pop<Pointer>();
2583 const Pointer &APtr = S.Stk.pop<Pointer>();
2584 const Pointer &Dst = S.Stk.peek<Pointer>();
2585
2586 using T = PrimConv<PT_Float>::T;
2587
2588 if (MaskVal & 1) {
2589 APFloat ElemA = APtr.elem<T>(I: 0).getAPFloat();
2590 APFloat ElemB = BPtr.elem<T>(I: 0).getAPFloat();
2591 std::optional<APFloat> Result = Fn(ElemA, ElemB, RoundingMode);
2592 if (!Result)
2593 return false;
2594 Dst.elem<T>(I: 0) = static_cast<T>(*Result);
2595 } else {
2596 Dst.elem<T>(I: 0) = SrcPtr.elem<T>(I: 0);
2597 }
2598
2599 for (unsigned I = 1; I < NumElems; ++I)
2600 Dst.elem<T>(I) = APtr.elem<T>(I);
2601
2602 Dst.initializeAllElements();
2603
2604 return true;
2605}
2606
2607static bool interp__builtin_elementwise_int_binop(
2608 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2609 llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
2610 assert(Call->getNumArgs() == 2);
2611
2612 // Single integer case.
2613 if (!Call->getArg(Arg: 0)->getType()->isVectorType()) {
2614 assert(!Call->getArg(1)->getType()->isVectorType());
2615 APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: 1));
2616 APSInt LHS = popToAPSInt(S, E: Call->getArg(Arg: 0));
2617 APInt Result = Fn(LHS, RHS);
2618 pushInteger(S, Val: APSInt(std::move(Result), !LHS.isSigned()), QT: Call->getType());
2619 return true;
2620 }
2621
2622 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2623 assert(VT->getElementType()->isIntegralOrEnumerationType());
2624 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2625 unsigned NumElems = VT->getNumElements();
2626 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2627
2628 // Vector + Scalar case.
2629 if (!Call->getArg(Arg: 1)->getType()->isVectorType()) {
2630 assert(Call->getArg(1)->getType()->isIntegralOrEnumerationType());
2631
2632 APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: 1));
2633 const Pointer &LHS = S.Stk.pop<Pointer>();
2634 const Pointer &Dst = S.Stk.peek<Pointer>();
2635
2636 for (unsigned I = 0; I != NumElems; ++I) {
2637 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2638 Dst.elem<T>(I) = static_cast<T>(
2639 APSInt(Fn(LHS.elem<T>(I).toAPSInt(), RHS), DestUnsigned));
2640 });
2641 }
2642 Dst.initializeAllElements();
2643 return true;
2644 }
2645
2646 // Vector case.
2647 assert(Call->getArg(0)->getType()->isVectorType() &&
2648 Call->getArg(1)->getType()->isVectorType());
2649 assert(VT->getElementType() ==
2650 Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
2651 assert(VT->getNumElements() ==
2652 Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements());
2653 assert(VT->getElementType()->isIntegralOrEnumerationType());
2654
2655 const Pointer &RHS = S.Stk.pop<Pointer>();
2656 const Pointer &LHS = S.Stk.pop<Pointer>();
2657 const Pointer &Dst = S.Stk.peek<Pointer>();
2658 for (unsigned I = 0; I != NumElems; ++I) {
2659 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2660 APSInt Elem1 = LHS.elem<T>(I).toAPSInt();
2661 APSInt Elem2 = RHS.elem<T>(I).toAPSInt();
2662 Dst.elem<T>(I) = static_cast<T>(APSInt(Fn(Elem1, Elem2), DestUnsigned));
2663 });
2664 }
2665 Dst.initializeAllElements();
2666
2667 return true;
2668}
2669
2670static bool
2671interp__builtin_x86_pack(InterpState &S, CodePtr, const CallExpr *E,
2672 llvm::function_ref<APInt(const APSInt &)> PackFn) {
2673 const auto *VT0 = E->getArg(Arg: 0)->getType()->castAs<VectorType>();
2674 [[maybe_unused]] const auto *VT1 =
2675 E->getArg(Arg: 1)->getType()->castAs<VectorType>();
2676 assert(VT0 && VT1 && "pack builtin VT0 and VT1 must be VectorType");
2677 assert(VT0->getElementType() == VT1->getElementType() &&
2678 VT0->getNumElements() == VT1->getNumElements() &&
2679 "pack builtin VT0 and VT1 ElementType must be same");
2680
2681 const Pointer &RHS = S.Stk.pop<Pointer>();
2682 const Pointer &LHS = S.Stk.pop<Pointer>();
2683 const Pointer &Dst = S.Stk.peek<Pointer>();
2684
2685 const ASTContext &ASTCtx = S.getASTContext();
2686 unsigned SrcBits = ASTCtx.getIntWidth(T: VT0->getElementType());
2687 unsigned LHSVecLen = VT0->getNumElements();
2688 unsigned SrcPerLane = 128 / SrcBits;
2689 unsigned Lanes = LHSVecLen * SrcBits / 128;
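  // E.g., packing two 256-bit vectors of 16-bit elements: SrcBits == 16,
  // SrcPerLane == 8, Lanes == 2; each 128-bit result lane takes 8 packed
  // values from LHS followed by 8 from RHS.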
2690
2691 PrimType SrcT = *S.getContext().classify(T: VT0->getElementType());
2692 PrimType DstT = *S.getContext().classify(T: getElemType(P: Dst));
  bool IsUnsigned = getElemType(P: Dst)->isUnsignedIntegerType();
2694
2695 for (unsigned Lane = 0; Lane != Lanes; ++Lane) {
2696 unsigned BaseSrc = Lane * SrcPerLane;
2697 unsigned BaseDst = Lane * (2 * SrcPerLane);
2698
2699 for (unsigned I = 0; I != SrcPerLane; ++I) {
2700 INT_TYPE_SWITCH_NO_BOOL(SrcT, {
2701 APSInt A = LHS.elem<T>(BaseSrc + I).toAPSInt();
2702 APSInt B = RHS.elem<T>(BaseSrc + I).toAPSInt();
2703
        assignInteger(S, Dst.atIndex(BaseDst + I), DstT,
                      APSInt(PackFn(A), IsUnsigned));
        assignInteger(S, Dst.atIndex(BaseDst + SrcPerLane + I), DstT,
                      APSInt(PackFn(B), IsUnsigned));
2708 });
2709 }
2710 }
2711
2712 Dst.initializeAllElements();
2713 return true;
2714}
2715
2716static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
2717 const CallExpr *Call,
2718 unsigned BuiltinID) {
2719 assert(Call->getNumArgs() == 2);
2720
2721 QualType Arg0Type = Call->getArg(Arg: 0)->getType();
2722
2723 // TODO: Support floating-point types.
2724 if (!(Arg0Type->isIntegerType() ||
2725 (Arg0Type->isVectorType() &&
2726 Arg0Type->castAs<VectorType>()->getElementType()->isIntegerType())))
2727 return false;
2728
2729 if (!Arg0Type->isVectorType()) {
2730 assert(!Call->getArg(1)->getType()->isVectorType());
2731 APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: 1));
2732 APSInt LHS = popToAPSInt(S, T: Arg0Type);
2733 APInt Result;
2734 if (BuiltinID == Builtin::BI__builtin_elementwise_max) {
2735 Result = std::max(a: LHS, b: RHS);
2736 } else if (BuiltinID == Builtin::BI__builtin_elementwise_min) {
2737 Result = std::min(a: LHS, b: RHS);
2738 } else {
2739 llvm_unreachable("Wrong builtin ID");
2740 }
2741
2742 pushInteger(S, Val: APSInt(Result, !LHS.isSigned()), QT: Call->getType());
2743 return true;
2744 }
2745
2746 // Vector case.
2747 assert(Call->getArg(0)->getType()->isVectorType() &&
2748 Call->getArg(1)->getType()->isVectorType());
2749 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2750 assert(VT->getElementType() ==
2751 Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
2752 assert(VT->getNumElements() ==
2753 Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements());
2754 assert(VT->getElementType()->isIntegralOrEnumerationType());
2755
2756 const Pointer &RHS = S.Stk.pop<Pointer>();
2757 const Pointer &LHS = S.Stk.pop<Pointer>();
2758 const Pointer &Dst = S.Stk.peek<Pointer>();
2759 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2760 unsigned NumElems = VT->getNumElements();
2761 for (unsigned I = 0; I != NumElems; ++I) {
2762 APSInt Elem1;
2763 APSInt Elem2;
2764 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2765 Elem1 = LHS.elem<T>(I).toAPSInt();
2766 Elem2 = RHS.elem<T>(I).toAPSInt();
2767 });
2768
2769 APSInt Result;
2770 if (BuiltinID == Builtin::BI__builtin_elementwise_max) {
2771 Result = APSInt(std::max(a: Elem1, b: Elem2),
2772 Call->getType()->isUnsignedIntegerOrEnumerationType());
2773 } else if (BuiltinID == Builtin::BI__builtin_elementwise_min) {
2774 Result = APSInt(std::min(a: Elem1, b: Elem2),
2775 Call->getType()->isUnsignedIntegerOrEnumerationType());
2776 } else {
2777 llvm_unreachable("Wrong builtin ID");
2778 }
2779
2780 INT_TYPE_SWITCH_NO_BOOL(ElemT,
2781 { Dst.elem<T>(I) = static_cast<T>(Result); });
2782 }
2783 Dst.initializeAllElements();
2784
2785 return true;
2786}
2787
2788static bool interp__builtin_ia32_pmul(
2789 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2790 llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &,
2791 const APSInt &)>
2792 Fn) {
2793 assert(Call->getArg(0)->getType()->isVectorType() &&
2794 Call->getArg(1)->getType()->isVectorType());
2795 const Pointer &RHS = S.Stk.pop<Pointer>();
2796 const Pointer &LHS = S.Stk.pop<Pointer>();
2797 const Pointer &Dst = S.Stk.peek<Pointer>();
2798
2799 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2800 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2801 unsigned NumElems = VT->getNumElements();
2802 const auto *DestVT = Call->getType()->castAs<VectorType>();
2803 PrimType DestElemT = *S.getContext().classify(T: DestVT->getElementType());
2804 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2805
2806 unsigned DstElem = 0;
2807 for (unsigned I = 0; I != NumElems; I += 2) {
2808 APSInt Result;
2809 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2810 APSInt LoLHS = LHS.elem<T>(I).toAPSInt();
2811 APSInt HiLHS = LHS.elem<T>(I + 1).toAPSInt();
2812 APSInt LoRHS = RHS.elem<T>(I).toAPSInt();
2813 APSInt HiRHS = RHS.elem<T>(I + 1).toAPSInt();
2814 Result = APSInt(Fn(LoLHS, HiLHS, LoRHS, HiRHS), DestUnsigned);
2815 });
2816
2817 INT_TYPE_SWITCH_NO_BOOL(DestElemT,
2818 { Dst.elem<T>(DstElem) = static_cast<T>(Result); });
2819 ++DstElem;
2820 }
2821
2822 Dst.initializeAllElements();
2823 return true;
2824}
2825
2826static bool interp_builtin_horizontal_int_binop(
2827 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2828 llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
2829 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2830 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2831 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2832
2833 const Pointer &RHS = S.Stk.pop<Pointer>();
2834 const Pointer &LHS = S.Stk.pop<Pointer>();
2835 const Pointer &Dst = S.Stk.peek<Pointer>();
2836 unsigned NumElts = VT->getNumElements();
2837 unsigned EltBits = S.getASTContext().getIntWidth(T: VT->getElementType());
2838 unsigned EltsPerLane = 128 / EltBits;
2839 unsigned Lanes = NumElts * EltBits / 128;
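  // E.g., a 256-bit vector of 16 x i16: EltBits == 16, EltsPerLane == 8,
  // Lanes == 2; each output lane holds pairwise results from LHS, then RHS.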
2840 unsigned DestIndex = 0;
2841
2842 for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
2843 unsigned LaneStart = Lane * EltsPerLane;
2844 for (unsigned I = 0; I < EltsPerLane; I += 2) {
2845 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2846 APSInt Elem1 = LHS.elem<T>(LaneStart + I).toAPSInt();
2847 APSInt Elem2 = LHS.elem<T>(LaneStart + I + 1).toAPSInt();
2848 APSInt ResL = APSInt(Fn(Elem1, Elem2), DestUnsigned);
2849 Dst.elem<T>(DestIndex++) = static_cast<T>(ResL);
2850 });
2851 }
2852
2853 for (unsigned I = 0; I < EltsPerLane; I += 2) {
2854 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2855 APSInt Elem1 = RHS.elem<T>(LaneStart + I).toAPSInt();
2856 APSInt Elem2 = RHS.elem<T>(LaneStart + I + 1).toAPSInt();
2857 APSInt ResR = APSInt(Fn(Elem1, Elem2), DestUnsigned);
2858 Dst.elem<T>(DestIndex++) = static_cast<T>(ResR);
2859 });
2860 }
2861 }
2862 Dst.initializeAllElements();
2863 return true;
2864}
2865
2866static bool interp_builtin_horizontal_fp_binop(
2867 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2868 llvm::function_ref<APFloat(const APFloat &, const APFloat &,
2869 llvm::RoundingMode)>
2870 Fn) {
2871 const Pointer &RHS = S.Stk.pop<Pointer>();
2872 const Pointer &LHS = S.Stk.pop<Pointer>();
2873 const Pointer &Dst = S.Stk.peek<Pointer>();
2874 FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
2875 llvm::RoundingMode RM = getRoundingMode(FPO);
2876 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2877
2878 unsigned NumElts = VT->getNumElements();
2879 unsigned EltBits = S.getASTContext().getTypeSize(T: VT->getElementType());
2880 unsigned NumLanes = NumElts * EltBits / 128;
2881 unsigned NumElemsPerLane = NumElts / NumLanes;
2882 unsigned HalfElemsPerLane = NumElemsPerLane / 2;
2883
2884 for (unsigned L = 0; L != NumElts; L += NumElemsPerLane) {
2885 using T = PrimConv<PT_Float>::T;
2886 for (unsigned E = 0; E != HalfElemsPerLane; ++E) {
2887 APFloat Elem1 = LHS.elem<T>(I: L + (2 * E) + 0).getAPFloat();
2888 APFloat Elem2 = LHS.elem<T>(I: L + (2 * E) + 1).getAPFloat();
2889 Dst.elem<T>(I: L + E) = static_cast<T>(Fn(Elem1, Elem2, RM));
2890 }
2891 for (unsigned E = 0; E != HalfElemsPerLane; ++E) {
2892 APFloat Elem1 = RHS.elem<T>(I: L + (2 * E) + 0).getAPFloat();
2893 APFloat Elem2 = RHS.elem<T>(I: L + (2 * E) + 1).getAPFloat();
2894 Dst.elem<T>(I: L + E + HalfElemsPerLane) =
2895 static_cast<T>(Fn(Elem1, Elem2, RM));
2896 }
2897 }
2898 Dst.initializeAllElements();
2899 return true;
2900}
2901
2902static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC,
2903 const CallExpr *Call) {
2904 // Addsub: alternates between subtraction and addition
2905 // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i])
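  // E.g., a = {1.0, 2.0, 3.0, 4.0} and b = {0.5, 0.5, 0.5, 0.5} yield
  // {0.5, 2.5, 2.5, 4.5} (element values assumed for illustration).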
2906 const Pointer &RHS = S.Stk.pop<Pointer>();
2907 const Pointer &LHS = S.Stk.pop<Pointer>();
2908 const Pointer &Dst = S.Stk.peek<Pointer>();
2909 FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
2910 llvm::RoundingMode RM = getRoundingMode(FPO);
2911 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2912 unsigned NumElems = VT->getNumElements();
2913
2914 using T = PrimConv<PT_Float>::T;
2915 for (unsigned I = 0; I != NumElems; ++I) {
2916 APFloat LElem = LHS.elem<T>(I).getAPFloat();
2917 APFloat RElem = RHS.elem<T>(I).getAPFloat();
2918 if (I % 2 == 0) {
2919 // Even indices: subtract
2920 LElem.subtract(RHS: RElem, RM);
2921 } else {
2922 // Odd indices: add
2923 LElem.add(RHS: RElem, RM);
2924 }
2925 Dst.elem<T>(I) = static_cast<T>(LElem);
2926 }
2927 Dst.initializeAllElements();
2928 return true;
2929}
2930
2931static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
2932 const CallExpr *Call) {
2933 // PCLMULQDQ: carry-less multiplication of selected 64-bit halves
2934 // imm8 bit 0: selects lower (0) or upper (1) 64 bits of first operand
2935 // imm8 bit 4: selects lower (0) or upper (1) 64 bits of second operand
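  // E.g., with imm8 == 0 the low halves are multiplied carry-lessly:
  // clmul(0b0011, 0b0011) == 0b0101, since partial products are XORed
  // rather than added.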
2936 assert(Call->getArg(0)->getType()->isVectorType() &&
2937 Call->getArg(1)->getType()->isVectorType());
2938
2939 // Extract imm8 argument
2940 APSInt Imm8 = popToAPSInt(S, E: Call->getArg(Arg: 2));
2941 bool SelectUpperA = (Imm8 & 0x01) != 0;
2942 bool SelectUpperB = (Imm8 & 0x10) != 0;
2943
2944 const Pointer &RHS = S.Stk.pop<Pointer>();
2945 const Pointer &LHS = S.Stk.pop<Pointer>();
2946 const Pointer &Dst = S.Stk.peek<Pointer>();
2947
2948 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2949 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2950 unsigned NumElems = VT->getNumElements();
2951 const auto *DestVT = Call->getType()->castAs<VectorType>();
2952 PrimType DestElemT = *S.getContext().classify(T: DestVT->getElementType());
2953 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2954
2955 // Process each 128-bit lane (2 elements at a time)
2956 for (unsigned Lane = 0; Lane < NumElems; Lane += 2) {
2957 APSInt A0, A1, B0, B1;
2958 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2959 A0 = LHS.elem<T>(Lane + 0).toAPSInt();
2960 A1 = LHS.elem<T>(Lane + 1).toAPSInt();
2961 B0 = RHS.elem<T>(Lane + 0).toAPSInt();
2962 B1 = RHS.elem<T>(Lane + 1).toAPSInt();
2963 });
2964
2965 // Select the appropriate 64-bit values based on imm8
2966 APInt A = SelectUpperA ? A1 : A0;
2967 APInt B = SelectUpperB ? B1 : B0;
2968
2969 // Extend both operands to 128 bits for carry-less multiplication
2970 APInt A128 = A.zext(width: 128);
2971 APInt B128 = B.zext(width: 128);
2972
2973 // Use APIntOps::clmul for carry-less multiplication
2974 APInt Result = llvm::APIntOps::clmul(LHS: A128, RHS: B128);
2975
2976 // Split the 128-bit result into two 64-bit halves
2977 APSInt ResultLow(Result.extractBits(numBits: 64, bitPosition: 0), DestUnsigned);
2978 APSInt ResultHigh(Result.extractBits(numBits: 64, bitPosition: 64), DestUnsigned);
2979
2980 INT_TYPE_SWITCH_NO_BOOL(DestElemT, {
2981 Dst.elem<T>(Lane + 0) = static_cast<T>(ResultLow);
2982 Dst.elem<T>(Lane + 1) = static_cast<T>(ResultHigh);
2983 });
2984 }
2985
2986 Dst.initializeAllElements();
2987 return true;
2988}
2989
2990static bool interp__builtin_elementwise_triop_fp(
2991 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2992 llvm::function_ref<APFloat(const APFloat &, const APFloat &,
2993 const APFloat &, llvm::RoundingMode)>
2994 Fn) {
2995 assert(Call->getNumArgs() == 3);
2996
2997 FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
2998 llvm::RoundingMode RM = getRoundingMode(FPO);
2999 QualType Arg1Type = Call->getArg(Arg: 0)->getType();
3000 QualType Arg2Type = Call->getArg(Arg: 1)->getType();
3001 QualType Arg3Type = Call->getArg(Arg: 2)->getType();
3002
3003 // Non-vector floating point types.
3004 if (!Arg1Type->isVectorType()) {
3005 assert(!Arg2Type->isVectorType());
3006 assert(!Arg3Type->isVectorType());
3007 (void)Arg2Type;
3008 (void)Arg3Type;
3009
3010 const Floating &Z = S.Stk.pop<Floating>();
3011 const Floating &Y = S.Stk.pop<Floating>();
3012 const Floating &X = S.Stk.pop<Floating>();
3013 APFloat F = Fn(X.getAPFloat(), Y.getAPFloat(), Z.getAPFloat(), RM);
3014 Floating Result = S.allocFloat(Sem: X.getSemantics());
3015 Result.copy(F);
3016 S.Stk.push<Floating>(Args&: Result);
3017 return true;
3018 }
3019
3020 // Vector type.
3021 assert(Arg1Type->isVectorType() && Arg2Type->isVectorType() &&
3022 Arg3Type->isVectorType());
3023
3024 const VectorType *VecTy = Arg1Type->castAs<VectorType>();
3025 QualType ElemQT = VecTy->getElementType();
3026 unsigned NumElems = VecTy->getNumElements();
3027
3028 assert(ElemQT == Arg2Type->castAs<VectorType>()->getElementType() &&
3029 ElemQT == Arg3Type->castAs<VectorType>()->getElementType());
3030 assert(NumElems == Arg2Type->castAs<VectorType>()->getNumElements() &&
3031 NumElems == Arg3Type->castAs<VectorType>()->getNumElements());
3032 assert(ElemQT->isRealFloatingType());
3033 (void)ElemQT;
3034
3035 const Pointer &VZ = S.Stk.pop<Pointer>();
3036 const Pointer &VY = S.Stk.pop<Pointer>();
3037 const Pointer &VX = S.Stk.pop<Pointer>();
3038 const Pointer &Dst = S.Stk.peek<Pointer>();
3039 for (unsigned I = 0; I != NumElems; ++I) {
3040 using T = PrimConv<PT_Float>::T;
3041 APFloat X = VX.elem<T>(I).getAPFloat();
3042 APFloat Y = VY.elem<T>(I).getAPFloat();
3043 APFloat Z = VZ.elem<T>(I).getAPFloat();
3044 APFloat F = Fn(X, Y, Z, RM);
3045 Dst.elem<Floating>(I) = Floating(F);
3046 }
3047 Dst.initializeAllElements();
3048 return true;
3049}
3050
3051/// AVX512 predicated move: "Result = Mask[] ? LHS[] : RHS[]".
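/// E.g., with 4 elements and Mask == 0b0101, lanes 0 and 2 come from LHS and
/// lanes 1 and 3 come from RHS.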
3052static bool interp__builtin_select(InterpState &S, CodePtr OpPC,
3053 const CallExpr *Call) {
3054 const Pointer &RHS = S.Stk.pop<Pointer>();
3055 const Pointer &LHS = S.Stk.pop<Pointer>();
3056 APSInt Mask = popToAPSInt(S, E: Call->getArg(Arg: 0));
3057 const Pointer &Dst = S.Stk.peek<Pointer>();
3058
3059 assert(LHS.getNumElems() == RHS.getNumElems());
3060 assert(LHS.getNumElems() == Dst.getNumElems());
3061 unsigned NumElems = LHS.getNumElems();
3062 PrimType ElemT = LHS.getFieldDesc()->getPrimType();
3063 PrimType DstElemT = Dst.getFieldDesc()->getPrimType();
3064
3065 for (unsigned I = 0; I != NumElems; ++I) {
3066 if (ElemT == PT_Float) {
3067 assert(DstElemT == PT_Float);
3068 Dst.elem<Floating>(I) =
3069 Mask[I] ? LHS.elem<Floating>(I) : RHS.elem<Floating>(I);
3070 } else {
3071 APSInt Elem;
3072 INT_TYPE_SWITCH(ElemT, {
3073 Elem = Mask[I] ? LHS.elem<T>(I).toAPSInt() : RHS.elem<T>(I).toAPSInt();
3074 });
3075 INT_TYPE_SWITCH_NO_BOOL(DstElemT,
3076 { Dst.elem<T>(I) = static_cast<T>(Elem); });
3077 }
3078 }
3079 Dst.initializeAllElements();
3080
3081 return true;
3082}
3083
3084/// Scalar variant of AVX512 predicated select:
3085/// Result[i] = (Mask bit 0) ? LHS[i] : RHS[i], but only element 0 may change.
3086/// All other elements are taken from RHS.
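/// E.g., with Mask bit 0 set, Result = { LHS[0], RHS[1], ..., RHS[N-1] };
/// with it clear, Result == RHS.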
3087static bool interp__builtin_select_scalar(InterpState &S,
3088 const CallExpr *Call) {
3089 unsigned N =
3090 Call->getArg(Arg: 1)->getType()->castAs<VectorType>()->getNumElements();
3091
3092 const Pointer &W = S.Stk.pop<Pointer>();
3093 const Pointer &A = S.Stk.pop<Pointer>();
3094 APSInt U = popToAPSInt(S, E: Call->getArg(Arg: 0));
3095 const Pointer &Dst = S.Stk.peek<Pointer>();
3096
3097 bool TakeA0 = U.getZExtValue() & 1ULL;
3098
3099 for (unsigned I = TakeA0; I != N; ++I)
3100 Dst.elem<Floating>(I) = W.elem<Floating>(I);
3101 if (TakeA0)
3102 Dst.elem<Floating>(I: 0) = A.elem<Floating>(I: 0);
3103
3104 Dst.initializeAllElements();
3105 return true;
3106}
3107
3108static bool interp__builtin_ia32_test_op(
3109 InterpState &S, CodePtr OpPC, const CallExpr *Call,
3110 llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
3111 const Pointer &RHS = S.Stk.pop<Pointer>();
3112 const Pointer &LHS = S.Stk.pop<Pointer>();
3113
3114 assert(LHS.getNumElems() == RHS.getNumElems());
3115
3116 unsigned SourceLen = LHS.getNumElems();
3117 QualType ElemQT = getElemType(P: LHS);
3118 OptPrimType ElemPT = S.getContext().classify(T: ElemQT);
3119 unsigned LaneWidth = S.getASTContext().getTypeSize(T: ElemQT);
3120
3121 APInt AWide(LaneWidth * SourceLen, 0);
3122 APInt BWide(LaneWidth * SourceLen, 0);
3123
3124 for (unsigned I = 0; I != SourceLen; ++I) {
3125 APInt ALane;
3126 APInt BLane;
3127
3128 if (ElemQT->isIntegerType()) { // Get value.
3129 INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
3130 ALane = LHS.elem<T>(I).toAPSInt();
3131 BLane = RHS.elem<T>(I).toAPSInt();
3132 });
3133 } else if (ElemQT->isFloatingType()) { // Get only sign bit.
3134 using T = PrimConv<PT_Float>::T;
3135 ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
3136 BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
3137 } else { // Unsupported element type.
3138 return false;
3139 }
3140 AWide.insertBits(SubBits: ALane, bitPosition: I * LaneWidth);
3141 BWide.insertBits(SubBits: BLane, bitPosition: I * LaneWidth);
3142 }
3143 pushInteger(S, Val: Fn(AWide, BWide), QT: Call->getType());
3144 return true;
3145}
3146
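/// movmsk: gathers the sign bit of every source element into the low bits of
/// an integer result. For example, <4 x float> {-1.0f, 2.0f, -3.0f, 4.0f}
/// yields 0b0101 (lanes 0 and 2 are negative).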
3147static bool interp__builtin_ia32_movmsk_op(InterpState &S, CodePtr OpPC,
3148 const CallExpr *Call) {
3149 assert(Call->getNumArgs() == 1);
3150
3151 const Pointer &Source = S.Stk.pop<Pointer>();
3152
3153 unsigned SourceLen = Source.getNumElems();
3154 QualType ElemQT = getElemType(P: Source);
3155 OptPrimType ElemT = S.getContext().classify(T: ElemQT);
3156 unsigned ResultLen =
3157 S.getASTContext().getTypeSize(T: Call->getType()); // Always 32-bit integer.
3158 APInt Result(ResultLen, 0);
3159
3160 for (unsigned I = 0; I != SourceLen; ++I) {
3161 APInt Elem;
3162 if (ElemQT->isIntegerType()) {
3163 INT_TYPE_SWITCH_NO_BOOL(*ElemT, { Elem = Source.elem<T>(I).toAPSInt(); });
3164 } else if (ElemQT->isRealFloatingType()) {
3165 using T = PrimConv<PT_Float>::T;
3166 Elem = Source.elem<T>(I).getAPFloat().bitcastToAPInt();
3167 } else {
3168 return false;
3169 }
3170 Result.setBitVal(BitPosition: I, BitValue: Elem.isNegative());
3171 }
3172 pushInteger(S, Val: Result, QT: Call->getType());
3173 return true;
3174}
3175
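/// Elementwise three-operand integer helper. Handles three operand shapes:
/// all-scalar, vector + vector + scalar, and vector + vector + vector. Fn
/// receives the per-element operands; e.g. a clamp-style caller could pass
/// (a sketch of the callback shape, not a quote of an actual call site):
///   [](const APSInt &V, const APSInt &Lo, const APSInt &Hi) {
///     return V.slt(Lo) ? Lo : (Hi.slt(V) ? Hi : V);
///   }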
3176static bool interp__builtin_elementwise_triop(
3177 InterpState &S, CodePtr OpPC, const CallExpr *Call,
3178 llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
3179 Fn) {
3180 assert(Call->getNumArgs() == 3);
3181
3182 QualType Arg0Type = Call->getArg(Arg: 0)->getType();
3183 QualType Arg2Type = Call->getArg(Arg: 2)->getType();
3184 // All-scalar integer case.
3185 if (!Arg0Type->isVectorType()) {
3186 const APSInt &Op2 = popToAPSInt(S, T: Arg2Type);
3187 const APSInt &Op1 = popToAPSInt(S, E: Call->getArg(Arg: 1));
3188 const APSInt &Op0 = popToAPSInt(S, T: Arg0Type);
3189 APSInt Result = APSInt(Fn(Op0, Op1, Op2), Op0.isUnsigned());
3190 pushInteger(S, Val: Result, QT: Call->getType());
3191 return true;
3192 }
3193
3194 const auto *VecT = Arg0Type->castAs<VectorType>();
3195 PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
3196 unsigned NumElems = VecT->getNumElements();
3197 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3198
3199 // Vector + Vector + Scalar case.
3200 if (!Arg2Type->isVectorType()) {
3201 APSInt Op2 = popToAPSInt(S, T: Arg2Type);
3202
3203 const Pointer &Op1 = S.Stk.pop<Pointer>();
3204 const Pointer &Op0 = S.Stk.pop<Pointer>();
3205 const Pointer &Dst = S.Stk.peek<Pointer>();
3206 for (unsigned I = 0; I != NumElems; ++I) {
3207 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3208 Dst.elem<T>(I) = static_cast<T>(APSInt(
3209 Fn(Op0.elem<T>(I).toAPSInt(), Op1.elem<T>(I).toAPSInt(), Op2),
3210 DestUnsigned));
3211 });
3212 }
3213 Dst.initializeAllElements();
3214
3215 return true;
3216 }
3217
3218 // Vector + Vector + Vector case.
3219 const Pointer &Op2 = S.Stk.pop<Pointer>();
3220 const Pointer &Op1 = S.Stk.pop<Pointer>();
3221 const Pointer &Op0 = S.Stk.pop<Pointer>();
3222 const Pointer &Dst = S.Stk.peek<Pointer>();
3223 for (unsigned I = 0; I != NumElems; ++I) {
3224 APSInt Val0, Val1, Val2;
3225 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3226 Val0 = Op0.elem<T>(I).toAPSInt();
3227 Val1 = Op1.elem<T>(I).toAPSInt();
3228 Val2 = Op2.elem<T>(I).toAPSInt();
3229 });
3230 APSInt Result = APSInt(Fn(Val0, Val1, Val2), Val0.isUnsigned());
3231 INT_TYPE_SWITCH_NO_BOOL(ElemT,
3232 { Dst.elem<T>(I) = static_cast<T>(Result); });
3233 }
3234 Dst.initializeAllElements();
3235
3236 return true;
3237}
3238
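/// Extracts one aligned subvector lane from a wider vector. For example,
/// extracting a 4-element destination from an 8-element source with Index = 1
/// copies source elements [4, 8) (NumLanes = 2; the index wraps modulo
/// NumLanes).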
3239static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC,
3240 const CallExpr *Call,
3241 unsigned ID) {
3242 assert(Call->getNumArgs() == 2);
3243
3244 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 1));
3245 uint64_t Index = ImmAPS.getZExtValue();
3246
3247 const Pointer &Src = S.Stk.pop<Pointer>();
3248 if (!Src.getFieldDesc()->isPrimitiveArray())
3249 return false;
3250
3251 const Pointer &Dst = S.Stk.peek<Pointer>();
3252 if (!Dst.getFieldDesc()->isPrimitiveArray())
3253 return false;
3254
3255 unsigned SrcElems = Src.getNumElems();
3256 unsigned DstElems = Dst.getNumElems();
3257
3258 unsigned NumLanes = SrcElems / DstElems;
3259 unsigned Lane = static_cast<unsigned>(Index % NumLanes);
3260 unsigned ExtractPos = Lane * DstElems;
3261
3262 PrimType ElemT = Src.getFieldDesc()->getPrimType();
3263
3264 TYPE_SWITCH(ElemT, {
3265 for (unsigned I = 0; I != DstElems; ++I) {
3266 Dst.elem<T>(I) = Src.elem<T>(ExtractPos + I);
3267 }
3268 });
3269
3270 Dst.initializeAllElements();
3271 return true;
3272}
3273
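/// Masked variant of the subvector extract above: lanes whose mask bit is set
/// take the extracted element, all other lanes take the corresponding element
/// of Merge.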
3274static bool interp__builtin_x86_extract_vector_masked(InterpState &S,
3275 CodePtr OpPC,
3276 const CallExpr *Call,
3277 unsigned ID) {
3278 assert(Call->getNumArgs() == 4);
3279
3280 APSInt MaskAPS = popToAPSInt(S, E: Call->getArg(Arg: 3));
3281 const Pointer &Merge = S.Stk.pop<Pointer>();
3282 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 1));
3283 const Pointer &Src = S.Stk.pop<Pointer>();
3284
3285 if (!Src.getFieldDesc()->isPrimitiveArray() ||
3286 !Merge.getFieldDesc()->isPrimitiveArray())
3287 return false;
3288
3289 const Pointer &Dst = S.Stk.peek<Pointer>();
3290 if (!Dst.getFieldDesc()->isPrimitiveArray())
3291 return false;
3292
3293 unsigned SrcElems = Src.getNumElems();
3294 unsigned DstElems = Dst.getNumElems();
3295
3296 unsigned NumLanes = SrcElems / DstElems;
3297 unsigned Lane = static_cast<unsigned>(ImmAPS.getZExtValue() % NumLanes);
3298 unsigned Base = Lane * DstElems;
3299
3300 PrimType ElemT = Src.getFieldDesc()->getPrimType();
3301
3302 TYPE_SWITCH(ElemT, {
3303 for (unsigned I = 0; I != DstElems; ++I) {
3304 if (MaskAPS[I])
3305 Dst.elem<T>(I) = Src.elem<T>(Base + I);
3306 else
3307 Dst.elem<T>(I) = Merge.elem<T>(I);
3308 }
3309 });
3310
3311 Dst.initializeAllElements();
3312 return true;
3313}
3314
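/// Inserts a subvector into an aligned lane of a wider vector. For example,
/// with BaseElements = 8, SubElements = 4 and Index = 1, the result is
/// BaseVec with elements [4, 8) replaced by SubVec (InsertPos = 1 * 4).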
3315static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
3316 const CallExpr *Call,
3317 unsigned ID) {
3318 assert(Call->getNumArgs() == 3);
3319
3320 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 2));
3321 uint64_t Index = ImmAPS.getZExtValue();
3322
3323 const Pointer &SubVec = S.Stk.pop<Pointer>();
3324 if (!SubVec.getFieldDesc()->isPrimitiveArray())
3325 return false;
3326
3327 const Pointer &BaseVec = S.Stk.pop<Pointer>();
3328 if (!BaseVec.getFieldDesc()->isPrimitiveArray())
3329 return false;
3330
3331 const Pointer &Dst = S.Stk.peek<Pointer>();
3332
3333 unsigned BaseElements = BaseVec.getNumElems();
3334 unsigned SubElements = SubVec.getNumElems();
3335
3336 assert(SubElements != 0 && BaseElements != 0 &&
3337 (BaseElements % SubElements) == 0);
3338
3339 unsigned NumLanes = BaseElements / SubElements;
3340 unsigned Lane = static_cast<unsigned>(Index % NumLanes);
3341 unsigned InsertPos = Lane * SubElements;
3342
3343 PrimType ElemT = BaseVec.getFieldDesc()->getPrimType();
3344
3345 TYPE_SWITCH(ElemT, {
3346 for (unsigned I = 0; I != BaseElements; ++I)
3347 Dst.elem<T>(I) = BaseVec.elem<T>(I);
3348 for (unsigned I = 0; I != SubElements; ++I)
3349 Dst.elem<T>(InsertPos + I) = SubVec.elem<T>(I);
3350 });
3351
3352 Dst.initializeAllElements();
3353 return true;
3354}
3355
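/// phminposuw: finds the minimum unsigned word and the index of its first
/// occurrence. For example, source {9, 4, 7, 4, ...} produces element 0 = 4,
/// element 1 = 1, and all remaining elements zero.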
3356static bool interp__builtin_ia32_phminposuw(InterpState &S, CodePtr OpPC,
3357 const CallExpr *Call) {
3358 assert(Call->getNumArgs() == 1);
3359
3360 const Pointer &Source = S.Stk.pop<Pointer>();
3361 const Pointer &Dest = S.Stk.peek<Pointer>();
3362
3363 unsigned SourceLen = Source.getNumElems();
3364 QualType ElemQT = getElemType(P: Source);
3365 OptPrimType ElemT = S.getContext().classify(T: ElemQT);
3366 unsigned ElemBitWidth = S.getASTContext().getTypeSize(T: ElemQT);
3367
3368 bool DestUnsigned = Call->getCallReturnType(Ctx: S.getASTContext())
3369 ->castAs<VectorType>()
3370 ->getElementType()
3371 ->isUnsignedIntegerOrEnumerationType();
3372
3373 INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
3374 APSInt MinIndex(ElemBitWidth, DestUnsigned);
3375 APSInt MinVal = Source.elem<T>(0).toAPSInt();
3376
3377 for (unsigned I = 1; I != SourceLen; ++I) {
3378 APSInt Val = Source.elem<T>(I).toAPSInt();
3379 if (MinVal.ugt(Val)) {
3380 MinVal = Val;
3381 MinIndex = I;
3382 }
3383 }
3384
3385 Dest.elem<T>(0) = static_cast<T>(MinVal);
3386 Dest.elem<T>(1) = static_cast<T>(MinIndex);
3387 for (unsigned I = 2; I != SourceLen; ++I) {
3388 Dest.elem<T>(I) = static_cast<T>(APSInt(ElemBitWidth, DestUnsigned));
3389 }
3390 });
3391 Dest.initializeAllElements();
3392 return true;
3393}
3394
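/// vpternlog: Imm is an 8-entry truth table indexed by
/// (ABit << 2) | (BBit << 1) | CBit. For example, Imm = 0xCA computes the
/// bitwise select "A ? B : C" per bit. Lanes masked off in U are either
/// zeroed (MaskZ) or keep A's value (merge masking).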
3395static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
3396 const CallExpr *Call, bool MaskZ) {
3397 assert(Call->getNumArgs() == 5);
3398
3399 APInt U = popToAPSInt(S, E: Call->getArg(Arg: 4)); // Lane mask
3400 APInt Imm = popToAPSInt(S, E: Call->getArg(Arg: 3)); // Ternary truth table
3401 const Pointer &C = S.Stk.pop<Pointer>();
3402 const Pointer &B = S.Stk.pop<Pointer>();
3403 const Pointer &A = S.Stk.pop<Pointer>();
3404 const Pointer &Dst = S.Stk.peek<Pointer>();
3405
3406 unsigned DstLen = A.getNumElems();
3407 QualType ElemQT = getElemType(P: A);
3408 OptPrimType ElemT = S.getContext().classify(T: ElemQT);
3409 unsigned LaneWidth = S.getASTContext().getTypeSize(T: ElemQT);
3410 bool DstUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
3411
3412 INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
3413 for (unsigned I = 0; I != DstLen; ++I) {
3414 APInt ALane = A.elem<T>(I).toAPSInt();
3415 APInt BLane = B.elem<T>(I).toAPSInt();
3416 APInt CLane = C.elem<T>(I).toAPSInt();
3417 APInt RLane(LaneWidth, 0);
3418 if (U[I]) { // If lane not masked, compute ternary logic.
3419 for (unsigned Bit = 0; Bit != LaneWidth; ++Bit) {
3420 unsigned ABit = ALane[Bit];
3421 unsigned BBit = BLane[Bit];
3422 unsigned CBit = CLane[Bit];
3423 unsigned Idx = (ABit << 2) | (BBit << 1) | (CBit);
3424 RLane.setBitVal(Bit, Imm[Idx]);
3425 }
3426 Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
3427 } else if (MaskZ) { // If zero masked, zero the lane.
3428 Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
3429 } else { // Merge masking: keep the existing A lane.
3430 Dst.elem<T>(I) = static_cast<T>(APSInt(ALane, DstUnsigned));
3431 }
3432 }
3433 });
3434 Dst.initializeAllElements();
3435 return true;
3436}
3437
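/// vec_ext: extracts a single element. The index is wrapped with
/// "& (NumElems - 1)", which assumes a power-of-two element count; e.g. with
/// NumElems = 4, an immediate of 5 selects element 1.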
3438static bool interp__builtin_vec_ext(InterpState &S, CodePtr OpPC,
3439 const CallExpr *Call, unsigned ID) {
3440 assert(Call->getNumArgs() == 2);
3441
3442 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 1));
3443 const Pointer &Vec = S.Stk.pop<Pointer>();
3444 if (!Vec.getFieldDesc()->isPrimitiveArray())
3445 return false;
3446
3447 unsigned NumElems = Vec.getNumElems();
3448 unsigned Index =
3449 static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1));
3450
3451 PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3452 // FIXME(#161685): Replace float+int split with a numeric-only type switch
3453 if (ElemT == PT_Float) {
3454 S.Stk.push<Floating>(Args&: Vec.elem<Floating>(I: Index));
3455 return true;
3456 }
3457 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3458 APSInt V = Vec.elem<T>(Index).toAPSInt();
3459 pushInteger(S, V, Call->getType());
3460 });
3461
3462 return true;
3463}
3464
3465static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
3466 const CallExpr *Call, unsigned ID) {
3467 assert(Call->getNumArgs() == 3);
3468
3469 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 2));
3470 APSInt ValAPS = popToAPSInt(S, E: Call->getArg(Arg: 1));
3471
3472 const Pointer &Base = S.Stk.pop<Pointer>();
3473 if (!Base.getFieldDesc()->isPrimitiveArray())
3474 return false;
3475
3476 const Pointer &Dst = S.Stk.peek<Pointer>();
3477
3478 unsigned NumElems = Base.getNumElems();
3479 unsigned Index =
3480 static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1));
3481
3482 PrimType ElemT = Base.getFieldDesc()->getPrimType();
3483 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3484 for (unsigned I = 0; I != NumElems; ++I)
3485 Dst.elem<T>(I) = Base.elem<T>(I);
3486 Dst.elem<T>(Index) = static_cast<T>(ValAPS);
3487 });
3488
3489 Dst.initializeAllElements();
3490 return true;
3491}
3492
3493static bool evalICmpImm(uint8_t Imm, const APSInt &A, const APSInt &B,
3494 bool IsUnsigned) {
3495 switch (Imm & 0x7) {
3496 case 0x00: // _MM_CMPINT_EQ
3497 return (A == B);
3498 case 0x01: // _MM_CMPINT_LT
3499 return IsUnsigned ? A.ult(RHS: B) : A.slt(RHS: B);
3500 case 0x02: // _MM_CMPINT_LE
3501 return IsUnsigned ? A.ule(RHS: B) : A.sle(RHS: B);
3502 case 0x03: // _MM_CMPINT_FALSE
3503 return false;
3504 case 0x04: // _MM_CMPINT_NE
3505 return (A != B);
3506 case 0x05: // _MM_CMPINT_NLT
3507 return IsUnsigned ? A.ugt(RHS: B) : A.sgt(RHS: B);
3508 case 0x06: // _MM_CMPINT_NLE
3509 return IsUnsigned ? A.uge(RHS: B) : A.sge(RHS: B);
3510 case 0x07: // _MM_CMPINT_TRUE
3511 return true;
3512 default:
3513 llvm_unreachable("Invalid Op");
3514 }
3515}
3516
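/// Masked integer compare: result bit I is Mask[I] && cmp(LHS[I], RHS[I]),
/// where the comparison predicate is the _MM_CMPINT_* value in arg 2.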
3517static bool interp__builtin_ia32_cmp_mask(InterpState &S, CodePtr OpPC,
3518 const CallExpr *Call, unsigned ID,
3519 bool IsUnsigned) {
3520 assert(Call->getNumArgs() == 4);
3521
3522 APSInt Mask = popToAPSInt(S, E: Call->getArg(Arg: 3));
3523 APSInt Opcode = popToAPSInt(S, E: Call->getArg(Arg: 2));
3524 unsigned CmpOp = static_cast<unsigned>(Opcode.getZExtValue());
3525 const Pointer &RHS = S.Stk.pop<Pointer>();
3526 const Pointer &LHS = S.Stk.pop<Pointer>();
3527
3528 assert(LHS.getNumElems() == RHS.getNumElems());
3529
3530 APInt RetMask = APInt::getZero(numBits: LHS.getNumElems());
3531 unsigned VectorLen = LHS.getNumElems();
3532 PrimType ElemT = LHS.getFieldDesc()->getPrimType();
3533
3534 for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
3535 APSInt A, B;
3536 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3537 A = LHS.elem<T>(ElemNum).toAPSInt();
3538 B = RHS.elem<T>(ElemNum).toAPSInt();
3539 });
3540 RetMask.setBitVal(BitPosition: ElemNum,
3541 BitValue: Mask[ElemNum] && evalICmpImm(Imm: CmpOp, A, B, IsUnsigned));
3542 }
3543 pushInteger(S, Val: RetMask, QT: Call->getType());
3544 return true;
3545}
3546
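/// vpconflict: every result element is a bitmask of the *earlier* elements
/// that compare equal to it. For example, source {1, 2, 1, 1} yields
/// {0b000, 0b000, 0b001, 0b101}.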
3547static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
3548 const CallExpr *Call) {
3549 assert(Call->getNumArgs() == 1);
3550
3551 QualType Arg0Type = Call->getArg(Arg: 0)->getType();
3552 const auto *VecT = Arg0Type->castAs<VectorType>();
3553 PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
3554 unsigned NumElems = VecT->getNumElements();
3555 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3556 const Pointer &Src = S.Stk.pop<Pointer>();
3557 const Pointer &Dst = S.Stk.peek<Pointer>();
3558
3559 for (unsigned I = 0; I != NumElems; ++I) {
3560 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3561 APSInt ElemI = Src.elem<T>(I).toAPSInt();
3562 APInt ConflictMask(ElemI.getBitWidth(), 0);
3563 for (unsigned J = 0; J != I; ++J) {
3564 APSInt ElemJ = Src.elem<T>(J).toAPSInt();
3565 ConflictMask.setBitVal(J, ElemI == ElemJ);
3566 }
3567 Dst.elem<T>(I) = static_cast<T>(APSInt(ConflictMask, DestUnsigned));
3568 });
3569 }
3570 Dst.initializeAllElements();
3571 return true;
3572}
3573
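/// Converts a vector to a bitmask by collecting each element's most
/// significant (sign) bit. For example, <4 x i32> {-1, 0, -5, 7} yields
/// 0b0101.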
3574static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC,
3575 const CallExpr *Call,
3576 unsigned ID) {
3577 assert(Call->getNumArgs() == 1);
3578
3579 const Pointer &Vec = S.Stk.pop<Pointer>();
3580 unsigned RetWidth = S.getASTContext().getIntWidth(T: Call->getType());
3581 APInt RetMask(RetWidth, 0);
3582
3583 unsigned VectorLen = Vec.getNumElems();
3584 PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3585
3586 for (unsigned ElemNum = 0; ElemNum != VectorLen; ++ElemNum) {
3587 APSInt A;
3588 INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem<T>(ElemNum).toAPSInt(); });
3589 unsigned MSB = A[A.getBitWidth() - 1];
3590 RetMask.setBitVal(BitPosition: ElemNum, BitValue: MSB);
3591 }
3592 pushInteger(S, Val: RetMask, QT: Call->getType());
3593 return true;
3594}
3595
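/// The inverse of the conversion above: each mask bit expands to an all-ones
/// (-1) or all-zero element. For example, mask 0b0110 over four lanes yields
/// {0, -1, -1, 0}.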
3596static bool interp__builtin_ia32_cvt_mask2vec(InterpState &S, CodePtr OpPC,
3597 const CallExpr *Call,
3598 unsigned ID) {
3599 assert(Call->getNumArgs() == 1);
3600
3601 APSInt Mask = popToAPSInt(S, E: Call->getArg(Arg: 0));
3602
3603 const Pointer &Vec = S.Stk.peek<Pointer>();
3604 unsigned NumElems = Vec.getNumElems();
3605 PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3606
3607 for (unsigned I = 0; I != NumElems; ++I) {
3608 bool BitSet = Mask[I];
3609
3610 INT_TYPE_SWITCH_NO_BOOL(
3611 ElemT, { Vec.elem<T>(I) = BitSet ? T::from(-1) : T::from(0); });
3612 }
3613
3614 Vec.initializeAllElements();
3615
3616 return true;
3617}
3618
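/// cvtsd2ss: converts element 0 of B from double to float; when the
/// five-operand masked form disables the write, element 0 is taken from Src
/// instead. All remaining elements are copied from A.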
3619static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
3620 const CallExpr *Call,
3621 bool HasRoundingMask) {
3622 APSInt Rounding, MaskInt;
3623 Pointer Src, B, A;
3624
3625 if (HasRoundingMask) {
3626 assert(Call->getNumArgs() == 5);
3627 Rounding = popToAPSInt(S, E: Call->getArg(Arg: 4));
3628 MaskInt = popToAPSInt(S, E: Call->getArg(Arg: 3));
3629 Src = S.Stk.pop<Pointer>();
3630 B = S.Stk.pop<Pointer>();
3631 A = S.Stk.pop<Pointer>();
3632 if (!CheckLoad(S, OpPC, Ptr: A) || !CheckLoad(S, OpPC, Ptr: B) ||
3633 !CheckLoad(S, OpPC, Ptr: Src))
3634 return false;
3635 } else {
3636 assert(Call->getNumArgs() == 2);
3637 B = S.Stk.pop<Pointer>();
3638 A = S.Stk.pop<Pointer>();
3639 if (!CheckLoad(S, OpPC, Ptr: A) || !CheckLoad(S, OpPC, Ptr: B))
3640 return false;
3641 }
3642
3643 const auto *DstVTy = Call->getType()->castAs<VectorType>();
3644 unsigned NumElems = DstVTy->getNumElements();
3645 const Pointer &Dst = S.Stk.peek<Pointer>();
3646
3647 // Copy all elements except lane 0 (overwritten below) from A to Dst.
3648 for (unsigned I = 1; I != NumElems; ++I)
3649 Dst.elem<Floating>(I) = A.elem<Floating>(I);
3650
3651 // Convert element 0 from double to float, or use Src if masked off.
3652 if (!HasRoundingMask || (MaskInt.getZExtValue() & 0x1)) {
3653 assert(S.getASTContext().FloatTy == DstVTy->getElementType() &&
3654 "cvtsd2ss requires float element type in destination vector");
3655
3656 Floating Conv = S.allocFloat(
3657 Sem: S.getASTContext().getFloatTypeSemantics(T: DstVTy->getElementType()));
3658 APFloat SrcVal = B.elem<Floating>(I: 0).getAPFloat();
3659 if (!convertDoubleToFloatStrict(Src: SrcVal, Dst&: Conv, S, DiagExpr: Call))
3660 return false;
3661 Dst.elem<Floating>(I: 0) = Conv;
3662 } else {
3663 Dst.elem<Floating>(I: 0) = Src.elem<Floating>(I: 0);
3664 }
3665
3666 Dst.initializeAllElements();
3667 return true;
3668}
3669
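/// cvtpd2ps: converts each enabled double element of the source to float.
/// Disabled lanes take the passthrough value (masked form), and destination
/// lanes beyond the source width are zeroed.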
3670static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
3671 const CallExpr *Call, bool IsMasked,
3672 bool HasRounding) {
3673
3674 APSInt MaskVal;
3675 Pointer PassThrough;
3676 Pointer Src;
3677 APSInt Rounding;
3678
3679 if (IsMasked) {
3680 // Pop in reverse order.
3681 if (HasRounding) {
3682 Rounding = popToAPSInt(S, E: Call->getArg(Arg: 3));
3683 MaskVal = popToAPSInt(S, E: Call->getArg(Arg: 2));
3684 PassThrough = S.Stk.pop<Pointer>();
3685 Src = S.Stk.pop<Pointer>();
3686 } else {
3687 MaskVal = popToAPSInt(S, E: Call->getArg(Arg: 2));
3688 PassThrough = S.Stk.pop<Pointer>();
3689 Src = S.Stk.pop<Pointer>();
3690 }
3691
3692 if (!CheckLoad(S, OpPC, Ptr: PassThrough))
3693 return false;
3694 } else {
3695 // Pop source only.
3696 Src = S.Stk.pop<Pointer>();
3697 }
3698
3699 if (!CheckLoad(S, OpPC, Ptr: Src))
3700 return false;
3701
3702 const auto *RetVTy = Call->getType()->castAs<VectorType>();
3703 unsigned RetElems = RetVTy->getNumElements();
3704 unsigned SrcElems = Src.getNumElems();
3705 const Pointer &Dst = S.Stk.peek<Pointer>();
3706
3707 // Initialize destination with passthrough or zeros.
3708 for (unsigned I = 0; I != RetElems; ++I)
3709 if (IsMasked)
3710 Dst.elem<Floating>(I) = PassThrough.elem<Floating>(I);
3711 else
3712 Dst.elem<Floating>(I) = Floating(APFloat(0.0f));
3713
3714 assert(S.getASTContext().FloatTy == RetVTy->getElementType() &&
3715 "cvtpd2ps requires float element type in return vector");
3716
3717 // Convert double to float for enabled elements (only process source elements
3718 // that exist).
3719 for (unsigned I = 0; I != SrcElems; ++I) {
3720 if (IsMasked && !MaskVal[I])
3721 continue;
3722
3723 APFloat SrcVal = Src.elem<Floating>(I).getAPFloat();
3724
3725 Floating Conv = S.allocFloat(
3726 Sem: S.getASTContext().getFloatTypeSemantics(T: RetVTy->getElementType()));
3727 if (!convertDoubleToFloatStrict(Src: SrcVal, Dst&: Conv, S, DiagExpr: Call))
3728 return false;
3729 Dst.elem<Floating>(I) = Conv;
3730 }
3731
3732 Dst.initializeAllElements();
3733 return true;
3734}
3735
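/// Generic shuffle driver. GetSourceIndex maps a destination index plus the
/// (scalar or per-element) mask to a {source vector, source element} pair,
/// where source vector 0 selects A and 1 selects B; a negative source element
/// zeroes the lane. A pshufd-style caller would return (a sketch of the
/// callback shape, not a quote of an actual call site):
///   {0, (Mask >> (2 * DstIdx)) & 3}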
3736static bool interp__builtin_ia32_shuffle_generic(
3737 InterpState &S, CodePtr OpPC, const CallExpr *Call,
3738 llvm::function_ref<std::pair<unsigned, int>(unsigned, const APInt &)>
3739 GetSourceIndex) {
3740
3741 assert(Call->getNumArgs() == 2 || Call->getNumArgs() == 3);
3742
3743 APInt ShuffleMask;
3744 Pointer A, MaskVector, B;
3745 bool IsVectorMask = false;
3746 bool IsSingleOperand = (Call->getNumArgs() == 2);
3747
3748 if (IsSingleOperand) {
3749 QualType MaskType = Call->getArg(Arg: 1)->getType();
3750 if (MaskType->isVectorType()) {
3751 IsVectorMask = true;
3752 MaskVector = S.Stk.pop<Pointer>();
3753 A = S.Stk.pop<Pointer>();
3754 B = A;
3755 } else if (MaskType->isIntegerType()) {
3756 ShuffleMask = popToAPSInt(S, E: Call->getArg(Arg: 1));
3757 A = S.Stk.pop<Pointer>();
3758 B = A;
3759 } else {
3760 return false;
3761 }
3762 } else {
3763 QualType Arg2Type = Call->getArg(Arg: 2)->getType();
3764 if (Arg2Type->isVectorType()) {
3765 IsVectorMask = true;
3766 B = S.Stk.pop<Pointer>();
3767 MaskVector = S.Stk.pop<Pointer>();
3768 A = S.Stk.pop<Pointer>();
3769 } else if (Arg2Type->isIntegerType()) {
3770 ShuffleMask = popToAPSInt(S, E: Call->getArg(Arg: 2));
3771 B = S.Stk.pop<Pointer>();
3772 A = S.Stk.pop<Pointer>();
3773 } else {
3774 return false;
3775 }
3776 }
3777
3778 QualType Arg0Type = Call->getArg(Arg: 0)->getType();
3779 const auto *VecT = Arg0Type->castAs<VectorType>();
3780 PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
3781 unsigned NumElems = VecT->getNumElements();
3782
3783 const Pointer &Dst = S.Stk.peek<Pointer>();
3784
3785 PrimType MaskElemT = PT_Uint32;
3786 if (IsVectorMask) {
3787 QualType Arg1Type = Call->getArg(Arg: 1)->getType();
3788 const auto *MaskVecT = Arg1Type->castAs<VectorType>();
3789 QualType MaskElemType = MaskVecT->getElementType();
3790 MaskElemT = *S.getContext().classify(T: MaskElemType);
3791 }
3792
3793 for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {
3794 if (IsVectorMask) {
3795 INT_TYPE_SWITCH(MaskElemT,
3796 { ShuffleMask = MaskVector.elem<T>(DstIdx).toAPSInt(); });
3797 }
3798
3799 auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
3800
3801 if (SrcIdx < 0) {
3802 // Zero out this element.
3803 if (ElemT == PT_Float) {
3804 Dst.elem<Floating>(I: DstIdx) = Floating(
3805 S.getASTContext().getFloatTypeSemantics(T: VecT->getElementType()));
3806 } else {
3807 INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(0); });
3808 }
3809 } else {
3810 const Pointer &Src = (SrcVecIdx == 0) ? A : B;
3811 TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
3812 }
3813 }
3814 Dst.initializeAllElements();
3815
3816 return true;
3817}
3818
3819static bool interp__builtin_ia32_shuffle_generic(
3820 InterpState &S, CodePtr OpPC, const CallExpr *Call,
3821 llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
3822 GetSourceIndex) {
3823 return interp__builtin_ia32_shuffle_generic(
3824 S, OpPC, Call,
3825 GetSourceIndex: [&GetSourceIndex](unsigned DstIdx,
3826 const APInt &Mask) -> std::pair<unsigned, int> {
3827 return GetSourceIndex(DstIdx, Mask.getZExtValue());
3828 });
3829}
3830
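/// psll/psrl/psra-style shift where the count is taken from the low 64 bits
/// of the second vector operand. Counts below the element width are applied
/// via ShiftOp; larger counts go through OverflowOp, which callers would
/// typically use to produce zero (logical shifts) or a sign-fill (arithmetic
/// shifts) -- a sketch of the expected callbacks, not a quote of a call site.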
3831static bool interp__builtin_ia32_shift_with_count(
3832 InterpState &S, CodePtr OpPC, const CallExpr *Call,
3833 llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp,
3834 llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) {
3835
3836 assert(Call->getNumArgs() == 2);
3837
3838 const Pointer &Count = S.Stk.pop<Pointer>();
3839 const Pointer &Source = S.Stk.pop<Pointer>();
3840
3841 QualType SourceType = Call->getArg(Arg: 0)->getType();
3842 QualType CountType = Call->getArg(Arg: 1)->getType();
3843 assert(SourceType->isVectorType() && CountType->isVectorType());
3844
3845 const auto *SourceVecT = SourceType->castAs<VectorType>();
3846 const auto *CountVecT = CountType->castAs<VectorType>();
3847 PrimType SourceElemT = *S.getContext().classify(T: SourceVecT->getElementType());
3848 PrimType CountElemT = *S.getContext().classify(T: CountVecT->getElementType());
3849
3850 const Pointer &Dst = S.Stk.peek<Pointer>();
3851
3852 unsigned DestEltWidth =
3853 S.getASTContext().getTypeSize(T: SourceVecT->getElementType());
3854 bool IsDestUnsigned = SourceVecT->getElementType()->isUnsignedIntegerType();
3855 unsigned DestLen = SourceVecT->getNumElements();
3856 unsigned CountEltWidth =
3857 S.getASTContext().getTypeSize(T: CountVecT->getElementType());
3858 unsigned NumBitsInQWord = 64;
3859 unsigned NumCountElts = NumBitsInQWord / CountEltWidth;
3860
3861 uint64_t CountLQWord = 0;
3862 for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) {
3863 uint64_t Elt = 0;
3864 INT_TYPE_SWITCH(CountElemT,
3865 { Elt = static_cast<uint64_t>(Count.elem<T>(EltIdx)); });
3866 CountLQWord |= (Elt << (EltIdx * CountEltWidth));
3867 }
3868
3869 for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) {
3870 APSInt Elt;
3871 INT_TYPE_SWITCH(SourceElemT, { Elt = Source.elem<T>(EltIdx).toAPSInt(); });
3872
3873 APInt Result;
3874 if (CountLQWord < DestEltWidth) {
3875 Result = ShiftOp(Elt, CountLQWord);
3876 } else {
3877 Result = OverflowOp(Elt, DestEltWidth);
3878 }
3879 if (IsDestUnsigned) {
3880 INT_TYPE_SWITCH(SourceElemT, {
3881 Dst.elem<T>(EltIdx) = T::from(Result.getZExtValue());
3882 });
3883 } else {
3884 INT_TYPE_SWITCH(SourceElemT, {
3885 Dst.elem<T>(EltIdx) = T::from(Result.getSExtValue());
3886 });
3887 }
3888 }
3889
3890 Dst.initializeAllElements();
3891 return true;
3892}
3893
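/// vpshufbitqmb: for every byte position, selects one bit (by index 0-63) out
/// of the corresponding source QWord; the selected bits form the result mask,
/// pre-masked by ZeroMask.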
3894static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
3895 const CallExpr *Call) {
3896
3897 assert(Call->getNumArgs() == 3);
3898
3899 QualType SourceType = Call->getArg(Arg: 0)->getType();
3900 QualType ShuffleMaskType = Call->getArg(Arg: 1)->getType();
3901 QualType ZeroMaskType = Call->getArg(Arg: 2)->getType();
3902 if (!SourceType->isVectorType() || !ShuffleMaskType->isVectorType() ||
3903 !ZeroMaskType->isIntegerType()) {
3904 return false;
3905 }
3906
3907 Pointer Source, ShuffleMask;
3908 APSInt ZeroMask = popToAPSInt(S, E: Call->getArg(Arg: 2));
3909 ShuffleMask = S.Stk.pop<Pointer>();
3910 Source = S.Stk.pop<Pointer>();
3911
3912 const auto *SourceVecT = SourceType->castAs<VectorType>();
3913 const auto *ShuffleMaskVecT = ShuffleMaskType->castAs<VectorType>();
3914 assert(SourceVecT->getNumElements() == ShuffleMaskVecT->getNumElements());
3915 assert(ZeroMask.getBitWidth() == SourceVecT->getNumElements());
3916
3917 PrimType SourceElemT = *S.getContext().classify(T: SourceVecT->getElementType());
3918 PrimType ShuffleMaskElemT =
3919 *S.getContext().classify(T: ShuffleMaskVecT->getElementType());
3920
3921 unsigned NumBytesInQWord = 8;
3922 unsigned NumBitsInByte = 8;
3923 unsigned NumBytes = SourceVecT->getNumElements();
3924 unsigned NumQWords = NumBytes / NumBytesInQWord;
3925 unsigned RetWidth = ZeroMask.getBitWidth();
3926 APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
3927
3928 for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
3929 APInt SourceQWord(64, 0);
3930 for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
3931 uint64_t Byte = 0;
3932 INT_TYPE_SWITCH(SourceElemT, {
3933 Byte = static_cast<uint64_t>(
3934 Source.elem<T>(QWordId * NumBytesInQWord + ByteIdx));
3935 });
3936 SourceQWord.insertBits(SubBits: APInt(8, Byte & 0xFF), bitPosition: ByteIdx * NumBitsInByte);
3937 }
3938
3939 for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
3940 unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx;
3941 unsigned M = 0;
3942 INT_TYPE_SWITCH(ShuffleMaskElemT, {
3943 M = static_cast<unsigned>(ShuffleMask.elem<T>(SelIdx)) & 0x3F;
3944 });
3945
3946 if (ZeroMask[SelIdx]) {
3947 RetMask.setBitVal(BitPosition: SelIdx, BitValue: SourceQWord[M]);
3948 }
3949 }
3950 }
3951
3952 pushInteger(S, Val: RetMask, QT: Call->getType());
3953 return true;
3954}
3955
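/// vcvtps2ph: converts floats to half-precision. Imm bits [1:0] select the
/// rounding mode (00 nearest-even, 01 toward negative, 10 toward positive,
/// 11 toward zero); bit 2 selects the runtime MXCSR mode, which can only be
/// constant-evaluated when the conversion is exact.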
3956static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
3957 const CallExpr *Call) {
3958 // Arguments are: vector of floats, rounding immediate.
3959 assert(Call->getNumArgs() == 2);
3960
3961 APSInt Imm = popToAPSInt(S, E: Call->getArg(Arg: 1));
3962 const Pointer &Src = S.Stk.pop<Pointer>();
3963 const Pointer &Dst = S.Stk.peek<Pointer>();
3964
3965 assert(Src.getFieldDesc()->isPrimitiveArray());
3966 assert(Dst.getFieldDesc()->isPrimitiveArray());
3967
3968 const auto *SrcVTy = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
3969 unsigned SrcNumElems = SrcVTy->getNumElements();
3970 const auto *DstVTy = Call->getType()->castAs<VectorType>();
3971 unsigned DstNumElems = DstVTy->getNumElements();
3972
3973 const llvm::fltSemantics &HalfSem =
3974 S.getASTContext().getFloatTypeSemantics(T: S.getASTContext().HalfTy);
3975
3976 // imm[2] == 1 means use MXCSR rounding mode.
3977 // In that case, we can only evaluate if the conversion is exact.
3978 int ImmVal = Imm.getZExtValue();
3979 bool UseMXCSR = (ImmVal & 4) != 0;
3980 bool IsFPConstrained =
3981 Call->getFPFeaturesInEffect(LO: S.getASTContext().getLangOpts())
3982 .isFPConstrained();
3983
3984 llvm::RoundingMode RM;
3985 if (!UseMXCSR) {
3986 switch (ImmVal & 3) {
3987 case 0:
3988 RM = llvm::RoundingMode::NearestTiesToEven;
3989 break;
3990 case 1:
3991 RM = llvm::RoundingMode::TowardNegative;
3992 break;
3993 case 2:
3994 RM = llvm::RoundingMode::TowardPositive;
3995 break;
3996 case 3:
3997 RM = llvm::RoundingMode::TowardZero;
3998 break;
3999 default:
4000 llvm_unreachable("Invalid immediate rounding mode");
4001 }
4002 } else {
4003 // For MXCSR, we must check for exactness. We can use any rounding mode
4004 // for the trial conversion since the result is the same if it's exact.
4005 RM = llvm::RoundingMode::NearestTiesToEven;
4006 }
4007
4008 QualType DstElemQT = Dst.getFieldDesc()->getElemQualType();
4009 PrimType DstElemT = *S.getContext().classify(T: DstElemQT);
4010
4011 for (unsigned I = 0; I != SrcNumElems; ++I) {
4012 Floating SrcVal = Src.elem<Floating>(I);
4013 APFloat DstVal = SrcVal.getAPFloat();
4014
4015 bool LostInfo;
4016 APFloat::opStatus St = DstVal.convert(ToSemantics: HalfSem, RM, losesInfo: &LostInfo);
4017
4018 if (UseMXCSR && IsFPConstrained && St != APFloat::opOK) {
4019 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
4020 DiagId: diag::note_constexpr_dynamic_rounding);
4021 return false;
4022 }
4023
4024 INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
4025 // Convert the destination value's bit pattern to an unsigned integer,
4026 // then reconstruct the element using the target type's 'from' method.
4027 uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue();
4028 Dst.elem<T>(I) = T::from(RawBits);
4029 });
4030 }
4031
4032 // Zero out remaining elements if the destination has more elements
4033 // (e.g., vcvtps2ph converting 4 floats to 8 shorts).
4034 if (DstNumElems > SrcNumElems) {
4035 for (unsigned I = SrcNumElems; I != DstNumElems; ++I) {
4036 INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); });
4037 }
4038 }
4039
4040 Dst.initializeAllElements();
4041 return true;
4042}
4043
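/// vpmultishiftqb: every destination byte is an 8-bit field extracted from
/// the corresponding 64-bit lane of B, starting at the bit offset given by
/// the matching control byte in A (offset taken mod 64; the read wraps around
/// the QWord).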
4044static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC,
4045 const CallExpr *Call) {
4046 assert(Call->getNumArgs() == 2);
4047
4048 QualType ATy = Call->getArg(Arg: 0)->getType();
4049 QualType BTy = Call->getArg(Arg: 1)->getType();
4050 if (!ATy->isVectorType() || !BTy->isVectorType()) {
4051 return false;
4052 }
4053
4054 const Pointer &BPtr = S.Stk.pop<Pointer>();
4055 const Pointer &APtr = S.Stk.pop<Pointer>();
4056 const auto *AVecT = ATy->castAs<VectorType>();
4057 assert(AVecT->getNumElements() ==
4058 BTy->castAs<VectorType>()->getNumElements());
4059
4060 PrimType ElemT = *S.getContext().classify(T: AVecT->getElementType());
4061
4062 unsigned NumBytesInQWord = 8;
4063 unsigned NumBitsInByte = 8;
4064 unsigned NumBytes = AVecT->getNumElements();
4065 unsigned NumQWords = NumBytes / NumBytesInQWord;
4066 const Pointer &Dst = S.Stk.peek<Pointer>();
4067
4068 for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
4069 APInt BQWord(64, 0);
4070 for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4071 unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
4072 INT_TYPE_SWITCH(ElemT, {
4073 uint64_t Byte = static_cast<uint64_t>(BPtr.elem<T>(Idx));
4074 BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
4075 });
4076 }
4077
4078 for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4079 unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
4080 uint64_t Ctrl = 0;
4081 INT_TYPE_SWITCH(
4082 ElemT, { Ctrl = static_cast<uint64_t>(APtr.elem<T>(Idx)) & 0x3F; });
4083
4084 APInt Byte(8, 0);
4085 for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) {
4086 Byte.setBitVal(BitPosition: BitIdx, BitValue: BQWord[(Ctrl + BitIdx) & 0x3F]);
4087 }
4088 INT_TYPE_SWITCH(ElemT,
4089 { Dst.elem<T>(Idx) = T::from(Byte.getZExtValue()); });
4090 }
4091 }
4092
4093 Dst.initializeAllElements();
4094
4095 return true;
4096}
4097
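/// GF(2) affine transform: every result byte is the product of the 8x8 bit
/// matrix in the corresponding QWord of A with the byte of X, XORed with the
/// immediate byte; the Inverse form first inverts X in GF(2^8). The per-byte
/// arithmetic is handled by the GFNIAffine helper defined elsewhere.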
4098static bool interp_builtin_ia32_gfni_affine(InterpState &S, CodePtr OpPC,
4099 const CallExpr *Call,
4100 bool Inverse) {
4101 assert(Call->getNumArgs() == 3);
4102 QualType XType = Call->getArg(Arg: 0)->getType();
4103 QualType AType = Call->getArg(Arg: 1)->getType();
4104 QualType ImmType = Call->getArg(Arg: 2)->getType();
4105 if (!XType->isVectorType() || !AType->isVectorType() ||
4106 !ImmType->isIntegerType()) {
4107 return false;
4108 }
4109
4110 Pointer X, A;
4111 APSInt Imm = popToAPSInt(S, E: Call->getArg(Arg: 2));
4112 A = S.Stk.pop<Pointer>();
4113 X = S.Stk.pop<Pointer>();
4114
4115 const Pointer &Dst = S.Stk.peek<Pointer>();
4116 const auto *AVecT = AType->castAs<VectorType>();
4117 assert(XType->castAs<VectorType>()->getNumElements() ==
4118 AVecT->getNumElements());
4119 unsigned NumBytesInQWord = 8;
4120 unsigned NumBytes = AVecT->getNumElements();
4121 unsigned NumBitsInQWord = 64;
4122 unsigned NumQWords = NumBytes / NumBytesInQWord;
4123 unsigned NumBitsInByte = 8;
4124 PrimType AElemT = *S.getContext().classify(T: AVecT->getElementType());
4125
4126 // Compute A*X + Imm over GF(2).
4127 for (unsigned QWordIdx = 0; QWordIdx != NumQWords; ++QWordIdx) {
4128 // Extract the QWords from X and A.
4129 APInt XQWord(NumBitsInQWord, 0);
4130 APInt AQWord(NumBitsInQWord, 0);
4131 for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4132 unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx;
4133 uint8_t XByte;
4134 uint8_t AByte;
4135 INT_TYPE_SWITCH(AElemT, {
4136 XByte = static_cast<uint8_t>(X.elem<T>(Idx));
4137 AByte = static_cast<uint8_t>(A.elem<T>(Idx));
4138 });
4139
4140 XQWord.insertBits(SubBits: APInt(NumBitsInByte, XByte), bitPosition: ByteIdx * NumBitsInByte);
4141 AQWord.insertBits(SubBits: APInt(NumBitsInByte, AByte), bitPosition: ByteIdx * NumBitsInByte);
4142 }
4143
4144 for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4145 unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx;
4146 uint8_t XByte =
4147 XQWord.lshr(shiftAmt: ByteIdx * NumBitsInByte).getLoBits(numBits: 8).getZExtValue();
4148 INT_TYPE_SWITCH(AElemT, {
4149 Dst.elem<T>(Idx) = T::from(GFNIAffine(XByte, AQWord, Imm, Inverse));
4150 });
4151 }
4152 }
4153 Dst.initializeAllElements();
4154 return true;
4155}
4156
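/// GF(2^8) multiply: every result byte is the carry-less product of the
/// corresponding bytes of A and B, reduced modulo the GFNI polynomial
/// x^8 + x^4 + x^3 + x + 1 (handled by the GFNIMul helper defined elsewhere).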
4157static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC,
4158 const CallExpr *Call) {
4159 assert(Call->getNumArgs() == 2);
4160
4161 QualType AType = Call->getArg(Arg: 0)->getType();
4162 QualType BType = Call->getArg(Arg: 1)->getType();
4163 if (!AType->isVectorType() || !BType->isVectorType()) {
4164 return false;
4165 }
4166
4167 Pointer A, B;
4168 B = S.Stk.pop<Pointer>();
4169 A = S.Stk.pop<Pointer>();
4170
4171 const Pointer &Dst = S.Stk.peek<Pointer>();
4172 const auto *AVecT = AType->castAs<VectorType>();
4173 assert(AVecT->getNumElements() ==
4174 BType->castAs<VectorType>()->getNumElements());
4175
4176 PrimType AElemT = *S.getContext().classify(T: AVecT->getElementType());
4177 unsigned NumBytes = A.getNumElems();
4178
4179 for (unsigned ByteIdx = 0; ByteIdx != NumBytes; ++ByteIdx) {
4180 uint8_t AByte, BByte;
4181 INT_TYPE_SWITCH(AElemT, {
4182 AByte = static_cast<uint8_t>(A.elem<T>(ByteIdx));
4183 BByte = static_cast<uint8_t>(B.elem<T>(ByteIdx));
4184 Dst.elem<T>(ByteIdx) = T::from(GFNIMul(AByte, BByte));
4185 });
4186 }
4187
4188 Dst.initializeAllElements();
4189 return true;
4190}
4191
4192bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
4193 uint32_t BuiltinID) {
4194 if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(ID: BuiltinID))
4195 return Invalid(S, OpPC);
4196
4197 const InterpFrame *Frame = S.Current;
4198 switch (BuiltinID) {
4199 case Builtin::BI__builtin_is_constant_evaluated:
4200 return interp__builtin_is_constant_evaluated(S, OpPC, Frame, Call);
4201
4202 case Builtin::BI__builtin_assume:
4203 case Builtin::BI__assume:
4204 return interp__builtin_assume(S, OpPC, Frame, Call);
4205
4206 case Builtin::BI__builtin_strcmp:
4207 case Builtin::BIstrcmp:
4208 case Builtin::BI__builtin_strncmp:
4209 case Builtin::BIstrncmp:
4210 case Builtin::BI__builtin_wcsncmp:
4211 case Builtin::BIwcsncmp:
4212 case Builtin::BI__builtin_wcscmp:
4213 case Builtin::BIwcscmp:
4214 return interp__builtin_strcmp(S, OpPC, Frame, Call, ID: BuiltinID);
4215
4216 case Builtin::BI__builtin_strlen:
4217 case Builtin::BIstrlen:
4218 case Builtin::BI__builtin_wcslen:
4219 case Builtin::BIwcslen:
4220 return interp__builtin_strlen(S, OpPC, Frame, Call, ID: BuiltinID);
4221
4222 case Builtin::BI__builtin_nan:
4223 case Builtin::BI__builtin_nanf:
4224 case Builtin::BI__builtin_nanl:
4225 case Builtin::BI__builtin_nanf16:
4226 case Builtin::BI__builtin_nanf128:
4227 return interp__builtin_nan(S, OpPC, Frame, Call, /*Signaling=*/false);
4228
4229 case Builtin::BI__builtin_nans:
4230 case Builtin::BI__builtin_nansf:
4231 case Builtin::BI__builtin_nansl:
4232 case Builtin::BI__builtin_nansf16:
4233 case Builtin::BI__builtin_nansf128:
4234 return interp__builtin_nan(S, OpPC, Frame, Call, /*Signaling=*/true);
4235
4236 case Builtin::BI__builtin_huge_val:
4237 case Builtin::BI__builtin_huge_valf:
4238 case Builtin::BI__builtin_huge_vall:
4239 case Builtin::BI__builtin_huge_valf16:
4240 case Builtin::BI__builtin_huge_valf128:
4241 case Builtin::BI__builtin_inf:
4242 case Builtin::BI__builtin_inff:
4243 case Builtin::BI__builtin_infl:
4244 case Builtin::BI__builtin_inff16:
4245 case Builtin::BI__builtin_inff128:
4246 return interp__builtin_inf(S, OpPC, Frame, Call);
4247
4248 case Builtin::BI__builtin_copysign:
4249 case Builtin::BI__builtin_copysignf:
4250 case Builtin::BI__builtin_copysignl:
4251 case Builtin::BI__builtin_copysignf128:
4252 return interp__builtin_copysign(S, OpPC, Frame);
4253
4254 case Builtin::BI__builtin_fmin:
4255 case Builtin::BI__builtin_fminf:
4256 case Builtin::BI__builtin_fminl:
4257 case Builtin::BI__builtin_fminf16:
4258 case Builtin::BI__builtin_fminf128:
4259 return interp__builtin_fmin(S, OpPC, Frame, /*IsNumBuiltin=*/false);
4260
4261 case Builtin::BI__builtin_fminimum_num:
4262 case Builtin::BI__builtin_fminimum_numf:
4263 case Builtin::BI__builtin_fminimum_numl:
4264 case Builtin::BI__builtin_fminimum_numf16:
4265 case Builtin::BI__builtin_fminimum_numf128:
4266 return interp__builtin_fmin(S, OpPC, Frame, /*IsNumBuiltin=*/true);
4267
4268 case Builtin::BI__builtin_fmax:
4269 case Builtin::BI__builtin_fmaxf:
4270 case Builtin::BI__builtin_fmaxl:
4271 case Builtin::BI__builtin_fmaxf16:
4272 case Builtin::BI__builtin_fmaxf128:
4273 return interp__builtin_fmax(S, OpPC, Frame, /*IsNumBuiltin=*/false);
4274
4275 case Builtin::BI__builtin_fmaximum_num:
4276 case Builtin::BI__builtin_fmaximum_numf:
4277 case Builtin::BI__builtin_fmaximum_numl:
4278 case Builtin::BI__builtin_fmaximum_numf16:
4279 case Builtin::BI__builtin_fmaximum_numf128:
4280 return interp__builtin_fmax(S, OpPC, Frame, /*IsNumBuiltin=*/true);
4281
4282 case Builtin::BI__builtin_isnan:
4283 return interp__builtin_isnan(S, OpPC, Frame, Call);
4284
4285 case Builtin::BI__builtin_issignaling:
4286 return interp__builtin_issignaling(S, OpPC, Frame, Call);
4287
4288 case Builtin::BI__builtin_isinf:
4289 return interp__builtin_isinf(S, OpPC, Frame, /*Sign=*/CheckSign: false, Call);
4290
4291 case Builtin::BI__builtin_isinf_sign:
4292 return interp__builtin_isinf(S, OpPC, Frame, /*Sign=*/CheckSign: true, Call);
4293
4294 case Builtin::BI__builtin_isfinite:
4295 return interp__builtin_isfinite(S, OpPC, Frame, Call);
4296
4297 case Builtin::BI__builtin_isnormal:
4298 return interp__builtin_isnormal(S, OpPC, Frame, Call);
4299
4300 case Builtin::BI__builtin_issubnormal:
4301 return interp__builtin_issubnormal(S, OpPC, Frame, Call);
4302
4303 case Builtin::BI__builtin_iszero:
4304 return interp__builtin_iszero(S, OpPC, Frame, Call);
4305
4306 case Builtin::BI__builtin_signbit:
4307 case Builtin::BI__builtin_signbitf:
4308 case Builtin::BI__builtin_signbitl:
4309 return interp__builtin_signbit(S, OpPC, Frame, Call);
4310
4311 case Builtin::BI__builtin_isgreater:
4312 case Builtin::BI__builtin_isgreaterequal:
4313 case Builtin::BI__builtin_isless:
4314 case Builtin::BI__builtin_islessequal:
4315 case Builtin::BI__builtin_islessgreater:
4316 case Builtin::BI__builtin_isunordered:
4317 return interp_floating_comparison(S, OpPC, Call, ID: BuiltinID);
4318
4319 case Builtin::BI__builtin_isfpclass:
4320 return interp__builtin_isfpclass(S, OpPC, Frame, Call);
4321
4322 case Builtin::BI__builtin_fpclassify:
4323 return interp__builtin_fpclassify(S, OpPC, Frame, Call);
4324
4325 case Builtin::BI__builtin_fabs:
4326 case Builtin::BI__builtin_fabsf:
4327 case Builtin::BI__builtin_fabsl:
4328 case Builtin::BI__builtin_fabsf128:
4329 return interp__builtin_fabs(S, OpPC, Frame);
4330
4331 case Builtin::BI__builtin_abs:
4332 case Builtin::BI__builtin_labs:
4333 case Builtin::BI__builtin_llabs:
4334 return interp__builtin_abs(S, OpPC, Frame, Call);
4335
4336 case Builtin::BI__builtin_popcount:
4337 case Builtin::BI__builtin_popcountl:
4338 case Builtin::BI__builtin_popcountll:
4339 case Builtin::BI__builtin_popcountg:
4340 case Builtin::BI__popcnt16: // Microsoft variants of popcount
4341 case Builtin::BI__popcnt:
4342 case Builtin::BI__popcnt64:
4343 return interp__builtin_popcount(S, OpPC, Frame, Call);
4344
4345 case Builtin::BI__builtin_parity:
4346 case Builtin::BI__builtin_parityl:
4347 case Builtin::BI__builtin_parityll:
4348 return interp__builtin_elementwise_int_unaryop(
4349 S, OpPC, Call, Fn: [](const APSInt &Val) {
4350 return APInt(Val.getBitWidth(), Val.popcount() % 2);
4351 });
4352 case Builtin::BI__builtin_clrsb:
4353 case Builtin::BI__builtin_clrsbl:
4354 case Builtin::BI__builtin_clrsbll:
4355 return interp__builtin_elementwise_int_unaryop(
4356 S, OpPC, Call, Fn: [](const APSInt &Val) {
4357 return APInt(Val.getBitWidth(),
4358 Val.getBitWidth() - Val.getSignificantBits());
4359 });
4360 case Builtin::BI__builtin_bitreverseg:
4361 case Builtin::BI__builtin_bitreverse8:
4362 case Builtin::BI__builtin_bitreverse16:
4363 case Builtin::BI__builtin_bitreverse32:
4364 case Builtin::BI__builtin_bitreverse64:
4365 return interp__builtin_elementwise_int_unaryop(
4366 S, OpPC, Call, Fn: [](const APSInt &Val) { return Val.reverseBits(); });
4367
4368 case Builtin::BI__builtin_classify_type:
4369 return interp__builtin_classify_type(S, OpPC, Frame, Call);
4370
4371 case Builtin::BI__builtin_expect:
4372 case Builtin::BI__builtin_expect_with_probability:
4373 return interp__builtin_expect(S, OpPC, Frame, Call);
4374
4375 case Builtin::BI__builtin_rotateleft8:
4376 case Builtin::BI__builtin_rotateleft16:
4377 case Builtin::BI__builtin_rotateleft32:
4378 case Builtin::BI__builtin_rotateleft64:
4379 case Builtin::BI__builtin_stdc_rotate_left:
4380 case Builtin::BI_rotl8: // Microsoft variants of rotate left
4381 case Builtin::BI_rotl16:
4382 case Builtin::BI_rotl:
4383 case Builtin::BI_lrotl:
4384 case Builtin::BI_rotl64:
4385 case Builtin::BI__builtin_rotateright8:
4386 case Builtin::BI__builtin_rotateright16:
4387 case Builtin::BI__builtin_rotateright32:
4388 case Builtin::BI__builtin_rotateright64:
4389 case Builtin::BI__builtin_stdc_rotate_right:
4390 case Builtin::BI_rotr8: // Microsoft variants of rotate right
4391 case Builtin::BI_rotr16:
4392 case Builtin::BI_rotr:
4393 case Builtin::BI_lrotr:
4394 case Builtin::BI_rotr64: {
4395 // Determine if this is a rotate-right operation.
4396 bool IsRotateRight;
4397 switch (BuiltinID) {
4398 case Builtin::BI__builtin_rotateright8:
4399 case Builtin::BI__builtin_rotateright16:
4400 case Builtin::BI__builtin_rotateright32:
4401 case Builtin::BI__builtin_rotateright64:
4402 case Builtin::BI__builtin_stdc_rotate_right:
4403 case Builtin::BI_rotr8:
4404 case Builtin::BI_rotr16:
4405 case Builtin::BI_rotr:
4406 case Builtin::BI_lrotr:
4407 case Builtin::BI_rotr64:
4408 IsRotateRight = true;
4409 break;
4410 default:
4411 IsRotateRight = false;
4412 break;
4413 }
4414
4415 return interp__builtin_elementwise_int_binop(
4416 S, OpPC, Call, Fn: [IsRotateRight](const APSInt &Value, APSInt Amount) {
4417 Amount = NormalizeRotateAmount(Value, Amount);
4418 return IsRotateRight ? Value.rotr(rotateAmt: Amount.getZExtValue())
4419 : Value.rotl(rotateAmt: Amount.getZExtValue());
4420 });
4421 }
4422
4423 case Builtin::BI__builtin_ffs:
4424 case Builtin::BI__builtin_ffsl:
4425 case Builtin::BI__builtin_ffsll:
4426 return interp__builtin_elementwise_int_unaryop(
4427 S, OpPC, Call, Fn: [](const APSInt &Val) {
4428 return APInt(Val.getBitWidth(),
4429 Val.isZero() ? 0u : Val.countTrailingZeros() + 1u);
4430 });
4431
4432 case Builtin::BIaddressof:
4433 case Builtin::BI__addressof:
4434 case Builtin::BI__builtin_addressof:
4435 assert(isNoopBuiltin(BuiltinID));
4436 return interp__builtin_addressof(S, OpPC, Frame, Call);
4437
4438 case Builtin::BIas_const:
4439 case Builtin::BIforward:
4440 case Builtin::BIforward_like:
4441 case Builtin::BImove:
4442 case Builtin::BImove_if_noexcept:
4443 assert(isNoopBuiltin(BuiltinID));
4444 return interp__builtin_move(S, OpPC, Frame, Call);
4445
4446 case Builtin::BI__builtin_eh_return_data_regno:
4447 return interp__builtin_eh_return_data_regno(S, OpPC, Frame, Call);
4448
4449 case Builtin::BI__builtin_launder:
4450 assert(isNoopBuiltin(BuiltinID));
4451 return true;
4452
4453 case Builtin::BI__builtin_add_overflow:
4454 case Builtin::BI__builtin_sub_overflow:
4455 case Builtin::BI__builtin_mul_overflow:
4456 case Builtin::BI__builtin_sadd_overflow:
4457 case Builtin::BI__builtin_uadd_overflow:
4458 case Builtin::BI__builtin_uaddl_overflow:
4459 case Builtin::BI__builtin_uaddll_overflow:
4460 case Builtin::BI__builtin_usub_overflow:
4461 case Builtin::BI__builtin_usubl_overflow:
4462 case Builtin::BI__builtin_usubll_overflow:
4463 case Builtin::BI__builtin_umul_overflow:
4464 case Builtin::BI__builtin_umull_overflow:
4465 case Builtin::BI__builtin_umulll_overflow:
4466 case Builtin::BI__builtin_saddl_overflow:
4467 case Builtin::BI__builtin_saddll_overflow:
4468 case Builtin::BI__builtin_ssub_overflow:
4469 case Builtin::BI__builtin_ssubl_overflow:
4470 case Builtin::BI__builtin_ssubll_overflow:
4471 case Builtin::BI__builtin_smul_overflow:
4472 case Builtin::BI__builtin_smull_overflow:
4473 case Builtin::BI__builtin_smulll_overflow:
4474 return interp__builtin_overflowop(S, OpPC, Call, BuiltinOp: BuiltinID);
4475
4476 case Builtin::BI__builtin_addcb:
4477 case Builtin::BI__builtin_addcs:
4478 case Builtin::BI__builtin_addc:
4479 case Builtin::BI__builtin_addcl:
4480 case Builtin::BI__builtin_addcll:
4481 case Builtin::BI__builtin_subcb:
4482 case Builtin::BI__builtin_subcs:
4483 case Builtin::BI__builtin_subc:
4484 case Builtin::BI__builtin_subcl:
4485 case Builtin::BI__builtin_subcll:
4486 return interp__builtin_carryop(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4487
4488 case Builtin::BI__builtin_clz:
4489 case Builtin::BI__builtin_clzl:
4490 case Builtin::BI__builtin_clzll:
4491 case Builtin::BI__builtin_clzs:
4492 case Builtin::BI__builtin_clzg:
4493 case Builtin::BI__lzcnt16: // Microsoft variants of count leading-zeroes
4494 case Builtin::BI__lzcnt:
4495 case Builtin::BI__lzcnt64:
4496 return interp__builtin_clz(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4497
4498 case Builtin::BI__builtin_ctz:
4499 case Builtin::BI__builtin_ctzl:
4500 case Builtin::BI__builtin_ctzll:
4501 case Builtin::BI__builtin_ctzs:
4502 case Builtin::BI__builtin_ctzg:
4503 return interp__builtin_ctz(S, OpPC, Frame, Call, BuiltinID);
4504
4505 case Builtin::BI__builtin_elementwise_clzg:
4506 case Builtin::BI__builtin_elementwise_ctzg:
4507 return interp__builtin_elementwise_countzeroes(S, OpPC, Frame, Call,
4508 BuiltinID);
4509 case Builtin::BI__builtin_bswapg:
4510 case Builtin::BI__builtin_bswap16:
4511 case Builtin::BI__builtin_bswap32:
4512 case Builtin::BI__builtin_bswap64:
4513 return interp__builtin_bswap(S, OpPC, Frame, Call);
4514
4515 case Builtin::BI__atomic_always_lock_free:
4516 case Builtin::BI__atomic_is_lock_free:
4517 return interp__builtin_atomic_lock_free(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4518
4519 case Builtin::BI__c11_atomic_is_lock_free:
4520 return interp__builtin_c11_atomic_is_lock_free(S, OpPC, Frame, Call);
4521
4522 case Builtin::BI__builtin_complex:
4523 return interp__builtin_complex(S, OpPC, Frame, Call);
4524
4525 case Builtin::BI__builtin_is_aligned:
4526 case Builtin::BI__builtin_align_up:
4527 case Builtin::BI__builtin_align_down:
4528 return interp__builtin_is_aligned_up_down(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4529
4530 case Builtin::BI__builtin_assume_aligned:
4531 return interp__builtin_assume_aligned(S, OpPC, Frame, Call);
4532
4533 case clang::X86::BI__builtin_ia32_crc32qi:
4534 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 1);
4535 case clang::X86::BI__builtin_ia32_crc32hi:
4536 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 2);
4537 case clang::X86::BI__builtin_ia32_crc32si:
4538 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 4);
4539 case clang::X86::BI__builtin_ia32_crc32di:
4540 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 8);
4541
4542 case clang::X86::BI__builtin_ia32_bextr_u32:
4543 case clang::X86::BI__builtin_ia32_bextr_u64:
4544 case clang::X86::BI__builtin_ia32_bextri_u32:
4545 case clang::X86::BI__builtin_ia32_bextri_u64:
4546 return interp__builtin_elementwise_int_binop(
4547 S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Idx) {
4548 unsigned BitWidth = Val.getBitWidth();
4549 uint64_t Shift = Idx.extractBitsAsZExtValue(numBits: 8, bitPosition: 0);
4550 uint64_t Length = Idx.extractBitsAsZExtValue(numBits: 8, bitPosition: 8);
4551 if (Length > BitWidth) {
4552 Length = BitWidth;
4553 }
4554
4555 // Handle out of bounds cases.
4556 if (Length == 0 || Shift >= BitWidth)
4557 return APInt(BitWidth, 0);
4558
4559 uint64_t Result = Val.getZExtValue() >> Shift;
4560 Result &= llvm::maskTrailingOnes<uint64_t>(N: Length);
4561 return APInt(BitWidth, Result);
4562 });
4563
4564 case clang::X86::BI__builtin_ia32_bzhi_si:
4565 case clang::X86::BI__builtin_ia32_bzhi_di:
4566 return interp__builtin_elementwise_int_binop(
4567 S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Idx) {
4568 unsigned BitWidth = Val.getBitWidth();
4569 uint64_t Index = Idx.extractBitsAsZExtValue(numBits: 8, bitPosition: 0);
4570 APSInt Result = Val;
4571
4572 if (Index < BitWidth)
4573 Result.clearHighBits(hiBits: BitWidth - Index);
4574
4575 return Result;
4576 });
4577
4578 case clang::X86::BI__builtin_ia32_ktestcqi:
4579 case clang::X86::BI__builtin_ia32_ktestchi:
4580 case clang::X86::BI__builtin_ia32_ktestcsi:
4581 case clang::X86::BI__builtin_ia32_ktestcdi:
4582 return interp__builtin_elementwise_int_binop(
4583 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4584 return APInt(sizeof(unsigned char) * 8, (~A & B) == 0);
4585 });
4586
4587 case clang::X86::BI__builtin_ia32_ktestzqi:
4588 case clang::X86::BI__builtin_ia32_ktestzhi:
4589 case clang::X86::BI__builtin_ia32_ktestzsi:
4590 case clang::X86::BI__builtin_ia32_ktestzdi:
4591 return interp__builtin_elementwise_int_binop(
4592 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4593 return APInt(sizeof(unsigned char) * 8, (A & B) == 0);
4594 });
4595
4596 case clang::X86::BI__builtin_ia32_kortestcqi:
4597 case clang::X86::BI__builtin_ia32_kortestchi:
4598 case clang::X86::BI__builtin_ia32_kortestcsi:
4599 case clang::X86::BI__builtin_ia32_kortestcdi:
4600 return interp__builtin_elementwise_int_binop(
4601 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4602 return APInt(sizeof(unsigned char) * 8, ~(A | B) == 0);
4603 });
4604
4605 case clang::X86::BI__builtin_ia32_kortestzqi:
4606 case clang::X86::BI__builtin_ia32_kortestzhi:
4607 case clang::X86::BI__builtin_ia32_kortestzsi:
4608 case clang::X86::BI__builtin_ia32_kortestzdi:
4609 return interp__builtin_elementwise_int_binop(
4610 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4611 return APInt(sizeof(unsigned char) * 8, (A | B) == 0);
4612 });
4613
4614 case clang::X86::BI__builtin_ia32_kshiftliqi:
4615 case clang::X86::BI__builtin_ia32_kshiftlihi:
4616 case clang::X86::BI__builtin_ia32_kshiftlisi:
4617 case clang::X86::BI__builtin_ia32_kshiftlidi:
4618 return interp__builtin_elementwise_int_binop(
4619 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4620 unsigned Amt = RHS.getZExtValue() & 0xFF;
4621 if (Amt >= LHS.getBitWidth())
4622 return APInt::getZero(numBits: LHS.getBitWidth());
4623 return LHS.shl(shiftAmt: Amt);
4624 });
4625
4626 case clang::X86::BI__builtin_ia32_kshiftriqi:
4627 case clang::X86::BI__builtin_ia32_kshiftrihi:
4628 case clang::X86::BI__builtin_ia32_kshiftrisi:
4629 case clang::X86::BI__builtin_ia32_kshiftridi:
4630 return interp__builtin_elementwise_int_binop(
4631 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4632 unsigned Amt = RHS.getZExtValue() & 0xFF;
4633 if (Amt >= LHS.getBitWidth())
4634 return APInt::getZero(numBits: LHS.getBitWidth());
4635 return LHS.lshr(shiftAmt: Amt);
4636 });
4637
4638 case clang::X86::BI__builtin_ia32_lzcnt_u16:
4639 case clang::X86::BI__builtin_ia32_lzcnt_u32:
4640 case clang::X86::BI__builtin_ia32_lzcnt_u64:
4641 return interp__builtin_elementwise_int_unaryop(
4642 S, OpPC, Call, Fn: [](const APSInt &Src) {
4643 return APInt(Src.getBitWidth(), Src.countLeadingZeros());
4644 });
4645
4646 case clang::X86::BI__builtin_ia32_tzcnt_u16:
4647 case clang::X86::BI__builtin_ia32_tzcnt_u32:
4648 case clang::X86::BI__builtin_ia32_tzcnt_u64:
4649 return interp__builtin_elementwise_int_unaryop(
4650 S, OpPC, Call, Fn: [](const APSInt &Src) {
4651 return APInt(Src.getBitWidth(), Src.countTrailingZeros());
4652 });
4653
4654 case clang::X86::BI__builtin_ia32_pdep_si:
4655 case clang::X86::BI__builtin_ia32_pdep_di:
4656 return interp__builtin_elementwise_int_binop(
4657 S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Mask) {
4658 unsigned BitWidth = Val.getBitWidth();
4659 APInt Result = APInt::getZero(numBits: BitWidth);
4660
4661 for (unsigned I = 0, P = 0; I != BitWidth; ++I) {
4662 if (Mask[I])
4663 Result.setBitVal(BitPosition: I, BitValue: Val[P++]);
4664 }
4665
4666 return Result;
4667 });
4668
4669 case clang::X86::BI__builtin_ia32_pext_si:
4670 case clang::X86::BI__builtin_ia32_pext_di:
4671 return interp__builtin_elementwise_int_binop(
4672 S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Mask) {
4673 unsigned BitWidth = Val.getBitWidth();
4674 APInt Result = APInt::getZero(numBits: BitWidth);
4675
          for (unsigned I = 0, P = 0; I != BitWidth; ++I) {
            if (Mask[I])
              Result.setBitVal(P++, Val[I]);
          }

          return Result;
        });

  case clang::X86::BI__builtin_ia32_addcarryx_u32:
  case clang::X86::BI__builtin_ia32_addcarryx_u64:
  case clang::X86::BI__builtin_ia32_subborrow_u32:
  case clang::X86::BI__builtin_ia32_subborrow_u64:
    return interp__builtin_ia32_addcarry_subborrow(S, OpPC, Frame, Call,
                                                   BuiltinID);

  case Builtin::BI__builtin_os_log_format_buffer_size:
    return interp__builtin_os_log_format_buffer_size(S, OpPC, Frame, Call);

  case Builtin::BI__builtin_ptrauth_string_discriminator:
    return interp__builtin_ptrauth_string_discriminator(S, OpPC, Frame, Call);

  case Builtin::BI__builtin_infer_alloc_token:
    return interp__builtin_infer_alloc_token(S, OpPC, Frame, Call);

  case Builtin::BI__noop:
    pushInteger(S, 0, Call->getType());
    return true;

  case Builtin::BI__builtin_operator_new:
    return interp__builtin_operator_new(S, OpPC, Frame, Call);

  case Builtin::BI__builtin_operator_delete:
    return interp__builtin_operator_delete(S, OpPC, Frame, Call);

  case Builtin::BI__arithmetic_fence:
    return interp__builtin_arithmetic_fence(S, OpPC, Frame, Call);

  case Builtin::BI__builtin_reduce_add:
  case Builtin::BI__builtin_reduce_mul:
  case Builtin::BI__builtin_reduce_and:
  case Builtin::BI__builtin_reduce_or:
  case Builtin::BI__builtin_reduce_xor:
  case Builtin::BI__builtin_reduce_min:
  case Builtin::BI__builtin_reduce_max:
    return interp__builtin_vector_reduce(S, OpPC, Call, BuiltinID);

  case Builtin::BI__builtin_elementwise_popcount:
    return interp__builtin_elementwise_int_unaryop(
        S, OpPC, Call, [](const APSInt &Src) {
          return APInt(Src.getBitWidth(), Src.popcount());
        });
  case Builtin::BI__builtin_elementwise_bitreverse:
    return interp__builtin_elementwise_int_unaryop(
        S, OpPC, Call, [](const APSInt &Src) { return Src.reverseBits(); });

  case Builtin::BI__builtin_elementwise_abs:
    return interp__builtin_elementwise_abs(S, OpPC, Frame, Call, BuiltinID);

  case Builtin::BI__builtin_memcpy:
  case Builtin::BImemcpy:
  case Builtin::BI__builtin_wmemcpy:
  case Builtin::BIwmemcpy:
  case Builtin::BI__builtin_memmove:
  case Builtin::BImemmove:
  case Builtin::BI__builtin_wmemmove:
  case Builtin::BIwmemmove:
    return interp__builtin_memcpy(S, OpPC, Frame, Call, BuiltinID);

  case Builtin::BI__builtin_memcmp:
  case Builtin::BImemcmp:
  case Builtin::BI__builtin_bcmp:
  case Builtin::BIbcmp:
  case Builtin::BI__builtin_wmemcmp:
  case Builtin::BIwmemcmp:
    return interp__builtin_memcmp(S, OpPC, Frame, Call, BuiltinID);

  case Builtin::BImemchr:
  case Builtin::BI__builtin_memchr:
  case Builtin::BIstrchr:
  case Builtin::BI__builtin_strchr:
  case Builtin::BIwmemchr:
  case Builtin::BI__builtin_wmemchr:
  case Builtin::BIwcschr:
  case Builtin::BI__builtin_wcschr:
  case Builtin::BI__builtin_char_memchr:
    return interp__builtin_memchr(S, OpPC, Call, BuiltinID);

  case Builtin::BI__builtin_object_size:
  case Builtin::BI__builtin_dynamic_object_size:
    return interp__builtin_object_size(S, OpPC, Frame, Call);

  case Builtin::BI__builtin_is_within_lifetime:
    return interp__builtin_is_within_lifetime(S, OpPC, Call);

  case Builtin::BI__builtin_elementwise_add_sat:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {
          return LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS);
        });

  case Builtin::BI__builtin_elementwise_sub_sat:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {
          return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
        });
  case X86::BI__builtin_ia32_extract128i256:
  case X86::BI__builtin_ia32_vextractf128_pd256:
  case X86::BI__builtin_ia32_vextractf128_ps256:
  case X86::BI__builtin_ia32_vextractf128_si256:
    return interp__builtin_x86_extract_vector(S, OpPC, Call, BuiltinID);

  case X86::BI__builtin_ia32_extractf32x4_256_mask:
  case X86::BI__builtin_ia32_extractf32x4_mask:
  case X86::BI__builtin_ia32_extractf32x8_mask:
  case X86::BI__builtin_ia32_extractf64x2_256_mask:
  case X86::BI__builtin_ia32_extractf64x2_512_mask:
  case X86::BI__builtin_ia32_extractf64x4_mask:
  case X86::BI__builtin_ia32_extracti32x4_256_mask:
  case X86::BI__builtin_ia32_extracti32x4_mask:
  case X86::BI__builtin_ia32_extracti32x8_mask:
  case X86::BI__builtin_ia32_extracti64x2_256_mask:
  case X86::BI__builtin_ia32_extracti64x2_512_mask:
  case X86::BI__builtin_ia32_extracti64x4_mask:
    return interp__builtin_x86_extract_vector_masked(S, OpPC, Call, BuiltinID);

  case clang::X86::BI__builtin_ia32_pmulhrsw128:
  case clang::X86::BI__builtin_ia32_pmulhrsw256:
  case clang::X86::BI__builtin_ia32_pmulhrsw512:
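    // PMULHRSW: form the widened 32-bit product, shift right by 14, add one
    // for rounding, then keep bits [16:1] -- i.e. the high 16 bits of the
    // product, rounded to nearest.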
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {
          return (llvm::APIntOps::mulsExtended(LHS, RHS).ashr(14) + 1)
              .extractBits(16, 1);
        });

  case clang::X86::BI__builtin_ia32_movmskps:
  case clang::X86::BI__builtin_ia32_movmskpd:
  case clang::X86::BI__builtin_ia32_pmovmskb128:
  case clang::X86::BI__builtin_ia32_pmovmskb256:
  case clang::X86::BI__builtin_ia32_movmskps256:
  case clang::X86::BI__builtin_ia32_movmskpd256:
    return interp__builtin_ia32_movmsk_op(S, OpPC, Call);

  case X86::BI__builtin_ia32_psignb128:
  case X86::BI__builtin_ia32_psignb256:
  case X86::BI__builtin_ia32_psignw128:
  case X86::BI__builtin_ia32_psignw256:
  case X86::BI__builtin_ia32_psignd128:
  case X86::BI__builtin_ia32_psignd256:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, [](const APInt &AElem, const APInt &BElem) {
          if (BElem.isZero())
            return APInt::getZero(AElem.getBitWidth());
          if (BElem.isNegative())
            return -AElem;
          return AElem;
        });

  case clang::X86::BI__builtin_ia32_pavgb128:
  case clang::X86::BI__builtin_ia32_pavgw128:
  case clang::X86::BI__builtin_ia32_pavgb256:
  case clang::X86::BI__builtin_ia32_pavgw256:
  case clang::X86::BI__builtin_ia32_pavgb512:
  case clang::X86::BI__builtin_ia32_pavgw512:
    return interp__builtin_elementwise_int_binop(S, OpPC, Call,
                                                 llvm::APIntOps::avgCeilU);

  case clang::X86::BI__builtin_ia32_pmaddubsw128:
  case clang::X86::BI__builtin_ia32_pmaddubsw256:
  case clang::X86::BI__builtin_ia32_pmaddubsw512:
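    // PMADDUBSW multiplies unsigned bytes from the first operand with signed
    // bytes from the second (hence the zext/sext below) and adds adjacent
    // pairs of 16-bit products with signed saturation.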
    return interp__builtin_ia32_pmul(
        S, OpPC, Call,
        [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
           const APSInt &HiRHS) {
          unsigned BitWidth = 2 * LoLHS.getBitWidth();
          return (LoLHS.zext(BitWidth) * LoRHS.sext(BitWidth))
              .sadd_sat((HiLHS.zext(BitWidth) * HiRHS.sext(BitWidth)));
        });

  case clang::X86::BI__builtin_ia32_pmaddwd128:
  case clang::X86::BI__builtin_ia32_pmaddwd256:
  case clang::X86::BI__builtin_ia32_pmaddwd512:
    return interp__builtin_ia32_pmul(
        S, OpPC, Call,
        [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
           const APSInt &HiRHS) {
          unsigned BitWidth = 2 * LoLHS.getBitWidth();
          return (LoLHS.sext(BitWidth) * LoRHS.sext(BitWidth)) +
                 (HiLHS.sext(BitWidth) * HiRHS.sext(BitWidth));
        });

  case clang::X86::BI__builtin_ia32_pmulhuw128:
  case clang::X86::BI__builtin_ia32_pmulhuw256:
  case clang::X86::BI__builtin_ia32_pmulhuw512:
    return interp__builtin_elementwise_int_binop(S, OpPC, Call,
                                                 llvm::APIntOps::mulhu);

  case clang::X86::BI__builtin_ia32_pmulhw128:
  case clang::X86::BI__builtin_ia32_pmulhw256:
  case clang::X86::BI__builtin_ia32_pmulhw512:
    return interp__builtin_elementwise_int_binop(S, OpPC, Call,
                                                 llvm::APIntOps::mulhs);

  case clang::X86::BI__builtin_ia32_psllv2di:
  case clang::X86::BI__builtin_ia32_psllv4di:
  case clang::X86::BI__builtin_ia32_psllv4si:
  case clang::X86::BI__builtin_ia32_psllv8di:
  case clang::X86::BI__builtin_ia32_psllv8hi:
  case clang::X86::BI__builtin_ia32_psllv8si:
  case clang::X86::BI__builtin_ia32_psllv16hi:
  case clang::X86::BI__builtin_ia32_psllv16si:
  case clang::X86::BI__builtin_ia32_psllv32hi:
  case clang::X86::BI__builtin_ia32_psllwi128:
  case clang::X86::BI__builtin_ia32_psllwi256:
  case clang::X86::BI__builtin_ia32_psllwi512:
  case clang::X86::BI__builtin_ia32_pslldi128:
  case clang::X86::BI__builtin_ia32_pslldi256:
  case clang::X86::BI__builtin_ia32_pslldi512:
  case clang::X86::BI__builtin_ia32_psllqi128:
  case clang::X86::BI__builtin_ia32_psllqi256:
  case clang::X86::BI__builtin_ia32_psllqi512:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {
          if (RHS.uge(LHS.getBitWidth()))
            return APInt::getZero(LHS.getBitWidth());
          return LHS.shl(RHS.getZExtValue());
        });

  case clang::X86::BI__builtin_ia32_psrav4si:
  case clang::X86::BI__builtin_ia32_psrav8di:
  case clang::X86::BI__builtin_ia32_psrav8hi:
  case clang::X86::BI__builtin_ia32_psrav8si:
  case clang::X86::BI__builtin_ia32_psrav16hi:
  case clang::X86::BI__builtin_ia32_psrav16si:
  case clang::X86::BI__builtin_ia32_psrav32hi:
  case clang::X86::BI__builtin_ia32_psravq128:
  case clang::X86::BI__builtin_ia32_psravq256:
  case clang::X86::BI__builtin_ia32_psrawi128:
  case clang::X86::BI__builtin_ia32_psrawi256:
  case clang::X86::BI__builtin_ia32_psrawi512:
  case clang::X86::BI__builtin_ia32_psradi128:
  case clang::X86::BI__builtin_ia32_psradi256:
  case clang::X86::BI__builtin_ia32_psradi512:
  case clang::X86::BI__builtin_ia32_psraqi128:
  case clang::X86::BI__builtin_ia32_psraqi256:
  case clang::X86::BI__builtin_ia32_psraqi512:
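    // Arithmetic right shifts saturate: a count >= the element width fills
    // the element with copies of the sign bit (ashr by BitWidth - 1).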
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {
          if (RHS.uge(LHS.getBitWidth()))
            return LHS.ashr(LHS.getBitWidth() - 1);
          return LHS.ashr(RHS.getZExtValue());
        });

  case clang::X86::BI__builtin_ia32_psrlv2di:
  case clang::X86::BI__builtin_ia32_psrlv4di:
  case clang::X86::BI__builtin_ia32_psrlv4si:
  case clang::X86::BI__builtin_ia32_psrlv8di:
  case clang::X86::BI__builtin_ia32_psrlv8hi:
  case clang::X86::BI__builtin_ia32_psrlv8si:
  case clang::X86::BI__builtin_ia32_psrlv16hi:
  case clang::X86::BI__builtin_ia32_psrlv16si:
  case clang::X86::BI__builtin_ia32_psrlv32hi:
  case clang::X86::BI__builtin_ia32_psrlwi128:
  case clang::X86::BI__builtin_ia32_psrlwi256:
  case clang::X86::BI__builtin_ia32_psrlwi512:
  case clang::X86::BI__builtin_ia32_psrldi128:
  case clang::X86::BI__builtin_ia32_psrldi256:
  case clang::X86::BI__builtin_ia32_psrldi512:
  case clang::X86::BI__builtin_ia32_psrlqi128:
  case clang::X86::BI__builtin_ia32_psrlqi256:
  case clang::X86::BI__builtin_ia32_psrlqi512:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {
          if (RHS.uge(LHS.getBitWidth()))
            return APInt::getZero(LHS.getBitWidth());
          return LHS.lshr(RHS.getZExtValue());
        });
  case clang::X86::BI__builtin_ia32_packsswb128:
  case clang::X86::BI__builtin_ia32_packsswb256:
  case clang::X86::BI__builtin_ia32_packsswb512:
  case clang::X86::BI__builtin_ia32_packssdw128:
  case clang::X86::BI__builtin_ia32_packssdw256:
  case clang::X86::BI__builtin_ia32_packssdw512:
    return interp__builtin_x86_pack(S, OpPC, Call, [](const APSInt &Src) {
      return APInt(Src).truncSSat(Src.getBitWidth() / 2);
    });
  case clang::X86::BI__builtin_ia32_packusdw128:
  case clang::X86::BI__builtin_ia32_packusdw256:
  case clang::X86::BI__builtin_ia32_packusdw512:
  case clang::X86::BI__builtin_ia32_packuswb128:
  case clang::X86::BI__builtin_ia32_packuswb256:
  case clang::X86::BI__builtin_ia32_packuswb512:
    return interp__builtin_x86_pack(S, OpPC, Call, [](const APSInt &Src) {
      return APInt(Src).truncSSatU(Src.getBitWidth() / 2);
    });

  case clang::X86::BI__builtin_ia32_selectss_128:
  case clang::X86::BI__builtin_ia32_selectsd_128:
  case clang::X86::BI__builtin_ia32_selectsh_128:
  case clang::X86::BI__builtin_ia32_selectsbf_128:
    return interp__builtin_select_scalar(S, Call);
  case clang::X86::BI__builtin_ia32_vprotbi:
  case clang::X86::BI__builtin_ia32_vprotdi:
  case clang::X86::BI__builtin_ia32_vprotqi:
  case clang::X86::BI__builtin_ia32_vprotwi:
  case clang::X86::BI__builtin_ia32_prold128:
  case clang::X86::BI__builtin_ia32_prold256:
  case clang::X86::BI__builtin_ia32_prold512:
  case clang::X86::BI__builtin_ia32_prolq128:
  case clang::X86::BI__builtin_ia32_prolq256:
  case clang::X86::BI__builtin_ia32_prolq512:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call,
        [](const APSInt &LHS, const APSInt &RHS) { return LHS.rotl(RHS); });

  case clang::X86::BI__builtin_ia32_prord128:
  case clang::X86::BI__builtin_ia32_prord256:
  case clang::X86::BI__builtin_ia32_prord512:
  case clang::X86::BI__builtin_ia32_prorq128:
  case clang::X86::BI__builtin_ia32_prorq256:
  case clang::X86::BI__builtin_ia32_prorq512:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call,
        [](const APSInt &LHS, const APSInt &RHS) { return LHS.rotr(RHS); });

  case Builtin::BI__builtin_elementwise_max:
  case Builtin::BI__builtin_elementwise_min:
    return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);

  case clang::X86::BI__builtin_ia32_phaddw128:
  case clang::X86::BI__builtin_ia32_phaddw256:
  case clang::X86::BI__builtin_ia32_phaddd128:
  case clang::X86::BI__builtin_ia32_phaddd256:
    return interp_builtin_horizontal_int_binop(
        S, OpPC, Call,
        [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
  case clang::X86::BI__builtin_ia32_phaddsw128:
  case clang::X86::BI__builtin_ia32_phaddsw256:
    return interp_builtin_horizontal_int_binop(
        S, OpPC, Call,
        [](const APSInt &LHS, const APSInt &RHS) { return LHS.sadd_sat(RHS); });
  case clang::X86::BI__builtin_ia32_phsubw128:
  case clang::X86::BI__builtin_ia32_phsubw256:
  case clang::X86::BI__builtin_ia32_phsubd128:
  case clang::X86::BI__builtin_ia32_phsubd256:
    return interp_builtin_horizontal_int_binop(
        S, OpPC, Call,
        [](const APSInt &LHS, const APSInt &RHS) { return LHS - RHS; });
  case clang::X86::BI__builtin_ia32_phsubsw128:
  case clang::X86::BI__builtin_ia32_phsubsw256:
    return interp_builtin_horizontal_int_binop(
        S, OpPC, Call,
        [](const APSInt &LHS, const APSInt &RHS) { return LHS.ssub_sat(RHS); });
  case clang::X86::BI__builtin_ia32_haddpd:
  case clang::X86::BI__builtin_ia32_haddps:
  case clang::X86::BI__builtin_ia32_haddpd256:
  case clang::X86::BI__builtin_ia32_haddps256:
    return interp_builtin_horizontal_fp_binop(
        S, OpPC, Call,
        [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
          APFloat F = LHS;
          F.add(RHS, RM);
          return F;
        });
  case clang::X86::BI__builtin_ia32_hsubpd:
  case clang::X86::BI__builtin_ia32_hsubps:
  case clang::X86::BI__builtin_ia32_hsubpd256:
  case clang::X86::BI__builtin_ia32_hsubps256:
    return interp_builtin_horizontal_fp_binop(
        S, OpPC, Call,
        [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
          APFloat F = LHS;
          F.subtract(RHS, RM);
          return F;
        });
  case clang::X86::BI__builtin_ia32_addsubpd:
  case clang::X86::BI__builtin_ia32_addsubps:
  case clang::X86::BI__builtin_ia32_addsubpd256:
  case clang::X86::BI__builtin_ia32_addsubps256:
    return interp__builtin_ia32_addsub(S, OpPC, Call);

  case clang::X86::BI__builtin_ia32_pmuldq128:
  case clang::X86::BI__builtin_ia32_pmuldq256:
  case clang::X86::BI__builtin_ia32_pmuldq512:
    return interp__builtin_ia32_pmul(
        S, OpPC, Call,
        [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
           const APSInt &HiRHS) {
          return llvm::APIntOps::mulsExtended(LoLHS, LoRHS);
        });

  case clang::X86::BI__builtin_ia32_pmuludq128:
  case clang::X86::BI__builtin_ia32_pmuludq256:
  case clang::X86::BI__builtin_ia32_pmuludq512:
    return interp__builtin_ia32_pmul(
        S, OpPC, Call,
        [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
           const APSInt &HiRHS) {
          return llvm::APIntOps::muluExtended(LoLHS, LoRHS);
        });

  case clang::X86::BI__builtin_ia32_pclmulqdq128:
  case clang::X86::BI__builtin_ia32_pclmulqdq256:
  case clang::X86::BI__builtin_ia32_pclmulqdq512:
    return interp__builtin_ia32_pclmulqdq(S, OpPC, Call);

  case Builtin::BI__builtin_elementwise_fma:
    return interp__builtin_elementwise_triop_fp(
        S, OpPC, Call,
        [](const APFloat &X, const APFloat &Y, const APFloat &Z,
           llvm::RoundingMode RM) {
          APFloat F = X;
          F.fusedMultiplyAdd(Y, Z, RM);
          return F;
        });

  case X86::BI__builtin_ia32_vpmadd52luq128:
  case X86::BI__builtin_ia32_vpmadd52luq256:
  case X86::BI__builtin_ia32_vpmadd52luq512:
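    // VPMADD52LUQ adds the low 52 bits of the 52x52-bit product of B and C
    // to the 64-bit accumulator A; the wrapping 52-bit multiply below yields
    // exactly those low bits. The *huq variants further down add the high
    // 52 bits via mulhu instead.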
    return interp__builtin_elementwise_triop(
        S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) {
          return A + (B.trunc(52) * C.trunc(52)).zext(64);
        });
  case X86::BI__builtin_ia32_vpmadd52huq128:
  case X86::BI__builtin_ia32_vpmadd52huq256:
  case X86::BI__builtin_ia32_vpmadd52huq512:
    return interp__builtin_elementwise_triop(
        S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) {
          return A + llvm::APIntOps::mulhu(B.trunc(52), C.trunc(52)).zext(64);
        });

  case X86::BI__builtin_ia32_vpshldd128:
  case X86::BI__builtin_ia32_vpshldd256:
  case X86::BI__builtin_ia32_vpshldd512:
  case X86::BI__builtin_ia32_vpshldq128:
  case X86::BI__builtin_ia32_vpshldq256:
  case X86::BI__builtin_ia32_vpshldq512:
  case X86::BI__builtin_ia32_vpshldw128:
  case X86::BI__builtin_ia32_vpshldw256:
  case X86::BI__builtin_ia32_vpshldw512:
    return interp__builtin_elementwise_triop(
        S, OpPC, Call,
        [](const APSInt &Hi, const APSInt &Lo, const APSInt &Amt) {
          return llvm::APIntOps::fshl(Hi, Lo, Amt);
        });

  case X86::BI__builtin_ia32_vpshrdd128:
  case X86::BI__builtin_ia32_vpshrdd256:
  case X86::BI__builtin_ia32_vpshrdd512:
  case X86::BI__builtin_ia32_vpshrdq128:
  case X86::BI__builtin_ia32_vpshrdq256:
  case X86::BI__builtin_ia32_vpshrdq512:
  case X86::BI__builtin_ia32_vpshrdw128:
  case X86::BI__builtin_ia32_vpshrdw256:
  case X86::BI__builtin_ia32_vpshrdw512:
    // NOTE: Reversed Hi/Lo operands.
    return interp__builtin_elementwise_triop(
        S, OpPC, Call,
        [](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) {
          return llvm::APIntOps::fshr(Hi, Lo, Amt);
        });
  case X86::BI__builtin_ia32_vpconflictsi_128:
  case X86::BI__builtin_ia32_vpconflictsi_256:
  case X86::BI__builtin_ia32_vpconflictsi_512:
  case X86::BI__builtin_ia32_vpconflictdi_128:
  case X86::BI__builtin_ia32_vpconflictdi_256:
  case X86::BI__builtin_ia32_vpconflictdi_512:
    return interp__builtin_ia32_vpconflict(S, OpPC, Call);
  case X86::BI__builtin_ia32_compressdf128_mask:
  case X86::BI__builtin_ia32_compressdf256_mask:
  case X86::BI__builtin_ia32_compressdf512_mask:
  case X86::BI__builtin_ia32_compressdi128_mask:
  case X86::BI__builtin_ia32_compressdi256_mask:
  case X86::BI__builtin_ia32_compressdi512_mask:
  case X86::BI__builtin_ia32_compresshi128_mask:
  case X86::BI__builtin_ia32_compresshi256_mask:
  case X86::BI__builtin_ia32_compresshi512_mask:
  case X86::BI__builtin_ia32_compressqi128_mask:
  case X86::BI__builtin_ia32_compressqi256_mask:
  case X86::BI__builtin_ia32_compressqi512_mask:
  case X86::BI__builtin_ia32_compresssf128_mask:
  case X86::BI__builtin_ia32_compresssf256_mask:
  case X86::BI__builtin_ia32_compresssf512_mask:
  case X86::BI__builtin_ia32_compresssi128_mask:
  case X86::BI__builtin_ia32_compresssi256_mask:
  case X86::BI__builtin_ia32_compresssi512_mask: {
    unsigned NumElems =
        Call->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [NumElems](unsigned DstIdx, const APInt &ShuffleMask) {
          APInt CompressMask = ShuffleMask.trunc(NumElems);
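          // Destination element DstIdx receives the DstIdx'th selected
          // source element. Clearing the lowest set bit DstIdx times
          // (CompressMask &= CompressMask - 1) leaves that element's bit as
          // the lowest set bit, so countr_zero() is its source index.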
          if (DstIdx < CompressMask.popcount()) {
            while (DstIdx != 0) {
              CompressMask = CompressMask & (CompressMask - 1);
              DstIdx--;
            }
            return std::pair<unsigned, int>{
                0, static_cast<int>(CompressMask.countr_zero())};
          }
          return std::pair<unsigned, int>{1, static_cast<int>(DstIdx)};
        });
  }
  case X86::BI__builtin_ia32_expanddf128_mask:
  case X86::BI__builtin_ia32_expanddf256_mask:
  case X86::BI__builtin_ia32_expanddf512_mask:
  case X86::BI__builtin_ia32_expanddi128_mask:
  case X86::BI__builtin_ia32_expanddi256_mask:
  case X86::BI__builtin_ia32_expanddi512_mask:
  case X86::BI__builtin_ia32_expandhi128_mask:
  case X86::BI__builtin_ia32_expandhi256_mask:
  case X86::BI__builtin_ia32_expandhi512_mask:
  case X86::BI__builtin_ia32_expandqi128_mask:
  case X86::BI__builtin_ia32_expandqi256_mask:
  case X86::BI__builtin_ia32_expandqi512_mask:
  case X86::BI__builtin_ia32_expandsf128_mask:
  case X86::BI__builtin_ia32_expandsf256_mask:
  case X86::BI__builtin_ia32_expandsf512_mask:
  case X86::BI__builtin_ia32_expandsi128_mask:
  case X86::BI__builtin_ia32_expandsi256_mask:
  case X86::BI__builtin_ia32_expandsi512_mask: {
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, const APInt &ShuffleMask) {
          // Truncate to the sub-mask up to and including the dst index, and
          // count how many src elements have been consumed before it.
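          // e.g. ShuffleMask = 0b1010, DstIdx = 3: the truncated mask is
          // 0b1010, bit 3 is set, and popcount() - 1 == 1, so dst element 3
          // takes src element 1; unset positions are instead taken from
          // source vector 1 at the same index.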
          APInt ExpandMask = ShuffleMask.trunc(DstIdx + 1);
          if (ExpandMask[DstIdx]) {
            int SrcIdx = ExpandMask.popcount() - 1;
            return std::pair<unsigned, int>{0, SrcIdx};
          }
          return std::pair<unsigned, int>{1, static_cast<int>(DstIdx)};
        });
  }
  case clang::X86::BI__builtin_ia32_blendpd:
  case clang::X86::BI__builtin_ia32_blendpd256:
  case clang::X86::BI__builtin_ia32_blendps:
  case clang::X86::BI__builtin_ia32_blendps256:
  case clang::X86::BI__builtin_ia32_pblendw128:
  case clang::X86::BI__builtin_ia32_pblendw256:
  case clang::X86::BI__builtin_ia32_pblendd128:
  case clang::X86::BI__builtin_ia32_pblendd256:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          // The imm8 mask repeats every 8 elements; bit (DstIdx % 8) picks
          // the source vector for this element.
          unsigned MaskBit = (ShuffleMask >> (DstIdx % 8)) & 0x1;
          unsigned SrcVecIdx = MaskBit ? 1 : 0; // 1 = TrueVec, 0 = FalseVec
          return std::pair<unsigned, int>{SrcVecIdx, static_cast<int>(DstIdx)};
        });

  case clang::X86::BI__builtin_ia32_blendvpd:
  case clang::X86::BI__builtin_ia32_blendvpd256:
  case clang::X86::BI__builtin_ia32_blendvps:
  case clang::X86::BI__builtin_ia32_blendvps256:
    return interp__builtin_elementwise_triop_fp(
        S, OpPC, Call,
        [](const APFloat &F, const APFloat &T, const APFloat &C,
           llvm::RoundingMode) { return C.isNegative() ? T : F; });

  case clang::X86::BI__builtin_ia32_pblendvb128:
  case clang::X86::BI__builtin_ia32_pblendvb256:
    return interp__builtin_elementwise_triop(
        S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
          return ((APInt)C).isNegative() ? T : F;
        });
  case X86::BI__builtin_ia32_ptestz128:
  case X86::BI__builtin_ia32_ptestz256:
  case X86::BI__builtin_ia32_vtestzps:
  case X86::BI__builtin_ia32_vtestzps256:
  case X86::BI__builtin_ia32_vtestzpd:
  case X86::BI__builtin_ia32_vtestzpd256:
    return interp__builtin_ia32_test_op(
        S, OpPC, Call,
        [](const APInt &A, const APInt &B) { return (A & B) == 0; });
  case X86::BI__builtin_ia32_ptestc128:
  case X86::BI__builtin_ia32_ptestc256:
  case X86::BI__builtin_ia32_vtestcps:
  case X86::BI__builtin_ia32_vtestcps256:
  case X86::BI__builtin_ia32_vtestcpd:
  case X86::BI__builtin_ia32_vtestcpd256:
    return interp__builtin_ia32_test_op(
        S, OpPC, Call,
        [](const APInt &A, const APInt &B) { return (~A & B) == 0; });
  case X86::BI__builtin_ia32_ptestnzc128:
  case X86::BI__builtin_ia32_ptestnzc256:
  case X86::BI__builtin_ia32_vtestnzcps:
  case X86::BI__builtin_ia32_vtestnzcps256:
  case X86::BI__builtin_ia32_vtestnzcpd:
  case X86::BI__builtin_ia32_vtestnzcpd256:
    return interp__builtin_ia32_test_op(
        S, OpPC, Call, [](const APInt &A, const APInt &B) {
          return ((A & B) != 0) && ((~A & B) != 0);
        });
  case X86::BI__builtin_ia32_selectb_128:
  case X86::BI__builtin_ia32_selectb_256:
  case X86::BI__builtin_ia32_selectb_512:
  case X86::BI__builtin_ia32_selectw_128:
  case X86::BI__builtin_ia32_selectw_256:
  case X86::BI__builtin_ia32_selectw_512:
  case X86::BI__builtin_ia32_selectd_128:
  case X86::BI__builtin_ia32_selectd_256:
  case X86::BI__builtin_ia32_selectd_512:
  case X86::BI__builtin_ia32_selectq_128:
  case X86::BI__builtin_ia32_selectq_256:
  case X86::BI__builtin_ia32_selectq_512:
  case X86::BI__builtin_ia32_selectph_128:
  case X86::BI__builtin_ia32_selectph_256:
  case X86::BI__builtin_ia32_selectph_512:
  case X86::BI__builtin_ia32_selectpbf_128:
  case X86::BI__builtin_ia32_selectpbf_256:
  case X86::BI__builtin_ia32_selectpbf_512:
  case X86::BI__builtin_ia32_selectps_128:
  case X86::BI__builtin_ia32_selectps_256:
  case X86::BI__builtin_ia32_selectps_512:
  case X86::BI__builtin_ia32_selectpd_128:
  case X86::BI__builtin_ia32_selectpd_256:
  case X86::BI__builtin_ia32_selectpd_512:
    return interp__builtin_select(S, OpPC, Call);

  case X86::BI__builtin_ia32_shufps:
  case X86::BI__builtin_ia32_shufps256:
  case X86::BI__builtin_ia32_shufps512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned NumElemPerLane = 4;
          unsigned NumSelectableElems = NumElemPerLane / 2;
          unsigned BitsPerElem = 2;
          unsigned IndexMask = 0x3;
          unsigned MaskBits = 8;
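          // e.g. ShuffleMask = 0x1B (0b00'01'10'11): within each lane the
          // result is {A[3], A[2], B[1], B[0]} -- the low two dst elements
          // index into the first source, the high two into the second.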
          unsigned Lane = DstIdx / NumElemPerLane;
          unsigned ElemInLane = DstIdx % NumElemPerLane;
          unsigned LaneOffset = Lane * NumElemPerLane;
          unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
          unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
          unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
          return std::pair<unsigned, int>{SrcIdx,
                                          static_cast<int>(LaneOffset + Index)};
        });
  case X86::BI__builtin_ia32_shufpd:
  case X86::BI__builtin_ia32_shufpd256:
  case X86::BI__builtin_ia32_shufpd512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned NumElemPerLane = 2;
          unsigned NumSelectableElems = NumElemPerLane / 2;
          unsigned BitsPerElem = 1;
          unsigned IndexMask = 0x1;
          unsigned MaskBits = 8;
          unsigned Lane = DstIdx / NumElemPerLane;
          unsigned ElemInLane = DstIdx % NumElemPerLane;
          unsigned LaneOffset = Lane * NumElemPerLane;
          unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
          unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
          unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
          return std::pair<unsigned, int>{SrcIdx,
                                          static_cast<int>(LaneOffset + Index)};
        });

  case X86::BI__builtin_ia32_vgf2p8affineinvqb_v16qi:
  case X86::BI__builtin_ia32_vgf2p8affineinvqb_v32qi:
  case X86::BI__builtin_ia32_vgf2p8affineinvqb_v64qi:
    return interp_builtin_ia32_gfni_affine(S, OpPC, Call, /*Inverse=*/true);
  case X86::BI__builtin_ia32_vgf2p8affineqb_v16qi:
  case X86::BI__builtin_ia32_vgf2p8affineqb_v32qi:
  case X86::BI__builtin_ia32_vgf2p8affineqb_v64qi:
    return interp_builtin_ia32_gfni_affine(S, OpPC, Call, /*Inverse=*/false);

  case X86::BI__builtin_ia32_vgf2p8mulb_v16qi:
  case X86::BI__builtin_ia32_vgf2p8mulb_v32qi:
  case X86::BI__builtin_ia32_vgf2p8mulb_v64qi:
    return interp__builtin_ia32_gfni_mul(S, OpPC, Call);

  case X86::BI__builtin_ia32_insertps128:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned Mask) {
          // Bits [3:0]: zero mask -- if a bit is set, zero that element.
          if ((Mask & (1 << DstIdx)) != 0)
            return std::pair<unsigned, int>{0, -1};
          // Bits [7:6]: select the element from source vector B (0-3).
          // Bits [5:4]: select the destination position (0-3).
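          // e.g. Mask = 0x61 (0b01'10'0001): insert element 1 of B at dst
          // position 2 and zero dst element 0, giving {0, A[1], B[1], A[3]}.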
          unsigned SrcElem = (Mask >> 6) & 0x3;
          unsigned DstElem = (Mask >> 4) & 0x3;
          if (DstIdx == DstElem) {
            // Insert the selected element from source vector B here.
            return std::pair<unsigned, int>{1, static_cast<int>(SrcElem)};
          }
          // Otherwise copy the destination vector (A) through unchanged.
          return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)};
        });
  case X86::BI__builtin_ia32_permvarsi256:
  case X86::BI__builtin_ia32_permvarsf256:
  case X86::BI__builtin_ia32_permvardf512:
  case X86::BI__builtin_ia32_permvardi512:
  case X86::BI__builtin_ia32_permvarhi128:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x7;
          return std::pair<unsigned, int>{0, Offset};
        });
  case X86::BI__builtin_ia32_permvarqi128:
  case X86::BI__builtin_ia32_permvarhi256:
  case X86::BI__builtin_ia32_permvarsi512:
  case X86::BI__builtin_ia32_permvarsf512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0xF;
          return std::pair<unsigned, int>{0, Offset};
        });
  case X86::BI__builtin_ia32_permvardi256:
  case X86::BI__builtin_ia32_permvardf256:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x3;
          return std::pair<unsigned, int>{0, Offset};
        });
  case X86::BI__builtin_ia32_permvarqi256:
  case X86::BI__builtin_ia32_permvarhi512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x1F;
          return std::pair<unsigned, int>{0, Offset};
        });
  case X86::BI__builtin_ia32_permvarqi512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x3F;
          return std::pair<unsigned, int>{0, Offset};
        });
  case X86::BI__builtin_ia32_vpermi2varq128:
  case X86::BI__builtin_ia32_vpermi2varpd128:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x1;
          unsigned SrcIdx = (ShuffleMask >> 1) & 0x1;
          return std::pair<unsigned, int>{SrcIdx, Offset};
        });
  case X86::BI__builtin_ia32_vpermi2vard128:
  case X86::BI__builtin_ia32_vpermi2varps128:
  case X86::BI__builtin_ia32_vpermi2varq256:
  case X86::BI__builtin_ia32_vpermi2varpd256:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x3;
          unsigned SrcIdx = (ShuffleMask >> 2) & 0x1;
          return std::pair<unsigned, int>{SrcIdx, Offset};
        });
  case X86::BI__builtin_ia32_vpermi2varhi128:
  case X86::BI__builtin_ia32_vpermi2vard256:
  case X86::BI__builtin_ia32_vpermi2varps256:
  case X86::BI__builtin_ia32_vpermi2varq512:
  case X86::BI__builtin_ia32_vpermi2varpd512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x7;
          unsigned SrcIdx = (ShuffleMask >> 3) & 0x1;
          return std::pair<unsigned, int>{SrcIdx, Offset};
        });
  case X86::BI__builtin_ia32_vpermi2varqi128:
  case X86::BI__builtin_ia32_vpermi2varhi256:
  case X86::BI__builtin_ia32_vpermi2vard512:
  case X86::BI__builtin_ia32_vpermi2varps512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0xF;
          unsigned SrcIdx = (ShuffleMask >> 4) & 0x1;
          return std::pair<unsigned, int>{SrcIdx, Offset};
        });
  case X86::BI__builtin_ia32_vpermi2varqi256:
  case X86::BI__builtin_ia32_vpermi2varhi512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x1F;
          unsigned SrcIdx = (ShuffleMask >> 5) & 0x1;
          return std::pair<unsigned, int>{SrcIdx, Offset};
        });
  case X86::BI__builtin_ia32_vpermi2varqi512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x3F;
          unsigned SrcIdx = (ShuffleMask >> 6) & 0x1;
          return std::pair<unsigned, int>{SrcIdx, Offset};
        });
  case X86::BI__builtin_ia32_vperm2f128_pd256:
  case X86::BI__builtin_ia32_vperm2f128_ps256:
  case X86::BI__builtin_ia32_vperm2f128_si256:
  case X86::BI__builtin_ia32_permti256: {
    unsigned NumElements =
        Call->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
    unsigned PreservedBitsCnt = NumElements >> 2;
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call,
        [PreservedBitsCnt](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned ControlBitsCnt = DstIdx >> PreservedBitsCnt << 2;
          unsigned ControlBits = ShuffleMask >> ControlBitsCnt;
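
          // Each 128-bit destination lane consumes a 4-bit field of the
          // immediate: bit 3 zeroes the lane, bit 1 selects the source
          // vector, and bit 0 selects which 128-bit half of it to copy.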
          if (ControlBits & 0b1000)
            return std::make_pair(0u, -1);

          unsigned SrcVecIdx = (ControlBits & 0b10) >> 1;
          unsigned PreservedBitsMask = (1 << PreservedBitsCnt) - 1;
          int SrcIdx = ((ControlBits & 0b1) << PreservedBitsCnt) |
                       (DstIdx & PreservedBitsMask);
          return std::make_pair(SrcVecIdx, SrcIdx);
        });
  }
  case X86::BI__builtin_ia32_pshufb128:
  case X86::BI__builtin_ia32_pshufb256:
  case X86::BI__builtin_ia32_pshufb512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          uint8_t Ctlb = static_cast<uint8_t>(ShuffleMask);
          if (Ctlb & 0x80)
            return std::make_pair(0, -1);

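          // Bit 7 of the control byte zeroes the result byte (handled
          // above); otherwise its low four bits index a byte within the same
          // 16-byte lane, e.g. Ctlb = 0x05 at DstIdx 17 picks byte 16 + 5.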
          unsigned LaneBase = (DstIdx / 16) * 16;
          unsigned SrcOffset = Ctlb & 0x0F;
          unsigned SrcIdx = LaneBase + SrcOffset;
          return std::make_pair(0, static_cast<int>(SrcIdx));
        });

  case X86::BI__builtin_ia32_pshuflw:
  case X86::BI__builtin_ia32_pshuflw256:
  case X86::BI__builtin_ia32_pshuflw512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned LaneBase = (DstIdx / 8) * 8;
          unsigned LaneIdx = DstIdx % 8;
          if (LaneIdx < 4) {
            unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
            return std::make_pair(0, static_cast<int>(LaneBase + Sel));
          }

          return std::make_pair(0, static_cast<int>(DstIdx));
        });

  case X86::BI__builtin_ia32_pshufhw:
  case X86::BI__builtin_ia32_pshufhw256:
  case X86::BI__builtin_ia32_pshufhw512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned LaneBase = (DstIdx / 8) * 8;
          unsigned LaneIdx = DstIdx % 8;
          if (LaneIdx >= 4) {
            unsigned Sel = (ShuffleMask >> (2 * (LaneIdx - 4))) & 0x3;
            return std::make_pair(0, static_cast<int>(LaneBase + 4 + Sel));
          }

          return std::make_pair(0, static_cast<int>(DstIdx));
        });

  case X86::BI__builtin_ia32_pshufd:
  case X86::BI__builtin_ia32_pshufd256:
  case X86::BI__builtin_ia32_pshufd512:
  case X86::BI__builtin_ia32_vpermilps:
  case X86::BI__builtin_ia32_vpermilps256:
  case X86::BI__builtin_ia32_vpermilps512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned LaneBase = (DstIdx / 4) * 4;
          unsigned LaneIdx = DstIdx % 4;
          unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
          return std::make_pair(0, static_cast<int>(LaneBase + Sel));
        });

  case X86::BI__builtin_ia32_vpermilvarpd:
  case X86::BI__builtin_ia32_vpermilvarpd256:
  case X86::BI__builtin_ia32_vpermilvarpd512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned NumElemPerLane = 2;
          unsigned Lane = DstIdx / NumElemPerLane;
          unsigned Offset = ShuffleMask & 0b10 ? 1 : 0;
          return std::make_pair(
              0, static_cast<int>(Lane * NumElemPerLane + Offset));
        });

  case X86::BI__builtin_ia32_vpermilvarps:
  case X86::BI__builtin_ia32_vpermilvarps256:
  case X86::BI__builtin_ia32_vpermilvarps512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned NumElemPerLane = 4;
          unsigned Lane = DstIdx / NumElemPerLane;
          unsigned Offset = ShuffleMask & 0b11;
          return std::make_pair(
              0, static_cast<int>(Lane * NumElemPerLane + Offset));
        });

  case X86::BI__builtin_ia32_vpermilpd:
  case X86::BI__builtin_ia32_vpermilpd256:
  case X86::BI__builtin_ia32_vpermilpd512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned Control) {
          unsigned NumElemPerLane = 2;
          unsigned BitsPerElem = 1;
          unsigned MaskBits = 8;
          unsigned IndexMask = 0x1;
          unsigned Lane = DstIdx / NumElemPerLane;
          unsigned LaneOffset = Lane * NumElemPerLane;
          unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
          unsigned Index = (Control >> BitIndex) & IndexMask;
          return std::make_pair(0, static_cast<int>(LaneOffset + Index));
        });

  case X86::BI__builtin_ia32_permdf256:
  case X86::BI__builtin_ia32_permdi256:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned Control) {
          // permute4x64 operates on four 64-bit elements: element i (0-3)
          // takes its source index from bits [2*i+1:2*i] of Control.
          unsigned Index = (Control >> (2 * DstIdx)) & 0x3;
          return std::make_pair(0, static_cast<int>(Index));
        });

  case X86::BI__builtin_ia32_vpmultishiftqb128:
  case X86::BI__builtin_ia32_vpmultishiftqb256:
  case X86::BI__builtin_ia32_vpmultishiftqb512:
    return interp__builtin_ia32_multishiftqb(S, OpPC, Call);
  case X86::BI__builtin_ia32_kandqi:
  case X86::BI__builtin_ia32_kandhi:
  case X86::BI__builtin_ia32_kandsi:
  case X86::BI__builtin_ia32_kanddi:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call,
        [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; });

  case X86::BI__builtin_ia32_kandnqi:
  case X86::BI__builtin_ia32_kandnhi:
  case X86::BI__builtin_ia32_kandnsi:
  case X86::BI__builtin_ia32_kandndi:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call,
        [](const APSInt &LHS, const APSInt &RHS) { return ~LHS & RHS; });

  case X86::BI__builtin_ia32_korqi:
  case X86::BI__builtin_ia32_korhi:
  case X86::BI__builtin_ia32_korsi:
  case X86::BI__builtin_ia32_kordi:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call,
        [](const APSInt &LHS, const APSInt &RHS) { return LHS | RHS; });

  case X86::BI__builtin_ia32_kxnorqi:
  case X86::BI__builtin_ia32_kxnorhi:
  case X86::BI__builtin_ia32_kxnorsi:
  case X86::BI__builtin_ia32_kxnordi:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call,
        [](const APSInt &LHS, const APSInt &RHS) { return ~(LHS ^ RHS); });

  case X86::BI__builtin_ia32_kxorqi:
  case X86::BI__builtin_ia32_kxorhi:
  case X86::BI__builtin_ia32_kxorsi:
  case X86::BI__builtin_ia32_kxordi:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call,
        [](const APSInt &LHS, const APSInt &RHS) { return LHS ^ RHS; });

  case X86::BI__builtin_ia32_knotqi:
  case X86::BI__builtin_ia32_knothi:
  case X86::BI__builtin_ia32_knotsi:
  case X86::BI__builtin_ia32_knotdi:
    return interp__builtin_elementwise_int_unaryop(
        S, OpPC, Call, [](const APSInt &Src) { return ~Src; });

  case X86::BI__builtin_ia32_kaddqi:
  case X86::BI__builtin_ia32_kaddhi:
  case X86::BI__builtin_ia32_kaddsi:
  case X86::BI__builtin_ia32_kadddi:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call,
        [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });

  case X86::BI__builtin_ia32_kmovb:
  case X86::BI__builtin_ia32_kmovw:
  case X86::BI__builtin_ia32_kmovd:
  case X86::BI__builtin_ia32_kmovq:
    return interp__builtin_elementwise_int_unaryop(
        S, OpPC, Call, [](const APSInt &Src) { return Src; });

  case X86::BI__builtin_ia32_kunpckhi:
  case X86::BI__builtin_ia32_kunpckdi:
  case X86::BI__builtin_ia32_kunpcksi:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, [](const APSInt &A, const APSInt &B) {
          // Generic kunpck: extract the lower half of each operand and
          // concatenate them: Result = A[HalfWidth-1:0] : B[HalfWidth-1:0].
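          // e.g. with 16-bit operands A = 0x00AB and B = 0x00CD the result
          // is 0xABCD.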
          unsigned BW = A.getBitWidth();
          return APSInt(A.trunc(BW / 2).concat(B.trunc(BW / 2)),
                        A.isUnsigned());
        });

  case X86::BI__builtin_ia32_phminposuw128:
    return interp__builtin_ia32_phminposuw(S, OpPC, Call);

  case X86::BI__builtin_ia32_psraq128:
  case X86::BI__builtin_ia32_psraq256:
  case X86::BI__builtin_ia32_psraq512:
  case X86::BI__builtin_ia32_psrad128:
  case X86::BI__builtin_ia32_psrad256:
  case X86::BI__builtin_ia32_psrad512:
  case X86::BI__builtin_ia32_psraw128:
  case X86::BI__builtin_ia32_psraw256:
  case X86::BI__builtin_ia32_psraw512:
    return interp__builtin_ia32_shift_with_count(
        S, OpPC, Call,
        [](const APInt &Elt, uint64_t Count) { return Elt.ashr(Count); },
        [](const APInt &Elt, unsigned Width) { return Elt.ashr(Width - 1); });

  case X86::BI__builtin_ia32_psllq128:
  case X86::BI__builtin_ia32_psllq256:
  case X86::BI__builtin_ia32_psllq512:
  case X86::BI__builtin_ia32_pslld128:
  case X86::BI__builtin_ia32_pslld256:
  case X86::BI__builtin_ia32_pslld512:
  case X86::BI__builtin_ia32_psllw128:
  case X86::BI__builtin_ia32_psllw256:
  case X86::BI__builtin_ia32_psllw512:
    return interp__builtin_ia32_shift_with_count(
        S, OpPC, Call,
        [](const APInt &Elt, uint64_t Count) { return Elt.shl(Count); },
        [](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); });

  case X86::BI__builtin_ia32_psrlq128:
  case X86::BI__builtin_ia32_psrlq256:
  case X86::BI__builtin_ia32_psrlq512:
  case X86::BI__builtin_ia32_psrld128:
  case X86::BI__builtin_ia32_psrld256:
  case X86::BI__builtin_ia32_psrld512:
  case X86::BI__builtin_ia32_psrlw128:
  case X86::BI__builtin_ia32_psrlw256:
  case X86::BI__builtin_ia32_psrlw512:
    return interp__builtin_ia32_shift_with_count(
        S, OpPC, Call,
        [](const APInt &Elt, uint64_t Count) { return Elt.lshr(Count); },
        [](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); });

  case X86::BI__builtin_ia32_pternlogd128_mask:
  case X86::BI__builtin_ia32_pternlogd256_mask:
  case X86::BI__builtin_ia32_pternlogd512_mask:
  case X86::BI__builtin_ia32_pternlogq128_mask:
  case X86::BI__builtin_ia32_pternlogq256_mask:
  case X86::BI__builtin_ia32_pternlogq512_mask:
    return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/false);
  case X86::BI__builtin_ia32_pternlogd128_maskz:
  case X86::BI__builtin_ia32_pternlogd256_maskz:
  case X86::BI__builtin_ia32_pternlogd512_maskz:
  case X86::BI__builtin_ia32_pternlogq128_maskz:
  case X86::BI__builtin_ia32_pternlogq256_maskz:
  case X86::BI__builtin_ia32_pternlogq512_maskz:
    return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/true);
  case Builtin::BI__builtin_elementwise_fshl:
    return interp__builtin_elementwise_triop(S, OpPC, Call,
                                             llvm::APIntOps::fshl);
  case Builtin::BI__builtin_elementwise_fshr:
    return interp__builtin_elementwise_triop(S, OpPC, Call,
                                             llvm::APIntOps::fshr);

  case X86::BI__builtin_ia32_shuf_f32x4_256:
  case X86::BI__builtin_ia32_shuf_i32x4_256:
  case X86::BI__builtin_ia32_shuf_f64x2_256:
  case X86::BI__builtin_ia32_shuf_i64x2_256:
  case X86::BI__builtin_ia32_shuf_f32x4:
  case X86::BI__builtin_ia32_shuf_i32x4:
  case X86::BI__builtin_ia32_shuf_f64x2:
  case X86::BI__builtin_ia32_shuf_i64x2: {
    // Destination and sources A, B all have the same type.
    QualType VecQT = Call->getArg(0)->getType();
    const auto *VecT = VecQT->castAs<VectorType>();
    unsigned NumElems = VecT->getNumElements();
    unsigned ElemBits = S.getASTContext().getTypeSize(VecT->getElementType());
    unsigned LaneBits = 128u;
    unsigned NumLanes = (NumElems * ElemBits) / LaneBits;
    unsigned NumElemsPerLane = LaneBits / ElemBits;

    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call,
        [NumLanes, NumElemsPerLane](unsigned DstIdx, unsigned ShuffleMask) {
          // DstIdx determines the source; ShuffleMask selects the lane
          // within that source.
          unsigned BitsPerElem = NumLanes / 2;
          unsigned IndexMask = (1u << BitsPerElem) - 1;
          unsigned Lane = DstIdx / NumElemsPerLane;
          unsigned SrcIdx = (Lane < NumLanes / 2) ? 0 : 1;
          unsigned BitIdx = BitsPerElem * Lane;
          unsigned SrcLaneIdx = (ShuffleMask >> BitIdx) & IndexMask;
          unsigned ElemInLane = DstIdx % NumElemsPerLane;
          unsigned IdxToPick = SrcLaneIdx * NumElemsPerLane + ElemInLane;
          return std::pair<unsigned, int>{SrcIdx, static_cast<int>(IdxToPick)};
        });
  }

  case X86::BI__builtin_ia32_insertf32x4_256:
  case X86::BI__builtin_ia32_inserti32x4_256:
  case X86::BI__builtin_ia32_insertf64x2_256:
  case X86::BI__builtin_ia32_inserti64x2_256:
  case X86::BI__builtin_ia32_insertf32x4:
  case X86::BI__builtin_ia32_inserti32x4:
  case X86::BI__builtin_ia32_insertf64x2_512:
  case X86::BI__builtin_ia32_inserti64x2_512:
  case X86::BI__builtin_ia32_insertf32x8:
  case X86::BI__builtin_ia32_inserti32x8:
  case X86::BI__builtin_ia32_insertf64x4:
  case X86::BI__builtin_ia32_inserti64x4:
  case X86::BI__builtin_ia32_vinsertf128_ps256:
  case X86::BI__builtin_ia32_vinsertf128_pd256:
  case X86::BI__builtin_ia32_vinsertf128_si256:
  case X86::BI__builtin_ia32_insert128i256:
    return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);

  case clang::X86::BI__builtin_ia32_vcvtps2ph:
  case clang::X86::BI__builtin_ia32_vcvtps2ph256:
    return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call);

  case X86::BI__builtin_ia32_vec_ext_v4hi:
  case X86::BI__builtin_ia32_vec_ext_v16qi:
  case X86::BI__builtin_ia32_vec_ext_v8hi:
  case X86::BI__builtin_ia32_vec_ext_v4si:
  case X86::BI__builtin_ia32_vec_ext_v2di:
  case X86::BI__builtin_ia32_vec_ext_v32qi:
  case X86::BI__builtin_ia32_vec_ext_v16hi:
  case X86::BI__builtin_ia32_vec_ext_v8si:
  case X86::BI__builtin_ia32_vec_ext_v4di:
  case X86::BI__builtin_ia32_vec_ext_v4sf:
    return interp__builtin_vec_ext(S, OpPC, Call, BuiltinID);

  case X86::BI__builtin_ia32_vec_set_v4hi:
  case X86::BI__builtin_ia32_vec_set_v16qi:
  case X86::BI__builtin_ia32_vec_set_v8hi:
  case X86::BI__builtin_ia32_vec_set_v4si:
  case X86::BI__builtin_ia32_vec_set_v2di:
  case X86::BI__builtin_ia32_vec_set_v32qi:
  case X86::BI__builtin_ia32_vec_set_v16hi:
  case X86::BI__builtin_ia32_vec_set_v8si:
  case X86::BI__builtin_ia32_vec_set_v4di:
    return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);

  case X86::BI__builtin_ia32_cvtb2mask128:
  case X86::BI__builtin_ia32_cvtb2mask256:
  case X86::BI__builtin_ia32_cvtb2mask512:
  case X86::BI__builtin_ia32_cvtw2mask128:
  case X86::BI__builtin_ia32_cvtw2mask256:
  case X86::BI__builtin_ia32_cvtw2mask512:
  case X86::BI__builtin_ia32_cvtd2mask128:
  case X86::BI__builtin_ia32_cvtd2mask256:
  case X86::BI__builtin_ia32_cvtd2mask512:
  case X86::BI__builtin_ia32_cvtq2mask128:
  case X86::BI__builtin_ia32_cvtq2mask256:
  case X86::BI__builtin_ia32_cvtq2mask512:
    return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, BuiltinID);

  case X86::BI__builtin_ia32_cvtmask2b128:
  case X86::BI__builtin_ia32_cvtmask2b256:
  case X86::BI__builtin_ia32_cvtmask2b512:
  case X86::BI__builtin_ia32_cvtmask2w128:
  case X86::BI__builtin_ia32_cvtmask2w256:
  case X86::BI__builtin_ia32_cvtmask2w512:
  case X86::BI__builtin_ia32_cvtmask2d128:
  case X86::BI__builtin_ia32_cvtmask2d256:
  case X86::BI__builtin_ia32_cvtmask2d512:
  case X86::BI__builtin_ia32_cvtmask2q128:
  case X86::BI__builtin_ia32_cvtmask2q256:
  case X86::BI__builtin_ia32_cvtmask2q512:
    return interp__builtin_ia32_cvt_mask2vec(S, OpPC, Call, BuiltinID);

  case X86::BI__builtin_ia32_cvtsd2ss:
    return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call,
                                         /*HasRoundingMask=*/false);

  case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
    return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call,
                                         /*HasRoundingMask=*/true);

  case X86::BI__builtin_ia32_cvtpd2ps:
  case X86::BI__builtin_ia32_cvtpd2ps256:
    return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, /*IsMasked=*/false,
                                         /*HasRounding=*/false);
  case X86::BI__builtin_ia32_cvtpd2ps_mask:
    return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, /*IsMasked=*/true,
                                         /*HasRounding=*/false);
  case X86::BI__builtin_ia32_cvtpd2ps512_mask:
    return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, /*IsMasked=*/true,
                                         /*HasRounding=*/true);

  case X86::BI__builtin_ia32_cmpb128_mask:
  case X86::BI__builtin_ia32_cmpw128_mask:
  case X86::BI__builtin_ia32_cmpd128_mask:
  case X86::BI__builtin_ia32_cmpq128_mask:
  case X86::BI__builtin_ia32_cmpb256_mask:
  case X86::BI__builtin_ia32_cmpw256_mask:
  case X86::BI__builtin_ia32_cmpd256_mask:
  case X86::BI__builtin_ia32_cmpq256_mask:
  case X86::BI__builtin_ia32_cmpb512_mask:
  case X86::BI__builtin_ia32_cmpw512_mask:
  case X86::BI__builtin_ia32_cmpd512_mask:
  case X86::BI__builtin_ia32_cmpq512_mask:
    return interp__builtin_ia32_cmp_mask(S, OpPC, Call, BuiltinID,
                                         /*IsUnsigned=*/false);

  case X86::BI__builtin_ia32_ucmpb128_mask:
  case X86::BI__builtin_ia32_ucmpw128_mask:
  case X86::BI__builtin_ia32_ucmpd128_mask:
  case X86::BI__builtin_ia32_ucmpq128_mask:
  case X86::BI__builtin_ia32_ucmpb256_mask:
  case X86::BI__builtin_ia32_ucmpw256_mask:
  case X86::BI__builtin_ia32_ucmpd256_mask:
  case X86::BI__builtin_ia32_ucmpq256_mask:
  case X86::BI__builtin_ia32_ucmpb512_mask:
  case X86::BI__builtin_ia32_ucmpw512_mask:
  case X86::BI__builtin_ia32_ucmpd512_mask:
  case X86::BI__builtin_ia32_ucmpq512_mask:
    return interp__builtin_ia32_cmp_mask(S, OpPC, Call, BuiltinID,
                                         /*IsUnsigned=*/true);

  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
  case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
    return interp__builtin_ia32_shufbitqmb_mask(S, OpPC, Call);

  case X86::BI__builtin_ia32_pslldqi128_byteshift:
  case X86::BI__builtin_ia32_pslldqi256_byteshift:
  case X86::BI__builtin_ia32_pslldqi512_byteshift:
    // These SLLDQ intrinsics always shift whole bytes. The lane width is
    // hardcoded to 16 bytes (one 128-bit lane, independent of the total
    // vector width), and the shuffle callback below maps one byte per
    // destination index.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call,
        [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
          unsigned LaneBase = (DstIdx / 16) * 16;
          unsigned LaneIdx = DstIdx % 16;
          if (LaneIdx < Shift)
            return std::make_pair(0, -1);

          return std::make_pair(0,
                                static_cast<int>(LaneBase + LaneIdx - Shift));
        });

  case X86::BI__builtin_ia32_psrldqi128_byteshift:
  case X86::BI__builtin_ia32_psrldqi256_byteshift:
  case X86::BI__builtin_ia32_psrldqi512_byteshift:
    // Like SLLDQ above, these SRLDQ intrinsics shift whole bytes within
    // each 16-byte (128-bit) lane.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call,
        [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
          unsigned LaneBase = (DstIdx / 16) * 16;
          unsigned LaneIdx = DstIdx % 16;
          if (LaneIdx + Shift < 16)
            return std::make_pair(0,
                                  static_cast<int>(LaneBase + LaneIdx + Shift));

          return std::make_pair(0, -1);
        });

  case X86::BI__builtin_ia32_palignr128:
  case X86::BI__builtin_ia32_palignr256:
  case X86::BI__builtin_ia32_palignr512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [](unsigned DstIdx, unsigned Shift) {
          // Default to -1, i.e. zero-fill this destination element.
          unsigned VecIdx = 1;
          int ElemIdx = -1;

          int Lane = DstIdx / 16;
          int Offset = DstIdx % 16;

          // Elements come from VecB first, then VecA after the shift
          // boundary.
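          // e.g. Shift = 4 within a 128-bit lane: result bytes 0..11 come
          // from B[4..15] and bytes 12..15 from A[0..3]; shifts of 32 or
          // more zero the whole lane.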
          unsigned ShiftedIdx = Offset + (Shift & 0xFF);
          if (ShiftedIdx < 16) { // from VecB
            ElemIdx = ShiftedIdx + (Lane * 16);
          } else if (ShiftedIdx < 32) { // from VecA
            VecIdx = 0;
            ElemIdx = (ShiftedIdx - 16) + (Lane * 16);
          }

          return std::pair<unsigned, int>{VecIdx, ElemIdx};
        });

  case X86::BI__builtin_ia32_alignd128:
  case X86::BI__builtin_ia32_alignd256:
  case X86::BI__builtin_ia32_alignd512:
  case X86::BI__builtin_ia32_alignq128:
  case X86::BI__builtin_ia32_alignq256:
  case X86::BI__builtin_ia32_alignq512: {
    unsigned NumElems =
        Call->getType()->castAs<VectorType>()->getNumElements();
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, [NumElems](unsigned DstIdx, unsigned Shift) {
          unsigned Imm = Shift & 0xFF;
          unsigned EffectiveShift = Imm & (NumElems - 1);
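          // The valign builtins shift the 2*NumElems-element concatenation
          // of the two sources right by EffectiveShift elements: low result
          // elements come from the second source (index 1), the remainder
          // wrap into the first (index 0), e.g. NumElems = 4 with shift 1
          // yields {B[1], B[2], B[3], A[0]}.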
5967 unsigned SourcePos = DstIdx + EffectiveShift;
5968 unsigned VecIdx = SourcePos < NumElems ? 1u : 0u;
5969 unsigned ElemIdx = SourcePos & (NumElems - 1);
5970 return std::pair<unsigned, int>{VecIdx, static_cast<int>(ElemIdx)};
5971 });
5972 }
5973
  case clang::X86::BI__builtin_ia32_minps:
  case clang::X86::BI__builtin_ia32_minpd:
  case clang::X86::BI__builtin_ia32_minph128:
  case clang::X86::BI__builtin_ia32_minph256:
  case clang::X86::BI__builtin_ia32_minps256:
  case clang::X86::BI__builtin_ia32_minpd256:
  case clang::X86::BI__builtin_ia32_minps512:
  case clang::X86::BI__builtin_ia32_minpd512:
  case clang::X86::BI__builtin_ia32_minph512:
    return interp__builtin_elementwise_fp_binop(
        S, OpPC, Call,
        [](const APFloat &A, const APFloat &B,
           std::optional<APSInt>) -> std::optional<APFloat> {
          if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
              B.isInfinity() || B.isDenormal())
            return std::nullopt;
          if (A.isZero() && B.isZero())
            return B;
          return llvm::minimum(A, B);
        });

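  // The scalar forms compute only element 0; per x86 scalar min/max
  // semantics the upper elements are taken from the first operand, which is
  // presumably what the helper's IsScalar path implements.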
  case clang::X86::BI__builtin_ia32_minss:
  case clang::X86::BI__builtin_ia32_minsd:
    return interp__builtin_elementwise_fp_binop(
        S, OpPC, Call,
        [](const APFloat &A, const APFloat &B,
           std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
          return EvalScalarMinMaxFp(A, B, RoundingMode, /*IsMin=*/true);
        },
        /*IsScalar=*/true);

  case clang::X86::BI__builtin_ia32_minsd_round_mask:
  case clang::X86::BI__builtin_ia32_minss_round_mask:
  case clang::X86::BI__builtin_ia32_minsh_round_mask:
  case clang::X86::BI__builtin_ia32_maxsd_round_mask:
  case clang::X86::BI__builtin_ia32_maxss_round_mask:
  case clang::X86::BI__builtin_ia32_maxsh_round_mask: {
    bool IsMin = BuiltinID == clang::X86::BI__builtin_ia32_minsd_round_mask ||
                 BuiltinID == clang::X86::BI__builtin_ia32_minss_round_mask ||
                 BuiltinID == clang::X86::BI__builtin_ia32_minsh_round_mask;
    return interp__builtin_scalar_fp_round_mask_binop(
        S, OpPC, Call,
        [IsMin](const APFloat &A, const APFloat &B,
                std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
          return EvalScalarMinMaxFp(A, B, RoundingMode, IsMin);
        });
  }

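  // Mirror of the packed min cases above: zero ties return the second
  // operand B, matching MAXPS/MAXPD.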
  case clang::X86::BI__builtin_ia32_maxps:
  case clang::X86::BI__builtin_ia32_maxpd:
  case clang::X86::BI__builtin_ia32_maxph128:
  case clang::X86::BI__builtin_ia32_maxph256:
  case clang::X86::BI__builtin_ia32_maxps256:
  case clang::X86::BI__builtin_ia32_maxpd256:
  case clang::X86::BI__builtin_ia32_maxps512:
  case clang::X86::BI__builtin_ia32_maxpd512:
  case clang::X86::BI__builtin_ia32_maxph512:
    return interp__builtin_elementwise_fp_binop(
        S, OpPC, Call,
        [](const APFloat &A, const APFloat &B,
           std::optional<APSInt>) -> std::optional<APFloat> {
          if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
              B.isInfinity() || B.isDenormal())
            return std::nullopt;
          if (A.isZero() && B.isZero())
            return B;
          return llvm::maximum(A, B);
        });

  case clang::X86::BI__builtin_ia32_maxss:
  case clang::X86::BI__builtin_ia32_maxsd:
    return interp__builtin_elementwise_fp_binop(
        S, OpPC, Call,
        [](const APFloat &A, const APFloat &B,
           std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
          return EvalScalarMinMaxFp(A, B, RoundingMode, /*IsMin=*/false);
        },
        /*IsScalar=*/true);

  default:
    S.FFDiag(S.Current->getLocation(OpPC),
             diag::note_invalid_subexpr_in_const_expr)
        << S.Current->getRange(OpPC);

    return false;
  }

  llvm_unreachable("Unhandled builtin ID");
}

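/// Evaluates a __builtin_offsetof expression by walking its components.
/// E.g. for
///   struct S { int A; int B[4]; };
///   __builtin_offsetof(S, B[2])
/// the loop visits a Field node (the byte offset of B) followed by an Array
/// node (2 * sizeof(int)), yielding 4 + 8 = 12 with 4-byte ints.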
bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E,
                       ArrayRef<int64_t> ArrayIndices, int64_t &IntResult) {
  S.getASTContext().recordOffsetOfEvaluation(E);
  CharUnits Result;
  unsigned N = E->getNumComponents();
  assert(N > 0);

  unsigned ArrayIndex = 0;
  QualType CurrentType = E->getTypeSourceInfo()->getType();
  for (unsigned I = 0; I != N; ++I) {
    const OffsetOfNode &Node = E->getComponent(I);
    switch (Node.getKind()) {
    case OffsetOfNode::Field: {
      const FieldDecl *MemberDecl = Node.getField();
      const auto *RD = CurrentType->getAsRecordDecl();
      if (!RD || RD->isInvalidDecl())
        return false;
      const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(RD);
      unsigned FieldIndex = MemberDecl->getFieldIndex();
      assert(FieldIndex < RL.getFieldCount() && "offsetof field in wrong type");
      Result +=
          S.getASTContext().toCharUnitsFromBits(RL.getFieldOffset(FieldIndex));
      CurrentType = MemberDecl->getType().getNonReferenceType();
      break;
    }
    case OffsetOfNode::Array: {
      // When generating bytecode, we put all the index expressions as Sint64
      // on the stack.
      int64_t Index = ArrayIndices[ArrayIndex];
      const ArrayType *AT = S.getASTContext().getAsArrayType(CurrentType);
      if (!AT)
        return false;
      CurrentType = AT->getElementType();
      CharUnits ElementSize = S.getASTContext().getTypeSizeInChars(CurrentType);
      Result += Index * ElementSize;
      ++ArrayIndex;
      break;
    }
    case OffsetOfNode::Base: {
      const CXXBaseSpecifier *BaseSpec = Node.getBase();
      if (BaseSpec->isVirtual())
        return false;

      // Find the layout of the class whose base we are looking into.
      const auto *RD = CurrentType->getAsCXXRecordDecl();
      if (!RD || RD->isInvalidDecl())
        return false;
      const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(RD);

      // Find the base class itself.
      CurrentType = BaseSpec->getType();
      const auto *BaseRD = CurrentType->getAsCXXRecordDecl();
      if (!BaseRD)
        return false;

      // Add the offset to the base.
      Result += RL.getBaseClassOffset(BaseRD);
      break;
    }
    case OffsetOfNode::Identifier:
      llvm_unreachable("Dependent OffsetOfExpr?");
    }
  }

  IntResult = Result.getQuantity();

  return true;
}

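/// Writes \p IntValue into the single integral field of a three-way
/// comparison result object (e.g. std::strong_ordering) and marks the field
/// initialized.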
bool SetThreeWayComparisonField(InterpState &S, CodePtr OpPC,
                                const Pointer &Ptr, const APSInt &IntValue) {

  const Record *R = Ptr.getRecord();
  assert(R);
  assert(R->getNumFields() == 1);

  unsigned FieldOffset = R->getField(0u)->Offset;
  const Pointer &FieldPtr = Ptr.atField(FieldOffset);
  PrimType FieldT = *S.getContext().classify(FieldPtr.getType());

  INT_TYPE_SWITCH(FieldT,
                  FieldPtr.deref<T>() = T::from(IntValue.getSExtValue()));
  FieldPtr.initialize();
  return true;
}

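/// Recursively resets \p Dest to a zero-initialized state: primitives are
/// destroyed and value-initialized in place, while records and arrays
/// recurse into their fields and elements.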
static void zeroAll(Pointer &Dest) {
  const Descriptor *Desc = Dest.getFieldDesc();

  if (Desc->isPrimitive()) {
    TYPE_SWITCH(Desc->getPrimType(), {
      Dest.deref<T>().~T();
      new (&Dest.deref<T>()) T();
    });
    return;
  }

  if (Desc->isRecord()) {
    const Record *R = Desc->ElemRecord;
    for (const Record::Field &F : R->fields()) {
      Pointer FieldPtr = Dest.atField(F.Offset);
      zeroAll(FieldPtr);
    }
    return;
  }

  if (Desc->isPrimitiveArray()) {
    for (unsigned I = 0, N = Desc->getNumElems(); I != N; ++I) {
      TYPE_SWITCH(Desc->getPrimType(), {
        Dest.elem<T>(I).~T();
        new (&Dest.elem<T>(I)) T();
      });
    }
    return;
  }

  if (Desc->isCompositeArray()) {
    for (unsigned I = 0, N = Desc->getNumElems(); I != N; ++I) {
      Pointer ElemPtr = Dest.atIndex(I).narrow();
      zeroAll(ElemPtr);
    }
    return;
  }
}

static bool copyComposite(InterpState &S, CodePtr OpPC, const Pointer &Src,
                          Pointer &Dest, bool Activate);
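/// Copies a record from \p Src to \p Dest field by field. For unions, only
/// the active field is copied (and activated in \p Dest); every inactive
/// field is zeroed so no stale state survives the copy.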
static bool copyRecord(InterpState &S, CodePtr OpPC, const Pointer &Src,
                       Pointer &Dest, bool Activate = false) {
  [[maybe_unused]] const Descriptor *SrcDesc = Src.getFieldDesc();
  const Descriptor *DestDesc = Dest.getFieldDesc();

  auto copyField = [&](const Record::Field &F, bool Activate) -> bool {
    Pointer DestField = Dest.atField(F.Offset);
    if (OptPrimType FT = S.Ctx.classify(F.Decl->getType())) {
      TYPE_SWITCH(*FT, {
        DestField.deref<T>() = Src.atField(F.Offset).deref<T>();
        if (Src.atField(F.Offset).isInitialized())
          DestField.initialize();
        if (Activate)
          DestField.activate();
      });
      return true;
    }
    // Composite field.
    return copyComposite(S, OpPC, Src.atField(F.Offset), DestField, Activate);
  };

  assert(SrcDesc->isRecord());
  assert(SrcDesc->ElemRecord == DestDesc->ElemRecord);
  const Record *R = DestDesc->ElemRecord;
  for (const Record::Field &F : R->fields()) {
    if (R->isUnion()) {
      // For unions, only copy the active field. Zero all others.
      const Pointer &SrcField = Src.atField(F.Offset);
      if (SrcField.isActive()) {
        if (!copyField(F, /*Activate=*/true))
          return false;
      } else {
        if (!CheckMutable(S, OpPC, Src.atField(F.Offset)))
          return false;
        Pointer DestField = Dest.atField(F.Offset);
        zeroAll(DestField);
      }
    } else {
      if (!copyField(F, Activate))
        return false;
    }
  }

  for (const Record::Base &B : R->bases()) {
    Pointer DestBase = Dest.atField(B.Offset);
    if (!copyRecord(S, OpPC, Src.atField(B.Offset), DestBase, Activate))
      return false;
  }

  Dest.initialize();
  return true;
}

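/// Copies a non-primitive value (array or record) from \p Src to \p Dest,
/// copying arrays element by element and delegating records to copyRecord.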
static bool copyComposite(InterpState &S, CodePtr OpPC, const Pointer &Src,
                          Pointer &Dest, bool Activate = false) {
  assert(Src.isLive() && Dest.isLive());

  [[maybe_unused]] const Descriptor *SrcDesc = Src.getFieldDesc();
  const Descriptor *DestDesc = Dest.getFieldDesc();

  assert(!DestDesc->isPrimitive() && !SrcDesc->isPrimitive());

  if (DestDesc->isPrimitiveArray()) {
    assert(SrcDesc->isPrimitiveArray());
    assert(SrcDesc->getNumElems() == DestDesc->getNumElems());
    PrimType ET = DestDesc->getPrimType();
    for (unsigned I = 0, N = DestDesc->getNumElems(); I != N; ++I) {
      Pointer DestElem = Dest.atIndex(I);
      TYPE_SWITCH(ET, {
        DestElem.deref<T>() = Src.elem<T>(I);
        DestElem.initialize();
      });
    }
    return true;
  }

  if (DestDesc->isCompositeArray()) {
    assert(SrcDesc->isCompositeArray());
    assert(SrcDesc->getNumElems() == DestDesc->getNumElems());
    for (unsigned I = 0, N = DestDesc->getNumElems(); I != N; ++I) {
      const Pointer &SrcElem = Src.atIndex(I).narrow();
      Pointer DestElem = Dest.atIndex(I).narrow();
      if (!copyComposite(S, OpPC, SrcElem, DestElem, Activate))
        return false;
    }
    return true;
  }

  if (DestDesc->isRecord())
    return copyRecord(S, OpPC, Src, Dest, Activate);
  return Invalid(S, OpPC);
}

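/// Copies the composite value at \p Src into \p Dest; both must be block
/// pointers to non-primitive storage. This is presumably the entry point
/// used by the interpreter's memcpy-style operations.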
bool DoMemcpy(InterpState &S, CodePtr OpPC, const Pointer &Src, Pointer &Dest) {
  if (!Src.isBlockPointer() || Src.getFieldDesc()->isPrimitive())
    return false;
  if (!Dest.isBlockPointer() || Dest.getFieldDesc()->isPrimitive())
    return false;

  return copyComposite(S, OpPC, Src, Dest);
}

} // namespace interp
} // namespace clang