1//===--- InterpBuiltin.cpp - Interpreter for the constexpr VM ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "../ExprConstShared.h"
9#include "Boolean.h"
10#include "Char.h"
11#include "EvalEmitter.h"
12#include "InterpBuiltinBitCast.h"
13#include "InterpHelpers.h"
14#include "PrimType.h"
15#include "Program.h"
16#include "clang/AST/InferAlloc.h"
17#include "clang/AST/OSLog.h"
18#include "clang/AST/RecordLayout.h"
19#include "clang/Basic/Builtins.h"
20#include "clang/Basic/TargetBuiltins.h"
21#include "clang/Basic/TargetInfo.h"
22#include "llvm/ADT/StringExtras.h"
23#include "llvm/Support/AllocToken.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/SipHash.h"
26
27namespace clang {
28namespace interp {
29
30[[maybe_unused]] static bool isNoopBuiltin(unsigned ID) {
31 switch (ID) {
32 case Builtin::BIas_const:
33 case Builtin::BIforward:
34 case Builtin::BIforward_like:
35 case Builtin::BImove:
36 case Builtin::BImove_if_noexcept:
37 case Builtin::BIaddressof:
38 case Builtin::BI__addressof:
39 case Builtin::BI__builtin_addressof:
40 case Builtin::BI__builtin_launder:
41 return true;
42 default:
43 return false;
44 }
45 return false;
46}
47
48static void discard(InterpStack &Stk, PrimType T) {
49 TYPE_SWITCH(T, { Stk.discard<T>(); });
50}
51
52static bool popToUInt64(const InterpState &S, const Expr *E, uint64_t &Out) {
53 INT_TYPE_SWITCH(*S.getContext().classify(E->getType()), {
54 const auto &Val = S.Stk.pop<T>();
55 if (!Val.isNumber())
56 return false;
57 Out = static_cast<uint64_t>(Val);
58 return true;
59 });
60}
61
62static bool popToAPSInt(InterpStack &Stk, PrimType T, APSInt &Out) {
63 INT_TYPE_SWITCH(T, {
64 const auto &Val = Stk.pop<T>();
65 if (!Val.isNumber())
66 return false;
67 Out = Val.toAPSInt();
68 return true;
69 });
70}
71
72static bool popToAPSInt(InterpState &S, const Expr *E, APSInt &Out) {
73 return popToAPSInt(Stk&: S.Stk, T: *S.getContext().classify(T: E->getType()), Out);
74}
75static bool popToAPSInt(InterpState &S, QualType T, APSInt &Out) {
76 return popToAPSInt(Stk&: S.Stk, T: *S.getContext().classify(T), Out);
77}
78
79/// Check for common reasons a pointer can't be read from, which
80/// are usually not diagnosed in a builtin function.
81static bool isReadable(const Pointer &P) {
82 if (P.isDummy())
83 return false;
84 if (!P.isBlockPointer())
85 return false;
86 if (!P.isLive())
87 return false;
88 if (P.isOnePastEnd())
89 return false;
90 return true;
91}
92
93/// Pushes \p Val on the stack as the type given by \p QT.
94static void pushInteger(InterpState &S, const APSInt &Val, QualType QT) {
95 assert(QT->isSignedIntegerOrEnumerationType() ||
96 QT->isUnsignedIntegerOrEnumerationType());
97 OptPrimType T = *S.getContext().classify(T: QT);
98 assert(T);
99
100 if (T == PT_IntAPS) {
101 unsigned BitWidth = S.getASTContext().getIntWidth(T: QT);
102 auto Result = S.allocAP<IntegralAP<true>>(BitWidth);
103 Result.copy(V: Val.extOrTrunc(width: BitWidth));
104 S.Stk.push<IntegralAP<true>>(Args&: Result);
105 return;
106 }
107
108 if (T == PT_IntAP) {
109 unsigned BitWidth = S.getASTContext().getIntWidth(T: QT);
110 auto Result = S.allocAP<IntegralAP<false>>(BitWidth);
111 Result.copy(V: Val.extOrTrunc(width: BitWidth));
112 S.Stk.push<IntegralAP<false>>(Args&: Result);
113 return;
114 }
115
116 if (isSignedType(T: *T)) {
117 int64_t V = Val.getSExtValue();
118 INT_TYPE_SWITCH(*T, { S.Stk.push<T>(T::from(V)); });
119 } else {
120 assert(QT->isUnsignedIntegerOrEnumerationType());
121 uint64_t V = Val.getZExtValue();
122 INT_TYPE_SWITCH(*T, { S.Stk.push<T>(T::from(V)); });
123 }
124}
125
126template <typename T>
127static void pushInteger(InterpState &S, T Val, QualType QT) {
128 if constexpr (std::is_same_v<T, APInt>)
129 pushInteger(S, Val: APSInt(Val, !std::is_signed_v<T>), QT);
130 else if constexpr (std::is_same_v<T, APSInt>)
131 pushInteger(S, Val, QT);
132 else
133 pushInteger(S,
134 Val: APSInt(APInt(sizeof(T) * 8, static_cast<uint64_t>(Val),
135 std::is_signed_v<T>),
136 !std::is_signed_v<T>),
137 QT);
138}
139
140static void assignIntegral(InterpState &S, const Pointer &Dest, PrimType ValueT,
141 const APSInt &Value) {
142
143 if (ValueT == PT_IntAPS) {
144 Dest.deref<IntegralAP<true>>() =
145 S.allocAP<IntegralAP<true>>(BitWidth: Value.getBitWidth());
146 Dest.deref<IntegralAP<true>>().copy(V: Value);
147 } else if (ValueT == PT_IntAP) {
148 Dest.deref<IntegralAP<false>>() =
149 S.allocAP<IntegralAP<false>>(BitWidth: Value.getBitWidth());
150 Dest.deref<IntegralAP<false>>().copy(V: Value);
151 } else if (ValueT == PT_Bool) {
152 Dest.deref<Boolean>() = Boolean::from(Value: !Value.isZero());
153 } else {
154 INT_TYPE_SWITCH_NO_BOOL(
155 ValueT, { Dest.deref<T>() = T::from(static_cast<T>(Value)); });
156 }
157}
158
159static QualType getElemType(const Pointer &P) {
160 const Descriptor *Desc = P.getFieldDesc();
161 QualType T = Desc->getType();
162 if (Desc->isPrimitive())
163 return T;
164 if (T->isPointerType())
165 return T->castAs<PointerType>()->getPointeeType();
166 if (Desc->isArray())
167 return Desc->getElemQualType();
168 if (const auto *AT = T->getAsArrayTypeUnsafe())
169 return AT->getElementType();
170 return T;
171}
172
173static void diagnoseNonConstexprBuiltin(InterpState &S, CodePtr OpPC,
174 unsigned ID) {
175 if (!S.diagnosing())
176 return;
177
178 auto Loc = S.Current->getSource(PC: OpPC);
179 if (S.getLangOpts().CPlusPlus11)
180 S.CCEDiag(SI: Loc, DiagId: diag::note_constexpr_invalid_function)
181 << /*isConstexpr=*/0 << /*isConstructor=*/0
182 << S.getASTContext().BuiltinInfo.getQuotedName(ID);
183 else
184 S.CCEDiag(SI: Loc, DiagId: diag::note_invalid_subexpr_in_const_expr);
185}
186
187static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
188 assert(Val.getFieldDesc()->isPrimitiveArray() &&
189 Val.getFieldDesc()->getElemQualType()->isBooleanType() &&
190 "Not a boolean vector");
191 unsigned NumElems = Val.getNumElems();
192
193 // Each element is one bit, so create an integer with NumElts bits.
194 llvm::APSInt Result(NumElems, 0);
195 for (unsigned I = 0; I != NumElems; ++I) {
196 if (Val.elem<bool>(I))
197 Result.setBit(I);
198 }
199
200 return Result;
201}
202
203// Strict double -> float conversion used for X86 PD2PS/cvtsd2ss intrinsics.
204// Reject NaN/Inf/Subnormal inputs and any lossy/inexact conversions.
205static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst,
206 InterpState &S, const Expr *DiagExpr) {
207 if (Src.isInfinity()) {
208 if (S.diagnosing())
209 S.CCEDiag(E: DiagExpr, DiagId: diag::note_constexpr_float_arithmetic) << 0;
210 return false;
211 }
212 if (Src.isNaN()) {
213 if (S.diagnosing())
214 S.CCEDiag(E: DiagExpr, DiagId: diag::note_constexpr_float_arithmetic) << 1;
215 return false;
216 }
217 APFloat Val = Src;
218 bool LosesInfo = false;
219 APFloat::opStatus Status = Val.convert(
220 ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven, losesInfo: &LosesInfo);
221 if (LosesInfo || Val.isDenormal()) {
222 if (S.diagnosing())
223 S.CCEDiag(E: DiagExpr, DiagId: diag::note_constexpr_float_arithmetic_strict);
224 return false;
225 }
226 if (Status != APFloat::opOK) {
227 if (S.diagnosing())
228 S.CCEDiag(E: DiagExpr, DiagId: diag::note_invalid_subexpr_in_const_expr);
229 return false;
230 }
231 Dst.copy(F: Val);
232 return true;
233}
234
235static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
236 const InterpFrame *Frame,
237 const CallExpr *Call) {
238 unsigned Depth = S.Current->getDepth();
239 auto isStdCall = [](const FunctionDecl *F) -> bool {
240 return F && F->isInStdNamespace() && F->getIdentifier() &&
241 F->getIdentifier()->isStr(Str: "is_constant_evaluated");
242 };
243 const InterpFrame *Caller = Frame->Caller;
244 // The current frame is the one for __builtin_is_constant_evaluated.
245 // The one above that, potentially the one for std::is_constant_evaluated().
246 if (S.inConstantContext() && !S.checkingPotentialConstantExpression() &&
247 S.getEvalStatus().Diag &&
248 (Depth == 0 || (Depth == 1 && isStdCall(Frame->getCallee())))) {
249 if (Caller && isStdCall(Frame->getCallee())) {
250 const Expr *E = Caller->getExpr(PC: Caller->getRetPC());
251 S.report(Loc: E->getExprLoc(),
252 DiagId: diag::warn_is_constant_evaluated_always_true_constexpr)
253 << "std::is_constant_evaluated" << E->getSourceRange();
254 } else {
255 S.report(Loc: Call->getExprLoc(),
256 DiagId: diag::warn_is_constant_evaluated_always_true_constexpr)
257 << "__builtin_is_constant_evaluated" << Call->getSourceRange();
258 }
259 }
260
261 S.Stk.push<Boolean>(Args: Boolean::from(Value: S.inConstantContext()));
262 return true;
263}
264
265// __builtin_assume
266// __assume (MS extension)
267static bool interp__builtin_assume(InterpState &S, CodePtr OpPC,
268 const InterpFrame *Frame,
269 const CallExpr *Call) {
270 // Nothing to be done here since the argument is NOT evaluated.
271 assert(Call->getNumArgs() == 1);
272 return true;
273}
274
275static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC,
276 const InterpFrame *Frame,
277 const CallExpr *Call, unsigned ID) {
278 uint64_t Limit = ~static_cast<uint64_t>(0);
279 if (ID == Builtin::BIstrncmp || ID == Builtin::BI__builtin_strncmp ||
280 ID == Builtin::BIwcsncmp || ID == Builtin::BI__builtin_wcsncmp) {
281 if (!popToUInt64(S, E: Call->getArg(Arg: 2), Out&: Limit))
282 return false;
283 }
284
285 const Pointer &B = S.Stk.pop<Pointer>();
286 const Pointer &A = S.Stk.pop<Pointer>();
287 if (ID == Builtin::BIstrcmp || ID == Builtin::BIstrncmp ||
288 ID == Builtin::BIwcscmp || ID == Builtin::BIwcsncmp)
289 diagnoseNonConstexprBuiltin(S, OpPC, ID);
290
291 if (Limit == 0) {
292 pushInteger(S, Val: 0, QT: Call->getType());
293 return true;
294 }
295
296 if (!CheckLive(S, OpPC, Ptr: A, AK: AK_Read) || !CheckLive(S, OpPC, Ptr: B, AK: AK_Read))
297 return false;
298
299 if (A.isDummy() || B.isDummy())
300 return false;
301 if (!A.isBlockPointer() || !B.isBlockPointer())
302 return false;
303 if (!A.getFieldDesc()->isPrimitiveArray() ||
304 !B.getFieldDesc()->isPrimitiveArray())
305 return false;
306
307 bool IsWide = ID == Builtin::BIwcscmp || ID == Builtin::BIwcsncmp ||
308 ID == Builtin::BI__builtin_wcscmp ||
309 ID == Builtin::BI__builtin_wcsncmp;
310 assert(A.getFieldDesc()->isPrimitiveArray());
311 assert(B.getFieldDesc()->isPrimitiveArray());
312
313 // Different element types shouldn't happen, but with casts they can.
314 if (!S.getASTContext().hasSameUnqualifiedType(T1: getElemType(P: A), T2: getElemType(P: B)))
315 return false;
316
317 PrimType ElemT = *S.getContext().classify(T: getElemType(P: A));
318
319 auto returnResult = [&](int V) -> bool {
320 pushInteger(S, Val: V, QT: Call->getType());
321 return true;
322 };
323
324 unsigned IndexA = A.getIndex();
325 unsigned IndexB = B.getIndex();
326 uint64_t Steps = 0;
327 for (;; ++IndexA, ++IndexB, ++Steps) {
328
329 if (Steps >= Limit)
330 break;
331 PtrView PA = A.view().atIndex(Idx: IndexA);
332 PtrView PB = B.view().atIndex(Idx: IndexB);
333 if (!CheckRange(S, OpPC, Ptr: PA, AK: AK_Read) ||
334 !CheckRange(S, OpPC, Ptr: PB, AK: AK_Read)) {
335 return false;
336 }
337
338 if (IsWide) {
339 INT_TYPE_SWITCH(ElemT, {
340 T CA = PA.deref<T>();
341 T CB = PB.deref<T>();
342 if (CA > CB)
343 return returnResult(1);
344 if (CA < CB)
345 return returnResult(-1);
346 if (CA.isZero() || CB.isZero())
347 return returnResult(0);
348 });
349 continue;
350 }
351
352 uint8_t CA = PA.deref<uint8_t>();
353 uint8_t CB = PB.deref<uint8_t>();
354
355 if (CA > CB)
356 return returnResult(1);
357 if (CA < CB)
358 return returnResult(-1);
359 if (CA == 0 || CB == 0)
360 return returnResult(0);
361 }
362
363 return returnResult(0);
364}
365
366static bool interp__builtin_strlen(InterpState &S, CodePtr OpPC,
367 const InterpFrame *Frame,
368 const CallExpr *Call, unsigned ID) {
369 const Pointer &StrPtr = S.Stk.pop<Pointer>().expand();
370
371 if (ID == Builtin::BIstrlen || ID == Builtin::BIwcslen)
372 diagnoseNonConstexprBuiltin(S, OpPC, ID);
373
374 if (StrPtr.isConstexprUnknown())
375 return false;
376
377 if (!CheckArray(S, OpPC, Ptr: StrPtr))
378 return false;
379
380 if (!CheckLive(S, OpPC, Ptr: StrPtr, AK: AK_Read))
381 return false;
382
383 if (!StrPtr.isBlockPointer())
384 return false;
385
386 if (!CheckDummy(S, OpPC, B: StrPtr.block(), AK: AK_Read))
387 return false;
388
389 if (!StrPtr.getFieldDesc()->isPrimitiveArray())
390 return false;
391
392 assert(StrPtr.getFieldDesc()->isPrimitiveArray());
393 PrimType ElemT = StrPtr.getFieldDesc()->getPrimType();
394 unsigned ElemSize = StrPtr.getFieldDesc()->getElemDataSize();
395 if (ElemSize != 1 && ElemSize != 2 && ElemSize != 4)
396 return Invalid(S, OpPC);
397
398 if (ID == Builtin::BI__builtin_wcslen || ID == Builtin::BIwcslen) {
399 const ASTContext &AC = S.getASTContext();
400 unsigned WCharSize = AC.getTypeSizeInChars(T: AC.getWCharType()).getQuantity();
401 if (StrPtr.getFieldDesc()->getElemDataSize() != WCharSize)
402 return false;
403 }
404
405 size_t Len = 0;
406 for (size_t I = StrPtr.getIndex();; ++I, ++Len) {
407 PtrView ElemPtr = StrPtr.view().atIndex(Idx: I);
408
409 if (!CheckRange(S, OpPC, Ptr: ElemPtr, AK: AK_Read))
410 return false;
411
412 uint32_t Val;
413 FIXED_SIZE_INT_TYPE_SWITCH(
414 ElemT, { Val = static_cast<uint32_t>(ElemPtr.deref<T>()); });
415 if (Val == 0)
416 break;
417 }
418
419 pushInteger(S, Val: Len, QT: Call->getType());
420
421 return true;
422}
423
424static bool interp__builtin_nan(InterpState &S, CodePtr OpPC,
425 const InterpFrame *Frame, const CallExpr *Call,
426 bool Signaling) {
427 const Pointer &Arg = S.Stk.pop<Pointer>();
428
429 if (!CheckLoad(S, OpPC, Ptr: Arg))
430 return false;
431
432 if (!Arg.getFieldDesc()->isPrimitiveArray())
433 return Invalid(S, OpPC);
434
435 // Convert the given string to an integer using StringRef's API.
436 llvm::APInt Fill;
437 std::string Str;
438 unsigned ArgLength = Arg.getNumElems();
439 bool FoundZero = false;
440 for (unsigned I = 0; I != ArgLength; ++I) {
441 if (!Arg.isElementInitialized(Index: I))
442 return false;
443
444 if (Arg.elem<int8_t>(I) == 0) {
445 FoundZero = true;
446 break;
447 }
448 Str += Arg.elem<char>(I);
449 }
450
451 // If we didn't find a NUL byte, diagnose as a one-past-the-end read.
452 if (!FoundZero)
453 return CheckRange(S, OpPC, Ptr: Arg.atIndex(Idx: ArgLength), AK: AK_Read);
454
455 // Treat empty strings as if they were zero.
456 if (Str.empty())
457 Fill = llvm::APInt(32, 0);
458 else if (StringRef(Str).getAsInteger(Radix: 0, Result&: Fill))
459 return false;
460
461 const llvm::fltSemantics &TargetSemantics =
462 S.getASTContext().getFloatTypeSemantics(
463 T: Call->getDirectCallee()->getReturnType());
464
465 Floating Result = S.allocFloat(Sem: TargetSemantics);
466 if (S.getASTContext().getTargetInfo().isNan2008()) {
467 if (Signaling)
468 Result.copy(
469 F: llvm::APFloat::getSNaN(Sem: TargetSemantics, /*Negative=*/false, payload: &Fill));
470 else
471 Result.copy(
472 F: llvm::APFloat::getQNaN(Sem: TargetSemantics, /*Negative=*/false, payload: &Fill));
473 } else {
474 // Prior to IEEE 754-2008, architectures were allowed to choose whether
475 // the first bit of their significand was set for qNaN or sNaN. MIPS chose
476 // a different encoding to what became a standard in 2008, and for pre-
477 // 2008 revisions, MIPS interpreted sNaN-2008 as qNan and qNaN-2008 as
478 // sNaN. This is now known as "legacy NaN" encoding.
479 if (Signaling)
480 Result.copy(
481 F: llvm::APFloat::getQNaN(Sem: TargetSemantics, /*Negative=*/false, payload: &Fill));
482 else
483 Result.copy(
484 F: llvm::APFloat::getSNaN(Sem: TargetSemantics, /*Negative=*/false, payload: &Fill));
485 }
486
487 S.Stk.push<Floating>(Args&: Result);
488 return true;
489}
490
491static bool interp__builtin_inf(InterpState &S, CodePtr OpPC,
492 const InterpFrame *Frame,
493 const CallExpr *Call) {
494 const llvm::fltSemantics &TargetSemantics =
495 S.getASTContext().getFloatTypeSemantics(
496 T: Call->getDirectCallee()->getReturnType());
497
498 Floating Result = S.allocFloat(Sem: TargetSemantics);
499 Result.copy(F: APFloat::getInf(Sem: TargetSemantics));
500 S.Stk.push<Floating>(Args&: Result);
501 return true;
502}
503
504static bool interp__builtin_copysign(InterpState &S, CodePtr OpPC,
505 const InterpFrame *Frame) {
506 const Floating &Arg2 = S.Stk.pop<Floating>();
507 const Floating &Arg1 = S.Stk.pop<Floating>();
508 Floating Result = S.allocFloat(Sem: Arg1.getSemantics());
509
510 APFloat Copy = Arg1.getAPFloat();
511 Copy.copySign(RHS: Arg2.getAPFloat());
512 Result.copy(F: Copy);
513 S.Stk.push<Floating>(Args&: Result);
514
515 return true;
516}
517
518static bool interp__builtin_fmin(InterpState &S, CodePtr OpPC,
519 const InterpFrame *Frame, bool IsNumBuiltin) {
520 const Floating &RHS = S.Stk.pop<Floating>();
521 const Floating &LHS = S.Stk.pop<Floating>();
522 Floating Result = S.allocFloat(Sem: LHS.getSemantics());
523
524 if (IsNumBuiltin)
525 Result.copy(F: llvm::minimumnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
526 else
527 Result.copy(F: minnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
528 S.Stk.push<Floating>(Args&: Result);
529 return true;
530}
531
532static bool interp__builtin_fmax(InterpState &S, CodePtr OpPC,
533 const InterpFrame *Frame, bool IsNumBuiltin) {
534 const Floating &RHS = S.Stk.pop<Floating>();
535 const Floating &LHS = S.Stk.pop<Floating>();
536 Floating Result = S.allocFloat(Sem: LHS.getSemantics());
537
538 if (IsNumBuiltin)
539 Result.copy(F: llvm::maximumnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
540 else
541 Result.copy(F: maxnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
542 S.Stk.push<Floating>(Args&: Result);
543 return true;
544}
545
546/// Defined as __builtin_isnan(...), to accommodate the fact that it can
547/// take a float, double, long double, etc.
548/// But for us, that's all a Floating anyway.
549static bool interp__builtin_isnan(InterpState &S, CodePtr OpPC,
550 const InterpFrame *Frame,
551 const CallExpr *Call) {
552 const Floating &Arg = S.Stk.pop<Floating>();
553
554 pushInteger(S, Val: Arg.isNan(), QT: Call->getType());
555 return true;
556}
557
558static bool interp__builtin_issignaling(InterpState &S, CodePtr OpPC,
559 const InterpFrame *Frame,
560 const CallExpr *Call) {
561 const Floating &Arg = S.Stk.pop<Floating>();
562
563 pushInteger(S, Val: Arg.isSignaling(), QT: Call->getType());
564 return true;
565}
566
567static bool interp__builtin_isinf(InterpState &S, CodePtr OpPC,
568 const InterpFrame *Frame, bool CheckSign,
569 const CallExpr *Call) {
570 const Floating &Arg = S.Stk.pop<Floating>();
571 APFloat F = Arg.getAPFloat();
572 bool IsInf = F.isInfinity();
573
574 if (CheckSign)
575 pushInteger(S, Val: IsInf ? (F.isNegative() ? -1 : 1) : 0, QT: Call->getType());
576 else
577 pushInteger(S, Val: IsInf, QT: Call->getType());
578 return true;
579}
580
581static bool interp__builtin_isfinite(InterpState &S, CodePtr OpPC,
582 const InterpFrame *Frame,
583 const CallExpr *Call) {
584 const Floating &Arg = S.Stk.pop<Floating>();
585
586 pushInteger(S, Val: Arg.isFinite(), QT: Call->getType());
587 return true;
588}
589
590static bool interp__builtin_isnormal(InterpState &S, CodePtr OpPC,
591 const InterpFrame *Frame,
592 const CallExpr *Call) {
593 const Floating &Arg = S.Stk.pop<Floating>();
594
595 pushInteger(S, Val: Arg.isNormal(), QT: Call->getType());
596 return true;
597}
598
599static bool interp__builtin_issubnormal(InterpState &S, CodePtr OpPC,
600 const InterpFrame *Frame,
601 const CallExpr *Call) {
602 const Floating &Arg = S.Stk.pop<Floating>();
603
604 pushInteger(S, Val: Arg.isDenormal(), QT: Call->getType());
605 return true;
606}
607
608static bool interp__builtin_iszero(InterpState &S, CodePtr OpPC,
609 const InterpFrame *Frame,
610 const CallExpr *Call) {
611 const Floating &Arg = S.Stk.pop<Floating>();
612
613 pushInteger(S, Val: Arg.isZero(), QT: Call->getType());
614 return true;
615}
616
617static bool interp__builtin_signbit(InterpState &S, CodePtr OpPC,
618 const InterpFrame *Frame,
619 const CallExpr *Call) {
620 const Floating &Arg = S.Stk.pop<Floating>();
621
622 pushInteger(S, Val: Arg.isNegative(), QT: Call->getType());
623 return true;
624}
625
626static bool interp_floating_comparison(InterpState &S, CodePtr OpPC,
627 const CallExpr *Call, unsigned ID) {
628 const Floating &RHS = S.Stk.pop<Floating>();
629 const Floating &LHS = S.Stk.pop<Floating>();
630
631 pushInteger(
632 S,
633 Val: [&] {
634 switch (ID) {
635 case Builtin::BI__builtin_isgreater:
636 return LHS > RHS;
637 case Builtin::BI__builtin_isgreaterequal:
638 return LHS >= RHS;
639 case Builtin::BI__builtin_isless:
640 return LHS < RHS;
641 case Builtin::BI__builtin_islessequal:
642 return LHS <= RHS;
643 case Builtin::BI__builtin_islessgreater: {
644 ComparisonCategoryResult Cmp = LHS.compare(RHS);
645 return Cmp == ComparisonCategoryResult::Less ||
646 Cmp == ComparisonCategoryResult::Greater;
647 }
648 case Builtin::BI__builtin_isunordered:
649 return LHS.compare(RHS) == ComparisonCategoryResult::Unordered;
650 default:
651 llvm_unreachable("Unexpected builtin ID: Should be a floating point "
652 "comparison function");
653 }
654 }(),
655 QT: Call->getType());
656 return true;
657}
658
659/// First parameter to __builtin_isfpclass is the floating value, the
660/// second one is an integral value.
661static bool interp__builtin_isfpclass(InterpState &S, CodePtr OpPC,
662 const InterpFrame *Frame,
663 const CallExpr *Call) {
664 APSInt FPClassArg;
665 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: FPClassArg))
666 return false;
667 const Floating &F = S.Stk.pop<Floating>();
668
669 int32_t Result = static_cast<int32_t>(
670 (F.classify() & std::move(FPClassArg)).getZExtValue());
671 pushInteger(S, Val: Result, QT: Call->getType());
672
673 return true;
674}
675
676/// Five int values followed by one floating value.
677/// __builtin_fpclassify(int, int, int, int, int, float)
678static bool interp__builtin_fpclassify(InterpState &S, CodePtr OpPC,
679 const InterpFrame *Frame,
680 const CallExpr *Call) {
681 const Floating &Val = S.Stk.pop<Floating>();
682
683 PrimType IntT = *S.getContext().classify(E: Call->getArg(Arg: 0));
684 APSInt Values[5];
685 for (unsigned I = 0; I != 5; ++I) {
686 if (!popToAPSInt(Stk&: S.Stk, T: IntT, Out&: Values[4 - I]))
687 return false;
688 }
689
690 unsigned Index;
691 switch (Val.getCategory()) {
692 case APFloat::fcNaN:
693 Index = 0;
694 break;
695 case APFloat::fcInfinity:
696 Index = 1;
697 break;
698 case APFloat::fcNormal:
699 Index = Val.isDenormal() ? 3 : 2;
700 break;
701 case APFloat::fcZero:
702 Index = 4;
703 break;
704 }
705
706 // The last argument is first on the stack.
707 assert(Index <= 4);
708
709 pushInteger(S, Val: Values[Index], QT: Call->getType());
710 return true;
711}
712
713static inline Floating abs(InterpState &S, const Floating &In) {
714 if (!In.isNegative())
715 return In;
716
717 Floating Output = S.allocFloat(Sem: In.getSemantics());
718 APFloat New = In.getAPFloat();
719 New.changeSign();
720 Output.copy(F: New);
721 return Output;
722}
723
724// The C standard says "fabs raises no floating-point exceptions,
725// even if x is a signaling NaN. The returned value is independent of
726// the current rounding direction mode." Therefore constant folding can
727// proceed without regard to the floating point settings.
728// Reference, WG14 N2478 F.10.4.3
729static bool interp__builtin_fabs(InterpState &S, CodePtr OpPC,
730 const InterpFrame *Frame) {
731 const Floating &Val = S.Stk.pop<Floating>();
732 S.Stk.push<Floating>(Args: abs(S, In: Val));
733 return true;
734}
735
736static bool interp__builtin_abs(InterpState &S, CodePtr OpPC,
737 const InterpFrame *Frame,
738 const CallExpr *Call) {
739 APSInt Val;
740 if (!popToAPSInt(S, E: Call->getArg(Arg: 0), Out&: Val))
741 return false;
742 if (Val ==
743 APSInt(APInt::getSignedMinValue(numBits: Val.getBitWidth()), /*IsUnsigned=*/false))
744 return false;
745 if (Val.isNegative())
746 Val.negate();
747 pushInteger(S, Val, QT: Call->getType());
748 return true;
749}
750
751static bool interp__builtin_popcount(InterpState &S, CodePtr OpPC,
752 const InterpFrame *Frame,
753 const CallExpr *Call) {
754 APSInt Val;
755 if (Call->getArg(Arg: 0)->getType()->isExtVectorBoolType()) {
756 const Pointer &Arg = S.Stk.pop<Pointer>();
757 Val = convertBoolVectorToInt(Val: Arg);
758 } else {
759 if (!popToAPSInt(S, E: Call->getArg(Arg: 0), Out&: Val))
760 return false;
761 }
762 pushInteger(S, Val: Val.popcount(), QT: Call->getType());
763 return true;
764}
765
766static bool interp__builtin_ia32_crc32(InterpState &S, CodePtr OpPC,
767 const InterpFrame *Frame,
768 const CallExpr *Call,
769 unsigned DataBytes) {
770 uint64_t DataVal;
771 if (!popToUInt64(S, E: Call->getArg(Arg: 1), Out&: DataVal))
772 return false;
773 uint64_t CRCVal;
774 if (!popToUInt64(S, E: Call->getArg(Arg: 0), Out&: CRCVal))
775 return false;
776
777 // CRC32C polynomial (iSCSI polynomial, bit-reversed)
778 static const uint32_t CRC32C_POLY = 0x82F63B78;
779
780 // Process each byte
781 uint32_t Result = static_cast<uint32_t>(CRCVal);
782 for (unsigned I = 0; I != DataBytes; ++I) {
783 uint8_t Byte = static_cast<uint8_t>((DataVal >> (I * 8)) & 0xFF);
784 Result ^= Byte;
785 for (int J = 0; J != 8; ++J) {
786 Result = (Result >> 1) ^ ((Result & 1) ? CRC32C_POLY : 0);
787 }
788 }
789
790 pushInteger(S, Val: Result, QT: Call->getType());
791 return true;
792}
793
794static bool interp__builtin_classify_type(InterpState &S, CodePtr OpPC,
795 const InterpFrame *Frame,
796 const CallExpr *Call) {
797 // This is an unevaluated call, so there are no arguments on the stack.
798 assert(Call->getNumArgs() == 1);
799 const Expr *Arg = Call->getArg(Arg: 0);
800
801 GCCTypeClass ResultClass =
802 EvaluateBuiltinClassifyType(T: Arg->getType(), LangOpts: S.getLangOpts());
803 int32_t ReturnVal = static_cast<int32_t>(ResultClass);
804 pushInteger(S, Val: ReturnVal, QT: Call->getType());
805 return true;
806}
807
808// __builtin_expect(long, long)
809// __builtin_expect_with_probability(long, long, double)
810static bool interp__builtin_expect(InterpState &S, CodePtr OpPC,
811 const InterpFrame *Frame,
812 const CallExpr *Call) {
813 // The return value is simply the value of the first parameter.
814 // We ignore the probability.
815 unsigned NumArgs = Call->getNumArgs();
816 assert(NumArgs == 2 || NumArgs == 3);
817
818 PrimType ArgT = *S.getContext().classify(T: Call->getArg(Arg: 0)->getType());
819 if (NumArgs == 3)
820 S.Stk.discard<Floating>();
821 discard(Stk&: S.Stk, T: ArgT);
822
823 APSInt Val;
824 if (!popToAPSInt(Stk&: S.Stk, T: ArgT, Out&: Val))
825 return false;
826 pushInteger(S, Val, QT: Call->getType());
827 return true;
828}
829
830static bool interp__builtin_addressof(InterpState &S, CodePtr OpPC,
831 const InterpFrame *Frame,
832 const CallExpr *Call) {
833#ifndef NDEBUG
834 assert(Call->getArg(0)->isLValue());
835 PrimType PtrT = S.getContext().classify(Call->getArg(0)).value_or(PT_Ptr);
836 assert(PtrT == PT_Ptr &&
837 "Unsupported pointer type passed to __builtin_addressof()");
838#endif
839 return true;
840}
841
842static bool interp__builtin_move(InterpState &S, CodePtr OpPC,
843 const InterpFrame *Frame,
844 const CallExpr *Call) {
845 return Call->getDirectCallee()->isConstexpr();
846}
847
848static bool interp__builtin_eh_return_data_regno(InterpState &S, CodePtr OpPC,
849 const InterpFrame *Frame,
850 const CallExpr *Call) {
851 APSInt Arg;
852 if (!popToAPSInt(S, E: Call->getArg(Arg: 0), Out&: Arg))
853 return false;
854
855 int Result = S.getASTContext().getTargetInfo().getEHDataRegisterNumber(
856 RegNo: Arg.getZExtValue());
857 pushInteger(S, Val: Result, QT: Call->getType());
858 return true;
859}
860
861// Two integral values followed by a pointer (lhs, rhs, resultOut)
862static bool interp__builtin_overflowop(InterpState &S, CodePtr OpPC,
863 const CallExpr *Call,
864 unsigned BuiltinOp) {
865 const Pointer &ResultPtr = S.Stk.pop<Pointer>();
866 if (ResultPtr.isDummy() || !ResultPtr.isBlockPointer())
867 return false;
868
869 PrimType RHST = *S.getContext().classify(T: Call->getArg(Arg: 1)->getType());
870 PrimType LHST = *S.getContext().classify(T: Call->getArg(Arg: 0)->getType());
871 APSInt RHS;
872 if (!popToAPSInt(Stk&: S.Stk, T: RHST, Out&: RHS))
873 return false;
874 APSInt LHS;
875 if (!popToAPSInt(Stk&: S.Stk, T: LHST, Out&: LHS))
876 return false;
877 QualType ResultType = Call->getArg(Arg: 2)->getType()->getPointeeType();
878 PrimType ResultT = *S.getContext().classify(T: ResultType);
879 bool Overflow;
880
881 APSInt Result;
882 if (BuiltinOp == Builtin::BI__builtin_add_overflow ||
883 BuiltinOp == Builtin::BI__builtin_sub_overflow ||
884 BuiltinOp == Builtin::BI__builtin_mul_overflow) {
885 bool IsSigned = LHS.isSigned() || RHS.isSigned() ||
886 ResultType->isSignedIntegerOrEnumerationType();
887 bool AllSigned = LHS.isSigned() && RHS.isSigned() &&
888 ResultType->isSignedIntegerOrEnumerationType();
889 uint64_t LHSSize = LHS.getBitWidth();
890 uint64_t RHSSize = RHS.getBitWidth();
891 uint64_t ResultSize = S.getASTContext().getIntWidth(T: ResultType);
892 uint64_t MaxBits = std::max(a: std::max(a: LHSSize, b: RHSSize), b: ResultSize);
893
894 // Add an additional bit if the signedness isn't uniformly agreed to. We
895 // could do this ONLY if there is a signed and an unsigned that both have
896 // MaxBits, but the code to check that is pretty nasty. The issue will be
897 // caught in the shrink-to-result later anyway.
898 if (IsSigned && !AllSigned)
899 ++MaxBits;
900
901 LHS = APSInt(LHS.extOrTrunc(width: MaxBits), !IsSigned);
902 RHS = APSInt(RHS.extOrTrunc(width: MaxBits), !IsSigned);
903 Result = APSInt(MaxBits, !IsSigned);
904 }
905
906 // Find largest int.
907 switch (BuiltinOp) {
908 default:
909 llvm_unreachable("Invalid value for BuiltinOp");
910 case Builtin::BI__builtin_add_overflow:
911 case Builtin::BI__builtin_sadd_overflow:
912 case Builtin::BI__builtin_saddl_overflow:
913 case Builtin::BI__builtin_saddll_overflow:
914 case Builtin::BI__builtin_uadd_overflow:
915 case Builtin::BI__builtin_uaddl_overflow:
916 case Builtin::BI__builtin_uaddll_overflow:
917 Result = LHS.isSigned() ? LHS.sadd_ov(RHS, Overflow)
918 : LHS.uadd_ov(RHS, Overflow);
919 break;
920 case Builtin::BI__builtin_sub_overflow:
921 case Builtin::BI__builtin_ssub_overflow:
922 case Builtin::BI__builtin_ssubl_overflow:
923 case Builtin::BI__builtin_ssubll_overflow:
924 case Builtin::BI__builtin_usub_overflow:
925 case Builtin::BI__builtin_usubl_overflow:
926 case Builtin::BI__builtin_usubll_overflow:
927 Result = LHS.isSigned() ? LHS.ssub_ov(RHS, Overflow)
928 : LHS.usub_ov(RHS, Overflow);
929 break;
930 case Builtin::BI__builtin_mul_overflow:
931 case Builtin::BI__builtin_smul_overflow:
932 case Builtin::BI__builtin_smull_overflow:
933 case Builtin::BI__builtin_smulll_overflow:
934 case Builtin::BI__builtin_umul_overflow:
935 case Builtin::BI__builtin_umull_overflow:
936 case Builtin::BI__builtin_umulll_overflow:
937 Result = LHS.isSigned() ? LHS.smul_ov(RHS, Overflow)
938 : LHS.umul_ov(RHS, Overflow);
939 break;
940 }
941
942 // In the case where multiple sizes are allowed, truncate and see if
943 // the values are the same.
944 if (BuiltinOp == Builtin::BI__builtin_add_overflow ||
945 BuiltinOp == Builtin::BI__builtin_sub_overflow ||
946 BuiltinOp == Builtin::BI__builtin_mul_overflow) {
947 // APSInt doesn't have a TruncOrSelf, so we use extOrTrunc instead,
948 // since it will give us the behavior of a TruncOrSelf in the case where
949 // its parameter <= its size. We previously set Result to be at least the
950 // integer width of the result, so getIntWidth(ResultType) <=
951 // Result.BitWidth
952 APSInt Temp = Result.extOrTrunc(width: S.getASTContext().getIntWidth(T: ResultType));
953 Temp.setIsSigned(ResultType->isSignedIntegerOrEnumerationType());
954
955 if (!APSInt::isSameValue(I1: Temp, I2: Result))
956 Overflow = true;
957 Result = std::move(Temp);
958 }
959
960 // Write Result to ResultPtr and put Overflow on the stack.
961 assignIntegral(S, Dest: ResultPtr, ValueT: ResultT, Value: Result);
962 if (ResultPtr.canBeInitialized())
963 ResultPtr.initialize();
964
965 assert(Call->getDirectCallee()->getReturnType()->isBooleanType());
966 S.Stk.push<Boolean>(Args&: Overflow);
967 return true;
968}
969
970/// Three integral values followed by a pointer (lhs, rhs, carry, carryOut).
971static bool interp__builtin_carryop(InterpState &S, CodePtr OpPC,
972 const InterpFrame *Frame,
973 const CallExpr *Call, unsigned BuiltinOp) {
974 const Pointer &CarryOutPtr = S.Stk.pop<Pointer>();
975 PrimType LHST = *S.getContext().classify(T: Call->getArg(Arg: 0)->getType());
976 PrimType RHST = *S.getContext().classify(T: Call->getArg(Arg: 1)->getType());
977 APSInt CarryIn;
978 if (!popToAPSInt(Stk&: S.Stk, T: LHST, Out&: CarryIn))
979 return false;
980 APSInt RHS;
981 if (!popToAPSInt(Stk&: S.Stk, T: RHST, Out&: RHS))
982 return false;
983 APSInt LHS;
984 if (!popToAPSInt(Stk&: S.Stk, T: LHST, Out&: LHS))
985 return false;
986
987 if (!isReadable(P: CarryOutPtr))
988 return false;
989
990 APSInt CarryOut;
991
992 APSInt Result;
993 // Copy the number of bits and sign.
994 Result = LHS;
995 CarryOut = LHS;
996
997 bool FirstOverflowed = false;
998 bool SecondOverflowed = false;
999 switch (BuiltinOp) {
1000 default:
1001 llvm_unreachable("Invalid value for BuiltinOp");
1002 case Builtin::BI__builtin_addcb:
1003 case Builtin::BI__builtin_addcs:
1004 case Builtin::BI__builtin_addc:
1005 case Builtin::BI__builtin_addcl:
1006 case Builtin::BI__builtin_addcll:
1007 Result =
1008 LHS.uadd_ov(RHS, Overflow&: FirstOverflowed).uadd_ov(RHS: CarryIn, Overflow&: SecondOverflowed);
1009 break;
1010 case Builtin::BI__builtin_subcb:
1011 case Builtin::BI__builtin_subcs:
1012 case Builtin::BI__builtin_subc:
1013 case Builtin::BI__builtin_subcl:
1014 case Builtin::BI__builtin_subcll:
1015 Result =
1016 LHS.usub_ov(RHS, Overflow&: FirstOverflowed).usub_ov(RHS: CarryIn, Overflow&: SecondOverflowed);
1017 break;
1018 }
1019 // It is possible for both overflows to happen but CGBuiltin uses an OR so
1020 // this is consistent.
1021 CarryOut = (uint64_t)(FirstOverflowed | SecondOverflowed);
1022
1023 QualType CarryOutType = Call->getArg(Arg: 3)->getType()->getPointeeType();
1024 PrimType CarryOutT = *S.getContext().classify(T: CarryOutType);
1025 assignIntegral(S, Dest: CarryOutPtr, ValueT: CarryOutT, Value: CarryOut);
1026 if (CarryOutPtr.canBeInitialized())
1027 CarryOutPtr.initialize();
1028
1029 assert(S.getASTContext().hasSimilarType(Call->getType(),
1030 Call->getArg(0)->getType()));
1031 pushInteger(S, Val: Result, QT: Call->getType());
1032 return true;
1033}
1034
1035static bool interp__builtin_clz(InterpState &S, CodePtr OpPC,
1036 const InterpFrame *Frame, const CallExpr *Call,
1037 unsigned BuiltinOp) {
1038
1039 std::optional<APSInt> Fallback;
1040 if (BuiltinOp == Builtin::BI__builtin_clzg && Call->getNumArgs() == 2) {
1041 APSInt FallbackVal;
1042 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: FallbackVal))
1043 return false;
1044 Fallback = FallbackVal;
1045 }
1046
1047 APSInt Val;
1048 if (Call->getArg(Arg: 0)->getType()->isExtVectorBoolType()) {
1049 const Pointer &Arg = S.Stk.pop<Pointer>();
1050 Val = convertBoolVectorToInt(Val: Arg);
1051 } else {
1052 if (!popToAPSInt(S, E: Call->getArg(Arg: 0), Out&: Val))
1053 return false;
1054 }
1055
1056 // When the argument is 0, the result of GCC builtins is undefined, whereas
1057 // for Microsoft intrinsics, the result is the bit-width of the argument.
1058 bool ZeroIsUndefined = BuiltinOp != Builtin::BI__lzcnt16 &&
1059 BuiltinOp != Builtin::BI__lzcnt &&
1060 BuiltinOp != Builtin::BI__lzcnt64;
1061
1062 if (Val == 0) {
1063 if (Fallback) {
1064 pushInteger(S, Val: *Fallback, QT: Call->getType());
1065 return true;
1066 }
1067
1068 if (ZeroIsUndefined)
1069 return false;
1070 }
1071
1072 pushInteger(S, Val: Val.countl_zero(), QT: Call->getType());
1073 return true;
1074}
1075
1076static bool interp__builtin_ctz(InterpState &S, CodePtr OpPC,
1077 const InterpFrame *Frame, const CallExpr *Call,
1078 unsigned BuiltinID) {
1079 std::optional<APSInt> Fallback;
1080 if (BuiltinID == Builtin::BI__builtin_ctzg && Call->getNumArgs() == 2) {
1081 APSInt FallbackVal;
1082 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: FallbackVal))
1083 return false;
1084 Fallback = FallbackVal;
1085 }
1086
1087 APSInt Val;
1088 if (Call->getArg(Arg: 0)->getType()->isExtVectorBoolType()) {
1089 const Pointer &Arg = S.Stk.pop<Pointer>();
1090 Val = convertBoolVectorToInt(Val: Arg);
1091 } else {
1092 if (!popToAPSInt(S, E: Call->getArg(Arg: 0), Out&: Val))
1093 return false;
1094 }
1095
1096 if (Val == 0) {
1097 if (Fallback) {
1098 pushInteger(S, Val: *Fallback, QT: Call->getType());
1099 return true;
1100 }
1101 return false;
1102 }
1103
1104 pushInteger(S, Val: Val.countr_zero(), QT: Call->getType());
1105 return true;
1106}
1107
1108static bool interp__builtin_bswap(InterpState &S, CodePtr OpPC,
1109 const InterpFrame *Frame,
1110 const CallExpr *Call) {
1111 APSInt Val;
1112 if (!popToAPSInt(S, E: Call->getArg(Arg: 0), Out&: Val))
1113 return false;
1114 if (Val.getBitWidth() == 8 || Val.getBitWidth() == 1)
1115 pushInteger(S, Val, QT: Call->getType());
1116 else
1117 pushInteger(S, Val: Val.byteSwap(), QT: Call->getType());
1118 return true;
1119}
1120
1121/// bool __atomic_always_lock_free(size_t, void const volatile*)
1122/// bool __atomic_is_lock_free(size_t, void const volatile*)
1123static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC,
1124 const InterpFrame *Frame,
1125 const CallExpr *Call,
1126 unsigned BuiltinOp) {
1127 auto returnBool = [&S](bool Value) -> bool {
1128 S.Stk.push<Boolean>(Args&: Value);
1129 return true;
1130 };
1131
1132 const Pointer &Ptr = S.Stk.pop<Pointer>();
1133 uint64_t SizeVal;
1134 if (!popToUInt64(S, E: Call->getArg(Arg: 0), Out&: SizeVal))
1135 return false;
1136
1137 // For __atomic_is_lock_free(sizeof(_Atomic(T))), if the size is a power
1138 // of two less than or equal to the maximum inline atomic width, we know it
1139 // is lock-free. If the size isn't a power of two, or greater than the
1140 // maximum alignment where we promote atomics, we know it is not lock-free
1141 // (at least not in the sense of atomic_is_lock_free). Otherwise,
1142 // the answer can only be determined at runtime; for example, 16-byte
1143 // atomics have lock-free implementations on some, but not all,
1144 // x86-64 processors.
1145
1146 // Check power-of-two.
1147 CharUnits Size = CharUnits::fromQuantity(Quantity: SizeVal);
1148 if (Size.isPowerOfTwo()) {
1149 // Check against inlining width.
1150 unsigned InlineWidthBits =
1151 S.getASTContext().getTargetInfo().getMaxAtomicInlineWidth();
1152 if (Size <= S.getASTContext().toCharUnitsFromBits(BitSize: InlineWidthBits)) {
1153
1154 // OK, we will inline appropriately-aligned operations of this size,
1155 // and _Atomic(T) is appropriately-aligned.
1156 if (Size == CharUnits::One())
1157 return returnBool(true);
1158
1159 // Same for null pointers.
1160 assert(BuiltinOp != Builtin::BI__c11_atomic_is_lock_free);
1161 if (Ptr.isZero())
1162 return returnBool(true);
1163
1164 if (Ptr.isIntegralPointer()) {
1165 uint64_t IntVal = Ptr.getIntegerRepresentation();
1166 if (APSInt(APInt(64, IntVal, false), true).isAligned(A: Size.getAsAlign()))
1167 return returnBool(true);
1168 }
1169
1170 const Expr *PtrArg = Call->getArg(Arg: 1);
1171 // Otherwise, check if the type's alignment against Size.
1172 if (const auto *ICE = dyn_cast<ImplicitCastExpr>(Val: PtrArg)) {
1173 // Drop the potential implicit-cast to 'const volatile void*', getting
1174 // the underlying type.
1175 if (ICE->getCastKind() == CK_BitCast)
1176 PtrArg = ICE->getSubExpr();
1177 }
1178
1179 if (const auto *PtrTy = PtrArg->getType()->getAs<PointerType>()) {
1180 QualType PointeeType = PtrTy->getPointeeType();
1181 if (!PointeeType->isIncompleteType() &&
1182 S.getASTContext().getTypeAlignInChars(T: PointeeType) >= Size) {
1183 // OK, we will inline operations on this object.
1184 return returnBool(true);
1185 }
1186 }
1187 }
1188 }
1189
1190 if (BuiltinOp == Builtin::BI__atomic_always_lock_free)
1191 return returnBool(false);
1192
1193 return Invalid(S, OpPC);
1194}
1195
1196/// bool __c11_atomic_is_lock_free(size_t)
1197static bool interp__builtin_c11_atomic_is_lock_free(InterpState &S,
1198 CodePtr OpPC,
1199 const InterpFrame *Frame,
1200 const CallExpr *Call) {
1201 uint64_t SizeVal;
1202 if (!popToUInt64(S, E: Call->getArg(Arg: 0), Out&: SizeVal))
1203 return false;
1204
1205 CharUnits Size = CharUnits::fromQuantity(Quantity: SizeVal);
1206 if (Size.isPowerOfTwo()) {
1207 // Check against inlining width.
1208 unsigned InlineWidthBits =
1209 S.getASTContext().getTargetInfo().getMaxAtomicInlineWidth();
1210 if (Size <= S.getASTContext().toCharUnitsFromBits(BitSize: InlineWidthBits)) {
1211 S.Stk.push<Boolean>(Args: true);
1212 return true;
1213 }
1214 }
1215
1216 return false; // returnBool(false);
1217}
1218
1219/// __builtin_complex(Float A, float B);
1220static bool interp__builtin_complex(InterpState &S, CodePtr OpPC,
1221 const InterpFrame *Frame,
1222 const CallExpr *Call) {
1223 const Floating &Arg2 = S.Stk.pop<Floating>();
1224 const Floating &Arg1 = S.Stk.pop<Floating>();
1225 Pointer &Result = S.Stk.peek<Pointer>();
1226
1227 Result.elem<Floating>(I: 0) = Arg1;
1228 Result.elem<Floating>(I: 1) = Arg2;
1229 Result.initializeAllElements();
1230
1231 return true;
1232}
1233
1234/// __builtin_is_aligned()
1235/// __builtin_align_up()
1236/// __builtin_align_down()
1237/// The first parameter is either an integer or a pointer.
1238/// The second parameter is the requested alignment as an integer.
1239static bool interp__builtin_is_aligned_up_down(InterpState &S, CodePtr OpPC,
1240 const InterpFrame *Frame,
1241 const CallExpr *Call,
1242 unsigned BuiltinOp) {
1243 APSInt Alignment;
1244 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: Alignment))
1245 return false;
1246
1247 if (Alignment < 0 || !Alignment.isPowerOf2()) {
1248 S.FFDiag(E: Call, DiagId: diag::note_constexpr_invalid_alignment) << Alignment;
1249 return false;
1250 }
1251 unsigned SrcWidth = S.getASTContext().getIntWidth(T: Call->getArg(Arg: 0)->getType());
1252 APSInt MaxValue(APInt::getOneBitSet(numBits: SrcWidth, BitNo: SrcWidth - 1));
1253 if (APSInt::compareValues(I1: Alignment, I2: MaxValue) > 0) {
1254 S.FFDiag(E: Call, DiagId: diag::note_constexpr_alignment_too_big)
1255 << MaxValue << Call->getArg(Arg: 0)->getType() << Alignment;
1256 return false;
1257 }
1258
1259 // The first parameter is either an integer or a pointer.
1260 PrimType FirstArgT = *S.Ctx.classify(E: Call->getArg(Arg: 0));
1261
1262 if (isIntegerType(T: FirstArgT)) {
1263 APSInt Src;
1264 if (!popToAPSInt(Stk&: S.Stk, T: FirstArgT, Out&: Src))
1265 return false;
1266 APInt AlignMinusOne = Alignment.extOrTrunc(width: Src.getBitWidth()) - 1;
1267 if (BuiltinOp == Builtin::BI__builtin_align_up) {
1268 APSInt AlignedVal =
1269 APSInt((Src + AlignMinusOne) & ~AlignMinusOne, Src.isUnsigned());
1270 pushInteger(S, Val: AlignedVal, QT: Call->getType());
1271 } else if (BuiltinOp == Builtin::BI__builtin_align_down) {
1272 APSInt AlignedVal = APSInt(Src & ~AlignMinusOne, Src.isUnsigned());
1273 pushInteger(S, Val: AlignedVal, QT: Call->getType());
1274 } else {
1275 assert(*S.Ctx.classify(Call->getType()) == PT_Bool);
1276 S.Stk.push<Boolean>(Args: (Src & AlignMinusOne) == 0);
1277 }
1278 return true;
1279 }
1280 assert(FirstArgT == PT_Ptr);
1281 const Pointer &Ptr = S.Stk.pop<Pointer>();
1282 if (!Ptr.isBlockPointer())
1283 return false;
1284
1285 const ValueDecl *PtrDecl = Ptr.getDeclDesc()->asValueDecl();
1286 // We need a pointer for a declaration here.
1287 if (!PtrDecl) {
1288 if (BuiltinOp == Builtin::BI__builtin_is_aligned)
1289 S.FFDiag(E: Call->getArg(Arg: 0), DiagId: diag::note_constexpr_alignment_compute)
1290 << Alignment;
1291 else
1292 S.FFDiag(E: Call->getArg(Arg: 0), DiagId: diag::note_constexpr_alignment_adjust)
1293 << Alignment;
1294 return false;
1295 }
1296
1297 // For one-past-end pointers, we can't call getIndex() since it asserts.
1298 // Use getNumElems() instead which gives the correct index for past-end.
1299 unsigned PtrOffset =
1300 Ptr.isElementPastEnd() ? Ptr.getNumElems() : Ptr.getIndex();
1301 CharUnits BaseAlignment = S.getASTContext().getDeclAlign(D: PtrDecl);
1302 CharUnits PtrAlign =
1303 BaseAlignment.alignmentAtOffset(offset: CharUnits::fromQuantity(Quantity: PtrOffset));
1304
1305 if (BuiltinOp == Builtin::BI__builtin_is_aligned) {
1306 if (PtrAlign.getQuantity() >= Alignment) {
1307 S.Stk.push<Boolean>(Args: true);
1308 return true;
1309 }
1310 // If the alignment is not known to be sufficient, some cases could still
1311 // be aligned at run time. However, if the requested alignment is less or
1312 // equal to the base alignment and the offset is not aligned, we know that
1313 // the run-time value can never be aligned.
1314 if (BaseAlignment.getQuantity() >= Alignment &&
1315 PtrAlign.getQuantity() < Alignment) {
1316 S.Stk.push<Boolean>(Args: false);
1317 return true;
1318 }
1319
1320 S.FFDiag(E: Call->getArg(Arg: 0), DiagId: diag::note_constexpr_alignment_compute)
1321 << Alignment;
1322 return false;
1323 }
1324
1325 assert(BuiltinOp == Builtin::BI__builtin_align_down ||
1326 BuiltinOp == Builtin::BI__builtin_align_up);
1327
1328 // For align_up/align_down, we can return the same value if the alignment
1329 // is known to be greater or equal to the requested value.
1330 if (PtrAlign.getQuantity() >= Alignment) {
1331 S.Stk.push<Pointer>(Args: Ptr);
1332 return true;
1333 }
1334
1335 // The alignment could be greater than the minimum at run-time, so we cannot
1336 // infer much about the resulting pointer value. One case is possible:
1337 // For `_Alignas(32) char buf[N]; __builtin_align_down(&buf[idx], 32)` we
1338 // can infer the correct index if the requested alignment is smaller than
1339 // the base alignment so we can perform the computation on the offset.
1340 if (BaseAlignment.getQuantity() >= Alignment) {
1341 assert(Alignment.getBitWidth() <= 64 &&
1342 "Cannot handle > 64-bit address-space");
1343 uint64_t Alignment64 = Alignment.getZExtValue();
1344 CharUnits NewOffset =
1345 CharUnits::fromQuantity(Quantity: BuiltinOp == Builtin::BI__builtin_align_down
1346 ? llvm::alignDown(Value: PtrOffset, Align: Alignment64)
1347 : llvm::alignTo(Value: PtrOffset, Align: Alignment64));
1348
1349 S.Stk.push<Pointer>(Args: Ptr.atIndex(Idx: NewOffset.getQuantity()));
1350 return true;
1351 }
1352
1353 // Otherwise, we cannot constant-evaluate the result.
1354 S.FFDiag(E: Call->getArg(Arg: 0), DiagId: diag::note_constexpr_alignment_adjust) << Alignment;
1355 return false;
1356}
1357
1358/// __builtin_assume_aligned(Ptr, Alignment[, ExtraOffset])
1359static bool interp__builtin_assume_aligned(InterpState &S, CodePtr OpPC,
1360 const InterpFrame *Frame,
1361 const CallExpr *Call) {
1362 assert(Call->getNumArgs() == 2 || Call->getNumArgs() == 3);
1363
1364 std::optional<APSInt> ExtraOffset;
1365 if (Call->getNumArgs() == 3) {
1366 APSInt ExtraOffsetVal;
1367 if (!popToAPSInt(Stk&: S.Stk, T: *S.Ctx.classify(E: Call->getArg(Arg: 2)), Out&: ExtraOffsetVal))
1368 return false;
1369 ExtraOffset = ExtraOffsetVal;
1370 }
1371
1372 APSInt Alignment;
1373 if (!popToAPSInt(Stk&: S.Stk, T: *S.Ctx.classify(E: Call->getArg(Arg: 1)), Out&: Alignment))
1374 return false;
1375 const Pointer &Ptr = S.Stk.pop<Pointer>();
1376
1377 CharUnits Align = CharUnits::fromQuantity(Quantity: Alignment.getZExtValue());
1378
1379 // If there is a base object, then it must have the correct alignment.
1380 if (Ptr.isBlockPointer()) {
1381 CharUnits BaseAlignment;
1382 if (const auto *VD = Ptr.getDeclDesc()->asValueDecl())
1383 BaseAlignment = S.getASTContext().getDeclAlign(D: VD);
1384 else if (const auto *E = Ptr.getDeclDesc()->asExpr())
1385 BaseAlignment = GetAlignOfExpr(Ctx: S.getASTContext(), E, ExprKind: UETT_AlignOf);
1386
1387 if (BaseAlignment < Align) {
1388 S.CCEDiag(E: Call->getArg(Arg: 0),
1389 DiagId: diag::note_constexpr_baa_insufficient_alignment)
1390 << 0 << BaseAlignment.getQuantity() << Align.getQuantity();
1391 return false;
1392 }
1393 }
1394
1395 APValue AV = Ptr.toAPValue(ASTCtx: S.getASTContext());
1396 CharUnits AVOffset = AV.getLValueOffset();
1397 if (ExtraOffset)
1398 AVOffset -= CharUnits::fromQuantity(Quantity: ExtraOffset->getZExtValue());
1399 if (AVOffset.alignTo(Align) != AVOffset) {
1400 if (Ptr.isBlockPointer())
1401 S.CCEDiag(E: Call->getArg(Arg: 0),
1402 DiagId: diag::note_constexpr_baa_insufficient_alignment)
1403 << 1 << AVOffset.getQuantity() << Align.getQuantity();
1404 else
1405 S.CCEDiag(E: Call->getArg(Arg: 0),
1406 DiagId: diag::note_constexpr_baa_value_insufficient_alignment)
1407 << AVOffset.getQuantity() << Align.getQuantity();
1408 return false;
1409 }
1410
1411 S.Stk.push<Pointer>(Args: Ptr);
1412 return true;
1413}
1414
1415/// (CarryIn, LHS, RHS, Result)
1416static bool interp__builtin_ia32_addcarry_subborrow(InterpState &S,
1417 CodePtr OpPC,
1418 const InterpFrame *Frame,
1419 const CallExpr *Call,
1420 bool IsAdd) {
1421 if (Call->getNumArgs() != 4 || !Call->getArg(Arg: 0)->getType()->isIntegerType() ||
1422 !Call->getArg(Arg: 1)->getType()->isIntegerType() ||
1423 !Call->getArg(Arg: 2)->getType()->isIntegerType())
1424 return false;
1425
1426 const Pointer &CarryOutPtr = S.Stk.pop<Pointer>();
1427
1428 APSInt RHS;
1429 if (!popToAPSInt(S, E: Call->getArg(Arg: 2), Out&: RHS))
1430 return false;
1431 APSInt LHS;
1432 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: LHS))
1433 return false;
1434 APSInt CarryIn;
1435 if (!popToAPSInt(S, E: Call->getArg(Arg: 0), Out&: CarryIn))
1436 return false;
1437
1438 unsigned BitWidth = LHS.getBitWidth();
1439 unsigned CarryInBit = CarryIn.ugt(RHS: 0) ? 1 : 0;
1440 APInt ExResult =
1441 IsAdd ? (LHS.zext(width: BitWidth + 1) + (RHS.zext(width: BitWidth + 1) + CarryInBit))
1442 : (LHS.zext(width: BitWidth + 1) - (RHS.zext(width: BitWidth + 1) + CarryInBit));
1443
1444 APInt Result = ExResult.extractBits(numBits: BitWidth, bitPosition: 0);
1445 APSInt CarryOut =
1446 APSInt(ExResult.extractBits(numBits: 1, bitPosition: BitWidth), /*IsUnsigned=*/true);
1447
1448 QualType CarryOutType = Call->getArg(Arg: 3)->getType()->getPointeeType();
1449 PrimType CarryOutT = *S.getContext().classify(T: CarryOutType);
1450 assignIntegral(S, Dest: CarryOutPtr, ValueT: CarryOutT, Value: APSInt(std::move(Result), true));
1451
1452 pushInteger(S, Val: CarryOut, QT: Call->getType());
1453
1454 return true;
1455}
1456
1457static bool interp__builtin_os_log_format_buffer_size(InterpState &S,
1458 CodePtr OpPC,
1459 const InterpFrame *Frame,
1460 const CallExpr *Call) {
1461 analyze_os_log::OSLogBufferLayout Layout;
1462 analyze_os_log::computeOSLogBufferLayout(Ctx&: S.getASTContext(), E: Call, layout&: Layout);
1463 pushInteger(S, Val: Layout.size().getQuantity(), QT: Call->getType());
1464 return true;
1465}
1466
1467static bool
1468interp__builtin_ptrauth_string_discriminator(InterpState &S, CodePtr OpPC,
1469 const InterpFrame *Frame,
1470 const CallExpr *Call) {
1471 const auto &Ptr = S.Stk.pop<Pointer>();
1472 assert(Ptr.getFieldDesc()->isPrimitiveArray());
1473
1474 // This should be created for a StringLiteral, so always holds at least
1475 // one array element.
1476 assert(Ptr.getFieldDesc()->getNumElems() >= 1);
1477 uint64_t Result = getPointerAuthStableSipHash(
1478 S: cast<StringLiteral>(Val: Ptr.getFieldDesc()->asExpr())->getString());
1479 pushInteger(S, Val: Result, QT: Call->getType());
1480 return true;
1481}
1482
1483static bool interp__builtin_infer_alloc_token(InterpState &S, CodePtr OpPC,
1484 const InterpFrame *Frame,
1485 const CallExpr *Call) {
1486 const ASTContext &ASTCtx = S.getASTContext();
1487 uint64_t BitWidth = ASTCtx.getTypeSize(T: ASTCtx.getSizeType());
1488 auto Mode =
1489 ASTCtx.getLangOpts().AllocTokenMode.value_or(u: llvm::DefaultAllocTokenMode);
1490 auto MaxTokensOpt = ASTCtx.getLangOpts().AllocTokenMax;
1491 uint64_t MaxTokens =
1492 MaxTokensOpt.value_or(u: 0) ? *MaxTokensOpt : (~0ULL >> (64 - BitWidth));
1493
1494 // We do not read any of the arguments; discard them.
1495 for (int I = Call->getNumArgs() - 1; I >= 0; --I)
1496 discard(Stk&: S.Stk, T: S.getContext().classify(E: Call->getArg(Arg: I)).value_or(PT: PT_Ptr));
1497
1498 // Note: Type inference from a surrounding cast is not supported in
1499 // constexpr evaluation.
1500 QualType AllocType = infer_alloc::inferPossibleType(E: Call, Ctx: ASTCtx, CastE: nullptr);
1501 if (AllocType.isNull()) {
1502 S.CCEDiag(E: Call,
1503 DiagId: diag::note_constexpr_infer_alloc_token_type_inference_failed);
1504 return false;
1505 }
1506
1507 auto ATMD = infer_alloc::getAllocTokenMetadata(T: AllocType, Ctx: ASTCtx);
1508 if (!ATMD) {
1509 S.CCEDiag(E: Call, DiagId: diag::note_constexpr_infer_alloc_token_no_metadata);
1510 return false;
1511 }
1512
1513 auto MaybeToken = llvm::getAllocToken(Mode, Metadata: *ATMD, MaxTokens);
1514 if (!MaybeToken) {
1515 S.CCEDiag(E: Call, DiagId: diag::note_constexpr_infer_alloc_token_stateful_mode);
1516 return false;
1517 }
1518
1519 pushInteger(S, Val: llvm::APInt(BitWidth, *MaybeToken), QT: ASTCtx.getSizeType());
1520 return true;
1521}
1522
1523static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC,
1524 const InterpFrame *Frame,
1525 const CallExpr *Call) {
1526 // A call to __operator_new is only valid within std::allocate<>::allocate.
1527 // Walk up the call stack to find the appropriate caller and get the
1528 // element type from it.
1529 auto [NewCall, ElemType] = S.getStdAllocatorCaller(Name: "allocate");
1530
1531 if (ElemType.isNull()) {
1532 S.FFDiag(E: Call, DiagId: S.getLangOpts().CPlusPlus20
1533 ? diag::note_constexpr_new_untyped
1534 : diag::note_constexpr_new);
1535 return false;
1536 }
1537 assert(NewCall);
1538
1539 if (ElemType->isIncompleteType() || ElemType->isFunctionType()) {
1540 S.FFDiag(E: Call, DiagId: diag::note_constexpr_new_not_complete_object_type)
1541 << (ElemType->isIncompleteType() ? 0 : 1) << ElemType;
1542 return false;
1543 }
1544
1545 // We only care about the first parameter (the size), so discard all the
1546 // others.
1547 {
1548 unsigned NumArgs = Call->getNumArgs();
1549 assert(NumArgs >= 1);
1550
1551 // The std::nothrow_t arg never gets put on the stack.
1552 if (Call->getArg(Arg: NumArgs - 1)->getType()->isNothrowT())
1553 --NumArgs;
1554 auto Args = ArrayRef(Call->getArgs(), Call->getNumArgs());
1555 // First arg is needed.
1556 Args = Args.drop_front();
1557
1558 // Discard the rest.
1559 for (const Expr *Arg : Args)
1560 discard(Stk&: S.Stk, T: *S.getContext().classify(E: Arg));
1561 }
1562
1563 APSInt Bytes;
1564 if (!popToAPSInt(S, E: Call->getArg(Arg: 0), Out&: Bytes))
1565 return false;
1566 CharUnits ElemSize = S.getASTContext().getTypeSizeInChars(T: ElemType);
1567 assert(!ElemSize.isZero());
1568 // Divide the number of bytes by sizeof(ElemType), so we get the number of
1569 // elements we should allocate.
1570 APInt NumElems, Remainder;
1571 APInt ElemSizeAP(Bytes.getBitWidth(), ElemSize.getQuantity());
1572 APInt::udivrem(LHS: Bytes, RHS: ElemSizeAP, Quotient&: NumElems, Remainder);
1573 if (Remainder != 0) {
1574 // This likely indicates a bug in the implementation of 'std::allocator'.
1575 S.FFDiag(E: Call, DiagId: diag::note_constexpr_operator_new_bad_size)
1576 << Bytes << APSInt(ElemSizeAP, true) << ElemType;
1577 return false;
1578 }
1579
1580 // NB: The same check we're using in CheckArraySize()
1581 if (NumElems.getActiveBits() >
1582 ConstantArrayType::getMaxSizeBits(Context: S.getASTContext()) ||
1583 NumElems.ugt(RHS: Descriptor::MaxArrayElemBytes / ElemSize.getQuantity())) {
1584 // FIXME: NoThrow check?
1585 const SourceInfo &Loc = S.Current->getSource(PC: OpPC);
1586 S.FFDiag(SI: Loc, DiagId: diag::note_constexpr_new_too_large)
1587 << NumElems.getZExtValue();
1588 return false;
1589 }
1590
1591 if (!CheckArraySize(S, OpPC, NumElems: NumElems.getZExtValue()))
1592 return false;
1593
1594 bool IsArray = NumElems.ugt(RHS: 1);
1595 OptPrimType ElemT = S.getContext().classify(T: ElemType);
1596 DynamicAllocator &Allocator = S.getAllocator();
1597 if (ElemT) {
1598 Block *B =
1599 Allocator.allocate(Source: NewCall, T: *ElemT, NumElements: NumElems.getZExtValue(),
1600 EvalID: S.Ctx.getEvalID(), AllocForm: DynamicAllocator::Form::Operator);
1601 assert(B);
1602 S.Stk.push<Pointer>(Args: Pointer(B).atIndex(Idx: 0));
1603 return true;
1604 }
1605
1606 assert(!ElemT);
1607
1608 // Composite arrays
1609 if (IsArray) {
1610 const Descriptor *Desc =
1611 S.P.createDescriptor(D: NewCall, Ty: ElemType.getTypePtr(), MDSize: std::nullopt);
1612 Block *B =
1613 Allocator.allocate(D: Desc, NumElements: NumElems.getZExtValue(), EvalID: S.Ctx.getEvalID(),
1614 AllocForm: DynamicAllocator::Form::Operator);
1615 assert(B);
1616 S.Stk.push<Pointer>(Args: Pointer(B).atIndex(Idx: 0).narrow());
1617 return true;
1618 }
1619
1620 // Records. Still allocate them as single-element arrays.
1621 QualType AllocType = S.getASTContext().getConstantArrayType(
1622 EltTy: ElemType, ArySize: NumElems, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);
1623
1624 const Descriptor *Desc = S.P.createDescriptor(D: NewCall, Ty: AllocType.getTypePtr(),
1625 MDSize: Descriptor::InlineDescMD);
1626 Block *B = Allocator.allocate(D: Desc, EvalID: S.getContext().getEvalID(),
1627 AllocForm: DynamicAllocator::Form::Operator);
1628 assert(B);
1629 S.Stk.push<Pointer>(Args: Pointer(B).atIndex(Idx: 0).narrow());
1630 return true;
1631}
1632
1633static bool interp__builtin_operator_delete(InterpState &S, CodePtr OpPC,
1634 const InterpFrame *Frame,
1635 const CallExpr *Call) {
1636 const Expr *Source = nullptr;
1637 const Block *BlockToDelete = nullptr;
1638
1639 unsigned NumArgs = Call->getNumArgs();
1640 assert(NumArgs >= 1);
1641
1642 // Args are pushed in source order. The trailing sized/aligned delete
1643 // operands are above the pointer on the stack.
1644 for (unsigned I = NumArgs - 1; I != 0; --I)
1645 discard(Stk&: S.Stk, T: *S.getContext().classify(E: Call->getArg(Arg: I)));
1646
1647 if (S.checkingPotentialConstantExpression()) {
1648 S.Stk.discard<Pointer>();
1649 return false;
1650 }
1651
1652 // This is permitted only within a call to std::allocator<T>::deallocate.
1653 if (!S.getStdAllocatorCaller(Name: "deallocate")) {
1654 S.FFDiag(E: Call);
1655 S.Stk.discard<Pointer>();
1656 return true;
1657 }
1658
1659 {
1660 const Pointer &Ptr = S.Stk.pop<Pointer>();
1661
1662 if (Ptr.isZero()) {
1663 S.CCEDiag(E: Call, DiagId: diag::note_constexpr_deallocate_null);
1664 return true;
1665 }
1666
1667 Source = Ptr.getDeclDesc()->asExpr();
1668 BlockToDelete = Ptr.block();
1669
1670 if (!BlockToDelete->isDynamic()) {
1671 S.FFDiag(E: Call, DiagId: diag::note_constexpr_delete_not_heap_alloc)
1672 << Ptr.toDiagnosticString(Ctx: S.getASTContext());
1673 if (const auto *D = Ptr.getFieldDesc()->asDecl())
1674 S.Note(Loc: D->getLocation(), DiagId: diag::note_declared_at);
1675 }
1676 }
1677 assert(BlockToDelete);
1678
1679 DynamicAllocator &Allocator = S.getAllocator();
1680 const Descriptor *BlockDesc = BlockToDelete->getDescriptor();
1681 std::optional<DynamicAllocator::Form> AllocForm =
1682 Allocator.getAllocationForm(Source);
1683
1684 if (!Allocator.deallocate(Source, BlockToDelete)) {
1685 // Nothing has been deallocated, this must be a double-delete.
1686 const SourceInfo &Loc = S.Current->getSource(PC: OpPC);
1687 S.FFDiag(SI: Loc, DiagId: diag::note_constexpr_double_delete);
1688 return false;
1689 }
1690 assert(AllocForm);
1691
1692 return CheckNewDeleteForms(
1693 S, OpPC, AllocForm: *AllocForm, DeleteForm: DynamicAllocator::Form::Operator, D: BlockDesc, NewExpr: Source);
1694}
1695
1696static bool interp__builtin_arithmetic_fence(InterpState &S, CodePtr OpPC,
1697 const InterpFrame *Frame,
1698 const CallExpr *Call) {
1699 const Floating &Arg0 = S.Stk.pop<Floating>();
1700 S.Stk.push<Floating>(Args: Arg0);
1701 return true;
1702}
1703
1704static bool interp__builtin_vector_reduce(InterpState &S, CodePtr OpPC,
1705 const CallExpr *Call, unsigned ID) {
1706 const Pointer &Arg = S.Stk.pop<Pointer>();
1707 assert(Arg.getFieldDesc()->isPrimitiveArray());
1708
1709 QualType ElemType = Arg.getFieldDesc()->getElemQualType();
1710 assert(Call->getType() == ElemType);
1711 PrimType ElemT = *S.getContext().classify(T: ElemType);
1712 unsigned NumElems = Arg.getNumElems();
1713
1714 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
1715 T Result = Arg.elem<T>(0);
1716 unsigned BitWidth = Result.bitWidth();
1717 for (unsigned I = 1; I != NumElems; ++I) {
1718 T Elem = Arg.elem<T>(I);
1719 T PrevResult = Result;
1720
1721 if (ID == Builtin::BI__builtin_reduce_add) {
1722 if (T::add(Result, Elem, BitWidth, &Result)) {
1723 unsigned OverflowBits = BitWidth + 1;
1724 (void)handleOverflow(S, OpPC,
1725 (PrevResult.toAPSInt(OverflowBits) +
1726 Elem.toAPSInt(OverflowBits)));
1727 return false;
1728 }
1729 } else if (ID == Builtin::BI__builtin_reduce_mul) {
1730 if (T::mul(Result, Elem, BitWidth, &Result)) {
1731 unsigned OverflowBits = BitWidth * 2;
1732 (void)handleOverflow(S, OpPC,
1733 (PrevResult.toAPSInt(OverflowBits) *
1734 Elem.toAPSInt(OverflowBits)));
1735 return false;
1736 }
1737
1738 } else if (ID == Builtin::BI__builtin_reduce_and) {
1739 (void)T::bitAnd(Result, Elem, BitWidth, &Result);
1740 } else if (ID == Builtin::BI__builtin_reduce_or) {
1741 (void)T::bitOr(Result, Elem, BitWidth, &Result);
1742 } else if (ID == Builtin::BI__builtin_reduce_xor) {
1743 (void)T::bitXor(Result, Elem, BitWidth, &Result);
1744 } else if (ID == Builtin::BI__builtin_reduce_min) {
1745 if (Elem < Result)
1746 Result = Elem;
1747 } else if (ID == Builtin::BI__builtin_reduce_max) {
1748 if (Elem > Result)
1749 Result = Elem;
1750 } else {
1751 llvm_unreachable("Unhandled vector reduce builtin");
1752 }
1753 }
1754 pushInteger(S, Result.toAPSInt(), Call->getType());
1755 });
1756
1757 return true;
1758}
1759
1760static bool interp__builtin_elementwise_abs(InterpState &S, CodePtr OpPC,
1761 const InterpFrame *Frame,
1762 const CallExpr *Call,
1763 unsigned BuiltinID) {
1764 assert(Call->getNumArgs() == 1);
1765 QualType Ty = Call->getArg(Arg: 0)->getType();
1766 if (Ty->isIntegerType()) {
1767 APSInt Val;
1768 if (!popToAPSInt(S, E: Call->getArg(Arg: 0), Out&: Val))
1769 return false;
1770 pushInteger(S, Val: Val.abs(), QT: Call->getType());
1771 return true;
1772 }
1773
1774 if (Ty->isFloatingType()) {
1775 Floating Val = S.Stk.pop<Floating>();
1776 Floating Result = abs(S, In: Val);
1777 S.Stk.push<Floating>(Args&: Result);
1778 return true;
1779 }
1780
1781 // Otherwise, the argument must be a vector.
1782 assert(Call->getArg(0)->getType()->isVectorType());
1783 const Pointer &Arg = S.Stk.pop<Pointer>();
1784 assert(Arg.getFieldDesc()->isPrimitiveArray());
1785 const Pointer &Dst = S.Stk.peek<Pointer>();
1786 assert(Dst.getFieldDesc()->isPrimitiveArray());
1787 assert(Arg.getFieldDesc()->getNumElems() ==
1788 Dst.getFieldDesc()->getNumElems());
1789
1790 QualType ElemType = Arg.getFieldDesc()->getElemQualType();
1791 PrimType ElemT = *S.getContext().classify(T: ElemType);
1792 unsigned NumElems = Arg.getNumElems();
1793 // we can either have a vector of integer or a vector of floating point
1794 for (unsigned I = 0; I != NumElems; ++I) {
1795 if (ElemType->isIntegerType()) {
1796 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
1797 Dst.elem<T>(I) = T::from(static_cast<T>(
1798 APSInt(Arg.elem<T>(I).toAPSInt().abs(),
1799 ElemType->isUnsignedIntegerOrEnumerationType())));
1800 });
1801 } else {
1802 Floating Val = Arg.elem<Floating>(I);
1803 Dst.elem<Floating>(I) = abs(S, In: Val);
1804 }
1805 }
1806 Dst.initializeAllElements();
1807
1808 return true;
1809}
1810
1811/// Can be called with an integer or vector as the first and only parameter.
1812static bool interp__builtin_elementwise_countzeroes(InterpState &S,
1813 CodePtr OpPC,
1814 const InterpFrame *Frame,
1815 const CallExpr *Call,
1816 unsigned BuiltinID) {
1817 bool HasZeroArg = Call->getNumArgs() == 2;
1818 bool IsCTTZ = BuiltinID == Builtin::BI__builtin_elementwise_ctzg;
1819 assert(Call->getNumArgs() == 1 || HasZeroArg);
1820 if (Call->getArg(Arg: 0)->getType()->isIntegerType()) {
1821 PrimType ArgT = *S.getContext().classify(T: Call->getArg(Arg: 0)->getType());
1822 APSInt Val;
1823 if (!popToAPSInt(Stk&: S.Stk, T: ArgT, Out&: Val))
1824 return false;
1825 std::optional<APSInt> ZeroVal;
1826 if (HasZeroArg) {
1827 ZeroVal = Val;
1828 if (!popToAPSInt(Stk&: S.Stk, T: ArgT, Out&: Val))
1829 return false;
1830 }
1831
1832 if (Val.isZero()) {
1833 if (ZeroVal) {
1834 pushInteger(S, Val: *ZeroVal, QT: Call->getType());
1835 return true;
1836 }
1837 // If we haven't been provided the second argument, the result is
1838 // undefined
1839 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1840 DiagId: diag::note_constexpr_countzeroes_zero)
1841 << /*IsTrailing=*/IsCTTZ;
1842 return false;
1843 }
1844
1845 if (BuiltinID == Builtin::BI__builtin_elementwise_clzg) {
1846 pushInteger(S, Val: Val.countLeadingZeros(), QT: Call->getType());
1847 } else {
1848 pushInteger(S, Val: Val.countTrailingZeros(), QT: Call->getType());
1849 }
1850 return true;
1851 }
1852 // Otherwise, the argument must be a vector.
1853 const ASTContext &ASTCtx = S.getASTContext();
1854 Pointer ZeroArg;
1855 if (HasZeroArg) {
1856 assert(Call->getArg(1)->getType()->isVectorType() &&
1857 ASTCtx.hasSameUnqualifiedType(Call->getArg(0)->getType(),
1858 Call->getArg(1)->getType()));
1859 (void)ASTCtx;
1860 ZeroArg = S.Stk.pop<Pointer>();
1861 assert(ZeroArg.getFieldDesc()->isPrimitiveArray());
1862 }
1863 assert(Call->getArg(0)->getType()->isVectorType());
1864 const Pointer &Arg = S.Stk.pop<Pointer>();
1865 assert(Arg.getFieldDesc()->isPrimitiveArray());
1866 const Pointer &Dst = S.Stk.peek<Pointer>();
1867 assert(Dst.getFieldDesc()->isPrimitiveArray());
1868 assert(Arg.getFieldDesc()->getNumElems() ==
1869 Dst.getFieldDesc()->getNumElems());
1870
1871 QualType ElemType = Arg.getFieldDesc()->getElemQualType();
1872 PrimType ElemT = *S.getContext().classify(T: ElemType);
1873 unsigned NumElems = Arg.getNumElems();
1874
1875 // FIXME: Reading from uninitialized vector elements?
1876 for (unsigned I = 0; I != NumElems; ++I) {
1877 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
1878 APInt EltVal = Arg.atIndex(I).deref<T>().toAPSInt();
1879 if (EltVal.isZero()) {
1880 if (HasZeroArg) {
1881 Dst.atIndex(I).deref<T>() = ZeroArg.atIndex(I).deref<T>();
1882 } else {
1883 // If we haven't been provided the second argument, the result is
1884 // undefined
1885 S.FFDiag(S.Current->getSource(OpPC),
1886 diag::note_constexpr_countzeroes_zero)
1887 << /*IsTrailing=*/IsCTTZ;
1888 return false;
1889 }
1890 } else if (IsCTTZ) {
1891 Dst.atIndex(I).deref<T>() = T::from(EltVal.countTrailingZeros());
1892 } else {
1893 Dst.atIndex(I).deref<T>() = T::from(EltVal.countLeadingZeros());
1894 }
1895 Dst.atIndex(I).initialize();
1896 });
1897 }
1898
1899 return true;
1900}
1901
1902static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC,
1903 const InterpFrame *Frame,
1904 const CallExpr *Call, unsigned ID) {
1905 assert(Call->getNumArgs() == 3);
1906 const ASTContext &ASTCtx = S.getASTContext();
1907 uint64_t Size;
1908 if (!popToUInt64(S, E: Call->getArg(Arg: 2), Out&: Size))
1909 return false;
1910 Pointer SrcPtr = S.Stk.pop<Pointer>().expand();
1911 Pointer DestPtr = S.Stk.pop<Pointer>().expand();
1912
1913 if (ID == Builtin::BImemcpy || ID == Builtin::BImemmove)
1914 diagnoseNonConstexprBuiltin(S, OpPC, ID);
1915
1916 bool Move =
1917 (ID == Builtin::BI__builtin_memmove || ID == Builtin::BImemmove ||
1918 ID == Builtin::BI__builtin_wmemmove || ID == Builtin::BIwmemmove);
1919 bool WChar = ID == Builtin::BIwmemcpy || ID == Builtin::BIwmemmove ||
1920 ID == Builtin::BI__builtin_wmemcpy ||
1921 ID == Builtin::BI__builtin_wmemmove;
1922
1923 // If the size is zero, we treat this as always being a valid no-op.
1924 if (Size == 0) {
1925 S.Stk.push<Pointer>(Args&: DestPtr);
1926 return true;
1927 }
1928
1929 if (SrcPtr.isZero() || DestPtr.isZero()) {
1930 Pointer DiagPtr = (SrcPtr.isZero() ? SrcPtr : DestPtr);
1931 S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_null)
1932 << /*IsMove=*/Move << /*IsWchar=*/WChar << !SrcPtr.isZero()
1933 << DiagPtr.toDiagnosticString(Ctx: ASTCtx);
1934 return false;
1935 }
1936
1937 // Diagnose integral src/dest pointers specially.
1938 if (SrcPtr.isIntegralPointer() || DestPtr.isIntegralPointer()) {
1939 std::string DiagVal = "(void *)";
1940 DiagVal += SrcPtr.isIntegralPointer()
1941 ? std::to_string(val: SrcPtr.getIntegerRepresentation())
1942 : std::to_string(val: DestPtr.getIntegerRepresentation());
1943 S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_null)
1944 << Move << WChar << DestPtr.isIntegralPointer() << DiagVal;
1945 return false;
1946 }
1947
1948 if (!isReadable(P: DestPtr) || !isReadable(P: SrcPtr))
1949 return false;
1950
1951 if (DestPtr.getType()->isIncompleteType()) {
1952 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1953 DiagId: diag::note_constexpr_memcpy_incomplete_type)
1954 << Move << DestPtr.getType();
1955 return false;
1956 }
1957 if (SrcPtr.getType()->isIncompleteType()) {
1958 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1959 DiagId: diag::note_constexpr_memcpy_incomplete_type)
1960 << Move << SrcPtr.getType();
1961 return false;
1962 }
1963
1964 QualType DestElemType = getElemType(P: DestPtr);
1965 if (DestElemType->isIncompleteType()) {
1966 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1967 DiagId: diag::note_constexpr_memcpy_incomplete_type)
1968 << Move << DestElemType;
1969 return false;
1970 }
1971
1972 size_t RemainingDestElems;
1973 if (DestPtr.getFieldDesc()->isArray()) {
1974 RemainingDestElems = DestPtr.isUnknownSizeArray()
1975 ? 0
1976 : (DestPtr.getNumElems() - DestPtr.getIndex());
1977 } else {
1978 RemainingDestElems = 1;
1979 }
1980 unsigned DestElemSize = ASTCtx.getTypeSizeInChars(T: DestElemType).getQuantity();
1981
1982 if (WChar) {
1983 uint64_t WCharSize =
1984 ASTCtx.getTypeSizeInChars(T: ASTCtx.getWCharType()).getQuantity();
1985 Size *= WCharSize;
1986 }
1987
1988 if (Size % DestElemSize != 0) {
1989 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1990 DiagId: diag::note_constexpr_memcpy_unsupported)
1991 << Move << WChar << 0 << DestElemType << Size << DestElemSize;
1992 return false;
1993 }
1994
1995 QualType SrcElemType = getElemType(P: SrcPtr);
1996 size_t RemainingSrcElems;
1997 if (SrcPtr.getFieldDesc()->isArray()) {
1998 RemainingSrcElems = SrcPtr.isUnknownSizeArray()
1999 ? 0
2000 : (SrcPtr.getNumElems() - SrcPtr.getIndex());
2001 } else {
2002 RemainingSrcElems = 1;
2003 }
2004 unsigned SrcElemSize = ASTCtx.getTypeSizeInChars(T: SrcElemType).getQuantity();
2005
2006 if (!ASTCtx.hasSameUnqualifiedType(T1: DestElemType, T2: SrcElemType)) {
2007 S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_type_pun)
2008 << Move << SrcElemType << DestElemType;
2009 return false;
2010 }
2011
2012 if (!DestElemType.isTriviallyCopyableType(Context: ASTCtx)) {
2013 S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_nontrivial)
2014 << Move << DestElemType;
2015 return false;
2016 }
2017
2018 // Check if we have enough elements to read from and write to.
2019 size_t RemainingDestBytes = RemainingDestElems * DestElemSize;
2020 size_t RemainingSrcBytes = RemainingSrcElems * SrcElemSize;
2021 if (Size > RemainingDestBytes || Size > RemainingSrcBytes) {
2022 APInt N = APInt(64, Size / DestElemSize);
2023 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
2024 DiagId: diag::note_constexpr_memcpy_unsupported)
2025 << Move << WChar << (Size > RemainingSrcBytes ? 1 : 2) << DestElemType
2026 << toString(I: N, Radix: 10, /*Signed=*/false);
2027 return false;
2028 }
2029
2030 // Check for overlapping memory regions.
2031 if (!Move && Pointer::pointToSameBlock(A: SrcPtr, B: DestPtr)) {
2032 // Remove base casts.
2033 Pointer SrcP = SrcPtr.stripBaseCasts();
2034 Pointer DestP = DestPtr.stripBaseCasts();
2035
2036 unsigned SrcIndex = SrcP.expand().getIndex() * SrcElemSize;
2037 unsigned DstIndex = DestP.expand().getIndex() * DestElemSize;
2038
2039 if ((SrcIndex <= DstIndex && (SrcIndex + Size) > DstIndex) ||
2040 (DstIndex <= SrcIndex && (DstIndex + Size) > SrcIndex)) {
2041 S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_overlap)
2042 << /*IsWChar=*/false;
2043 return false;
2044 }
2045 }
2046
2047 assert(Size % DestElemSize == 0);
2048 if (!DoMemcpy(S, OpPC, SrcPtr, DestPtr, Size: Bytes(Size).toBits()))
2049 return false;
2050
2051 S.Stk.push<Pointer>(Args&: DestPtr);
2052 return true;
2053}
2054
2055/// Determine if T is a character type for which we guarantee that
2056/// sizeof(T) == 1.
2057static bool isOneByteCharacterType(QualType T) {
2058 return T->isCharType() || T->isChar8Type();
2059}
2060
2061static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC,
2062 const InterpFrame *Frame,
2063 const CallExpr *Call, unsigned ID) {
2064 assert(Call->getNumArgs() == 3);
2065 uint64_t Size;
2066 if (!popToUInt64(S, E: Call->getArg(Arg: 2), Out&: Size))
2067 return false;
2068 const Pointer &PtrB = S.Stk.pop<Pointer>();
2069 const Pointer &PtrA = S.Stk.pop<Pointer>();
2070
2071 if (ID == Builtin::BImemcmp || ID == Builtin::BIbcmp ||
2072 ID == Builtin::BIwmemcmp)
2073 diagnoseNonConstexprBuiltin(S, OpPC, ID);
2074
2075 if (Size == 0) {
2076 pushInteger(S, Val: 0, QT: Call->getType());
2077 return true;
2078 }
2079
2080 if (!PtrA.isBlockPointer() || !PtrB.isBlockPointer())
2081 return false;
2082
2083 bool IsWide =
2084 (ID == Builtin::BIwmemcmp || ID == Builtin::BI__builtin_wmemcmp);
2085
2086 const ASTContext &ASTCtx = S.getASTContext();
2087 QualType ElemTypeA = getElemType(P: PtrA);
2088 QualType ElemTypeB = getElemType(P: PtrB);
2089 // FIXME: This is an arbitrary limitation the current constant interpreter
2090 // had. We could remove this.
2091 if (!IsWide && (!isOneByteCharacterType(T: ElemTypeA) ||
2092 !isOneByteCharacterType(T: ElemTypeB))) {
2093 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
2094 DiagId: diag::note_constexpr_memcmp_unsupported)
2095 << ASTCtx.BuiltinInfo.getQuotedName(ID) << PtrA.getType()
2096 << PtrB.getType();
2097 return false;
2098 }
2099
2100 if (!CheckLoad(S, OpPC, Ptr: PtrA, AK: AK_Read) || !CheckLoad(S, OpPC, Ptr: PtrB, AK: AK_Read))
2101 return false;
2102
2103 // Now, read both pointers to a buffer and compare those.
2104 BitcastBuffer BufferA(
2105 Bits(ASTCtx.getTypeSize(T: ElemTypeA) * PtrA.getNumElems()));
2106 readPointerToBuffer(Ctx: S.getContext(), FromPtr: PtrA, Buffer&: BufferA, ReturnOnUninit: false);
2107
2108 // FIXME: The swapping here is UNDOING something we do when reading the
2109 // data into the buffer.
2110 if (ASTCtx.getTargetInfo().isBigEndian())
2111 swapBytes(M: BufferA.Data.get(), N: BufferA.byteSize().getQuantity());
2112
2113 BitcastBuffer BufferB(
2114 Bits(ASTCtx.getTypeSize(T: ElemTypeB) * PtrB.getNumElems()));
2115 readPointerToBuffer(Ctx: S.getContext(), FromPtr: PtrB, Buffer&: BufferB, ReturnOnUninit: false);
2116 // FIXME: The swapping here is UNDOING something we do when reading the
2117 // data into the buffer.
2118 if (ASTCtx.getTargetInfo().isBigEndian())
2119 swapBytes(M: BufferB.Data.get(), N: BufferB.byteSize().getQuantity());
2120
2121 size_t MinBufferSize = std::min(a: BufferA.byteSize().getQuantity(),
2122 b: BufferB.byteSize().getQuantity());
2123
2124 unsigned ElemSize = 1;
2125 if (IsWide)
2126 ElemSize = ASTCtx.getTypeSizeInChars(T: ASTCtx.getWCharType()).getQuantity();
2127 // The Size given for the wide variants is in wide-char units. Convert it
2128 // to bytes.
2129 size_t ByteSize = Size * ElemSize;
2130 size_t CmpSize = std::min(a: MinBufferSize, b: ByteSize);
2131
2132 for (size_t I = 0; I != CmpSize; I += ElemSize) {
2133 if (IsWide) {
2134 FIXED_SIZE_INT_TYPE_SWITCH(
2135 *S.getContext().classify(ASTCtx.getWCharType()), {
2136 T A = T::bitcastFromMemory(BufferA.atByte(I), T::bitWidth());
2137 T B = T::bitcastFromMemory(BufferB.atByte(I), T::bitWidth());
2138 if (A < B) {
2139 pushInteger(S, -1, Call->getType());
2140 return true;
2141 }
2142 if (A > B) {
2143 pushInteger(S, 1, Call->getType());
2144 return true;
2145 }
2146 });
2147 } else {
2148 auto A = BufferA.deref<std::byte>(Offset: Bytes(I));
2149 auto B = BufferB.deref<std::byte>(Offset: Bytes(I));
2150
2151 if (A < B) {
2152 pushInteger(S, Val: -1, QT: Call->getType());
2153 return true;
2154 }
2155 if (A > B) {
2156 pushInteger(S, Val: 1, QT: Call->getType());
2157 return true;
2158 }
2159 }
2160 }
2161
2162 // We compared CmpSize bytes above. If the limiting factor was the Size
2163 // passed, we're done and the result is equality (0).
2164 if (ByteSize <= CmpSize) {
2165 pushInteger(S, Val: 0, QT: Call->getType());
2166 return true;
2167 }
2168
2169 // However, if we read all the available bytes but were instructed to read
2170 // even more, diagnose this as a "read of dereferenced one-past-the-end
2171 // pointer". This is what would happen if we called CheckLoad() on every array
2172 // element.
2173 S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_access_past_end)
2174 << AK_Read << S.Current->getRange(PC: OpPC);
2175 return false;
2176}
2177
2178// __builtin_memchr(ptr, int, int)
2179// __builtin_strchr(ptr, int)
2180static bool interp__builtin_memchr(InterpState &S, CodePtr OpPC,
2181 const CallExpr *Call, unsigned ID) {
2182 if (ID == Builtin::BImemchr || ID == Builtin::BIwcschr ||
2183 ID == Builtin::BIstrchr || ID == Builtin::BIwmemchr)
2184 diagnoseNonConstexprBuiltin(S, OpPC, ID);
2185
2186 std::optional<APSInt> MaxLength;
2187 if (Call->getNumArgs() == 3) {
2188 APSInt MaxLengthVal;
2189 if (!popToAPSInt(S, E: Call->getArg(Arg: 2), Out&: MaxLengthVal))
2190 return false;
2191 MaxLength = MaxLengthVal;
2192 }
2193
2194 APSInt Desired;
2195 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: Desired))
2196 return false;
2197 const Pointer &Ptr = S.Stk.pop<Pointer>();
2198
2199 if (MaxLength && MaxLength->isZero()) {
2200 S.Stk.push<Pointer>();
2201 return true;
2202 }
2203
2204 if (Ptr.isDummy()) {
2205 if (Ptr.getType()->isIncompleteType())
2206 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
2207 DiagId: diag::note_constexpr_ltor_incomplete_type)
2208 << Ptr.getType();
2209 return false;
2210 }
2211
2212 // Null is only okay if the given size is 0.
2213 if (Ptr.isZero()) {
2214 S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_access_null)
2215 << AK_Read;
2216 return false;
2217 }
2218
2219 if (!Ptr.isBlockPointer())
2220 return false;
2221
2222 QualType ElemTy = Ptr.getFieldDesc()->isArray()
2223 ? Ptr.getFieldDesc()->getElemQualType()
2224 : Ptr.getFieldDesc()->getType();
2225 bool IsRawByte = ID == Builtin::BImemchr || ID == Builtin::BI__builtin_memchr;
2226
2227 // Give up on byte-oriented matching against multibyte elements.
2228 if (IsRawByte && !isOneByteCharacterType(T: ElemTy)) {
2229 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
2230 DiagId: diag::note_constexpr_memchr_unsupported)
2231 << S.getASTContext().BuiltinInfo.getQuotedName(ID) << ElemTy;
2232 return false;
2233 }
2234
2235 if (!isReadable(P: Ptr))
2236 return false;
2237
2238 if (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr) {
2239 int64_t DesiredTrunc;
2240 if (S.getASTContext().CharTy->isSignedIntegerType())
2241 DesiredTrunc =
2242 Desired.trunc(width: S.getASTContext().getCharWidth()).getSExtValue();
2243 else
2244 DesiredTrunc =
2245 Desired.trunc(width: S.getASTContext().getCharWidth()).getZExtValue();
2246 // strchr compares directly to the passed integer, and therefore
2247 // always fails if given an int that is not a char.
2248 if (Desired != DesiredTrunc) {
2249 S.Stk.push<Pointer>();
2250 return true;
2251 }
2252 }
2253
2254 uint64_t DesiredVal;
2255 if (ID == Builtin::BIwmemchr || ID == Builtin::BI__builtin_wmemchr ||
2256 ID == Builtin::BIwcschr || ID == Builtin::BI__builtin_wcschr) {
2257 // wcschr and wmemchr are given a wchar_t to look for. Just use it.
2258 DesiredVal = Desired.getZExtValue();
2259 } else {
2260 DesiredVal = Desired.trunc(width: S.getASTContext().getCharWidth()).getZExtValue();
2261 }
2262
2263 bool StopAtZero =
2264 (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr ||
2265 ID == Builtin::BIwcschr || ID == Builtin::BI__builtin_wcschr);
2266
2267 PrimType ElemT =
2268 IsRawByte ? PT_Sint8 : *S.getContext().classify(T: getElemType(P: Ptr));
2269
2270 size_t Index = Ptr.getIndex();
2271 size_t Step = 0;
2272 for (;;) {
2273 const Pointer &ElemPtr =
2274 (Index + Step) > 0 ? Ptr.atIndex(Idx: Index + Step) : Ptr;
2275
2276 if (!CheckLoad(S, OpPC, Ptr: ElemPtr))
2277 return false;
2278
2279 uint64_t V;
2280 INT_TYPE_SWITCH_NO_BOOL(
2281 ElemT, { V = static_cast<uint64_t>(ElemPtr.deref<T>().toUnsigned()); });
2282
2283 if (V == DesiredVal) {
2284 S.Stk.push<Pointer>(Args: ElemPtr);
2285 return true;
2286 }
2287
2288 if (StopAtZero && V == 0)
2289 break;
2290
2291 ++Step;
2292 if (MaxLength && Step == MaxLength->getZExtValue())
2293 break;
2294 }
2295
2296 S.Stk.push<Pointer>();
2297 return true;
2298}
2299
2300static std::optional<unsigned> computeFullDescSize(const ASTContext &ASTCtx,
2301 const Descriptor *Desc) {
2302 if (Desc->isPrimitive())
2303 return ASTCtx.getTypeSizeInChars(T: Desc->getType()).getQuantity();
2304 if (Desc->isArray())
2305 return ASTCtx.getTypeSizeInChars(T: Desc->getElemQualType()).getQuantity() *
2306 Desc->getNumElems();
2307 if (Desc->isRecord()) {
2308 // Can't use Descriptor::getType() as that may return a pointer type. Look
2309 // at the decl directly.
2310 return ASTCtx
2311 .getTypeSizeInChars(
2312 T: ASTCtx.getCanonicalTagType(TD: Desc->ElemRecord->getDecl()))
2313 .getQuantity();
2314 }
2315
2316 return std::nullopt;
2317}
2318
2319/// Compute the byte offset of \p Ptr in the full declaration.
2320static unsigned computePointerOffset(const ASTContext &ASTCtx,
2321 const Pointer &Ptr) {
2322 unsigned Result = 0;
2323
2324 Pointer P = Ptr;
2325 while (P.isField() || P.isArrayElement()) {
2326 P = P.expand();
2327 const Descriptor *D = P.getFieldDesc();
2328
2329 if (P.isArrayElement()) {
2330 unsigned ElemSize =
2331 ASTCtx.getTypeSizeInChars(T: D->getElemQualType()).getQuantity();
2332 if (P.isOnePastEnd())
2333 Result += ElemSize * P.getNumElems();
2334 else
2335 Result += ElemSize * P.getIndex();
2336 P = P.expand().getArray();
2337 } else if (P.isBaseClass()) {
2338 const auto *RD = cast<CXXRecordDecl>(Val: D->asDecl());
2339 bool IsVirtual = Ptr.isVirtualBaseClass();
2340 P = P.getBase();
2341 const Record *BaseRecord = P.getRecord();
2342
2343 const ASTRecordLayout &Layout =
2344 ASTCtx.getASTRecordLayout(D: cast<CXXRecordDecl>(Val: BaseRecord->getDecl()));
2345 if (IsVirtual)
2346 Result += Layout.getVBaseClassOffset(VBase: RD).getQuantity();
2347 else
2348 Result += Layout.getBaseClassOffset(Base: RD).getQuantity();
2349 } else if (P.isField()) {
2350 const FieldDecl *FD = P.getField();
2351 const ASTRecordLayout &Layout =
2352 ASTCtx.getASTRecordLayout(D: FD->getParent());
2353 unsigned FieldIndex = FD->getFieldIndex();
2354 uint64_t FieldOffset =
2355 ASTCtx.toCharUnitsFromBits(BitSize: Layout.getFieldOffset(FieldNo: FieldIndex))
2356 .getQuantity();
2357 Result += FieldOffset;
2358 P = P.getBase();
2359 } else
2360 llvm_unreachable("Unhandled descriptor type");
2361 }
2362
2363 return Result;
2364}
2365
2366/// Does Ptr point to the last subobject?
2367static bool pointsToLastObject(const Pointer &Ptr) {
2368 Pointer P = Ptr;
2369 while (!P.isRoot()) {
2370
2371 if (P.isArrayElement()) {
2372 P = P.expand().getArray();
2373 continue;
2374 }
2375 if (P.isBaseClass()) {
2376 if (P.getRecord()->getNumFields() > 0)
2377 return false;
2378 P = P.getBase();
2379 continue;
2380 }
2381
2382 Pointer Base = P.getBase();
2383 if (const Record *R = Base.getRecord()) {
2384 assert(P.getField());
2385 if (P.getField()->getFieldIndex() != R->getNumFields() - 1)
2386 return false;
2387 }
2388 P = Base;
2389 }
2390
2391 return true;
2392}
2393
2394/// Does Ptr point to the last object AND to a flexible array member?
2395static bool isUserWritingOffTheEnd(const ASTContext &Ctx, const Pointer &Ptr,
2396 bool InvalidBase) {
2397 auto isFlexibleArrayMember = [&](const Descriptor *FieldDesc) {
2398 using FAMKind = LangOptions::StrictFlexArraysLevelKind;
2399 FAMKind StrictFlexArraysLevel =
2400 Ctx.getLangOpts().getStrictFlexArraysLevel();
2401
2402 if (StrictFlexArraysLevel == FAMKind::Default)
2403 return true;
2404
2405 unsigned NumElems = FieldDesc->getNumElems();
2406 if (NumElems == 0 && StrictFlexArraysLevel != FAMKind::IncompleteOnly)
2407 return true;
2408
2409 if (NumElems == 1 && StrictFlexArraysLevel == FAMKind::OneZeroOrIncomplete)
2410 return true;
2411 return false;
2412 };
2413
2414 const Descriptor *FieldDesc = Ptr.getFieldDesc();
2415 if (!FieldDesc->isArray())
2416 return false;
2417
2418 return InvalidBase && pointsToLastObject(Ptr) &&
2419 isFlexibleArrayMember(FieldDesc);
2420}
2421
2422UnsignedOrNone evaluateBuiltinObjectSize(const ASTContext &ASTCtx,
2423 unsigned Kind, Pointer &Ptr) {
2424 if (Ptr.isZero() || !Ptr.isBlockPointer())
2425 return std::nullopt;
2426
2427 if (Ptr.isDummy() && Ptr.getType()->isPointerType())
2428 return std::nullopt;
2429
2430 bool InvalidBase = false;
2431
2432 if (Ptr.isDummy()) {
2433 if (const VarDecl *VD = Ptr.getDeclDesc()->asVarDecl();
2434 VD && VD->getType()->isPointerType())
2435 InvalidBase = true;
2436 }
2437
2438 // According to the GCC documentation, we want the size of the subobject
2439 // denoted by the pointer. But that's not quite right -- what we actually
2440 // want is the size of the immediately-enclosing array, if there is one.
2441 if (Ptr.isArrayElement())
2442 Ptr = Ptr.expand();
2443
2444 bool DetermineForCompleteObject = Ptr.getFieldDesc() == Ptr.getDeclDesc();
2445 const Descriptor *DeclDesc = Ptr.getDeclDesc();
2446 assert(DeclDesc);
2447
2448 bool UseFieldDesc = (Kind & 1u);
2449 bool ReportMinimum = (Kind & 2u);
2450 if (!UseFieldDesc || DetermineForCompleteObject) {
2451 // Can't read beyond the pointer decl desc.
2452 if (!ReportMinimum && DeclDesc->getType()->isPointerType())
2453 return std::nullopt;
2454
2455 if (InvalidBase)
2456 return std::nullopt;
2457 } else {
2458 if (isUserWritingOffTheEnd(Ctx: ASTCtx, Ptr, InvalidBase)) {
2459 // If we cannot determine the size of the initial allocation, then we
2460 // can't given an accurate upper-bound. However, we are still able to give
2461 // conservative lower-bounds for Type=3.
2462 if (Kind == 1)
2463 return std::nullopt;
2464 }
2465 // For Type=1, defer to the runtime path on a true incomplete-array
2466 // flexible array member (e.g. 'char fam[]') even when the base is a
2467 // concrete local/global. Without this, the bytecode interpreter would
2468 // happily fold &af.fam to 'NumElems * elemSize = 0' below; the default
2469 // const-evaluator avoids the same trap, and CGBuiltin emits
2470 // @llvm.objectsize for the correct layout-derived answer (matching
2471 // GCC's __bos/__bdos on '&af.fam').
2472 if (Kind == 1 && pointsToLastObject(Ptr) && Ptr.getFieldDesc()->isArray() &&
2473 Ptr.getFieldDesc()->getType()->isIncompleteArrayType())
2474 return std::nullopt;
2475 }
2476
2477 // The "closest surrounding subobject" is NOT a base class,
2478 // so strip the base class casts.
2479 if (UseFieldDesc && Ptr.isBaseClass())
2480 Ptr = Ptr.stripBaseCasts();
2481
2482 const Descriptor *Desc = UseFieldDesc ? Ptr.getFieldDesc() : DeclDesc;
2483 assert(Desc);
2484
2485 std::optional<unsigned> FullSize = computeFullDescSize(ASTCtx, Desc);
2486 if (!FullSize)
2487 return std::nullopt;
2488
2489 unsigned ByteOffset;
2490 if (UseFieldDesc) {
2491 if (Ptr.isBaseClass()) {
2492 assert(computePointerOffset(ASTCtx, Ptr.getBase()) <=
2493 computePointerOffset(ASTCtx, Ptr));
2494 ByteOffset = computePointerOffset(ASTCtx, Ptr: Ptr.getBase()) -
2495 computePointerOffset(ASTCtx, Ptr);
2496 } else {
2497 if (Ptr.inArray())
2498 ByteOffset =
2499 computePointerOffset(ASTCtx, Ptr) -
2500 computePointerOffset(ASTCtx, Ptr: Ptr.expand().atIndex(Idx: 0).narrow());
2501 else
2502 ByteOffset = 0;
2503 }
2504 } else
2505 ByteOffset = computePointerOffset(ASTCtx, Ptr);
2506
2507 assert(ByteOffset <= *FullSize);
2508 return *FullSize - ByteOffset;
2509}
2510
2511static bool interp__builtin_object_size(InterpState &S, CodePtr OpPC,
2512 const InterpFrame *Frame,
2513 const CallExpr *Call) {
2514 const ASTContext &ASTCtx = S.getASTContext();
2515 // From the GCC docs:
2516 // Kind is an integer constant from 0 to 3. If the least significant bit is
2517 // clear, objects are whole variables. If it is set, a closest surrounding
2518 // subobject is considered the object a pointer points to. The second bit
2519 // determines if maximum or minimum of remaining bytes is computed.
2520 uint64_t Kind;
2521 if (!popToUInt64(S, E: Call->getArg(Arg: 1), Out&: Kind))
2522 return false;
2523 assert(Kind <= 3 && "unexpected kind");
2524 Pointer Ptr = S.Stk.pop<Pointer>();
2525
2526 if (Call->getArg(Arg: 0)->HasSideEffects(Ctx: ASTCtx)) {
2527 // "If there are any side effects in them, it returns (size_t) -1
2528 // for type 0 or 1 and (size_t) 0 for type 2 or 3."
2529 pushInteger(S, Val: Kind <= 1 ? -1 : 0, QT: Call->getType());
2530 return true;
2531 }
2532
2533 if (auto Result = evaluateBuiltinObjectSize(ASTCtx, Kind, Ptr)) {
2534 pushInteger(S, Val: *Result, QT: Call->getType());
2535 return true;
2536 }
2537 return false;
2538}
2539
2540static bool interp__builtin_is_within_lifetime(InterpState &S, CodePtr OpPC,
2541 const CallExpr *Call) {
2542
2543 if (!S.inConstantContext())
2544 return false;
2545
2546 const Pointer &Ptr = S.Stk.pop<Pointer>();
2547
2548 auto Error = [&](int Diag) {
2549 bool CalledFromStd = false;
2550 const auto *Callee = S.Current->getCallee();
2551 if (Callee && Callee->isInStdNamespace()) {
2552 const IdentifierInfo *Identifier = Callee->getIdentifier();
2553 CalledFromStd = Identifier && Identifier->isStr(Str: "is_within_lifetime");
2554 }
2555 S.CCEDiag(SI: CalledFromStd
2556 ? S.Current->Caller->getSource(PC: S.Current->getRetPC())
2557 : S.Current->getSource(PC: OpPC),
2558 DiagId: diag::err_invalid_is_within_lifetime)
2559 << (CalledFromStd ? "std::is_within_lifetime"
2560 : "__builtin_is_within_lifetime")
2561 << Diag;
2562 return false;
2563 };
2564
2565 if (Ptr.isZero())
2566 return Error(0);
2567 if (Ptr.isOnePastEnd())
2568 return Error(1);
2569
2570 bool Result = Ptr.getLifetime() != Lifetime::Ended;
2571 if (!Ptr.isActive()) {
2572 Result = false;
2573 } else {
2574 if (!CheckLive(S, OpPC, Ptr, AK: AK_Read))
2575 return false;
2576 if (!CheckMutable(S, OpPC, Ptr))
2577 return false;
2578 if (!CheckDummy(S, OpPC, B: Ptr.block(), AK: AK_Read))
2579 return false;
2580 }
2581
2582 // Check if we're currently running an initializer.
2583 if (S.initializingBlock(B: Ptr.block()))
2584 return Error(2);
2585 if (S.EvaluatingDecl && Ptr.getDeclDesc()->asVarDecl() == S.EvaluatingDecl)
2586 return Error(2);
2587
2588 pushInteger(S, Val: Result, QT: Call->getType());
2589 return true;
2590}
2591
2592static bool interp__builtin_elementwise_int_unaryop(
2593 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2594 llvm::function_ref<APInt(const APSInt &)> Fn) {
2595 assert(Call->getNumArgs() == 1);
2596
2597 // Single integer case.
2598 if (!Call->getArg(Arg: 0)->getType()->isVectorType()) {
2599 assert(Call->getType()->isIntegerType());
2600 APSInt Src;
2601 if (!popToAPSInt(S, E: Call->getArg(Arg: 0), Out&: Src))
2602 return false;
2603 APInt Result = Fn(Src);
2604 pushInteger(S, Val: APSInt(std::move(Result), !Src.isSigned()), QT: Call->getType());
2605 return true;
2606 }
2607
2608 // Vector case.
2609 const Pointer &Arg = S.Stk.pop<Pointer>();
2610 assert(Arg.getFieldDesc()->isPrimitiveArray());
2611 const Pointer &Dst = S.Stk.peek<Pointer>();
2612 assert(Dst.getFieldDesc()->isPrimitiveArray());
2613 assert(Arg.getFieldDesc()->getNumElems() ==
2614 Dst.getFieldDesc()->getNumElems());
2615
2616 QualType ElemType = Arg.getFieldDesc()->getElemQualType();
2617 PrimType ElemT = *S.getContext().classify(T: ElemType);
2618 unsigned NumElems = Arg.getNumElems();
2619 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2620
2621 for (unsigned I = 0; I != NumElems; ++I) {
2622 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2623 APSInt Src = Arg.elem<T>(I).toAPSInt();
2624 APInt Result = Fn(Src);
2625 Dst.elem<T>(I) = static_cast<T>(APSInt(std::move(Result), DestUnsigned));
2626 });
2627 }
2628 Dst.initializeAllElements();
2629
2630 return true;
2631}
2632
2633static bool interp__builtin_elementwise_fp_binop(
2634 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2635 llvm::function_ref<std::optional<APFloat>(
2636 const APFloat &, const APFloat &, std::optional<APSInt> RoundingMode)>
2637 Fn,
2638 bool IsScalar = false) {
2639 assert((Call->getNumArgs() == 2) || (Call->getNumArgs() == 3));
2640 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2641 assert(VT->getElementType()->isFloatingType());
2642 unsigned NumElems = VT->getNumElements();
2643
2644 // Vector case.
2645 assert(Call->getArg(0)->getType()->isVectorType() &&
2646 Call->getArg(1)->getType()->isVectorType());
2647 assert(VT->getElementType() ==
2648 Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
2649 assert(VT->getNumElements() ==
2650 Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements());
2651
2652 std::optional<APSInt> RoundingMode = std::nullopt;
2653 if (Call->getNumArgs() == 3) {
2654 APSInt RoundingModeVal;
2655 if (!popToAPSInt(S, E: Call->getArg(Arg: 2), Out&: RoundingModeVal))
2656 return false;
2657 RoundingMode = RoundingModeVal;
2658 }
2659
2660 const Pointer &BPtr = S.Stk.pop<Pointer>();
2661 const Pointer &APtr = S.Stk.pop<Pointer>();
2662 const Pointer &Dst = S.Stk.peek<Pointer>();
2663 for (unsigned ElemIdx = 0; ElemIdx != NumElems; ++ElemIdx) {
2664 using T = PrimConv<PT_Float>::T;
2665 if (IsScalar && ElemIdx > 0) {
2666 Dst.elem<T>(I: ElemIdx) = APtr.elem<T>(I: ElemIdx);
2667 continue;
2668 }
2669 APFloat ElemA = APtr.elem<T>(I: ElemIdx).getAPFloat();
2670 APFloat ElemB = BPtr.elem<T>(I: ElemIdx).getAPFloat();
2671 std::optional<APFloat> Result = Fn(ElemA, ElemB, RoundingMode);
2672 if (!Result)
2673 return false;
2674 Dst.elem<T>(I: ElemIdx) = static_cast<T>(*Result);
2675 }
2676
2677 Dst.initializeAllElements();
2678
2679 return true;
2680}
2681
2682static bool interp__builtin_scalar_fp_round_mask_binop(
2683 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2684 llvm::function_ref<std::optional<APFloat>(const APFloat &, const APFloat &,
2685 std::optional<APSInt>)>
2686 Fn) {
2687 assert(Call->getNumArgs() == 5);
2688 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2689 unsigned NumElems = VT->getNumElements();
2690
2691 APSInt RoundingMode;
2692 if (!popToAPSInt(S, E: Call->getArg(Arg: 4), Out&: RoundingMode))
2693 return false;
2694 uint64_t MaskVal;
2695 if (!popToUInt64(S, E: Call->getArg(Arg: 3), Out&: MaskVal))
2696 return false;
2697 const Pointer &SrcPtr = S.Stk.pop<Pointer>();
2698 const Pointer &BPtr = S.Stk.pop<Pointer>();
2699 const Pointer &APtr = S.Stk.pop<Pointer>();
2700 const Pointer &Dst = S.Stk.peek<Pointer>();
2701
2702 using T = PrimConv<PT_Float>::T;
2703
2704 if (MaskVal & 1) {
2705 APFloat ElemA = APtr.elem<T>(I: 0).getAPFloat();
2706 APFloat ElemB = BPtr.elem<T>(I: 0).getAPFloat();
2707 std::optional<APFloat> Result = Fn(ElemA, ElemB, RoundingMode);
2708 if (!Result)
2709 return false;
2710 Dst.elem<T>(I: 0) = static_cast<T>(*Result);
2711 } else {
2712 Dst.elem<T>(I: 0) = SrcPtr.elem<T>(I: 0);
2713 }
2714
2715 for (unsigned I = 1; I < NumElems; ++I)
2716 Dst.elem<T>(I) = APtr.elem<T>(I);
2717
2718 Dst.initializeAllElements();
2719
2720 return true;
2721}
2722
2723static bool interp__builtin_elementwise_int_binop(
2724 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2725 llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
2726 assert(Call->getNumArgs() == 2);
2727
2728 // Single integer case.
2729 if (!Call->getArg(Arg: 0)->getType()->isVectorType()) {
2730 assert(!Call->getArg(1)->getType()->isVectorType());
2731 APSInt RHS;
2732 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: RHS))
2733 return false;
2734 APSInt LHS;
2735 if (!popToAPSInt(S, E: Call->getArg(Arg: 0), Out&: LHS))
2736 return false;
2737 APInt Result = Fn(LHS, RHS);
2738 pushInteger(S, Val: APSInt(std::move(Result), !LHS.isSigned()), QT: Call->getType());
2739 return true;
2740 }
2741
2742 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2743 assert(VT->getElementType()->isIntegralOrEnumerationType());
2744 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2745 unsigned NumElems = VT->getNumElements();
2746 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2747
2748 // Vector + Scalar case.
2749 if (!Call->getArg(Arg: 1)->getType()->isVectorType()) {
2750 assert(Call->getArg(1)->getType()->isIntegralOrEnumerationType());
2751
2752 APSInt RHS;
2753 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: RHS))
2754 return false;
2755 const Pointer &LHS = S.Stk.pop<Pointer>();
2756 const Pointer &Dst = S.Stk.peek<Pointer>();
2757
2758 for (unsigned I = 0; I != NumElems; ++I) {
2759 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2760 Dst.elem<T>(I) = static_cast<T>(
2761 APSInt(Fn(LHS.elem<T>(I).toAPSInt(), RHS), DestUnsigned));
2762 });
2763 }
2764 Dst.initializeAllElements();
2765 return true;
2766 }
2767
2768 // Vector case.
2769 assert(Call->getArg(0)->getType()->isVectorType() &&
2770 Call->getArg(1)->getType()->isVectorType());
2771 assert(VT->getElementType() ==
2772 Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
2773 assert(VT->getNumElements() ==
2774 Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements());
2775 assert(VT->getElementType()->isIntegralOrEnumerationType());
2776
2777 const Pointer &RHS = S.Stk.pop<Pointer>();
2778 const Pointer &LHS = S.Stk.pop<Pointer>();
2779 const Pointer &Dst = S.Stk.peek<Pointer>();
2780 for (unsigned I = 0; I != NumElems; ++I) {
2781 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2782 APSInt Elem1 = LHS.elem<T>(I).toAPSInt();
2783 APSInt Elem2 = RHS.elem<T>(I).toAPSInt();
2784 Dst.elem<T>(I) = static_cast<T>(APSInt(Fn(Elem1, Elem2), DestUnsigned));
2785 });
2786 }
2787 Dst.initializeAllElements();
2788
2789 return true;
2790}
2791
2792static bool
2793interp__builtin_ia32_pack(InterpState &S, CodePtr, const CallExpr *E,
2794 llvm::function_ref<APInt(const APSInt &)> PackFn) {
2795 const auto *VT0 = E->getArg(Arg: 0)->getType()->castAs<VectorType>();
2796 [[maybe_unused]] const auto *VT1 =
2797 E->getArg(Arg: 1)->getType()->castAs<VectorType>();
2798 assert(VT0 && VT1 && "pack builtin VT0 and VT1 must be VectorType");
2799 assert(VT0->getElementType() == VT1->getElementType() &&
2800 VT0->getNumElements() == VT1->getNumElements() &&
2801 "pack builtin VT0 and VT1 ElementType must be same");
2802
2803 const Pointer &RHS = S.Stk.pop<Pointer>();
2804 const Pointer &LHS = S.Stk.pop<Pointer>();
2805 const Pointer &Dst = S.Stk.peek<Pointer>();
2806
2807 const ASTContext &ASTCtx = S.getASTContext();
2808 unsigned SrcBits = ASTCtx.getIntWidth(T: VT0->getElementType());
2809 unsigned LHSVecLen = VT0->getNumElements();
2810 unsigned SrcPerLane = 128 / SrcBits;
2811 unsigned Lanes = LHSVecLen * SrcBits / 128;
2812
2813 PrimType SrcT = *S.getContext().classify(T: VT0->getElementType());
2814 PrimType DstT = *S.getContext().classify(T: getElemType(P: Dst));
2815 bool IsUnsigend = getElemType(P: Dst)->isUnsignedIntegerType();
2816
2817 for (unsigned Lane = 0; Lane != Lanes; ++Lane) {
2818 unsigned BaseSrc = Lane * SrcPerLane;
2819 unsigned BaseDst = Lane * (2 * SrcPerLane);
2820
2821 for (unsigned I = 0; I != SrcPerLane; ++I) {
2822 INT_TYPE_SWITCH_NO_BOOL(SrcT, {
2823 APSInt A = LHS.elem<T>(BaseSrc + I).toAPSInt();
2824 APSInt B = RHS.elem<T>(BaseSrc + I).toAPSInt();
2825
2826 assignIntegral(S, Dst.atIndex(BaseDst + I), DstT,
2827 APSInt(PackFn(A), IsUnsigend));
2828 assignIntegral(S, Dst.atIndex(BaseDst + SrcPerLane + I), DstT,
2829 APSInt(PackFn(B), IsUnsigend));
2830 });
2831 }
2832 }
2833
2834 Dst.initializeAllElements();
2835 return true;
2836}
2837
2838static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
2839 const CallExpr *Call,
2840 unsigned BuiltinID) {
2841 assert(Call->getNumArgs() == 2);
2842
2843 QualType Arg0Type = Call->getArg(Arg: 0)->getType();
2844
2845 // TODO: Support floating-point types.
2846 if (!(Arg0Type->isIntegerType() ||
2847 (Arg0Type->isVectorType() &&
2848 Arg0Type->castAs<VectorType>()->getElementType()->isIntegerType())))
2849 return false;
2850
2851 if (!Arg0Type->isVectorType()) {
2852 assert(!Call->getArg(1)->getType()->isVectorType());
2853 APSInt RHS;
2854 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: RHS))
2855 return false;
2856 APSInt LHS;
2857 if (!popToAPSInt(S, T: Arg0Type, Out&: LHS))
2858 return false;
2859 APInt Result;
2860 if (BuiltinID == Builtin::BI__builtin_elementwise_max) {
2861 Result = std::max(a: LHS, b: RHS);
2862 } else if (BuiltinID == Builtin::BI__builtin_elementwise_min) {
2863 Result = std::min(a: LHS, b: RHS);
2864 } else {
2865 llvm_unreachable("Wrong builtin ID");
2866 }
2867
2868 pushInteger(S, Val: APSInt(Result, !LHS.isSigned()), QT: Call->getType());
2869 return true;
2870 }
2871
2872 // Vector case.
2873 assert(Call->getArg(0)->getType()->isVectorType() &&
2874 Call->getArg(1)->getType()->isVectorType());
2875 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2876 assert(VT->getElementType() ==
2877 Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
2878 assert(VT->getNumElements() ==
2879 Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements());
2880 assert(VT->getElementType()->isIntegralOrEnumerationType());
2881
2882 const Pointer &RHS = S.Stk.pop<Pointer>();
2883 const Pointer &LHS = S.Stk.pop<Pointer>();
2884 const Pointer &Dst = S.Stk.peek<Pointer>();
2885 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2886 unsigned NumElems = VT->getNumElements();
2887 for (unsigned I = 0; I != NumElems; ++I) {
2888 APSInt Elem1;
2889 APSInt Elem2;
2890 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2891 Elem1 = LHS.elem<T>(I).toAPSInt();
2892 Elem2 = RHS.elem<T>(I).toAPSInt();
2893 });
2894
2895 APSInt Result;
2896 if (BuiltinID == Builtin::BI__builtin_elementwise_max) {
2897 Result = APSInt(std::max(a: Elem1, b: Elem2),
2898 Call->getType()->isUnsignedIntegerOrEnumerationType());
2899 } else if (BuiltinID == Builtin::BI__builtin_elementwise_min) {
2900 Result = APSInt(std::min(a: Elem1, b: Elem2),
2901 Call->getType()->isUnsignedIntegerOrEnumerationType());
2902 } else {
2903 llvm_unreachable("Wrong builtin ID");
2904 }
2905
2906 INT_TYPE_SWITCH_NO_BOOL(ElemT,
2907 { Dst.elem<T>(I) = static_cast<T>(Result); });
2908 }
2909 Dst.initializeAllElements();
2910
2911 return true;
2912}
2913
2914static bool interp__builtin_ia32_pmul(
2915 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2916 llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &,
2917 const APSInt &)>
2918 Fn) {
2919 assert(Call->getArg(0)->getType()->isVectorType() &&
2920 Call->getArg(1)->getType()->isVectorType());
2921 const Pointer &RHS = S.Stk.pop<Pointer>();
2922 const Pointer &LHS = S.Stk.pop<Pointer>();
2923 const Pointer &Dst = S.Stk.peek<Pointer>();
2924
2925 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2926 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2927 unsigned NumElems = VT->getNumElements();
2928 const auto *DestVT = Call->getType()->castAs<VectorType>();
2929 PrimType DestElemT = *S.getContext().classify(T: DestVT->getElementType());
2930 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2931
2932 unsigned DstElem = 0;
2933 for (unsigned I = 0; I != NumElems; I += 2) {
2934 APSInt Result;
2935 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2936 APSInt LoLHS = LHS.elem<T>(I).toAPSInt();
2937 APSInt HiLHS = LHS.elem<T>(I + 1).toAPSInt();
2938 APSInt LoRHS = RHS.elem<T>(I).toAPSInt();
2939 APSInt HiRHS = RHS.elem<T>(I + 1).toAPSInt();
2940 Result = APSInt(Fn(LoLHS, HiLHS, LoRHS, HiRHS), DestUnsigned);
2941 });
2942
2943 INT_TYPE_SWITCH_NO_BOOL(DestElemT,
2944 { Dst.elem<T>(DstElem) = static_cast<T>(Result); });
2945 ++DstElem;
2946 }
2947
2948 Dst.initializeAllElements();
2949 return true;
2950}
2951
2952static bool interp__builtin_ia32_dbpsadbw(InterpState &S, CodePtr OpPC,
2953 const CallExpr *Call) {
2954 assert(Call->getNumArgs() == 3);
2955 uint64_t Imm;
2956 if (!popToUInt64(S, E: Call->getArg(Arg: 2), Out&: Imm))
2957 return false;
2958
2959 const Pointer &Src2 = S.Stk.pop<Pointer>();
2960 const Pointer &Src1 = S.Stk.pop<Pointer>();
2961 const Pointer &Dst = S.Stk.peek<Pointer>();
2962
2963 const auto *SrcVT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2964 PrimType SrcElemT = *S.getContext().classify(T: SrcVT->getElementType());
2965 unsigned SourceLen = SrcVT->getNumElements();
2966
2967 const auto *DestVT = Call->getType()->castAs<VectorType>();
2968 PrimType DestElemT = *S.getContext().classify(T: DestVT->getElementType());
2969 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2970
2971 constexpr unsigned LaneSize = 16; // 128-bit lane = 16 bytes
2972
2973 // Phase 1: Shuffle Src2 using all four 2-bit fields of imm8.
2974 // Within each 128-bit lane, for group j (0..3), select a 4-byte block
2975 // from Src2 based on bits [2*j+1:2*j] of imm8.
2976 SmallVector<uint8_t, 64> Shuffled(SourceLen);
2977 for (unsigned I = 0; I < SourceLen; I += LaneSize) {
2978 for (unsigned J = 0; J < 4; ++J) {
2979 unsigned Part = (Imm >> (2 * J)) & 3;
2980 for (unsigned K = 0; K < 4; ++K) {
2981 INT_TYPE_SWITCH_NO_BOOL(SrcElemT, {
2982 Shuffled[I + 4 * J + K] =
2983 static_cast<uint8_t>(Src2.elem<T>(I + 4 * Part + K));
2984 });
2985 }
2986 }
2987 }
2988
2989 // Phase 2: Sliding SAD computation.
2990 // For every group of 4 output u16 values, compute absolute differences
2991 // using overlapping windows into Src1 and the shuffled array.
2992 unsigned Size = SourceLen / 2; // number of output u16 elements
2993 for (unsigned I = 0; I < Size; I += 4) {
2994 unsigned Sad[4] = {0, 0, 0, 0};
2995 for (unsigned J = 0; J < 4; ++J) {
2996 uint8_t A1, A2;
2997 INT_TYPE_SWITCH_NO_BOOL(SrcElemT, {
2998 A1 = static_cast<uint8_t>(Src1.elem<T>(2 * I + J));
2999 A2 = static_cast<uint8_t>(Src1.elem<T>(2 * I + J + 4));
3000 });
3001 uint8_t B0 = Shuffled[2 * I + J];
3002 uint8_t B1 = Shuffled[2 * I + J + 1];
3003 uint8_t B2 = Shuffled[2 * I + J + 2];
3004 uint8_t B3 = Shuffled[2 * I + J + 3];
3005 Sad[0] += (A1 > B0) ? (A1 - B0) : (B0 - A1);
3006 Sad[1] += (A1 > B1) ? (A1 - B1) : (B1 - A1);
3007 Sad[2] += (A2 > B2) ? (A2 - B2) : (B2 - A2);
3008 Sad[3] += (A2 > B3) ? (A2 - B3) : (B3 - A2);
3009 }
3010 for (unsigned R = 0; R < 4; ++R) {
3011 INT_TYPE_SWITCH_NO_BOOL(DestElemT, {
3012 Dst.elem<T>(I + R) =
3013 static_cast<T>(APSInt(APInt(16, Sad[R]), DestUnsigned));
3014 });
3015 }
3016 }
3017
3018 Dst.initializeAllElements();
3019 return true;
3020}
3021
3022static bool interp__builtin_ia32_mpsadbw(InterpState &S, CodePtr OpPC,
3023 const CallExpr *Call) {
3024 assert(Call->getNumArgs() == 3);
3025 uint64_t Imm;
3026 if (!popToUInt64(S, E: Call->getArg(Arg: 2), Out&: Imm))
3027 return false;
3028
3029 const Pointer &Src2 = S.Stk.pop<Pointer>();
3030 const Pointer &Src1 = S.Stk.pop<Pointer>();
3031 const Pointer &Dst = S.Stk.peek<Pointer>();
3032
3033 const auto *SrcVT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
3034 PrimType SrcElemT = *S.getContext().classify(T: SrcVT->getElementType());
3035 unsigned SourceLen = SrcVT->getNumElements();
3036 assert((SourceLen == 16 || SourceLen == 32) &&
3037 "MPSADBW operates on 128-bit or 256-bit vectors");
3038
3039 const auto *DestVT = Call->getType()->castAs<VectorType>();
3040 PrimType DestElemT = *S.getContext().classify(T: DestVT->getElementType());
3041 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3042
3043 constexpr unsigned LaneSize = 16; // 128-bit lane = 16 bytes
3044 unsigned NumLanes = SourceLen / LaneSize;
3045
3046 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3047 unsigned Ctrl = (Imm >> (3 * Lane)) & 0x7;
3048 unsigned AOff = ((Ctrl >> 2) & 1) * 4;
3049 unsigned BOff = (Ctrl & 3) * 4;
3050 for (unsigned J = 0; J != 8; ++J) {
3051 uint16_t Sad = 0;
3052 for (unsigned K = 0; K != 4; ++K) {
3053 uint8_t A, B;
3054 INT_TYPE_SWITCH_NO_BOOL(SrcElemT, {
3055 A = static_cast<uint8_t>(
3056 Src1.elem<T>(Lane * LaneSize + AOff + J + K));
3057 B = static_cast<uint8_t>(Src2.elem<T>(Lane * LaneSize + BOff + K));
3058 });
3059 Sad += (A > B) ? (A - B) : (B - A);
3060 }
3061 INT_TYPE_SWITCH_NO_BOOL(DestElemT, {
3062 Dst.elem<T>(Lane * 8 + J) =
3063 static_cast<T>(APSInt(APInt(16, Sad), DestUnsigned));
3064 });
3065 }
3066 }
3067
3068 Dst.initializeAllElements();
3069 return true;
3070}
3071
3072static bool interp_builtin_horizontal_int_binop(
3073 InterpState &S, CodePtr OpPC, const CallExpr *Call,
3074 llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
3075 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
3076 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
3077 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3078
3079 const Pointer &RHS = S.Stk.pop<Pointer>();
3080 const Pointer &LHS = S.Stk.pop<Pointer>();
3081 const Pointer &Dst = S.Stk.peek<Pointer>();
3082 unsigned NumElts = VT->getNumElements();
3083 unsigned EltBits = S.getASTContext().getIntWidth(T: VT->getElementType());
3084 unsigned EltsPerLane = 128 / EltBits;
3085 unsigned Lanes = NumElts * EltBits / 128;
3086 unsigned DestIndex = 0;
3087
3088 for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
3089 unsigned LaneStart = Lane * EltsPerLane;
3090 for (unsigned I = 0; I < EltsPerLane; I += 2) {
3091 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3092 APSInt Elem1 = LHS.elem<T>(LaneStart + I).toAPSInt();
3093 APSInt Elem2 = LHS.elem<T>(LaneStart + I + 1).toAPSInt();
3094 APSInt ResL = APSInt(Fn(Elem1, Elem2), DestUnsigned);
3095 Dst.elem<T>(DestIndex++) = static_cast<T>(ResL);
3096 });
3097 }
3098
3099 for (unsigned I = 0; I < EltsPerLane; I += 2) {
3100 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3101 APSInt Elem1 = RHS.elem<T>(LaneStart + I).toAPSInt();
3102 APSInt Elem2 = RHS.elem<T>(LaneStart + I + 1).toAPSInt();
3103 APSInt ResR = APSInt(Fn(Elem1, Elem2), DestUnsigned);
3104 Dst.elem<T>(DestIndex++) = static_cast<T>(ResR);
3105 });
3106 }
3107 }
3108 Dst.initializeAllElements();
3109 return true;
3110}
3111
3112static bool interp_builtin_horizontal_fp_binop(
3113 InterpState &S, CodePtr OpPC, const CallExpr *Call,
3114 llvm::function_ref<APFloat(const APFloat &, const APFloat &,
3115 llvm::RoundingMode)>
3116 Fn) {
3117 const Pointer &RHS = S.Stk.pop<Pointer>();
3118 const Pointer &LHS = S.Stk.pop<Pointer>();
3119 const Pointer &Dst = S.Stk.peek<Pointer>();
3120 FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
3121 llvm::RoundingMode RM = getRoundingMode(FPO);
3122 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
3123
3124 unsigned NumElts = VT->getNumElements();
3125 unsigned EltBits = S.getASTContext().getTypeSize(T: VT->getElementType());
3126 unsigned NumLanes = NumElts * EltBits / 128;
3127 unsigned NumElemsPerLane = NumElts / NumLanes;
3128 unsigned HalfElemsPerLane = NumElemsPerLane / 2;
3129
3130 for (unsigned L = 0; L != NumElts; L += NumElemsPerLane) {
3131 using T = PrimConv<PT_Float>::T;
3132 for (unsigned E = 0; E != HalfElemsPerLane; ++E) {
3133 APFloat Elem1 = LHS.elem<T>(I: L + (2 * E) + 0).getAPFloat();
3134 APFloat Elem2 = LHS.elem<T>(I: L + (2 * E) + 1).getAPFloat();
3135 Dst.elem<T>(I: L + E) = static_cast<T>(Fn(Elem1, Elem2, RM));
3136 }
3137 for (unsigned E = 0; E != HalfElemsPerLane; ++E) {
3138 APFloat Elem1 = RHS.elem<T>(I: L + (2 * E) + 0).getAPFloat();
3139 APFloat Elem2 = RHS.elem<T>(I: L + (2 * E) + 1).getAPFloat();
3140 Dst.elem<T>(I: L + E + HalfElemsPerLane) =
3141 static_cast<T>(Fn(Elem1, Elem2, RM));
3142 }
3143 }
3144 Dst.initializeAllElements();
3145 return true;
3146}
3147
3148static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC,
3149 const CallExpr *Call) {
3150 // Addsub: alternates between subtraction and addition
3151 // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i])
3152 const Pointer &RHS = S.Stk.pop<Pointer>();
3153 const Pointer &LHS = S.Stk.pop<Pointer>();
3154 const Pointer &Dst = S.Stk.peek<Pointer>();
3155 FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
3156 llvm::RoundingMode RM = getRoundingMode(FPO);
3157 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
3158 unsigned NumElems = VT->getNumElements();
3159
3160 using T = PrimConv<PT_Float>::T;
3161 for (unsigned I = 0; I != NumElems; ++I) {
3162 APFloat LElem = LHS.elem<T>(I).getAPFloat();
3163 APFloat RElem = RHS.elem<T>(I).getAPFloat();
3164 if (I % 2 == 0) {
3165 // Even indices: subtract
3166 LElem.subtract(RHS: RElem, RM);
3167 } else {
3168 // Odd indices: add
3169 LElem.add(RHS: RElem, RM);
3170 }
3171 Dst.elem<T>(I) = static_cast<T>(LElem);
3172 }
3173 Dst.initializeAllElements();
3174 return true;
3175}
3176
3177static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
3178 const CallExpr *Call) {
3179 // PCLMULQDQ: carry-less multiplication of selected 64-bit halves
3180 // imm8 bit 0: selects lower (0) or upper (1) 64 bits of first operand
3181 // imm8 bit 4: selects lower (0) or upper (1) 64 bits of second operand
3182 assert(Call->getArg(0)->getType()->isVectorType() &&
3183 Call->getArg(1)->getType()->isVectorType());
3184
3185 // Extract imm8 argument
3186 APSInt Imm8;
3187 if (!popToAPSInt(S, E: Call->getArg(Arg: 2), Out&: Imm8))
3188 return false;
3189 bool SelectUpperA = (Imm8 & 0x01) != 0;
3190 bool SelectUpperB = (Imm8 & 0x10) != 0;
3191
3192 const Pointer &RHS = S.Stk.pop<Pointer>();
3193 const Pointer &LHS = S.Stk.pop<Pointer>();
3194 const Pointer &Dst = S.Stk.peek<Pointer>();
3195
3196 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
3197 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
3198 unsigned NumElems = VT->getNumElements();
3199 const auto *DestVT = Call->getType()->castAs<VectorType>();
3200 PrimType DestElemT = *S.getContext().classify(T: DestVT->getElementType());
3201 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3202
3203 // Process each 128-bit lane (2 elements at a time)
3204 for (unsigned Lane = 0; Lane < NumElems; Lane += 2) {
3205 APSInt A0, A1, B0, B1;
3206 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3207 A0 = LHS.elem<T>(Lane + 0).toAPSInt();
3208 A1 = LHS.elem<T>(Lane + 1).toAPSInt();
3209 B0 = RHS.elem<T>(Lane + 0).toAPSInt();
3210 B1 = RHS.elem<T>(Lane + 1).toAPSInt();
3211 });
3212
3213 // Select the appropriate 64-bit values based on imm8
3214 APInt A = SelectUpperA ? A1 : A0;
3215 APInt B = SelectUpperB ? B1 : B0;
3216
3217 // Extend both operands to 128 bits for carry-less multiplication
3218 APInt A128 = A.zext(width: 128);
3219 APInt B128 = B.zext(width: 128);
3220
3221 // Use APIntOps::clmul for carry-less multiplication
3222 APInt Result = llvm::APIntOps::clmul(LHS: A128, RHS: B128);
3223
3224 // Split the 128-bit result into two 64-bit halves
3225 APSInt ResultLow(Result.extractBits(numBits: 64, bitPosition: 0), DestUnsigned);
3226 APSInt ResultHigh(Result.extractBits(numBits: 64, bitPosition: 64), DestUnsigned);
3227
3228 INT_TYPE_SWITCH_NO_BOOL(DestElemT, {
3229 Dst.elem<T>(Lane + 0) = static_cast<T>(ResultLow);
3230 Dst.elem<T>(Lane + 1) = static_cast<T>(ResultHigh);
3231 });
3232 }
3233
3234 Dst.initializeAllElements();
3235 return true;
3236}
3237
3238static bool interp__builtin_elementwise_triop_fp(
3239 InterpState &S, CodePtr OpPC, const CallExpr *Call,
3240 llvm::function_ref<APFloat(const APFloat &, const APFloat &,
3241 const APFloat &, llvm::RoundingMode)>
3242 Fn) {
3243 assert(Call->getNumArgs() == 3);
3244
3245 FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
3246 llvm::RoundingMode RM = getRoundingMode(FPO);
3247 QualType Arg1Type = Call->getArg(Arg: 0)->getType();
3248 QualType Arg2Type = Call->getArg(Arg: 1)->getType();
3249 QualType Arg3Type = Call->getArg(Arg: 2)->getType();
3250
3251 // Non-vector floating point types.
3252 if (!Arg1Type->isVectorType()) {
3253 assert(!Arg2Type->isVectorType());
3254 assert(!Arg3Type->isVectorType());
3255 (void)Arg2Type;
3256 (void)Arg3Type;
3257
3258 const Floating &Z = S.Stk.pop<Floating>();
3259 const Floating &Y = S.Stk.pop<Floating>();
3260 const Floating &X = S.Stk.pop<Floating>();
3261 APFloat F = Fn(X.getAPFloat(), Y.getAPFloat(), Z.getAPFloat(), RM);
3262 Floating Result = S.allocFloat(Sem: X.getSemantics());
3263 Result.copy(F);
3264 S.Stk.push<Floating>(Args&: Result);
3265 return true;
3266 }
3267
3268 // Vector type.
3269 assert(Arg1Type->isVectorType() && Arg2Type->isVectorType() &&
3270 Arg3Type->isVectorType());
3271
3272 const VectorType *VecTy = Arg1Type->castAs<VectorType>();
3273 QualType ElemQT = VecTy->getElementType();
3274 unsigned NumElems = VecTy->getNumElements();
3275
3276 assert(ElemQT == Arg2Type->castAs<VectorType>()->getElementType() &&
3277 ElemQT == Arg3Type->castAs<VectorType>()->getElementType());
3278 assert(NumElems == Arg2Type->castAs<VectorType>()->getNumElements() &&
3279 NumElems == Arg3Type->castAs<VectorType>()->getNumElements());
3280 assert(ElemQT->isRealFloatingType());
3281 (void)ElemQT;
3282
3283 const Pointer &VZ = S.Stk.pop<Pointer>();
3284 const Pointer &VY = S.Stk.pop<Pointer>();
3285 const Pointer &VX = S.Stk.pop<Pointer>();
3286 const Pointer &Dst = S.Stk.peek<Pointer>();
3287 for (unsigned I = 0; I != NumElems; ++I) {
3288 using T = PrimConv<PT_Float>::T;
3289 APFloat X = VX.elem<T>(I).getAPFloat();
3290 APFloat Y = VY.elem<T>(I).getAPFloat();
3291 APFloat Z = VZ.elem<T>(I).getAPFloat();
3292 APFloat F = Fn(X, Y, Z, RM);
3293 Dst.elem<Floating>(I) = Floating(F);
3294 }
3295 Dst.initializeAllElements();
3296 return true;
3297}
3298
3299/// AVX512 predicated move: "Result = Mask[] ? LHS[] : RHS[]".
3300static bool interp__builtin_ia32_select(InterpState &S, CodePtr OpPC,
3301 const CallExpr *Call) {
3302 const Pointer &RHS = S.Stk.pop<Pointer>();
3303 const Pointer &LHS = S.Stk.pop<Pointer>();
3304 APSInt Mask;
3305 if (!popToAPSInt(S, E: Call->getArg(Arg: 0), Out&: Mask))
3306 return false;
3307 const Pointer &Dst = S.Stk.peek<Pointer>();
3308
3309 assert(LHS.getNumElems() == RHS.getNumElems());
3310 assert(LHS.getNumElems() == Dst.getNumElems());
3311 unsigned NumElems = LHS.getNumElems();
3312 PrimType ElemT = LHS.getFieldDesc()->getPrimType();
3313 PrimType DstElemT = Dst.getFieldDesc()->getPrimType();
3314
3315 for (unsigned I = 0; I != NumElems; ++I) {
3316 if (ElemT == PT_Float) {
3317 assert(DstElemT == PT_Float);
3318 Dst.elem<Floating>(I) =
3319 Mask[I] ? LHS.elem<Floating>(I) : RHS.elem<Floating>(I);
3320 } else {
3321 APSInt Elem;
3322 INT_TYPE_SWITCH(ElemT, {
3323 Elem = Mask[I] ? LHS.elem<T>(I).toAPSInt() : RHS.elem<T>(I).toAPSInt();
3324 });
3325 INT_TYPE_SWITCH_NO_BOOL(DstElemT,
3326 { Dst.elem<T>(I) = static_cast<T>(Elem); });
3327 }
3328 }
3329 Dst.initializeAllElements();
3330
3331 return true;
3332}
3333
3334/// Scalar variant of AVX512 predicated select:
3335/// Result[i] = (Mask bit 0) ? LHS[i] : RHS[i], but only element 0 may change.
3336/// All other elements are taken from RHS.
3337static bool interp__builtin_ia32_select_scalar(InterpState &S,
3338 const CallExpr *Call) {
3339 unsigned N =
3340 Call->getArg(Arg: 1)->getType()->castAs<VectorType>()->getNumElements();
3341
3342 const Pointer &W = S.Stk.pop<Pointer>();
3343 const Pointer &A = S.Stk.pop<Pointer>();
3344 APSInt U;
3345 if (!popToAPSInt(S, E: Call->getArg(Arg: 0), Out&: U))
3346 return false;
3347 const Pointer &Dst = S.Stk.peek<Pointer>();
3348
3349 bool TakeA0 = U.getZExtValue() & 1ULL;
3350
3351 for (unsigned I = TakeA0; I != N; ++I)
3352 Dst.elem<Floating>(I) = W.elem<Floating>(I);
3353 if (TakeA0)
3354 Dst.elem<Floating>(I: 0) = A.elem<Floating>(I: 0);
3355
3356 Dst.initializeAllElements();
3357 return true;
3358}
3359
3360static bool interp__builtin_ia32_test_op(
3361 InterpState &S, CodePtr OpPC, const CallExpr *Call,
3362 llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
3363 const Pointer &RHS = S.Stk.pop<Pointer>();
3364 const Pointer &LHS = S.Stk.pop<Pointer>();
3365
3366 assert(LHS.getNumElems() == RHS.getNumElems());
3367
3368 unsigned SourceLen = LHS.getNumElems();
3369 QualType ElemQT = getElemType(P: LHS);
3370 OptPrimType ElemPT = S.getContext().classify(T: ElemQT);
3371 unsigned LaneWidth = S.getASTContext().getTypeSize(T: ElemQT);
3372
3373 APInt AWide(LaneWidth * SourceLen, 0);
3374 APInt BWide(LaneWidth * SourceLen, 0);
3375
3376 for (unsigned I = 0; I != SourceLen; ++I) {
3377 APInt ALane;
3378 APInt BLane;
3379
3380 if (ElemQT->isIntegerType()) { // Get value.
3381 INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
3382 ALane = LHS.elem<T>(I).toAPSInt();
3383 BLane = RHS.elem<T>(I).toAPSInt();
3384 });
3385 } else if (ElemQT->isFloatingType()) { // Get only sign bit.
3386 using T = PrimConv<PT_Float>::T;
3387 ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
3388 BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
3389 } else { // Must be integer or floating type.
3390 return false;
3391 }
3392 AWide.insertBits(SubBits: ALane, bitPosition: I * LaneWidth);
3393 BWide.insertBits(SubBits: BLane, bitPosition: I * LaneWidth);
3394 }
3395 pushInteger(S, Val: Fn(AWide, BWide), QT: Call->getType());
3396 return true;
3397}
3398
3399static bool interp__builtin_ia32_movmsk_op(InterpState &S, CodePtr OpPC,
3400 const CallExpr *Call) {
3401 assert(Call->getNumArgs() == 1);
3402
3403 const Pointer &Source = S.Stk.pop<Pointer>();
3404
3405 unsigned SourceLen = Source.getNumElems();
3406 QualType ElemQT = getElemType(P: Source);
3407 OptPrimType ElemT = S.getContext().classify(T: ElemQT);
3408 unsigned ResultLen =
3409 S.getASTContext().getTypeSize(T: Call->getType()); // Always 32-bit integer.
3410 APInt Result(ResultLen, 0);
3411
3412 for (unsigned I = 0; I != SourceLen; ++I) {
3413 APInt Elem;
3414 if (ElemQT->isIntegerType()) {
3415 INT_TYPE_SWITCH_NO_BOOL(*ElemT, { Elem = Source.elem<T>(I).toAPSInt(); });
3416 } else if (ElemQT->isRealFloatingType()) {
3417 using T = PrimConv<PT_Float>::T;
3418 Elem = Source.elem<T>(I).getAPFloat().bitcastToAPInt();
3419 } else {
3420 return false;
3421 }
3422 Result.setBitVal(BitPosition: I, BitValue: Elem.isNegative());
3423 }
3424 pushInteger(S, Val: Result, QT: Call->getType());
3425 return true;
3426}
3427
3428static bool interp__builtin_elementwise_triop(
3429 InterpState &S, CodePtr OpPC, const CallExpr *Call,
3430 llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
3431 Fn) {
3432 assert(Call->getNumArgs() == 3);
3433
3434 QualType Arg0Type = Call->getArg(Arg: 0)->getType();
3435 QualType Arg2Type = Call->getArg(Arg: 2)->getType();
3436 // Non-vector integer types.
3437 if (!Arg0Type->isVectorType()) {
3438 APSInt Op2;
3439 if (!popToAPSInt(S, T: Arg2Type, Out&: Op2))
3440 return false;
3441 APSInt Op1;
3442 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: Op1))
3443 return false;
3444 APSInt Op0;
3445 if (!popToAPSInt(S, T: Arg0Type, Out&: Op0))
3446 return false;
3447 APSInt Result = APSInt(Fn(Op0, Op1, Op2), Op0.isUnsigned());
3448 pushInteger(S, Val: Result, QT: Call->getType());
3449 return true;
3450 }
3451
3452 const auto *VecT = Arg0Type->castAs<VectorType>();
3453 PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
3454 unsigned NumElems = VecT->getNumElements();
3455 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3456
3457 // Vector + Vector + Scalar case.
3458 if (!Arg2Type->isVectorType()) {
3459 APSInt Op2;
3460 if (!popToAPSInt(S, T: Arg2Type, Out&: Op2))
3461 return false;
3462
3463 const Pointer &Op1 = S.Stk.pop<Pointer>();
3464 const Pointer &Op0 = S.Stk.pop<Pointer>();
3465 const Pointer &Dst = S.Stk.peek<Pointer>();
3466 for (unsigned I = 0; I != NumElems; ++I) {
3467 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3468 Dst.elem<T>(I) = static_cast<T>(APSInt(
3469 Fn(Op0.elem<T>(I).toAPSInt(), Op1.elem<T>(I).toAPSInt(), Op2),
3470 DestUnsigned));
3471 });
3472 }
3473 Dst.initializeAllElements();
3474
3475 return true;
3476 }
3477
3478 // Vector type.
3479 const Pointer &Op2 = S.Stk.pop<Pointer>();
3480 const Pointer &Op1 = S.Stk.pop<Pointer>();
3481 const Pointer &Op0 = S.Stk.pop<Pointer>();
3482 const Pointer &Dst = S.Stk.peek<Pointer>();
3483 for (unsigned I = 0; I != NumElems; ++I) {
3484 APSInt Val0, Val1, Val2;
3485 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3486 Val0 = Op0.elem<T>(I).toAPSInt();
3487 Val1 = Op1.elem<T>(I).toAPSInt();
3488 Val2 = Op2.elem<T>(I).toAPSInt();
3489 });
3490 APSInt Result = APSInt(Fn(Val0, Val1, Val2), Val0.isUnsigned());
3491 INT_TYPE_SWITCH_NO_BOOL(ElemT,
3492 { Dst.elem<T>(I) = static_cast<T>(Result); });
3493 }
3494 Dst.initializeAllElements();
3495
3496 return true;
3497}
3498
3499static bool interp__builtin_ia32_extract_vector(InterpState &S, CodePtr OpPC,
3500 const CallExpr *Call,
3501 unsigned ID) {
3502 assert(Call->getNumArgs() == 2);
3503
3504 APSInt ImmAPS;
3505 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: ImmAPS))
3506 return false;
3507 uint64_t Index = ImmAPS.getZExtValue();
3508
3509 const Pointer &Src = S.Stk.pop<Pointer>();
3510 if (!Src.getFieldDesc()->isPrimitiveArray())
3511 return false;
3512
3513 const Pointer &Dst = S.Stk.peek<Pointer>();
3514 if (!Dst.getFieldDesc()->isPrimitiveArray())
3515 return false;
3516
3517 unsigned SrcElems = Src.getNumElems();
3518 unsigned DstElems = Dst.getNumElems();
3519
3520 unsigned NumLanes = SrcElems / DstElems;
3521 unsigned Lane = static_cast<unsigned>(Index % NumLanes);
3522 unsigned ExtractPos = Lane * DstElems;
3523
3524 PrimType ElemT = Src.getFieldDesc()->getPrimType();
3525
3526 TYPE_SWITCH(ElemT, {
3527 for (unsigned I = 0; I != DstElems; ++I) {
3528 Dst.elem<T>(I) = Src.elem<T>(ExtractPos + I);
3529 }
3530 });
3531
3532 Dst.initializeAllElements();
3533 return true;
3534}
3535
3536static bool interp__builtin_ia32_extract_vector_masked(InterpState &S,
3537 CodePtr OpPC,
3538 const CallExpr *Call,
3539 unsigned ID) {
3540 assert(Call->getNumArgs() == 4);
3541
3542 APSInt MaskAPS;
3543 if (!popToAPSInt(S, E: Call->getArg(Arg: 3), Out&: MaskAPS))
3544 return false;
3545 const Pointer &Merge = S.Stk.pop<Pointer>();
3546 APSInt ImmAPS;
3547 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: ImmAPS))
3548 return false;
3549 const Pointer &Src = S.Stk.pop<Pointer>();
3550
3551 if (!Src.getFieldDesc()->isPrimitiveArray() ||
3552 !Merge.getFieldDesc()->isPrimitiveArray())
3553 return false;
3554
3555 const Pointer &Dst = S.Stk.peek<Pointer>();
3556 if (!Dst.getFieldDesc()->isPrimitiveArray())
3557 return false;
3558
3559 unsigned SrcElems = Src.getNumElems();
3560 unsigned DstElems = Dst.getNumElems();
3561
3562 unsigned NumLanes = SrcElems / DstElems;
3563 unsigned Lane = static_cast<unsigned>(ImmAPS.getZExtValue() % NumLanes);
3564 unsigned Base = Lane * DstElems;
3565
3566 PrimType ElemT = Src.getFieldDesc()->getPrimType();
3567
3568 TYPE_SWITCH(ElemT, {
3569 for (unsigned I = 0; I != DstElems; ++I) {
3570 if (MaskAPS[I])
3571 Dst.elem<T>(I) = Src.elem<T>(Base + I);
3572 else
3573 Dst.elem<T>(I) = Merge.elem<T>(I);
3574 }
3575 });
3576
3577 Dst.initializeAllElements();
3578 return true;
3579}
3580
3581static bool interp__builtin_ia32_insert_subvector(InterpState &S, CodePtr OpPC,
3582 const CallExpr *Call,
3583 unsigned ID) {
3584 assert(Call->getNumArgs() == 3);
3585
3586 APSInt ImmAPS;
3587 if (!popToAPSInt(S, E: Call->getArg(Arg: 2), Out&: ImmAPS))
3588 return false;
3589 uint64_t Index = ImmAPS.getZExtValue();
3590
3591 const Pointer &SubVec = S.Stk.pop<Pointer>();
3592 if (!SubVec.getFieldDesc()->isPrimitiveArray())
3593 return false;
3594
3595 const Pointer &BaseVec = S.Stk.pop<Pointer>();
3596 if (!BaseVec.getFieldDesc()->isPrimitiveArray())
3597 return false;
3598
3599 const Pointer &Dst = S.Stk.peek<Pointer>();
3600
3601 unsigned BaseElements = BaseVec.getNumElems();
3602 unsigned SubElements = SubVec.getNumElems();
3603
3604 assert(SubElements != 0 && BaseElements != 0 &&
3605 (BaseElements % SubElements) == 0);
3606
3607 unsigned NumLanes = BaseElements / SubElements;
3608 unsigned Lane = static_cast<unsigned>(Index % NumLanes);
3609 unsigned InsertPos = Lane * SubElements;
3610
3611 PrimType ElemT = BaseVec.getFieldDesc()->getPrimType();
3612
3613 TYPE_SWITCH(ElemT, {
3614 for (unsigned I = 0; I != BaseElements; ++I)
3615 Dst.elem<T>(I) = BaseVec.elem<T>(I);
3616 for (unsigned I = 0; I != SubElements; ++I)
3617 Dst.elem<T>(InsertPos + I) = SubVec.elem<T>(I);
3618 });
3619
3620 Dst.initializeAllElements();
3621 return true;
3622}
3623
3624static bool interp__builtin_ia32_phminposuw(InterpState &S, CodePtr OpPC,
3625 const CallExpr *Call) {
3626 assert(Call->getNumArgs() == 1);
3627
3628 const Pointer &Source = S.Stk.pop<Pointer>();
3629 const Pointer &Dest = S.Stk.peek<Pointer>();
3630
3631 unsigned SourceLen = Source.getNumElems();
3632 QualType ElemQT = getElemType(P: Source);
3633 OptPrimType ElemT = S.getContext().classify(T: ElemQT);
3634 unsigned ElemBitWidth = S.getASTContext().getTypeSize(T: ElemQT);
3635
3636 bool DestUnsigned = Call->getCallReturnType(Ctx: S.getASTContext())
3637 ->castAs<VectorType>()
3638 ->getElementType()
3639 ->isUnsignedIntegerOrEnumerationType();
3640
3641 INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
3642 APSInt MinIndex(ElemBitWidth, DestUnsigned);
3643 APSInt MinVal = Source.elem<T>(0).toAPSInt();
3644
3645 for (unsigned I = 1; I != SourceLen; ++I) {
3646 APSInt Val = Source.elem<T>(I).toAPSInt();
3647 if (MinVal.ugt(Val)) {
3648 MinVal = Val;
3649 MinIndex = I;
3650 }
3651 }
3652
3653 Dest.elem<T>(0) = static_cast<T>(MinVal);
3654 Dest.elem<T>(1) = static_cast<T>(MinIndex);
3655 for (unsigned I = 2; I != SourceLen; ++I) {
3656 Dest.elem<T>(I) = static_cast<T>(APSInt(ElemBitWidth, DestUnsigned));
3657 }
3658 });
3659 Dest.initializeAllElements();
3660 return true;
3661}
3662
3663static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
3664 const CallExpr *Call, bool MaskZ) {
3665 assert(Call->getNumArgs() == 5);
3666
3667 APSInt UVal;
3668 if (!popToAPSInt(S, E: Call->getArg(Arg: 4), Out&: UVal))
3669 return false;
3670 APInt U = UVal; // Lane mask
3671 APSInt ImmVal;
3672 if (!popToAPSInt(S, E: Call->getArg(Arg: 3), Out&: ImmVal))
3673 return false;
3674 APInt Imm = ImmVal; // Ternary truth table
3675 const Pointer &C = S.Stk.pop<Pointer>();
3676 const Pointer &B = S.Stk.pop<Pointer>();
3677 const Pointer &A = S.Stk.pop<Pointer>();
3678 const Pointer &Dst = S.Stk.peek<Pointer>();
3679
3680 unsigned DstLen = A.getNumElems();
3681 QualType ElemQT = getElemType(P: A);
3682 OptPrimType ElemT = S.getContext().classify(T: ElemQT);
3683 unsigned LaneWidth = S.getASTContext().getTypeSize(T: ElemQT);
3684 bool DstUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
3685
3686 INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
3687 for (unsigned I = 0; I != DstLen; ++I) {
3688 APInt ALane = A.elem<T>(I).toAPSInt();
3689 APInt BLane = B.elem<T>(I).toAPSInt();
3690 APInt CLane = C.elem<T>(I).toAPSInt();
3691 APInt RLane(LaneWidth, 0);
3692 if (U[I]) { // If lane not masked, compute ternary logic.
3693 for (unsigned Bit = 0; Bit != LaneWidth; ++Bit) {
3694 unsigned ABit = ALane[Bit];
3695 unsigned BBit = BLane[Bit];
3696 unsigned CBit = CLane[Bit];
3697 unsigned Idx = (ABit << 2) | (BBit << 1) | (CBit);
3698 RLane.setBitVal(Bit, Imm[Idx]);
3699 }
3700 Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
3701 } else if (MaskZ) { // If zero masked, zero the lane.
3702 Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
3703 } else { // Just masked, put in A lane.
3704 Dst.elem<T>(I) = static_cast<T>(APSInt(ALane, DstUnsigned));
3705 }
3706 }
3707 });
3708 Dst.initializeAllElements();
3709 return true;
3710}
3711
3712static bool interp__builtin_ia32_vec_ext(InterpState &S, CodePtr OpPC,
3713 const CallExpr *Call, unsigned ID) {
3714 assert(Call->getNumArgs() == 2);
3715
3716 APSInt ImmAPS;
3717 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: ImmAPS))
3718 return false;
3719 const Pointer &Vec = S.Stk.pop<Pointer>();
3720 if (!Vec.getFieldDesc()->isPrimitiveArray())
3721 return false;
3722
3723 unsigned NumElems = Vec.getNumElems();
3724 unsigned Index =
3725 static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1));
3726
3727 PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3728 // FIXME(#161685): Replace float+int split with a numeric-only type switch
3729 if (ElemT == PT_Float) {
3730 S.Stk.push<Floating>(Args&: Vec.elem<Floating>(I: Index));
3731 return true;
3732 }
3733 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3734 APSInt V = Vec.elem<T>(Index).toAPSInt();
3735 pushInteger(S, V, Call->getType());
3736 });
3737
3738 return true;
3739}
3740
3741static bool interp__builtin_ia32_vec_set(InterpState &S, CodePtr OpPC,
3742 const CallExpr *Call, unsigned ID) {
3743 assert(Call->getNumArgs() == 3);
3744
3745 APSInt ImmAPS;
3746 if (!popToAPSInt(S, E: Call->getArg(Arg: 2), Out&: ImmAPS))
3747 return false;
3748 APSInt ValAPS;
3749 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: ValAPS))
3750 return false;
3751
3752 const Pointer &Base = S.Stk.pop<Pointer>();
3753 if (!Base.getFieldDesc()->isPrimitiveArray())
3754 return false;
3755
3756 const Pointer &Dst = S.Stk.peek<Pointer>();
3757
3758 unsigned NumElems = Base.getNumElems();
3759 unsigned Index =
3760 static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1));
3761
3762 PrimType ElemT = Base.getFieldDesc()->getPrimType();
3763 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3764 for (unsigned I = 0; I != NumElems; ++I)
3765 Dst.elem<T>(I) = Base.elem<T>(I);
3766 Dst.elem<T>(Index) = static_cast<T>(ValAPS);
3767 });
3768
3769 Dst.initializeAllElements();
3770 return true;
3771}
3772
3773static bool evalICmpImm(uint8_t Imm, const APSInt &A, const APSInt &B,
3774 bool IsUnsigned) {
3775 switch (Imm & 0x7) {
3776 case 0x00: // _MM_CMPINT_EQ
3777 return (A == B);
3778 case 0x01: // _MM_CMPINT_LT
3779 return IsUnsigned ? A.ult(RHS: B) : A.slt(RHS: B);
3780 case 0x02: // _MM_CMPINT_LE
3781 return IsUnsigned ? A.ule(RHS: B) : A.sle(RHS: B);
3782 case 0x03: // _MM_CMPINT_FALSE
3783 return false;
3784 case 0x04: // _MM_CMPINT_NE
3785 return (A != B);
3786 case 0x05: // _MM_CMPINT_NLT
3787 return IsUnsigned ? A.ugt(RHS: B) : A.sgt(RHS: B);
3788 case 0x06: // _MM_CMPINT_NLE
3789 return IsUnsigned ? A.uge(RHS: B) : A.sge(RHS: B);
3790 case 0x07: // _MM_CMPINT_TRUE
3791 return true;
3792 default:
3793 llvm_unreachable("Invalid Op");
3794 }
3795}
3796
3797static bool interp__builtin_ia32_cmp_mask(InterpState &S, CodePtr OpPC,
3798 const CallExpr *Call, unsigned ID,
3799 bool IsUnsigned) {
3800 assert(Call->getNumArgs() == 4);
3801
3802 APSInt Mask;
3803 if (!popToAPSInt(S, E: Call->getArg(Arg: 3), Out&: Mask))
3804 return false;
3805 APSInt Opcode;
3806 if (!popToAPSInt(S, E: Call->getArg(Arg: 2), Out&: Opcode))
3807 return false;
3808 unsigned CmpOp = static_cast<unsigned>(Opcode.getZExtValue());
3809 const Pointer &RHS = S.Stk.pop<Pointer>();
3810 const Pointer &LHS = S.Stk.pop<Pointer>();
3811
3812 assert(LHS.getNumElems() == RHS.getNumElems());
3813
3814 APInt RetMask = APInt::getZero(numBits: LHS.getNumElems());
3815 unsigned VectorLen = LHS.getNumElems();
3816 PrimType ElemT = LHS.getFieldDesc()->getPrimType();
3817
3818 for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
3819 APSInt A, B;
3820 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3821 A = LHS.elem<T>(ElemNum).toAPSInt();
3822 B = RHS.elem<T>(ElemNum).toAPSInt();
3823 });
3824 RetMask.setBitVal(BitPosition: ElemNum,
3825 BitValue: Mask[ElemNum] && evalICmpImm(Imm: CmpOp, A, B, IsUnsigned));
3826 }
3827 pushInteger(S, Val: RetMask, QT: Call->getType());
3828 return true;
3829}
3830
3831static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
3832 const CallExpr *Call) {
3833 assert(Call->getNumArgs() == 1);
3834
3835 QualType Arg0Type = Call->getArg(Arg: 0)->getType();
3836 const auto *VecT = Arg0Type->castAs<VectorType>();
3837 PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
3838 unsigned NumElems = VecT->getNumElements();
3839 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3840 const Pointer &Src = S.Stk.pop<Pointer>();
3841 const Pointer &Dst = S.Stk.peek<Pointer>();
3842
3843 for (unsigned I = 0; I != NumElems; ++I) {
3844 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3845 APSInt ElemI = Src.elem<T>(I).toAPSInt();
3846 APInt ConflictMask(ElemI.getBitWidth(), 0);
3847 for (unsigned J = 0; J != I; ++J) {
3848 APSInt ElemJ = Src.elem<T>(J).toAPSInt();
3849 ConflictMask.setBitVal(J, ElemI == ElemJ);
3850 }
3851 Dst.elem<T>(I) = static_cast<T>(APSInt(ConflictMask, DestUnsigned));
3852 });
3853 }
3854 Dst.initializeAllElements();
3855 return true;
3856}
3857
3858static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC,
3859 const CallExpr *Call,
3860 unsigned ID) {
3861 assert(Call->getNumArgs() == 1);
3862
3863 const Pointer &Vec = S.Stk.pop<Pointer>();
3864 unsigned RetWidth = S.getASTContext().getIntWidth(T: Call->getType());
3865 APInt RetMask(RetWidth, 0);
3866
3867 unsigned VectorLen = Vec.getNumElems();
3868 PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3869
3870 for (unsigned ElemNum = 0; ElemNum != VectorLen; ++ElemNum) {
3871 APSInt A;
3872 INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem<T>(ElemNum).toAPSInt(); });
3873 unsigned MSB = A[A.getBitWidth() - 1];
3874 RetMask.setBitVal(BitPosition: ElemNum, BitValue: MSB);
3875 }
3876 pushInteger(S, Val: RetMask, QT: Call->getType());
3877 return true;
3878}
3879
3880static bool interp__builtin_ia32_cvt_mask2vec(InterpState &S, CodePtr OpPC,
3881 const CallExpr *Call,
3882 unsigned ID) {
3883 assert(Call->getNumArgs() == 1);
3884
3885 APSInt Mask;
3886 if (!popToAPSInt(S, E: Call->getArg(Arg: 0), Out&: Mask))
3887 return false;
3888
3889 const Pointer &Vec = S.Stk.peek<Pointer>();
3890 unsigned NumElems = Vec.getNumElems();
3891 PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3892
3893 for (unsigned I = 0; I != NumElems; ++I) {
3894 bool BitSet = Mask[I];
3895
3896 INT_TYPE_SWITCH_NO_BOOL(
3897 ElemT, { Vec.elem<T>(I) = BitSet ? T::from(-1) : T::from(0); });
3898 }
3899
3900 Vec.initializeAllElements();
3901
3902 return true;
3903}
3904
3905static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
3906 const CallExpr *Call,
3907 bool HasRoundingMask) {
3908 APSInt Rounding, MaskInt;
3909 Pointer Src, B, A;
3910
3911 if (HasRoundingMask) {
3912 assert(Call->getNumArgs() == 5);
3913 if (!popToAPSInt(S, E: Call->getArg(Arg: 4), Out&: Rounding))
3914 return false;
3915 if (!popToAPSInt(S, E: Call->getArg(Arg: 3), Out&: MaskInt))
3916 return false;
3917 Src = S.Stk.pop<Pointer>();
3918 B = S.Stk.pop<Pointer>();
3919 A = S.Stk.pop<Pointer>();
3920 if (!CheckLoad(S, OpPC, Ptr: A) || !CheckLoad(S, OpPC, Ptr: B) ||
3921 !CheckLoad(S, OpPC, Ptr: Src))
3922 return false;
3923 } else {
3924 assert(Call->getNumArgs() == 2);
3925 B = S.Stk.pop<Pointer>();
3926 A = S.Stk.pop<Pointer>();
3927 if (!CheckLoad(S, OpPC, Ptr: A) || !CheckLoad(S, OpPC, Ptr: B))
3928 return false;
3929 }
3930
3931 const auto *DstVTy = Call->getType()->castAs<VectorType>();
3932 unsigned NumElems = DstVTy->getNumElements();
3933 const Pointer &Dst = S.Stk.peek<Pointer>();
3934
3935 // Copy all elements except lane 0 (overwritten below) from A to Dst.
3936 for (unsigned I = 1; I != NumElems; ++I)
3937 Dst.elem<Floating>(I) = A.elem<Floating>(I);
3938
3939 // Convert element 0 from double to float, or use Src if masked off.
3940 if (!HasRoundingMask || (MaskInt.getZExtValue() & 0x1)) {
3941 assert(S.getASTContext().FloatTy == DstVTy->getElementType() &&
3942 "cvtsd2ss requires float element type in destination vector");
3943
3944 Floating Conv = S.allocFloat(
3945 Sem: S.getASTContext().getFloatTypeSemantics(T: DstVTy->getElementType()));
3946 APFloat SrcVal = B.elem<Floating>(I: 0).getAPFloat();
3947 if (!convertDoubleToFloatStrict(Src: SrcVal, Dst&: Conv, S, DiagExpr: Call))
3948 return false;
3949 Dst.elem<Floating>(I: 0) = Conv;
3950 } else {
3951 Dst.elem<Floating>(I: 0) = Src.elem<Floating>(I: 0);
3952 }
3953
3954 Dst.initializeAllElements();
3955 return true;
3956}
3957
3958static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
3959 const CallExpr *Call, bool IsMasked,
3960 bool HasRounding) {
3961 APSInt MaskVal;
3962 Pointer PassThrough;
3963 Pointer Src;
3964 APSInt Rounding;
3965
3966 if (IsMasked) {
3967 // Pop in reverse order.
3968 if (HasRounding) {
3969 if (!popToAPSInt(S, E: Call->getArg(Arg: 3), Out&: Rounding))
3970 return false;
3971 if (!popToAPSInt(S, E: Call->getArg(Arg: 2), Out&: MaskVal))
3972 return false;
3973 PassThrough = S.Stk.pop<Pointer>();
3974 Src = S.Stk.pop<Pointer>();
3975 } else {
3976 if (!popToAPSInt(S, E: Call->getArg(Arg: 2), Out&: MaskVal))
3977 return false;
3978 PassThrough = S.Stk.pop<Pointer>();
3979 Src = S.Stk.pop<Pointer>();
3980 }
3981
3982 if (!CheckLoad(S, OpPC, Ptr: PassThrough))
3983 return false;
3984 } else {
3985 // Pop source only.
3986 Src = S.Stk.pop<Pointer>();
3987 }
3988
3989 if (!CheckLoad(S, OpPC, Ptr: Src))
3990 return false;
3991
3992 const auto *RetVTy = Call->getType()->castAs<VectorType>();
3993 unsigned RetElems = RetVTy->getNumElements();
3994 unsigned SrcElems = Src.getNumElems();
3995 const Pointer &Dst = S.Stk.peek<Pointer>();
3996
3997 // Initialize destination with passthrough or zeros.
3998 for (unsigned I = 0; I != RetElems; ++I)
3999 if (IsMasked)
4000 Dst.elem<Floating>(I) = PassThrough.elem<Floating>(I);
4001 else
4002 Dst.elem<Floating>(I) = Floating(APFloat(0.0f));
4003
4004 assert(S.getASTContext().FloatTy == RetVTy->getElementType() &&
4005 "cvtpd2ps requires float element type in return vector");
4006
4007 // Convert double to float for enabled elements (only process source elements
4008 // that exist).
4009 for (unsigned I = 0; I != SrcElems; ++I) {
4010 if (IsMasked && !MaskVal[I])
4011 continue;
4012
4013 APFloat SrcVal = Src.elem<Floating>(I).getAPFloat();
4014
4015 Floating Conv = S.allocFloat(
4016 Sem: S.getASTContext().getFloatTypeSemantics(T: RetVTy->getElementType()));
4017 if (!convertDoubleToFloatStrict(Src: SrcVal, Dst&: Conv, S, DiagExpr: Call))
4018 return false;
4019 Dst.elem<Floating>(I) = Conv;
4020 }
4021
4022 Dst.initializeAllElements();
4023 return true;
4024}
4025
4026static bool interp__builtin_ia32_shuffle_generic(
4027 InterpState &S, CodePtr OpPC, const CallExpr *Call,
4028 llvm::function_ref<std::pair<unsigned, int>(unsigned, const APInt &)>
4029 GetSourceIndex) {
4030
4031 assert(Call->getNumArgs() == 2 || Call->getNumArgs() == 3);
4032
4033 APInt ShuffleMask;
4034 Pointer A, MaskVector, B;
4035 bool IsVectorMask = false;
4036 bool IsSingleOperand = (Call->getNumArgs() == 2);
4037
4038 if (IsSingleOperand) {
4039 QualType MaskType = Call->getArg(Arg: 1)->getType();
4040 if (MaskType->isVectorType()) {
4041 IsVectorMask = true;
4042 MaskVector = S.Stk.pop<Pointer>();
4043 A = S.Stk.pop<Pointer>();
4044 B = A;
4045 } else if (MaskType->isIntegerType()) {
4046 APSInt MaskVal;
4047 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: MaskVal))
4048 return false;
4049 ShuffleMask = MaskVal;
4050 A = S.Stk.pop<Pointer>();
4051 B = A;
4052 } else {
4053 return false;
4054 }
4055 } else {
4056 QualType Arg2Type = Call->getArg(Arg: 2)->getType();
4057 if (Arg2Type->isVectorType()) {
4058 IsVectorMask = true;
4059 B = S.Stk.pop<Pointer>();
4060 MaskVector = S.Stk.pop<Pointer>();
4061 A = S.Stk.pop<Pointer>();
4062 } else if (Arg2Type->isIntegerType()) {
4063 APSInt MaskVal;
4064 if (!popToAPSInt(S, E: Call->getArg(Arg: 2), Out&: MaskVal))
4065 return false;
4066 ShuffleMask = MaskVal;
4067 B = S.Stk.pop<Pointer>();
4068 A = S.Stk.pop<Pointer>();
4069 } else {
4070 return false;
4071 }
4072 }
4073
4074 QualType Arg0Type = Call->getArg(Arg: 0)->getType();
4075 const auto *VecT = Arg0Type->castAs<VectorType>();
4076 PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
4077 unsigned NumElems = VecT->getNumElements();
4078
4079 const Pointer &Dst = S.Stk.peek<Pointer>();
4080
4081 PrimType MaskElemT = PT_Uint32;
4082 if (IsVectorMask) {
4083 QualType Arg1Type = Call->getArg(Arg: 1)->getType();
4084 const auto *MaskVecT = Arg1Type->castAs<VectorType>();
4085 QualType MaskElemType = MaskVecT->getElementType();
4086 MaskElemT = *S.getContext().classify(T: MaskElemType);
4087 }
4088
4089 for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {
4090 if (IsVectorMask) {
4091 INT_TYPE_SWITCH(MaskElemT,
4092 { ShuffleMask = MaskVector.elem<T>(DstIdx).toAPSInt(); });
4093 }
4094
4095 auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
4096
4097 if (SrcIdx < 0) {
4098 // Zero out this element
4099 if (ElemT == PT_Float) {
4100 Dst.elem<Floating>(I: DstIdx) = Floating(
4101 S.getASTContext().getFloatTypeSemantics(T: VecT->getElementType()));
4102 } else {
4103 INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(0); });
4104 }
4105 } else {
4106 const Pointer &Src = (SrcVecIdx == 0) ? A : B;
4107 TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
4108 }
4109 }
4110 Dst.initializeAllElements();
4111
4112 return true;
4113}
4114
4115static bool interp__builtin_ia32_shuffle_generic(
4116 InterpState &S, CodePtr OpPC, const CallExpr *Call,
4117 llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
4118 GetSourceIndex) {
4119 return interp__builtin_ia32_shuffle_generic(
4120 S, OpPC, Call,
4121 GetSourceIndex: [&GetSourceIndex](unsigned DstIdx,
4122 const APInt &Mask) -> std::pair<unsigned, int> {
4123 return GetSourceIndex(DstIdx, Mask.getZExtValue());
4124 });
4125}
4126
4127static bool interp__builtin_ia32_shift_with_count(
4128 InterpState &S, CodePtr OpPC, const CallExpr *Call,
4129 llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp,
4130 llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) {
4131
4132 assert(Call->getNumArgs() == 2);
4133
4134 const Pointer &Count = S.Stk.pop<Pointer>();
4135 const Pointer &Source = S.Stk.pop<Pointer>();
4136
4137 QualType SourceType = Call->getArg(Arg: 0)->getType();
4138 QualType CountType = Call->getArg(Arg: 1)->getType();
4139 assert(SourceType->isVectorType() && CountType->isVectorType());
4140
4141 const auto *SourceVecT = SourceType->castAs<VectorType>();
4142 const auto *CountVecT = CountType->castAs<VectorType>();
4143 PrimType SourceElemT = *S.getContext().classify(T: SourceVecT->getElementType());
4144 PrimType CountElemT = *S.getContext().classify(T: CountVecT->getElementType());
4145
4146 const Pointer &Dst = S.Stk.peek<Pointer>();
4147
4148 unsigned DestEltWidth =
4149 S.getASTContext().getTypeSize(T: SourceVecT->getElementType());
4150 bool IsDestUnsigned = SourceVecT->getElementType()->isUnsignedIntegerType();
4151 unsigned DestLen = SourceVecT->getNumElements();
4152 unsigned CountEltWidth =
4153 S.getASTContext().getTypeSize(T: CountVecT->getElementType());
4154 unsigned NumBitsInQWord = 64;
4155 unsigned NumCountElts = NumBitsInQWord / CountEltWidth;
4156
4157 uint64_t CountLQWord = 0;
4158 for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) {
4159 uint64_t Elt = 0;
4160 INT_TYPE_SWITCH(CountElemT,
4161 { Elt = static_cast<uint64_t>(Count.elem<T>(EltIdx)); });
4162 CountLQWord |= (Elt << (EltIdx * CountEltWidth));
4163 }
4164
4165 for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) {
4166 APSInt Elt;
4167 INT_TYPE_SWITCH(SourceElemT, { Elt = Source.elem<T>(EltIdx).toAPSInt(); });
4168
4169 APInt Result;
4170 if (CountLQWord < DestEltWidth) {
4171 Result = ShiftOp(Elt, CountLQWord);
4172 } else {
4173 Result = OverflowOp(Elt, DestEltWidth);
4174 }
4175 if (IsDestUnsigned) {
4176 INT_TYPE_SWITCH(SourceElemT, {
4177 Dst.elem<T>(EltIdx) = T::from(Result.getZExtValue());
4178 });
4179 } else {
4180 INT_TYPE_SWITCH(SourceElemT, {
4181 Dst.elem<T>(EltIdx) = T::from(Result.getSExtValue());
4182 });
4183 }
4184 }
4185
4186 Dst.initializeAllElements();
4187 return true;
4188}
4189
4190static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
4191 const CallExpr *Call) {
4192
4193 assert(Call->getNumArgs() == 3);
4194
4195 QualType SourceType = Call->getArg(Arg: 0)->getType();
4196 QualType ShuffleMaskType = Call->getArg(Arg: 1)->getType();
4197 QualType ZeroMaskType = Call->getArg(Arg: 2)->getType();
4198 if (!SourceType->isVectorType() || !ShuffleMaskType->isVectorType() ||
4199 !ZeroMaskType->isIntegerType()) {
4200 return false;
4201 }
4202
4203 Pointer Source, ShuffleMask;
4204 APSInt ZeroMask;
4205 if (!popToAPSInt(S, E: Call->getArg(Arg: 2), Out&: ZeroMask))
4206 return false;
4207 ShuffleMask = S.Stk.pop<Pointer>();
4208 Source = S.Stk.pop<Pointer>();
4209
4210 const auto *SourceVecT = SourceType->castAs<VectorType>();
4211 const auto *ShuffleMaskVecT = ShuffleMaskType->castAs<VectorType>();
4212 assert(SourceVecT->getNumElements() == ShuffleMaskVecT->getNumElements());
4213 assert(ZeroMask.getBitWidth() == SourceVecT->getNumElements());
4214
4215 PrimType SourceElemT = *S.getContext().classify(T: SourceVecT->getElementType());
4216 PrimType ShuffleMaskElemT =
4217 *S.getContext().classify(T: ShuffleMaskVecT->getElementType());
4218
4219 unsigned NumBytesInQWord = 8;
4220 unsigned NumBitsInByte = 8;
4221 unsigned NumBytes = SourceVecT->getNumElements();
4222 unsigned NumQWords = NumBytes / NumBytesInQWord;
4223 unsigned RetWidth = ZeroMask.getBitWidth();
4224 APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
4225
4226 for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
4227 APInt SourceQWord(64, 0);
4228 for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4229 uint64_t Byte = 0;
4230 INT_TYPE_SWITCH(SourceElemT, {
4231 Byte = static_cast<uint64_t>(
4232 Source.elem<T>(QWordId * NumBytesInQWord + ByteIdx));
4233 });
4234 SourceQWord.insertBits(SubBits: APInt(8, Byte & 0xFF), bitPosition: ByteIdx * NumBitsInByte);
4235 }
4236
4237 for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4238 unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx;
4239 unsigned M = 0;
4240 INT_TYPE_SWITCH(ShuffleMaskElemT, {
4241 M = static_cast<unsigned>(ShuffleMask.elem<T>(SelIdx)) & 0x3F;
4242 });
4243
4244 if (ZeroMask[SelIdx]) {
4245 RetMask.setBitVal(BitPosition: SelIdx, BitValue: SourceQWord[M]);
4246 }
4247 }
4248 }
4249
4250 pushInteger(S, Val: RetMask, QT: Call->getType());
4251 return true;
4252}
4253
4254static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
4255 const CallExpr *Call) {
4256 // Arguments are: vector of floats, rounding immediate
4257 assert(Call->getNumArgs() == 2);
4258
4259 APSInt Imm;
4260 if (!popToAPSInt(S, E: Call->getArg(Arg: 1), Out&: Imm))
4261 return false;
4262 const Pointer &Src = S.Stk.pop<Pointer>();
4263 const Pointer &Dst = S.Stk.peek<Pointer>();
4264
4265 assert(Src.getFieldDesc()->isPrimitiveArray());
4266 assert(Dst.getFieldDesc()->isPrimitiveArray());
4267
4268 const auto *SrcVTy = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
4269 unsigned SrcNumElems = SrcVTy->getNumElements();
4270 const auto *DstVTy = Call->getType()->castAs<VectorType>();
4271 unsigned DstNumElems = DstVTy->getNumElements();
4272
4273 const llvm::fltSemantics &HalfSem =
4274 S.getASTContext().getFloatTypeSemantics(T: S.getASTContext().HalfTy);
4275
4276 // imm[2] == 1 means use MXCSR rounding mode.
4277 // In that case, we can only evaluate if the conversion is exact.
4278 int ImmVal = Imm.getZExtValue();
4279 bool UseMXCSR = (ImmVal & 4) != 0;
4280 bool IsFPConstrained =
4281 Call->getFPFeaturesInEffect(LO: S.getASTContext().getLangOpts())
4282 .isFPConstrained();
4283
4284 llvm::RoundingMode RM;
4285 if (!UseMXCSR) {
4286 switch (ImmVal & 3) {
4287 case 0:
4288 RM = llvm::RoundingMode::NearestTiesToEven;
4289 break;
4290 case 1:
4291 RM = llvm::RoundingMode::TowardNegative;
4292 break;
4293 case 2:
4294 RM = llvm::RoundingMode::TowardPositive;
4295 break;
4296 case 3:
4297 RM = llvm::RoundingMode::TowardZero;
4298 break;
4299 default:
4300 llvm_unreachable("Invalid immediate rounding mode");
4301 }
4302 } else {
4303 // For MXCSR, we must check for exactness. We can use any rounding mode
4304 // for the trial conversion since the result is the same if it's exact.
4305 RM = llvm::RoundingMode::NearestTiesToEven;
4306 }
4307
4308 QualType DstElemQT = Dst.getFieldDesc()->getElemQualType();
4309 PrimType DstElemT = *S.getContext().classify(T: DstElemQT);
4310
4311 for (unsigned I = 0; I != SrcNumElems; ++I) {
4312 Floating SrcVal = Src.elem<Floating>(I);
4313 APFloat DstVal = SrcVal.getAPFloat();
4314
4315 bool LostInfo;
4316 APFloat::opStatus St = DstVal.convert(ToSemantics: HalfSem, RM, losesInfo: &LostInfo);
4317
4318 if (UseMXCSR && IsFPConstrained && St != APFloat::opOK) {
4319 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
4320 DiagId: diag::note_constexpr_dynamic_rounding);
4321 return false;
4322 }
4323
4324 INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
4325 // Convert the destination value's bit pattern to an unsigned integer,
4326 // then reconstruct the element using the target type's 'from' method.
4327 uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue();
4328 Dst.elem<T>(I) = T::from(RawBits);
4329 });
4330 }
4331
4332 // Zero out remaining elements if the destination has more elements
4333 // (e.g., vcvtps2ph converting 4 floats to 8 shorts).
4334 if (DstNumElems > SrcNumElems) {
4335 for (unsigned I = SrcNumElems; I != DstNumElems; ++I) {
4336 INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); });
4337 }
4338 }
4339
4340 Dst.initializeAllElements();
4341 return true;
4342}
4343
4344static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC,
4345 const CallExpr *Call) {
4346 assert(Call->getNumArgs() == 2);
4347
4348 QualType ATy = Call->getArg(Arg: 0)->getType();
4349 QualType BTy = Call->getArg(Arg: 1)->getType();
4350 if (!ATy->isVectorType() || !BTy->isVectorType()) {
4351 return false;
4352 }
4353
4354 const Pointer &BPtr = S.Stk.pop<Pointer>();
4355 const Pointer &APtr = S.Stk.pop<Pointer>();
4356 const auto *AVecT = ATy->castAs<VectorType>();
4357 assert(AVecT->getNumElements() ==
4358 BTy->castAs<VectorType>()->getNumElements());
4359
4360 PrimType ElemT = *S.getContext().classify(T: AVecT->getElementType());
4361
4362 unsigned NumBytesInQWord = 8;
4363 unsigned NumBitsInByte = 8;
4364 unsigned NumBytes = AVecT->getNumElements();
4365 unsigned NumQWords = NumBytes / NumBytesInQWord;
4366 const Pointer &Dst = S.Stk.peek<Pointer>();
4367
4368 for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
4369 APInt BQWord(64, 0);
4370 for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4371 unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
4372 INT_TYPE_SWITCH(ElemT, {
4373 uint64_t Byte = static_cast<uint64_t>(BPtr.elem<T>(Idx));
4374 BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
4375 });
4376 }
4377
4378 for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4379 unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
4380 uint64_t Ctrl = 0;
4381 INT_TYPE_SWITCH(
4382 ElemT, { Ctrl = static_cast<uint64_t>(APtr.elem<T>(Idx)) & 0x3F; });
4383
4384 APInt Byte(8, 0);
4385 for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) {
4386 Byte.setBitVal(BitPosition: BitIdx, BitValue: BQWord[(Ctrl + BitIdx) & 0x3F]);
4387 }
4388 INT_TYPE_SWITCH(ElemT,
4389 { Dst.elem<T>(Idx) = T::from(Byte.getZExtValue()); });
4390 }
4391 }
4392
4393 Dst.initializeAllElements();
4394
4395 return true;
4396}
4397
4398static bool interp__builtin_ia32_gfni_affine(InterpState &S, CodePtr OpPC,
4399 const CallExpr *Call,
4400 bool Inverse) {
4401 assert(Call->getNumArgs() == 3);
4402 QualType XType = Call->getArg(Arg: 0)->getType();
4403 QualType AType = Call->getArg(Arg: 1)->getType();
4404 QualType ImmType = Call->getArg(Arg: 2)->getType();
4405 if (!XType->isVectorType() || !AType->isVectorType() ||
4406 !ImmType->isIntegerType()) {
4407 return false;
4408 }
4409
4410 Pointer X, A;
4411 APSInt Imm;
4412 if (!popToAPSInt(S, E: Call->getArg(Arg: 2), Out&: Imm))
4413 return false;
4414 A = S.Stk.pop<Pointer>();
4415 X = S.Stk.pop<Pointer>();
4416
4417 const Pointer &Dst = S.Stk.peek<Pointer>();
4418 const auto *AVecT = AType->castAs<VectorType>();
4419 assert(XType->castAs<VectorType>()->getNumElements() ==
4420 AVecT->getNumElements());
4421 unsigned NumBytesInQWord = 8;
4422 unsigned NumBytes = AVecT->getNumElements();
4423 unsigned NumBitsInQWord = 64;
4424 unsigned NumQWords = NumBytes / NumBytesInQWord;
4425 unsigned NumBitsInByte = 8;
4426 PrimType AElemT = *S.getContext().classify(T: AVecT->getElementType());
4427
4428 // computing A*X + Imm
4429 for (unsigned QWordIdx = 0; QWordIdx != NumQWords; ++QWordIdx) {
4430 // Extract the QWords from X, A
4431 APInt XQWord(NumBitsInQWord, 0);
4432 APInt AQWord(NumBitsInQWord, 0);
4433 for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4434 unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx;
4435 uint8_t XByte;
4436 uint8_t AByte;
4437 INT_TYPE_SWITCH(AElemT, {
4438 XByte = static_cast<uint8_t>(X.elem<T>(Idx));
4439 AByte = static_cast<uint8_t>(A.elem<T>(Idx));
4440 });
4441
4442 XQWord.insertBits(SubBits: APInt(NumBitsInByte, XByte), bitPosition: ByteIdx * NumBitsInByte);
4443 AQWord.insertBits(SubBits: APInt(NumBitsInByte, AByte), bitPosition: ByteIdx * NumBitsInByte);
4444 }
4445
4446 for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4447 unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx;
4448 uint8_t XByte =
4449 XQWord.lshr(shiftAmt: ByteIdx * NumBitsInByte).getLoBits(numBits: 8).getZExtValue();
4450 INT_TYPE_SWITCH(AElemT, {
4451 Dst.elem<T>(Idx) = T::from(GFNIAffine(XByte, AQWord, Imm, Inverse));
4452 });
4453 }
4454 }
4455 Dst.initializeAllElements();
4456 return true;
4457}
4458
4459static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC,
4460 const CallExpr *Call) {
4461 assert(Call->getNumArgs() == 2);
4462
4463 QualType AType = Call->getArg(Arg: 0)->getType();
4464 QualType BType = Call->getArg(Arg: 1)->getType();
4465 if (!AType->isVectorType() || !BType->isVectorType()) {
4466 return false;
4467 }
4468
4469 Pointer A, B;
4470 B = S.Stk.pop<Pointer>();
4471 A = S.Stk.pop<Pointer>();
4472
4473 const Pointer &Dst = S.Stk.peek<Pointer>();
4474 const auto *AVecT = AType->castAs<VectorType>();
4475 assert(AVecT->getNumElements() ==
4476 BType->castAs<VectorType>()->getNumElements());
4477
4478 PrimType AElemT = *S.getContext().classify(T: AVecT->getElementType());
4479 unsigned NumBytes = A.getNumElems();
4480
4481 for (unsigned ByteIdx = 0; ByteIdx != NumBytes; ++ByteIdx) {
4482 uint8_t AByte, BByte;
4483 INT_TYPE_SWITCH(AElemT, {
4484 AByte = static_cast<uint8_t>(A.elem<T>(ByteIdx));
4485 BByte = static_cast<uint8_t>(B.elem<T>(ByteIdx));
4486 Dst.elem<T>(ByteIdx) = T::from(GFNIMul(AByte, BByte));
4487 });
4488 }
4489
4490 Dst.initializeAllElements();
4491 return true;
4492}
4493
4494static bool interp__builtin_ia32_vpdp(InterpState &S, CodePtr OpPC,
4495 const CallExpr *Call, bool IsSaturating) {
4496 assert(Call->getNumArgs() == 3);
4497
4498 QualType SrcT = Call->getArg(Arg: 0)->getType();
4499 QualType OpAT = Call->getArg(Arg: 1)->getType();
4500 QualType OpBT = Call->getArg(Arg: 2)->getType();
4501 QualType DstT = Call->getType();
4502 if (!SrcT->isVectorType() || !OpAT->isVectorType() || !OpBT->isVectorType() ||
4503 !DstT->isVectorType())
4504 return false;
4505
4506 const auto *SrcVecT = SrcT->castAs<VectorType>();
4507 const auto *OpAVecT = OpAT->castAs<VectorType>();
4508 const auto *OpBVecT = OpBT->castAs<VectorType>();
4509 const auto *DstVecT = DstT->castAs<VectorType>();
4510
4511 assert(OpAVecT->getNumElements() == OpBVecT->getNumElements());
4512
4513 unsigned NumSrcElems = SrcVecT->getNumElements();
4514 unsigned NumOperandElems = OpAVecT->getNumElements();
4515 unsigned ElemsPerLane = NumOperandElems / NumSrcElems;
4516
4517 PrimType SrcElemT = *S.getContext().classify(T: SrcVecT->getElementType());
4518 PrimType OpAElemT = *S.getContext().classify(T: OpAVecT->getElementType());
4519 PrimType OpBElemT = *S.getContext().classify(T: OpBVecT->getElementType());
4520 PrimType DstElemT = *S.getContext().classify(T: DstVecT->getElementType());
4521
4522 assert(SrcElemT == DstElemT);
4523
4524 const Pointer &OpBPtr = S.Stk.pop<Pointer>();
4525 const Pointer &OpAPtr = S.Stk.pop<Pointer>();
4526 const Pointer &SrcPtr = S.Stk.pop<Pointer>();
4527 const Pointer &Dst = S.Stk.peek<Pointer>();
4528
4529 for (unsigned I = 0; I != NumSrcElems; ++I) {
4530 APSInt Acc;
4531 INT_TYPE_SWITCH_NO_BOOL(SrcElemT, { Acc = SrcPtr.elem<T>(I).toAPSInt(); });
4532 Acc = Acc.sext(width: 64);
4533 for (unsigned J = 0; J != ElemsPerLane; ++J) {
4534 APSInt OpA, OpB;
4535 INT_TYPE_SWITCH_NO_BOOL(
4536 OpAElemT, { OpA = OpAPtr.elem<T>(ElemsPerLane * I + J).toAPSInt(); });
4537 INT_TYPE_SWITCH_NO_BOOL(
4538 OpBElemT, { OpB = OpBPtr.elem<T>(ElemsPerLane * I + J).toAPSInt(); });
4539 OpA = APSInt(OpA.extend(width: 64), false);
4540 OpB = APSInt(OpB.extend(width: 64), false);
4541 Acc += OpA * OpB;
4542 }
4543 if (IsSaturating)
4544 Acc = APSInt(Acc.truncSSat(width: 32), false);
4545 else
4546 Acc = APSInt(Acc.trunc(width: 32), false);
4547 INT_TYPE_SWITCH_NO_BOOL(DstElemT,
4548 { Dst.elem<T>(I) = static_cast<T>(Acc); });
4549 }
4550 Dst.initializeAllElements();
4551 return true;
4552}
4553
4554bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
4555 uint32_t BuiltinID) {
4556 if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(ID: BuiltinID))
4557 return Invalid(S, OpPC);
4558
4559 const InterpFrame *Frame = S.Current;
4560 switch (BuiltinID) {
4561 case Builtin::BI__builtin_is_constant_evaluated:
4562 return interp__builtin_is_constant_evaluated(S, OpPC, Frame, Call);
4563
4564 case Builtin::BI__builtin_assume:
4565 case Builtin::BI__assume:
4566 return interp__builtin_assume(S, OpPC, Frame, Call);
4567
4568 case Builtin::BI__builtin_strcmp:
4569 case Builtin::BIstrcmp:
4570 case Builtin::BI__builtin_strncmp:
4571 case Builtin::BIstrncmp:
4572 case Builtin::BI__builtin_wcsncmp:
4573 case Builtin::BIwcsncmp:
4574 case Builtin::BI__builtin_wcscmp:
4575 case Builtin::BIwcscmp:
4576 return interp__builtin_strcmp(S, OpPC, Frame, Call, ID: BuiltinID);
4577
4578 case Builtin::BI__builtin_strlen:
4579 case Builtin::BIstrlen:
4580 case Builtin::BI__builtin_wcslen:
4581 case Builtin::BIwcslen:
4582 return interp__builtin_strlen(S, OpPC, Frame, Call, ID: BuiltinID);
4583
4584 case Builtin::BI__builtin_nan:
4585 case Builtin::BI__builtin_nanf:
4586 case Builtin::BI__builtin_nanl:
4587 case Builtin::BI__builtin_nanf16:
4588 case Builtin::BI__builtin_nanf128:
4589 return interp__builtin_nan(S, OpPC, Frame, Call, /*Signaling=*/false);
4590
4591 case Builtin::BI__builtin_nans:
4592 case Builtin::BI__builtin_nansf:
4593 case Builtin::BI__builtin_nansl:
4594 case Builtin::BI__builtin_nansf16:
4595 case Builtin::BI__builtin_nansf128:
4596 return interp__builtin_nan(S, OpPC, Frame, Call, /*Signaling=*/true);
4597
4598 case Builtin::BI__builtin_huge_val:
4599 case Builtin::BI__builtin_huge_valf:
4600 case Builtin::BI__builtin_huge_vall:
4601 case Builtin::BI__builtin_huge_valf16:
4602 case Builtin::BI__builtin_huge_valf128:
4603 case Builtin::BI__builtin_inf:
4604 case Builtin::BI__builtin_inff:
4605 case Builtin::BI__builtin_infl:
4606 case Builtin::BI__builtin_inff16:
4607 case Builtin::BI__builtin_inff128:
4608 return interp__builtin_inf(S, OpPC, Frame, Call);
4609
4610 case Builtin::BI__builtin_copysign:
4611 case Builtin::BI__builtin_copysignf:
4612 case Builtin::BI__builtin_copysignl:
4613 case Builtin::BI__builtin_copysignf128:
4614 return interp__builtin_copysign(S, OpPC, Frame);
4615
4616 case Builtin::BI__builtin_fmin:
4617 case Builtin::BI__builtin_fminf:
4618 case Builtin::BI__builtin_fminl:
4619 case Builtin::BI__builtin_fminf16:
4620 case Builtin::BI__builtin_fminf128:
4621 return interp__builtin_fmin(S, OpPC, Frame, /*IsNumBuiltin=*/false);
4622
4623 case Builtin::BI__builtin_fminimum_num:
4624 case Builtin::BI__builtin_fminimum_numf:
4625 case Builtin::BI__builtin_fminimum_numl:
4626 case Builtin::BI__builtin_fminimum_numf16:
4627 case Builtin::BI__builtin_fminimum_numf128:
4628 return interp__builtin_fmin(S, OpPC, Frame, /*IsNumBuiltin=*/true);
4629
4630 case Builtin::BI__builtin_fmax:
4631 case Builtin::BI__builtin_fmaxf:
4632 case Builtin::BI__builtin_fmaxl:
4633 case Builtin::BI__builtin_fmaxf16:
4634 case Builtin::BI__builtin_fmaxf128:
4635 return interp__builtin_fmax(S, OpPC, Frame, /*IsNumBuiltin=*/false);
4636
4637 case Builtin::BI__builtin_fmaximum_num:
4638 case Builtin::BI__builtin_fmaximum_numf:
4639 case Builtin::BI__builtin_fmaximum_numl:
4640 case Builtin::BI__builtin_fmaximum_numf16:
4641 case Builtin::BI__builtin_fmaximum_numf128:
4642 return interp__builtin_fmax(S, OpPC, Frame, /*IsNumBuiltin=*/true);
4643
4644 case Builtin::BI__builtin_isnan:
4645 return interp__builtin_isnan(S, OpPC, Frame, Call);
4646
4647 case Builtin::BI__builtin_issignaling:
4648 return interp__builtin_issignaling(S, OpPC, Frame, Call);
4649
4650 case Builtin::BI__builtin_isinf:
4651 return interp__builtin_isinf(S, OpPC, Frame, /*Sign=*/CheckSign: false, Call);
4652
4653 case Builtin::BI__builtin_isinf_sign:
4654 return interp__builtin_isinf(S, OpPC, Frame, /*Sign=*/CheckSign: true, Call);
4655
4656 case Builtin::BI__builtin_isfinite:
4657 return interp__builtin_isfinite(S, OpPC, Frame, Call);
4658
4659 case Builtin::BI__builtin_isnormal:
4660 return interp__builtin_isnormal(S, OpPC, Frame, Call);
4661
4662 case Builtin::BI__builtin_issubnormal:
4663 return interp__builtin_issubnormal(S, OpPC, Frame, Call);
4664
4665 case Builtin::BI__builtin_iszero:
4666 return interp__builtin_iszero(S, OpPC, Frame, Call);
4667
4668 case Builtin::BI__builtin_signbit:
4669 case Builtin::BI__builtin_signbitf:
4670 case Builtin::BI__builtin_signbitl:
4671 return interp__builtin_signbit(S, OpPC, Frame, Call);
4672
4673 case Builtin::BI__builtin_isgreater:
4674 case Builtin::BI__builtin_isgreaterequal:
4675 case Builtin::BI__builtin_isless:
4676 case Builtin::BI__builtin_islessequal:
4677 case Builtin::BI__builtin_islessgreater:
4678 case Builtin::BI__builtin_isunordered:
4679 return interp_floating_comparison(S, OpPC, Call, ID: BuiltinID);
4680
4681 case Builtin::BI__builtin_isfpclass:
4682 return interp__builtin_isfpclass(S, OpPC, Frame, Call);
4683
4684 case Builtin::BI__builtin_fpclassify:
4685 return interp__builtin_fpclassify(S, OpPC, Frame, Call);
4686
4687 case Builtin::BI__builtin_fabs:
4688 case Builtin::BI__builtin_fabsf:
4689 case Builtin::BI__builtin_fabsl:
4690 case Builtin::BI__builtin_fabsf128:
4691 return interp__builtin_fabs(S, OpPC, Frame);
4692
4693 case Builtin::BI__builtin_abs:
4694 case Builtin::BI__builtin_labs:
4695 case Builtin::BI__builtin_llabs:
4696 return interp__builtin_abs(S, OpPC, Frame, Call);
4697
4698 case Builtin::BI__builtin_popcount:
4699 case Builtin::BI__builtin_popcountl:
4700 case Builtin::BI__builtin_popcountll:
4701 case Builtin::BI__builtin_popcountg:
4702 case Builtin::BI__popcnt16: // Microsoft variants of popcount
4703 case Builtin::BI__popcnt:
4704 case Builtin::BI__popcnt64:
4705 return interp__builtin_popcount(S, OpPC, Frame, Call);
4706
4707 case Builtin::BI__builtin_parity:
4708 case Builtin::BI__builtin_parityl:
4709 case Builtin::BI__builtin_parityll:
4710 return interp__builtin_elementwise_int_unaryop(
4711 S, OpPC, Call, Fn: [](const APSInt &Val) {
4712 return APInt(Val.getBitWidth(), Val.popcount() % 2);
4713 });
4714 case Builtin::BI__builtin_clrsb:
4715 case Builtin::BI__builtin_clrsbl:
4716 case Builtin::BI__builtin_clrsbll:
4717 return interp__builtin_elementwise_int_unaryop(
4718 S, OpPC, Call, Fn: [](const APSInt &Val) {
4719 return APInt(Val.getBitWidth(),
4720 Val.getBitWidth() - Val.getSignificantBits());
4721 });
4722 case Builtin::BI__builtin_bitreverseg:
4723 case Builtin::BI__builtin_bitreverse8:
4724 case Builtin::BI__builtin_bitreverse16:
4725 case Builtin::BI__builtin_bitreverse32:
4726 case Builtin::BI__builtin_bitreverse64:
4727 return interp__builtin_elementwise_int_unaryop(
4728 S, OpPC, Call, Fn: [](const APSInt &Val) { return Val.reverseBits(); });
4729
4730 case Builtin::BI__builtin_classify_type:
4731 return interp__builtin_classify_type(S, OpPC, Frame, Call);
4732
4733 case Builtin::BI__builtin_expect:
4734 case Builtin::BI__builtin_expect_with_probability:
4735 return interp__builtin_expect(S, OpPC, Frame, Call);
4736
4737 case Builtin::BI__builtin_rotateleft8:
4738 case Builtin::BI__builtin_rotateleft16:
4739 case Builtin::BI__builtin_rotateleft32:
4740 case Builtin::BI__builtin_rotateleft64:
4741 case Builtin::BI__builtin_stdc_rotate_left:
4742 case Builtin::BIstdc_rotate_left_uc:
4743 case Builtin::BIstdc_rotate_left_us:
4744 case Builtin::BIstdc_rotate_left_ui:
4745 case Builtin::BIstdc_rotate_left_ul:
4746 case Builtin::BIstdc_rotate_left_ull:
4747 case Builtin::BI_rotl8: // Microsoft variants of rotate left
4748 case Builtin::BI_rotl16:
4749 case Builtin::BI_rotl:
4750 case Builtin::BI_lrotl:
4751 case Builtin::BI_rotl64:
4752 case Builtin::BI__builtin_rotateright8:
4753 case Builtin::BI__builtin_rotateright16:
4754 case Builtin::BI__builtin_rotateright32:
4755 case Builtin::BI__builtin_rotateright64:
4756 case Builtin::BI__builtin_stdc_rotate_right:
4757 case Builtin::BIstdc_rotate_right_uc:
4758 case Builtin::BIstdc_rotate_right_us:
4759 case Builtin::BIstdc_rotate_right_ui:
4760 case Builtin::BIstdc_rotate_right_ul:
4761 case Builtin::BIstdc_rotate_right_ull:
4762 case Builtin::BI_rotr8: // Microsoft variants of rotate right
4763 case Builtin::BI_rotr16:
4764 case Builtin::BI_rotr:
4765 case Builtin::BI_lrotr:
4766 case Builtin::BI_rotr64: {
4767 // Determine if this is a rotate right operation
4768 bool IsRotateRight;
4769 switch (BuiltinID) {
4770 case Builtin::BI__builtin_rotateright8:
4771 case Builtin::BI__builtin_rotateright16:
4772 case Builtin::BI__builtin_rotateright32:
4773 case Builtin::BI__builtin_rotateright64:
4774 case Builtin::BI__builtin_stdc_rotate_right:
4775 case Builtin::BIstdc_rotate_right_uc:
4776 case Builtin::BIstdc_rotate_right_us:
4777 case Builtin::BIstdc_rotate_right_ui:
4778 case Builtin::BIstdc_rotate_right_ul:
4779 case Builtin::BIstdc_rotate_right_ull:
4780 case Builtin::BI_rotr8:
4781 case Builtin::BI_rotr16:
4782 case Builtin::BI_rotr:
4783 case Builtin::BI_lrotr:
4784 case Builtin::BI_rotr64:
4785 IsRotateRight = true;
4786 break;
4787 default:
4788 IsRotateRight = false;
4789 break;
4790 }
4791
4792 return interp__builtin_elementwise_int_binop(
4793 S, OpPC, Call, Fn: [IsRotateRight](const APSInt &Value, APSInt Amount) {
4794 Amount = NormalizeRotateAmount(Value, Amount);
4795 return IsRotateRight ? Value.rotr(rotateAmt: Amount.getZExtValue())
4796 : Value.rotl(rotateAmt: Amount.getZExtValue());
4797 });
4798 }
4799
4800 case Builtin::BIstdc_leading_zeros_uc:
4801 case Builtin::BIstdc_leading_zeros_us:
4802 case Builtin::BIstdc_leading_zeros_ui:
4803 case Builtin::BIstdc_leading_zeros_ul:
4804 case Builtin::BIstdc_leading_zeros_ull:
4805 case Builtin::BI__builtin_stdc_leading_zeros: {
4806 unsigned ResWidth = S.getASTContext().getIntWidth(T: Call->getType());
4807 return interp__builtin_elementwise_int_unaryop(
4808 S, OpPC, Call, Fn: [ResWidth](const APSInt &Val) {
4809 return APInt(ResWidth, Val.countl_zero());
4810 });
4811 }
4812
4813 case Builtin::BIstdc_leading_ones_uc:
4814 case Builtin::BIstdc_leading_ones_us:
4815 case Builtin::BIstdc_leading_ones_ui:
4816 case Builtin::BIstdc_leading_ones_ul:
4817 case Builtin::BIstdc_leading_ones_ull:
4818 case Builtin::BI__builtin_stdc_leading_ones: {
4819 unsigned ResWidth = S.getASTContext().getIntWidth(T: Call->getType());
4820 return interp__builtin_elementwise_int_unaryop(
4821 S, OpPC, Call, Fn: [ResWidth](const APSInt &Val) {
4822 return APInt(ResWidth, Val.countl_one());
4823 });
4824 }
4825
4826 case Builtin::BIstdc_trailing_zeros_uc:
4827 case Builtin::BIstdc_trailing_zeros_us:
4828 case Builtin::BIstdc_trailing_zeros_ui:
4829 case Builtin::BIstdc_trailing_zeros_ul:
4830 case Builtin::BIstdc_trailing_zeros_ull:
4831 case Builtin::BI__builtin_stdc_trailing_zeros: {
4832 unsigned ResWidth = S.getASTContext().getIntWidth(T: Call->getType());
4833 return interp__builtin_elementwise_int_unaryop(
4834 S, OpPC, Call, Fn: [ResWidth](const APSInt &Val) {
4835 return APInt(ResWidth, Val.countr_zero());
4836 });
4837 }
4838
4839 case Builtin::BIstdc_trailing_ones_uc:
4840 case Builtin::BIstdc_trailing_ones_us:
4841 case Builtin::BIstdc_trailing_ones_ui:
4842 case Builtin::BIstdc_trailing_ones_ul:
4843 case Builtin::BIstdc_trailing_ones_ull:
4844 case Builtin::BI__builtin_stdc_trailing_ones: {
4845 unsigned ResWidth = S.getASTContext().getIntWidth(T: Call->getType());
4846 return interp__builtin_elementwise_int_unaryop(
4847 S, OpPC, Call, Fn: [ResWidth](const APSInt &Val) {
4848 return APInt(ResWidth, Val.countr_one());
4849 });
4850 }
4851
4852 case Builtin::BIstdc_first_leading_zero_uc:
4853 case Builtin::BIstdc_first_leading_zero_us:
4854 case Builtin::BIstdc_first_leading_zero_ui:
4855 case Builtin::BIstdc_first_leading_zero_ul:
4856 case Builtin::BIstdc_first_leading_zero_ull:
4857 case Builtin::BI__builtin_stdc_first_leading_zero: {
4858 unsigned ResWidth = S.getASTContext().getIntWidth(T: Call->getType());
4859 return interp__builtin_elementwise_int_unaryop(
4860 S, OpPC, Call, Fn: [ResWidth](const APSInt &Val) {
4861 return APInt(ResWidth, Val.isAllOnes() ? 0 : Val.countl_one() + 1);
4862 });
4863 }
4864
4865 case Builtin::BIstdc_first_leading_one_uc:
4866 case Builtin::BIstdc_first_leading_one_us:
4867 case Builtin::BIstdc_first_leading_one_ui:
4868 case Builtin::BIstdc_first_leading_one_ul:
4869 case Builtin::BIstdc_first_leading_one_ull:
4870 case Builtin::BI__builtin_stdc_first_leading_one: {
4871 unsigned ResWidth = S.getASTContext().getIntWidth(T: Call->getType());
4872 return interp__builtin_elementwise_int_unaryop(
4873 S, OpPC, Call, Fn: [ResWidth](const APSInt &Val) {
4874 return APInt(ResWidth, Val.isZero() ? 0 : Val.countl_zero() + 1);
4875 });
4876 }
4877
4878 case Builtin::BIstdc_first_trailing_zero_uc:
4879 case Builtin::BIstdc_first_trailing_zero_us:
4880 case Builtin::BIstdc_first_trailing_zero_ui:
4881 case Builtin::BIstdc_first_trailing_zero_ul:
4882 case Builtin::BIstdc_first_trailing_zero_ull:
4883 case Builtin::BI__builtin_stdc_first_trailing_zero: {
4884 unsigned ResWidth = S.getASTContext().getIntWidth(T: Call->getType());
4885 return interp__builtin_elementwise_int_unaryop(
4886 S, OpPC, Call, Fn: [ResWidth](const APSInt &Val) {
4887 return APInt(ResWidth, Val.isAllOnes() ? 0 : Val.countr_one() + 1);
4888 });
4889 }
4890
4891 case Builtin::BIstdc_first_trailing_one_uc:
4892 case Builtin::BIstdc_first_trailing_one_us:
4893 case Builtin::BIstdc_first_trailing_one_ui:
4894 case Builtin::BIstdc_first_trailing_one_ul:
4895 case Builtin::BIstdc_first_trailing_one_ull:
4896 case Builtin::BI__builtin_stdc_first_trailing_one: {
4897 unsigned ResWidth = S.getASTContext().getIntWidth(T: Call->getType());
4898 return interp__builtin_elementwise_int_unaryop(
4899 S, OpPC, Call, Fn: [ResWidth](const APSInt &Val) {
4900 return APInt(ResWidth, Val.isZero() ? 0 : Val.countr_zero() + 1);
4901 });
4902 }
4903
4904 case Builtin::BIstdc_count_zeros_uc:
4905 case Builtin::BIstdc_count_zeros_us:
4906 case Builtin::BIstdc_count_zeros_ui:
4907 case Builtin::BIstdc_count_zeros_ul:
4908 case Builtin::BIstdc_count_zeros_ull:
4909 case Builtin::BI__builtin_stdc_count_zeros: {
4910 unsigned ResWidth = S.getASTContext().getIntWidth(T: Call->getType());
4911 return interp__builtin_elementwise_int_unaryop(
4912 S, OpPC, Call, Fn: [ResWidth](const APSInt &Val) {
4913 unsigned BitWidth = Val.getBitWidth();
4914 return APInt(ResWidth, BitWidth - Val.popcount());
4915 });
4916 }
4917
4918 case Builtin::BIstdc_count_ones_uc:
4919 case Builtin::BIstdc_count_ones_us:
4920 case Builtin::BIstdc_count_ones_ui:
4921 case Builtin::BIstdc_count_ones_ul:
4922 case Builtin::BIstdc_count_ones_ull:
4923 case Builtin::BI__builtin_stdc_count_ones: {
4924 unsigned ResWidth = S.getASTContext().getIntWidth(T: Call->getType());
4925 return interp__builtin_elementwise_int_unaryop(
4926 S, OpPC, Call, Fn: [ResWidth](const APSInt &Val) {
4927 return APInt(ResWidth, Val.popcount());
4928 });
4929 }
4930
4931 case Builtin::BIstdc_has_single_bit_uc:
4932 case Builtin::BIstdc_has_single_bit_us:
4933 case Builtin::BIstdc_has_single_bit_ui:
4934 case Builtin::BIstdc_has_single_bit_ul:
4935 case Builtin::BIstdc_has_single_bit_ull:
4936 case Builtin::BI__builtin_stdc_has_single_bit: {
4937 unsigned ResWidth = S.getASTContext().getIntWidth(T: Call->getType());
4938 return interp__builtin_elementwise_int_unaryop(
4939 S, OpPC, Call, Fn: [ResWidth](const APSInt &Val) {
4940 return APInt(ResWidth, Val.popcount() == 1 ? 1 : 0);
4941 });
4942 }
4943
4944 case Builtin::BIstdc_bit_width_uc:
4945 case Builtin::BIstdc_bit_width_us:
4946 case Builtin::BIstdc_bit_width_ui:
4947 case Builtin::BIstdc_bit_width_ul:
4948 case Builtin::BIstdc_bit_width_ull:
4949 case Builtin::BI__builtin_stdc_bit_width: {
4950 unsigned ResWidth = S.getASTContext().getIntWidth(T: Call->getType());
4951 return interp__builtin_elementwise_int_unaryop(
4952 S, OpPC, Call, Fn: [ResWidth](const APSInt &Val) {
4953 unsigned BitWidth = Val.getBitWidth();
4954 return APInt(ResWidth, BitWidth - Val.countl_zero());
4955 });
4956 }
4957
4958 case Builtin::BIstdc_bit_floor_uc:
4959 case Builtin::BIstdc_bit_floor_us:
4960 case Builtin::BIstdc_bit_floor_ui:
4961 case Builtin::BIstdc_bit_floor_ul:
4962 case Builtin::BIstdc_bit_floor_ull:
4963 case Builtin::BI__builtin_stdc_bit_floor:
4964 return interp__builtin_elementwise_int_unaryop(
4965 S, OpPC, Call, Fn: [](const APSInt &Val) {
4966 unsigned BitWidth = Val.getBitWidth();
4967 if (Val.isZero())
4968 return APInt::getZero(numBits: BitWidth);
4969 return APInt::getOneBitSet(numBits: BitWidth,
4970 BitNo: BitWidth - Val.countl_zero() - 1);
4971 });
4972
4973 case Builtin::BIstdc_bit_ceil_uc:
4974 case Builtin::BIstdc_bit_ceil_us:
4975 case Builtin::BIstdc_bit_ceil_ui:
4976 case Builtin::BIstdc_bit_ceil_ul:
4977 case Builtin::BIstdc_bit_ceil_ull:
4978 case Builtin::BI__builtin_stdc_bit_ceil:
4979 return interp__builtin_elementwise_int_unaryop(
4980 S, OpPC, Call, Fn: [](const APSInt &Val) {
4981 unsigned BitWidth = Val.getBitWidth();
4982 if (Val.ule(RHS: 1))
4983 return APInt(BitWidth, 1);
4984 APInt V = Val;
4985 APInt ValMinusOne = V - 1;
4986 unsigned LeadingZeros = ValMinusOne.countl_zero();
4987 if (LeadingZeros == 0)
4988 return APInt(BitWidth, 0); // overflows; wrap to 0
4989 return APInt::getOneBitSet(numBits: BitWidth, BitNo: BitWidth - LeadingZeros);
4990 });
4991
4992 case Builtin::BI__builtin_ffs:
4993 case Builtin::BI__builtin_ffsl:
4994 case Builtin::BI__builtin_ffsll:
4995 return interp__builtin_elementwise_int_unaryop(
4996 S, OpPC, Call, Fn: [](const APSInt &Val) {
4997 return APInt(Val.getBitWidth(),
4998 Val.isZero() ? 0u : Val.countTrailingZeros() + 1u);
4999 });
5000
5001 case Builtin::BIaddressof:
5002 case Builtin::BI__addressof:
5003 case Builtin::BI__builtin_addressof:
5004 assert(isNoopBuiltin(BuiltinID));
5005 return interp__builtin_addressof(S, OpPC, Frame, Call);
5006
5007 case Builtin::BIas_const:
5008 case Builtin::BIforward:
5009 case Builtin::BIforward_like:
5010 case Builtin::BImove:
5011 case Builtin::BImove_if_noexcept:
5012 assert(isNoopBuiltin(BuiltinID));
5013 return interp__builtin_move(S, OpPC, Frame, Call);
5014
5015 case Builtin::BI__builtin_eh_return_data_regno:
5016 return interp__builtin_eh_return_data_regno(S, OpPC, Frame, Call);
5017
5018 case Builtin::BI__builtin_launder:
5019 assert(isNoopBuiltin(BuiltinID));
5020 return true;
5021
5022 case Builtin::BI__builtin_add_overflow:
5023 case Builtin::BI__builtin_sub_overflow:
5024 case Builtin::BI__builtin_mul_overflow:
5025 case Builtin::BI__builtin_sadd_overflow:
5026 case Builtin::BI__builtin_uadd_overflow:
5027 case Builtin::BI__builtin_uaddl_overflow:
5028 case Builtin::BI__builtin_uaddll_overflow:
5029 case Builtin::BI__builtin_usub_overflow:
5030 case Builtin::BI__builtin_usubl_overflow:
5031 case Builtin::BI__builtin_usubll_overflow:
5032 case Builtin::BI__builtin_umul_overflow:
5033 case Builtin::BI__builtin_umull_overflow:
5034 case Builtin::BI__builtin_umulll_overflow:
5035 case Builtin::BI__builtin_saddl_overflow:
5036 case Builtin::BI__builtin_saddll_overflow:
5037 case Builtin::BI__builtin_ssub_overflow:
5038 case Builtin::BI__builtin_ssubl_overflow:
5039 case Builtin::BI__builtin_ssubll_overflow:
5040 case Builtin::BI__builtin_smul_overflow:
5041 case Builtin::BI__builtin_smull_overflow:
5042 case Builtin::BI__builtin_smulll_overflow:
5043 return interp__builtin_overflowop(S, OpPC, Call, BuiltinOp: BuiltinID);
5044
5045 case Builtin::BI__builtin_addcb:
5046 case Builtin::BI__builtin_addcs:
5047 case Builtin::BI__builtin_addc:
5048 case Builtin::BI__builtin_addcl:
5049 case Builtin::BI__builtin_addcll:
5050 case Builtin::BI__builtin_subcb:
5051 case Builtin::BI__builtin_subcs:
5052 case Builtin::BI__builtin_subc:
5053 case Builtin::BI__builtin_subcl:
5054 case Builtin::BI__builtin_subcll:
5055 return interp__builtin_carryop(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
5056
5057 case Builtin::BI__builtin_clz:
5058 case Builtin::BI__builtin_clzl:
5059 case Builtin::BI__builtin_clzll:
5060 case Builtin::BI__builtin_clzs:
5061 case Builtin::BI__builtin_clzg:
5062 case Builtin::BI__lzcnt16: // Microsoft variants of count leading-zeroes
5063 case Builtin::BI__lzcnt:
5064 case Builtin::BI__lzcnt64:
5065 return interp__builtin_clz(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
5066
5067 case Builtin::BI__builtin_ctz:
5068 case Builtin::BI__builtin_ctzl:
5069 case Builtin::BI__builtin_ctzll:
5070 case Builtin::BI__builtin_ctzs:
5071 case Builtin::BI__builtin_ctzg:
5072 return interp__builtin_ctz(S, OpPC, Frame, Call, BuiltinID);
5073
5074 case Builtin::BI__builtin_elementwise_clzg:
5075 case Builtin::BI__builtin_elementwise_ctzg:
5076 return interp__builtin_elementwise_countzeroes(S, OpPC, Frame, Call,
5077 BuiltinID);
5078 case Builtin::BI__builtin_bswapg:
5079 case Builtin::BI__builtin_bswap16:
5080 case Builtin::BI__builtin_bswap32:
5081 case Builtin::BI__builtin_bswap64:
5082 case Builtin::BIstdc_memreverse8u8:
5083 case Builtin::BIstdc_memreverse8u16:
5084 case Builtin::BIstdc_memreverse8u32:
5085 case Builtin::BIstdc_memreverse8u64:
5086 return interp__builtin_bswap(S, OpPC, Frame, Call);
5087
5088 case Builtin::BI__atomic_always_lock_free:
5089 case Builtin::BI__atomic_is_lock_free:
5090 return interp__builtin_atomic_lock_free(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
5091
5092 case Builtin::BI__c11_atomic_is_lock_free:
5093 return interp__builtin_c11_atomic_is_lock_free(S, OpPC, Frame, Call);
5094
5095 case Builtin::BI__builtin_complex:
5096 return interp__builtin_complex(S, OpPC, Frame, Call);
5097
5098 case Builtin::BI__builtin_is_aligned:
5099 case Builtin::BI__builtin_align_up:
5100 case Builtin::BI__builtin_align_down:
5101 return interp__builtin_is_aligned_up_down(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
5102
5103 case Builtin::BI__builtin_assume_aligned:
5104 return interp__builtin_assume_aligned(S, OpPC, Frame, Call);
5105
5106 case clang::X86::BI__builtin_ia32_crc32qi:
5107 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 1);
5108 case clang::X86::BI__builtin_ia32_crc32hi:
5109 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 2);
5110 case clang::X86::BI__builtin_ia32_crc32si:
5111 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 4);
5112 case clang::X86::BI__builtin_ia32_crc32di:
5113 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 8);
5114
5115 case clang::X86::BI__builtin_ia32_bextr_u32:
5116 case clang::X86::BI__builtin_ia32_bextr_u64:
5117 case clang::X86::BI__builtin_ia32_bextri_u32:
5118 case clang::X86::BI__builtin_ia32_bextri_u64:
5119 return interp__builtin_elementwise_int_binop(
5120 S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Idx) {
5121 unsigned BitWidth = Val.getBitWidth();
5122 uint64_t Shift = Idx.extractBitsAsZExtValue(numBits: 8, bitPosition: 0);
5123 uint64_t Length = Idx.extractBitsAsZExtValue(numBits: 8, bitPosition: 8);
5124 if (Length > BitWidth) {
5125 Length = BitWidth;
5126 }
5127
5128 // Handle out of bounds cases.
5129 if (Length == 0 || Shift >= BitWidth)
5130 return APInt(BitWidth, 0);
5131
5132 uint64_t Result = Val.getZExtValue() >> Shift;
5133 Result &= llvm::maskTrailingOnes<uint64_t>(N: Length);
5134 return APInt(BitWidth, Result);
5135 });
5136
5137 case clang::X86::BI__builtin_ia32_bzhi_si:
5138 case clang::X86::BI__builtin_ia32_bzhi_di:
5139 return interp__builtin_elementwise_int_binop(
5140 S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Idx) {
5141 unsigned BitWidth = Val.getBitWidth();
5142 uint64_t Index = Idx.extractBitsAsZExtValue(numBits: 8, bitPosition: 0);
5143 APSInt Result = Val;
5144
5145 if (Index < BitWidth)
5146 Result.clearHighBits(hiBits: BitWidth - Index);
5147
5148 return Result;
5149 });
5150
5151 case clang::X86::BI__builtin_ia32_ktestcqi:
5152 case clang::X86::BI__builtin_ia32_ktestchi:
5153 case clang::X86::BI__builtin_ia32_ktestcsi:
5154 case clang::X86::BI__builtin_ia32_ktestcdi:
5155 return interp__builtin_elementwise_int_binop(
5156 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
5157 return APInt(sizeof(unsigned char) * 8, (~A & B) == 0);
5158 });
5159
5160 case clang::X86::BI__builtin_ia32_ktestzqi:
5161 case clang::X86::BI__builtin_ia32_ktestzhi:
5162 case clang::X86::BI__builtin_ia32_ktestzsi:
5163 case clang::X86::BI__builtin_ia32_ktestzdi:
5164 return interp__builtin_elementwise_int_binop(
5165 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
5166 return APInt(sizeof(unsigned char) * 8, (A & B) == 0);
5167 });
5168
5169 case clang::X86::BI__builtin_ia32_kortestcqi:
5170 case clang::X86::BI__builtin_ia32_kortestchi:
5171 case clang::X86::BI__builtin_ia32_kortestcsi:
5172 case clang::X86::BI__builtin_ia32_kortestcdi:
5173 return interp__builtin_elementwise_int_binop(
5174 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
5175 return APInt(sizeof(unsigned char) * 8, ~(A | B) == 0);
5176 });
5177
5178 case clang::X86::BI__builtin_ia32_kortestzqi:
5179 case clang::X86::BI__builtin_ia32_kortestzhi:
5180 case clang::X86::BI__builtin_ia32_kortestzsi:
5181 case clang::X86::BI__builtin_ia32_kortestzdi:
5182 return interp__builtin_elementwise_int_binop(
5183 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
5184 return APInt(sizeof(unsigned char) * 8, (A | B) == 0);
5185 });
5186
5187 case clang::X86::BI__builtin_ia32_kshiftliqi:
5188 case clang::X86::BI__builtin_ia32_kshiftlihi:
5189 case clang::X86::BI__builtin_ia32_kshiftlisi:
5190 case clang::X86::BI__builtin_ia32_kshiftlidi:
5191 return interp__builtin_elementwise_int_binop(
5192 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
5193 unsigned Amt = RHS.getZExtValue() & 0xFF;
5194 if (Amt >= LHS.getBitWidth())
5195 return APInt::getZero(numBits: LHS.getBitWidth());
5196 return LHS.shl(shiftAmt: Amt);
5197 });
5198
5199 case clang::X86::BI__builtin_ia32_kshiftriqi:
5200 case clang::X86::BI__builtin_ia32_kshiftrihi:
5201 case clang::X86::BI__builtin_ia32_kshiftrisi:
5202 case clang::X86::BI__builtin_ia32_kshiftridi:
5203 return interp__builtin_elementwise_int_binop(
5204 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
5205 unsigned Amt = RHS.getZExtValue() & 0xFF;
5206 if (Amt >= LHS.getBitWidth())
5207 return APInt::getZero(numBits: LHS.getBitWidth());
5208 return LHS.lshr(shiftAmt: Amt);
5209 });
5210
5211 case clang::X86::BI__builtin_ia32_lzcnt_u16:
5212 case clang::X86::BI__builtin_ia32_lzcnt_u32:
5213 case clang::X86::BI__builtin_ia32_lzcnt_u64:
5214 return interp__builtin_elementwise_int_unaryop(
5215 S, OpPC, Call, Fn: [](const APSInt &Src) {
5216 return APInt(Src.getBitWidth(), Src.countLeadingZeros());
5217 });
5218
5219 case clang::X86::BI__builtin_ia32_tzcnt_u16:
5220 case clang::X86::BI__builtin_ia32_tzcnt_u32:
5221 case clang::X86::BI__builtin_ia32_tzcnt_u64:
5222 return interp__builtin_elementwise_int_unaryop(
5223 S, OpPC, Call, Fn: [](const APSInt &Src) {
5224 return APInt(Src.getBitWidth(), Src.countTrailingZeros());
5225 });
5226
5227 case clang::X86::BI__builtin_ia32_pdep_si:
5228 case clang::X86::BI__builtin_ia32_pdep_di:
5229 case Builtin::BI__builtin_elementwise_pdep:
5230 return interp__builtin_elementwise_int_binop(S, OpPC, Call,
5231 Fn: llvm::APIntOps::pdep);
5232
5233 case clang::X86::BI__builtin_ia32_pext_si:
5234 case clang::X86::BI__builtin_ia32_pext_di:
5235 case Builtin::BI__builtin_elementwise_pext:
5236 return interp__builtin_elementwise_int_binop(S, OpPC, Call,
5237 Fn: llvm::APIntOps::pext);
5238
5239 case clang::X86::BI__builtin_ia32_addcarryx_u32:
5240 case clang::X86::BI__builtin_ia32_addcarryx_u64:
5241 return interp__builtin_ia32_addcarry_subborrow(S, OpPC, Frame, Call,
5242 /*IsAdd=*/true);
5243
5244 case clang::X86::BI__builtin_ia32_subborrow_u32:
5245 case clang::X86::BI__builtin_ia32_subborrow_u64:
5246 return interp__builtin_ia32_addcarry_subborrow(S, OpPC, Frame, Call,
5247 /*IsAdd=*/false);
5248
5249 case Builtin::BI__builtin_os_log_format_buffer_size:
5250 return interp__builtin_os_log_format_buffer_size(S, OpPC, Frame, Call);
5251
5252 case Builtin::BI__builtin_ptrauth_string_discriminator:
5253 return interp__builtin_ptrauth_string_discriminator(S, OpPC, Frame, Call);
5254
5255 case Builtin::BI__builtin_infer_alloc_token:
5256 return interp__builtin_infer_alloc_token(S, OpPC, Frame, Call);
5257
5258 case Builtin::BI__noop:
5259 pushInteger(S, Val: 0, QT: Call->getType());
5260 return true;
5261
5262 case Builtin::BI__builtin_operator_new:
5263 return interp__builtin_operator_new(S, OpPC, Frame, Call);
5264
5265 case Builtin::BI__builtin_operator_delete:
5266 return interp__builtin_operator_delete(S, OpPC, Frame, Call);
5267
5268 case Builtin::BI__arithmetic_fence:
5269 return interp__builtin_arithmetic_fence(S, OpPC, Frame, Call);
5270
5271 case Builtin::BI__builtin_reduce_add:
5272 case Builtin::BI__builtin_reduce_mul:
5273 case Builtin::BI__builtin_reduce_and:
5274 case Builtin::BI__builtin_reduce_or:
5275 case Builtin::BI__builtin_reduce_xor:
5276 case Builtin::BI__builtin_reduce_min:
5277 case Builtin::BI__builtin_reduce_max:
5278 return interp__builtin_vector_reduce(S, OpPC, Call, ID: BuiltinID);
5279
5280 case Builtin::BI__builtin_elementwise_popcount:
5281 return interp__builtin_elementwise_int_unaryop(
5282 S, OpPC, Call, Fn: [](const APSInt &Src) {
5283 return APInt(Src.getBitWidth(), Src.popcount());
5284 });
5285 case Builtin::BI__builtin_elementwise_bitreverse:
5286 return interp__builtin_elementwise_int_unaryop(
5287 S, OpPC, Call, Fn: [](const APSInt &Src) { return Src.reverseBits(); });
5288
5289 case Builtin::BI__builtin_elementwise_abs:
5290 return interp__builtin_elementwise_abs(S, OpPC, Frame, Call, BuiltinID);
5291
5292 case Builtin::BI__builtin_memcpy:
5293 case Builtin::BImemcpy:
5294 case Builtin::BI__builtin_wmemcpy:
5295 case Builtin::BIwmemcpy:
5296 case Builtin::BI__builtin_memmove:
5297 case Builtin::BImemmove:
5298 case Builtin::BI__builtin_wmemmove:
5299 case Builtin::BIwmemmove:
5300 return interp__builtin_memcpy(S, OpPC, Frame, Call, ID: BuiltinID);
5301
5302 case Builtin::BI__builtin_memcmp:
5303 case Builtin::BImemcmp:
5304 case Builtin::BI__builtin_bcmp:
5305 case Builtin::BIbcmp:
5306 case Builtin::BI__builtin_wmemcmp:
5307 case Builtin::BIwmemcmp:
5308 return interp__builtin_memcmp(S, OpPC, Frame, Call, ID: BuiltinID);
5309
5310 case Builtin::BImemchr:
5311 case Builtin::BI__builtin_memchr:
5312 case Builtin::BIstrchr:
5313 case Builtin::BI__builtin_strchr:
5314 case Builtin::BIwmemchr:
5315 case Builtin::BI__builtin_wmemchr:
5316 case Builtin::BIwcschr:
5317 case Builtin::BI__builtin_wcschr:
5318 case Builtin::BI__builtin_char_memchr:
5319 return interp__builtin_memchr(S, OpPC, Call, ID: BuiltinID);
5320
5321 case Builtin::BI__builtin_object_size:
5322 case Builtin::BI__builtin_dynamic_object_size:
5323 return interp__builtin_object_size(S, OpPC, Frame, Call);
5324
5325 case Builtin::BI__builtin_is_within_lifetime:
5326 return interp__builtin_is_within_lifetime(S, OpPC, Call);
5327
5328 case Builtin::BI__builtin_elementwise_add_sat:
5329 return interp__builtin_elementwise_int_binop(
5330 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
5331 return LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS);
5332 });
5333
5334 case Builtin::BI__builtin_elementwise_sub_sat:
5335 return interp__builtin_elementwise_int_binop(
5336 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
5337 return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
5338 });
5339 case X86::BI__builtin_ia32_extract128i256:
5340 case X86::BI__builtin_ia32_vextractf128_pd256:
5341 case X86::BI__builtin_ia32_vextractf128_ps256:
5342 case X86::BI__builtin_ia32_vextractf128_si256:
5343 return interp__builtin_ia32_extract_vector(S, OpPC, Call, ID: BuiltinID);
5344
5345 case X86::BI__builtin_ia32_extractf32x4_256_mask:
5346 case X86::BI__builtin_ia32_extractf32x4_mask:
5347 case X86::BI__builtin_ia32_extractf32x8_mask:
5348 case X86::BI__builtin_ia32_extractf64x2_256_mask:
5349 case X86::BI__builtin_ia32_extractf64x2_512_mask:
5350 case X86::BI__builtin_ia32_extractf64x4_mask:
5351 case X86::BI__builtin_ia32_extracti32x4_256_mask:
5352 case X86::BI__builtin_ia32_extracti32x4_mask:
5353 case X86::BI__builtin_ia32_extracti32x8_mask:
5354 case X86::BI__builtin_ia32_extracti64x2_256_mask:
5355 case X86::BI__builtin_ia32_extracti64x2_512_mask:
5356 case X86::BI__builtin_ia32_extracti64x4_mask:
5357 return interp__builtin_ia32_extract_vector_masked(S, OpPC, Call, ID: BuiltinID);
5358
5359 case clang::X86::BI__builtin_ia32_pmulhrsw128:
5360 case clang::X86::BI__builtin_ia32_pmulhrsw256:
5361 case clang::X86::BI__builtin_ia32_pmulhrsw512:
5362 return interp__builtin_elementwise_int_binop(
5363 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
5364 return (llvm::APIntOps::mulsExtended(C1: LHS, C2: RHS).ashr(ShiftAmt: 14) + 1)
5365 .extractBits(numBits: 16, bitPosition: 1);
5366 });
5367
5368 case clang::X86::BI__builtin_ia32_movmskps:
5369 case clang::X86::BI__builtin_ia32_movmskpd:
5370 case clang::X86::BI__builtin_ia32_pmovmskb128:
5371 case clang::X86::BI__builtin_ia32_pmovmskb256:
5372 case clang::X86::BI__builtin_ia32_movmskps256:
5373 case clang::X86::BI__builtin_ia32_movmskpd256: {
5374 return interp__builtin_ia32_movmsk_op(S, OpPC, Call);
5375 }
5376
5377 case X86::BI__builtin_ia32_psignb128:
5378 case X86::BI__builtin_ia32_psignb256:
5379 case X86::BI__builtin_ia32_psignw128:
5380 case X86::BI__builtin_ia32_psignw256:
5381 case X86::BI__builtin_ia32_psignd128:
5382 case X86::BI__builtin_ia32_psignd256:
5383 return interp__builtin_elementwise_int_binop(
5384 S, OpPC, Call, Fn: [](const APInt &AElem, const APInt &BElem) {
5385 if (BElem.isZero())
5386 return APInt::getZero(numBits: AElem.getBitWidth());
5387 if (BElem.isNegative())
5388 return -AElem;
5389 return AElem;
5390 });
5391
5392 case clang::X86::BI__builtin_ia32_pavgb128:
5393 case clang::X86::BI__builtin_ia32_pavgw128:
5394 case clang::X86::BI__builtin_ia32_pavgb256:
5395 case clang::X86::BI__builtin_ia32_pavgw256:
5396 case clang::X86::BI__builtin_ia32_pavgb512:
5397 case clang::X86::BI__builtin_ia32_pavgw512:
5398 return interp__builtin_elementwise_int_binop(S, OpPC, Call,
5399 Fn: llvm::APIntOps::avgCeilU);
5400
5401 case clang::X86::BI__builtin_ia32_pmaddubsw128:
5402 case clang::X86::BI__builtin_ia32_pmaddubsw256:
5403 case clang::X86::BI__builtin_ia32_pmaddubsw512:
5404 return interp__builtin_ia32_pmul(
5405 S, OpPC, Call,
5406 Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
5407 const APSInt &HiRHS) {
5408 unsigned BitWidth = 2 * LoLHS.getBitWidth();
5409 return (LoLHS.zext(width: BitWidth) * LoRHS.sext(width: BitWidth))
5410 .sadd_sat(RHS: (HiLHS.zext(width: BitWidth) * HiRHS.sext(width: BitWidth)));
5411 });
5412
5413 case clang::X86::BI__builtin_ia32_pmaddwd128:
5414 case clang::X86::BI__builtin_ia32_pmaddwd256:
5415 case clang::X86::BI__builtin_ia32_pmaddwd512:
5416 return interp__builtin_ia32_pmul(
5417 S, OpPC, Call,
5418 Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
5419 const APSInt &HiRHS) {
5420 unsigned BitWidth = 2 * LoLHS.getBitWidth();
5421 return (LoLHS.sext(width: BitWidth) * LoRHS.sext(width: BitWidth)) +
5422 (HiLHS.sext(width: BitWidth) * HiRHS.sext(width: BitWidth));
5423 });
5424
5425 case clang::X86::BI__builtin_ia32_dbpsadbw128:
5426 case clang::X86::BI__builtin_ia32_dbpsadbw256:
5427 case clang::X86::BI__builtin_ia32_dbpsadbw512:
5428 return interp__builtin_ia32_dbpsadbw(S, OpPC, Call);
5429
5430 case clang::X86::BI__builtin_ia32_mpsadbw128:
5431 case clang::X86::BI__builtin_ia32_mpsadbw256:
5432 return interp__builtin_ia32_mpsadbw(S, OpPC, Call);
5433
5434 case clang::X86::BI__builtin_ia32_pmulhuw128:
5435 case clang::X86::BI__builtin_ia32_pmulhuw256:
5436 case clang::X86::BI__builtin_ia32_pmulhuw512:
5437 return interp__builtin_elementwise_int_binop(S, OpPC, Call,
5438 Fn: llvm::APIntOps::mulhu);
5439
5440 case clang::X86::BI__builtin_ia32_pmulhw128:
5441 case clang::X86::BI__builtin_ia32_pmulhw256:
5442 case clang::X86::BI__builtin_ia32_pmulhw512:
5443 return interp__builtin_elementwise_int_binop(S, OpPC, Call,
5444 Fn: llvm::APIntOps::mulhs);
5445
5446 case clang::X86::BI__builtin_ia32_psllv2di:
5447 case clang::X86::BI__builtin_ia32_psllv4di:
5448 case clang::X86::BI__builtin_ia32_psllv4si:
5449 case clang::X86::BI__builtin_ia32_psllv8di:
5450 case clang::X86::BI__builtin_ia32_psllv8hi:
5451 case clang::X86::BI__builtin_ia32_psllv8si:
5452 case clang::X86::BI__builtin_ia32_psllv16hi:
5453 case clang::X86::BI__builtin_ia32_psllv16si:
5454 case clang::X86::BI__builtin_ia32_psllv32hi:
5455 case clang::X86::BI__builtin_ia32_psllwi128:
5456 case clang::X86::BI__builtin_ia32_psllwi256:
5457 case clang::X86::BI__builtin_ia32_psllwi512:
5458 case clang::X86::BI__builtin_ia32_pslldi128:
5459 case clang::X86::BI__builtin_ia32_pslldi256:
5460 case clang::X86::BI__builtin_ia32_pslldi512:
5461 case clang::X86::BI__builtin_ia32_psllqi128:
5462 case clang::X86::BI__builtin_ia32_psllqi256:
5463 case clang::X86::BI__builtin_ia32_psllqi512:
5464 return interp__builtin_elementwise_int_binop(
5465 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
5466 if (RHS.uge(RHS: LHS.getBitWidth())) {
5467 return APInt::getZero(numBits: LHS.getBitWidth());
5468 }
5469 return LHS.shl(shiftAmt: RHS.getZExtValue());
5470 });
5471
5472 case clang::X86::BI__builtin_ia32_psrav4si:
5473 case clang::X86::BI__builtin_ia32_psrav8di:
5474 case clang::X86::BI__builtin_ia32_psrav8hi:
5475 case clang::X86::BI__builtin_ia32_psrav8si:
5476 case clang::X86::BI__builtin_ia32_psrav16hi:
5477 case clang::X86::BI__builtin_ia32_psrav16si:
5478 case clang::X86::BI__builtin_ia32_psrav32hi:
5479 case clang::X86::BI__builtin_ia32_psravq128:
5480 case clang::X86::BI__builtin_ia32_psravq256:
5481 case clang::X86::BI__builtin_ia32_psrawi128:
5482 case clang::X86::BI__builtin_ia32_psrawi256:
5483 case clang::X86::BI__builtin_ia32_psrawi512:
5484 case clang::X86::BI__builtin_ia32_psradi128:
5485 case clang::X86::BI__builtin_ia32_psradi256:
5486 case clang::X86::BI__builtin_ia32_psradi512:
5487 case clang::X86::BI__builtin_ia32_psraqi128:
5488 case clang::X86::BI__builtin_ia32_psraqi256:
5489 case clang::X86::BI__builtin_ia32_psraqi512:
5490 return interp__builtin_elementwise_int_binop(
5491 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
5492 if (RHS.uge(RHS: LHS.getBitWidth())) {
5493 return LHS.ashr(ShiftAmt: LHS.getBitWidth() - 1);
5494 }
5495 return LHS.ashr(ShiftAmt: RHS.getZExtValue());
5496 });
5497
5498 case clang::X86::BI__builtin_ia32_psrlv2di:
5499 case clang::X86::BI__builtin_ia32_psrlv4di:
5500 case clang::X86::BI__builtin_ia32_psrlv4si:
5501 case clang::X86::BI__builtin_ia32_psrlv8di:
5502 case clang::X86::BI__builtin_ia32_psrlv8hi:
5503 case clang::X86::BI__builtin_ia32_psrlv8si:
5504 case clang::X86::BI__builtin_ia32_psrlv16hi:
5505 case clang::X86::BI__builtin_ia32_psrlv16si:
5506 case clang::X86::BI__builtin_ia32_psrlv32hi:
5507 case clang::X86::BI__builtin_ia32_psrlwi128:
5508 case clang::X86::BI__builtin_ia32_psrlwi256:
5509 case clang::X86::BI__builtin_ia32_psrlwi512:
5510 case clang::X86::BI__builtin_ia32_psrldi128:
5511 case clang::X86::BI__builtin_ia32_psrldi256:
5512 case clang::X86::BI__builtin_ia32_psrldi512:
5513 case clang::X86::BI__builtin_ia32_psrlqi128:
5514 case clang::X86::BI__builtin_ia32_psrlqi256:
5515 case clang::X86::BI__builtin_ia32_psrlqi512:
5516 return interp__builtin_elementwise_int_binop(
5517 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
5518 if (RHS.uge(RHS: LHS.getBitWidth())) {
5519 return APInt::getZero(numBits: LHS.getBitWidth());
5520 }
5521 return LHS.lshr(shiftAmt: RHS.getZExtValue());
5522 });
5523 case clang::X86::BI__builtin_ia32_packsswb128:
5524 case clang::X86::BI__builtin_ia32_packsswb256:
5525 case clang::X86::BI__builtin_ia32_packsswb512:
5526 case clang::X86::BI__builtin_ia32_packssdw128:
5527 case clang::X86::BI__builtin_ia32_packssdw256:
5528 case clang::X86::BI__builtin_ia32_packssdw512:
5529 return interp__builtin_ia32_pack(S, OpPC, E: Call, PackFn: [](const APSInt &Src) {
5530 return APInt(Src).truncSSat(width: Src.getBitWidth() / 2);
5531 });
5532 case clang::X86::BI__builtin_ia32_packusdw128:
5533 case clang::X86::BI__builtin_ia32_packusdw256:
5534 case clang::X86::BI__builtin_ia32_packusdw512:
5535 case clang::X86::BI__builtin_ia32_packuswb128:
5536 case clang::X86::BI__builtin_ia32_packuswb256:
5537 case clang::X86::BI__builtin_ia32_packuswb512:
5538 return interp__builtin_ia32_pack(S, OpPC, E: Call, PackFn: [](const APSInt &Src) {
5539 return APInt(Src).truncSSatU(width: Src.getBitWidth() / 2);
5540 });
5541
5542 case clang::X86::BI__builtin_ia32_selectss_128:
5543 case clang::X86::BI__builtin_ia32_selectsd_128:
5544 case clang::X86::BI__builtin_ia32_selectsh_128:
5545 case clang::X86::BI__builtin_ia32_selectsbf_128:
5546 return interp__builtin_ia32_select_scalar(S, Call);
5547 case clang::X86::BI__builtin_ia32_vprotbi:
5548 case clang::X86::BI__builtin_ia32_vprotdi:
5549 case clang::X86::BI__builtin_ia32_vprotqi:
5550 case clang::X86::BI__builtin_ia32_vprotwi:
5551 case clang::X86::BI__builtin_ia32_prold128:
5552 case clang::X86::BI__builtin_ia32_prold256:
5553 case clang::X86::BI__builtin_ia32_prold512:
5554 case clang::X86::BI__builtin_ia32_prolq128:
5555 case clang::X86::BI__builtin_ia32_prolq256:
5556 case clang::X86::BI__builtin_ia32_prolq512:
5557 return interp__builtin_elementwise_int_binop(
5558 S, OpPC, Call,
5559 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.rotl(rotateAmt: RHS); });
5560
5561 case clang::X86::BI__builtin_ia32_prord128:
5562 case clang::X86::BI__builtin_ia32_prord256:
5563 case clang::X86::BI__builtin_ia32_prord512:
5564 case clang::X86::BI__builtin_ia32_prorq128:
5565 case clang::X86::BI__builtin_ia32_prorq256:
5566 case clang::X86::BI__builtin_ia32_prorq512:
5567 return interp__builtin_elementwise_int_binop(
5568 S, OpPC, Call,
5569 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.rotr(rotateAmt: RHS); });
5570
5571 case Builtin::BI__builtin_elementwise_max:
5572 case Builtin::BI__builtin_elementwise_min:
5573 return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);
5574
5575 case clang::X86::BI__builtin_ia32_phaddw128:
5576 case clang::X86::BI__builtin_ia32_phaddw256:
5577 case clang::X86::BI__builtin_ia32_phaddd128:
5578 case clang::X86::BI__builtin_ia32_phaddd256:
5579 return interp_builtin_horizontal_int_binop(
5580 S, OpPC, Call,
5581 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
5582 case clang::X86::BI__builtin_ia32_phaddsw128:
5583 case clang::X86::BI__builtin_ia32_phaddsw256:
5584 return interp_builtin_horizontal_int_binop(
5585 S, OpPC, Call,
5586 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.sadd_sat(RHS); });
5587 case clang::X86::BI__builtin_ia32_phsubw128:
5588 case clang::X86::BI__builtin_ia32_phsubw256:
5589 case clang::X86::BI__builtin_ia32_phsubd128:
5590 case clang::X86::BI__builtin_ia32_phsubd256:
5591 return interp_builtin_horizontal_int_binop(
5592 S, OpPC, Call,
5593 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS - RHS; });
5594 case clang::X86::BI__builtin_ia32_phsubsw128:
5595 case clang::X86::BI__builtin_ia32_phsubsw256:
5596 return interp_builtin_horizontal_int_binop(
5597 S, OpPC, Call,
5598 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.ssub_sat(RHS); });
5599 case clang::X86::BI__builtin_ia32_haddpd:
5600 case clang::X86::BI__builtin_ia32_haddps:
5601 case clang::X86::BI__builtin_ia32_haddpd256:
5602 case clang::X86::BI__builtin_ia32_haddps256:
5603 return interp_builtin_horizontal_fp_binop(
5604 S, OpPC, Call,
5605 Fn: [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
5606 APFloat F = LHS;
5607 F.add(RHS, RM);
5608 return F;
5609 });
5610 case clang::X86::BI__builtin_ia32_hsubpd:
5611 case clang::X86::BI__builtin_ia32_hsubps:
5612 case clang::X86::BI__builtin_ia32_hsubpd256:
5613 case clang::X86::BI__builtin_ia32_hsubps256:
5614 return interp_builtin_horizontal_fp_binop(
5615 S, OpPC, Call,
5616 Fn: [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
5617 APFloat F = LHS;
5618 F.subtract(RHS, RM);
5619 return F;
5620 });
5621 case clang::X86::BI__builtin_ia32_addsubpd:
5622 case clang::X86::BI__builtin_ia32_addsubps:
5623 case clang::X86::BI__builtin_ia32_addsubpd256:
5624 case clang::X86::BI__builtin_ia32_addsubps256:
5625 return interp__builtin_ia32_addsub(S, OpPC, Call);
5626
5627 case clang::X86::BI__builtin_ia32_pmuldq128:
5628 case clang::X86::BI__builtin_ia32_pmuldq256:
5629 case clang::X86::BI__builtin_ia32_pmuldq512:
5630 return interp__builtin_ia32_pmul(
5631 S, OpPC, Call,
5632 Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
5633 const APSInt &HiRHS) {
5634 return llvm::APIntOps::mulsExtended(C1: LoLHS, C2: LoRHS);
5635 });
5636
5637 case clang::X86::BI__builtin_ia32_pmuludq128:
5638 case clang::X86::BI__builtin_ia32_pmuludq256:
5639 case clang::X86::BI__builtin_ia32_pmuludq512:
5640 return interp__builtin_ia32_pmul(
5641 S, OpPC, Call,
5642 Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
5643 const APSInt &HiRHS) {
5644 return llvm::APIntOps::muluExtended(C1: LoLHS, C2: LoRHS);
5645 });
5646
5647 case clang::X86::BI__builtin_ia32_pclmulqdq128:
5648 case clang::X86::BI__builtin_ia32_pclmulqdq256:
5649 case clang::X86::BI__builtin_ia32_pclmulqdq512:
5650 return interp__builtin_ia32_pclmulqdq(S, OpPC, Call);
5651 case Builtin::BI__builtin_elementwise_clmul:
5652 return interp__builtin_elementwise_int_binop(S, OpPC, Call,
5653 Fn: llvm::APIntOps::clmul);
5654
5655 case Builtin::BI__builtin_elementwise_fma:
5656 return interp__builtin_elementwise_triop_fp(
5657 S, OpPC, Call,
5658 Fn: [](const APFloat &X, const APFloat &Y, const APFloat &Z,
5659 llvm::RoundingMode RM) {
5660 APFloat F = X;
5661 F.fusedMultiplyAdd(Multiplicand: Y, Addend: Z, RM);
5662 return F;
5663 });
5664
5665 case X86::BI__builtin_ia32_vpmadd52luq128:
5666 case X86::BI__builtin_ia32_vpmadd52luq256:
5667 case X86::BI__builtin_ia32_vpmadd52luq512:
5668 return interp__builtin_elementwise_triop(
5669 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B, const APSInt &C) {
5670 return A + (B.trunc(width: 52) * C.trunc(width: 52)).zext(width: 64);
5671 });
5672 case X86::BI__builtin_ia32_vpmadd52huq128:
5673 case X86::BI__builtin_ia32_vpmadd52huq256:
5674 case X86::BI__builtin_ia32_vpmadd52huq512:
5675 return interp__builtin_elementwise_triop(
5676 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B, const APSInt &C) {
5677 return A + llvm::APIntOps::mulhu(C1: B.trunc(width: 52), C2: C.trunc(width: 52)).zext(width: 64);
5678 });
5679
5680 case X86::BI__builtin_ia32_vpshldd128:
5681 case X86::BI__builtin_ia32_vpshldd256:
5682 case X86::BI__builtin_ia32_vpshldd512:
5683 case X86::BI__builtin_ia32_vpshldq128:
5684 case X86::BI__builtin_ia32_vpshldq256:
5685 case X86::BI__builtin_ia32_vpshldq512:
5686 case X86::BI__builtin_ia32_vpshldw128:
5687 case X86::BI__builtin_ia32_vpshldw256:
5688 case X86::BI__builtin_ia32_vpshldw512:
5689 return interp__builtin_elementwise_triop(
5690 S, OpPC, Call,
5691 Fn: [](const APSInt &Hi, const APSInt &Lo, const APSInt &Amt) {
5692 return llvm::APIntOps::fshl(Hi, Lo, Shift: Amt);
5693 });
5694
5695 case X86::BI__builtin_ia32_vpshrdd128:
5696 case X86::BI__builtin_ia32_vpshrdd256:
5697 case X86::BI__builtin_ia32_vpshrdd512:
5698 case X86::BI__builtin_ia32_vpshrdq128:
5699 case X86::BI__builtin_ia32_vpshrdq256:
5700 case X86::BI__builtin_ia32_vpshrdq512:
5701 case X86::BI__builtin_ia32_vpshrdw128:
5702 case X86::BI__builtin_ia32_vpshrdw256:
5703 case X86::BI__builtin_ia32_vpshrdw512:
5704 // NOTE: Reversed Hi/Lo operands.
5705 return interp__builtin_elementwise_triop(
5706 S, OpPC, Call,
5707 Fn: [](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) {
5708 return llvm::APIntOps::fshr(Hi, Lo, Shift: Amt);
5709 });
5710 case X86::BI__builtin_ia32_vpconflictsi_128:
5711 case X86::BI__builtin_ia32_vpconflictsi_256:
5712 case X86::BI__builtin_ia32_vpconflictsi_512:
5713 case X86::BI__builtin_ia32_vpconflictdi_128:
5714 case X86::BI__builtin_ia32_vpconflictdi_256:
5715 case X86::BI__builtin_ia32_vpconflictdi_512:
5716 return interp__builtin_ia32_vpconflict(S, OpPC, Call);
5717 case X86::BI__builtin_ia32_compressdf128_mask:
5718 case X86::BI__builtin_ia32_compressdf256_mask:
5719 case X86::BI__builtin_ia32_compressdf512_mask:
5720 case X86::BI__builtin_ia32_compressdi128_mask:
5721 case X86::BI__builtin_ia32_compressdi256_mask:
5722 case X86::BI__builtin_ia32_compressdi512_mask:
5723 case X86::BI__builtin_ia32_compresshi128_mask:
5724 case X86::BI__builtin_ia32_compresshi256_mask:
5725 case X86::BI__builtin_ia32_compresshi512_mask:
5726 case X86::BI__builtin_ia32_compressqi128_mask:
5727 case X86::BI__builtin_ia32_compressqi256_mask:
5728 case X86::BI__builtin_ia32_compressqi512_mask:
5729 case X86::BI__builtin_ia32_compresssf128_mask:
5730 case X86::BI__builtin_ia32_compresssf256_mask:
5731 case X86::BI__builtin_ia32_compresssf512_mask:
5732 case X86::BI__builtin_ia32_compresssi128_mask:
5733 case X86::BI__builtin_ia32_compresssi256_mask:
5734 case X86::BI__builtin_ia32_compresssi512_mask: {
5735 unsigned NumElems =
5736 Call->getArg(Arg: 0)->getType()->castAs<VectorType>()->getNumElements();
5737 return interp__builtin_ia32_shuffle_generic(
5738 S, OpPC, Call, GetSourceIndex: [NumElems](unsigned DstIdx, const APInt &ShuffleMask) {
5739 APInt CompressMask = ShuffleMask.trunc(width: NumElems);
5740 if (DstIdx < CompressMask.popcount()) {
5741 while (DstIdx != 0) {
5742 CompressMask = CompressMask & (CompressMask - 1);
5743 DstIdx--;
5744 }
5745 return std::pair<unsigned, int>{
5746 0, static_cast<int>(CompressMask.countr_zero())};
5747 }
5748 return std::pair<unsigned, int>{1, static_cast<int>(DstIdx)};
5749 });
5750 }
5751 case X86::BI__builtin_ia32_expanddf128_mask:
5752 case X86::BI__builtin_ia32_expanddf256_mask:
5753 case X86::BI__builtin_ia32_expanddf512_mask:
5754 case X86::BI__builtin_ia32_expanddi128_mask:
5755 case X86::BI__builtin_ia32_expanddi256_mask:
5756 case X86::BI__builtin_ia32_expanddi512_mask:
5757 case X86::BI__builtin_ia32_expandhi128_mask:
5758 case X86::BI__builtin_ia32_expandhi256_mask:
5759 case X86::BI__builtin_ia32_expandhi512_mask:
5760 case X86::BI__builtin_ia32_expandqi128_mask:
5761 case X86::BI__builtin_ia32_expandqi256_mask:
5762 case X86::BI__builtin_ia32_expandqi512_mask:
5763 case X86::BI__builtin_ia32_expandsf128_mask:
5764 case X86::BI__builtin_ia32_expandsf256_mask:
5765 case X86::BI__builtin_ia32_expandsf512_mask:
5766 case X86::BI__builtin_ia32_expandsi128_mask:
5767 case X86::BI__builtin_ia32_expandsi256_mask:
5768 case X86::BI__builtin_ia32_expandsi512_mask: {
5769 return interp__builtin_ia32_shuffle_generic(
5770 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, const APInt &ShuffleMask) {
5771 // Trunc to the sub-mask for the dst index and count the number of
5772 // src elements used prior to that.
5773 APInt ExpandMask = ShuffleMask.trunc(width: DstIdx + 1);
5774 if (ExpandMask[DstIdx]) {
5775 int SrcIdx = ExpandMask.popcount() - 1;
5776 return std::pair<unsigned, int>{0, SrcIdx};
5777 }
5778 return std::pair<unsigned, int>{1, static_cast<int>(DstIdx)};
5779 });
5780 }
5781 case clang::X86::BI__builtin_ia32_blendpd:
5782 case clang::X86::BI__builtin_ia32_blendpd256:
5783 case clang::X86::BI__builtin_ia32_blendps:
5784 case clang::X86::BI__builtin_ia32_blendps256:
5785 case clang::X86::BI__builtin_ia32_pblendw128:
5786 case clang::X86::BI__builtin_ia32_pblendw256:
5787 case clang::X86::BI__builtin_ia32_pblendd128:
5788 case clang::X86::BI__builtin_ia32_pblendd256:
5789 return interp__builtin_ia32_shuffle_generic(
5790 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5791 // Bit index for mask.
5792 unsigned MaskBit = (ShuffleMask >> (DstIdx % 8)) & 0x1;
5793 unsigned SrcVecIdx = MaskBit ? 1 : 0; // 1 = TrueVec, 0 = FalseVec
5794 return std::pair<unsigned, int>{SrcVecIdx, static_cast<int>(DstIdx)};
5795 });
5796
5797
5798
5799 case clang::X86::BI__builtin_ia32_blendvpd:
5800 case clang::X86::BI__builtin_ia32_blendvpd256:
5801 case clang::X86::BI__builtin_ia32_blendvps:
5802 case clang::X86::BI__builtin_ia32_blendvps256:
5803 return interp__builtin_elementwise_triop_fp(
5804 S, OpPC, Call,
5805 Fn: [](const APFloat &F, const APFloat &T, const APFloat &C,
5806 llvm::RoundingMode) { return C.isNegative() ? T : F; });
5807
5808 case clang::X86::BI__builtin_ia32_pblendvb128:
5809 case clang::X86::BI__builtin_ia32_pblendvb256:
5810 return interp__builtin_elementwise_triop(
5811 S, OpPC, Call, Fn: [](const APSInt &F, const APSInt &T, const APSInt &C) {
5812 return ((APInt)C).isNegative() ? T : F;
5813 });
5814 case X86::BI__builtin_ia32_ptestz128:
5815 case X86::BI__builtin_ia32_ptestz256:
5816 case X86::BI__builtin_ia32_vtestzps:
5817 case X86::BI__builtin_ia32_vtestzps256:
5818 case X86::BI__builtin_ia32_vtestzpd:
5819 case X86::BI__builtin_ia32_vtestzpd256:
5820 return interp__builtin_ia32_test_op(
5821 S, OpPC, Call,
5822 Fn: [](const APInt &A, const APInt &B) { return (A & B) == 0; });
5823 case X86::BI__builtin_ia32_ptestc128:
5824 case X86::BI__builtin_ia32_ptestc256:
5825 case X86::BI__builtin_ia32_vtestcps:
5826 case X86::BI__builtin_ia32_vtestcps256:
5827 case X86::BI__builtin_ia32_vtestcpd:
5828 case X86::BI__builtin_ia32_vtestcpd256:
5829 return interp__builtin_ia32_test_op(
5830 S, OpPC, Call,
5831 Fn: [](const APInt &A, const APInt &B) { return (~A & B) == 0; });
5832 case X86::BI__builtin_ia32_ptestnzc128:
5833 case X86::BI__builtin_ia32_ptestnzc256:
5834 case X86::BI__builtin_ia32_vtestnzcps:
5835 case X86::BI__builtin_ia32_vtestnzcps256:
5836 case X86::BI__builtin_ia32_vtestnzcpd:
5837 case X86::BI__builtin_ia32_vtestnzcpd256:
5838 return interp__builtin_ia32_test_op(
5839 S, OpPC, Call, Fn: [](const APInt &A, const APInt &B) {
5840 return ((A & B) != 0) && ((~A & B) != 0);
5841 });
5842 case X86::BI__builtin_ia32_selectb_128:
5843 case X86::BI__builtin_ia32_selectb_256:
5844 case X86::BI__builtin_ia32_selectb_512:
5845 case X86::BI__builtin_ia32_selectw_128:
5846 case X86::BI__builtin_ia32_selectw_256:
5847 case X86::BI__builtin_ia32_selectw_512:
5848 case X86::BI__builtin_ia32_selectd_128:
5849 case X86::BI__builtin_ia32_selectd_256:
5850 case X86::BI__builtin_ia32_selectd_512:
5851 case X86::BI__builtin_ia32_selectq_128:
5852 case X86::BI__builtin_ia32_selectq_256:
5853 case X86::BI__builtin_ia32_selectq_512:
5854 case X86::BI__builtin_ia32_selectph_128:
5855 case X86::BI__builtin_ia32_selectph_256:
5856 case X86::BI__builtin_ia32_selectph_512:
5857 case X86::BI__builtin_ia32_selectpbf_128:
5858 case X86::BI__builtin_ia32_selectpbf_256:
5859 case X86::BI__builtin_ia32_selectpbf_512:
5860 case X86::BI__builtin_ia32_selectps_128:
5861 case X86::BI__builtin_ia32_selectps_256:
5862 case X86::BI__builtin_ia32_selectps_512:
5863 case X86::BI__builtin_ia32_selectpd_128:
5864 case X86::BI__builtin_ia32_selectpd_256:
5865 case X86::BI__builtin_ia32_selectpd_512:
5866 return interp__builtin_ia32_select(S, OpPC, Call);
5867
5868 case X86::BI__builtin_ia32_shufps:
5869 case X86::BI__builtin_ia32_shufps256:
5870 case X86::BI__builtin_ia32_shufps512:
5871 return interp__builtin_ia32_shuffle_generic(
5872 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5873 unsigned NumElemPerLane = 4;
5874 unsigned NumSelectableElems = NumElemPerLane / 2;
5875 unsigned BitsPerElem = 2;
5876 unsigned IndexMask = 0x3;
5877 unsigned MaskBits = 8;
5878 unsigned Lane = DstIdx / NumElemPerLane;
5879 unsigned ElemInLane = DstIdx % NumElemPerLane;
5880 unsigned LaneOffset = Lane * NumElemPerLane;
5881 unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
5882 unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
5883 unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
5884 return std::pair<unsigned, int>{SrcIdx,
5885 static_cast<int>(LaneOffset + Index)};
5886 });
5887 case X86::BI__builtin_ia32_shufpd:
5888 case X86::BI__builtin_ia32_shufpd256:
5889 case X86::BI__builtin_ia32_shufpd512:
5890 return interp__builtin_ia32_shuffle_generic(
5891 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5892 unsigned NumElemPerLane = 2;
5893 unsigned NumSelectableElems = NumElemPerLane / 2;
5894 unsigned BitsPerElem = 1;
5895 unsigned IndexMask = 0x1;
5896 unsigned MaskBits = 8;
5897 unsigned Lane = DstIdx / NumElemPerLane;
5898 unsigned ElemInLane = DstIdx % NumElemPerLane;
5899 unsigned LaneOffset = Lane * NumElemPerLane;
5900 unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
5901 unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
5902 unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
5903 return std::pair<unsigned, int>{SrcIdx,
5904 static_cast<int>(LaneOffset + Index)};
5905 });
5906
5907 case X86::BI__builtin_ia32_vgf2p8affineinvqb_v16qi:
5908 case X86::BI__builtin_ia32_vgf2p8affineinvqb_v32qi:
5909 case X86::BI__builtin_ia32_vgf2p8affineinvqb_v64qi:
5910 return interp__builtin_ia32_gfni_affine(S, OpPC, Call, Inverse: true);
5911 case X86::BI__builtin_ia32_vgf2p8affineqb_v16qi:
5912 case X86::BI__builtin_ia32_vgf2p8affineqb_v32qi:
5913 case X86::BI__builtin_ia32_vgf2p8affineqb_v64qi:
5914 return interp__builtin_ia32_gfni_affine(S, OpPC, Call, Inverse: false);
5915
5916 case X86::BI__builtin_ia32_vgf2p8mulb_v16qi:
5917 case X86::BI__builtin_ia32_vgf2p8mulb_v32qi:
5918 case X86::BI__builtin_ia32_vgf2p8mulb_v64qi:
5919 return interp__builtin_ia32_gfni_mul(S, OpPC, Call);
5920
5921 case X86::BI__builtin_ia32_insertps128:
5922 return interp__builtin_ia32_shuffle_generic(
5923 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Mask) {
5924 // Bits [3:0]: zero mask - if bit is set, zero this element
5925 if ((Mask & (1 << DstIdx)) != 0) {
5926 return std::pair<unsigned, int>{0, -1};
5927 }
5928 // Bits [7:6]: select element from source vector Y (0-3)
5929 // Bits [5:4]: select destination position (0-3)
5930 unsigned SrcElem = (Mask >> 6) & 0x3;
5931 unsigned DstElem = (Mask >> 4) & 0x3;
5932 if (DstIdx == DstElem) {
5933 // Insert element from source vector (B) at this position
5934 return std::pair<unsigned, int>{1, static_cast<int>(SrcElem)};
5935 } else {
5936 // Copy from destination vector (A)
5937 return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)};
5938 }
5939 });
5940 case X86::BI__builtin_ia32_permvarsi256:
5941 case X86::BI__builtin_ia32_permvarsf256:
5942 case X86::BI__builtin_ia32_permvardf512:
5943 case X86::BI__builtin_ia32_permvardi512:
5944 case X86::BI__builtin_ia32_permvarhi128:
5945 return interp__builtin_ia32_shuffle_generic(
5946 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5947 int Offset = ShuffleMask & 0x7;
5948 return std::pair<unsigned, int>{0, Offset};
5949 });
5950 case X86::BI__builtin_ia32_permvarqi128:
5951 case X86::BI__builtin_ia32_permvarhi256:
5952 case X86::BI__builtin_ia32_permvarsi512:
5953 case X86::BI__builtin_ia32_permvarsf512:
5954 return interp__builtin_ia32_shuffle_generic(
5955 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5956 int Offset = ShuffleMask & 0xF;
5957 return std::pair<unsigned, int>{0, Offset};
5958 });
5959 case X86::BI__builtin_ia32_permvardi256:
5960 case X86::BI__builtin_ia32_permvardf256:
5961 return interp__builtin_ia32_shuffle_generic(
5962 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5963 int Offset = ShuffleMask & 0x3;
5964 return std::pair<unsigned, int>{0, Offset};
5965 });
5966 case X86::BI__builtin_ia32_permvarqi256:
5967 case X86::BI__builtin_ia32_permvarhi512:
5968 return interp__builtin_ia32_shuffle_generic(
5969 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5970 int Offset = ShuffleMask & 0x1F;
5971 return std::pair<unsigned, int>{0, Offset};
5972 });
5973 case X86::BI__builtin_ia32_permvarqi512:
5974 return interp__builtin_ia32_shuffle_generic(
5975 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5976 int Offset = ShuffleMask & 0x3F;
5977 return std::pair<unsigned, int>{0, Offset};
5978 });
5979 case X86::BI__builtin_ia32_vpermi2varq128:
5980 case X86::BI__builtin_ia32_vpermi2varpd128:
5981 return interp__builtin_ia32_shuffle_generic(
5982 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5983 int Offset = ShuffleMask & 0x1;
5984 unsigned SrcIdx = (ShuffleMask >> 1) & 0x1;
5985 return std::pair<unsigned, int>{SrcIdx, Offset};
5986 });
5987 case X86::BI__builtin_ia32_vpermi2vard128:
5988 case X86::BI__builtin_ia32_vpermi2varps128:
5989 case X86::BI__builtin_ia32_vpermi2varq256:
5990 case X86::BI__builtin_ia32_vpermi2varpd256:
5991 return interp__builtin_ia32_shuffle_generic(
5992 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5993 int Offset = ShuffleMask & 0x3;
5994 unsigned SrcIdx = (ShuffleMask >> 2) & 0x1;
5995 return std::pair<unsigned, int>{SrcIdx, Offset};
5996 });
5997 case X86::BI__builtin_ia32_vpermi2varhi128:
5998 case X86::BI__builtin_ia32_vpermi2vard256:
5999 case X86::BI__builtin_ia32_vpermi2varps256:
6000 case X86::BI__builtin_ia32_vpermi2varq512:
6001 case X86::BI__builtin_ia32_vpermi2varpd512:
6002 return interp__builtin_ia32_shuffle_generic(
6003 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
6004 int Offset = ShuffleMask & 0x7;
6005 unsigned SrcIdx = (ShuffleMask >> 3) & 0x1;
6006 return std::pair<unsigned, int>{SrcIdx, Offset};
6007 });
6008 case X86::BI__builtin_ia32_vpermi2varqi128:
6009 case X86::BI__builtin_ia32_vpermi2varhi256:
6010 case X86::BI__builtin_ia32_vpermi2vard512:
6011 case X86::BI__builtin_ia32_vpermi2varps512:
6012 return interp__builtin_ia32_shuffle_generic(
6013 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
6014 int Offset = ShuffleMask & 0xF;
6015 unsigned SrcIdx = (ShuffleMask >> 4) & 0x1;
6016 return std::pair<unsigned, int>{SrcIdx, Offset};
6017 });
6018 case X86::BI__builtin_ia32_vpermi2varqi256:
6019 case X86::BI__builtin_ia32_vpermi2varhi512:
6020 return interp__builtin_ia32_shuffle_generic(
6021 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
6022 int Offset = ShuffleMask & 0x1F;
6023 unsigned SrcIdx = (ShuffleMask >> 5) & 0x1;
6024 return std::pair<unsigned, int>{SrcIdx, Offset};
6025 });
6026 case X86::BI__builtin_ia32_vpermi2varqi512:
6027 return interp__builtin_ia32_shuffle_generic(
6028 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
6029 int Offset = ShuffleMask & 0x3F;
6030 unsigned SrcIdx = (ShuffleMask >> 6) & 0x1;
6031 return std::pair<unsigned, int>{SrcIdx, Offset};
6032 });
6033 case X86::BI__builtin_ia32_vperm2f128_pd256:
6034 case X86::BI__builtin_ia32_vperm2f128_ps256:
6035 case X86::BI__builtin_ia32_vperm2f128_si256:
6036 case X86::BI__builtin_ia32_permti256: {
6037 unsigned NumElements =
6038 Call->getArg(Arg: 0)->getType()->castAs<VectorType>()->getNumElements();
6039 unsigned PreservedBitsCnt = NumElements >> 2;
6040 return interp__builtin_ia32_shuffle_generic(
6041 S, OpPC, Call,
6042 GetSourceIndex: [PreservedBitsCnt](unsigned DstIdx, unsigned ShuffleMask) {
6043 unsigned ControlBitsCnt = DstIdx >> PreservedBitsCnt << 2;
6044 unsigned ControlBits = ShuffleMask >> ControlBitsCnt;
6045
6046 if (ControlBits & 0b1000)
6047 return std::make_pair(x: 0u, y: -1);
6048
6049 unsigned SrcVecIdx = (ControlBits & 0b10) >> 1;
6050 unsigned PreservedBitsMask = (1 << PreservedBitsCnt) - 1;
6051 int SrcIdx = ((ControlBits & 0b1) << PreservedBitsCnt) |
6052 (DstIdx & PreservedBitsMask);
6053 return std::make_pair(x&: SrcVecIdx, y&: SrcIdx);
6054 });
6055 }
6056 case X86::BI__builtin_ia32_pshufb128:
6057 case X86::BI__builtin_ia32_pshufb256:
6058 case X86::BI__builtin_ia32_pshufb512:
6059 return interp__builtin_ia32_shuffle_generic(
6060 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
6061 uint8_t Ctlb = static_cast<uint8_t>(ShuffleMask);
6062 if (Ctlb & 0x80)
6063 return std::make_pair(x: 0, y: -1);
6064
6065 unsigned LaneBase = (DstIdx / 16) * 16;
6066 unsigned SrcOffset = Ctlb & 0x0F;
6067 unsigned SrcIdx = LaneBase + SrcOffset;
6068 return std::make_pair(x: 0, y: static_cast<int>(SrcIdx));
6069 });
6070
6071 case X86::BI__builtin_ia32_pshuflw:
6072 case X86::BI__builtin_ia32_pshuflw256:
6073 case X86::BI__builtin_ia32_pshuflw512:
6074 return interp__builtin_ia32_shuffle_generic(
6075 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
6076 unsigned LaneBase = (DstIdx / 8) * 8;
6077 unsigned LaneIdx = DstIdx % 8;
6078 if (LaneIdx < 4) {
6079 unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
6080 return std::make_pair(x: 0, y: static_cast<int>(LaneBase + Sel));
6081 }
6082
6083 return std::make_pair(x: 0, y: static_cast<int>(DstIdx));
6084 });
6085
6086 case X86::BI__builtin_ia32_pshufhw:
6087 case X86::BI__builtin_ia32_pshufhw256:
6088 case X86::BI__builtin_ia32_pshufhw512:
6089 return interp__builtin_ia32_shuffle_generic(
6090 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
6091 unsigned LaneBase = (DstIdx / 8) * 8;
6092 unsigned LaneIdx = DstIdx % 8;
6093 if (LaneIdx >= 4) {
6094 unsigned Sel = (ShuffleMask >> (2 * (LaneIdx - 4))) & 0x3;
6095 return std::make_pair(x: 0, y: static_cast<int>(LaneBase + 4 + Sel));
6096 }
6097
6098 return std::make_pair(x: 0, y: static_cast<int>(DstIdx));
6099 });
6100
6101 case X86::BI__builtin_ia32_pshufd:
6102 case X86::BI__builtin_ia32_pshufd256:
6103 case X86::BI__builtin_ia32_pshufd512:
6104 case X86::BI__builtin_ia32_vpermilps:
6105 case X86::BI__builtin_ia32_vpermilps256:
6106 case X86::BI__builtin_ia32_vpermilps512:
6107 return interp__builtin_ia32_shuffle_generic(
6108 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
6109 unsigned LaneBase = (DstIdx / 4) * 4;
6110 unsigned LaneIdx = DstIdx % 4;
6111 unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
6112 return std::make_pair(x: 0, y: static_cast<int>(LaneBase + Sel));
6113 });
6114
6115 case X86::BI__builtin_ia32_vpermilvarpd:
6116 case X86::BI__builtin_ia32_vpermilvarpd256:
6117 case X86::BI__builtin_ia32_vpermilvarpd512:
6118 return interp__builtin_ia32_shuffle_generic(
6119 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
6120 unsigned NumElemPerLane = 2;
6121 unsigned Lane = DstIdx / NumElemPerLane;
6122 unsigned Offset = ShuffleMask & 0b10 ? 1 : 0;
6123 return std::make_pair(
6124 x: 0, y: static_cast<int>(Lane * NumElemPerLane + Offset));
6125 });
6126
6127 case X86::BI__builtin_ia32_vpermilvarps:
6128 case X86::BI__builtin_ia32_vpermilvarps256:
6129 case X86::BI__builtin_ia32_vpermilvarps512:
6130 return interp__builtin_ia32_shuffle_generic(
6131 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
6132 unsigned NumElemPerLane = 4;
6133 unsigned Lane = DstIdx / NumElemPerLane;
6134 unsigned Offset = ShuffleMask & 0b11;
6135 return std::make_pair(
6136 x: 0, y: static_cast<int>(Lane * NumElemPerLane + Offset));
6137 });
6138
6139 case X86::BI__builtin_ia32_vpermilpd:
6140 case X86::BI__builtin_ia32_vpermilpd256:
6141 case X86::BI__builtin_ia32_vpermilpd512:
6142 return interp__builtin_ia32_shuffle_generic(
6143 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Control) {
6144 unsigned NumElemPerLane = 2;
6145 unsigned BitsPerElem = 1;
6146 unsigned MaskBits = 8;
6147 unsigned IndexMask = 0x1;
6148 unsigned Lane = DstIdx / NumElemPerLane;
6149 unsigned LaneOffset = Lane * NumElemPerLane;
6150 unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
6151 unsigned Index = (Control >> BitIndex) & IndexMask;
6152 return std::make_pair(x: 0, y: static_cast<int>(LaneOffset + Index));
6153 });
6154
6155 case X86::BI__builtin_ia32_permdf256:
6156 case X86::BI__builtin_ia32_permdi256:
6157 return interp__builtin_ia32_shuffle_generic(
6158 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Control) {
6159 // permute4x64 operates on 4 64-bit elements
6160 // For element i (0-3), extract bits [2*i+1:2*i] from Control
6161 unsigned Index = (Control >> (2 * DstIdx)) & 0x3;
6162 return std::make_pair(x: 0, y: static_cast<int>(Index));
6163 });
6164
6165 case X86::BI__builtin_ia32_vpmultishiftqb128:
6166 case X86::BI__builtin_ia32_vpmultishiftqb256:
6167 case X86::BI__builtin_ia32_vpmultishiftqb512:
6168 return interp__builtin_ia32_multishiftqb(S, OpPC, Call);
6169 case X86::BI__builtin_ia32_kandqi:
6170 case X86::BI__builtin_ia32_kandhi:
6171 case X86::BI__builtin_ia32_kandsi:
6172 case X86::BI__builtin_ia32_kanddi:
6173 return interp__builtin_elementwise_int_binop(
6174 S, OpPC, Call,
6175 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; });
6176
6177 case X86::BI__builtin_ia32_kandnqi:
6178 case X86::BI__builtin_ia32_kandnhi:
6179 case X86::BI__builtin_ia32_kandnsi:
6180 case X86::BI__builtin_ia32_kandndi:
6181 return interp__builtin_elementwise_int_binop(
6182 S, OpPC, Call,
6183 Fn: [](const APSInt &LHS, const APSInt &RHS) { return ~LHS & RHS; });
6184
6185 case X86::BI__builtin_ia32_korqi:
6186 case X86::BI__builtin_ia32_korhi:
6187 case X86::BI__builtin_ia32_korsi:
6188 case X86::BI__builtin_ia32_kordi:
6189 return interp__builtin_elementwise_int_binop(
6190 S, OpPC, Call,
6191 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS | RHS; });
6192
6193 case X86::BI__builtin_ia32_kxnorqi:
6194 case X86::BI__builtin_ia32_kxnorhi:
6195 case X86::BI__builtin_ia32_kxnorsi:
6196 case X86::BI__builtin_ia32_kxnordi:
6197 return interp__builtin_elementwise_int_binop(
6198 S, OpPC, Call,
6199 Fn: [](const APSInt &LHS, const APSInt &RHS) { return ~(LHS ^ RHS); });
6200
6201 case X86::BI__builtin_ia32_kxorqi:
6202 case X86::BI__builtin_ia32_kxorhi:
6203 case X86::BI__builtin_ia32_kxorsi:
6204 case X86::BI__builtin_ia32_kxordi:
6205 return interp__builtin_elementwise_int_binop(
6206 S, OpPC, Call,
6207 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS ^ RHS; });
6208
6209 case X86::BI__builtin_ia32_knotqi:
6210 case X86::BI__builtin_ia32_knothi:
6211 case X86::BI__builtin_ia32_knotsi:
6212 case X86::BI__builtin_ia32_knotdi:
6213 return interp__builtin_elementwise_int_unaryop(
6214 S, OpPC, Call, Fn: [](const APSInt &Src) { return ~Src; });
6215
6216 case X86::BI__builtin_ia32_kaddqi:
6217 case X86::BI__builtin_ia32_kaddhi:
6218 case X86::BI__builtin_ia32_kaddsi:
6219 case X86::BI__builtin_ia32_kadddi:
6220 return interp__builtin_elementwise_int_binop(
6221 S, OpPC, Call,
6222 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
6223
6224 case X86::BI__builtin_ia32_kmovb:
6225 case X86::BI__builtin_ia32_kmovw:
6226 case X86::BI__builtin_ia32_kmovd:
6227 case X86::BI__builtin_ia32_kmovq:
6228 return interp__builtin_elementwise_int_unaryop(
6229 S, OpPC, Call, Fn: [](const APSInt &Src) { return Src; });
6230
6231 case X86::BI__builtin_ia32_kunpckhi:
6232 case X86::BI__builtin_ia32_kunpckdi:
6233 case X86::BI__builtin_ia32_kunpcksi:
6234 return interp__builtin_elementwise_int_binop(
6235 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
6236 // Generic kunpack: extract lower half of each operand and concatenate
6237 // Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0]
6238 unsigned BW = A.getBitWidth();
6239 return APSInt(A.trunc(width: BW / 2).concat(NewLSB: B.trunc(width: BW / 2)),
6240 A.isUnsigned());
6241 });
6242
6243 case X86::BI__builtin_ia32_phminposuw128:
6244 return interp__builtin_ia32_phminposuw(S, OpPC, Call);
6245
6246 case X86::BI__builtin_ia32_psraq128:
6247 case X86::BI__builtin_ia32_psraq256:
6248 case X86::BI__builtin_ia32_psraq512:
6249 case X86::BI__builtin_ia32_psrad128:
6250 case X86::BI__builtin_ia32_psrad256:
6251 case X86::BI__builtin_ia32_psrad512:
6252 case X86::BI__builtin_ia32_psraw128:
6253 case X86::BI__builtin_ia32_psraw256:
6254 case X86::BI__builtin_ia32_psraw512:
6255 return interp__builtin_ia32_shift_with_count(
6256 S, OpPC, Call,
6257 ShiftOp: [](const APInt &Elt, uint64_t Count) { return Elt.ashr(ShiftAmt: Count); },
6258 OverflowOp: [](const APInt &Elt, unsigned Width) { return Elt.ashr(ShiftAmt: Width - 1); });
6259
6260 case X86::BI__builtin_ia32_psllq128:
6261 case X86::BI__builtin_ia32_psllq256:
6262 case X86::BI__builtin_ia32_psllq512:
6263 case X86::BI__builtin_ia32_pslld128:
6264 case X86::BI__builtin_ia32_pslld256:
6265 case X86::BI__builtin_ia32_pslld512:
6266 case X86::BI__builtin_ia32_psllw128:
6267 case X86::BI__builtin_ia32_psllw256:
6268 case X86::BI__builtin_ia32_psllw512:
6269 return interp__builtin_ia32_shift_with_count(
6270 S, OpPC, Call,
6271 ShiftOp: [](const APInt &Elt, uint64_t Count) { return Elt.shl(shiftAmt: Count); },
6272 OverflowOp: [](const APInt &Elt, unsigned Width) { return APInt::getZero(numBits: Width); });
6273
6274 case X86::BI__builtin_ia32_psrlq128:
6275 case X86::BI__builtin_ia32_psrlq256:
6276 case X86::BI__builtin_ia32_psrlq512:
6277 case X86::BI__builtin_ia32_psrld128:
6278 case X86::BI__builtin_ia32_psrld256:
6279 case X86::BI__builtin_ia32_psrld512:
6280 case X86::BI__builtin_ia32_psrlw128:
6281 case X86::BI__builtin_ia32_psrlw256:
6282 case X86::BI__builtin_ia32_psrlw512:
6283 return interp__builtin_ia32_shift_with_count(
6284 S, OpPC, Call,
6285 ShiftOp: [](const APInt &Elt, uint64_t Count) { return Elt.lshr(shiftAmt: Count); },
6286 OverflowOp: [](const APInt &Elt, unsigned Width) { return APInt::getZero(numBits: Width); });
6287
6288 case X86::BI__builtin_ia32_pternlogd128_mask:
6289 case X86::BI__builtin_ia32_pternlogd256_mask:
6290 case X86::BI__builtin_ia32_pternlogd512_mask:
6291 case X86::BI__builtin_ia32_pternlogq128_mask:
6292 case X86::BI__builtin_ia32_pternlogq256_mask:
6293 case X86::BI__builtin_ia32_pternlogq512_mask:
6294 return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/false);
6295 case X86::BI__builtin_ia32_pternlogd128_maskz:
6296 case X86::BI__builtin_ia32_pternlogd256_maskz:
6297 case X86::BI__builtin_ia32_pternlogd512_maskz:
6298 case X86::BI__builtin_ia32_pternlogq128_maskz:
6299 case X86::BI__builtin_ia32_pternlogq256_maskz:
6300 case X86::BI__builtin_ia32_pternlogq512_maskz:
6301 return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/true);
6302 case Builtin::BI__builtin_elementwise_fshl:
6303 return interp__builtin_elementwise_triop(S, OpPC, Call,
6304 Fn: llvm::APIntOps::fshl);
6305 case Builtin::BI__builtin_elementwise_fshr:
6306 return interp__builtin_elementwise_triop(S, OpPC, Call,
6307 Fn: llvm::APIntOps::fshr);
6308
6309 case X86::BI__builtin_ia32_shuf_f32x4_256:
6310 case X86::BI__builtin_ia32_shuf_i32x4_256:
6311 case X86::BI__builtin_ia32_shuf_f64x2_256:
6312 case X86::BI__builtin_ia32_shuf_i64x2_256:
6313 case X86::BI__builtin_ia32_shuf_f32x4:
6314 case X86::BI__builtin_ia32_shuf_i32x4:
6315 case X86::BI__builtin_ia32_shuf_f64x2:
6316 case X86::BI__builtin_ia32_shuf_i64x2: {
6317 // Destination and sources A, B all have the same type.
6318 QualType VecQT = Call->getArg(Arg: 0)->getType();
6319 const auto *VecT = VecQT->castAs<VectorType>();
6320 unsigned NumElems = VecT->getNumElements();
6321 unsigned ElemBits = S.getASTContext().getTypeSize(T: VecT->getElementType());
6322 unsigned LaneBits = 128u;
6323 unsigned NumLanes = (NumElems * ElemBits) / LaneBits;
6324 unsigned NumElemsPerLane = LaneBits / ElemBits;
6325
6326 return interp__builtin_ia32_shuffle_generic(
6327 S, OpPC, Call,
6328 GetSourceIndex: [NumLanes, NumElemsPerLane](unsigned DstIdx, unsigned ShuffleMask) {
6329 // DstIdx determines source. ShuffleMask selects lane in source.
6330 unsigned BitsPerElem = NumLanes / 2;
6331 unsigned IndexMask = (1u << BitsPerElem) - 1;
6332 unsigned Lane = DstIdx / NumElemsPerLane;
6333 unsigned SrcIdx = (Lane < NumLanes / 2) ? 0 : 1;
6334 unsigned BitIdx = BitsPerElem * Lane;
6335 unsigned SrcLaneIdx = (ShuffleMask >> BitIdx) & IndexMask;
6336 unsigned ElemInLane = DstIdx % NumElemsPerLane;
6337 unsigned IdxToPick = SrcLaneIdx * NumElemsPerLane + ElemInLane;
6338 return std::pair<unsigned, int>{SrcIdx, IdxToPick};
6339 });
6340 }
6341
6342 case X86::BI__builtin_ia32_insertf32x4_256:
6343 case X86::BI__builtin_ia32_inserti32x4_256:
6344 case X86::BI__builtin_ia32_insertf64x2_256:
6345 case X86::BI__builtin_ia32_inserti64x2_256:
6346 case X86::BI__builtin_ia32_insertf32x4:
6347 case X86::BI__builtin_ia32_inserti32x4:
6348 case X86::BI__builtin_ia32_insertf64x2_512:
6349 case X86::BI__builtin_ia32_inserti64x2_512:
6350 case X86::BI__builtin_ia32_insertf32x8:
6351 case X86::BI__builtin_ia32_inserti32x8:
6352 case X86::BI__builtin_ia32_insertf64x4:
6353 case X86::BI__builtin_ia32_inserti64x4:
6354 case X86::BI__builtin_ia32_vinsertf128_ps256:
6355 case X86::BI__builtin_ia32_vinsertf128_pd256:
6356 case X86::BI__builtin_ia32_vinsertf128_si256:
6357 case X86::BI__builtin_ia32_insert128i256:
6358 return interp__builtin_ia32_insert_subvector(S, OpPC, Call, ID: BuiltinID);
6359
6360 case clang::X86::BI__builtin_ia32_vcvtps2ph:
6361 case clang::X86::BI__builtin_ia32_vcvtps2ph256:
6362 return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call);
6363
6364 case X86::BI__builtin_ia32_vec_ext_v4hi:
6365 case X86::BI__builtin_ia32_vec_ext_v16qi:
6366 case X86::BI__builtin_ia32_vec_ext_v8hi:
6367 case X86::BI__builtin_ia32_vec_ext_v4si:
6368 case X86::BI__builtin_ia32_vec_ext_v2di:
6369 case X86::BI__builtin_ia32_vec_ext_v32qi:
6370 case X86::BI__builtin_ia32_vec_ext_v16hi:
6371 case X86::BI__builtin_ia32_vec_ext_v8si:
6372 case X86::BI__builtin_ia32_vec_ext_v4di:
6373 case X86::BI__builtin_ia32_vec_ext_v4sf:
6374 return interp__builtin_ia32_vec_ext(S, OpPC, Call, ID: BuiltinID);
6375
6376 case X86::BI__builtin_ia32_vec_set_v4hi:
6377 case X86::BI__builtin_ia32_vec_set_v16qi:
6378 case X86::BI__builtin_ia32_vec_set_v8hi:
6379 case X86::BI__builtin_ia32_vec_set_v4si:
6380 case X86::BI__builtin_ia32_vec_set_v2di:
6381 case X86::BI__builtin_ia32_vec_set_v32qi:
6382 case X86::BI__builtin_ia32_vec_set_v16hi:
6383 case X86::BI__builtin_ia32_vec_set_v8si:
6384 case X86::BI__builtin_ia32_vec_set_v4di:
6385 return interp__builtin_ia32_vec_set(S, OpPC, Call, ID: BuiltinID);
6386
6387 case X86::BI__builtin_ia32_cvtb2mask128:
6388 case X86::BI__builtin_ia32_cvtb2mask256:
6389 case X86::BI__builtin_ia32_cvtb2mask512:
6390 case X86::BI__builtin_ia32_cvtw2mask128:
6391 case X86::BI__builtin_ia32_cvtw2mask256:
6392 case X86::BI__builtin_ia32_cvtw2mask512:
6393 case X86::BI__builtin_ia32_cvtd2mask128:
6394 case X86::BI__builtin_ia32_cvtd2mask256:
6395 case X86::BI__builtin_ia32_cvtd2mask512:
6396 case X86::BI__builtin_ia32_cvtq2mask128:
6397 case X86::BI__builtin_ia32_cvtq2mask256:
6398 case X86::BI__builtin_ia32_cvtq2mask512:
6399 return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, ID: BuiltinID);
6400
6401 case X86::BI__builtin_ia32_cvtmask2b128:
6402 case X86::BI__builtin_ia32_cvtmask2b256:
6403 case X86::BI__builtin_ia32_cvtmask2b512:
6404 case X86::BI__builtin_ia32_cvtmask2w128:
6405 case X86::BI__builtin_ia32_cvtmask2w256:
6406 case X86::BI__builtin_ia32_cvtmask2w512:
6407 case X86::BI__builtin_ia32_cvtmask2d128:
6408 case X86::BI__builtin_ia32_cvtmask2d256:
6409 case X86::BI__builtin_ia32_cvtmask2d512:
6410 case X86::BI__builtin_ia32_cvtmask2q128:
6411 case X86::BI__builtin_ia32_cvtmask2q256:
6412 case X86::BI__builtin_ia32_cvtmask2q512:
6413 return interp__builtin_ia32_cvt_mask2vec(S, OpPC, Call, ID: BuiltinID);
6414
6415 case X86::BI__builtin_ia32_cvtsd2ss:
6416 return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, HasRoundingMask: false);
6417
6418 case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
6419 return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, HasRoundingMask: true);
6420
6421 case X86::BI__builtin_ia32_cvtpd2ps:
6422 case X86::BI__builtin_ia32_cvtpd2ps256:
6423 return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, IsMasked: false, HasRounding: false);
6424 case X86::BI__builtin_ia32_cvtpd2ps_mask:
6425 return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, IsMasked: true, HasRounding: false);
6426 case X86::BI__builtin_ia32_cvtpd2ps512_mask:
6427 return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, IsMasked: true, HasRounding: true);
6428
6429 case X86::BI__builtin_ia32_cmpb128_mask:
6430 case X86::BI__builtin_ia32_cmpw128_mask:
6431 case X86::BI__builtin_ia32_cmpd128_mask:
6432 case X86::BI__builtin_ia32_cmpq128_mask:
6433 case X86::BI__builtin_ia32_cmpb256_mask:
6434 case X86::BI__builtin_ia32_cmpw256_mask:
6435 case X86::BI__builtin_ia32_cmpd256_mask:
6436 case X86::BI__builtin_ia32_cmpq256_mask:
6437 case X86::BI__builtin_ia32_cmpb512_mask:
6438 case X86::BI__builtin_ia32_cmpw512_mask:
6439 case X86::BI__builtin_ia32_cmpd512_mask:
6440 case X86::BI__builtin_ia32_cmpq512_mask:
6441 return interp__builtin_ia32_cmp_mask(S, OpPC, Call, ID: BuiltinID,
6442 /*IsUnsigned=*/false);
6443
6444 case X86::BI__builtin_ia32_ucmpb128_mask:
6445 case X86::BI__builtin_ia32_ucmpw128_mask:
6446 case X86::BI__builtin_ia32_ucmpd128_mask:
6447 case X86::BI__builtin_ia32_ucmpq128_mask:
6448 case X86::BI__builtin_ia32_ucmpb256_mask:
6449 case X86::BI__builtin_ia32_ucmpw256_mask:
6450 case X86::BI__builtin_ia32_ucmpd256_mask:
6451 case X86::BI__builtin_ia32_ucmpq256_mask:
6452 case X86::BI__builtin_ia32_ucmpb512_mask:
6453 case X86::BI__builtin_ia32_ucmpw512_mask:
6454 case X86::BI__builtin_ia32_ucmpd512_mask:
6455 case X86::BI__builtin_ia32_ucmpq512_mask:
6456 return interp__builtin_ia32_cmp_mask(S, OpPC, Call, ID: BuiltinID,
6457 /*IsUnsigned=*/true);
6458
6459 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
6460 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
6461 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
6462 return interp__builtin_ia32_shufbitqmb_mask(S, OpPC, Call);
6463
6464 case X86::BI__builtin_ia32_pslldqi128_byteshift:
6465 case X86::BI__builtin_ia32_pslldqi256_byteshift:
6466 case X86::BI__builtin_ia32_pslldqi512_byteshift:
6467 // These SLLDQ intrinsics always operate on byte elements (8 bits).
6468 // The lane width is hardcoded to 16 to match the SIMD register size,
6469 // but the algorithm processes one byte per iteration,
6470 // so APInt(8, ...) is correct and intentional.
6471 return interp__builtin_ia32_shuffle_generic(
6472 S, OpPC, Call,
6473 GetSourceIndex: [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
6474 unsigned LaneBase = (DstIdx / 16) * 16;
6475 unsigned LaneIdx = DstIdx % 16;
6476 if (LaneIdx < Shift)
6477 return std::make_pair(x: 0, y: -1);
6478
6479 return std::make_pair(x: 0,
6480 y: static_cast<int>(LaneBase + LaneIdx - Shift));
6481 });
6482
6483 case X86::BI__builtin_ia32_psrldqi128_byteshift:
6484 case X86::BI__builtin_ia32_psrldqi256_byteshift:
6485 case X86::BI__builtin_ia32_psrldqi512_byteshift:
6486 // These SRLDQ intrinsics always operate on byte elements (8 bits).
6487 // The lane width is hardcoded to 16 to match the SIMD register size,
6488 // but the algorithm processes one byte per iteration,
6489 // so APInt(8, ...) is correct and intentional.
6490 return interp__builtin_ia32_shuffle_generic(
6491 S, OpPC, Call,
6492 GetSourceIndex: [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
6493 unsigned LaneBase = (DstIdx / 16) * 16;
6494 unsigned LaneIdx = DstIdx % 16;
6495 if (LaneIdx + Shift < 16)
6496 return std::make_pair(x: 0,
6497 y: static_cast<int>(LaneBase + LaneIdx + Shift));
6498
6499 return std::make_pair(x: 0, y: -1);
6500 });
6501
6502 case X86::BI__builtin_ia32_palignr128:
6503 case X86::BI__builtin_ia32_palignr256:
6504 case X86::BI__builtin_ia32_palignr512:
6505 return interp__builtin_ia32_shuffle_generic(
6506 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Shift) {
6507 // Default to -1 → zero-fill this destination element
6508 unsigned VecIdx = 1;
6509 int ElemIdx = -1;
6510
6511 int Lane = DstIdx / 16;
6512 int Offset = DstIdx % 16;
6513
6514 // Elements come from VecB first, then VecA after the shift boundary
6515 unsigned ShiftedIdx = Offset + (Shift & 0xFF);
6516 if (ShiftedIdx < 16) { // from VecB
6517 ElemIdx = ShiftedIdx + (Lane * 16);
6518 } else if (ShiftedIdx < 32) { // from VecA
6519 VecIdx = 0;
6520 ElemIdx = (ShiftedIdx - 16) + (Lane * 16);
6521 }
6522
6523 return std::pair<unsigned, int>{VecIdx, ElemIdx};
6524 });
6525
6526 case X86::BI__builtin_ia32_alignd128:
6527 case X86::BI__builtin_ia32_alignd256:
6528 case X86::BI__builtin_ia32_alignd512:
6529 case X86::BI__builtin_ia32_alignq128:
6530 case X86::BI__builtin_ia32_alignq256:
6531 case X86::BI__builtin_ia32_alignq512: {
6532 unsigned NumElems = Call->getType()->castAs<VectorType>()->getNumElements();
6533 return interp__builtin_ia32_shuffle_generic(
6534 S, OpPC, Call, GetSourceIndex: [NumElems](unsigned DstIdx, unsigned Shift) {
6535 unsigned Imm = Shift & 0xFF;
6536 unsigned EffectiveShift = Imm & (NumElems - 1);
6537 unsigned SourcePos = DstIdx + EffectiveShift;
6538 unsigned VecIdx = SourcePos < NumElems ? 1u : 0u;
6539 unsigned ElemIdx = SourcePos & (NumElems - 1);
6540 return std::pair<unsigned, int>{VecIdx, static_cast<int>(ElemIdx)};
6541 });
6542 }
6543
6544 case clang::X86::BI__builtin_ia32_minps:
6545 case clang::X86::BI__builtin_ia32_minpd:
6546 case clang::X86::BI__builtin_ia32_minph128:
6547 case clang::X86::BI__builtin_ia32_minph256:
6548 case clang::X86::BI__builtin_ia32_minps256:
6549 case clang::X86::BI__builtin_ia32_minpd256:
6550 case clang::X86::BI__builtin_ia32_minps512:
6551 case clang::X86::BI__builtin_ia32_minpd512:
6552 case clang::X86::BI__builtin_ia32_minph512:
6553 return interp__builtin_elementwise_fp_binop(
6554 S, OpPC, Call,
6555 Fn: [](const APFloat &A, const APFloat &B,
6556 std::optional<APSInt>) -> std::optional<APFloat> {
6557 if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
6558 B.isInfinity() || B.isDenormal())
6559 return std::nullopt;
6560 if (A.isZero() && B.isZero())
6561 return B;
6562 return llvm::minimum(A, B);
6563 });
6564
6565 case clang::X86::BI__builtin_ia32_minss:
6566 case clang::X86::BI__builtin_ia32_minsd:
6567 return interp__builtin_elementwise_fp_binop(
6568 S, OpPC, Call,
6569 Fn: [](const APFloat &A, const APFloat &B,
6570 std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
6571 return EvalScalarMinMaxFp(A, B, RoundingMode, /*IsMin=*/true);
6572 },
6573 /*IsScalar=*/true);
6574
6575 case clang::X86::BI__builtin_ia32_minsd_round_mask:
6576 case clang::X86::BI__builtin_ia32_minss_round_mask:
6577 case clang::X86::BI__builtin_ia32_minsh_round_mask:
6578 case clang::X86::BI__builtin_ia32_maxsd_round_mask:
6579 case clang::X86::BI__builtin_ia32_maxss_round_mask:
6580 case clang::X86::BI__builtin_ia32_maxsh_round_mask: {
6581 bool IsMin = BuiltinID == clang::X86::BI__builtin_ia32_minsd_round_mask ||
6582 BuiltinID == clang::X86::BI__builtin_ia32_minss_round_mask ||
6583 BuiltinID == clang::X86::BI__builtin_ia32_minsh_round_mask;
6584 return interp__builtin_scalar_fp_round_mask_binop(
6585 S, OpPC, Call,
6586 Fn: [IsMin](const APFloat &A, const APFloat &B,
6587 std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
6588 return EvalScalarMinMaxFp(A, B, RoundingMode, IsMin);
6589 });
6590 }
6591
6592 case clang::X86::BI__builtin_ia32_maxps:
6593 case clang::X86::BI__builtin_ia32_maxpd:
6594 case clang::X86::BI__builtin_ia32_maxph128:
6595 case clang::X86::BI__builtin_ia32_maxph256:
6596 case clang::X86::BI__builtin_ia32_maxps256:
6597 case clang::X86::BI__builtin_ia32_maxpd256:
6598 case clang::X86::BI__builtin_ia32_maxps512:
6599 case clang::X86::BI__builtin_ia32_maxpd512:
6600 case clang::X86::BI__builtin_ia32_maxph512:
6601 return interp__builtin_elementwise_fp_binop(
6602 S, OpPC, Call,
6603 Fn: [](const APFloat &A, const APFloat &B,
6604 std::optional<APSInt>) -> std::optional<APFloat> {
6605 if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
6606 B.isInfinity() || B.isDenormal())
6607 return std::nullopt;
6608 if (A.isZero() && B.isZero())
6609 return B;
6610 return llvm::maximum(A, B);
6611 });
6612
6613 case clang::X86::BI__builtin_ia32_maxss:
6614 case clang::X86::BI__builtin_ia32_maxsd:
6615 return interp__builtin_elementwise_fp_binop(
6616 S, OpPC, Call,
6617 Fn: [](const APFloat &A, const APFloat &B,
6618 std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
6619 return EvalScalarMinMaxFp(A, B, RoundingMode, /*IsMin=*/false);
6620 },
6621 /*IsScalar=*/true);
6622 case X86::BI__builtin_ia32_vpdpwssd128:
6623 case X86::BI__builtin_ia32_vpdpwssd256:
6624 case X86::BI__builtin_ia32_vpdpwssd512:
6625 case X86::BI__builtin_ia32_vpdpbusd128:
6626 case X86::BI__builtin_ia32_vpdpbusd256:
6627 case X86::BI__builtin_ia32_vpdpbusd512:
6628 return interp__builtin_ia32_vpdp(S, OpPC, Call, IsSaturating: false);
6629 case X86::BI__builtin_ia32_vpdpwssds128:
6630 case X86::BI__builtin_ia32_vpdpwssds256:
6631 case X86::BI__builtin_ia32_vpdpwssds512:
6632 case X86::BI__builtin_ia32_vpdpbusds128:
6633 case X86::BI__builtin_ia32_vpdpbusds256:
6634 case X86::BI__builtin_ia32_vpdpbusds512:
6635 return interp__builtin_ia32_vpdp(S, OpPC, Call, IsSaturating: true);
6636 default:
6637 S.FFDiag(Loc: S.Current->getLocation(PC: OpPC),
6638 DiagId: diag::note_invalid_subexpr_in_const_expr)
6639 << S.Current->getRange(PC: OpPC);
6640
6641 return false;
6642 }
6643
6644 llvm_unreachable("Unhandled builtin ID");
6645}
6646
6647bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E,
6648 ArrayRef<int64_t> ArrayIndices, int64_t &IntResult) {
6649 S.getASTContext().recordOffsetOfEvaluation(E);
6650 CharUnits Result;
6651 unsigned N = E->getNumComponents();
6652 assert(N > 0);
6653
6654 unsigned ArrayIndex = 0;
6655 QualType CurrentType = E->getTypeSourceInfo()->getType();
6656 for (unsigned I = 0; I != N; ++I) {
6657 const OffsetOfNode &Node = E->getComponent(Idx: I);
6658 switch (Node.getKind()) {
6659 case OffsetOfNode::Field: {
6660 const FieldDecl *MemberDecl = Node.getField();
6661 const auto *RD = CurrentType->getAsRecordDecl();
6662 if (!RD || RD->isInvalidDecl())
6663 return false;
6664 const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(D: RD);
6665 unsigned FieldIndex = MemberDecl->getFieldIndex();
6666 assert(FieldIndex < RL.getFieldCount() && "offsetof field in wrong type");
6667 Result +=
6668 S.getASTContext().toCharUnitsFromBits(BitSize: RL.getFieldOffset(FieldNo: FieldIndex));
6669 CurrentType = MemberDecl->getType().getNonReferenceType();
6670 break;
6671 }
6672 case OffsetOfNode::Array: {
6673 // When generating bytecode, we put all the index expressions as Sint64 on
6674 // the stack.
6675 int64_t Index = ArrayIndices[ArrayIndex];
6676 const ArrayType *AT = S.getASTContext().getAsArrayType(T: CurrentType);
6677 if (!AT)
6678 return false;
6679 CurrentType = AT->getElementType();
6680 CharUnits ElementSize = S.getASTContext().getTypeSizeInChars(T: CurrentType);
6681 Result += Index * ElementSize;
6682 ++ArrayIndex;
6683 break;
6684 }
6685 case OffsetOfNode::Base: {
6686 const CXXBaseSpecifier *BaseSpec = Node.getBase();
6687 if (BaseSpec->isVirtual())
6688 return false;
6689
6690 // Find the layout of the class whose base we are looking into.
6691 const auto *RD = CurrentType->getAsCXXRecordDecl();
6692 if (!RD || RD->isInvalidDecl())
6693 return false;
6694 const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(D: RD);
6695
6696 // Find the base class itself.
6697 CurrentType = BaseSpec->getType();
6698 const auto *BaseRD = CurrentType->getAsCXXRecordDecl();
6699 if (!BaseRD)
6700 return false;
6701
6702 // Add the offset to the base.
6703 Result += RL.getBaseClassOffset(Base: BaseRD);
6704 break;
6705 }
6706 case OffsetOfNode::Identifier:
6707 llvm_unreachable("Dependent OffsetOfExpr?");
6708 }
6709 }
6710
6711 IntResult = Result.getQuantity();
6712
6713 return true;
6714}
6715
6716bool SetThreeWayComparisonField(InterpState &S, CodePtr OpPC,
6717 const Pointer &Ptr, const APSInt &IntValue) {
6718
6719 const Record *R = Ptr.getRecord();
6720 assert(R);
6721 assert(R->getNumFields() == 1);
6722
6723 unsigned FieldOffset = R->getField(I: 0u)->Offset;
6724 PtrView FieldPtr = Ptr.view().atField(Offset: FieldOffset);
6725 PrimType FieldT = FieldPtr.getFieldDesc()->getPrimType();
6726
6727 INT_TYPE_SWITCH(FieldT,
6728 FieldPtr.deref<T>() = T::from(IntValue.getSExtValue()));
6729 FieldPtr.initialize();
6730 return true;
6731}
6732
6733static void zeroAll(PtrView Dest) {
6734 const Descriptor *Desc = Dest.getFieldDesc();
6735
6736 if (Desc->isPrimitive()) {
6737 TYPE_SWITCH(Desc->getPrimType(), {
6738 Dest.deref<T>().~T();
6739 new (&Dest.deref<T>()) T();
6740 });
6741 return;
6742 }
6743
6744 if (Desc->isRecord()) {
6745 const Record *R = Desc->ElemRecord;
6746 for (const Record::Field &F : R->fields()) {
6747 PtrView FieldPtr = Dest.atField(Offset: F.Offset);
6748 zeroAll(Dest: FieldPtr);
6749 }
6750 return;
6751 }
6752
6753 if (Desc->isPrimitiveArray()) {
6754 for (unsigned I = 0, N = Desc->getNumElems(); I != N; ++I) {
6755 TYPE_SWITCH(Desc->getPrimType(), {
6756 Dest.deref<T>().~T();
6757 new (&Dest.deref<T>()) T();
6758 });
6759 }
6760 return;
6761 }
6762
6763 if (Desc->isCompositeArray()) {
6764 for (unsigned I = 0, N = Desc->getNumElems(); I != N; ++I) {
6765 PtrView ElemPtr = Dest.atIndex(Idx: I).narrow();
6766 zeroAll(Dest: ElemPtr);
6767 }
6768 return;
6769 }
6770}
6771
6772static bool copyComposite(InterpState &S, CodePtr OpPC, PtrView Src,
6773 PtrView Dest, bool Activate);
6774static bool copyRecord(InterpState &S, CodePtr OpPC, PtrView Src, PtrView Dest,
6775 bool Activate = false) {
6776 [[maybe_unused]] const Descriptor *SrcDesc = Src.getFieldDesc();
6777 const Descriptor *DestDesc = Dest.getFieldDesc();
6778
6779 auto copyField = [&](const Record::Field &F, bool Activate) -> bool {
6780 PtrView DestField = Dest.atField(Offset: F.Offset);
6781 if (OptPrimType FT = S.Ctx.classify(T: F.Decl->getType())) {
6782 TYPE_SWITCH(*FT, {
6783 DestField.deref<T>() = Src.atField(F.Offset).deref<T>();
6784 if (Src.atField(F.Offset).isInitialized())
6785 DestField.initialize();
6786 if (Activate)
6787 DestField.activate();
6788 });
6789 return true;
6790 }
6791 // Composite field.
6792 return copyComposite(S, OpPC, Src: Src.atField(Offset: F.Offset), Dest: DestField, Activate);
6793 };
6794
6795 assert(SrcDesc->isRecord());
6796 assert(SrcDesc->ElemRecord == DestDesc->ElemRecord);
6797 const Record *R = DestDesc->ElemRecord;
6798 for (const Record::Field &F : R->fields()) {
6799 PtrView FP = Src.atField(Offset: F.Offset);
6800
6801 if (!CheckMutable(S, OpPC, Ptr: FP))
6802 return false;
6803
6804 if (R->isUnion()) {
6805 // For unions, only copy the active field. Zero all others.
6806 if (FP.isActive()) {
6807 if (!copyField(F, /*Activate=*/true))
6808 return false;
6809 } else {
6810 PtrView DestField = Dest.atField(Offset: F.Offset);
6811 zeroAll(Dest: DestField);
6812 }
6813 } else {
6814 if (!copyField(F, Activate))
6815 return false;
6816 }
6817 }
6818
6819 for (const Record::Base &B : R->bases()) {
6820 PtrView DestBase = Dest.atField(Offset: B.Offset);
6821 if (!copyRecord(S, OpPC, Src: Src.atField(Offset: B.Offset), Dest: DestBase, Activate))
6822 return false;
6823 }
6824
6825 Dest.initialize();
6826 return true;
6827}
6828
6829static bool copyComposite(InterpState &S, CodePtr OpPC, PtrView Src,
6830 PtrView Dest, bool Activate = false) {
6831 assert(Src.isLive() && Dest.isLive());
6832
6833 [[maybe_unused]] const Descriptor *SrcDesc = Src.getFieldDesc();
6834 const Descriptor *DestDesc = Dest.getFieldDesc();
6835
6836 assert(!DestDesc->isPrimitive() && !SrcDesc->isPrimitive());
6837
6838 if (DestDesc->isPrimitiveArray()) {
6839 if (!SrcDesc->isPrimitiveArray())
6840 return false;
6841 // For floating types, check the actual QualType so we don't accidentally
6842 // mix up semantics.
6843 if (SrcDesc->getPrimType() == PT_Float) {
6844 if (!S.getASTContext().hasSimilarType(T1: SrcDesc->getElemQualType(),
6845 T2: DestDesc->getElemQualType()))
6846 return false;
6847 }
6848
6849 assert(SrcDesc->isPrimitiveArray());
6850 assert(SrcDesc->getNumElems() == DestDesc->getNumElems());
6851 assert(SrcDesc->getPrimType() == DestDesc->getPrimType());
6852 PrimType ET = DestDesc->getPrimType();
6853 for (unsigned I = 0, N = DestDesc->getNumElems(); I != N; ++I) {
6854 PtrView DestElem = Dest.atIndex(Idx: I);
6855 TYPE_SWITCH(ET, { DestElem.deref<T>() = Src.elem<T>(I); });
6856 DestElem.initializeElement(Index: I);
6857 }
6858 return true;
6859 }
6860
6861 if (DestDesc->isCompositeArray()) {
6862 if (!SrcDesc->isCompositeArray())
6863 return false;
6864 assert(SrcDesc->isCompositeArray());
6865 assert(SrcDesc->getNumElems() == DestDesc->getNumElems());
6866 for (unsigned I = 0, N = DestDesc->getNumElems(); I != N; ++I) {
6867 PtrView SrcElem = Src.atIndex(Idx: I).narrow();
6868 PtrView DestElem = Dest.atIndex(Idx: I).narrow();
6869 if (!copyComposite(S, OpPC, Src: SrcElem, Dest: DestElem, Activate))
6870 return false;
6871 }
6872 return true;
6873 }
6874
6875 if (DestDesc->isRecord()) {
6876 if (!SrcDesc->isRecord())
6877 return false;
6878 return copyRecord(S, OpPC, Src, Dest, Activate);
6879 }
6880 return Invalid(S, OpPC);
6881}
6882
6883bool DoMemcpy(InterpState &S, CodePtr OpPC, const Pointer &Src, Pointer &Dest) {
6884 if (!Src.isBlockPointer() || Src.getFieldDesc()->isPrimitive())
6885 return false;
6886 if (!Dest.isBlockPointer() || Dest.getFieldDesc()->isPrimitive())
6887 return false;
6888
6889 return copyComposite(S, OpPC, Src: Src.view(), Dest: Dest.view());
6890}
6891
6892} // namespace interp
6893} // namespace clang
6894