1//===--- InterpBuiltin.cpp - Interpreter for the constexpr VM ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "../ExprConstShared.h"
9#include "Boolean.h"
10#include "EvalEmitter.h"
11#include "InterpBuiltinBitCast.h"
12#include "InterpHelpers.h"
13#include "PrimType.h"
14#include "Program.h"
15#include "clang/AST/InferAlloc.h"
16#include "clang/AST/OSLog.h"
17#include "clang/AST/RecordLayout.h"
18#include "clang/Basic/Builtins.h"
19#include "clang/Basic/TargetBuiltins.h"
20#include "clang/Basic/TargetInfo.h"
21#include "llvm/ADT/StringExtras.h"
22#include "llvm/Support/AllocToken.h"
23#include "llvm/Support/ErrorHandling.h"
24#include "llvm/Support/SipHash.h"
25
26namespace clang {
27namespace interp {
28
29[[maybe_unused]] static bool isNoopBuiltin(unsigned ID) {
30 switch (ID) {
31 case Builtin::BIas_const:
32 case Builtin::BIforward:
33 case Builtin::BIforward_like:
34 case Builtin::BImove:
35 case Builtin::BImove_if_noexcept:
36 case Builtin::BIaddressof:
37 case Builtin::BI__addressof:
38 case Builtin::BI__builtin_addressof:
39 case Builtin::BI__builtin_launder:
40 return true;
41 default:
42 return false;
43 }
44 return false;
45}
46
/// Pops and discards the top value of the stack, which has primitive
/// type \p T.
static void discard(InterpStack &Stk, PrimType T) {
  TYPE_SWITCH(T, { Stk.discard<T>(); });
}
50
/// Pops the value of the integral expression \p E off the stack and returns
/// it converted to a uint64_t.
static uint64_t popToUInt64(const InterpState &S, const Expr *E) {
  INT_TYPE_SWITCH(*S.getContext().classify(E->getType()),
                  return static_cast<uint64_t>(S.Stk.pop<T>()));
}
55
/// Pops a value of primitive integral type \p T off \p Stk and returns it
/// as an APSInt.
static APSInt popToAPSInt(InterpStack &Stk, PrimType T) {
  INT_TYPE_SWITCH(T, return Stk.pop<T>().toAPSInt());
}
59
/// Convenience overload: classifies the type of \p E and pops its value.
static APSInt popToAPSInt(InterpState &S, const Expr *E) {
  return popToAPSInt(Stk&: S.Stk, T: *S.getContext().classify(T: E->getType()));
}
/// Convenience overload: classifies \p T and pops the corresponding value.
static APSInt popToAPSInt(InterpState &S, QualType T) {
  return popToAPSInt(Stk&: S.Stk, T: *S.getContext().classify(T));
}
66
67/// Check for common reasons a pointer can't be read from, which
68/// are usually not diagnosed in a builtin function.
69static bool isReadable(const Pointer &P) {
70 if (P.isDummy())
71 return false;
72 if (!P.isBlockPointer())
73 return false;
74 if (!P.isLive())
75 return false;
76 if (P.isOnePastEnd())
77 return false;
78 return true;
79}
80
/// Pushes \p Val on the stack as the type given by \p QT.
static void pushInteger(InterpState &S, const APSInt &Val, QualType QT) {
  assert(QT->isSignedIntegerOrEnumerationType() ||
         QT->isUnsignedIntegerOrEnumerationType());
  OptPrimType T = S.getContext().classify(T: QT);
  assert(T);
  unsigned BitWidth = S.getASTContext().getIntWidth(T: QT);

  // Arbitrary-precision integers need backing storage allocated in the
  // interpreter state; copy the value into it before pushing.
  if (T == PT_IntAPS) {
    auto Result = S.allocAP<IntegralAP<true>>(BitWidth);
    Result.copy(V: Val);
    S.Stk.push<IntegralAP<true>>(Args&: Result);
    return;
  }

  if (T == PT_IntAP) {
    auto Result = S.allocAP<IntegralAP<false>>(BitWidth);
    Result.copy(V: Val);
    S.Stk.push<IntegralAP<false>>(Args&: Result);
    return;
  }

  // Fixed-width primitive integers: extend/truncate to the destination
  // width, using the destination type's signedness for the extension.
  if (QT->isSignedIntegerOrEnumerationType()) {
    int64_t V = Val.getSExtValue();
    INT_TYPE_SWITCH(*T, { S.Stk.push<T>(T::from(V, BitWidth)); });
  } else {
    assert(QT->isUnsignedIntegerOrEnumerationType());
    uint64_t V = Val.getZExtValue();
    INT_TYPE_SWITCH(*T, { S.Stk.push<T>(T::from(V, BitWidth)); });
  }
}
112
/// Overload accepting any integer-like value (APInt, APSInt, or a plain
/// C++ integer type); normalizes it to an APSInt and forwards to the
/// overload above.
template <typename T>
static void pushInteger(InterpState &S, T Val, QualType QT) {
  if constexpr (std::is_same_v<T, APInt>)
    pushInteger(S, Val: APSInt(Val, !std::is_signed_v<T>), QT);
  else if constexpr (std::is_same_v<T, APSInt>)
    pushInteger(S, Val, QT);
  else
    pushInteger(S,
                Val: APSInt(APInt(sizeof(T) * 8, static_cast<uint64_t>(Val),
                          std::is_signed_v<T>),
                      !std::is_signed_v<T>),
                QT);
}
126
/// Stores \p Value, which has primitive type \p ValueT, into the location
/// designated by \p Dest.
static void assignInteger(InterpState &S, const Pointer &Dest, PrimType ValueT,
                          const APSInt &Value) {

  // Arbitrary-precision destinations get fresh backing storage sized to the
  // value's bit width before the copy.
  if (ValueT == PT_IntAPS) {
    Dest.deref<IntegralAP<true>>() =
        S.allocAP<IntegralAP<true>>(BitWidth: Value.getBitWidth());
    Dest.deref<IntegralAP<true>>().copy(V: Value);
  } else if (ValueT == PT_IntAP) {
    Dest.deref<IntegralAP<false>>() =
        S.allocAP<IntegralAP<false>>(BitWidth: Value.getBitWidth());
    Dest.deref<IntegralAP<false>>().copy(V: Value);
  } else {
    INT_TYPE_SWITCH_NO_BOOL(
        ValueT, { Dest.deref<T>() = T::from(static_cast<T>(Value)); });
  }
}
143
144static QualType getElemType(const Pointer &P) {
145 const Descriptor *Desc = P.getFieldDesc();
146 QualType T = Desc->getType();
147 if (Desc->isPrimitive())
148 return T;
149 if (T->isPointerType())
150 return T->castAs<PointerType>()->getPointeeType();
151 if (Desc->isArray())
152 return Desc->getElemQualType();
153 if (const auto *AT = T->getAsArrayTypeUnsafe())
154 return AT->getElementType();
155 return T;
156}
157
158static void diagnoseNonConstexprBuiltin(InterpState &S, CodePtr OpPC,
159 unsigned ID) {
160 if (!S.diagnosing())
161 return;
162
163 auto Loc = S.Current->getSource(PC: OpPC);
164 if (S.getLangOpts().CPlusPlus11)
165 S.CCEDiag(SI: Loc, DiagId: diag::note_constexpr_invalid_function)
166 << /*isConstexpr=*/0 << /*isConstructor=*/0
167 << S.getASTContext().BuiltinInfo.getQuotedName(ID);
168 else
169 S.CCEDiag(SI: Loc, DiagId: diag::note_invalid_subexpr_in_const_expr);
170}
171
172static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
173 assert(Val.getFieldDesc()->isPrimitiveArray() &&
174 Val.getFieldDesc()->getElemQualType()->isBooleanType() &&
175 "Not a boolean vector");
176 unsigned NumElems = Val.getNumElems();
177
178 // Each element is one bit, so create an integer with NumElts bits.
179 llvm::APSInt Result(NumElems, 0);
180 for (unsigned I = 0; I != NumElems; ++I) {
181 if (Val.elem<bool>(I))
182 Result.setBit(I);
183 }
184
185 return Result;
186}
187
188// Strict double -> float conversion used for X86 PD2PS/cvtsd2ss intrinsics.
189// Reject NaN/Inf/Subnormal inputs and any lossy/inexact conversions.
190static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst,
191 InterpState &S, const Expr *DiagExpr) {
192 if (Src.isInfinity()) {
193 if (S.diagnosing())
194 S.CCEDiag(E: DiagExpr, DiagId: diag::note_constexpr_float_arithmetic) << 0;
195 return false;
196 }
197 if (Src.isNaN()) {
198 if (S.diagnosing())
199 S.CCEDiag(E: DiagExpr, DiagId: diag::note_constexpr_float_arithmetic) << 1;
200 return false;
201 }
202 APFloat Val = Src;
203 bool LosesInfo = false;
204 APFloat::opStatus Status = Val.convert(
205 ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven, losesInfo: &LosesInfo);
206 if (LosesInfo || Val.isDenormal()) {
207 if (S.diagnosing())
208 S.CCEDiag(E: DiagExpr, DiagId: diag::note_constexpr_float_arithmetic_strict);
209 return false;
210 }
211 if (Status != APFloat::opOK) {
212 if (S.diagnosing())
213 S.CCEDiag(E: DiagExpr, DiagId: diag::note_invalid_subexpr_in_const_expr);
214 return false;
215 }
216 Dst.copy(F: Val);
217 return true;
218}
219
/// __builtin_is_constant_evaluated: pushes whether we are in a constant
/// context, and warns when the result is tautologically true (e.g. inside
/// an `if constexpr` condition).
static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
                                                  const InterpFrame *Frame,
                                                  const CallExpr *Call) {
  unsigned Depth = S.Current->getDepth();
  auto isStdCall = [](const FunctionDecl *F) -> bool {
    return F && F->isInStdNamespace() && F->getIdentifier() &&
           F->getIdentifier()->isStr(Str: "is_constant_evaluated");
  };
  const InterpFrame *Caller = Frame->Caller;
  // The current frame is the one for __builtin_is_constant_evaluated.
  // The one above that, potentially the one for std::is_constant_evaluated().
  if (S.inConstantContext() && !S.checkingPotentialConstantExpression() &&
      S.getEvalStatus().Diag &&
      (Depth == 0 || (Depth == 1 && isStdCall(Frame->getCallee())))) {
    // Point the warning at the std::is_constant_evaluated() call site when
    // the builtin was reached through the std wrapper.
    if (Caller && isStdCall(Frame->getCallee())) {
      const Expr *E = Caller->getExpr(PC: Caller->getRetPC());
      S.report(Loc: E->getExprLoc(),
               DiagId: diag::warn_is_constant_evaluated_always_true_constexpr)
          << "std::is_constant_evaluated" << E->getSourceRange();
    } else {
      S.report(Loc: Call->getExprLoc(),
               DiagId: diag::warn_is_constant_evaluated_always_true_constexpr)
          << "__builtin_is_constant_evaluated" << Call->getSourceRange();
    }
  }

  S.Stk.push<Boolean>(Args: Boolean::from(Value: S.inConstantContext()));
  return true;
}
249
// __builtin_assume
// __assume (MS extension)
// The assumption expression is unevaluated, so there is no argument on the
// stack to pop and no result to push.
static bool interp__builtin_assume(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call) {
  // Nothing to be done here since the argument is NOT evaluated.
  assert(Call->getNumArgs() == 1);
  return true;
}
259
/// Implements strcmp/strncmp/wcscmp/wcsncmp and their __builtin_ variants.
/// Pushes -1, 0, or 1 on success; returns false to fail the evaluation.
static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call, unsigned ID) {
  // The 'n' variants limit how many characters are compared; everything
  // else compares "unlimited".
  uint64_t Limit = ~static_cast<uint64_t>(0);
  if (ID == Builtin::BIstrncmp || ID == Builtin::BI__builtin_strncmp ||
      ID == Builtin::BIwcsncmp || ID == Builtin::BI__builtin_wcsncmp)
    Limit = popToUInt64(S, E: Call->getArg(Arg: 2));

  const Pointer &B = S.Stk.pop<Pointer>();
  const Pointer &A = S.Stk.pop<Pointer>();
  // The plain library functions (no __builtin_ prefix) are not constexpr;
  // emit the usual note but keep evaluating (folding is still useful).
  if (ID == Builtin::BIstrcmp || ID == Builtin::BIstrncmp ||
      ID == Builtin::BIwcscmp || ID == Builtin::BIwcsncmp)
    diagnoseNonConstexprBuiltin(S, OpPC, ID);

  // A zero limit compares equal without touching the pointers.
  if (Limit == 0) {
    pushInteger(S, Val: 0, QT: Call->getType());
    return true;
  }

  if (!CheckLive(S, OpPC, Ptr: A, AK: AK_Read) || !CheckLive(S, OpPC, Ptr: B, AK: AK_Read))
    return false;

  if (A.isDummy() || B.isDummy())
    return false;
  if (!A.isBlockPointer() || !B.isBlockPointer())
    return false;

  bool IsWide = ID == Builtin::BIwcscmp || ID == Builtin::BIwcsncmp ||
                ID == Builtin::BI__builtin_wcscmp ||
                ID == Builtin::BI__builtin_wcsncmp;
  assert(A.getFieldDesc()->isPrimitiveArray());
  assert(B.getFieldDesc()->isPrimitiveArray());

  // Different element types shouldn't happen, but with casts they can.
  if (!S.getASTContext().hasSameUnqualifiedType(T1: getElemType(P: A), T2: getElemType(P: B)))
    return false;

  PrimType ElemT = *S.getContext().classify(T: getElemType(P: A));

  auto returnResult = [&](int V) -> bool {
    pushInteger(S, Val: V, QT: Call->getType());
    return true;
  };

  // Walk both strings in lockstep until a difference, a terminating null,
  // or the comparison limit is reached; every access is range-checked.
  unsigned IndexA = A.getIndex();
  unsigned IndexB = B.getIndex();
  uint64_t Steps = 0;
  for (;; ++IndexA, ++IndexB, ++Steps) {

    if (Steps >= Limit)
      break;
    const Pointer &PA = A.atIndex(Idx: IndexA);
    const Pointer &PB = B.atIndex(Idx: IndexB);
    if (!CheckRange(S, OpPC, Ptr: PA, AK: AK_Read) ||
        !CheckRange(S, OpPC, Ptr: PB, AK: AK_Read)) {
      return false;
    }

    // Wide strings compare using the classified element type.
    if (IsWide) {
      INT_TYPE_SWITCH(ElemT, {
        T CA = PA.deref<T>();
        T CB = PB.deref<T>();
        if (CA > CB)
          return returnResult(1);
        if (CA < CB)
          return returnResult(-1);
        if (CA.isZero() || CB.isZero())
          return returnResult(0);
      });
      continue;
    }

    // Narrow strings compare as unsigned chars.
    uint8_t CA = PA.deref<uint8_t>();
    uint8_t CB = PB.deref<uint8_t>();

    if (CA > CB)
      return returnResult(1);
    if (CA < CB)
      return returnResult(-1);
    if (CA == 0 || CB == 0)
      return returnResult(0);
  }

  return returnResult(0);
}
345
/// Implements strlen/wcslen and their __builtin_ variants: counts the
/// elements of a primitive character array up to the terminating null.
static bool interp__builtin_strlen(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call, unsigned ID) {
  const Pointer &StrPtr = S.Stk.pop<Pointer>().expand();

  // The plain library functions are not constexpr; note that but keep
  // folding.
  if (ID == Builtin::BIstrlen || ID == Builtin::BIwcslen)
    diagnoseNonConstexprBuiltin(S, OpPC, ID);

  if (!CheckArray(S, OpPC, Ptr: StrPtr))
    return false;

  if (!CheckLive(S, OpPC, Ptr: StrPtr, AK: AK_Read))
    return false;

  if (!CheckDummy(S, OpPC, B: StrPtr.block(), AK: AK_Read))
    return false;

  if (!StrPtr.getFieldDesc()->isPrimitiveArray())
    return false;

  assert(StrPtr.getFieldDesc()->isPrimitiveArray());
  // Only char-like element sizes (1/2/4 bytes) are supported.
  unsigned ElemSize = StrPtr.getFieldDesc()->getElemSize();
  if (ElemSize != 1 && ElemSize != 2 && ElemSize != 4)
    return Invalid(S, OpPC);

  // For the wide variants, the element size must match the target's
  // wchar_t size.
  if (ID == Builtin::BI__builtin_wcslen || ID == Builtin::BIwcslen) {
    const ASTContext &AC = S.getASTContext();
    unsigned WCharSize = AC.getTypeSizeInChars(T: AC.getWCharType()).getQuantity();
    if (ElemSize != WCharSize)
      return false;
  }

  // Count elements until the null terminator, range-checking each access so
  // running off the end of the array fails the evaluation.
  size_t Len = 0;
  for (size_t I = StrPtr.getIndex();; ++I, ++Len) {
    const Pointer &ElemPtr = StrPtr.atIndex(Idx: I);

    if (!CheckRange(S, OpPC, Ptr: ElemPtr, AK: AK_Read))
      return false;

    uint32_t Val;
    switch (ElemSize) {
    case 1:
      Val = ElemPtr.deref<uint8_t>();
      break;
    case 2:
      Val = ElemPtr.deref<uint16_t>();
      break;
    case 4:
      Val = ElemPtr.deref<uint32_t>();
      break;
    default:
      llvm_unreachable("Unsupported char size");
    }
    if (Val == 0)
      break;
  }

  pushInteger(S, Val: Len, QT: Call->getType());

  return true;
}
407
/// __builtin_nan / __builtin_nans (and width variants): constructs a quiet
/// or signaling NaN (per \p Signaling) whose payload is parsed from the
/// string argument.
static bool interp__builtin_nan(InterpState &S, CodePtr OpPC,
                                const InterpFrame *Frame, const CallExpr *Call,
                                bool Signaling) {
  const Pointer &Arg = S.Stk.pop<Pointer>();

  if (!CheckLoad(S, OpPC, Ptr: Arg))
    return false;

  if (!Arg.getFieldDesc()->isPrimitiveArray())
    return Invalid(S, OpPC);

  // Convert the given string to an integer using StringRef's API.
  llvm::APInt Fill;
  std::string Str;
  assert(Arg.getNumElems() >= 1);
  // Collect characters up to the null terminator; each load is checked.
  for (unsigned I = 0;; ++I) {
    const Pointer &Elem = Arg.atIndex(Idx: I);

    if (!CheckLoad(S, OpPC, Ptr: Elem))
      return false;

    if (Elem.deref<int8_t>() == 0)
      break;

    Str += Elem.deref<char>();
  }

  // Treat empty strings as if they were zero.
  if (Str.empty())
    Fill = llvm::APInt(32, 0);
  else if (StringRef(Str).getAsInteger(Radix: 0, Result&: Fill))
    return false;

  const llvm::fltSemantics &TargetSemantics =
      S.getASTContext().getFloatTypeSemantics(
          T: Call->getDirectCallee()->getReturnType());

  Floating Result = S.allocFloat(Sem: TargetSemantics);
  if (S.getASTContext().getTargetInfo().isNan2008()) {
    if (Signaling)
      Result.copy(
          F: llvm::APFloat::getSNaN(Sem: TargetSemantics, /*Negative=*/false, payload: &Fill));
    else
      Result.copy(
          F: llvm::APFloat::getQNaN(Sem: TargetSemantics, /*Negative=*/false, payload: &Fill));
  } else {
    // Prior to IEEE 754-2008, architectures were allowed to choose whether
    // the first bit of their significand was set for qNaN or sNaN. MIPS chose
    // a different encoding to what became a standard in 2008, and for pre-
    // 2008 revisions, MIPS interpreted sNaN-2008 as qNan and qNaN-2008 as
    // sNaN. This is now known as "legacy NaN" encoding.
    if (Signaling)
      Result.copy(
          F: llvm::APFloat::getQNaN(Sem: TargetSemantics, /*Negative=*/false, payload: &Fill));
    else
      Result.copy(
          F: llvm::APFloat::getSNaN(Sem: TargetSemantics, /*Negative=*/false, payload: &Fill));
  }

  S.Stk.push<Floating>(Args&: Result);
  return true;
}
470
471static bool interp__builtin_inf(InterpState &S, CodePtr OpPC,
472 const InterpFrame *Frame,
473 const CallExpr *Call) {
474 const llvm::fltSemantics &TargetSemantics =
475 S.getASTContext().getFloatTypeSemantics(
476 T: Call->getDirectCallee()->getReturnType());
477
478 Floating Result = S.allocFloat(Sem: TargetSemantics);
479 Result.copy(F: APFloat::getInf(Sem: TargetSemantics));
480 S.Stk.push<Floating>(Args&: Result);
481 return true;
482}
483
484static bool interp__builtin_copysign(InterpState &S, CodePtr OpPC,
485 const InterpFrame *Frame) {
486 const Floating &Arg2 = S.Stk.pop<Floating>();
487 const Floating &Arg1 = S.Stk.pop<Floating>();
488 Floating Result = S.allocFloat(Sem: Arg1.getSemantics());
489
490 APFloat Copy = Arg1.getAPFloat();
491 Copy.copySign(RHS: Arg2.getAPFloat());
492 Result.copy(F: Copy);
493 S.Stk.push<Floating>(Args&: Result);
494
495 return true;
496}
497
498static bool interp__builtin_fmin(InterpState &S, CodePtr OpPC,
499 const InterpFrame *Frame, bool IsNumBuiltin) {
500 const Floating &RHS = S.Stk.pop<Floating>();
501 const Floating &LHS = S.Stk.pop<Floating>();
502 Floating Result = S.allocFloat(Sem: LHS.getSemantics());
503
504 if (IsNumBuiltin)
505 Result.copy(F: llvm::minimumnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
506 else
507 Result.copy(F: minnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
508 S.Stk.push<Floating>(Args&: Result);
509 return true;
510}
511
512static bool interp__builtin_fmax(InterpState &S, CodePtr OpPC,
513 const InterpFrame *Frame, bool IsNumBuiltin) {
514 const Floating &RHS = S.Stk.pop<Floating>();
515 const Floating &LHS = S.Stk.pop<Floating>();
516 Floating Result = S.allocFloat(Sem: LHS.getSemantics());
517
518 if (IsNumBuiltin)
519 Result.copy(F: llvm::maximumnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
520 else
521 Result.copy(F: maxnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
522 S.Stk.push<Floating>(Args&: Result);
523 return true;
524}
525
526/// Defined as __builtin_isnan(...), to accommodate the fact that it can
527/// take a float, double, long double, etc.
528/// But for us, that's all a Floating anyway.
529static bool interp__builtin_isnan(InterpState &S, CodePtr OpPC,
530 const InterpFrame *Frame,
531 const CallExpr *Call) {
532 const Floating &Arg = S.Stk.pop<Floating>();
533
534 pushInteger(S, Val: Arg.isNan(), QT: Call->getType());
535 return true;
536}
537
538static bool interp__builtin_issignaling(InterpState &S, CodePtr OpPC,
539 const InterpFrame *Frame,
540 const CallExpr *Call) {
541 const Floating &Arg = S.Stk.pop<Floating>();
542
543 pushInteger(S, Val: Arg.isSignaling(), QT: Call->getType());
544 return true;
545}
546
547static bool interp__builtin_isinf(InterpState &S, CodePtr OpPC,
548 const InterpFrame *Frame, bool CheckSign,
549 const CallExpr *Call) {
550 const Floating &Arg = S.Stk.pop<Floating>();
551 APFloat F = Arg.getAPFloat();
552 bool IsInf = F.isInfinity();
553
554 if (CheckSign)
555 pushInteger(S, Val: IsInf ? (F.isNegative() ? -1 : 1) : 0, QT: Call->getType());
556 else
557 pushInteger(S, Val: IsInf, QT: Call->getType());
558 return true;
559}
560
561static bool interp__builtin_isfinite(InterpState &S, CodePtr OpPC,
562 const InterpFrame *Frame,
563 const CallExpr *Call) {
564 const Floating &Arg = S.Stk.pop<Floating>();
565
566 pushInteger(S, Val: Arg.isFinite(), QT: Call->getType());
567 return true;
568}
569
570static bool interp__builtin_isnormal(InterpState &S, CodePtr OpPC,
571 const InterpFrame *Frame,
572 const CallExpr *Call) {
573 const Floating &Arg = S.Stk.pop<Floating>();
574
575 pushInteger(S, Val: Arg.isNormal(), QT: Call->getType());
576 return true;
577}
578
579static bool interp__builtin_issubnormal(InterpState &S, CodePtr OpPC,
580 const InterpFrame *Frame,
581 const CallExpr *Call) {
582 const Floating &Arg = S.Stk.pop<Floating>();
583
584 pushInteger(S, Val: Arg.isDenormal(), QT: Call->getType());
585 return true;
586}
587
588static bool interp__builtin_iszero(InterpState &S, CodePtr OpPC,
589 const InterpFrame *Frame,
590 const CallExpr *Call) {
591 const Floating &Arg = S.Stk.pop<Floating>();
592
593 pushInteger(S, Val: Arg.isZero(), QT: Call->getType());
594 return true;
595}
596
597static bool interp__builtin_signbit(InterpState &S, CodePtr OpPC,
598 const InterpFrame *Frame,
599 const CallExpr *Call) {
600 const Floating &Arg = S.Stk.pop<Floating>();
601
602 pushInteger(S, Val: Arg.isNegative(), QT: Call->getType());
603 return true;
604}
605
606static bool interp_floating_comparison(InterpState &S, CodePtr OpPC,
607 const CallExpr *Call, unsigned ID) {
608 const Floating &RHS = S.Stk.pop<Floating>();
609 const Floating &LHS = S.Stk.pop<Floating>();
610
611 pushInteger(
612 S,
613 Val: [&] {
614 switch (ID) {
615 case Builtin::BI__builtin_isgreater:
616 return LHS > RHS;
617 case Builtin::BI__builtin_isgreaterequal:
618 return LHS >= RHS;
619 case Builtin::BI__builtin_isless:
620 return LHS < RHS;
621 case Builtin::BI__builtin_islessequal:
622 return LHS <= RHS;
623 case Builtin::BI__builtin_islessgreater: {
624 ComparisonCategoryResult Cmp = LHS.compare(RHS);
625 return Cmp == ComparisonCategoryResult::Less ||
626 Cmp == ComparisonCategoryResult::Greater;
627 }
628 case Builtin::BI__builtin_isunordered:
629 return LHS.compare(RHS) == ComparisonCategoryResult::Unordered;
630 default:
631 llvm_unreachable("Unexpected builtin ID: Should be a floating point "
632 "comparison function");
633 }
634 }(),
635 QT: Call->getType());
636 return true;
637}
638
639/// First parameter to __builtin_isfpclass is the floating value, the
640/// second one is an integral value.
641static bool interp__builtin_isfpclass(InterpState &S, CodePtr OpPC,
642 const InterpFrame *Frame,
643 const CallExpr *Call) {
644 APSInt FPClassArg = popToAPSInt(S, E: Call->getArg(Arg: 1));
645 const Floating &F = S.Stk.pop<Floating>();
646
647 int32_t Result = static_cast<int32_t>(
648 (F.classify() & std::move(FPClassArg)).getZExtValue());
649 pushInteger(S, Val: Result, QT: Call->getType());
650
651 return true;
652}
653
/// Five int values followed by one floating value.
/// __builtin_fpclassify(int, int, int, int, int, float)
static bool interp__builtin_fpclassify(InterpState &S, CodePtr OpPC,
                                       const InterpFrame *Frame,
                                       const CallExpr *Call) {
  const Floating &Val = S.Stk.pop<Floating>();

  // Arguments were pushed left-to-right, so the last int argument comes off
  // the stack first; store them back in source order.
  PrimType IntT = *S.getContext().classify(E: Call->getArg(Arg: 0));
  APSInt Values[5];
  for (unsigned I = 0; I != 5; ++I)
    Values[4 - I] = popToAPSInt(Stk&: S.Stk, T: IntT);

  // Map the value's floating-point category to the index of the argument
  // to return (NaN, infinite, normal/subnormal, zero).
  unsigned Index;
  switch (Val.getCategory()) {
  case APFloat::fcNaN:
    Index = 0;
    break;
  case APFloat::fcInfinity:
    Index = 1;
    break;
  case APFloat::fcNormal:
    // fcNormal covers denormals too; distinguish them here.
    Index = Val.isDenormal() ? 3 : 2;
    break;
  case APFloat::fcZero:
    Index = 4;
    break;
  }

  // The last argument is first on the stack.
  assert(Index <= 4);

  pushInteger(S, Val: Values[Index], QT: Call->getType());
  return true;
}
688
689static inline Floating abs(InterpState &S, const Floating &In) {
690 if (!In.isNegative())
691 return In;
692
693 Floating Output = S.allocFloat(Sem: In.getSemantics());
694 APFloat New = In.getAPFloat();
695 New.changeSign();
696 Output.copy(F: New);
697 return Output;
698}
699
700// The C standard says "fabs raises no floating-point exceptions,
701// even if x is a signaling NaN. The returned value is independent of
702// the current rounding direction mode." Therefore constant folding can
703// proceed without regard to the floating point settings.
704// Reference, WG14 N2478 F.10.4.3
705static bool interp__builtin_fabs(InterpState &S, CodePtr OpPC,
706 const InterpFrame *Frame) {
707 const Floating &Val = S.Stk.pop<Floating>();
708 S.Stk.push<Floating>(Args: abs(S, In: Val));
709 return true;
710}
711
712static bool interp__builtin_abs(InterpState &S, CodePtr OpPC,
713 const InterpFrame *Frame,
714 const CallExpr *Call) {
715 APSInt Val = popToAPSInt(S, E: Call->getArg(Arg: 0));
716 if (Val ==
717 APSInt(APInt::getSignedMinValue(numBits: Val.getBitWidth()), /*IsUnsigned=*/false))
718 return false;
719 if (Val.isNegative())
720 Val.negate();
721 pushInteger(S, Val, QT: Call->getType());
722 return true;
723}
724
725static bool interp__builtin_popcount(InterpState &S, CodePtr OpPC,
726 const InterpFrame *Frame,
727 const CallExpr *Call) {
728 APSInt Val;
729 if (Call->getArg(Arg: 0)->getType()->isExtVectorBoolType()) {
730 const Pointer &Arg = S.Stk.pop<Pointer>();
731 Val = convertBoolVectorToInt(Val: Arg);
732 } else {
733 Val = popToAPSInt(S, E: Call->getArg(Arg: 0));
734 }
735 pushInteger(S, Val: Val.popcount(), QT: Call->getType());
736 return true;
737}
738
739static bool interp__builtin_ia32_crc32(InterpState &S, CodePtr OpPC,
740 const InterpFrame *Frame,
741 const CallExpr *Call,
742 unsigned DataBytes) {
743 uint64_t DataVal = popToUInt64(S, E: Call->getArg(Arg: 1));
744 uint64_t CRCVal = popToUInt64(S, E: Call->getArg(Arg: 0));
745
746 // CRC32C polynomial (iSCSI polynomial, bit-reversed)
747 static const uint32_t CRC32C_POLY = 0x82F63B78;
748
749 // Process each byte
750 uint32_t Result = static_cast<uint32_t>(CRCVal);
751 for (unsigned I = 0; I != DataBytes; ++I) {
752 uint8_t Byte = static_cast<uint8_t>((DataVal >> (I * 8)) & 0xFF);
753 Result ^= Byte;
754 for (int J = 0; J != 8; ++J) {
755 Result = (Result >> 1) ^ ((Result & 1) ? CRC32C_POLY : 0);
756 }
757 }
758
759 pushInteger(S, Val: Result, QT: Call->getType());
760 return true;
761}
762
763static bool interp__builtin_classify_type(InterpState &S, CodePtr OpPC,
764 const InterpFrame *Frame,
765 const CallExpr *Call) {
766 // This is an unevaluated call, so there are no arguments on the stack.
767 assert(Call->getNumArgs() == 1);
768 const Expr *Arg = Call->getArg(Arg: 0);
769
770 GCCTypeClass ResultClass =
771 EvaluateBuiltinClassifyType(T: Arg->getType(), LangOpts: S.getLangOpts());
772 int32_t ReturnVal = static_cast<int32_t>(ResultClass);
773 pushInteger(S, Val: ReturnVal, QT: Call->getType());
774 return true;
775}
776
// __builtin_expect(long, long)
// __builtin_expect_with_probability(long, long, double)
static bool interp__builtin_expect(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call) {
  // The return value is simply the value of the first parameter.
  // We ignore the probability.
  unsigned NumArgs = Call->getNumArgs();
  assert(NumArgs == 2 || NumArgs == 3);

  PrimType ArgT = *S.getContext().classify(T: Call->getArg(Arg: 0)->getType());
  // Arguments were pushed left-to-right: first drop the probability (if
  // present), then the expected value (same primitive type as arg 0 for
  // this builtin), leaving the first argument on top.
  if (NumArgs == 3)
    S.Stk.discard<Floating>();
  discard(Stk&: S.Stk, T: ArgT);

  APSInt Val = popToAPSInt(Stk&: S.Stk, T: ArgT);
  pushInteger(S, Val, QT: Call->getType());
  return true;
}
796
/// __builtin_addressof: the argument is an lvalue whose Pointer
/// representation is already on the stack and doubles as the result, so no
/// work is needed beyond the debug-mode sanity checks.
static bool interp__builtin_addressof(InterpState &S, CodePtr OpPC,
                                      const InterpFrame *Frame,
                                      const CallExpr *Call) {
#ifndef NDEBUG
  assert(Call->getArg(0)->isLValue());
  PrimType PtrT = S.getContext().classify(Call->getArg(0)).value_or(PT_Ptr);
  assert(PtrT == PT_Ptr &&
         "Unsupported pointer type passed to __builtin_addressof()");
#endif
  return true;
}
808
/// Handles no-op forwarding builtins such as std::move: the argument on the
/// stack already is the result. Evaluation fails if the callee is not
/// declared constexpr.
static bool interp__builtin_move(InterpState &S, CodePtr OpPC,
                                 const InterpFrame *Frame,
                                 const CallExpr *Call) {
  return Call->getDirectCallee()->isConstexpr();
}
814
815static bool interp__builtin_eh_return_data_regno(InterpState &S, CodePtr OpPC,
816 const InterpFrame *Frame,
817 const CallExpr *Call) {
818 APSInt Arg = popToAPSInt(S, E: Call->getArg(Arg: 0));
819
820 int Result = S.getASTContext().getTargetInfo().getEHDataRegisterNumber(
821 RegNo: Arg.getZExtValue());
822 pushInteger(S, Val: Result, QT: Call->getType());
823 return true;
824}
825
// Two integral values followed by a pointer (lhs, rhs, resultOut)
static bool interp__builtin_overflowop(InterpState &S, CodePtr OpPC,
                                       const CallExpr *Call,
                                       unsigned BuiltinOp) {
  const Pointer &ResultPtr = S.Stk.pop<Pointer>();
  if (ResultPtr.isDummy() || !ResultPtr.isBlockPointer())
    return false;

  PrimType RHST = *S.getContext().classify(T: Call->getArg(Arg: 1)->getType());
  PrimType LHST = *S.getContext().classify(T: Call->getArg(Arg: 0)->getType());
  APSInt RHS = popToAPSInt(Stk&: S.Stk, T: RHST);
  APSInt LHS = popToAPSInt(Stk&: S.Stk, T: LHST);
  QualType ResultType = Call->getArg(Arg: 2)->getType()->getPointeeType();
  PrimType ResultT = *S.getContext().classify(T: ResultType);
  bool Overflow;

  APSInt Result;
  // The generic __builtin_{add,sub,mul}_overflow allow operands and result
  // of differing widths/signedness; widen everything to a common width that
  // can represent all of them before doing the arithmetic.
  if (BuiltinOp == Builtin::BI__builtin_add_overflow ||
      BuiltinOp == Builtin::BI__builtin_sub_overflow ||
      BuiltinOp == Builtin::BI__builtin_mul_overflow) {
    bool IsSigned = LHS.isSigned() || RHS.isSigned() ||
                    ResultType->isSignedIntegerOrEnumerationType();
    bool AllSigned = LHS.isSigned() && RHS.isSigned() &&
                     ResultType->isSignedIntegerOrEnumerationType();
    uint64_t LHSSize = LHS.getBitWidth();
    uint64_t RHSSize = RHS.getBitWidth();
    uint64_t ResultSize = S.getASTContext().getTypeSize(T: ResultType);
    uint64_t MaxBits = std::max(a: std::max(a: LHSSize, b: RHSSize), b: ResultSize);

    // Add an additional bit if the signedness isn't uniformly agreed to. We
    // could do this ONLY if there is a signed and an unsigned that both have
    // MaxBits, but the code to check that is pretty nasty. The issue will be
    // caught in the shrink-to-result later anyway.
    if (IsSigned && !AllSigned)
      ++MaxBits;

    LHS = APSInt(LHS.extOrTrunc(width: MaxBits), !IsSigned);
    RHS = APSInt(RHS.extOrTrunc(width: MaxBits), !IsSigned);
    Result = APSInt(MaxBits, !IsSigned);
  }

  // Perform the arithmetic, tracking overflow with the checked APSInt ops.
  switch (BuiltinOp) {
  default:
    llvm_unreachable("Invalid value for BuiltinOp");
  case Builtin::BI__builtin_add_overflow:
  case Builtin::BI__builtin_sadd_overflow:
  case Builtin::BI__builtin_saddl_overflow:
  case Builtin::BI__builtin_saddll_overflow:
  case Builtin::BI__builtin_uadd_overflow:
  case Builtin::BI__builtin_uaddl_overflow:
  case Builtin::BI__builtin_uaddll_overflow:
    Result = LHS.isSigned() ? LHS.sadd_ov(RHS, Overflow)
                            : LHS.uadd_ov(RHS, Overflow);
    break;
  case Builtin::BI__builtin_sub_overflow:
  case Builtin::BI__builtin_ssub_overflow:
  case Builtin::BI__builtin_ssubl_overflow:
  case Builtin::BI__builtin_ssubll_overflow:
  case Builtin::BI__builtin_usub_overflow:
  case Builtin::BI__builtin_usubl_overflow:
  case Builtin::BI__builtin_usubll_overflow:
    Result = LHS.isSigned() ? LHS.ssub_ov(RHS, Overflow)
                            : LHS.usub_ov(RHS, Overflow);
    break;
  case Builtin::BI__builtin_mul_overflow:
  case Builtin::BI__builtin_smul_overflow:
  case Builtin::BI__builtin_smull_overflow:
  case Builtin::BI__builtin_smulll_overflow:
  case Builtin::BI__builtin_umul_overflow:
  case Builtin::BI__builtin_umull_overflow:
  case Builtin::BI__builtin_umulll_overflow:
    Result = LHS.isSigned() ? LHS.smul_ov(RHS, Overflow)
                            : LHS.umul_ov(RHS, Overflow);
    break;
  }

  // In the case where multiple sizes are allowed, truncate and see if
  // the values are the same.
  if (BuiltinOp == Builtin::BI__builtin_add_overflow ||
      BuiltinOp == Builtin::BI__builtin_sub_overflow ||
      BuiltinOp == Builtin::BI__builtin_mul_overflow) {
    // APSInt doesn't have a TruncOrSelf, so we use extOrTrunc instead,
    // since it will give us the behavior of a TruncOrSelf in the case where
    // its parameter <= its size. We previously set Result to be at least the
    // type-size of the result, so getTypeSize(ResultType) <=
    // Result.getBitWidth() here.
    APSInt Temp = Result.extOrTrunc(width: S.getASTContext().getTypeSize(T: ResultType));
    Temp.setIsSigned(ResultType->isSignedIntegerOrEnumerationType());

    if (!APSInt::isSameValue(I1: Temp, I2: Result))
      Overflow = true;
    Result = std::move(Temp);
  }

  // Write Result to ResultPtr and put Overflow on the stack.
  assignInteger(S, Dest: ResultPtr, ValueT: ResultT, Value: Result);
  if (ResultPtr.canBeInitialized())
    ResultPtr.initialize();

  assert(Call->getDirectCallee()->getReturnType()->isBooleanType());
  S.Stk.push<Boolean>(Args&: Overflow);
  return true;
}
929
/// Three integral values followed by a pointer (lhs, rhs, carry, carryOut).
static bool interp__builtin_carryop(InterpState &S, CodePtr OpPC,
                                    const InterpFrame *Frame,
                                    const CallExpr *Call, unsigned BuiltinOp) {
  const Pointer &CarryOutPtr = S.Stk.pop<Pointer>();
  PrimType LHST = *S.getContext().classify(T: Call->getArg(Arg: 0)->getType());
  PrimType RHST = *S.getContext().classify(T: Call->getArg(Arg: 1)->getType());
  // The carry-in is popped with the LHS primitive type — this assumes all
  // three integral operands share one type, matching the addc/subc builtin
  // signatures.
  APSInt CarryIn = popToAPSInt(Stk&: S.Stk, T: LHST);
  APSInt RHS = popToAPSInt(Stk&: S.Stk, T: RHST);
  APSInt LHS = popToAPSInt(Stk&: S.Stk, T: LHST);

  if (CarryOutPtr.isDummy() || !CarryOutPtr.isBlockPointer())
    return false;

  APSInt CarryOut;

  APSInt Result;
  // Copy the number of bits and sign.
  Result = LHS;
  CarryOut = LHS;

  bool FirstOverflowed = false;
  bool SecondOverflowed = false;
  switch (BuiltinOp) {
  default:
    llvm_unreachable("Invalid value for BuiltinOp");
  case Builtin::BI__builtin_addcb:
  case Builtin::BI__builtin_addcs:
  case Builtin::BI__builtin_addc:
  case Builtin::BI__builtin_addcl:
  case Builtin::BI__builtin_addcll:
    Result =
        LHS.uadd_ov(RHS, Overflow&: FirstOverflowed).uadd_ov(RHS: CarryIn, Overflow&: SecondOverflowed);
    break;
  case Builtin::BI__builtin_subcb:
  case Builtin::BI__builtin_subcs:
  case Builtin::BI__builtin_subc:
  case Builtin::BI__builtin_subcl:
  case Builtin::BI__builtin_subcll:
    Result =
        LHS.usub_ov(RHS, Overflow&: FirstOverflowed).usub_ov(RHS: CarryIn, Overflow&: SecondOverflowed);
    break;
  }
  // It is possible for both overflows to happen but CGBuiltin uses an OR so
  // this is consistent.
  CarryOut = (uint64_t)(FirstOverflowed | SecondOverflowed);

  // Store the carry-out through the fourth argument's pointee type.
  QualType CarryOutType = Call->getArg(Arg: 3)->getType()->getPointeeType();
  PrimType CarryOutT = *S.getContext().classify(T: CarryOutType);
  assignInteger(S, Dest: CarryOutPtr, ValueT: CarryOutT, Value: CarryOut);
  CarryOutPtr.initialize();

  assert(Call->getType() == Call->getArg(0)->getType());
  pushInteger(S, Val: Result, QT: Call->getType());
  return true;
}
986
987static bool interp__builtin_clz(InterpState &S, CodePtr OpPC,
988 const InterpFrame *Frame, const CallExpr *Call,
989 unsigned BuiltinOp) {
990
991 std::optional<APSInt> Fallback;
992 if (BuiltinOp == Builtin::BI__builtin_clzg && Call->getNumArgs() == 2)
993 Fallback = popToAPSInt(S, E: Call->getArg(Arg: 1));
994
995 APSInt Val;
996 if (Call->getArg(Arg: 0)->getType()->isExtVectorBoolType()) {
997 const Pointer &Arg = S.Stk.pop<Pointer>();
998 Val = convertBoolVectorToInt(Val: Arg);
999 } else {
1000 Val = popToAPSInt(S, E: Call->getArg(Arg: 0));
1001 }
1002
1003 // When the argument is 0, the result of GCC builtins is undefined, whereas
1004 // for Microsoft intrinsics, the result is the bit-width of the argument.
1005 bool ZeroIsUndefined = BuiltinOp != Builtin::BI__lzcnt16 &&
1006 BuiltinOp != Builtin::BI__lzcnt &&
1007 BuiltinOp != Builtin::BI__lzcnt64;
1008
1009 if (Val == 0) {
1010 if (Fallback) {
1011 pushInteger(S, Val: *Fallback, QT: Call->getType());
1012 return true;
1013 }
1014
1015 if (ZeroIsUndefined)
1016 return false;
1017 }
1018
1019 pushInteger(S, Val: Val.countl_zero(), QT: Call->getType());
1020 return true;
1021}
1022
1023static bool interp__builtin_ctz(InterpState &S, CodePtr OpPC,
1024 const InterpFrame *Frame, const CallExpr *Call,
1025 unsigned BuiltinID) {
1026 std::optional<APSInt> Fallback;
1027 if (BuiltinID == Builtin::BI__builtin_ctzg && Call->getNumArgs() == 2)
1028 Fallback = popToAPSInt(S, E: Call->getArg(Arg: 1));
1029
1030 APSInt Val;
1031 if (Call->getArg(Arg: 0)->getType()->isExtVectorBoolType()) {
1032 const Pointer &Arg = S.Stk.pop<Pointer>();
1033 Val = convertBoolVectorToInt(Val: Arg);
1034 } else {
1035 Val = popToAPSInt(S, E: Call->getArg(Arg: 0));
1036 }
1037
1038 if (Val == 0) {
1039 if (Fallback) {
1040 pushInteger(S, Val: *Fallback, QT: Call->getType());
1041 return true;
1042 }
1043 return false;
1044 }
1045
1046 pushInteger(S, Val: Val.countr_zero(), QT: Call->getType());
1047 return true;
1048}
1049
1050static bool interp__builtin_bswap(InterpState &S, CodePtr OpPC,
1051 const InterpFrame *Frame,
1052 const CallExpr *Call) {
1053 const APSInt &Val = popToAPSInt(S, E: Call->getArg(Arg: 0));
1054 if (Val.getBitWidth() == 8 || Val.getBitWidth() == 1)
1055 pushInteger(S, Val, QT: Call->getType());
1056 else
1057 pushInteger(S, Val: Val.byteSwap(), QT: Call->getType());
1058 return true;
1059}
1060
/// bool __atomic_always_lock_free(size_t, void const volatile*)
/// bool __atomic_is_lock_free(size_t, void const volatile*)
static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC,
                                             const InterpFrame *Frame,
                                             const CallExpr *Call,
                                             unsigned BuiltinOp) {
  // Helper: push the boolean answer and report success.
  auto returnBool = [&S](bool Value) -> bool {
    S.Stk.push<Boolean>(Args&: Value);
    return true;
  };

  const Pointer &Ptr = S.Stk.pop<Pointer>();
  uint64_t SizeVal = popToUInt64(S, E: Call->getArg(Arg: 0));

  // For __atomic_is_lock_free(sizeof(_Atomic(T))), if the size is a power
  // of two less than or equal to the maximum inline atomic width, we know it
  // is lock-free. If the size isn't a power of two, or greater than the
  // maximum alignment where we promote atomics, we know it is not lock-free
  // (at least not in the sense of atomic_is_lock_free). Otherwise,
  // the answer can only be determined at runtime; for example, 16-byte
  // atomics have lock-free implementations on some, but not all,
  // x86-64 processors.

  // Check power-of-two.
  CharUnits Size = CharUnits::fromQuantity(Quantity: SizeVal);
  if (Size.isPowerOfTwo()) {
    // Check against inlining width.
    unsigned InlineWidthBits =
        S.getASTContext().getTargetInfo().getMaxAtomicInlineWidth();
    if (Size <= S.getASTContext().toCharUnitsFromBits(BitSize: InlineWidthBits)) {

      // OK, we will inline appropriately-aligned operations of this size,
      // and _Atomic(T) is appropriately-aligned.
      if (Size == CharUnits::One())
        return returnBool(true);

      // Same for null pointers.
      assert(BuiltinOp != Builtin::BI__c11_atomic_is_lock_free);
      if (Ptr.isZero())
        return returnBool(true);

      // An integral pointer value whose value is a multiple of Size is
      // sufficiently aligned.
      if (Ptr.isIntegralPointer()) {
        uint64_t IntVal = Ptr.getIntegerRepresentation();
        if (APSInt(APInt(64, IntVal, false), true).isAligned(A: Size.getAsAlign()))
          return returnBool(true);
      }

      const Expr *PtrArg = Call->getArg(Arg: 1);
      // Otherwise, check the type's alignment against Size.
      if (const auto *ICE = dyn_cast<ImplicitCastExpr>(Val: PtrArg)) {
        // Drop the potential implicit-cast to 'const volatile void*', getting
        // the underlying type.
        if (ICE->getCastKind() == CK_BitCast)
          PtrArg = ICE->getSubExpr();
      }

      if (const auto *PtrTy = PtrArg->getType()->getAs<PointerType>()) {
        QualType PointeeType = PtrTy->getPointeeType();
        if (!PointeeType->isIncompleteType() &&
            S.getASTContext().getTypeAlignInChars(T: PointeeType) >= Size) {
          // OK, we will inline operations on this object.
          return returnBool(true);
        }
      }
    }
  }

  // __atomic_always_lock_free can definitively answer "no"; the is_lock_free
  // variant might still be lock-free at runtime, so evaluation fails instead
  // of returning false.
  if (BuiltinOp == Builtin::BI__atomic_always_lock_free)
    return returnBool(false);

  return Invalid(S, OpPC);
}
1133
1134/// bool __c11_atomic_is_lock_free(size_t)
1135static bool interp__builtin_c11_atomic_is_lock_free(InterpState &S,
1136 CodePtr OpPC,
1137 const InterpFrame *Frame,
1138 const CallExpr *Call) {
1139 uint64_t SizeVal = popToUInt64(S, E: Call->getArg(Arg: 0));
1140
1141 CharUnits Size = CharUnits::fromQuantity(Quantity: SizeVal);
1142 if (Size.isPowerOfTwo()) {
1143 // Check against inlining width.
1144 unsigned InlineWidthBits =
1145 S.getASTContext().getTargetInfo().getMaxAtomicInlineWidth();
1146 if (Size <= S.getASTContext().toCharUnitsFromBits(BitSize: InlineWidthBits)) {
1147 S.Stk.push<Boolean>(Args: true);
1148 return true;
1149 }
1150 }
1151
1152 return false; // returnBool(false);
1153}
1154
1155/// __builtin_complex(Float A, float B);
1156static bool interp__builtin_complex(InterpState &S, CodePtr OpPC,
1157 const InterpFrame *Frame,
1158 const CallExpr *Call) {
1159 const Floating &Arg2 = S.Stk.pop<Floating>();
1160 const Floating &Arg1 = S.Stk.pop<Floating>();
1161 Pointer &Result = S.Stk.peek<Pointer>();
1162
1163 Result.elem<Floating>(I: 0) = Arg1;
1164 Result.elem<Floating>(I: 1) = Arg2;
1165 Result.initializeAllElements();
1166
1167 return true;
1168}
1169
/// __builtin_is_aligned()
/// __builtin_align_up()
/// __builtin_align_down()
/// The first parameter is either an integer or a pointer.
/// The second parameter is the requested alignment as an integer.
static bool interp__builtin_is_aligned_up_down(InterpState &S, CodePtr OpPC,
                                               const InterpFrame *Frame,
                                               const CallExpr *Call,
                                               unsigned BuiltinOp) {
  const APSInt &Alignment = popToAPSInt(S, E: Call->getArg(Arg: 1));

  // The requested alignment must be a positive power of two.
  if (Alignment < 0 || !Alignment.isPowerOf2()) {
    S.FFDiag(E: Call, DiagId: diag::note_constexpr_invalid_alignment) << Alignment;
    return false;
  }
  // Reject alignments that cannot be represented in the first argument's
  // type (largest representable power of two).
  unsigned SrcWidth = S.getASTContext().getIntWidth(T: Call->getArg(Arg: 0)->getType());
  APSInt MaxValue(APInt::getOneBitSet(numBits: SrcWidth, BitNo: SrcWidth - 1));
  if (APSInt::compareValues(I1: Alignment, I2: MaxValue) > 0) {
    S.FFDiag(E: Call, DiagId: diag::note_constexpr_alignment_too_big)
        << MaxValue << Call->getArg(Arg: 0)->getType() << Alignment;
    return false;
  }

  // The first parameter is either an integer or a pointer.
  PrimType FirstArgT = *S.Ctx.classify(E: Call->getArg(Arg: 0));

  if (isIntegerType(T: FirstArgT)) {
    // Integer case: the result is computed directly with bit masks.
    const APSInt &Src = popToAPSInt(Stk&: S.Stk, T: FirstArgT);
    APInt AlignMinusOne = Alignment.extOrTrunc(width: Src.getBitWidth()) - 1;
    if (BuiltinOp == Builtin::BI__builtin_align_up) {
      APSInt AlignedVal =
          APSInt((Src + AlignMinusOne) & ~AlignMinusOne, Src.isUnsigned());
      pushInteger(S, Val: AlignedVal, QT: Call->getType());
    } else if (BuiltinOp == Builtin::BI__builtin_align_down) {
      APSInt AlignedVal = APSInt(Src & ~AlignMinusOne, Src.isUnsigned());
      pushInteger(S, Val: AlignedVal, QT: Call->getType());
    } else {
      assert(*S.Ctx.classify(Call->getType()) == PT_Bool);
      S.Stk.push<Boolean>(Args: (Src & AlignMinusOne) == 0);
    }
    return true;
  }
  assert(FirstArgT == PT_Ptr);
  const Pointer &Ptr = S.Stk.pop<Pointer>();
  if (!Ptr.isBlockPointer())
    return false;

  // For one-past-end pointers, we can't call getIndex() since it asserts.
  // Use getNumElems() instead which gives the correct index for past-end.
  unsigned PtrOffset =
      Ptr.isElementPastEnd() ? Ptr.getNumElems() : Ptr.getIndex();
  // The pointer's known alignment is its base declaration's alignment,
  // adjusted by the offset into the object.
  CharUnits BaseAlignment =
      S.getASTContext().getDeclAlign(D: Ptr.getDeclDesc()->asValueDecl());
  CharUnits PtrAlign =
      BaseAlignment.alignmentAtOffset(offset: CharUnits::fromQuantity(Quantity: PtrOffset));

  if (BuiltinOp == Builtin::BI__builtin_is_aligned) {
    if (PtrAlign.getQuantity() >= Alignment) {
      S.Stk.push<Boolean>(Args: true);
      return true;
    }
    // If the alignment is not known to be sufficient, some cases could still
    // be aligned at run time. However, if the requested alignment is less or
    // equal to the base alignment and the offset is not aligned, we know that
    // the run-time value can never be aligned.
    if (BaseAlignment.getQuantity() >= Alignment &&
        PtrAlign.getQuantity() < Alignment) {
      S.Stk.push<Boolean>(Args: false);
      return true;
    }

    S.FFDiag(E: Call->getArg(Arg: 0), DiagId: diag::note_constexpr_alignment_compute)
        << Alignment;
    return false;
  }

  assert(BuiltinOp == Builtin::BI__builtin_align_down ||
         BuiltinOp == Builtin::BI__builtin_align_up);

  // For align_up/align_down, we can return the same value if the alignment
  // is known to be greater or equal to the requested value.
  if (PtrAlign.getQuantity() >= Alignment) {
    S.Stk.push<Pointer>(Args: Ptr);
    return true;
  }

  // The alignment could be greater than the minimum at run-time, so we cannot
  // infer much about the resulting pointer value. One case is possible:
  // For `_Alignas(32) char buf[N]; __builtin_align_down(&buf[idx], 32)` we
  // can infer the correct index if the requested alignment is smaller than
  // the base alignment so we can perform the computation on the offset.
  if (BaseAlignment.getQuantity() >= Alignment) {
    assert(Alignment.getBitWidth() <= 64 &&
           "Cannot handle > 64-bit address-space");
    uint64_t Alignment64 = Alignment.getZExtValue();
    CharUnits NewOffset =
        CharUnits::fromQuantity(Quantity: BuiltinOp == Builtin::BI__builtin_align_down
                                    ? llvm::alignDown(Value: PtrOffset, Align: Alignment64)
                                    : llvm::alignTo(Value: PtrOffset, Align: Alignment64));

    S.Stk.push<Pointer>(Args: Ptr.atIndex(Idx: NewOffset.getQuantity()));
    return true;
  }

  // Otherwise, we cannot constant-evaluate the result.
  S.FFDiag(E: Call->getArg(Arg: 0), DiagId: diag::note_constexpr_alignment_adjust) << Alignment;
  return false;
}
1278
/// __builtin_assume_aligned(Ptr, Alignment[, ExtraOffset])
static bool interp__builtin_assume_aligned(InterpState &S, CodePtr OpPC,
                                           const InterpFrame *Frame,
                                           const CallExpr *Call) {
  assert(Call->getNumArgs() == 2 || Call->getNumArgs() == 3);

  // The optional third argument is an extra offset subtracted before the
  // alignment check.
  std::optional<APSInt> ExtraOffset;
  if (Call->getNumArgs() == 3)
    ExtraOffset = popToAPSInt(Stk&: S.Stk, T: *S.Ctx.classify(E: Call->getArg(Arg: 2)));

  APSInt Alignment = popToAPSInt(Stk&: S.Stk, T: *S.Ctx.classify(E: Call->getArg(Arg: 1)));
  const Pointer &Ptr = S.Stk.pop<Pointer>();

  CharUnits Align = CharUnits::fromQuantity(Quantity: Alignment.getZExtValue());

  // If there is a base object, then it must have the correct alignment.
  if (Ptr.isBlockPointer()) {
    CharUnits BaseAlignment;
    if (const auto *VD = Ptr.getDeclDesc()->asValueDecl())
      BaseAlignment = S.getASTContext().getDeclAlign(D: VD);
    else if (const auto *E = Ptr.getDeclDesc()->asExpr())
      BaseAlignment = GetAlignOfExpr(Ctx: S.getASTContext(), E, ExprKind: UETT_AlignOf);

    if (BaseAlignment < Align) {
      S.CCEDiag(E: Call->getArg(Arg: 0),
                DiagId: diag::note_constexpr_baa_insufficient_alignment)
          << 0 << BaseAlignment.getQuantity() << Align.getQuantity();
      return false;
    }
  }

  // Check the actual offset (minus any extra offset) against the claimed
  // alignment.
  APValue AV = Ptr.toAPValue(ASTCtx: S.getASTContext());
  CharUnits AVOffset = AV.getLValueOffset();
  if (ExtraOffset)
    AVOffset -= CharUnits::fromQuantity(Quantity: ExtraOffset->getZExtValue());
  if (AVOffset.alignTo(Align) != AVOffset) {
    if (Ptr.isBlockPointer())
      S.CCEDiag(E: Call->getArg(Arg: 0),
                DiagId: diag::note_constexpr_baa_insufficient_alignment)
          << 1 << AVOffset.getQuantity() << Align.getQuantity();
    else
      S.CCEDiag(E: Call->getArg(Arg: 0),
                DiagId: diag::note_constexpr_baa_value_insufficient_alignment)
          << AVOffset.getQuantity() << Align.getQuantity();
    return false;
  }

  // The builtin is a pass-through of the pointer value.
  S.Stk.push<Pointer>(Args: Ptr);
  return true;
}
1329
1330/// (CarryIn, LHS, RHS, Result)
1331static bool interp__builtin_ia32_addcarry_subborrow(InterpState &S,
1332 CodePtr OpPC,
1333 const InterpFrame *Frame,
1334 const CallExpr *Call,
1335 unsigned BuiltinOp) {
1336 if (Call->getNumArgs() != 4 || !Call->getArg(Arg: 0)->getType()->isIntegerType() ||
1337 !Call->getArg(Arg: 1)->getType()->isIntegerType() ||
1338 !Call->getArg(Arg: 2)->getType()->isIntegerType())
1339 return false;
1340
1341 const Pointer &CarryOutPtr = S.Stk.pop<Pointer>();
1342
1343 APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: 2));
1344 APSInt LHS = popToAPSInt(S, E: Call->getArg(Arg: 1));
1345 APSInt CarryIn = popToAPSInt(S, E: Call->getArg(Arg: 0));
1346
1347 bool IsAdd = BuiltinOp == clang::X86::BI__builtin_ia32_addcarryx_u32 ||
1348 BuiltinOp == clang::X86::BI__builtin_ia32_addcarryx_u64;
1349
1350 unsigned BitWidth = LHS.getBitWidth();
1351 unsigned CarryInBit = CarryIn.ugt(RHS: 0) ? 1 : 0;
1352 APInt ExResult =
1353 IsAdd ? (LHS.zext(width: BitWidth + 1) + (RHS.zext(width: BitWidth + 1) + CarryInBit))
1354 : (LHS.zext(width: BitWidth + 1) - (RHS.zext(width: BitWidth + 1) + CarryInBit));
1355
1356 APInt Result = ExResult.extractBits(numBits: BitWidth, bitPosition: 0);
1357 APSInt CarryOut =
1358 APSInt(ExResult.extractBits(numBits: 1, bitPosition: BitWidth), /*IsUnsigned=*/true);
1359
1360 QualType CarryOutType = Call->getArg(Arg: 3)->getType()->getPointeeType();
1361 PrimType CarryOutT = *S.getContext().classify(T: CarryOutType);
1362 assignInteger(S, Dest: CarryOutPtr, ValueT: CarryOutT, Value: APSInt(std::move(Result), true));
1363
1364 pushInteger(S, Val: CarryOut, QT: Call->getType());
1365
1366 return true;
1367}
1368
/// Returns the buffer size computed by the shared os_log layout analysis.
static bool interp__builtin_os_log_format_buffer_size(InterpState &S,
                                                      CodePtr OpPC,
                                                      const InterpFrame *Frame,
                                                      const CallExpr *Call) {
  // The layout is derived from the call expression itself; no interpreter
  // stack values are consumed here.
  analyze_os_log::OSLogBufferLayout Layout;
  analyze_os_log::computeOSLogBufferLayout(Ctx&: S.getASTContext(), E: Call, layout&: Layout);
  pushInteger(S, Val: Layout.size().getQuantity(), QT: Call->getType());
  return true;
}
1378
/// Computes the stable SipHash discriminator of the string argument.
static bool
interp__builtin_ptrauth_string_discriminator(InterpState &S, CodePtr OpPC,
                                             const InterpFrame *Frame,
                                             const CallExpr *Call) {
  const auto &Ptr = S.Stk.pop<Pointer>();
  assert(Ptr.getFieldDesc()->isPrimitiveArray());

  // This should be created for a StringLiteral, so should always hold at
  // least one array element. The last element (the terminator) is excluded
  // from the hashed range below.
  assert(Ptr.getFieldDesc()->getNumElems() >= 1);
  StringRef R(&Ptr.deref<char>(), Ptr.getFieldDesc()->getNumElems() - 1);
  uint64_t Result = getPointerAuthStableSipHash(S: R);
  pushInteger(S, Val: Result, QT: Call->getType());
  return true;
}
1394
/// Evaluates __builtin_infer_alloc_token: infers the allocated type from the
/// call and returns the corresponding allocation token as a size_t.
static bool interp__builtin_infer_alloc_token(InterpState &S, CodePtr OpPC,
                                              const InterpFrame *Frame,
                                              const CallExpr *Call) {
  const ASTContext &ASTCtx = S.getASTContext();
  // Tokens are size_t-sized; BitWidth bounds the default token range below.
  uint64_t BitWidth = ASTCtx.getTypeSize(T: ASTCtx.getSizeType());
  auto Mode =
      ASTCtx.getLangOpts().AllocTokenMode.value_or(u: llvm::DefaultAllocTokenMode);
  auto MaxTokensOpt = ASTCtx.getLangOpts().AllocTokenMax;
  // A configured non-zero maximum wins; otherwise every size_t value is a
  // valid token.
  uint64_t MaxTokens =
      MaxTokensOpt.value_or(u: 0) ? *MaxTokensOpt : (~0ULL >> (64 - BitWidth));

  // We do not read any of the arguments; discard them.
  for (int I = Call->getNumArgs() - 1; I >= 0; --I)
    discard(Stk&: S.Stk, T: S.getContext().classify(E: Call->getArg(Arg: I)).value_or(PT: PT_Ptr));

  // Note: Type inference from a surrounding cast is not supported in
  // constexpr evaluation.
  QualType AllocType = infer_alloc::inferPossibleType(E: Call, Ctx: ASTCtx, CastE: nullptr);
  if (AllocType.isNull()) {
    S.CCEDiag(E: Call,
              DiagId: diag::note_constexpr_infer_alloc_token_type_inference_failed);
    return false;
  }

  // Derive the token metadata for the inferred type.
  auto ATMD = infer_alloc::getAllocTokenMetadata(T: AllocType, Ctx: ASTCtx);
  if (!ATMD) {
    S.CCEDiag(E: Call, DiagId: diag::note_constexpr_infer_alloc_token_no_metadata);
    return false;
  }

  // Stateful token modes cannot be evaluated at compile time.
  auto MaybeToken = llvm::getAllocToken(Mode, Metadata: *ATMD, MaxTokens);
  if (!MaybeToken) {
    S.CCEDiag(E: Call, DiagId: diag::note_constexpr_infer_alloc_token_stateful_mode);
    return false;
  }

  pushInteger(S, Val: llvm::APInt(BitWidth, *MaybeToken), QT: ASTCtx.getSizeType());
  return true;
}
1434
1435static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC,
1436 const InterpFrame *Frame,
1437 const CallExpr *Call) {
1438 // A call to __operator_new is only valid within std::allocate<>::allocate.
1439 // Walk up the call stack to find the appropriate caller and get the
1440 // element type from it.
1441 auto [NewCall, ElemType] = S.getStdAllocatorCaller(Name: "allocate");
1442
1443 if (ElemType.isNull()) {
1444 S.FFDiag(E: Call, DiagId: S.getLangOpts().CPlusPlus20
1445 ? diag::note_constexpr_new_untyped
1446 : diag::note_constexpr_new);
1447 return false;
1448 }
1449 assert(NewCall);
1450
1451 if (ElemType->isIncompleteType() || ElemType->isFunctionType()) {
1452 S.FFDiag(E: Call, DiagId: diag::note_constexpr_new_not_complete_object_type)
1453 << (ElemType->isIncompleteType() ? 0 : 1) << ElemType;
1454 return false;
1455 }
1456
1457 // We only care about the first parameter (the size), so discard all the
1458 // others.
1459 {
1460 unsigned NumArgs = Call->getNumArgs();
1461 assert(NumArgs >= 1);
1462
1463 // The std::nothrow_t arg never gets put on the stack.
1464 if (Call->getArg(Arg: NumArgs - 1)->getType()->isNothrowT())
1465 --NumArgs;
1466 auto Args = ArrayRef(Call->getArgs(), Call->getNumArgs());
1467 // First arg is needed.
1468 Args = Args.drop_front();
1469
1470 // Discard the rest.
1471 for (const Expr *Arg : Args)
1472 discard(Stk&: S.Stk, T: *S.getContext().classify(E: Arg));
1473 }
1474
1475 APSInt Bytes = popToAPSInt(S, E: Call->getArg(Arg: 0));
1476 CharUnits ElemSize = S.getASTContext().getTypeSizeInChars(T: ElemType);
1477 assert(!ElemSize.isZero());
1478 // Divide the number of bytes by sizeof(ElemType), so we get the number of
1479 // elements we should allocate.
1480 APInt NumElems, Remainder;
1481 APInt ElemSizeAP(Bytes.getBitWidth(), ElemSize.getQuantity());
1482 APInt::udivrem(LHS: Bytes, RHS: ElemSizeAP, Quotient&: NumElems, Remainder);
1483 if (Remainder != 0) {
1484 // This likely indicates a bug in the implementation of 'std::allocator'.
1485 S.FFDiag(E: Call, DiagId: diag::note_constexpr_operator_new_bad_size)
1486 << Bytes << APSInt(ElemSizeAP, true) << ElemType;
1487 return false;
1488 }
1489
1490 // NB: The same check we're using in CheckArraySize()
1491 if (NumElems.getActiveBits() >
1492 ConstantArrayType::getMaxSizeBits(Context: S.getASTContext()) ||
1493 NumElems.ugt(RHS: Descriptor::MaxArrayElemBytes / ElemSize.getQuantity())) {
1494 // FIXME: NoThrow check?
1495 const SourceInfo &Loc = S.Current->getSource(PC: OpPC);
1496 S.FFDiag(SI: Loc, DiagId: diag::note_constexpr_new_too_large)
1497 << NumElems.getZExtValue();
1498 return false;
1499 }
1500
1501 if (!CheckArraySize(S, OpPC, NumElems: NumElems.getZExtValue()))
1502 return false;
1503
1504 bool IsArray = NumElems.ugt(RHS: 1);
1505 OptPrimType ElemT = S.getContext().classify(T: ElemType);
1506 DynamicAllocator &Allocator = S.getAllocator();
1507 if (ElemT) {
1508 Block *B =
1509 Allocator.allocate(Source: NewCall, T: *ElemT, NumElements: NumElems.getZExtValue(),
1510 EvalID: S.Ctx.getEvalID(), AllocForm: DynamicAllocator::Form::Operator);
1511 assert(B);
1512 S.Stk.push<Pointer>(Args: Pointer(B).atIndex(Idx: 0));
1513 return true;
1514 }
1515
1516 assert(!ElemT);
1517
1518 // Composite arrays
1519 if (IsArray) {
1520 const Descriptor *Desc =
1521 S.P.createDescriptor(D: NewCall, Ty: ElemType.getTypePtr(), MDSize: std::nullopt);
1522 Block *B =
1523 Allocator.allocate(D: Desc, NumElements: NumElems.getZExtValue(), EvalID: S.Ctx.getEvalID(),
1524 AllocForm: DynamicAllocator::Form::Operator);
1525 assert(B);
1526 S.Stk.push<Pointer>(Args: Pointer(B).atIndex(Idx: 0).narrow());
1527 return true;
1528 }
1529
1530 // Records. Still allocate them as single-element arrays.
1531 QualType AllocType = S.getASTContext().getConstantArrayType(
1532 EltTy: ElemType, ArySize: NumElems, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);
1533
1534 const Descriptor *Desc = S.P.createDescriptor(D: NewCall, Ty: AllocType.getTypePtr(),
1535 MDSize: Descriptor::InlineDescMD);
1536 Block *B = Allocator.allocate(D: Desc, EvalID: S.getContext().getEvalID(),
1537 AllocForm: DynamicAllocator::Form::Operator);
1538 assert(B);
1539 S.Stk.push<Pointer>(Args: Pointer(B).atIndex(Idx: 0).narrow());
1540 return true;
1541}
1542
static bool interp__builtin_operator_delete(InterpState &S, CodePtr OpPC,
                                            const InterpFrame *Frame,
                                            const CallExpr *Call) {
  const Expr *Source = nullptr;
  const Block *BlockToDelete = nullptr;

  // Bail out while only checking for potential constant expressions, but
  // still consume the pointer argument.
  if (S.checkingPotentialConstantExpression()) {
    S.Stk.discard<Pointer>();
    return false;
  }

  // This is permitted only within a call to std::allocator<T>::deallocate.
  if (!S.getStdAllocatorCaller(Name: "deallocate")) {
    S.FFDiag(E: Call);
    S.Stk.discard<Pointer>();
    return true;
  }

  {
    const Pointer &Ptr = S.Stk.pop<Pointer>();

    // Deallocating a null pointer is diagnosed but treated as a no-op.
    if (Ptr.isZero()) {
      S.CCEDiag(E: Call, DiagId: diag::note_constexpr_deallocate_null);
      return true;
    }

    Source = Ptr.getDeclDesc()->asExpr();
    BlockToDelete = Ptr.block();

    // NOTE(review): this branch diagnoses but does not bail out; control
    // continues to the deallocate call below — confirm this is intentional.
    if (!BlockToDelete->isDynamic()) {
      S.FFDiag(E: Call, DiagId: diag::note_constexpr_delete_not_heap_alloc)
          << Ptr.toDiagnosticString(Ctx: S.getASTContext());
      if (const auto *D = Ptr.getFieldDesc()->asDecl())
        S.Note(Loc: D->getLocation(), DiagId: diag::note_declared_at);
    }
  }
  assert(BlockToDelete);

  // Record the allocation form and descriptor before deallocating; both are
  // needed for the form check afterwards.
  DynamicAllocator &Allocator = S.getAllocator();
  const Descriptor *BlockDesc = BlockToDelete->getDescriptor();
  std::optional<DynamicAllocator::Form> AllocForm =
      Allocator.getAllocationForm(Source);

  if (!Allocator.deallocate(Source, BlockToDelete, S)) {
    // Nothing has been deallocated, this must be a double-delete.
    const SourceInfo &Loc = S.Current->getSource(PC: OpPC);
    S.FFDiag(SI: Loc, DiagId: diag::note_constexpr_double_delete);
    return false;
  }
  assert(AllocForm);

  // Verify the allocation form matches an operator-delete deallocation.
  return CheckNewDeleteForms(
      S, OpPC, AllocForm: *AllocForm, DeleteForm: DynamicAllocator::Form::Operator, D: BlockDesc, NewExpr: Source);
}
1597
1598static bool interp__builtin_arithmetic_fence(InterpState &S, CodePtr OpPC,
1599 const InterpFrame *Frame,
1600 const CallExpr *Call) {
1601 const Floating &Arg0 = S.Stk.pop<Floating>();
1602 S.Stk.push<Floating>(Args: Arg0);
1603 return true;
1604}
1605
/// Horizontal reduction of an integer vector (add/mul/and/or/xor/min/max).
static bool interp__builtin_vector_reduce(InterpState &S, CodePtr OpPC,
                                          const CallExpr *Call, unsigned ID) {
  const Pointer &Arg = S.Stk.pop<Pointer>();
  assert(Arg.getFieldDesc()->isPrimitiveArray());

  QualType ElemType = Arg.getFieldDesc()->getElemQualType();
  assert(Call->getType() == ElemType);
  PrimType ElemT = *S.getContext().classify(T: ElemType);
  unsigned NumElems = Arg.getNumElems();

  INT_TYPE_SWITCH_NO_BOOL(ElemT, {
    // Fold elements 1..N-1 into element 0.
    T Result = Arg.elem<T>(0);
    unsigned BitWidth = Result.bitWidth();
    for (unsigned I = 1; I != NumElems; ++I) {
      T Elem = Arg.elem<T>(I);
      // Kept so the overflow diagnostic can reconstruct the exact operands.
      T PrevResult = Result;

      if (ID == Builtin::BI__builtin_reduce_add) {
        if (T::add(Result, Elem, BitWidth, &Result)) {
          // Overflow: rebuild the exact value in a wider type for the
          // diagnostic, then fail the evaluation.
          unsigned OverflowBits = BitWidth + 1;
          (void)handleOverflow(S, OpPC,
                               (PrevResult.toAPSInt(OverflowBits) +
                                Elem.toAPSInt(OverflowBits)));
          return false;
        }
      } else if (ID == Builtin::BI__builtin_reduce_mul) {
        if (T::mul(Result, Elem, BitWidth, &Result)) {
          // A product needs up to double the bits to be represented exactly.
          unsigned OverflowBits = BitWidth * 2;
          (void)handleOverflow(S, OpPC,
                               (PrevResult.toAPSInt(OverflowBits) *
                                Elem.toAPSInt(OverflowBits)));
          return false;
        }

      } else if (ID == Builtin::BI__builtin_reduce_and) {
        // Bitwise reductions cannot overflow.
        (void)T::bitAnd(Result, Elem, BitWidth, &Result);
      } else if (ID == Builtin::BI__builtin_reduce_or) {
        (void)T::bitOr(Result, Elem, BitWidth, &Result);
      } else if (ID == Builtin::BI__builtin_reduce_xor) {
        (void)T::bitXor(Result, Elem, BitWidth, &Result);
      } else if (ID == Builtin::BI__builtin_reduce_min) {
        if (Elem < Result)
          Result = Elem;
      } else if (ID == Builtin::BI__builtin_reduce_max) {
        if (Elem > Result)
          Result = Elem;
      } else {
        llvm_unreachable("Unhandled vector reduce builtin");
      }
    }
    pushInteger(S, Result.toAPSInt(), Call->getType());
  });

  return true;
}
1661
/// __builtin_elementwise_abs: absolute value of a scalar integer, a scalar
/// floating-point value, or each element of a vector of either.
static bool interp__builtin_elementwise_abs(InterpState &S, CodePtr OpPC,
                                            const InterpFrame *Frame,
                                            const CallExpr *Call,
                                            unsigned BuiltinID) {
  assert(Call->getNumArgs() == 1);
  QualType Ty = Call->getArg(Arg: 0)->getType();
  // Scalar integer case.
  if (Ty->isIntegerType()) {
    APSInt Val = popToAPSInt(S, E: Call->getArg(Arg: 0));
    pushInteger(S, Val: Val.abs(), QT: Call->getType());
    return true;
  }

  // Scalar floating-point case.
  if (Ty->isFloatingType()) {
    Floating Val = S.Stk.pop<Floating>();
    Floating Result = abs(S, In: Val);
    S.Stk.push<Floating>(Args&: Result);
    return true;
  }

  // Otherwise, the argument must be a vector.
  assert(Call->getArg(0)->getType()->isVectorType());
  const Pointer &Arg = S.Stk.pop<Pointer>();
  assert(Arg.getFieldDesc()->isPrimitiveArray());
  // The result vector is written into the block the caller left on top of
  // the stack.
  const Pointer &Dst = S.Stk.peek<Pointer>();
  assert(Dst.getFieldDesc()->isPrimitiveArray());
  assert(Arg.getFieldDesc()->getNumElems() ==
         Dst.getFieldDesc()->getNumElems());

  QualType ElemType = Arg.getFieldDesc()->getElemQualType();
  PrimType ElemT = *S.getContext().classify(T: ElemType);
  unsigned NumElems = Arg.getNumElems();
  // we can either have a vector of integer or a vector of floating point
  for (unsigned I = 0; I != NumElems; ++I) {
    if (ElemType->isIntegerType()) {
      INT_TYPE_SWITCH_NO_BOOL(ElemT, {
        Dst.elem<T>(I) = T::from(static_cast<T>(
            APSInt(Arg.elem<T>(I).toAPSInt().abs(),
                   ElemType->isUnsignedIntegerOrEnumerationType())));
      });
    } else {
      Floating Val = Arg.elem<Floating>(I);
      Dst.elem<Floating>(I) = abs(S, In: Val);
    }
  }
  Dst.initializeAllElements();

  return true;
}
1710
/// Can be called with an integer or vector as the first and only parameter.
static bool interp__builtin_elementwise_countzeroes(InterpState &S,
                                                    CodePtr OpPC,
                                                    const InterpFrame *Frame,
                                                    const CallExpr *Call,
                                                    unsigned BuiltinID) {
  // An optional second argument provides the result for zero inputs.
  bool HasZeroArg = Call->getNumArgs() == 2;
  bool IsCTTZ = BuiltinID == Builtin::BI__builtin_elementwise_ctzg;
  assert(Call->getNumArgs() == 1 || HasZeroArg);
  // Scalar integer case.
  if (Call->getArg(Arg: 0)->getType()->isIntegerType()) {
    PrimType ArgT = *S.getContext().classify(T: Call->getArg(Arg: 0)->getType());
    APSInt Val = popToAPSInt(Stk&: S.Stk, T: ArgT);
    std::optional<APSInt> ZeroVal;
    if (HasZeroArg) {
      // The first pop was the fallback; the actual operand is below it.
      ZeroVal = Val;
      Val = popToAPSInt(Stk&: S.Stk, T: ArgT);
    }

    if (Val.isZero()) {
      if (ZeroVal) {
        pushInteger(S, Val: *ZeroVal, QT: Call->getType());
        return true;
      }
      // If we haven't been provided the second argument, the result is
      // undefined
      S.FFDiag(SI: S.Current->getSource(PC: OpPC),
               DiagId: diag::note_constexpr_countzeroes_zero)
          << /*IsTrailing=*/IsCTTZ;
      return false;
    }

    if (BuiltinID == Builtin::BI__builtin_elementwise_clzg) {
      pushInteger(S, Val: Val.countLeadingZeros(), QT: Call->getType());
    } else {
      pushInteger(S, Val: Val.countTrailingZeros(), QT: Call->getType());
    }
    return true;
  }
  // Otherwise, the argument must be a vector.
  const ASTContext &ASTCtx = S.getASTContext();
  Pointer ZeroArg;
  if (HasZeroArg) {
    // The fallback vector must have the same type as the operand vector.
    assert(Call->getArg(1)->getType()->isVectorType() &&
           ASTCtx.hasSameUnqualifiedType(Call->getArg(0)->getType(),
                                         Call->getArg(1)->getType()));
    (void)ASTCtx;
    ZeroArg = S.Stk.pop<Pointer>();
    assert(ZeroArg.getFieldDesc()->isPrimitiveArray());
  }
  assert(Call->getArg(0)->getType()->isVectorType());
  const Pointer &Arg = S.Stk.pop<Pointer>();
  assert(Arg.getFieldDesc()->isPrimitiveArray());
  // The result vector is written into the block left on top of the stack.
  const Pointer &Dst = S.Stk.peek<Pointer>();
  assert(Dst.getFieldDesc()->isPrimitiveArray());
  assert(Arg.getFieldDesc()->getNumElems() ==
         Dst.getFieldDesc()->getNumElems());

  QualType ElemType = Arg.getFieldDesc()->getElemQualType();
  PrimType ElemT = *S.getContext().classify(T: ElemType);
  unsigned NumElems = Arg.getNumElems();

  // FIXME: Reading from uninitialized vector elements?
  for (unsigned I = 0; I != NumElems; ++I) {
    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
      APInt EltVal = Arg.atIndex(I).deref<T>().toAPSInt();
      if (EltVal.isZero()) {
        if (HasZeroArg) {
          // Take the fallback element for this lane.
          Dst.atIndex(I).deref<T>() = ZeroArg.atIndex(I).deref<T>();
        } else {
          // If we haven't been provided the second argument, the result is
          // undefined
          S.FFDiag(S.Current->getSource(OpPC),
                   diag::note_constexpr_countzeroes_zero)
              << /*IsTrailing=*/IsCTTZ;
          return false;
        }
      } else if (IsCTTZ) {
        Dst.atIndex(I).deref<T>() = T::from(EltVal.countTrailingZeros());
      } else {
        Dst.atIndex(I).deref<T>() = T::from(EltVal.countLeadingZeros());
      }
      Dst.atIndex(I).initialize();
    });
  }

  return true;
}
1798
/// Implements the constant-expression evaluation of memcpy/memmove and their
/// __builtin_ and wide-char (wmemcpy/wmemmove) variants. Pops the size and
/// the two pointers from the stack, validates the copy, performs it, and on
/// success pushes the destination pointer as the call's result value.
static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call, unsigned ID) {
  assert(Call->getNumArgs() == 3);
  const ASTContext &ASTCtx = S.getASTContext();
  // Pop order is the reverse of argument order: size, then src, then dest.
  uint64_t Size = popToUInt64(S, E: Call->getArg(Arg: 2));
  Pointer SrcPtr = S.Stk.pop<Pointer>().expand();
  Pointer DestPtr = S.Stk.pop<Pointer>().expand();

  // The plain library functions (as opposed to the __builtin_ forms) are not
  // generally usable in constant expressions; diagnose that.
  if (ID == Builtin::BImemcpy || ID == Builtin::BImemmove)
    diagnoseNonConstexprBuiltin(S, OpPC, ID);

  // memmove/wmemmove permit overlapping regions; memcpy/wmemcpy do not.
  bool Move =
      (ID == Builtin::BI__builtin_memmove || ID == Builtin::BImemmove ||
       ID == Builtin::BI__builtin_wmemmove || ID == Builtin::BIwmemmove);
  // For the w-variants, Size counts wchar_t elements rather than bytes.
  bool WChar = ID == Builtin::BIwmemcpy || ID == Builtin::BIwmemmove ||
               ID == Builtin::BI__builtin_wmemcpy ||
               ID == Builtin::BI__builtin_wmemmove;

  // If the size is zero, we treat this as always being a valid no-op.
  if (Size == 0) {
    S.Stk.push<Pointer>(Args&: DestPtr);
    return true;
  }

  // Copying to or from a null pointer is invalid (size is nonzero here).
  if (SrcPtr.isZero() || DestPtr.isZero()) {
    Pointer DiagPtr = (SrcPtr.isZero() ? SrcPtr : DestPtr);
    S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_null)
        << /*IsMove=*/Move << /*IsWchar=*/WChar << !SrcPtr.isZero()
        << DiagPtr.toDiagnosticString(Ctx: ASTCtx);
    return false;
  }

  // Diagnose integral src/dest pointers specially.
  if (SrcPtr.isIntegralPointer() || DestPtr.isIntegralPointer()) {
    std::string DiagVal = "(void *)";
    DiagVal += SrcPtr.isIntegralPointer()
                   ? std::to_string(val: SrcPtr.getIntegerRepresentation())
                   : std::to_string(val: DestPtr.getIntegerRepresentation());
    S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_null)
        << Move << WChar << DestPtr.isIntegralPointer() << DiagVal;
    return false;
  }

  if (!isReadable(P: DestPtr) || !isReadable(P: SrcPtr))
    return false;

  // Copying to or from an object of incomplete type is unsupported.
  if (DestPtr.getType()->isIncompleteType()) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC),
             DiagId: diag::note_constexpr_memcpy_incomplete_type)
        << Move << DestPtr.getType();
    return false;
  }
  if (SrcPtr.getType()->isIncompleteType()) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC),
             DiagId: diag::note_constexpr_memcpy_incomplete_type)
        << Move << SrcPtr.getType();
    return false;
  }

  QualType DestElemType = getElemType(P: DestPtr);
  if (DestElemType->isIncompleteType()) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC),
             DiagId: diag::note_constexpr_memcpy_incomplete_type)
        << Move << DestElemType;
    return false;
  }

  // Elements remaining in the destination, starting at its current index.
  size_t RemainingDestElems;
  if (DestPtr.getFieldDesc()->isArray()) {
    RemainingDestElems = DestPtr.isUnknownSizeArray()
                             ? 0
                             : (DestPtr.getNumElems() - DestPtr.getIndex());
  } else {
    RemainingDestElems = 1;
  }
  unsigned DestElemSize = ASTCtx.getTypeSizeInChars(T: DestElemType).getQuantity();

  // The wide variants measure Size in wchar_t units; scale to bytes.
  if (WChar) {
    uint64_t WCharSize =
        ASTCtx.getTypeSizeInChars(T: ASTCtx.getWCharType()).getQuantity();
    Size *= WCharSize;
  }

  // Only whole elements can be copied.
  if (Size % DestElemSize != 0) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC),
             DiagId: diag::note_constexpr_memcpy_unsupported)
        << Move << WChar << 0 << DestElemType << Size << DestElemSize;
    return false;
  }

  QualType SrcElemType = getElemType(P: SrcPtr);
  // Elements remaining in the source, starting at its current index.
  size_t RemainingSrcElems;
  if (SrcPtr.getFieldDesc()->isArray()) {
    RemainingSrcElems = SrcPtr.isUnknownSizeArray()
                            ? 0
                            : (SrcPtr.getNumElems() - SrcPtr.getIndex());
  } else {
    RemainingSrcElems = 1;
  }
  unsigned SrcElemSize = ASTCtx.getTypeSizeInChars(T: SrcElemType).getQuantity();

  // Copying between distinct element types would be type punning.
  if (!ASTCtx.hasSameUnqualifiedType(T1: DestElemType, T2: SrcElemType)) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_type_pun)
        << Move << SrcElemType << DestElemType;
    return false;
  }

  // Non-trivially-copyable element types cannot be memcpy'd.
  if (!DestElemType.isTriviallyCopyableType(Context: ASTCtx)) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_nontrivial)
        << Move << DestElemType;
    return false;
  }

  // Check if we have enough elements to read from and write to.
  size_t RemainingDestBytes = RemainingDestElems * DestElemSize;
  size_t RemainingSrcBytes = RemainingSrcElems * SrcElemSize;
  if (Size > RemainingDestBytes || Size > RemainingSrcBytes) {
    APInt N = APInt(64, Size / DestElemSize);
    S.FFDiag(SI: S.Current->getSource(PC: OpPC),
             DiagId: diag::note_constexpr_memcpy_unsupported)
        << Move << WChar << (Size > RemainingSrcBytes ? 1 : 2) << DestElemType
        << toString(I: N, Radix: 10, /*Signed=*/false);
    return false;
  }

  // Check for overlapping memory regions.
  if (!Move && Pointer::pointToSameBlock(A: SrcPtr, B: DestPtr)) {
    // Remove base casts.
    Pointer SrcP = SrcPtr.stripBaseCasts();
    Pointer DestP = DestPtr.stripBaseCasts();

    // Compare byte offsets within the common block.
    unsigned SrcIndex = SrcP.expand().getIndex() * SrcP.elemSize();
    unsigned DstIndex = DestP.expand().getIndex() * DestP.elemSize();

    if ((SrcIndex <= DstIndex && (SrcIndex + Size) > DstIndex) ||
        (DstIndex <= SrcIndex && (DstIndex + Size) > SrcIndex)) {
      S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_overlap)
          << /*IsWChar=*/false;
      return false;
    }
  }

  assert(Size % DestElemSize == 0);
  if (!DoMemcpy(S, OpPC, SrcPtr, DestPtr, Size: Bytes(Size).toBits()))
    return false;

  S.Stk.push<Pointer>(Args&: DestPtr);
  return true;
}
1949
1950/// Determine if T is a character type for which we guarantee that
1951/// sizeof(T) == 1.
1952static bool isOneByteCharacterType(QualType T) {
1953 return T->isCharType() || T->isChar8Type();
1954}
1955
/// Implements memcmp/bcmp/wmemcmp and their __builtin_ forms. Pops the
/// element/byte count and both pointers, then pushes a negative, zero or
/// positive integer describing the comparison result.
static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call, unsigned ID) {
  assert(Call->getNumArgs() == 3);
  // Pop order is the reverse of argument order: size, then B, then A.
  uint64_t Size = popToUInt64(S, E: Call->getArg(Arg: 2));
  const Pointer &PtrB = S.Stk.pop<Pointer>();
  const Pointer &PtrA = S.Stk.pop<Pointer>();

  // The plain library functions are not generally constexpr-usable.
  if (ID == Builtin::BImemcmp || ID == Builtin::BIbcmp ||
      ID == Builtin::BIwmemcmp)
    diagnoseNonConstexprBuiltin(S, OpPC, ID);

  // Comparing zero elements always yields equality.
  if (Size == 0) {
    pushInteger(S, Val: 0, QT: Call->getType());
    return true;
  }

  if (!PtrA.isBlockPointer() || !PtrB.isBlockPointer())
    return false;

  // Wide variants compare wchar_t-sized units instead of bytes.
  bool IsWide =
      (ID == Builtin::BIwmemcmp || ID == Builtin::BI__builtin_wmemcmp);

  const ASTContext &ASTCtx = S.getASTContext();
  QualType ElemTypeA = getElemType(P: PtrA);
  QualType ElemTypeB = getElemType(P: PtrB);
  // FIXME: This is an arbitrary limitation the current constant interpreter
  // had. We could remove this.
  if (!IsWide && (!isOneByteCharacterType(T: ElemTypeA) ||
                  !isOneByteCharacterType(T: ElemTypeB))) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC),
             DiagId: diag::note_constexpr_memcmp_unsupported)
        << ASTCtx.BuiltinInfo.getQuotedName(ID) << PtrA.getType()
        << PtrB.getType();
    return false;
  }

  if (!CheckLoad(S, OpPC, Ptr: PtrA, AK: AK_Read) || !CheckLoad(S, OpPC, Ptr: PtrB, AK: AK_Read))
    return false;

  // Now, read both pointers to a buffer and compare those.
  BitcastBuffer BufferA(
      Bits(ASTCtx.getTypeSize(T: ElemTypeA) * PtrA.getNumElems()));
  readPointerToBuffer(Ctx: S.getContext(), FromPtr: PtrA, Buffer&: BufferA, ReturnOnUninit: false);
  // FIXME: The swapping here is UNDOING something we do when reading the
  // data into the buffer.
  if (ASTCtx.getTargetInfo().isBigEndian())
    swapBytes(M: BufferA.Data.get(), N: BufferA.byteSize().getQuantity());

  BitcastBuffer BufferB(
      Bits(ASTCtx.getTypeSize(T: ElemTypeB) * PtrB.getNumElems()));
  readPointerToBuffer(Ctx: S.getContext(), FromPtr: PtrB, Buffer&: BufferB, ReturnOnUninit: false);
  // FIXME: The swapping here is UNDOING something we do when reading the
  // data into the buffer.
  if (ASTCtx.getTargetInfo().isBigEndian())
    swapBytes(M: BufferB.Data.get(), N: BufferB.byteSize().getQuantity());

  // We may only compare as many bytes as the smaller buffer holds.
  size_t MinBufferSize = std::min(a: BufferA.byteSize().getQuantity(),
                                  b: BufferB.byteSize().getQuantity());

  unsigned ElemSize = 1;
  if (IsWide)
    ElemSize = ASTCtx.getTypeSizeInChars(T: ASTCtx.getWCharType()).getQuantity();
  // The Size given for the wide variants is in wide-char units. Convert it
  // to bytes.
  size_t ByteSize = Size * ElemSize;
  size_t CmpSize = std::min(a: MinBufferSize, b: ByteSize);

  for (size_t I = 0; I != CmpSize; I += ElemSize) {
    if (IsWide) {
      // Compare one wchar_t-sized unit, respecting wchar_t's signedness.
      INT_TYPE_SWITCH(*S.getContext().classify(ASTCtx.getWCharType()), {
        T A = *reinterpret_cast<T *>(BufferA.atByte(I));
        T B = *reinterpret_cast<T *>(BufferB.atByte(I));
        if (A < B) {
          pushInteger(S, -1, Call->getType());
          return true;
        }
        if (A > B) {
          pushInteger(S, 1, Call->getType());
          return true;
        }
      });
    } else {
      // Byte-wise comparison (unsigned, as std::byte compares unsigned).
      std::byte A = BufferA.deref<std::byte>(Offset: Bytes(I));
      std::byte B = BufferB.deref<std::byte>(Offset: Bytes(I));

      if (A < B) {
        pushInteger(S, Val: -1, QT: Call->getType());
        return true;
      }
      if (A > B) {
        pushInteger(S, Val: 1, QT: Call->getType());
        return true;
      }
    }
  }

  // We compared CmpSize bytes above. If the limiting factor was the Size
  // passed, we're done and the result is equality (0).
  if (ByteSize <= CmpSize) {
    pushInteger(S, Val: 0, QT: Call->getType());
    return true;
  }

  // However, if we read all the available bytes but were instructed to read
  // even more, diagnose this as a "read of dereferenced one-past-the-end
  // pointer". This is what would happen if we called CheckLoad() on every array
  // element.
  S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_access_past_end)
      << AK_Read << S.Current->getRange(PC: OpPC);
  return false;
}
2068
// __builtin_memchr(ptr, int, int)
// __builtin_strchr(ptr, int)
/// Also covers the wide variants (wmemchr/wcschr) and the plain library
/// functions. On success pushes a pointer to the first matching element,
/// or a null pointer when no match is found.
static bool interp__builtin_memchr(InterpState &S, CodePtr OpPC,
                                   const CallExpr *Call, unsigned ID) {
  // The plain library functions are not generally constexpr-usable.
  if (ID == Builtin::BImemchr || ID == Builtin::BIwcschr ||
      ID == Builtin::BIstrchr || ID == Builtin::BIwmemchr)
    diagnoseNonConstexprBuiltin(S, OpPC, ID);

  // The str/wcs variants have no length argument (2 args instead of 3).
  std::optional<APSInt> MaxLength;
  if (Call->getNumArgs() == 3)
    MaxLength = popToAPSInt(S, E: Call->getArg(Arg: 2));

  APSInt Desired = popToAPSInt(S, E: Call->getArg(Arg: 1));
  const Pointer &Ptr = S.Stk.pop<Pointer>();

  // Searching zero elements always yields "not found" (a null pointer).
  if (MaxLength && MaxLength->isZero()) {
    S.Stk.push<Pointer>();
    return true;
  }

  if (Ptr.isDummy()) {
    if (Ptr.getType()->isIncompleteType())
      S.FFDiag(SI: S.Current->getSource(PC: OpPC),
               DiagId: diag::note_constexpr_ltor_incomplete_type)
          << Ptr.getType();
    return false;
  }

  // Null is only okay if the given size is 0.
  if (Ptr.isZero()) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_access_null)
        << AK_Read;
    return false;
  }

  if (!Ptr.isBlockPointer())
    return false;

  QualType ElemTy = Ptr.getFieldDesc()->isArray()
                        ? Ptr.getFieldDesc()->getElemQualType()
                        : Ptr.getFieldDesc()->getType();
  bool IsRawByte = ID == Builtin::BImemchr || ID == Builtin::BI__builtin_memchr;

  // Give up on byte-oriented matching against multibyte elements.
  if (IsRawByte && !isOneByteCharacterType(T: ElemTy)) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC),
             DiagId: diag::note_constexpr_memchr_unsupported)
        << S.getASTContext().BuiltinInfo.getQuotedName(ID) << ElemTy;
    return false;
  }

  if (!isReadable(P: Ptr))
    return false;

  if (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr) {
    // Truncate the desired value to char width, honoring char's signedness.
    int64_t DesiredTrunc;
    if (S.getASTContext().CharTy->isSignedIntegerType())
      DesiredTrunc =
          Desired.trunc(width: S.getASTContext().getCharWidth()).getSExtValue();
    else
      DesiredTrunc =
          Desired.trunc(width: S.getASTContext().getCharWidth()).getZExtValue();
    // strchr compares directly to the passed integer, and therefore
    // always fails if given an int that is not a char.
    if (Desired != DesiredTrunc) {
      S.Stk.push<Pointer>();
      return true;
    }
  }

  uint64_t DesiredVal;
  if (ID == Builtin::BIwmemchr || ID == Builtin::BI__builtin_wmemchr ||
      ID == Builtin::BIwcschr || ID == Builtin::BI__builtin_wcschr) {
    // wcschr and wmemchr are given a wchar_t to look for. Just use it.
    DesiredVal = Desired.getZExtValue();
  } else {
    DesiredVal = Desired.trunc(width: S.getASTContext().getCharWidth()).getZExtValue();
  }

  // The string-oriented variants stop at the terminating null character.
  bool StopAtZero =
      (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr ||
       ID == Builtin::BIwcschr || ID == Builtin::BI__builtin_wcschr);

  PrimType ElemT =
      IsRawByte ? PT_Sint8 : *S.getContext().classify(T: getElemType(P: Ptr));

  // Scan forward, starting at the pointer's current index.
  size_t Index = Ptr.getIndex();
  size_t Step = 0;
  for (;;) {
    const Pointer &ElemPtr =
        (Index + Step) > 0 ? Ptr.atIndex(Idx: Index + Step) : Ptr;

    // This diagnoses past-the-end reads, so termination is guaranteed.
    if (!CheckLoad(S, OpPC, Ptr: ElemPtr))
      return false;

    uint64_t V;
    INT_TYPE_SWITCH_NO_BOOL(
        ElemT, { V = static_cast<uint64_t>(ElemPtr.deref<T>().toUnsigned()); });

    if (V == DesiredVal) {
      S.Stk.push<Pointer>(Args: ElemPtr);
      return true;
    }

    if (StopAtZero && V == 0)
      break;

    ++Step;
    if (MaxLength && Step == MaxLength->getZExtValue())
      break;
  }

  // No match found within the search range.
  S.Stk.push<Pointer>();
  return true;
}
2184
/// Compute the size in bytes of the complete object described by \p Desc,
/// or std::nullopt if the size cannot be determined from the descriptor.
static std::optional<unsigned> computeFullDescSize(const ASTContext &ASTCtx,
                                                   const Descriptor *Desc) {
  if (Desc->isPrimitive())
    return ASTCtx.getTypeSizeInChars(T: Desc->getType()).getQuantity();
  if (Desc->isArray())
    return ASTCtx.getTypeSizeInChars(T: Desc->getElemQualType()).getQuantity() *
           Desc->getNumElems();
  if (Desc->isRecord()) {
    // Can't use Descriptor::getType() as that may return a pointer type. Look
    // at the decl directly.
    return ASTCtx
        .getTypeSizeInChars(
            T: ASTCtx.getCanonicalTagType(TD: Desc->ElemRecord->getDecl()))
        .getQuantity();
  }

  // Neither primitive, array nor record: the size is unknown.
  return std::nullopt;
}
2203
2204/// Compute the byte offset of \p Ptr in the full declaration.
2205static unsigned computePointerOffset(const ASTContext &ASTCtx,
2206 const Pointer &Ptr) {
2207 unsigned Result = 0;
2208
2209 Pointer P = Ptr;
2210 while (P.isField() || P.isArrayElement()) {
2211 P = P.expand();
2212 const Descriptor *D = P.getFieldDesc();
2213
2214 if (P.isArrayElement()) {
2215 unsigned ElemSize =
2216 ASTCtx.getTypeSizeInChars(T: D->getElemQualType()).getQuantity();
2217 if (P.isOnePastEnd())
2218 Result += ElemSize * P.getNumElems();
2219 else
2220 Result += ElemSize * P.getIndex();
2221 P = P.expand().getArray();
2222 } else if (P.isBaseClass()) {
2223 const auto *RD = cast<CXXRecordDecl>(Val: D->asDecl());
2224 bool IsVirtual = Ptr.isVirtualBaseClass();
2225 P = P.getBase();
2226 const Record *BaseRecord = P.getRecord();
2227
2228 const ASTRecordLayout &Layout =
2229 ASTCtx.getASTRecordLayout(D: cast<CXXRecordDecl>(Val: BaseRecord->getDecl()));
2230 if (IsVirtual)
2231 Result += Layout.getVBaseClassOffset(VBase: RD).getQuantity();
2232 else
2233 Result += Layout.getBaseClassOffset(Base: RD).getQuantity();
2234 } else if (P.isField()) {
2235 const FieldDecl *FD = P.getField();
2236 const ASTRecordLayout &Layout =
2237 ASTCtx.getASTRecordLayout(D: FD->getParent());
2238 unsigned FieldIndex = FD->getFieldIndex();
2239 uint64_t FieldOffset =
2240 ASTCtx.toCharUnitsFromBits(BitSize: Layout.getFieldOffset(FieldNo: FieldIndex))
2241 .getQuantity();
2242 Result += FieldOffset;
2243 P = P.getBase();
2244 } else
2245 llvm_unreachable("Unhandled descriptor type");
2246 }
2247
2248 return Result;
2249}
2250
2251/// Does Ptr point to the last subobject?
2252static bool pointsToLastObject(const Pointer &Ptr) {
2253 Pointer P = Ptr;
2254 while (!P.isRoot()) {
2255
2256 if (P.isArrayElement()) {
2257 P = P.expand().getArray();
2258 continue;
2259 }
2260 if (P.isBaseClass()) {
2261 if (P.getRecord()->getNumFields() > 0)
2262 return false;
2263 P = P.getBase();
2264 continue;
2265 }
2266
2267 Pointer Base = P.getBase();
2268 if (const Record *R = Base.getRecord()) {
2269 assert(P.getField());
2270 if (P.getField()->getFieldIndex() != R->getNumFields() - 1)
2271 return false;
2272 }
2273 P = Base;
2274 }
2275
2276 return true;
2277}
2278
2279/// Does Ptr point to the last object AND to a flexible array member?
2280static bool isUserWritingOffTheEnd(const ASTContext &Ctx, const Pointer &Ptr,
2281 bool InvalidBase) {
2282 auto isFlexibleArrayMember = [&](const Descriptor *FieldDesc) {
2283 using FAMKind = LangOptions::StrictFlexArraysLevelKind;
2284 FAMKind StrictFlexArraysLevel =
2285 Ctx.getLangOpts().getStrictFlexArraysLevel();
2286
2287 if (StrictFlexArraysLevel == FAMKind::Default)
2288 return true;
2289
2290 unsigned NumElems = FieldDesc->getNumElems();
2291 if (NumElems == 0 && StrictFlexArraysLevel != FAMKind::IncompleteOnly)
2292 return true;
2293
2294 if (NumElems == 1 && StrictFlexArraysLevel == FAMKind::OneZeroOrIncomplete)
2295 return true;
2296 return false;
2297 };
2298
2299 const Descriptor *FieldDesc = Ptr.getFieldDesc();
2300 if (!FieldDesc->isArray())
2301 return false;
2302
2303 return InvalidBase && pointsToLastObject(Ptr) &&
2304 isFlexibleArrayMember(FieldDesc);
2305}
2306
/// Core of __builtin_object_size evaluation for \p Ptr. \p Kind is the
/// builtin's 0-3 "type" argument: bit 0 set means measure the closest
/// surrounding subobject instead of the whole variable; bit 1 set means
/// report the minimum rather than the maximum remaining size. Returns
/// std::nullopt when no size can be determined. May modify \p Ptr
/// (expanding array elements, stripping base casts).
UnsignedOrNone evaluateBuiltinObjectSize(const ASTContext &ASTCtx,
                                         unsigned Kind, Pointer &Ptr) {
  if (Ptr.isZero() || !Ptr.isBlockPointer())
    return std::nullopt;

  if (Ptr.isDummy() && Ptr.getType()->isPointerType())
    return std::nullopt;

  // Set when the pointer is a dummy for a pointer-typed variable, i.e. we
  // don't actually know what it points to.
  bool InvalidBase = false;

  if (Ptr.isDummy()) {
    if (const VarDecl *VD = Ptr.getDeclDesc()->asVarDecl();
        VD && VD->getType()->isPointerType())
      InvalidBase = true;
  }

  // According to the GCC documentation, we want the size of the subobject
  // denoted by the pointer. But that's not quite right -- what we actually
  // want is the size of the immediately-enclosing array, if there is one.
  if (Ptr.isArrayElement())
    Ptr = Ptr.expand();

  bool DetermineForCompleteObject = Ptr.getFieldDesc() == Ptr.getDeclDesc();
  const Descriptor *DeclDesc = Ptr.getDeclDesc();
  assert(DeclDesc);

  bool UseFieldDesc = (Kind & 1u);  // bit 0: closest surrounding subobject
  bool ReportMinimum = (Kind & 2u); // bit 1: minimum instead of maximum
  if (!UseFieldDesc || DetermineForCompleteObject) {
    // Can't read beyond the pointer decl desc.
    if (!ReportMinimum && DeclDesc->getType()->isPointerType())
      return std::nullopt;

    if (InvalidBase)
      return std::nullopt;
  } else {
    if (isUserWritingOffTheEnd(Ctx: ASTCtx, Ptr, InvalidBase)) {
      // If we cannot determine the size of the initial allocation, then we
      // can't given an accurate upper-bound. However, we are still able to give
      // conservative lower-bounds for Type=3.
      if (Kind == 1)
        return std::nullopt;
    }
  }

  // The "closest surrounding subobject" is NOT a base class,
  // so strip the base class casts.
  if (UseFieldDesc && Ptr.isBaseClass())
    Ptr = Ptr.stripBaseCasts();

  const Descriptor *Desc = UseFieldDesc ? Ptr.getFieldDesc() : DeclDesc;
  assert(Desc);

  std::optional<unsigned> FullSize = computeFullDescSize(ASTCtx, Desc);
  if (!FullSize)
    return std::nullopt;

  // The result is the full object/subobject size minus Ptr's byte offset
  // within it.
  unsigned ByteOffset;
  if (UseFieldDesc) {
    if (Ptr.isBaseClass()) {
      // NOTE(review): base casts were stripped above whenever UseFieldDesc
      // is set, so this branch looks unreachable. Also, the assert states
      // getBase() <= Ptr while the subtraction computes getBase() - Ptr,
      // which can be nonzero only via unsigned wrap -- verify the intended
      // operand order.
      assert(computePointerOffset(ASTCtx, Ptr.getBase()) <=
             computePointerOffset(ASTCtx, Ptr));
      ByteOffset = computePointerOffset(ASTCtx, Ptr: Ptr.getBase()) -
                   computePointerOffset(ASTCtx, Ptr);
    } else {
      // Offset relative to the start of the immediately-enclosing array,
      // if any; otherwise the subobject itself is the reference point.
      if (Ptr.inArray())
        ByteOffset =
            computePointerOffset(ASTCtx, Ptr) -
            computePointerOffset(ASTCtx, Ptr: Ptr.expand().atIndex(Idx: 0).narrow());
      else
        ByteOffset = 0;
    }
  } else
    ByteOffset = computePointerOffset(ASTCtx, Ptr);

  assert(ByteOffset <= *FullSize);
  return *FullSize - ByteOffset;
}
2385
2386static bool interp__builtin_object_size(InterpState &S, CodePtr OpPC,
2387 const InterpFrame *Frame,
2388 const CallExpr *Call) {
2389 const ASTContext &ASTCtx = S.getASTContext();
2390 // From the GCC docs:
2391 // Kind is an integer constant from 0 to 3. If the least significant bit is
2392 // clear, objects are whole variables. If it is set, a closest surrounding
2393 // subobject is considered the object a pointer points to. The second bit
2394 // determines if maximum or minimum of remaining bytes is computed.
2395 unsigned Kind = popToUInt64(S, E: Call->getArg(Arg: 1));
2396 assert(Kind <= 3 && "unexpected kind");
2397 Pointer Ptr = S.Stk.pop<Pointer>();
2398
2399 if (Call->getArg(Arg: 0)->HasSideEffects(Ctx: ASTCtx)) {
2400 // "If there are any side effects in them, it returns (size_t) -1
2401 // for type 0 or 1 and (size_t) 0 for type 2 or 3."
2402 pushInteger(S, Val: Kind <= 1 ? -1 : 0, QT: Call->getType());
2403 return true;
2404 }
2405
2406 if (auto Result = evaluateBuiltinObjectSize(ASTCtx, Kind, Ptr)) {
2407 pushInteger(S, Val: *Result, QT: Call->getType());
2408 return true;
2409 }
2410 return false;
2411}
2412
/// Implements __builtin_is_within_lifetime (the backend of
/// std::is_within_lifetime). Only evaluable in a constant context; pushes
/// a bool describing whether the pointee is within its lifetime.
static bool interp__builtin_is_within_lifetime(InterpState &S, CodePtr OpPC,
                                               const CallExpr *Call) {

  if (!S.inConstantContext())
    return false;

  const Pointer &Ptr = S.Stk.pop<Pointer>();

  // Emit the "invalid argument" diagnostic. When reached via
  // std::is_within_lifetime, point the diagnostic at the std call site and
  // name the std function instead of the builtin.
  auto Error = [&](int Diag) {
    bool CalledFromStd = false;
    const auto *Callee = S.Current->getCallee();
    if (Callee && Callee->isInStdNamespace()) {
      const IdentifierInfo *Identifier = Callee->getIdentifier();
      CalledFromStd = Identifier && Identifier->isStr(Str: "is_within_lifetime");
    }
    S.CCEDiag(SI: CalledFromStd
                  ? S.Current->Caller->getSource(PC: S.Current->getRetPC())
                  : S.Current->getSource(PC: OpPC),
              DiagId: diag::err_invalid_is_within_lifetime)
        << (CalledFromStd ? "std::is_within_lifetime"
                          : "__builtin_is_within_lifetime")
        << Diag;
    return false;
  };

  // Null and one-past-the-end pointers are invalid arguments.
  if (Ptr.isZero())
    return Error(0);
  if (Ptr.isOnePastEnd())
    return Error(1);

  bool Result = Ptr.getLifetime() != Lifetime::Ended;
  if (!Ptr.isActive()) {
    // An inactive pointer (e.g. into a non-active union member) is never
    // within its lifetime.
    Result = false;
  } else {
    if (!CheckLive(S, OpPC, Ptr, AK: AK_Read))
      return false;
    if (!CheckMutable(S, OpPC, Ptr))
      return false;
    if (!CheckDummy(S, OpPC, B: Ptr.block(), AK: AK_Read))
      return false;
  }

  // Check if we're currently running an initializer.
  if (llvm::is_contained(Range&: S.InitializingBlocks, Element: Ptr.block()))
    return Error(2);
  if (S.EvaluatingDecl && Ptr.getDeclDesc()->asVarDecl() == S.EvaluatingDecl)
    return Error(2);

  pushInteger(S, Val: Result, QT: Call->getType());
  return true;
}
2464
/// Applies \p Fn element-wise to an integer (or integer-vector) argument
/// and produces the result value. The result's signedness follows the
/// call expression's type.
static bool interp__builtin_elementwise_int_unaryop(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<APInt(const APSInt &)> Fn) {
  assert(Call->getNumArgs() == 1);

  // Single integer case.
  if (!Call->getArg(Arg: 0)->getType()->isVectorType()) {
    assert(Call->getType()->isIntegerType());
    APSInt Src = popToAPSInt(S, E: Call->getArg(Arg: 0));
    APInt Result = Fn(Src);
    pushInteger(S, Val: APSInt(std::move(Result), !Src.isSigned()), QT: Call->getType());
    return true;
  }

  // Vector case: pop the argument; the result vector is built in place in
  // the destination that stays on the stack.
  const Pointer &Arg = S.Stk.pop<Pointer>();
  assert(Arg.getFieldDesc()->isPrimitiveArray());
  const Pointer &Dst = S.Stk.peek<Pointer>();
  assert(Dst.getFieldDesc()->isPrimitiveArray());
  assert(Arg.getFieldDesc()->getNumElems() ==
         Dst.getFieldDesc()->getNumElems());

  QualType ElemType = Arg.getFieldDesc()->getElemQualType();
  PrimType ElemT = *S.getContext().classify(T: ElemType);
  unsigned NumElems = Arg.getNumElems();
  bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();

  for (unsigned I = 0; I != NumElems; ++I) {
    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
      APSInt Src = Arg.elem<T>(I).toAPSInt();
      APInt Result = Fn(Src);
      Dst.elem<T>(I) = static_cast<T>(APSInt(std::move(Result), DestUnsigned));
    });
  }
  Dst.initializeAllElements();

  return true;
}
2503
/// Applies \p Fn lane-wise to two floating-point vectors, forwarding an
/// optional third rounding-mode argument to \p Fn. With \p IsScalar set,
/// only lane 0 is computed and the remaining lanes are copied from the
/// first operand.
static bool interp__builtin_elementwise_fp_binop(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<std::optional<APFloat>(
        const APFloat &, const APFloat &, std::optional<APSInt> RoundingMode)>
        Fn,
    bool IsScalar = false) {
  assert((Call->getNumArgs() == 2) || (Call->getNumArgs() == 3));
  const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
  assert(VT->getElementType()->isFloatingType());
  unsigned NumElems = VT->getNumElements();

  // Vector case.
  assert(Call->getArg(0)->getType()->isVectorType() &&
         Call->getArg(1)->getType()->isVectorType());
  assert(VT->getElementType() ==
         Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
  assert(VT->getNumElements() ==
         Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements());

  // An optional third argument carries the rounding mode.
  std::optional<APSInt> RoundingMode = std::nullopt;
  if (Call->getNumArgs() == 3)
    RoundingMode = popToAPSInt(S, E: Call->getArg(Arg: 2));

  // Pop order is the reverse of argument order: B, then A.
  const Pointer &BPtr = S.Stk.pop<Pointer>();
  const Pointer &APtr = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();
  for (unsigned ElemIdx = 0; ElemIdx != NumElems; ++ElemIdx) {
    using T = PrimConv<PT_Float>::T;
    // In scalar mode, upper lanes pass through from the first operand.
    if (IsScalar && ElemIdx > 0) {
      Dst.elem<T>(I: ElemIdx) = APtr.elem<T>(I: ElemIdx);
      continue;
    }
    APFloat ElemA = APtr.elem<T>(I: ElemIdx).getAPFloat();
    APFloat ElemB = BPtr.elem<T>(I: ElemIdx).getAPFloat();
    // A nullopt result from Fn means the operation failed; propagate.
    std::optional<APFloat> Result = Fn(ElemA, ElemB, RoundingMode);
    if (!Result)
      return false;
    Dst.elem<T>(I: ElemIdx) = static_cast<T>(*Result);
  }

  Dst.initializeAllElements();

  return true;
}
2548
/// Implements masked scalar floating-point binops with a rounding mode.
/// Arguments (in order): vector A, vector B, passthrough source vector,
/// mask, rounding mode. If mask bit 0 is set, lane 0 becomes
/// Fn(A[0], B[0]); otherwise lane 0 is copied from the passthrough source.
/// All upper lanes come from A.
static bool interp__builtin_scalar_fp_round_mask_binop(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<std::optional<APFloat>(const APFloat &, const APFloat &,
                                              std::optional<APSInt>)>
        Fn) {
  assert(Call->getNumArgs() == 5);
  const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
  unsigned NumElems = VT->getNumElements();

  // Pop order is the reverse of the argument order.
  APSInt RoundingMode = popToAPSInt(S, E: Call->getArg(Arg: 4));
  uint64_t MaskVal = popToUInt64(S, E: Call->getArg(Arg: 3));
  const Pointer &SrcPtr = S.Stk.pop<Pointer>();
  const Pointer &BPtr = S.Stk.pop<Pointer>();
  const Pointer &APtr = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  using T = PrimConv<PT_Float>::T;

  if (MaskVal & 1) {
    APFloat ElemA = APtr.elem<T>(I: 0).getAPFloat();
    APFloat ElemB = BPtr.elem<T>(I: 0).getAPFloat();
    // A nullopt result from Fn means the operation failed; propagate.
    std::optional<APFloat> Result = Fn(ElemA, ElemB, RoundingMode);
    if (!Result)
      return false;
    Dst.elem<T>(I: 0) = static_cast<T>(*Result);
  } else {
    // Mask bit clear: lane 0 comes from the passthrough source.
    Dst.elem<T>(I: 0) = SrcPtr.elem<T>(I: 0);
  }

  // Upper lanes are copied unchanged from the first operand.
  for (unsigned I = 1; I < NumElems; ++I)
    Dst.elem<T>(I) = APtr.elem<T>(I);

  Dst.initializeAllElements();

  return true;
}
2585
/// Applies \p Fn element-wise to two integer operands: scalar+scalar,
/// vector+scalar (the scalar is broadcast) or vector+vector of identical
/// shape. The result's signedness follows the call expression's type.
static bool interp__builtin_elementwise_int_binop(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
  assert(Call->getNumArgs() == 2);

  // Single integer case.
  if (!Call->getArg(Arg: 0)->getType()->isVectorType()) {
    assert(!Call->getArg(1)->getType()->isVectorType());
    // Pop order is the reverse of argument order: RHS first.
    APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: 1));
    APSInt LHS = popToAPSInt(S, E: Call->getArg(Arg: 0));
    APInt Result = Fn(LHS, RHS);
    pushInteger(S, Val: APSInt(std::move(Result), !LHS.isSigned()), QT: Call->getType());
    return true;
  }

  const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
  assert(VT->getElementType()->isIntegralOrEnumerationType());
  PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
  unsigned NumElems = VT->getNumElements();
  bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();

  // Vector + Scalar case.
  if (!Call->getArg(Arg: 1)->getType()->isVectorType()) {
    assert(Call->getArg(1)->getType()->isIntegralOrEnumerationType());

    APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: 1));
    const Pointer &LHS = S.Stk.pop<Pointer>();
    const Pointer &Dst = S.Stk.peek<Pointer>();

    // The scalar RHS is combined with every element of the LHS vector.
    for (unsigned I = 0; I != NumElems; ++I) {
      INT_TYPE_SWITCH_NO_BOOL(ElemT, {
        Dst.elem<T>(I) = static_cast<T>(
            APSInt(Fn(LHS.elem<T>(I).toAPSInt(), RHS), DestUnsigned));
      });
    }
    Dst.initializeAllElements();
    return true;
  }

  // Vector case.
  assert(Call->getArg(0)->getType()->isVectorType() &&
         Call->getArg(1)->getType()->isVectorType());
  assert(VT->getElementType() ==
         Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
  assert(VT->getNumElements() ==
         Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements());
  assert(VT->getElementType()->isIntegralOrEnumerationType());

  // Pop order is the reverse of argument order: RHS first.
  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();
  for (unsigned I = 0; I != NumElems; ++I) {
    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
      APSInt Elem1 = LHS.elem<T>(I).toAPSInt();
      APSInt Elem2 = RHS.elem<T>(I).toAPSInt();
      Dst.elem<T>(I) = static_cast<T>(APSInt(Fn(Elem1, Elem2), DestUnsigned));
    });
  }
  Dst.initializeAllElements();

  return true;
}
2648
2649static bool
2650interp__builtin_x86_pack(InterpState &S, CodePtr, const CallExpr *E,
2651 llvm::function_ref<APInt(const APSInt &)> PackFn) {
2652 const auto *VT0 = E->getArg(Arg: 0)->getType()->castAs<VectorType>();
2653 [[maybe_unused]] const auto *VT1 =
2654 E->getArg(Arg: 1)->getType()->castAs<VectorType>();
2655 assert(VT0 && VT1 && "pack builtin VT0 and VT1 must be VectorType");
2656 assert(VT0->getElementType() == VT1->getElementType() &&
2657 VT0->getNumElements() == VT1->getNumElements() &&
2658 "pack builtin VT0 and VT1 ElementType must be same");
2659
2660 const Pointer &RHS = S.Stk.pop<Pointer>();
2661 const Pointer &LHS = S.Stk.pop<Pointer>();
2662 const Pointer &Dst = S.Stk.peek<Pointer>();
2663
2664 const ASTContext &ASTCtx = S.getASTContext();
2665 unsigned SrcBits = ASTCtx.getIntWidth(T: VT0->getElementType());
2666 unsigned LHSVecLen = VT0->getNumElements();
2667 unsigned SrcPerLane = 128 / SrcBits;
2668 unsigned Lanes = LHSVecLen * SrcBits / 128;
2669
2670 PrimType SrcT = *S.getContext().classify(T: VT0->getElementType());
2671 PrimType DstT = *S.getContext().classify(T: getElemType(P: Dst));
2672 bool IsUnsigend = getElemType(P: Dst)->isUnsignedIntegerType();
2673
2674 for (unsigned Lane = 0; Lane != Lanes; ++Lane) {
2675 unsigned BaseSrc = Lane * SrcPerLane;
2676 unsigned BaseDst = Lane * (2 * SrcPerLane);
2677
2678 for (unsigned I = 0; I != SrcPerLane; ++I) {
2679 INT_TYPE_SWITCH_NO_BOOL(SrcT, {
2680 APSInt A = LHS.elem<T>(BaseSrc + I).toAPSInt();
2681 APSInt B = RHS.elem<T>(BaseSrc + I).toAPSInt();
2682
2683 assignInteger(S, Dst.atIndex(BaseDst + I), DstT,
2684 APSInt(PackFn(A), IsUnsigend));
2685 assignInteger(S, Dst.atIndex(BaseDst + SrcPerLane + I), DstT,
2686 APSInt(PackFn(B), IsUnsigend));
2687 });
2688 }
2689 }
2690
2691 Dst.initializeAllElements();
2692 return true;
2693}
2694
2695static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
2696 const CallExpr *Call,
2697 unsigned BuiltinID) {
2698 assert(Call->getNumArgs() == 2);
2699
2700 QualType Arg0Type = Call->getArg(Arg: 0)->getType();
2701
2702 // TODO: Support floating-point types.
2703 if (!(Arg0Type->isIntegerType() ||
2704 (Arg0Type->isVectorType() &&
2705 Arg0Type->castAs<VectorType>()->getElementType()->isIntegerType())))
2706 return false;
2707
2708 if (!Arg0Type->isVectorType()) {
2709 assert(!Call->getArg(1)->getType()->isVectorType());
2710 APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: 1));
2711 APSInt LHS = popToAPSInt(S, T: Arg0Type);
2712 APInt Result;
2713 if (BuiltinID == Builtin::BI__builtin_elementwise_max) {
2714 Result = std::max(a: LHS, b: RHS);
2715 } else if (BuiltinID == Builtin::BI__builtin_elementwise_min) {
2716 Result = std::min(a: LHS, b: RHS);
2717 } else {
2718 llvm_unreachable("Wrong builtin ID");
2719 }
2720
2721 pushInteger(S, Val: APSInt(Result, !LHS.isSigned()), QT: Call->getType());
2722 return true;
2723 }
2724
2725 // Vector case.
2726 assert(Call->getArg(0)->getType()->isVectorType() &&
2727 Call->getArg(1)->getType()->isVectorType());
2728 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2729 assert(VT->getElementType() ==
2730 Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
2731 assert(VT->getNumElements() ==
2732 Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements());
2733 assert(VT->getElementType()->isIntegralOrEnumerationType());
2734
2735 const Pointer &RHS = S.Stk.pop<Pointer>();
2736 const Pointer &LHS = S.Stk.pop<Pointer>();
2737 const Pointer &Dst = S.Stk.peek<Pointer>();
2738 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2739 unsigned NumElems = VT->getNumElements();
2740 for (unsigned I = 0; I != NumElems; ++I) {
2741 APSInt Elem1;
2742 APSInt Elem2;
2743 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2744 Elem1 = LHS.elem<T>(I).toAPSInt();
2745 Elem2 = RHS.elem<T>(I).toAPSInt();
2746 });
2747
2748 APSInt Result;
2749 if (BuiltinID == Builtin::BI__builtin_elementwise_max) {
2750 Result = APSInt(std::max(a: Elem1, b: Elem2),
2751 Call->getType()->isUnsignedIntegerOrEnumerationType());
2752 } else if (BuiltinID == Builtin::BI__builtin_elementwise_min) {
2753 Result = APSInt(std::min(a: Elem1, b: Elem2),
2754 Call->getType()->isUnsignedIntegerOrEnumerationType());
2755 } else {
2756 llvm_unreachable("Wrong builtin ID");
2757 }
2758
2759 INT_TYPE_SWITCH_NO_BOOL(ElemT,
2760 { Dst.elem<T>(I) = static_cast<T>(Result); });
2761 }
2762 Dst.initializeAllElements();
2763
2764 return true;
2765}
2766
2767static bool interp__builtin_ia32_pmul(
2768 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2769 llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &,
2770 const APSInt &)>
2771 Fn) {
2772 assert(Call->getArg(0)->getType()->isVectorType() &&
2773 Call->getArg(1)->getType()->isVectorType());
2774 const Pointer &RHS = S.Stk.pop<Pointer>();
2775 const Pointer &LHS = S.Stk.pop<Pointer>();
2776 const Pointer &Dst = S.Stk.peek<Pointer>();
2777
2778 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2779 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2780 unsigned NumElems = VT->getNumElements();
2781 const auto *DestVT = Call->getType()->castAs<VectorType>();
2782 PrimType DestElemT = *S.getContext().classify(T: DestVT->getElementType());
2783 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2784
2785 unsigned DstElem = 0;
2786 for (unsigned I = 0; I != NumElems; I += 2) {
2787 APSInt Result;
2788 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2789 APSInt LoLHS = LHS.elem<T>(I).toAPSInt();
2790 APSInt HiLHS = LHS.elem<T>(I + 1).toAPSInt();
2791 APSInt LoRHS = RHS.elem<T>(I).toAPSInt();
2792 APSInt HiRHS = RHS.elem<T>(I + 1).toAPSInt();
2793 Result = APSInt(Fn(LoLHS, HiLHS, LoRHS, HiRHS), DestUnsigned);
2794 });
2795
2796 INT_TYPE_SWITCH_NO_BOOL(DestElemT,
2797 { Dst.elem<T>(DstElem) = static_cast<T>(Result); });
2798 ++DstElem;
2799 }
2800
2801 Dst.initializeAllElements();
2802 return true;
2803}
2804
2805static bool interp_builtin_horizontal_int_binop(
2806 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2807 llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
2808 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2809 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2810 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2811
2812 const Pointer &RHS = S.Stk.pop<Pointer>();
2813 const Pointer &LHS = S.Stk.pop<Pointer>();
2814 const Pointer &Dst = S.Stk.peek<Pointer>();
2815 unsigned NumElts = VT->getNumElements();
2816 unsigned EltBits = S.getASTContext().getIntWidth(T: VT->getElementType());
2817 unsigned EltsPerLane = 128 / EltBits;
2818 unsigned Lanes = NumElts * EltBits / 128;
2819 unsigned DestIndex = 0;
2820
2821 for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
2822 unsigned LaneStart = Lane * EltsPerLane;
2823 for (unsigned I = 0; I < EltsPerLane; I += 2) {
2824 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2825 APSInt Elem1 = LHS.elem<T>(LaneStart + I).toAPSInt();
2826 APSInt Elem2 = LHS.elem<T>(LaneStart + I + 1).toAPSInt();
2827 APSInt ResL = APSInt(Fn(Elem1, Elem2), DestUnsigned);
2828 Dst.elem<T>(DestIndex++) = static_cast<T>(ResL);
2829 });
2830 }
2831
2832 for (unsigned I = 0; I < EltsPerLane; I += 2) {
2833 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2834 APSInt Elem1 = RHS.elem<T>(LaneStart + I).toAPSInt();
2835 APSInt Elem2 = RHS.elem<T>(LaneStart + I + 1).toAPSInt();
2836 APSInt ResR = APSInt(Fn(Elem1, Elem2), DestUnsigned);
2837 Dst.elem<T>(DestIndex++) = static_cast<T>(ResR);
2838 });
2839 }
2840 }
2841 Dst.initializeAllElements();
2842 return true;
2843}
2844
2845static bool interp_builtin_horizontal_fp_binop(
2846 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2847 llvm::function_ref<APFloat(const APFloat &, const APFloat &,
2848 llvm::RoundingMode)>
2849 Fn) {
2850 const Pointer &RHS = S.Stk.pop<Pointer>();
2851 const Pointer &LHS = S.Stk.pop<Pointer>();
2852 const Pointer &Dst = S.Stk.peek<Pointer>();
2853 FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
2854 llvm::RoundingMode RM = getRoundingMode(FPO);
2855 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2856
2857 unsigned NumElts = VT->getNumElements();
2858 unsigned EltBits = S.getASTContext().getTypeSize(T: VT->getElementType());
2859 unsigned NumLanes = NumElts * EltBits / 128;
2860 unsigned NumElemsPerLane = NumElts / NumLanes;
2861 unsigned HalfElemsPerLane = NumElemsPerLane / 2;
2862
2863 for (unsigned L = 0; L != NumElts; L += NumElemsPerLane) {
2864 using T = PrimConv<PT_Float>::T;
2865 for (unsigned E = 0; E != HalfElemsPerLane; ++E) {
2866 APFloat Elem1 = LHS.elem<T>(I: L + (2 * E) + 0).getAPFloat();
2867 APFloat Elem2 = LHS.elem<T>(I: L + (2 * E) + 1).getAPFloat();
2868 Dst.elem<T>(I: L + E) = static_cast<T>(Fn(Elem1, Elem2, RM));
2869 }
2870 for (unsigned E = 0; E != HalfElemsPerLane; ++E) {
2871 APFloat Elem1 = RHS.elem<T>(I: L + (2 * E) + 0).getAPFloat();
2872 APFloat Elem2 = RHS.elem<T>(I: L + (2 * E) + 1).getAPFloat();
2873 Dst.elem<T>(I: L + E + HalfElemsPerLane) =
2874 static_cast<T>(Fn(Elem1, Elem2, RM));
2875 }
2876 }
2877 Dst.initializeAllElements();
2878 return true;
2879}
2880
2881static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC,
2882 const CallExpr *Call) {
2883 // Addsub: alternates between subtraction and addition
2884 // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i])
2885 const Pointer &RHS = S.Stk.pop<Pointer>();
2886 const Pointer &LHS = S.Stk.pop<Pointer>();
2887 const Pointer &Dst = S.Stk.peek<Pointer>();
2888 FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
2889 llvm::RoundingMode RM = getRoundingMode(FPO);
2890 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2891 unsigned NumElems = VT->getNumElements();
2892
2893 using T = PrimConv<PT_Float>::T;
2894 for (unsigned I = 0; I != NumElems; ++I) {
2895 APFloat LElem = LHS.elem<T>(I).getAPFloat();
2896 APFloat RElem = RHS.elem<T>(I).getAPFloat();
2897 if (I % 2 == 0) {
2898 // Even indices: subtract
2899 LElem.subtract(RHS: RElem, RM);
2900 } else {
2901 // Odd indices: add
2902 LElem.add(RHS: RElem, RM);
2903 }
2904 Dst.elem<T>(I) = static_cast<T>(LElem);
2905 }
2906 Dst.initializeAllElements();
2907 return true;
2908}
2909
2910static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
2911 const CallExpr *Call) {
2912 // PCLMULQDQ: carry-less multiplication of selected 64-bit halves
2913 // imm8 bit 0: selects lower (0) or upper (1) 64 bits of first operand
2914 // imm8 bit 4: selects lower (0) or upper (1) 64 bits of second operand
2915 assert(Call->getArg(0)->getType()->isVectorType() &&
2916 Call->getArg(1)->getType()->isVectorType());
2917
2918 // Extract imm8 argument
2919 APSInt Imm8 = popToAPSInt(S, E: Call->getArg(Arg: 2));
2920 bool SelectUpperA = (Imm8 & 0x01) != 0;
2921 bool SelectUpperB = (Imm8 & 0x10) != 0;
2922
2923 const Pointer &RHS = S.Stk.pop<Pointer>();
2924 const Pointer &LHS = S.Stk.pop<Pointer>();
2925 const Pointer &Dst = S.Stk.peek<Pointer>();
2926
2927 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2928 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2929 unsigned NumElems = VT->getNumElements();
2930 const auto *DestVT = Call->getType()->castAs<VectorType>();
2931 PrimType DestElemT = *S.getContext().classify(T: DestVT->getElementType());
2932 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2933
2934 // Process each 128-bit lane (2 elements at a time)
2935 for (unsigned Lane = 0; Lane < NumElems; Lane += 2) {
2936 APSInt A0, A1, B0, B1;
2937 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2938 A0 = LHS.elem<T>(Lane + 0).toAPSInt();
2939 A1 = LHS.elem<T>(Lane + 1).toAPSInt();
2940 B0 = RHS.elem<T>(Lane + 0).toAPSInt();
2941 B1 = RHS.elem<T>(Lane + 1).toAPSInt();
2942 });
2943
2944 // Select the appropriate 64-bit values based on imm8
2945 APInt A = SelectUpperA ? A1 : A0;
2946 APInt B = SelectUpperB ? B1 : B0;
2947
2948 // Extend both operands to 128 bits for carry-less multiplication
2949 APInt A128 = A.zext(width: 128);
2950 APInt B128 = B.zext(width: 128);
2951
2952 // Use APIntOps::clmul for carry-less multiplication
2953 APInt Result = llvm::APIntOps::clmul(LHS: A128, RHS: B128);
2954
2955 // Split the 128-bit result into two 64-bit halves
2956 APSInt ResultLow(Result.extractBits(numBits: 64, bitPosition: 0), DestUnsigned);
2957 APSInt ResultHigh(Result.extractBits(numBits: 64, bitPosition: 64), DestUnsigned);
2958
2959 INT_TYPE_SWITCH_NO_BOOL(DestElemT, {
2960 Dst.elem<T>(Lane + 0) = static_cast<T>(ResultLow);
2961 Dst.elem<T>(Lane + 1) = static_cast<T>(ResultHigh);
2962 });
2963 }
2964
2965 Dst.initializeAllElements();
2966 return true;
2967}
2968
/// Evaluates a three-operand elementwise floating-point builtin (e.g. fma) by
/// applying \p Fn to each operand triple under the rounding mode in effect at
/// the call site. Handles the all-scalar form (pushes one Floating) and the
/// all-vector form (fills the destination vector left on the stack).
static bool interp__builtin_elementwise_triop_fp(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<APFloat(const APFloat &, const APFloat &,
                               const APFloat &, llvm::RoundingMode)>
        Fn) {
  assert(Call->getNumArgs() == 3);

  FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
  llvm::RoundingMode RM = getRoundingMode(FPO);
  QualType Arg1Type = Call->getArg(Arg: 0)->getType();
  QualType Arg2Type = Call->getArg(Arg: 1)->getType();
  QualType Arg3Type = Call->getArg(Arg: 2)->getType();

  // Non-vector floating point types.
  if (!Arg1Type->isVectorType()) {
    assert(!Arg2Type->isVectorType());
    assert(!Arg3Type->isVectorType());
    (void)Arg2Type;
    (void)Arg3Type;

    // Operands are popped in reverse of the argument order.
    const Floating &Z = S.Stk.pop<Floating>();
    const Floating &Y = S.Stk.pop<Floating>();
    const Floating &X = S.Stk.pop<Floating>();
    APFloat F = Fn(X.getAPFloat(), Y.getAPFloat(), Z.getAPFloat(), RM);
    // Result storage uses the same FP semantics as the first operand.
    Floating Result = S.allocFloat(Sem: X.getSemantics());
    Result.copy(F);
    S.Stk.push<Floating>(Args&: Result);
    return true;
  }

  // Vector type.
  assert(Arg1Type->isVectorType() && Arg2Type->isVectorType() &&
         Arg3Type->isVectorType());

  const VectorType *VecTy = Arg1Type->castAs<VectorType>();
  QualType ElemQT = VecTy->getElementType();
  unsigned NumElems = VecTy->getNumElements();

  // All three vectors must agree in element type and length.
  assert(ElemQT == Arg2Type->castAs<VectorType>()->getElementType() &&
         ElemQT == Arg3Type->castAs<VectorType>()->getElementType());
  assert(NumElems == Arg2Type->castAs<VectorType>()->getNumElements() &&
         NumElems == Arg3Type->castAs<VectorType>()->getNumElements());
  assert(ElemQT->isRealFloatingType());
  (void)ElemQT;

  const Pointer &VZ = S.Stk.pop<Pointer>();
  const Pointer &VY = S.Stk.pop<Pointer>();
  const Pointer &VX = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();
  for (unsigned I = 0; I != NumElems; ++I) {
    using T = PrimConv<PT_Float>::T;
    APFloat X = VX.elem<T>(I).getAPFloat();
    APFloat Y = VY.elem<T>(I).getAPFloat();
    APFloat Z = VZ.elem<T>(I).getAPFloat();
    APFloat F = Fn(X, Y, Z, RM);
    Dst.elem<Floating>(I) = Floating(F);
  }
  Dst.initializeAllElements();
  return true;
}
3029
3030/// AVX512 predicated move: "Result = Mask[] ? LHS[] : RHS[]".
3031static bool interp__builtin_select(InterpState &S, CodePtr OpPC,
3032 const CallExpr *Call) {
3033 const Pointer &RHS = S.Stk.pop<Pointer>();
3034 const Pointer &LHS = S.Stk.pop<Pointer>();
3035 APSInt Mask = popToAPSInt(S, E: Call->getArg(Arg: 0));
3036 const Pointer &Dst = S.Stk.peek<Pointer>();
3037
3038 assert(LHS.getNumElems() == RHS.getNumElems());
3039 assert(LHS.getNumElems() == Dst.getNumElems());
3040 unsigned NumElems = LHS.getNumElems();
3041 PrimType ElemT = LHS.getFieldDesc()->getPrimType();
3042 PrimType DstElemT = Dst.getFieldDesc()->getPrimType();
3043
3044 for (unsigned I = 0; I != NumElems; ++I) {
3045 if (ElemT == PT_Float) {
3046 assert(DstElemT == PT_Float);
3047 Dst.elem<Floating>(I) =
3048 Mask[I] ? LHS.elem<Floating>(I) : RHS.elem<Floating>(I);
3049 } else {
3050 APSInt Elem;
3051 INT_TYPE_SWITCH(ElemT, {
3052 Elem = Mask[I] ? LHS.elem<T>(I).toAPSInt() : RHS.elem<T>(I).toAPSInt();
3053 });
3054 INT_TYPE_SWITCH_NO_BOOL(DstElemT,
3055 { Dst.elem<T>(I) = static_cast<T>(Elem); });
3056 }
3057 }
3058 Dst.initializeAllElements();
3059
3060 return true;
3061}
3062
3063/// Scalar variant of AVX512 predicated select:
3064/// Result[i] = (Mask bit 0) ? LHS[i] : RHS[i], but only element 0 may change.
3065/// All other elements are taken from RHS.
3066static bool interp__builtin_select_scalar(InterpState &S,
3067 const CallExpr *Call) {
3068 unsigned N =
3069 Call->getArg(Arg: 1)->getType()->castAs<VectorType>()->getNumElements();
3070
3071 const Pointer &W = S.Stk.pop<Pointer>();
3072 const Pointer &A = S.Stk.pop<Pointer>();
3073 APSInt U = popToAPSInt(S, E: Call->getArg(Arg: 0));
3074 const Pointer &Dst = S.Stk.peek<Pointer>();
3075
3076 bool TakeA0 = U.getZExtValue() & 1ULL;
3077
3078 for (unsigned I = TakeA0; I != N; ++I)
3079 Dst.elem<Floating>(I) = W.elem<Floating>(I);
3080 if (TakeA0)
3081 Dst.elem<Floating>(I: 0) = A.elem<Floating>(I: 0);
3082
3083 Dst.initializeAllElements();
3084 return true;
3085}
3086
3087static bool interp__builtin_ia32_test_op(
3088 InterpState &S, CodePtr OpPC, const CallExpr *Call,
3089 llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
3090 const Pointer &RHS = S.Stk.pop<Pointer>();
3091 const Pointer &LHS = S.Stk.pop<Pointer>();
3092
3093 assert(LHS.getNumElems() == RHS.getNumElems());
3094
3095 unsigned SourceLen = LHS.getNumElems();
3096 QualType ElemQT = getElemType(P: LHS);
3097 OptPrimType ElemPT = S.getContext().classify(T: ElemQT);
3098 unsigned LaneWidth = S.getASTContext().getTypeSize(T: ElemQT);
3099
3100 APInt AWide(LaneWidth * SourceLen, 0);
3101 APInt BWide(LaneWidth * SourceLen, 0);
3102
3103 for (unsigned I = 0; I != SourceLen; ++I) {
3104 APInt ALane;
3105 APInt BLane;
3106
3107 if (ElemQT->isIntegerType()) { // Get value.
3108 INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
3109 ALane = LHS.elem<T>(I).toAPSInt();
3110 BLane = RHS.elem<T>(I).toAPSInt();
3111 });
3112 } else if (ElemQT->isFloatingType()) { // Get only sign bit.
3113 using T = PrimConv<PT_Float>::T;
3114 ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
3115 BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
3116 } else { // Must be integer or floating type.
3117 return false;
3118 }
3119 AWide.insertBits(SubBits: ALane, bitPosition: I * LaneWidth);
3120 BWide.insertBits(SubBits: BLane, bitPosition: I * LaneWidth);
3121 }
3122 pushInteger(S, Val: Fn(AWide, BWide), QT: Call->getType());
3123 return true;
3124}
3125
3126static bool interp__builtin_ia32_movmsk_op(InterpState &S, CodePtr OpPC,
3127 const CallExpr *Call) {
3128 assert(Call->getNumArgs() == 1);
3129
3130 const Pointer &Source = S.Stk.pop<Pointer>();
3131
3132 unsigned SourceLen = Source.getNumElems();
3133 QualType ElemQT = getElemType(P: Source);
3134 OptPrimType ElemT = S.getContext().classify(T: ElemQT);
3135 unsigned ResultLen =
3136 S.getASTContext().getTypeSize(T: Call->getType()); // Always 32-bit integer.
3137 APInt Result(ResultLen, 0);
3138
3139 for (unsigned I = 0; I != SourceLen; ++I) {
3140 APInt Elem;
3141 if (ElemQT->isIntegerType()) {
3142 INT_TYPE_SWITCH_NO_BOOL(*ElemT, { Elem = Source.elem<T>(I).toAPSInt(); });
3143 } else if (ElemQT->isRealFloatingType()) {
3144 using T = PrimConv<PT_Float>::T;
3145 Elem = Source.elem<T>(I).getAPFloat().bitcastToAPInt();
3146 } else {
3147 return false;
3148 }
3149 Result.setBitVal(BitPosition: I, BitValue: Elem.isNegative());
3150 }
3151 pushInteger(S, Val: Result, QT: Call->getType());
3152 return true;
3153}
3154
3155static bool interp__builtin_elementwise_triop(
3156 InterpState &S, CodePtr OpPC, const CallExpr *Call,
3157 llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
3158 Fn) {
3159 assert(Call->getNumArgs() == 3);
3160
3161 QualType Arg0Type = Call->getArg(Arg: 0)->getType();
3162 QualType Arg2Type = Call->getArg(Arg: 2)->getType();
3163 // Non-vector integer types.
3164 if (!Arg0Type->isVectorType()) {
3165 const APSInt &Op2 = popToAPSInt(S, T: Arg2Type);
3166 const APSInt &Op1 = popToAPSInt(S, E: Call->getArg(Arg: 1));
3167 const APSInt &Op0 = popToAPSInt(S, T: Arg0Type);
3168 APSInt Result = APSInt(Fn(Op0, Op1, Op2), Op0.isUnsigned());
3169 pushInteger(S, Val: Result, QT: Call->getType());
3170 return true;
3171 }
3172
3173 const auto *VecT = Arg0Type->castAs<VectorType>();
3174 PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
3175 unsigned NumElems = VecT->getNumElements();
3176 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3177
3178 // Vector + Vector + Scalar case.
3179 if (!Arg2Type->isVectorType()) {
3180 APSInt Op2 = popToAPSInt(S, T: Arg2Type);
3181
3182 const Pointer &Op1 = S.Stk.pop<Pointer>();
3183 const Pointer &Op0 = S.Stk.pop<Pointer>();
3184 const Pointer &Dst = S.Stk.peek<Pointer>();
3185 for (unsigned I = 0; I != NumElems; ++I) {
3186 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3187 Dst.elem<T>(I) = static_cast<T>(APSInt(
3188 Fn(Op0.elem<T>(I).toAPSInt(), Op1.elem<T>(I).toAPSInt(), Op2),
3189 DestUnsigned));
3190 });
3191 }
3192 Dst.initializeAllElements();
3193
3194 return true;
3195 }
3196
3197 // Vector type.
3198 const Pointer &Op2 = S.Stk.pop<Pointer>();
3199 const Pointer &Op1 = S.Stk.pop<Pointer>();
3200 const Pointer &Op0 = S.Stk.pop<Pointer>();
3201 const Pointer &Dst = S.Stk.peek<Pointer>();
3202 for (unsigned I = 0; I != NumElems; ++I) {
3203 APSInt Val0, Val1, Val2;
3204 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3205 Val0 = Op0.elem<T>(I).toAPSInt();
3206 Val1 = Op1.elem<T>(I).toAPSInt();
3207 Val2 = Op2.elem<T>(I).toAPSInt();
3208 });
3209 APSInt Result = APSInt(Fn(Val0, Val1, Val2), Val0.isUnsigned());
3210 INT_TYPE_SWITCH_NO_BOOL(ElemT,
3211 { Dst.elem<T>(I) = static_cast<T>(Result); });
3212 }
3213 Dst.initializeAllElements();
3214
3215 return true;
3216}
3217
3218static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC,
3219 const CallExpr *Call,
3220 unsigned ID) {
3221 assert(Call->getNumArgs() == 2);
3222
3223 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 1));
3224 uint64_t Index = ImmAPS.getZExtValue();
3225
3226 const Pointer &Src = S.Stk.pop<Pointer>();
3227 if (!Src.getFieldDesc()->isPrimitiveArray())
3228 return false;
3229
3230 const Pointer &Dst = S.Stk.peek<Pointer>();
3231 if (!Dst.getFieldDesc()->isPrimitiveArray())
3232 return false;
3233
3234 unsigned SrcElems = Src.getNumElems();
3235 unsigned DstElems = Dst.getNumElems();
3236
3237 unsigned NumLanes = SrcElems / DstElems;
3238 unsigned Lane = static_cast<unsigned>(Index % NumLanes);
3239 unsigned ExtractPos = Lane * DstElems;
3240
3241 PrimType ElemT = Src.getFieldDesc()->getPrimType();
3242
3243 TYPE_SWITCH(ElemT, {
3244 for (unsigned I = 0; I != DstElems; ++I) {
3245 Dst.elem<T>(I) = Src.elem<T>(ExtractPos + I);
3246 }
3247 });
3248
3249 Dst.initializeAllElements();
3250 return true;
3251}
3252
3253static bool interp__builtin_x86_extract_vector_masked(InterpState &S,
3254 CodePtr OpPC,
3255 const CallExpr *Call,
3256 unsigned ID) {
3257 assert(Call->getNumArgs() == 4);
3258
3259 APSInt MaskAPS = popToAPSInt(S, E: Call->getArg(Arg: 3));
3260 const Pointer &Merge = S.Stk.pop<Pointer>();
3261 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 1));
3262 const Pointer &Src = S.Stk.pop<Pointer>();
3263
3264 if (!Src.getFieldDesc()->isPrimitiveArray() ||
3265 !Merge.getFieldDesc()->isPrimitiveArray())
3266 return false;
3267
3268 const Pointer &Dst = S.Stk.peek<Pointer>();
3269 if (!Dst.getFieldDesc()->isPrimitiveArray())
3270 return false;
3271
3272 unsigned SrcElems = Src.getNumElems();
3273 unsigned DstElems = Dst.getNumElems();
3274
3275 unsigned NumLanes = SrcElems / DstElems;
3276 unsigned Lane = static_cast<unsigned>(ImmAPS.getZExtValue() % NumLanes);
3277 unsigned Base = Lane * DstElems;
3278
3279 PrimType ElemT = Src.getFieldDesc()->getPrimType();
3280
3281 TYPE_SWITCH(ElemT, {
3282 for (unsigned I = 0; I != DstElems; ++I) {
3283 if (MaskAPS[I])
3284 Dst.elem<T>(I) = Src.elem<T>(Base + I);
3285 else
3286 Dst.elem<T>(I) = Merge.elem<T>(I);
3287 }
3288 });
3289
3290 Dst.initializeAllElements();
3291 return true;
3292}
3293
3294static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
3295 const CallExpr *Call,
3296 unsigned ID) {
3297 assert(Call->getNumArgs() == 3);
3298
3299 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 2));
3300 uint64_t Index = ImmAPS.getZExtValue();
3301
3302 const Pointer &SubVec = S.Stk.pop<Pointer>();
3303 if (!SubVec.getFieldDesc()->isPrimitiveArray())
3304 return false;
3305
3306 const Pointer &BaseVec = S.Stk.pop<Pointer>();
3307 if (!BaseVec.getFieldDesc()->isPrimitiveArray())
3308 return false;
3309
3310 const Pointer &Dst = S.Stk.peek<Pointer>();
3311
3312 unsigned BaseElements = BaseVec.getNumElems();
3313 unsigned SubElements = SubVec.getNumElems();
3314
3315 assert(SubElements != 0 && BaseElements != 0 &&
3316 (BaseElements % SubElements) == 0);
3317
3318 unsigned NumLanes = BaseElements / SubElements;
3319 unsigned Lane = static_cast<unsigned>(Index % NumLanes);
3320 unsigned InsertPos = Lane * SubElements;
3321
3322 PrimType ElemT = BaseVec.getFieldDesc()->getPrimType();
3323
3324 TYPE_SWITCH(ElemT, {
3325 for (unsigned I = 0; I != BaseElements; ++I)
3326 Dst.elem<T>(I) = BaseVec.elem<T>(I);
3327 for (unsigned I = 0; I != SubElements; ++I)
3328 Dst.elem<T>(InsertPos + I) = SubVec.elem<T>(I);
3329 });
3330
3331 Dst.initializeAllElements();
3332 return true;
3333}
3334
3335static bool interp__builtin_ia32_phminposuw(InterpState &S, CodePtr OpPC,
3336 const CallExpr *Call) {
3337 assert(Call->getNumArgs() == 1);
3338
3339 const Pointer &Source = S.Stk.pop<Pointer>();
3340 const Pointer &Dest = S.Stk.peek<Pointer>();
3341
3342 unsigned SourceLen = Source.getNumElems();
3343 QualType ElemQT = getElemType(P: Source);
3344 OptPrimType ElemT = S.getContext().classify(T: ElemQT);
3345 unsigned ElemBitWidth = S.getASTContext().getTypeSize(T: ElemQT);
3346
3347 bool DestUnsigned = Call->getCallReturnType(Ctx: S.getASTContext())
3348 ->castAs<VectorType>()
3349 ->getElementType()
3350 ->isUnsignedIntegerOrEnumerationType();
3351
3352 INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
3353 APSInt MinIndex(ElemBitWidth, DestUnsigned);
3354 APSInt MinVal = Source.elem<T>(0).toAPSInt();
3355
3356 for (unsigned I = 1; I != SourceLen; ++I) {
3357 APSInt Val = Source.elem<T>(I).toAPSInt();
3358 if (MinVal.ugt(Val)) {
3359 MinVal = Val;
3360 MinIndex = I;
3361 }
3362 }
3363
3364 Dest.elem<T>(0) = static_cast<T>(MinVal);
3365 Dest.elem<T>(1) = static_cast<T>(MinIndex);
3366 for (unsigned I = 2; I != SourceLen; ++I) {
3367 Dest.elem<T>(I) = static_cast<T>(APSInt(ElemBitWidth, DestUnsigned));
3368 }
3369 });
3370 Dest.initializeAllElements();
3371 return true;
3372}
3373
/// Implements AVX512 vpternlog: for every element ("lane") and every bit
/// position, the corresponding bits of A, B and C form a 3-bit index into the
/// 8-bit immediate truth table, which yields the result bit.
/// \p MaskZ selects zero-masking (masked-out lanes become 0) versus
/// merge-masking (masked-out lanes keep the value of the A lane).
static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
                                          const CallExpr *Call, bool MaskZ) {
  assert(Call->getNumArgs() == 5);

  APInt U = popToAPSInt(S, E: Call->getArg(Arg: 4)); // Lane mask
  APInt Imm = popToAPSInt(S, E: Call->getArg(Arg: 3)); // Ternary truth table
  const Pointer &C = S.Stk.pop<Pointer>();
  const Pointer &B = S.Stk.pop<Pointer>();
  const Pointer &A = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  unsigned DstLen = A.getNumElems();
  QualType ElemQT = getElemType(P: A);
  OptPrimType ElemT = S.getContext().classify(T: ElemQT);
  unsigned LaneWidth = S.getASTContext().getTypeSize(T: ElemQT);
  bool DstUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();

  INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
    for (unsigned I = 0; I != DstLen; ++I) {
      APInt ALane = A.elem<T>(I).toAPSInt();
      APInt BLane = B.elem<T>(I).toAPSInt();
      APInt CLane = C.elem<T>(I).toAPSInt();
      APInt RLane(LaneWidth, 0);
      if (U[I]) { // If lane not masked, compute ternary logic.
        for (unsigned Bit = 0; Bit != LaneWidth; ++Bit) {
          unsigned ABit = ALane[Bit];
          unsigned BBit = BLane[Bit];
          unsigned CBit = CLane[Bit];
          // A is the most significant selector bit, C the least significant.
          unsigned Idx = (ABit << 2) | (BBit << 1) | (CBit);
          RLane.setBitVal(Bit, Imm[Idx]);
        }
        Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
      } else if (MaskZ) { // If zero masked, zero the lane.
        // RLane is still all-zero here.
        Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
      } else { // Just masked, put in A lane.
        Dst.elem<T>(I) = static_cast<T>(APSInt(ALane, DstUnsigned));
      }
    }
  });
  Dst.initializeAllElements();
  return true;
}
3416
3417static bool interp__builtin_vec_ext(InterpState &S, CodePtr OpPC,
3418 const CallExpr *Call, unsigned ID) {
3419 assert(Call->getNumArgs() == 2);
3420
3421 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 1));
3422 const Pointer &Vec = S.Stk.pop<Pointer>();
3423 if (!Vec.getFieldDesc()->isPrimitiveArray())
3424 return false;
3425
3426 unsigned NumElems = Vec.getNumElems();
3427 unsigned Index =
3428 static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1));
3429
3430 PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3431 // FIXME(#161685): Replace float+int split with a numeric-only type switch
3432 if (ElemT == PT_Float) {
3433 S.Stk.push<Floating>(Args&: Vec.elem<Floating>(I: Index));
3434 return true;
3435 }
3436 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3437 APSInt V = Vec.elem<T>(Index).toAPSInt();
3438 pushInteger(S, V, Call->getType());
3439 });
3440
3441 return true;
3442}
3443
3444static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
3445 const CallExpr *Call, unsigned ID) {
3446 assert(Call->getNumArgs() == 3);
3447
3448 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 2));
3449 APSInt ValAPS = popToAPSInt(S, E: Call->getArg(Arg: 1));
3450
3451 const Pointer &Base = S.Stk.pop<Pointer>();
3452 if (!Base.getFieldDesc()->isPrimitiveArray())
3453 return false;
3454
3455 const Pointer &Dst = S.Stk.peek<Pointer>();
3456
3457 unsigned NumElems = Base.getNumElems();
3458 unsigned Index =
3459 static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1));
3460
3461 PrimType ElemT = Base.getFieldDesc()->getPrimType();
3462 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3463 for (unsigned I = 0; I != NumElems; ++I)
3464 Dst.elem<T>(I) = Base.elem<T>(I);
3465 Dst.elem<T>(Index) = static_cast<T>(ValAPS);
3466 });
3467
3468 Dst.initializeAllElements();
3469 return true;
3470}
3471
3472static bool evalICmpImm(uint8_t Imm, const APSInt &A, const APSInt &B,
3473 bool IsUnsigned) {
3474 switch (Imm & 0x7) {
3475 case 0x00: // _MM_CMPINT_EQ
3476 return (A == B);
3477 case 0x01: // _MM_CMPINT_LT
3478 return IsUnsigned ? A.ult(RHS: B) : A.slt(RHS: B);
3479 case 0x02: // _MM_CMPINT_LE
3480 return IsUnsigned ? A.ule(RHS: B) : A.sle(RHS: B);
3481 case 0x03: // _MM_CMPINT_FALSE
3482 return false;
3483 case 0x04: // _MM_CMPINT_NE
3484 return (A != B);
3485 case 0x05: // _MM_CMPINT_NLT
3486 return IsUnsigned ? A.ugt(RHS: B) : A.sgt(RHS: B);
3487 case 0x06: // _MM_CMPINT_NLE
3488 return IsUnsigned ? A.uge(RHS: B) : A.sge(RHS: B);
3489 case 0x07: // _MM_CMPINT_TRUE
3490 return true;
3491 default:
3492 llvm_unreachable("Invalid Op");
3493 }
3494}
3495
3496static bool interp__builtin_ia32_cmp_mask(InterpState &S, CodePtr OpPC,
3497 const CallExpr *Call, unsigned ID,
3498 bool IsUnsigned) {
3499 assert(Call->getNumArgs() == 4);
3500
3501 APSInt Mask = popToAPSInt(S, E: Call->getArg(Arg: 3));
3502 APSInt Opcode = popToAPSInt(S, E: Call->getArg(Arg: 2));
3503 unsigned CmpOp = static_cast<unsigned>(Opcode.getZExtValue());
3504 const Pointer &RHS = S.Stk.pop<Pointer>();
3505 const Pointer &LHS = S.Stk.pop<Pointer>();
3506
3507 assert(LHS.getNumElems() == RHS.getNumElems());
3508
3509 APInt RetMask = APInt::getZero(numBits: LHS.getNumElems());
3510 unsigned VectorLen = LHS.getNumElems();
3511 PrimType ElemT = LHS.getFieldDesc()->getPrimType();
3512
3513 for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
3514 APSInt A, B;
3515 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3516 A = LHS.elem<T>(ElemNum).toAPSInt();
3517 B = RHS.elem<T>(ElemNum).toAPSInt();
3518 });
3519 RetMask.setBitVal(BitPosition: ElemNum,
3520 BitValue: Mask[ElemNum] && evalICmpImm(Imm: CmpOp, A, B, IsUnsigned));
3521 }
3522 pushInteger(S, Val: RetMask, QT: Call->getType());
3523 return true;
3524}
3525
// Evaluates the vpconflict builtins: for every element I of the source
// vector, the result element is a bitmask whose bit J (J < I) is set iff
// source element J compares equal to source element I.
static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
                                            const CallExpr *Call) {
  assert(Call->getNumArgs() == 1);

  QualType Arg0Type = Call->getArg(Arg: 0)->getType();
  const auto *VecT = Arg0Type->castAs<VectorType>();
  PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
  unsigned NumElems = VecT->getNumElements();
  bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
  const Pointer &Src = S.Stk.pop<Pointer>();
  // The result vector is written in place into the value left on the stack.
  const Pointer &Dst = S.Stk.peek<Pointer>();

  for (unsigned I = 0; I != NumElems; ++I) {
    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
      APSInt ElemI = Src.elem<T>(I).toAPSInt();
      // Only lower-indexed elements are scanned, so element 0 is always 0.
      APInt ConflictMask(ElemI.getBitWidth(), 0);
      for (unsigned J = 0; J != I; ++J) {
        APSInt ElemJ = Src.elem<T>(J).toAPSInt();
        ConflictMask.setBitVal(J, ElemI == ElemJ);
      }
      Dst.elem<T>(I) = static_cast<T>(APSInt(ConflictMask, DestUnsigned));
    });
  }
  Dst.initializeAllElements();
  return true;
}
3552
3553static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC,
3554 const CallExpr *Call,
3555 unsigned ID) {
3556 assert(Call->getNumArgs() == 1);
3557
3558 const Pointer &Vec = S.Stk.pop<Pointer>();
3559 unsigned RetWidth = S.getASTContext().getIntWidth(T: Call->getType());
3560 APInt RetMask(RetWidth, 0);
3561
3562 unsigned VectorLen = Vec.getNumElems();
3563 PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3564
3565 for (unsigned ElemNum = 0; ElemNum != VectorLen; ++ElemNum) {
3566 APSInt A;
3567 INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem<T>(ElemNum).toAPSInt(); });
3568 unsigned MSB = A[A.getBitWidth() - 1];
3569 RetMask.setBitVal(BitPosition: ElemNum, BitValue: MSB);
3570 }
3571 pushInteger(S, Val: RetMask, QT: Call->getType());
3572 return true;
3573}
3574
3575static bool interp__builtin_ia32_cvt_mask2vec(InterpState &S, CodePtr OpPC,
3576 const CallExpr *Call,
3577 unsigned ID) {
3578 assert(Call->getNumArgs() == 1);
3579
3580 APSInt Mask = popToAPSInt(S, E: Call->getArg(Arg: 0));
3581
3582 const Pointer &Vec = S.Stk.peek<Pointer>();
3583 unsigned NumElems = Vec.getNumElems();
3584 PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3585
3586 for (unsigned I = 0; I != NumElems; ++I) {
3587 bool BitSet = Mask[I];
3588
3589 INT_TYPE_SWITCH_NO_BOOL(
3590 ElemT, { Vec.elem<T>(I) = BitSet ? T::from(-1) : T::from(0); });
3591 }
3592
3593 Vec.initializeAllElements();
3594
3595 return true;
3596}
3597
// Evaluates the cvtsd2ss builtins: converts lane 0 of B from double to
// float into lane 0 of the result, copying the remaining lanes from A.
// With HasRoundingMask, the masked/rounding form (a, b, src, mask, rounding)
// is evaluated: if mask bit 0 is clear, lane 0 is taken from Src instead.
static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
                                          const CallExpr *Call,
                                          bool HasRoundingMask) {
  APSInt Rounding, MaskInt;
  Pointer Src, B, A;

  // Arguments are popped in reverse order of the argument list.
  if (HasRoundingMask) {
    assert(Call->getNumArgs() == 5);
    Rounding = popToAPSInt(S, E: Call->getArg(Arg: 4));
    MaskInt = popToAPSInt(S, E: Call->getArg(Arg: 3));
    Src = S.Stk.pop<Pointer>();
    B = S.Stk.pop<Pointer>();
    A = S.Stk.pop<Pointer>();
    if (!CheckLoad(S, OpPC, Ptr: A) || !CheckLoad(S, OpPC, Ptr: B) ||
        !CheckLoad(S, OpPC, Ptr: Src))
      return false;
  } else {
    assert(Call->getNumArgs() == 2);
    B = S.Stk.pop<Pointer>();
    A = S.Stk.pop<Pointer>();
    if (!CheckLoad(S, OpPC, Ptr: A) || !CheckLoad(S, OpPC, Ptr: B))
      return false;
  }

  const auto *DstVTy = Call->getType()->castAs<VectorType>();
  unsigned NumElems = DstVTy->getNumElements();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  // Copy all elements except lane 0 (overwritten below) from A to Dst.
  for (unsigned I = 1; I != NumElems; ++I)
    Dst.elem<Floating>(I) = A.elem<Floating>(I);

  // Convert element 0 from double to float, or use Src if masked off.
  if (!HasRoundingMask || (MaskInt.getZExtValue() & 0x1)) {
    assert(S.getASTContext().FloatTy == DstVTy->getElementType() &&
           "cvtsd2ss requires float element type in destination vector");

    Floating Conv = S.allocFloat(
        Sem: S.getASTContext().getFloatTypeSemantics(T: DstVTy->getElementType()));
    APFloat SrcVal = B.elem<Floating>(I: 0).getAPFloat();
    // Fails (non-constant) when the conversion is inexact under strict rules.
    if (!convertDoubleToFloatStrict(Src: SrcVal, Dst&: Conv, S, DiagExpr: Call))
      return false;
    Dst.elem<Floating>(I: 0) = Conv;
  } else {
    // Mask bit 0 is clear: pass through lane 0 of the source operand.
    Dst.elem<Floating>(I: 0) = Src.elem<Floating>(I: 0);
  }

  Dst.initializeAllElements();
  return true;
}
3648
// Evaluates the cvtpd2ps builtins: converts each double element of the
// source vector to float. For the masked forms, disabled lanes keep the
// pass-through value; trailing destination lanes beyond the source's
// element count are zero-filled (unmasked form) or taken from pass-through.
static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
                                          const CallExpr *Call, bool IsMasked,
                                          bool HasRounding) {

  APSInt MaskVal;
  Pointer PassThrough;
  Pointer Src;
  APSInt Rounding;

  if (IsMasked) {
    // Pop in reverse order.
    if (HasRounding) {
      Rounding = popToAPSInt(S, E: Call->getArg(Arg: 3));
      MaskVal = popToAPSInt(S, E: Call->getArg(Arg: 2));
      PassThrough = S.Stk.pop<Pointer>();
      Src = S.Stk.pop<Pointer>();
    } else {
      MaskVal = popToAPSInt(S, E: Call->getArg(Arg: 2));
      PassThrough = S.Stk.pop<Pointer>();
      Src = S.Stk.pop<Pointer>();
    }

    if (!CheckLoad(S, OpPC, Ptr: PassThrough))
      return false;
  } else {
    // Pop source only.
    Src = S.Stk.pop<Pointer>();
  }

  if (!CheckLoad(S, OpPC, Ptr: Src))
    return false;

  const auto *RetVTy = Call->getType()->castAs<VectorType>();
  unsigned RetElems = RetVTy->getNumElements();
  unsigned SrcElems = Src.getNumElems();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  // Initialize destination with passthrough or zeros.
  for (unsigned I = 0; I != RetElems; ++I)
    if (IsMasked)
      Dst.elem<Floating>(I) = PassThrough.elem<Floating>(I);
    else
      Dst.elem<Floating>(I) = Floating(APFloat(0.0f));

  assert(S.getASTContext().FloatTy == RetVTy->getElementType() &&
         "cvtpd2ps requires float element type in return vector");

  // Convert double to float for enabled elements (only process source elements
  // that exist).
  for (unsigned I = 0; I != SrcElems; ++I) {
    // Disabled lanes keep the pass-through value written above.
    if (IsMasked && !MaskVal[I])
      continue;

    APFloat SrcVal = Src.elem<Floating>(I).getAPFloat();

    Floating Conv = S.allocFloat(
        Sem: S.getASTContext().getFloatTypeSemantics(T: RetVTy->getElementType()));
    // Fails (non-constant) when the conversion is inexact under strict rules.
    if (!convertDoubleToFloatStrict(Src: SrcVal, Dst&: Conv, S, DiagExpr: Call))
      return false;
    Dst.elem<Floating>(I) = Conv;
  }

  Dst.initializeAllElements();
  return true;
}
3714
// Generic driver for x86 vector shuffle builtins. GetSourceIndex maps a
// destination element index and the current shuffle-mask value to a pair
// (source vector index: 0 selects A / nonzero selects B, source element
// index); a negative source element index means "zero this destination
// element". Handles both single-operand (2-arg) and two-operand (3-arg)
// forms, with the mask supplied either as an integer immediate or as a
// per-element mask vector (always argument 1 in the vector-mask forms).
static bool interp__builtin_ia32_shuffle_generic(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
        GetSourceIndex) {

  assert(Call->getNumArgs() == 2 || Call->getNumArgs() == 3);

  unsigned ShuffleMask = 0;
  Pointer A, MaskVector, B;
  bool IsVectorMask = false;
  bool IsSingleOperand = (Call->getNumArgs() == 2);

  // Arguments are popped in reverse order of the argument list. In the
  // single-operand forms, both source slots alias the one vector operand.
  if (IsSingleOperand) {
    QualType MaskType = Call->getArg(Arg: 1)->getType();
    if (MaskType->isVectorType()) {
      IsVectorMask = true;
      MaskVector = S.Stk.pop<Pointer>();
      A = S.Stk.pop<Pointer>();
      B = A;
    } else if (MaskType->isIntegerType()) {
      ShuffleMask = popToAPSInt(S, E: Call->getArg(Arg: 1)).getZExtValue();
      A = S.Stk.pop<Pointer>();
      B = A;
    } else {
      return false;
    }
  } else {
    QualType Arg2Type = Call->getArg(Arg: 2)->getType();
    if (Arg2Type->isVectorType()) {
      IsVectorMask = true;
      B = S.Stk.pop<Pointer>();
      MaskVector = S.Stk.pop<Pointer>();
      A = S.Stk.pop<Pointer>();
    } else if (Arg2Type->isIntegerType()) {
      ShuffleMask = popToAPSInt(S, E: Call->getArg(Arg: 2)).getZExtValue();
      B = S.Stk.pop<Pointer>();
      A = S.Stk.pop<Pointer>();
    } else {
      return false;
    }
  }

  QualType Arg0Type = Call->getArg(Arg: 0)->getType();
  const auto *VecT = Arg0Type->castAs<VectorType>();
  PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
  unsigned NumElems = VecT->getNumElements();

  const Pointer &Dst = S.Stk.peek<Pointer>();

  // In the vector-mask forms the mask is argument 1; classify its element
  // type so each mask element can be read below.
  PrimType MaskElemT = PT_Uint32;
  if (IsVectorMask) {
    QualType Arg1Type = Call->getArg(Arg: 1)->getType();
    const auto *MaskVecT = Arg1Type->castAs<VectorType>();
    QualType MaskElemType = MaskVecT->getElementType();
    MaskElemT = *S.getContext().classify(T: MaskElemType);
  }

  for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {
    // With a vector mask, each destination lane gets its own mask value.
    if (IsVectorMask) {
      INT_TYPE_SWITCH(MaskElemT, {
        ShuffleMask = static_cast<unsigned>(MaskVector.elem<T>(DstIdx));
      });
    }

    auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);

    if (SrcIdx < 0) {
      // Zero out this element
      if (ElemT == PT_Float) {
        Dst.elem<Floating>(I: DstIdx) = Floating(
            S.getASTContext().getFloatTypeSemantics(T: VecT->getElementType()));
      } else {
        INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(0); });
      }
    } else {
      const Pointer &Src = (SrcVecIdx == 0) ? A : B;
      TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
    }
  }
  Dst.initializeAllElements();

  return true;
}
3798
// Generic driver for x86 vector shift builtins that take the shift count
// from a second vector operand. The count is the low 64 bits (one QWord)
// of the count vector, applied uniformly to every element of the source:
// ShiftOp(element, count) when count < element width, otherwise
// OverflowOp(element, element width) — e.g. all-zeros or a sign fill.
static bool interp__builtin_ia32_shift_with_count(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp,
    llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) {

  assert(Call->getNumArgs() == 2);

  // Popped in reverse order of the argument list.
  const Pointer &Count = S.Stk.pop<Pointer>();
  const Pointer &Source = S.Stk.pop<Pointer>();

  QualType SourceType = Call->getArg(Arg: 0)->getType();
  QualType CountType = Call->getArg(Arg: 1)->getType();
  assert(SourceType->isVectorType() && CountType->isVectorType());

  const auto *SourceVecT = SourceType->castAs<VectorType>();
  const auto *CountVecT = CountType->castAs<VectorType>();
  PrimType SourceElemT = *S.getContext().classify(T: SourceVecT->getElementType());
  PrimType CountElemT = *S.getContext().classify(T: CountVecT->getElementType());

  const Pointer &Dst = S.Stk.peek<Pointer>();

  unsigned DestEltWidth =
      S.getASTContext().getTypeSize(T: SourceVecT->getElementType());
  bool IsDestUnsigned = SourceVecT->getElementType()->isUnsignedIntegerType();
  unsigned DestLen = SourceVecT->getNumElements();
  unsigned CountEltWidth =
      S.getASTContext().getTypeSize(T: CountVecT->getElementType());
  unsigned NumBitsInQWord = 64;
  unsigned NumCountElts = NumBitsInQWord / CountEltWidth;

  // Assemble the low QWord of the count vector into a single 64-bit value,
  // packing the count elements in little-endian element order.
  uint64_t CountLQWord = 0;
  for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) {
    uint64_t Elt = 0;
    INT_TYPE_SWITCH(CountElemT,
                    { Elt = static_cast<uint64_t>(Count.elem<T>(EltIdx)); });
    CountLQWord |= (Elt << (EltIdx * CountEltWidth));
  }

  for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) {
    APSInt Elt;
    INT_TYPE_SWITCH(SourceElemT, { Elt = Source.elem<T>(EltIdx).toAPSInt(); });

    // Counts >= the element width overflow; delegate to OverflowOp.
    APInt Result;
    if (CountLQWord < DestEltWidth) {
      Result = ShiftOp(Elt, CountLQWord);
    } else {
      Result = OverflowOp(Elt, DestEltWidth);
    }
    // Re-extend the result with the destination element's signedness.
    if (IsDestUnsigned) {
      INT_TYPE_SWITCH(SourceElemT, {
        Dst.elem<T>(EltIdx) = T::from(Result.getZExtValue());
      });
    } else {
      INT_TYPE_SWITCH(SourceElemT, {
        Dst.elem<T>(EltIdx) = T::from(Result.getSExtValue());
      });
    }
  }

  Dst.initializeAllElements();
  return true;
}
3861
// Evaluates the vpshufbitqmb mask builtins: for each byte lane, selects a
// single bit (indexed by the low 6 bits of the shuffle-mask byte) out of
// the containing 64-bit lane of the source and writes it into the result
// mask, gated by the integer zero-mask operand.
static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
                                                 const CallExpr *Call) {

  assert(Call->getNumArgs() == 3);

  QualType SourceType = Call->getArg(Arg: 0)->getType();
  QualType ShuffleMaskType = Call->getArg(Arg: 1)->getType();
  QualType ZeroMaskType = Call->getArg(Arg: 2)->getType();
  if (!SourceType->isVectorType() || !ShuffleMaskType->isVectorType() ||
      !ZeroMaskType->isIntegerType()) {
    return false;
  }

  // Popped in reverse order of the argument list.
  Pointer Source, ShuffleMask;
  APSInt ZeroMask = popToAPSInt(S, E: Call->getArg(Arg: 2));
  ShuffleMask = S.Stk.pop<Pointer>();
  Source = S.Stk.pop<Pointer>();

  const auto *SourceVecT = SourceType->castAs<VectorType>();
  const auto *ShuffleMaskVecT = ShuffleMaskType->castAs<VectorType>();
  assert(SourceVecT->getNumElements() == ShuffleMaskVecT->getNumElements());
  assert(ZeroMask.getBitWidth() == SourceVecT->getNumElements());

  PrimType SourceElemT = *S.getContext().classify(T: SourceVecT->getElementType());
  PrimType ShuffleMaskElemT =
      *S.getContext().classify(T: ShuffleMaskVecT->getElementType());

  unsigned NumBytesInQWord = 8;
  unsigned NumBitsInByte = 8;
  unsigned NumBytes = SourceVecT->getNumElements();
  unsigned NumQWords = NumBytes / NumBytesInQWord;
  unsigned RetWidth = ZeroMask.getBitWidth();
  APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);

  for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
    // Pack this lane's 8 source bytes into one 64-bit value
    // (little-endian byte order) so single bits can be selected from it.
    APInt SourceQWord(64, 0);
    for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
      uint64_t Byte = 0;
      INT_TYPE_SWITCH(SourceElemT, {
        Byte = static_cast<uint64_t>(
            Source.elem<T>(QWordId * NumBytesInQWord + ByteIdx));
      });
      SourceQWord.insertBits(SubBits: APInt(8, Byte & 0xFF), bitPosition: ByteIdx * NumBitsInByte);
    }

    for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
      unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx;
      // Only the low 6 bits of the shuffle byte select a bit (0..63).
      unsigned M = 0;
      INT_TYPE_SWITCH(ShuffleMaskElemT, {
        M = static_cast<unsigned>(ShuffleMask.elem<T>(SelIdx)) & 0x3F;
      });

      // Bits whose zero-mask bit is clear stay 0 in the result.
      if (ZeroMask[SelIdx]) {
        RetMask.setBitVal(BitPosition: SelIdx, BitValue: SourceQWord[M]);
      }
    }
  }

  pushInteger(S, Val: RetMask, QT: Call->getType());
  return true;
}
3923
3924static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
3925 const CallExpr *Call) {
3926 // Arguments are: vector of floats, rounding immediate
3927 assert(Call->getNumArgs() == 2);
3928
3929 APSInt Imm = popToAPSInt(S, E: Call->getArg(Arg: 1));
3930 const Pointer &Src = S.Stk.pop<Pointer>();
3931 const Pointer &Dst = S.Stk.peek<Pointer>();
3932
3933 assert(Src.getFieldDesc()->isPrimitiveArray());
3934 assert(Dst.getFieldDesc()->isPrimitiveArray());
3935
3936 const auto *SrcVTy = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
3937 unsigned SrcNumElems = SrcVTy->getNumElements();
3938 const auto *DstVTy = Call->getType()->castAs<VectorType>();
3939 unsigned DstNumElems = DstVTy->getNumElements();
3940
3941 const llvm::fltSemantics &HalfSem =
3942 S.getASTContext().getFloatTypeSemantics(T: S.getASTContext().HalfTy);
3943
3944 // imm[2] == 1 means use MXCSR rounding mode.
3945 // In that case, we can only evaluate if the conversion is exact.
3946 int ImmVal = Imm.getZExtValue();
3947 bool UseMXCSR = (ImmVal & 4) != 0;
3948 bool IsFPConstrained =
3949 Call->getFPFeaturesInEffect(LO: S.getASTContext().getLangOpts())
3950 .isFPConstrained();
3951
3952 llvm::RoundingMode RM;
3953 if (!UseMXCSR) {
3954 switch (ImmVal & 3) {
3955 case 0:
3956 RM = llvm::RoundingMode::NearestTiesToEven;
3957 break;
3958 case 1:
3959 RM = llvm::RoundingMode::TowardNegative;
3960 break;
3961 case 2:
3962 RM = llvm::RoundingMode::TowardPositive;
3963 break;
3964 case 3:
3965 RM = llvm::RoundingMode::TowardZero;
3966 break;
3967 default:
3968 llvm_unreachable("Invalid immediate rounding mode");
3969 }
3970 } else {
3971 // For MXCSR, we must check for exactness. We can use any rounding mode
3972 // for the trial conversion since the result is the same if it's exact.
3973 RM = llvm::RoundingMode::NearestTiesToEven;
3974 }
3975
3976 QualType DstElemQT = Dst.getFieldDesc()->getElemQualType();
3977 PrimType DstElemT = *S.getContext().classify(T: DstElemQT);
3978
3979 for (unsigned I = 0; I != SrcNumElems; ++I) {
3980 Floating SrcVal = Src.elem<Floating>(I);
3981 APFloat DstVal = SrcVal.getAPFloat();
3982
3983 bool LostInfo;
3984 APFloat::opStatus St = DstVal.convert(ToSemantics: HalfSem, RM, losesInfo: &LostInfo);
3985
3986 if (UseMXCSR && IsFPConstrained && St != APFloat::opOK) {
3987 S.FFDiag(SI: S.Current->getSource(PC: OpPC),
3988 DiagId: diag::note_constexpr_dynamic_rounding);
3989 return false;
3990 }
3991
3992 INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
3993 // Convert the destination value's bit pattern to an unsigned integer,
3994 // then reconstruct the element using the target type's 'from' method.
3995 uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue();
3996 Dst.elem<T>(I) = T::from(RawBits);
3997 });
3998 }
3999
4000 // Zero out remaining elements if the destination has more elements
4001 // (e.g., vcvtps2ph converting 4 floats to 8 shorts).
4002 if (DstNumElems > SrcNumElems) {
4003 for (unsigned I = SrcNumElems; I != DstNumElems; ++I) {
4004 INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); });
4005 }
4006 }
4007
4008 Dst.initializeAllElements();
4009 return true;
4010}
4011
// Evaluates the vpmultishiftqb builtins: for each byte lane, extracts an
// unaligned 8-bit field from the containing 64-bit lane of B, starting at
// the bit offset given by the low 6 bits of the corresponding byte of A
// (wrapping around within the 64-bit lane).
static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC,
                                              const CallExpr *Call) {
  assert(Call->getNumArgs() == 2);

  QualType ATy = Call->getArg(Arg: 0)->getType();
  QualType BTy = Call->getArg(Arg: 1)->getType();
  if (!ATy->isVectorType() || !BTy->isVectorType()) {
    return false;
  }

  // Popped in reverse order of the argument list.
  const Pointer &BPtr = S.Stk.pop<Pointer>();
  const Pointer &APtr = S.Stk.pop<Pointer>();
  const auto *AVecT = ATy->castAs<VectorType>();
  assert(AVecT->getNumElements() ==
         BTy->castAs<VectorType>()->getNumElements());

  PrimType ElemT = *S.getContext().classify(T: AVecT->getElementType());

  unsigned NumBytesInQWord = 8;
  unsigned NumBitsInByte = 8;
  unsigned NumBytes = AVecT->getNumElements();
  unsigned NumQWords = NumBytes / NumBytesInQWord;
  const Pointer &Dst = S.Stk.peek<Pointer>();

  for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
    // Pack this lane's 8 bytes of B into one 64-bit value
    // (little-endian byte order) so bit fields can be read across bytes.
    APInt BQWord(64, 0);
    for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
      unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
      INT_TYPE_SWITCH(ElemT, {
        uint64_t Byte = static_cast<uint64_t>(BPtr.elem<T>(Idx));
        BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
      });
    }

    for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
      unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
      // Only the low 6 bits of A's byte form the starting bit offset (0..63).
      uint64_t Ctrl = 0;
      INT_TYPE_SWITCH(
          ElemT, { Ctrl = static_cast<uint64_t>(APtr.elem<T>(Idx)) & 0x3F; });

      // Gather 8 consecutive bits starting at Ctrl, wrapping modulo 64.
      APInt Byte(8, 0);
      for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) {
        Byte.setBitVal(BitPosition: BitIdx, BitValue: BQWord[(Ctrl + BitIdx) & 0x3F]);
      }
      INT_TYPE_SWITCH(ElemT,
                      { Dst.elem<T>(Idx) = T::from(Byte.getZExtValue()); });
    }
  }

  Dst.initializeAllElements();

  return true;
}
4065
4066static bool interp_builtin_ia32_gfni_affine(InterpState &S, CodePtr OpPC,
4067 const CallExpr *Call,
4068 bool Inverse) {
4069 assert(Call->getNumArgs() == 3);
4070 QualType XType = Call->getArg(Arg: 0)->getType();
4071 QualType AType = Call->getArg(Arg: 1)->getType();
4072 QualType ImmType = Call->getArg(Arg: 2)->getType();
4073 if (!XType->isVectorType() || !AType->isVectorType() ||
4074 !ImmType->isIntegerType()) {
4075 return false;
4076 }
4077
4078 Pointer X, A;
4079 APSInt Imm = popToAPSInt(S, E: Call->getArg(Arg: 2));
4080 A = S.Stk.pop<Pointer>();
4081 X = S.Stk.pop<Pointer>();
4082
4083 const Pointer &Dst = S.Stk.peek<Pointer>();
4084 const auto *AVecT = AType->castAs<VectorType>();
4085 assert(XType->castAs<VectorType>()->getNumElements() ==
4086 AVecT->getNumElements());
4087 unsigned NumBytesInQWord = 8;
4088 unsigned NumBytes = AVecT->getNumElements();
4089 unsigned NumBitsInQWord = 64;
4090 unsigned NumQWords = NumBytes / NumBytesInQWord;
4091 unsigned NumBitsInByte = 8;
4092 PrimType AElemT = *S.getContext().classify(T: AVecT->getElementType());
4093
4094 // computing A*X + Imm
4095 for (unsigned QWordIdx = 0; QWordIdx != NumQWords; ++QWordIdx) {
4096 // Extract the QWords from X, A
4097 APInt XQWord(NumBitsInQWord, 0);
4098 APInt AQWord(NumBitsInQWord, 0);
4099 for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4100 unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx;
4101 uint8_t XByte;
4102 uint8_t AByte;
4103 INT_TYPE_SWITCH(AElemT, {
4104 XByte = static_cast<uint8_t>(X.elem<T>(Idx));
4105 AByte = static_cast<uint8_t>(A.elem<T>(Idx));
4106 });
4107
4108 XQWord.insertBits(SubBits: APInt(NumBitsInByte, XByte), bitPosition: ByteIdx * NumBitsInByte);
4109 AQWord.insertBits(SubBits: APInt(NumBitsInByte, AByte), bitPosition: ByteIdx * NumBitsInByte);
4110 }
4111
4112 for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4113 unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx;
4114 uint8_t XByte =
4115 XQWord.lshr(shiftAmt: ByteIdx * NumBitsInByte).getLoBits(numBits: 8).getZExtValue();
4116 INT_TYPE_SWITCH(AElemT, {
4117 Dst.elem<T>(Idx) = T::from(GFNIAffine(XByte, AQWord, Imm, Inverse));
4118 });
4119 }
4120 }
4121 Dst.initializeAllElements();
4122 return true;
4123}
4124
4125static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC,
4126 const CallExpr *Call) {
4127 assert(Call->getNumArgs() == 2);
4128
4129 QualType AType = Call->getArg(Arg: 0)->getType();
4130 QualType BType = Call->getArg(Arg: 1)->getType();
4131 if (!AType->isVectorType() || !BType->isVectorType()) {
4132 return false;
4133 }
4134
4135 Pointer A, B;
4136 B = S.Stk.pop<Pointer>();
4137 A = S.Stk.pop<Pointer>();
4138
4139 const Pointer &Dst = S.Stk.peek<Pointer>();
4140 const auto *AVecT = AType->castAs<VectorType>();
4141 assert(AVecT->getNumElements() ==
4142 BType->castAs<VectorType>()->getNumElements());
4143
4144 PrimType AElemT = *S.getContext().classify(T: AVecT->getElementType());
4145 unsigned NumBytes = A.getNumElems();
4146
4147 for (unsigned ByteIdx = 0; ByteIdx != NumBytes; ++ByteIdx) {
4148 uint8_t AByte, BByte;
4149 INT_TYPE_SWITCH(AElemT, {
4150 AByte = static_cast<uint8_t>(A.elem<T>(ByteIdx));
4151 BByte = static_cast<uint8_t>(B.elem<T>(ByteIdx));
4152 Dst.elem<T>(ByteIdx) = T::from(GFNIMul(AByte, BByte));
4153 });
4154 }
4155
4156 Dst.initializeAllElements();
4157 return true;
4158}
4159
4160bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
4161 uint32_t BuiltinID) {
4162 if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(ID: BuiltinID))
4163 return Invalid(S, OpPC);
4164
4165 const InterpFrame *Frame = S.Current;
4166 switch (BuiltinID) {
4167 case Builtin::BI__builtin_is_constant_evaluated:
4168 return interp__builtin_is_constant_evaluated(S, OpPC, Frame, Call);
4169
4170 case Builtin::BI__builtin_assume:
4171 case Builtin::BI__assume:
4172 return interp__builtin_assume(S, OpPC, Frame, Call);
4173
4174 case Builtin::BI__builtin_strcmp:
4175 case Builtin::BIstrcmp:
4176 case Builtin::BI__builtin_strncmp:
4177 case Builtin::BIstrncmp:
4178 case Builtin::BI__builtin_wcsncmp:
4179 case Builtin::BIwcsncmp:
4180 case Builtin::BI__builtin_wcscmp:
4181 case Builtin::BIwcscmp:
4182 return interp__builtin_strcmp(S, OpPC, Frame, Call, ID: BuiltinID);
4183
4184 case Builtin::BI__builtin_strlen:
4185 case Builtin::BIstrlen:
4186 case Builtin::BI__builtin_wcslen:
4187 case Builtin::BIwcslen:
4188 return interp__builtin_strlen(S, OpPC, Frame, Call, ID: BuiltinID);
4189
4190 case Builtin::BI__builtin_nan:
4191 case Builtin::BI__builtin_nanf:
4192 case Builtin::BI__builtin_nanl:
4193 case Builtin::BI__builtin_nanf16:
4194 case Builtin::BI__builtin_nanf128:
4195 return interp__builtin_nan(S, OpPC, Frame, Call, /*Signaling=*/false);
4196
4197 case Builtin::BI__builtin_nans:
4198 case Builtin::BI__builtin_nansf:
4199 case Builtin::BI__builtin_nansl:
4200 case Builtin::BI__builtin_nansf16:
4201 case Builtin::BI__builtin_nansf128:
4202 return interp__builtin_nan(S, OpPC, Frame, Call, /*Signaling=*/true);
4203
4204 case Builtin::BI__builtin_huge_val:
4205 case Builtin::BI__builtin_huge_valf:
4206 case Builtin::BI__builtin_huge_vall:
4207 case Builtin::BI__builtin_huge_valf16:
4208 case Builtin::BI__builtin_huge_valf128:
4209 case Builtin::BI__builtin_inf:
4210 case Builtin::BI__builtin_inff:
4211 case Builtin::BI__builtin_infl:
4212 case Builtin::BI__builtin_inff16:
4213 case Builtin::BI__builtin_inff128:
4214 return interp__builtin_inf(S, OpPC, Frame, Call);
4215
4216 case Builtin::BI__builtin_copysign:
4217 case Builtin::BI__builtin_copysignf:
4218 case Builtin::BI__builtin_copysignl:
4219 case Builtin::BI__builtin_copysignf128:
4220 return interp__builtin_copysign(S, OpPC, Frame);
4221
4222 case Builtin::BI__builtin_fmin:
4223 case Builtin::BI__builtin_fminf:
4224 case Builtin::BI__builtin_fminl:
4225 case Builtin::BI__builtin_fminf16:
4226 case Builtin::BI__builtin_fminf128:
4227 return interp__builtin_fmin(S, OpPC, Frame, /*IsNumBuiltin=*/false);
4228
4229 case Builtin::BI__builtin_fminimum_num:
4230 case Builtin::BI__builtin_fminimum_numf:
4231 case Builtin::BI__builtin_fminimum_numl:
4232 case Builtin::BI__builtin_fminimum_numf16:
4233 case Builtin::BI__builtin_fminimum_numf128:
4234 return interp__builtin_fmin(S, OpPC, Frame, /*IsNumBuiltin=*/true);
4235
4236 case Builtin::BI__builtin_fmax:
4237 case Builtin::BI__builtin_fmaxf:
4238 case Builtin::BI__builtin_fmaxl:
4239 case Builtin::BI__builtin_fmaxf16:
4240 case Builtin::BI__builtin_fmaxf128:
4241 return interp__builtin_fmax(S, OpPC, Frame, /*IsNumBuiltin=*/false);
4242
4243 case Builtin::BI__builtin_fmaximum_num:
4244 case Builtin::BI__builtin_fmaximum_numf:
4245 case Builtin::BI__builtin_fmaximum_numl:
4246 case Builtin::BI__builtin_fmaximum_numf16:
4247 case Builtin::BI__builtin_fmaximum_numf128:
4248 return interp__builtin_fmax(S, OpPC, Frame, /*IsNumBuiltin=*/true);
4249
4250 case Builtin::BI__builtin_isnan:
4251 return interp__builtin_isnan(S, OpPC, Frame, Call);
4252
4253 case Builtin::BI__builtin_issignaling:
4254 return interp__builtin_issignaling(S, OpPC, Frame, Call);
4255
4256 case Builtin::BI__builtin_isinf:
4257 return interp__builtin_isinf(S, OpPC, Frame, /*Sign=*/CheckSign: false, Call);
4258
4259 case Builtin::BI__builtin_isinf_sign:
4260 return interp__builtin_isinf(S, OpPC, Frame, /*Sign=*/CheckSign: true, Call);
4261
4262 case Builtin::BI__builtin_isfinite:
4263 return interp__builtin_isfinite(S, OpPC, Frame, Call);
4264
4265 case Builtin::BI__builtin_isnormal:
4266 return interp__builtin_isnormal(S, OpPC, Frame, Call);
4267
4268 case Builtin::BI__builtin_issubnormal:
4269 return interp__builtin_issubnormal(S, OpPC, Frame, Call);
4270
4271 case Builtin::BI__builtin_iszero:
4272 return interp__builtin_iszero(S, OpPC, Frame, Call);
4273
4274 case Builtin::BI__builtin_signbit:
4275 case Builtin::BI__builtin_signbitf:
4276 case Builtin::BI__builtin_signbitl:
4277 return interp__builtin_signbit(S, OpPC, Frame, Call);
4278
4279 case Builtin::BI__builtin_isgreater:
4280 case Builtin::BI__builtin_isgreaterequal:
4281 case Builtin::BI__builtin_isless:
4282 case Builtin::BI__builtin_islessequal:
4283 case Builtin::BI__builtin_islessgreater:
4284 case Builtin::BI__builtin_isunordered:
4285 return interp_floating_comparison(S, OpPC, Call, ID: BuiltinID);
4286
4287 case Builtin::BI__builtin_isfpclass:
4288 return interp__builtin_isfpclass(S, OpPC, Frame, Call);
4289
4290 case Builtin::BI__builtin_fpclassify:
4291 return interp__builtin_fpclassify(S, OpPC, Frame, Call);
4292
4293 case Builtin::BI__builtin_fabs:
4294 case Builtin::BI__builtin_fabsf:
4295 case Builtin::BI__builtin_fabsl:
4296 case Builtin::BI__builtin_fabsf128:
4297 return interp__builtin_fabs(S, OpPC, Frame);
4298
4299 case Builtin::BI__builtin_abs:
4300 case Builtin::BI__builtin_labs:
4301 case Builtin::BI__builtin_llabs:
4302 return interp__builtin_abs(S, OpPC, Frame, Call);
4303
4304 case Builtin::BI__builtin_popcount:
4305 case Builtin::BI__builtin_popcountl:
4306 case Builtin::BI__builtin_popcountll:
4307 case Builtin::BI__builtin_popcountg:
4308 case Builtin::BI__popcnt16: // Microsoft variants of popcount
4309 case Builtin::BI__popcnt:
4310 case Builtin::BI__popcnt64:
4311 return interp__builtin_popcount(S, OpPC, Frame, Call);
4312
4313 case Builtin::BI__builtin_parity:
4314 case Builtin::BI__builtin_parityl:
4315 case Builtin::BI__builtin_parityll:
4316 return interp__builtin_elementwise_int_unaryop(
4317 S, OpPC, Call, Fn: [](const APSInt &Val) {
4318 return APInt(Val.getBitWidth(), Val.popcount() % 2);
4319 });
4320 case Builtin::BI__builtin_clrsb:
4321 case Builtin::BI__builtin_clrsbl:
4322 case Builtin::BI__builtin_clrsbll:
4323 return interp__builtin_elementwise_int_unaryop(
4324 S, OpPC, Call, Fn: [](const APSInt &Val) {
4325 return APInt(Val.getBitWidth(),
4326 Val.getBitWidth() - Val.getSignificantBits());
4327 });
4328 case Builtin::BI__builtin_bitreverseg:
4329 case Builtin::BI__builtin_bitreverse8:
4330 case Builtin::BI__builtin_bitreverse16:
4331 case Builtin::BI__builtin_bitreverse32:
4332 case Builtin::BI__builtin_bitreverse64:
4333 return interp__builtin_elementwise_int_unaryop(
4334 S, OpPC, Call, Fn: [](const APSInt &Val) { return Val.reverseBits(); });
4335
4336 case Builtin::BI__builtin_classify_type:
4337 return interp__builtin_classify_type(S, OpPC, Frame, Call);
4338
4339 case Builtin::BI__builtin_expect:
4340 case Builtin::BI__builtin_expect_with_probability:
4341 return interp__builtin_expect(S, OpPC, Frame, Call);
4342
4343 case Builtin::BI__builtin_rotateleft8:
4344 case Builtin::BI__builtin_rotateleft16:
4345 case Builtin::BI__builtin_rotateleft32:
4346 case Builtin::BI__builtin_rotateleft64:
4347 case Builtin::BI__builtin_stdc_rotate_left:
4348 case Builtin::BI_rotl8: // Microsoft variants of rotate left
4349 case Builtin::BI_rotl16:
4350 case Builtin::BI_rotl:
4351 case Builtin::BI_lrotl:
4352 case Builtin::BI_rotl64:
4353 case Builtin::BI__builtin_rotateright8:
4354 case Builtin::BI__builtin_rotateright16:
4355 case Builtin::BI__builtin_rotateright32:
4356 case Builtin::BI__builtin_rotateright64:
4357 case Builtin::BI__builtin_stdc_rotate_right:
4358 case Builtin::BI_rotr8: // Microsoft variants of rotate right
4359 case Builtin::BI_rotr16:
4360 case Builtin::BI_rotr:
4361 case Builtin::BI_lrotr:
4362 case Builtin::BI_rotr64: {
4363 // Determine if this is a rotate right operation
4364 bool IsRotateRight;
4365 switch (BuiltinID) {
4366 case Builtin::BI__builtin_rotateright8:
4367 case Builtin::BI__builtin_rotateright16:
4368 case Builtin::BI__builtin_rotateright32:
4369 case Builtin::BI__builtin_rotateright64:
4370 case Builtin::BI__builtin_stdc_rotate_right:
4371 case Builtin::BI_rotr8:
4372 case Builtin::BI_rotr16:
4373 case Builtin::BI_rotr:
4374 case Builtin::BI_lrotr:
4375 case Builtin::BI_rotr64:
4376 IsRotateRight = true;
4377 break;
4378 default:
4379 IsRotateRight = false;
4380 break;
4381 }
4382
4383 return interp__builtin_elementwise_int_binop(
4384 S, OpPC, Call, Fn: [IsRotateRight](const APSInt &Value, APSInt Amount) {
4385 Amount = NormalizeRotateAmount(Value, Amount);
4386 return IsRotateRight ? Value.rotr(rotateAmt: Amount.getZExtValue())
4387 : Value.rotl(rotateAmt: Amount.getZExtValue());
4388 });
4389 }
4390
4391 case Builtin::BI__builtin_ffs:
4392 case Builtin::BI__builtin_ffsl:
4393 case Builtin::BI__builtin_ffsll:
4394 return interp__builtin_elementwise_int_unaryop(
4395 S, OpPC, Call, Fn: [](const APSInt &Val) {
4396 return APInt(Val.getBitWidth(),
4397 Val.isZero() ? 0u : Val.countTrailingZeros() + 1u);
4398 });
4399
4400 case Builtin::BIaddressof:
4401 case Builtin::BI__addressof:
4402 case Builtin::BI__builtin_addressof:
4403 assert(isNoopBuiltin(BuiltinID));
4404 return interp__builtin_addressof(S, OpPC, Frame, Call);
4405
4406 case Builtin::BIas_const:
4407 case Builtin::BIforward:
4408 case Builtin::BIforward_like:
4409 case Builtin::BImove:
4410 case Builtin::BImove_if_noexcept:
4411 assert(isNoopBuiltin(BuiltinID));
4412 return interp__builtin_move(S, OpPC, Frame, Call);
4413
4414 case Builtin::BI__builtin_eh_return_data_regno:
4415 return interp__builtin_eh_return_data_regno(S, OpPC, Frame, Call);
4416
4417 case Builtin::BI__builtin_launder:
4418 assert(isNoopBuiltin(BuiltinID));
4419 return true;
4420
4421 case Builtin::BI__builtin_add_overflow:
4422 case Builtin::BI__builtin_sub_overflow:
4423 case Builtin::BI__builtin_mul_overflow:
4424 case Builtin::BI__builtin_sadd_overflow:
4425 case Builtin::BI__builtin_uadd_overflow:
4426 case Builtin::BI__builtin_uaddl_overflow:
4427 case Builtin::BI__builtin_uaddll_overflow:
4428 case Builtin::BI__builtin_usub_overflow:
4429 case Builtin::BI__builtin_usubl_overflow:
4430 case Builtin::BI__builtin_usubll_overflow:
4431 case Builtin::BI__builtin_umul_overflow:
4432 case Builtin::BI__builtin_umull_overflow:
4433 case Builtin::BI__builtin_umulll_overflow:
4434 case Builtin::BI__builtin_saddl_overflow:
4435 case Builtin::BI__builtin_saddll_overflow:
4436 case Builtin::BI__builtin_ssub_overflow:
4437 case Builtin::BI__builtin_ssubl_overflow:
4438 case Builtin::BI__builtin_ssubll_overflow:
4439 case Builtin::BI__builtin_smul_overflow:
4440 case Builtin::BI__builtin_smull_overflow:
4441 case Builtin::BI__builtin_smulll_overflow:
4442 return interp__builtin_overflowop(S, OpPC, Call, BuiltinOp: BuiltinID);
4443
4444 case Builtin::BI__builtin_addcb:
4445 case Builtin::BI__builtin_addcs:
4446 case Builtin::BI__builtin_addc:
4447 case Builtin::BI__builtin_addcl:
4448 case Builtin::BI__builtin_addcll:
4449 case Builtin::BI__builtin_subcb:
4450 case Builtin::BI__builtin_subcs:
4451 case Builtin::BI__builtin_subc:
4452 case Builtin::BI__builtin_subcl:
4453 case Builtin::BI__builtin_subcll:
4454 return interp__builtin_carryop(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4455
4456 case Builtin::BI__builtin_clz:
4457 case Builtin::BI__builtin_clzl:
4458 case Builtin::BI__builtin_clzll:
4459 case Builtin::BI__builtin_clzs:
4460 case Builtin::BI__builtin_clzg:
4461 case Builtin::BI__lzcnt16: // Microsoft variants of count leading-zeroes
4462 case Builtin::BI__lzcnt:
4463 case Builtin::BI__lzcnt64:
4464 return interp__builtin_clz(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4465
4466 case Builtin::BI__builtin_ctz:
4467 case Builtin::BI__builtin_ctzl:
4468 case Builtin::BI__builtin_ctzll:
4469 case Builtin::BI__builtin_ctzs:
4470 case Builtin::BI__builtin_ctzg:
4471 return interp__builtin_ctz(S, OpPC, Frame, Call, BuiltinID);
4472
4473 case Builtin::BI__builtin_elementwise_clzg:
4474 case Builtin::BI__builtin_elementwise_ctzg:
4475 return interp__builtin_elementwise_countzeroes(S, OpPC, Frame, Call,
4476 BuiltinID);
4477 case Builtin::BI__builtin_bswapg:
4478 case Builtin::BI__builtin_bswap16:
4479 case Builtin::BI__builtin_bswap32:
4480 case Builtin::BI__builtin_bswap64:
4481 return interp__builtin_bswap(S, OpPC, Frame, Call);
4482
4483 case Builtin::BI__atomic_always_lock_free:
4484 case Builtin::BI__atomic_is_lock_free:
4485 return interp__builtin_atomic_lock_free(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4486
4487 case Builtin::BI__c11_atomic_is_lock_free:
4488 return interp__builtin_c11_atomic_is_lock_free(S, OpPC, Frame, Call);
4489
4490 case Builtin::BI__builtin_complex:
4491 return interp__builtin_complex(S, OpPC, Frame, Call);
4492
4493 case Builtin::BI__builtin_is_aligned:
4494 case Builtin::BI__builtin_align_up:
4495 case Builtin::BI__builtin_align_down:
4496 return interp__builtin_is_aligned_up_down(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4497
4498 case Builtin::BI__builtin_assume_aligned:
4499 return interp__builtin_assume_aligned(S, OpPC, Frame, Call);
4500
4501 case clang::X86::BI__builtin_ia32_crc32qi:
4502 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 1);
4503 case clang::X86::BI__builtin_ia32_crc32hi:
4504 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 2);
4505 case clang::X86::BI__builtin_ia32_crc32si:
4506 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 4);
4507 case clang::X86::BI__builtin_ia32_crc32di:
4508 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 8);
4509
  case clang::X86::BI__builtin_ia32_bextr_u32:
  case clang::X86::BI__builtin_ia32_bextr_u64:
  case clang::X86::BI__builtin_ia32_bextri_u32:
  case clang::X86::BI__builtin_ia32_bextri_u64:
    // BEXTR: extract a contiguous bit field from Val. The control operand
    // packs the start position in bits [7:0] and the field length in
    // bits [15:8].
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Idx) {
          unsigned BitWidth = Val.getBitWidth();
          uint64_t Shift = Idx.extractBitsAsZExtValue(numBits: 8, bitPosition: 0);
          uint64_t Length = Idx.extractBitsAsZExtValue(numBits: 8, bitPosition: 8);
          // A requested length wider than the operand saturates to the
          // operand width.
          if (Length > BitWidth) {
            Length = BitWidth;
          }

          // Handle out of bounds cases.
          if (Length == 0 || Shift >= BitWidth)
            return APInt(BitWidth, 0);

          uint64_t Result = Val.getZExtValue() >> Shift;
          Result &= llvm::maskTrailingOnes<uint64_t>(N: Length);
          return APInt(BitWidth, Result);
        });
4531
  case clang::X86::BI__builtin_ia32_bzhi_si:
  case clang::X86::BI__builtin_ia32_bzhi_di:
    // BZHI: zero all bits of Val at positions >= Index, where Index is the
    // low 8 bits of the second operand. An Index >= the operand width
    // leaves the value unchanged.
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Idx) {
          unsigned BitWidth = Val.getBitWidth();
          uint64_t Index = Idx.extractBitsAsZExtValue(numBits: 8, bitPosition: 0);
          APSInt Result = Val;

          if (Index < BitWidth)
            Result.clearHighBits(hiBits: BitWidth - Index);

          return Result;
        });
4545
4546 case clang::X86::BI__builtin_ia32_ktestcqi:
4547 case clang::X86::BI__builtin_ia32_ktestchi:
4548 case clang::X86::BI__builtin_ia32_ktestcsi:
4549 case clang::X86::BI__builtin_ia32_ktestcdi:
4550 return interp__builtin_elementwise_int_binop(
4551 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4552 return APInt(sizeof(unsigned char) * 8, (~A & B) == 0);
4553 });
4554
4555 case clang::X86::BI__builtin_ia32_ktestzqi:
4556 case clang::X86::BI__builtin_ia32_ktestzhi:
4557 case clang::X86::BI__builtin_ia32_ktestzsi:
4558 case clang::X86::BI__builtin_ia32_ktestzdi:
4559 return interp__builtin_elementwise_int_binop(
4560 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4561 return APInt(sizeof(unsigned char) * 8, (A & B) == 0);
4562 });
4563
4564 case clang::X86::BI__builtin_ia32_kortestcqi:
4565 case clang::X86::BI__builtin_ia32_kortestchi:
4566 case clang::X86::BI__builtin_ia32_kortestcsi:
4567 case clang::X86::BI__builtin_ia32_kortestcdi:
4568 return interp__builtin_elementwise_int_binop(
4569 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4570 return APInt(sizeof(unsigned char) * 8, ~(A | B) == 0);
4571 });
4572
4573 case clang::X86::BI__builtin_ia32_kortestzqi:
4574 case clang::X86::BI__builtin_ia32_kortestzhi:
4575 case clang::X86::BI__builtin_ia32_kortestzsi:
4576 case clang::X86::BI__builtin_ia32_kortestzdi:
4577 return interp__builtin_elementwise_int_binop(
4578 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4579 return APInt(sizeof(unsigned char) * 8, (A | B) == 0);
4580 });
4581
4582 case clang::X86::BI__builtin_ia32_kshiftliqi:
4583 case clang::X86::BI__builtin_ia32_kshiftlihi:
4584 case clang::X86::BI__builtin_ia32_kshiftlisi:
4585 case clang::X86::BI__builtin_ia32_kshiftlidi:
4586 return interp__builtin_elementwise_int_binop(
4587 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4588 unsigned Amt = RHS.getZExtValue() & 0xFF;
4589 if (Amt >= LHS.getBitWidth())
4590 return APInt::getZero(numBits: LHS.getBitWidth());
4591 return LHS.shl(shiftAmt: Amt);
4592 });
4593
4594 case clang::X86::BI__builtin_ia32_kshiftriqi:
4595 case clang::X86::BI__builtin_ia32_kshiftrihi:
4596 case clang::X86::BI__builtin_ia32_kshiftrisi:
4597 case clang::X86::BI__builtin_ia32_kshiftridi:
4598 return interp__builtin_elementwise_int_binop(
4599 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4600 unsigned Amt = RHS.getZExtValue() & 0xFF;
4601 if (Amt >= LHS.getBitWidth())
4602 return APInt::getZero(numBits: LHS.getBitWidth());
4603 return LHS.lshr(shiftAmt: Amt);
4604 });
4605
4606 case clang::X86::BI__builtin_ia32_lzcnt_u16:
4607 case clang::X86::BI__builtin_ia32_lzcnt_u32:
4608 case clang::X86::BI__builtin_ia32_lzcnt_u64:
4609 return interp__builtin_elementwise_int_unaryop(
4610 S, OpPC, Call, Fn: [](const APSInt &Src) {
4611 return APInt(Src.getBitWidth(), Src.countLeadingZeros());
4612 });
4613
4614 case clang::X86::BI__builtin_ia32_tzcnt_u16:
4615 case clang::X86::BI__builtin_ia32_tzcnt_u32:
4616 case clang::X86::BI__builtin_ia32_tzcnt_u64:
4617 return interp__builtin_elementwise_int_unaryop(
4618 S, OpPC, Call, Fn: [](const APSInt &Src) {
4619 return APInt(Src.getBitWidth(), Src.countTrailingZeros());
4620 });
4621
  case clang::X86::BI__builtin_ia32_pdep_si:
  case clang::X86::BI__builtin_ia32_pdep_di:
    // PDEP (parallel bit deposit): scatter the low-order bits of Val into
    // the bit positions selected by Mask; P consumes Val's bits from the
    // LSB upward, one per set mask bit.
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Mask) {
          unsigned BitWidth = Val.getBitWidth();
          APInt Result = APInt::getZero(numBits: BitWidth);

          for (unsigned I = 0, P = 0; I != BitWidth; ++I) {
            if (Mask[I])
              Result.setBitVal(BitPosition: I, BitValue: Val[P++]);
          }

          return Result;
        });

  case clang::X86::BI__builtin_ia32_pext_si:
  case clang::X86::BI__builtin_ia32_pext_di:
    // PEXT (parallel bit extract): the inverse of PDEP — gather the bits
    // of Val selected by Mask and pack them contiguously at the low end
    // of the result.
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Mask) {
          unsigned BitWidth = Val.getBitWidth();
          APInt Result = APInt::getZero(numBits: BitWidth);

          for (unsigned I = 0, P = 0; I != BitWidth; ++I) {
            if (Mask[I])
              Result.setBitVal(BitPosition: P++, BitValue: Val[I]);
          }

          return Result;
        });
4651
4652 case clang::X86::BI__builtin_ia32_addcarryx_u32:
4653 case clang::X86::BI__builtin_ia32_addcarryx_u64:
4654 case clang::X86::BI__builtin_ia32_subborrow_u32:
4655 case clang::X86::BI__builtin_ia32_subborrow_u64:
4656 return interp__builtin_ia32_addcarry_subborrow(S, OpPC, Frame, Call,
4657 BuiltinOp: BuiltinID);
4658
4659 case Builtin::BI__builtin_os_log_format_buffer_size:
4660 return interp__builtin_os_log_format_buffer_size(S, OpPC, Frame, Call);
4661
4662 case Builtin::BI__builtin_ptrauth_string_discriminator:
4663 return interp__builtin_ptrauth_string_discriminator(S, OpPC, Frame, Call);
4664
4665 case Builtin::BI__builtin_infer_alloc_token:
4666 return interp__builtin_infer_alloc_token(S, OpPC, Frame, Call);
4667
4668 case Builtin::BI__noop:
4669 pushInteger(S, Val: 0, QT: Call->getType());
4670 return true;
4671
4672 case Builtin::BI__builtin_operator_new:
4673 return interp__builtin_operator_new(S, OpPC, Frame, Call);
4674
4675 case Builtin::BI__builtin_operator_delete:
4676 return interp__builtin_operator_delete(S, OpPC, Frame, Call);
4677
4678 case Builtin::BI__arithmetic_fence:
4679 return interp__builtin_arithmetic_fence(S, OpPC, Frame, Call);
4680
4681 case Builtin::BI__builtin_reduce_add:
4682 case Builtin::BI__builtin_reduce_mul:
4683 case Builtin::BI__builtin_reduce_and:
4684 case Builtin::BI__builtin_reduce_or:
4685 case Builtin::BI__builtin_reduce_xor:
4686 case Builtin::BI__builtin_reduce_min:
4687 case Builtin::BI__builtin_reduce_max:
4688 return interp__builtin_vector_reduce(S, OpPC, Call, ID: BuiltinID);
4689
4690 case Builtin::BI__builtin_elementwise_popcount:
4691 return interp__builtin_elementwise_int_unaryop(
4692 S, OpPC, Call, Fn: [](const APSInt &Src) {
4693 return APInt(Src.getBitWidth(), Src.popcount());
4694 });
4695 case Builtin::BI__builtin_elementwise_bitreverse:
4696 return interp__builtin_elementwise_int_unaryop(
4697 S, OpPC, Call, Fn: [](const APSInt &Src) { return Src.reverseBits(); });
4698
4699 case Builtin::BI__builtin_elementwise_abs:
4700 return interp__builtin_elementwise_abs(S, OpPC, Frame, Call, BuiltinID);
4701
4702 case Builtin::BI__builtin_memcpy:
4703 case Builtin::BImemcpy:
4704 case Builtin::BI__builtin_wmemcpy:
4705 case Builtin::BIwmemcpy:
4706 case Builtin::BI__builtin_memmove:
4707 case Builtin::BImemmove:
4708 case Builtin::BI__builtin_wmemmove:
4709 case Builtin::BIwmemmove:
4710 return interp__builtin_memcpy(S, OpPC, Frame, Call, ID: BuiltinID);
4711
4712 case Builtin::BI__builtin_memcmp:
4713 case Builtin::BImemcmp:
4714 case Builtin::BI__builtin_bcmp:
4715 case Builtin::BIbcmp:
4716 case Builtin::BI__builtin_wmemcmp:
4717 case Builtin::BIwmemcmp:
4718 return interp__builtin_memcmp(S, OpPC, Frame, Call, ID: BuiltinID);
4719
4720 case Builtin::BImemchr:
4721 case Builtin::BI__builtin_memchr:
4722 case Builtin::BIstrchr:
4723 case Builtin::BI__builtin_strchr:
4724 case Builtin::BIwmemchr:
4725 case Builtin::BI__builtin_wmemchr:
4726 case Builtin::BIwcschr:
4727 case Builtin::BI__builtin_wcschr:
4728 case Builtin::BI__builtin_char_memchr:
4729 return interp__builtin_memchr(S, OpPC, Call, ID: BuiltinID);
4730
4731 case Builtin::BI__builtin_object_size:
4732 case Builtin::BI__builtin_dynamic_object_size:
4733 return interp__builtin_object_size(S, OpPC, Frame, Call);
4734
4735 case Builtin::BI__builtin_is_within_lifetime:
4736 return interp__builtin_is_within_lifetime(S, OpPC, Call);
4737
4738 case Builtin::BI__builtin_elementwise_add_sat:
4739 return interp__builtin_elementwise_int_binop(
4740 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4741 return LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS);
4742 });
4743
4744 case Builtin::BI__builtin_elementwise_sub_sat:
4745 return interp__builtin_elementwise_int_binop(
4746 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4747 return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
4748 });
4749 case X86::BI__builtin_ia32_extract128i256:
4750 case X86::BI__builtin_ia32_vextractf128_pd256:
4751 case X86::BI__builtin_ia32_vextractf128_ps256:
4752 case X86::BI__builtin_ia32_vextractf128_si256:
4753 return interp__builtin_x86_extract_vector(S, OpPC, Call, ID: BuiltinID);
4754
4755 case X86::BI__builtin_ia32_extractf32x4_256_mask:
4756 case X86::BI__builtin_ia32_extractf32x4_mask:
4757 case X86::BI__builtin_ia32_extractf32x8_mask:
4758 case X86::BI__builtin_ia32_extractf64x2_256_mask:
4759 case X86::BI__builtin_ia32_extractf64x2_512_mask:
4760 case X86::BI__builtin_ia32_extractf64x4_mask:
4761 case X86::BI__builtin_ia32_extracti32x4_256_mask:
4762 case X86::BI__builtin_ia32_extracti32x4_mask:
4763 case X86::BI__builtin_ia32_extracti32x8_mask:
4764 case X86::BI__builtin_ia32_extracti64x2_256_mask:
4765 case X86::BI__builtin_ia32_extracti64x2_512_mask:
4766 case X86::BI__builtin_ia32_extracti64x4_mask:
4767 return interp__builtin_x86_extract_vector_masked(S, OpPC, Call, ID: BuiltinID);
4768
  case clang::X86::BI__builtin_ia32_pmulhrsw128:
  case clang::X86::BI__builtin_ia32_pmulhrsw256:
  case clang::X86::BI__builtin_ia32_pmulhrsw512:
    // PMULHRSW: signed 16x16 -> 32-bit multiply, shift right by 14, add 1
    // to round, then take bits [16:1] — i.e. the rounded high 16 bits of
    // the product.
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
          return (llvm::APIntOps::mulsExtended(C1: LHS, C2: RHS).ashr(ShiftAmt: 14) + 1)
              .extractBits(numBits: 16, bitPosition: 1);
        });
4777
4778 case clang::X86::BI__builtin_ia32_movmskps:
4779 case clang::X86::BI__builtin_ia32_movmskpd:
4780 case clang::X86::BI__builtin_ia32_pmovmskb128:
4781 case clang::X86::BI__builtin_ia32_pmovmskb256:
4782 case clang::X86::BI__builtin_ia32_movmskps256:
4783 case clang::X86::BI__builtin_ia32_movmskpd256: {
4784 return interp__builtin_ia32_movmsk_op(S, OpPC, Call);
4785 }
4786
4787 case X86::BI__builtin_ia32_psignb128:
4788 case X86::BI__builtin_ia32_psignb256:
4789 case X86::BI__builtin_ia32_psignw128:
4790 case X86::BI__builtin_ia32_psignw256:
4791 case X86::BI__builtin_ia32_psignd128:
4792 case X86::BI__builtin_ia32_psignd256:
4793 return interp__builtin_elementwise_int_binop(
4794 S, OpPC, Call, Fn: [](const APInt &AElem, const APInt &BElem) {
4795 if (BElem.isZero())
4796 return APInt::getZero(numBits: AElem.getBitWidth());
4797 if (BElem.isNegative())
4798 return -AElem;
4799 return AElem;
4800 });
4801
4802 case clang::X86::BI__builtin_ia32_pavgb128:
4803 case clang::X86::BI__builtin_ia32_pavgw128:
4804 case clang::X86::BI__builtin_ia32_pavgb256:
4805 case clang::X86::BI__builtin_ia32_pavgw256:
4806 case clang::X86::BI__builtin_ia32_pavgb512:
4807 case clang::X86::BI__builtin_ia32_pavgw512:
4808 return interp__builtin_elementwise_int_binop(S, OpPC, Call,
4809 Fn: llvm::APIntOps::avgCeilU);
4810
  case clang::X86::BI__builtin_ia32_pmaddubsw128:
  case clang::X86::BI__builtin_ia32_pmaddubsw256:
  case clang::X86::BI__builtin_ia32_pmaddubsw512:
    // PMADDUBSW: multiply LHS elements as unsigned (zext) by RHS elements
    // as signed (sext), then combine each adjacent pair of products with a
    // signed saturating add.
    return interp__builtin_ia32_pmul(
        S, OpPC, Call,
        Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
            const APSInt &HiRHS) {
          unsigned BitWidth = 2 * LoLHS.getBitWidth();
          return (LoLHS.zext(width: BitWidth) * LoRHS.sext(width: BitWidth))
              .sadd_sat(RHS: (HiLHS.zext(width: BitWidth) * HiRHS.sext(width: BitWidth)));
        });
4822
4823 case clang::X86::BI__builtin_ia32_pmaddwd128:
4824 case clang::X86::BI__builtin_ia32_pmaddwd256:
4825 case clang::X86::BI__builtin_ia32_pmaddwd512:
4826 return interp__builtin_ia32_pmul(
4827 S, OpPC, Call,
4828 Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
4829 const APSInt &HiRHS) {
4830 unsigned BitWidth = 2 * LoLHS.getBitWidth();
4831 return (LoLHS.sext(width: BitWidth) * LoRHS.sext(width: BitWidth)) +
4832 (HiLHS.sext(width: BitWidth) * HiRHS.sext(width: BitWidth));
4833 });
4834
4835 case clang::X86::BI__builtin_ia32_pmulhuw128:
4836 case clang::X86::BI__builtin_ia32_pmulhuw256:
4837 case clang::X86::BI__builtin_ia32_pmulhuw512:
4838 return interp__builtin_elementwise_int_binop(S, OpPC, Call,
4839 Fn: llvm::APIntOps::mulhu);
4840
4841 case clang::X86::BI__builtin_ia32_pmulhw128:
4842 case clang::X86::BI__builtin_ia32_pmulhw256:
4843 case clang::X86::BI__builtin_ia32_pmulhw512:
4844 return interp__builtin_elementwise_int_binop(S, OpPC, Call,
4845 Fn: llvm::APIntOps::mulhs);
4846
4847 case clang::X86::BI__builtin_ia32_psllv2di:
4848 case clang::X86::BI__builtin_ia32_psllv4di:
4849 case clang::X86::BI__builtin_ia32_psllv4si:
4850 case clang::X86::BI__builtin_ia32_psllv8di:
4851 case clang::X86::BI__builtin_ia32_psllv8hi:
4852 case clang::X86::BI__builtin_ia32_psllv8si:
4853 case clang::X86::BI__builtin_ia32_psllv16hi:
4854 case clang::X86::BI__builtin_ia32_psllv16si:
4855 case clang::X86::BI__builtin_ia32_psllv32hi:
4856 case clang::X86::BI__builtin_ia32_psllwi128:
4857 case clang::X86::BI__builtin_ia32_psllwi256:
4858 case clang::X86::BI__builtin_ia32_psllwi512:
4859 case clang::X86::BI__builtin_ia32_pslldi128:
4860 case clang::X86::BI__builtin_ia32_pslldi256:
4861 case clang::X86::BI__builtin_ia32_pslldi512:
4862 case clang::X86::BI__builtin_ia32_psllqi128:
4863 case clang::X86::BI__builtin_ia32_psllqi256:
4864 case clang::X86::BI__builtin_ia32_psllqi512:
4865 return interp__builtin_elementwise_int_binop(
4866 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4867 if (RHS.uge(RHS: LHS.getBitWidth())) {
4868 return APInt::getZero(numBits: LHS.getBitWidth());
4869 }
4870 return LHS.shl(shiftAmt: RHS.getZExtValue());
4871 });
4872
  case clang::X86::BI__builtin_ia32_psrav4si:
  case clang::X86::BI__builtin_ia32_psrav8di:
  case clang::X86::BI__builtin_ia32_psrav8hi:
  case clang::X86::BI__builtin_ia32_psrav8si:
  case clang::X86::BI__builtin_ia32_psrav16hi:
  case clang::X86::BI__builtin_ia32_psrav16si:
  case clang::X86::BI__builtin_ia32_psrav32hi:
  case clang::X86::BI__builtin_ia32_psravq128:
  case clang::X86::BI__builtin_ia32_psravq256:
  case clang::X86::BI__builtin_ia32_psrawi128:
  case clang::X86::BI__builtin_ia32_psrawi256:
  case clang::X86::BI__builtin_ia32_psrawi512:
  case clang::X86::BI__builtin_ia32_psradi128:
  case clang::X86::BI__builtin_ia32_psradi256:
  case clang::X86::BI__builtin_ia32_psradi512:
  case clang::X86::BI__builtin_ia32_psraqi128:
  case clang::X86::BI__builtin_ia32_psraqi256:
  case clang::X86::BI__builtin_ia32_psraqi512:
    // Arithmetic right shift. Unlike the logical shifts above/below, a
    // shift count >= the element width fills the element with copies of
    // the sign bit (ashr by BitWidth - 1), matching PSRA hardware
    // behavior.
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
          if (RHS.uge(RHS: LHS.getBitWidth())) {
            return LHS.ashr(ShiftAmt: LHS.getBitWidth() - 1);
          }
          return LHS.ashr(ShiftAmt: RHS.getZExtValue());
        });
4898
4899 case clang::X86::BI__builtin_ia32_psrlv2di:
4900 case clang::X86::BI__builtin_ia32_psrlv4di:
4901 case clang::X86::BI__builtin_ia32_psrlv4si:
4902 case clang::X86::BI__builtin_ia32_psrlv8di:
4903 case clang::X86::BI__builtin_ia32_psrlv8hi:
4904 case clang::X86::BI__builtin_ia32_psrlv8si:
4905 case clang::X86::BI__builtin_ia32_psrlv16hi:
4906 case clang::X86::BI__builtin_ia32_psrlv16si:
4907 case clang::X86::BI__builtin_ia32_psrlv32hi:
4908 case clang::X86::BI__builtin_ia32_psrlwi128:
4909 case clang::X86::BI__builtin_ia32_psrlwi256:
4910 case clang::X86::BI__builtin_ia32_psrlwi512:
4911 case clang::X86::BI__builtin_ia32_psrldi128:
4912 case clang::X86::BI__builtin_ia32_psrldi256:
4913 case clang::X86::BI__builtin_ia32_psrldi512:
4914 case clang::X86::BI__builtin_ia32_psrlqi128:
4915 case clang::X86::BI__builtin_ia32_psrlqi256:
4916 case clang::X86::BI__builtin_ia32_psrlqi512:
4917 return interp__builtin_elementwise_int_binop(
4918 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4919 if (RHS.uge(RHS: LHS.getBitWidth())) {
4920 return APInt::getZero(numBits: LHS.getBitWidth());
4921 }
4922 return LHS.lshr(shiftAmt: RHS.getZExtValue());
4923 });
4924 case clang::X86::BI__builtin_ia32_packsswb128:
4925 case clang::X86::BI__builtin_ia32_packsswb256:
4926 case clang::X86::BI__builtin_ia32_packsswb512:
4927 case clang::X86::BI__builtin_ia32_packssdw128:
4928 case clang::X86::BI__builtin_ia32_packssdw256:
4929 case clang::X86::BI__builtin_ia32_packssdw512:
4930 return interp__builtin_x86_pack(S, OpPC, E: Call, PackFn: [](const APSInt &Src) {
4931 return APInt(Src).truncSSat(width: Src.getBitWidth() / 2);
4932 });
4933 case clang::X86::BI__builtin_ia32_packusdw128:
4934 case clang::X86::BI__builtin_ia32_packusdw256:
4935 case clang::X86::BI__builtin_ia32_packusdw512:
4936 case clang::X86::BI__builtin_ia32_packuswb128:
4937 case clang::X86::BI__builtin_ia32_packuswb256:
4938 case clang::X86::BI__builtin_ia32_packuswb512:
4939 return interp__builtin_x86_pack(S, OpPC, E: Call, PackFn: [](const APSInt &Src) {
4940 return APInt(Src).truncSSatU(width: Src.getBitWidth() / 2);
4941 });
4942
4943 case clang::X86::BI__builtin_ia32_selectss_128:
4944 case clang::X86::BI__builtin_ia32_selectsd_128:
4945 case clang::X86::BI__builtin_ia32_selectsh_128:
4946 case clang::X86::BI__builtin_ia32_selectsbf_128:
4947 return interp__builtin_select_scalar(S, Call);
4948 case clang::X86::BI__builtin_ia32_vprotbi:
4949 case clang::X86::BI__builtin_ia32_vprotdi:
4950 case clang::X86::BI__builtin_ia32_vprotqi:
4951 case clang::X86::BI__builtin_ia32_vprotwi:
4952 case clang::X86::BI__builtin_ia32_prold128:
4953 case clang::X86::BI__builtin_ia32_prold256:
4954 case clang::X86::BI__builtin_ia32_prold512:
4955 case clang::X86::BI__builtin_ia32_prolq128:
4956 case clang::X86::BI__builtin_ia32_prolq256:
4957 case clang::X86::BI__builtin_ia32_prolq512:
4958 return interp__builtin_elementwise_int_binop(
4959 S, OpPC, Call,
4960 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.rotl(rotateAmt: RHS); });
4961
4962 case clang::X86::BI__builtin_ia32_prord128:
4963 case clang::X86::BI__builtin_ia32_prord256:
4964 case clang::X86::BI__builtin_ia32_prord512:
4965 case clang::X86::BI__builtin_ia32_prorq128:
4966 case clang::X86::BI__builtin_ia32_prorq256:
4967 case clang::X86::BI__builtin_ia32_prorq512:
4968 return interp__builtin_elementwise_int_binop(
4969 S, OpPC, Call,
4970 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.rotr(rotateAmt: RHS); });
4971
4972 case Builtin::BI__builtin_elementwise_max:
4973 case Builtin::BI__builtin_elementwise_min:
4974 return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);
4975
4976 case clang::X86::BI__builtin_ia32_phaddw128:
4977 case clang::X86::BI__builtin_ia32_phaddw256:
4978 case clang::X86::BI__builtin_ia32_phaddd128:
4979 case clang::X86::BI__builtin_ia32_phaddd256:
4980 return interp_builtin_horizontal_int_binop(
4981 S, OpPC, Call,
4982 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
4983 case clang::X86::BI__builtin_ia32_phaddsw128:
4984 case clang::X86::BI__builtin_ia32_phaddsw256:
4985 return interp_builtin_horizontal_int_binop(
4986 S, OpPC, Call,
4987 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.sadd_sat(RHS); });
4988 case clang::X86::BI__builtin_ia32_phsubw128:
4989 case clang::X86::BI__builtin_ia32_phsubw256:
4990 case clang::X86::BI__builtin_ia32_phsubd128:
4991 case clang::X86::BI__builtin_ia32_phsubd256:
4992 return interp_builtin_horizontal_int_binop(
4993 S, OpPC, Call,
4994 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS - RHS; });
4995 case clang::X86::BI__builtin_ia32_phsubsw128:
4996 case clang::X86::BI__builtin_ia32_phsubsw256:
4997 return interp_builtin_horizontal_int_binop(
4998 S, OpPC, Call,
4999 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.ssub_sat(RHS); });
5000 case clang::X86::BI__builtin_ia32_haddpd:
5001 case clang::X86::BI__builtin_ia32_haddps:
5002 case clang::X86::BI__builtin_ia32_haddpd256:
5003 case clang::X86::BI__builtin_ia32_haddps256:
5004 return interp_builtin_horizontal_fp_binop(
5005 S, OpPC, Call,
5006 Fn: [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
5007 APFloat F = LHS;
5008 F.add(RHS, RM);
5009 return F;
5010 });
5011 case clang::X86::BI__builtin_ia32_hsubpd:
5012 case clang::X86::BI__builtin_ia32_hsubps:
5013 case clang::X86::BI__builtin_ia32_hsubpd256:
5014 case clang::X86::BI__builtin_ia32_hsubps256:
5015 return interp_builtin_horizontal_fp_binop(
5016 S, OpPC, Call,
5017 Fn: [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
5018 APFloat F = LHS;
5019 F.subtract(RHS, RM);
5020 return F;
5021 });
5022 case clang::X86::BI__builtin_ia32_addsubpd:
5023 case clang::X86::BI__builtin_ia32_addsubps:
5024 case clang::X86::BI__builtin_ia32_addsubpd256:
5025 case clang::X86::BI__builtin_ia32_addsubps256:
5026 return interp__builtin_ia32_addsub(S, OpPC, Call);
5027
5028 case clang::X86::BI__builtin_ia32_pmuldq128:
5029 case clang::X86::BI__builtin_ia32_pmuldq256:
5030 case clang::X86::BI__builtin_ia32_pmuldq512:
5031 return interp__builtin_ia32_pmul(
5032 S, OpPC, Call,
5033 Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
5034 const APSInt &HiRHS) {
5035 return llvm::APIntOps::mulsExtended(C1: LoLHS, C2: LoRHS);
5036 });
5037
5038 case clang::X86::BI__builtin_ia32_pmuludq128:
5039 case clang::X86::BI__builtin_ia32_pmuludq256:
5040 case clang::X86::BI__builtin_ia32_pmuludq512:
5041 return interp__builtin_ia32_pmul(
5042 S, OpPC, Call,
5043 Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
5044 const APSInt &HiRHS) {
5045 return llvm::APIntOps::muluExtended(C1: LoLHS, C2: LoRHS);
5046 });
5047
5048 case clang::X86::BI__builtin_ia32_pclmulqdq128:
5049 case clang::X86::BI__builtin_ia32_pclmulqdq256:
5050 case clang::X86::BI__builtin_ia32_pclmulqdq512:
5051 return interp__builtin_ia32_pclmulqdq(S, OpPC, Call);
5052
5053 case Builtin::BI__builtin_elementwise_fma:
5054 return interp__builtin_elementwise_triop_fp(
5055 S, OpPC, Call,
5056 Fn: [](const APFloat &X, const APFloat &Y, const APFloat &Z,
5057 llvm::RoundingMode RM) {
5058 APFloat F = X;
5059 F.fusedMultiplyAdd(Multiplicand: Y, Addend: Z, RM);
5060 return F;
5061 });
5062
  case X86::BI__builtin_ia32_vpmadd52luq128:
  case X86::BI__builtin_ia32_vpmadd52luq256:
  case X86::BI__builtin_ia32_vpmadd52luq512:
    // VPMADD52LUQ: multiply the low 52 bits of B and C and accumulate the
    // low 52 bits of the product (zero-extended to 64) into A.
    return interp__builtin_elementwise_triop(
        S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B, const APSInt &C) {
          return A + (B.trunc(width: 52) * C.trunc(width: 52)).zext(width: 64);
        });
  case X86::BI__builtin_ia32_vpmadd52huq128:
  case X86::BI__builtin_ia32_vpmadd52huq256:
  case X86::BI__builtin_ia32_vpmadd52huq512:
    // VPMADD52HUQ: same, but accumulate the high 52 bits of the 104-bit
    // product (mulhu on the 52-bit operands) into A.
    return interp__builtin_elementwise_triop(
        S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B, const APSInt &C) {
          return A + llvm::APIntOps::mulhu(C1: B.trunc(width: 52), C2: C.trunc(width: 52)).zext(width: 64);
        });
5077
5078 case X86::BI__builtin_ia32_vpshldd128:
5079 case X86::BI__builtin_ia32_vpshldd256:
5080 case X86::BI__builtin_ia32_vpshldd512:
5081 case X86::BI__builtin_ia32_vpshldq128:
5082 case X86::BI__builtin_ia32_vpshldq256:
5083 case X86::BI__builtin_ia32_vpshldq512:
5084 case X86::BI__builtin_ia32_vpshldw128:
5085 case X86::BI__builtin_ia32_vpshldw256:
5086 case X86::BI__builtin_ia32_vpshldw512:
5087 return interp__builtin_elementwise_triop(
5088 S, OpPC, Call,
5089 Fn: [](const APSInt &Hi, const APSInt &Lo, const APSInt &Amt) {
5090 return llvm::APIntOps::fshl(Hi, Lo, Shift: Amt);
5091 });
5092
5093 case X86::BI__builtin_ia32_vpshrdd128:
5094 case X86::BI__builtin_ia32_vpshrdd256:
5095 case X86::BI__builtin_ia32_vpshrdd512:
5096 case X86::BI__builtin_ia32_vpshrdq128:
5097 case X86::BI__builtin_ia32_vpshrdq256:
5098 case X86::BI__builtin_ia32_vpshrdq512:
5099 case X86::BI__builtin_ia32_vpshrdw128:
5100 case X86::BI__builtin_ia32_vpshrdw256:
5101 case X86::BI__builtin_ia32_vpshrdw512:
5102 // NOTE: Reversed Hi/Lo operands.
5103 return interp__builtin_elementwise_triop(
5104 S, OpPC, Call,
5105 Fn: [](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) {
5106 return llvm::APIntOps::fshr(Hi, Lo, Shift: Amt);
5107 });
5108 case X86::BI__builtin_ia32_vpconflictsi_128:
5109 case X86::BI__builtin_ia32_vpconflictsi_256:
5110 case X86::BI__builtin_ia32_vpconflictsi_512:
5111 case X86::BI__builtin_ia32_vpconflictdi_128:
5112 case X86::BI__builtin_ia32_vpconflictdi_256:
5113 case X86::BI__builtin_ia32_vpconflictdi_512:
5114 return interp__builtin_ia32_vpconflict(S, OpPC, Call);
  case clang::X86::BI__builtin_ia32_blendpd:
  case clang::X86::BI__builtin_ia32_blendpd256:
  case clang::X86::BI__builtin_ia32_blendps:
  case clang::X86::BI__builtin_ia32_blendps256:
  case clang::X86::BI__builtin_ia32_pblendw128:
  case clang::X86::BI__builtin_ia32_pblendw256:
  case clang::X86::BI__builtin_ia32_pblendd128:
  case clang::X86::BI__builtin_ia32_pblendd256:
    // Immediate blend: for each destination element, one bit of the 8-bit
    // immediate selects which source vector supplies that element (the
    // mask index repeats every 8 elements via DstIdx % 8).
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          // Bit index for mask.
          unsigned MaskBit = (ShuffleMask >> (DstIdx % 8)) & 0x1;
          unsigned SrcVecIdx = MaskBit ? 1 : 0; // 1 = TrueVec, 0 = FalseVec
          return std::pair<unsigned, int>{SrcVecIdx, static_cast<int>(DstIdx)};
        });
5130
5131
5132
5133 case clang::X86::BI__builtin_ia32_blendvpd:
5134 case clang::X86::BI__builtin_ia32_blendvpd256:
5135 case clang::X86::BI__builtin_ia32_blendvps:
5136 case clang::X86::BI__builtin_ia32_blendvps256:
5137 return interp__builtin_elementwise_triop_fp(
5138 S, OpPC, Call,
5139 Fn: [](const APFloat &F, const APFloat &T, const APFloat &C,
5140 llvm::RoundingMode) { return C.isNegative() ? T : F; });
5141
5142 case clang::X86::BI__builtin_ia32_pblendvb128:
5143 case clang::X86::BI__builtin_ia32_pblendvb256:
5144 return interp__builtin_elementwise_triop(
5145 S, OpPC, Call, Fn: [](const APSInt &F, const APSInt &T, const APSInt &C) {
5146 return ((APInt)C).isNegative() ? T : F;
5147 });
5148 case X86::BI__builtin_ia32_ptestz128:
5149 case X86::BI__builtin_ia32_ptestz256:
5150 case X86::BI__builtin_ia32_vtestzps:
5151 case X86::BI__builtin_ia32_vtestzps256:
5152 case X86::BI__builtin_ia32_vtestzpd:
5153 case X86::BI__builtin_ia32_vtestzpd256:
5154 return interp__builtin_ia32_test_op(
5155 S, OpPC, Call,
5156 Fn: [](const APInt &A, const APInt &B) { return (A & B) == 0; });
5157 case X86::BI__builtin_ia32_ptestc128:
5158 case X86::BI__builtin_ia32_ptestc256:
5159 case X86::BI__builtin_ia32_vtestcps:
5160 case X86::BI__builtin_ia32_vtestcps256:
5161 case X86::BI__builtin_ia32_vtestcpd:
5162 case X86::BI__builtin_ia32_vtestcpd256:
5163 return interp__builtin_ia32_test_op(
5164 S, OpPC, Call,
5165 Fn: [](const APInt &A, const APInt &B) { return (~A & B) == 0; });
5166 case X86::BI__builtin_ia32_ptestnzc128:
5167 case X86::BI__builtin_ia32_ptestnzc256:
5168 case X86::BI__builtin_ia32_vtestnzcps:
5169 case X86::BI__builtin_ia32_vtestnzcps256:
5170 case X86::BI__builtin_ia32_vtestnzcpd:
5171 case X86::BI__builtin_ia32_vtestnzcpd256:
5172 return interp__builtin_ia32_test_op(
5173 S, OpPC, Call, Fn: [](const APInt &A, const APInt &B) {
5174 return ((A & B) != 0) && ((~A & B) != 0);
5175 });
5176 case X86::BI__builtin_ia32_selectb_128:
5177 case X86::BI__builtin_ia32_selectb_256:
5178 case X86::BI__builtin_ia32_selectb_512:
5179 case X86::BI__builtin_ia32_selectw_128:
5180 case X86::BI__builtin_ia32_selectw_256:
5181 case X86::BI__builtin_ia32_selectw_512:
5182 case X86::BI__builtin_ia32_selectd_128:
5183 case X86::BI__builtin_ia32_selectd_256:
5184 case X86::BI__builtin_ia32_selectd_512:
5185 case X86::BI__builtin_ia32_selectq_128:
5186 case X86::BI__builtin_ia32_selectq_256:
5187 case X86::BI__builtin_ia32_selectq_512:
5188 case X86::BI__builtin_ia32_selectph_128:
5189 case X86::BI__builtin_ia32_selectph_256:
5190 case X86::BI__builtin_ia32_selectph_512:
5191 case X86::BI__builtin_ia32_selectpbf_128:
5192 case X86::BI__builtin_ia32_selectpbf_256:
5193 case X86::BI__builtin_ia32_selectpbf_512:
5194 case X86::BI__builtin_ia32_selectps_128:
5195 case X86::BI__builtin_ia32_selectps_256:
5196 case X86::BI__builtin_ia32_selectps_512:
5197 case X86::BI__builtin_ia32_selectpd_128:
5198 case X86::BI__builtin_ia32_selectpd_256:
5199 case X86::BI__builtin_ia32_selectpd_512:
5200 return interp__builtin_select(S, OpPC, Call);
5201
5202 case X86::BI__builtin_ia32_shufps:
5203 case X86::BI__builtin_ia32_shufps256:
5204 case X86::BI__builtin_ia32_shufps512:
5205 return interp__builtin_ia32_shuffle_generic(
5206 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5207 unsigned NumElemPerLane = 4;
5208 unsigned NumSelectableElems = NumElemPerLane / 2;
5209 unsigned BitsPerElem = 2;
5210 unsigned IndexMask = 0x3;
5211 unsigned MaskBits = 8;
5212 unsigned Lane = DstIdx / NumElemPerLane;
5213 unsigned ElemInLane = DstIdx % NumElemPerLane;
5214 unsigned LaneOffset = Lane * NumElemPerLane;
5215 unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
5216 unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
5217 unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
5218 return std::pair<unsigned, int>{SrcIdx,
5219 static_cast<int>(LaneOffset + Index)};
5220 });
5221 case X86::BI__builtin_ia32_shufpd:
5222 case X86::BI__builtin_ia32_shufpd256:
5223 case X86::BI__builtin_ia32_shufpd512:
5224 return interp__builtin_ia32_shuffle_generic(
5225 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5226 unsigned NumElemPerLane = 2;
5227 unsigned NumSelectableElems = NumElemPerLane / 2;
5228 unsigned BitsPerElem = 1;
5229 unsigned IndexMask = 0x1;
5230 unsigned MaskBits = 8;
5231 unsigned Lane = DstIdx / NumElemPerLane;
5232 unsigned ElemInLane = DstIdx % NumElemPerLane;
5233 unsigned LaneOffset = Lane * NumElemPerLane;
5234 unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
5235 unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
5236 unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
5237 return std::pair<unsigned, int>{SrcIdx,
5238 static_cast<int>(LaneOffset + Index)};
5239 });
5240
5241 case X86::BI__builtin_ia32_vgf2p8affineinvqb_v16qi:
5242 case X86::BI__builtin_ia32_vgf2p8affineinvqb_v32qi:
5243 case X86::BI__builtin_ia32_vgf2p8affineinvqb_v64qi:
5244 return interp_builtin_ia32_gfni_affine(S, OpPC, Call, Inverse: true);
5245 case X86::BI__builtin_ia32_vgf2p8affineqb_v16qi:
5246 case X86::BI__builtin_ia32_vgf2p8affineqb_v32qi:
5247 case X86::BI__builtin_ia32_vgf2p8affineqb_v64qi:
5248 return interp_builtin_ia32_gfni_affine(S, OpPC, Call, Inverse: false);
5249
5250 case X86::BI__builtin_ia32_vgf2p8mulb_v16qi:
5251 case X86::BI__builtin_ia32_vgf2p8mulb_v32qi:
5252 case X86::BI__builtin_ia32_vgf2p8mulb_v64qi:
5253 return interp__builtin_ia32_gfni_mul(S, OpPC, Call);
5254
5255 case X86::BI__builtin_ia32_insertps128:
5256 return interp__builtin_ia32_shuffle_generic(
5257 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Mask) {
5258 // Bits [3:0]: zero mask - if bit is set, zero this element
5259 if ((Mask & (1 << DstIdx)) != 0) {
5260 return std::pair<unsigned, int>{0, -1};
5261 }
5262 // Bits [7:6]: select element from source vector Y (0-3)
5263 // Bits [5:4]: select destination position (0-3)
5264 unsigned SrcElem = (Mask >> 6) & 0x3;
5265 unsigned DstElem = (Mask >> 4) & 0x3;
5266 if (DstIdx == DstElem) {
5267 // Insert element from source vector (B) at this position
5268 return std::pair<unsigned, int>{1, static_cast<int>(SrcElem)};
5269 } else {
5270 // Copy from destination vector (A)
5271 return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)};
5272 }
5273 });
5274 case X86::BI__builtin_ia32_permvarsi256:
5275 case X86::BI__builtin_ia32_permvarsf256:
5276 case X86::BI__builtin_ia32_permvardf512:
5277 case X86::BI__builtin_ia32_permvardi512:
5278 case X86::BI__builtin_ia32_permvarhi128:
5279 return interp__builtin_ia32_shuffle_generic(
5280 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5281 int Offset = ShuffleMask & 0x7;
5282 return std::pair<unsigned, int>{0, Offset};
5283 });
5284 case X86::BI__builtin_ia32_permvarqi128:
5285 case X86::BI__builtin_ia32_permvarhi256:
5286 case X86::BI__builtin_ia32_permvarsi512:
5287 case X86::BI__builtin_ia32_permvarsf512:
5288 return interp__builtin_ia32_shuffle_generic(
5289 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5290 int Offset = ShuffleMask & 0xF;
5291 return std::pair<unsigned, int>{0, Offset};
5292 });
5293 case X86::BI__builtin_ia32_permvardi256:
5294 case X86::BI__builtin_ia32_permvardf256:
5295 return interp__builtin_ia32_shuffle_generic(
5296 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5297 int Offset = ShuffleMask & 0x3;
5298 return std::pair<unsigned, int>{0, Offset};
5299 });
5300 case X86::BI__builtin_ia32_permvarqi256:
5301 case X86::BI__builtin_ia32_permvarhi512:
5302 return interp__builtin_ia32_shuffle_generic(
5303 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5304 int Offset = ShuffleMask & 0x1F;
5305 return std::pair<unsigned, int>{0, Offset};
5306 });
5307 case X86::BI__builtin_ia32_permvarqi512:
5308 return interp__builtin_ia32_shuffle_generic(
5309 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5310 int Offset = ShuffleMask & 0x3F;
5311 return std::pair<unsigned, int>{0, Offset};
5312 });
5313 case X86::BI__builtin_ia32_vpermi2varq128:
5314 case X86::BI__builtin_ia32_vpermi2varpd128:
5315 return interp__builtin_ia32_shuffle_generic(
5316 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5317 int Offset = ShuffleMask & 0x1;
5318 unsigned SrcIdx = (ShuffleMask >> 1) & 0x1;
5319 return std::pair<unsigned, int>{SrcIdx, Offset};
5320 });
5321 case X86::BI__builtin_ia32_vpermi2vard128:
5322 case X86::BI__builtin_ia32_vpermi2varps128:
5323 case X86::BI__builtin_ia32_vpermi2varq256:
5324 case X86::BI__builtin_ia32_vpermi2varpd256:
5325 return interp__builtin_ia32_shuffle_generic(
5326 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5327 int Offset = ShuffleMask & 0x3;
5328 unsigned SrcIdx = (ShuffleMask >> 2) & 0x1;
5329 return std::pair<unsigned, int>{SrcIdx, Offset};
5330 });
5331 case X86::BI__builtin_ia32_vpermi2varhi128:
5332 case X86::BI__builtin_ia32_vpermi2vard256:
5333 case X86::BI__builtin_ia32_vpermi2varps256:
5334 case X86::BI__builtin_ia32_vpermi2varq512:
5335 case X86::BI__builtin_ia32_vpermi2varpd512:
5336 return interp__builtin_ia32_shuffle_generic(
5337 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5338 int Offset = ShuffleMask & 0x7;
5339 unsigned SrcIdx = (ShuffleMask >> 3) & 0x1;
5340 return std::pair<unsigned, int>{SrcIdx, Offset};
5341 });
5342 case X86::BI__builtin_ia32_vpermi2varqi128:
5343 case X86::BI__builtin_ia32_vpermi2varhi256:
5344 case X86::BI__builtin_ia32_vpermi2vard512:
5345 case X86::BI__builtin_ia32_vpermi2varps512:
5346 return interp__builtin_ia32_shuffle_generic(
5347 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5348 int Offset = ShuffleMask & 0xF;
5349 unsigned SrcIdx = (ShuffleMask >> 4) & 0x1;
5350 return std::pair<unsigned, int>{SrcIdx, Offset};
5351 });
5352 case X86::BI__builtin_ia32_vpermi2varqi256:
5353 case X86::BI__builtin_ia32_vpermi2varhi512:
5354 return interp__builtin_ia32_shuffle_generic(
5355 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5356 int Offset = ShuffleMask & 0x1F;
5357 unsigned SrcIdx = (ShuffleMask >> 5) & 0x1;
5358 return std::pair<unsigned, int>{SrcIdx, Offset};
5359 });
5360 case X86::BI__builtin_ia32_vpermi2varqi512:
5361 return interp__builtin_ia32_shuffle_generic(
5362 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5363 int Offset = ShuffleMask & 0x3F;
5364 unsigned SrcIdx = (ShuffleMask >> 6) & 0x1;
5365 return std::pair<unsigned, int>{SrcIdx, Offset};
5366 });
5367 case X86::BI__builtin_ia32_vperm2f128_pd256:
5368 case X86::BI__builtin_ia32_vperm2f128_ps256:
5369 case X86::BI__builtin_ia32_vperm2f128_si256:
5370 case X86::BI__builtin_ia32_permti256: {
5371 unsigned NumElements =
5372 Call->getArg(Arg: 0)->getType()->castAs<VectorType>()->getNumElements();
5373 unsigned PreservedBitsCnt = NumElements >> 2;
5374 return interp__builtin_ia32_shuffle_generic(
5375 S, OpPC, Call,
5376 GetSourceIndex: [PreservedBitsCnt](unsigned DstIdx, unsigned ShuffleMask) {
5377 unsigned ControlBitsCnt = DstIdx >> PreservedBitsCnt << 2;
5378 unsigned ControlBits = ShuffleMask >> ControlBitsCnt;
5379
5380 if (ControlBits & 0b1000)
5381 return std::make_pair(x: 0u, y: -1);
5382
5383 unsigned SrcVecIdx = (ControlBits & 0b10) >> 1;
5384 unsigned PreservedBitsMask = (1 << PreservedBitsCnt) - 1;
5385 int SrcIdx = ((ControlBits & 0b1) << PreservedBitsCnt) |
5386 (DstIdx & PreservedBitsMask);
5387 return std::make_pair(x&: SrcVecIdx, y&: SrcIdx);
5388 });
5389 }
5390 case X86::BI__builtin_ia32_pshufb128:
5391 case X86::BI__builtin_ia32_pshufb256:
5392 case X86::BI__builtin_ia32_pshufb512:
5393 return interp__builtin_ia32_shuffle_generic(
5394 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5395 uint8_t Ctlb = static_cast<uint8_t>(ShuffleMask);
5396 if (Ctlb & 0x80)
5397 return std::make_pair(x: 0, y: -1);
5398
5399 unsigned LaneBase = (DstIdx / 16) * 16;
5400 unsigned SrcOffset = Ctlb & 0x0F;
5401 unsigned SrcIdx = LaneBase + SrcOffset;
5402 return std::make_pair(x: 0, y: static_cast<int>(SrcIdx));
5403 });
5404
5405 case X86::BI__builtin_ia32_pshuflw:
5406 case X86::BI__builtin_ia32_pshuflw256:
5407 case X86::BI__builtin_ia32_pshuflw512:
5408 return interp__builtin_ia32_shuffle_generic(
5409 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5410 unsigned LaneBase = (DstIdx / 8) * 8;
5411 unsigned LaneIdx = DstIdx % 8;
5412 if (LaneIdx < 4) {
5413 unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
5414 return std::make_pair(x: 0, y: static_cast<int>(LaneBase + Sel));
5415 }
5416
5417 return std::make_pair(x: 0, y: static_cast<int>(DstIdx));
5418 });
5419
5420 case X86::BI__builtin_ia32_pshufhw:
5421 case X86::BI__builtin_ia32_pshufhw256:
5422 case X86::BI__builtin_ia32_pshufhw512:
5423 return interp__builtin_ia32_shuffle_generic(
5424 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5425 unsigned LaneBase = (DstIdx / 8) * 8;
5426 unsigned LaneIdx = DstIdx % 8;
5427 if (LaneIdx >= 4) {
5428 unsigned Sel = (ShuffleMask >> (2 * (LaneIdx - 4))) & 0x3;
5429 return std::make_pair(x: 0, y: static_cast<int>(LaneBase + 4 + Sel));
5430 }
5431
5432 return std::make_pair(x: 0, y: static_cast<int>(DstIdx));
5433 });
5434
5435 case X86::BI__builtin_ia32_pshufd:
5436 case X86::BI__builtin_ia32_pshufd256:
5437 case X86::BI__builtin_ia32_pshufd512:
5438 case X86::BI__builtin_ia32_vpermilps:
5439 case X86::BI__builtin_ia32_vpermilps256:
5440 case X86::BI__builtin_ia32_vpermilps512:
5441 return interp__builtin_ia32_shuffle_generic(
5442 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5443 unsigned LaneBase = (DstIdx / 4) * 4;
5444 unsigned LaneIdx = DstIdx % 4;
5445 unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
5446 return std::make_pair(x: 0, y: static_cast<int>(LaneBase + Sel));
5447 });
5448
5449 case X86::BI__builtin_ia32_vpermilvarpd:
5450 case X86::BI__builtin_ia32_vpermilvarpd256:
5451 case X86::BI__builtin_ia32_vpermilvarpd512:
5452 return interp__builtin_ia32_shuffle_generic(
5453 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5454 unsigned NumElemPerLane = 2;
5455 unsigned Lane = DstIdx / NumElemPerLane;
5456 unsigned Offset = ShuffleMask & 0b10 ? 1 : 0;
5457 return std::make_pair(
5458 x: 0, y: static_cast<int>(Lane * NumElemPerLane + Offset));
5459 });
5460
5461 case X86::BI__builtin_ia32_vpermilvarps:
5462 case X86::BI__builtin_ia32_vpermilvarps256:
5463 case X86::BI__builtin_ia32_vpermilvarps512:
5464 return interp__builtin_ia32_shuffle_generic(
5465 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5466 unsigned NumElemPerLane = 4;
5467 unsigned Lane = DstIdx / NumElemPerLane;
5468 unsigned Offset = ShuffleMask & 0b11;
5469 return std::make_pair(
5470 x: 0, y: static_cast<int>(Lane * NumElemPerLane + Offset));
5471 });
5472
5473 case X86::BI__builtin_ia32_vpermilpd:
5474 case X86::BI__builtin_ia32_vpermilpd256:
5475 case X86::BI__builtin_ia32_vpermilpd512:
5476 return interp__builtin_ia32_shuffle_generic(
5477 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Control) {
5478 unsigned NumElemPerLane = 2;
5479 unsigned BitsPerElem = 1;
5480 unsigned MaskBits = 8;
5481 unsigned IndexMask = 0x1;
5482 unsigned Lane = DstIdx / NumElemPerLane;
5483 unsigned LaneOffset = Lane * NumElemPerLane;
5484 unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
5485 unsigned Index = (Control >> BitIndex) & IndexMask;
5486 return std::make_pair(x: 0, y: static_cast<int>(LaneOffset + Index));
5487 });
5488
5489 case X86::BI__builtin_ia32_permdf256:
5490 case X86::BI__builtin_ia32_permdi256:
5491 return interp__builtin_ia32_shuffle_generic(
5492 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Control) {
5493 // permute4x64 operates on 4 64-bit elements
5494 // For element i (0-3), extract bits [2*i+1:2*i] from Control
5495 unsigned Index = (Control >> (2 * DstIdx)) & 0x3;
5496 return std::make_pair(x: 0, y: static_cast<int>(Index));
5497 });
5498
5499 case X86::BI__builtin_ia32_vpmultishiftqb128:
5500 case X86::BI__builtin_ia32_vpmultishiftqb256:
5501 case X86::BI__builtin_ia32_vpmultishiftqb512:
5502 return interp__builtin_ia32_multishiftqb(S, OpPC, Call);
5503 case X86::BI__builtin_ia32_kandqi:
5504 case X86::BI__builtin_ia32_kandhi:
5505 case X86::BI__builtin_ia32_kandsi:
5506 case X86::BI__builtin_ia32_kanddi:
5507 return interp__builtin_elementwise_int_binop(
5508 S, OpPC, Call,
5509 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; });
5510
5511 case X86::BI__builtin_ia32_kandnqi:
5512 case X86::BI__builtin_ia32_kandnhi:
5513 case X86::BI__builtin_ia32_kandnsi:
5514 case X86::BI__builtin_ia32_kandndi:
5515 return interp__builtin_elementwise_int_binop(
5516 S, OpPC, Call,
5517 Fn: [](const APSInt &LHS, const APSInt &RHS) { return ~LHS & RHS; });
5518
5519 case X86::BI__builtin_ia32_korqi:
5520 case X86::BI__builtin_ia32_korhi:
5521 case X86::BI__builtin_ia32_korsi:
5522 case X86::BI__builtin_ia32_kordi:
5523 return interp__builtin_elementwise_int_binop(
5524 S, OpPC, Call,
5525 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS | RHS; });
5526
5527 case X86::BI__builtin_ia32_kxnorqi:
5528 case X86::BI__builtin_ia32_kxnorhi:
5529 case X86::BI__builtin_ia32_kxnorsi:
5530 case X86::BI__builtin_ia32_kxnordi:
5531 return interp__builtin_elementwise_int_binop(
5532 S, OpPC, Call,
5533 Fn: [](const APSInt &LHS, const APSInt &RHS) { return ~(LHS ^ RHS); });
5534
5535 case X86::BI__builtin_ia32_kxorqi:
5536 case X86::BI__builtin_ia32_kxorhi:
5537 case X86::BI__builtin_ia32_kxorsi:
5538 case X86::BI__builtin_ia32_kxordi:
5539 return interp__builtin_elementwise_int_binop(
5540 S, OpPC, Call,
5541 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS ^ RHS; });
5542
5543 case X86::BI__builtin_ia32_knotqi:
5544 case X86::BI__builtin_ia32_knothi:
5545 case X86::BI__builtin_ia32_knotsi:
5546 case X86::BI__builtin_ia32_knotdi:
5547 return interp__builtin_elementwise_int_unaryop(
5548 S, OpPC, Call, Fn: [](const APSInt &Src) { return ~Src; });
5549
5550 case X86::BI__builtin_ia32_kaddqi:
5551 case X86::BI__builtin_ia32_kaddhi:
5552 case X86::BI__builtin_ia32_kaddsi:
5553 case X86::BI__builtin_ia32_kadddi:
5554 return interp__builtin_elementwise_int_binop(
5555 S, OpPC, Call,
5556 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
5557
5558 case X86::BI__builtin_ia32_kmovb:
5559 case X86::BI__builtin_ia32_kmovw:
5560 case X86::BI__builtin_ia32_kmovd:
5561 case X86::BI__builtin_ia32_kmovq:
5562 return interp__builtin_elementwise_int_unaryop(
5563 S, OpPC, Call, Fn: [](const APSInt &Src) { return Src; });
5564
5565 case X86::BI__builtin_ia32_kunpckhi:
5566 case X86::BI__builtin_ia32_kunpckdi:
5567 case X86::BI__builtin_ia32_kunpcksi:
5568 return interp__builtin_elementwise_int_binop(
5569 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
5570 // Generic kunpack: extract lower half of each operand and concatenate
5571 // Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0]
5572 unsigned BW = A.getBitWidth();
5573 return APSInt(A.trunc(width: BW / 2).concat(NewLSB: B.trunc(width: BW / 2)),
5574 A.isUnsigned());
5575 });
5576
5577 case X86::BI__builtin_ia32_phminposuw128:
5578 return interp__builtin_ia32_phminposuw(S, OpPC, Call);
5579
5580 case X86::BI__builtin_ia32_psraq128:
5581 case X86::BI__builtin_ia32_psraq256:
5582 case X86::BI__builtin_ia32_psraq512:
5583 case X86::BI__builtin_ia32_psrad128:
5584 case X86::BI__builtin_ia32_psrad256:
5585 case X86::BI__builtin_ia32_psrad512:
5586 case X86::BI__builtin_ia32_psraw128:
5587 case X86::BI__builtin_ia32_psraw256:
5588 case X86::BI__builtin_ia32_psraw512:
5589 return interp__builtin_ia32_shift_with_count(
5590 S, OpPC, Call,
5591 ShiftOp: [](const APInt &Elt, uint64_t Count) { return Elt.ashr(ShiftAmt: Count); },
5592 OverflowOp: [](const APInt &Elt, unsigned Width) { return Elt.ashr(ShiftAmt: Width - 1); });
5593
5594 case X86::BI__builtin_ia32_psllq128:
5595 case X86::BI__builtin_ia32_psllq256:
5596 case X86::BI__builtin_ia32_psllq512:
5597 case X86::BI__builtin_ia32_pslld128:
5598 case X86::BI__builtin_ia32_pslld256:
5599 case X86::BI__builtin_ia32_pslld512:
5600 case X86::BI__builtin_ia32_psllw128:
5601 case X86::BI__builtin_ia32_psllw256:
5602 case X86::BI__builtin_ia32_psllw512:
5603 return interp__builtin_ia32_shift_with_count(
5604 S, OpPC, Call,
5605 ShiftOp: [](const APInt &Elt, uint64_t Count) { return Elt.shl(shiftAmt: Count); },
5606 OverflowOp: [](const APInt &Elt, unsigned Width) { return APInt::getZero(numBits: Width); });
5607
5608 case X86::BI__builtin_ia32_psrlq128:
5609 case X86::BI__builtin_ia32_psrlq256:
5610 case X86::BI__builtin_ia32_psrlq512:
5611 case X86::BI__builtin_ia32_psrld128:
5612 case X86::BI__builtin_ia32_psrld256:
5613 case X86::BI__builtin_ia32_psrld512:
5614 case X86::BI__builtin_ia32_psrlw128:
5615 case X86::BI__builtin_ia32_psrlw256:
5616 case X86::BI__builtin_ia32_psrlw512:
5617 return interp__builtin_ia32_shift_with_count(
5618 S, OpPC, Call,
5619 ShiftOp: [](const APInt &Elt, uint64_t Count) { return Elt.lshr(shiftAmt: Count); },
5620 OverflowOp: [](const APInt &Elt, unsigned Width) { return APInt::getZero(numBits: Width); });
5621
5622 case X86::BI__builtin_ia32_pternlogd128_mask:
5623 case X86::BI__builtin_ia32_pternlogd256_mask:
5624 case X86::BI__builtin_ia32_pternlogd512_mask:
5625 case X86::BI__builtin_ia32_pternlogq128_mask:
5626 case X86::BI__builtin_ia32_pternlogq256_mask:
5627 case X86::BI__builtin_ia32_pternlogq512_mask:
5628 return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/false);
5629 case X86::BI__builtin_ia32_pternlogd128_maskz:
5630 case X86::BI__builtin_ia32_pternlogd256_maskz:
5631 case X86::BI__builtin_ia32_pternlogd512_maskz:
5632 case X86::BI__builtin_ia32_pternlogq128_maskz:
5633 case X86::BI__builtin_ia32_pternlogq256_maskz:
5634 case X86::BI__builtin_ia32_pternlogq512_maskz:
5635 return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/true);
5636 case Builtin::BI__builtin_elementwise_fshl:
5637 return interp__builtin_elementwise_triop(S, OpPC, Call,
5638 Fn: llvm::APIntOps::fshl);
5639 case Builtin::BI__builtin_elementwise_fshr:
5640 return interp__builtin_elementwise_triop(S, OpPC, Call,
5641 Fn: llvm::APIntOps::fshr);
5642
5643 case X86::BI__builtin_ia32_shuf_f32x4_256:
5644 case X86::BI__builtin_ia32_shuf_i32x4_256:
5645 case X86::BI__builtin_ia32_shuf_f64x2_256:
5646 case X86::BI__builtin_ia32_shuf_i64x2_256:
5647 case X86::BI__builtin_ia32_shuf_f32x4:
5648 case X86::BI__builtin_ia32_shuf_i32x4:
5649 case X86::BI__builtin_ia32_shuf_f64x2:
5650 case X86::BI__builtin_ia32_shuf_i64x2: {
5651 // Destination and sources A, B all have the same type.
5652 QualType VecQT = Call->getArg(Arg: 0)->getType();
5653 const auto *VecT = VecQT->castAs<VectorType>();
5654 unsigned NumElems = VecT->getNumElements();
5655 unsigned ElemBits = S.getASTContext().getTypeSize(T: VecT->getElementType());
5656 unsigned LaneBits = 128u;
5657 unsigned NumLanes = (NumElems * ElemBits) / LaneBits;
5658 unsigned NumElemsPerLane = LaneBits / ElemBits;
5659
5660 return interp__builtin_ia32_shuffle_generic(
5661 S, OpPC, Call,
5662 GetSourceIndex: [NumLanes, NumElemsPerLane](unsigned DstIdx, unsigned ShuffleMask) {
5663 // DstIdx determines source. ShuffleMask selects lane in source.
5664 unsigned BitsPerElem = NumLanes / 2;
5665 unsigned IndexMask = (1u << BitsPerElem) - 1;
5666 unsigned Lane = DstIdx / NumElemsPerLane;
5667 unsigned SrcIdx = (Lane < NumLanes / 2) ? 0 : 1;
5668 unsigned BitIdx = BitsPerElem * Lane;
5669 unsigned SrcLaneIdx = (ShuffleMask >> BitIdx) & IndexMask;
5670 unsigned ElemInLane = DstIdx % NumElemsPerLane;
5671 unsigned IdxToPick = SrcLaneIdx * NumElemsPerLane + ElemInLane;
5672 return std::pair<unsigned, int>{SrcIdx, IdxToPick};
5673 });
5674 }
5675
5676 case X86::BI__builtin_ia32_insertf32x4_256:
5677 case X86::BI__builtin_ia32_inserti32x4_256:
5678 case X86::BI__builtin_ia32_insertf64x2_256:
5679 case X86::BI__builtin_ia32_inserti64x2_256:
5680 case X86::BI__builtin_ia32_insertf32x4:
5681 case X86::BI__builtin_ia32_inserti32x4:
5682 case X86::BI__builtin_ia32_insertf64x2_512:
5683 case X86::BI__builtin_ia32_inserti64x2_512:
5684 case X86::BI__builtin_ia32_insertf32x8:
5685 case X86::BI__builtin_ia32_inserti32x8:
5686 case X86::BI__builtin_ia32_insertf64x4:
5687 case X86::BI__builtin_ia32_inserti64x4:
5688 case X86::BI__builtin_ia32_vinsertf128_ps256:
5689 case X86::BI__builtin_ia32_vinsertf128_pd256:
5690 case X86::BI__builtin_ia32_vinsertf128_si256:
5691 case X86::BI__builtin_ia32_insert128i256:
5692 return interp__builtin_x86_insert_subvector(S, OpPC, Call, ID: BuiltinID);
5693
5694 case clang::X86::BI__builtin_ia32_vcvtps2ph:
5695 case clang::X86::BI__builtin_ia32_vcvtps2ph256:
5696 return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call);
5697
5698 case X86::BI__builtin_ia32_vec_ext_v4hi:
5699 case X86::BI__builtin_ia32_vec_ext_v16qi:
5700 case X86::BI__builtin_ia32_vec_ext_v8hi:
5701 case X86::BI__builtin_ia32_vec_ext_v4si:
5702 case X86::BI__builtin_ia32_vec_ext_v2di:
5703 case X86::BI__builtin_ia32_vec_ext_v32qi:
5704 case X86::BI__builtin_ia32_vec_ext_v16hi:
5705 case X86::BI__builtin_ia32_vec_ext_v8si:
5706 case X86::BI__builtin_ia32_vec_ext_v4di:
5707 case X86::BI__builtin_ia32_vec_ext_v4sf:
5708 return interp__builtin_vec_ext(S, OpPC, Call, ID: BuiltinID);
5709
5710 case X86::BI__builtin_ia32_vec_set_v4hi:
5711 case X86::BI__builtin_ia32_vec_set_v16qi:
5712 case X86::BI__builtin_ia32_vec_set_v8hi:
5713 case X86::BI__builtin_ia32_vec_set_v4si:
5714 case X86::BI__builtin_ia32_vec_set_v2di:
5715 case X86::BI__builtin_ia32_vec_set_v32qi:
5716 case X86::BI__builtin_ia32_vec_set_v16hi:
5717 case X86::BI__builtin_ia32_vec_set_v8si:
5718 case X86::BI__builtin_ia32_vec_set_v4di:
5719 return interp__builtin_vec_set(S, OpPC, Call, ID: BuiltinID);
5720
5721 case X86::BI__builtin_ia32_cvtb2mask128:
5722 case X86::BI__builtin_ia32_cvtb2mask256:
5723 case X86::BI__builtin_ia32_cvtb2mask512:
5724 case X86::BI__builtin_ia32_cvtw2mask128:
5725 case X86::BI__builtin_ia32_cvtw2mask256:
5726 case X86::BI__builtin_ia32_cvtw2mask512:
5727 case X86::BI__builtin_ia32_cvtd2mask128:
5728 case X86::BI__builtin_ia32_cvtd2mask256:
5729 case X86::BI__builtin_ia32_cvtd2mask512:
5730 case X86::BI__builtin_ia32_cvtq2mask128:
5731 case X86::BI__builtin_ia32_cvtq2mask256:
5732 case X86::BI__builtin_ia32_cvtq2mask512:
5733 return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, ID: BuiltinID);
5734
5735 case X86::BI__builtin_ia32_cvtmask2b128:
5736 case X86::BI__builtin_ia32_cvtmask2b256:
5737 case X86::BI__builtin_ia32_cvtmask2b512:
5738 case X86::BI__builtin_ia32_cvtmask2w128:
5739 case X86::BI__builtin_ia32_cvtmask2w256:
5740 case X86::BI__builtin_ia32_cvtmask2w512:
5741 case X86::BI__builtin_ia32_cvtmask2d128:
5742 case X86::BI__builtin_ia32_cvtmask2d256:
5743 case X86::BI__builtin_ia32_cvtmask2d512:
5744 case X86::BI__builtin_ia32_cvtmask2q128:
5745 case X86::BI__builtin_ia32_cvtmask2q256:
5746 case X86::BI__builtin_ia32_cvtmask2q512:
5747 return interp__builtin_ia32_cvt_mask2vec(S, OpPC, Call, ID: BuiltinID);
5748
5749 case X86::BI__builtin_ia32_cvtsd2ss:
5750 return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, HasRoundingMask: false);
5751
5752 case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
5753 return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, HasRoundingMask: true);
5754
5755 case X86::BI__builtin_ia32_cvtpd2ps:
5756 case X86::BI__builtin_ia32_cvtpd2ps256:
5757 return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, IsMasked: false, HasRounding: false);
5758 case X86::BI__builtin_ia32_cvtpd2ps_mask:
5759 return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, IsMasked: true, HasRounding: false);
5760 case X86::BI__builtin_ia32_cvtpd2ps512_mask:
5761 return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, IsMasked: true, HasRounding: true);
5762
5763 case X86::BI__builtin_ia32_cmpb128_mask:
5764 case X86::BI__builtin_ia32_cmpw128_mask:
5765 case X86::BI__builtin_ia32_cmpd128_mask:
5766 case X86::BI__builtin_ia32_cmpq128_mask:
5767 case X86::BI__builtin_ia32_cmpb256_mask:
5768 case X86::BI__builtin_ia32_cmpw256_mask:
5769 case X86::BI__builtin_ia32_cmpd256_mask:
5770 case X86::BI__builtin_ia32_cmpq256_mask:
5771 case X86::BI__builtin_ia32_cmpb512_mask:
5772 case X86::BI__builtin_ia32_cmpw512_mask:
5773 case X86::BI__builtin_ia32_cmpd512_mask:
5774 case X86::BI__builtin_ia32_cmpq512_mask:
5775 return interp__builtin_ia32_cmp_mask(S, OpPC, Call, ID: BuiltinID,
5776 /*IsUnsigned=*/false);
5777
5778 case X86::BI__builtin_ia32_ucmpb128_mask:
5779 case X86::BI__builtin_ia32_ucmpw128_mask:
5780 case X86::BI__builtin_ia32_ucmpd128_mask:
5781 case X86::BI__builtin_ia32_ucmpq128_mask:
5782 case X86::BI__builtin_ia32_ucmpb256_mask:
5783 case X86::BI__builtin_ia32_ucmpw256_mask:
5784 case X86::BI__builtin_ia32_ucmpd256_mask:
5785 case X86::BI__builtin_ia32_ucmpq256_mask:
5786 case X86::BI__builtin_ia32_ucmpb512_mask:
5787 case X86::BI__builtin_ia32_ucmpw512_mask:
5788 case X86::BI__builtin_ia32_ucmpd512_mask:
5789 case X86::BI__builtin_ia32_ucmpq512_mask:
5790 return interp__builtin_ia32_cmp_mask(S, OpPC, Call, ID: BuiltinID,
5791 /*IsUnsigned=*/true);
5792
5793 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
5794 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
5795 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
5796 return interp__builtin_ia32_shufbitqmb_mask(S, OpPC, Call);
5797
5798 case X86::BI__builtin_ia32_pslldqi128_byteshift:
5799 case X86::BI__builtin_ia32_pslldqi256_byteshift:
5800 case X86::BI__builtin_ia32_pslldqi512_byteshift:
5801 // These SLLDQ intrinsics always operate on byte elements (8 bits).
5802 // The lane width is hardcoded to 16 to match the SIMD register size,
5803 // but the algorithm processes one byte per iteration,
5804 // so APInt(8, ...) is correct and intentional.
5805 return interp__builtin_ia32_shuffle_generic(
5806 S, OpPC, Call,
5807 GetSourceIndex: [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
5808 unsigned LaneBase = (DstIdx / 16) * 16;
5809 unsigned LaneIdx = DstIdx % 16;
5810 if (LaneIdx < Shift)
5811 return std::make_pair(x: 0, y: -1);
5812
5813 return std::make_pair(x: 0,
5814 y: static_cast<int>(LaneBase + LaneIdx - Shift));
5815 });
5816
5817 case X86::BI__builtin_ia32_psrldqi128_byteshift:
5818 case X86::BI__builtin_ia32_psrldqi256_byteshift:
5819 case X86::BI__builtin_ia32_psrldqi512_byteshift:
5820 // These SRLDQ intrinsics always operate on byte elements (8 bits).
5821 // The lane width is hardcoded to 16 to match the SIMD register size,
5822 // but the algorithm processes one byte per iteration,
5823 // so APInt(8, ...) is correct and intentional.
5824 return interp__builtin_ia32_shuffle_generic(
5825 S, OpPC, Call,
5826 GetSourceIndex: [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
5827 unsigned LaneBase = (DstIdx / 16) * 16;
5828 unsigned LaneIdx = DstIdx % 16;
5829 if (LaneIdx + Shift < 16)
5830 return std::make_pair(x: 0,
5831 y: static_cast<int>(LaneBase + LaneIdx + Shift));
5832
5833 return std::make_pair(x: 0, y: -1);
5834 });
5835
5836 case X86::BI__builtin_ia32_palignr128:
5837 case X86::BI__builtin_ia32_palignr256:
5838 case X86::BI__builtin_ia32_palignr512:
5839 return interp__builtin_ia32_shuffle_generic(
5840 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Shift) {
5841 // Default to -1 → zero-fill this destination element
5842 unsigned VecIdx = 1;
5843 int ElemIdx = -1;
5844
5845 int Lane = DstIdx / 16;
5846 int Offset = DstIdx % 16;
5847
5848 // Elements come from VecB first, then VecA after the shift boundary
5849 unsigned ShiftedIdx = Offset + (Shift & 0xFF);
5850 if (ShiftedIdx < 16) { // from VecB
5851 ElemIdx = ShiftedIdx + (Lane * 16);
5852 } else if (ShiftedIdx < 32) { // from VecA
5853 VecIdx = 0;
5854 ElemIdx = (ShiftedIdx - 16) + (Lane * 16);
5855 }
5856
5857 return std::pair<unsigned, int>{VecIdx, ElemIdx};
5858 });
5859
5860 case X86::BI__builtin_ia32_alignd128:
5861 case X86::BI__builtin_ia32_alignd256:
5862 case X86::BI__builtin_ia32_alignd512:
5863 case X86::BI__builtin_ia32_alignq128:
5864 case X86::BI__builtin_ia32_alignq256:
5865 case X86::BI__builtin_ia32_alignq512: {
5866 unsigned NumElems = Call->getType()->castAs<VectorType>()->getNumElements();
5867 return interp__builtin_ia32_shuffle_generic(
5868 S, OpPC, Call, GetSourceIndex: [NumElems](unsigned DstIdx, unsigned Shift) {
5869 unsigned Imm = Shift & 0xFF;
5870 unsigned EffectiveShift = Imm & (NumElems - 1);
5871 unsigned SourcePos = DstIdx + EffectiveShift;
5872 unsigned VecIdx = SourcePos < NumElems ? 1u : 0u;
5873 unsigned ElemIdx = SourcePos & (NumElems - 1);
5874 return std::pair<unsigned, int>{VecIdx, static_cast<int>(ElemIdx)};
5875 });
5876 }
5877
5878 case clang::X86::BI__builtin_ia32_minps:
5879 case clang::X86::BI__builtin_ia32_minpd:
5880 case clang::X86::BI__builtin_ia32_minph128:
5881 case clang::X86::BI__builtin_ia32_minph256:
5882 case clang::X86::BI__builtin_ia32_minps256:
5883 case clang::X86::BI__builtin_ia32_minpd256:
5884 case clang::X86::BI__builtin_ia32_minps512:
5885 case clang::X86::BI__builtin_ia32_minpd512:
5886 case clang::X86::BI__builtin_ia32_minph512:
5887 return interp__builtin_elementwise_fp_binop(
5888 S, OpPC, Call,
5889 Fn: [](const APFloat &A, const APFloat &B,
5890 std::optional<APSInt>) -> std::optional<APFloat> {
5891 if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
5892 B.isInfinity() || B.isDenormal())
5893 return std::nullopt;
5894 if (A.isZero() && B.isZero())
5895 return B;
5896 return llvm::minimum(A, B);
5897 });
5898
5899 case clang::X86::BI__builtin_ia32_minss:
5900 case clang::X86::BI__builtin_ia32_minsd:
5901 return interp__builtin_elementwise_fp_binop(
5902 S, OpPC, Call,
5903 Fn: [](const APFloat &A, const APFloat &B,
5904 std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
5905 return EvalScalarMinMaxFp(A, B, RoundingMode, /*IsMin=*/true);
5906 },
5907 /*IsScalar=*/true);
5908
5909 case clang::X86::BI__builtin_ia32_minsd_round_mask:
5910 case clang::X86::BI__builtin_ia32_minss_round_mask:
5911 case clang::X86::BI__builtin_ia32_minsh_round_mask:
5912 case clang::X86::BI__builtin_ia32_maxsd_round_mask:
5913 case clang::X86::BI__builtin_ia32_maxss_round_mask:
5914 case clang::X86::BI__builtin_ia32_maxsh_round_mask: {
5915 bool IsMin = BuiltinID == clang::X86::BI__builtin_ia32_minsd_round_mask ||
5916 BuiltinID == clang::X86::BI__builtin_ia32_minss_round_mask ||
5917 BuiltinID == clang::X86::BI__builtin_ia32_minsh_round_mask;
5918 return interp__builtin_scalar_fp_round_mask_binop(
5919 S, OpPC, Call,
5920 Fn: [IsMin](const APFloat &A, const APFloat &B,
5921 std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
5922 return EvalScalarMinMaxFp(A, B, RoundingMode, IsMin);
5923 });
5924 }
5925
5926 case clang::X86::BI__builtin_ia32_maxps:
5927 case clang::X86::BI__builtin_ia32_maxpd:
5928 case clang::X86::BI__builtin_ia32_maxph128:
5929 case clang::X86::BI__builtin_ia32_maxph256:
5930 case clang::X86::BI__builtin_ia32_maxps256:
5931 case clang::X86::BI__builtin_ia32_maxpd256:
5932 case clang::X86::BI__builtin_ia32_maxps512:
5933 case clang::X86::BI__builtin_ia32_maxpd512:
5934 case clang::X86::BI__builtin_ia32_maxph512:
5935 return interp__builtin_elementwise_fp_binop(
5936 S, OpPC, Call,
5937 Fn: [](const APFloat &A, const APFloat &B,
5938 std::optional<APSInt>) -> std::optional<APFloat> {
5939 if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
5940 B.isInfinity() || B.isDenormal())
5941 return std::nullopt;
5942 if (A.isZero() && B.isZero())
5943 return B;
5944 return llvm::maximum(A, B);
5945 });
5946
5947 case clang::X86::BI__builtin_ia32_maxss:
5948 case clang::X86::BI__builtin_ia32_maxsd:
5949 return interp__builtin_elementwise_fp_binop(
5950 S, OpPC, Call,
5951 Fn: [](const APFloat &A, const APFloat &B,
5952 std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
5953 return EvalScalarMinMaxFp(A, B, RoundingMode, /*IsMin=*/false);
5954 },
5955 /*IsScalar=*/true);
5956
5957 default:
5958 S.FFDiag(Loc: S.Current->getLocation(PC: OpPC),
5959 DiagId: diag::note_invalid_subexpr_in_const_expr)
5960 << S.Current->getRange(PC: OpPC);
5961
5962 return false;
5963 }
5964
5965 llvm_unreachable("Unhandled builtin ID");
5966}
5967
5968bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E,
5969 ArrayRef<int64_t> ArrayIndices, int64_t &IntResult) {
5970 S.getASTContext().recordOffsetOfEvaluation(E);
5971 CharUnits Result;
5972 unsigned N = E->getNumComponents();
5973 assert(N > 0);
5974
5975 unsigned ArrayIndex = 0;
5976 QualType CurrentType = E->getTypeSourceInfo()->getType();
5977 for (unsigned I = 0; I != N; ++I) {
5978 const OffsetOfNode &Node = E->getComponent(Idx: I);
5979 switch (Node.getKind()) {
5980 case OffsetOfNode::Field: {
5981 const FieldDecl *MemberDecl = Node.getField();
5982 const auto *RD = CurrentType->getAsRecordDecl();
5983 if (!RD || RD->isInvalidDecl())
5984 return false;
5985 const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(D: RD);
5986 unsigned FieldIndex = MemberDecl->getFieldIndex();
5987 assert(FieldIndex < RL.getFieldCount() && "offsetof field in wrong type");
5988 Result +=
5989 S.getASTContext().toCharUnitsFromBits(BitSize: RL.getFieldOffset(FieldNo: FieldIndex));
5990 CurrentType = MemberDecl->getType().getNonReferenceType();
5991 break;
5992 }
5993 case OffsetOfNode::Array: {
5994 // When generating bytecode, we put all the index expressions as Sint64 on
5995 // the stack.
5996 int64_t Index = ArrayIndices[ArrayIndex];
5997 const ArrayType *AT = S.getASTContext().getAsArrayType(T: CurrentType);
5998 if (!AT)
5999 return false;
6000 CurrentType = AT->getElementType();
6001 CharUnits ElementSize = S.getASTContext().getTypeSizeInChars(T: CurrentType);
6002 Result += Index * ElementSize;
6003 ++ArrayIndex;
6004 break;
6005 }
6006 case OffsetOfNode::Base: {
6007 const CXXBaseSpecifier *BaseSpec = Node.getBase();
6008 if (BaseSpec->isVirtual())
6009 return false;
6010
6011 // Find the layout of the class whose base we are looking into.
6012 const auto *RD = CurrentType->getAsCXXRecordDecl();
6013 if (!RD || RD->isInvalidDecl())
6014 return false;
6015 const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(D: RD);
6016
6017 // Find the base class itself.
6018 CurrentType = BaseSpec->getType();
6019 const auto *BaseRD = CurrentType->getAsCXXRecordDecl();
6020 if (!BaseRD)
6021 return false;
6022
6023 // Add the offset to the base.
6024 Result += RL.getBaseClassOffset(Base: BaseRD);
6025 break;
6026 }
6027 case OffsetOfNode::Identifier:
6028 llvm_unreachable("Dependent OffsetOfExpr?");
6029 }
6030 }
6031
6032 IntResult = Result.getQuantity();
6033
6034 return true;
6035}
6036
6037bool SetThreeWayComparisonField(InterpState &S, CodePtr OpPC,
6038 const Pointer &Ptr, const APSInt &IntValue) {
6039
6040 const Record *R = Ptr.getRecord();
6041 assert(R);
6042 assert(R->getNumFields() == 1);
6043
6044 unsigned FieldOffset = R->getField(I: 0u)->Offset;
6045 const Pointer &FieldPtr = Ptr.atField(Off: FieldOffset);
6046 PrimType FieldT = *S.getContext().classify(T: FieldPtr.getType());
6047
6048 INT_TYPE_SWITCH(FieldT,
6049 FieldPtr.deref<T>() = T::from(IntValue.getSExtValue()));
6050 FieldPtr.initialize();
6051 return true;
6052}
6053
6054static void zeroAll(Pointer &Dest) {
6055 const Descriptor *Desc = Dest.getFieldDesc();
6056
6057 if (Desc->isPrimitive()) {
6058 TYPE_SWITCH(Desc->getPrimType(), {
6059 Dest.deref<T>().~T();
6060 new (&Dest.deref<T>()) T();
6061 });
6062 return;
6063 }
6064
6065 if (Desc->isRecord()) {
6066 const Record *R = Desc->ElemRecord;
6067 for (const Record::Field &F : R->fields()) {
6068 Pointer FieldPtr = Dest.atField(Off: F.Offset);
6069 zeroAll(Dest&: FieldPtr);
6070 }
6071 return;
6072 }
6073
6074 if (Desc->isPrimitiveArray()) {
6075 for (unsigned I = 0, N = Desc->getNumElems(); I != N; ++I) {
6076 TYPE_SWITCH(Desc->getPrimType(), {
6077 Dest.deref<T>().~T();
6078 new (&Dest.deref<T>()) T();
6079 });
6080 }
6081 return;
6082 }
6083
6084 if (Desc->isCompositeArray()) {
6085 for (unsigned I = 0, N = Desc->getNumElems(); I != N; ++I) {
6086 Pointer ElemPtr = Dest.atIndex(Idx: I).narrow();
6087 zeroAll(Dest&: ElemPtr);
6088 }
6089 return;
6090 }
6091}
6092
/// Deep-copies a composite (non-primitive) object; defined below, mutually
/// recursive with copyRecord().
static bool copyComposite(InterpState &S, CodePtr OpPC, const Pointer &Src,
                          Pointer &Dest, bool Activate);
/// Copies the record at \p Src into \p Dest field by field, then recurses
/// into base-class subobjects and marks \p Dest initialized. For unions,
/// only the active field is copied (and activated in the destination);
/// inactive fields are zeroed. If \p Activate is set, copied fields are
/// additionally marked active.
static bool copyRecord(InterpState &S, CodePtr OpPC, const Pointer &Src,
                       Pointer &Dest, bool Activate = false) {
  [[maybe_unused]] const Descriptor *SrcDesc = Src.getFieldDesc();
  const Descriptor *DestDesc = Dest.getFieldDesc();

  // Copy one field: primitive fields by value via TYPE_SWITCH, composite
  // fields via copyComposite().
  auto copyField = [&](const Record::Field &F, bool Activate) -> bool {
    Pointer DestField = Dest.atField(Off: F.Offset);
    if (OptPrimType FT = S.Ctx.classify(T: F.Decl->getType())) {
      TYPE_SWITCH(*FT, {
        DestField.deref<T>() = Src.atField(F.Offset).deref<T>();
        // Only propagate the initialized state if the source field was
        // actually initialized.
        if (Src.atField(F.Offset).isInitialized())
          DestField.initialize();
        if (Activate)
          DestField.activate();
      });
      return true;
    }
    // Composite field.
    return copyComposite(S, OpPC, Src: Src.atField(Off: F.Offset), Dest&: DestField, Activate);
  };

  assert(SrcDesc->isRecord());
  assert(SrcDesc->ElemRecord == DestDesc->ElemRecord);
  const Record *R = DestDesc->ElemRecord;
  for (const Record::Field &F : R->fields()) {
    if (R->isUnion()) {
      // For unions, only copy the active field. Zero all others.
      const Pointer &SrcField = Src.atField(Off: F.Offset);
      if (SrcField.isActive()) {
        if (!copyField(F, /*Activate=*/true))
          return false;
      } else {
        if (!CheckMutable(S, OpPC, Ptr: Src.atField(Off: F.Offset)))
          return false;
        Pointer DestField = Dest.atField(Off: F.Offset);
        zeroAll(Dest&: DestField);
      }
    } else {
      if (!copyField(F, Activate))
        return false;
    }
  }

  // Copy base-class subobjects, forwarding the Activate flag.
  for (const Record::Base &B : R->bases()) {
    Pointer DestBase = Dest.atField(Off: B.Offset);
    if (!copyRecord(S, OpPC, Src: Src.atField(Off: B.Offset), Dest&: DestBase, Activate))
      return false;
  }

  Dest.initialize();
  return true;
}
6147
6148static bool copyComposite(InterpState &S, CodePtr OpPC, const Pointer &Src,
6149 Pointer &Dest, bool Activate = false) {
6150 assert(Src.isLive() && Dest.isLive());
6151
6152 [[maybe_unused]] const Descriptor *SrcDesc = Src.getFieldDesc();
6153 const Descriptor *DestDesc = Dest.getFieldDesc();
6154
6155 assert(!DestDesc->isPrimitive() && !SrcDesc->isPrimitive());
6156
6157 if (DestDesc->isPrimitiveArray()) {
6158 assert(SrcDesc->isPrimitiveArray());
6159 assert(SrcDesc->getNumElems() == DestDesc->getNumElems());
6160 PrimType ET = DestDesc->getPrimType();
6161 for (unsigned I = 0, N = DestDesc->getNumElems(); I != N; ++I) {
6162 Pointer DestElem = Dest.atIndex(Idx: I);
6163 TYPE_SWITCH(ET, {
6164 DestElem.deref<T>() = Src.elem<T>(I);
6165 DestElem.initialize();
6166 });
6167 }
6168 return true;
6169 }
6170
6171 if (DestDesc->isCompositeArray()) {
6172 assert(SrcDesc->isCompositeArray());
6173 assert(SrcDesc->getNumElems() == DestDesc->getNumElems());
6174 for (unsigned I = 0, N = DestDesc->getNumElems(); I != N; ++I) {
6175 const Pointer &SrcElem = Src.atIndex(Idx: I).narrow();
6176 Pointer DestElem = Dest.atIndex(Idx: I).narrow();
6177 if (!copyComposite(S, OpPC, Src: SrcElem, Dest&: DestElem, Activate))
6178 return false;
6179 }
6180 return true;
6181 }
6182
6183 if (DestDesc->isRecord())
6184 return copyRecord(S, OpPC, Src, Dest, Activate);
6185 return Invalid(S, OpPC);
6186}
6187
6188bool DoMemcpy(InterpState &S, CodePtr OpPC, const Pointer &Src, Pointer &Dest) {
6189 if (!Src.isBlockPointer() || Src.getFieldDesc()->isPrimitive())
6190 return false;
6191 if (!Dest.isBlockPointer() || Dest.getFieldDesc()->isPrimitive())
6192 return false;
6193
6194 return copyComposite(S, OpPC, Src, Dest);
6195}
6196
6197} // namespace interp
6198} // namespace clang
6199