1//===--- InterpBuiltin.cpp - Interpreter for the constexpr VM ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "../ExprConstShared.h"
9#include "Boolean.h"
10#include "EvalEmitter.h"
11#include "InterpBuiltinBitCast.h"
12#include "InterpHelpers.h"
13#include "PrimType.h"
14#include "Program.h"
15#include "clang/AST/InferAlloc.h"
16#include "clang/AST/OSLog.h"
17#include "clang/AST/RecordLayout.h"
18#include "clang/Basic/Builtins.h"
19#include "clang/Basic/TargetBuiltins.h"
20#include "clang/Basic/TargetInfo.h"
21#include "llvm/ADT/StringExtras.h"
22#include "llvm/Support/AllocToken.h"
23#include "llvm/Support/ErrorHandling.h"
24#include "llvm/Support/SipHash.h"
25
26namespace clang {
27namespace interp {
28
29[[maybe_unused]] static bool isNoopBuiltin(unsigned ID) {
30 switch (ID) {
31 case Builtin::BIas_const:
32 case Builtin::BIforward:
33 case Builtin::BIforward_like:
34 case Builtin::BImove:
35 case Builtin::BImove_if_noexcept:
36 case Builtin::BIaddressof:
37 case Builtin::BI__addressof:
38 case Builtin::BI__builtin_addressof:
39 case Builtin::BI__builtin_launder:
40 return true;
41 default:
42 return false;
43 }
44 return false;
45}
46
/// Pops and discards one value of primitive type \p T from the stack.
static void discard(InterpStack &Stk, PrimType T) {
  TYPE_SWITCH(T, { Stk.discard<T>(); });
}
50
/// Pops the value of integral expression \p E from the stack and returns it
/// as a uint64_t (dispatching on the expression's classified prim type).
static uint64_t popToUInt64(const InterpState &S, const Expr *E) {
  INT_TYPE_SWITCH(*S.getContext().classify(E->getType()),
                  return static_cast<uint64_t>(S.Stk.pop<T>()));
}
55
/// Pops a value of integral primitive type \p T and converts it to an APSInt.
static APSInt popToAPSInt(InterpStack &Stk, PrimType T) {
  INT_TYPE_SWITCH(T, return Stk.pop<T>().toAPSInt());
}
59
/// Convenience overload: classifies \p E's AST type and pops accordingly.
static APSInt popToAPSInt(InterpState &S, const Expr *E) {
  return popToAPSInt(Stk&: S.Stk, T: *S.getContext().classify(T: E->getType()));
}
/// Convenience overload: classifies QualType \p T and pops accordingly.
static APSInt popToAPSInt(InterpState &S, QualType T) {
  return popToAPSInt(Stk&: S.Stk, T: *S.getContext().classify(T));
}
66
67/// Check for common reasons a pointer can't be read from, which
68/// are usually not diagnosed in a builtin function.
69static bool isReadable(const Pointer &P) {
70 if (P.isDummy())
71 return false;
72 if (!P.isBlockPointer())
73 return false;
74 if (!P.isLive())
75 return false;
76 if (P.isOnePastEnd())
77 return false;
78 return true;
79}
80
/// Pushes \p Val on the stack as the type given by \p QT.
/// Fixed-width prim types are re-created at QT's bit width; the
/// arbitrary-precision types (PT_IntAP/PT_IntAPS) get interpreter-owned
/// storage allocated first.
static void pushInteger(InterpState &S, const APSInt &Val, QualType QT) {
  assert(QT->isSignedIntegerOrEnumerationType() ||
         QT->isUnsignedIntegerOrEnumerationType());
  OptPrimType T = S.getContext().classify(T: QT);
  assert(T);
  unsigned BitWidth = S.getASTContext().getIntWidth(T: QT);

  // Signed arbitrary-precision integer: allocate storage, then copy.
  if (T == PT_IntAPS) {
    auto Result = S.allocAP<IntegralAP<true>>(BitWidth);
    Result.copy(V: Val);
    S.Stk.push<IntegralAP<true>>(Args&: Result);
    return;
  }

  // Unsigned arbitrary-precision integer.
  if (T == PT_IntAP) {
    auto Result = S.allocAP<IntegralAP<false>>(BitWidth);
    Result.copy(V: Val);
    S.Stk.push<IntegralAP<false>>(Args&: Result);
    return;
  }

  // Fixed-width case: extract the raw 64-bit value with QT's signedness and
  // rebuild it at the destination bit width.
  if (QT->isSignedIntegerOrEnumerationType()) {
    int64_t V = Val.getSExtValue();
    INT_TYPE_SWITCH(*T, { S.Stk.push<T>(T::from(V, BitWidth)); });
  } else {
    assert(QT->isUnsignedIntegerOrEnumerationType());
    uint64_t V = Val.getZExtValue();
    INT_TYPE_SWITCH(*T, { S.Stk.push<T>(T::from(V, BitWidth)); });
  }
}
112
/// Generic wrapper: converts \p Val (APInt, APSInt, or any built-in integer
/// type) into an APSInt and forwards to the APSInt overload above.
template <typename T>
static void pushInteger(InterpState &S, T Val, QualType QT) {
  if constexpr (std::is_same_v<T, APInt>)
    // APInt carries no sign of its own; wrap it as unsigned here.
    pushInteger(S, Val: APSInt(Val, !std::is_signed_v<T>), QT);
  else if constexpr (std::is_same_v<T, APSInt>)
    pushInteger(S, Val, QT);
  else
    // Built-in integers: create an APInt of T's bit width, preserving T's
    // signedness when widening the raw value.
    pushInteger(S,
                Val: APSInt(APInt(sizeof(T) * 8, static_cast<uint64_t>(Val),
                               std::is_signed_v<T>),
                          !std::is_signed_v<T>),
                QT);
}
126
/// Stores \p Value into the location \p Dest, which holds an integer of
/// primitive type \p ValueT. AP integers get fresh interpreter-owned storage
/// sized to the value's bit width before the copy.
static void assignInteger(InterpState &S, const Pointer &Dest, PrimType ValueT,
                          const APSInt &Value) {

  if (ValueT == PT_IntAPS) {
    Dest.deref<IntegralAP<true>>() =
        S.allocAP<IntegralAP<true>>(BitWidth: Value.getBitWidth());
    Dest.deref<IntegralAP<true>>().copy(V: Value);
  } else if (ValueT == PT_IntAP) {
    Dest.deref<IntegralAP<false>>() =
        S.allocAP<IntegralAP<false>>(BitWidth: Value.getBitWidth());
    Dest.deref<IntegralAP<false>>().copy(V: Value);
  } else {
    // Fixed-width integers (bool excluded) convert and assign directly.
    INT_TYPE_SWITCH_NO_BOOL(
        ValueT, { Dest.deref<T>() = T::from(static_cast<T>(Value)); });
  }
}
143
144static QualType getElemType(const Pointer &P) {
145 const Descriptor *Desc = P.getFieldDesc();
146 QualType T = Desc->getType();
147 if (Desc->isPrimitive())
148 return T;
149 if (T->isPointerType())
150 return T->castAs<PointerType>()->getPointeeType();
151 if (Desc->isArray())
152 return Desc->getElemQualType();
153 if (const auto *AT = T->getAsArrayTypeUnsafe())
154 return AT->getElementType();
155 return T;
156}
157
/// Emits the "non-constexpr function called" note for builtin \p ID
/// (C++11 and later), or the generic invalid-subexpression note otherwise.
static void diagnoseNonConstexprBuiltin(InterpState &S, CodePtr OpPC,
                                        unsigned ID) {
  // Diagnostics may be suppressed entirely.
  if (!S.diagnosing())
    return;

  auto Loc = S.Current->getSource(PC: OpPC);
  if (S.getLangOpts().CPlusPlus11)
    S.CCEDiag(SI: Loc, DiagId: diag::note_constexpr_invalid_function)
        << /*isConstexpr=*/0 << /*isConstructor=*/0
        << S.getASTContext().BuiltinInfo.getQuotedName(ID);
  else
    S.CCEDiag(SI: Loc, DiagId: diag::note_invalid_subexpr_in_const_expr);
}
171
172static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
173 assert(Val.getFieldDesc()->isPrimitiveArray() &&
174 Val.getFieldDesc()->getElemQualType()->isBooleanType() &&
175 "Not a boolean vector");
176 unsigned NumElems = Val.getNumElems();
177
178 // Each element is one bit, so create an integer with NumElts bits.
179 llvm::APSInt Result(NumElems, 0);
180 for (unsigned I = 0; I != NumElems; ++I) {
181 if (Val.elem<bool>(I))
182 Result.setBit(I);
183 }
184
185 return Result;
186}
187
// Strict double -> float conversion used for X86 PD2PS/cvtsd2ss intrinsics.
// Reject NaN/Inf/Subnormal inputs and any lossy/inexact conversions.
static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst,
                                       InterpState &S, const Expr *DiagExpr) {
  // Infinities are rejected outright.
  if (Src.isInfinity()) {
    if (S.diagnosing())
      S.CCEDiag(E: DiagExpr, DiagId: diag::note_constexpr_float_arithmetic) << 0;
    return false;
  }
  // So are NaNs (quiet or signaling).
  if (Src.isNaN()) {
    if (S.diagnosing())
      S.CCEDiag(E: DiagExpr, DiagId: diag::note_constexpr_float_arithmetic) << 1;
    return false;
  }
  APFloat Val = Src;
  bool LosesInfo = false;
  APFloat::opStatus Status = Val.convert(
      ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven, losesInfo: &LosesInfo);
  // A conversion that loses information or lands on a subnormal single is
  // not permitted under the strict semantics.
  if (LosesInfo || Val.isDenormal()) {
    if (S.diagnosing())
      S.CCEDiag(E: DiagExpr, DiagId: diag::note_constexpr_float_arithmetic_strict);
    return false;
  }
  // Any other non-OK status (e.g. inexact) is also rejected.
  if (Status != APFloat::opOK) {
    if (S.diagnosing())
      S.CCEDiag(E: DiagExpr, DiagId: diag::note_invalid_subexpr_in_const_expr);
    return false;
  }
  Dst.copy(F: Val);
  return true;
}
219
/// __builtin_is_constant_evaluated: pushes whether we are in a constant
/// context, and warns when the result is tautologically true (e.g. used
/// directly in a constexpr-if condition).
static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
                                                  const InterpFrame *Frame,
                                                  const CallExpr *Call) {
  unsigned Depth = S.Current->getDepth();
  // Recognize the std::is_constant_evaluated() wrapper by name and namespace.
  auto isStdCall = [](const FunctionDecl *F) -> bool {
    return F && F->isInStdNamespace() && F->getIdentifier() &&
           F->getIdentifier()->isStr(Str: "is_constant_evaluated");
  };
  const InterpFrame *Caller = Frame->Caller;
  // The current frame is the one for __builtin_is_constant_evaluated.
  // The one above that, potentially the one for std::is_constant_evaluated().
  if (S.inConstantContext() && !S.checkingPotentialConstantExpression() &&
      S.getEvalStatus().Diag &&
      (Depth == 0 || (Depth == 1 && isStdCall(Frame->getCallee())))) {
    if (Caller && isStdCall(Frame->getCallee())) {
      // Point the warning at the std::is_constant_evaluated() call site.
      const Expr *E = Caller->getExpr(PC: Caller->getRetPC());
      S.report(Loc: E->getExprLoc(),
               DiagId: diag::warn_is_constant_evaluated_always_true_constexpr)
          << "std::is_constant_evaluated" << E->getSourceRange();
    } else {
      S.report(Loc: Call->getExprLoc(),
               DiagId: diag::warn_is_constant_evaluated_always_true_constexpr)
          << "__builtin_is_constant_evaluated" << Call->getSourceRange();
    }
  }

  S.Stk.push<Boolean>(Args: Boolean::from(Value: S.inConstantContext()));
  return true;
}
249
250// __builtin_assume
251// __assume (MS extension)
252static bool interp__builtin_assume(InterpState &S, CodePtr OpPC,
253 const InterpFrame *Frame,
254 const CallExpr *Call) {
255 // Nothing to be done here since the argument is NOT evaluated.
256 assert(Call->getNumArgs() == 1);
257 return true;
258}
259
/// Implements strcmp/strncmp/wcscmp/wcsncmp (and the __builtin_ variants):
/// pops the (optional) length limit and the two string pointers, then
/// compares element-wise until a difference, a terminator, or the limit.
static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call, unsigned ID) {
  // The "n" variants carry a third argument limiting the comparison length;
  // default to "no limit" for the plain variants.
  uint64_t Limit = ~static_cast<uint64_t>(0);
  if (ID == Builtin::BIstrncmp || ID == Builtin::BI__builtin_strncmp ||
      ID == Builtin::BIwcsncmp || ID == Builtin::BI__builtin_wcsncmp)
    Limit = popToUInt64(S, E: Call->getArg(Arg: 2));

  // Arguments were pushed left-to-right, so B (arg 1) is on top.
  const Pointer &B = S.Stk.pop<Pointer>();
  const Pointer &A = S.Stk.pop<Pointer>();
  // The library functions (without __builtin_ prefix) are not constexpr.
  if (ID == Builtin::BIstrcmp || ID == Builtin::BIstrncmp ||
      ID == Builtin::BIwcscmp || ID == Builtin::BIwcsncmp)
    diagnoseNonConstexprBuiltin(S, OpPC, ID);

  // A zero limit compares nothing and yields equality.
  if (Limit == 0) {
    pushInteger(S, Val: 0, QT: Call->getType());
    return true;
  }

  if (!CheckLive(S, OpPC, Ptr: A, AK: AK_Read) || !CheckLive(S, OpPC, Ptr: B, AK: AK_Read))
    return false;

  if (A.isDummy() || B.isDummy())
    return false;
  if (!A.isBlockPointer() || !B.isBlockPointer())
    return false;

  bool IsWide = ID == Builtin::BIwcscmp || ID == Builtin::BIwcsncmp ||
                ID == Builtin::BI__builtin_wcscmp ||
                ID == Builtin::BI__builtin_wcsncmp;
  assert(A.getFieldDesc()->isPrimitiveArray());
  assert(B.getFieldDesc()->isPrimitiveArray());

  // Different element types shouldn't happen, but with casts they can.
  if (!S.getASTContext().hasSameUnqualifiedType(T1: getElemType(P: A), T2: getElemType(P: B)))
    return false;

  PrimType ElemT = *S.getContext().classify(T: getElemType(P: A));

  // Pushes the comparison result and signals success.
  auto returnResult = [&](int V) -> bool {
    pushInteger(S, Val: V, QT: Call->getType());
    return true;
  };

  unsigned IndexA = A.getIndex();
  unsigned IndexB = B.getIndex();
  uint64_t Steps = 0;
  for (;; ++IndexA, ++IndexB, ++Steps) {

    if (Steps >= Limit)
      break;
    const Pointer &PA = A.atIndex(Idx: IndexA);
    const Pointer &PB = B.atIndex(Idx: IndexB);
    if (!CheckRange(S, OpPC, Ptr: PA, AK: AK_Read) ||
        !CheckRange(S, OpPC, Ptr: PB, AK: AK_Read)) {
      return false;
    }

    // Wide strings compare using the element's prim type; a zero element
    // terminates the comparison.
    if (IsWide) {
      INT_TYPE_SWITCH(ElemT, {
        T CA = PA.deref<T>();
        T CB = PB.deref<T>();
        if (CA > CB)
          return returnResult(1);
        if (CA < CB)
          return returnResult(-1);
        if (CA.isZero() || CB.isZero())
          return returnResult(0);
      });
      continue;
    }

    // Narrow strings compare as unsigned bytes, per the C standard.
    uint8_t CA = PA.deref<uint8_t>();
    uint8_t CB = PB.deref<uint8_t>();

    if (CA > CB)
      return returnResult(1);
    if (CA < CB)
      return returnResult(-1);
    if (CA == 0 || CB == 0)
      return returnResult(0);
  }

  // Limit reached without a difference.
  return returnResult(0);
}
345
/// Implements strlen/wcslen (and the __builtin_ variants): counts elements
/// from the pointer's current index up to (excluding) the first zero element.
static bool interp__builtin_strlen(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call, unsigned ID) {
  const Pointer &StrPtr = S.Stk.pop<Pointer>().expand();

  // The library functions (without __builtin_ prefix) are not constexpr.
  if (ID == Builtin::BIstrlen || ID == Builtin::BIwcslen)
    diagnoseNonConstexprBuiltin(S, OpPC, ID);

  if (!CheckArray(S, OpPC, Ptr: StrPtr))
    return false;

  if (!CheckLive(S, OpPC, Ptr: StrPtr, AK: AK_Read))
    return false;

  if (!CheckDummy(S, OpPC, B: StrPtr.block(), AK: AK_Read))
    return false;

  if (!StrPtr.getFieldDesc()->isPrimitiveArray())
    return false;

  assert(StrPtr.getFieldDesc()->isPrimitiveArray());
  // Only byte-, 2-byte- and 4-byte-sized characters are supported.
  unsigned ElemSize = StrPtr.getFieldDesc()->getElemSize();
  if (ElemSize != 1 && ElemSize != 2 && ElemSize != 4)
    return Invalid(S, OpPC);

  // For the wide variants, the element size must match the target's wchar_t.
  if (ID == Builtin::BI__builtin_wcslen || ID == Builtin::BIwcslen) {
    const ASTContext &AC = S.getASTContext();
    unsigned WCharSize = AC.getTypeSizeInChars(T: AC.getWCharType()).getQuantity();
    if (ElemSize != WCharSize)
      return false;
  }

  // Walk elements until the terminating zero, range-checking each access.
  size_t Len = 0;
  for (size_t I = StrPtr.getIndex();; ++I, ++Len) {
    const Pointer &ElemPtr = StrPtr.atIndex(Idx: I);

    if (!CheckRange(S, OpPC, Ptr: ElemPtr, AK: AK_Read))
      return false;

    uint32_t Val;
    switch (ElemSize) {
    case 1:
      Val = ElemPtr.deref<uint8_t>();
      break;
    case 2:
      Val = ElemPtr.deref<uint16_t>();
      break;
    case 4:
      Val = ElemPtr.deref<uint32_t>();
      break;
    default:
      llvm_unreachable("Unsupported char size");
    }
    if (Val == 0)
      break;
  }

  pushInteger(S, Val: Len, QT: Call->getType());

  return true;
}
407
/// Implements __builtin_nan/__builtin_nans (and float/long double variants):
/// parses the string argument as the NaN payload and pushes a quiet or
/// signaling NaN in the return type's semantics.
static bool interp__builtin_nan(InterpState &S, CodePtr OpPC,
                                const InterpFrame *Frame, const CallExpr *Call,
                                bool Signaling) {
  const Pointer &Arg = S.Stk.pop<Pointer>();

  if (!CheckLoad(S, OpPC, Ptr: Arg))
    return false;

  if (!Arg.getFieldDesc()->isPrimitiveArray())
    return Invalid(S, OpPC);

  // Convert the given string to an integer using StringRef's API.
  llvm::APInt Fill;
  std::string Str;
  assert(Arg.getNumElems() >= 1);
  // Collect characters up to the null terminator, checking each load.
  for (unsigned I = 0;; ++I) {
    const Pointer &Elem = Arg.atIndex(Idx: I);

    if (!CheckLoad(S, OpPC, Ptr: Elem))
      return false;

    if (Elem.deref<int8_t>() == 0)
      break;

    Str += Elem.deref<char>();
  }

  // Treat empty strings as if they were zero.
  if (Str.empty())
    Fill = llvm::APInt(32, 0);
  else if (StringRef(Str).getAsInteger(Radix: 0, Result&: Fill))
    // Non-numeric payload string: the call is not a constant expression.
    return false;

  const llvm::fltSemantics &TargetSemantics =
      S.getASTContext().getFloatTypeSemantics(
          T: Call->getDirectCallee()->getReturnType());

  Floating Result = S.allocFloat(Sem: TargetSemantics);
  if (S.getASTContext().getTargetInfo().isNan2008()) {
    if (Signaling)
      Result.copy(
          F: llvm::APFloat::getSNaN(Sem: TargetSemantics, /*Negative=*/false, payload: &Fill));
    else
      Result.copy(
          F: llvm::APFloat::getQNaN(Sem: TargetSemantics, /*Negative=*/false, payload: &Fill));
  } else {
    // Prior to IEEE 754-2008, architectures were allowed to choose whether
    // the first bit of their significand was set for qNaN or sNaN. MIPS chose
    // a different encoding to what became a standard in 2008, and for pre-
    // 2008 revisions, MIPS interpreted sNaN-2008 as qNan and qNaN-2008 as
    // sNaN. This is now known as "legacy NaN" encoding.
    if (Signaling)
      Result.copy(
          F: llvm::APFloat::getQNaN(Sem: TargetSemantics, /*Negative=*/false, payload: &Fill));
    else
      Result.copy(
          F: llvm::APFloat::getSNaN(Sem: TargetSemantics, /*Negative=*/false, payload: &Fill));
  }

  S.Stk.push<Floating>(Args&: Result);
  return true;
}
470
471static bool interp__builtin_inf(InterpState &S, CodePtr OpPC,
472 const InterpFrame *Frame,
473 const CallExpr *Call) {
474 const llvm::fltSemantics &TargetSemantics =
475 S.getASTContext().getFloatTypeSemantics(
476 T: Call->getDirectCallee()->getReturnType());
477
478 Floating Result = S.allocFloat(Sem: TargetSemantics);
479 Result.copy(F: APFloat::getInf(Sem: TargetSemantics));
480 S.Stk.push<Floating>(Args&: Result);
481 return true;
482}
483
484static bool interp__builtin_copysign(InterpState &S, CodePtr OpPC,
485 const InterpFrame *Frame) {
486 const Floating &Arg2 = S.Stk.pop<Floating>();
487 const Floating &Arg1 = S.Stk.pop<Floating>();
488 Floating Result = S.allocFloat(Sem: Arg1.getSemantics());
489
490 APFloat Copy = Arg1.getAPFloat();
491 Copy.copySign(RHS: Arg2.getAPFloat());
492 Result.copy(F: Copy);
493 S.Stk.push<Floating>(Args&: Result);
494
495 return true;
496}
497
498static bool interp__builtin_fmin(InterpState &S, CodePtr OpPC,
499 const InterpFrame *Frame, bool IsNumBuiltin) {
500 const Floating &RHS = S.Stk.pop<Floating>();
501 const Floating &LHS = S.Stk.pop<Floating>();
502 Floating Result = S.allocFloat(Sem: LHS.getSemantics());
503
504 if (IsNumBuiltin)
505 Result.copy(F: llvm::minimumnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
506 else
507 Result.copy(F: minnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
508 S.Stk.push<Floating>(Args&: Result);
509 return true;
510}
511
512static bool interp__builtin_fmax(InterpState &S, CodePtr OpPC,
513 const InterpFrame *Frame, bool IsNumBuiltin) {
514 const Floating &RHS = S.Stk.pop<Floating>();
515 const Floating &LHS = S.Stk.pop<Floating>();
516 Floating Result = S.allocFloat(Sem: LHS.getSemantics());
517
518 if (IsNumBuiltin)
519 Result.copy(F: llvm::maximumnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
520 else
521 Result.copy(F: maxnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
522 S.Stk.push<Floating>(Args&: Result);
523 return true;
524}
525
526/// Defined as __builtin_isnan(...), to accommodate the fact that it can
527/// take a float, double, long double, etc.
528/// But for us, that's all a Floating anyway.
529static bool interp__builtin_isnan(InterpState &S, CodePtr OpPC,
530 const InterpFrame *Frame,
531 const CallExpr *Call) {
532 const Floating &Arg = S.Stk.pop<Floating>();
533
534 pushInteger(S, Val: Arg.isNan(), QT: Call->getType());
535 return true;
536}
537
538static bool interp__builtin_issignaling(InterpState &S, CodePtr OpPC,
539 const InterpFrame *Frame,
540 const CallExpr *Call) {
541 const Floating &Arg = S.Stk.pop<Floating>();
542
543 pushInteger(S, Val: Arg.isSignaling(), QT: Call->getType());
544 return true;
545}
546
547static bool interp__builtin_isinf(InterpState &S, CodePtr OpPC,
548 const InterpFrame *Frame, bool CheckSign,
549 const CallExpr *Call) {
550 const Floating &Arg = S.Stk.pop<Floating>();
551 APFloat F = Arg.getAPFloat();
552 bool IsInf = F.isInfinity();
553
554 if (CheckSign)
555 pushInteger(S, Val: IsInf ? (F.isNegative() ? -1 : 1) : 0, QT: Call->getType());
556 else
557 pushInteger(S, Val: IsInf, QT: Call->getType());
558 return true;
559}
560
561static bool interp__builtin_isfinite(InterpState &S, CodePtr OpPC,
562 const InterpFrame *Frame,
563 const CallExpr *Call) {
564 const Floating &Arg = S.Stk.pop<Floating>();
565
566 pushInteger(S, Val: Arg.isFinite(), QT: Call->getType());
567 return true;
568}
569
570static bool interp__builtin_isnormal(InterpState &S, CodePtr OpPC,
571 const InterpFrame *Frame,
572 const CallExpr *Call) {
573 const Floating &Arg = S.Stk.pop<Floating>();
574
575 pushInteger(S, Val: Arg.isNormal(), QT: Call->getType());
576 return true;
577}
578
579static bool interp__builtin_issubnormal(InterpState &S, CodePtr OpPC,
580 const InterpFrame *Frame,
581 const CallExpr *Call) {
582 const Floating &Arg = S.Stk.pop<Floating>();
583
584 pushInteger(S, Val: Arg.isDenormal(), QT: Call->getType());
585 return true;
586}
587
588static bool interp__builtin_iszero(InterpState &S, CodePtr OpPC,
589 const InterpFrame *Frame,
590 const CallExpr *Call) {
591 const Floating &Arg = S.Stk.pop<Floating>();
592
593 pushInteger(S, Val: Arg.isZero(), QT: Call->getType());
594 return true;
595}
596
597static bool interp__builtin_signbit(InterpState &S, CodePtr OpPC,
598 const InterpFrame *Frame,
599 const CallExpr *Call) {
600 const Floating &Arg = S.Stk.pop<Floating>();
601
602 pushInteger(S, Val: Arg.isNegative(), QT: Call->getType());
603 return true;
604}
605
606static bool interp_floating_comparison(InterpState &S, CodePtr OpPC,
607 const CallExpr *Call, unsigned ID) {
608 const Floating &RHS = S.Stk.pop<Floating>();
609 const Floating &LHS = S.Stk.pop<Floating>();
610
611 pushInteger(
612 S,
613 Val: [&] {
614 switch (ID) {
615 case Builtin::BI__builtin_isgreater:
616 return LHS > RHS;
617 case Builtin::BI__builtin_isgreaterequal:
618 return LHS >= RHS;
619 case Builtin::BI__builtin_isless:
620 return LHS < RHS;
621 case Builtin::BI__builtin_islessequal:
622 return LHS <= RHS;
623 case Builtin::BI__builtin_islessgreater: {
624 ComparisonCategoryResult Cmp = LHS.compare(RHS);
625 return Cmp == ComparisonCategoryResult::Less ||
626 Cmp == ComparisonCategoryResult::Greater;
627 }
628 case Builtin::BI__builtin_isunordered:
629 return LHS.compare(RHS) == ComparisonCategoryResult::Unordered;
630 default:
631 llvm_unreachable("Unexpected builtin ID: Should be a floating point "
632 "comparison function");
633 }
634 }(),
635 QT: Call->getType());
636 return true;
637}
638
/// First parameter to __builtin_isfpclass is the floating value, the
/// second one is an integral value (the FP class mask to test).
static bool interp__builtin_isfpclass(InterpState &S, CodePtr OpPC,
                                      const InterpFrame *Frame,
                                      const CallExpr *Call) {
  // Arguments were pushed left-to-right, so the mask is on top.
  APSInt FPClassArg = popToAPSInt(S, E: Call->getArg(Arg: 1));
  const Floating &F = S.Stk.pop<Floating>();

  // Intersect the value's class bits with the requested mask; a nonzero
  // result means the value is in one of the requested classes.
  int32_t Result = static_cast<int32_t>(
      (F.classify() & std::move(FPClassArg)).getZExtValue());
  pushInteger(S, Val: Result, QT: Call->getType());

  return true;
}
653
/// Five int values followed by one floating value.
/// __builtin_fpclassify(int, int, int, int, int, float)
/// Selects and returns one of the five int arguments depending on the
/// classification of the float argument (nan/inf/normal/subnormal/zero).
static bool interp__builtin_fpclassify(InterpState &S, CodePtr OpPC,
                                       const InterpFrame *Frame,
                                       const CallExpr *Call) {
  const Floating &Val = S.Stk.pop<Floating>();

  // Pop the five int arguments in reverse push order into Values[0..4].
  PrimType IntT = *S.getContext().classify(E: Call->getArg(Arg: 0));
  APSInt Values[5];
  for (unsigned I = 0; I != 5; ++I)
    Values[4 - I] = popToAPSInt(Stk&: S.Stk, T: IntT);

  unsigned Index;
  switch (Val.getCategory()) {
  case APFloat::fcNaN:
    Index = 0;
    break;
  case APFloat::fcInfinity:
    Index = 1;
    break;
  case APFloat::fcNormal:
    // fcNormal covers denormals too; distinguish via isDenormal().
    Index = Val.isDenormal() ? 3 : 2;
    break;
  case APFloat::fcZero:
    Index = 4;
    break;
  }

  // The last argument is first on the stack.
  assert(Index <= 4);

  pushInteger(S, Val: Values[Index], QT: Call->getType());
  return true;
}
688
689static inline Floating abs(InterpState &S, const Floating &In) {
690 if (!In.isNegative())
691 return In;
692
693 Floating Output = S.allocFloat(Sem: In.getSemantics());
694 APFloat New = In.getAPFloat();
695 New.changeSign();
696 Output.copy(F: New);
697 return Output;
698}
699
700// The C standard says "fabs raises no floating-point exceptions,
701// even if x is a signaling NaN. The returned value is independent of
702// the current rounding direction mode." Therefore constant folding can
703// proceed without regard to the floating point settings.
704// Reference, WG14 N2478 F.10.4.3
705static bool interp__builtin_fabs(InterpState &S, CodePtr OpPC,
706 const InterpFrame *Frame) {
707 const Floating &Val = S.Stk.pop<Floating>();
708 S.Stk.push<Floating>(Args: abs(S, In: Val));
709 return true;
710}
711
/// __builtin_abs and friends: integer absolute value. Negating the minimum
/// signed value would overflow, so that case fails the evaluation.
static bool interp__builtin_abs(InterpState &S, CodePtr OpPC,
                                const InterpFrame *Frame,
                                const CallExpr *Call) {
  APSInt Val = popToAPSInt(S, E: Call->getArg(Arg: 0));
  // |INT_MIN| is not representable in the same width; reject it.
  if (Val ==
      APSInt(APInt::getSignedMinValue(numBits: Val.getBitWidth()), /*IsUnsigned=*/false))
    return false;
  if (Val.isNegative())
    Val.negate();
  pushInteger(S, Val, QT: Call->getType());
  return true;
}
724
725static bool interp__builtin_popcount(InterpState &S, CodePtr OpPC,
726 const InterpFrame *Frame,
727 const CallExpr *Call) {
728 APSInt Val;
729 if (Call->getArg(Arg: 0)->getType()->isExtVectorBoolType()) {
730 const Pointer &Arg = S.Stk.pop<Pointer>();
731 Val = convertBoolVectorToInt(Val: Arg);
732 } else {
733 Val = popToAPSInt(S, E: Call->getArg(Arg: 0));
734 }
735 pushInteger(S, Val: Val.popcount(), QT: Call->getType());
736 return true;
737}
738
739static bool interp__builtin_ia32_crc32(InterpState &S, CodePtr OpPC,
740 const InterpFrame *Frame,
741 const CallExpr *Call,
742 unsigned DataBytes) {
743 uint64_t DataVal = popToUInt64(S, E: Call->getArg(Arg: 1));
744 uint64_t CRCVal = popToUInt64(S, E: Call->getArg(Arg: 0));
745
746 // CRC32C polynomial (iSCSI polynomial, bit-reversed)
747 static const uint32_t CRC32C_POLY = 0x82F63B78;
748
749 // Process each byte
750 uint32_t Result = static_cast<uint32_t>(CRCVal);
751 for (unsigned I = 0; I != DataBytes; ++I) {
752 uint8_t Byte = static_cast<uint8_t>((DataVal >> (I * 8)) & 0xFF);
753 Result ^= Byte;
754 for (int J = 0; J != 8; ++J) {
755 Result = (Result >> 1) ^ ((Result & 1) ? CRC32C_POLY : 0);
756 }
757 }
758
759 pushInteger(S, Val: Result, QT: Call->getType());
760 return true;
761}
762
763static bool interp__builtin_classify_type(InterpState &S, CodePtr OpPC,
764 const InterpFrame *Frame,
765 const CallExpr *Call) {
766 // This is an unevaluated call, so there are no arguments on the stack.
767 assert(Call->getNumArgs() == 1);
768 const Expr *Arg = Call->getArg(Arg: 0);
769
770 GCCTypeClass ResultClass =
771 EvaluateBuiltinClassifyType(T: Arg->getType(), LangOpts: S.getLangOpts());
772 int32_t ReturnVal = static_cast<int32_t>(ResultClass);
773 pushInteger(S, Val: ReturnVal, QT: Call->getType());
774 return true;
775}
776
// __builtin_expect(long, long)
// __builtin_expect_with_probability(long, long, double)
static bool interp__builtin_expect(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call) {
  // The return value is simply the value of the first parameter.
  // We ignore the probability.
  unsigned NumArgs = Call->getNumArgs();
  assert(NumArgs == 2 || NumArgs == 3);

  PrimType ArgT = *S.getContext().classify(T: Call->getArg(Arg: 0)->getType());
  // Drop the probability (arg 2, a double) if present, then the expected
  // value (arg 1, which shares arg 0's prim type).
  if (NumArgs == 3)
    S.Stk.discard<Floating>();
  discard(Stk&: S.Stk, T: ArgT);

  APSInt Val = popToAPSInt(Stk&: S.Stk, T: ArgT);
  pushInteger(S, Val, QT: Call->getType());
  return true;
}
796
/// __builtin_addressof (and the std wrappers): a pass-through for the
/// interpreter, since the pointer is already on the stack; debug builds
/// only sanity-check the argument.
static bool interp__builtin_addressof(InterpState &S, CodePtr OpPC,
                                      const InterpFrame *Frame,
                                      const CallExpr *Call) {
#ifndef NDEBUG
  assert(Call->getArg(0)->isLValue());
  PrimType PtrT = S.getContext().classify(Call->getArg(0)).value_or(PT_Ptr);
  assert(PtrT == PT_Ptr &&
         "Unsupported pointer type passed to __builtin_addressof()");
#endif
  return true;
}
808
809static bool interp__builtin_move(InterpState &S, CodePtr OpPC,
810 const InterpFrame *Frame,
811 const CallExpr *Call) {
812 return Call->getDirectCallee()->isConstexpr();
813}
814
815static bool interp__builtin_eh_return_data_regno(InterpState &S, CodePtr OpPC,
816 const InterpFrame *Frame,
817 const CallExpr *Call) {
818 APSInt Arg = popToAPSInt(S, E: Call->getArg(Arg: 0));
819
820 int Result = S.getASTContext().getTargetInfo().getEHDataRegisterNumber(
821 RegNo: Arg.getZExtValue());
822 pushInteger(S, Val: Result, QT: Call->getType());
823 return true;
824}
825
// Two integral values followed by a pointer (lhs, rhs, resultOut)
// Implements __builtin_{add,sub,mul}_overflow and the fixed-type
// s/u add/sub/mul variants: computes the operation, writes the (possibly
// truncated) result through the pointer and pushes the overflow flag.
static bool interp__builtin_overflowop(InterpState &S, CodePtr OpPC,
                                       const CallExpr *Call,
                                       unsigned BuiltinOp) {
  const Pointer &ResultPtr = S.Stk.pop<Pointer>();
  if (ResultPtr.isDummy() || !ResultPtr.isBlockPointer())
    return false;

  PrimType RHST = *S.getContext().classify(T: Call->getArg(Arg: 1)->getType());
  PrimType LHST = *S.getContext().classify(T: Call->getArg(Arg: 0)->getType());
  APSInt RHS = popToAPSInt(Stk&: S.Stk, T: RHST);
  APSInt LHS = popToAPSInt(Stk&: S.Stk, T: LHST);
  QualType ResultType = Call->getArg(Arg: 2)->getType()->getPointeeType();
  PrimType ResultT = *S.getContext().classify(T: ResultType);
  bool Overflow;

  APSInt Result;
  // The generic (type-agnostic) builtins allow mixed operand/result types:
  // widen everything to a common bit width first.
  if (BuiltinOp == Builtin::BI__builtin_add_overflow ||
      BuiltinOp == Builtin::BI__builtin_sub_overflow ||
      BuiltinOp == Builtin::BI__builtin_mul_overflow) {
    bool IsSigned = LHS.isSigned() || RHS.isSigned() ||
                    ResultType->isSignedIntegerOrEnumerationType();
    bool AllSigned = LHS.isSigned() && RHS.isSigned() &&
                     ResultType->isSignedIntegerOrEnumerationType();
    uint64_t LHSSize = LHS.getBitWidth();
    uint64_t RHSSize = RHS.getBitWidth();
    uint64_t ResultSize = S.getASTContext().getTypeSize(T: ResultType);
    uint64_t MaxBits = std::max(a: std::max(a: LHSSize, b: RHSSize), b: ResultSize);

    // Add an additional bit if the signedness isn't uniformly agreed to. We
    // could do this ONLY if there is a signed and an unsigned that both have
    // MaxBits, but the code to check that is pretty nasty. The issue will be
    // caught in the shrink-to-result later anyway.
    if (IsSigned && !AllSigned)
      ++MaxBits;

    LHS = APSInt(LHS.extOrTrunc(width: MaxBits), !IsSigned);
    RHS = APSInt(RHS.extOrTrunc(width: MaxBits), !IsSigned);
    Result = APSInt(MaxBits, !IsSigned);
  }

  // Find largest int.
  switch (BuiltinOp) {
  default:
    llvm_unreachable("Invalid value for BuiltinOp");
  case Builtin::BI__builtin_add_overflow:
  case Builtin::BI__builtin_sadd_overflow:
  case Builtin::BI__builtin_saddl_overflow:
  case Builtin::BI__builtin_saddll_overflow:
  case Builtin::BI__builtin_uadd_overflow:
  case Builtin::BI__builtin_uaddl_overflow:
  case Builtin::BI__builtin_uaddll_overflow:
    Result = LHS.isSigned() ? LHS.sadd_ov(RHS, Overflow)
                            : LHS.uadd_ov(RHS, Overflow);
    break;
  case Builtin::BI__builtin_sub_overflow:
  case Builtin::BI__builtin_ssub_overflow:
  case Builtin::BI__builtin_ssubl_overflow:
  case Builtin::BI__builtin_ssubll_overflow:
  case Builtin::BI__builtin_usub_overflow:
  case Builtin::BI__builtin_usubl_overflow:
  case Builtin::BI__builtin_usubll_overflow:
    Result = LHS.isSigned() ? LHS.ssub_ov(RHS, Overflow)
                            : LHS.usub_ov(RHS, Overflow);
    break;
  case Builtin::BI__builtin_mul_overflow:
  case Builtin::BI__builtin_smul_overflow:
  case Builtin::BI__builtin_smull_overflow:
  case Builtin::BI__builtin_smulll_overflow:
  case Builtin::BI__builtin_umul_overflow:
  case Builtin::BI__builtin_umull_overflow:
  case Builtin::BI__builtin_umulll_overflow:
    Result = LHS.isSigned() ? LHS.smul_ov(RHS, Overflow)
                            : LHS.umul_ov(RHS, Overflow);
    break;
  }

  // In the case where multiple sizes are allowed, truncate and see if
  // the values are the same.
  if (BuiltinOp == Builtin::BI__builtin_add_overflow ||
      BuiltinOp == Builtin::BI__builtin_sub_overflow ||
      BuiltinOp == Builtin::BI__builtin_mul_overflow) {
    // APSInt doesn't have a TruncOrSelf, so we use extOrTrunc instead,
    // since it will give us the behavior of a TruncOrSelf in the case where
    // its parameter <= its size. We previously set Result to be at least the
    // type-size of the result, so getTypeSize(ResultType) <= Resu
    APSInt Temp = Result.extOrTrunc(width: S.getASTContext().getTypeSize(T: ResultType));
    Temp.setIsSigned(ResultType->isSignedIntegerOrEnumerationType());

    // Truncation changed the value => the result doesn't fit => overflow.
    if (!APSInt::isSameValue(I1: Temp, I2: Result))
      Overflow = true;
    Result = std::move(Temp);
  }

  // Write Result to ResultPtr and put Overflow on the stack.
  assignInteger(S, Dest: ResultPtr, ValueT: ResultT, Value: Result);
  if (ResultPtr.canBeInitialized())
    ResultPtr.initialize();

  assert(Call->getDirectCallee()->getReturnType()->isBooleanType());
  S.Stk.push<Boolean>(Args&: Overflow);
  return true;
}
929
/// Three integral values followed by a pointer (lhs, rhs, carry, carryOut).
/// Implements __builtin_addc*/__builtin_subc*: performs lhs OP rhs OP carry,
/// stores the carry-out flag through the pointer, and pushes the result.
static bool interp__builtin_carryop(InterpState &S, CodePtr OpPC,
                                    const InterpFrame *Frame,
                                    const CallExpr *Call, unsigned BuiltinOp) {
  const Pointer &CarryOutPtr = S.Stk.pop<Pointer>();
  // All three integral arguments share the same type for these builtins.
  PrimType LHST = *S.getContext().classify(T: Call->getArg(Arg: 0)->getType());
  PrimType RHST = *S.getContext().classify(T: Call->getArg(Arg: 1)->getType());
  APSInt CarryIn = popToAPSInt(Stk&: S.Stk, T: LHST);
  APSInt RHS = popToAPSInt(Stk&: S.Stk, T: RHST);
  APSInt LHS = popToAPSInt(Stk&: S.Stk, T: LHST);

  if (CarryOutPtr.isDummy() || !CarryOutPtr.isBlockPointer())
    return false;

  APSInt CarryOut;

  APSInt Result;
  // Copy the number of bits and sign.
  Result = LHS;
  CarryOut = LHS;

  bool FirstOverflowed = false;
  bool SecondOverflowed = false;
  switch (BuiltinOp) {
  default:
    llvm_unreachable("Invalid value for BuiltinOp");
  case Builtin::BI__builtin_addcb:
  case Builtin::BI__builtin_addcs:
  case Builtin::BI__builtin_addc:
  case Builtin::BI__builtin_addcl:
  case Builtin::BI__builtin_addcll:
    Result =
        LHS.uadd_ov(RHS, Overflow&: FirstOverflowed).uadd_ov(RHS: CarryIn, Overflow&: SecondOverflowed);
    break;
  case Builtin::BI__builtin_subcb:
  case Builtin::BI__builtin_subcs:
  case Builtin::BI__builtin_subc:
  case Builtin::BI__builtin_subcl:
  case Builtin::BI__builtin_subcll:
    Result =
        LHS.usub_ov(RHS, Overflow&: FirstOverflowed).usub_ov(RHS: CarryIn, Overflow&: SecondOverflowed);
    break;
  }
  // It is possible for both overflows to happen but CGBuiltin uses an OR so
  // this is consistent.
  CarryOut = (uint64_t)(FirstOverflowed | SecondOverflowed);

  QualType CarryOutType = Call->getArg(Arg: 3)->getType()->getPointeeType();
  PrimType CarryOutT = *S.getContext().classify(T: CarryOutType);
  assignInteger(S, Dest: CarryOutPtr, ValueT: CarryOutT, Value: CarryOut);
  CarryOutPtr.initialize();

  assert(Call->getType() == Call->getArg(0)->getType());
  pushInteger(S, Val: Result, QT: Call->getType());
  return true;
}
986
987static bool interp__builtin_clz(InterpState &S, CodePtr OpPC,
988 const InterpFrame *Frame, const CallExpr *Call,
989 unsigned BuiltinOp) {
990
991 std::optional<APSInt> Fallback;
992 if (BuiltinOp == Builtin::BI__builtin_clzg && Call->getNumArgs() == 2)
993 Fallback = popToAPSInt(S, E: Call->getArg(Arg: 1));
994
995 APSInt Val;
996 if (Call->getArg(Arg: 0)->getType()->isExtVectorBoolType()) {
997 const Pointer &Arg = S.Stk.pop<Pointer>();
998 Val = convertBoolVectorToInt(Val: Arg);
999 } else {
1000 Val = popToAPSInt(S, E: Call->getArg(Arg: 0));
1001 }
1002
1003 // When the argument is 0, the result of GCC builtins is undefined, whereas
1004 // for Microsoft intrinsics, the result is the bit-width of the argument.
1005 bool ZeroIsUndefined = BuiltinOp != Builtin::BI__lzcnt16 &&
1006 BuiltinOp != Builtin::BI__lzcnt &&
1007 BuiltinOp != Builtin::BI__lzcnt64;
1008
1009 if (Val == 0) {
1010 if (Fallback) {
1011 pushInteger(S, Val: *Fallback, QT: Call->getType());
1012 return true;
1013 }
1014
1015 if (ZeroIsUndefined)
1016 return false;
1017 }
1018
1019 pushInteger(S, Val: Val.countl_zero(), QT: Call->getType());
1020 return true;
1021}
1022
1023static bool interp__builtin_ctz(InterpState &S, CodePtr OpPC,
1024 const InterpFrame *Frame, const CallExpr *Call,
1025 unsigned BuiltinID) {
1026 std::optional<APSInt> Fallback;
1027 if (BuiltinID == Builtin::BI__builtin_ctzg && Call->getNumArgs() == 2)
1028 Fallback = popToAPSInt(S, E: Call->getArg(Arg: 1));
1029
1030 APSInt Val;
1031 if (Call->getArg(Arg: 0)->getType()->isExtVectorBoolType()) {
1032 const Pointer &Arg = S.Stk.pop<Pointer>();
1033 Val = convertBoolVectorToInt(Val: Arg);
1034 } else {
1035 Val = popToAPSInt(S, E: Call->getArg(Arg: 0));
1036 }
1037
1038 if (Val == 0) {
1039 if (Fallback) {
1040 pushInteger(S, Val: *Fallback, QT: Call->getType());
1041 return true;
1042 }
1043 return false;
1044 }
1045
1046 pushInteger(S, Val: Val.countr_zero(), QT: Call->getType());
1047 return true;
1048}
1049
1050static bool interp__builtin_bswap(InterpState &S, CodePtr OpPC,
1051 const InterpFrame *Frame,
1052 const CallExpr *Call) {
1053 const APSInt &Val = popToAPSInt(S, E: Call->getArg(Arg: 0));
1054 if (Val.getBitWidth() == 8 || Val.getBitWidth() == 1)
1055 pushInteger(S, Val, QT: Call->getType());
1056 else
1057 pushInteger(S, Val: Val.byteSwap(), QT: Call->getType());
1058 return true;
1059}
1060
/// bool __atomic_always_lock_free(size_t, void const volatile*)
/// bool __atomic_is_lock_free(size_t, void const volatile*)
static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC,
                                             const InterpFrame *Frame,
                                             const CallExpr *Call,
                                             unsigned BuiltinOp) {
  // Helper: push a Boolean result and report success.
  auto returnBool = [&S](bool Value) -> bool {
    S.Stk.push<Boolean>(Args&: Value);
    return true;
  };

  // Pop the (optional) pointer argument, then the size.
  const Pointer &Ptr = S.Stk.pop<Pointer>();
  uint64_t SizeVal = popToUInt64(S, E: Call->getArg(Arg: 0));

  // For __atomic_is_lock_free(sizeof(_Atomic(T))), if the size is a power
  // of two less than or equal to the maximum inline atomic width, we know it
  // is lock-free.  If the size isn't a power of two, or greater than the
  // maximum alignment where we promote atomics, we know it is not lock-free
  // (at least not in the sense of atomic_is_lock_free).  Otherwise,
  // the answer can only be determined at runtime; for example, 16-byte
  // atomics have lock-free implementations on some, but not all,
  // x86-64 processors.

  // Check power-of-two.
  CharUnits Size = CharUnits::fromQuantity(Quantity: SizeVal);
  if (Size.isPowerOfTwo()) {
    // Check against inlining width.
    unsigned InlineWidthBits =
        S.getASTContext().getTargetInfo().getMaxAtomicInlineWidth();
    if (Size <= S.getASTContext().toCharUnitsFromBits(BitSize: InlineWidthBits)) {

      // OK, we will inline appropriately-aligned operations of this size,
      // and _Atomic(T) is appropriately-aligned.
      if (Size == CharUnits::One())
        return returnBool(true);

      // Same for null pointers.
      assert(BuiltinOp != Builtin::BI__c11_atomic_is_lock_free);
      if (Ptr.isZero())
        return returnBool(true);

      // An integral pointer value that is suitably aligned for this size
      // also answers "lock-free".
      if (Ptr.isIntegralPointer()) {
        uint64_t IntVal = Ptr.getIntegerRepresentation();
        if (APSInt(APInt(64, IntVal, false), true).isAligned(A: Size.getAsAlign()))
          return returnBool(true);
      }

      const Expr *PtrArg = Call->getArg(Arg: 1);
      // Otherwise, check if the type's alignment against Size.
      if (const auto *ICE = dyn_cast<ImplicitCastExpr>(Val: PtrArg)) {
        // Drop the potential implicit-cast to 'const volatile void*', getting
        // the underlying type.
        if (ICE->getCastKind() == CK_BitCast)
          PtrArg = ICE->getSubExpr();
      }

      if (const auto *PtrTy = PtrArg->getType()->getAs<PointerType>()) {
        QualType PointeeType = PtrTy->getPointeeType();
        if (!PointeeType->isIncompleteType() &&
            S.getASTContext().getTypeAlignInChars(T: PointeeType) >= Size) {
          // OK, we will inline operations on this object.
          return returnBool(true);
        }
      }
    }
  }

  // Undecided case: __atomic_always_lock_free is a conservative compile-time
  // query and answers "false"; __atomic_is_lock_free has a runtime-only
  // answer here, so constant evaluation fails.
  if (BuiltinOp == Builtin::BI__atomic_always_lock_free)
    return returnBool(false);

  return Invalid(S, OpPC);
}
1133
1134/// bool __c11_atomic_is_lock_free(size_t)
1135static bool interp__builtin_c11_atomic_is_lock_free(InterpState &S,
1136 CodePtr OpPC,
1137 const InterpFrame *Frame,
1138 const CallExpr *Call) {
1139 uint64_t SizeVal = popToUInt64(S, E: Call->getArg(Arg: 0));
1140
1141 CharUnits Size = CharUnits::fromQuantity(Quantity: SizeVal);
1142 if (Size.isPowerOfTwo()) {
1143 // Check against inlining width.
1144 unsigned InlineWidthBits =
1145 S.getASTContext().getTargetInfo().getMaxAtomicInlineWidth();
1146 if (Size <= S.getASTContext().toCharUnitsFromBits(BitSize: InlineWidthBits)) {
1147 S.Stk.push<Boolean>(Args: true);
1148 return true;
1149 }
1150 }
1151
1152 return false; // returnBool(false);
1153}
1154
1155/// __builtin_complex(Float A, float B);
1156static bool interp__builtin_complex(InterpState &S, CodePtr OpPC,
1157 const InterpFrame *Frame,
1158 const CallExpr *Call) {
1159 const Floating &Arg2 = S.Stk.pop<Floating>();
1160 const Floating &Arg1 = S.Stk.pop<Floating>();
1161 Pointer &Result = S.Stk.peek<Pointer>();
1162
1163 Result.elem<Floating>(I: 0) = Arg1;
1164 Result.elem<Floating>(I: 1) = Arg2;
1165 Result.initializeAllElements();
1166
1167 return true;
1168}
1169
/// __builtin_is_aligned()
/// __builtin_align_up()
/// __builtin_align_down()
/// The first parameter is either an integer or a pointer.
/// The second parameter is the requested alignment as an integer.
static bool interp__builtin_is_aligned_up_down(InterpState &S, CodePtr OpPC,
                                               const InterpFrame *Frame,
                                               const CallExpr *Call,
                                               unsigned BuiltinOp) {
  const APSInt &Alignment = popToAPSInt(S, E: Call->getArg(Arg: 1));

  // The alignment must be a positive power of two ...
  if (Alignment < 0 || !Alignment.isPowerOf2()) {
    S.FFDiag(E: Call, DiagId: diag::note_constexpr_invalid_alignment) << Alignment;
    return false;
  }
  // ... and representable in the first argument's type (at most the sign bit
  // position of that type's width).
  unsigned SrcWidth = S.getASTContext().getIntWidth(T: Call->getArg(Arg: 0)->getType());
  APSInt MaxValue(APInt::getOneBitSet(numBits: SrcWidth, BitNo: SrcWidth - 1));
  if (APSInt::compareValues(I1: Alignment, I2: MaxValue) > 0) {
    S.FFDiag(E: Call, DiagId: diag::note_constexpr_alignment_too_big)
        << MaxValue << Call->getArg(Arg: 0)->getType() << Alignment;
    return false;
  }

  // The first parameter is either an integer or a pointer.
  PrimType FirstArgT = *S.Ctx.classify(E: Call->getArg(Arg: 0));

  if (isIntegralType(T: FirstArgT)) {
    // Integer case: plain bit arithmetic with (Alignment - 1) as a mask.
    const APSInt &Src = popToAPSInt(Stk&: S.Stk, T: FirstArgT);
    APInt AlignMinusOne = Alignment.extOrTrunc(width: Src.getBitWidth()) - 1;
    if (BuiltinOp == Builtin::BI__builtin_align_up) {
      APSInt AlignedVal =
          APSInt((Src + AlignMinusOne) & ~AlignMinusOne, Src.isUnsigned());
      pushInteger(S, Val: AlignedVal, QT: Call->getType());
    } else if (BuiltinOp == Builtin::BI__builtin_align_down) {
      APSInt AlignedVal = APSInt(Src & ~AlignMinusOne, Src.isUnsigned());
      pushInteger(S, Val: AlignedVal, QT: Call->getType());
    } else {
      assert(*S.Ctx.classify(Call->getType()) == PT_Bool);
      S.Stk.push<Boolean>(Args: (Src & AlignMinusOne) == 0);
    }
    return true;
  }
  // Pointer case: reason about the declared alignment of the base object
  // plus the pointer's offset into it.
  assert(FirstArgT == PT_Ptr);
  const Pointer &Ptr = S.Stk.pop<Pointer>();
  if (!Ptr.isBlockPointer())
    return false;

  // For one-past-end pointers, we can't call getIndex() since it asserts.
  // Use getNumElems() instead which gives the correct index for past-end.
  unsigned PtrOffset =
      Ptr.isElementPastEnd() ? Ptr.getNumElems() : Ptr.getIndex();
  CharUnits BaseAlignment =
      S.getASTContext().getDeclAlign(D: Ptr.getDeclDesc()->asValueDecl());
  CharUnits PtrAlign =
      BaseAlignment.alignmentAtOffset(offset: CharUnits::fromQuantity(Quantity: PtrOffset));

  if (BuiltinOp == Builtin::BI__builtin_is_aligned) {
    if (PtrAlign.getQuantity() >= Alignment) {
      S.Stk.push<Boolean>(Args: true);
      return true;
    }
    // If the alignment is not known to be sufficient, some cases could still
    // be aligned at run time. However, if the requested alignment is less or
    // equal to the base alignment and the offset is not aligned, we know that
    // the run-time value can never be aligned.
    if (BaseAlignment.getQuantity() >= Alignment &&
        PtrAlign.getQuantity() < Alignment) {
      S.Stk.push<Boolean>(Args: false);
      return true;
    }

    // Otherwise the answer depends on the run-time address.
    S.FFDiag(E: Call->getArg(Arg: 0), DiagId: diag::note_constexpr_alignment_compute)
        << Alignment;
    return false;
  }

  assert(BuiltinOp == Builtin::BI__builtin_align_down ||
         BuiltinOp == Builtin::BI__builtin_align_up);

  // For align_up/align_down, we can return the same value if the alignment
  // is known to be greater or equal to the requested value.
  if (PtrAlign.getQuantity() >= Alignment) {
    S.Stk.push<Pointer>(Args: Ptr);
    return true;
  }

  // The alignment could be greater than the minimum at run-time, so we cannot
  // infer much about the resulting pointer value. One case is possible:
  // For `_Alignas(32) char buf[N]; __builtin_align_down(&buf[idx], 32)` we
  // can infer the correct index if the requested alignment is smaller than
  // the base alignment so we can perform the computation on the offset.
  if (BaseAlignment.getQuantity() >= Alignment) {
    assert(Alignment.getBitWidth() <= 64 &&
           "Cannot handle > 64-bit address-space");
    uint64_t Alignment64 = Alignment.getZExtValue();
    CharUnits NewOffset =
        CharUnits::fromQuantity(Quantity: BuiltinOp == Builtin::BI__builtin_align_down
                                    ? llvm::alignDown(Value: PtrOffset, Align: Alignment64)
                                    : llvm::alignTo(Value: PtrOffset, Align: Alignment64));

    S.Stk.push<Pointer>(Args: Ptr.atIndex(Idx: NewOffset.getQuantity()));
    return true;
  }

  // Otherwise, we cannot constant-evaluate the result.
  S.FFDiag(E: Call->getArg(Arg: 0), DiagId: diag::note_constexpr_alignment_adjust) << Alignment;
  return false;
}
1278
/// __builtin_assume_aligned(Ptr, Alignment[, ExtraOffset])
/// Verifies that the asserted alignment actually holds for the constant
/// pointer value (diagnosing if it does not), then returns the pointer
/// unchanged.
static bool interp__builtin_assume_aligned(InterpState &S, CodePtr OpPC,
                                           const InterpFrame *Frame,
                                           const CallExpr *Call) {
  assert(Call->getNumArgs() == 2 || Call->getNumArgs() == 3);

  // Pop in reverse push order: optional extra offset, alignment, pointer.
  std::optional<APSInt> ExtraOffset;
  if (Call->getNumArgs() == 3)
    ExtraOffset = popToAPSInt(Stk&: S.Stk, T: *S.Ctx.classify(E: Call->getArg(Arg: 2)));

  APSInt Alignment = popToAPSInt(Stk&: S.Stk, T: *S.Ctx.classify(E: Call->getArg(Arg: 1)));
  const Pointer &Ptr = S.Stk.pop<Pointer>();

  CharUnits Align = CharUnits::fromQuantity(Quantity: Alignment.getZExtValue());

  // If there is a base object, then it must have the correct alignment.
  if (Ptr.isBlockPointer()) {
    CharUnits BaseAlignment;
    if (const auto *VD = Ptr.getDeclDesc()->asValueDecl())
      BaseAlignment = S.getASTContext().getDeclAlign(D: VD);
    else if (const auto *E = Ptr.getDeclDesc()->asExpr())
      BaseAlignment = GetAlignOfExpr(Ctx: S.getASTContext(), E, ExprKind: UETT_AlignOf);

    if (BaseAlignment < Align) {
      S.CCEDiag(E: Call->getArg(Arg: 0),
                DiagId: diag::note_constexpr_baa_insufficient_alignment)
          << 0 << BaseAlignment.getQuantity() << Align.getQuantity();
      return false;
    }
  }

  // Check the pointer's offset (adjusted by the extra offset, if given)
  // against the asserted alignment.
  APValue AV = Ptr.toAPValue(ASTCtx: S.getASTContext());
  CharUnits AVOffset = AV.getLValueOffset();
  if (ExtraOffset)
    AVOffset -= CharUnits::fromQuantity(Quantity: ExtraOffset->getZExtValue());
  if (AVOffset.alignTo(Align) != AVOffset) {
    // Distinct diagnostics for block pointers vs. other pointer values.
    if (Ptr.isBlockPointer())
      S.CCEDiag(E: Call->getArg(Arg: 0),
                DiagId: diag::note_constexpr_baa_insufficient_alignment)
          << 1 << AVOffset.getQuantity() << Align.getQuantity();
    else
      S.CCEDiag(E: Call->getArg(Arg: 0),
                DiagId: diag::note_constexpr_baa_value_insufficient_alignment)
          << AVOffset.getQuantity() << Align.getQuantity();
    return false;
  }

  // The builtin is a pass-through for the pointer value itself.
  S.Stk.push<Pointer>(Args: Ptr);
  return true;
}
1329
/// (CarryIn, LHS, RHS, Result)
/// Implements the x86 addcarry/subborrow intrinsics. Note: despite its
/// name, CarryOutPtr is the 4th (result) argument and receives the
/// arithmetic result; the carry/borrow-out bit is the call's return value.
static bool interp__builtin_ia32_addcarry_subborrow(InterpState &S,
                                                    CodePtr OpPC,
                                                    const InterpFrame *Frame,
                                                    const CallExpr *Call,
                                                    unsigned BuiltinOp) {
  if (Call->getNumArgs() != 4 || !Call->getArg(Arg: 0)->getType()->isIntegerType() ||
      !Call->getArg(Arg: 1)->getType()->isIntegerType() ||
      !Call->getArg(Arg: 2)->getType()->isIntegerType())
    return false;

  // Pop in reverse push order: result pointer, rhs, lhs, carry-in.
  const Pointer &CarryOutPtr = S.Stk.pop<Pointer>();

  APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: 2));
  APSInt LHS = popToAPSInt(S, E: Call->getArg(Arg: 1));
  APSInt CarryIn = popToAPSInt(S, E: Call->getArg(Arg: 0));

  bool IsAdd = BuiltinOp == clang::X86::BI__builtin_ia32_addcarryx_u32 ||
               BuiltinOp == clang::X86::BI__builtin_ia32_addcarryx_u64;

  // Perform the operation in BitWidth+1 bits so the carry/borrow lands in
  // the extra top bit. Any non-zero carry-in counts as 1.
  unsigned BitWidth = LHS.getBitWidth();
  unsigned CarryInBit = CarryIn.ugt(RHS: 0) ? 1 : 0;
  APInt ExResult =
      IsAdd ? (LHS.zext(width: BitWidth + 1) + (RHS.zext(width: BitWidth + 1) + CarryInBit))
            : (LHS.zext(width: BitWidth + 1) - (RHS.zext(width: BitWidth + 1) + CarryInBit));

  // Low BitWidth bits are the result; bit BitWidth is the carry/borrow flag.
  APInt Result = ExResult.extractBits(numBits: BitWidth, bitPosition: 0);
  APSInt CarryOut =
      APSInt(ExResult.extractBits(numBits: 1, bitPosition: BitWidth), /*IsUnsigned=*/true);

  // Store the arithmetic result through the pointer argument...
  QualType CarryOutType = Call->getArg(Arg: 3)->getType()->getPointeeType();
  PrimType CarryOutT = *S.getContext().classify(T: CarryOutType);
  assignInteger(S, Dest: CarryOutPtr, ValueT: CarryOutT, Value: APSInt(std::move(Result), true));

  // ... and return the carry/borrow bit.
  pushInteger(S, Val: CarryOut, QT: Call->getType());

  return true;
}
1368
1369static bool interp__builtin_os_log_format_buffer_size(InterpState &S,
1370 CodePtr OpPC,
1371 const InterpFrame *Frame,
1372 const CallExpr *Call) {
1373 analyze_os_log::OSLogBufferLayout Layout;
1374 analyze_os_log::computeOSLogBufferLayout(Ctx&: S.getASTContext(), E: Call, layout&: Layout);
1375 pushInteger(S, Val: Layout.size().getQuantity(), QT: Call->getType());
1376 return true;
1377}
1378
/// __builtin_ptrauth_string_discriminator: compute the stable 64-bit
/// SipHash of the string-literal argument, as used for pointer-auth
/// discriminators.
static bool
interp__builtin_ptrauth_string_discriminator(InterpState &S, CodePtr OpPC,
                                             const InterpFrame *Frame,
                                             const CallExpr *Call) {
  const auto &Ptr = S.Stk.pop<Pointer>();
  assert(Ptr.getFieldDesc()->isPrimitiveArray());

  // This should be created for a StringLiteral, so should always hold at
  // least one array element; the last element (presumably the literal's NUL
  // terminator) is excluded from the hashed range.
  assert(Ptr.getFieldDesc()->getNumElems() >= 1);
  StringRef R(&Ptr.deref<char>(), Ptr.getFieldDesc()->getNumElems() - 1);
  uint64_t Result = getPointerAuthStableSipHash(S: R);
  pushInteger(S, Val: Result, QT: Call->getType());
  return true;
}
1394
/// __builtin_infer_alloc_token: infer the allocated type from the call and
/// map it to an allocation token (a size_t) using the configured token mode.
static bool interp__builtin_infer_alloc_token(InterpState &S, CodePtr OpPC,
                                              const InterpFrame *Frame,
                                              const CallExpr *Call) {
  const ASTContext &ASTCtx = S.getASTContext();
  uint64_t BitWidth = ASTCtx.getTypeSize(T: ASTCtx.getSizeType());
  auto Mode =
      ASTCtx.getLangOpts().AllocTokenMode.value_or(u: llvm::DefaultAllocTokenMode);
  auto MaxTokensOpt = ASTCtx.getLangOpts().AllocTokenMax;
  // If no non-zero maximum is configured, allow the full size_t range
  // (all-ones value of BitWidth bits).
  uint64_t MaxTokens =
      MaxTokensOpt.value_or(u: 0) ? *MaxTokensOpt : (~0ULL >> (64 - BitWidth));

  // We do not read any of the arguments; discard them.
  for (int I = Call->getNumArgs() - 1; I >= 0; --I)
    discard(Stk&: S.Stk, T: S.getContext().classify(E: Call->getArg(Arg: I)).value_or(PT: PT_Ptr));

  // Note: Type inference from a surrounding cast is not supported in
  // constexpr evaluation.
  QualType AllocType = infer_alloc::inferPossibleType(E: Call, Ctx: ASTCtx, CastE: nullptr);
  if (AllocType.isNull()) {
    S.CCEDiag(E: Call,
              DiagId: diag::note_constexpr_infer_alloc_token_type_inference_failed);
    return false;
  }

  // Map the inferred type to its token metadata; fails for types with none.
  auto ATMD = infer_alloc::getAllocTokenMetadata(T: AllocType, Ctx: ASTCtx);
  if (!ATMD) {
    S.CCEDiag(E: Call, DiagId: diag::note_constexpr_infer_alloc_token_no_metadata);
    return false;
  }

  // Stateful token modes cannot produce a compile-time-constant token.
  auto MaybeToken = llvm::getAllocToken(Mode, Metadata: *ATMD, MaxTokens);
  if (!MaybeToken) {
    S.CCEDiag(E: Call, DiagId: diag::note_constexpr_infer_alloc_token_stateful_mode);
    return false;
  }

  pushInteger(S, Val: llvm::APInt(BitWidth, *MaybeToken), QT: ASTCtx.getSizeType());
  return true;
}
1434
1435static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC,
1436 const InterpFrame *Frame,
1437 const CallExpr *Call) {
1438 // A call to __operator_new is only valid within std::allocate<>::allocate.
1439 // Walk up the call stack to find the appropriate caller and get the
1440 // element type from it.
1441 auto [NewCall, ElemType] = S.getStdAllocatorCaller(Name: "allocate");
1442
1443 if (ElemType.isNull()) {
1444 S.FFDiag(E: Call, DiagId: S.getLangOpts().CPlusPlus20
1445 ? diag::note_constexpr_new_untyped
1446 : diag::note_constexpr_new);
1447 return false;
1448 }
1449 assert(NewCall);
1450
1451 if (ElemType->isIncompleteType() || ElemType->isFunctionType()) {
1452 S.FFDiag(E: Call, DiagId: diag::note_constexpr_new_not_complete_object_type)
1453 << (ElemType->isIncompleteType() ? 0 : 1) << ElemType;
1454 return false;
1455 }
1456
1457 // We only care about the first parameter (the size), so discard all the
1458 // others.
1459 {
1460 unsigned NumArgs = Call->getNumArgs();
1461 assert(NumArgs >= 1);
1462
1463 // The std::nothrow_t arg never gets put on the stack.
1464 if (Call->getArg(Arg: NumArgs - 1)->getType()->isNothrowT())
1465 --NumArgs;
1466 auto Args = ArrayRef(Call->getArgs(), Call->getNumArgs());
1467 // First arg is needed.
1468 Args = Args.drop_front();
1469
1470 // Discard the rest.
1471 for (const Expr *Arg : Args)
1472 discard(Stk&: S.Stk, T: *S.getContext().classify(E: Arg));
1473 }
1474
1475 APSInt Bytes = popToAPSInt(S, E: Call->getArg(Arg: 0));
1476 CharUnits ElemSize = S.getASTContext().getTypeSizeInChars(T: ElemType);
1477 assert(!ElemSize.isZero());
1478 // Divide the number of bytes by sizeof(ElemType), so we get the number of
1479 // elements we should allocate.
1480 APInt NumElems, Remainder;
1481 APInt ElemSizeAP(Bytes.getBitWidth(), ElemSize.getQuantity());
1482 APInt::udivrem(LHS: Bytes, RHS: ElemSizeAP, Quotient&: NumElems, Remainder);
1483 if (Remainder != 0) {
1484 // This likely indicates a bug in the implementation of 'std::allocator'.
1485 S.FFDiag(E: Call, DiagId: diag::note_constexpr_operator_new_bad_size)
1486 << Bytes << APSInt(ElemSizeAP, true) << ElemType;
1487 return false;
1488 }
1489
1490 // NB: The same check we're using in CheckArraySize()
1491 if (NumElems.getActiveBits() >
1492 ConstantArrayType::getMaxSizeBits(Context: S.getASTContext()) ||
1493 NumElems.ugt(RHS: Descriptor::MaxArrayElemBytes / ElemSize.getQuantity())) {
1494 // FIXME: NoThrow check?
1495 const SourceInfo &Loc = S.Current->getSource(PC: OpPC);
1496 S.FFDiag(SI: Loc, DiagId: diag::note_constexpr_new_too_large)
1497 << NumElems.getZExtValue();
1498 return false;
1499 }
1500
1501 if (!CheckArraySize(S, OpPC, NumElems: NumElems.getZExtValue()))
1502 return false;
1503
1504 bool IsArray = NumElems.ugt(RHS: 1);
1505 OptPrimType ElemT = S.getContext().classify(T: ElemType);
1506 DynamicAllocator &Allocator = S.getAllocator();
1507 if (ElemT) {
1508 Block *B =
1509 Allocator.allocate(Source: NewCall, T: *ElemT, NumElements: NumElems.getZExtValue(),
1510 EvalID: S.Ctx.getEvalID(), AllocForm: DynamicAllocator::Form::Operator);
1511 assert(B);
1512 S.Stk.push<Pointer>(Args: Pointer(B).atIndex(Idx: 0));
1513 return true;
1514 }
1515
1516 assert(!ElemT);
1517
1518 // Composite arrays
1519 if (IsArray) {
1520 const Descriptor *Desc =
1521 S.P.createDescriptor(D: NewCall, Ty: ElemType.getTypePtr(), MDSize: std::nullopt);
1522 Block *B =
1523 Allocator.allocate(D: Desc, NumElements: NumElems.getZExtValue(), EvalID: S.Ctx.getEvalID(),
1524 AllocForm: DynamicAllocator::Form::Operator);
1525 assert(B);
1526 S.Stk.push<Pointer>(Args: Pointer(B).atIndex(Idx: 0).narrow());
1527 return true;
1528 }
1529
1530 // Records. Still allocate them as single-element arrays.
1531 QualType AllocType = S.getASTContext().getConstantArrayType(
1532 EltTy: ElemType, ArySize: NumElems, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);
1533
1534 const Descriptor *Desc = S.P.createDescriptor(D: NewCall, Ty: AllocType.getTypePtr(),
1535 MDSize: Descriptor::InlineDescMD);
1536 Block *B = Allocator.allocate(D: Desc, EvalID: S.getContext().getEvalID(),
1537 AllocForm: DynamicAllocator::Form::Operator);
1538 assert(B);
1539 S.Stk.push<Pointer>(Args: Pointer(B).atIndex(Idx: 0).narrow());
1540 return true;
1541}
1542
/// __builtin_operator_delete: deallocate storage previously obtained via
/// __builtin_operator_new, valid only inside std::allocator<T>::deallocate.
static bool interp__builtin_operator_delete(InterpState &S, CodePtr OpPC,
                                            const InterpFrame *Frame,
                                            const CallExpr *Call) {
  const Expr *Source = nullptr;
  const Block *BlockToDelete = nullptr;

  // While only checking for potential constant expressions, don't actually
  // deallocate anything.
  if (S.checkingPotentialConstantExpression()) {
    S.Stk.discard<Pointer>();
    return false;
  }

  // This is permitted only within a call to std::allocator<T>::deallocate.
  if (!S.getStdAllocatorCaller(Name: "deallocate")) {
    S.FFDiag(E: Call);
    S.Stk.discard<Pointer>();
    return true;
  }

  {
    const Pointer &Ptr = S.Stk.pop<Pointer>();

    // Deleting a null pointer is a no-op (with a CCE diagnostic).
    if (Ptr.isZero()) {
      S.CCEDiag(E: Call, DiagId: diag::note_constexpr_deallocate_null);
      return true;
    }

    Source = Ptr.getDeclDesc()->asExpr();
    BlockToDelete = Ptr.block();

    // NOTE(review): this diagnoses non-heap blocks but does not return;
    // evaluation falls through to the deallocate() call below — confirm
    // that relying on FFDiag's failure state is intended here.
    if (!BlockToDelete->isDynamic()) {
      S.FFDiag(E: Call, DiagId: diag::note_constexpr_delete_not_heap_alloc)
          << Ptr.toDiagnosticString(Ctx: S.getASTContext());
      if (const auto *D = Ptr.getFieldDesc()->asDecl())
        S.Note(Loc: D->getLocation(), DiagId: diag::note_declared_at);
    }
  }
  assert(BlockToDelete);

  // Look up how the block was allocated before deallocating it, so the
  // new/delete form mismatch check below can still inspect it.
  DynamicAllocator &Allocator = S.getAllocator();
  const Descriptor *BlockDesc = BlockToDelete->getDescriptor();
  std::optional<DynamicAllocator::Form> AllocForm =
      Allocator.getAllocationForm(Source);

  if (!Allocator.deallocate(Source, BlockToDelete, S)) {
    // Nothing has been deallocated, this must be a double-delete.
    const SourceInfo &Loc = S.Current->getSource(PC: OpPC);
    S.FFDiag(SI: Loc, DiagId: diag::note_constexpr_double_delete);
    return false;
  }
  assert(AllocForm);

  // Finally, verify the allocation and deallocation forms match
  // (operator new paired with operator delete).
  return CheckNewDeleteForms(
      S, OpPC, AllocForm: *AllocForm, DeleteForm: DynamicAllocator::Form::Operator, D: BlockDesc, NewExpr: Source);
}
1597
1598static bool interp__builtin_arithmetic_fence(InterpState &S, CodePtr OpPC,
1599 const InterpFrame *Frame,
1600 const CallExpr *Call) {
1601 const Floating &Arg0 = S.Stk.pop<Floating>();
1602 S.Stk.push<Floating>(Args: Arg0);
1603 return true;
1604}
1605
/// __builtin_reduce_{add,mul,and,or,xor,min,max}: fold all elements of an
/// integer vector into a single scalar of the element type. add/mul diagnose
/// (and fail) on signed overflow; the other operations cannot overflow.
static bool interp__builtin_vector_reduce(InterpState &S, CodePtr OpPC,
                                          const CallExpr *Call, unsigned ID) {
  const Pointer &Arg = S.Stk.pop<Pointer>();
  assert(Arg.getFieldDesc()->isPrimitiveArray());

  // The result type is the vector's element type.
  QualType ElemType = Arg.getFieldDesc()->getElemQualType();
  assert(Call->getType() == ElemType);
  PrimType ElemT = *S.getContext().classify(T: ElemType);
  unsigned NumElems = Arg.getNumElems();

  INT_TYPE_SWITCH_NO_BOOL(ElemT, {
    // Seed the accumulator with element 0, then fold in the rest.
    T Result = Arg.elem<T>(0);
    unsigned BitWidth = Result.bitWidth();
    for (unsigned I = 1; I != NumElems; ++I) {
      T Elem = Arg.elem<T>(I);
      // Kept for the overflow diagnostics below, which need the
      // pre-operation accumulator value.
      T PrevResult = Result;

      if (ID == Builtin::BI__builtin_reduce_add) {
        if (T::add(Result, Elem, BitWidth, &Result)) {
          // Recompute the overflowing sum in a wider type for the diagnostic.
          unsigned OverflowBits = BitWidth + 1;
          (void)handleOverflow(S, OpPC,
                               (PrevResult.toAPSInt(OverflowBits) +
                                Elem.toAPSInt(OverflowBits)));
          return false;
        }
      } else if (ID == Builtin::BI__builtin_reduce_mul) {
        if (T::mul(Result, Elem, BitWidth, &Result)) {
          // Products need double the bits to be represented exactly.
          unsigned OverflowBits = BitWidth * 2;
          (void)handleOverflow(S, OpPC,
                               (PrevResult.toAPSInt(OverflowBits) *
                                Elem.toAPSInt(OverflowBits)));
          return false;
        }

      } else if (ID == Builtin::BI__builtin_reduce_and) {
        (void)T::bitAnd(Result, Elem, BitWidth, &Result);
      } else if (ID == Builtin::BI__builtin_reduce_or) {
        (void)T::bitOr(Result, Elem, BitWidth, &Result);
      } else if (ID == Builtin::BI__builtin_reduce_xor) {
        (void)T::bitXor(Result, Elem, BitWidth, &Result);
      } else if (ID == Builtin::BI__builtin_reduce_min) {
        if (Elem < Result)
          Result = Elem;
      } else if (ID == Builtin::BI__builtin_reduce_max) {
        if (Elem > Result)
          Result = Elem;
      } else {
        llvm_unreachable("Unhandled vector reduce builtin");
      }
    }
    pushInteger(S, Result.toAPSInt(), Call->getType());
  });

  return true;
}
1661
/// __builtin_elementwise_abs: absolute value of an integer, a floating-point
/// value, or each element of an integer/floating-point vector.
static bool interp__builtin_elementwise_abs(InterpState &S, CodePtr OpPC,
                                            const InterpFrame *Frame,
                                            const CallExpr *Call,
                                            unsigned BuiltinID) {
  assert(Call->getNumArgs() == 1);
  QualType Ty = Call->getArg(Arg: 0)->getType();
  // Scalar integer case.
  if (Ty->isIntegerType()) {
    APSInt Val = popToAPSInt(S, E: Call->getArg(Arg: 0));
    pushInteger(S, Val: Val.abs(), QT: Call->getType());
    return true;
  }

  // Scalar floating-point case.
  if (Ty->isFloatingType()) {
    Floating Val = S.Stk.pop<Floating>();
    Floating Result = abs(S, In: Val);
    S.Stk.push<Floating>(Args&: Result);
    return true;
  }

  // Otherwise, the argument must be a vector.
  assert(Call->getArg(0)->getType()->isVectorType());
  const Pointer &Arg = S.Stk.pop<Pointer>();
  assert(Arg.getFieldDesc()->isPrimitiveArray());
  // The result vector is constructed in place; peek rather than pop.
  const Pointer &Dst = S.Stk.peek<Pointer>();
  assert(Dst.getFieldDesc()->isPrimitiveArray());
  assert(Arg.getFieldDesc()->getNumElems() ==
         Dst.getFieldDesc()->getNumElems());

  QualType ElemType = Arg.getFieldDesc()->getElemQualType();
  PrimType ElemT = *S.getContext().classify(T: ElemType);
  unsigned NumElems = Arg.getNumElems();
  // we can either have a vector of integer or a vector of floating point
  for (unsigned I = 0; I != NumElems; ++I) {
    if (ElemType->isIntegerType()) {
      // Round-trip through APSInt to compute |x|, preserving the element
      // type's signedness.
      INT_TYPE_SWITCH_NO_BOOL(ElemT, {
        Dst.elem<T>(I) = T::from(static_cast<T>(
            APSInt(Arg.elem<T>(I).toAPSInt().abs(),
                   ElemType->isUnsignedIntegerOrEnumerationType())));
      });
    } else {
      Floating Val = Arg.elem<Floating>(I);
      Dst.elem<Floating>(I) = abs(S, In: Val);
    }
  }
  Dst.initializeAllElements();

  return true;
}
1710
/// Can be called with an integer or vector as the first and only parameter.
/// Implements __builtin_elementwise_clzg / __builtin_elementwise_ctzg; the
/// optional second argument supplies per-element results for zero inputs.
static bool interp__builtin_elementwise_countzeroes(InterpState &S,
                                                    CodePtr OpPC,
                                                    const InterpFrame *Frame,
                                                    const CallExpr *Call,
                                                    unsigned BuiltinID) {
  bool HasZeroArg = Call->getNumArgs() == 2;
  bool IsCTTZ = BuiltinID == Builtin::BI__builtin_elementwise_ctzg;
  assert(Call->getNumArgs() == 1 || HasZeroArg);
  // Scalar integer case.
  if (Call->getArg(Arg: 0)->getType()->isIntegerType()) {
    PrimType ArgT = *S.getContext().classify(T: Call->getArg(Arg: 0)->getType());
    APSInt Val = popToAPSInt(Stk&: S.Stk, T: ArgT);
    std::optional<APSInt> ZeroVal;
    if (HasZeroArg) {
      // Arguments pop top-down: the first pop was the second (fallback)
      // argument; the actual input is underneath it.
      ZeroVal = Val;
      Val = popToAPSInt(Stk&: S.Stk, T: ArgT);
    }

    if (Val.isZero()) {
      if (ZeroVal) {
        pushInteger(S, Val: *ZeroVal, QT: Call->getType());
        return true;
      }
      // If we haven't been provided the second argument, the result is
      // undefined
      S.FFDiag(SI: S.Current->getSource(PC: OpPC),
               DiagId: diag::note_constexpr_countzeroes_zero)
          << /*IsTrailing=*/IsCTTZ;
      return false;
    }

    if (BuiltinID == Builtin::BI__builtin_elementwise_clzg) {
      pushInteger(S, Val: Val.countLeadingZeros(), QT: Call->getType());
    } else {
      pushInteger(S, Val: Val.countTrailingZeros(), QT: Call->getType());
    }
    return true;
  }
  // Otherwise, the argument must be a vector.
  const ASTContext &ASTCtx = S.getASTContext();
  Pointer ZeroArg;
  if (HasZeroArg) {
    // The fallback vector must have the same type as the input vector.
    assert(Call->getArg(1)->getType()->isVectorType() &&
           ASTCtx.hasSameUnqualifiedType(Call->getArg(0)->getType(),
                                         Call->getArg(1)->getType()));
    (void)ASTCtx;
    ZeroArg = S.Stk.pop<Pointer>();
    assert(ZeroArg.getFieldDesc()->isPrimitiveArray());
  }
  assert(Call->getArg(0)->getType()->isVectorType());
  const Pointer &Arg = S.Stk.pop<Pointer>();
  assert(Arg.getFieldDesc()->isPrimitiveArray());
  // The result vector is constructed in place; peek rather than pop.
  const Pointer &Dst = S.Stk.peek<Pointer>();
  assert(Dst.getFieldDesc()->isPrimitiveArray());
  assert(Arg.getFieldDesc()->getNumElems() ==
         Dst.getFieldDesc()->getNumElems());

  QualType ElemType = Arg.getFieldDesc()->getElemQualType();
  PrimType ElemT = *S.getContext().classify(T: ElemType);
  unsigned NumElems = Arg.getNumElems();

  // FIXME: Reading from uninitialized vector elements?
  for (unsigned I = 0; I != NumElems; ++I) {
    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
      APInt EltVal = Arg.atIndex(I).deref<T>().toAPSInt();
      if (EltVal.isZero()) {
        if (HasZeroArg) {
          // Zero element with a fallback: copy the corresponding fallback
          // element into the result.
          Dst.atIndex(I).deref<T>() = ZeroArg.atIndex(I).deref<T>();
        } else {
          // If we haven't been provided the second argument, the result is
          // undefined
          S.FFDiag(S.Current->getSource(OpPC),
                   diag::note_constexpr_countzeroes_zero)
              << /*IsTrailing=*/IsCTTZ;
          return false;
        }
      } else if (IsCTTZ) {
        Dst.atIndex(I).deref<T>() = T::from(EltVal.countTrailingZeros());
      } else {
        Dst.atIndex(I).deref<T>() = T::from(EltVal.countLeadingZeros());
      }
      Dst.atIndex(I).initialize();
    });
  }

  return true;
}
1798
/// Interpret __builtin_memcpy/__builtin_memmove and the library forms
/// memcpy/memmove/wmemcpy/wmemmove during constant evaluation.
///
/// Pops (in this order) the size, the source pointer and the destination
/// pointer from the stack. On success, pushes the destination pointer as
/// the call's result and returns true; on failure, emits a diagnostic and
/// returns false.
static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call, unsigned ID) {
  assert(Call->getNumArgs() == 3);
  const ASTContext &ASTCtx = S.getASTContext();
  uint64_t Size = popToUInt64(S, E: Call->getArg(Arg: 2));
  Pointer SrcPtr = S.Stk.pop<Pointer>().expand();
  Pointer DestPtr = S.Stk.pop<Pointer>().expand();

  // The plain library functions (as opposed to the __builtin_ forms) are
  // not ordinarily usable in constant expressions; diagnose that but keep
  // evaluating (the diagnostic may be a warning in some modes).
  if (ID == Builtin::BImemcpy || ID == Builtin::BImemmove)
    diagnoseNonConstexprBuiltin(S, OpPC, ID);

  // Move: overlapping regions are permitted (memmove family).
  // WChar: the size argument counts wchar_t units, not bytes.
  bool Move =
      (ID == Builtin::BI__builtin_memmove || ID == Builtin::BImemmove ||
       ID == Builtin::BI__builtin_wmemmove || ID == Builtin::BIwmemmove);
  bool WChar = ID == Builtin::BIwmemcpy || ID == Builtin::BIwmemmove ||
               ID == Builtin::BI__builtin_wmemcpy ||
               ID == Builtin::BI__builtin_wmemmove;

  // If the size is zero, we treat this as always being a valid no-op.
  if (Size == 0) {
    S.Stk.push<Pointer>(Args&: DestPtr);
    return true;
  }

  // Copying to or from a null pointer (with a non-zero size) is invalid.
  if (SrcPtr.isZero() || DestPtr.isZero()) {
    Pointer DiagPtr = (SrcPtr.isZero() ? SrcPtr : DestPtr);
    S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_null)
        << /*IsMove=*/Move << /*IsWchar=*/WChar << !SrcPtr.isZero()
        << DiagPtr.toDiagnosticString(Ctx: ASTCtx);
    return false;
  }

  // Diagnose integral src/dest pointers specially.
  if (SrcPtr.isIntegralPointer() || DestPtr.isIntegralPointer()) {
    std::string DiagVal = "(void *)";
    DiagVal += SrcPtr.isIntegralPointer()
                   ? std::to_string(val: SrcPtr.getIntegerRepresentation())
                   : std::to_string(val: DestPtr.getIntegerRepresentation());
    S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_null)
        << Move << WChar << DestPtr.isIntegralPointer() << DiagVal;
    return false;
  }

  if (!isReadable(P: DestPtr) || !isReadable(P: SrcPtr))
    return false;

  // Neither the pointee types nor the element type being copied may be
  // incomplete; check all three before doing any size arithmetic.
  if (DestPtr.getType()->isIncompleteType()) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC),
             DiagId: diag::note_constexpr_memcpy_incomplete_type)
        << Move << DestPtr.getType();
    return false;
  }
  if (SrcPtr.getType()->isIncompleteType()) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC),
             DiagId: diag::note_constexpr_memcpy_incomplete_type)
        << Move << SrcPtr.getType();
    return false;
  }

  QualType DestElemType = getElemType(P: DestPtr);
  if (DestElemType->isIncompleteType()) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC),
             DiagId: diag::note_constexpr_memcpy_incomplete_type)
        << Move << DestElemType;
    return false;
  }

  // Number of elements left in the destination from the current index to
  // its end; a non-array destination counts as a single element.
  size_t RemainingDestElems;
  if (DestPtr.getFieldDesc()->isArray()) {
    RemainingDestElems = DestPtr.isUnknownSizeArray()
                             ? 0
                             : (DestPtr.getNumElems() - DestPtr.getIndex());
  } else {
    RemainingDestElems = 1;
  }
  unsigned DestElemSize = ASTCtx.getTypeSizeInChars(T: DestElemType).getQuantity();

  // For the wide variants, convert the element count to bytes up front so
  // all following checks can work in bytes uniformly.
  if (WChar) {
    uint64_t WCharSize =
        ASTCtx.getTypeSizeInChars(T: ASTCtx.getWCharType()).getQuantity();
    Size *= WCharSize;
  }

  // Only whole elements may be copied in a constant expression.
  if (Size % DestElemSize != 0) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC),
             DiagId: diag::note_constexpr_memcpy_unsupported)
        << Move << WChar << 0 << DestElemType << Size << DestElemSize;
    return false;
  }

  QualType SrcElemType = getElemType(P: SrcPtr);
  size_t RemainingSrcElems;
  if (SrcPtr.getFieldDesc()->isArray()) {
    RemainingSrcElems = SrcPtr.isUnknownSizeArray()
                            ? 0
                            : (SrcPtr.getNumElems() - SrcPtr.getIndex());
  } else {
    RemainingSrcElems = 1;
  }
  unsigned SrcElemSize = ASTCtx.getTypeSizeInChars(T: SrcElemType).getQuantity();

  // Copying between distinct element types would be type punning.
  if (!ASTCtx.hasSameUnqualifiedType(T1: DestElemType, T2: SrcElemType)) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_type_pun)
        << Move << SrcElemType << DestElemType;
    return false;
  }

  if (!DestElemType.isTriviallyCopyableType(Context: ASTCtx)) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_nontrivial)
        << Move << DestElemType;
    return false;
  }

  // Check if we have enough elements to read from and write to.
  size_t RemainingDestBytes = RemainingDestElems * DestElemSize;
  size_t RemainingSrcBytes = RemainingSrcElems * SrcElemSize;
  if (Size > RemainingDestBytes || Size > RemainingSrcBytes) {
    APInt N = APInt(64, Size / DestElemSize);
    S.FFDiag(SI: S.Current->getSource(PC: OpPC),
             DiagId: diag::note_constexpr_memcpy_unsupported)
        << Move << WChar << (Size > RemainingSrcBytes ? 1 : 2) << DestElemType
        << toString(I: N, Radix: 10, /*Signed=*/false);
    return false;
  }

  // Check for overlapping memory regions.
  if (!Move && Pointer::pointToSameBlock(A: SrcPtr, B: DestPtr)) {
    // Remove base casts.
    Pointer SrcP = SrcPtr.stripBaseCasts();
    Pointer DestP = DestPtr.stripBaseCasts();

    // Byte offsets of both pointers within their (shared) block.
    unsigned SrcIndex = SrcP.expand().getIndex() * SrcP.elemSize();
    unsigned DstIndex = DestP.expand().getIndex() * DestP.elemSize();

    // Overlap if either range's start lies within the other range.
    if ((SrcIndex <= DstIndex && (SrcIndex + Size) > DstIndex) ||
        (DstIndex <= SrcIndex && (DstIndex + Size) > SrcIndex)) {
      // NOTE(review): IsWChar is hard-coded to false here even for the
      // wmemcpy variants — confirm whether the diagnostic should pass WChar.
      S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_overlap)
          << /*IsWChar=*/false;
      return false;
    }
  }

  assert(Size % DestElemSize == 0);
  if (!DoMemcpy(S, OpPC, SrcPtr, DestPtr, Size: Bytes(Size).toBits()))
    return false;

  S.Stk.push<Pointer>(Args&: DestPtr);
  return true;
}
1949
1950/// Determine if T is a character type for which we guarantee that
1951/// sizeof(T) == 1.
1952static bool isOneByteCharacterType(QualType T) {
1953 return T->isCharType() || T->isChar8Type();
1954}
1955
/// Interpret __builtin_memcmp/__builtin_bcmp/__builtin_wmemcmp and the
/// corresponding library functions during constant evaluation.
///
/// Pops the size and the two pointers, then compares the pointed-to data
/// by reading both objects into raw byte buffers. Pushes -1/0/1 as the
/// call's result, or returns false with a diagnostic when the comparison
/// cannot be done in a constant expression.
static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC,
                                   const InterpFrame *Frame,
                                   const CallExpr *Call, unsigned ID) {
  assert(Call->getNumArgs() == 3);
  uint64_t Size = popToUInt64(S, E: Call->getArg(Arg: 2));
  const Pointer &PtrB = S.Stk.pop<Pointer>();
  const Pointer &PtrA = S.Stk.pop<Pointer>();

  // The library forms are not ordinarily constant-evaluatable; diagnose.
  if (ID == Builtin::BImemcmp || ID == Builtin::BIbcmp ||
      ID == Builtin::BIwmemcmp)
    diagnoseNonConstexprBuiltin(S, OpPC, ID);

  // Comparing zero bytes always yields equality, even for bad pointers.
  if (Size == 0) {
    pushInteger(S, Val: 0, QT: Call->getType());
    return true;
  }

  if (!PtrA.isBlockPointer() || !PtrB.isBlockPointer())
    return false;

  bool IsWide =
      (ID == Builtin::BIwmemcmp || ID == Builtin::BI__builtin_wmemcmp);

  const ASTContext &ASTCtx = S.getASTContext();
  QualType ElemTypeA = getElemType(P: PtrA);
  QualType ElemTypeB = getElemType(P: PtrB);
  // FIXME: This is an arbitrary limitation the current constant interpreter
  // had. We could remove this.
  if (!IsWide && (!isOneByteCharacterType(T: ElemTypeA) ||
                  !isOneByteCharacterType(T: ElemTypeB))) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC),
             DiagId: diag::note_constexpr_memcmp_unsupported)
        << ASTCtx.BuiltinInfo.getQuotedName(ID) << PtrA.getType()
        << PtrB.getType();
    return false;
  }

  if (!CheckLoad(S, OpPC, Ptr: PtrA, AK: AK_Read) || !CheckLoad(S, OpPC, Ptr: PtrB, AK: AK_Read))
    return false;

  // Now, read both pointers to a buffer and compare those.
  BitcastBuffer BufferA(
      Bits(ASTCtx.getTypeSize(T: ElemTypeA) * PtrA.getNumElems()));
  readPointerToBuffer(Ctx: S.getContext(), FromPtr: PtrA, Buffer&: BufferA, ReturnOnUninit: false);
  // FIXME: The swapping here is UNDOING something we do when reading the
  // data into the buffer.
  if (ASTCtx.getTargetInfo().isBigEndian())
    swapBytes(M: BufferA.Data.get(), N: BufferA.byteSize().getQuantity());

  BitcastBuffer BufferB(
      Bits(ASTCtx.getTypeSize(T: ElemTypeB) * PtrB.getNumElems()));
  readPointerToBuffer(Ctx: S.getContext(), FromPtr: PtrB, Buffer&: BufferB, ReturnOnUninit: false);
  // FIXME: The swapping here is UNDOING something we do when reading the
  // data into the buffer.
  if (ASTCtx.getTargetInfo().isBigEndian())
    swapBytes(M: BufferB.Data.get(), N: BufferB.byteSize().getQuantity());

  // We can compare at most as many bytes as the smaller object provides.
  size_t MinBufferSize = std::min(a: BufferA.byteSize().getQuantity(),
                                  b: BufferB.byteSize().getQuantity());

  unsigned ElemSize = 1;
  if (IsWide)
    ElemSize = ASTCtx.getTypeSizeInChars(T: ASTCtx.getWCharType()).getQuantity();
  // The Size given for the wide variants is in wide-char units. Convert it
  // to bytes.
  size_t ByteSize = Size * ElemSize;
  size_t CmpSize = std::min(a: MinBufferSize, b: ByteSize);

  // Compare element by element: wchar_t-sized units for the wide variants,
  // single bytes otherwise. The first difference decides the result.
  for (size_t I = 0; I != CmpSize; I += ElemSize) {
    if (IsWide) {
      INT_TYPE_SWITCH(*S.getContext().classify(ASTCtx.getWCharType()), {
        T A = *reinterpret_cast<T *>(BufferA.atByte(I));
        T B = *reinterpret_cast<T *>(BufferB.atByte(I));
        if (A < B) {
          pushInteger(S, -1, Call->getType());
          return true;
        }
        if (A > B) {
          pushInteger(S, 1, Call->getType());
          return true;
        }
      });
    } else {
      std::byte A = BufferA.deref<std::byte>(Offset: Bytes(I));
      std::byte B = BufferB.deref<std::byte>(Offset: Bytes(I));

      if (A < B) {
        pushInteger(S, Val: -1, QT: Call->getType());
        return true;
      }
      if (A > B) {
        pushInteger(S, Val: 1, QT: Call->getType());
        return true;
      }
    }
  }

  // We compared CmpSize bytes above. If the limiting factor was the Size
  // passed, we're done and the result is equality (0).
  if (ByteSize <= CmpSize) {
    pushInteger(S, Val: 0, QT: Call->getType());
    return true;
  }

  // However, if we read all the available bytes but were instructed to read
  // even more, diagnose this as a "read of dereferenced one-past-the-end
  // pointer". This is what would happen if we called CheckLoad() on every array
  // element.
  S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_access_past_end)
      << AK_Read << S.Current->getRange(PC: OpPC);
  return false;
}
2068
2069// __builtin_memchr(ptr, int, int)
2070// __builtin_strchr(ptr, int)
// __builtin_memchr(ptr, int, int)
// __builtin_strchr(ptr, int)
//
// Also handles the wide variants (wmemchr/wcschr) and the library forms.
// Scans the pointed-to array for the desired value and pushes a pointer to
// the first match (or a null Pointer if there is no match). Returns false
// with a diagnostic when the scan cannot be constant-evaluated.
static bool interp__builtin_memchr(InterpState &S, CodePtr OpPC,
                                   const CallExpr *Call, unsigned ID) {
  // The library forms are not ordinarily constant-evaluatable; diagnose.
  if (ID == Builtin::BImemchr || ID == Builtin::BIwcschr ||
      ID == Builtin::BIstrchr || ID == Builtin::BIwmemchr)
    diagnoseNonConstexprBuiltin(S, OpPC, ID);

  // strchr/wcschr have only two arguments; the *chr memory variants carry a
  // maximum length as their third argument.
  std::optional<APSInt> MaxLength;
  if (Call->getNumArgs() == 3)
    MaxLength = popToAPSInt(S, E: Call->getArg(Arg: 2));

  APSInt Desired = popToAPSInt(S, E: Call->getArg(Arg: 1));
  const Pointer &Ptr = S.Stk.pop<Pointer>();

  // Searching zero elements always "fails" with a null result.
  if (MaxLength && MaxLength->isZero()) {
    S.Stk.push<Pointer>();
    return true;
  }

  if (Ptr.isDummy()) {
    if (Ptr.getType()->isIncompleteType())
      S.FFDiag(SI: S.Current->getSource(PC: OpPC),
               DiagId: diag::note_constexpr_ltor_incomplete_type)
          << Ptr.getType();
    return false;
  }

  // Null is only okay if the given size is 0.
  if (Ptr.isZero()) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_access_null)
        << AK_Read;
    return false;
  }

  if (!Ptr.isBlockPointer())
    return false;

  QualType ElemTy = Ptr.getFieldDesc()->isArray()
                        ? Ptr.getFieldDesc()->getElemQualType()
                        : Ptr.getFieldDesc()->getType();
  bool IsRawByte = ID == Builtin::BImemchr || ID == Builtin::BI__builtin_memchr;

  // Give up on byte-oriented matching against multibyte elements.
  if (IsRawByte && !isOneByteCharacterType(T: ElemTy)) {
    S.FFDiag(SI: S.Current->getSource(PC: OpPC),
             DiagId: diag::note_constexpr_memchr_unsupported)
        << S.getASTContext().BuiltinInfo.getQuotedName(ID) << ElemTy;
    return false;
  }

  if (!isReadable(P: Ptr))
    return false;

  if (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr) {
    // Truncate the desired value to char width, respecting char's
    // signedness on this target.
    int64_t DesiredTrunc;
    if (S.getASTContext().CharTy->isSignedIntegerType())
      DesiredTrunc =
          Desired.trunc(width: S.getASTContext().getCharWidth()).getSExtValue();
    else
      DesiredTrunc =
          Desired.trunc(width: S.getASTContext().getCharWidth()).getZExtValue();
    // strchr compares directly to the passed integer, and therefore
    // always fails if given an int that is not a char.
    if (Desired != DesiredTrunc) {
      S.Stk.push<Pointer>();
      return true;
    }
  }

  uint64_t DesiredVal;
  if (ID == Builtin::BIwmemchr || ID == Builtin::BI__builtin_wmemchr ||
      ID == Builtin::BIwcschr || ID == Builtin::BI__builtin_wcschr) {
    // wcschr and wmemchr are given a wchar_t to look for. Just use it.
    DesiredVal = Desired.getZExtValue();
  } else {
    DesiredVal = Desired.trunc(width: S.getASTContext().getCharWidth()).getZExtValue();
  }

  // The str/wcs variants stop at the string terminator; the mem variants
  // scan through embedded nulls.
  bool StopAtZero =
      (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr ||
       ID == Builtin::BIwcschr || ID == Builtin::BI__builtin_wcschr);

  PrimType ElemT =
      IsRawByte ? PT_Sint8 : *S.getContext().classify(T: getElemType(P: Ptr));

  // Walk the elements starting at the pointer's current index, checking
  // each load so out-of-bounds/uninitialized reads are diagnosed normally.
  size_t Index = Ptr.getIndex();
  size_t Step = 0;
  for (;;) {
    const Pointer &ElemPtr =
        (Index + Step) > 0 ? Ptr.atIndex(Idx: Index + Step) : Ptr;

    if (!CheckLoad(S, OpPC, Ptr: ElemPtr))
      return false;

    uint64_t V;
    INT_TYPE_SWITCH_NO_BOOL(
        ElemT, { V = static_cast<uint64_t>(ElemPtr.deref<T>().toUnsigned()); });

    if (V == DesiredVal) {
      S.Stk.push<Pointer>(Args: ElemPtr);
      return true;
    }

    if (StopAtZero && V == 0)
      break;

    ++Step;
    if (MaxLength && Step == MaxLength->getZExtValue())
      break;
  }

  // No match: the result is a null pointer.
  S.Stk.push<Pointer>();
  return true;
}
2184
2185static std::optional<unsigned> computeFullDescSize(const ASTContext &ASTCtx,
2186 const Descriptor *Desc) {
2187 if (Desc->isPrimitive())
2188 return ASTCtx.getTypeSizeInChars(T: Desc->getType()).getQuantity();
2189 if (Desc->isArray())
2190 return ASTCtx.getTypeSizeInChars(T: Desc->getElemQualType()).getQuantity() *
2191 Desc->getNumElems();
2192 if (Desc->isRecord()) {
2193 // Can't use Descriptor::getType() as that may return a pointer type. Look
2194 // at the decl directly.
2195 return ASTCtx
2196 .getTypeSizeInChars(
2197 T: ASTCtx.getCanonicalTagType(TD: Desc->ElemRecord->getDecl()))
2198 .getQuantity();
2199 }
2200
2201 return std::nullopt;
2202}
2203
2204/// Compute the byte offset of \p Ptr in the full declaration.
/// Compute the byte offset of \p Ptr in the full declaration.
///
/// Walks from \p Ptr up to the root of its allocation, accumulating the
/// byte offset contributed at each level: array-element index times element
/// size, base-class layout offsets, and field layout offsets.
static unsigned computePointerOffset(const ASTContext &ASTCtx,
                                     const Pointer &Ptr) {
  unsigned Result = 0;

  Pointer P = Ptr;
  while (P.isField() || P.isArrayElement()) {
    P = P.expand();
    const Descriptor *D = P.getFieldDesc();

    if (P.isArrayElement()) {
      unsigned ElemSize =
          ASTCtx.getTypeSizeInChars(T: D->getElemQualType()).getQuantity();
      // A one-past-the-end pointer sits at the full array's extent.
      if (P.isOnePastEnd())
        Result += ElemSize * P.getNumElems();
      else
        Result += ElemSize * P.getIndex();
      P = P.expand().getArray();
    } else if (P.isBaseClass()) {
      const auto *RD = cast<CXXRecordDecl>(Val: D->asDecl());
      // NOTE(review): this queries the original Ptr rather than the current
      // walk pointer P; for chains of base casts it looks like it should be
      // P.isVirtualBaseClass() — confirm against the upstream source.
      bool IsVirtual = Ptr.isVirtualBaseClass();
      P = P.getBase();
      const Record *BaseRecord = P.getRecord();

      // Look up the base's offset in the enclosing record's layout.
      const ASTRecordLayout &Layout =
          ASTCtx.getASTRecordLayout(D: cast<CXXRecordDecl>(Val: BaseRecord->getDecl()));
      if (IsVirtual)
        Result += Layout.getVBaseClassOffset(VBase: RD).getQuantity();
      else
        Result += Layout.getBaseClassOffset(Base: RD).getQuantity();
    } else if (P.isField()) {
      // Add the field's layout offset within its parent record.
      const FieldDecl *FD = P.getField();
      const ASTRecordLayout &Layout =
          ASTCtx.getASTRecordLayout(D: FD->getParent());
      unsigned FieldIndex = FD->getFieldIndex();
      uint64_t FieldOffset =
          ASTCtx.toCharUnitsFromBits(BitSize: Layout.getFieldOffset(FieldNo: FieldIndex))
              .getQuantity();
      Result += FieldOffset;
      P = P.getBase();
    } else
      llvm_unreachable("Unhandled descriptor type");
  }

  return Result;
}
2250
2251/// Does Ptr point to the last subobject?
2252static bool pointsToLastObject(const Pointer &Ptr) {
2253 Pointer P = Ptr;
2254 while (!P.isRoot()) {
2255
2256 if (P.isArrayElement()) {
2257 P = P.expand().getArray();
2258 continue;
2259 }
2260 if (P.isBaseClass()) {
2261 if (P.getRecord()->getNumFields() > 0)
2262 return false;
2263 P = P.getBase();
2264 continue;
2265 }
2266
2267 Pointer Base = P.getBase();
2268 if (const Record *R = Base.getRecord()) {
2269 assert(P.getField());
2270 if (P.getField()->getFieldIndex() != R->getNumFields() - 1)
2271 return false;
2272 }
2273 P = Base;
2274 }
2275
2276 return true;
2277}
2278
2279/// Does Ptr point to the last object AND to a flexible array member?
2280static bool isUserWritingOffTheEnd(const ASTContext &Ctx, const Pointer &Ptr,
2281 bool InvalidBase) {
2282 auto isFlexibleArrayMember = [&](const Descriptor *FieldDesc) {
2283 using FAMKind = LangOptions::StrictFlexArraysLevelKind;
2284 FAMKind StrictFlexArraysLevel =
2285 Ctx.getLangOpts().getStrictFlexArraysLevel();
2286
2287 if (StrictFlexArraysLevel == FAMKind::Default)
2288 return true;
2289
2290 unsigned NumElems = FieldDesc->getNumElems();
2291 if (NumElems == 0 && StrictFlexArraysLevel != FAMKind::IncompleteOnly)
2292 return true;
2293
2294 if (NumElems == 1 && StrictFlexArraysLevel == FAMKind::OneZeroOrIncomplete)
2295 return true;
2296 return false;
2297 };
2298
2299 const Descriptor *FieldDesc = Ptr.getFieldDesc();
2300 if (!FieldDesc->isArray())
2301 return false;
2302
2303 return InvalidBase && pointsToLastObject(Ptr) &&
2304 isFlexibleArrayMember(FieldDesc);
2305}
2306
2307UnsignedOrNone evaluateBuiltinObjectSize(const ASTContext &ASTCtx,
2308 unsigned Kind, Pointer &Ptr) {
2309 if (Ptr.isZero() || !Ptr.isBlockPointer())
2310 return std::nullopt;
2311
2312 if (Ptr.isDummy() && Ptr.getType()->isPointerType())
2313 return std::nullopt;
2314
2315 bool InvalidBase = false;
2316
2317 if (Ptr.isDummy()) {
2318 if (const VarDecl *VD = Ptr.getDeclDesc()->asVarDecl();
2319 VD && VD->getType()->isPointerType())
2320 InvalidBase = true;
2321 }
2322
2323 // According to the GCC documentation, we want the size of the subobject
2324 // denoted by the pointer. But that's not quite right -- what we actually
2325 // want is the size of the immediately-enclosing array, if there is one.
2326 if (Ptr.isArrayElement())
2327 Ptr = Ptr.expand();
2328
2329 bool DetermineForCompleteObject = Ptr.getFieldDesc() == Ptr.getDeclDesc();
2330 const Descriptor *DeclDesc = Ptr.getDeclDesc();
2331 assert(DeclDesc);
2332
2333 bool UseFieldDesc = (Kind & 1u);
2334 bool ReportMinimum = (Kind & 2u);
2335 if (!UseFieldDesc || DetermineForCompleteObject) {
2336 // Lower bound, so we can't fall back to this.
2337 if (ReportMinimum && UseFieldDesc && !DetermineForCompleteObject)
2338 return std::nullopt;
2339
2340 // Can't read beyond the pointer decl desc.
2341 if (!UseFieldDesc && !ReportMinimum && DeclDesc->getType()->isPointerType())
2342 return std::nullopt;
2343
2344 if (InvalidBase)
2345 return std::nullopt;
2346 } else {
2347 if (isUserWritingOffTheEnd(Ctx: ASTCtx, Ptr, InvalidBase)) {
2348 // If we cannot determine the size of the initial allocation, then we
2349 // can't given an accurate upper-bound. However, we are still able to give
2350 // conservative lower-bounds for Type=3.
2351 if (Kind == 1)
2352 return std::nullopt;
2353 }
2354 }
2355
2356 // The "closest surrounding subobject" is NOT a base class,
2357 // so strip the base class casts.
2358 if (UseFieldDesc && Ptr.isBaseClass())
2359 Ptr = Ptr.stripBaseCasts();
2360
2361 const Descriptor *Desc = UseFieldDesc ? Ptr.getFieldDesc() : DeclDesc;
2362 assert(Desc);
2363
2364 std::optional<unsigned> FullSize = computeFullDescSize(ASTCtx, Desc);
2365 if (!FullSize)
2366 return std::nullopt;
2367
2368 unsigned ByteOffset;
2369 if (UseFieldDesc) {
2370 if (Ptr.isBaseClass()) {
2371 assert(computePointerOffset(ASTCtx, Ptr.getBase()) <=
2372 computePointerOffset(ASTCtx, Ptr));
2373 ByteOffset = computePointerOffset(ASTCtx, Ptr: Ptr.getBase()) -
2374 computePointerOffset(ASTCtx, Ptr);
2375 } else {
2376 if (Ptr.inArray())
2377 ByteOffset =
2378 computePointerOffset(ASTCtx, Ptr) -
2379 computePointerOffset(ASTCtx, Ptr: Ptr.expand().atIndex(Idx: 0).narrow());
2380 else
2381 ByteOffset = 0;
2382 }
2383 } else
2384 ByteOffset = computePointerOffset(ASTCtx, Ptr);
2385
2386 assert(ByteOffset <= *FullSize);
2387 return *FullSize - ByteOffset;
2388}
2389
2390static bool interp__builtin_object_size(InterpState &S, CodePtr OpPC,
2391 const InterpFrame *Frame,
2392 const CallExpr *Call) {
2393 const ASTContext &ASTCtx = S.getASTContext();
2394 // From the GCC docs:
2395 // Kind is an integer constant from 0 to 3. If the least significant bit is
2396 // clear, objects are whole variables. If it is set, a closest surrounding
2397 // subobject is considered the object a pointer points to. The second bit
2398 // determines if maximum or minimum of remaining bytes is computed.
2399 unsigned Kind = popToUInt64(S, E: Call->getArg(Arg: 1));
2400 assert(Kind <= 3 && "unexpected kind");
2401 Pointer Ptr = S.Stk.pop<Pointer>();
2402
2403 if (Call->getArg(Arg: 0)->HasSideEffects(Ctx: ASTCtx)) {
2404 // "If there are any side effects in them, it returns (size_t) -1
2405 // for type 0 or 1 and (size_t) 0 for type 2 or 3."
2406 pushInteger(S, Val: Kind <= 1 ? -1 : 0, QT: Call->getType());
2407 return true;
2408 }
2409
2410 if (auto Result = evaluateBuiltinObjectSize(ASTCtx, Kind, Ptr)) {
2411 pushInteger(S, Val: *Result, QT: Call->getType());
2412 return true;
2413 }
2414 return false;
2415}
2416
/// Interpret __builtin_is_within_lifetime (and std::is_within_lifetime,
/// which wraps it).
///
/// Pops the pointer and pushes a bool: true iff the pointed-to object is
/// currently within its lifetime. Ill-formed uses (null, one-past-the-end,
/// pointer into an object still being initialized) are diagnosed.
static bool interp__builtin_is_within_lifetime(InterpState &S, CodePtr OpPC,
                                               const CallExpr *Call) {

  // This builtin is only usable during constant evaluation.
  if (!S.inConstantContext())
    return false;

  const Pointer &Ptr = S.Stk.pop<Pointer>();

  // Emits the "invalid use" diagnostic. When called through the std::
  // wrapper, point the diagnostic at the caller's source location and name
  // the std function instead of the builtin.
  auto Error = [&](int Diag) {
    bool CalledFromStd = false;
    const auto *Callee = S.Current->getCallee();
    if (Callee && Callee->isInStdNamespace()) {
      const IdentifierInfo *Identifier = Callee->getIdentifier();
      CalledFromStd = Identifier && Identifier->isStr(Str: "is_within_lifetime");
    }
    S.CCEDiag(SI: CalledFromStd
                  ? S.Current->Caller->getSource(PC: S.Current->getRetPC())
                  : S.Current->getSource(PC: OpPC),
              DiagId: diag::err_invalid_is_within_lifetime)
        << (CalledFromStd ? "std::is_within_lifetime"
                          : "__builtin_is_within_lifetime")
        << Diag;
    return false;
  };

  if (Ptr.isZero())
    return Error(0);
  if (Ptr.isOnePastEnd())
    return Error(1);

  // Inactive union members and ended lifetimes yield false; otherwise the
  // usual access checks must pass for the query to be well-formed.
  bool Result = Ptr.getLifetime() != Lifetime::Ended;
  if (!Ptr.isActive()) {
    Result = false;
  } else {
    if (!CheckLive(S, OpPC, Ptr, AK: AK_Read))
      return false;
    if (!CheckMutable(S, OpPC, Ptr))
      return false;
    if (!CheckDummy(S, OpPC, B: Ptr.block(), AK: AK_Read))
      return false;
  }

  // Check if we're currently running an initializer.
  if (llvm::is_contained(Range&: S.InitializingBlocks, Element: Ptr.block()))
    return Error(2);
  if (S.EvaluatingDecl && Ptr.getDeclDesc()->asVarDecl() == S.EvaluatingDecl)
    return Error(2);

  pushInteger(S, Val: Result, QT: Call->getType());
  return true;
}
2468
/// Interpret a one-argument elementwise integer builtin.
///
/// \p Fn computes the result for a single element. Handles both the scalar
/// form (pops an integer, pushes the result) and the vector form (pops the
/// source vector pointer, writes each transformed element into the
/// destination vector on top of the stack).
static bool interp__builtin_elementwise_int_unaryop(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<APInt(const APSInt &)> Fn) {
  assert(Call->getNumArgs() == 1);

  // Single integer case.
  if (!Call->getArg(Arg: 0)->getType()->isVectorType()) {
    assert(Call->getType()->isIntegerType());
    APSInt Src = popToAPSInt(S, E: Call->getArg(Arg: 0));
    APInt Result = Fn(Src);
    pushInteger(S, Val: APSInt(std::move(Result), !Src.isSigned()), QT: Call->getType());
    return true;
  }

  // Vector case.
  const Pointer &Arg = S.Stk.pop<Pointer>();
  assert(Arg.getFieldDesc()->isPrimitiveArray());
  const Pointer &Dst = S.Stk.peek<Pointer>();
  assert(Dst.getFieldDesc()->isPrimitiveArray());
  assert(Arg.getFieldDesc()->getNumElems() ==
         Dst.getFieldDesc()->getNumElems());

  QualType ElemType = Arg.getFieldDesc()->getElemQualType();
  PrimType ElemT = *S.getContext().classify(T: ElemType);
  unsigned NumElems = Arg.getNumElems();
  // Signedness of the result elements follows the call's result type.
  bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();

  // Apply Fn to each element, storing into the destination vector.
  for (unsigned I = 0; I != NumElems; ++I) {
    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
      APSInt Src = Arg.elem<T>(I).toAPSInt();
      APInt Result = Fn(Src);
      Dst.elem<T>(I) = static_cast<T>(APSInt(std::move(Result), DestUnsigned));
    });
  }
  Dst.initializeAllElements();

  return true;
}
2507
/// Interpret a two-vector elementwise floating-point builtin, with an
/// optional third rounding-mode argument.
///
/// \p Fn computes one result element; returning std::nullopt from it aborts
/// evaluation. When \p IsScalar is true, only element 0 is computed and the
/// remaining destination elements are copied from the first operand
/// (matching x86 "ss/sd" scalar semantics).
static bool interp__builtin_elementwise_fp_binop(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<std::optional<APFloat>(
        const APFloat &, const APFloat &, std::optional<APSInt> RoundingMode)>
        Fn,
    bool IsScalar = false) {
  assert((Call->getNumArgs() == 2) || (Call->getNumArgs() == 3));
  const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
  assert(VT->getElementType()->isFloatingType());
  unsigned NumElems = VT->getNumElements();

  // Vector case.
  assert(Call->getArg(0)->getType()->isVectorType() &&
         Call->getArg(1)->getType()->isVectorType());
  assert(VT->getElementType() ==
         Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
  assert(VT->getNumElements() ==
         Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements());

  // An optional trailing argument selects the rounding mode.
  std::optional<APSInt> RoundingMode = std::nullopt;
  if (Call->getNumArgs() == 3)
    RoundingMode = popToAPSInt(S, E: Call->getArg(Arg: 2));

  const Pointer &BPtr = S.Stk.pop<Pointer>();
  const Pointer &APtr = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();
  for (unsigned ElemIdx = 0; ElemIdx != NumElems; ++ElemIdx) {
    using T = PrimConv<PT_Float>::T;
    // Scalar variants only compute lane 0; upper lanes pass through from A.
    if (IsScalar && ElemIdx > 0) {
      Dst.elem<T>(I: ElemIdx) = APtr.elem<T>(I: ElemIdx);
      continue;
    }
    APFloat ElemA = APtr.elem<T>(I: ElemIdx).getAPFloat();
    APFloat ElemB = BPtr.elem<T>(I: ElemIdx).getAPFloat();
    std::optional<APFloat> Result = Fn(ElemA, ElemB, RoundingMode);
    if (!Result)
      return false;
    Dst.elem<T>(I: ElemIdx) = static_cast<T>(*Result);
  }

  Dst.initializeAllElements();

  return true;
}
2552
2553static bool interp__builtin_scalar_fp_round_mask_binop(
2554 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2555 llvm::function_ref<std::optional<APFloat>(const APFloat &, const APFloat &,
2556 std::optional<APSInt>)>
2557 Fn) {
2558 assert(Call->getNumArgs() == 5);
2559 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2560 unsigned NumElems = VT->getNumElements();
2561
2562 APSInt RoundingMode = popToAPSInt(S, E: Call->getArg(Arg: 4));
2563 uint64_t MaskVal = popToUInt64(S, E: Call->getArg(Arg: 3));
2564 const Pointer &SrcPtr = S.Stk.pop<Pointer>();
2565 const Pointer &BPtr = S.Stk.pop<Pointer>();
2566 const Pointer &APtr = S.Stk.pop<Pointer>();
2567 const Pointer &Dst = S.Stk.peek<Pointer>();
2568
2569 using T = PrimConv<PT_Float>::T;
2570
2571 if (MaskVal & 1) {
2572 APFloat ElemA = APtr.elem<T>(I: 0).getAPFloat();
2573 APFloat ElemB = BPtr.elem<T>(I: 0).getAPFloat();
2574 std::optional<APFloat> Result = Fn(ElemA, ElemB, RoundingMode);
2575 if (!Result)
2576 return false;
2577 Dst.elem<T>(I: 0) = static_cast<T>(*Result);
2578 } else {
2579 Dst.elem<T>(I: 0) = SrcPtr.elem<T>(I: 0);
2580 }
2581
2582 for (unsigned I = 1; I < NumElems; ++I)
2583 Dst.elem<T>(I) = APtr.elem<T>(I);
2584
2585 Dst.initializeAllElements();
2586
2587 return true;
2588}
2589
/// Interpret a two-argument elementwise integer builtin.
///
/// \p Fn computes one result element. Three operand shapes are supported:
/// scalar op scalar (pushes an integer result), vector op scalar (the
/// scalar is applied against every element), and vector op vector
/// (elementwise). Vector results are written into the destination vector
/// on top of the stack.
static bool interp__builtin_elementwise_int_binop(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
  assert(Call->getNumArgs() == 2);

  // Single integer case.
  if (!Call->getArg(Arg: 0)->getType()->isVectorType()) {
    assert(!Call->getArg(1)->getType()->isVectorType());
    // Pop in reverse order of pushing: RHS is on top.
    APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: 1));
    APSInt LHS = popToAPSInt(S, E: Call->getArg(Arg: 0));
    APInt Result = Fn(LHS, RHS);
    pushInteger(S, Val: APSInt(std::move(Result), !LHS.isSigned()), QT: Call->getType());
    return true;
  }

  const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
  assert(VT->getElementType()->isIntegralOrEnumerationType());
  PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
  unsigned NumElems = VT->getNumElements();
  // Signedness of the result elements follows the call's result type.
  bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();

  // Vector + Scalar case.
  if (!Call->getArg(Arg: 1)->getType()->isVectorType()) {
    assert(Call->getArg(1)->getType()->isIntegralOrEnumerationType());

    APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: 1));
    const Pointer &LHS = S.Stk.pop<Pointer>();
    const Pointer &Dst = S.Stk.peek<Pointer>();

    // Broadcast the scalar RHS across every LHS element.
    for (unsigned I = 0; I != NumElems; ++I) {
      INT_TYPE_SWITCH_NO_BOOL(ElemT, {
        Dst.elem<T>(I) = static_cast<T>(
            APSInt(Fn(LHS.elem<T>(I).toAPSInt(), RHS), DestUnsigned));
      });
    }
    Dst.initializeAllElements();
    return true;
  }

  // Vector case.
  assert(Call->getArg(0)->getType()->isVectorType() &&
         Call->getArg(1)->getType()->isVectorType());
  assert(VT->getElementType() ==
         Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
  assert(VT->getNumElements() ==
         Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements());
  assert(VT->getElementType()->isIntegralOrEnumerationType());

  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();
  for (unsigned I = 0; I != NumElems; ++I) {
    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
      APSInt Elem1 = LHS.elem<T>(I).toAPSInt();
      APSInt Elem2 = RHS.elem<T>(I).toAPSInt();
      Dst.elem<T>(I) = static_cast<T>(APSInt(Fn(Elem1, Elem2), DestUnsigned));
    });
  }
  Dst.initializeAllElements();

  return true;
}
2652
2653static bool
2654interp__builtin_x86_pack(InterpState &S, CodePtr, const CallExpr *E,
2655 llvm::function_ref<APInt(const APSInt &)> PackFn) {
2656 const auto *VT0 = E->getArg(Arg: 0)->getType()->castAs<VectorType>();
2657 [[maybe_unused]] const auto *VT1 =
2658 E->getArg(Arg: 1)->getType()->castAs<VectorType>();
2659 assert(VT0 && VT1 && "pack builtin VT0 and VT1 must be VectorType");
2660 assert(VT0->getElementType() == VT1->getElementType() &&
2661 VT0->getNumElements() == VT1->getNumElements() &&
2662 "pack builtin VT0 and VT1 ElementType must be same");
2663
2664 const Pointer &RHS = S.Stk.pop<Pointer>();
2665 const Pointer &LHS = S.Stk.pop<Pointer>();
2666 const Pointer &Dst = S.Stk.peek<Pointer>();
2667
2668 const ASTContext &ASTCtx = S.getASTContext();
2669 unsigned SrcBits = ASTCtx.getIntWidth(T: VT0->getElementType());
2670 unsigned LHSVecLen = VT0->getNumElements();
2671 unsigned SrcPerLane = 128 / SrcBits;
2672 unsigned Lanes = LHSVecLen * SrcBits / 128;
2673
2674 PrimType SrcT = *S.getContext().classify(T: VT0->getElementType());
2675 PrimType DstT = *S.getContext().classify(T: getElemType(P: Dst));
2676 bool IsUnsigend = getElemType(P: Dst)->isUnsignedIntegerType();
2677
2678 for (unsigned Lane = 0; Lane != Lanes; ++Lane) {
2679 unsigned BaseSrc = Lane * SrcPerLane;
2680 unsigned BaseDst = Lane * (2 * SrcPerLane);
2681
2682 for (unsigned I = 0; I != SrcPerLane; ++I) {
2683 INT_TYPE_SWITCH_NO_BOOL(SrcT, {
2684 APSInt A = LHS.elem<T>(BaseSrc + I).toAPSInt();
2685 APSInt B = RHS.elem<T>(BaseSrc + I).toAPSInt();
2686
2687 assignInteger(S, Dst.atIndex(BaseDst + I), DstT,
2688 APSInt(PackFn(A), IsUnsigend));
2689 assignInteger(S, Dst.atIndex(BaseDst + SrcPerLane + I), DstT,
2690 APSInt(PackFn(B), IsUnsigend));
2691 });
2692 }
2693 }
2694
2695 Dst.initializeAllElements();
2696 return true;
2697}
2698
2699static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
2700 const CallExpr *Call,
2701 unsigned BuiltinID) {
2702 assert(Call->getNumArgs() == 2);
2703
2704 QualType Arg0Type = Call->getArg(Arg: 0)->getType();
2705
2706 // TODO: Support floating-point types.
2707 if (!(Arg0Type->isIntegerType() ||
2708 (Arg0Type->isVectorType() &&
2709 Arg0Type->castAs<VectorType>()->getElementType()->isIntegerType())))
2710 return false;
2711
2712 if (!Arg0Type->isVectorType()) {
2713 assert(!Call->getArg(1)->getType()->isVectorType());
2714 APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: 1));
2715 APSInt LHS = popToAPSInt(S, T: Arg0Type);
2716 APInt Result;
2717 if (BuiltinID == Builtin::BI__builtin_elementwise_max) {
2718 Result = std::max(a: LHS, b: RHS);
2719 } else if (BuiltinID == Builtin::BI__builtin_elementwise_min) {
2720 Result = std::min(a: LHS, b: RHS);
2721 } else {
2722 llvm_unreachable("Wrong builtin ID");
2723 }
2724
2725 pushInteger(S, Val: APSInt(Result, !LHS.isSigned()), QT: Call->getType());
2726 return true;
2727 }
2728
2729 // Vector case.
2730 assert(Call->getArg(0)->getType()->isVectorType() &&
2731 Call->getArg(1)->getType()->isVectorType());
2732 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2733 assert(VT->getElementType() ==
2734 Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
2735 assert(VT->getNumElements() ==
2736 Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements());
2737 assert(VT->getElementType()->isIntegralOrEnumerationType());
2738
2739 const Pointer &RHS = S.Stk.pop<Pointer>();
2740 const Pointer &LHS = S.Stk.pop<Pointer>();
2741 const Pointer &Dst = S.Stk.peek<Pointer>();
2742 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2743 unsigned NumElems = VT->getNumElements();
2744 for (unsigned I = 0; I != NumElems; ++I) {
2745 APSInt Elem1;
2746 APSInt Elem2;
2747 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2748 Elem1 = LHS.elem<T>(I).toAPSInt();
2749 Elem2 = RHS.elem<T>(I).toAPSInt();
2750 });
2751
2752 APSInt Result;
2753 if (BuiltinID == Builtin::BI__builtin_elementwise_max) {
2754 Result = APSInt(std::max(a: Elem1, b: Elem2),
2755 Call->getType()->isUnsignedIntegerOrEnumerationType());
2756 } else if (BuiltinID == Builtin::BI__builtin_elementwise_min) {
2757 Result = APSInt(std::min(a: Elem1, b: Elem2),
2758 Call->getType()->isUnsignedIntegerOrEnumerationType());
2759 } else {
2760 llvm_unreachable("Wrong builtin ID");
2761 }
2762
2763 INT_TYPE_SWITCH_NO_BOOL(ElemT,
2764 { Dst.elem<T>(I) = static_cast<T>(Result); });
2765 }
2766 Dst.initializeAllElements();
2767
2768 return true;
2769}
2770
2771static bool interp__builtin_ia32_pmul(
2772 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2773 llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &,
2774 const APSInt &)>
2775 Fn) {
2776 assert(Call->getArg(0)->getType()->isVectorType() &&
2777 Call->getArg(1)->getType()->isVectorType());
2778 const Pointer &RHS = S.Stk.pop<Pointer>();
2779 const Pointer &LHS = S.Stk.pop<Pointer>();
2780 const Pointer &Dst = S.Stk.peek<Pointer>();
2781
2782 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2783 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2784 unsigned NumElems = VT->getNumElements();
2785 const auto *DestVT = Call->getType()->castAs<VectorType>();
2786 PrimType DestElemT = *S.getContext().classify(T: DestVT->getElementType());
2787 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2788
2789 unsigned DstElem = 0;
2790 for (unsigned I = 0; I != NumElems; I += 2) {
2791 APSInt Result;
2792 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2793 APSInt LoLHS = LHS.elem<T>(I).toAPSInt();
2794 APSInt HiLHS = LHS.elem<T>(I + 1).toAPSInt();
2795 APSInt LoRHS = RHS.elem<T>(I).toAPSInt();
2796 APSInt HiRHS = RHS.elem<T>(I + 1).toAPSInt();
2797 Result = APSInt(Fn(LoLHS, HiLHS, LoRHS, HiRHS), DestUnsigned);
2798 });
2799
2800 INT_TYPE_SWITCH_NO_BOOL(DestElemT,
2801 { Dst.elem<T>(DstElem) = static_cast<T>(Result); });
2802 ++DstElem;
2803 }
2804
2805 Dst.initializeAllElements();
2806 return true;
2807}
2808
2809static bool interp_builtin_horizontal_int_binop(
2810 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2811 llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
2812 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2813 PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2814 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2815
2816 const Pointer &RHS = S.Stk.pop<Pointer>();
2817 const Pointer &LHS = S.Stk.pop<Pointer>();
2818 const Pointer &Dst = S.Stk.peek<Pointer>();
2819 unsigned NumElts = VT->getNumElements();
2820 unsigned EltBits = S.getASTContext().getIntWidth(T: VT->getElementType());
2821 unsigned EltsPerLane = 128 / EltBits;
2822 unsigned Lanes = NumElts * EltBits / 128;
2823 unsigned DestIndex = 0;
2824
2825 for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
2826 unsigned LaneStart = Lane * EltsPerLane;
2827 for (unsigned I = 0; I < EltsPerLane; I += 2) {
2828 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2829 APSInt Elem1 = LHS.elem<T>(LaneStart + I).toAPSInt();
2830 APSInt Elem2 = LHS.elem<T>(LaneStart + I + 1).toAPSInt();
2831 APSInt ResL = APSInt(Fn(Elem1, Elem2), DestUnsigned);
2832 Dst.elem<T>(DestIndex++) = static_cast<T>(ResL);
2833 });
2834 }
2835
2836 for (unsigned I = 0; I < EltsPerLane; I += 2) {
2837 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2838 APSInt Elem1 = RHS.elem<T>(LaneStart + I).toAPSInt();
2839 APSInt Elem2 = RHS.elem<T>(LaneStart + I + 1).toAPSInt();
2840 APSInt ResR = APSInt(Fn(Elem1, Elem2), DestUnsigned);
2841 Dst.elem<T>(DestIndex++) = static_cast<T>(ResR);
2842 });
2843 }
2844 }
2845 Dst.initializeAllElements();
2846 return true;
2847}
2848
2849static bool interp_builtin_horizontal_fp_binop(
2850 InterpState &S, CodePtr OpPC, const CallExpr *Call,
2851 llvm::function_ref<APFloat(const APFloat &, const APFloat &,
2852 llvm::RoundingMode)>
2853 Fn) {
2854 const Pointer &RHS = S.Stk.pop<Pointer>();
2855 const Pointer &LHS = S.Stk.pop<Pointer>();
2856 const Pointer &Dst = S.Stk.peek<Pointer>();
2857 FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
2858 llvm::RoundingMode RM = getRoundingMode(FPO);
2859 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2860
2861 unsigned NumElts = VT->getNumElements();
2862 unsigned EltBits = S.getASTContext().getTypeSize(T: VT->getElementType());
2863 unsigned NumLanes = NumElts * EltBits / 128;
2864 unsigned NumElemsPerLane = NumElts / NumLanes;
2865 unsigned HalfElemsPerLane = NumElemsPerLane / 2;
2866
2867 for (unsigned L = 0; L != NumElts; L += NumElemsPerLane) {
2868 using T = PrimConv<PT_Float>::T;
2869 for (unsigned E = 0; E != HalfElemsPerLane; ++E) {
2870 APFloat Elem1 = LHS.elem<T>(I: L + (2 * E) + 0).getAPFloat();
2871 APFloat Elem2 = LHS.elem<T>(I: L + (2 * E) + 1).getAPFloat();
2872 Dst.elem<T>(I: L + E) = static_cast<T>(Fn(Elem1, Elem2, RM));
2873 }
2874 for (unsigned E = 0; E != HalfElemsPerLane; ++E) {
2875 APFloat Elem1 = RHS.elem<T>(I: L + (2 * E) + 0).getAPFloat();
2876 APFloat Elem2 = RHS.elem<T>(I: L + (2 * E) + 1).getAPFloat();
2877 Dst.elem<T>(I: L + E + HalfElemsPerLane) =
2878 static_cast<T>(Fn(Elem1, Elem2, RM));
2879 }
2880 }
2881 Dst.initializeAllElements();
2882 return true;
2883}
2884
2885static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC,
2886 const CallExpr *Call) {
2887 // Addsub: alternates between subtraction and addition
2888 // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i])
2889 const Pointer &RHS = S.Stk.pop<Pointer>();
2890 const Pointer &LHS = S.Stk.pop<Pointer>();
2891 const Pointer &Dst = S.Stk.peek<Pointer>();
2892 FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
2893 llvm::RoundingMode RM = getRoundingMode(FPO);
2894 const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
2895 unsigned NumElems = VT->getNumElements();
2896
2897 using T = PrimConv<PT_Float>::T;
2898 for (unsigned I = 0; I != NumElems; ++I) {
2899 APFloat LElem = LHS.elem<T>(I).getAPFloat();
2900 APFloat RElem = RHS.elem<T>(I).getAPFloat();
2901 if (I % 2 == 0) {
2902 // Even indices: subtract
2903 LElem.subtract(RHS: RElem, RM);
2904 } else {
2905 // Odd indices: add
2906 LElem.add(RHS: RElem, RM);
2907 }
2908 Dst.elem<T>(I) = static_cast<T>(LElem);
2909 }
2910 Dst.initializeAllElements();
2911 return true;
2912}
2913
/// Evaluates the x86 PCLMULQDQ builtins: per 128-bit lane, one 64-bit
/// element is selected from each source vector (via imm8) and the two are
/// multiplied carry-lessly (GF(2) polynomial multiplication); the 128-bit
/// product fills both 64-bit elements of the destination lane.
static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
                                           const CallExpr *Call) {
  // PCLMULQDQ: carry-less multiplication of selected 64-bit halves
  // imm8 bit 0: selects lower (0) or upper (1) 64 bits of first operand
  // imm8 bit 4: selects lower (0) or upper (1) 64 bits of second operand
  assert(Call->getArg(0)->getType()->isVectorType() &&
         Call->getArg(1)->getType()->isVectorType());

  // Extract imm8 argument (popped first because it was pushed last).
  APSInt Imm8 = popToAPSInt(S, E: Call->getArg(Arg: 2));
  bool SelectUpperA = (Imm8 & 0x01) != 0;
  bool SelectUpperB = (Imm8 & 0x10) != 0;

  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  const auto *VT = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
  PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
  unsigned NumElems = VT->getNumElements();
  const auto *DestVT = Call->getType()->castAs<VectorType>();
  PrimType DestElemT = *S.getContext().classify(T: DestVT->getElementType());
  bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();

  // Process each 128-bit lane (2 elements at a time)
  for (unsigned Lane = 0; Lane < NumElems; Lane += 2) {
    APSInt A0, A1, B0, B1;
    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
      A0 = LHS.elem<T>(Lane + 0).toAPSInt();
      A1 = LHS.elem<T>(Lane + 1).toAPSInt();
      B0 = RHS.elem<T>(Lane + 0).toAPSInt();
      B1 = RHS.elem<T>(Lane + 1).toAPSInt();
    });

    // Select the appropriate 64-bit values based on imm8
    APInt A = SelectUpperA ? A1 : A0;
    APInt B = SelectUpperB ? B1 : B0;

    // Extend both operands to 128 bits for carry-less multiplication
    APInt A128 = A.zext(width: 128);
    APInt B128 = B.zext(width: 128);

    // Use APIntOps::clmul for carry-less multiplication
    APInt Result = llvm::APIntOps::clmul(LHS: A128, RHS: B128);

    // Split the 128-bit result into two 64-bit halves
    APSInt ResultLow(Result.extractBits(numBits: 64, bitPosition: 0), DestUnsigned);
    APSInt ResultHigh(Result.extractBits(numBits: 64, bitPosition: 64), DestUnsigned);

    INT_TYPE_SWITCH_NO_BOOL(DestElemT, {
      Dst.elem<T>(Lane + 0) = static_cast<T>(ResultLow);
      Dst.elem<T>(Lane + 1) = static_cast<T>(ResultHigh);
    });
  }

  Dst.initializeAllElements();
  return true;
}
2972
/// Evaluates a three-operand floating-point builtin, for both the scalar
/// and the elementwise vector form. \p Fn receives the three operands
/// together with the rounding mode in effect for the call.
static bool interp__builtin_elementwise_triop_fp(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<APFloat(const APFloat &, const APFloat &,
                               const APFloat &, llvm::RoundingMode)>
        Fn) {
  assert(Call->getNumArgs() == 3);

  FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
  llvm::RoundingMode RM = getRoundingMode(FPO);
  QualType Arg1Type = Call->getArg(Arg: 0)->getType();
  QualType Arg2Type = Call->getArg(Arg: 1)->getType();
  QualType Arg3Type = Call->getArg(Arg: 2)->getType();

  // Non-vector floating point types.
  if (!Arg1Type->isVectorType()) {
    assert(!Arg2Type->isVectorType());
    assert(!Arg3Type->isVectorType());
    (void)Arg2Type;
    (void)Arg3Type;

    // Operands were pushed left-to-right, so pop in reverse order.
    const Floating &Z = S.Stk.pop<Floating>();
    const Floating &Y = S.Stk.pop<Floating>();
    const Floating &X = S.Stk.pop<Floating>();
    APFloat F = Fn(X.getAPFloat(), Y.getAPFloat(), Z.getAPFloat(), RM);
    Floating Result = S.allocFloat(Sem: X.getSemantics());
    Result.copy(F);
    S.Stk.push<Floating>(Args&: Result);
    return true;
  }

  // Vector type.
  assert(Arg1Type->isVectorType() && Arg2Type->isVectorType() &&
         Arg3Type->isVectorType());

  const VectorType *VecTy = Arg1Type->castAs<VectorType>();
  QualType ElemQT = VecTy->getElementType();
  unsigned NumElems = VecTy->getNumElements();

  // All three operands must agree in element type and element count.
  assert(ElemQT == Arg2Type->castAs<VectorType>()->getElementType() &&
         ElemQT == Arg3Type->castAs<VectorType>()->getElementType());
  assert(NumElems == Arg2Type->castAs<VectorType>()->getNumElements() &&
         NumElems == Arg3Type->castAs<VectorType>()->getNumElements());
  assert(ElemQT->isRealFloatingType());
  (void)ElemQT;

  const Pointer &VZ = S.Stk.pop<Pointer>();
  const Pointer &VY = S.Stk.pop<Pointer>();
  const Pointer &VX = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();
  for (unsigned I = 0; I != NumElems; ++I) {
    using T = PrimConv<PT_Float>::T;
    APFloat X = VX.elem<T>(I).getAPFloat();
    APFloat Y = VY.elem<T>(I).getAPFloat();
    APFloat Z = VZ.elem<T>(I).getAPFloat();
    APFloat F = Fn(X, Y, Z, RM);
    Dst.elem<Floating>(I) = Floating(F);
  }
  Dst.initializeAllElements();
  return true;
}
3033
3034/// AVX512 predicated move: "Result = Mask[] ? LHS[] : RHS[]".
3035static bool interp__builtin_select(InterpState &S, CodePtr OpPC,
3036 const CallExpr *Call) {
3037 const Pointer &RHS = S.Stk.pop<Pointer>();
3038 const Pointer &LHS = S.Stk.pop<Pointer>();
3039 APSInt Mask = popToAPSInt(S, E: Call->getArg(Arg: 0));
3040 const Pointer &Dst = S.Stk.peek<Pointer>();
3041
3042 assert(LHS.getNumElems() == RHS.getNumElems());
3043 assert(LHS.getNumElems() == Dst.getNumElems());
3044 unsigned NumElems = LHS.getNumElems();
3045 PrimType ElemT = LHS.getFieldDesc()->getPrimType();
3046 PrimType DstElemT = Dst.getFieldDesc()->getPrimType();
3047
3048 for (unsigned I = 0; I != NumElems; ++I) {
3049 if (ElemT == PT_Float) {
3050 assert(DstElemT == PT_Float);
3051 Dst.elem<Floating>(I) =
3052 Mask[I] ? LHS.elem<Floating>(I) : RHS.elem<Floating>(I);
3053 } else {
3054 APSInt Elem;
3055 INT_TYPE_SWITCH(ElemT, {
3056 Elem = Mask[I] ? LHS.elem<T>(I).toAPSInt() : RHS.elem<T>(I).toAPSInt();
3057 });
3058 INT_TYPE_SWITCH_NO_BOOL(DstElemT,
3059 { Dst.elem<T>(I) = static_cast<T>(Elem); });
3060 }
3061 }
3062 Dst.initializeAllElements();
3063
3064 return true;
3065}
3066
3067/// Scalar variant of AVX512 predicated select:
3068/// Result[i] = (Mask bit 0) ? LHS[i] : RHS[i], but only element 0 may change.
3069/// All other elements are taken from RHS.
3070static bool interp__builtin_select_scalar(InterpState &S,
3071 const CallExpr *Call) {
3072 unsigned N =
3073 Call->getArg(Arg: 1)->getType()->castAs<VectorType>()->getNumElements();
3074
3075 const Pointer &W = S.Stk.pop<Pointer>();
3076 const Pointer &A = S.Stk.pop<Pointer>();
3077 APSInt U = popToAPSInt(S, E: Call->getArg(Arg: 0));
3078 const Pointer &Dst = S.Stk.peek<Pointer>();
3079
3080 bool TakeA0 = U.getZExtValue() & 1ULL;
3081
3082 for (unsigned I = TakeA0; I != N; ++I)
3083 Dst.elem<Floating>(I) = W.elem<Floating>(I);
3084 if (TakeA0)
3085 Dst.elem<Floating>(I: 0) = A.elem<Floating>(I: 0);
3086
3087 Dst.initializeAllElements();
3088 return true;
3089}
3090
/// Evaluates x86 vector "test" builtins (ptest-style): both operand
/// vectors are concatenated element-by-element into two wide APInts, and
/// \p Fn computes the scalar (flag) result from them. For floating-point
/// elements, only the sign bit of each element is carried over.
static bool interp__builtin_ia32_test_op(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();

  assert(LHS.getNumElems() == RHS.getNumElems());

  unsigned SourceLen = LHS.getNumElems();
  QualType ElemQT = getElemType(P: LHS);
  OptPrimType ElemPT = S.getContext().classify(T: ElemQT);
  unsigned LaneWidth = S.getASTContext().getTypeSize(T: ElemQT);

  // Accumulators wide enough to hold the whole vector's bits.
  APInt AWide(LaneWidth * SourceLen, 0);
  APInt BWide(LaneWidth * SourceLen, 0);

  for (unsigned I = 0; I != SourceLen; ++I) {
    APInt ALane;
    APInt BLane;

    if (ElemQT->isIntegerType()) { // Get value.
      INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
        ALane = LHS.elem<T>(I).toAPSInt();
        BLane = RHS.elem<T>(I).toAPSInt();
      });
    } else if (ElemQT->isFloatingType()) { // Get only sign bit.
      using T = PrimConv<PT_Float>::T;
      ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
      BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
    } else { // Must be integer or floating type.
      return false;
    }
    AWide.insertBits(SubBits: ALane, bitPosition: I * LaneWidth);
    BWide.insertBits(SubBits: BLane, bitPosition: I * LaneWidth);
  }
  pushInteger(S, Val: Fn(AWide, BWide), QT: Call->getType());
  return true;
}
3129
3130static bool interp__builtin_ia32_movmsk_op(InterpState &S, CodePtr OpPC,
3131 const CallExpr *Call) {
3132 assert(Call->getNumArgs() == 1);
3133
3134 const Pointer &Source = S.Stk.pop<Pointer>();
3135
3136 unsigned SourceLen = Source.getNumElems();
3137 QualType ElemQT = getElemType(P: Source);
3138 OptPrimType ElemT = S.getContext().classify(T: ElemQT);
3139 unsigned ResultLen =
3140 S.getASTContext().getTypeSize(T: Call->getType()); // Always 32-bit integer.
3141 APInt Result(ResultLen, 0);
3142
3143 for (unsigned I = 0; I != SourceLen; ++I) {
3144 APInt Elem;
3145 if (ElemQT->isIntegerType()) {
3146 INT_TYPE_SWITCH_NO_BOOL(*ElemT, { Elem = Source.elem<T>(I).toAPSInt(); });
3147 } else if (ElemQT->isRealFloatingType()) {
3148 using T = PrimConv<PT_Float>::T;
3149 Elem = Source.elem<T>(I).getAPFloat().bitcastToAPInt();
3150 } else {
3151 return false;
3152 }
3153 Result.setBitVal(BitPosition: I, BitValue: Elem.isNegative());
3154 }
3155 pushInteger(S, Val: Result, QT: Call->getType());
3156 return true;
3157}
3158
/// Evaluates a three-operand integer builtin in any of its three shapes:
/// all-scalar, vector+vector+scalar (the scalar is shared by all lanes),
/// or fully elementwise vector+vector+vector.
static bool interp__builtin_elementwise_triop(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
        Fn) {
  assert(Call->getNumArgs() == 3);

  QualType Arg0Type = Call->getArg(Arg: 0)->getType();
  QualType Arg2Type = Call->getArg(Arg: 2)->getType();
  // Non-vector integer types.
  if (!Arg0Type->isVectorType()) {
    // Operands were pushed left-to-right; pop in reverse order.
    const APSInt &Op2 = popToAPSInt(S, T: Arg2Type);
    const APSInt &Op1 = popToAPSInt(S, E: Call->getArg(Arg: 1));
    const APSInt &Op0 = popToAPSInt(S, T: Arg0Type);
    APSInt Result = APSInt(Fn(Op0, Op1, Op2), Op0.isUnsigned());
    pushInteger(S, Val: Result, QT: Call->getType());
    return true;
  }

  const auto *VecT = Arg0Type->castAs<VectorType>();
  PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
  unsigned NumElems = VecT->getNumElements();
  bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();

  // Vector + Vector + Scalar case.
  if (!Arg2Type->isVectorType()) {
    APSInt Op2 = popToAPSInt(S, T: Arg2Type);

    const Pointer &Op1 = S.Stk.pop<Pointer>();
    const Pointer &Op0 = S.Stk.pop<Pointer>();
    const Pointer &Dst = S.Stk.peek<Pointer>();
    for (unsigned I = 0; I != NumElems; ++I) {
      INT_TYPE_SWITCH_NO_BOOL(ElemT, {
        Dst.elem<T>(I) = static_cast<T>(APSInt(
            Fn(Op0.elem<T>(I).toAPSInt(), Op1.elem<T>(I).toAPSInt(), Op2),
            DestUnsigned));
      });
    }
    Dst.initializeAllElements();

    return true;
  }

  // Vector type.
  const Pointer &Op2 = S.Stk.pop<Pointer>();
  const Pointer &Op1 = S.Stk.pop<Pointer>();
  const Pointer &Op0 = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();
  for (unsigned I = 0; I != NumElems; ++I) {
    APSInt Val0, Val1, Val2;
    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
      Val0 = Op0.elem<T>(I).toAPSInt();
      Val1 = Op1.elem<T>(I).toAPSInt();
      Val2 = Op2.elem<T>(I).toAPSInt();
    });
    APSInt Result = APSInt(Fn(Val0, Val1, Val2), Val0.isUnsigned());
    INT_TYPE_SWITCH_NO_BOOL(ElemT,
                            { Dst.elem<T>(I) = static_cast<T>(Result); });
  }
  Dst.initializeAllElements();

  return true;
}
3221
3222static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC,
3223 const CallExpr *Call,
3224 unsigned ID) {
3225 assert(Call->getNumArgs() == 2);
3226
3227 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 1));
3228 uint64_t Index = ImmAPS.getZExtValue();
3229
3230 const Pointer &Src = S.Stk.pop<Pointer>();
3231 if (!Src.getFieldDesc()->isPrimitiveArray())
3232 return false;
3233
3234 const Pointer &Dst = S.Stk.peek<Pointer>();
3235 if (!Dst.getFieldDesc()->isPrimitiveArray())
3236 return false;
3237
3238 unsigned SrcElems = Src.getNumElems();
3239 unsigned DstElems = Dst.getNumElems();
3240
3241 unsigned NumLanes = SrcElems / DstElems;
3242 unsigned Lane = static_cast<unsigned>(Index % NumLanes);
3243 unsigned ExtractPos = Lane * DstElems;
3244
3245 PrimType ElemT = Src.getFieldDesc()->getPrimType();
3246
3247 TYPE_SWITCH(ElemT, {
3248 for (unsigned I = 0; I != DstElems; ++I) {
3249 Dst.elem<T>(I) = Src.elem<T>(ExtractPos + I);
3250 }
3251 });
3252
3253 Dst.initializeAllElements();
3254 return true;
3255}
3256
3257static bool interp__builtin_x86_extract_vector_masked(InterpState &S,
3258 CodePtr OpPC,
3259 const CallExpr *Call,
3260 unsigned ID) {
3261 assert(Call->getNumArgs() == 4);
3262
3263 APSInt MaskAPS = popToAPSInt(S, E: Call->getArg(Arg: 3));
3264 const Pointer &Merge = S.Stk.pop<Pointer>();
3265 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 1));
3266 const Pointer &Src = S.Stk.pop<Pointer>();
3267
3268 if (!Src.getFieldDesc()->isPrimitiveArray() ||
3269 !Merge.getFieldDesc()->isPrimitiveArray())
3270 return false;
3271
3272 const Pointer &Dst = S.Stk.peek<Pointer>();
3273 if (!Dst.getFieldDesc()->isPrimitiveArray())
3274 return false;
3275
3276 unsigned SrcElems = Src.getNumElems();
3277 unsigned DstElems = Dst.getNumElems();
3278
3279 unsigned NumLanes = SrcElems / DstElems;
3280 unsigned Lane = static_cast<unsigned>(ImmAPS.getZExtValue() % NumLanes);
3281 unsigned Base = Lane * DstElems;
3282
3283 PrimType ElemT = Src.getFieldDesc()->getPrimType();
3284
3285 TYPE_SWITCH(ElemT, {
3286 for (unsigned I = 0; I != DstElems; ++I) {
3287 if (MaskAPS[I])
3288 Dst.elem<T>(I) = Src.elem<T>(Base + I);
3289 else
3290 Dst.elem<T>(I) = Merge.elem<T>(I);
3291 }
3292 });
3293
3294 Dst.initializeAllElements();
3295 return true;
3296}
3297
3298static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
3299 const CallExpr *Call,
3300 unsigned ID) {
3301 assert(Call->getNumArgs() == 3);
3302
3303 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 2));
3304 uint64_t Index = ImmAPS.getZExtValue();
3305
3306 const Pointer &SubVec = S.Stk.pop<Pointer>();
3307 if (!SubVec.getFieldDesc()->isPrimitiveArray())
3308 return false;
3309
3310 const Pointer &BaseVec = S.Stk.pop<Pointer>();
3311 if (!BaseVec.getFieldDesc()->isPrimitiveArray())
3312 return false;
3313
3314 const Pointer &Dst = S.Stk.peek<Pointer>();
3315
3316 unsigned BaseElements = BaseVec.getNumElems();
3317 unsigned SubElements = SubVec.getNumElems();
3318
3319 assert(SubElements != 0 && BaseElements != 0 &&
3320 (BaseElements % SubElements) == 0);
3321
3322 unsigned NumLanes = BaseElements / SubElements;
3323 unsigned Lane = static_cast<unsigned>(Index % NumLanes);
3324 unsigned InsertPos = Lane * SubElements;
3325
3326 PrimType ElemT = BaseVec.getFieldDesc()->getPrimType();
3327
3328 TYPE_SWITCH(ElemT, {
3329 for (unsigned I = 0; I != BaseElements; ++I)
3330 Dst.elem<T>(I) = BaseVec.elem<T>(I);
3331 for (unsigned I = 0; I != SubElements; ++I)
3332 Dst.elem<T>(InsertPos + I) = SubVec.elem<T>(I);
3333 });
3334
3335 Dst.initializeAllElements();
3336 return true;
3337}
3338
3339static bool interp__builtin_ia32_phminposuw(InterpState &S, CodePtr OpPC,
3340 const CallExpr *Call) {
3341 assert(Call->getNumArgs() == 1);
3342
3343 const Pointer &Source = S.Stk.pop<Pointer>();
3344 const Pointer &Dest = S.Stk.peek<Pointer>();
3345
3346 unsigned SourceLen = Source.getNumElems();
3347 QualType ElemQT = getElemType(P: Source);
3348 OptPrimType ElemT = S.getContext().classify(T: ElemQT);
3349 unsigned ElemBitWidth = S.getASTContext().getTypeSize(T: ElemQT);
3350
3351 bool DestUnsigned = Call->getCallReturnType(Ctx: S.getASTContext())
3352 ->castAs<VectorType>()
3353 ->getElementType()
3354 ->isUnsignedIntegerOrEnumerationType();
3355
3356 INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
3357 APSInt MinIndex(ElemBitWidth, DestUnsigned);
3358 APSInt MinVal = Source.elem<T>(0).toAPSInt();
3359
3360 for (unsigned I = 1; I != SourceLen; ++I) {
3361 APSInt Val = Source.elem<T>(I).toAPSInt();
3362 if (MinVal.ugt(Val)) {
3363 MinVal = Val;
3364 MinIndex = I;
3365 }
3366 }
3367
3368 Dest.elem<T>(0) = static_cast<T>(MinVal);
3369 Dest.elem<T>(1) = static_cast<T>(MinIndex);
3370 for (unsigned I = 2; I != SourceLen; ++I) {
3371 Dest.elem<T>(I) = static_cast<T>(APSInt(ElemBitWidth, DestUnsigned));
3372 }
3373 });
3374 Dest.initializeAllElements();
3375 return true;
3376}
3377
/// Evaluates AVX512 vpternlog builtins: for every element, each result bit
/// is looked up in the 8-entry truth table \c Imm, indexed by the
/// corresponding bits of A, B and C. Elements whose bit in the write mask
/// \c U is clear are either zeroed (when \p MaskZ is set) or passed through
/// unchanged from A.
static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
                                          const CallExpr *Call, bool MaskZ) {
  assert(Call->getNumArgs() == 5);

  APInt U = popToAPSInt(S, E: Call->getArg(Arg: 4));   // Lane mask
  APInt Imm = popToAPSInt(S, E: Call->getArg(Arg: 3)); // Ternary truth table
  const Pointer &C = S.Stk.pop<Pointer>();
  const Pointer &B = S.Stk.pop<Pointer>();
  const Pointer &A = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  unsigned DstLen = A.getNumElems();
  QualType ElemQT = getElemType(P: A);
  OptPrimType ElemT = S.getContext().classify(T: ElemQT);
  unsigned LaneWidth = S.getASTContext().getTypeSize(T: ElemQT);
  bool DstUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();

  INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
    for (unsigned I = 0; I != DstLen; ++I) {
      APInt ALane = A.elem<T>(I).toAPSInt();
      APInt BLane = B.elem<T>(I).toAPSInt();
      APInt CLane = C.elem<T>(I).toAPSInt();
      APInt RLane(LaneWidth, 0);
      if (U[I]) { // If lane not masked, compute ternary logic.
        for (unsigned Bit = 0; Bit != LaneWidth; ++Bit) {
          unsigned ABit = ALane[Bit];
          unsigned BBit = BLane[Bit];
          unsigned CBit = CLane[Bit];
          // The three operand bits form the truth-table index.
          unsigned Idx = (ABit << 2) | (BBit << 1) | (CBit);
          RLane.setBitVal(Bit, Imm[Idx]);
        }
        Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
      } else if (MaskZ) { // If zero masked, zero the lane.
        Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
      } else { // Just masked, put in A lane.
        Dst.elem<T>(I) = static_cast<T>(APSInt(ALane, DstUnsigned));
      }
    }
  });
  Dst.initializeAllElements();
  return true;
}
3420
3421static bool interp__builtin_vec_ext(InterpState &S, CodePtr OpPC,
3422 const CallExpr *Call, unsigned ID) {
3423 assert(Call->getNumArgs() == 2);
3424
3425 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 1));
3426 const Pointer &Vec = S.Stk.pop<Pointer>();
3427 if (!Vec.getFieldDesc()->isPrimitiveArray())
3428 return false;
3429
3430 unsigned NumElems = Vec.getNumElems();
3431 unsigned Index =
3432 static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1));
3433
3434 PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3435 // FIXME(#161685): Replace float+int split with a numeric-only type switch
3436 if (ElemT == PT_Float) {
3437 S.Stk.push<Floating>(Args&: Vec.elem<Floating>(I: Index));
3438 return true;
3439 }
3440 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3441 APSInt V = Vec.elem<T>(Index).toAPSInt();
3442 pushInteger(S, V, Call->getType());
3443 });
3444
3445 return true;
3446}
3447
3448static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
3449 const CallExpr *Call, unsigned ID) {
3450 assert(Call->getNumArgs() == 3);
3451
3452 APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: 2));
3453 APSInt ValAPS = popToAPSInt(S, E: Call->getArg(Arg: 1));
3454
3455 const Pointer &Base = S.Stk.pop<Pointer>();
3456 if (!Base.getFieldDesc()->isPrimitiveArray())
3457 return false;
3458
3459 const Pointer &Dst = S.Stk.peek<Pointer>();
3460
3461 unsigned NumElems = Base.getNumElems();
3462 unsigned Index =
3463 static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1));
3464
3465 PrimType ElemT = Base.getFieldDesc()->getPrimType();
3466 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3467 for (unsigned I = 0; I != NumElems; ++I)
3468 Dst.elem<T>(I) = Base.elem<T>(I);
3469 Dst.elem<T>(Index) = static_cast<T>(ValAPS);
3470 });
3471
3472 Dst.initializeAllElements();
3473 return true;
3474}
3475
3476static bool evalICmpImm(uint8_t Imm, const APSInt &A, const APSInt &B,
3477 bool IsUnsigned) {
3478 switch (Imm & 0x7) {
3479 case 0x00: // _MM_CMPINT_EQ
3480 return (A == B);
3481 case 0x01: // _MM_CMPINT_LT
3482 return IsUnsigned ? A.ult(RHS: B) : A.slt(RHS: B);
3483 case 0x02: // _MM_CMPINT_LE
3484 return IsUnsigned ? A.ule(RHS: B) : A.sle(RHS: B);
3485 case 0x03: // _MM_CMPINT_FALSE
3486 return false;
3487 case 0x04: // _MM_CMPINT_NE
3488 return (A != B);
3489 case 0x05: // _MM_CMPINT_NLT
3490 return IsUnsigned ? A.ugt(RHS: B) : A.sgt(RHS: B);
3491 case 0x06: // _MM_CMPINT_NLE
3492 return IsUnsigned ? A.uge(RHS: B) : A.sge(RHS: B);
3493 case 0x07: // _MM_CMPINT_TRUE
3494 return true;
3495 default:
3496 llvm_unreachable("Invalid Op");
3497 }
3498}
3499
3500static bool interp__builtin_ia32_cmp_mask(InterpState &S, CodePtr OpPC,
3501 const CallExpr *Call, unsigned ID,
3502 bool IsUnsigned) {
3503 assert(Call->getNumArgs() == 4);
3504
3505 APSInt Mask = popToAPSInt(S, E: Call->getArg(Arg: 3));
3506 APSInt Opcode = popToAPSInt(S, E: Call->getArg(Arg: 2));
3507 unsigned CmpOp = static_cast<unsigned>(Opcode.getZExtValue());
3508 const Pointer &RHS = S.Stk.pop<Pointer>();
3509 const Pointer &LHS = S.Stk.pop<Pointer>();
3510
3511 assert(LHS.getNumElems() == RHS.getNumElems());
3512
3513 APInt RetMask = APInt::getZero(numBits: LHS.getNumElems());
3514 unsigned VectorLen = LHS.getNumElems();
3515 PrimType ElemT = LHS.getFieldDesc()->getPrimType();
3516
3517 for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
3518 APSInt A, B;
3519 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3520 A = LHS.elem<T>(ElemNum).toAPSInt();
3521 B = RHS.elem<T>(ElemNum).toAPSInt();
3522 });
3523 RetMask.setBitVal(BitPosition: ElemNum,
3524 BitValue: Mask[ElemNum] && evalICmpImm(Imm: CmpOp, A, B, IsUnsigned));
3525 }
3526 pushInteger(S, Val: RetMask, QT: Call->getType());
3527 return true;
3528}
3529
3530static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
3531 const CallExpr *Call) {
3532 assert(Call->getNumArgs() == 1);
3533
3534 QualType Arg0Type = Call->getArg(Arg: 0)->getType();
3535 const auto *VecT = Arg0Type->castAs<VectorType>();
3536 PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
3537 unsigned NumElems = VecT->getNumElements();
3538 bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3539 const Pointer &Src = S.Stk.pop<Pointer>();
3540 const Pointer &Dst = S.Stk.peek<Pointer>();
3541
3542 for (unsigned I = 0; I != NumElems; ++I) {
3543 INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3544 APSInt ElemI = Src.elem<T>(I).toAPSInt();
3545 APInt ConflictMask(ElemI.getBitWidth(), 0);
3546 for (unsigned J = 0; J != I; ++J) {
3547 APSInt ElemJ = Src.elem<T>(J).toAPSInt();
3548 ConflictMask.setBitVal(J, ElemI == ElemJ);
3549 }
3550 Dst.elem<T>(I) = static_cast<T>(APSInt(ConflictMask, DestUnsigned));
3551 });
3552 }
3553 Dst.initializeAllElements();
3554 return true;
3555}
3556
3557static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC,
3558 const CallExpr *Call,
3559 unsigned ID) {
3560 assert(Call->getNumArgs() == 1);
3561
3562 const Pointer &Vec = S.Stk.pop<Pointer>();
3563 unsigned RetWidth = S.getASTContext().getIntWidth(T: Call->getType());
3564 APInt RetMask(RetWidth, 0);
3565
3566 unsigned VectorLen = Vec.getNumElems();
3567 PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3568
3569 for (unsigned ElemNum = 0; ElemNum != VectorLen; ++ElemNum) {
3570 APSInt A;
3571 INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem<T>(ElemNum).toAPSInt(); });
3572 unsigned MSB = A[A.getBitWidth() - 1];
3573 RetMask.setBitVal(BitPosition: ElemNum, BitValue: MSB);
3574 }
3575 pushInteger(S, Val: RetMask, QT: Call->getType());
3576 return true;
3577}
3578
3579static bool interp__builtin_ia32_cvt_mask2vec(InterpState &S, CodePtr OpPC,
3580 const CallExpr *Call,
3581 unsigned ID) {
3582 assert(Call->getNumArgs() == 1);
3583
3584 APSInt Mask = popToAPSInt(S, E: Call->getArg(Arg: 0));
3585
3586 const Pointer &Vec = S.Stk.peek<Pointer>();
3587 unsigned NumElems = Vec.getNumElems();
3588 PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3589
3590 for (unsigned I = 0; I != NumElems; ++I) {
3591 bool BitSet = Mask[I];
3592
3593 INT_TYPE_SWITCH_NO_BOOL(
3594 ElemT, { Vec.elem<T>(I) = BitSet ? T::from(-1) : T::from(0); });
3595 }
3596
3597 Vec.initializeAllElements();
3598
3599 return true;
3600}
3601
// Evaluates cvtsd2ss-style builtins: convert the double in lane 0 of vector B
// to a float in lane 0 of the result; all other lanes are copied from A.
// The AVX-512 form (HasRoundingMask) additionally takes a source/passthrough
// vector, a write mask, and a rounding immediate.
static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
                                          const CallExpr *Call,
                                          bool HasRoundingMask) {
  APSInt Rounding, MaskInt;
  Pointer Src, B, A;

  if (HasRoundingMask) {
    assert(Call->getNumArgs() == 5);
    // Arguments popped in reverse order: (A, B, Src, Mask, Rounding).
    // NOTE(review): Rounding is popped for stack balance but not otherwise
    // consulted here; exactness is enforced by convertDoubleToFloatStrict —
    // confirm that is the intended handling of the rounding immediate.
    Rounding = popToAPSInt(S, E: Call->getArg(Arg: 4));
    MaskInt = popToAPSInt(S, E: Call->getArg(Arg: 3));
    Src = S.Stk.pop<Pointer>();
    B = S.Stk.pop<Pointer>();
    A = S.Stk.pop<Pointer>();
    if (!CheckLoad(S, OpPC, Ptr: A) || !CheckLoad(S, OpPC, Ptr: B) ||
        !CheckLoad(S, OpPC, Ptr: Src))
      return false;
  } else {
    assert(Call->getNumArgs() == 2);
    B = S.Stk.pop<Pointer>();
    A = S.Stk.pop<Pointer>();
    if (!CheckLoad(S, OpPC, Ptr: A) || !CheckLoad(S, OpPC, Ptr: B))
      return false;
  }

  const auto *DstVTy = Call->getType()->castAs<VectorType>();
  unsigned NumElems = DstVTy->getNumElements();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  // Copy all elements except lane 0 (overwritten below) from A to Dst.
  for (unsigned I = 1; I != NumElems; ++I)
    Dst.elem<Floating>(I) = A.elem<Floating>(I);

  // Convert element 0 from double to float, or use Src if masked off.
  // Only bit 0 of the write mask matters since only lane 0 is converted.
  if (!HasRoundingMask || (MaskInt.getZExtValue() & 0x1)) {
    assert(S.getASTContext().FloatTy == DstVTy->getElementType() &&
           "cvtsd2ss requires float element type in destination vector");

    Floating Conv = S.allocFloat(
        Sem: S.getASTContext().getFloatTypeSemantics(T: DstVTy->getElementType()));
    APFloat SrcVal = B.elem<Floating>(I: 0).getAPFloat();
    // Fails (returning false) when the conversion is not representable.
    if (!convertDoubleToFloatStrict(Src: SrcVal, Dst&: Conv, S, DiagExpr: Call))
      return false;
    Dst.elem<Floating>(I: 0) = Conv;
  } else {
    // Masked off: lane 0 comes from the passthrough vector.
    Dst.elem<Floating>(I: 0) = Src.elem<Floating>(I: 0);
  }

  Dst.initializeAllElements();
  return true;
}
3652
// Evaluates cvtpd2ps-style builtins: convert each packed double in Src to a
// float in the result. Masked forms take a passthrough vector and a write
// mask; unmasked forms zero any result lanes beyond the source width.
static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
                                          const CallExpr *Call, bool IsMasked,
                                          bool HasRounding) {

  APSInt MaskVal;
  Pointer PassThrough;
  Pointer Src;
  APSInt Rounding;

  if (IsMasked) {
    // Pop in reverse order.
    // NOTE(review): Rounding is popped for stack balance but not otherwise
    // consulted; convertDoubleToFloatStrict below enforces exactness.
    if (HasRounding) {
      Rounding = popToAPSInt(S, E: Call->getArg(Arg: 3));
      MaskVal = popToAPSInt(S, E: Call->getArg(Arg: 2));
      PassThrough = S.Stk.pop<Pointer>();
      Src = S.Stk.pop<Pointer>();
    } else {
      MaskVal = popToAPSInt(S, E: Call->getArg(Arg: 2));
      PassThrough = S.Stk.pop<Pointer>();
      Src = S.Stk.pop<Pointer>();
    }

    if (!CheckLoad(S, OpPC, Ptr: PassThrough))
      return false;
  } else {
    // Pop source only.
    Src = S.Stk.pop<Pointer>();
  }

  if (!CheckLoad(S, OpPC, Ptr: Src))
    return false;

  const auto *RetVTy = Call->getType()->castAs<VectorType>();
  unsigned RetElems = RetVTy->getNumElements();
  unsigned SrcElems = Src.getNumElems();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  // Initialize destination with passthrough or zeros. Masked-off lanes and
  // lanes beyond the source width keep this initial value.
  for (unsigned I = 0; I != RetElems; ++I)
    if (IsMasked)
      Dst.elem<Floating>(I) = PassThrough.elem<Floating>(I);
    else
      Dst.elem<Floating>(I) = Floating(APFloat(0.0f));

  assert(S.getASTContext().FloatTy == RetVTy->getElementType() &&
         "cvtpd2ps requires float element type in return vector");

  // Convert double to float for enabled elements (only process source elements
  // that exist).
  for (unsigned I = 0; I != SrcElems; ++I) {
    if (IsMasked && !MaskVal[I])
      continue;

    APFloat SrcVal = Src.elem<Floating>(I).getAPFloat();

    Floating Conv = S.allocFloat(
        Sem: S.getASTContext().getFloatTypeSemantics(T: RetVTy->getElementType()));
    // Fails (returning false) when the conversion is not representable.
    if (!convertDoubleToFloatStrict(Src: SrcVal, Dst&: Conv, S, DiagExpr: Call))
      return false;
    Dst.elem<Floating>(I) = Conv;
  }

  Dst.initializeAllElements();
  return true;
}
3718
// Generic driver for x86 shuffle/permute builtins.
//
// GetSourceIndex maps (destination index, per-lane shuffle mask) to a pair
// (source vector index, source element index). A source vector index of 0
// selects operand A, anything else selects B; a negative source element
// index means "zero this destination lane".
//
// Supported argument shapes:
//   (A, imm)          — single operand, integer immediate mask (B aliases A)
//   (A, maskvec)      — single operand, per-lane vector mask  (B aliases A)
//   (A, imm/maskvec, B) — two operands, mask is always argument 1's slot in
//                         the vector-mask case (popped between B and A)
static bool interp__builtin_ia32_shuffle_generic(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
        GetSourceIndex) {

  assert(Call->getNumArgs() == 2 || Call->getNumArgs() == 3);

  unsigned ShuffleMask = 0;
  Pointer A, MaskVector, B;
  bool IsVectorMask = false;
  bool IsSingleOperand = (Call->getNumArgs() == 2);

  if (IsSingleOperand) {
    QualType MaskType = Call->getArg(Arg: 1)->getType();
    if (MaskType->isVectorType()) {
      IsVectorMask = true;
      MaskVector = S.Stk.pop<Pointer>();
      A = S.Stk.pop<Pointer>();
      B = A;
    } else if (MaskType->isIntegerType()) {
      ShuffleMask = popToAPSInt(S, E: Call->getArg(Arg: 1)).getZExtValue();
      A = S.Stk.pop<Pointer>();
      B = A;
    } else {
      // Defensive: unexpected mask argument type.
      return false;
    }
  } else {
    QualType Arg2Type = Call->getArg(Arg: 2)->getType();
    if (Arg2Type->isVectorType()) {
      // Three-arg vector-mask form: arguments are (A, MaskVector, B).
      IsVectorMask = true;
      B = S.Stk.pop<Pointer>();
      MaskVector = S.Stk.pop<Pointer>();
      A = S.Stk.pop<Pointer>();
    } else if (Arg2Type->isIntegerType()) {
      ShuffleMask = popToAPSInt(S, E: Call->getArg(Arg: 2)).getZExtValue();
      B = S.Stk.pop<Pointer>();
      A = S.Stk.pop<Pointer>();
    } else {
      // Defensive: unexpected mask argument type.
      return false;
    }
  }

  QualType Arg0Type = Call->getArg(Arg: 0)->getType();
  const auto *VecT = Arg0Type->castAs<VectorType>();
  PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
  unsigned NumElems = VecT->getNumElements();

  const Pointer &Dst = S.Stk.peek<Pointer>();

  // In the vector-mask forms, the mask vector is always argument 1.
  PrimType MaskElemT = PT_Uint32;
  if (IsVectorMask) {
    QualType Arg1Type = Call->getArg(Arg: 1)->getType();
    const auto *MaskVecT = Arg1Type->castAs<VectorType>();
    QualType MaskElemType = MaskVecT->getElementType();
    MaskElemT = *S.getContext().classify(T: MaskElemType);
  }

  for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {
    // With a vector mask, each destination lane gets its own control value.
    if (IsVectorMask) {
      INT_TYPE_SWITCH(MaskElemT, {
        ShuffleMask = static_cast<unsigned>(MaskVector.elem<T>(DstIdx));
      });
    }

    auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);

    if (SrcIdx < 0) {
      // Zero out this element
      if (ElemT == PT_Float) {
        Dst.elem<Floating>(I: DstIdx) = Floating(
            S.getASTContext().getFloatTypeSemantics(T: VecT->getElementType()));
      } else {
        INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(0); });
      }
    } else {
      const Pointer &Src = (SrcVecIdx == 0) ? A : B;
      TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
    }
  }
  Dst.initializeAllElements();

  return true;
}
3802
// Evaluates psll/psrl/psra-style "shift by vector count" builtins.
//
// The effective shift amount is the low 64 bits (one quadword) of the count
// vector, treated as a single unsigned value. ShiftOp is applied per element
// when that amount is less than the element width; otherwise OverflowOp
// produces the saturated result (e.g. all zeros or the sign fill).
static bool interp__builtin_ia32_shift_with_count(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp,
    llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) {

  assert(Call->getNumArgs() == 2);

  const Pointer &Count = S.Stk.pop<Pointer>();
  const Pointer &Source = S.Stk.pop<Pointer>();

  QualType SourceType = Call->getArg(Arg: 0)->getType();
  QualType CountType = Call->getArg(Arg: 1)->getType();
  assert(SourceType->isVectorType() && CountType->isVectorType());

  const auto *SourceVecT = SourceType->castAs<VectorType>();
  const auto *CountVecT = CountType->castAs<VectorType>();
  PrimType SourceElemT = *S.getContext().classify(T: SourceVecT->getElementType());
  PrimType CountElemT = *S.getContext().classify(T: CountVecT->getElementType());

  const Pointer &Dst = S.Stk.peek<Pointer>();

  unsigned DestEltWidth =
      S.getASTContext().getTypeSize(T: SourceVecT->getElementType());
  bool IsDestUnsigned = SourceVecT->getElementType()->isUnsignedIntegerType();
  unsigned DestLen = SourceVecT->getNumElements();
  unsigned CountEltWidth =
      S.getASTContext().getTypeSize(T: CountVecT->getElementType());
  unsigned NumBitsInQWord = 64;
  unsigned NumCountElts = NumBitsInQWord / CountEltWidth;

  // Assemble the low 64 bits of the count vector into one unsigned value,
  // element 0 in the least significant position.
  uint64_t CountLQWord = 0;
  for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) {
    uint64_t Elt = 0;
    INT_TYPE_SWITCH(CountElemT,
                    { Elt = static_cast<uint64_t>(Count.elem<T>(EltIdx)); });
    CountLQWord |= (Elt << (EltIdx * CountEltWidth));
  }

  for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) {
    APSInt Elt;
    INT_TYPE_SWITCH(SourceElemT, { Elt = Source.elem<T>(EltIdx).toAPSInt(); });

    // Counts >= the element width take the overflow path instead of
    // shifting (hardware saturates rather than shifting modulo width).
    APInt Result;
    if (CountLQWord < DestEltWidth) {
      Result = ShiftOp(Elt, CountLQWord);
    } else {
      Result = OverflowOp(Elt, DestEltWidth);
    }
    // Re-materialize the result with the destination's signedness.
    if (IsDestUnsigned) {
      INT_TYPE_SWITCH(SourceElemT, {
        Dst.elem<T>(EltIdx) = T::from(Result.getZExtValue());
      });
    } else {
      INT_TYPE_SWITCH(SourceElemT, {
        Dst.elem<T>(EltIdx) = T::from(Result.getSExtValue());
      });
    }
  }

  Dst.initializeAllElements();
  return true;
}
3865
// Evaluates vpshufbitqmb: for each byte lane, the low 6 bits of the shuffle
// mask select one bit out of the containing 64-bit group of the source; that
// bit becomes the corresponding bit of the result mask. Lanes whose ZeroMask
// bit is clear produce 0.
static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
                                                 const CallExpr *Call) {

  assert(Call->getNumArgs() == 3);

  QualType SourceType = Call->getArg(Arg: 0)->getType();
  QualType ShuffleMaskType = Call->getArg(Arg: 1)->getType();
  QualType ZeroMaskType = Call->getArg(Arg: 2)->getType();
  // Defensive: bail out on unexpected argument types.
  if (!SourceType->isVectorType() || !ShuffleMaskType->isVectorType() ||
      !ZeroMaskType->isIntegerType()) {
    return false;
  }

  // Pop in reverse order of declaration.
  Pointer Source, ShuffleMask;
  APSInt ZeroMask = popToAPSInt(S, E: Call->getArg(Arg: 2));
  ShuffleMask = S.Stk.pop<Pointer>();
  Source = S.Stk.pop<Pointer>();

  const auto *SourceVecT = SourceType->castAs<VectorType>();
  const auto *ShuffleMaskVecT = ShuffleMaskType->castAs<VectorType>();
  assert(SourceVecT->getNumElements() == ShuffleMaskVecT->getNumElements());
  assert(ZeroMask.getBitWidth() == SourceVecT->getNumElements());

  PrimType SourceElemT = *S.getContext().classify(T: SourceVecT->getElementType());
  PrimType ShuffleMaskElemT =
      *S.getContext().classify(T: ShuffleMaskVecT->getElementType());

  unsigned NumBytesInQWord = 8;
  unsigned NumBitsInByte = 8;
  unsigned NumBytes = SourceVecT->getNumElements();
  unsigned NumQWords = NumBytes / NumBytesInQWord;
  unsigned RetWidth = ZeroMask.getBitWidth();
  APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);

  for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
    // Assemble this 64-bit group from its 8 source bytes, byte 0 in the
    // least significant position.
    APInt SourceQWord(64, 0);
    for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
      uint64_t Byte = 0;
      INT_TYPE_SWITCH(SourceElemT, {
        Byte = static_cast<uint64_t>(
            Source.elem<T>(QWordId * NumBytesInQWord + ByteIdx));
      });
      SourceQWord.insertBits(SubBits: APInt(8, Byte & 0xFF), bitPosition: ByteIdx * NumBitsInByte);
    }

    for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
      unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx;
      // Only the low 6 bits of the selector are used (bit index within
      // the 64-bit group).
      unsigned M = 0;
      INT_TYPE_SWITCH(ShuffleMaskElemT, {
        M = static_cast<unsigned>(ShuffleMask.elem<T>(SelIdx)) & 0x3F;
      });

      // Zero-masked lanes keep the 0 that RetMask was initialized with.
      if (ZeroMask[SelIdx]) {
        RetMask.setBitVal(BitPosition: SelIdx, BitValue: SourceQWord[M]);
      }
    }
  }

  pushInteger(S, Val: RetMask, QT: Call->getType());
  return true;
}
3927
// Evaluates vcvtps2ph: convert each packed float to a half-precision value
// stored as a 16-bit integer element. The rounding mode is encoded in the
// immediate; an immediate requesting the dynamic MXCSR mode is only
// evaluable when the conversion is exact (in FP-constrained contexts).
static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
                                           const CallExpr *Call) {
  // Arguments are: vector of floats, rounding immediate
  assert(Call->getNumArgs() == 2);

  APSInt Imm = popToAPSInt(S, E: Call->getArg(Arg: 1));
  const Pointer &Src = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  assert(Src.getFieldDesc()->isPrimitiveArray());
  assert(Dst.getFieldDesc()->isPrimitiveArray());

  const auto *SrcVTy = Call->getArg(Arg: 0)->getType()->castAs<VectorType>();
  unsigned SrcNumElems = SrcVTy->getNumElements();
  const auto *DstVTy = Call->getType()->castAs<VectorType>();
  unsigned DstNumElems = DstVTy->getNumElements();

  const llvm::fltSemantics &HalfSem =
      S.getASTContext().getFloatTypeSemantics(T: S.getASTContext().HalfTy);

  // imm[2] == 1 means use MXCSR rounding mode.
  // In that case, we can only evaluate if the conversion is exact.
  int ImmVal = Imm.getZExtValue();
  bool UseMXCSR = (ImmVal & 4) != 0;
  bool IsFPConstrained =
      Call->getFPFeaturesInEffect(LO: S.getASTContext().getLangOpts())
          .isFPConstrained();

  // Decode imm[1:0] into a static rounding mode when MXCSR is not selected.
  llvm::RoundingMode RM;
  if (!UseMXCSR) {
    switch (ImmVal & 3) {
    case 0:
      RM = llvm::RoundingMode::NearestTiesToEven;
      break;
    case 1:
      RM = llvm::RoundingMode::TowardNegative;
      break;
    case 2:
      RM = llvm::RoundingMode::TowardPositive;
      break;
    case 3:
      RM = llvm::RoundingMode::TowardZero;
      break;
    default:
      llvm_unreachable("Invalid immediate rounding mode");
    }
  } else {
    // For MXCSR, we must check for exactness. We can use any rounding mode
    // for the trial conversion since the result is the same if it's exact.
    RM = llvm::RoundingMode::NearestTiesToEven;
  }

  QualType DstElemQT = Dst.getFieldDesc()->getElemQualType();
  PrimType DstElemT = *S.getContext().classify(T: DstElemQT);

  for (unsigned I = 0; I != SrcNumElems; ++I) {
    Floating SrcVal = Src.elem<Floating>(I);
    APFloat DstVal = SrcVal.getAPFloat();

    bool LostInfo;
    APFloat::opStatus St = DstVal.convert(ToSemantics: HalfSem, RM, losesInfo: &LostInfo);

    // An inexact result under the dynamic (MXCSR) mode depends on runtime
    // state, which cannot be modelled in constant evaluation.
    if (UseMXCSR && IsFPConstrained && St != APFloat::opOK) {
      S.FFDiag(SI: S.Current->getSource(PC: OpPC),
               DiagId: diag::note_constexpr_dynamic_rounding);
      return false;
    }

    INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
      // Convert the destination value's bit pattern to an unsigned integer,
      // then reconstruct the element using the target type's 'from' method.
      uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue();
      Dst.elem<T>(I) = T::from(RawBits);
    });
  }

  // Zero out remaining elements if the destination has more elements
  // (e.g., vcvtps2ph converting 4 floats to 8 shorts).
  if (DstNumElems > SrcNumElems) {
    for (unsigned I = SrcNumElems; I != DstNumElems; ++I) {
      INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(0); });
    }
  }

  Dst.initializeAllElements();
  return true;
}
4015
// Evaluates vpmultishiftqb: for each byte lane, the low 6 bits of the control
// byte (from the first operand) give a bit offset into the containing 64-bit
// group of the second operand; the destination byte is the 8 bits starting at
// that offset, wrapping around within the 64-bit group.
static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC,
                                              const CallExpr *Call) {
  assert(Call->getNumArgs() == 2);

  QualType ATy = Call->getArg(Arg: 0)->getType();
  QualType BTy = Call->getArg(Arg: 1)->getType();
  // Defensive: bail out on unexpected argument types.
  if (!ATy->isVectorType() || !BTy->isVectorType()) {
    return false;
  }

  const Pointer &BPtr = S.Stk.pop<Pointer>();
  const Pointer &APtr = S.Stk.pop<Pointer>();
  const auto *AVecT = ATy->castAs<VectorType>();
  assert(AVecT->getNumElements() ==
         BTy->castAs<VectorType>()->getNumElements());

  PrimType ElemT = *S.getContext().classify(T: AVecT->getElementType());

  unsigned NumBytesInQWord = 8;
  unsigned NumBitsInByte = 8;
  unsigned NumBytes = AVecT->getNumElements();
  unsigned NumQWords = NumBytes / NumBytesInQWord;
  const Pointer &Dst = S.Stk.peek<Pointer>();

  for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
    // Assemble this 64-bit group of B from its 8 bytes, byte 0 in the
    // least significant position.
    APInt BQWord(64, 0);
    for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
      unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
      INT_TYPE_SWITCH(ElemT, {
        uint64_t Byte = static_cast<uint64_t>(BPtr.elem<T>(Idx));
        BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
      });
    }

    for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
      unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
      // Only the low 6 bits of the control byte are used (bit offset
      // within the 64-bit group).
      uint64_t Ctrl = 0;
      INT_TYPE_SWITCH(
          ElemT, { Ctrl = static_cast<uint64_t>(APtr.elem<T>(Idx)) & 0x3F; });

      // Gather 8 consecutive bits starting at Ctrl, wrapping modulo 64.
      APInt Byte(8, 0);
      for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) {
        Byte.setBitVal(BitPosition: BitIdx, BitValue: BQWord[(Ctrl + BitIdx) & 0x3F]);
      }
      INT_TYPE_SWITCH(ElemT,
                      { Dst.elem<T>(Idx) = T::from(Byte.getZExtValue()); });
    }
  }

  Dst.initializeAllElements();

  return true;
}
4069
// Evaluates GFNI affine transform builtins (gf2p8affineqb / gf2p8affineinvqb):
// per 64-bit group, compute A*X + Imm in GF(2) via the GFNIAffine helper,
// where A supplies the 8x8 bit matrix, X the input bytes, and Imm the
// constant byte added to each result. Inverse selects the "inv" variant.
// NOTE(review): name uses a single underscore (interp_builtin_...) unlike the
// interp__builtin_... siblings; callers are outside this view, so renaming
// would need a coordinated change.
static bool interp_builtin_ia32_gfni_affine(InterpState &S, CodePtr OpPC,
                                            const CallExpr *Call,
                                            bool Inverse) {
  assert(Call->getNumArgs() == 3);
  QualType XType = Call->getArg(Arg: 0)->getType();
  QualType AType = Call->getArg(Arg: 1)->getType();
  QualType ImmType = Call->getArg(Arg: 2)->getType();
  // Defensive: bail out on unexpected argument types.
  if (!XType->isVectorType() || !AType->isVectorType() ||
      !ImmType->isIntegerType()) {
    return false;
  }

  // Pop in reverse order of declaration.
  Pointer X, A;
  APSInt Imm = popToAPSInt(S, E: Call->getArg(Arg: 2));
  A = S.Stk.pop<Pointer>();
  X = S.Stk.pop<Pointer>();

  const Pointer &Dst = S.Stk.peek<Pointer>();
  const auto *AVecT = AType->castAs<VectorType>();
  assert(XType->castAs<VectorType>()->getNumElements() ==
         AVecT->getNumElements());
  unsigned NumBytesInQWord = 8;
  unsigned NumBytes = AVecT->getNumElements();
  unsigned NumBitsInQWord = 64;
  unsigned NumQWords = NumBytes / NumBytesInQWord;
  unsigned NumBitsInByte = 8;
  PrimType AElemT = *S.getContext().classify(T: AVecT->getElementType());

  // computing A*X + Imm
  for (unsigned QWordIdx = 0; QWordIdx != NumQWords; ++QWordIdx) {
    // Extract the QWords from X, A
    APInt XQWord(NumBitsInQWord, 0);
    APInt AQWord(NumBitsInQWord, 0);
    for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
      unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx;
      uint8_t XByte;
      uint8_t AByte;
      INT_TYPE_SWITCH(AElemT, {
        XByte = static_cast<uint8_t>(X.elem<T>(Idx));
        AByte = static_cast<uint8_t>(A.elem<T>(Idx));
      });

      XQWord.insertBits(SubBits: APInt(NumBitsInByte, XByte), bitPosition: ByteIdx * NumBitsInByte);
      AQWord.insertBits(SubBits: APInt(NumBitsInByte, AByte), bitPosition: ByteIdx * NumBitsInByte);
    }

    // Transform each byte of X against the whole 64-bit matrix group of A.
    for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
      unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx;
      uint8_t XByte =
          XQWord.lshr(shiftAmt: ByteIdx * NumBitsInByte).getLoBits(numBits: 8).getZExtValue();
      INT_TYPE_SWITCH(AElemT, {
        Dst.elem<T>(Idx) = T::from(GFNIAffine(XByte, AQWord, Imm, Inverse));
      });
    }
  }
  Dst.initializeAllElements();
  return true;
}
4128
4129static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC,
4130 const CallExpr *Call) {
4131 assert(Call->getNumArgs() == 2);
4132
4133 QualType AType = Call->getArg(Arg: 0)->getType();
4134 QualType BType = Call->getArg(Arg: 1)->getType();
4135 if (!AType->isVectorType() || !BType->isVectorType()) {
4136 return false;
4137 }
4138
4139 Pointer A, B;
4140 B = S.Stk.pop<Pointer>();
4141 A = S.Stk.pop<Pointer>();
4142
4143 const Pointer &Dst = S.Stk.peek<Pointer>();
4144 const auto *AVecT = AType->castAs<VectorType>();
4145 assert(AVecT->getNumElements() ==
4146 BType->castAs<VectorType>()->getNumElements());
4147
4148 PrimType AElemT = *S.getContext().classify(T: AVecT->getElementType());
4149 unsigned NumBytes = A.getNumElems();
4150
4151 for (unsigned ByteIdx = 0; ByteIdx != NumBytes; ++ByteIdx) {
4152 uint8_t AByte, BByte;
4153 INT_TYPE_SWITCH(AElemT, {
4154 AByte = static_cast<uint8_t>(A.elem<T>(ByteIdx));
4155 BByte = static_cast<uint8_t>(B.elem<T>(ByteIdx));
4156 Dst.elem<T>(ByteIdx) = T::from(GFNIMul(AByte, BByte));
4157 });
4158 }
4159
4160 Dst.initializeAllElements();
4161 return true;
4162}
4163
4164bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
4165 uint32_t BuiltinID) {
4166 if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(ID: BuiltinID))
4167 return Invalid(S, OpPC);
4168
4169 const InterpFrame *Frame = S.Current;
4170 switch (BuiltinID) {
4171 case Builtin::BI__builtin_is_constant_evaluated:
4172 return interp__builtin_is_constant_evaluated(S, OpPC, Frame, Call);
4173
4174 case Builtin::BI__builtin_assume:
4175 case Builtin::BI__assume:
4176 return interp__builtin_assume(S, OpPC, Frame, Call);
4177
4178 case Builtin::BI__builtin_strcmp:
4179 case Builtin::BIstrcmp:
4180 case Builtin::BI__builtin_strncmp:
4181 case Builtin::BIstrncmp:
4182 case Builtin::BI__builtin_wcsncmp:
4183 case Builtin::BIwcsncmp:
4184 case Builtin::BI__builtin_wcscmp:
4185 case Builtin::BIwcscmp:
4186 return interp__builtin_strcmp(S, OpPC, Frame, Call, ID: BuiltinID);
4187
4188 case Builtin::BI__builtin_strlen:
4189 case Builtin::BIstrlen:
4190 case Builtin::BI__builtin_wcslen:
4191 case Builtin::BIwcslen:
4192 return interp__builtin_strlen(S, OpPC, Frame, Call, ID: BuiltinID);
4193
4194 case Builtin::BI__builtin_nan:
4195 case Builtin::BI__builtin_nanf:
4196 case Builtin::BI__builtin_nanl:
4197 case Builtin::BI__builtin_nanf16:
4198 case Builtin::BI__builtin_nanf128:
4199 return interp__builtin_nan(S, OpPC, Frame, Call, /*Signaling=*/false);
4200
4201 case Builtin::BI__builtin_nans:
4202 case Builtin::BI__builtin_nansf:
4203 case Builtin::BI__builtin_nansl:
4204 case Builtin::BI__builtin_nansf16:
4205 case Builtin::BI__builtin_nansf128:
4206 return interp__builtin_nan(S, OpPC, Frame, Call, /*Signaling=*/true);
4207
4208 case Builtin::BI__builtin_huge_val:
4209 case Builtin::BI__builtin_huge_valf:
4210 case Builtin::BI__builtin_huge_vall:
4211 case Builtin::BI__builtin_huge_valf16:
4212 case Builtin::BI__builtin_huge_valf128:
4213 case Builtin::BI__builtin_inf:
4214 case Builtin::BI__builtin_inff:
4215 case Builtin::BI__builtin_infl:
4216 case Builtin::BI__builtin_inff16:
4217 case Builtin::BI__builtin_inff128:
4218 return interp__builtin_inf(S, OpPC, Frame, Call);
4219
4220 case Builtin::BI__builtin_copysign:
4221 case Builtin::BI__builtin_copysignf:
4222 case Builtin::BI__builtin_copysignl:
4223 case Builtin::BI__builtin_copysignf128:
4224 return interp__builtin_copysign(S, OpPC, Frame);
4225
4226 case Builtin::BI__builtin_fmin:
4227 case Builtin::BI__builtin_fminf:
4228 case Builtin::BI__builtin_fminl:
4229 case Builtin::BI__builtin_fminf16:
4230 case Builtin::BI__builtin_fminf128:
4231 return interp__builtin_fmin(S, OpPC, Frame, /*IsNumBuiltin=*/false);
4232
4233 case Builtin::BI__builtin_fminimum_num:
4234 case Builtin::BI__builtin_fminimum_numf:
4235 case Builtin::BI__builtin_fminimum_numl:
4236 case Builtin::BI__builtin_fminimum_numf16:
4237 case Builtin::BI__builtin_fminimum_numf128:
4238 return interp__builtin_fmin(S, OpPC, Frame, /*IsNumBuiltin=*/true);
4239
4240 case Builtin::BI__builtin_fmax:
4241 case Builtin::BI__builtin_fmaxf:
4242 case Builtin::BI__builtin_fmaxl:
4243 case Builtin::BI__builtin_fmaxf16:
4244 case Builtin::BI__builtin_fmaxf128:
4245 return interp__builtin_fmax(S, OpPC, Frame, /*IsNumBuiltin=*/false);
4246
4247 case Builtin::BI__builtin_fmaximum_num:
4248 case Builtin::BI__builtin_fmaximum_numf:
4249 case Builtin::BI__builtin_fmaximum_numl:
4250 case Builtin::BI__builtin_fmaximum_numf16:
4251 case Builtin::BI__builtin_fmaximum_numf128:
4252 return interp__builtin_fmax(S, OpPC, Frame, /*IsNumBuiltin=*/true);
4253
4254 case Builtin::BI__builtin_isnan:
4255 return interp__builtin_isnan(S, OpPC, Frame, Call);
4256
4257 case Builtin::BI__builtin_issignaling:
4258 return interp__builtin_issignaling(S, OpPC, Frame, Call);
4259
4260 case Builtin::BI__builtin_isinf:
4261 return interp__builtin_isinf(S, OpPC, Frame, /*Sign=*/CheckSign: false, Call);
4262
4263 case Builtin::BI__builtin_isinf_sign:
4264 return interp__builtin_isinf(S, OpPC, Frame, /*Sign=*/CheckSign: true, Call);
4265
4266 case Builtin::BI__builtin_isfinite:
4267 return interp__builtin_isfinite(S, OpPC, Frame, Call);
4268
4269 case Builtin::BI__builtin_isnormal:
4270 return interp__builtin_isnormal(S, OpPC, Frame, Call);
4271
4272 case Builtin::BI__builtin_issubnormal:
4273 return interp__builtin_issubnormal(S, OpPC, Frame, Call);
4274
4275 case Builtin::BI__builtin_iszero:
4276 return interp__builtin_iszero(S, OpPC, Frame, Call);
4277
4278 case Builtin::BI__builtin_signbit:
4279 case Builtin::BI__builtin_signbitf:
4280 case Builtin::BI__builtin_signbitl:
4281 return interp__builtin_signbit(S, OpPC, Frame, Call);
4282
4283 case Builtin::BI__builtin_isgreater:
4284 case Builtin::BI__builtin_isgreaterequal:
4285 case Builtin::BI__builtin_isless:
4286 case Builtin::BI__builtin_islessequal:
4287 case Builtin::BI__builtin_islessgreater:
4288 case Builtin::BI__builtin_isunordered:
4289 return interp_floating_comparison(S, OpPC, Call, ID: BuiltinID);
4290
4291 case Builtin::BI__builtin_isfpclass:
4292 return interp__builtin_isfpclass(S, OpPC, Frame, Call);
4293
4294 case Builtin::BI__builtin_fpclassify:
4295 return interp__builtin_fpclassify(S, OpPC, Frame, Call);
4296
4297 case Builtin::BI__builtin_fabs:
4298 case Builtin::BI__builtin_fabsf:
4299 case Builtin::BI__builtin_fabsl:
4300 case Builtin::BI__builtin_fabsf128:
4301 return interp__builtin_fabs(S, OpPC, Frame);
4302
4303 case Builtin::BI__builtin_abs:
4304 case Builtin::BI__builtin_labs:
4305 case Builtin::BI__builtin_llabs:
4306 return interp__builtin_abs(S, OpPC, Frame, Call);
4307
4308 case Builtin::BI__builtin_popcount:
4309 case Builtin::BI__builtin_popcountl:
4310 case Builtin::BI__builtin_popcountll:
4311 case Builtin::BI__builtin_popcountg:
4312 case Builtin::BI__popcnt16: // Microsoft variants of popcount
4313 case Builtin::BI__popcnt:
4314 case Builtin::BI__popcnt64:
4315 return interp__builtin_popcount(S, OpPC, Frame, Call);
4316
4317 case Builtin::BI__builtin_parity:
4318 case Builtin::BI__builtin_parityl:
4319 case Builtin::BI__builtin_parityll:
4320 return interp__builtin_elementwise_int_unaryop(
4321 S, OpPC, Call, Fn: [](const APSInt &Val) {
4322 return APInt(Val.getBitWidth(), Val.popcount() % 2);
4323 });
4324 case Builtin::BI__builtin_clrsb:
4325 case Builtin::BI__builtin_clrsbl:
4326 case Builtin::BI__builtin_clrsbll:
4327 return interp__builtin_elementwise_int_unaryop(
4328 S, OpPC, Call, Fn: [](const APSInt &Val) {
4329 return APInt(Val.getBitWidth(),
4330 Val.getBitWidth() - Val.getSignificantBits());
4331 });
4332 case Builtin::BI__builtin_bitreverseg:
4333 case Builtin::BI__builtin_bitreverse8:
4334 case Builtin::BI__builtin_bitreverse16:
4335 case Builtin::BI__builtin_bitreverse32:
4336 case Builtin::BI__builtin_bitreverse64:
4337 return interp__builtin_elementwise_int_unaryop(
4338 S, OpPC, Call, Fn: [](const APSInt &Val) { return Val.reverseBits(); });
4339
4340 case Builtin::BI__builtin_classify_type:
4341 return interp__builtin_classify_type(S, OpPC, Frame, Call);
4342
4343 case Builtin::BI__builtin_expect:
4344 case Builtin::BI__builtin_expect_with_probability:
4345 return interp__builtin_expect(S, OpPC, Frame, Call);
4346
4347 case Builtin::BI__builtin_rotateleft8:
4348 case Builtin::BI__builtin_rotateleft16:
4349 case Builtin::BI__builtin_rotateleft32:
4350 case Builtin::BI__builtin_rotateleft64:
4351 case Builtin::BI__builtin_stdc_rotate_left:
4352 case Builtin::BI_rotl8: // Microsoft variants of rotate left
4353 case Builtin::BI_rotl16:
4354 case Builtin::BI_rotl:
4355 case Builtin::BI_lrotl:
4356 case Builtin::BI_rotl64:
4357 case Builtin::BI__builtin_rotateright8:
4358 case Builtin::BI__builtin_rotateright16:
4359 case Builtin::BI__builtin_rotateright32:
4360 case Builtin::BI__builtin_rotateright64:
4361 case Builtin::BI__builtin_stdc_rotate_right:
4362 case Builtin::BI_rotr8: // Microsoft variants of rotate right
4363 case Builtin::BI_rotr16:
4364 case Builtin::BI_rotr:
4365 case Builtin::BI_lrotr:
4366 case Builtin::BI_rotr64: {
4367 // Determine if this is a rotate right operation
4368 bool IsRotateRight;
4369 switch (BuiltinID) {
4370 case Builtin::BI__builtin_rotateright8:
4371 case Builtin::BI__builtin_rotateright16:
4372 case Builtin::BI__builtin_rotateright32:
4373 case Builtin::BI__builtin_rotateright64:
4374 case Builtin::BI__builtin_stdc_rotate_right:
4375 case Builtin::BI_rotr8:
4376 case Builtin::BI_rotr16:
4377 case Builtin::BI_rotr:
4378 case Builtin::BI_lrotr:
4379 case Builtin::BI_rotr64:
4380 IsRotateRight = true;
4381 break;
4382 default:
4383 IsRotateRight = false;
4384 break;
4385 }
4386
4387 return interp__builtin_elementwise_int_binop(
4388 S, OpPC, Call, Fn: [IsRotateRight](const APSInt &Value, APSInt Amount) {
4389 Amount = NormalizeRotateAmount(Value, Amount);
4390 return IsRotateRight ? Value.rotr(rotateAmt: Amount.getZExtValue())
4391 : Value.rotl(rotateAmt: Amount.getZExtValue());
4392 });
4393 }
4394
4395 case Builtin::BI__builtin_ffs:
4396 case Builtin::BI__builtin_ffsl:
4397 case Builtin::BI__builtin_ffsll:
4398 return interp__builtin_elementwise_int_unaryop(
4399 S, OpPC, Call, Fn: [](const APSInt &Val) {
4400 return APInt(Val.getBitWidth(),
4401 Val.isZero() ? 0u : Val.countTrailingZeros() + 1u);
4402 });
4403
4404 case Builtin::BIaddressof:
4405 case Builtin::BI__addressof:
4406 case Builtin::BI__builtin_addressof:
4407 assert(isNoopBuiltin(BuiltinID));
4408 return interp__builtin_addressof(S, OpPC, Frame, Call);
4409
4410 case Builtin::BIas_const:
4411 case Builtin::BIforward:
4412 case Builtin::BIforward_like:
4413 case Builtin::BImove:
4414 case Builtin::BImove_if_noexcept:
4415 assert(isNoopBuiltin(BuiltinID));
4416 return interp__builtin_move(S, OpPC, Frame, Call);
4417
4418 case Builtin::BI__builtin_eh_return_data_regno:
4419 return interp__builtin_eh_return_data_regno(S, OpPC, Frame, Call);
4420
4421 case Builtin::BI__builtin_launder:
4422 assert(isNoopBuiltin(BuiltinID));
4423 return true;
4424
4425 case Builtin::BI__builtin_add_overflow:
4426 case Builtin::BI__builtin_sub_overflow:
4427 case Builtin::BI__builtin_mul_overflow:
4428 case Builtin::BI__builtin_sadd_overflow:
4429 case Builtin::BI__builtin_uadd_overflow:
4430 case Builtin::BI__builtin_uaddl_overflow:
4431 case Builtin::BI__builtin_uaddll_overflow:
4432 case Builtin::BI__builtin_usub_overflow:
4433 case Builtin::BI__builtin_usubl_overflow:
4434 case Builtin::BI__builtin_usubll_overflow:
4435 case Builtin::BI__builtin_umul_overflow:
4436 case Builtin::BI__builtin_umull_overflow:
4437 case Builtin::BI__builtin_umulll_overflow:
4438 case Builtin::BI__builtin_saddl_overflow:
4439 case Builtin::BI__builtin_saddll_overflow:
4440 case Builtin::BI__builtin_ssub_overflow:
4441 case Builtin::BI__builtin_ssubl_overflow:
4442 case Builtin::BI__builtin_ssubll_overflow:
4443 case Builtin::BI__builtin_smul_overflow:
4444 case Builtin::BI__builtin_smull_overflow:
4445 case Builtin::BI__builtin_smulll_overflow:
4446 return interp__builtin_overflowop(S, OpPC, Call, BuiltinOp: BuiltinID);
4447
4448 case Builtin::BI__builtin_addcb:
4449 case Builtin::BI__builtin_addcs:
4450 case Builtin::BI__builtin_addc:
4451 case Builtin::BI__builtin_addcl:
4452 case Builtin::BI__builtin_addcll:
4453 case Builtin::BI__builtin_subcb:
4454 case Builtin::BI__builtin_subcs:
4455 case Builtin::BI__builtin_subc:
4456 case Builtin::BI__builtin_subcl:
4457 case Builtin::BI__builtin_subcll:
4458 return interp__builtin_carryop(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4459
4460 case Builtin::BI__builtin_clz:
4461 case Builtin::BI__builtin_clzl:
4462 case Builtin::BI__builtin_clzll:
4463 case Builtin::BI__builtin_clzs:
4464 case Builtin::BI__builtin_clzg:
4465 case Builtin::BI__lzcnt16: // Microsoft variants of count leading-zeroes
4466 case Builtin::BI__lzcnt:
4467 case Builtin::BI__lzcnt64:
4468 return interp__builtin_clz(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4469
4470 case Builtin::BI__builtin_ctz:
4471 case Builtin::BI__builtin_ctzl:
4472 case Builtin::BI__builtin_ctzll:
4473 case Builtin::BI__builtin_ctzs:
4474 case Builtin::BI__builtin_ctzg:
4475 return interp__builtin_ctz(S, OpPC, Frame, Call, BuiltinID);
4476
4477 case Builtin::BI__builtin_elementwise_clzg:
4478 case Builtin::BI__builtin_elementwise_ctzg:
4479 return interp__builtin_elementwise_countzeroes(S, OpPC, Frame, Call,
4480 BuiltinID);
4481 case Builtin::BI__builtin_bswapg:
4482 case Builtin::BI__builtin_bswap16:
4483 case Builtin::BI__builtin_bswap32:
4484 case Builtin::BI__builtin_bswap64:
4485 return interp__builtin_bswap(S, OpPC, Frame, Call);
4486
4487 case Builtin::BI__atomic_always_lock_free:
4488 case Builtin::BI__atomic_is_lock_free:
4489 return interp__builtin_atomic_lock_free(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4490
4491 case Builtin::BI__c11_atomic_is_lock_free:
4492 return interp__builtin_c11_atomic_is_lock_free(S, OpPC, Frame, Call);
4493
4494 case Builtin::BI__builtin_complex:
4495 return interp__builtin_complex(S, OpPC, Frame, Call);
4496
4497 case Builtin::BI__builtin_is_aligned:
4498 case Builtin::BI__builtin_align_up:
4499 case Builtin::BI__builtin_align_down:
4500 return interp__builtin_is_aligned_up_down(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4501
4502 case Builtin::BI__builtin_assume_aligned:
4503 return interp__builtin_assume_aligned(S, OpPC, Frame, Call);
4504
4505 case clang::X86::BI__builtin_ia32_crc32qi:
4506 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 1);
4507 case clang::X86::BI__builtin_ia32_crc32hi:
4508 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 2);
4509 case clang::X86::BI__builtin_ia32_crc32si:
4510 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 4);
4511 case clang::X86::BI__builtin_ia32_crc32di:
4512 return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: 8);
4513
4514 case clang::X86::BI__builtin_ia32_bextr_u32:
4515 case clang::X86::BI__builtin_ia32_bextr_u64:
4516 case clang::X86::BI__builtin_ia32_bextri_u32:
4517 case clang::X86::BI__builtin_ia32_bextri_u64:
4518 return interp__builtin_elementwise_int_binop(
4519 S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Idx) {
4520 unsigned BitWidth = Val.getBitWidth();
4521 uint64_t Shift = Idx.extractBitsAsZExtValue(numBits: 8, bitPosition: 0);
4522 uint64_t Length = Idx.extractBitsAsZExtValue(numBits: 8, bitPosition: 8);
4523 if (Length > BitWidth) {
4524 Length = BitWidth;
4525 }
4526
4527 // Handle out of bounds cases.
4528 if (Length == 0 || Shift >= BitWidth)
4529 return APInt(BitWidth, 0);
4530
4531 uint64_t Result = Val.getZExtValue() >> Shift;
4532 Result &= llvm::maskTrailingOnes<uint64_t>(N: Length);
4533 return APInt(BitWidth, Result);
4534 });
4535
4536 case clang::X86::BI__builtin_ia32_bzhi_si:
4537 case clang::X86::BI__builtin_ia32_bzhi_di:
4538 return interp__builtin_elementwise_int_binop(
4539 S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Idx) {
4540 unsigned BitWidth = Val.getBitWidth();
4541 uint64_t Index = Idx.extractBitsAsZExtValue(numBits: 8, bitPosition: 0);
4542 APSInt Result = Val;
4543
4544 if (Index < BitWidth)
4545 Result.clearHighBits(hiBits: BitWidth - Index);
4546
4547 return Result;
4548 });
4549
4550 case clang::X86::BI__builtin_ia32_ktestcqi:
4551 case clang::X86::BI__builtin_ia32_ktestchi:
4552 case clang::X86::BI__builtin_ia32_ktestcsi:
4553 case clang::X86::BI__builtin_ia32_ktestcdi:
4554 return interp__builtin_elementwise_int_binop(
4555 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4556 return APInt(sizeof(unsigned char) * 8, (~A & B) == 0);
4557 });
4558
4559 case clang::X86::BI__builtin_ia32_ktestzqi:
4560 case clang::X86::BI__builtin_ia32_ktestzhi:
4561 case clang::X86::BI__builtin_ia32_ktestzsi:
4562 case clang::X86::BI__builtin_ia32_ktestzdi:
4563 return interp__builtin_elementwise_int_binop(
4564 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4565 return APInt(sizeof(unsigned char) * 8, (A & B) == 0);
4566 });
4567
4568 case clang::X86::BI__builtin_ia32_kortestcqi:
4569 case clang::X86::BI__builtin_ia32_kortestchi:
4570 case clang::X86::BI__builtin_ia32_kortestcsi:
4571 case clang::X86::BI__builtin_ia32_kortestcdi:
4572 return interp__builtin_elementwise_int_binop(
4573 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4574 return APInt(sizeof(unsigned char) * 8, ~(A | B) == 0);
4575 });
4576
4577 case clang::X86::BI__builtin_ia32_kortestzqi:
4578 case clang::X86::BI__builtin_ia32_kortestzhi:
4579 case clang::X86::BI__builtin_ia32_kortestzsi:
4580 case clang::X86::BI__builtin_ia32_kortestzdi:
4581 return interp__builtin_elementwise_int_binop(
4582 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4583 return APInt(sizeof(unsigned char) * 8, (A | B) == 0);
4584 });
4585
4586 case clang::X86::BI__builtin_ia32_kshiftliqi:
4587 case clang::X86::BI__builtin_ia32_kshiftlihi:
4588 case clang::X86::BI__builtin_ia32_kshiftlisi:
4589 case clang::X86::BI__builtin_ia32_kshiftlidi:
4590 return interp__builtin_elementwise_int_binop(
4591 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4592 unsigned Amt = RHS.getZExtValue() & 0xFF;
4593 if (Amt >= LHS.getBitWidth())
4594 return APInt::getZero(numBits: LHS.getBitWidth());
4595 return LHS.shl(shiftAmt: Amt);
4596 });
4597
4598 case clang::X86::BI__builtin_ia32_kshiftriqi:
4599 case clang::X86::BI__builtin_ia32_kshiftrihi:
4600 case clang::X86::BI__builtin_ia32_kshiftrisi:
4601 case clang::X86::BI__builtin_ia32_kshiftridi:
4602 return interp__builtin_elementwise_int_binop(
4603 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4604 unsigned Amt = RHS.getZExtValue() & 0xFF;
4605 if (Amt >= LHS.getBitWidth())
4606 return APInt::getZero(numBits: LHS.getBitWidth());
4607 return LHS.lshr(shiftAmt: Amt);
4608 });
4609
4610 case clang::X86::BI__builtin_ia32_lzcnt_u16:
4611 case clang::X86::BI__builtin_ia32_lzcnt_u32:
4612 case clang::X86::BI__builtin_ia32_lzcnt_u64:
4613 return interp__builtin_elementwise_int_unaryop(
4614 S, OpPC, Call, Fn: [](const APSInt &Src) {
4615 return APInt(Src.getBitWidth(), Src.countLeadingZeros());
4616 });
4617
4618 case clang::X86::BI__builtin_ia32_tzcnt_u16:
4619 case clang::X86::BI__builtin_ia32_tzcnt_u32:
4620 case clang::X86::BI__builtin_ia32_tzcnt_u64:
4621 return interp__builtin_elementwise_int_unaryop(
4622 S, OpPC, Call, Fn: [](const APSInt &Src) {
4623 return APInt(Src.getBitWidth(), Src.countTrailingZeros());
4624 });
4625
4626 case clang::X86::BI__builtin_ia32_pdep_si:
4627 case clang::X86::BI__builtin_ia32_pdep_di:
4628 return interp__builtin_elementwise_int_binop(
4629 S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Mask) {
4630 unsigned BitWidth = Val.getBitWidth();
4631 APInt Result = APInt::getZero(numBits: BitWidth);
4632
4633 for (unsigned I = 0, P = 0; I != BitWidth; ++I) {
4634 if (Mask[I])
4635 Result.setBitVal(BitPosition: I, BitValue: Val[P++]);
4636 }
4637
4638 return Result;
4639 });
4640
4641 case clang::X86::BI__builtin_ia32_pext_si:
4642 case clang::X86::BI__builtin_ia32_pext_di:
4643 return interp__builtin_elementwise_int_binop(
4644 S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Mask) {
4645 unsigned BitWidth = Val.getBitWidth();
4646 APInt Result = APInt::getZero(numBits: BitWidth);
4647
4648 for (unsigned I = 0, P = 0; I != BitWidth; ++I) {
4649 if (Mask[I])
4650 Result.setBitVal(BitPosition: P++, BitValue: Val[I]);
4651 }
4652
4653 return Result;
4654 });
4655
4656 case clang::X86::BI__builtin_ia32_addcarryx_u32:
4657 case clang::X86::BI__builtin_ia32_addcarryx_u64:
4658 case clang::X86::BI__builtin_ia32_subborrow_u32:
4659 case clang::X86::BI__builtin_ia32_subborrow_u64:
4660 return interp__builtin_ia32_addcarry_subborrow(S, OpPC, Frame, Call,
4661 BuiltinOp: BuiltinID);
4662
4663 case Builtin::BI__builtin_os_log_format_buffer_size:
4664 return interp__builtin_os_log_format_buffer_size(S, OpPC, Frame, Call);
4665
4666 case Builtin::BI__builtin_ptrauth_string_discriminator:
4667 return interp__builtin_ptrauth_string_discriminator(S, OpPC, Frame, Call);
4668
4669 case Builtin::BI__builtin_infer_alloc_token:
4670 return interp__builtin_infer_alloc_token(S, OpPC, Frame, Call);
4671
4672 case Builtin::BI__noop:
4673 pushInteger(S, Val: 0, QT: Call->getType());
4674 return true;
4675
4676 case Builtin::BI__builtin_operator_new:
4677 return interp__builtin_operator_new(S, OpPC, Frame, Call);
4678
4679 case Builtin::BI__builtin_operator_delete:
4680 return interp__builtin_operator_delete(S, OpPC, Frame, Call);
4681
4682 case Builtin::BI__arithmetic_fence:
4683 return interp__builtin_arithmetic_fence(S, OpPC, Frame, Call);
4684
4685 case Builtin::BI__builtin_reduce_add:
4686 case Builtin::BI__builtin_reduce_mul:
4687 case Builtin::BI__builtin_reduce_and:
4688 case Builtin::BI__builtin_reduce_or:
4689 case Builtin::BI__builtin_reduce_xor:
4690 case Builtin::BI__builtin_reduce_min:
4691 case Builtin::BI__builtin_reduce_max:
4692 return interp__builtin_vector_reduce(S, OpPC, Call, ID: BuiltinID);
4693
4694 case Builtin::BI__builtin_elementwise_popcount:
4695 return interp__builtin_elementwise_int_unaryop(
4696 S, OpPC, Call, Fn: [](const APSInt &Src) {
4697 return APInt(Src.getBitWidth(), Src.popcount());
4698 });
4699 case Builtin::BI__builtin_elementwise_bitreverse:
4700 return interp__builtin_elementwise_int_unaryop(
4701 S, OpPC, Call, Fn: [](const APSInt &Src) { return Src.reverseBits(); });
4702
4703 case Builtin::BI__builtin_elementwise_abs:
4704 return interp__builtin_elementwise_abs(S, OpPC, Frame, Call, BuiltinID);
4705
4706 case Builtin::BI__builtin_memcpy:
4707 case Builtin::BImemcpy:
4708 case Builtin::BI__builtin_wmemcpy:
4709 case Builtin::BIwmemcpy:
4710 case Builtin::BI__builtin_memmove:
4711 case Builtin::BImemmove:
4712 case Builtin::BI__builtin_wmemmove:
4713 case Builtin::BIwmemmove:
4714 return interp__builtin_memcpy(S, OpPC, Frame, Call, ID: BuiltinID);
4715
4716 case Builtin::BI__builtin_memcmp:
4717 case Builtin::BImemcmp:
4718 case Builtin::BI__builtin_bcmp:
4719 case Builtin::BIbcmp:
4720 case Builtin::BI__builtin_wmemcmp:
4721 case Builtin::BIwmemcmp:
4722 return interp__builtin_memcmp(S, OpPC, Frame, Call, ID: BuiltinID);
4723
4724 case Builtin::BImemchr:
4725 case Builtin::BI__builtin_memchr:
4726 case Builtin::BIstrchr:
4727 case Builtin::BI__builtin_strchr:
4728 case Builtin::BIwmemchr:
4729 case Builtin::BI__builtin_wmemchr:
4730 case Builtin::BIwcschr:
4731 case Builtin::BI__builtin_wcschr:
4732 case Builtin::BI__builtin_char_memchr:
4733 return interp__builtin_memchr(S, OpPC, Call, ID: BuiltinID);
4734
4735 case Builtin::BI__builtin_object_size:
4736 case Builtin::BI__builtin_dynamic_object_size:
4737 return interp__builtin_object_size(S, OpPC, Frame, Call);
4738
4739 case Builtin::BI__builtin_is_within_lifetime:
4740 return interp__builtin_is_within_lifetime(S, OpPC, Call);
4741
4742 case Builtin::BI__builtin_elementwise_add_sat:
4743 return interp__builtin_elementwise_int_binop(
4744 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4745 return LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS);
4746 });
4747
4748 case Builtin::BI__builtin_elementwise_sub_sat:
4749 return interp__builtin_elementwise_int_binop(
4750 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4751 return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
4752 });
4753 case X86::BI__builtin_ia32_extract128i256:
4754 case X86::BI__builtin_ia32_vextractf128_pd256:
4755 case X86::BI__builtin_ia32_vextractf128_ps256:
4756 case X86::BI__builtin_ia32_vextractf128_si256:
4757 return interp__builtin_x86_extract_vector(S, OpPC, Call, ID: BuiltinID);
4758
4759 case X86::BI__builtin_ia32_extractf32x4_256_mask:
4760 case X86::BI__builtin_ia32_extractf32x4_mask:
4761 case X86::BI__builtin_ia32_extractf32x8_mask:
4762 case X86::BI__builtin_ia32_extractf64x2_256_mask:
4763 case X86::BI__builtin_ia32_extractf64x2_512_mask:
4764 case X86::BI__builtin_ia32_extractf64x4_mask:
4765 case X86::BI__builtin_ia32_extracti32x4_256_mask:
4766 case X86::BI__builtin_ia32_extracti32x4_mask:
4767 case X86::BI__builtin_ia32_extracti32x8_mask:
4768 case X86::BI__builtin_ia32_extracti64x2_256_mask:
4769 case X86::BI__builtin_ia32_extracti64x2_512_mask:
4770 case X86::BI__builtin_ia32_extracti64x4_mask:
4771 return interp__builtin_x86_extract_vector_masked(S, OpPC, Call, ID: BuiltinID);
4772
4773 case clang::X86::BI__builtin_ia32_pmulhrsw128:
4774 case clang::X86::BI__builtin_ia32_pmulhrsw256:
4775 case clang::X86::BI__builtin_ia32_pmulhrsw512:
4776 return interp__builtin_elementwise_int_binop(
4777 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4778 return (llvm::APIntOps::mulsExtended(C1: LHS, C2: RHS).ashr(ShiftAmt: 14) + 1)
4779 .extractBits(numBits: 16, bitPosition: 1);
4780 });
4781
4782 case clang::X86::BI__builtin_ia32_movmskps:
4783 case clang::X86::BI__builtin_ia32_movmskpd:
4784 case clang::X86::BI__builtin_ia32_pmovmskb128:
4785 case clang::X86::BI__builtin_ia32_pmovmskb256:
4786 case clang::X86::BI__builtin_ia32_movmskps256:
4787 case clang::X86::BI__builtin_ia32_movmskpd256: {
4788 return interp__builtin_ia32_movmsk_op(S, OpPC, Call);
4789 }
4790
4791 case X86::BI__builtin_ia32_psignb128:
4792 case X86::BI__builtin_ia32_psignb256:
4793 case X86::BI__builtin_ia32_psignw128:
4794 case X86::BI__builtin_ia32_psignw256:
4795 case X86::BI__builtin_ia32_psignd128:
4796 case X86::BI__builtin_ia32_psignd256:
4797 return interp__builtin_elementwise_int_binop(
4798 S, OpPC, Call, Fn: [](const APInt &AElem, const APInt &BElem) {
4799 if (BElem.isZero())
4800 return APInt::getZero(numBits: AElem.getBitWidth());
4801 if (BElem.isNegative())
4802 return -AElem;
4803 return AElem;
4804 });
4805
4806 case clang::X86::BI__builtin_ia32_pavgb128:
4807 case clang::X86::BI__builtin_ia32_pavgw128:
4808 case clang::X86::BI__builtin_ia32_pavgb256:
4809 case clang::X86::BI__builtin_ia32_pavgw256:
4810 case clang::X86::BI__builtin_ia32_pavgb512:
4811 case clang::X86::BI__builtin_ia32_pavgw512:
4812 return interp__builtin_elementwise_int_binop(S, OpPC, Call,
4813 Fn: llvm::APIntOps::avgCeilU);
4814
4815 case clang::X86::BI__builtin_ia32_pmaddubsw128:
4816 case clang::X86::BI__builtin_ia32_pmaddubsw256:
4817 case clang::X86::BI__builtin_ia32_pmaddubsw512:
4818 return interp__builtin_ia32_pmul(
4819 S, OpPC, Call,
4820 Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
4821 const APSInt &HiRHS) {
4822 unsigned BitWidth = 2 * LoLHS.getBitWidth();
4823 return (LoLHS.zext(width: BitWidth) * LoRHS.sext(width: BitWidth))
4824 .sadd_sat(RHS: (HiLHS.zext(width: BitWidth) * HiRHS.sext(width: BitWidth)));
4825 });
4826
4827 case clang::X86::BI__builtin_ia32_pmaddwd128:
4828 case clang::X86::BI__builtin_ia32_pmaddwd256:
4829 case clang::X86::BI__builtin_ia32_pmaddwd512:
4830 return interp__builtin_ia32_pmul(
4831 S, OpPC, Call,
4832 Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
4833 const APSInt &HiRHS) {
4834 unsigned BitWidth = 2 * LoLHS.getBitWidth();
4835 return (LoLHS.sext(width: BitWidth) * LoRHS.sext(width: BitWidth)) +
4836 (HiLHS.sext(width: BitWidth) * HiRHS.sext(width: BitWidth));
4837 });
4838
4839 case clang::X86::BI__builtin_ia32_pmulhuw128:
4840 case clang::X86::BI__builtin_ia32_pmulhuw256:
4841 case clang::X86::BI__builtin_ia32_pmulhuw512:
4842 return interp__builtin_elementwise_int_binop(S, OpPC, Call,
4843 Fn: llvm::APIntOps::mulhu);
4844
4845 case clang::X86::BI__builtin_ia32_pmulhw128:
4846 case clang::X86::BI__builtin_ia32_pmulhw256:
4847 case clang::X86::BI__builtin_ia32_pmulhw512:
4848 return interp__builtin_elementwise_int_binop(S, OpPC, Call,
4849 Fn: llvm::APIntOps::mulhs);
4850
4851 case clang::X86::BI__builtin_ia32_psllv2di:
4852 case clang::X86::BI__builtin_ia32_psllv4di:
4853 case clang::X86::BI__builtin_ia32_psllv4si:
4854 case clang::X86::BI__builtin_ia32_psllv8di:
4855 case clang::X86::BI__builtin_ia32_psllv8hi:
4856 case clang::X86::BI__builtin_ia32_psllv8si:
4857 case clang::X86::BI__builtin_ia32_psllv16hi:
4858 case clang::X86::BI__builtin_ia32_psllv16si:
4859 case clang::X86::BI__builtin_ia32_psllv32hi:
4860 case clang::X86::BI__builtin_ia32_psllwi128:
4861 case clang::X86::BI__builtin_ia32_psllwi256:
4862 case clang::X86::BI__builtin_ia32_psllwi512:
4863 case clang::X86::BI__builtin_ia32_pslldi128:
4864 case clang::X86::BI__builtin_ia32_pslldi256:
4865 case clang::X86::BI__builtin_ia32_pslldi512:
4866 case clang::X86::BI__builtin_ia32_psllqi128:
4867 case clang::X86::BI__builtin_ia32_psllqi256:
4868 case clang::X86::BI__builtin_ia32_psllqi512:
4869 return interp__builtin_elementwise_int_binop(
4870 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4871 if (RHS.uge(RHS: LHS.getBitWidth())) {
4872 return APInt::getZero(numBits: LHS.getBitWidth());
4873 }
4874 return LHS.shl(shiftAmt: RHS.getZExtValue());
4875 });
4876
4877 case clang::X86::BI__builtin_ia32_psrav4si:
4878 case clang::X86::BI__builtin_ia32_psrav8di:
4879 case clang::X86::BI__builtin_ia32_psrav8hi:
4880 case clang::X86::BI__builtin_ia32_psrav8si:
4881 case clang::X86::BI__builtin_ia32_psrav16hi:
4882 case clang::X86::BI__builtin_ia32_psrav16si:
4883 case clang::X86::BI__builtin_ia32_psrav32hi:
4884 case clang::X86::BI__builtin_ia32_psravq128:
4885 case clang::X86::BI__builtin_ia32_psravq256:
4886 case clang::X86::BI__builtin_ia32_psrawi128:
4887 case clang::X86::BI__builtin_ia32_psrawi256:
4888 case clang::X86::BI__builtin_ia32_psrawi512:
4889 case clang::X86::BI__builtin_ia32_psradi128:
4890 case clang::X86::BI__builtin_ia32_psradi256:
4891 case clang::X86::BI__builtin_ia32_psradi512:
4892 case clang::X86::BI__builtin_ia32_psraqi128:
4893 case clang::X86::BI__builtin_ia32_psraqi256:
4894 case clang::X86::BI__builtin_ia32_psraqi512:
4895 return interp__builtin_elementwise_int_binop(
4896 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4897 if (RHS.uge(RHS: LHS.getBitWidth())) {
4898 return LHS.ashr(ShiftAmt: LHS.getBitWidth() - 1);
4899 }
4900 return LHS.ashr(ShiftAmt: RHS.getZExtValue());
4901 });
4902
4903 case clang::X86::BI__builtin_ia32_psrlv2di:
4904 case clang::X86::BI__builtin_ia32_psrlv4di:
4905 case clang::X86::BI__builtin_ia32_psrlv4si:
4906 case clang::X86::BI__builtin_ia32_psrlv8di:
4907 case clang::X86::BI__builtin_ia32_psrlv8hi:
4908 case clang::X86::BI__builtin_ia32_psrlv8si:
4909 case clang::X86::BI__builtin_ia32_psrlv16hi:
4910 case clang::X86::BI__builtin_ia32_psrlv16si:
4911 case clang::X86::BI__builtin_ia32_psrlv32hi:
4912 case clang::X86::BI__builtin_ia32_psrlwi128:
4913 case clang::X86::BI__builtin_ia32_psrlwi256:
4914 case clang::X86::BI__builtin_ia32_psrlwi512:
4915 case clang::X86::BI__builtin_ia32_psrldi128:
4916 case clang::X86::BI__builtin_ia32_psrldi256:
4917 case clang::X86::BI__builtin_ia32_psrldi512:
4918 case clang::X86::BI__builtin_ia32_psrlqi128:
4919 case clang::X86::BI__builtin_ia32_psrlqi256:
4920 case clang::X86::BI__builtin_ia32_psrlqi512:
4921 return interp__builtin_elementwise_int_binop(
4922 S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4923 if (RHS.uge(RHS: LHS.getBitWidth())) {
4924 return APInt::getZero(numBits: LHS.getBitWidth());
4925 }
4926 return LHS.lshr(shiftAmt: RHS.getZExtValue());
4927 });
4928 case clang::X86::BI__builtin_ia32_packsswb128:
4929 case clang::X86::BI__builtin_ia32_packsswb256:
4930 case clang::X86::BI__builtin_ia32_packsswb512:
4931 case clang::X86::BI__builtin_ia32_packssdw128:
4932 case clang::X86::BI__builtin_ia32_packssdw256:
4933 case clang::X86::BI__builtin_ia32_packssdw512:
4934 return interp__builtin_x86_pack(S, OpPC, E: Call, PackFn: [](const APSInt &Src) {
4935 return APInt(Src).truncSSat(width: Src.getBitWidth() / 2);
4936 });
4937 case clang::X86::BI__builtin_ia32_packusdw128:
4938 case clang::X86::BI__builtin_ia32_packusdw256:
4939 case clang::X86::BI__builtin_ia32_packusdw512:
4940 case clang::X86::BI__builtin_ia32_packuswb128:
4941 case clang::X86::BI__builtin_ia32_packuswb256:
4942 case clang::X86::BI__builtin_ia32_packuswb512:
4943 return interp__builtin_x86_pack(S, OpPC, E: Call, PackFn: [](const APSInt &Src) {
4944 return APInt(Src).truncSSatU(width: Src.getBitWidth() / 2);
4945 });
4946
4947 case clang::X86::BI__builtin_ia32_selectss_128:
4948 case clang::X86::BI__builtin_ia32_selectsd_128:
4949 case clang::X86::BI__builtin_ia32_selectsh_128:
4950 case clang::X86::BI__builtin_ia32_selectsbf_128:
4951 return interp__builtin_select_scalar(S, Call);
4952 case clang::X86::BI__builtin_ia32_vprotbi:
4953 case clang::X86::BI__builtin_ia32_vprotdi:
4954 case clang::X86::BI__builtin_ia32_vprotqi:
4955 case clang::X86::BI__builtin_ia32_vprotwi:
4956 case clang::X86::BI__builtin_ia32_prold128:
4957 case clang::X86::BI__builtin_ia32_prold256:
4958 case clang::X86::BI__builtin_ia32_prold512:
4959 case clang::X86::BI__builtin_ia32_prolq128:
4960 case clang::X86::BI__builtin_ia32_prolq256:
4961 case clang::X86::BI__builtin_ia32_prolq512:
4962 return interp__builtin_elementwise_int_binop(
4963 S, OpPC, Call,
4964 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.rotl(rotateAmt: RHS); });
4965
4966 case clang::X86::BI__builtin_ia32_prord128:
4967 case clang::X86::BI__builtin_ia32_prord256:
4968 case clang::X86::BI__builtin_ia32_prord512:
4969 case clang::X86::BI__builtin_ia32_prorq128:
4970 case clang::X86::BI__builtin_ia32_prorq256:
4971 case clang::X86::BI__builtin_ia32_prorq512:
4972 return interp__builtin_elementwise_int_binop(
4973 S, OpPC, Call,
4974 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.rotr(rotateAmt: RHS); });
4975
4976 case Builtin::BI__builtin_elementwise_max:
4977 case Builtin::BI__builtin_elementwise_min:
4978 return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);
4979
4980 case clang::X86::BI__builtin_ia32_phaddw128:
4981 case clang::X86::BI__builtin_ia32_phaddw256:
4982 case clang::X86::BI__builtin_ia32_phaddd128:
4983 case clang::X86::BI__builtin_ia32_phaddd256:
4984 return interp_builtin_horizontal_int_binop(
4985 S, OpPC, Call,
4986 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
4987 case clang::X86::BI__builtin_ia32_phaddsw128:
4988 case clang::X86::BI__builtin_ia32_phaddsw256:
4989 return interp_builtin_horizontal_int_binop(
4990 S, OpPC, Call,
4991 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.sadd_sat(RHS); });
4992 case clang::X86::BI__builtin_ia32_phsubw128:
4993 case clang::X86::BI__builtin_ia32_phsubw256:
4994 case clang::X86::BI__builtin_ia32_phsubd128:
4995 case clang::X86::BI__builtin_ia32_phsubd256:
4996 return interp_builtin_horizontal_int_binop(
4997 S, OpPC, Call,
4998 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS - RHS; });
4999 case clang::X86::BI__builtin_ia32_phsubsw128:
5000 case clang::X86::BI__builtin_ia32_phsubsw256:
5001 return interp_builtin_horizontal_int_binop(
5002 S, OpPC, Call,
5003 Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.ssub_sat(RHS); });
5004 case clang::X86::BI__builtin_ia32_haddpd:
5005 case clang::X86::BI__builtin_ia32_haddps:
5006 case clang::X86::BI__builtin_ia32_haddpd256:
5007 case clang::X86::BI__builtin_ia32_haddps256:
5008 return interp_builtin_horizontal_fp_binop(
5009 S, OpPC, Call,
5010 Fn: [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
5011 APFloat F = LHS;
5012 F.add(RHS, RM);
5013 return F;
5014 });
5015 case clang::X86::BI__builtin_ia32_hsubpd:
5016 case clang::X86::BI__builtin_ia32_hsubps:
5017 case clang::X86::BI__builtin_ia32_hsubpd256:
5018 case clang::X86::BI__builtin_ia32_hsubps256:
5019 return interp_builtin_horizontal_fp_binop(
5020 S, OpPC, Call,
5021 Fn: [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
5022 APFloat F = LHS;
5023 F.subtract(RHS, RM);
5024 return F;
5025 });
5026 case clang::X86::BI__builtin_ia32_addsubpd:
5027 case clang::X86::BI__builtin_ia32_addsubps:
5028 case clang::X86::BI__builtin_ia32_addsubpd256:
5029 case clang::X86::BI__builtin_ia32_addsubps256:
5030 return interp__builtin_ia32_addsub(S, OpPC, Call);
5031
5032 case clang::X86::BI__builtin_ia32_pmuldq128:
5033 case clang::X86::BI__builtin_ia32_pmuldq256:
5034 case clang::X86::BI__builtin_ia32_pmuldq512:
5035 return interp__builtin_ia32_pmul(
5036 S, OpPC, Call,
5037 Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
5038 const APSInt &HiRHS) {
5039 return llvm::APIntOps::mulsExtended(C1: LoLHS, C2: LoRHS);
5040 });
5041
5042 case clang::X86::BI__builtin_ia32_pmuludq128:
5043 case clang::X86::BI__builtin_ia32_pmuludq256:
5044 case clang::X86::BI__builtin_ia32_pmuludq512:
5045 return interp__builtin_ia32_pmul(
5046 S, OpPC, Call,
5047 Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
5048 const APSInt &HiRHS) {
5049 return llvm::APIntOps::muluExtended(C1: LoLHS, C2: LoRHS);
5050 });
5051
5052 case clang::X86::BI__builtin_ia32_pclmulqdq128:
5053 case clang::X86::BI__builtin_ia32_pclmulqdq256:
5054 case clang::X86::BI__builtin_ia32_pclmulqdq512:
5055 return interp__builtin_ia32_pclmulqdq(S, OpPC, Call);
5056
5057 case Builtin::BI__builtin_elementwise_fma:
5058 return interp__builtin_elementwise_triop_fp(
5059 S, OpPC, Call,
5060 Fn: [](const APFloat &X, const APFloat &Y, const APFloat &Z,
5061 llvm::RoundingMode RM) {
5062 APFloat F = X;
5063 F.fusedMultiplyAdd(Multiplicand: Y, Addend: Z, RM);
5064 return F;
5065 });
5066
5067 case X86::BI__builtin_ia32_vpmadd52luq128:
5068 case X86::BI__builtin_ia32_vpmadd52luq256:
5069 case X86::BI__builtin_ia32_vpmadd52luq512:
5070 return interp__builtin_elementwise_triop(
5071 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B, const APSInt &C) {
5072 return A + (B.trunc(width: 52) * C.trunc(width: 52)).zext(width: 64);
5073 });
5074 case X86::BI__builtin_ia32_vpmadd52huq128:
5075 case X86::BI__builtin_ia32_vpmadd52huq256:
5076 case X86::BI__builtin_ia32_vpmadd52huq512:
5077 return interp__builtin_elementwise_triop(
5078 S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B, const APSInt &C) {
5079 return A + llvm::APIntOps::mulhu(C1: B.trunc(width: 52), C2: C.trunc(width: 52)).zext(width: 64);
5080 });
5081
5082 case X86::BI__builtin_ia32_vpshldd128:
5083 case X86::BI__builtin_ia32_vpshldd256:
5084 case X86::BI__builtin_ia32_vpshldd512:
5085 case X86::BI__builtin_ia32_vpshldq128:
5086 case X86::BI__builtin_ia32_vpshldq256:
5087 case X86::BI__builtin_ia32_vpshldq512:
5088 case X86::BI__builtin_ia32_vpshldw128:
5089 case X86::BI__builtin_ia32_vpshldw256:
5090 case X86::BI__builtin_ia32_vpshldw512:
5091 return interp__builtin_elementwise_triop(
5092 S, OpPC, Call,
5093 Fn: [](const APSInt &Hi, const APSInt &Lo, const APSInt &Amt) {
5094 return llvm::APIntOps::fshl(Hi, Lo, Shift: Amt);
5095 });
5096
5097 case X86::BI__builtin_ia32_vpshrdd128:
5098 case X86::BI__builtin_ia32_vpshrdd256:
5099 case X86::BI__builtin_ia32_vpshrdd512:
5100 case X86::BI__builtin_ia32_vpshrdq128:
5101 case X86::BI__builtin_ia32_vpshrdq256:
5102 case X86::BI__builtin_ia32_vpshrdq512:
5103 case X86::BI__builtin_ia32_vpshrdw128:
5104 case X86::BI__builtin_ia32_vpshrdw256:
5105 case X86::BI__builtin_ia32_vpshrdw512:
5106 // NOTE: Reversed Hi/Lo operands.
5107 return interp__builtin_elementwise_triop(
5108 S, OpPC, Call,
5109 Fn: [](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) {
5110 return llvm::APIntOps::fshr(Hi, Lo, Shift: Amt);
5111 });
5112 case X86::BI__builtin_ia32_vpconflictsi_128:
5113 case X86::BI__builtin_ia32_vpconflictsi_256:
5114 case X86::BI__builtin_ia32_vpconflictsi_512:
5115 case X86::BI__builtin_ia32_vpconflictdi_128:
5116 case X86::BI__builtin_ia32_vpconflictdi_256:
5117 case X86::BI__builtin_ia32_vpconflictdi_512:
5118 return interp__builtin_ia32_vpconflict(S, OpPC, Call);
5119 case clang::X86::BI__builtin_ia32_blendpd:
5120 case clang::X86::BI__builtin_ia32_blendpd256:
5121 case clang::X86::BI__builtin_ia32_blendps:
5122 case clang::X86::BI__builtin_ia32_blendps256:
5123 case clang::X86::BI__builtin_ia32_pblendw128:
5124 case clang::X86::BI__builtin_ia32_pblendw256:
5125 case clang::X86::BI__builtin_ia32_pblendd128:
5126 case clang::X86::BI__builtin_ia32_pblendd256:
5127 return interp__builtin_ia32_shuffle_generic(
5128 S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5129 // Bit index for mask.
5130 unsigned MaskBit = (ShuffleMask >> (DstIdx % 8)) & 0x1;
5131 unsigned SrcVecIdx = MaskBit ? 1 : 0; // 1 = TrueVec, 0 = FalseVec
5132 return std::pair<unsigned, int>{SrcVecIdx, static_cast<int>(DstIdx)};
5133 });
5134
5135
5136
5137 case clang::X86::BI__builtin_ia32_blendvpd:
5138 case clang::X86::BI__builtin_ia32_blendvpd256:
5139 case clang::X86::BI__builtin_ia32_blendvps:
5140 case clang::X86::BI__builtin_ia32_blendvps256:
5141 return interp__builtin_elementwise_triop_fp(
5142 S, OpPC, Call,
5143 Fn: [](const APFloat &F, const APFloat &T, const APFloat &C,
5144 llvm::RoundingMode) { return C.isNegative() ? T : F; });
5145
5146 case clang::X86::BI__builtin_ia32_pblendvb128:
5147 case clang::X86::BI__builtin_ia32_pblendvb256:
5148 return interp__builtin_elementwise_triop(
5149 S, OpPC, Call, Fn: [](const APSInt &F, const APSInt &T, const APSInt &C) {
5150 return ((APInt)C).isNegative() ? T : F;
5151 });
  // PTEST/VTEST "z" forms: result is the ZF condition, set when A & B == 0.
  case X86::BI__builtin_ia32_ptestz128:
  case X86::BI__builtin_ia32_ptestz256:
  case X86::BI__builtin_ia32_vtestzps:
  case X86::BI__builtin_ia32_vtestzps256:
  case X86::BI__builtin_ia32_vtestzpd:
  case X86::BI__builtin_ia32_vtestzpd256:
    return interp__builtin_ia32_test_op(
        S, OpPC, Call,
        Fn: [](const APInt &A, const APInt &B) { return (A & B) == 0; });
  // "c" forms: result is the CF condition, set when ~A & B == 0.
  case X86::BI__builtin_ia32_ptestc128:
  case X86::BI__builtin_ia32_ptestc256:
  case X86::BI__builtin_ia32_vtestcps:
  case X86::BI__builtin_ia32_vtestcps256:
  case X86::BI__builtin_ia32_vtestcpd:
  case X86::BI__builtin_ia32_vtestcpd256:
    return interp__builtin_ia32_test_op(
        S, OpPC, Call,
        Fn: [](const APInt &A, const APInt &B) { return (~A & B) == 0; });
  // "nzc" forms: true when neither the ZF nor the CF condition holds.
  case X86::BI__builtin_ia32_ptestnzc128:
  case X86::BI__builtin_ia32_ptestnzc256:
  case X86::BI__builtin_ia32_vtestnzcps:
  case X86::BI__builtin_ia32_vtestnzcps256:
  case X86::BI__builtin_ia32_vtestnzcpd:
  case X86::BI__builtin_ia32_vtestnzcpd256:
    return interp__builtin_ia32_test_op(
        S, OpPC, Call, Fn: [](const APInt &A, const APInt &B) {
          return ((A & B) != 0) && ((~A & B) != 0);
        });
  // AVX-512 masked select family: one shared handler merges two vectors
  // under a bitmask, for every element type/width combination below.
  case X86::BI__builtin_ia32_selectb_128:
  case X86::BI__builtin_ia32_selectb_256:
  case X86::BI__builtin_ia32_selectb_512:
  case X86::BI__builtin_ia32_selectw_128:
  case X86::BI__builtin_ia32_selectw_256:
  case X86::BI__builtin_ia32_selectw_512:
  case X86::BI__builtin_ia32_selectd_128:
  case X86::BI__builtin_ia32_selectd_256:
  case X86::BI__builtin_ia32_selectd_512:
  case X86::BI__builtin_ia32_selectq_128:
  case X86::BI__builtin_ia32_selectq_256:
  case X86::BI__builtin_ia32_selectq_512:
  case X86::BI__builtin_ia32_selectph_128:
  case X86::BI__builtin_ia32_selectph_256:
  case X86::BI__builtin_ia32_selectph_512:
  case X86::BI__builtin_ia32_selectpbf_128:
  case X86::BI__builtin_ia32_selectpbf_256:
  case X86::BI__builtin_ia32_selectpbf_512:
  case X86::BI__builtin_ia32_selectps_128:
  case X86::BI__builtin_ia32_selectps_256:
  case X86::BI__builtin_ia32_selectps_512:
  case X86::BI__builtin_ia32_selectpd_128:
  case X86::BI__builtin_ia32_selectpd_256:
  case X86::BI__builtin_ia32_selectpd_512:
    return interp__builtin_select(S, OpPC, Call);
5205
  // SHUFPS: within each 128-bit lane the low half of the destination reads
  // from the first source and the high half from the second; two imm8 bits
  // per element (the 8-bit pattern repeats across lanes) pick the element
  // inside the source lane.
  case X86::BI__builtin_ia32_shufps:
  case X86::BI__builtin_ia32_shufps256:
  case X86::BI__builtin_ia32_shufps512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned NumElemPerLane = 4;
          unsigned NumSelectableElems = NumElemPerLane / 2;
          unsigned BitsPerElem = 2;
          unsigned IndexMask = 0x3;
          unsigned MaskBits = 8;
          unsigned Lane = DstIdx / NumElemPerLane;
          unsigned ElemInLane = DstIdx % NumElemPerLane;
          unsigned LaneOffset = Lane * NumElemPerLane;
          // First half of each lane reads vector 0, second half vector 1.
          unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
          unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
          unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
          return std::pair<unsigned, int>{SrcIdx,
                                          static_cast<int>(LaneOffset + Index)};
        });
  // SHUFPD: same scheme, but 2 elements per 128-bit lane and a single
  // imm8 bit per element.
  case X86::BI__builtin_ia32_shufpd:
  case X86::BI__builtin_ia32_shufpd256:
  case X86::BI__builtin_ia32_shufpd512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned NumElemPerLane = 2;
          unsigned NumSelectableElems = NumElemPerLane / 2;
          unsigned BitsPerElem = 1;
          unsigned IndexMask = 0x1;
          unsigned MaskBits = 8;
          unsigned Lane = DstIdx / NumElemPerLane;
          unsigned ElemInLane = DstIdx % NumElemPerLane;
          unsigned LaneOffset = Lane * NumElemPerLane;
          unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
          unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
          unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
          return std::pair<unsigned, int>{SrcIdx,
                                          static_cast<int>(LaneOffset + Index)};
        });
5244
  // GFNI affine transforms over GF(2^8); the "inv" variants first invert
  // each byte in the field before applying the affine matrix.
  case X86::BI__builtin_ia32_vgf2p8affineinvqb_v16qi:
  case X86::BI__builtin_ia32_vgf2p8affineinvqb_v32qi:
  case X86::BI__builtin_ia32_vgf2p8affineinvqb_v64qi:
    return interp_builtin_ia32_gfni_affine(S, OpPC, Call, Inverse: true);
  case X86::BI__builtin_ia32_vgf2p8affineqb_v16qi:
  case X86::BI__builtin_ia32_vgf2p8affineqb_v32qi:
  case X86::BI__builtin_ia32_vgf2p8affineqb_v64qi:
    return interp_builtin_ia32_gfni_affine(S, OpPC, Call, Inverse: false);

  // GFNI byte-wise multiplication in GF(2^8).
  case X86::BI__builtin_ia32_vgf2p8mulb_v16qi:
  case X86::BI__builtin_ia32_vgf2p8mulb_v32qi:
  case X86::BI__builtin_ia32_vgf2p8mulb_v64qi:
    return interp__builtin_ia32_gfni_mul(S, OpPC, Call);
5258
  // INSERTPS: imm8 encodes count_s (bits 7:6), count_d (bits 5:4) and a
  // zero mask (bits 3:0). The zero mask takes precedence over insertion.
  case X86::BI__builtin_ia32_insertps128:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Mask) {
          // Bits [3:0]: zero mask - if bit is set, zero this element
          if ((Mask & (1 << DstIdx)) != 0) {
            return std::pair<unsigned, int>{0, -1};
          }
          // Bits [7:6]: select element from source vector Y (0-3)
          // Bits [5:4]: select destination position (0-3)
          unsigned SrcElem = (Mask >> 6) & 0x3;
          unsigned DstElem = (Mask >> 4) & 0x3;
          if (DstIdx == DstElem) {
            // Insert element from source vector (B) at this position
            return std::pair<unsigned, int>{1, static_cast<int>(SrcElem)};
          } else {
            // Copy from destination vector (A)
            return std::pair<unsigned, int>{0, static_cast<int>(DstIdx)};
          }
        });
  // VPERM* variable permutes (single source): each destination element is
  // indexed by its corresponding control element, masked to log2(NumElems)
  // bits. The groups below are keyed by element count of the vector.
  case X86::BI__builtin_ia32_permvarsi256:
  case X86::BI__builtin_ia32_permvarsf256:
  case X86::BI__builtin_ia32_permvardf512:
  case X86::BI__builtin_ia32_permvardi512:
  case X86::BI__builtin_ia32_permvarhi128:
    // 8-element vectors: 3 index bits.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x7;
          return std::pair<unsigned, int>{0, Offset};
        });
  case X86::BI__builtin_ia32_permvarqi128:
  case X86::BI__builtin_ia32_permvarhi256:
  case X86::BI__builtin_ia32_permvarsi512:
  case X86::BI__builtin_ia32_permvarsf512:
    // 16-element vectors: 4 index bits.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0xF;
          return std::pair<unsigned, int>{0, Offset};
        });
  case X86::BI__builtin_ia32_permvardi256:
  case X86::BI__builtin_ia32_permvardf256:
    // 4-element vectors: 2 index bits.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x3;
          return std::pair<unsigned, int>{0, Offset};
        });
  case X86::BI__builtin_ia32_permvarqi256:
  case X86::BI__builtin_ia32_permvarhi512:
    // 32-element vectors: 5 index bits.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x1F;
          return std::pair<unsigned, int>{0, Offset};
        });
  case X86::BI__builtin_ia32_permvarqi512:
    // 64-element vector: 6 index bits.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x3F;
          return std::pair<unsigned, int>{0, Offset};
        });
  // VPERMI2* two-source permutes: the low log2(NumElems) control bits pick
  // the element, and the next bit picks which of the two data operands it
  // comes from. Groups below are keyed by element count.
  case X86::BI__builtin_ia32_vpermi2varq128:
  case X86::BI__builtin_ia32_vpermi2varpd128:
    // 2 elements: 1 index bit, source select in bit 1.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x1;
          unsigned SrcIdx = (ShuffleMask >> 1) & 0x1;
          return std::pair<unsigned, int>{SrcIdx, Offset};
        });
  case X86::BI__builtin_ia32_vpermi2vard128:
  case X86::BI__builtin_ia32_vpermi2varps128:
  case X86::BI__builtin_ia32_vpermi2varq256:
  case X86::BI__builtin_ia32_vpermi2varpd256:
    // 4 elements: 2 index bits, source select in bit 2.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x3;
          unsigned SrcIdx = (ShuffleMask >> 2) & 0x1;
          return std::pair<unsigned, int>{SrcIdx, Offset};
        });
  case X86::BI__builtin_ia32_vpermi2varhi128:
  case X86::BI__builtin_ia32_vpermi2vard256:
  case X86::BI__builtin_ia32_vpermi2varps256:
  case X86::BI__builtin_ia32_vpermi2varq512:
  case X86::BI__builtin_ia32_vpermi2varpd512:
    // 8 elements: 3 index bits, source select in bit 3.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x7;
          unsigned SrcIdx = (ShuffleMask >> 3) & 0x1;
          return std::pair<unsigned, int>{SrcIdx, Offset};
        });
  case X86::BI__builtin_ia32_vpermi2varqi128:
  case X86::BI__builtin_ia32_vpermi2varhi256:
  case X86::BI__builtin_ia32_vpermi2vard512:
  case X86::BI__builtin_ia32_vpermi2varps512:
    // 16 elements: 4 index bits, source select in bit 4.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0xF;
          unsigned SrcIdx = (ShuffleMask >> 4) & 0x1;
          return std::pair<unsigned, int>{SrcIdx, Offset};
        });
  case X86::BI__builtin_ia32_vpermi2varqi256:
  case X86::BI__builtin_ia32_vpermi2varhi512:
    // 32 elements: 5 index bits, source select in bit 5.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x1F;
          unsigned SrcIdx = (ShuffleMask >> 5) & 0x1;
          return std::pair<unsigned, int>{SrcIdx, Offset};
        });
  case X86::BI__builtin_ia32_vpermi2varqi512:
    // 64 elements: 6 index bits, source select in bit 6.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          int Offset = ShuffleMask & 0x3F;
          unsigned SrcIdx = (ShuffleMask >> 6) & 0x1;
          return std::pair<unsigned, int>{SrcIdx, Offset};
        });
  // VPERM2F128/VPERMI128: imm8 holds a 4-bit control per destination
  // 128-bit lane (bit 3 = zero the lane, bit 1 = source vector select,
  // bit 0 = source lane select).
  case X86::BI__builtin_ia32_vperm2f128_pd256:
  case X86::BI__builtin_ia32_vperm2f128_ps256:
  case X86::BI__builtin_ia32_vperm2f128_si256:
  case X86::BI__builtin_ia32_permti256: {
    unsigned NumElements =
        Call->getArg(Arg: 0)->getType()->castAs<VectorType>()->getNumElements();
    // NumElements >> 2 equals log2(elements per 128-bit lane) for the
    // 4- and 8-element vectors handled here.
    unsigned PreservedBitsCnt = NumElements >> 2;
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call,
        GetSourceIndex: [PreservedBitsCnt](unsigned DstIdx, unsigned ShuffleMask) {
          // Select the 4-bit control nibble for this destination lane.
          unsigned ControlBitsCnt = DstIdx >> PreservedBitsCnt << 2;
          unsigned ControlBits = ShuffleMask >> ControlBitsCnt;

          if (ControlBits & 0b1000)
            return std::make_pair(x: 0u, y: -1);

          unsigned SrcVecIdx = (ControlBits & 0b10) >> 1;
          unsigned PreservedBitsMask = (1 << PreservedBitsCnt) - 1;
          int SrcIdx = ((ControlBits & 0b1) << PreservedBitsCnt) |
                       (DstIdx & PreservedBitsMask);
          return std::make_pair(x&: SrcVecIdx, y&: SrcIdx);
        });
  }
  // PSHUFB: per-byte shuffle confined to each 128-bit lane; a set high bit
  // in the control byte zeroes the destination byte, otherwise the low
  // 4 bits index within the lane.
  case X86::BI__builtin_ia32_pshufb128:
  case X86::BI__builtin_ia32_pshufb256:
  case X86::BI__builtin_ia32_pshufb512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          uint8_t Ctlb = static_cast<uint8_t>(ShuffleMask);
          if (Ctlb & 0x80)
            return std::make_pair(x: 0, y: -1);

          unsigned LaneBase = (DstIdx / 16) * 16;
          unsigned SrcOffset = Ctlb & 0x0F;
          unsigned SrcIdx = LaneBase + SrcOffset;
          return std::make_pair(x: 0, y: static_cast<int>(SrcIdx));
        });
5408
  // PSHUFLW: shuffles the four low words of each 128-bit lane using two
  // imm8 bits per word; the four high words pass through unchanged.
  case X86::BI__builtin_ia32_pshuflw:
  case X86::BI__builtin_ia32_pshuflw256:
  case X86::BI__builtin_ia32_pshuflw512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned LaneBase = (DstIdx / 8) * 8;
          unsigned LaneIdx = DstIdx % 8;
          if (LaneIdx < 4) {
            unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
            return std::make_pair(x: 0, y: static_cast<int>(LaneBase + Sel));
          }

          return std::make_pair(x: 0, y: static_cast<int>(DstIdx));
        });

  // PSHUFHW: mirror image — shuffles the four high words of each lane,
  // leaving the low words untouched.
  case X86::BI__builtin_ia32_pshufhw:
  case X86::BI__builtin_ia32_pshufhw256:
  case X86::BI__builtin_ia32_pshufhw512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned LaneBase = (DstIdx / 8) * 8;
          unsigned LaneIdx = DstIdx % 8;
          if (LaneIdx >= 4) {
            unsigned Sel = (ShuffleMask >> (2 * (LaneIdx - 4))) & 0x3;
            return std::make_pair(x: 0, y: static_cast<int>(LaneBase + 4 + Sel));
          }

          return std::make_pair(x: 0, y: static_cast<int>(DstIdx));
        });

  // PSHUFD / VPERMILPS (immediate): full per-lane dword shuffle — two imm8
  // bits per 32-bit element, pattern repeated in every 128-bit lane.
  case X86::BI__builtin_ia32_pshufd:
  case X86::BI__builtin_ia32_pshufd256:
  case X86::BI__builtin_ia32_pshufd512:
  case X86::BI__builtin_ia32_vpermilps:
  case X86::BI__builtin_ia32_vpermilps256:
  case X86::BI__builtin_ia32_vpermilps512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned LaneBase = (DstIdx / 4) * 4;
          unsigned LaneIdx = DstIdx % 4;
          unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
          return std::make_pair(x: 0, y: static_cast<int>(LaneBase + Sel));
        });
5452
  // VPERMILPD (variable): bit 1 of each control element selects which of
  // the two doubles within the same 128-bit lane to read.
  case X86::BI__builtin_ia32_vpermilvarpd:
  case X86::BI__builtin_ia32_vpermilvarpd256:
  case X86::BI__builtin_ia32_vpermilvarpd512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned NumElemPerLane = 2;
          unsigned Lane = DstIdx / NumElemPerLane;
          unsigned Offset = ShuffleMask & 0b10 ? 1 : 0;
          return std::make_pair(
              x: 0, y: static_cast<int>(Lane * NumElemPerLane + Offset));
        });

  // VPERMILPS (variable): bits [1:0] of each control element select among
  // the four floats within the same 128-bit lane.
  case X86::BI__builtin_ia32_vpermilvarps:
  case X86::BI__builtin_ia32_vpermilvarps256:
  case X86::BI__builtin_ia32_vpermilvarps512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
          unsigned NumElemPerLane = 4;
          unsigned Lane = DstIdx / NumElemPerLane;
          unsigned Offset = ShuffleMask & 0b11;
          return std::make_pair(
              x: 0, y: static_cast<int>(Lane * NumElemPerLane + Offset));
        });

  // VPERMILPD (immediate): one imm8 bit per element selects within its
  // 128-bit lane; the 8-bit pattern repeats for wider vectors.
  case X86::BI__builtin_ia32_vpermilpd:
  case X86::BI__builtin_ia32_vpermilpd256:
  case X86::BI__builtin_ia32_vpermilpd512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Control) {
          unsigned NumElemPerLane = 2;
          unsigned BitsPerElem = 1;
          unsigned MaskBits = 8;
          unsigned IndexMask = 0x1;
          unsigned Lane = DstIdx / NumElemPerLane;
          unsigned LaneOffset = Lane * NumElemPerLane;
          unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
          unsigned Index = (Control >> BitIndex) & IndexMask;
          return std::make_pair(x: 0, y: static_cast<int>(LaneOffset + Index));
        });

  // VPERMPD/VPERMQ (immediate): whole-vector permute of four 64-bit
  // elements, two imm8 bits per element (may cross 128-bit lanes).
  case X86::BI__builtin_ia32_permdf256:
  case X86::BI__builtin_ia32_permdi256:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Control) {
          // permute4x64 operates on 4 64-bit elements
          // For element i (0-3), extract bits [2*i+1:2*i] from Control
          unsigned Index = (Control >> (2 * DstIdx)) & 0x3;
          return std::make_pair(x: 0, y: static_cast<int>(Index));
        });
5502
  // VPMULTISHIFTQB: per-qword multi-bit-field extraction; delegated.
  case X86::BI__builtin_ia32_vpmultishiftqb128:
  case X86::BI__builtin_ia32_vpmultishiftqb256:
  case X86::BI__builtin_ia32_vpmultishiftqb512:
    return interp__builtin_ia32_multishiftqb(S, OpPC, Call);
  // AVX-512 mask-register logic ops (k-registers of 8/16/32/64 bits):
  // kand = LHS & RHS.
  case X86::BI__builtin_ia32_kandqi:
  case X86::BI__builtin_ia32_kandhi:
  case X86::BI__builtin_ia32_kandsi:
  case X86::BI__builtin_ia32_kanddi:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call,
        Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; });

  // kandn = ~LHS & RHS.
  case X86::BI__builtin_ia32_kandnqi:
  case X86::BI__builtin_ia32_kandnhi:
  case X86::BI__builtin_ia32_kandnsi:
  case X86::BI__builtin_ia32_kandndi:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call,
        Fn: [](const APSInt &LHS, const APSInt &RHS) { return ~LHS & RHS; });

  // kor = LHS | RHS.
  case X86::BI__builtin_ia32_korqi:
  case X86::BI__builtin_ia32_korhi:
  case X86::BI__builtin_ia32_korsi:
  case X86::BI__builtin_ia32_kordi:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call,
        Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS | RHS; });

  // kxnor = ~(LHS ^ RHS).
  case X86::BI__builtin_ia32_kxnorqi:
  case X86::BI__builtin_ia32_kxnorhi:
  case X86::BI__builtin_ia32_kxnorsi:
  case X86::BI__builtin_ia32_kxnordi:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call,
        Fn: [](const APSInt &LHS, const APSInt &RHS) { return ~(LHS ^ RHS); });

  // kxor = LHS ^ RHS.
  case X86::BI__builtin_ia32_kxorqi:
  case X86::BI__builtin_ia32_kxorhi:
  case X86::BI__builtin_ia32_kxorsi:
  case X86::BI__builtin_ia32_kxordi:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call,
        Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS ^ RHS; });

  // knot = ~Src.
  case X86::BI__builtin_ia32_knotqi:
  case X86::BI__builtin_ia32_knothi:
  case X86::BI__builtin_ia32_knotsi:
  case X86::BI__builtin_ia32_knotdi:
    return interp__builtin_elementwise_int_unaryop(
        S, OpPC, Call, Fn: [](const APSInt &Src) { return ~Src; });

  // kadd = LHS + RHS (modular addition on the mask register width).
  case X86::BI__builtin_ia32_kaddqi:
  case X86::BI__builtin_ia32_kaddhi:
  case X86::BI__builtin_ia32_kaddsi:
  case X86::BI__builtin_ia32_kadddi:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call,
        Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
5561
  // kmov: identity copy of the mask value.
  case X86::BI__builtin_ia32_kmovb:
  case X86::BI__builtin_ia32_kmovw:
  case X86::BI__builtin_ia32_kmovd:
  case X86::BI__builtin_ia32_kmovq:
    return interp__builtin_elementwise_int_unaryop(
        S, OpPC, Call, Fn: [](const APSInt &Src) { return Src; });

  case X86::BI__builtin_ia32_kunpckhi:
  case X86::BI__builtin_ia32_kunpckdi:
  case X86::BI__builtin_ia32_kunpcksi:
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
          // Generic kunpack: extract lower half of each operand and concatenate
          // Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0]
          unsigned BW = A.getBitWidth();
          return APSInt(A.trunc(width: BW / 2).concat(NewLSB: B.trunc(width: BW / 2)),
                        A.isUnsigned());
        });

  // PHMINPOSUW: horizontal minimum + its position; delegated.
  case X86::BI__builtin_ia32_phminposuw128:
    return interp__builtin_ia32_phminposuw(S, OpPC, Call);
5583
  // PSRA (arithmetic right shift by a count vector): counts >= element
  // width saturate to a full sign-fill (shift by Width - 1).
  case X86::BI__builtin_ia32_psraq128:
  case X86::BI__builtin_ia32_psraq256:
  case X86::BI__builtin_ia32_psraq512:
  case X86::BI__builtin_ia32_psrad128:
  case X86::BI__builtin_ia32_psrad256:
  case X86::BI__builtin_ia32_psrad512:
  case X86::BI__builtin_ia32_psraw128:
  case X86::BI__builtin_ia32_psraw256:
  case X86::BI__builtin_ia32_psraw512:
    return interp__builtin_ia32_shift_with_count(
        S, OpPC, Call,
        ShiftOp: [](const APInt &Elt, uint64_t Count) { return Elt.ashr(ShiftAmt: Count); },
        OverflowOp: [](const APInt &Elt, unsigned Width) { return Elt.ashr(ShiftAmt: Width - 1); });

  // PSLL (logical left shift): oversized counts produce zero.
  case X86::BI__builtin_ia32_psllq128:
  case X86::BI__builtin_ia32_psllq256:
  case X86::BI__builtin_ia32_psllq512:
  case X86::BI__builtin_ia32_pslld128:
  case X86::BI__builtin_ia32_pslld256:
  case X86::BI__builtin_ia32_pslld512:
  case X86::BI__builtin_ia32_psllw128:
  case X86::BI__builtin_ia32_psllw256:
  case X86::BI__builtin_ia32_psllw512:
    return interp__builtin_ia32_shift_with_count(
        S, OpPC, Call,
        ShiftOp: [](const APInt &Elt, uint64_t Count) { return Elt.shl(shiftAmt: Count); },
        OverflowOp: [](const APInt &Elt, unsigned Width) { return APInt::getZero(numBits: Width); });

  // PSRL (logical right shift): oversized counts produce zero.
  case X86::BI__builtin_ia32_psrlq128:
  case X86::BI__builtin_ia32_psrlq256:
  case X86::BI__builtin_ia32_psrlq512:
  case X86::BI__builtin_ia32_psrld128:
  case X86::BI__builtin_ia32_psrld256:
  case X86::BI__builtin_ia32_psrld512:
  case X86::BI__builtin_ia32_psrlw128:
  case X86::BI__builtin_ia32_psrlw256:
  case X86::BI__builtin_ia32_psrlw512:
    return interp__builtin_ia32_shift_with_count(
        S, OpPC, Call,
        ShiftOp: [](const APInt &Elt, uint64_t Count) { return Elt.lshr(shiftAmt: Count); },
        OverflowOp: [](const APInt &Elt, unsigned Width) { return APInt::getZero(numBits: Width); });
5625
  // VPTERNLOG: three-input bitwise ternary logic. The _mask forms merge
  // with the passthrough operand; the _maskz forms zero masked-off lanes.
  case X86::BI__builtin_ia32_pternlogd128_mask:
  case X86::BI__builtin_ia32_pternlogd256_mask:
  case X86::BI__builtin_ia32_pternlogd512_mask:
  case X86::BI__builtin_ia32_pternlogq128_mask:
  case X86::BI__builtin_ia32_pternlogq256_mask:
  case X86::BI__builtin_ia32_pternlogq512_mask:
    return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/false);
  case X86::BI__builtin_ia32_pternlogd128_maskz:
  case X86::BI__builtin_ia32_pternlogd256_maskz:
  case X86::BI__builtin_ia32_pternlogd512_maskz:
  case X86::BI__builtin_ia32_pternlogq128_maskz:
  case X86::BI__builtin_ia32_pternlogq256_maskz:
  case X86::BI__builtin_ia32_pternlogq512_maskz:
    return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/true);
  // Generic funnel shifts map directly onto APIntOps::fshl/fshr.
  case Builtin::BI__builtin_elementwise_fshl:
    return interp__builtin_elementwise_triop(S, OpPC, Call,
                                             Fn: llvm::APIntOps::fshl);
  case Builtin::BI__builtin_elementwise_fshr:
    return interp__builtin_elementwise_triop(S, OpPC, Call,
                                             Fn: llvm::APIntOps::fshr);
5646
  // SHUFF32X4/SHUFI64X2 etc.: shuffle whole 128-bit lanes. The destination
  // lane's position fixes the source vector (low half of lanes from A,
  // high half from B); the imm8 selects which source lane to copy.
  case X86::BI__builtin_ia32_shuf_f32x4_256:
  case X86::BI__builtin_ia32_shuf_i32x4_256:
  case X86::BI__builtin_ia32_shuf_f64x2_256:
  case X86::BI__builtin_ia32_shuf_i64x2_256:
  case X86::BI__builtin_ia32_shuf_f32x4:
  case X86::BI__builtin_ia32_shuf_i32x4:
  case X86::BI__builtin_ia32_shuf_f64x2:
  case X86::BI__builtin_ia32_shuf_i64x2: {
    // Destination and sources A, B all have the same type.
    QualType VecQT = Call->getArg(Arg: 0)->getType();
    const auto *VecT = VecQT->castAs<VectorType>();
    unsigned NumElems = VecT->getNumElements();
    unsigned ElemBits = S.getASTContext().getTypeSize(T: VecT->getElementType());
    unsigned LaneBits = 128u;
    unsigned NumLanes = (NumElems * ElemBits) / LaneBits;
    unsigned NumElemsPerLane = LaneBits / ElemBits;

    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call,
        GetSourceIndex: [NumLanes, NumElemsPerLane](unsigned DstIdx, unsigned ShuffleMask) {
          // DstIdx determines source. ShuffleMask selects lane in source.
          unsigned BitsPerElem = NumLanes / 2;
          unsigned IndexMask = (1u << BitsPerElem) - 1;
          unsigned Lane = DstIdx / NumElemsPerLane;
          unsigned SrcIdx = (Lane < NumLanes / 2) ? 0 : 1;
          unsigned BitIdx = BitsPerElem * Lane;
          unsigned SrcLaneIdx = (ShuffleMask >> BitIdx) & IndexMask;
          unsigned ElemInLane = DstIdx % NumElemsPerLane;
          unsigned IdxToPick = SrcLaneIdx * NumElemsPerLane + ElemInLane;
          return std::pair<unsigned, int>{SrcIdx, IdxToPick};
        });
  }

  // VINSERTF128/VINSERTI32X4 etc.: insert a narrower vector into a wider
  // one at the imm-selected position; delegated.
  case X86::BI__builtin_ia32_insertf32x4_256:
  case X86::BI__builtin_ia32_inserti32x4_256:
  case X86::BI__builtin_ia32_insertf64x2_256:
  case X86::BI__builtin_ia32_inserti64x2_256:
  case X86::BI__builtin_ia32_insertf32x4:
  case X86::BI__builtin_ia32_inserti32x4:
  case X86::BI__builtin_ia32_insertf64x2_512:
  case X86::BI__builtin_ia32_inserti64x2_512:
  case X86::BI__builtin_ia32_insertf32x8:
  case X86::BI__builtin_ia32_inserti32x8:
  case X86::BI__builtin_ia32_insertf64x4:
  case X86::BI__builtin_ia32_inserti64x4:
  case X86::BI__builtin_ia32_vinsertf128_ps256:
  case X86::BI__builtin_ia32_vinsertf128_pd256:
  case X86::BI__builtin_ia32_vinsertf128_si256:
  case X86::BI__builtin_ia32_insert128i256:
    return interp__builtin_x86_insert_subvector(S, OpPC, Call, ID: BuiltinID);

  // VCVTPS2PH: float -> half conversion with an imm rounding control.
  case clang::X86::BI__builtin_ia32_vcvtps2ph:
  case clang::X86::BI__builtin_ia32_vcvtps2ph256:
    return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call);
5701
  // Extract a single element from a vector at an immediate index.
  case X86::BI__builtin_ia32_vec_ext_v4hi:
  case X86::BI__builtin_ia32_vec_ext_v16qi:
  case X86::BI__builtin_ia32_vec_ext_v8hi:
  case X86::BI__builtin_ia32_vec_ext_v4si:
  case X86::BI__builtin_ia32_vec_ext_v2di:
  case X86::BI__builtin_ia32_vec_ext_v32qi:
  case X86::BI__builtin_ia32_vec_ext_v16hi:
  case X86::BI__builtin_ia32_vec_ext_v8si:
  case X86::BI__builtin_ia32_vec_ext_v4di:
  case X86::BI__builtin_ia32_vec_ext_v4sf:
    return interp__builtin_vec_ext(S, OpPC, Call, ID: BuiltinID);

  // Insert a scalar into a vector at an immediate index.
  case X86::BI__builtin_ia32_vec_set_v4hi:
  case X86::BI__builtin_ia32_vec_set_v16qi:
  case X86::BI__builtin_ia32_vec_set_v8hi:
  case X86::BI__builtin_ia32_vec_set_v4si:
  case X86::BI__builtin_ia32_vec_set_v2di:
  case X86::BI__builtin_ia32_vec_set_v32qi:
  case X86::BI__builtin_ia32_vec_set_v16hi:
  case X86::BI__builtin_ia32_vec_set_v8si:
  case X86::BI__builtin_ia32_vec_set_v4di:
    return interp__builtin_vec_set(S, OpPC, Call, ID: BuiltinID);

  // VPMOVB2M etc.: compress a vector into a mask register.
  case X86::BI__builtin_ia32_cvtb2mask128:
  case X86::BI__builtin_ia32_cvtb2mask256:
  case X86::BI__builtin_ia32_cvtb2mask512:
  case X86::BI__builtin_ia32_cvtw2mask128:
  case X86::BI__builtin_ia32_cvtw2mask256:
  case X86::BI__builtin_ia32_cvtw2mask512:
  case X86::BI__builtin_ia32_cvtd2mask128:
  case X86::BI__builtin_ia32_cvtd2mask256:
  case X86::BI__builtin_ia32_cvtd2mask512:
  case X86::BI__builtin_ia32_cvtq2mask128:
  case X86::BI__builtin_ia32_cvtq2mask256:
  case X86::BI__builtin_ia32_cvtq2mask512:
    return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, ID: BuiltinID)\u003b

  // VPMOVM2B etc.: expand a mask register into a vector.
  case X86::BI__builtin_ia32_cvtmask2b128:
  case X86::BI__builtin_ia32_cvtmask2b256:
  case X86::BI__builtin_ia32_cvtmask2b512:
  case X86::BI__builtin_ia32_cvtmask2w128:
  case X86::BI__builtin_ia32_cvtmask2w256:
  case X86::BI__builtin_ia32_cvtmask2w512:
  case X86::BI__builtin_ia32_cvtmask2d128:
  case X86::BI__builtin_ia32_cvtmask2d256:
  case X86::BI__builtin_ia32_cvtmask2d512:
  case X86::BI__builtin_ia32_cvtmask2q128:
  case X86::BI__builtin_ia32_cvtmask2q256:
  case X86::BI__builtin_ia32_cvtmask2q512:
    return interp__builtin_ia32_cvt_mask2vec(S, OpPC, Call, ID: BuiltinID);

  // CVTSD2SS: scalar double -> float; the _round_mask form also carries a
  // rounding immediate and a write mask.
  case X86::BI__builtin_ia32_cvtsd2ss:
    return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, HasRoundingMask: false);

  case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
    return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, HasRoundingMask: true);

  // CVTPD2PS: packed double -> float; masked and rounding variants below.
  case X86::BI__builtin_ia32_cvtpd2ps:
  case X86::BI__builtin_ia32_cvtpd2ps256:
    return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, IsMasked: false, HasRounding: false);
  case X86::BI__builtin_ia32_cvtpd2ps_mask:
    return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, IsMasked: true, HasRounding: false);
  case X86::BI__builtin_ia32_cvtpd2ps512_mask:
    return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, IsMasked: true, HasRounding: true);
5766
  // VPCMP (signed compare-to-mask): predicate encoded in the imm operand.
  case X86::BI__builtin_ia32_cmpb128_mask:
  case X86::BI__builtin_ia32_cmpw128_mask:
  case X86::BI__builtin_ia32_cmpd128_mask:
  case X86::BI__builtin_ia32_cmpq128_mask:
  case X86::BI__builtin_ia32_cmpb256_mask:
  case X86::BI__builtin_ia32_cmpw256_mask:
  case X86::BI__builtin_ia32_cmpd256_mask:
  case X86::BI__builtin_ia32_cmpq256_mask:
  case X86::BI__builtin_ia32_cmpb512_mask:
  case X86::BI__builtin_ia32_cmpw512_mask:
  case X86::BI__builtin_ia32_cmpd512_mask:
  case X86::BI__builtin_ia32_cmpq512_mask:
    return interp__builtin_ia32_cmp_mask(S, OpPC, Call, ID: BuiltinID,
                                         /*IsUnsigned=*/false);

  // VPCMPU (unsigned compare-to-mask).
  case X86::BI__builtin_ia32_ucmpb128_mask:
  case X86::BI__builtin_ia32_ucmpw128_mask:
  case X86::BI__builtin_ia32_ucmpd128_mask:
  case X86::BI__builtin_ia32_ucmpq128_mask:
  case X86::BI__builtin_ia32_ucmpb256_mask:
  case X86::BI__builtin_ia32_ucmpw256_mask:
  case X86::BI__builtin_ia32_ucmpd256_mask:
  case X86::BI__builtin_ia32_ucmpq256_mask:
  case X86::BI__builtin_ia32_ucmpb512_mask:
  case X86::BI__builtin_ia32_ucmpw512_mask:
  case X86::BI__builtin_ia32_ucmpd512_mask:
  case X86::BI__builtin_ia32_ucmpq512_mask:
    return interp__builtin_ia32_cmp_mask(S, OpPC, Call, ID: BuiltinID,
                                         /*IsUnsigned=*/true);

  // VPSHUFBITQMB: gather bits selected per byte into a mask; delegated.
  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
  case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
    return interp__builtin_ia32_shufbitqmb_mask(S, OpPC, Call);
5801
  // PSLLDQ: shift each 128-bit lane left by `Shift` bytes, filling the
  // vacated low bytes with zero (index -1).
  case X86::BI__builtin_ia32_pslldqi128_byteshift:
  case X86::BI__builtin_ia32_pslldqi256_byteshift:
  case X86::BI__builtin_ia32_pslldqi512_byteshift:
    // These SLLDQ intrinsics always operate on byte elements (8 bits).
    // The lane width is hardcoded to 16 to match the SIMD register size,
    // but the algorithm processes one byte per iteration,
    // so APInt(8, ...) is correct and intentional.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call,
        GetSourceIndex: [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
          unsigned LaneBase = (DstIdx / 16) * 16;
          unsigned LaneIdx = DstIdx % 16;
          if (LaneIdx < Shift)
            return std::make_pair(x: 0, y: -1);

          return std::make_pair(x: 0,
                                y: static_cast<int>(LaneBase + LaneIdx - Shift));
        });

  // PSRLDQ: shift each 128-bit lane right by `Shift` bytes, zero-filling
  // the vacated high bytes.
  case X86::BI__builtin_ia32_psrldqi128_byteshift:
  case X86::BI__builtin_ia32_psrldqi256_byteshift:
  case X86::BI__builtin_ia32_psrldqi512_byteshift:
    // These SRLDQ intrinsics always operate on byte elements (8 bits).
    // The lane width is hardcoded to 16 to match the SIMD register size,
    // but the algorithm processes one byte per iteration,
    // so APInt(8, ...) is correct and intentional.
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call,
        GetSourceIndex: [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
          unsigned LaneBase = (DstIdx / 16) * 16;
          unsigned LaneIdx = DstIdx % 16;
          if (LaneIdx + Shift < 16)
            return std::make_pair(x: 0,
                                  y: static_cast<int>(LaneBase + LaneIdx + Shift));

          return std::make_pair(x: 0, y: -1);
        });
5839
  // PALIGNR: per 128-bit lane, concatenate A:B and shift right by the
  // imm byte count; shifts of 32 or more zero the whole lane.
  case X86::BI__builtin_ia32_palignr128:
  case X86::BI__builtin_ia32_palignr256:
  case X86::BI__builtin_ia32_palignr512:
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Shift) {
          // Default to -1 → zero-fill this destination element
          unsigned VecIdx = 1;
          int ElemIdx = -1;

          int Lane = DstIdx / 16;
          int Offset = DstIdx % 16;

          // Elements come from VecB first, then VecA after the shift boundary
          unsigned ShiftedIdx = Offset + (Shift & 0xFF);
          if (ShiftedIdx < 16) { // from VecB
            ElemIdx = ShiftedIdx + (Lane * 16);
          } else if (ShiftedIdx < 32) { // from VecA
            VecIdx = 0;
            ElemIdx = (ShiftedIdx - 16) + (Lane * 16);
          }

          return std::pair<unsigned, int>{VecIdx, ElemIdx};
        });

  // VALIGND/VALIGNQ: whole-vector (not per-lane) element-granular rotate
  // of the A:B concatenation; the shift count wraps at NumElems.
  case X86::BI__builtin_ia32_alignd128:
  case X86::BI__builtin_ia32_alignd256:
  case X86::BI__builtin_ia32_alignd512:
  case X86::BI__builtin_ia32_alignq128:
  case X86::BI__builtin_ia32_alignq256:
  case X86::BI__builtin_ia32_alignq512: {
    unsigned NumElems = Call->getType()->castAs<VectorType>()->getNumElements();
    return interp__builtin_ia32_shuffle_generic(
        S, OpPC, Call, GetSourceIndex: [NumElems](unsigned DstIdx, unsigned Shift) {
          unsigned Imm = Shift & 0xFF;
          unsigned EffectiveShift = Imm & (NumElems - 1);
          unsigned SourcePos = DstIdx + EffectiveShift;
          // Positions below NumElems read the second operand, the rest
          // wrap around into the first.
          unsigned VecIdx = SourcePos < NumElems ? 1u : 0u;
          unsigned ElemIdx = SourcePos & (NumElems - 1);
          return std::pair<unsigned, int>{VecIdx, static_cast<int>(ElemIdx)};
        });
  }
5881
  // MINPS/MINPD family: bail out of constant evaluation (std::nullopt) for
  // NaN/Inf/denormal inputs instead of modeling x86's operand-order NaN
  // rules. When both inputs are zero the second lambda argument is
  // returned — NOTE(review): assumes B maps to MINPS's second source
  // operand (which x86 returns for equal inputs); confirm against
  // interp__builtin_elementwise_fp_binop's argument order.
  case clang::X86::BI__builtin_ia32_minps:
  case clang::X86::BI__builtin_ia32_minpd:
  case clang::X86::BI__builtin_ia32_minph128:
  case clang::X86::BI__builtin_ia32_minph256:
  case clang::X86::BI__builtin_ia32_minps256:
  case clang::X86::BI__builtin_ia32_minpd256:
  case clang::X86::BI__builtin_ia32_minps512:
  case clang::X86::BI__builtin_ia32_minpd512:
  case clang::X86::BI__builtin_ia32_minph512:
    return interp__builtin_elementwise_fp_binop(
        S, OpPC, Call,
        Fn: [](const APFloat &A, const APFloat &B,
           std::optional<APSInt>) -> std::optional<APFloat> {
          if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
              B.isInfinity() || B.isDenormal())
            return std::nullopt;
          if (A.isZero() && B.isZero())
            return B;
          return llvm::minimum(A, B);
        });

  // MINSS/MINSD: scalar form — only element 0 is computed; the helper
  // handles the pass-through of the upper elements.
  case clang::X86::BI__builtin_ia32_minss:
  case clang::X86::BI__builtin_ia32_minsd:
    return interp__builtin_elementwise_fp_binop(
        S, OpPC, Call,
        Fn: [](const APFloat &A, const APFloat &B,
           std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
          return EvalScalarMinMaxFp(A, B, RoundingMode, /*IsMin=*/true);
        },
        /*IsScalar=*/true);

  // AVX-512 scalar min/max with rounding + write mask; min vs. max is
  // decided from the builtin ID.
  case clang::X86::BI__builtin_ia32_minsd_round_mask:
  case clang::X86::BI__builtin_ia32_minss_round_mask:
  case clang::X86::BI__builtin_ia32_minsh_round_mask:
  case clang::X86::BI__builtin_ia32_maxsd_round_mask:
  case clang::X86::BI__builtin_ia32_maxss_round_mask:
  case clang::X86::BI__builtin_ia32_maxsh_round_mask: {
    bool IsMin = BuiltinID == clang::X86::BI__builtin_ia32_minsd_round_mask ||
                 BuiltinID == clang::X86::BI__builtin_ia32_minss_round_mask ||
                 BuiltinID == clang::X86::BI__builtin_ia32_minsh_round_mask;
    return interp__builtin_scalar_fp_round_mask_binop(
        S, OpPC, Call,
        Fn: [IsMin](const APFloat &A, const APFloat &B,
               std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
          return EvalScalarMinMaxFp(A, B, RoundingMode, IsMin);
        });
  }
5929
5930 case clang::X86::BI__builtin_ia32_maxps:
5931 case clang::X86::BI__builtin_ia32_maxpd:
5932 case clang::X86::BI__builtin_ia32_maxph128:
5933 case clang::X86::BI__builtin_ia32_maxph256:
5934 case clang::X86::BI__builtin_ia32_maxps256:
5935 case clang::X86::BI__builtin_ia32_maxpd256:
5936 case clang::X86::BI__builtin_ia32_maxps512:
5937 case clang::X86::BI__builtin_ia32_maxpd512:
5938 case clang::X86::BI__builtin_ia32_maxph512:
5939 return interp__builtin_elementwise_fp_binop(
5940 S, OpPC, Call,
5941 Fn: [](const APFloat &A, const APFloat &B,
5942 std::optional<APSInt>) -> std::optional<APFloat> {
5943 if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
5944 B.isInfinity() || B.isDenormal())
5945 return std::nullopt;
5946 if (A.isZero() && B.isZero())
5947 return B;
5948 return llvm::maximum(A, B);
5949 });
5950
5951 case clang::X86::BI__builtin_ia32_maxss:
5952 case clang::X86::BI__builtin_ia32_maxsd:
5953 return interp__builtin_elementwise_fp_binop(
5954 S, OpPC, Call,
5955 Fn: [](const APFloat &A, const APFloat &B,
5956 std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
5957 return EvalScalarMinMaxFp(A, B, RoundingMode, /*IsMin=*/false);
5958 },
5959 /*IsScalar=*/true);
5960
5961 default:
5962 S.FFDiag(Loc: S.Current->getLocation(PC: OpPC),
5963 DiagId: diag::note_invalid_subexpr_in_const_expr)
5964 << S.Current->getRange(PC: OpPC);
5965
5966 return false;
5967 }
5968
5969 llvm_unreachable("Unhandled builtin ID");
5970}
5971
5972bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E,
5973 ArrayRef<int64_t> ArrayIndices, int64_t &IntResult) {
5974 CharUnits Result;
5975 unsigned N = E->getNumComponents();
5976 assert(N > 0);
5977
5978 unsigned ArrayIndex = 0;
5979 QualType CurrentType = E->getTypeSourceInfo()->getType();
5980 for (unsigned I = 0; I != N; ++I) {
5981 const OffsetOfNode &Node = E->getComponent(Idx: I);
5982 switch (Node.getKind()) {
5983 case OffsetOfNode::Field: {
5984 const FieldDecl *MemberDecl = Node.getField();
5985 const auto *RD = CurrentType->getAsRecordDecl();
5986 if (!RD || RD->isInvalidDecl())
5987 return false;
5988 const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(D: RD);
5989 unsigned FieldIndex = MemberDecl->getFieldIndex();
5990 assert(FieldIndex < RL.getFieldCount() && "offsetof field in wrong type");
5991 Result +=
5992 S.getASTContext().toCharUnitsFromBits(BitSize: RL.getFieldOffset(FieldNo: FieldIndex));
5993 CurrentType = MemberDecl->getType().getNonReferenceType();
5994 break;
5995 }
5996 case OffsetOfNode::Array: {
5997 // When generating bytecode, we put all the index expressions as Sint64 on
5998 // the stack.
5999 int64_t Index = ArrayIndices[ArrayIndex];
6000 const ArrayType *AT = S.getASTContext().getAsArrayType(T: CurrentType);
6001 if (!AT)
6002 return false;
6003 CurrentType = AT->getElementType();
6004 CharUnits ElementSize = S.getASTContext().getTypeSizeInChars(T: CurrentType);
6005 Result += Index * ElementSize;
6006 ++ArrayIndex;
6007 break;
6008 }
6009 case OffsetOfNode::Base: {
6010 const CXXBaseSpecifier *BaseSpec = Node.getBase();
6011 if (BaseSpec->isVirtual())
6012 return false;
6013
6014 // Find the layout of the class whose base we are looking into.
6015 const auto *RD = CurrentType->getAsCXXRecordDecl();
6016 if (!RD || RD->isInvalidDecl())
6017 return false;
6018 const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(D: RD);
6019
6020 // Find the base class itself.
6021 CurrentType = BaseSpec->getType();
6022 const auto *BaseRD = CurrentType->getAsCXXRecordDecl();
6023 if (!BaseRD)
6024 return false;
6025
6026 // Add the offset to the base.
6027 Result += RL.getBaseClassOffset(Base: BaseRD);
6028 break;
6029 }
6030 case OffsetOfNode::Identifier:
6031 llvm_unreachable("Dependent OffsetOfExpr?");
6032 }
6033 }
6034
6035 IntResult = Result.getQuantity();
6036
6037 return true;
6038}
6039
6040bool SetThreeWayComparisonField(InterpState &S, CodePtr OpPC,
6041 const Pointer &Ptr, const APSInt &IntValue) {
6042
6043 const Record *R = Ptr.getRecord();
6044 assert(R);
6045 assert(R->getNumFields() == 1);
6046
6047 unsigned FieldOffset = R->getField(I: 0u)->Offset;
6048 const Pointer &FieldPtr = Ptr.atField(Off: FieldOffset);
6049 PrimType FieldT = *S.getContext().classify(T: FieldPtr.getType());
6050
6051 INT_TYPE_SWITCH(FieldT,
6052 FieldPtr.deref<T>() = T::from(IntValue.getSExtValue()));
6053 FieldPtr.initialize();
6054 return true;
6055}
6056
6057static void zeroAll(Pointer &Dest) {
6058 const Descriptor *Desc = Dest.getFieldDesc();
6059
6060 if (Desc->isPrimitive()) {
6061 TYPE_SWITCH(Desc->getPrimType(), {
6062 Dest.deref<T>().~T();
6063 new (&Dest.deref<T>()) T();
6064 });
6065 return;
6066 }
6067
6068 if (Desc->isRecord()) {
6069 const Record *R = Desc->ElemRecord;
6070 for (const Record::Field &F : R->fields()) {
6071 Pointer FieldPtr = Dest.atField(Off: F.Offset);
6072 zeroAll(Dest&: FieldPtr);
6073 }
6074 return;
6075 }
6076
6077 if (Desc->isPrimitiveArray()) {
6078 for (unsigned I = 0, N = Desc->getNumElems(); I != N; ++I) {
6079 TYPE_SWITCH(Desc->getPrimType(), {
6080 Dest.deref<T>().~T();
6081 new (&Dest.deref<T>()) T();
6082 });
6083 }
6084 return;
6085 }
6086
6087 if (Desc->isCompositeArray()) {
6088 for (unsigned I = 0, N = Desc->getNumElems(); I != N; ++I) {
6089 Pointer ElemPtr = Dest.atIndex(Idx: I).narrow();
6090 zeroAll(Dest&: ElemPtr);
6091 }
6092 return;
6093 }
6094}
6095
6096static bool copyComposite(InterpState &S, CodePtr OpPC, const Pointer &Src,
6097 Pointer &Dest, bool Activate);
/// Copies the record at \p Src into \p Dest field-by-field, then base-by-base.
/// For unions, only the active member is copied (and activated in Dest);
/// every inactive member is zeroed via zeroAll(). When \p Activate is set,
/// copied primitive fields are additionally activated in the destination.
/// Returns false if a field copy fails or CheckMutable rejects an inactive
/// union member.
static bool copyRecord(InterpState &S, CodePtr OpPC, const Pointer &Src,
                       Pointer &Dest, bool Activate = false) {
  [[maybe_unused]] const Descriptor *SrcDesc = Src.getFieldDesc();
  const Descriptor *DestDesc = Dest.getFieldDesc();

  // Copies one field: primitive fields are assigned directly (carrying over
  // the source's initialized state); composite fields recurse through
  // copyComposite().
  auto copyField = [&](const Record::Field &F, bool Activate) -> bool {
    Pointer DestField = Dest.atField(Off: F.Offset);
    if (OptPrimType FT = S.Ctx.classify(T: F.Decl->getType())) {
      TYPE_SWITCH(*FT, {
        DestField.deref<T>() = Src.atField(F.Offset).deref<T>();
        if (Src.atField(F.Offset).isInitialized())
          DestField.initialize();
        if (Activate)
          DestField.activate();
      });
      return true;
    }
    // Composite field.
    return copyComposite(S, OpPC, Src: Src.atField(Off: F.Offset), Dest&: DestField, Activate);
  };

  assert(SrcDesc->isRecord());
  assert(SrcDesc->ElemRecord == DestDesc->ElemRecord);
  const Record *R = DestDesc->ElemRecord;
  for (const Record::Field &F : R->fields()) {
    if (R->isUnion()) {
      // For unions, only copy the active field. Zero all others.
      const Pointer &SrcField = Src.atField(Off: F.Offset);
      if (SrcField.isActive()) {
        if (!copyField(F, /*Activate=*/true))
          return false;
      } else {
        if (!CheckMutable(S, OpPC, Ptr: Src.atField(Off: F.Offset)))
          return false;
        Pointer DestField = Dest.atField(Off: F.Offset);
        zeroAll(Dest&: DestField);
      }
    } else {
      if (!copyField(F, Activate))
        return false;
    }
  }

  // Base-class subobjects are copied recursively after the direct fields.
  for (const Record::Base &B : R->bases()) {
    Pointer DestBase = Dest.atField(Off: B.Offset);
    if (!copyRecord(S, OpPC, Src: Src.atField(Off: B.Offset), Dest&: DestBase, Activate))
      return false;
  }

  Dest.initialize();
  return true;
}
6150
6151static bool copyComposite(InterpState &S, CodePtr OpPC, const Pointer &Src,
6152 Pointer &Dest, bool Activate = false) {
6153 assert(Src.isLive() && Dest.isLive());
6154
6155 [[maybe_unused]] const Descriptor *SrcDesc = Src.getFieldDesc();
6156 const Descriptor *DestDesc = Dest.getFieldDesc();
6157
6158 assert(!DestDesc->isPrimitive() && !SrcDesc->isPrimitive());
6159
6160 if (DestDesc->isPrimitiveArray()) {
6161 assert(SrcDesc->isPrimitiveArray());
6162 assert(SrcDesc->getNumElems() == DestDesc->getNumElems());
6163 PrimType ET = DestDesc->getPrimType();
6164 for (unsigned I = 0, N = DestDesc->getNumElems(); I != N; ++I) {
6165 Pointer DestElem = Dest.atIndex(Idx: I);
6166 TYPE_SWITCH(ET, {
6167 DestElem.deref<T>() = Src.elem<T>(I);
6168 DestElem.initialize();
6169 });
6170 }
6171 return true;
6172 }
6173
6174 if (DestDesc->isCompositeArray()) {
6175 assert(SrcDesc->isCompositeArray());
6176 assert(SrcDesc->getNumElems() == DestDesc->getNumElems());
6177 for (unsigned I = 0, N = DestDesc->getNumElems(); I != N; ++I) {
6178 const Pointer &SrcElem = Src.atIndex(Idx: I).narrow();
6179 Pointer DestElem = Dest.atIndex(Idx: I).narrow();
6180 if (!copyComposite(S, OpPC, Src: SrcElem, Dest&: DestElem, Activate))
6181 return false;
6182 }
6183 return true;
6184 }
6185
6186 if (DestDesc->isRecord())
6187 return copyRecord(S, OpPC, Src, Dest, Activate);
6188 return Invalid(S, OpPC);
6189}
6190
6191bool DoMemcpy(InterpState &S, CodePtr OpPC, const Pointer &Src, Pointer &Dest) {
6192 if (!Src.isBlockPointer() || Src.getFieldDesc()->isPrimitive())
6193 return false;
6194 if (!Dest.isBlockPointer() || Dest.getFieldDesc()->isPrimitive())
6195 return false;
6196
6197 return copyComposite(S, OpPC, Src, Dest);
6198}
6199
6200} // namespace interp
6201} // namespace clang
6202