InterpBuiltin.cpp source code [llvm_projects/clang/lib/AST/ByteCode/InterpBuiltin.cpp]

1	//===--- InterpBuiltin.cpp - Interpreter for the constexpr VM ---- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	#include "../ExprConstShared.h"
9	#include "Boolean.h"
10	#include "EvalEmitter.h"
11	#include "InterpBuiltinBitCast.h"
12	#include "InterpHelpers.h"
13	#include "PrimType.h"
14	#include "Program.h"
15	#include "clang/AST/InferAlloc.h"
16	#include "clang/AST/OSLog.h"
17	#include "clang/AST/RecordLayout.h"
18	#include "clang/Basic/Builtins.h"
19	#include "clang/Basic/TargetBuiltins.h"
20	#include "clang/Basic/TargetInfo.h"
21	#include "llvm/ADT/StringExtras.h"
22	#include "llvm/Support/AllocToken.h"
23	#include "llvm/Support/ErrorHandling.h"
24	#include "llvm/Support/SipHash.h"
25
26	namespace clang {
27	namespace interp {
28
29	[[maybe_unused]] static bool isNoopBuiltin(unsigned ID) {
30	switch (ID) {
31	case Builtin::BIas_const:
32	case Builtin::BIforward:
33	case Builtin::BIforward_like:
34	case Builtin::BImove:
35	case Builtin::BImove_if_noexcept:
36	case Builtin::BIaddressof:
37	case Builtin::BI__addressof:
38	case Builtin::BI__builtin_addressof:
39	case Builtin::BI__builtin_launder:
40	return true;
41	default:
42	return false;
43	}
44	return false;
45	}
46
47	static void discard(InterpStack &Stk, PrimType T) {
48	TYPE_SWITCH(T, { Stk.discard<T>(); });
49	}
50
51	static uint64_t popToUInt64(const InterpState &S, const Expr *E) {
52	INT_TYPE_SWITCH(*S.getContext().classify(E->getType()),
53	return static_cast<uint64_t>(S.Stk.pop<T>()));
54	}
55
56	static APSInt popToAPSInt(InterpStack &Stk, PrimType T) {
57	INT_TYPE_SWITCH(T, return Stk.pop<T>().toAPSInt());
58	}
59
60	static APSInt popToAPSInt(InterpState &S, const Expr *E) {
61	return popToAPSInt(Stk&: S.Stk, T: *S.getContext().classify(T: E->getType()));
62	}
63	static APSInt popToAPSInt(InterpState &S, QualType T) {
64	return popToAPSInt(Stk&: S.Stk, T: *S.getContext().classify(T));
65	}
66
67	/// Check for common reasons a pointer can't be read from, which
68	/// are usually not diagnosed in a builtin function.
69	static bool isReadable(const Pointer &P) {
70	if (P.isDummy())
71	return false;
72	if (!P.isBlockPointer())
73	return false;
74	if (!P.isLive())
75	return false;
76	if (P.isOnePastEnd())
77	return false;
78	return true;
79	}
80
81	/// Pushes \p Val on the stack as the type given by \p QT.
82	static void pushInteger(InterpState &S, const APSInt &Val, QualType QT) {
83	assert(QT->isSignedIntegerOrEnumerationType() \|\|
84	QT->isUnsignedIntegerOrEnumerationType());
85	OptPrimType T = S.getContext().classify(T: QT);
86	assert(T);
87	unsigned BitWidth = S.getASTContext().getIntWidth(T: QT);
88
89	if (T == PT_IntAPS) {
90	auto Result = S.allocAP<IntegralAP<true>>(BitWidth);
91	Result.copy(V: Val);
92	S.Stk.push<IntegralAP<true>>(Args&: Result);
93	return;
94	}
95
96	if (T == PT_IntAP) {
97	auto Result = S.allocAP<IntegralAP<false>>(BitWidth);
98	Result.copy(V: Val);
99	S.Stk.push<IntegralAP<false>>(Args&: Result);
100	return;
101	}
102
103	if (QT ->isSignedIntegerOrEnumerationType()) {
104	int64_t V = Val.getSExtValue();
105	INT_TYPE_SWITCH(*T, { S.Stk.push<T>(T::from(V, BitWidth)); });
106	} else {
107	assert(QT->isUnsignedIntegerOrEnumerationType());
108	uint64_t V = Val.getZExtValue();
109	INT_TYPE_SWITCH(*T, { S.Stk.push<T>(T::from(V, BitWidth)); });
110	}
111	}
112
113	template <typename T>
114	static void pushInteger(InterpState &S, T Val, QualType QT) {
115	if constexpr (std::is_same_v<T, APInt>)
116	pushInteger(S, Val: APSInt(Val, !std::is_signed_v<T>), QT);
117	else if constexpr (std::is_same_v<T, APSInt>)
118	pushInteger(S, Val, QT);
119	else
120	pushInteger(S,
121	Val: APSInt(APInt(sizeof(T) * `8`, static_cast<uint64_t>(Val),
122	std::is_signed_v<T>),
123	!std::is_signed_v<T>),
124	QT);
125	}
126
127	static void assignInteger(InterpState &S, const Pointer &Dest, PrimType ValueT,
128	const APSInt &Value) {
129
130	if (ValueT == PT_IntAPS) {
131	Dest.deref<IntegralAP<true>>() =
132	S.allocAP<IntegralAP<true>>(BitWidth: Value.getBitWidth());
133	Dest.deref<IntegralAP<true>>().copy(V: Value);
134	} else if (ValueT == PT_IntAP) {
135	Dest.deref<IntegralAP<false>>() =
136	S.allocAP<IntegralAP<false>>(BitWidth: Value.getBitWidth());
137	Dest.deref<IntegralAP<false>>().copy(V: Value);
138	} else {
139	INT_TYPE_SWITCH_NO_BOOL(
140	ValueT, { Dest.deref<T>() = T::from(static_cast<T>(Value)); });
141	}
142	}
143
144	static QualType getElemType(const Pointer &P) {
145	const Descriptor *Desc = P.getFieldDesc();
146	QualType T = Desc->getType();
147	if (Desc->isPrimitive())
148	return T;
149	if (T ->isPointerType())
150	return T ->castAs<PointerType>()->getPointeeType();
151	if (Desc->isArray())
152	return Desc->getElemQualType();
153	if (const auto *AT = T ->getAsArrayTypeUnsafe())
154	return AT->getElementType();
155	return T;
156	}
157
158	static void diagnoseNonConstexprBuiltin(InterpState &S, CodePtr OpPC,
159	unsigned ID) {
160	if (!S.diagnosing())
161	return;
162
163	auto Loc = S.Current->getSource(PC: OpPC);
164	if (S.getLangOpts().CPlusPlus11)
165	S.CCEDiag(SI: Loc, DiagId: diag::note_constexpr_invalid_function)
166	<< /isConstexpr=/`0` << /isConstructor=/`0`
167	<< S.getASTContext().BuiltinInfo.getQuotedName(ID);
168	else
169	S.CCEDiag(SI: Loc, DiagId: diag::note_invalid_subexpr_in_const_expr);
170	}
171
172	static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
173	assert(Val.getFieldDesc()->isPrimitiveArray() &&
174	Val.getFieldDesc()->getElemQualType()->isBooleanType() &&
175	"Not a boolean vector");
176	unsigned NumElems = Val.getNumElems();
177
178	// Each element is one bit, so create an integer with NumElts bits.
179	llvm::APSInt Result(NumElems, `0`);
180	for (unsigned I = `0`; I != NumElems; ++I) {
181	if (Val.elem<bool>(I))
182	Result.setBit(I);
183	}
184
185	return Result;
186	}
187
188	// Strict double -> float conversion used for X86 PD2PS/cvtsd2ss intrinsics.
189	// Reject NaN/Inf/Subnormal inputs and any lossy/inexact conversions.
190	static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst,
191	InterpState &S, const Expr *DiagExpr) {
192	if (Src.isInfinity()) {
193	if (S.diagnosing())
194	S.CCEDiag(E: DiagExpr, DiagId: diag::note_constexpr_float_arithmetic) << `0`;
195	return false;
196	}
197	if (Src.isNaN()) {
198	if (S.diagnosing())
199	S.CCEDiag(E: DiagExpr, DiagId: diag::note_constexpr_float_arithmetic) << `1`;
200	return false;
201	}
202	APFloat Val = Src;
203	bool LosesInfo = false;
204	APFloat::opStatus Status = Val.convert(
205	ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven, losesInfo: &LosesInfo);
206	if (LosesInfo \|\| Val.isDenormal()) {
207	if (S.diagnosing())
208	S.CCEDiag(E: DiagExpr, DiagId: diag::note_constexpr_float_arithmetic_strict);
209	return false;
210	}
211	if (Status != APFloat::opOK) {
212	if (S.diagnosing())
213	S.CCEDiag(E: DiagExpr, DiagId: diag::note_invalid_subexpr_in_const_expr);
214	return false;
215	}
216	Dst.copy(F: Val);
217	return true;
218	}
219
220	static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
221	const InterpFrame *Frame,
222	const CallExpr *Call) {
223	unsigned Depth = S.Current->getDepth();
224	auto isStdCall = [](const FunctionDecl F) -> bool* {
225	return F && F->isInStdNamespace() && F->getIdentifier() &&
226	F->getIdentifier()->isStr(Str: "is_constant_evaluated");
227	};
228	const InterpFrame *Caller = Frame->Caller;
229	// The current frame is the one for __builtin_is_constant_evaluated.
230	// The one above that, potentially the one for std::is_constant_evaluated().
231	if (S.inConstantContext() && !S.checkingPotentialConstantExpression() &&
232	S.getEvalStatus().Diag &&
233	(Depth == `0` \|\| (Depth == `1` && isStdCall (Frame->getCallee())))) {
234	if (Caller && isStdCall (Frame->getCallee())) {
235	const Expr *E = Caller->getExpr(PC: Caller->getRetPC());
236	S.report(Loc: E->getExprLoc(),
237	DiagId: diag::warn_is_constant_evaluated_always_true_constexpr)
238	<< "std::is_constant_evaluated" << E->getSourceRange();
239	} else {
240	S.report(Loc: Call->getExprLoc(),
241	DiagId: diag::warn_is_constant_evaluated_always_true_constexpr)
242	<< "__builtin_is_constant_evaluated" << Call->getSourceRange();
243	}
244	}
245
246	S.Stk.push<Boolean>(Args: Boolean::from(Value: S.inConstantContext()));
247	return true;
248	}
249
250	// __builtin_assume
251	// __assume (MS extension)
252	static bool interp__builtin_assume(InterpState &S, CodePtr OpPC,
253	const InterpFrame *Frame,
254	const CallExpr *Call) {
255	// Nothing to be done here since the argument is NOT evaluated.
256	assert(Call->getNumArgs() == `1`);
257	return true;
258	}
259
260	static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC,
261	const InterpFrame *Frame,
262	const CallExpr Call, unsigned* ID) {
263	uint64_t Limit = ~static_cast<uint64_t>(`0`);
264	if (ID == Builtin::BIstrncmp \|\| ID == Builtin::BI__builtin_strncmp \|\|
265	ID == Builtin::BIwcsncmp \|\| ID == Builtin::BI__builtin_wcsncmp)
266	Limit = popToUInt64(S, E: Call->getArg(Arg: `2`));
267
268	const Pointer &B = S.Stk.pop<Pointer>();
269	const Pointer &A = S.Stk.pop<Pointer>();
270	if (ID == Builtin::BIstrcmp \|\| ID == Builtin::BIstrncmp \|\|
271	ID == Builtin::BIwcscmp \|\| ID == Builtin::BIwcsncmp)
272	diagnoseNonConstexprBuiltin(S, OpPC, ID);
273
274	if (Limit == `0`) {
275	pushInteger(S, Val: `0`, QT: Call->getType());
276	return true;
277	}
278
279	if (!CheckLive(S, OpPC, Ptr: A, AK: AK_Read) \|\| !CheckLive(S, OpPC, Ptr: B, AK: AK_Read))
280	return false;
281
282	if (A.isDummy() \|\| B.isDummy())
283	return false;
284	if (!A.isBlockPointer() \|\| !B.isBlockPointer())
285	return false;
286
287	bool IsWide = ID == Builtin::BIwcscmp \|\| ID == Builtin::BIwcsncmp \|\|
288	ID == Builtin::BI__builtin_wcscmp \|\|
289	ID == Builtin::BI__builtin_wcsncmp;
290	assert(A.getFieldDesc()->isPrimitiveArray());
291	assert(B.getFieldDesc()->isPrimitiveArray());
292
293	// Different element types shouldn't happen, but with casts they can.
294	if (!S.getASTContext().hasSameUnqualifiedType(T1: getElemType(P: A), T2: getElemType(P: B)))
295	return false;
296
297	PrimType ElemT = *S.getContext().classify(T: getElemType(P: A));
298
299	auto returnResult = [&](int V) -> bool {
300	pushInteger(S, Val: V, QT: Call->getType());
301	return true;
302	};
303
304	unsigned IndexA = A.getIndex();
305	unsigned IndexB = B.getIndex();
306	uint64_t Steps = `0`;
307	for (;; ++IndexA, ++IndexB, ++Steps) {
308
309	if (Steps >= Limit)
310	break;
311	const Pointer &PA = A.atIndex(Idx: IndexA);
312	const Pointer &PB = B.atIndex(Idx: IndexB);
313	if (!CheckRange(S, OpPC, Ptr: PA, AK: AK_Read) \|\|
314	!CheckRange(S, OpPC, Ptr: PB, AK: AK_Read)) {
315	return false;
316	}
317
318	if (IsWide) {
319	INT_TYPE_SWITCH(ElemT, {
320	T CA = PA.deref<T>();
321	T CB = PB.deref<T>();
322	if (CA > CB)
323	return returnResult(`1`);
324	if (CA < CB)
325	return returnResult(-`1`);
326	if (CA.isZero() \|\| CB.isZero())
327	return returnResult(`0`);
328	});
329	continue;
330	}
331
332	uint8_t CA = PA.deref<uint8_t>();
333	uint8_t CB = PB.deref<uint8_t>();
334
335	if (CA > CB)
336	return returnResult (`1`);
337	if (CA < CB)
338	return returnResult (-`1`);
339	if (CA == `0` \|\| CB == `0`)
340	return returnResult (`0`);
341	}
342
343	return returnResult (`0`);
344	}
345
346	static bool interp__builtin_strlen(InterpState &S, CodePtr OpPC,
347	const InterpFrame *Frame,
348	const CallExpr Call, unsigned* ID) {
349	const Pointer &StrPtr = S.Stk.pop<Pointer>().expand();
350
351	if (ID == Builtin::BIstrlen \|\| ID == Builtin::BIwcslen)
352	diagnoseNonConstexprBuiltin(S, OpPC, ID);
353
354	if (!CheckArray(S, OpPC, Ptr: StrPtr))
355	return false;
356
357	if (!CheckLive(S, OpPC, Ptr: StrPtr, AK: AK_Read))
358	return false;
359
360	if (!CheckDummy(S, OpPC, B: StrPtr.block(), AK: AK_Read))
361	return false;
362
363	if (!StrPtr.getFieldDesc()->isPrimitiveArray())
364	return false;
365
366	assert(StrPtr.getFieldDesc()->isPrimitiveArray());
367	unsigned ElemSize = StrPtr.getFieldDesc()->getElemSize();
368	if (ElemSize != `1` && ElemSize != `2` && ElemSize != `4`)
369	return Invalid(S, OpPC);
370
371	if (ID == Builtin::BI__builtin_wcslen \|\| ID == Builtin::BIwcslen) {
372	const ASTContext &AC = S.getASTContext();
373	unsigned WCharSize = AC.getTypeSizeInChars(T: AC.getWCharType()).getQuantity();
374	if (ElemSize != WCharSize)
375	return false;
376	}
377
378	size_t Len = `0`;
379	for (size_t I = StrPtr.getIndex();; ++I, ++Len) {
380	const Pointer &ElemPtr = StrPtr.atIndex(Idx: I);
381
382	if (!CheckRange(S, OpPC, Ptr: ElemPtr, AK: AK_Read))
383	return false;
384
385	uint32_t Val;
386	switch (ElemSize) {
387	case `1`:
388	Val = ElemPtr.deref<uint8_t>();
389	break;
390	case `2`:
391	Val = ElemPtr.deref<uint16_t>();
392	break;
393	case `4`:
394	Val = ElemPtr.deref<uint32_t>();
395	break;
396	default:
397	llvm_unreachable("Unsupported char size");
398	}
399	if (Val == `0`)
400	break;
401	}
402
403	pushInteger(S, Val: Len, QT: Call->getType());
404
405	return true;
406	}
407
408	static bool interp__builtin_nan(InterpState &S, CodePtr OpPC,
409	const InterpFrame Frame, const* CallExpr *Call,
410	bool Signaling) {
411	const Pointer &Arg = S.Stk.pop<Pointer>();
412
413	if (!CheckLoad(S, OpPC, Ptr: Arg))
414	return false;
415
416	if (!Arg.getFieldDesc()->isPrimitiveArray())
417	return Invalid(S, OpPC);
418
419	// Convert the given string to an integer using StringRef's API.
420	llvm::APInt Fill;
421	std::string Str;
422	assert(Arg.getNumElems() >= `1`);
423	for (unsigned I = `0`;; ++I) {
424	const Pointer &Elem = Arg.atIndex(Idx: I);
425
426	if (!CheckLoad(S, OpPC, Ptr: Elem))
427	return false;
428
429	if (Elem.deref<int8_t>() == `0`)
430	break;
431
432	Str += Elem.deref<char>();
433	}
434
435	// Treat empty strings as if they were zero.
436	if (Str.empty())
437	Fill = llvm::APInt (`32`, `0`);
438	else if (StringRef(Str).getAsInteger(Radix: `0`, Result&: Fill))
439	return false;
440
441	const llvm::fltSemantics &TargetSemantics =
442	S.getASTContext().getFloatTypeSemantics(
443	T: Call->getDirectCallee()->getReturnType());
444
445	Floating Result = S.allocFloat(Sem: TargetSemantics);
446	if (S.getASTContext().getTargetInfo().isNan2008()) {
447	if (Signaling)
448	Result.copy(
449	F: llvm::APFloat::getSNaN(Sem: TargetSemantics, /Negative=/false, payload: &Fill));
450	else
451	Result.copy(
452	F: llvm::APFloat::getQNaN(Sem: TargetSemantics, /Negative=/false, payload: &Fill));
453	} else {
454	// Prior to IEEE 754-2008, architectures were allowed to choose whether
455	// the first bit of their significand was set for qNaN or sNaN. MIPS chose
456	// a different encoding to what became a standard in 2008, and for pre-
457	// 2008 revisions, MIPS interpreted sNaN-2008 as qNan and qNaN-2008 as
458	// sNaN. This is now known as "legacy NaN" encoding.
459	if (Signaling)
460	Result.copy(
461	F: llvm::APFloat::getQNaN(Sem: TargetSemantics, /Negative=/false, payload: &Fill));
462	else
463	Result.copy(
464	F: llvm::APFloat::getSNaN(Sem: TargetSemantics, /Negative=/false, payload: &Fill));
465	}
466
467	S.Stk.push<Floating>(Args&: Result);
468	return true;
469	}
470
471	static bool interp__builtin_inf(InterpState &S, CodePtr OpPC,
472	const InterpFrame *Frame,
473	const CallExpr *Call) {
474	const llvm::fltSemantics &TargetSemantics =
475	S.getASTContext().getFloatTypeSemantics(
476	T: Call->getDirectCallee()->getReturnType());
477
478	Floating Result = S.allocFloat(Sem: TargetSemantics);
479	Result.copy(F: APFloat::getInf(Sem: TargetSemantics));
480	S.Stk.push<Floating>(Args&: Result);
481	return true;
482	}
483
484	static bool interp__builtin_copysign(InterpState &S, CodePtr OpPC,
485	const InterpFrame *Frame) {
486	const Floating &Arg2 = S.Stk.pop<Floating>();
487	const Floating &Arg1 = S.Stk.pop<Floating>();
488	Floating Result = S.allocFloat(Sem: Arg1.getSemantics());
489
490	APFloat Copy = Arg1.getAPFloat();
491	Copy.copySign(RHS: Arg2.getAPFloat());
492	Result.copy(F: Copy);
493	S.Stk.push<Floating>(Args&: Result);
494
495	return true;
496	}
497
498	static bool interp__builtin_fmin(InterpState &S, CodePtr OpPC,
499	const InterpFrame Frame, bool* IsNumBuiltin) {
500	const Floating &RHS = S.Stk.pop<Floating>();
501	const Floating &LHS = S.Stk.pop<Floating>();
502	Floating Result = S.allocFloat(Sem: LHS.getSemantics());
503
504	if (IsNumBuiltin)
505	Result.copy(F: llvm::minimumnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
506	else
507	Result.copy(F: minnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
508	S.Stk.push<Floating>(Args&: Result);
509	return true;
510	}
511
512	static bool interp__builtin_fmax(InterpState &S, CodePtr OpPC,
513	const InterpFrame Frame, bool* IsNumBuiltin) {
514	const Floating &RHS = S.Stk.pop<Floating>();
515	const Floating &LHS = S.Stk.pop<Floating>();
516	Floating Result = S.allocFloat(Sem: LHS.getSemantics());
517
518	if (IsNumBuiltin)
519	Result.copy(F: llvm::maximumnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
520	else
521	Result.copy(F: maxnum(A: LHS.getAPFloat(), B: RHS.getAPFloat()));
522	S.Stk.push<Floating>(Args&: Result);
523	return true;
524	}
525
526	/// Defined as __builtin_isnan(...), to accommodate the fact that it can
527	/// take a float, double, long double, etc.
528	/// But for us, that's all a Floating anyway.
529	static bool interp__builtin_isnan(InterpState &S, CodePtr OpPC,
530	const InterpFrame *Frame,
531	const CallExpr *Call) {
532	const Floating &Arg = S.Stk.pop<Floating>();
533
534	pushInteger(S, Val: Arg.isNan(), QT: Call->getType());
535	return true;
536	}
537
538	static bool interp__builtin_issignaling(InterpState &S, CodePtr OpPC,
539	const InterpFrame *Frame,
540	const CallExpr *Call) {
541	const Floating &Arg = S.Stk.pop<Floating>();
542
543	pushInteger(S, Val: Arg.isSignaling(), QT: Call->getType());
544	return true;
545	}
546
547	static bool interp__builtin_isinf(InterpState &S, CodePtr OpPC,
548	const InterpFrame Frame, bool* CheckSign,
549	const CallExpr *Call) {
550	const Floating &Arg = S.Stk.pop<Floating>();
551	APFloat F = Arg.getAPFloat();
552	bool IsInf = F.isInfinity();
553
554	if (CheckSign)
555	pushInteger(S, Val: IsInf ? (F.isNegative() ? -`1` : `1`) : `0`, QT: Call->getType());
556	else
557	pushInteger(S, Val: IsInf, QT: Call->getType());
558	return true;
559	}
560
561	static bool interp__builtin_isfinite(InterpState &S, CodePtr OpPC,
562	const InterpFrame *Frame,
563	const CallExpr *Call) {
564	const Floating &Arg = S.Stk.pop<Floating>();
565
566	pushInteger(S, Val: Arg.isFinite(), QT: Call->getType());
567	return true;
568	}
569
570	static bool interp__builtin_isnormal(InterpState &S, CodePtr OpPC,
571	const InterpFrame *Frame,
572	const CallExpr *Call) {
573	const Floating &Arg = S.Stk.pop<Floating>();
574
575	pushInteger(S, Val: Arg.isNormal(), QT: Call->getType());
576	return true;
577	}
578
579	static bool interp__builtin_issubnormal(InterpState &S, CodePtr OpPC,
580	const InterpFrame *Frame,
581	const CallExpr *Call) {
582	const Floating &Arg = S.Stk.pop<Floating>();
583
584	pushInteger(S, Val: Arg.isDenormal(), QT: Call->getType());
585	return true;
586	}
587
588	static bool interp__builtin_iszero(InterpState &S, CodePtr OpPC,
589	const InterpFrame *Frame,
590	const CallExpr *Call) {
591	const Floating &Arg = S.Stk.pop<Floating>();
592
593	pushInteger(S, Val: Arg.isZero(), QT: Call->getType());
594	return true;
595	}
596
597	static bool interp__builtin_signbit(InterpState &S, CodePtr OpPC,
598	const InterpFrame *Frame,
599	const CallExpr *Call) {
600	const Floating &Arg = S.Stk.pop<Floating>();
601
602	pushInteger(S, Val: Arg.isNegative(), QT: Call->getType());
603	return true;
604	}
605
606	static bool interp_floating_comparison(InterpState &S, CodePtr OpPC,
607	const CallExpr Call, unsigned* ID) {
608	const Floating &RHS = S.Stk.pop<Floating>();
609	const Floating &LHS = S.Stk.pop<Floating>();
610
611	pushInteger(
612	S,
613	Val: [&] {
614	switch (ID) {
615	case Builtin::BI__builtin_isgreater:
616	return LHS > RHS;
617	case Builtin::BI__builtin_isgreaterequal:
618	return LHS >= RHS;
619	case Builtin::BI__builtin_isless:
620	return LHS < RHS;
621	case Builtin::BI__builtin_islessequal:
622	return LHS <= RHS;
623	case Builtin::BI__builtin_islessgreater: {
624	ComparisonCategoryResult Cmp = LHS.compare(RHS);
625	return Cmp == ComparisonCategoryResult::Less \|\|
626	Cmp == ComparisonCategoryResult::Greater;
627	}
628	case Builtin::BI__builtin_isunordered:
629	return LHS.compare(RHS) == ComparisonCategoryResult::Unordered;
630	default:
631	llvm_unreachable("Unexpected builtin ID: Should be a floating point "
632	"comparison function");
633	}
634	}(),
635	QT: Call->getType());
636	return true;
637	}
638
639	/// First parameter to __builtin_isfpclass is the floating value, the
640	/// second one is an integral value.
641	static bool interp__builtin_isfpclass(InterpState &S, CodePtr OpPC,
642	const InterpFrame *Frame,
643	const CallExpr *Call) {
644	APSInt FPClassArg = popToAPSInt(S, E: Call->getArg(Arg: `1`));
645	const Floating &F = S.Stk.pop<Floating>();
646
647	int32_t Result = static_cast<int32_t>(
648	(F.classify() & std::move(FPClassArg)).getZExtValue());
649	pushInteger(S, Val: Result, QT: Call->getType());
650
651	return true;
652	}
653
654	/// Five int values followed by one floating value.
655	/// __builtin_fpclassify(int, int, int, int, int, float)
656	static bool interp__builtin_fpclassify(InterpState &S, CodePtr OpPC,
657	const InterpFrame *Frame,
658	const CallExpr *Call) {
659	const Floating &Val = S.Stk.pop<Floating>();
660
661	PrimType IntT = *S.getContext().classify(E: Call->getArg(Arg: `0`));
662	APSInt Values[`5`];
663	for (unsigned I = `0`; I != `5`; ++I)
664	Values[`4` - I] = popToAPSInt(Stk&: S.Stk, T: IntT);
665
666	unsigned Index;
667	switch (Val.getCategory()) {
668	case APFloat::fcNaN:
669	Index = `0`;
670	break;
671	case APFloat::fcInfinity:
672	Index = `1`;
673	break;
674	case APFloat::fcNormal:
675	Index = Val.isDenormal() ? `3` : `2`;
676	break;
677	case APFloat::fcZero:
678	Index = `4`;
679	break;
680	}
681
682	// The last argument is first on the stack.
683	assert(Index <= `4`);
684
685	pushInteger(S, Val: Values[Index], QT: Call->getType());
686	return true;
687	}
688
689	static inline Floating abs(InterpState &S, const Floating &In) {
690	if (!In.isNegative())
691	return In;
692
693	Floating Output = S.allocFloat(Sem: In.getSemantics());
694	APFloat New = In.getAPFloat();
695	New.changeSign();
696	Output.copy(F: New);
697	return Output;
698	}
699
700	// The C standard says "fabs raises no floating-point exceptions,
701	// even if x is a signaling NaN. The returned value is independent of
702	// the current rounding direction mode." Therefore constant folding can
703	// proceed without regard to the floating point settings.
704	// Reference, WG14 N2478 F.10.4.3
705	static bool interp__builtin_fabs(InterpState &S, CodePtr OpPC,
706	const InterpFrame *Frame) {
707	const Floating &Val = S.Stk.pop<Floating>();
708	S.Stk.push<Floating>(Args: abs(S, In: Val));
709	return true;
710	}
711
712	static bool interp__builtin_abs(InterpState &S, CodePtr OpPC,
713	const InterpFrame *Frame,
714	const CallExpr *Call) {
715	APSInt Val = popToAPSInt(S, E: Call->getArg(Arg: `0`));
716	if (Val ==
717	APSInt (APInt::getSignedMinValue(numBits: Val.getBitWidth()), /IsUnsigned=/false))
718	return false;
719	if (Val.isNegative())
720	Val.negate();
721	pushInteger(S, Val, QT: Call->getType());
722	return true;
723	}
724
725	static bool interp__builtin_popcount(InterpState &S, CodePtr OpPC,
726	const InterpFrame *Frame,
727	const CallExpr *Call) {
728	APSInt Val;
729	if (Call->getArg(Arg: `0`)->getType()->isExtVectorBoolType()) {
730	const Pointer &Arg = S.Stk.pop<Pointer>();
731	Val = convertBoolVectorToInt(Val: Arg);
732	} else {
733	Val = popToAPSInt(S, E: Call->getArg(Arg: `0`));
734	}
735	pushInteger(S, Val: Val.popcount(), QT: Call->getType());
736	return true;
737	}
738
739	static bool interp__builtin_ia32_crc32(InterpState &S, CodePtr OpPC,
740	const InterpFrame *Frame,
741	const CallExpr *Call,
742	unsigned DataBytes) {
743	uint64_t DataVal = popToUInt64(S, E: Call->getArg(Arg: `1`));
744	uint64_t CRCVal = popToUInt64(S, E: Call->getArg(Arg: `0`));
745
746	// CRC32C polynomial (iSCSI polynomial, bit-reversed)
747	static const uint32_t CRC32C_POLY = `0x82F63B78`;
748
749	// Process each byte
750	uint32_t Result = static_cast<uint32_t>(CRCVal);
751	for (unsigned I = `0`; I != DataBytes; ++I) {
752	uint8_t Byte = static_cast<uint8_t>((DataVal >> (I * `8`)) & `0xFF`);
753	Result ^= Byte;
754	for (int J = `0`; J != `8`; ++J) {
755	Result = (Result >> `1`) ^ ((Result & `1`) ? CRC32C_POLY : `0`);
756	}
757	}
758
759	pushInteger(S, Val: Result, QT: Call->getType());
760	return true;
761	}
762
763	static bool interp__builtin_classify_type(InterpState &S, CodePtr OpPC,
764	const InterpFrame *Frame,
765	const CallExpr *Call) {
766	// This is an unevaluated call, so there are no arguments on the stack.
767	assert(Call->getNumArgs() == `1`);
768	const Expr *Arg = Call->getArg(Arg: `0`);
769
770	GCCTypeClass ResultClass =
771	EvaluateBuiltinClassifyType(T: Arg->getType(), LangOpts: S.getLangOpts());
772	int32_t ReturnVal = static_cast<int32_t>(ResultClass);
773	pushInteger(S, Val: ReturnVal, QT: Call->getType());
774	return true;
775	}
776
777	// __builtin_expect(long, long)
778	// __builtin_expect_with_probability(long, long, double)
779	static bool interp__builtin_expect(InterpState &S, CodePtr OpPC,
780	const InterpFrame *Frame,
781	const CallExpr *Call) {
782	// The return value is simply the value of the first parameter.
783	// We ignore the probability.
784	unsigned NumArgs = Call->getNumArgs();
785	assert(NumArgs == `2` \|\| NumArgs == `3`);
786
787	PrimType ArgT = *S.getContext().classify(T: Call->getArg(Arg: `0`)->getType());
788	if (NumArgs == `3`)
789	S.Stk.discard<Floating>();
790	discard(Stk&: S.Stk, T: ArgT);
791
792	APSInt Val = popToAPSInt(Stk&: S.Stk, T: ArgT);
793	pushInteger(S, Val, QT: Call->getType());
794	return true;
795	}
796
797	static bool interp__builtin_addressof(InterpState &S, CodePtr OpPC,
798	const InterpFrame *Frame,
799	const CallExpr *Call) {
800	#ifndef NDEBUG
801	assert(Call->getArg(`0`)->isLValue());
802	PrimType PtrT = S.getContext().classify(Call->getArg(`0`)).value_or(PT_Ptr);
803	assert(PtrT == PT_Ptr &&
804	"Unsupported pointer type passed to __builtin_addressof()");
805	#endif
806	return true;
807	}
808
809	static bool interp__builtin_move(InterpState &S, CodePtr OpPC,
810	const InterpFrame *Frame,
811	const CallExpr *Call) {
812	return Call->getDirectCallee()->isConstexpr();
813	}
814
815	static bool interp__builtin_eh_return_data_regno(InterpState &S, CodePtr OpPC,
816	const InterpFrame *Frame,
817	const CallExpr *Call) {
818	APSInt Arg = popToAPSInt(S, E: Call->getArg(Arg: `0`));
819
820	int Result = S.getASTContext().getTargetInfo().getEHDataRegisterNumber(
821	RegNo: Arg.getZExtValue());
822	pushInteger(S, Val: Result, QT: Call->getType());
823	return true;
824	}
825
826	// Two integral values followed by a pointer (lhs, rhs, resultOut)
827	static bool interp__builtin_overflowop(InterpState &S, CodePtr OpPC,
828	const CallExpr *Call,
829	unsigned BuiltinOp) {
830	const Pointer &ResultPtr = S.Stk.pop<Pointer>();
831	if (ResultPtr.isDummy() \|\| !ResultPtr.isBlockPointer())
832	return false;
833
834	PrimType RHST = *S.getContext().classify(T: Call->getArg(Arg: `1`)->getType());
835	PrimType LHST = *S.getContext().classify(T: Call->getArg(Arg: `0`)->getType());
836	APSInt RHS = popToAPSInt(Stk&: S.Stk, T: RHST);
837	APSInt LHS = popToAPSInt(Stk&: S.Stk, T: LHST);
838	QualType ResultType = Call->getArg(Arg: `2`)->getType()->getPointeeType();
839	PrimType ResultT = *S.getContext().classify(T: ResultType);
840	bool Overflow;
841
842	APSInt Result;
843	if (BuiltinOp == Builtin::BI__builtin_add_overflow \|\|
844	BuiltinOp == Builtin::BI__builtin_sub_overflow \|\|
845	BuiltinOp == Builtin::BI__builtin_mul_overflow) {
846	bool IsSigned = LHS.isSigned() \|\| RHS.isSigned() \|\|
847	ResultType ->isSignedIntegerOrEnumerationType();
848	bool AllSigned = LHS.isSigned() && RHS.isSigned() &&
849	ResultType ->isSignedIntegerOrEnumerationType();
850	uint64_t LHSSize = LHS.getBitWidth();
851	uint64_t RHSSize = RHS.getBitWidth();
852	uint64_t ResultSize = S.getASTContext().getTypeSize(T: ResultType);
853	uint64_t MaxBits = std::max(a: std::max(a: LHSSize, b: RHSSize), b: ResultSize);
854
855	// Add an additional bit if the signedness isn't uniformly agreed to. We
856	// could do this ONLY if there is a signed and an unsigned that both have
857	// MaxBits, but the code to check that is pretty nasty. The issue will be
858	// caught in the shrink-to-result later anyway.
859	if (IsSigned && !AllSigned)
860	++MaxBits;
861
862	LHS = APSInt (LHS.extOrTrunc(width: MaxBits), !IsSigned);
863	RHS = APSInt (RHS.extOrTrunc(width: MaxBits), !IsSigned);
864	Result = APSInt (MaxBits, !IsSigned);
865	}
866
867	// Find largest int.
868	switch (BuiltinOp) {
869	default:
870	llvm_unreachable("Invalid value for BuiltinOp");
871	case Builtin::BI__builtin_add_overflow:
872	case Builtin::BI__builtin_sadd_overflow:
873	case Builtin::BI__builtin_saddl_overflow:
874	case Builtin::BI__builtin_saddll_overflow:
875	case Builtin::BI__builtin_uadd_overflow:
876	case Builtin::BI__builtin_uaddl_overflow:
877	case Builtin::BI__builtin_uaddll_overflow:
878	Result = LHS.isSigned() ? LHS.sadd_ov(RHS, Overflow)
879	: LHS.uadd_ov(RHS, Overflow);
880	break;
881	case Builtin::BI__builtin_sub_overflow:
882	case Builtin::BI__builtin_ssub_overflow:
883	case Builtin::BI__builtin_ssubl_overflow:
884	case Builtin::BI__builtin_ssubll_overflow:
885	case Builtin::BI__builtin_usub_overflow:
886	case Builtin::BI__builtin_usubl_overflow:
887	case Builtin::BI__builtin_usubll_overflow:
888	Result = LHS.isSigned() ? LHS.ssub_ov(RHS, Overflow)
889	: LHS.usub_ov(RHS, Overflow);
890	break;
891	case Builtin::BI__builtin_mul_overflow:
892	case Builtin::BI__builtin_smul_overflow:
893	case Builtin::BI__builtin_smull_overflow:
894	case Builtin::BI__builtin_smulll_overflow:
895	case Builtin::BI__builtin_umul_overflow:
896	case Builtin::BI__builtin_umull_overflow:
897	case Builtin::BI__builtin_umulll_overflow:
898	Result = LHS.isSigned() ? LHS.smul_ov(RHS, Overflow)
899	: LHS.umul_ov(RHS, Overflow);
900	break;
901	}
902
903	// In the case where multiple sizes are allowed, truncate and see if
904	// the values are the same.
905	if (BuiltinOp == Builtin::BI__builtin_add_overflow \|\|
906	BuiltinOp == Builtin::BI__builtin_sub_overflow \|\|
907	BuiltinOp == Builtin::BI__builtin_mul_overflow) {
908	// APSInt doesn't have a TruncOrSelf, so we use extOrTrunc instead,
909	// since it will give us the behavior of a TruncOrSelf in the case where
910	// its parameter <= its size. We previously set Result to be at least the
911	// type-size of the result, so getTypeSize(ResultType) <= Resu
912	APSInt Temp = Result.extOrTrunc(width: S.getASTContext().getTypeSize(T: ResultType));
913	Temp.setIsSigned(ResultType ->isSignedIntegerOrEnumerationType());
914
915	if (!APSInt::isSameValue(I1: Temp, I2: Result))
916	Overflow = true;
917	Result = std::move(Temp);
918	}
919
920	// Write Result to ResultPtr and put Overflow on the stack.
921	assignInteger(S, Dest: ResultPtr, ValueT: ResultT, Value: Result);
922	if (ResultPtr.canBeInitialized())
923	ResultPtr.initialize();
924
925	assert(Call->getDirectCallee()->getReturnType()->isBooleanType());
926	S.Stk.push<Boolean>(Args&: Overflow);
927	return true;
928	}
929
930	/// Three integral values followed by a pointer (lhs, rhs, carry, carryOut).
931	static bool interp__builtin_carryop(InterpState &S, CodePtr OpPC,
932	const InterpFrame *Frame,
933	const CallExpr Call, unsigned* BuiltinOp) {
934	const Pointer &CarryOutPtr = S.Stk.pop<Pointer>();
935	PrimType LHST = *S.getContext().classify(T: Call->getArg(Arg: `0`)->getType());
936	PrimType RHST = *S.getContext().classify(T: Call->getArg(Arg: `1`)->getType());
937	APSInt CarryIn = popToAPSInt(Stk&: S.Stk, T: LHST);
938	APSInt RHS = popToAPSInt(Stk&: S.Stk, T: RHST);
939	APSInt LHS = popToAPSInt(Stk&: S.Stk, T: LHST);
940
941	if (CarryOutPtr.isDummy() \|\| !CarryOutPtr.isBlockPointer())
942	return false;
943
944	APSInt CarryOut;
945
946	APSInt Result;
947	// Copy the number of bits and sign.
948	Result = LHS;
949	CarryOut = LHS;
950
951	bool FirstOverflowed = false;
952	bool SecondOverflowed = false;
953	switch (BuiltinOp) {
954	default:
955	llvm_unreachable("Invalid value for BuiltinOp");
956	case Builtin::BI__builtin_addcb:
957	case Builtin::BI__builtin_addcs:
958	case Builtin::BI__builtin_addc:
959	case Builtin::BI__builtin_addcl:
960	case Builtin::BI__builtin_addcll:
961	Result =
962	LHS.uadd_ov(RHS, Overflow&: FirstOverflowed).uadd_ov(RHS: CarryIn, Overflow&: SecondOverflowed);
963	break;
964	case Builtin::BI__builtin_subcb:
965	case Builtin::BI__builtin_subcs:
966	case Builtin::BI__builtin_subc:
967	case Builtin::BI__builtin_subcl:
968	case Builtin::BI__builtin_subcll:
969	Result =
970	LHS.usub_ov(RHS, Overflow&: FirstOverflowed).usub_ov(RHS: CarryIn, Overflow&: SecondOverflowed);
971	break;
972	}
973	// It is possible for both overflows to happen but CGBuiltin uses an OR so
974	// this is consistent.
975	CarryOut = (uint64_t)(FirstOverflowed \| SecondOverflowed);
976
977	QualType CarryOutType = Call->getArg(Arg: `3`)->getType()->getPointeeType();
978	PrimType CarryOutT = *S.getContext().classify(T: CarryOutType);
979	assignInteger(S, Dest: CarryOutPtr, ValueT: CarryOutT, Value: CarryOut);
980	CarryOutPtr.initialize();
981
982	assert(Call->getType() == Call->getArg(`0`)->getType());
983	pushInteger(S, Val: Result, QT: Call->getType());
984	return true;
985	}
986
987	static bool interp__builtin_clz(InterpState &S, CodePtr OpPC,
988	const InterpFrame Frame, const* CallExpr *Call,
989	unsigned BuiltinOp) {
990
991	std::optional<APSInt> Fallback;
992	if (BuiltinOp == Builtin::BI__builtin_clzg && Call->getNumArgs() == `2`)
993	Fallback = popToAPSInt(S, E: Call->getArg(Arg: `1`));
994
995	APSInt Val;
996	if (Call->getArg(Arg: `0`)->getType()->isExtVectorBoolType()) {
997	const Pointer &Arg = S.Stk.pop<Pointer>();
998	Val = convertBoolVectorToInt(Val: Arg);
999	} else {
1000	Val = popToAPSInt(S, E: Call->getArg(Arg: `0`));
1001	}
1002
1003	// When the argument is 0, the result of GCC builtins is undefined, whereas
1004	// for Microsoft intrinsics, the result is the bit-width of the argument.
1005	bool ZeroIsUndefined = BuiltinOp != Builtin::BI__lzcnt16 &&
1006	BuiltinOp != Builtin::BI__lzcnt &&
1007	BuiltinOp != Builtin::BI__lzcnt64;
1008
1009	if (Val == `0`) {
1010	if (Fallback) {
1011	pushInteger(S, Val: *Fallback, QT: Call->getType());
1012	return true;
1013	}
1014
1015	if (ZeroIsUndefined)
1016	return false;
1017	}
1018
1019	pushInteger(S, Val: Val.countl_zero(), QT: Call->getType());
1020	return true;
1021	}
1022
1023	static bool interp__builtin_ctz(InterpState &S, CodePtr OpPC,
1024	const InterpFrame Frame, const* CallExpr *Call,
1025	unsigned BuiltinID) {
1026	std::optional<APSInt> Fallback;
1027	if (BuiltinID == Builtin::BI__builtin_ctzg && Call->getNumArgs() == `2`)
1028	Fallback = popToAPSInt(S, E: Call->getArg(Arg: `1`));
1029
1030	APSInt Val;
1031	if (Call->getArg(Arg: `0`)->getType()->isExtVectorBoolType()) {
1032	const Pointer &Arg = S.Stk.pop<Pointer>();
1033	Val = convertBoolVectorToInt(Val: Arg);
1034	} else {
1035	Val = popToAPSInt(S, E: Call->getArg(Arg: `0`));
1036	}
1037
1038	if (Val == `0`) {
1039	if (Fallback) {
1040	pushInteger(S, Val: *Fallback, QT: Call->getType());
1041	return true;
1042	}
1043	return false;
1044	}
1045
1046	pushInteger(S, Val: Val.countr_zero(), QT: Call->getType());
1047	return true;
1048	}
1049
1050	static bool interp__builtin_bswap(InterpState &S, CodePtr OpPC,
1051	const InterpFrame *Frame,
1052	const CallExpr *Call) {
1053	const APSInt &Val = popToAPSInt(S, E: Call->getArg(Arg: `0`));
1054	if (Val.getBitWidth() == `8` \|\| Val.getBitWidth() == `1`)
1055	pushInteger(S, Val, QT: Call->getType());
1056	else
1057	pushInteger(S, Val: Val.byteSwap(), QT: Call->getType());
1058	return true;
1059	}
1060
1061	/// bool __atomic_always_lock_free(size_t, void const volatile)*
1062	/// bool __atomic_is_lock_free(size_t, void const volatile)*
1063	static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC,
1064	const InterpFrame *Frame,
1065	const CallExpr *Call,
1066	unsigned BuiltinOp) {
1067	auto returnBool = [&S](bool Value) -> bool {
1068	S.Stk.push<Boolean>(Args&: Value);
1069	return true;
1070	};
1071
1072	const Pointer &Ptr = S.Stk.pop<Pointer>();
1073	uint64_t SizeVal = popToUInt64(S, E: Call->getArg(Arg: `0`));
1074
1075	// For __atomic_is_lock_free(sizeof(_Atomic(T))), if the size is a power
1076	// of two less than or equal to the maximum inline atomic width, we know it
1077	// is lock-free. If the size isn't a power of two, or greater than the
1078	// maximum alignment where we promote atomics, we know it is not lock-free
1079	// (at least not in the sense of atomic_is_lock_free). Otherwise,
1080	// the answer can only be determined at runtime; for example, 16-byte
1081	// atomics have lock-free implementations on some, but not all,
1082	// x86-64 processors.
1083
1084	// Check power-of-two.
1085	CharUnits Size = CharUnits::fromQuantity(Quantity: SizeVal);
1086	if (Size.isPowerOfTwo()) {
1087	// Check against inlining width.
1088	unsigned InlineWidthBits =
1089	S.getASTContext().getTargetInfo().getMaxAtomicInlineWidth();
1090	if (Size <= S.getASTContext().toCharUnitsFromBits(BitSize: InlineWidthBits)) {
1091
1092	// OK, we will inline appropriately-aligned operations of this size,
1093	// and _Atomic(T) is appropriately-aligned.
1094	if (Size == CharUnits::One())
1095	return returnBool (true);
1096
1097	// Same for null pointers.
1098	assert(BuiltinOp != Builtin::BI__c11_atomic_is_lock_free);
1099	if (Ptr.isZero())
1100	return returnBool (true);
1101
1102	if (Ptr.isIntegralPointer()) {
1103	uint64_t IntVal = Ptr.getIntegerRepresentation();
1104	if (APSInt (APInt (`64`, IntVal, false), true).isAligned(A: Size.getAsAlign()))
1105	return returnBool (true);
1106	}
1107
1108	const Expr *PtrArg = Call->getArg(Arg: `1`);
1109	// Otherwise, check if the type's alignment against Size.
1110	if (const auto *ICE = dyn_cast<ImplicitCastExpr>(Val: PtrArg)) {
1111	// Drop the potential implicit-cast to 'const volatile void', getting*
1112	// the underlying type.
1113	if (ICE->getCastKind() == CK_BitCast)
1114	PtrArg = ICE->getSubExpr();
1115	}
1116
1117	if (const auto *PtrTy = PtrArg->getType()->getAs<PointerType>()) {
1118	QualType PointeeType = PtrTy->getPointeeType();
1119	if (!PointeeType ->isIncompleteType() &&
1120	S.getASTContext().getTypeAlignInChars(T: PointeeType) >= Size) {
1121	// OK, we will inline operations on this object.
1122	return returnBool (true);
1123	}
1124	}
1125	}
1126	}
1127
1128	if (BuiltinOp == Builtin::BI__atomic_always_lock_free)
1129	return returnBool (false);
1130
1131	return Invalid(S, OpPC);
1132	}
1133
1134	/// bool __c11_atomic_is_lock_free(size_t)
1135	static bool interp__builtin_c11_atomic_is_lock_free(InterpState &S,
1136	CodePtr OpPC,
1137	const InterpFrame *Frame,
1138	const CallExpr *Call) {
1139	uint64_t SizeVal = popToUInt64(S, E: Call->getArg(Arg: `0`));
1140
1141	CharUnits Size = CharUnits::fromQuantity(Quantity: SizeVal);
1142	if (Size.isPowerOfTwo()) {
1143	// Check against inlining width.
1144	unsigned InlineWidthBits =
1145	S.getASTContext().getTargetInfo().getMaxAtomicInlineWidth();
1146	if (Size <= S.getASTContext().toCharUnitsFromBits(BitSize: InlineWidthBits)) {
1147	S.Stk.push<Boolean>(Args: true);
1148	return true;
1149	}
1150	}
1151
1152	return false; // returnBool(false);
1153	}
1154
1155	/// __builtin_complex(Float A, float B);
1156	static bool interp__builtin_complex(InterpState &S, CodePtr OpPC,
1157	const InterpFrame *Frame,
1158	const CallExpr *Call) {
1159	const Floating &Arg2 = S.Stk.pop<Floating>();
1160	const Floating &Arg1 = S.Stk.pop<Floating>();
1161	Pointer &Result = S.Stk.peek<Pointer>();
1162
1163	Result.elem<Floating>(I: `0`) = Arg1;
1164	Result.elem<Floating>(I: `1`) = Arg2;
1165	Result.initializeAllElements();
1166
1167	return true;
1168	}
1169
1170	/// __builtin_is_aligned()
1171	/// __builtin_align_up()
1172	/// __builtin_align_down()
1173	/// The first parameter is either an integer or a pointer.
1174	/// The second parameter is the requested alignment as an integer.
1175	static bool interp__builtin_is_aligned_up_down(InterpState &S, CodePtr OpPC,
1176	const InterpFrame *Frame,
1177	const CallExpr *Call,
1178	unsigned BuiltinOp) {
1179	const APSInt &Alignment = popToAPSInt(S, E: Call->getArg(Arg: `1`));
1180
1181	if (Alignment < `0` \|\| !Alignment.isPowerOf2()) {
1182	S.FFDiag(E: Call, DiagId: diag::note_constexpr_invalid_alignment) << Alignment;
1183	return false;
1184	}
1185	unsigned SrcWidth = S.getASTContext().getIntWidth(T: Call->getArg(Arg: `0`)->getType());
1186	APSInt MaxValue(APInt::getOneBitSet(numBits: SrcWidth, BitNo: SrcWidth - `1`));
1187	if (APSInt::compareValues(I1: Alignment, I2: MaxValue) > `0`) {
1188	S.FFDiag(E: Call, DiagId: diag::note_constexpr_alignment_too_big)
1189	<< MaxValue << Call->getArg(Arg: `0`)->getType() << Alignment;
1190	return false;
1191	}
1192
1193	// The first parameter is either an integer or a pointer.
1194	PrimType FirstArgT = *S.Ctx.classify(E: Call->getArg(Arg: `0`));
1195
1196	if (isIntegerType(T: FirstArgT)) {
1197	const APSInt &Src = popToAPSInt(Stk&: S.Stk, T: FirstArgT);
1198	APInt AlignMinusOne = Alignment.extOrTrunc(width: Src.getBitWidth()) - `1`;
1199	if (BuiltinOp == Builtin::BI__builtin_align_up) {
1200	APSInt AlignedVal =
1201	APSInt ((Src + AlignMinusOne) & ~AlignMinusOne, Src.isUnsigned());
1202	pushInteger(S, Val: AlignedVal, QT: Call->getType());
1203	} else if (BuiltinOp == Builtin::BI__builtin_align_down) {
1204	APSInt AlignedVal = APSInt (Src & ~AlignMinusOne, Src.isUnsigned());
1205	pushInteger(S, Val: AlignedVal, QT: Call->getType());
1206	} else {
1207	assert(*S.Ctx.classify(Call->getType()) == PT_Bool);
1208	S.Stk.push<Boolean>(Args: (Src & AlignMinusOne) == `0`);
1209	}
1210	return true;
1211	}
1212	assert(FirstArgT == PT_Ptr);
1213	const Pointer &Ptr = S.Stk.pop<Pointer>();
1214	if (!Ptr.isBlockPointer())
1215	return false;
1216
1217	// For one-past-end pointers, we can't call getIndex() since it asserts.
1218	// Use getNumElems() instead which gives the correct index for past-end.
1219	unsigned PtrOffset =
1220	Ptr.isElementPastEnd() ? Ptr.getNumElems() : Ptr.getIndex();
1221	CharUnits BaseAlignment =
1222	S.getASTContext().getDeclAlign(D: Ptr.getDeclDesc()->asValueDecl());
1223	CharUnits PtrAlign =
1224	BaseAlignment.alignmentAtOffset(offset: CharUnits::fromQuantity(Quantity: PtrOffset));
1225
1226	if (BuiltinOp == Builtin::BI__builtin_is_aligned) {
1227	if (PtrAlign.getQuantity() >= Alignment) {
1228	S.Stk.push<Boolean>(Args: true);
1229	return true;
1230	}
1231	// If the alignment is not known to be sufficient, some cases could still
1232	// be aligned at run time. However, if the requested alignment is less or
1233	// equal to the base alignment and the offset is not aligned, we know that
1234	// the run-time value can never be aligned.
1235	if (BaseAlignment.getQuantity() >= Alignment &&
1236	PtrAlign.getQuantity() < Alignment) {
1237	S.Stk.push<Boolean>(Args: false);
1238	return true;
1239	}
1240
1241	S.FFDiag(E: Call->getArg(Arg: `0`), DiagId: diag::note_constexpr_alignment_compute)
1242	<< Alignment;
1243	return false;
1244	}
1245
1246	assert(BuiltinOp == Builtin::BI__builtin_align_down \|\|
1247	BuiltinOp == Builtin::BI__builtin_align_up);
1248
1249	// For align_up/align_down, we can return the same value if the alignment
1250	// is known to be greater or equal to the requested value.
1251	if (PtrAlign.getQuantity() >= Alignment) {
1252	S.Stk.push<Pointer>(Args: Ptr);
1253	return true;
1254	}
1255
1256	// The alignment could be greater than the minimum at run-time, so we cannot
1257	// infer much about the resulting pointer value. One case is possible:
1258	// For `_Alignas(32) char buf[N]; __builtin_align_down(&buf[idx], 32)` we
1259	// can infer the correct index if the requested alignment is smaller than
1260	// the base alignment so we can perform the computation on the offset.
1261	if (BaseAlignment.getQuantity() >= Alignment) {
1262	assert(Alignment.getBitWidth() <= `64` &&
1263	"Cannot handle > 64-bit address-space");
1264	uint64_t Alignment64 = Alignment.getZExtValue();
1265	CharUnits NewOffset =
1266	CharUnits::fromQuantity(Quantity: BuiltinOp == Builtin::BI__builtin_align_down
1267	? llvm::alignDown(Value: PtrOffset, Align: Alignment64)
1268	: llvm::alignTo(Value: PtrOffset, Align: Alignment64));
1269
1270	S.Stk.push<Pointer>(Args: Ptr.atIndex(Idx: NewOffset.getQuantity()));
1271	return true;
1272	}
1273
1274	// Otherwise, we cannot constant-evaluate the result.
1275	S.FFDiag(E: Call->getArg(Arg: `0`), DiagId: diag::note_constexpr_alignment_adjust) << Alignment;
1276	return false;
1277	}
1278
1279	/// __builtin_assume_aligned(Ptr, Alignment[, ExtraOffset])
1280	static bool interp__builtin_assume_aligned(InterpState &S, CodePtr OpPC,
1281	const InterpFrame *Frame,
1282	const CallExpr *Call) {
1283	assert(Call->getNumArgs() == `2` \|\| Call->getNumArgs() == `3`);
1284
1285	std::optional<APSInt> ExtraOffset;
1286	if (Call->getNumArgs() == `3`)
1287	ExtraOffset = popToAPSInt(Stk&: S.Stk, T: *S.Ctx.classify(E: Call->getArg(Arg: `2`)));
1288
1289	APSInt Alignment = popToAPSInt(Stk&: S.Stk, T: *S.Ctx.classify(E: Call->getArg(Arg: `1`)));
1290	const Pointer &Ptr = S.Stk.pop<Pointer>();
1291
1292	CharUnits Align = CharUnits::fromQuantity(Quantity: Alignment.getZExtValue());
1293
1294	// If there is a base object, then it must have the correct alignment.
1295	if (Ptr.isBlockPointer()) {
1296	CharUnits BaseAlignment;
1297	if (const auto *VD = Ptr.getDeclDesc()->asValueDecl())
1298	BaseAlignment = S.getASTContext().getDeclAlign(D: VD);
1299	else if (const auto *E = Ptr.getDeclDesc()->asExpr())
1300	BaseAlignment = GetAlignOfExpr(Ctx: S.getASTContext(), E, ExprKind: UETT_AlignOf);
1301
1302	if (BaseAlignment < Align) {
1303	S.CCEDiag(E: Call->getArg(Arg: `0`),
1304	DiagId: diag::note_constexpr_baa_insufficient_alignment)
1305	<< `0` << BaseAlignment.getQuantity() << Align.getQuantity();
1306	return false;
1307	}
1308	}
1309
1310	APValue AV = Ptr.toAPValue(ASTCtx: S.getASTContext());
1311	CharUnits AVOffset = AV.getLValueOffset();
1312	if (ExtraOffset)
1313	AVOffset -= CharUnits::fromQuantity(Quantity: ExtraOffset ->getZExtValue());
1314	if (AVOffset.alignTo(Align) != AVOffset) {
1315	if (Ptr.isBlockPointer())
1316	S.CCEDiag(E: Call->getArg(Arg: `0`),
1317	DiagId: diag::note_constexpr_baa_insufficient_alignment)
1318	<< `1` << AVOffset.getQuantity() << Align.getQuantity();
1319	else
1320	S.CCEDiag(E: Call->getArg(Arg: `0`),
1321	DiagId: diag::note_constexpr_baa_value_insufficient_alignment)
1322	<< AVOffset.getQuantity() << Align.getQuantity();
1323	return false;
1324	}
1325
1326	S.Stk.push<Pointer>(Args: Ptr);
1327	return true;
1328	}
1329
1330	/// (CarryIn, LHS, RHS, Result)
1331	static bool interp__builtin_ia32_addcarry_subborrow(InterpState &S,
1332	CodePtr OpPC,
1333	const InterpFrame *Frame,
1334	const CallExpr *Call,
1335	unsigned BuiltinOp) {
1336	if (Call->getNumArgs() != `4` \|\| !Call->getArg(Arg: `0`)->getType()->isIntegerType() \|\|
1337	!Call->getArg(Arg: `1`)->getType()->isIntegerType() \|\|
1338	!Call->getArg(Arg: `2`)->getType()->isIntegerType())
1339	return false;
1340
1341	const Pointer &CarryOutPtr = S.Stk.pop<Pointer>();
1342
1343	APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: `2`));
1344	APSInt LHS = popToAPSInt(S, E: Call->getArg(Arg: `1`));
1345	APSInt CarryIn = popToAPSInt(S, E: Call->getArg(Arg: `0`));
1346
1347	bool IsAdd = BuiltinOp == clang::X86::BI__builtin_ia32_addcarryx_u32 \|\|
1348	BuiltinOp == clang::X86::BI__builtin_ia32_addcarryx_u64;
1349
1350	unsigned BitWidth = LHS.getBitWidth();
1351	unsigned CarryInBit = CarryIn.ugt(RHS: `0`) ? `1` : `0`;
1352	APInt ExResult =
1353	IsAdd ? (LHS.zext(width: BitWidth + `1`) + (RHS.zext(width: BitWidth + `1`) + CarryInBit))
1354	: (LHS.zext(width: BitWidth + `1`) - (RHS.zext(width: BitWidth + `1`) + CarryInBit));
1355
1356	APInt Result = ExResult.extractBits(numBits: BitWidth, bitPosition: `0`);
1357	APSInt CarryOut =
1358	APSInt (ExResult.extractBits(numBits: `1`, bitPosition: BitWidth), /IsUnsigned=/true);
1359
1360	QualType CarryOutType = Call->getArg(Arg: `3`)->getType()->getPointeeType();
1361	PrimType CarryOutT = *S.getContext().classify(T: CarryOutType);
1362	assignInteger(S, Dest: CarryOutPtr, ValueT: CarryOutT, Value: APSInt (std::move(Result), true));
1363
1364	pushInteger(S, Val: CarryOut, QT: Call->getType());
1365
1366	return true;
1367	}
1368
1369	static bool interp__builtin_os_log_format_buffer_size(InterpState &S,
1370	CodePtr OpPC,
1371	const InterpFrame *Frame,
1372	const CallExpr *Call) {
1373	analyze_os_log::OSLogBufferLayout Layout;
1374	analyze_os_log::computeOSLogBufferLayout(Ctx&: S.getASTContext(), E: Call, layout&: Layout);
1375	pushInteger(S, Val: Layout.size().getQuantity(), QT: Call->getType());
1376	return true;
1377	}
1378
1379	static bool
1380	interp__builtin_ptrauth_string_discriminator(InterpState &S, CodePtr OpPC,
1381	const InterpFrame *Frame,
1382	const CallExpr *Call) {
1383	const auto &Ptr = S.Stk.pop<Pointer>();
1384	assert(Ptr.getFieldDesc()->isPrimitiveArray());
1385
1386	// This should be created for a StringLiteral, so should alway shold at least
1387	// one array element.
1388	assert(Ptr.getFieldDesc()->getNumElems() >= `1`);
1389	StringRef R(&Ptr.deref<char>(), Ptr.getFieldDesc()->getNumElems() - `1`);
1390	uint64_t Result = getPointerAuthStableSipHash(S: R);
1391	pushInteger(S, Val: Result, QT: Call->getType());
1392	return true;
1393	}
1394
1395	static bool interp__builtin_infer_alloc_token(InterpState &S, CodePtr OpPC,
1396	const InterpFrame *Frame,
1397	const CallExpr *Call) {
1398	const ASTContext &ASTCtx = S.getASTContext();
1399	uint64_t BitWidth = ASTCtx.getTypeSize(T: ASTCtx.getSizeType());
1400	auto Mode =
1401	ASTCtx.getLangOpts().AllocTokenMode.value_or(u: llvm::DefaultAllocTokenMode);
1402	auto MaxTokensOpt = ASTCtx.getLangOpts().AllocTokenMax;
1403	uint64_t MaxTokens =
1404	MaxTokensOpt.value_or(u: `0`) ? *MaxTokensOpt : (~`0ULL` >> (`64` - BitWidth));
1405
1406	// We do not read any of the arguments; discard them.
1407	for (int I = Call->getNumArgs() - `1`; I >= `0`; --I)
1408	discard(Stk&: S.Stk, T: S.getContext().classify(E: Call->getArg(Arg: I)).value_or(PT: PT_Ptr));
1409
1410	// Note: Type inference from a surrounding cast is not supported in
1411	// constexpr evaluation.
1412	QualType AllocType = infer_alloc::inferPossibleType(E: Call, Ctx: ASTCtx, CastE: nullptr);
1413	if (AllocType.isNull()) {
1414	S.CCEDiag(E: Call,
1415	DiagId: diag::note_constexpr_infer_alloc_token_type_inference_failed);
1416	return false;
1417	}
1418
1419	auto ATMD = infer_alloc::getAllocTokenMetadata(T: AllocType, Ctx: ASTCtx);
1420	if (!ATMD) {
1421	S.CCEDiag(E: Call, DiagId: diag::note_constexpr_infer_alloc_token_no_metadata);
1422	return false;
1423	}
1424
1425	auto MaybeToken = llvm::getAllocToken(Mode, Metadata: *ATMD, MaxTokens);
1426	if (!MaybeToken) {
1427	S.CCEDiag(E: Call, DiagId: diag::note_constexpr_infer_alloc_token_stateful_mode);
1428	return false;
1429	}
1430
1431	pushInteger(S, Val: llvm::APInt (BitWidth, *MaybeToken), QT: ASTCtx.getSizeType());
1432	return true;
1433	}
1434
1435	static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC,
1436	const InterpFrame *Frame,
1437	const CallExpr *Call) {
1438	// A call to __operator_new is only valid within std::allocate<>::allocate.
1439	// Walk up the call stack to find the appropriate caller and get the
1440	// element type from it.
1441	auto [NewCall, ElemType] = S.getStdAllocatorCaller(Name: "allocate");
1442
1443	if (ElemType.isNull()) {
1444	S.FFDiag(E: Call, DiagId: S.getLangOpts().CPlusPlus20
1445	? diag::note_constexpr_new_untyped
1446	: diag::note_constexpr_new);
1447	return false;
1448	}
1449	assert(NewCall);
1450
1451	if (ElemType ->isIncompleteType() \|\| ElemType ->isFunctionType()) {
1452	S.FFDiag(E: Call, DiagId: diag::note_constexpr_new_not_complete_object_type)
1453	<< (ElemType ->isIncompleteType() ? `0` : `1`) << ElemType;
1454	return false;
1455	}
1456
1457	// We only care about the first parameter (the size), so discard all the
1458	// others.
1459	{
1460	unsigned NumArgs = Call->getNumArgs();
1461	assert(NumArgs >= `1`);
1462
1463	// The std::nothrow_t arg never gets put on the stack.
1464	if (Call->getArg(Arg: NumArgs - `1`)->getType()->isNothrowT())
1465	--NumArgs;
1466	auto Args = ArrayRef(Call->getArgs(), Call->getNumArgs());
1467	// First arg is needed.
1468	Args = Args.drop_front();
1469
1470	// Discard the rest.
1471	for (const Expr *Arg : Args)
1472	discard(Stk&: S.Stk, T: *S.getContext().classify(E: Arg));
1473	}
1474
1475	APSInt Bytes = popToAPSInt(S, E: Call->getArg(Arg: `0`));
1476	CharUnits ElemSize = S.getASTContext().getTypeSizeInChars(T: ElemType);
1477	assert(!ElemSize.isZero());
1478	// Divide the number of bytes by sizeof(ElemType), so we get the number of
1479	// elements we should allocate.
1480	APInt NumElems, Remainder;
1481	APInt ElemSizeAP(Bytes.getBitWidth(), ElemSize.getQuantity());
1482	APInt::udivrem(LHS: Bytes, RHS: ElemSizeAP, Quotient&: NumElems, Remainder);
1483	if (Remainder != `0`) {
1484	// This likely indicates a bug in the implementation of 'std::allocator'.
1485	S.FFDiag(E: Call, DiagId: diag::note_constexpr_operator_new_bad_size)
1486	<< Bytes << APSInt (ElemSizeAP, true) << ElemType;
1487	return false;
1488	}
1489
1490	// NB: The same check we're using in CheckArraySize()
1491	if (NumElems.getActiveBits() >
1492	ConstantArrayType::getMaxSizeBits(Context: S.getASTContext()) \|\|
1493	NumElems.ugt(RHS: Descriptor::MaxArrayElemBytes / ElemSize.getQuantity())) {
1494	// FIXME: NoThrow check?
1495	const SourceInfo &Loc = S.Current->getSource(PC: OpPC);
1496	S.FFDiag(SI: Loc, DiagId: diag::note_constexpr_new_too_large)
1497	<< NumElems.getZExtValue();
1498	return false;
1499	}
1500
1501	if (!CheckArraySize(S, OpPC, NumElems: NumElems.getZExtValue()))
1502	return false;
1503
1504	bool IsArray = NumElems.ugt(RHS: `1`);
1505	OptPrimType ElemT = S.getContext().classify(T: ElemType);
1506	DynamicAllocator &Allocator = S.getAllocator();
1507	if (ElemT) {
1508	Block *B =
1509	Allocator.allocate(Source: NewCall, T: *ElemT, NumElements: NumElems.getZExtValue(),
1510	EvalID: S.Ctx.getEvalID(), AllocForm: DynamicAllocator::Form::Operator);
1511	assert(B);
1512	S.Stk.push<Pointer>(Args: Pointer (B).atIndex(Idx: `0`));
1513	return true;
1514	}
1515
1516	assert(!ElemT);
1517
1518	// Composite arrays
1519	if (IsArray) {
1520	const Descriptor *Desc =
1521	S.P.createDescriptor(D: NewCall, Ty: ElemType.getTypePtr(), MDSize: std::nullopt);
1522	Block *B =
1523	Allocator.allocate(D: Desc, NumElements: NumElems.getZExtValue(), EvalID: S.Ctx.getEvalID(),
1524	AllocForm: DynamicAllocator::Form::Operator);
1525	assert(B);
1526	S.Stk.push<Pointer>(Args: Pointer (B).atIndex(Idx: `0`).narrow());
1527	return true;
1528	}
1529
1530	// Records. Still allocate them as single-element arrays.
1531	QualType AllocType = S.getASTContext().getConstantArrayType(
1532	EltTy: ElemType, ArySize: NumElems, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, IndexTypeQuals: `0`);
1533
1534	const Descriptor *Desc = S.P.createDescriptor(D: NewCall, Ty: AllocType.getTypePtr(),
1535	MDSize: Descriptor::InlineDescMD);
1536	Block *B = Allocator.allocate(D: Desc, EvalID: S.getContext().getEvalID(),
1537	AllocForm: DynamicAllocator::Form::Operator);
1538	assert(B);
1539	S.Stk.push<Pointer>(Args: Pointer (B).atIndex(Idx: `0`).narrow());
1540	return true;
1541	}
1542
1543	static bool interp__builtin_operator_delete(InterpState &S, CodePtr OpPC,
1544	const InterpFrame *Frame,
1545	const CallExpr *Call) {
1546	const Expr Source = nullptr*;
1547	const Block BlockToDelete = nullptr*;
1548
1549	if (S.checkingPotentialConstantExpression()) {
1550	S.Stk.discard<Pointer>();
1551	return false;
1552	}
1553
1554	// This is permitted only within a call to std::allocator<T>::deallocate.
1555	if (!S.getStdAllocatorCaller(Name: "deallocate")) {
1556	S.FFDiag(E: Call);
1557	S.Stk.discard<Pointer>();
1558	return true;
1559	}
1560
1561	{
1562	const Pointer &Ptr = S.Stk.pop<Pointer>();
1563
1564	if (Ptr.isZero()) {
1565	S.CCEDiag(E: Call, DiagId: diag::note_constexpr_deallocate_null);
1566	return true;
1567	}
1568
1569	Source = Ptr.getDeclDesc()->asExpr();
1570	BlockToDelete = Ptr.block();
1571
1572	if (!BlockToDelete->isDynamic()) {
1573	S.FFDiag(E: Call, DiagId: diag::note_constexpr_delete_not_heap_alloc)
1574	<< Ptr.toDiagnosticString(Ctx: S.getASTContext());
1575	if (const auto *D = Ptr.getFieldDesc()->asDecl())
1576	S.Note(Loc: D->getLocation(), DiagId: diag::note_declared_at);
1577	}
1578	}
1579	assert(BlockToDelete);
1580
1581	DynamicAllocator &Allocator = S.getAllocator();
1582	const Descriptor *BlockDesc = BlockToDelete->getDescriptor();
1583	std::optional<DynamicAllocator::Form> AllocForm =
1584	Allocator.getAllocationForm(Source);
1585
1586	if (!Allocator.deallocate(Source, BlockToDelete, S)) {
1587	// Nothing has been deallocated, this must be a double-delete.
1588	const SourceInfo &Loc = S.Current->getSource(PC: OpPC);
1589	S.FFDiag(SI: Loc, DiagId: diag::note_constexpr_double_delete);
1590	return false;
1591	}
1592	assert(AllocForm);
1593
1594	return CheckNewDeleteForms(
1595	S, OpPC, AllocForm: *AllocForm, DeleteForm: DynamicAllocator::Form::Operator, D: BlockDesc, NewExpr: Source);
1596	}
1597
1598	static bool interp__builtin_arithmetic_fence(InterpState &S, CodePtr OpPC,
1599	const InterpFrame *Frame,
1600	const CallExpr *Call) {
1601	const Floating &Arg0 = S.Stk.pop<Floating>();
1602	S.Stk.push<Floating>(Args: Arg0);
1603	return true;
1604	}
1605
1606	static bool interp__builtin_vector_reduce(InterpState &S, CodePtr OpPC,
1607	const CallExpr Call, unsigned* ID) {
1608	const Pointer &Arg = S.Stk.pop<Pointer>();
1609	assert(Arg.getFieldDesc()->isPrimitiveArray());
1610
1611	QualType ElemType = Arg.getFieldDesc()->getElemQualType();
1612	assert(Call->getType() == ElemType);
1613	PrimType ElemT = *S.getContext().classify(T: ElemType);
1614	unsigned NumElems = Arg.getNumElems();
1615
1616	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
1617	T Result = Arg.elem<T>(`0`);
1618	unsigned BitWidth = Result.bitWidth();
1619	for (unsigned I = `1`; I != NumElems; ++I) {
1620	T Elem = Arg.elem<T>(I);
1621	T PrevResult = Result;
1622
1623	if (ID == Builtin::BI__builtin_reduce_add) {
1624	if (T::add(Result, Elem, BitWidth, &Result)) {
1625	unsigned OverflowBits = BitWidth + `1`;
1626	(void)handleOverflow(S, OpPC,
1627	(PrevResult.toAPSInt(OverflowBits) +
1628	Elem.toAPSInt(OverflowBits)));
1629	return false;
1630	}
1631	} else if (ID == Builtin::BI__builtin_reduce_mul) {
1632	if (T::mul(Result, Elem, BitWidth, &Result)) {
1633	unsigned OverflowBits = BitWidth * `2`;
1634	(void)handleOverflow(S, OpPC,
1635	(PrevResult.toAPSInt(OverflowBits) *
1636	Elem.toAPSInt(OverflowBits)));
1637	return false;
1638	}
1639
1640	} else if (ID == Builtin::BI__builtin_reduce_and) {
1641	(void)T::bitAnd(Result, Elem, BitWidth, &Result);
1642	} else if (ID == Builtin::BI__builtin_reduce_or) {
1643	(void)T::bitOr(Result, Elem, BitWidth, &Result);
1644	} else if (ID == Builtin::BI__builtin_reduce_xor) {
1645	(void)T::bitXor(Result, Elem, BitWidth, &Result);
1646	} else if (ID == Builtin::BI__builtin_reduce_min) {
1647	if (Elem < Result)
1648	Result = Elem;
1649	} else if (ID == Builtin::BI__builtin_reduce_max) {
1650	if (Elem > Result)
1651	Result = Elem;
1652	} else {
1653	llvm_unreachable("Unhandled vector reduce builtin");
1654	}
1655	}
1656	pushInteger(S, Result.toAPSInt(), Call->getType());
1657	});
1658
1659	return true;
1660	}
1661
1662	static bool interp__builtin_elementwise_abs(InterpState &S, CodePtr OpPC,
1663	const InterpFrame *Frame,
1664	const CallExpr *Call,
1665	unsigned BuiltinID) {
1666	assert(Call->getNumArgs() == `1`);
1667	QualType Ty = Call->getArg(Arg: `0`)->getType();
1668	if (Ty ->isIntegerType()) {
1669	APSInt Val = popToAPSInt(S, E: Call->getArg(Arg: `0`));
1670	pushInteger(S, Val: Val.abs(), QT: Call->getType());
1671	return true;
1672	}
1673
1674	if (Ty ->isFloatingType()) {
1675	Floating Val = S.Stk.pop<Floating>();
1676	Floating Result = abs(S, In: Val);
1677	S.Stk.push<Floating>(Args&: Result);
1678	return true;
1679	}
1680
1681	// Otherwise, the argument must be a vector.
1682	assert(Call->getArg(`0`)->getType()->isVectorType());
1683	const Pointer &Arg = S.Stk.pop<Pointer>();
1684	assert(Arg.getFieldDesc()->isPrimitiveArray());
1685	const Pointer &Dst = S.Stk.peek<Pointer>();
1686	assert(Dst.getFieldDesc()->isPrimitiveArray());
1687	assert(Arg.getFieldDesc()->getNumElems() ==
1688	Dst.getFieldDesc()->getNumElems());
1689
1690	QualType ElemType = Arg.getFieldDesc()->getElemQualType();
1691	PrimType ElemT = *S.getContext().classify(T: ElemType);
1692	unsigned NumElems = Arg.getNumElems();
1693	// we can either have a vector of integer or a vector of floating point
1694	for (unsigned I = `0`; I != NumElems; ++I) {
1695	if (ElemType ->isIntegerType()) {
1696	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
1697	Dst.elem<T>(I) = T::from(static_cast<T>(
1698	APSInt (Arg.elem<T>(I).toAPSInt().abs(),
1699	ElemType ->isUnsignedIntegerOrEnumerationType())));
1700	});
1701	} else {
1702	Floating Val = Arg.elem<Floating>(I);
1703	Dst.elem<Floating>(I) = abs(S, In: Val);
1704	}
1705	}
1706	Dst.initializeAllElements();
1707
1708	return true;
1709	}
1710
1711	/// Can be called with an integer or vector as the first and only parameter.
1712	static bool interp__builtin_elementwise_countzeroes(InterpState &S,
1713	CodePtr OpPC,
1714	const InterpFrame *Frame,
1715	const CallExpr *Call,
1716	unsigned BuiltinID) {
1717	bool HasZeroArg = Call->getNumArgs() == `2`;
1718	bool IsCTTZ = BuiltinID == Builtin::BI__builtin_elementwise_ctzg;
1719	assert(Call->getNumArgs() == `1` \|\| HasZeroArg);
1720	if (Call->getArg(Arg: `0`)->getType()->isIntegerType()) {
1721	PrimType ArgT = *S.getContext().classify(T: Call->getArg(Arg: `0`)->getType());
1722	APSInt Val = popToAPSInt(Stk&: S.Stk, T: ArgT);
1723	std::optional<APSInt> ZeroVal;
1724	if (HasZeroArg) {
1725	ZeroVal = Val;
1726	Val = popToAPSInt(Stk&: S.Stk, T: ArgT);
1727	}
1728
1729	if (Val.isZero()) {
1730	if (ZeroVal) {
1731	pushInteger(S, Val: *ZeroVal, QT: Call->getType());
1732	return true;
1733	}
1734	// If we haven't been provided the second argument, the result is
1735	// undefined
1736	S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1737	DiagId: diag::note_constexpr_countzeroes_zero)
1738	<< /IsTrailing=/IsCTTZ;
1739	return false;
1740	}
1741
1742	if (BuiltinID == Builtin::BI__builtin_elementwise_clzg) {
1743	pushInteger(S, Val: Val.countLeadingZeros(), QT: Call->getType());
1744	} else {
1745	pushInteger(S, Val: Val.countTrailingZeros(), QT: Call->getType());
1746	}
1747	return true;
1748	}
1749	// Otherwise, the argument must be a vector.
1750	const ASTContext &ASTCtx = S.getASTContext();
1751	Pointer ZeroArg;
1752	if (HasZeroArg) {
1753	assert(Call->getArg(`1`)->getType()->isVectorType() &&
1754	ASTCtx.hasSameUnqualifiedType(Call->getArg(`0`)->getType(),
1755	Call->getArg(`1`)->getType()));
1756	(void)ASTCtx;
1757	ZeroArg = S.Stk.pop<Pointer>();
1758	assert(ZeroArg.getFieldDesc()->isPrimitiveArray());
1759	}
1760	assert(Call->getArg(`0`)->getType()->isVectorType());
1761	const Pointer &Arg = S.Stk.pop<Pointer>();
1762	assert(Arg.getFieldDesc()->isPrimitiveArray());
1763	const Pointer &Dst = S.Stk.peek<Pointer>();
1764	assert(Dst.getFieldDesc()->isPrimitiveArray());
1765	assert(Arg.getFieldDesc()->getNumElems() ==
1766	Dst.getFieldDesc()->getNumElems());
1767
1768	QualType ElemType = Arg.getFieldDesc()->getElemQualType();
1769	PrimType ElemT = *S.getContext().classify(T: ElemType);
1770	unsigned NumElems = Arg.getNumElems();
1771
1772	// FIXME: Reading from uninitialized vector elements?
1773	for (unsigned I = `0`; I != NumElems; ++I) {
1774	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
1775	APInt EltVal = Arg.atIndex(I).deref<T>().toAPSInt();
1776	if (EltVal.isZero()) {
1777	if (HasZeroArg) {
1778	Dst.atIndex(I).deref<T>() = ZeroArg.atIndex(I).deref<T>();
1779	} else {
1780	// If we haven't been provided the second argument, the result is
1781	// undefined
1782	S.FFDiag(S.Current->getSource(OpPC),
1783	diag::note_constexpr_countzeroes_zero)
1784	<< /IsTrailing=/IsCTTZ;
1785	return false;
1786	}
1787	} else if (IsCTTZ) {
1788	Dst.atIndex(I).deref<T>() = T::from(EltVal.countTrailingZeros());
1789	} else {
1790	Dst.atIndex(I).deref<T>() = T::from(EltVal.countLeadingZeros());
1791	}
1792	Dst.atIndex(I).initialize();
1793	});
1794	}
1795
1796	return true;
1797	}
1798
1799	static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC,
1800	const InterpFrame *Frame,
1801	const CallExpr Call, unsigned* ID) {
1802	assert(Call->getNumArgs() == `3`);
1803	const ASTContext &ASTCtx = S.getASTContext();
1804	uint64_t Size = popToUInt64(S, E: Call->getArg(Arg: `2`));
1805	Pointer SrcPtr = S.Stk.pop<Pointer>().expand();
1806	Pointer DestPtr = S.Stk.pop<Pointer>().expand();
1807
1808	if (ID == Builtin::BImemcpy \|\| ID == Builtin::BImemmove)
1809	diagnoseNonConstexprBuiltin(S, OpPC, ID);
1810
1811	bool Move =
1812	(ID == Builtin::BI__builtin_memmove \|\| ID == Builtin::BImemmove \|\|
1813	ID == Builtin::BI__builtin_wmemmove \|\| ID == Builtin::BIwmemmove);
1814	bool WChar = ID == Builtin::BIwmemcpy \|\| ID == Builtin::BIwmemmove \|\|
1815	ID == Builtin::BI__builtin_wmemcpy \|\|
1816	ID == Builtin::BI__builtin_wmemmove;
1817
1818	// If the size is zero, we treat this as always being a valid no-op.
1819	if (Size == `0`) {
1820	S.Stk.push<Pointer>(Args&: DestPtr);
1821	return true;
1822	}
1823
1824	if (SrcPtr.isZero() \|\| DestPtr.isZero()) {
1825	Pointer DiagPtr = (SrcPtr.isZero() ? SrcPtr : DestPtr);
1826	S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_null)
1827	<< /IsMove=/Move << /IsWchar=/WChar << !SrcPtr.isZero()
1828	<< DiagPtr.toDiagnosticString(Ctx: ASTCtx);
1829	return false;
1830	}
1831
1832	// Diagnose integral src/dest pointers specially.
1833	if (SrcPtr.isIntegralPointer() \|\| DestPtr.isIntegralPointer()) {
1834	std::string DiagVal = "(void *)";
1835	DiagVal += SrcPtr.isIntegralPointer()
1836	? std::to_string(val: SrcPtr.getIntegerRepresentation())
1837	: std::to_string(val: DestPtr.getIntegerRepresentation());
1838	S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_null)
1839	<< Move << WChar << DestPtr.isIntegralPointer() << DiagVal;
1840	return false;
1841	}
1842
1843	if (!isReadable(P: DestPtr) \|\| !isReadable(P: SrcPtr))
1844	return false;
1845
1846	if (DestPtr.getType()->isIncompleteType()) {
1847	S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1848	DiagId: diag::note_constexpr_memcpy_incomplete_type)
1849	<< Move << DestPtr.getType();
1850	return false;
1851	}
1852	if (SrcPtr.getType()->isIncompleteType()) {
1853	S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1854	DiagId: diag::note_constexpr_memcpy_incomplete_type)
1855	<< Move << SrcPtr.getType();
1856	return false;
1857	}
1858
1859	QualType DestElemType = getElemType(P: DestPtr);
1860	if (DestElemType ->isIncompleteType()) {
1861	S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1862	DiagId: diag::note_constexpr_memcpy_incomplete_type)
1863	<< Move << DestElemType;
1864	return false;
1865	}
1866
1867	size_t RemainingDestElems;
1868	if (DestPtr.getFieldDesc()->isArray()) {
1869	RemainingDestElems = DestPtr.isUnknownSizeArray()
1870	? `0`
1871	: (DestPtr.getNumElems() - DestPtr.getIndex());
1872	} else {
1873	RemainingDestElems = `1`;
1874	}
1875	unsigned DestElemSize = ASTCtx.getTypeSizeInChars(T: DestElemType).getQuantity();
1876
1877	if (WChar) {
1878	uint64_t WCharSize =
1879	ASTCtx.getTypeSizeInChars(T: ASTCtx.getWCharType()).getQuantity();
1880	Size *= WCharSize;
1881	}
1882
1883	if (Size % DestElemSize != `0`) {
1884	S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1885	DiagId: diag::note_constexpr_memcpy_unsupported)
1886	<< Move << WChar << `0` << DestElemType << Size << DestElemSize;
1887	return false;
1888	}
1889
1890	QualType SrcElemType = getElemType(P: SrcPtr);
1891	size_t RemainingSrcElems;
1892	if (SrcPtr.getFieldDesc()->isArray()) {
1893	RemainingSrcElems = SrcPtr.isUnknownSizeArray()
1894	? `0`
1895	: (SrcPtr.getNumElems() - SrcPtr.getIndex());
1896	} else {
1897	RemainingSrcElems = `1`;
1898	}
1899	unsigned SrcElemSize = ASTCtx.getTypeSizeInChars(T: SrcElemType).getQuantity();
1900
1901	if (!ASTCtx.hasSameUnqualifiedType(T1: DestElemType, T2: SrcElemType)) {
1902	S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_type_pun)
1903	<< Move << SrcElemType << DestElemType;
1904	return false;
1905	}
1906
1907	if (!DestElemType.isTriviallyCopyableType(Context: ASTCtx)) {
1908	S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_nontrivial)
1909	<< Move << DestElemType;
1910	return false;
1911	}
1912
1913	// Check if we have enough elements to read from and write to.
1914	size_t RemainingDestBytes = RemainingDestElems * DestElemSize;
1915	size_t RemainingSrcBytes = RemainingSrcElems * SrcElemSize;
1916	if (Size > RemainingDestBytes \|\| Size > RemainingSrcBytes) {
1917	APInt N = APInt (`64`, Size / DestElemSize);
1918	S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1919	DiagId: diag::note_constexpr_memcpy_unsupported)
1920	<< Move << WChar << (Size > RemainingSrcBytes ? `1` : `2`) << DestElemType
1921	<< toString(I: N, Radix: `10`, /Signed=/false);
1922	return false;
1923	}
1924
1925	// Check for overlapping memory regions.
1926	if (!Move && Pointer::pointToSameBlock(A: SrcPtr, B: DestPtr)) {
1927	// Remove base casts.
1928	Pointer SrcP = SrcPtr.stripBaseCasts();
1929	Pointer DestP = DestPtr.stripBaseCasts();
1930
1931	unsigned SrcIndex = SrcP.expand().getIndex() * SrcP.elemSize();
1932	unsigned DstIndex = DestP.expand().getIndex() * DestP.elemSize();
1933
1934	if ((SrcIndex <= DstIndex && (SrcIndex + Size) > DstIndex) \|\|
1935	(DstIndex <= SrcIndex && (DstIndex + Size) > SrcIndex)) {
1936	S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_memcpy_overlap)
1937	<< /IsWChar=/false;
1938	return false;
1939	}
1940	}
1941
1942	assert(Size % DestElemSize == `0`);
1943	if (!DoMemcpy(S, OpPC, SrcPtr, DestPtr, Size: Bytes (Size).toBits()))
1944	return false;
1945
1946	S.Stk.push<Pointer>(Args&: DestPtr);
1947	return true;
1948	}
1949
1950	/// Determine if T is a character type for which we guarantee that
1951	/// sizeof(T) == 1.
1952	static bool isOneByteCharacterType(QualType T) {
1953	return T ->isCharType() \|\| T ->isChar8Type();
1954	}
1955
1956	static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC,
1957	const InterpFrame *Frame,
1958	const CallExpr Call, unsigned* ID) {
1959	assert(Call->getNumArgs() == `3`);
1960	uint64_t Size = popToUInt64(S, E: Call->getArg(Arg: `2`));
1961	const Pointer &PtrB = S.Stk.pop<Pointer>();
1962	const Pointer &PtrA = S.Stk.pop<Pointer>();
1963
1964	if (ID == Builtin::BImemcmp \|\| ID == Builtin::BIbcmp \|\|
1965	ID == Builtin::BIwmemcmp)
1966	diagnoseNonConstexprBuiltin(S, OpPC, ID);
1967
1968	if (Size == `0`) {
1969	pushInteger(S, Val: `0`, QT: Call->getType());
1970	return true;
1971	}
1972
1973	if (!PtrA.isBlockPointer() \|\| !PtrB.isBlockPointer())
1974	return false;
1975
1976	bool IsWide =
1977	(ID == Builtin::BIwmemcmp \|\| ID == Builtin::BI__builtin_wmemcmp);
1978
1979	const ASTContext &ASTCtx = S.getASTContext();
1980	QualType ElemTypeA = getElemType(P: PtrA);
1981	QualType ElemTypeB = getElemType(P: PtrB);
1982	// FIXME: This is an arbitrary limitation the current constant interpreter
1983	// had. We could remove this.
1984	if (!IsWide && (!isOneByteCharacterType(T: ElemTypeA) \|\|
1985	!isOneByteCharacterType(T: ElemTypeB))) {
1986	S.FFDiag(SI: S.Current->getSource(PC: OpPC),
1987	DiagId: diag::note_constexpr_memcmp_unsupported)
1988	<< ASTCtx.BuiltinInfo.getQuotedName(ID) << PtrA.getType()
1989	<< PtrB.getType();
1990	return false;
1991	}
1992
1993	if (!CheckLoad(S, OpPC, Ptr: PtrA, AK: AK_Read) \|\| !CheckLoad(S, OpPC, Ptr: PtrB, AK: AK_Read))
1994	return false;
1995
1996	// Now, read both pointers to a buffer and compare those.
1997	BitcastBuffer BufferA(
1998	Bits (ASTCtx.getTypeSize(T: ElemTypeA) * PtrA.getNumElems()));
1999	readPointerToBuffer(Ctx: S.getContext(), FromPtr: PtrA, Buffer&: BufferA, ReturnOnUninit: false);
2000	// FIXME: The swapping here is UNDOING something we do when reading the
2001	// data into the buffer.
2002	if (ASTCtx.getTargetInfo().isBigEndian())
2003	swapBytes(M: BufferA.Data.get(), N: BufferA.byteSize().getQuantity());
2004
2005	BitcastBuffer BufferB(
2006	Bits (ASTCtx.getTypeSize(T: ElemTypeB) * PtrB.getNumElems()));
2007	readPointerToBuffer(Ctx: S.getContext(), FromPtr: PtrB, Buffer&: BufferB, ReturnOnUninit: false);
2008	// FIXME: The swapping here is UNDOING something we do when reading the
2009	// data into the buffer.
2010	if (ASTCtx.getTargetInfo().isBigEndian())
2011	swapBytes(M: BufferB.Data.get(), N: BufferB.byteSize().getQuantity());
2012
2013	size_t MinBufferSize = std::min(a: BufferA.byteSize().getQuantity(),
2014	b: BufferB.byteSize().getQuantity());
2015
2016	unsigned ElemSize = `1`;
2017	if (IsWide)
2018	ElemSize = ASTCtx.getTypeSizeInChars(T: ASTCtx.getWCharType()).getQuantity();
2019	// The Size given for the wide variants is in wide-char units. Convert it
2020	// to bytes.
2021	size_t ByteSize = Size * ElemSize;
2022	size_t CmpSize = std::min(a: MinBufferSize, b: ByteSize);
2023
2024	for (size_t I = `0`; I != CmpSize; I += ElemSize) {
2025	if (IsWide) {
2026	INT_TYPE_SWITCH(*S.getContext().classify(ASTCtx.getWCharType()), {
2027	T A = *reinterpret_cast<T *>(BufferA.atByte(I));
2028	T B = *reinterpret_cast<T *>(BufferB.atByte(I));
2029	if (A < B) {
2030	pushInteger(S, -`1`, Call->getType());
2031	return true;
2032	}
2033	if (A > B) {
2034	pushInteger(S, `1`, Call->getType());
2035	return true;
2036	}
2037	});
2038	} else {
2039	std::byte A = BufferA.deref<std::byte>(Offset: Bytes (I));
2040	std::byte B = BufferB.deref<std::byte>(Offset: Bytes (I));
2041
2042	if (A < B) {
2043	pushInteger(S, Val: -`1`, QT: Call->getType());
2044	return true;
2045	}
2046	if (A > B) {
2047	pushInteger(S, Val: `1`, QT: Call->getType());
2048	return true;
2049	}
2050	}
2051	}
2052
2053	// We compared CmpSize bytes above. If the limiting factor was the Size
2054	// passed, we're done and the result is equality (0).
2055	if (ByteSize <= CmpSize) {
2056	pushInteger(S, Val: `0`, QT: Call->getType());
2057	return true;
2058	}
2059
2060	// However, if we read all the available bytes but were instructed to read
2061	// even more, diagnose this as a "read of dereferenced one-past-the-end
2062	// pointer". This is what would happen if we called CheckLoad() on every array
2063	// element.
2064	S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_access_past_end)
2065	<< AK_Read << S.Current->getRange(PC: OpPC);
2066	return false;
2067	}
2068
2069	// __builtin_memchr(ptr, int, int)
2070	// __builtin_strchr(ptr, int)
2071	static bool interp__builtin_memchr(InterpState &S, CodePtr OpPC,
2072	const CallExpr Call, unsigned* ID) {
2073	if (ID == Builtin::BImemchr \|\| ID == Builtin::BIwcschr \|\|
2074	ID == Builtin::BIstrchr \|\| ID == Builtin::BIwmemchr)
2075	diagnoseNonConstexprBuiltin(S, OpPC, ID);
2076
2077	std::optional<APSInt> MaxLength;
2078	if (Call->getNumArgs() == `3`)
2079	MaxLength = popToAPSInt(S, E: Call->getArg(Arg: `2`));
2080
2081	APSInt Desired = popToAPSInt(S, E: Call->getArg(Arg: `1`));
2082	const Pointer &Ptr = S.Stk.pop<Pointer>();
2083
2084	if (MaxLength && MaxLength ->isZero()) {
2085	S.Stk.push<Pointer>();
2086	return true;
2087	}
2088
2089	if (Ptr.isDummy()) {
2090	if (Ptr.getType()->isIncompleteType())
2091	S.FFDiag(SI: S.Current->getSource(PC: OpPC),
2092	DiagId: diag::note_constexpr_ltor_incomplete_type)
2093	<< Ptr.getType();
2094	return false;
2095	}
2096
2097	// Null is only okay if the given size is 0.
2098	if (Ptr.isZero()) {
2099	S.FFDiag(SI: S.Current->getSource(PC: OpPC), DiagId: diag::note_constexpr_access_null)
2100	<< AK_Read;
2101	return false;
2102	}
2103
2104	if (!Ptr.isBlockPointer())
2105	return false;
2106
2107	QualType ElemTy = Ptr.getFieldDesc()->isArray()
2108	? Ptr.getFieldDesc()->getElemQualType()
2109	: Ptr.getFieldDesc()->getType();
2110	bool IsRawByte = ID == Builtin::BImemchr \|\| ID == Builtin::BI__builtin_memchr;
2111
2112	// Give up on byte-oriented matching against multibyte elements.
2113	if (IsRawByte && !isOneByteCharacterType(T: ElemTy)) {
2114	S.FFDiag(SI: S.Current->getSource(PC: OpPC),
2115	DiagId: diag::note_constexpr_memchr_unsupported)
2116	<< S.getASTContext().BuiltinInfo.getQuotedName(ID) << ElemTy;
2117	return false;
2118	}
2119
2120	if (!isReadable(P: Ptr))
2121	return false;
2122
2123	if (ID == Builtin::BIstrchr \|\| ID == Builtin::BI__builtin_strchr) {
2124	int64_t DesiredTrunc;
2125	if (S.getASTContext().CharTy ->isSignedIntegerType())
2126	DesiredTrunc =
2127	Desired.trunc(width: S.getASTContext().getCharWidth()).getSExtValue();
2128	else
2129	DesiredTrunc =
2130	Desired.trunc(width: S.getASTContext().getCharWidth()).getZExtValue();
2131	// strchr compares directly to the passed integer, and therefore
2132	// always fails if given an int that is not a char.
2133	if (Desired != DesiredTrunc) {
2134	S.Stk.push<Pointer>();
2135	return true;
2136	}
2137	}
2138
2139	uint64_t DesiredVal;
2140	if (ID == Builtin::BIwmemchr \|\| ID == Builtin::BI__builtin_wmemchr \|\|
2141	ID == Builtin::BIwcschr \|\| ID == Builtin::BI__builtin_wcschr) {
2142	// wcschr and wmemchr are given a wchar_t to look for. Just use it.
2143	DesiredVal = Desired.getZExtValue();
2144	} else {
2145	DesiredVal = Desired.trunc(width: S.getASTContext().getCharWidth()).getZExtValue();
2146	}
2147
2148	bool StopAtZero =
2149	(ID == Builtin::BIstrchr \|\| ID == Builtin::BI__builtin_strchr \|\|
2150	ID == Builtin::BIwcschr \|\| ID == Builtin::BI__builtin_wcschr);
2151
2152	PrimType ElemT =
2153	IsRawByte ? PT_Sint8 : *S.getContext().classify(T: getElemType(P: Ptr));
2154
2155	size_t Index = Ptr.getIndex();
2156	size_t Step = `0`;
2157	for (;;) {
2158	const Pointer &ElemPtr =
2159	(Index + Step) > `0` ? Ptr.atIndex(Idx: Index + Step) : Ptr;
2160
2161	if (!CheckLoad(S, OpPC, Ptr: ElemPtr))
2162	return false;
2163
2164	uint64_t V;
2165	INT_TYPE_SWITCH_NO_BOOL(
2166	ElemT, { V = static_cast<uint64_t>(ElemPtr.deref<T>().toUnsigned()); });
2167
2168	if (V == DesiredVal) {
2169	S.Stk.push<Pointer>(Args: ElemPtr);
2170	return true;
2171	}
2172
2173	if (StopAtZero && V == `0`)
2174	break;
2175
2176	++Step;
2177	if (MaxLength && Step == MaxLength ->getZExtValue())
2178	break;
2179	}
2180
2181	S.Stk.push<Pointer>();
2182	return true;
2183	}
2184
2185	static std::optional<unsigned> computeFullDescSize(const ASTContext &ASTCtx,
2186	const Descriptor *Desc) {
2187	if (Desc->isPrimitive())
2188	return ASTCtx.getTypeSizeInChars(T: Desc->getType()).getQuantity();
2189	if (Desc->isArray())
2190	return ASTCtx.getTypeSizeInChars(T: Desc->getElemQualType()).getQuantity() *
2191	Desc->getNumElems();
2192	if (Desc->isRecord()) {
2193	// Can't use Descriptor::getType() as that may return a pointer type. Look
2194	// at the decl directly.
2195	return ASTCtx
2196	.getTypeSizeInChars(
2197	T: ASTCtx.getCanonicalTagType(TD: Desc->ElemRecord->getDecl()))
2198	.getQuantity();
2199	}
2200
2201	return std::nullopt;
2202	}
2203
2204	/// Compute the byte offset of \p Ptr in the full declaration.
2205	static unsigned computePointerOffset(const ASTContext &ASTCtx,
2206	const Pointer &Ptr) {
2207	unsigned Result = `0`;
2208
2209	Pointer P = Ptr;
2210	while (P.isField() \|\| P.isArrayElement()) {
2211	P = P.expand();
2212	const Descriptor *D = P.getFieldDesc();
2213
2214	if (P.isArrayElement()) {
2215	unsigned ElemSize =
2216	ASTCtx.getTypeSizeInChars(T: D->getElemQualType()).getQuantity();
2217	if (P.isOnePastEnd())
2218	Result += ElemSize * P.getNumElems();
2219	else
2220	Result += ElemSize * P.getIndex();
2221	P = P.expand().getArray();
2222	} else if (P.isBaseClass()) {
2223	const auto *RD = cast<CXXRecordDecl>(Val: D->asDecl());
2224	bool IsVirtual = Ptr.isVirtualBaseClass();
2225	P = P.getBase();
2226	const Record *BaseRecord = P.getRecord();
2227
2228	const ASTRecordLayout &Layout =
2229	ASTCtx.getASTRecordLayout(D: cast<CXXRecordDecl>(Val: BaseRecord->getDecl()));
2230	if (IsVirtual)
2231	Result += Layout.getVBaseClassOffset(VBase: RD).getQuantity();
2232	else
2233	Result += Layout.getBaseClassOffset(Base: RD).getQuantity();
2234	} else if (P.isField()) {
2235	const FieldDecl *FD = P.getField();
2236	const ASTRecordLayout &Layout =
2237	ASTCtx.getASTRecordLayout(D: FD->getParent());
2238	unsigned FieldIndex = FD->getFieldIndex();
2239	uint64_t FieldOffset =
2240	ASTCtx.toCharUnitsFromBits(BitSize: Layout.getFieldOffset(FieldNo: FieldIndex))
2241	.getQuantity();
2242	Result += FieldOffset;
2243	P = P.getBase();
2244	} else
2245	llvm_unreachable("Unhandled descriptor type");
2246	}
2247
2248	return Result;
2249	}
2250
2251	/// Does Ptr point to the last subobject?
2252	static bool pointsToLastObject(const Pointer &Ptr) {
2253	Pointer P = Ptr;
2254	while (!P.isRoot()) {
2255
2256	if (P.isArrayElement()) {
2257	P = P.expand().getArray();
2258	continue;
2259	}
2260	if (P.isBaseClass()) {
2261	if (P.getRecord()->getNumFields() > `0`)
2262	return false;
2263	P = P.getBase();
2264	continue;
2265	}
2266
2267	Pointer Base = P.getBase();
2268	if (const Record *R = Base.getRecord()) {
2269	assert(P.getField());
2270	if (P.getField()->getFieldIndex() != R->getNumFields() - `1`)
2271	return false;
2272	}
2273	P = Base;
2274	}
2275
2276	return true;
2277	}
2278
2279	/// Does Ptr point to the last object AND to a flexible array member?
2280	static bool isUserWritingOffTheEnd(const ASTContext &Ctx, const Pointer &Ptr,
2281	bool InvalidBase) {
2282	auto isFlexibleArrayMember = [&](const Descriptor *FieldDesc) {
2283	using FAMKind = LangOptions::StrictFlexArraysLevelKind;
2284	FAMKind StrictFlexArraysLevel =
2285	Ctx.getLangOpts().getStrictFlexArraysLevel();
2286
2287	if (StrictFlexArraysLevel == FAMKind::Default)
2288	return true;
2289
2290	unsigned NumElems = FieldDesc->getNumElems();
2291	if (NumElems == `0` && StrictFlexArraysLevel != FAMKind::IncompleteOnly)
2292	return true;
2293
2294	if (NumElems == `1` && StrictFlexArraysLevel == FAMKind::OneZeroOrIncomplete)
2295	return true;
2296	return false;
2297	};
2298
2299	const Descriptor *FieldDesc = Ptr.getFieldDesc();
2300	if (!FieldDesc->isArray())
2301	return false;
2302
2303	return InvalidBase && pointsToLastObject(Ptr) &&
2304	isFlexibleArrayMember (FieldDesc);
2305	}
2306
2307	UnsignedOrNone evaluateBuiltinObjectSize(const ASTContext &ASTCtx,
2308	unsigned Kind, Pointer &Ptr) {
2309	if (Ptr.isZero() \|\| !Ptr.isBlockPointer())
2310	return std::nullopt;
2311
2312	if (Ptr.isDummy() && Ptr.getType()->isPointerType())
2313	return std::nullopt;
2314
2315	bool InvalidBase = false;
2316
2317	if (Ptr.isDummy()) {
2318	if (const VarDecl *VD = Ptr.getDeclDesc()->asVarDecl();
2319	VD && VD->getType()->isPointerType())
2320	InvalidBase = true;
2321	}
2322
2323	// According to the GCC documentation, we want the size of the subobject
2324	// denoted by the pointer. But that's not quite right -- what we actually
2325	// want is the size of the immediately-enclosing array, if there is one.
2326	if (Ptr.isArrayElement())
2327	Ptr = Ptr.expand();
2328
2329	bool DetermineForCompleteObject = Ptr.getFieldDesc() == Ptr.getDeclDesc();
2330	const Descriptor *DeclDesc = Ptr.getDeclDesc();
2331	assert(DeclDesc);
2332
2333	bool UseFieldDesc = (Kind & `1u`);
2334	bool ReportMinimum = (Kind & `2u`);
2335	if (!UseFieldDesc \|\| DetermineForCompleteObject) {
2336	// Can't read beyond the pointer decl desc.
2337	if (!ReportMinimum && DeclDesc->getType()->isPointerType())
2338	return std::nullopt;
2339
2340	if (InvalidBase)
2341	return std::nullopt;
2342	} else {
2343	if (isUserWritingOffTheEnd(Ctx: ASTCtx, Ptr, InvalidBase)) {
2344	// If we cannot determine the size of the initial allocation, then we
2345	// can't given an accurate upper-bound. However, we are still able to give
2346	// conservative lower-bounds for Type=3.
2347	if (Kind == `1`)
2348	return std::nullopt;
2349	}
2350	}
2351
2352	// The "closest surrounding subobject" is NOT a base class,
2353	// so strip the base class casts.
2354	if (UseFieldDesc && Ptr.isBaseClass())
2355	Ptr = Ptr.stripBaseCasts();
2356
2357	const Descriptor *Desc = UseFieldDesc ? Ptr.getFieldDesc() : DeclDesc;
2358	assert(Desc);
2359
2360	std::optional<unsigned> FullSize = computeFullDescSize(ASTCtx, Desc);
2361	if (!FullSize)
2362	return std::nullopt;
2363
2364	unsigned ByteOffset;
2365	if (UseFieldDesc) {
2366	if (Ptr.isBaseClass()) {
2367	assert(computePointerOffset(ASTCtx, Ptr.getBase()) <=
2368	computePointerOffset(ASTCtx, Ptr));
2369	ByteOffset = computePointerOffset(ASTCtx, Ptr: Ptr.getBase()) -
2370	computePointerOffset(ASTCtx, Ptr);
2371	} else {
2372	if (Ptr.inArray())
2373	ByteOffset =
2374	computePointerOffset(ASTCtx, Ptr) -
2375	computePointerOffset(ASTCtx, Ptr: Ptr.expand().atIndex(Idx: `0`).narrow());
2376	else
2377	ByteOffset = `0`;
2378	}
2379	} else
2380	ByteOffset = computePointerOffset(ASTCtx, Ptr);
2381
2382	assert(ByteOffset <= *FullSize);
2383	return *FullSize - ByteOffset;
2384	}
2385
2386	static bool interp__builtin_object_size(InterpState &S, CodePtr OpPC,
2387	const InterpFrame *Frame,
2388	const CallExpr *Call) {
2389	const ASTContext &ASTCtx = S.getASTContext();
2390	// From the GCC docs:
2391	// Kind is an integer constant from 0 to 3. If the least significant bit is
2392	// clear, objects are whole variables. If it is set, a closest surrounding
2393	// subobject is considered the object a pointer points to. The second bit
2394	// determines if maximum or minimum of remaining bytes is computed.
2395	unsigned Kind = popToUInt64(S, E: Call->getArg(Arg: `1`));
2396	assert(Kind <= `3` && "unexpected kind");
2397	Pointer Ptr = S.Stk.pop<Pointer>();
2398
2399	if (Call->getArg(Arg: `0`)->HasSideEffects(Ctx: ASTCtx)) {
2400	// "If there are any side effects in them, it returns (size_t) -1
2401	// for type 0 or 1 and (size_t) 0 for type 2 or 3."
2402	pushInteger(S, Val: Kind <= `1` ? -`1` : `0`, QT: Call->getType());
2403	return true;
2404	}
2405
2406	if (auto Result = evaluateBuiltinObjectSize(ASTCtx, Kind, Ptr)) {
2407	pushInteger(S, Val: *Result, QT: Call->getType());
2408	return true;
2409	}
2410	return false;
2411	}
2412
2413	static bool interp__builtin_is_within_lifetime(InterpState &S, CodePtr OpPC,
2414	const CallExpr *Call) {
2415
2416	if (!S.inConstantContext())
2417	return false;
2418
2419	const Pointer &Ptr = S.Stk.pop<Pointer>();
2420
2421	auto Error = [&](int Diag) {
2422	bool CalledFromStd = false;
2423	const auto *Callee = S.Current->getCallee();
2424	if (Callee && Callee->isInStdNamespace()) {
2425	const IdentifierInfo *Identifier = Callee->getIdentifier();
2426	CalledFromStd = Identifier && Identifier->isStr(Str: "is_within_lifetime");
2427	}
2428	S.CCEDiag(SI: CalledFromStd
2429	? S.Current->Caller->getSource(PC: S.Current->getRetPC())
2430	: S.Current->getSource(PC: OpPC),
2431	DiagId: diag::err_invalid_is_within_lifetime)
2432	<< (CalledFromStd ? "std::is_within_lifetime"
2433	: "__builtin_is_within_lifetime")
2434	<< Diag;
2435	return false;
2436	};
2437
2438	if (Ptr.isZero())
2439	return Error (`0`);
2440	if (Ptr.isOnePastEnd())
2441	return Error (`1`);
2442
2443	bool Result = Ptr.getLifetime() != Lifetime::Ended;
2444	if (!Ptr.isActive()) {
2445	Result = false;
2446	} else {
2447	if (!CheckLive(S, OpPC, Ptr, AK: AK_Read))
2448	return false;
2449	if (!CheckMutable(S, OpPC, Ptr))
2450	return false;
2451	if (!CheckDummy(S, OpPC, B: Ptr.block(), AK: AK_Read))
2452	return false;
2453	}
2454
2455	// Check if we're currently running an initializer.
2456	if (llvm::is_contained(Range&: S.InitializingBlocks, Element: Ptr.block()))
2457	return Error (`2`);
2458	if (S.EvaluatingDecl && Ptr.getDeclDesc()->asVarDecl() == S.EvaluatingDecl)
2459	return Error (`2`);
2460
2461	pushInteger(S, Val: Result, QT: Call->getType());
2462	return true;
2463	}
2464
2465	static bool interp__builtin_elementwise_int_unaryop(
2466	InterpState &S, CodePtr OpPC, const CallExpr *Call,
2467	llvm::function_ref<APInt(const APSInt &)> Fn) {
2468	assert(Call->getNumArgs() == `1`);
2469
2470	// Single integer case.
2471	if (!Call->getArg(Arg: `0`)->getType()->isVectorType()) {
2472	assert(Call->getType()->isIntegerType());
2473	APSInt Src = popToAPSInt(S, E: Call->getArg(Arg: `0`));
2474	APInt Result = Fn (Src);
2475	pushInteger(S, Val: APSInt (std::move(Result), !Src.isSigned()), QT: Call->getType());
2476	return true;
2477	}
2478
2479	// Vector case.
2480	const Pointer &Arg = S.Stk.pop<Pointer>();
2481	assert(Arg.getFieldDesc()->isPrimitiveArray());
2482	const Pointer &Dst = S.Stk.peek<Pointer>();
2483	assert(Dst.getFieldDesc()->isPrimitiveArray());
2484	assert(Arg.getFieldDesc()->getNumElems() ==
2485	Dst.getFieldDesc()->getNumElems());
2486
2487	QualType ElemType = Arg.getFieldDesc()->getElemQualType();
2488	PrimType ElemT = *S.getContext().classify(T: ElemType);
2489	unsigned NumElems = Arg.getNumElems();
2490	bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2491
2492	for (unsigned I = `0`; I != NumElems; ++I) {
2493	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2494	APSInt Src = Arg.elem<T>(I).toAPSInt();
2495	APInt Result = Fn(Src);
2496	Dst.elem<T>(I) = static_cast<T>(APSInt (std::move(Result), DestUnsigned));
2497	});
2498	}
2499	Dst.initializeAllElements();
2500
2501	return true;
2502	}
2503
2504	static bool interp__builtin_elementwise_fp_binop(
2505	InterpState &S, CodePtr OpPC, const CallExpr *Call,
2506	llvm::function_ref<std::optional<APFloat>(
2507	const APFloat &, const APFloat &, std::optional<APSInt> RoundingMode)>
2508	Fn,
2509	bool IsScalar = false) {
2510	assert((Call->getNumArgs() == `2`) \|\| (Call->getNumArgs() == `3`));
2511	const auto *VT = Call->getArg(Arg: `0`)->getType()->castAs<VectorType>();
2512	assert(VT->getElementType()->isFloatingType());
2513	unsigned NumElems = VT->getNumElements();
2514
2515	// Vector case.
2516	assert(Call->getArg(`0`)->getType()->isVectorType() &&
2517	Call->getArg(`1`)->getType()->isVectorType());
2518	assert(VT->getElementType() ==
2519	Call->getArg(`1`)->getType()->castAs<VectorType>()->getElementType());
2520	assert(VT->getNumElements() ==
2521	Call->getArg(`1`)->getType()->castAs<VectorType>()->getNumElements());
2522
2523	std::optional<APSInt> RoundingMode = std::nullopt;
2524	if (Call->getNumArgs() == `3`)
2525	RoundingMode = popToAPSInt(S, E: Call->getArg(Arg: `2`));
2526
2527	const Pointer &BPtr = S.Stk.pop<Pointer>();
2528	const Pointer &APtr = S.Stk.pop<Pointer>();
2529	const Pointer &Dst = S.Stk.peek<Pointer>();
2530	for (unsigned ElemIdx = `0`; ElemIdx != NumElems; ++ElemIdx) {
2531	using T = PrimConv<PT_Float>::T;
2532	if (IsScalar && ElemIdx > `0`) {
2533	Dst.elem<T>(I: ElemIdx) = APtr.elem<T>(I: ElemIdx);
2534	continue;
2535	}
2536	APFloat ElemA = APtr.elem<T>(I: ElemIdx).getAPFloat();
2537	APFloat ElemB = BPtr.elem<T>(I: ElemIdx).getAPFloat();
2538	std::optional<APFloat> Result = Fn (ElemA, ElemB, RoundingMode);
2539	if (!Result)
2540	return false;
2541	Dst.elem<T>(I: ElemIdx) = static_cast<T>(*Result);
2542	}
2543
2544	Dst.initializeAllElements();
2545
2546	return true;
2547	}
2548
2549	static bool interp__builtin_scalar_fp_round_mask_binop(
2550	InterpState &S, CodePtr OpPC, const CallExpr *Call,
2551	llvm::function_ref<std::optional<APFloat>(const APFloat &, const APFloat &,
2552	std::optional<APSInt>)>
2553	Fn) {
2554	assert(Call->getNumArgs() == `5`);
2555	const auto *VT = Call->getArg(Arg: `0`)->getType()->castAs<VectorType>();
2556	unsigned NumElems = VT->getNumElements();
2557
2558	APSInt RoundingMode = popToAPSInt(S, E: Call->getArg(Arg: `4`));
2559	uint64_t MaskVal = popToUInt64(S, E: Call->getArg(Arg: `3`));
2560	const Pointer &SrcPtr = S.Stk.pop<Pointer>();
2561	const Pointer &BPtr = S.Stk.pop<Pointer>();
2562	const Pointer &APtr = S.Stk.pop<Pointer>();
2563	const Pointer &Dst = S.Stk.peek<Pointer>();
2564
2565	using T = PrimConv<PT_Float>::T;
2566
2567	if (MaskVal & `1`) {
2568	APFloat ElemA = APtr.elem<T>(I: `0`).getAPFloat();
2569	APFloat ElemB = BPtr.elem<T>(I: `0`).getAPFloat();
2570	std::optional<APFloat> Result = Fn (ElemA, ElemB, RoundingMode);
2571	if (!Result)
2572	return false;
2573	Dst.elem<T>(I: `0`) = static_cast<T>(*Result);
2574	} else {
2575	Dst.elem<T>(I: `0`) = SrcPtr.elem<T>(I: `0`);
2576	}
2577
2578	for (unsigned I = `1`; I < NumElems; ++I)
2579	Dst.elem<T>(I) = APtr.elem<T>(I);
2580
2581	Dst.initializeAllElements();
2582
2583	return true;
2584	}
2585
2586	static bool interp__builtin_elementwise_int_binop(
2587	InterpState &S, CodePtr OpPC, const CallExpr *Call,
2588	llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
2589	assert(Call->getNumArgs() == `2`);
2590
2591	// Single integer case.
2592	if (!Call->getArg(Arg: `0`)->getType()->isVectorType()) {
2593	assert(!Call->getArg(`1`)->getType()->isVectorType());
2594	APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: `1`));
2595	APSInt LHS = popToAPSInt(S, E: Call->getArg(Arg: `0`));
2596	APInt Result = Fn (LHS, RHS);
2597	pushInteger(S, Val: APSInt (std::move(Result), !LHS.isSigned()), QT: Call->getType());
2598	return true;
2599	}
2600
2601	const auto *VT = Call->getArg(Arg: `0`)->getType()->castAs<VectorType>();
2602	assert(VT->getElementType()->isIntegralOrEnumerationType());
2603	PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2604	unsigned NumElems = VT->getNumElements();
2605	bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2606
2607	// Vector + Scalar case.
2608	if (!Call->getArg(Arg: `1`)->getType()->isVectorType()) {
2609	assert(Call->getArg(`1`)->getType()->isIntegralOrEnumerationType());
2610
2611	APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: `1`));
2612	const Pointer &LHS = S.Stk.pop<Pointer>();
2613	const Pointer &Dst = S.Stk.peek<Pointer>();
2614
2615	for (unsigned I = `0`; I != NumElems; ++I) {
2616	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2617	Dst.elem<T>(I) = static_cast<T>(
2618	APSInt (Fn(LHS.elem<T>(I).toAPSInt(), RHS), DestUnsigned));
2619	});
2620	}
2621	Dst.initializeAllElements();
2622	return true;
2623	}
2624
2625	// Vector case.
2626	assert(Call->getArg(`0`)->getType()->isVectorType() &&
2627	Call->getArg(`1`)->getType()->isVectorType());
2628	assert(VT->getElementType() ==
2629	Call->getArg(`1`)->getType()->castAs<VectorType>()->getElementType());
2630	assert(VT->getNumElements() ==
2631	Call->getArg(`1`)->getType()->castAs<VectorType>()->getNumElements());
2632	assert(VT->getElementType()->isIntegralOrEnumerationType());
2633
2634	const Pointer &RHS = S.Stk.pop<Pointer>();
2635	const Pointer &LHS = S.Stk.pop<Pointer>();
2636	const Pointer &Dst = S.Stk.peek<Pointer>();
2637	for (unsigned I = `0`; I != NumElems; ++I) {
2638	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2639	APSInt Elem1 = LHS.elem<T>(I).toAPSInt();
2640	APSInt Elem2 = RHS.elem<T>(I).toAPSInt();
2641	Dst.elem<T>(I) = static_cast<T>(APSInt (Fn(Elem1, Elem2), DestUnsigned));
2642	});
2643	}
2644	Dst.initializeAllElements();
2645
2646	return true;
2647	}
2648
2649	static bool
2650	interp__builtin_x86_pack(InterpState &S, CodePtr, const CallExpr *E,
2651	llvm::function_ref<APInt(const APSInt &)> PackFn) {
2652	const auto *VT0 = E->getArg(Arg: `0`)->getType()->castAs<VectorType>();
2653	[[maybe_unused]] const auto *VT1 =
2654	E->getArg(Arg: `1`)->getType()->castAs<VectorType>();
2655	assert(VT0 && VT1 && "pack builtin VT0 and VT1 must be VectorType");
2656	assert(VT0->getElementType() == VT1->getElementType() &&
2657	VT0->getNumElements() == VT1->getNumElements() &&
2658	"pack builtin VT0 and VT1 ElementType must be same");
2659
2660	const Pointer &RHS = S.Stk.pop<Pointer>();
2661	const Pointer &LHS = S.Stk.pop<Pointer>();
2662	const Pointer &Dst = S.Stk.peek<Pointer>();
2663
2664	const ASTContext &ASTCtx = S.getASTContext();
2665	unsigned SrcBits = ASTCtx.getIntWidth(T: VT0->getElementType());
2666	unsigned LHSVecLen = VT0->getNumElements();
2667	unsigned SrcPerLane = `128` / SrcBits;
2668	unsigned Lanes = LHSVecLen * SrcBits / `128`;
2669
2670	PrimType SrcT = *S.getContext().classify(T: VT0->getElementType());
2671	PrimType DstT = *S.getContext().classify(T: getElemType(P: Dst));
2672	bool IsUnsigend = getElemType(P: Dst)->isUnsignedIntegerType();
2673
2674	for (unsigned Lane = `0`; Lane != Lanes; ++Lane) {
2675	unsigned BaseSrc = Lane * SrcPerLane;
2676	unsigned BaseDst = Lane * (`2` * SrcPerLane);
2677
2678	for (unsigned I = `0`; I != SrcPerLane; ++I) {
2679	INT_TYPE_SWITCH_NO_BOOL(SrcT, {
2680	APSInt A = LHS.elem<T>(BaseSrc + I).toAPSInt();
2681	APSInt B = RHS.elem<T>(BaseSrc + I).toAPSInt();
2682
2683	assignInteger(S, Dst.atIndex(BaseDst + I), DstT,
2684	APSInt (PackFn(A), IsUnsigend));
2685	assignInteger(S, Dst.atIndex(BaseDst + SrcPerLane + I), DstT,
2686	APSInt (PackFn(B), IsUnsigend));
2687	});
2688	}
2689	}
2690
2691	Dst.initializeAllElements();
2692	return true;
2693	}
2694
2695	static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
2696	const CallExpr *Call,
2697	unsigned BuiltinID) {
2698	assert(Call->getNumArgs() == `2`);
2699
2700	QualType Arg0Type = Call->getArg(Arg: `0`)->getType();
2701
2702	// TODO: Support floating-point types.
2703	if (!(Arg0Type ->isIntegerType() \|\|
2704	(Arg0Type ->isVectorType() &&
2705	Arg0Type ->castAs<VectorType>()->getElementType()->isIntegerType())))
2706	return false;
2707
2708	if (!Arg0Type ->isVectorType()) {
2709	assert(!Call->getArg(`1`)->getType()->isVectorType());
2710	APSInt RHS = popToAPSInt(S, E: Call->getArg(Arg: `1`));
2711	APSInt LHS = popToAPSInt(S, T: Arg0Type);
2712	APInt Result;
2713	if (BuiltinID == Builtin::BI__builtin_elementwise_max) {
2714	Result = std::max(a: LHS, b: RHS);
2715	} else if (BuiltinID == Builtin::BI__builtin_elementwise_min) {
2716	Result = std::min(a: LHS, b: RHS);
2717	} else {
2718	llvm_unreachable("Wrong builtin ID");
2719	}
2720
2721	pushInteger(S, Val: APSInt (Result, !LHS.isSigned()), QT: Call->getType());
2722	return true;
2723	}
2724
2725	// Vector case.
2726	assert(Call->getArg(`0`)->getType()->isVectorType() &&
2727	Call->getArg(`1`)->getType()->isVectorType());
2728	const auto *VT = Call->getArg(Arg: `0`)->getType()->castAs<VectorType>();
2729	assert(VT->getElementType() ==
2730	Call->getArg(`1`)->getType()->castAs<VectorType>()->getElementType());
2731	assert(VT->getNumElements() ==
2732	Call->getArg(`1`)->getType()->castAs<VectorType>()->getNumElements());
2733	assert(VT->getElementType()->isIntegralOrEnumerationType());
2734
2735	const Pointer &RHS = S.Stk.pop<Pointer>();
2736	const Pointer &LHS = S.Stk.pop<Pointer>();
2737	const Pointer &Dst = S.Stk.peek<Pointer>();
2738	PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2739	unsigned NumElems = VT->getNumElements();
2740	for (unsigned I = `0`; I != NumElems; ++I) {
2741	APSInt Elem1;
2742	APSInt Elem2;
2743	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2744	Elem1 = LHS.elem<T>(I).toAPSInt();
2745	Elem2 = RHS.elem<T>(I).toAPSInt();
2746	});
2747
2748	APSInt Result;
2749	if (BuiltinID == Builtin::BI__builtin_elementwise_max) {
2750	Result = APSInt (std::max(a: Elem1, b: Elem2),
2751	Call->getType()->isUnsignedIntegerOrEnumerationType());
2752	} else if (BuiltinID == Builtin::BI__builtin_elementwise_min) {
2753	Result = APSInt (std::min(a: Elem1, b: Elem2),
2754	Call->getType()->isUnsignedIntegerOrEnumerationType());
2755	} else {
2756	llvm_unreachable("Wrong builtin ID");
2757	}
2758
2759	INT_TYPE_SWITCH_NO_BOOL(ElemT,
2760	{ Dst.elem<T>(I) = static_cast<T>(Result); });
2761	}
2762	Dst.initializeAllElements();
2763
2764	return true;
2765	}
2766
2767	static bool interp__builtin_ia32_pmul(
2768	InterpState &S, CodePtr OpPC, const CallExpr *Call,
2769	llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &,
2770	const APSInt &)>
2771	Fn) {
2772	assert(Call->getArg(`0`)->getType()->isVectorType() &&
2773	Call->getArg(`1`)->getType()->isVectorType());
2774	const Pointer &RHS = S.Stk.pop<Pointer>();
2775	const Pointer &LHS = S.Stk.pop<Pointer>();
2776	const Pointer &Dst = S.Stk.peek<Pointer>();
2777
2778	const auto *VT = Call->getArg(Arg: `0`)->getType()->castAs<VectorType>();
2779	PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2780	unsigned NumElems = VT->getNumElements();
2781	const auto *DestVT = Call->getType()->castAs<VectorType>();
2782	PrimType DestElemT = *S.getContext().classify(T: DestVT->getElementType());
2783	bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2784
2785	unsigned DstElem = `0`;
2786	for (unsigned I = `0`; I != NumElems; I += `2`) {
2787	APSInt Result;
2788	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2789	APSInt LoLHS = LHS.elem<T>(I).toAPSInt();
2790	APSInt HiLHS = LHS.elem<T>(I + `1`).toAPSInt();
2791	APSInt LoRHS = RHS.elem<T>(I).toAPSInt();
2792	APSInt HiRHS = RHS.elem<T>(I + `1`).toAPSInt();
2793	Result = APSInt (Fn(LoLHS, HiLHS, LoRHS, HiRHS), DestUnsigned);
2794	});
2795
2796	INT_TYPE_SWITCH_NO_BOOL(DestElemT,
2797	{ Dst.elem<T>(DstElem) = static_cast<T>(Result); });
2798	++DstElem;
2799	}
2800
2801	Dst.initializeAllElements();
2802	return true;
2803	}
2804
2805	static bool interp_builtin_horizontal_int_binop(
2806	InterpState &S, CodePtr OpPC, const CallExpr *Call,
2807	llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
2808	const auto *VT = Call->getArg(Arg: `0`)->getType()->castAs<VectorType>();
2809	PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2810	bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2811
2812	const Pointer &RHS = S.Stk.pop<Pointer>();
2813	const Pointer &LHS = S.Stk.pop<Pointer>();
2814	const Pointer &Dst = S.Stk.peek<Pointer>();
2815	unsigned NumElts = VT->getNumElements();
2816	unsigned EltBits = S.getASTContext().getIntWidth(T: VT->getElementType());
2817	unsigned EltsPerLane = `128` / EltBits;
2818	unsigned Lanes = NumElts * EltBits / `128`;
2819	unsigned DestIndex = `0`;
2820
2821	for (unsigned Lane = `0`; Lane < Lanes; ++Lane) {
2822	unsigned LaneStart = Lane * EltsPerLane;
2823	for (unsigned I = `0`; I < EltsPerLane; I += `2`) {
2824	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2825	APSInt Elem1 = LHS.elem<T>(LaneStart + I).toAPSInt();
2826	APSInt Elem2 = LHS.elem<T>(LaneStart + I + `1`).toAPSInt();
2827	APSInt ResL = APSInt (Fn(Elem1, Elem2), DestUnsigned);
2828	Dst.elem<T>(DestIndex++) = static_cast<T>(ResL);
2829	});
2830	}
2831
2832	for (unsigned I = `0`; I < EltsPerLane; I += `2`) {
2833	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2834	APSInt Elem1 = RHS.elem<T>(LaneStart + I).toAPSInt();
2835	APSInt Elem2 = RHS.elem<T>(LaneStart + I + `1`).toAPSInt();
2836	APSInt ResR = APSInt (Fn(Elem1, Elem2), DestUnsigned);
2837	Dst.elem<T>(DestIndex++) = static_cast<T>(ResR);
2838	});
2839	}
2840	}
2841	Dst.initializeAllElements();
2842	return true;
2843	}
2844
2845	static bool interp_builtin_horizontal_fp_binop(
2846	InterpState &S, CodePtr OpPC, const CallExpr *Call,
2847	llvm::function_ref<APFloat(const APFloat &, const APFloat &,
2848	llvm::RoundingMode)>
2849	Fn) {
2850	const Pointer &RHS = S.Stk.pop<Pointer>();
2851	const Pointer &LHS = S.Stk.pop<Pointer>();
2852	const Pointer &Dst = S.Stk.peek<Pointer>();
2853	FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
2854	llvm::RoundingMode RM = getRoundingMode(FPO);
2855	const auto *VT = Call->getArg(Arg: `0`)->getType()->castAs<VectorType>();
2856
2857	unsigned NumElts = VT->getNumElements();
2858	unsigned EltBits = S.getASTContext().getTypeSize(T: VT->getElementType());
2859	unsigned NumLanes = NumElts * EltBits / `128`;
2860	unsigned NumElemsPerLane = NumElts / NumLanes;
2861	unsigned HalfElemsPerLane = NumElemsPerLane / `2`;
2862
2863	for (unsigned L = `0`; L != NumElts; L += NumElemsPerLane) {
2864	using T = PrimConv<PT_Float>::T;
2865	for (unsigned E = `0`; E != HalfElemsPerLane; ++E) {
2866	APFloat Elem1 = LHS.elem<T>(I: L + (`2` * E) + `0`).getAPFloat();
2867	APFloat Elem2 = LHS.elem<T>(I: L + (`2` * E) + `1`).getAPFloat();
2868	Dst.elem<T>(I: L + E) = static_cast<T>(Fn (Elem1, Elem2, RM));
2869	}
2870	for (unsigned E = `0`; E != HalfElemsPerLane; ++E) {
2871	APFloat Elem1 = RHS.elem<T>(I: L + (`2` * E) + `0`).getAPFloat();
2872	APFloat Elem2 = RHS.elem<T>(I: L + (`2` * E) + `1`).getAPFloat();
2873	Dst.elem<T>(I: L + E + HalfElemsPerLane) =
2874	static_cast<T>(Fn (Elem1, Elem2, RM));
2875	}
2876	}
2877	Dst.initializeAllElements();
2878	return true;
2879	}
2880
2881	static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC,
2882	const CallExpr *Call) {
2883	// Addsub: alternates between subtraction and addition
2884	// Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i])
2885	const Pointer &RHS = S.Stk.pop<Pointer>();
2886	const Pointer &LHS = S.Stk.pop<Pointer>();
2887	const Pointer &Dst = S.Stk.peek<Pointer>();
2888	FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
2889	llvm::RoundingMode RM = getRoundingMode(FPO);
2890	const auto *VT = Call->getArg(Arg: `0`)->getType()->castAs<VectorType>();
2891	unsigned NumElems = VT->getNumElements();
2892
2893	using T = PrimConv<PT_Float>::T;
2894	for (unsigned I = `0`; I != NumElems; ++I) {
2895	APFloat LElem = LHS.elem<T>(I).getAPFloat();
2896	APFloat RElem = RHS.elem<T>(I).getAPFloat();
2897	if (I % `2` == `0`) {
2898	// Even indices: subtract
2899	LElem.subtract(RHS: RElem, RM);
2900	} else {
2901	// Odd indices: add
2902	LElem.add(RHS: RElem, RM);
2903	}
2904	Dst.elem<T>(I) = static_cast<T>(LElem);
2905	}
2906	Dst.initializeAllElements();
2907	return true;
2908	}
2909
2910	static bool interp__builtin_ia32_pclmulqdq(InterpState &S, CodePtr OpPC,
2911	const CallExpr *Call) {
2912	// PCLMULQDQ: carry-less multiplication of selected 64-bit halves
2913	// imm8 bit 0: selects lower (0) or upper (1) 64 bits of first operand
2914	// imm8 bit 4: selects lower (0) or upper (1) 64 bits of second operand
2915	assert(Call->getArg(`0`)->getType()->isVectorType() &&
2916	Call->getArg(`1`)->getType()->isVectorType());
2917
2918	// Extract imm8 argument
2919	APSInt Imm8 = popToAPSInt(S, E: Call->getArg(Arg: `2`));
2920	bool SelectUpperA = (Imm8 & `0x01`) != `0`;
2921	bool SelectUpperB = (Imm8 & `0x10`) != `0`;
2922
2923	const Pointer &RHS = S.Stk.pop<Pointer>();
2924	const Pointer &LHS = S.Stk.pop<Pointer>();
2925	const Pointer &Dst = S.Stk.peek<Pointer>();
2926
2927	const auto *VT = Call->getArg(Arg: `0`)->getType()->castAs<VectorType>();
2928	PrimType ElemT = *S.getContext().classify(T: VT->getElementType());
2929	unsigned NumElems = VT->getNumElements();
2930	const auto *DestVT = Call->getType()->castAs<VectorType>();
2931	PrimType DestElemT = *S.getContext().classify(T: DestVT->getElementType());
2932	bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2933
2934	// Process each 128-bit lane (2 elements at a time)
2935	for (unsigned Lane = `0`; Lane < NumElems; Lane += `2`) {
2936	APSInt A0, A1, B0, B1;
2937	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2938	A0 = LHS.elem<T>(Lane + `0`).toAPSInt();
2939	A1 = LHS.elem<T>(Lane + `1`).toAPSInt();
2940	B0 = RHS.elem<T>(Lane + `0`).toAPSInt();
2941	B1 = RHS.elem<T>(Lane + `1`).toAPSInt();
2942	});
2943
2944	// Select the appropriate 64-bit values based on imm8
2945	APInt A = SelectUpperA ? A1 : A0;
2946	APInt B = SelectUpperB ? B1 : B0;
2947
2948	// Extend both operands to 128 bits for carry-less multiplication
2949	APInt A128 = A.zext(width: `128`);
2950	APInt B128 = B.zext(width: `128`);
2951
2952	// Use APIntOps::clmul for carry-less multiplication
2953	APInt Result = llvm::APIntOps::clmul(LHS: A128, RHS: B128);
2954
2955	// Split the 128-bit result into two 64-bit halves
2956	APSInt ResultLow(Result.extractBits(numBits: `64`, bitPosition: `0`), DestUnsigned);
2957	APSInt ResultHigh(Result.extractBits(numBits: `64`, bitPosition: `64`), DestUnsigned);
2958
2959	INT_TYPE_SWITCH_NO_BOOL(DestElemT, {
2960	Dst.elem<T>(Lane + `0`) = static_cast<T>(ResultLow);
2961	Dst.elem<T>(Lane + `1`) = static_cast<T>(ResultHigh);
2962	});
2963	}
2964
2965	Dst.initializeAllElements();
2966	return true;
2967	}
2968
2969	static bool interp__builtin_elementwise_triop_fp(
2970	InterpState &S, CodePtr OpPC, const CallExpr *Call,
2971	llvm::function_ref<APFloat(const APFloat &, const APFloat &,
2972	const APFloat &, llvm::RoundingMode)>
2973	Fn) {
2974	assert(Call->getNumArgs() == `3`);
2975
2976	FPOptions FPO = Call->getFPFeaturesInEffect(LO: S.Ctx.getLangOpts());
2977	llvm::RoundingMode RM = getRoundingMode(FPO);
2978	QualType Arg1Type = Call->getArg(Arg: `0`)->getType();
2979	QualType Arg2Type = Call->getArg(Arg: `1`)->getType();
2980	QualType Arg3Type = Call->getArg(Arg: `2`)->getType();
2981
2982	// Non-vector floating point types.
2983	if (!Arg1Type ->isVectorType()) {
2984	assert(!Arg2Type->isVectorType());
2985	assert(!Arg3Type->isVectorType());
2986	(void)Arg2Type;
2987	(void)Arg3Type;
2988
2989	const Floating &Z = S.Stk.pop<Floating>();
2990	const Floating &Y = S.Stk.pop<Floating>();
2991	const Floating &X = S.Stk.pop<Floating>();
2992	APFloat F = Fn (X.getAPFloat(), Y.getAPFloat(), Z.getAPFloat(), RM);
2993	Floating Result = S.allocFloat(Sem: X.getSemantics());
2994	Result.copy(F);
2995	S.Stk.push<Floating>(Args&: Result);
2996	return true;
2997	}
2998
2999	// Vector type.
3000	assert(Arg1Type->isVectorType() && Arg2Type->isVectorType() &&
3001	Arg3Type->isVectorType());
3002
3003	const VectorType *VecTy = Arg1Type ->castAs<VectorType>();
3004	QualType ElemQT = VecTy->getElementType();
3005	unsigned NumElems = VecTy->getNumElements();
3006
3007	assert(ElemQT == Arg2Type->castAs<VectorType>()->getElementType() &&
3008	ElemQT == Arg3Type->castAs<VectorType>()->getElementType());
3009	assert(NumElems == Arg2Type->castAs<VectorType>()->getNumElements() &&
3010	NumElems == Arg3Type->castAs<VectorType>()->getNumElements());
3011	assert(ElemQT->isRealFloatingType());
3012	(void)ElemQT;
3013
3014	const Pointer &VZ = S.Stk.pop<Pointer>();
3015	const Pointer &VY = S.Stk.pop<Pointer>();
3016	const Pointer &VX = S.Stk.pop<Pointer>();
3017	const Pointer &Dst = S.Stk.peek<Pointer>();
3018	for (unsigned I = `0`; I != NumElems; ++I) {
3019	using T = PrimConv<PT_Float>::T;
3020	APFloat X = VX.elem<T>(I).getAPFloat();
3021	APFloat Y = VY.elem<T>(I).getAPFloat();
3022	APFloat Z = VZ.elem<T>(I).getAPFloat();
3023	APFloat F = Fn (X, Y, Z, RM);
3024	Dst.elem<Floating>(I) = Floating (F);
3025	}
3026	Dst.initializeAllElements();
3027	return true;
3028	}
3029
3030	/// AVX512 predicated move: "Result = Mask[] ? LHS[] : RHS[]".
3031	static bool interp__builtin_select(InterpState &S, CodePtr OpPC,
3032	const CallExpr *Call) {
3033	const Pointer &RHS = S.Stk.pop<Pointer>();
3034	const Pointer &LHS = S.Stk.pop<Pointer>();
3035	APSInt Mask = popToAPSInt(S, E: Call->getArg(Arg: `0`));
3036	const Pointer &Dst = S.Stk.peek<Pointer>();
3037
3038	assert(LHS.getNumElems() == RHS.getNumElems());
3039	assert(LHS.getNumElems() == Dst.getNumElems());
3040	unsigned NumElems = LHS.getNumElems();
3041	PrimType ElemT = LHS.getFieldDesc()->getPrimType();
3042	PrimType DstElemT = Dst.getFieldDesc()->getPrimType();
3043
3044	for (unsigned I = `0`; I != NumElems; ++I) {
3045	if (ElemT == PT_Float) {
3046	assert(DstElemT == PT_Float);
3047	Dst.elem<Floating>(I) =
3048	Mask [I] ? LHS.elem<Floating>(I) : RHS.elem<Floating>(I);
3049	} else {
3050	APSInt Elem;
3051	INT_TYPE_SWITCH(ElemT, {
3052	Elem = Mask[I] ? LHS.elem<T>(I).toAPSInt() : RHS.elem<T>(I).toAPSInt();
3053	});
3054	INT_TYPE_SWITCH_NO_BOOL(DstElemT,
3055	{ Dst.elem<T>(I) = static_cast<T>(Elem); });
3056	}
3057	}
3058	Dst.initializeAllElements();
3059
3060	return true;
3061	}
3062
3063	/// Scalar variant of AVX512 predicated select:
3064	/// Result[i] = (Mask bit 0) ? LHS[i] : RHS[i], but only element 0 may change.
3065	/// All other elements are taken from RHS.
3066	static bool interp__builtin_select_scalar(InterpState &S,
3067	const CallExpr *Call) {
3068	unsigned N =
3069	Call->getArg(Arg: `1`)->getType()->castAs<VectorType>()->getNumElements();
3070
3071	const Pointer &W = S.Stk.pop<Pointer>();
3072	const Pointer &A = S.Stk.pop<Pointer>();
3073	APSInt U = popToAPSInt(S, E: Call->getArg(Arg: `0`));
3074	const Pointer &Dst = S.Stk.peek<Pointer>();
3075
3076	bool TakeA0 = U.getZExtValue() & `1ULL`;
3077
3078	for (unsigned I = TakeA0; I != N; ++I)
3079	Dst.elem<Floating>(I) = W.elem<Floating>(I);
3080	if (TakeA0)
3081	Dst.elem<Floating>(I: `0`) = A.elem<Floating>(I: `0`);
3082
3083	Dst.initializeAllElements();
3084	return true;
3085	}
3086
3087	static bool interp__builtin_ia32_test_op(
3088	InterpState &S, CodePtr OpPC, const CallExpr *Call,
3089	llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
3090	const Pointer &RHS = S.Stk.pop<Pointer>();
3091	const Pointer &LHS = S.Stk.pop<Pointer>();
3092
3093	assert(LHS.getNumElems() == RHS.getNumElems());
3094
3095	unsigned SourceLen = LHS.getNumElems();
3096	QualType ElemQT = getElemType(P: LHS);
3097	OptPrimType ElemPT = S.getContext().classify(T: ElemQT);
3098	unsigned LaneWidth = S.getASTContext().getTypeSize(T: ElemQT);
3099
3100	APInt AWide(LaneWidth * SourceLen, `0`);
3101	APInt BWide(LaneWidth * SourceLen, `0`);
3102
3103	for (unsigned I = `0`; I != SourceLen; ++I) {
3104	APInt ALane;
3105	APInt BLane;
3106
3107	if (ElemQT ->isIntegerType()) { // Get value.
3108	INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
3109	ALane = LHS.elem<T>(I).toAPSInt();
3110	BLane = RHS.elem<T>(I).toAPSInt();
3111	});
3112	} else if (ElemQT ->isFloatingType()) { // Get only sign bit.
3113	using T = PrimConv<PT_Float>::T;
3114	ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
3115	BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
3116	} else { // Must be integer or floating type.
3117	return false;
3118	}
3119	AWide.insertBits(SubBits: ALane, bitPosition: I * LaneWidth);
3120	BWide.insertBits(SubBits: BLane, bitPosition: I * LaneWidth);
3121	}
3122	pushInteger(S, Val: Fn (AWide, BWide), QT: Call->getType());
3123	return true;
3124	}
3125
3126	static bool interp__builtin_ia32_movmsk_op(InterpState &S, CodePtr OpPC,
3127	const CallExpr *Call) {
3128	assert(Call->getNumArgs() == `1`);
3129
3130	const Pointer &Source = S.Stk.pop<Pointer>();
3131
3132	unsigned SourceLen = Source.getNumElems();
3133	QualType ElemQT = getElemType(P: Source);
3134	OptPrimType ElemT = S.getContext().classify(T: ElemQT);
3135	unsigned ResultLen =
3136	S.getASTContext().getTypeSize(T: Call->getType()); // Always 32-bit integer.
3137	APInt Result(ResultLen, `0`);
3138
3139	for (unsigned I = `0`; I != SourceLen; ++I) {
3140	APInt Elem;
3141	if (ElemQT ->isIntegerType()) {
3142	INT_TYPE_SWITCH_NO_BOOL(*ElemT, { Elem = Source.elem<T>(I).toAPSInt(); });
3143	} else if (ElemQT ->isRealFloatingType()) {
3144	using T = PrimConv<PT_Float>::T;
3145	Elem = Source.elem<T>(I).getAPFloat().bitcastToAPInt();
3146	} else {
3147	return false;
3148	}
3149	Result.setBitVal(BitPosition: I, BitValue: Elem.isNegative());
3150	}
3151	pushInteger(S, Val: Result, QT: Call->getType());
3152	return true;
3153	}
3154
3155	static bool interp__builtin_elementwise_triop(
3156	InterpState &S, CodePtr OpPC, const CallExpr *Call,
3157	llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
3158	Fn) {
3159	assert(Call->getNumArgs() == `3`);
3160
3161	QualType Arg0Type = Call->getArg(Arg: `0`)->getType();
3162	QualType Arg2Type = Call->getArg(Arg: `2`)->getType();
3163	// Non-vector integer types.
3164	if (!Arg0Type ->isVectorType()) {
3165	const APSInt &Op2 = popToAPSInt(S, T: Arg2Type);
3166	const APSInt &Op1 = popToAPSInt(S, E: Call->getArg(Arg: `1`));
3167	const APSInt &Op0 = popToAPSInt(S, T: Arg0Type);
3168	APSInt Result = APSInt (Fn (Op0, Op1, Op2), Op0.isUnsigned());
3169	pushInteger(S, Val: Result, QT: Call->getType());
3170	return true;
3171	}
3172
3173	const auto *VecT = Arg0Type ->castAs<VectorType>();
3174	PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
3175	unsigned NumElems = VecT->getNumElements();
3176	bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3177
3178	// Vector + Vector + Scalar case.
3179	if (!Arg2Type ->isVectorType()) {
3180	APSInt Op2 = popToAPSInt(S, T: Arg2Type);
3181
3182	const Pointer &Op1 = S.Stk.pop<Pointer>();
3183	const Pointer &Op0 = S.Stk.pop<Pointer>();
3184	const Pointer &Dst = S.Stk.peek<Pointer>();
3185	for (unsigned I = `0`; I != NumElems; ++I) {
3186	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3187	Dst.elem<T>(I) = static_cast<T>(APSInt (
3188	Fn(Op0.elem<T>(I).toAPSInt(), Op1.elem<T>(I).toAPSInt(), Op2),
3189	DestUnsigned));
3190	});
3191	}
3192	Dst.initializeAllElements();
3193
3194	return true;
3195	}
3196
3197	// Vector type.
3198	const Pointer &Op2 = S.Stk.pop<Pointer>();
3199	const Pointer &Op1 = S.Stk.pop<Pointer>();
3200	const Pointer &Op0 = S.Stk.pop<Pointer>();
3201	const Pointer &Dst = S.Stk.peek<Pointer>();
3202	for (unsigned I = `0`; I != NumElems; ++I) {
3203	APSInt Val0, Val1, Val2;
3204	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3205	Val0 = Op0.elem<T>(I).toAPSInt();
3206	Val1 = Op1.elem<T>(I).toAPSInt();
3207	Val2 = Op2.elem<T>(I).toAPSInt();
3208	});
3209	APSInt Result = APSInt (Fn (Val0, Val1, Val2), Val0.isUnsigned());
3210	INT_TYPE_SWITCH_NO_BOOL(ElemT,
3211	{ Dst.elem<T>(I) = static_cast<T>(Result); });
3212	}
3213	Dst.initializeAllElements();
3214
3215	return true;
3216	}
3217
3218	static bool interp__builtin_x86_extract_vector(InterpState &S, CodePtr OpPC,
3219	const CallExpr *Call,
3220	unsigned ID) {
3221	assert(Call->getNumArgs() == `2`);
3222
3223	APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: `1`));
3224	uint64_t Index = ImmAPS.getZExtValue();
3225
3226	const Pointer &Src = S.Stk.pop<Pointer>();
3227	if (!Src.getFieldDesc()->isPrimitiveArray())
3228	return false;
3229
3230	const Pointer &Dst = S.Stk.peek<Pointer>();
3231	if (!Dst.getFieldDesc()->isPrimitiveArray())
3232	return false;
3233
3234	unsigned SrcElems = Src.getNumElems();
3235	unsigned DstElems = Dst.getNumElems();
3236
3237	unsigned NumLanes = SrcElems / DstElems;
3238	unsigned Lane = static_cast<unsigned>(Index % NumLanes);
3239	unsigned ExtractPos = Lane * DstElems;
3240
3241	PrimType ElemT = Src.getFieldDesc()->getPrimType();
3242
3243	TYPE_SWITCH(ElemT, {
3244	for (unsigned I = `0`; I != DstElems; ++I) {
3245	Dst.elem<T>(I) = Src.elem<T>(ExtractPos + I);
3246	}
3247	});
3248
3249	Dst.initializeAllElements();
3250	return true;
3251	}
3252
3253	static bool interp__builtin_x86_extract_vector_masked(InterpState &S,
3254	CodePtr OpPC,
3255	const CallExpr *Call,
3256	unsigned ID) {
3257	assert(Call->getNumArgs() == `4`);
3258
3259	APSInt MaskAPS = popToAPSInt(S, E: Call->getArg(Arg: `3`));
3260	const Pointer &Merge = S.Stk.pop<Pointer>();
3261	APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: `1`));
3262	const Pointer &Src = S.Stk.pop<Pointer>();
3263
3264	if (!Src.getFieldDesc()->isPrimitiveArray() \|\|
3265	!Merge.getFieldDesc()->isPrimitiveArray())
3266	return false;
3267
3268	const Pointer &Dst = S.Stk.peek<Pointer>();
3269	if (!Dst.getFieldDesc()->isPrimitiveArray())
3270	return false;
3271
3272	unsigned SrcElems = Src.getNumElems();
3273	unsigned DstElems = Dst.getNumElems();
3274
3275	unsigned NumLanes = SrcElems / DstElems;
3276	unsigned Lane = static_cast<unsigned>(ImmAPS.getZExtValue() % NumLanes);
3277	unsigned Base = Lane * DstElems;
3278
3279	PrimType ElemT = Src.getFieldDesc()->getPrimType();
3280
3281	TYPE_SWITCH(ElemT, {
3282	for (unsigned I = `0`; I != DstElems; ++I) {
3283	if (MaskAPS[I])
3284	Dst.elem<T>(I) = Src.elem<T>(Base + I);
3285	else
3286	Dst.elem<T>(I) = Merge.elem<T>(I);
3287	}
3288	});
3289
3290	Dst.initializeAllElements();
3291	return true;
3292	}
3293
3294	static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
3295	const CallExpr *Call,
3296	unsigned ID) {
3297	assert(Call->getNumArgs() == `3`);
3298
3299	APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: `2`));
3300	uint64_t Index = ImmAPS.getZExtValue();
3301
3302	const Pointer &SubVec = S.Stk.pop<Pointer>();
3303	if (!SubVec.getFieldDesc()->isPrimitiveArray())
3304	return false;
3305
3306	const Pointer &BaseVec = S.Stk.pop<Pointer>();
3307	if (!BaseVec.getFieldDesc()->isPrimitiveArray())
3308	return false;
3309
3310	const Pointer &Dst = S.Stk.peek<Pointer>();
3311
3312	unsigned BaseElements = BaseVec.getNumElems();
3313	unsigned SubElements = SubVec.getNumElems();
3314
3315	assert(SubElements != `0` && BaseElements != `0` &&
3316	(BaseElements % SubElements) == `0`);
3317
3318	unsigned NumLanes = BaseElements / SubElements;
3319	unsigned Lane = static_cast<unsigned>(Index % NumLanes);
3320	unsigned InsertPos = Lane * SubElements;
3321
3322	PrimType ElemT = BaseVec.getFieldDesc()->getPrimType();
3323
3324	TYPE_SWITCH(ElemT, {
3325	for (unsigned I = `0`; I != BaseElements; ++I)
3326	Dst.elem<T>(I) = BaseVec.elem<T>(I);
3327	for (unsigned I = `0`; I != SubElements; ++I)
3328	Dst.elem<T>(InsertPos + I) = SubVec.elem<T>(I);
3329	});
3330
3331	Dst.initializeAllElements();
3332	return true;
3333	}
3334
3335	static bool interp__builtin_ia32_phminposuw(InterpState &S, CodePtr OpPC,
3336	const CallExpr *Call) {
3337	assert(Call->getNumArgs() == `1`);
3338
3339	const Pointer &Source = S.Stk.pop<Pointer>();
3340	const Pointer &Dest = S.Stk.peek<Pointer>();
3341
3342	unsigned SourceLen = Source.getNumElems();
3343	QualType ElemQT = getElemType(P: Source);
3344	OptPrimType ElemT = S.getContext().classify(T: ElemQT);
3345	unsigned ElemBitWidth = S.getASTContext().getTypeSize(T: ElemQT);
3346
3347	bool DestUnsigned = Call->getCallReturnType(Ctx: S.getASTContext())
3348	->castAs<VectorType>()
3349	->getElementType()
3350	->isUnsignedIntegerOrEnumerationType();
3351
3352	INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
3353	APSInt MinIndex(ElemBitWidth, DestUnsigned);
3354	APSInt MinVal = Source.elem<T>(`0`).toAPSInt();
3355
3356	for (unsigned I = `1`; I != SourceLen; ++I) {
3357	APSInt Val = Source.elem<T>(I).toAPSInt();
3358	if (MinVal.ugt(Val)) {
3359	MinVal = Val;
3360	MinIndex = I;
3361	}
3362	}
3363
3364	Dest.elem<T>(`0`) = static_cast<T>(MinVal);
3365	Dest.elem<T>(`1`) = static_cast<T>(MinIndex);
3366	for (unsigned I = `2`; I != SourceLen; ++I) {
3367	Dest.elem<T>(I) = static_cast<T>(APSInt (ElemBitWidth, DestUnsigned));
3368	}
3369	});
3370	Dest.initializeAllElements();
3371	return true;
3372	}
3373
3374	static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
3375	const CallExpr Call, bool* MaskZ) {
3376	assert(Call->getNumArgs() == `5`);
3377
3378	APInt U = popToAPSInt(S, E: Call->getArg(Arg: `4`)); // Lane mask
3379	APInt Imm = popToAPSInt(S, E: Call->getArg(Arg: `3`)); // Ternary truth table
3380	const Pointer &C = S.Stk.pop<Pointer>();
3381	const Pointer &B = S.Stk.pop<Pointer>();
3382	const Pointer &A = S.Stk.pop<Pointer>();
3383	const Pointer &Dst = S.Stk.peek<Pointer>();
3384
3385	unsigned DstLen = A.getNumElems();
3386	QualType ElemQT = getElemType(P: A);
3387	OptPrimType ElemT = S.getContext().classify(T: ElemQT);
3388	unsigned LaneWidth = S.getASTContext().getTypeSize(T: ElemQT);
3389	bool DstUnsigned = ElemQT ->isUnsignedIntegerOrEnumerationType();
3390
3391	INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
3392	for (unsigned I = `0`; I != DstLen; ++I) {
3393	APInt ALane = A.elem<T>(I).toAPSInt();
3394	APInt BLane = B.elem<T>(I).toAPSInt();
3395	APInt CLane = C.elem<T>(I).toAPSInt();
3396	APInt RLane(LaneWidth, `0`);
3397	if (U[I]) { // If lane not masked, compute ternary logic.
3398	for (unsigned Bit = `0`; Bit != LaneWidth; ++Bit) {
3399	unsigned ABit = ALane[Bit];
3400	unsigned BBit = BLane[Bit];
3401	unsigned CBit = CLane[Bit];
3402	unsigned Idx = (ABit << `2`) \| (BBit << `1`) \| (CBit);
3403	RLane.setBitVal(Bit, Imm[Idx]);
3404	}
3405	Dst.elem<T>(I) = static_cast<T>(APSInt (RLane, DstUnsigned));
3406	} else if (MaskZ) { // If zero masked, zero the lane.
3407	Dst.elem<T>(I) = static_cast<T>(APSInt (RLane, DstUnsigned));
3408	} else { // Just masked, put in A lane.
3409	Dst.elem<T>(I) = static_cast<T>(APSInt (ALane, DstUnsigned));
3410	}
3411	}
3412	});
3413	Dst.initializeAllElements();
3414	return true;
3415	}
3416
3417	static bool interp__builtin_vec_ext(InterpState &S, CodePtr OpPC,
3418	const CallExpr Call, unsigned* ID) {
3419	assert(Call->getNumArgs() == `2`);
3420
3421	APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: `1`));
3422	const Pointer &Vec = S.Stk.pop<Pointer>();
3423	if (!Vec.getFieldDesc()->isPrimitiveArray())
3424	return false;
3425
3426	unsigned NumElems = Vec.getNumElems();
3427	unsigned Index =
3428	static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - `1`));
3429
3430	PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3431	// FIXME(#161685): Replace float+int split with a numeric-only type switch
3432	if (ElemT == PT_Float) {
3433	S.Stk.push<Floating>(Args&: Vec.elem<Floating>(I: Index));
3434	return true;
3435	}
3436	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3437	APSInt V = Vec.elem<T>(Index).toAPSInt();
3438	pushInteger(S, V, Call->getType());
3439	});
3440
3441	return true;
3442	}
3443
3444	static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
3445	const CallExpr Call, unsigned* ID) {
3446	assert(Call->getNumArgs() == `3`);
3447
3448	APSInt ImmAPS = popToAPSInt(S, E: Call->getArg(Arg: `2`));
3449	APSInt ValAPS = popToAPSInt(S, E: Call->getArg(Arg: `1`));
3450
3451	const Pointer &Base = S.Stk.pop<Pointer>();
3452	if (!Base.getFieldDesc()->isPrimitiveArray())
3453	return false;
3454
3455	const Pointer &Dst = S.Stk.peek<Pointer>();
3456
3457	unsigned NumElems = Base.getNumElems();
3458	unsigned Index =
3459	static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - `1`));
3460
3461	PrimType ElemT = Base.getFieldDesc()->getPrimType();
3462	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3463	for (unsigned I = `0`; I != NumElems; ++I)
3464	Dst.elem<T>(I) = Base.elem<T>(I);
3465	Dst.elem<T>(Index) = static_cast<T>(ValAPS);
3466	});
3467
3468	Dst.initializeAllElements();
3469	return true;
3470	}
3471
3472	static bool evalICmpImm(uint8_t Imm, const APSInt &A, const APSInt &B,
3473	bool IsUnsigned) {
3474	switch (Imm & `0x7`) {
3475	case `0x00`: // _MM_CMPINT_EQ
3476	return (A == B);
3477	case `0x01`: // _MM_CMPINT_LT
3478	return IsUnsigned ? A.ult(RHS: B) : A.slt(RHS: B);
3479	case `0x02`: // _MM_CMPINT_LE
3480	return IsUnsigned ? A.ule(RHS: B) : A.sle(RHS: B);
3481	case `0x03`: // _MM_CMPINT_FALSE
3482	return false;
3483	case `0x04`: // _MM_CMPINT_NE
3484	return (A != B);
3485	case `0x05`: // _MM_CMPINT_NLT
3486	return IsUnsigned ? A.ugt(RHS: B) : A.sgt(RHS: B);
3487	case `0x06`: // _MM_CMPINT_NLE
3488	return IsUnsigned ? A.uge(RHS: B) : A.sge(RHS: B);
3489	case `0x07`: // _MM_CMPINT_TRUE
3490	return true;
3491	default:
3492	llvm_unreachable("Invalid Op");
3493	}
3494	}
3495
3496	static bool interp__builtin_ia32_cmp_mask(InterpState &S, CodePtr OpPC,
3497	const CallExpr Call, unsigned* ID,
3498	bool IsUnsigned) {
3499	assert(Call->getNumArgs() == `4`);
3500
3501	APSInt Mask = popToAPSInt(S, E: Call->getArg(Arg: `3`));
3502	APSInt Opcode = popToAPSInt(S, E: Call->getArg(Arg: `2`));
3503	unsigned CmpOp = static_cast<unsigned>(Opcode.getZExtValue());
3504	const Pointer &RHS = S.Stk.pop<Pointer>();
3505	const Pointer &LHS = S.Stk.pop<Pointer>();
3506
3507	assert(LHS.getNumElems() == RHS.getNumElems());
3508
3509	APInt RetMask = APInt::getZero(numBits: LHS.getNumElems());
3510	unsigned VectorLen = LHS.getNumElems();
3511	PrimType ElemT = LHS.getFieldDesc()->getPrimType();
3512
3513	for (unsigned ElemNum = `0`; ElemNum < VectorLen; ++ElemNum) {
3514	APSInt A, B;
3515	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3516	A = LHS.elem<T>(ElemNum).toAPSInt();
3517	B = RHS.elem<T>(ElemNum).toAPSInt();
3518	});
3519	RetMask.setBitVal(BitPosition: ElemNum,
3520	BitValue: Mask [ElemNum] && evalICmpImm(Imm: CmpOp, A, B, IsUnsigned));
3521	}
3522	pushInteger(S, Val: RetMask, QT: Call->getType());
3523	return true;
3524	}
3525
3526	static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
3527	const CallExpr *Call) {
3528	assert(Call->getNumArgs() == `1`);
3529
3530	QualType Arg0Type = Call->getArg(Arg: `0`)->getType();
3531	const auto *VecT = Arg0Type ->castAs<VectorType>();
3532	PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
3533	unsigned NumElems = VecT->getNumElements();
3534	bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
3535	const Pointer &Src = S.Stk.pop<Pointer>();
3536	const Pointer &Dst = S.Stk.peek<Pointer>();
3537
3538	for (unsigned I = `0`; I != NumElems; ++I) {
3539	INT_TYPE_SWITCH_NO_BOOL(ElemT, {
3540	APSInt ElemI = Src.elem<T>(I).toAPSInt();
3541	APInt ConflictMask(ElemI.getBitWidth(), `0`);
3542	for (unsigned J = `0`; J != I; ++J) {
3543	APSInt ElemJ = Src.elem<T>(J).toAPSInt();
3544	ConflictMask.setBitVal(J, ElemI == ElemJ);
3545	}
3546	Dst.elem<T>(I) = static_cast<T>(APSInt (ConflictMask, DestUnsigned));
3547	});
3548	}
3549	Dst.initializeAllElements();
3550	return true;
3551	}
3552
3553	static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC,
3554	const CallExpr *Call,
3555	unsigned ID) {
3556	assert(Call->getNumArgs() == `1`);
3557
3558	const Pointer &Vec = S.Stk.pop<Pointer>();
3559	unsigned RetWidth = S.getASTContext().getIntWidth(T: Call->getType());
3560	APInt RetMask(RetWidth, `0`);
3561
3562	unsigned VectorLen = Vec.getNumElems();
3563	PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3564
3565	for (unsigned ElemNum = `0`; ElemNum != VectorLen; ++ElemNum) {
3566	APSInt A;
3567	INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem<T>(ElemNum).toAPSInt(); });
3568	unsigned MSB = A [A.getBitWidth() - `1`];
3569	RetMask.setBitVal(BitPosition: ElemNum, BitValue: MSB);
3570	}
3571	pushInteger(S, Val: RetMask, QT: Call->getType());
3572	return true;
3573	}
3574
3575	static bool interp__builtin_ia32_cvt_mask2vec(InterpState &S, CodePtr OpPC,
3576	const CallExpr *Call,
3577	unsigned ID) {
3578	assert(Call->getNumArgs() == `1`);
3579
3580	APSInt Mask = popToAPSInt(S, E: Call->getArg(Arg: `0`));
3581
3582	const Pointer &Vec = S.Stk.peek<Pointer>();
3583	unsigned NumElems = Vec.getNumElems();
3584	PrimType ElemT = Vec.getFieldDesc()->getPrimType();
3585
3586	for (unsigned I = `0`; I != NumElems; ++I) {
3587	bool BitSet = Mask [I];
3588
3589	INT_TYPE_SWITCH_NO_BOOL(
3590	ElemT, { Vec.elem<T>(I) = BitSet ? T::from(-`1`) : T::from(`0`); });
3591	}
3592
3593	Vec.initializeAllElements();
3594
3595	return true;
3596	}
3597
3598	static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
3599	const CallExpr *Call,
3600	bool HasRoundingMask) {
3601	APSInt Rounding, MaskInt;
3602	Pointer Src, B, A;
3603
3604	if (HasRoundingMask) {
3605	assert(Call->getNumArgs() == `5`);
3606	Rounding = popToAPSInt(S, E: Call->getArg(Arg: `4`));
3607	MaskInt = popToAPSInt(S, E: Call->getArg(Arg: `3`));
3608	Src = S.Stk.pop<Pointer>();
3609	B = S.Stk.pop<Pointer>();
3610	A = S.Stk.pop<Pointer>();
3611	if (!CheckLoad(S, OpPC, Ptr: A) \|\| !CheckLoad(S, OpPC, Ptr: B) \|\|
3612	!CheckLoad(S, OpPC, Ptr: Src))
3613	return false;
3614	} else {
3615	assert(Call->getNumArgs() == `2`);
3616	B = S.Stk.pop<Pointer>();
3617	A = S.Stk.pop<Pointer>();
3618	if (!CheckLoad(S, OpPC, Ptr: A) \|\| !CheckLoad(S, OpPC, Ptr: B))
3619	return false;
3620	}
3621
3622	const auto *DstVTy = Call->getType()->castAs<VectorType>();
3623	unsigned NumElems = DstVTy->getNumElements();
3624	const Pointer &Dst = S.Stk.peek<Pointer>();
3625
3626	// Copy all elements except lane 0 (overwritten below) from A to Dst.
3627	for (unsigned I = `1`; I != NumElems; ++I)
3628	Dst.elem<Floating>(I) = A.elem<Floating>(I);
3629
3630	// Convert element 0 from double to float, or use Src if masked off.
3631	if (!HasRoundingMask \|\| (MaskInt.getZExtValue() & `0x1`)) {
3632	assert(S.getASTContext().FloatTy == DstVTy->getElementType() &&
3633	"cvtsd2ss requires float element type in destination vector");
3634
3635	Floating Conv = S.allocFloat(
3636	Sem: S.getASTContext().getFloatTypeSemantics(T: DstVTy->getElementType()));
3637	APFloat SrcVal = B.elem<Floating>(I: `0`).getAPFloat();
3638	if (!convertDoubleToFloatStrict(Src: SrcVal, Dst&: Conv, S, DiagExpr: Call))
3639	return false;
3640	Dst.elem<Floating>(I: `0`) = Conv;
3641	} else {
3642	Dst.elem<Floating>(I: `0`) = Src.elem<Floating>(I: `0`);
3643	}
3644
3645	Dst.initializeAllElements();
3646	return true;
3647	}
3648
3649	static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
3650	const CallExpr Call, bool* IsMasked,
3651	bool HasRounding) {
3652
3653	APSInt MaskVal;
3654	Pointer PassThrough;
3655	Pointer Src;
3656	APSInt Rounding;
3657
3658	if (IsMasked) {
3659	// Pop in reverse order.
3660	if (HasRounding) {
3661	Rounding = popToAPSInt(S, E: Call->getArg(Arg: `3`));
3662	MaskVal = popToAPSInt(S, E: Call->getArg(Arg: `2`));
3663	PassThrough = S.Stk.pop<Pointer>();
3664	Src = S.Stk.pop<Pointer>();
3665	} else {
3666	MaskVal = popToAPSInt(S, E: Call->getArg(Arg: `2`));
3667	PassThrough = S.Stk.pop<Pointer>();
3668	Src = S.Stk.pop<Pointer>();
3669	}
3670
3671	if (!CheckLoad(S, OpPC, Ptr: PassThrough))
3672	return false;
3673	} else {
3674	// Pop source only.
3675	Src = S.Stk.pop<Pointer>();
3676	}
3677
3678	if (!CheckLoad(S, OpPC, Ptr: Src))
3679	return false;
3680
3681	const auto *RetVTy = Call->getType()->castAs<VectorType>();
3682	unsigned RetElems = RetVTy->getNumElements();
3683	unsigned SrcElems = Src.getNumElems();
3684	const Pointer &Dst = S.Stk.peek<Pointer>();
3685
3686	// Initialize destination with passthrough or zeros.
3687	for (unsigned I = `0`; I != RetElems; ++I)
3688	if (IsMasked)
3689	Dst.elem<Floating>(I) = PassThrough.elem<Floating>(I);
3690	else
3691	Dst.elem<Floating>(I) = Floating (APFloat (`0.0f`));
3692
3693	assert(S.getASTContext().FloatTy == RetVTy->getElementType() &&
3694	"cvtpd2ps requires float element type in return vector");
3695
3696	// Convert double to float for enabled elements (only process source elements
3697	// that exist).
3698	for (unsigned I = `0`; I != SrcElems; ++I) {
3699	if (IsMasked && !MaskVal [I])
3700	continue;
3701
3702	APFloat SrcVal = Src.elem<Floating>(I).getAPFloat();
3703
3704	Floating Conv = S.allocFloat(
3705	Sem: S.getASTContext().getFloatTypeSemantics(T: RetVTy->getElementType()));
3706	if (!convertDoubleToFloatStrict(Src: SrcVal, Dst&: Conv, S, DiagExpr: Call))
3707	return false;
3708	Dst.elem<Floating>(I) = Conv;
3709	}
3710
3711	Dst.initializeAllElements();
3712	return true;
3713	}
3714
3715	static bool interp__builtin_ia32_shuffle_generic(
3716	InterpState &S, CodePtr OpPC, const CallExpr *Call,
3717	llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
3718	GetSourceIndex) {
3719
3720	assert(Call->getNumArgs() == `2` \|\| Call->getNumArgs() == `3`);
3721
3722	unsigned ShuffleMask = `0`;
3723	Pointer A, MaskVector, B;
3724	bool IsVectorMask = false;
3725	bool IsSingleOperand = (Call->getNumArgs() == `2`);
3726
3727	if (IsSingleOperand) {
3728	QualType MaskType = Call->getArg(Arg: `1`)->getType();
3729	if (MaskType ->isVectorType()) {
3730	IsVectorMask = true;
3731	MaskVector = S.Stk.pop<Pointer>();
3732	A = S.Stk.pop<Pointer>();
3733	B = A;
3734	} else if (MaskType ->isIntegerType()) {
3735	ShuffleMask = popToAPSInt(S, E: Call->getArg(Arg: `1`)).getZExtValue();
3736	A = S.Stk.pop<Pointer>();
3737	B = A;
3738	} else {
3739	return false;
3740	}
3741	} else {
3742	QualType Arg2Type = Call->getArg(Arg: `2`)->getType();
3743	if (Arg2Type ->isVectorType()) {
3744	IsVectorMask = true;
3745	B = S.Stk.pop<Pointer>();
3746	MaskVector = S.Stk.pop<Pointer>();
3747	A = S.Stk.pop<Pointer>();
3748	} else if (Arg2Type ->isIntegerType()) {
3749	ShuffleMask = popToAPSInt(S, E: Call->getArg(Arg: `2`)).getZExtValue();
3750	B = S.Stk.pop<Pointer>();
3751	A = S.Stk.pop<Pointer>();
3752	} else {
3753	return false;
3754	}
3755	}
3756
3757	QualType Arg0Type = Call->getArg(Arg: `0`)->getType();
3758	const auto *VecT = Arg0Type ->castAs<VectorType>();
3759	PrimType ElemT = *S.getContext().classify(T: VecT->getElementType());
3760	unsigned NumElems = VecT->getNumElements();
3761
3762	const Pointer &Dst = S.Stk.peek<Pointer>();
3763
3764	PrimType MaskElemT = PT_Uint32;
3765	if (IsVectorMask) {
3766	QualType Arg1Type = Call->getArg(Arg: `1`)->getType();
3767	const auto *MaskVecT = Arg1Type ->castAs<VectorType>();
3768	QualType MaskElemType = MaskVecT->getElementType();
3769	MaskElemT = *S.getContext().classify(T: MaskElemType);
3770	}
3771
3772	for (unsigned DstIdx = `0`; DstIdx != NumElems; ++DstIdx) {
3773	if (IsVectorMask) {
3774	INT_TYPE_SWITCH(MaskElemT, {
3775	ShuffleMask = static_cast<unsigned>(MaskVector.elem<T>(DstIdx));
3776	});
3777	}
3778
3779	auto [SrcVecIdx, SrcIdx] = GetSourceIndex (DstIdx, ShuffleMask);
3780
3781	if (SrcIdx < `0`) {
3782	// Zero out this element
3783	if (ElemT == PT_Float) {
3784	Dst.elem<Floating>(I: DstIdx) = Floating (
3785	S.getASTContext().getFloatTypeSemantics(T: VecT->getElementType()));
3786	} else {
3787	INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(DstIdx) = T::from(`0`); });
3788	}
3789	} else {
3790	const Pointer &Src = (SrcVecIdx == `0`) ? A : B;
3791	TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
3792	}
3793	}
3794	Dst.initializeAllElements();
3795
3796	return true;
3797	}
3798
3799	static bool interp__builtin_ia32_shift_with_count(
3800	InterpState &S, CodePtr OpPC, const CallExpr *Call,
3801	llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp,
3802	llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) {
3803
3804	assert(Call->getNumArgs() == `2`);
3805
3806	const Pointer &Count = S.Stk.pop<Pointer>();
3807	const Pointer &Source = S.Stk.pop<Pointer>();
3808
3809	QualType SourceType = Call->getArg(Arg: `0`)->getType();
3810	QualType CountType = Call->getArg(Arg: `1`)->getType();
3811	assert(SourceType->isVectorType() && CountType->isVectorType());
3812
3813	const auto *SourceVecT = SourceType ->castAs<VectorType>();
3814	const auto *CountVecT = CountType ->castAs<VectorType>();
3815	PrimType SourceElemT = *S.getContext().classify(T: SourceVecT->getElementType());
3816	PrimType CountElemT = *S.getContext().classify(T: CountVecT->getElementType());
3817
3818	const Pointer &Dst = S.Stk.peek<Pointer>();
3819
3820	unsigned DestEltWidth =
3821	S.getASTContext().getTypeSize(T: SourceVecT->getElementType());
3822	bool IsDestUnsigned = SourceVecT->getElementType()->isUnsignedIntegerType();
3823	unsigned DestLen = SourceVecT->getNumElements();
3824	unsigned CountEltWidth =
3825	S.getASTContext().getTypeSize(T: CountVecT->getElementType());
3826	unsigned NumBitsInQWord = `64`;
3827	unsigned NumCountElts = NumBitsInQWord / CountEltWidth;
3828
3829	uint64_t CountLQWord = `0`;
3830	for (unsigned EltIdx = `0`; EltIdx != NumCountElts; ++EltIdx) {
3831	uint64_t Elt = `0`;
3832	INT_TYPE_SWITCH(CountElemT,
3833	{ Elt = static_cast<uint64_t>(Count.elem<T>(EltIdx)); });
3834	CountLQWord \|= (Elt << (EltIdx * CountEltWidth));
3835	}
3836
3837	for (unsigned EltIdx = `0`; EltIdx != DestLen; ++EltIdx) {
3838	APSInt Elt;
3839	INT_TYPE_SWITCH(SourceElemT, { Elt = Source.elem<T>(EltIdx).toAPSInt(); });
3840
3841	APInt Result;
3842	if (CountLQWord < DestEltWidth) {
3843	Result = ShiftOp (Elt, CountLQWord);
3844	} else {
3845	Result = OverflowOp (Elt, DestEltWidth);
3846	}
3847	if (IsDestUnsigned) {
3848	INT_TYPE_SWITCH(SourceElemT, {
3849	Dst.elem<T>(EltIdx) = T::from(Result.getZExtValue());
3850	});
3851	} else {
3852	INT_TYPE_SWITCH(SourceElemT, {
3853	Dst.elem<T>(EltIdx) = T::from(Result.getSExtValue());
3854	});
3855	}
3856	}
3857
3858	Dst.initializeAllElements();
3859	return true;
3860	}
3861
3862	static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
3863	const CallExpr *Call) {
3864
3865	assert(Call->getNumArgs() == `3`);
3866
3867	QualType SourceType = Call->getArg(Arg: `0`)->getType();
3868	QualType ShuffleMaskType = Call->getArg(Arg: `1`)->getType();
3869	QualType ZeroMaskType = Call->getArg(Arg: `2`)->getType();
3870	if (!SourceType ->isVectorType() \|\| !ShuffleMaskType ->isVectorType() \|\|
3871	!ZeroMaskType ->isIntegerType()) {
3872	return false;
3873	}
3874
3875	Pointer Source, ShuffleMask;
3876	APSInt ZeroMask = popToAPSInt(S, E: Call->getArg(Arg: `2`));
3877	ShuffleMask = S.Stk.pop<Pointer>();
3878	Source = S.Stk.pop<Pointer>();
3879
3880	const auto *SourceVecT = SourceType ->castAs<VectorType>();
3881	const auto *ShuffleMaskVecT = ShuffleMaskType ->castAs<VectorType>();
3882	assert(SourceVecT->getNumElements() == ShuffleMaskVecT->getNumElements());
3883	assert(ZeroMask.getBitWidth() == SourceVecT->getNumElements());
3884
3885	PrimType SourceElemT = *S.getContext().classify(T: SourceVecT->getElementType());
3886	PrimType ShuffleMaskElemT =
3887	*S.getContext().classify(T: ShuffleMaskVecT->getElementType());
3888
3889	unsigned NumBytesInQWord = `8`;
3890	unsigned NumBitsInByte = `8`;
3891	unsigned NumBytes = SourceVecT->getNumElements();
3892	unsigned NumQWords = NumBytes / NumBytesInQWord;
3893	unsigned RetWidth = ZeroMask.getBitWidth();
3894	APSInt RetMask(llvm::APInt (RetWidth, `0`), /isUnsigned=/true);
3895
3896	for (unsigned QWordId = `0`; QWordId != NumQWords; ++QWordId) {
3897	APInt SourceQWord(`64`, `0`);
3898	for (unsigned ByteIdx = `0`; ByteIdx != NumBytesInQWord; ++ByteIdx) {
3899	uint64_t Byte = `0`;
3900	INT_TYPE_SWITCH(SourceElemT, {
3901	Byte = static_cast<uint64_t>(
3902	Source.elem<T>(QWordId * NumBytesInQWord + ByteIdx));
3903	});
3904	SourceQWord.insertBits(SubBits: APInt (`8`, Byte & `0xFF`), bitPosition: ByteIdx * NumBitsInByte);
3905	}
3906
3907	for (unsigned ByteIdx = `0`; ByteIdx != NumBytesInQWord; ++ByteIdx) {
3908	unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx;
3909	unsigned M = `0`;
3910	INT_TYPE_SWITCH(ShuffleMaskElemT, {
3911	M = static_cast<unsigned>(ShuffleMask.elem<T>(SelIdx)) & `0x3F`;
3912	});
3913
3914	if (ZeroMask [SelIdx]) {
3915	RetMask.setBitVal(BitPosition: SelIdx, BitValue: SourceQWord [M]);
3916	}
3917	}
3918	}
3919
3920	pushInteger(S, Val: RetMask, QT: Call->getType());
3921	return true;
3922	}
3923
3924	static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
3925	const CallExpr *Call) {
3926	// Arguments are: vector of floats, rounding immediate
3927	assert(Call->getNumArgs() == `2`);
3928
3929	APSInt Imm = popToAPSInt(S, E: Call->getArg(Arg: `1`));
3930	const Pointer &Src = S.Stk.pop<Pointer>();
3931	const Pointer &Dst = S.Stk.peek<Pointer>();
3932
3933	assert(Src.getFieldDesc()->isPrimitiveArray());
3934	assert(Dst.getFieldDesc()->isPrimitiveArray());
3935
3936	const auto *SrcVTy = Call->getArg(Arg: `0`)->getType()->castAs<VectorType>();
3937	unsigned SrcNumElems = SrcVTy->getNumElements();
3938	const auto *DstVTy = Call->getType()->castAs<VectorType>();
3939	unsigned DstNumElems = DstVTy->getNumElements();
3940
3941	const llvm::fltSemantics &HalfSem =
3942	S.getASTContext().getFloatTypeSemantics(T: S.getASTContext().HalfTy);
3943
3944	// imm[2] == 1 means use MXCSR rounding mode.
3945	// In that case, we can only evaluate if the conversion is exact.
3946	int ImmVal = Imm.getZExtValue();
3947	bool UseMXCSR = (ImmVal & `4`) != `0`;
3948	bool IsFPConstrained =
3949	Call->getFPFeaturesInEffect(LO: S.getASTContext().getLangOpts())
3950	.isFPConstrained();
3951
3952	llvm::RoundingMode RM;
3953	if (!UseMXCSR) {
3954	switch (ImmVal & `3`) {
3955	case `0`:
3956	RM = llvm::RoundingMode::NearestTiesToEven;
3957	break;
3958	case `1`:
3959	RM = llvm::RoundingMode::TowardNegative;
3960	break;
3961	case `2`:
3962	RM = llvm::RoundingMode::TowardPositive;
3963	break;
3964	case `3`:
3965	RM = llvm::RoundingMode::TowardZero;
3966	break;
3967	default:
3968	llvm_unreachable("Invalid immediate rounding mode");
3969	}
3970	} else {
3971	// For MXCSR, we must check for exactness. We can use any rounding mode
3972	// for the trial conversion since the result is the same if it's exact.
3973	RM = llvm::RoundingMode::NearestTiesToEven;
3974	}
3975
3976	QualType DstElemQT = Dst.getFieldDesc()->getElemQualType();
3977	PrimType DstElemT = *S.getContext().classify(T: DstElemQT);
3978
3979	for (unsigned I = `0`; I != SrcNumElems; ++I) {
3980	Floating SrcVal = Src.elem<Floating>(I);
3981	APFloat DstVal = SrcVal.getAPFloat();
3982
3983	bool LostInfo;
3984	APFloat::opStatus St = DstVal.convert(ToSemantics: HalfSem, RM, losesInfo: &LostInfo);
3985
3986	if (UseMXCSR && IsFPConstrained && St != APFloat::opOK) {
3987	S.FFDiag(SI: S.Current->getSource(PC: OpPC),
3988	DiagId: diag::note_constexpr_dynamic_rounding);
3989	return false;
3990	}
3991
3992	INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
3993	// Convert the destination value's bit pattern to an unsigned integer,
3994	// then reconstruct the element using the target type's 'from' method.
3995	uint64_t RawBits = DstVal.bitcastToAPInt().getZExtValue();
3996	Dst.elem<T>(I) = T::from(RawBits);
3997	});
3998	}
3999
4000	// Zero out remaining elements if the destination has more elements
4001	// (e.g., vcvtps2ph converting 4 floats to 8 shorts).
4002	if (DstNumElems > SrcNumElems) {
4003	for (unsigned I = SrcNumElems; I != DstNumElems; ++I) {
4004	INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(I) = T::from(`0`); });
4005	}
4006	}
4007
4008	Dst.initializeAllElements();
4009	return true;
4010	}
4011
4012	static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC,
4013	const CallExpr *Call) {
4014	assert(Call->getNumArgs() == `2`);
4015
4016	QualType ATy = Call->getArg(Arg: `0`)->getType();
4017	QualType BTy = Call->getArg(Arg: `1`)->getType();
4018	if (!ATy ->isVectorType() \|\| !BTy ->isVectorType()) {
4019	return false;
4020	}
4021
4022	const Pointer &BPtr = S.Stk.pop<Pointer>();
4023	const Pointer &APtr = S.Stk.pop<Pointer>();
4024	const auto *AVecT = ATy ->castAs<VectorType>();
4025	assert(AVecT->getNumElements() ==
4026	BTy->castAs<VectorType>()->getNumElements());
4027
4028	PrimType ElemT = *S.getContext().classify(T: AVecT->getElementType());
4029
4030	unsigned NumBytesInQWord = `8`;
4031	unsigned NumBitsInByte = `8`;
4032	unsigned NumBytes = AVecT->getNumElements();
4033	unsigned NumQWords = NumBytes / NumBytesInQWord;
4034	const Pointer &Dst = S.Stk.peek<Pointer>();
4035
4036	for (unsigned QWordId = `0`; QWordId != NumQWords; ++QWordId) {
4037	APInt BQWord(`64`, `0`);
4038	for (unsigned ByteIdx = `0`; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4039	unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
4040	INT_TYPE_SWITCH(ElemT, {
4041	uint64_t Byte = static_cast<uint64_t>(BPtr.elem<T>(Idx));
4042	BQWord.insertBits(APInt (`8`, Byte & `0xFF`), ByteIdx * NumBitsInByte);
4043	});
4044	}
4045
4046	for (unsigned ByteIdx = `0`; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4047	unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
4048	uint64_t Ctrl = `0`;
4049	INT_TYPE_SWITCH(
4050	ElemT, { Ctrl = static_cast<uint64_t>(APtr.elem<T>(Idx)) & `0x3F`; });
4051
4052	APInt Byte(`8`, `0`);
4053	for (unsigned BitIdx = `0`; BitIdx != NumBitsInByte; ++BitIdx) {
4054	Byte.setBitVal(BitPosition: BitIdx, BitValue: BQWord [(Ctrl + BitIdx) & `0x3F`]);
4055	}
4056	INT_TYPE_SWITCH(ElemT,
4057	{ Dst.elem<T>(Idx) = T::from(Byte.getZExtValue()); });
4058	}
4059	}
4060
4061	Dst.initializeAllElements();
4062
4063	return true;
4064	}
4065
4066	static bool interp_builtin_ia32_gfni_affine(InterpState &S, CodePtr OpPC,
4067	const CallExpr *Call,
4068	bool Inverse) {
4069	assert(Call->getNumArgs() == `3`);
4070	QualType XType = Call->getArg(Arg: `0`)->getType();
4071	QualType AType = Call->getArg(Arg: `1`)->getType();
4072	QualType ImmType = Call->getArg(Arg: `2`)->getType();
4073	if (!XType ->isVectorType() \|\| !AType ->isVectorType() \|\|
4074	!ImmType ->isIntegerType()) {
4075	return false;
4076	}
4077
4078	Pointer X, A;
4079	APSInt Imm = popToAPSInt(S, E: Call->getArg(Arg: `2`));
4080	A = S.Stk.pop<Pointer>();
4081	X = S.Stk.pop<Pointer>();
4082
4083	const Pointer &Dst = S.Stk.peek<Pointer>();
4084	const auto *AVecT = AType ->castAs<VectorType>();
4085	assert(XType->castAs<VectorType>()->getNumElements() ==
4086	AVecT->getNumElements());
4087	unsigned NumBytesInQWord = `8`;
4088	unsigned NumBytes = AVecT->getNumElements();
4089	unsigned NumBitsInQWord = `64`;
4090	unsigned NumQWords = NumBytes / NumBytesInQWord;
4091	unsigned NumBitsInByte = `8`;
4092	PrimType AElemT = *S.getContext().classify(T: AVecT->getElementType());
4093
4094	// computing AX + Imm*
4095	for (unsigned QWordIdx = `0`; QWordIdx != NumQWords; ++QWordIdx) {
4096	// Extract the QWords from X, A
4097	APInt XQWord(NumBitsInQWord, `0`);
4098	APInt AQWord(NumBitsInQWord, `0`);
4099	for (unsigned ByteIdx = `0`; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4100	unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx;
4101	uint8_t XByte;
4102	uint8_t AByte;
4103	INT_TYPE_SWITCH(AElemT, {
4104	XByte = static_cast<uint8_t>(X.elem<T>(Idx));
4105	AByte = static_cast<uint8_t>(A.elem<T>(Idx));
4106	});
4107
4108	XQWord.insertBits(SubBits: APInt (NumBitsInByte, XByte), bitPosition: ByteIdx * NumBitsInByte);
4109	AQWord.insertBits(SubBits: APInt (NumBitsInByte, AByte), bitPosition: ByteIdx * NumBitsInByte);
4110	}
4111
4112	for (unsigned ByteIdx = `0`; ByteIdx != NumBytesInQWord; ++ByteIdx) {
4113	unsigned Idx = QWordIdx * NumBytesInQWord + ByteIdx;
4114	uint8_t XByte =
4115	XQWord.lshr(shiftAmt: ByteIdx * NumBitsInByte).getLoBits(numBits: `8`).getZExtValue();
4116	INT_TYPE_SWITCH(AElemT, {
4117	Dst.elem<T>(Idx) = T::from(GFNIAffine(XByte, AQWord, Imm, Inverse));
4118	});
4119	}
4120	}
4121	Dst.initializeAllElements();
4122	return true;
4123	}
4124
4125	static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC,
4126	const CallExpr *Call) {
4127	assert(Call->getNumArgs() == `2`);
4128
4129	QualType AType = Call->getArg(Arg: `0`)->getType();
4130	QualType BType = Call->getArg(Arg: `1`)->getType();
4131	if (!AType ->isVectorType() \|\| !BType ->isVectorType()) {
4132	return false;
4133	}
4134
4135	Pointer A, B;
4136	B = S.Stk.pop<Pointer>();
4137	A = S.Stk.pop<Pointer>();
4138
4139	const Pointer &Dst = S.Stk.peek<Pointer>();
4140	const auto *AVecT = AType ->castAs<VectorType>();
4141	assert(AVecT->getNumElements() ==
4142	BType->castAs<VectorType>()->getNumElements());
4143
4144	PrimType AElemT = *S.getContext().classify(T: AVecT->getElementType());
4145	unsigned NumBytes = A.getNumElems();
4146
4147	for (unsigned ByteIdx = `0`; ByteIdx != NumBytes; ++ByteIdx) {
4148	uint8_t AByte, BByte;
4149	INT_TYPE_SWITCH(AElemT, {
4150	AByte = static_cast<uint8_t>(A.elem<T>(ByteIdx));
4151	BByte = static_cast<uint8_t>(B.elem<T>(ByteIdx));
4152	Dst.elem<T>(ByteIdx) = T::from(GFNIMul(AByte, BByte));
4153	});
4154	}
4155
4156	Dst.initializeAllElements();
4157	return true;
4158	}
4159
4160	bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
4161	uint32_t BuiltinID) {
4162	if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(ID: BuiltinID))
4163	return Invalid(S, OpPC);
4164
4165	const InterpFrame *Frame = S.Current;
4166	switch (BuiltinID) {
4167	case Builtin::BI__builtin_is_constant_evaluated:
4168	return interp__builtin_is_constant_evaluated(S, OpPC, Frame, Call);
4169
4170	case Builtin::BI__builtin_assume:
4171	case Builtin::BI__assume:
4172	return interp__builtin_assume(S, OpPC, Frame, Call);
4173
4174	case Builtin::BI__builtin_strcmp:
4175	case Builtin::BIstrcmp:
4176	case Builtin::BI__builtin_strncmp:
4177	case Builtin::BIstrncmp:
4178	case Builtin::BI__builtin_wcsncmp:
4179	case Builtin::BIwcsncmp:
4180	case Builtin::BI__builtin_wcscmp:
4181	case Builtin::BIwcscmp:
4182	return interp__builtin_strcmp(S, OpPC, Frame, Call, ID: BuiltinID);
4183
4184	case Builtin::BI__builtin_strlen:
4185	case Builtin::BIstrlen:
4186	case Builtin::BI__builtin_wcslen:
4187	case Builtin::BIwcslen:
4188	return interp__builtin_strlen(S, OpPC, Frame, Call, ID: BuiltinID);
4189
4190	case Builtin::BI__builtin_nan:
4191	case Builtin::BI__builtin_nanf:
4192	case Builtin::BI__builtin_nanl:
4193	case Builtin::BI__builtin_nanf16:
4194	case Builtin::BI__builtin_nanf128:
4195	return interp__builtin_nan(S, OpPC, Frame, Call, /Signaling=/false);
4196
4197	case Builtin::BI__builtin_nans:
4198	case Builtin::BI__builtin_nansf:
4199	case Builtin::BI__builtin_nansl:
4200	case Builtin::BI__builtin_nansf16:
4201	case Builtin::BI__builtin_nansf128:
4202	return interp__builtin_nan(S, OpPC, Frame, Call, /Signaling=/true);
4203
4204	case Builtin::BI__builtin_huge_val:
4205	case Builtin::BI__builtin_huge_valf:
4206	case Builtin::BI__builtin_huge_vall:
4207	case Builtin::BI__builtin_huge_valf16:
4208	case Builtin::BI__builtin_huge_valf128:
4209	case Builtin::BI__builtin_inf:
4210	case Builtin::BI__builtin_inff:
4211	case Builtin::BI__builtin_infl:
4212	case Builtin::BI__builtin_inff16:
4213	case Builtin::BI__builtin_inff128:
4214	return interp__builtin_inf(S, OpPC, Frame, Call);
4215
4216	case Builtin::BI__builtin_copysign:
4217	case Builtin::BI__builtin_copysignf:
4218	case Builtin::BI__builtin_copysignl:
4219	case Builtin::BI__builtin_copysignf128:
4220	return interp__builtin_copysign(S, OpPC, Frame);
4221
4222	case Builtin::BI__builtin_fmin:
4223	case Builtin::BI__builtin_fminf:
4224	case Builtin::BI__builtin_fminl:
4225	case Builtin::BI__builtin_fminf16:
4226	case Builtin::BI__builtin_fminf128:
4227	return interp__builtin_fmin(S, OpPC, Frame, /IsNumBuiltin=/false);
4228
4229	case Builtin::BI__builtin_fminimum_num:
4230	case Builtin::BI__builtin_fminimum_numf:
4231	case Builtin::BI__builtin_fminimum_numl:
4232	case Builtin::BI__builtin_fminimum_numf16:
4233	case Builtin::BI__builtin_fminimum_numf128:
4234	return interp__builtin_fmin(S, OpPC, Frame, /IsNumBuiltin=/true);
4235
4236	case Builtin::BI__builtin_fmax:
4237	case Builtin::BI__builtin_fmaxf:
4238	case Builtin::BI__builtin_fmaxl:
4239	case Builtin::BI__builtin_fmaxf16:
4240	case Builtin::BI__builtin_fmaxf128:
4241	return interp__builtin_fmax(S, OpPC, Frame, /IsNumBuiltin=/false);
4242
4243	case Builtin::BI__builtin_fmaximum_num:
4244	case Builtin::BI__builtin_fmaximum_numf:
4245	case Builtin::BI__builtin_fmaximum_numl:
4246	case Builtin::BI__builtin_fmaximum_numf16:
4247	case Builtin::BI__builtin_fmaximum_numf128:
4248	return interp__builtin_fmax(S, OpPC, Frame, /IsNumBuiltin=/true);
4249
4250	case Builtin::BI__builtin_isnan:
4251	return interp__builtin_isnan(S, OpPC, Frame, Call);
4252
4253	case Builtin::BI__builtin_issignaling:
4254	return interp__builtin_issignaling(S, OpPC, Frame, Call);
4255
4256	case Builtin::BI__builtin_isinf:
4257	return interp__builtin_isinf(S, OpPC, Frame, /Sign=/CheckSign: false, Call);
4258
4259	case Builtin::BI__builtin_isinf_sign:
4260	return interp__builtin_isinf(S, OpPC, Frame, /Sign=/CheckSign: true, Call);
4261
4262	case Builtin::BI__builtin_isfinite:
4263	return interp__builtin_isfinite(S, OpPC, Frame, Call);
4264
4265	case Builtin::BI__builtin_isnormal:
4266	return interp__builtin_isnormal(S, OpPC, Frame, Call);
4267
4268	case Builtin::BI__builtin_issubnormal:
4269	return interp__builtin_issubnormal(S, OpPC, Frame, Call);
4270
4271	case Builtin::BI__builtin_iszero:
4272	return interp__builtin_iszero(S, OpPC, Frame, Call);
4273
4274	case Builtin::BI__builtin_signbit:
4275	case Builtin::BI__builtin_signbitf:
4276	case Builtin::BI__builtin_signbitl:
4277	return interp__builtin_signbit(S, OpPC, Frame, Call);
4278
4279	case Builtin::BI__builtin_isgreater:
4280	case Builtin::BI__builtin_isgreaterequal:
4281	case Builtin::BI__builtin_isless:
4282	case Builtin::BI__builtin_islessequal:
4283	case Builtin::BI__builtin_islessgreater:
4284	case Builtin::BI__builtin_isunordered:
4285	return interp_floating_comparison(S, OpPC, Call, ID: BuiltinID);
4286
4287	case Builtin::BI__builtin_isfpclass:
4288	return interp__builtin_isfpclass(S, OpPC, Frame, Call);
4289
4290	case Builtin::BI__builtin_fpclassify:
4291	return interp__builtin_fpclassify(S, OpPC, Frame, Call);
4292
4293	case Builtin::BI__builtin_fabs:
4294	case Builtin::BI__builtin_fabsf:
4295	case Builtin::BI__builtin_fabsl:
4296	case Builtin::BI__builtin_fabsf128:
4297	return interp__builtin_fabs(S, OpPC, Frame);
4298
4299	case Builtin::BI__builtin_abs:
4300	case Builtin::BI__builtin_labs:
4301	case Builtin::BI__builtin_llabs:
4302	return interp__builtin_abs(S, OpPC, Frame, Call);
4303
4304	case Builtin::BI__builtin_popcount:
4305	case Builtin::BI__builtin_popcountl:
4306	case Builtin::BI__builtin_popcountll:
4307	case Builtin::BI__builtin_popcountg:
4308	case Builtin::BI__popcnt16: // Microsoft variants of popcount
4309	case Builtin::BI__popcnt:
4310	case Builtin::BI__popcnt64:
4311	return interp__builtin_popcount(S, OpPC, Frame, Call);
4312
4313	case Builtin::BI__builtin_parity:
4314	case Builtin::BI__builtin_parityl:
4315	case Builtin::BI__builtin_parityll:
4316	return interp__builtin_elementwise_int_unaryop(
4317	S, OpPC, Call, Fn: [](const APSInt &Val) {
4318	return APInt (Val.getBitWidth(), Val.popcount() % `2`);
4319	});
4320	case Builtin::BI__builtin_clrsb:
4321	case Builtin::BI__builtin_clrsbl:
4322	case Builtin::BI__builtin_clrsbll:
4323	return interp__builtin_elementwise_int_unaryop(
4324	S, OpPC, Call, Fn: [](const APSInt &Val) {
4325	return APInt (Val.getBitWidth(),
4326	Val.getBitWidth() - Val.getSignificantBits());
4327	});
4328	case Builtin::BI__builtin_bitreverseg:
4329	case Builtin::BI__builtin_bitreverse8:
4330	case Builtin::BI__builtin_bitreverse16:
4331	case Builtin::BI__builtin_bitreverse32:
4332	case Builtin::BI__builtin_bitreverse64:
4333	return interp__builtin_elementwise_int_unaryop(
4334	S, OpPC, Call, Fn: [](const APSInt &Val) { return Val.reverseBits(); });
4335
4336	case Builtin::BI__builtin_classify_type:
4337	return interp__builtin_classify_type(S, OpPC, Frame, Call);
4338
4339	case Builtin::BI__builtin_expect:
4340	case Builtin::BI__builtin_expect_with_probability:
4341	return interp__builtin_expect(S, OpPC, Frame, Call);
4342
4343	case Builtin::BI__builtin_rotateleft8:
4344	case Builtin::BI__builtin_rotateleft16:
4345	case Builtin::BI__builtin_rotateleft32:
4346	case Builtin::BI__builtin_rotateleft64:
4347	case Builtin::BI__builtin_stdc_rotate_left:
4348	case Builtin::BI_rotl8: // Microsoft variants of rotate left
4349	case Builtin::BI_rotl16:
4350	case Builtin::BI_rotl:
4351	case Builtin::BI_lrotl:
4352	case Builtin::BI_rotl64:
4353	case Builtin::BI__builtin_rotateright8:
4354	case Builtin::BI__builtin_rotateright16:
4355	case Builtin::BI__builtin_rotateright32:
4356	case Builtin::BI__builtin_rotateright64:
4357	case Builtin::BI__builtin_stdc_rotate_right:
4358	case Builtin::BI_rotr8: // Microsoft variants of rotate right
4359	case Builtin::BI_rotr16:
4360	case Builtin::BI_rotr:
4361	case Builtin::BI_lrotr:
4362	case Builtin::BI_rotr64: {
4363	// Determine if this is a rotate right operation
4364	bool IsRotateRight;
4365	switch (BuiltinID) {
4366	case Builtin::BI__builtin_rotateright8:
4367	case Builtin::BI__builtin_rotateright16:
4368	case Builtin::BI__builtin_rotateright32:
4369	case Builtin::BI__builtin_rotateright64:
4370	case Builtin::BI__builtin_stdc_rotate_right:
4371	case Builtin::BI_rotr8:
4372	case Builtin::BI_rotr16:
4373	case Builtin::BI_rotr:
4374	case Builtin::BI_lrotr:
4375	case Builtin::BI_rotr64:
4376	IsRotateRight = true;
4377	break;
4378	default:
4379	IsRotateRight = false;
4380	break;
4381	}
4382
4383	return interp__builtin_elementwise_int_binop(
4384	S, OpPC, Call, Fn: [IsRotateRight](const APSInt &Value, APSInt Amount) {
4385	Amount = NormalizeRotateAmount(Value, Amount);
4386	return IsRotateRight ? Value.rotr(rotateAmt: Amount.getZExtValue())
4387	: Value.rotl(rotateAmt: Amount.getZExtValue());
4388	});
4389	}
4390
4391	case Builtin::BI__builtin_ffs:
4392	case Builtin::BI__builtin_ffsl:
4393	case Builtin::BI__builtin_ffsll:
4394	return interp__builtin_elementwise_int_unaryop(
4395	S, OpPC, Call, Fn: [](const APSInt &Val) {
4396	return APInt (Val.getBitWidth(),
4397	Val.isZero() ? `0u` : Val.countTrailingZeros() + `1u`);
4398	});
4399
4400	case Builtin::BIaddressof:
4401	case Builtin::BI__addressof:
4402	case Builtin::BI__builtin_addressof:
4403	assert(isNoopBuiltin(BuiltinID));
4404	return interp__builtin_addressof(S, OpPC, Frame, Call);
4405
4406	case Builtin::BIas_const:
4407	case Builtin::BIforward:
4408	case Builtin::BIforward_like:
4409	case Builtin::BImove:
4410	case Builtin::BImove_if_noexcept:
4411	assert(isNoopBuiltin(BuiltinID));
4412	return interp__builtin_move(S, OpPC, Frame, Call);
4413
4414	case Builtin::BI__builtin_eh_return_data_regno:
4415	return interp__builtin_eh_return_data_regno(S, OpPC, Frame, Call);
4416
4417	case Builtin::BI__builtin_launder:
4418	assert(isNoopBuiltin(BuiltinID));
4419	return true;
4420
4421	case Builtin::BI__builtin_add_overflow:
4422	case Builtin::BI__builtin_sub_overflow:
4423	case Builtin::BI__builtin_mul_overflow:
4424	case Builtin::BI__builtin_sadd_overflow:
4425	case Builtin::BI__builtin_uadd_overflow:
4426	case Builtin::BI__builtin_uaddl_overflow:
4427	case Builtin::BI__builtin_uaddll_overflow:
4428	case Builtin::BI__builtin_usub_overflow:
4429	case Builtin::BI__builtin_usubl_overflow:
4430	case Builtin::BI__builtin_usubll_overflow:
4431	case Builtin::BI__builtin_umul_overflow:
4432	case Builtin::BI__builtin_umull_overflow:
4433	case Builtin::BI__builtin_umulll_overflow:
4434	case Builtin::BI__builtin_saddl_overflow:
4435	case Builtin::BI__builtin_saddll_overflow:
4436	case Builtin::BI__builtin_ssub_overflow:
4437	case Builtin::BI__builtin_ssubl_overflow:
4438	case Builtin::BI__builtin_ssubll_overflow:
4439	case Builtin::BI__builtin_smul_overflow:
4440	case Builtin::BI__builtin_smull_overflow:
4441	case Builtin::BI__builtin_smulll_overflow:
4442	return interp__builtin_overflowop(S, OpPC, Call, BuiltinOp: BuiltinID);
4443
4444	case Builtin::BI__builtin_addcb:
4445	case Builtin::BI__builtin_addcs:
4446	case Builtin::BI__builtin_addc:
4447	case Builtin::BI__builtin_addcl:
4448	case Builtin::BI__builtin_addcll:
4449	case Builtin::BI__builtin_subcb:
4450	case Builtin::BI__builtin_subcs:
4451	case Builtin::BI__builtin_subc:
4452	case Builtin::BI__builtin_subcl:
4453	case Builtin::BI__builtin_subcll:
4454	return interp__builtin_carryop(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4455
4456	case Builtin::BI__builtin_clz:
4457	case Builtin::BI__builtin_clzl:
4458	case Builtin::BI__builtin_clzll:
4459	case Builtin::BI__builtin_clzs:
4460	case Builtin::BI__builtin_clzg:
4461	case Builtin::BI__lzcnt16: // Microsoft variants of count leading-zeroes
4462	case Builtin::BI__lzcnt:
4463	case Builtin::BI__lzcnt64:
4464	return interp__builtin_clz(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4465
4466	case Builtin::BI__builtin_ctz:
4467	case Builtin::BI__builtin_ctzl:
4468	case Builtin::BI__builtin_ctzll:
4469	case Builtin::BI__builtin_ctzs:
4470	case Builtin::BI__builtin_ctzg:
4471	return interp__builtin_ctz(S, OpPC, Frame, Call, BuiltinID);
4472
4473	case Builtin::BI__builtin_elementwise_clzg:
4474	case Builtin::BI__builtin_elementwise_ctzg:
4475	return interp__builtin_elementwise_countzeroes(S, OpPC, Frame, Call,
4476	BuiltinID);
4477	case Builtin::BI__builtin_bswapg:
4478	case Builtin::BI__builtin_bswap16:
4479	case Builtin::BI__builtin_bswap32:
4480	case Builtin::BI__builtin_bswap64:
4481	return interp__builtin_bswap(S, OpPC, Frame, Call);
4482
4483	case Builtin::BI__atomic_always_lock_free:
4484	case Builtin::BI__atomic_is_lock_free:
4485	return interp__builtin_atomic_lock_free(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4486
4487	case Builtin::BI__c11_atomic_is_lock_free:
4488	return interp__builtin_c11_atomic_is_lock_free(S, OpPC, Frame, Call);
4489
4490	case Builtin::BI__builtin_complex:
4491	return interp__builtin_complex(S, OpPC, Frame, Call);
4492
4493	case Builtin::BI__builtin_is_aligned:
4494	case Builtin::BI__builtin_align_up:
4495	case Builtin::BI__builtin_align_down:
4496	return interp__builtin_is_aligned_up_down(S, OpPC, Frame, Call, BuiltinOp: BuiltinID);
4497
4498	case Builtin::BI__builtin_assume_aligned:
4499	return interp__builtin_assume_aligned(S, OpPC, Frame, Call);
4500
4501	case clang::X86::BI__builtin_ia32_crc32qi:
4502	return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: `1`);
4503	case clang::X86::BI__builtin_ia32_crc32hi:
4504	return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: `2`);
4505	case clang::X86::BI__builtin_ia32_crc32si:
4506	return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: `4`);
4507	case clang::X86::BI__builtin_ia32_crc32di:
4508	return interp__builtin_ia32_crc32(S, OpPC, Frame, Call, DataBytes: `8`);
4509
4510	case clang::X86::BI__builtin_ia32_bextr_u32:
4511	case clang::X86::BI__builtin_ia32_bextr_u64:
4512	case clang::X86::BI__builtin_ia32_bextri_u32:
4513	case clang::X86::BI__builtin_ia32_bextri_u64:
4514	return interp__builtin_elementwise_int_binop(
4515	S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Idx) {
4516	unsigned BitWidth = Val.getBitWidth();
4517	uint64_t Shift = Idx.extractBitsAsZExtValue(numBits: `8`, bitPosition: `0`);
4518	uint64_t Length = Idx.extractBitsAsZExtValue(numBits: `8`, bitPosition: `8`);
4519	if (Length > BitWidth) {
4520	Length = BitWidth;
4521	}
4522
4523	// Handle out of bounds cases.
4524	if (Length == `0` \|\| Shift >= BitWidth)
4525	return APInt (BitWidth, `0`);
4526
4527	uint64_t Result = Val.getZExtValue() >> Shift;
4528	Result &= llvm::maskTrailingOnes<uint64_t>(N: Length);
4529	return APInt (BitWidth, Result);
4530	});
4531
4532	case clang::X86::BI__builtin_ia32_bzhi_si:
4533	case clang::X86::BI__builtin_ia32_bzhi_di:
4534	return interp__builtin_elementwise_int_binop(
4535	S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Idx) {
4536	unsigned BitWidth = Val.getBitWidth();
4537	uint64_t Index = Idx.extractBitsAsZExtValue(numBits: `8`, bitPosition: `0`);
4538	APSInt Result = Val;
4539
4540	if (Index < BitWidth)
4541	Result.clearHighBits(hiBits: BitWidth - Index);
4542
4543	return Result;
4544	});
4545
4546	case clang::X86::BI__builtin_ia32_ktestcqi:
4547	case clang::X86::BI__builtin_ia32_ktestchi:
4548	case clang::X86::BI__builtin_ia32_ktestcsi:
4549	case clang::X86::BI__builtin_ia32_ktestcdi:
4550	return interp__builtin_elementwise_int_binop(
4551	S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4552	return APInt (sizeof(unsigned char) * `8`, (~A & B) == `0`);
4553	});
4554
4555	case clang::X86::BI__builtin_ia32_ktestzqi:
4556	case clang::X86::BI__builtin_ia32_ktestzhi:
4557	case clang::X86::BI__builtin_ia32_ktestzsi:
4558	case clang::X86::BI__builtin_ia32_ktestzdi:
4559	return interp__builtin_elementwise_int_binop(
4560	S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4561	return APInt (sizeof(unsigned char) * `8`, (A & B) == `0`);
4562	});
4563
4564	case clang::X86::BI__builtin_ia32_kortestcqi:
4565	case clang::X86::BI__builtin_ia32_kortestchi:
4566	case clang::X86::BI__builtin_ia32_kortestcsi:
4567	case clang::X86::BI__builtin_ia32_kortestcdi:
4568	return interp__builtin_elementwise_int_binop(
4569	S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4570	return APInt (sizeof(unsigned char) * `8`, ~(A \| B) == `0`);
4571	});
4572
4573	case clang::X86::BI__builtin_ia32_kortestzqi:
4574	case clang::X86::BI__builtin_ia32_kortestzhi:
4575	case clang::X86::BI__builtin_ia32_kortestzsi:
4576	case clang::X86::BI__builtin_ia32_kortestzdi:
4577	return interp__builtin_elementwise_int_binop(
4578	S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
4579	return APInt (sizeof(unsigned char) * `8`, (A \| B) == `0`);
4580	});
4581
4582	case clang::X86::BI__builtin_ia32_kshiftliqi:
4583	case clang::X86::BI__builtin_ia32_kshiftlihi:
4584	case clang::X86::BI__builtin_ia32_kshiftlisi:
4585	case clang::X86::BI__builtin_ia32_kshiftlidi:
4586	return interp__builtin_elementwise_int_binop(
4587	S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4588	unsigned Amt = RHS.getZExtValue() & `0xFF`;
4589	if (Amt >= LHS.getBitWidth())
4590	return APInt::getZero(numBits: LHS.getBitWidth());
4591	return LHS.shl(shiftAmt: Amt);
4592	});
4593
4594	case clang::X86::BI__builtin_ia32_kshiftriqi:
4595	case clang::X86::BI__builtin_ia32_kshiftrihi:
4596	case clang::X86::BI__builtin_ia32_kshiftrisi:
4597	case clang::X86::BI__builtin_ia32_kshiftridi:
4598	return interp__builtin_elementwise_int_binop(
4599	S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4600	unsigned Amt = RHS.getZExtValue() & `0xFF`;
4601	if (Amt >= LHS.getBitWidth())
4602	return APInt::getZero(numBits: LHS.getBitWidth());
4603	return LHS.lshr(shiftAmt: Amt);
4604	});
4605
4606	case clang::X86::BI__builtin_ia32_lzcnt_u16:
4607	case clang::X86::BI__builtin_ia32_lzcnt_u32:
4608	case clang::X86::BI__builtin_ia32_lzcnt_u64:
4609	return interp__builtin_elementwise_int_unaryop(
4610	S, OpPC, Call, Fn: [](const APSInt &Src) {
4611	return APInt (Src.getBitWidth(), Src.countLeadingZeros());
4612	});
4613
4614	case clang::X86::BI__builtin_ia32_tzcnt_u16:
4615	case clang::X86::BI__builtin_ia32_tzcnt_u32:
4616	case clang::X86::BI__builtin_ia32_tzcnt_u64:
4617	return interp__builtin_elementwise_int_unaryop(
4618	S, OpPC, Call, Fn: [](const APSInt &Src) {
4619	return APInt (Src.getBitWidth(), Src.countTrailingZeros());
4620	});
4621
4622	case clang::X86::BI__builtin_ia32_pdep_si:
4623	case clang::X86::BI__builtin_ia32_pdep_di:
4624	return interp__builtin_elementwise_int_binop(
4625	S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Mask) {
4626	unsigned BitWidth = Val.getBitWidth();
4627	APInt Result = APInt::getZero(numBits: BitWidth);
4628
4629	for (unsigned I = `0`, P = `0`; I != BitWidth; ++I) {
4630	if (Mask [I])
4631	Result.setBitVal(BitPosition: I, BitValue: Val [P++]);
4632	}
4633
4634	return Result;
4635	});
4636
4637	case clang::X86::BI__builtin_ia32_pext_si:
4638	case clang::X86::BI__builtin_ia32_pext_di:
4639	return interp__builtin_elementwise_int_binop(
4640	S, OpPC, Call, Fn: [](const APSInt &Val, const APSInt &Mask) {
4641	unsigned BitWidth = Val.getBitWidth();
4642	APInt Result = APInt::getZero(numBits: BitWidth);
4643
4644	for (unsigned I = `0`, P = `0`; I != BitWidth; ++I) {
4645	if (Mask [I])
4646	Result.setBitVal(BitPosition: P++, BitValue: Val [I]);
4647	}
4648
4649	return Result;
4650	});
4651
4652	case clang::X86::BI__builtin_ia32_addcarryx_u32:
4653	case clang::X86::BI__builtin_ia32_addcarryx_u64:
4654	case clang::X86::BI__builtin_ia32_subborrow_u32:
4655	case clang::X86::BI__builtin_ia32_subborrow_u64:
4656	return interp__builtin_ia32_addcarry_subborrow(S, OpPC, Frame, Call,
4657	BuiltinOp: BuiltinID);
4658
4659	case Builtin::BI__builtin_os_log_format_buffer_size:
4660	return interp__builtin_os_log_format_buffer_size(S, OpPC, Frame, Call);
4661
4662	case Builtin::BI__builtin_ptrauth_string_discriminator:
4663	return interp__builtin_ptrauth_string_discriminator(S, OpPC, Frame, Call);
4664
4665	case Builtin::BI__builtin_infer_alloc_token:
4666	return interp__builtin_infer_alloc_token(S, OpPC, Frame, Call);
4667
4668	case Builtin::BI__noop:
4669	pushInteger(S, Val: `0`, QT: Call->getType());
4670	return true;
4671
4672	case Builtin::BI__builtin_operator_new:
4673	return interp__builtin_operator_new(S, OpPC, Frame, Call);
4674
4675	case Builtin::BI__builtin_operator_delete:
4676	return interp__builtin_operator_delete(S, OpPC, Frame, Call);
4677
4678	case Builtin::BI__arithmetic_fence:
4679	return interp__builtin_arithmetic_fence(S, OpPC, Frame, Call);
4680
4681	case Builtin::BI__builtin_reduce_add:
4682	case Builtin::BI__builtin_reduce_mul:
4683	case Builtin::BI__builtin_reduce_and:
4684	case Builtin::BI__builtin_reduce_or:
4685	case Builtin::BI__builtin_reduce_xor:
4686	case Builtin::BI__builtin_reduce_min:
4687	case Builtin::BI__builtin_reduce_max:
4688	return interp__builtin_vector_reduce(S, OpPC, Call, ID: BuiltinID);
4689
4690	case Builtin::BI__builtin_elementwise_popcount:
4691	return interp__builtin_elementwise_int_unaryop(
4692	S, OpPC, Call, Fn: [](const APSInt &Src) {
4693	return APInt (Src.getBitWidth(), Src.popcount());
4694	});
4695	case Builtin::BI__builtin_elementwise_bitreverse:
4696	return interp__builtin_elementwise_int_unaryop(
4697	S, OpPC, Call, Fn: [](const APSInt &Src) { return Src.reverseBits(); });
4698
4699	case Builtin::BI__builtin_elementwise_abs:
4700	return interp__builtin_elementwise_abs(S, OpPC, Frame, Call, BuiltinID);
4701
4702	case Builtin::BI__builtin_memcpy:
4703	case Builtin::BImemcpy:
4704	case Builtin::BI__builtin_wmemcpy:
4705	case Builtin::BIwmemcpy:
4706	case Builtin::BI__builtin_memmove:
4707	case Builtin::BImemmove:
4708	case Builtin::BI__builtin_wmemmove:
4709	case Builtin::BIwmemmove:
4710	return interp__builtin_memcpy(S, OpPC, Frame, Call, ID: BuiltinID);
4711
4712	case Builtin::BI__builtin_memcmp:
4713	case Builtin::BImemcmp:
4714	case Builtin::BI__builtin_bcmp:
4715	case Builtin::BIbcmp:
4716	case Builtin::BI__builtin_wmemcmp:
4717	case Builtin::BIwmemcmp:
4718	return interp__builtin_memcmp(S, OpPC, Frame, Call, ID: BuiltinID);
4719
4720	case Builtin::BImemchr:
4721	case Builtin::BI__builtin_memchr:
4722	case Builtin::BIstrchr:
4723	case Builtin::BI__builtin_strchr:
4724	case Builtin::BIwmemchr:
4725	case Builtin::BI__builtin_wmemchr:
4726	case Builtin::BIwcschr:
4727	case Builtin::BI__builtin_wcschr:
4728	case Builtin::BI__builtin_char_memchr:
4729	return interp__builtin_memchr(S, OpPC, Call, ID: BuiltinID);
4730
4731	case Builtin::BI__builtin_object_size:
4732	case Builtin::BI__builtin_dynamic_object_size:
4733	return interp__builtin_object_size(S, OpPC, Frame, Call);
4734
4735	case Builtin::BI__builtin_is_within_lifetime:
4736	return interp__builtin_is_within_lifetime(S, OpPC, Call);
4737
4738	case Builtin::BI__builtin_elementwise_add_sat:
4739	return interp__builtin_elementwise_int_binop(
4740	S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4741	return LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS);
4742	});
4743
4744	case Builtin::BI__builtin_elementwise_sub_sat:
4745	return interp__builtin_elementwise_int_binop(
4746	S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4747	return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
4748	});
4749	case X86::BI__builtin_ia32_extract128i256:
4750	case X86::BI__builtin_ia32_vextractf128_pd256:
4751	case X86::BI__builtin_ia32_vextractf128_ps256:
4752	case X86::BI__builtin_ia32_vextractf128_si256:
4753	return interp__builtin_x86_extract_vector(S, OpPC, Call, ID: BuiltinID);
4754
4755	case X86::BI__builtin_ia32_extractf32x4_256_mask:
4756	case X86::BI__builtin_ia32_extractf32x4_mask:
4757	case X86::BI__builtin_ia32_extractf32x8_mask:
4758	case X86::BI__builtin_ia32_extractf64x2_256_mask:
4759	case X86::BI__builtin_ia32_extractf64x2_512_mask:
4760	case X86::BI__builtin_ia32_extractf64x4_mask:
4761	case X86::BI__builtin_ia32_extracti32x4_256_mask:
4762	case X86::BI__builtin_ia32_extracti32x4_mask:
4763	case X86::BI__builtin_ia32_extracti32x8_mask:
4764	case X86::BI__builtin_ia32_extracti64x2_256_mask:
4765	case X86::BI__builtin_ia32_extracti64x2_512_mask:
4766	case X86::BI__builtin_ia32_extracti64x4_mask:
4767	return interp__builtin_x86_extract_vector_masked(S, OpPC, Call, ID: BuiltinID);
4768
4769	case clang::X86::BI__builtin_ia32_pmulhrsw128:
4770	case clang::X86::BI__builtin_ia32_pmulhrsw256:
4771	case clang::X86::BI__builtin_ia32_pmulhrsw512:
4772	return interp__builtin_elementwise_int_binop(
4773	S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4774	return (llvm::APIntOps::mulsExtended(C1: LHS, C2: RHS).ashr(ShiftAmt: `14`) + `1`)
4775	.extractBits(numBits: `16`, bitPosition: `1`);
4776	});
4777
4778	case clang::X86::BI__builtin_ia32_movmskps:
4779	case clang::X86::BI__builtin_ia32_movmskpd:
4780	case clang::X86::BI__builtin_ia32_pmovmskb128:
4781	case clang::X86::BI__builtin_ia32_pmovmskb256:
4782	case clang::X86::BI__builtin_ia32_movmskps256:
4783	case clang::X86::BI__builtin_ia32_movmskpd256: {
4784	return interp__builtin_ia32_movmsk_op(S, OpPC, Call);
4785	}
4786
4787	case X86::BI__builtin_ia32_psignb128:
4788	case X86::BI__builtin_ia32_psignb256:
4789	case X86::BI__builtin_ia32_psignw128:
4790	case X86::BI__builtin_ia32_psignw256:
4791	case X86::BI__builtin_ia32_psignd128:
4792	case X86::BI__builtin_ia32_psignd256:
4793	return interp__builtin_elementwise_int_binop(
4794	S, OpPC, Call, Fn: [](const APInt &AElem, const APInt &BElem) {
4795	if (BElem.isZero())
4796	return APInt::getZero(numBits: AElem.getBitWidth());
4797	if (BElem.isNegative())
4798	return -AElem;
4799	return AElem;
4800	});
4801
4802	case clang::X86::BI__builtin_ia32_pavgb128:
4803	case clang::X86::BI__builtin_ia32_pavgw128:
4804	case clang::X86::BI__builtin_ia32_pavgb256:
4805	case clang::X86::BI__builtin_ia32_pavgw256:
4806	case clang::X86::BI__builtin_ia32_pavgb512:
4807	case clang::X86::BI__builtin_ia32_pavgw512:
4808	return interp__builtin_elementwise_int_binop(S, OpPC, Call,
4809	Fn: llvm::APIntOps::avgCeilU);
4810
4811	case clang::X86::BI__builtin_ia32_pmaddubsw128:
4812	case clang::X86::BI__builtin_ia32_pmaddubsw256:
4813	case clang::X86::BI__builtin_ia32_pmaddubsw512:
4814	return interp__builtin_ia32_pmul(
4815	S, OpPC, Call,
4816	Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
4817	const APSInt &HiRHS) {
4818	unsigned BitWidth = `2` * LoLHS.getBitWidth();
4819	return (LoLHS.zext(width: BitWidth) * LoRHS.sext(width: BitWidth))
4820	.sadd_sat(RHS: (HiLHS.zext(width: BitWidth) * HiRHS.sext(width: BitWidth)));
4821	});
4822
4823	case clang::X86::BI__builtin_ia32_pmaddwd128:
4824	case clang::X86::BI__builtin_ia32_pmaddwd256:
4825	case clang::X86::BI__builtin_ia32_pmaddwd512:
4826	return interp__builtin_ia32_pmul(
4827	S, OpPC, Call,
4828	Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
4829	const APSInt &HiRHS) {
4830	unsigned BitWidth = `2` * LoLHS.getBitWidth();
4831	return (LoLHS.sext(width: BitWidth) * LoRHS.sext(width: BitWidth)) +
4832	(HiLHS.sext(width: BitWidth) * HiRHS.sext(width: BitWidth));
4833	});
4834
4835	case clang::X86::BI__builtin_ia32_pmulhuw128:
4836	case clang::X86::BI__builtin_ia32_pmulhuw256:
4837	case clang::X86::BI__builtin_ia32_pmulhuw512:
4838	return interp__builtin_elementwise_int_binop(S, OpPC, Call,
4839	Fn: llvm::APIntOps::mulhu);
4840
4841	case clang::X86::BI__builtin_ia32_pmulhw128:
4842	case clang::X86::BI__builtin_ia32_pmulhw256:
4843	case clang::X86::BI__builtin_ia32_pmulhw512:
4844	return interp__builtin_elementwise_int_binop(S, OpPC, Call,
4845	Fn: llvm::APIntOps::mulhs);
4846
4847	case clang::X86::BI__builtin_ia32_psllv2di:
4848	case clang::X86::BI__builtin_ia32_psllv4di:
4849	case clang::X86::BI__builtin_ia32_psllv4si:
4850	case clang::X86::BI__builtin_ia32_psllv8di:
4851	case clang::X86::BI__builtin_ia32_psllv8hi:
4852	case clang::X86::BI__builtin_ia32_psllv8si:
4853	case clang::X86::BI__builtin_ia32_psllv16hi:
4854	case clang::X86::BI__builtin_ia32_psllv16si:
4855	case clang::X86::BI__builtin_ia32_psllv32hi:
4856	case clang::X86::BI__builtin_ia32_psllwi128:
4857	case clang::X86::BI__builtin_ia32_psllwi256:
4858	case clang::X86::BI__builtin_ia32_psllwi512:
4859	case clang::X86::BI__builtin_ia32_pslldi128:
4860	case clang::X86::BI__builtin_ia32_pslldi256:
4861	case clang::X86::BI__builtin_ia32_pslldi512:
4862	case clang::X86::BI__builtin_ia32_psllqi128:
4863	case clang::X86::BI__builtin_ia32_psllqi256:
4864	case clang::X86::BI__builtin_ia32_psllqi512:
4865	return interp__builtin_elementwise_int_binop(
4866	S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4867	if (RHS.uge(RHS: LHS.getBitWidth())) {
4868	return APInt::getZero(numBits: LHS.getBitWidth());
4869	}
4870	return LHS.shl(shiftAmt: RHS.getZExtValue());
4871	});
4872
4873	case clang::X86::BI__builtin_ia32_psrav4si:
4874	case clang::X86::BI__builtin_ia32_psrav8di:
4875	case clang::X86::BI__builtin_ia32_psrav8hi:
4876	case clang::X86::BI__builtin_ia32_psrav8si:
4877	case clang::X86::BI__builtin_ia32_psrav16hi:
4878	case clang::X86::BI__builtin_ia32_psrav16si:
4879	case clang::X86::BI__builtin_ia32_psrav32hi:
4880	case clang::X86::BI__builtin_ia32_psravq128:
4881	case clang::X86::BI__builtin_ia32_psravq256:
4882	case clang::X86::BI__builtin_ia32_psrawi128:
4883	case clang::X86::BI__builtin_ia32_psrawi256:
4884	case clang::X86::BI__builtin_ia32_psrawi512:
4885	case clang::X86::BI__builtin_ia32_psradi128:
4886	case clang::X86::BI__builtin_ia32_psradi256:
4887	case clang::X86::BI__builtin_ia32_psradi512:
4888	case clang::X86::BI__builtin_ia32_psraqi128:
4889	case clang::X86::BI__builtin_ia32_psraqi256:
4890	case clang::X86::BI__builtin_ia32_psraqi512:
4891	return interp__builtin_elementwise_int_binop(
4892	S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4893	if (RHS.uge(RHS: LHS.getBitWidth())) {
4894	return LHS.ashr(ShiftAmt: LHS.getBitWidth() - `1`);
4895	}
4896	return LHS.ashr(ShiftAmt: RHS.getZExtValue());
4897	});
4898
4899	case clang::X86::BI__builtin_ia32_psrlv2di:
4900	case clang::X86::BI__builtin_ia32_psrlv4di:
4901	case clang::X86::BI__builtin_ia32_psrlv4si:
4902	case clang::X86::BI__builtin_ia32_psrlv8di:
4903	case clang::X86::BI__builtin_ia32_psrlv8hi:
4904	case clang::X86::BI__builtin_ia32_psrlv8si:
4905	case clang::X86::BI__builtin_ia32_psrlv16hi:
4906	case clang::X86::BI__builtin_ia32_psrlv16si:
4907	case clang::X86::BI__builtin_ia32_psrlv32hi:
4908	case clang::X86::BI__builtin_ia32_psrlwi128:
4909	case clang::X86::BI__builtin_ia32_psrlwi256:
4910	case clang::X86::BI__builtin_ia32_psrlwi512:
4911	case clang::X86::BI__builtin_ia32_psrldi128:
4912	case clang::X86::BI__builtin_ia32_psrldi256:
4913	case clang::X86::BI__builtin_ia32_psrldi512:
4914	case clang::X86::BI__builtin_ia32_psrlqi128:
4915	case clang::X86::BI__builtin_ia32_psrlqi256:
4916	case clang::X86::BI__builtin_ia32_psrlqi512:
4917	return interp__builtin_elementwise_int_binop(
4918	S, OpPC, Call, Fn: [](const APSInt &LHS, const APSInt &RHS) {
4919	if (RHS.uge(RHS: LHS.getBitWidth())) {
4920	return APInt::getZero(numBits: LHS.getBitWidth());
4921	}
4922	return LHS.lshr(shiftAmt: RHS.getZExtValue());
4923	});
4924	case clang::X86::BI__builtin_ia32_packsswb128:
4925	case clang::X86::BI__builtin_ia32_packsswb256:
4926	case clang::X86::BI__builtin_ia32_packsswb512:
4927	case clang::X86::BI__builtin_ia32_packssdw128:
4928	case clang::X86::BI__builtin_ia32_packssdw256:
4929	case clang::X86::BI__builtin_ia32_packssdw512:
4930	return interp__builtin_x86_pack(S, OpPC, E: Call, PackFn: [](const APSInt &Src) {
4931	return APInt (Src).truncSSat(width: Src.getBitWidth() / `2`);
4932	});
4933	case clang::X86::BI__builtin_ia32_packusdw128:
4934	case clang::X86::BI__builtin_ia32_packusdw256:
4935	case clang::X86::BI__builtin_ia32_packusdw512:
4936	case clang::X86::BI__builtin_ia32_packuswb128:
4937	case clang::X86::BI__builtin_ia32_packuswb256:
4938	case clang::X86::BI__builtin_ia32_packuswb512:
4939	return interp__builtin_x86_pack(S, OpPC, E: Call, PackFn: [](const APSInt &Src) {
4940	return APInt (Src).truncSSatU(width: Src.getBitWidth() / `2`);
4941	});
4942
4943	case clang::X86::BI__builtin_ia32_selectss_128:
4944	case clang::X86::BI__builtin_ia32_selectsd_128:
4945	case clang::X86::BI__builtin_ia32_selectsh_128:
4946	case clang::X86::BI__builtin_ia32_selectsbf_128:
4947	return interp__builtin_select_scalar(S, Call);
4948	case clang::X86::BI__builtin_ia32_vprotbi:
4949	case clang::X86::BI__builtin_ia32_vprotdi:
4950	case clang::X86::BI__builtin_ia32_vprotqi:
4951	case clang::X86::BI__builtin_ia32_vprotwi:
4952	case clang::X86::BI__builtin_ia32_prold128:
4953	case clang::X86::BI__builtin_ia32_prold256:
4954	case clang::X86::BI__builtin_ia32_prold512:
4955	case clang::X86::BI__builtin_ia32_prolq128:
4956	case clang::X86::BI__builtin_ia32_prolq256:
4957	case clang::X86::BI__builtin_ia32_prolq512:
4958	return interp__builtin_elementwise_int_binop(
4959	S, OpPC, Call,
4960	Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.rotl(rotateAmt: RHS); });
4961
4962	case clang::X86::BI__builtin_ia32_prord128:
4963	case clang::X86::BI__builtin_ia32_prord256:
4964	case clang::X86::BI__builtin_ia32_prord512:
4965	case clang::X86::BI__builtin_ia32_prorq128:
4966	case clang::X86::BI__builtin_ia32_prorq256:
4967	case clang::X86::BI__builtin_ia32_prorq512:
4968	return interp__builtin_elementwise_int_binop(
4969	S, OpPC, Call,
4970	Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.rotr(rotateAmt: RHS); });
4971
4972	case Builtin::BI__builtin_elementwise_max:
4973	case Builtin::BI__builtin_elementwise_min:
4974	return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);
4975
4976	case clang::X86::BI__builtin_ia32_phaddw128:
4977	case clang::X86::BI__builtin_ia32_phaddw256:
4978	case clang::X86::BI__builtin_ia32_phaddd128:
4979	case clang::X86::BI__builtin_ia32_phaddd256:
4980	return interp_builtin_horizontal_int_binop(
4981	S, OpPC, Call,
4982	Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
4983	case clang::X86::BI__builtin_ia32_phaddsw128:
4984	case clang::X86::BI__builtin_ia32_phaddsw256:
4985	return interp_builtin_horizontal_int_binop(
4986	S, OpPC, Call,
4987	Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.sadd_sat(RHS); });
4988	case clang::X86::BI__builtin_ia32_phsubw128:
4989	case clang::X86::BI__builtin_ia32_phsubw256:
4990	case clang::X86::BI__builtin_ia32_phsubd128:
4991	case clang::X86::BI__builtin_ia32_phsubd256:
4992	return interp_builtin_horizontal_int_binop(
4993	S, OpPC, Call,
4994	Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS - RHS; });
4995	case clang::X86::BI__builtin_ia32_phsubsw128:
4996	case clang::X86::BI__builtin_ia32_phsubsw256:
4997	return interp_builtin_horizontal_int_binop(
4998	S, OpPC, Call,
4999	Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS.ssub_sat(RHS); });
5000	case clang::X86::BI__builtin_ia32_haddpd:
5001	case clang::X86::BI__builtin_ia32_haddps:
5002	case clang::X86::BI__builtin_ia32_haddpd256:
5003	case clang::X86::BI__builtin_ia32_haddps256:
5004	return interp_builtin_horizontal_fp_binop(
5005	S, OpPC, Call,
5006	Fn: [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
5007	APFloat F = LHS;
5008	F.add(RHS, RM);
5009	return F;
5010	});
5011	case clang::X86::BI__builtin_ia32_hsubpd:
5012	case clang::X86::BI__builtin_ia32_hsubps:
5013	case clang::X86::BI__builtin_ia32_hsubpd256:
5014	case clang::X86::BI__builtin_ia32_hsubps256:
5015	return interp_builtin_horizontal_fp_binop(
5016	S, OpPC, Call,
5017	Fn: [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
5018	APFloat F = LHS;
5019	F.subtract(RHS, RM);
5020	return F;
5021	});
5022	case clang::X86::BI__builtin_ia32_addsubpd:
5023	case clang::X86::BI__builtin_ia32_addsubps:
5024	case clang::X86::BI__builtin_ia32_addsubpd256:
5025	case clang::X86::BI__builtin_ia32_addsubps256:
5026	return interp__builtin_ia32_addsub(S, OpPC, Call);
5027
5028	case clang::X86::BI__builtin_ia32_pmuldq128:
5029	case clang::X86::BI__builtin_ia32_pmuldq256:
5030	case clang::X86::BI__builtin_ia32_pmuldq512:
5031	return interp__builtin_ia32_pmul(
5032	S, OpPC, Call,
5033	Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
5034	const APSInt &HiRHS) {
5035	return llvm::APIntOps::mulsExtended(C1: LoLHS, C2: LoRHS);
5036	});
5037
5038	case clang::X86::BI__builtin_ia32_pmuludq128:
5039	case clang::X86::BI__builtin_ia32_pmuludq256:
5040	case clang::X86::BI__builtin_ia32_pmuludq512:
5041	return interp__builtin_ia32_pmul(
5042	S, OpPC, Call,
5043	Fn: [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
5044	const APSInt &HiRHS) {
5045	return llvm::APIntOps::muluExtended(C1: LoLHS, C2: LoRHS);
5046	});
5047
5048	case clang::X86::BI__builtin_ia32_pclmulqdq128:
5049	case clang::X86::BI__builtin_ia32_pclmulqdq256:
5050	case clang::X86::BI__builtin_ia32_pclmulqdq512:
5051	return interp__builtin_ia32_pclmulqdq(S, OpPC, Call);
5052
5053	case Builtin::BI__builtin_elementwise_fma:
5054	return interp__builtin_elementwise_triop_fp(
5055	S, OpPC, Call,
5056	Fn: [](const APFloat &X, const APFloat &Y, const APFloat &Z,
5057	llvm::RoundingMode RM) {
5058	APFloat F = X;
5059	F.fusedMultiplyAdd(Multiplicand: Y, Addend: Z, RM);
5060	return F;
5061	});
5062
5063	case X86::BI__builtin_ia32_vpmadd52luq128:
5064	case X86::BI__builtin_ia32_vpmadd52luq256:
5065	case X86::BI__builtin_ia32_vpmadd52luq512:
5066	return interp__builtin_elementwise_triop(
5067	S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B, const APSInt &C) {
5068	return A + (B.trunc(width: `52`) * C.trunc(width: `52`)).zext(width: `64`);
5069	});
5070	case X86::BI__builtin_ia32_vpmadd52huq128:
5071	case X86::BI__builtin_ia32_vpmadd52huq256:
5072	case X86::BI__builtin_ia32_vpmadd52huq512:
5073	return interp__builtin_elementwise_triop(
5074	S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B, const APSInt &C) {
5075	return A + llvm::APIntOps::mulhu(C1: B.trunc(width: `52`), C2: C.trunc(width: `52`)).zext(width: `64`);
5076	});
5077
5078	case X86::BI__builtin_ia32_vpshldd128:
5079	case X86::BI__builtin_ia32_vpshldd256:
5080	case X86::BI__builtin_ia32_vpshldd512:
5081	case X86::BI__builtin_ia32_vpshldq128:
5082	case X86::BI__builtin_ia32_vpshldq256:
5083	case X86::BI__builtin_ia32_vpshldq512:
5084	case X86::BI__builtin_ia32_vpshldw128:
5085	case X86::BI__builtin_ia32_vpshldw256:
5086	case X86::BI__builtin_ia32_vpshldw512:
5087	return interp__builtin_elementwise_triop(
5088	S, OpPC, Call,
5089	Fn: [](const APSInt &Hi, const APSInt &Lo, const APSInt &Amt) {
5090	return llvm::APIntOps::fshl(Hi, Lo, Shift: Amt);
5091	});
5092
5093	case X86::BI__builtin_ia32_vpshrdd128:
5094	case X86::BI__builtin_ia32_vpshrdd256:
5095	case X86::BI__builtin_ia32_vpshrdd512:
5096	case X86::BI__builtin_ia32_vpshrdq128:
5097	case X86::BI__builtin_ia32_vpshrdq256:
5098	case X86::BI__builtin_ia32_vpshrdq512:
5099	case X86::BI__builtin_ia32_vpshrdw128:
5100	case X86::BI__builtin_ia32_vpshrdw256:
5101	case X86::BI__builtin_ia32_vpshrdw512:
5102	// NOTE: Reversed Hi/Lo operands.
5103	return interp__builtin_elementwise_triop(
5104	S, OpPC, Call,
5105	Fn: [](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) {
5106	return llvm::APIntOps::fshr(Hi, Lo, Shift: Amt);
5107	});
5108	case X86::BI__builtin_ia32_vpconflictsi_128:
5109	case X86::BI__builtin_ia32_vpconflictsi_256:
5110	case X86::BI__builtin_ia32_vpconflictsi_512:
5111	case X86::BI__builtin_ia32_vpconflictdi_128:
5112	case X86::BI__builtin_ia32_vpconflictdi_256:
5113	case X86::BI__builtin_ia32_vpconflictdi_512:
5114	return interp__builtin_ia32_vpconflict(S, OpPC, Call);
5115	case clang::X86::BI__builtin_ia32_blendpd:
5116	case clang::X86::BI__builtin_ia32_blendpd256:
5117	case clang::X86::BI__builtin_ia32_blendps:
5118	case clang::X86::BI__builtin_ia32_blendps256:
5119	case clang::X86::BI__builtin_ia32_pblendw128:
5120	case clang::X86::BI__builtin_ia32_pblendw256:
5121	case clang::X86::BI__builtin_ia32_pblendd128:
5122	case clang::X86::BI__builtin_ia32_pblendd256:
5123	return interp__builtin_ia32_shuffle_generic(
5124	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5125	// Bit index for mask.
5126	unsigned MaskBit = (ShuffleMask >> (DstIdx % `8`)) & `0x1`;
5127	unsigned SrcVecIdx = MaskBit ? `1` : `0`; // 1 = TrueVec, 0 = FalseVec
5128	return std::pair<unsigned, int>{SrcVecIdx, static_cast<int>(DstIdx)};
5129	});
5130
5131
5132
5133	case clang::X86::BI__builtin_ia32_blendvpd:
5134	case clang::X86::BI__builtin_ia32_blendvpd256:
5135	case clang::X86::BI__builtin_ia32_blendvps:
5136	case clang::X86::BI__builtin_ia32_blendvps256:
5137	return interp__builtin_elementwise_triop_fp(
5138	S, OpPC, Call,
5139	Fn: [](const APFloat &F, const APFloat &T, const APFloat &C,
5140	llvm::RoundingMode) { return C.isNegative() ? T : F; });
5141
5142	case clang::X86::BI__builtin_ia32_pblendvb128:
5143	case clang::X86::BI__builtin_ia32_pblendvb256:
5144	return interp__builtin_elementwise_triop(
5145	S, OpPC, Call, Fn: [](const APSInt &F, const APSInt &T, const APSInt &C) {
5146	return ((APInt)C).isNegative() ? T : F;
5147	});
5148	case X86::BI__builtin_ia32_ptestz128:
5149	case X86::BI__builtin_ia32_ptestz256:
5150	case X86::BI__builtin_ia32_vtestzps:
5151	case X86::BI__builtin_ia32_vtestzps256:
5152	case X86::BI__builtin_ia32_vtestzpd:
5153	case X86::BI__builtin_ia32_vtestzpd256:
5154	return interp__builtin_ia32_test_op(
5155	S, OpPC, Call,
5156	Fn: [](const APInt &A, const APInt &B) { return (A & B) == `0`; });
5157	case X86::BI__builtin_ia32_ptestc128:
5158	case X86::BI__builtin_ia32_ptestc256:
5159	case X86::BI__builtin_ia32_vtestcps:
5160	case X86::BI__builtin_ia32_vtestcps256:
5161	case X86::BI__builtin_ia32_vtestcpd:
5162	case X86::BI__builtin_ia32_vtestcpd256:
5163	return interp__builtin_ia32_test_op(
5164	S, OpPC, Call,
5165	Fn: [](const APInt &A, const APInt &B) { return (~A & B) == `0`; });
5166	case X86::BI__builtin_ia32_ptestnzc128:
5167	case X86::BI__builtin_ia32_ptestnzc256:
5168	case X86::BI__builtin_ia32_vtestnzcps:
5169	case X86::BI__builtin_ia32_vtestnzcps256:
5170	case X86::BI__builtin_ia32_vtestnzcpd:
5171	case X86::BI__builtin_ia32_vtestnzcpd256:
5172	return interp__builtin_ia32_test_op(
5173	S, OpPC, Call, Fn: [](const APInt &A, const APInt &B) {
5174	return ((A & B) != `0`) && ((~A & B) != `0`);
5175	});
5176	case X86::BI__builtin_ia32_selectb_128:
5177	case X86::BI__builtin_ia32_selectb_256:
5178	case X86::BI__builtin_ia32_selectb_512:
5179	case X86::BI__builtin_ia32_selectw_128:
5180	case X86::BI__builtin_ia32_selectw_256:
5181	case X86::BI__builtin_ia32_selectw_512:
5182	case X86::BI__builtin_ia32_selectd_128:
5183	case X86::BI__builtin_ia32_selectd_256:
5184	case X86::BI__builtin_ia32_selectd_512:
5185	case X86::BI__builtin_ia32_selectq_128:
5186	case X86::BI__builtin_ia32_selectq_256:
5187	case X86::BI__builtin_ia32_selectq_512:
5188	case X86::BI__builtin_ia32_selectph_128:
5189	case X86::BI__builtin_ia32_selectph_256:
5190	case X86::BI__builtin_ia32_selectph_512:
5191	case X86::BI__builtin_ia32_selectpbf_128:
5192	case X86::BI__builtin_ia32_selectpbf_256:
5193	case X86::BI__builtin_ia32_selectpbf_512:
5194	case X86::BI__builtin_ia32_selectps_128:
5195	case X86::BI__builtin_ia32_selectps_256:
5196	case X86::BI__builtin_ia32_selectps_512:
5197	case X86::BI__builtin_ia32_selectpd_128:
5198	case X86::BI__builtin_ia32_selectpd_256:
5199	case X86::BI__builtin_ia32_selectpd_512:
5200	return interp__builtin_select(S, OpPC, Call);
5201
5202	case X86::BI__builtin_ia32_shufps:
5203	case X86::BI__builtin_ia32_shufps256:
5204	case X86::BI__builtin_ia32_shufps512:
5205	return interp__builtin_ia32_shuffle_generic(
5206	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5207	unsigned NumElemPerLane = `4`;
5208	unsigned NumSelectableElems = NumElemPerLane / `2`;
5209	unsigned BitsPerElem = `2`;
5210	unsigned IndexMask = `0x3`;
5211	unsigned MaskBits = `8`;
5212	unsigned Lane = DstIdx / NumElemPerLane;
5213	unsigned ElemInLane = DstIdx % NumElemPerLane;
5214	unsigned LaneOffset = Lane * NumElemPerLane;
5215	unsigned SrcIdx = ElemInLane >= NumSelectableElems ? `1` : `0`;
5216	unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
5217	unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
5218	return std::pair<unsigned, int>{SrcIdx,
5219	static_cast<int>(LaneOffset + Index)};
5220	});
5221	case X86::BI__builtin_ia32_shufpd:
5222	case X86::BI__builtin_ia32_shufpd256:
5223	case X86::BI__builtin_ia32_shufpd512:
5224	return interp__builtin_ia32_shuffle_generic(
5225	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5226	unsigned NumElemPerLane = `2`;
5227	unsigned NumSelectableElems = NumElemPerLane / `2`;
5228	unsigned BitsPerElem = `1`;
5229	unsigned IndexMask = `0x1`;
5230	unsigned MaskBits = `8`;
5231	unsigned Lane = DstIdx / NumElemPerLane;
5232	unsigned ElemInLane = DstIdx % NumElemPerLane;
5233	unsigned LaneOffset = Lane * NumElemPerLane;
5234	unsigned SrcIdx = ElemInLane >= NumSelectableElems ? `1` : `0`;
5235	unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
5236	unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
5237	return std::pair<unsigned, int>{SrcIdx,
5238	static_cast<int>(LaneOffset + Index)};
5239	});
5240
5241	case X86::BI__builtin_ia32_vgf2p8affineinvqb_v16qi:
5242	case X86::BI__builtin_ia32_vgf2p8affineinvqb_v32qi:
5243	case X86::BI__builtin_ia32_vgf2p8affineinvqb_v64qi:
5244	return interp_builtin_ia32_gfni_affine(S, OpPC, Call, Inverse: true);
5245	case X86::BI__builtin_ia32_vgf2p8affineqb_v16qi:
5246	case X86::BI__builtin_ia32_vgf2p8affineqb_v32qi:
5247	case X86::BI__builtin_ia32_vgf2p8affineqb_v64qi:
5248	return interp_builtin_ia32_gfni_affine(S, OpPC, Call, Inverse: false);
5249
5250	case X86::BI__builtin_ia32_vgf2p8mulb_v16qi:
5251	case X86::BI__builtin_ia32_vgf2p8mulb_v32qi:
5252	case X86::BI__builtin_ia32_vgf2p8mulb_v64qi:
5253	return interp__builtin_ia32_gfni_mul(S, OpPC, Call);
5254
5255	case X86::BI__builtin_ia32_insertps128:
5256	return interp__builtin_ia32_shuffle_generic(
5257	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Mask) {
5258	// Bits [3:0]: zero mask - if bit is set, zero this element
5259	if ((Mask & (`1` << DstIdx)) != `0`) {
5260	return std::pair<unsigned, int>{`0`, -`1`};
5261	}
5262	// Bits [7:6]: select element from source vector Y (0-3)
5263	// Bits [5:4]: select destination position (0-3)
5264	unsigned SrcElem = (Mask >> `6`) & `0x3`;
5265	unsigned DstElem = (Mask >> `4`) & `0x3`;
5266	if (DstIdx == DstElem) {
5267	// Insert element from source vector (B) at this position
5268	return std::pair<unsigned, int>{`1`, static_cast<int>(SrcElem)};
5269	} else {
5270	// Copy from destination vector (A)
5271	return std::pair<unsigned, int>{`0`, static_cast<int>(DstIdx)};
5272	}
5273	});
5274	case X86::BI__builtin_ia32_permvarsi256:
5275	case X86::BI__builtin_ia32_permvarsf256:
5276	case X86::BI__builtin_ia32_permvardf512:
5277	case X86::BI__builtin_ia32_permvardi512:
5278	case X86::BI__builtin_ia32_permvarhi128:
5279	return interp__builtin_ia32_shuffle_generic(
5280	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5281	int Offset = ShuffleMask & `0x7`;
5282	return std::pair<unsigned, int>{`0`, Offset};
5283	});
5284	case X86::BI__builtin_ia32_permvarqi128:
5285	case X86::BI__builtin_ia32_permvarhi256:
5286	case X86::BI__builtin_ia32_permvarsi512:
5287	case X86::BI__builtin_ia32_permvarsf512:
5288	return interp__builtin_ia32_shuffle_generic(
5289	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5290	int Offset = ShuffleMask & `0xF`;
5291	return std::pair<unsigned, int>{`0`, Offset};
5292	});
5293	case X86::BI__builtin_ia32_permvardi256:
5294	case X86::BI__builtin_ia32_permvardf256:
5295	return interp__builtin_ia32_shuffle_generic(
5296	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5297	int Offset = ShuffleMask & `0x3`;
5298	return std::pair<unsigned, int>{`0`, Offset};
5299	});
5300	case X86::BI__builtin_ia32_permvarqi256:
5301	case X86::BI__builtin_ia32_permvarhi512:
5302	return interp__builtin_ia32_shuffle_generic(
5303	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5304	int Offset = ShuffleMask & `0x1F`;
5305	return std::pair<unsigned, int>{`0`, Offset};
5306	});
5307	case X86::BI__builtin_ia32_permvarqi512:
5308	return interp__builtin_ia32_shuffle_generic(
5309	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5310	int Offset = ShuffleMask & `0x3F`;
5311	return std::pair<unsigned, int>{`0`, Offset};
5312	});
5313	case X86::BI__builtin_ia32_vpermi2varq128:
5314	case X86::BI__builtin_ia32_vpermi2varpd128:
5315	return interp__builtin_ia32_shuffle_generic(
5316	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5317	int Offset = ShuffleMask & `0x1`;
5318	unsigned SrcIdx = (ShuffleMask >> `1`) & `0x1`;
5319	return std::pair<unsigned, int>{SrcIdx, Offset};
5320	});
5321	case X86::BI__builtin_ia32_vpermi2vard128:
5322	case X86::BI__builtin_ia32_vpermi2varps128:
5323	case X86::BI__builtin_ia32_vpermi2varq256:
5324	case X86::BI__builtin_ia32_vpermi2varpd256:
5325	return interp__builtin_ia32_shuffle_generic(
5326	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5327	int Offset = ShuffleMask & `0x3`;
5328	unsigned SrcIdx = (ShuffleMask >> `2`) & `0x1`;
5329	return std::pair<unsigned, int>{SrcIdx, Offset};
5330	});
5331	case X86::BI__builtin_ia32_vpermi2varhi128:
5332	case X86::BI__builtin_ia32_vpermi2vard256:
5333	case X86::BI__builtin_ia32_vpermi2varps256:
5334	case X86::BI__builtin_ia32_vpermi2varq512:
5335	case X86::BI__builtin_ia32_vpermi2varpd512:
5336	return interp__builtin_ia32_shuffle_generic(
5337	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5338	int Offset = ShuffleMask & `0x7`;
5339	unsigned SrcIdx = (ShuffleMask >> `3`) & `0x1`;
5340	return std::pair<unsigned, int>{SrcIdx, Offset};
5341	});
5342	case X86::BI__builtin_ia32_vpermi2varqi128:
5343	case X86::BI__builtin_ia32_vpermi2varhi256:
5344	case X86::BI__builtin_ia32_vpermi2vard512:
5345	case X86::BI__builtin_ia32_vpermi2varps512:
5346	return interp__builtin_ia32_shuffle_generic(
5347	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5348	int Offset = ShuffleMask & `0xF`;
5349	unsigned SrcIdx = (ShuffleMask >> `4`) & `0x1`;
5350	return std::pair<unsigned, int>{SrcIdx, Offset};
5351	});
5352	case X86::BI__builtin_ia32_vpermi2varqi256:
5353	case X86::BI__builtin_ia32_vpermi2varhi512:
5354	return interp__builtin_ia32_shuffle_generic(
5355	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5356	int Offset = ShuffleMask & `0x1F`;
5357	unsigned SrcIdx = (ShuffleMask >> `5`) & `0x1`;
5358	return std::pair<unsigned, int>{SrcIdx, Offset};
5359	});
5360	case X86::BI__builtin_ia32_vpermi2varqi512:
5361	return interp__builtin_ia32_shuffle_generic(
5362	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5363	int Offset = ShuffleMask & `0x3F`;
5364	unsigned SrcIdx = (ShuffleMask >> `6`) & `0x1`;
5365	return std::pair<unsigned, int>{SrcIdx, Offset};
5366	});
5367	case X86::BI__builtin_ia32_vperm2f128_pd256:
5368	case X86::BI__builtin_ia32_vperm2f128_ps256:
5369	case X86::BI__builtin_ia32_vperm2f128_si256:
5370	case X86::BI__builtin_ia32_permti256: {
5371	unsigned NumElements =
5372	Call->getArg(Arg: `0`)->getType()->castAs<VectorType>()->getNumElements();
5373	unsigned PreservedBitsCnt = NumElements >> `2`;
5374	return interp__builtin_ia32_shuffle_generic(
5375	S, OpPC, Call,
5376	GetSourceIndex: [PreservedBitsCnt](unsigned DstIdx, unsigned ShuffleMask) {
5377	unsigned ControlBitsCnt = DstIdx >> PreservedBitsCnt << `2`;
5378	unsigned ControlBits = ShuffleMask >> ControlBitsCnt;
5379
5380	if (ControlBits & `0b1000`)
5381	return std::make_pair(x: `0u`, y: -`1`);
5382
5383	unsigned SrcVecIdx = (ControlBits & `0b10`) >> `1`;
5384	unsigned PreservedBitsMask = (`1` << PreservedBitsCnt) - `1`;
5385	int SrcIdx = ((ControlBits & `0b1`) << PreservedBitsCnt) \|
5386	(DstIdx & PreservedBitsMask);
5387	return std::make_pair(x&: SrcVecIdx, y&: SrcIdx);
5388	});
5389	}
5390	case X86::BI__builtin_ia32_pshufb128:
5391	case X86::BI__builtin_ia32_pshufb256:
5392	case X86::BI__builtin_ia32_pshufb512:
5393	return interp__builtin_ia32_shuffle_generic(
5394	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5395	uint8_t Ctlb = static_cast<uint8_t>(ShuffleMask);
5396	if (Ctlb & `0x80`)
5397	return std::make_pair(x: `0`, y: -`1`);
5398
5399	unsigned LaneBase = (DstIdx / `16`) * `16`;
5400	unsigned SrcOffset = Ctlb & `0x0F`;
5401	unsigned SrcIdx = LaneBase + SrcOffset;
5402	return std::make_pair(x: `0`, y: static_cast<int>(SrcIdx));
5403	});
5404
5405	case X86::BI__builtin_ia32_pshuflw:
5406	case X86::BI__builtin_ia32_pshuflw256:
5407	case X86::BI__builtin_ia32_pshuflw512:
5408	return interp__builtin_ia32_shuffle_generic(
5409	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5410	unsigned LaneBase = (DstIdx / `8`) * `8`;
5411	unsigned LaneIdx = DstIdx % `8`;
5412	if (LaneIdx < `4`) {
5413	unsigned Sel = (ShuffleMask >> (`2` * LaneIdx)) & `0x3`;
5414	return std::make_pair(x: `0`, y: static_cast<int>(LaneBase + Sel));
5415	}
5416
5417	return std::make_pair(x: `0`, y: static_cast<int>(DstIdx));
5418	});
5419
5420	case X86::BI__builtin_ia32_pshufhw:
5421	case X86::BI__builtin_ia32_pshufhw256:
5422	case X86::BI__builtin_ia32_pshufhw512:
5423	return interp__builtin_ia32_shuffle_generic(
5424	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5425	unsigned LaneBase = (DstIdx / `8`) * `8`;
5426	unsigned LaneIdx = DstIdx % `8`;
5427	if (LaneIdx >= `4`) {
5428	unsigned Sel = (ShuffleMask >> (`2` * (LaneIdx - `4`))) & `0x3`;
5429	return std::make_pair(x: `0`, y: static_cast<int>(LaneBase + `4` + Sel));
5430	}
5431
5432	return std::make_pair(x: `0`, y: static_cast<int>(DstIdx));
5433	});
5434
5435	case X86::BI__builtin_ia32_pshufd:
5436	case X86::BI__builtin_ia32_pshufd256:
5437	case X86::BI__builtin_ia32_pshufd512:
5438	case X86::BI__builtin_ia32_vpermilps:
5439	case X86::BI__builtin_ia32_vpermilps256:
5440	case X86::BI__builtin_ia32_vpermilps512:
5441	return interp__builtin_ia32_shuffle_generic(
5442	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5443	unsigned LaneBase = (DstIdx / `4`) * `4`;
5444	unsigned LaneIdx = DstIdx % `4`;
5445	unsigned Sel = (ShuffleMask >> (`2` * LaneIdx)) & `0x3`;
5446	return std::make_pair(x: `0`, y: static_cast<int>(LaneBase + Sel));
5447	});
5448
5449	case X86::BI__builtin_ia32_vpermilvarpd:
5450	case X86::BI__builtin_ia32_vpermilvarpd256:
5451	case X86::BI__builtin_ia32_vpermilvarpd512:
5452	return interp__builtin_ia32_shuffle_generic(
5453	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5454	unsigned NumElemPerLane = `2`;
5455	unsigned Lane = DstIdx / NumElemPerLane;
5456	unsigned Offset = ShuffleMask & `0b10` ? `1` : `0`;
5457	return std::make_pair(
5458	x: `0`, y: static_cast<int>(Lane * NumElemPerLane + Offset));
5459	});
5460
5461	case X86::BI__builtin_ia32_vpermilvarps:
5462	case X86::BI__builtin_ia32_vpermilvarps256:
5463	case X86::BI__builtin_ia32_vpermilvarps512:
5464	return interp__builtin_ia32_shuffle_generic(
5465	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned ShuffleMask) {
5466	unsigned NumElemPerLane = `4`;
5467	unsigned Lane = DstIdx / NumElemPerLane;
5468	unsigned Offset = ShuffleMask & `0b11`;
5469	return std::make_pair(
5470	x: `0`, y: static_cast<int>(Lane * NumElemPerLane + Offset));
5471	});
5472
5473	case X86::BI__builtin_ia32_vpermilpd:
5474	case X86::BI__builtin_ia32_vpermilpd256:
5475	case X86::BI__builtin_ia32_vpermilpd512:
5476	return interp__builtin_ia32_shuffle_generic(
5477	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Control) {
5478	unsigned NumElemPerLane = `2`;
5479	unsigned BitsPerElem = `1`;
5480	unsigned MaskBits = `8`;
5481	unsigned IndexMask = `0x1`;
5482	unsigned Lane = DstIdx / NumElemPerLane;
5483	unsigned LaneOffset = Lane * NumElemPerLane;
5484	unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
5485	unsigned Index = (Control >> BitIndex) & IndexMask;
5486	return std::make_pair(x: `0`, y: static_cast<int>(LaneOffset + Index));
5487	});
5488
5489	case X86::BI__builtin_ia32_permdf256:
5490	case X86::BI__builtin_ia32_permdi256:
5491	return interp__builtin_ia32_shuffle_generic(
5492	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Control) {
5493	// permute4x64 operates on 4 64-bit elements
5494	// For element i (0-3), extract bits [2i+1:2i] from Control
5495	unsigned Index = (Control >> (`2` * DstIdx)) & `0x3`;
5496	return std::make_pair(x: `0`, y: static_cast<int>(Index));
5497	});
5498
5499	case X86::BI__builtin_ia32_vpmultishiftqb128:
5500	case X86::BI__builtin_ia32_vpmultishiftqb256:
5501	case X86::BI__builtin_ia32_vpmultishiftqb512:
5502	return interp__builtin_ia32_multishiftqb(S, OpPC, Call);
5503	case X86::BI__builtin_ia32_kandqi:
5504	case X86::BI__builtin_ia32_kandhi:
5505	case X86::BI__builtin_ia32_kandsi:
5506	case X86::BI__builtin_ia32_kanddi:
5507	return interp__builtin_elementwise_int_binop(
5508	S, OpPC, Call,
5509	Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; });
5510
5511	case X86::BI__builtin_ia32_kandnqi:
5512	case X86::BI__builtin_ia32_kandnhi:
5513	case X86::BI__builtin_ia32_kandnsi:
5514	case X86::BI__builtin_ia32_kandndi:
5515	return interp__builtin_elementwise_int_binop(
5516	S, OpPC, Call,
5517	Fn: [](const APSInt &LHS, const APSInt &RHS) { return ~LHS & RHS; });
5518
5519	case X86::BI__builtin_ia32_korqi:
5520	case X86::BI__builtin_ia32_korhi:
5521	case X86::BI__builtin_ia32_korsi:
5522	case X86::BI__builtin_ia32_kordi:
5523	return interp__builtin_elementwise_int_binop(
5524	S, OpPC, Call,
5525	Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS \| RHS; });
5526
5527	case X86::BI__builtin_ia32_kxnorqi:
5528	case X86::BI__builtin_ia32_kxnorhi:
5529	case X86::BI__builtin_ia32_kxnorsi:
5530	case X86::BI__builtin_ia32_kxnordi:
5531	return interp__builtin_elementwise_int_binop(
5532	S, OpPC, Call,
5533	Fn: [](const APSInt &LHS, const APSInt &RHS) { return ~(LHS ^ RHS); });
5534
5535	case X86::BI__builtin_ia32_kxorqi:
5536	case X86::BI__builtin_ia32_kxorhi:
5537	case X86::BI__builtin_ia32_kxorsi:
5538	case X86::BI__builtin_ia32_kxordi:
5539	return interp__builtin_elementwise_int_binop(
5540	S, OpPC, Call,
5541	Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS ^ RHS; });
5542
5543	case X86::BI__builtin_ia32_knotqi:
5544	case X86::BI__builtin_ia32_knothi:
5545	case X86::BI__builtin_ia32_knotsi:
5546	case X86::BI__builtin_ia32_knotdi:
5547	return interp__builtin_elementwise_int_unaryop(
5548	S, OpPC, Call, Fn: [](const APSInt &Src) { return ~Src; });
5549
5550	case X86::BI__builtin_ia32_kaddqi:
5551	case X86::BI__builtin_ia32_kaddhi:
5552	case X86::BI__builtin_ia32_kaddsi:
5553	case X86::BI__builtin_ia32_kadddi:
5554	return interp__builtin_elementwise_int_binop(
5555	S, OpPC, Call,
5556	Fn: [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
5557
5558	case X86::BI__builtin_ia32_kmovb:
5559	case X86::BI__builtin_ia32_kmovw:
5560	case X86::BI__builtin_ia32_kmovd:
5561	case X86::BI__builtin_ia32_kmovq:
5562	return interp__builtin_elementwise_int_unaryop(
5563	S, OpPC, Call, Fn: [](const APSInt &Src) { return Src; });
5564
5565	case X86::BI__builtin_ia32_kunpckhi:
5566	case X86::BI__builtin_ia32_kunpckdi:
5567	case X86::BI__builtin_ia32_kunpcksi:
5568	return interp__builtin_elementwise_int_binop(
5569	S, OpPC, Call, Fn: [](const APSInt &A, const APSInt &B) {
5570	// Generic kunpack: extract lower half of each operand and concatenate
5571	// Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0]
5572	unsigned BW = A.getBitWidth();
5573	return APSInt (A.trunc(width: BW / `2`).concat(NewLSB: B.trunc(width: BW / `2`)),
5574	A.isUnsigned());
5575	});
5576
5577	case X86::BI__builtin_ia32_phminposuw128:
5578	return interp__builtin_ia32_phminposuw(S, OpPC, Call);
5579
5580	case X86::BI__builtin_ia32_psraq128:
5581	case X86::BI__builtin_ia32_psraq256:
5582	case X86::BI__builtin_ia32_psraq512:
5583	case X86::BI__builtin_ia32_psrad128:
5584	case X86::BI__builtin_ia32_psrad256:
5585	case X86::BI__builtin_ia32_psrad512:
5586	case X86::BI__builtin_ia32_psraw128:
5587	case X86::BI__builtin_ia32_psraw256:
5588	case X86::BI__builtin_ia32_psraw512:
5589	return interp__builtin_ia32_shift_with_count(
5590	S, OpPC, Call,
5591	ShiftOp: [](const APInt &Elt, uint64_t Count) { return Elt.ashr(ShiftAmt: Count); },
5592	OverflowOp: [](const APInt &Elt, unsigned Width) { return Elt.ashr(ShiftAmt: Width - `1`); });
5593
5594	case X86::BI__builtin_ia32_psllq128:
5595	case X86::BI__builtin_ia32_psllq256:
5596	case X86::BI__builtin_ia32_psllq512:
5597	case X86::BI__builtin_ia32_pslld128:
5598	case X86::BI__builtin_ia32_pslld256:
5599	case X86::BI__builtin_ia32_pslld512:
5600	case X86::BI__builtin_ia32_psllw128:
5601	case X86::BI__builtin_ia32_psllw256:
5602	case X86::BI__builtin_ia32_psllw512:
5603	return interp__builtin_ia32_shift_with_count(
5604	S, OpPC, Call,
5605	ShiftOp: [](const APInt &Elt, uint64_t Count) { return Elt.shl(shiftAmt: Count); },
5606	OverflowOp: [](const APInt &Elt, unsigned Width) { return APInt::getZero(numBits: Width); });
5607
5608	case X86::BI__builtin_ia32_psrlq128:
5609	case X86::BI__builtin_ia32_psrlq256:
5610	case X86::BI__builtin_ia32_psrlq512:
5611	case X86::BI__builtin_ia32_psrld128:
5612	case X86::BI__builtin_ia32_psrld256:
5613	case X86::BI__builtin_ia32_psrld512:
5614	case X86::BI__builtin_ia32_psrlw128:
5615	case X86::BI__builtin_ia32_psrlw256:
5616	case X86::BI__builtin_ia32_psrlw512:
5617	return interp__builtin_ia32_shift_with_count(
5618	S, OpPC, Call,
5619	ShiftOp: [](const APInt &Elt, uint64_t Count) { return Elt.lshr(shiftAmt: Count); },
5620	OverflowOp: [](const APInt &Elt, unsigned Width) { return APInt::getZero(numBits: Width); });
5621
5622	case X86::BI__builtin_ia32_pternlogd128_mask:
5623	case X86::BI__builtin_ia32_pternlogd256_mask:
5624	case X86::BI__builtin_ia32_pternlogd512_mask:
5625	case X86::BI__builtin_ia32_pternlogq128_mask:
5626	case X86::BI__builtin_ia32_pternlogq256_mask:
5627	case X86::BI__builtin_ia32_pternlogq512_mask:
5628	return interp__builtin_ia32_pternlog(S, OpPC, Call, /MaskZ=/false);
5629	case X86::BI__builtin_ia32_pternlogd128_maskz:
5630	case X86::BI__builtin_ia32_pternlogd256_maskz:
5631	case X86::BI__builtin_ia32_pternlogd512_maskz:
5632	case X86::BI__builtin_ia32_pternlogq128_maskz:
5633	case X86::BI__builtin_ia32_pternlogq256_maskz:
5634	case X86::BI__builtin_ia32_pternlogq512_maskz:
5635	return interp__builtin_ia32_pternlog(S, OpPC, Call, /MaskZ=/true);
5636	case Builtin::BI__builtin_elementwise_fshl:
5637	return interp__builtin_elementwise_triop(S, OpPC, Call,
5638	Fn: llvm::APIntOps::fshl);
5639	case Builtin::BI__builtin_elementwise_fshr:
5640	return interp__builtin_elementwise_triop(S, OpPC, Call,
5641	Fn: llvm::APIntOps::fshr);
5642
5643	case X86::BI__builtin_ia32_shuf_f32x4_256:
5644	case X86::BI__builtin_ia32_shuf_i32x4_256:
5645	case X86::BI__builtin_ia32_shuf_f64x2_256:
5646	case X86::BI__builtin_ia32_shuf_i64x2_256:
5647	case X86::BI__builtin_ia32_shuf_f32x4:
5648	case X86::BI__builtin_ia32_shuf_i32x4:
5649	case X86::BI__builtin_ia32_shuf_f64x2:
5650	case X86::BI__builtin_ia32_shuf_i64x2: {
5651	// Destination and sources A, B all have the same type.
5652	QualType VecQT = Call->getArg(Arg: `0`)->getType();
5653	const auto *VecT = VecQT ->castAs<VectorType>();
5654	unsigned NumElems = VecT->getNumElements();
5655	unsigned ElemBits = S.getASTContext().getTypeSize(T: VecT->getElementType());
5656	unsigned LaneBits = `128u`;
5657	unsigned NumLanes = (NumElems * ElemBits) / LaneBits;
5658	unsigned NumElemsPerLane = LaneBits / ElemBits;
5659
5660	return interp__builtin_ia32_shuffle_generic(
5661	S, OpPC, Call,
5662	GetSourceIndex: [NumLanes, NumElemsPerLane](unsigned DstIdx, unsigned ShuffleMask) {
5663	// DstIdx determines source. ShuffleMask selects lane in source.
5664	unsigned BitsPerElem = NumLanes / `2`;
5665	unsigned IndexMask = (`1u` << BitsPerElem) - `1`;
5666	unsigned Lane = DstIdx / NumElemsPerLane;
5667	unsigned SrcIdx = (Lane < NumLanes / `2`) ? `0` : `1`;
5668	unsigned BitIdx = BitsPerElem * Lane;
5669	unsigned SrcLaneIdx = (ShuffleMask >> BitIdx) & IndexMask;
5670	unsigned ElemInLane = DstIdx % NumElemsPerLane;
5671	unsigned IdxToPick = SrcLaneIdx * NumElemsPerLane + ElemInLane;
5672	return std::pair<unsigned, int>{SrcIdx, IdxToPick};
5673	});
5674	}
5675
5676	case X86::BI__builtin_ia32_insertf32x4_256:
5677	case X86::BI__builtin_ia32_inserti32x4_256:
5678	case X86::BI__builtin_ia32_insertf64x2_256:
5679	case X86::BI__builtin_ia32_inserti64x2_256:
5680	case X86::BI__builtin_ia32_insertf32x4:
5681	case X86::BI__builtin_ia32_inserti32x4:
5682	case X86::BI__builtin_ia32_insertf64x2_512:
5683	case X86::BI__builtin_ia32_inserti64x2_512:
5684	case X86::BI__builtin_ia32_insertf32x8:
5685	case X86::BI__builtin_ia32_inserti32x8:
5686	case X86::BI__builtin_ia32_insertf64x4:
5687	case X86::BI__builtin_ia32_inserti64x4:
5688	case X86::BI__builtin_ia32_vinsertf128_ps256:
5689	case X86::BI__builtin_ia32_vinsertf128_pd256:
5690	case X86::BI__builtin_ia32_vinsertf128_si256:
5691	case X86::BI__builtin_ia32_insert128i256:
5692	return interp__builtin_x86_insert_subvector(S, OpPC, Call, ID: BuiltinID);
5693
5694	case clang::X86::BI__builtin_ia32_vcvtps2ph:
5695	case clang::X86::BI__builtin_ia32_vcvtps2ph256:
5696	return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call);
5697
5698	case X86::BI__builtin_ia32_vec_ext_v4hi:
5699	case X86::BI__builtin_ia32_vec_ext_v16qi:
5700	case X86::BI__builtin_ia32_vec_ext_v8hi:
5701	case X86::BI__builtin_ia32_vec_ext_v4si:
5702	case X86::BI__builtin_ia32_vec_ext_v2di:
5703	case X86::BI__builtin_ia32_vec_ext_v32qi:
5704	case X86::BI__builtin_ia32_vec_ext_v16hi:
5705	case X86::BI__builtin_ia32_vec_ext_v8si:
5706	case X86::BI__builtin_ia32_vec_ext_v4di:
5707	case X86::BI__builtin_ia32_vec_ext_v4sf:
5708	return interp__builtin_vec_ext(S, OpPC, Call, ID: BuiltinID);
5709
5710	case X86::BI__builtin_ia32_vec_set_v4hi:
5711	case X86::BI__builtin_ia32_vec_set_v16qi:
5712	case X86::BI__builtin_ia32_vec_set_v8hi:
5713	case X86::BI__builtin_ia32_vec_set_v4si:
5714	case X86::BI__builtin_ia32_vec_set_v2di:
5715	case X86::BI__builtin_ia32_vec_set_v32qi:
5716	case X86::BI__builtin_ia32_vec_set_v16hi:
5717	case X86::BI__builtin_ia32_vec_set_v8si:
5718	case X86::BI__builtin_ia32_vec_set_v4di:
5719	return interp__builtin_vec_set(S, OpPC, Call, ID: BuiltinID);
5720
5721	case X86::BI__builtin_ia32_cvtb2mask128:
5722	case X86::BI__builtin_ia32_cvtb2mask256:
5723	case X86::BI__builtin_ia32_cvtb2mask512:
5724	case X86::BI__builtin_ia32_cvtw2mask128:
5725	case X86::BI__builtin_ia32_cvtw2mask256:
5726	case X86::BI__builtin_ia32_cvtw2mask512:
5727	case X86::BI__builtin_ia32_cvtd2mask128:
5728	case X86::BI__builtin_ia32_cvtd2mask256:
5729	case X86::BI__builtin_ia32_cvtd2mask512:
5730	case X86::BI__builtin_ia32_cvtq2mask128:
5731	case X86::BI__builtin_ia32_cvtq2mask256:
5732	case X86::BI__builtin_ia32_cvtq2mask512:
5733	return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, ID: BuiltinID);
5734
5735	case X86::BI__builtin_ia32_cvtmask2b128:
5736	case X86::BI__builtin_ia32_cvtmask2b256:
5737	case X86::BI__builtin_ia32_cvtmask2b512:
5738	case X86::BI__builtin_ia32_cvtmask2w128:
5739	case X86::BI__builtin_ia32_cvtmask2w256:
5740	case X86::BI__builtin_ia32_cvtmask2w512:
5741	case X86::BI__builtin_ia32_cvtmask2d128:
5742	case X86::BI__builtin_ia32_cvtmask2d256:
5743	case X86::BI__builtin_ia32_cvtmask2d512:
5744	case X86::BI__builtin_ia32_cvtmask2q128:
5745	case X86::BI__builtin_ia32_cvtmask2q256:
5746	case X86::BI__builtin_ia32_cvtmask2q512:
5747	return interp__builtin_ia32_cvt_mask2vec(S, OpPC, Call, ID: BuiltinID);
5748
5749	case X86::BI__builtin_ia32_cvtsd2ss:
5750	return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, HasRoundingMask: false);
5751
5752	case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
5753	return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, HasRoundingMask: true);
5754
5755	case X86::BI__builtin_ia32_cvtpd2ps:
5756	case X86::BI__builtin_ia32_cvtpd2ps256:
5757	return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, IsMasked: false, HasRounding: false);
5758	case X86::BI__builtin_ia32_cvtpd2ps_mask:
5759	return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, IsMasked: true, HasRounding: false);
5760	case X86::BI__builtin_ia32_cvtpd2ps512_mask:
5761	return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, IsMasked: true, HasRounding: true);
5762
5763	case X86::BI__builtin_ia32_cmpb128_mask:
5764	case X86::BI__builtin_ia32_cmpw128_mask:
5765	case X86::BI__builtin_ia32_cmpd128_mask:
5766	case X86::BI__builtin_ia32_cmpq128_mask:
5767	case X86::BI__builtin_ia32_cmpb256_mask:
5768	case X86::BI__builtin_ia32_cmpw256_mask:
5769	case X86::BI__builtin_ia32_cmpd256_mask:
5770	case X86::BI__builtin_ia32_cmpq256_mask:
5771	case X86::BI__builtin_ia32_cmpb512_mask:
5772	case X86::BI__builtin_ia32_cmpw512_mask:
5773	case X86::BI__builtin_ia32_cmpd512_mask:
5774	case X86::BI__builtin_ia32_cmpq512_mask:
5775	return interp__builtin_ia32_cmp_mask(S, OpPC, Call, ID: BuiltinID,
5776	/IsUnsigned=/false);
5777
5778	case X86::BI__builtin_ia32_ucmpb128_mask:
5779	case X86::BI__builtin_ia32_ucmpw128_mask:
5780	case X86::BI__builtin_ia32_ucmpd128_mask:
5781	case X86::BI__builtin_ia32_ucmpq128_mask:
5782	case X86::BI__builtin_ia32_ucmpb256_mask:
5783	case X86::BI__builtin_ia32_ucmpw256_mask:
5784	case X86::BI__builtin_ia32_ucmpd256_mask:
5785	case X86::BI__builtin_ia32_ucmpq256_mask:
5786	case X86::BI__builtin_ia32_ucmpb512_mask:
5787	case X86::BI__builtin_ia32_ucmpw512_mask:
5788	case X86::BI__builtin_ia32_ucmpd512_mask:
5789	case X86::BI__builtin_ia32_ucmpq512_mask:
5790	return interp__builtin_ia32_cmp_mask(S, OpPC, Call, ID: BuiltinID,
5791	/IsUnsigned=/true);
5792
5793	case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
5794	case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
5795	case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
5796	return interp__builtin_ia32_shufbitqmb_mask(S, OpPC, Call);
5797
5798	case X86::BI__builtin_ia32_pslldqi128_byteshift:
5799	case X86::BI__builtin_ia32_pslldqi256_byteshift:
5800	case X86::BI__builtin_ia32_pslldqi512_byteshift:
5801	// These SLLDQ intrinsics always operate on byte elements (8 bits).
5802	// The lane width is hardcoded to 16 to match the SIMD register size,
5803	// but the algorithm processes one byte per iteration,
5804	// so APInt(8, ...) is correct and intentional.
5805	return interp__builtin_ia32_shuffle_generic(
5806	S, OpPC, Call,
5807	GetSourceIndex: [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
5808	unsigned LaneBase = (DstIdx / `16`) * `16`;
5809	unsigned LaneIdx = DstIdx % `16`;
5810	if (LaneIdx < Shift)
5811	return std::make_pair(x: `0`, y: -`1`);
5812
5813	return std::make_pair(x: `0`,
5814	y: static_cast<int>(LaneBase + LaneIdx - Shift));
5815	});
5816
5817	case X86::BI__builtin_ia32_psrldqi128_byteshift:
5818	case X86::BI__builtin_ia32_psrldqi256_byteshift:
5819	case X86::BI__builtin_ia32_psrldqi512_byteshift:
5820	// These SRLDQ intrinsics always operate on byte elements (8 bits).
5821	// The lane width is hardcoded to 16 to match the SIMD register size,
5822	// but the algorithm processes one byte per iteration,
5823	// so APInt(8, ...) is correct and intentional.
5824	return interp__builtin_ia32_shuffle_generic(
5825	S, OpPC, Call,
5826	GetSourceIndex: [](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
5827	unsigned LaneBase = (DstIdx / `16`) * `16`;
5828	unsigned LaneIdx = DstIdx % `16`;
5829	if (LaneIdx + Shift < `16`)
5830	return std::make_pair(x: `0`,
5831	y: static_cast<int>(LaneBase + LaneIdx + Shift));
5832
5833	return std::make_pair(x: `0`, y: -`1`);
5834	});
5835
5836	case X86::BI__builtin_ia32_palignr128:
5837	case X86::BI__builtin_ia32_palignr256:
5838	case X86::BI__builtin_ia32_palignr512:
5839	return interp__builtin_ia32_shuffle_generic(
5840	S, OpPC, Call, GetSourceIndex: [](unsigned DstIdx, unsigned Shift) {
5841	// Default to -1 → zero-fill this destination element
5842	unsigned VecIdx = `1`;
5843	int ElemIdx = -`1`;
5844
5845	int Lane = DstIdx / `16`;
5846	int Offset = DstIdx % `16`;
5847
5848	// Elements come from VecB first, then VecA after the shift boundary
5849	unsigned ShiftedIdx = Offset + (Shift & `0xFF`);
5850	if (ShiftedIdx < `16`) { // from VecB
5851	ElemIdx = ShiftedIdx + (Lane * `16`);
5852	} else if (ShiftedIdx < `32`) { // from VecA
5853	VecIdx = `0`;
5854	ElemIdx = (ShiftedIdx - `16`) + (Lane * `16`);
5855	}
5856
5857	return std::pair<unsigned, int>{VecIdx, ElemIdx};
5858	});
5859
5860	case X86::BI__builtin_ia32_alignd128:
5861	case X86::BI__builtin_ia32_alignd256:
5862	case X86::BI__builtin_ia32_alignd512:
5863	case X86::BI__builtin_ia32_alignq128:
5864	case X86::BI__builtin_ia32_alignq256:
5865	case X86::BI__builtin_ia32_alignq512: {
5866	unsigned NumElems = Call->getType()->castAs<VectorType>()->getNumElements();
5867	return interp__builtin_ia32_shuffle_generic(
5868	S, OpPC, Call, GetSourceIndex: [NumElems](unsigned DstIdx, unsigned Shift) {
5869	unsigned Imm = Shift & `0xFF`;
5870	unsigned EffectiveShift = Imm & (NumElems - `1`);
5871	unsigned SourcePos = DstIdx + EffectiveShift;
5872	unsigned VecIdx = SourcePos < NumElems ? `1u` : `0u`;
5873	unsigned ElemIdx = SourcePos & (NumElems - `1`);
5874	return std::pair<unsigned, int>{VecIdx, static_cast<int>(ElemIdx)};
5875	});
5876	}
5877
5878	case clang::X86::BI__builtin_ia32_minps:
5879	case clang::X86::BI__builtin_ia32_minpd:
5880	case clang::X86::BI__builtin_ia32_minph128:
5881	case clang::X86::BI__builtin_ia32_minph256:
5882	case clang::X86::BI__builtin_ia32_minps256:
5883	case clang::X86::BI__builtin_ia32_minpd256:
5884	case clang::X86::BI__builtin_ia32_minps512:
5885	case clang::X86::BI__builtin_ia32_minpd512:
5886	case clang::X86::BI__builtin_ia32_minph512:
5887	return interp__builtin_elementwise_fp_binop(
5888	S, OpPC, Call,
5889	Fn: [](const APFloat &A, const APFloat &B,
5890	std::optional<APSInt>) -> std::optional<APFloat> {
5891	if (A.isNaN() \|\| A.isInfinity() \|\| A.isDenormal() \|\| B.isNaN() \|\|
5892	B.isInfinity() \|\| B.isDenormal())
5893	return std::nullopt;
5894	if (A.isZero() && B.isZero())
5895	return B;
5896	return llvm::minimum(A, B);
5897	});
5898
5899	case clang::X86::BI__builtin_ia32_minss:
5900	case clang::X86::BI__builtin_ia32_minsd:
5901	return interp__builtin_elementwise_fp_binop(
5902	S, OpPC, Call,
5903	Fn: [](const APFloat &A, const APFloat &B,
5904	std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
5905	return EvalScalarMinMaxFp(A, B, RoundingMode, /IsMin=/true);
5906	},
5907	/IsScalar=/true);
5908
5909	case clang::X86::BI__builtin_ia32_minsd_round_mask:
5910	case clang::X86::BI__builtin_ia32_minss_round_mask:
5911	case clang::X86::BI__builtin_ia32_minsh_round_mask:
5912	case clang::X86::BI__builtin_ia32_maxsd_round_mask:
5913	case clang::X86::BI__builtin_ia32_maxss_round_mask:
5914	case clang::X86::BI__builtin_ia32_maxsh_round_mask: {
5915	bool IsMin = BuiltinID == clang::X86::BI__builtin_ia32_minsd_round_mask \|\|
5916	BuiltinID == clang::X86::BI__builtin_ia32_minss_round_mask \|\|
5917	BuiltinID == clang::X86::BI__builtin_ia32_minsh_round_mask;
5918	return interp__builtin_scalar_fp_round_mask_binop(
5919	S, OpPC, Call,
5920	Fn: [IsMin](const APFloat &A, const APFloat &B,
5921	std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
5922	return EvalScalarMinMaxFp(A, B, RoundingMode, IsMin);
5923	});
5924	}
5925
5926	case clang::X86::BI__builtin_ia32_maxps:
5927	case clang::X86::BI__builtin_ia32_maxpd:
5928	case clang::X86::BI__builtin_ia32_maxph128:
5929	case clang::X86::BI__builtin_ia32_maxph256:
5930	case clang::X86::BI__builtin_ia32_maxps256:
5931	case clang::X86::BI__builtin_ia32_maxpd256:
5932	case clang::X86::BI__builtin_ia32_maxps512:
5933	case clang::X86::BI__builtin_ia32_maxpd512:
5934	case clang::X86::BI__builtin_ia32_maxph512:
5935	return interp__builtin_elementwise_fp_binop(
5936	S, OpPC, Call,
5937	Fn: [](const APFloat &A, const APFloat &B,
5938	std::optional<APSInt>) -> std::optional<APFloat> {
5939	if (A.isNaN() \|\| A.isInfinity() \|\| A.isDenormal() \|\| B.isNaN() \|\|
5940	B.isInfinity() \|\| B.isDenormal())
5941	return std::nullopt;
5942	if (A.isZero() && B.isZero())
5943	return B;
5944	return llvm::maximum(A, B);
5945	});
5946
5947	case clang::X86::BI__builtin_ia32_maxss:
5948	case clang::X86::BI__builtin_ia32_maxsd:
5949	return interp__builtin_elementwise_fp_binop(
5950	S, OpPC, Call,
5951	Fn: [](const APFloat &A, const APFloat &B,
5952	std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
5953	return EvalScalarMinMaxFp(A, B, RoundingMode, /IsMin=/false);
5954	},
5955	/IsScalar=/true);
5956
5957	default:
5958	S.FFDiag(Loc: S.Current->getLocation(PC: OpPC),
5959	DiagId: diag::note_invalid_subexpr_in_const_expr)
5960	<< S.Current->getRange(PC: OpPC);
5961
5962	return false;
5963	}
5964
5965	llvm_unreachable("Unhandled builtin ID");
5966	}
5967
5968	bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E,
5969	ArrayRef<int64_t> ArrayIndices, int64_t &IntResult) {
5970	S.getASTContext().recordOffsetOfEvaluation(E);
5971	CharUnits Result;
5972	unsigned N = E->getNumComponents();
5973	assert(N > `0`);
5974
5975	unsigned ArrayIndex = `0`;
5976	QualType CurrentType = E->getTypeSourceInfo()->getType();
5977	for (unsigned I = `0`; I != N; ++I) {
5978	const OffsetOfNode &Node = E->getComponent(Idx: I);
5979	switch (Node.getKind()) {
5980	case OffsetOfNode::Field: {
5981	const FieldDecl *MemberDecl = Node.getField();
5982	const auto *RD = CurrentType ->getAsRecordDecl();
5983	if (!RD \|\| RD->isInvalidDecl())
5984	return false;
5985	const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(D: RD);
5986	unsigned FieldIndex = MemberDecl->getFieldIndex();
5987	assert(FieldIndex < RL.getFieldCount() && "offsetof field in wrong type");
5988	Result +=
5989	S.getASTContext().toCharUnitsFromBits(BitSize: RL.getFieldOffset(FieldNo: FieldIndex));
5990	CurrentType = MemberDecl->getType().getNonReferenceType();
5991	break;
5992	}
5993	case OffsetOfNode::Array: {
5994	// When generating bytecode, we put all the index expressions as Sint64 on
5995	// the stack.
5996	int64_t Index = ArrayIndices [ArrayIndex];
5997	const ArrayType *AT = S.getASTContext().getAsArrayType(T: CurrentType);
5998	if (!AT)
5999	return false;
6000	CurrentType = AT->getElementType();
6001	CharUnits ElementSize = S.getASTContext().getTypeSizeInChars(T: CurrentType);
6002	Result += Index * ElementSize;
6003	++ArrayIndex;
6004	break;
6005	}
6006	case OffsetOfNode::Base: {
6007	const CXXBaseSpecifier *BaseSpec = Node.getBase();
6008	if (BaseSpec->isVirtual())
6009	return false;
6010
6011	// Find the layout of the class whose base we are looking into.
6012	const auto *RD = CurrentType ->getAsCXXRecordDecl();
6013	if (!RD \|\| RD->isInvalidDecl())
6014	return false;
6015	const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(D: RD);
6016
6017	// Find the base class itself.
6018	CurrentType = BaseSpec->getType();
6019	const auto *BaseRD = CurrentType ->getAsCXXRecordDecl();
6020	if (!BaseRD)
6021	return false;
6022
6023	// Add the offset to the base.
6024	Result += RL.getBaseClassOffset(Base: BaseRD);
6025	break;
6026	}
6027	case OffsetOfNode::Identifier:
6028	llvm_unreachable("Dependent OffsetOfExpr?");
6029	}
6030	}
6031
6032	IntResult = Result.getQuantity();
6033
6034	return true;
6035	}
6036
6037	bool SetThreeWayComparisonField(InterpState &S, CodePtr OpPC,
6038	const Pointer &Ptr, const APSInt &IntValue) {
6039
6040	const Record *R = Ptr.getRecord();
6041	assert(R);
6042	assert(R->getNumFields() == `1`);
6043
6044	unsigned FieldOffset = R->getField(I: `0u`)->Offset;
6045	const Pointer &FieldPtr = Ptr.atField(Off: FieldOffset);
6046	PrimType FieldT = *S.getContext().classify(T: FieldPtr.getType());
6047
6048	INT_TYPE_SWITCH(FieldT,
6049	FieldPtr.deref<T>() = T::from(IntValue.getSExtValue()));
6050	FieldPtr.initialize();
6051	return true;
6052	}
6053
6054	static void zeroAll(Pointer &Dest) {
6055	const Descriptor *Desc = Dest.getFieldDesc();
6056
6057	if (Desc->isPrimitive()) {
6058	TYPE_SWITCH(Desc->getPrimType(), {
6059	Dest.deref<T>().~T();
6060	new (&Dest.deref<T>()) T ();
6061	});
6062	return;
6063	}
6064
6065	if (Desc->isRecord()) {
6066	const Record *R = Desc->ElemRecord;
6067	for (const Record::Field &F : R->fields()) {
6068	Pointer FieldPtr = Dest.atField(Off: F.Offset);
6069	zeroAll(Dest&: FieldPtr);
6070	}
6071	return;
6072	}
6073
6074	if (Desc->isPrimitiveArray()) {
6075	for (unsigned I = `0`, N = Desc->getNumElems(); I != N; ++I) {
6076	TYPE_SWITCH(Desc->getPrimType(), {
6077	Dest.deref<T>().~T();
6078	new (&Dest.deref<T>()) T ();
6079	});
6080	}
6081	return;
6082	}
6083
6084	if (Desc->isCompositeArray()) {
6085	for (unsigned I = `0`, N = Desc->getNumElems(); I != N; ++I) {
6086	Pointer ElemPtr = Dest.atIndex(Idx: I).narrow();
6087	zeroAll(Dest&: ElemPtr);
6088	}
6089	return;
6090	}
6091	}
6092
6093	static bool copyComposite(InterpState &S, CodePtr OpPC, const Pointer &Src,
6094	Pointer &Dest, bool Activate);
6095	static bool copyRecord(InterpState &S, CodePtr OpPC, const Pointer &Src,
6096	Pointer &Dest, bool Activate = false) {
6097	[[maybe_unused]] const Descriptor *SrcDesc = Src.getFieldDesc();
6098	const Descriptor *DestDesc = Dest.getFieldDesc();
6099
6100	auto copyField = [&](const Record::Field &F, bool Activate) -> bool {
6101	Pointer DestField = Dest.atField(Off: F.Offset);
6102	if (OptPrimType FT = S.Ctx.classify(T: F.Decl->getType())) {
6103	TYPE_SWITCH(*FT, {
6104	DestField.deref<T>() = Src.atField(F.Offset).deref<T>();
6105	if (Src.atField(F.Offset).isInitialized())
6106	DestField.initialize();
6107	if (Activate)
6108	DestField.activate();
6109	});
6110	return true;
6111	}
6112	// Composite field.
6113	return copyComposite(S, OpPC, Src: Src.atField(Off: F.Offset), Dest&: DestField, Activate);
6114	};
6115
6116	assert(SrcDesc->isRecord());
6117	assert(SrcDesc->ElemRecord == DestDesc->ElemRecord);
6118	const Record *R = DestDesc->ElemRecord;
6119	for (const Record::Field &F : R->fields()) {
6120	if (R->isUnion()) {
6121	// For unions, only copy the active field. Zero all others.
6122	const Pointer &SrcField = Src.atField(Off: F.Offset);
6123	if (SrcField.isActive()) {
6124	if (!copyField (F, /Activate=/true))
6125	return false;
6126	} else {
6127	if (!CheckMutable(S, OpPC, Ptr: Src.atField(Off: F.Offset)))
6128	return false;
6129	Pointer DestField = Dest.atField(Off: F.Offset);
6130	zeroAll(Dest&: DestField);
6131	}
6132	} else {
6133	if (!copyField (F, Activate))
6134	return false;
6135	}
6136	}
6137
6138	for (const Record::Base &B : R->bases()) {
6139	Pointer DestBase = Dest.atField(Off: B.Offset);
6140	if (!copyRecord(S, OpPC, Src: Src.atField(Off: B.Offset), Dest&: DestBase, Activate))
6141	return false;
6142	}
6143
6144	Dest.initialize();
6145	return true;
6146	}
6147
6148	static bool copyComposite(InterpState &S, CodePtr OpPC, const Pointer &Src,
6149	Pointer &Dest, bool Activate = false) {
6150	assert(Src.isLive() && Dest.isLive());
6151
6152	[[maybe_unused]] const Descriptor *SrcDesc = Src.getFieldDesc();
6153	const Descriptor *DestDesc = Dest.getFieldDesc();
6154
6155	assert(!DestDesc->isPrimitive() && !SrcDesc->isPrimitive());
6156
6157	if (DestDesc->isPrimitiveArray()) {
6158	assert(SrcDesc->isPrimitiveArray());
6159	assert(SrcDesc->getNumElems() == DestDesc->getNumElems());
6160	PrimType ET = DestDesc->getPrimType();
6161	for (unsigned I = `0`, N = DestDesc->getNumElems(); I != N; ++I) {
6162	Pointer DestElem = Dest.atIndex(Idx: I);
6163	TYPE_SWITCH(ET, {
6164	DestElem.deref<T>() = Src.elem<T>(I);
6165	DestElem.initialize();
6166	});
6167	}
6168	return true;
6169	}
6170
6171	if (DestDesc->isCompositeArray()) {
6172	assert(SrcDesc->isCompositeArray());
6173	assert(SrcDesc->getNumElems() == DestDesc->getNumElems());
6174	for (unsigned I = `0`, N = DestDesc->getNumElems(); I != N; ++I) {
6175	const Pointer &SrcElem = Src.atIndex(Idx: I).narrow();
6176	Pointer DestElem = Dest.atIndex(Idx: I).narrow();
6177	if (!copyComposite(S, OpPC, Src: SrcElem, Dest&: DestElem, Activate))
6178	return false;
6179	}
6180	return true;
6181	}
6182
6183	if (DestDesc->isRecord())
6184	return copyRecord(S, OpPC, Src, Dest, Activate);
6185	return Invalid(S, OpPC);
6186	}
6187
6188	bool DoMemcpy(InterpState &S, CodePtr OpPC, const Pointer &Src, Pointer &Dest) {
6189	if (!Src.isBlockPointer() \|\| Src.getFieldDesc()->isPrimitive())
6190	return false;
6191	if (!Dest.isBlockPointer() \|\| Dest.getFieldDesc()->isPrimitive())
6192	return false;
6193
6194	return copyComposite(S, OpPC, Src, Dest);
6195	}
6196
6197	} // namespace interp
6198	} // namespace clang
6199

Browse the source code of llvm_projects/clang/lib/AST/ByteCode/InterpBuiltin.cpp