1//===-- NeonEmitter.cpp - Generate arm_neon.h for use with clang ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This tablegen backend is responsible for emitting arm_neon.h, which includes
10// a declaration and definition of each function specified by the ARM NEON
11// compiler interface. See ARM document DUI0348B.
12//
// Each NEON instruction is implemented in terms of one or more functions which
// are suffixed with the element type of the input vectors. Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc., or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
18//
// Additional validation code can also be generated by this backend when
// runHeader() is called, rather than via the normal run() entry point.
21//
22// See also the documentation in include/clang/Basic/arm_neon.td.
23//
24//===----------------------------------------------------------------------===//
25
26#include "TableGenBackends.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/ADT/StringExtras.h"
32#include "llvm/ADT/StringRef.h"
33#include "llvm/Support/Casting.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/raw_ostream.h"
36#include "llvm/TableGen/AArch64ImmCheck.h"
37#include "llvm/TableGen/Error.h"
38#include "llvm/TableGen/Record.h"
39#include "llvm/TableGen/SetTheory.h"
40#include "llvm/TableGen/StringToOffsetTable.h"
41#include <algorithm>
42#include <cassert>
43#include <cctype>
44#include <cstddef>
45#include <cstdint>
46#include <deque>
47#include <map>
48#include <optional>
49#include <set>
50#include <sstream>
51#include <string>
52#include <unordered_map>
53#include <utility>
54#include <vector>
55
56using namespace llvm;
57
58namespace {
59
60// While globals are generally bad, this one allows us to perform assertions
61// liberally and somehow still trace them back to the def they indirectly
62// came from.
63static const Record *CurrentRecord = nullptr;
static void assert_with_loc(bool Assertion, const std::string &Str) {
  if (!Assertion) {
    if (CurrentRecord)
      PrintFatalError(CurrentRecord->getLoc(), Str);
    else
      PrintFatalError(Str);
  }
}
72
73enum ClassKind {
74 ClassNone,
75 ClassI, // generic integer instruction, e.g., "i8" suffix
76 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
77 ClassW, // width-specific instruction, e.g., "8" suffix
78 ClassV, // void-suffix instruction, no suffix
79 ClassB, // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions,
              // but for which we must not emit any suffix in our
              // tests.
  ClassNoTest // Instructions which we do not test since they are
              // not true instructions.
85};
86
87/// NeonTypeFlags - Flags to identify the types for overloaded Neon
88/// builtins. These must be kept in sync with the flags in
89/// include/clang/Basic/TargetBuiltins.h.
90namespace NeonTypeFlags {
91
92enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 };
93
94enum EltType {
95 Int8,
96 Int16,
97 Int32,
98 Int64,
99 Poly8,
100 Poly16,
101 Poly64,
102 Poly128,
103 Float16,
104 Float32,
105 Float64,
106 BFloat16,
107 MFloat8
108};
109
110} // end namespace NeonTypeFlags
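
// For illustration only (not emitted into any header): a full NeonTypeFlags
// value combines the element type with the Unsigned/Quad bits above. Assuming
// the enum ordering shown, a uint32x4_t would be encoded as
//   NeonTypeFlags::Int32 | NeonTypeFlags::UnsignedFlag | NeonTypeFlags::QuadFlag
// i.e. 2 | 0x10 | 0x20 == 0x32.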
111
112class NeonEmitter;
113
114//===----------------------------------------------------------------------===//
115// TypeSpec
116//===----------------------------------------------------------------------===//
117
118/// A TypeSpec is just a simple wrapper around a string, but gets its own type
119/// for strong typing purposes.
120///
121/// A TypeSpec can be used to create a type.
122class TypeSpec : public std::string {
123public:
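  /// Split a compound typespec string into its per-type components. Each
  /// lowercase base-type character closes one TypeSpec, keeping any modifier
  /// characters that preceded it. As a sketch (not a call this backend makes
  /// verbatim), fromTypeSpecs("csUcUs") yields {"c", "s", "Uc", "Us"}.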
  static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
    std::vector<TypeSpec> Ret;
    TypeSpec Acc;
    for (char I : Str.str()) {
      if (islower(I)) {
        Acc.push_back(I);
        Ret.push_back(TypeSpec(Acc));
        Acc.clear();
      } else {
        Acc.push_back(I);
      }
    }
    return Ret;
  }
138};
139
140//===----------------------------------------------------------------------===//
141// Type
142//===----------------------------------------------------------------------===//
143
144/// A Type. Not much more to say here.
145class Type {
146private:
147 TypeSpec TS;
148
149 enum TypeKind { Void, Float, SInt, UInt, Poly, BFloat16, MFloat8, FPM };
150 TypeKind Kind;
151 bool Immediate, Constant, Pointer;
152 // ScalarForMangling and NoManglingQ are really not suited to live here as
153 // they are not related to the type. But they live in the TypeSpec (not the
154 // prototype), so this is really the only place to store them.
155 bool ScalarForMangling, NoManglingQ;
156 unsigned Bitwidth, ElementBitwidth, NumVectors;
157
158public:
159 Type()
160 : Kind(Void), Immediate(false), Constant(false),
161 Pointer(false), ScalarForMangling(false), NoManglingQ(false),
162 Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}
163
164 Type(TypeSpec TS, StringRef CharMods)
165 : TS(std::move(TS)), Kind(Void), Immediate(false),
166 Constant(false), Pointer(false), ScalarForMangling(false),
167 NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
168 applyModifiers(Mods: CharMods);
169 }
170
171 /// Returns a type representing "void".
172 static Type getVoid() { return Type(); }
173
174 bool operator==(const Type &Other) const { return str() == Other.str(); }
175 bool operator!=(const Type &Other) const { return !operator==(Other); }
176
177 //
178 // Query functions
179 //
180 bool isScalarForMangling() const { return ScalarForMangling; }
181 bool noManglingQ() const { return NoManglingQ; }
182
183 bool isPointer() const { return Pointer; }
184 bool isValue() const { return !isVoid() && !isPointer(); }
185 bool isScalar() const { return isValue() && NumVectors == 0; }
186 bool isVector() const { return isValue() && NumVectors > 0; }
187 bool isConstPointer() const { return Constant; }
188 bool isFloating() const { return Kind == Float; }
189 bool isInteger() const { return Kind == SInt || Kind == UInt; }
190 bool isPoly() const { return Kind == Poly; }
191 bool isSigned() const { return Kind == SInt; }
192 bool isImmediate() const { return Immediate; }
193 bool isFloat() const { return isFloating() && ElementBitwidth == 32; }
194 bool isDouble() const { return isFloating() && ElementBitwidth == 64; }
195 bool isHalf() const { return isFloating() && ElementBitwidth == 16; }
196 bool isChar() const { return ElementBitwidth == 8; }
197 bool isShort() const { return isInteger() && ElementBitwidth == 16; }
198 bool isInt() const { return isInteger() && ElementBitwidth == 32; }
199 bool isLong() const { return isInteger() && ElementBitwidth == 64; }
200 bool isVoid() const { return Kind == Void; }
201 bool isBFloat16() const { return Kind == BFloat16; }
202 bool isMFloat8() const { return Kind == MFloat8; }
203 bool isFPM() const { return Kind == FPM; }
204 unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
205 unsigned getSizeInBits() const { return Bitwidth; }
206 unsigned getElementSizeInBits() const { return ElementBitwidth; }
207 unsigned getNumVectors() const { return NumVectors; }
208
209 //
210 // Mutator functions
211 //
212 void makeUnsigned() {
213 assert(!isVoid() && "not a potentially signed type");
214 Kind = UInt;
215 }
216 void makeSigned() {
217 assert(!isVoid() && "not a potentially signed type");
218 Kind = SInt;
219 }
220
221 void makeInteger(unsigned ElemWidth, bool Sign) {
222 assert(!isVoid() && "converting void to int probably not useful");
223 Kind = Sign ? SInt : UInt;
224 Immediate = false;
225 ElementBitwidth = ElemWidth;
226 }
227
228 void makeImmediate(unsigned ElemWidth) {
229 Kind = SInt;
230 Immediate = true;
231 ElementBitwidth = ElemWidth;
232 }
233
234 void makeScalar() {
235 Bitwidth = ElementBitwidth;
236 NumVectors = 0;
237 }
238
239 void makeOneVector() {
240 assert(isVector());
241 NumVectors = 1;
242 }
243
244 void make32BitElement() {
245 assert_with_loc(Assertion: Bitwidth > 32, Str: "Not enough bits to make it 32!");
246 ElementBitwidth = 32;
247 }
248
249 void doubleLanes() {
250 assert_with_loc(Assertion: Bitwidth != 128, Str: "Can't get bigger than 128!");
251 Bitwidth = 128;
252 }
253
254 void halveLanes() {
255 assert_with_loc(Assertion: Bitwidth != 64, Str: "Can't get smaller than 64!");
256 Bitwidth = 64;
257 }
258
259 /// Return the C string representation of a type, which is the typename
260 /// defined in stdint.h or arm_neon.h.
261 std::string str() const;
262
263 /// Return the string representation of a type, which is an encoded
264 /// string for passing to the BUILTIN() macro in Builtins.def.
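  /// For example (illustrative, given the encodings in builtin_str()):
  /// a uint32x4_t is encoded as "V4Ui" - a vector of 4 ("V4") unsigned ("U")
  /// 32-bit ints ("i").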
265 std::string builtin_str() const;
266
267 /// Return the value in NeonTypeFlags for this type.
268 unsigned getNeonEnum() const;
269
270 /// Parse a type from a stdint.h or arm_neon.h typedef name,
271 /// for example uint32x2_t or int64_t.
272 static Type fromTypedefName(StringRef Name);
273
274private:
275 /// Creates the type based on the typespec string in TS.
276 /// Sets "Quad" to true if the "Q" or "H" modifiers were
277 /// seen. This is needed by applyModifier as some modifiers
278 /// only take effect if the type size was changed by "Q" or "H".
279 void applyTypespec(bool &Quad);
280 /// Applies prototype modifiers to the type.
281 void applyModifiers(StringRef Mods);
282};
283
284//===----------------------------------------------------------------------===//
285// Variable
286//===----------------------------------------------------------------------===//
287
288/// A variable is a simple class that just has a type and a name.
289class Variable {
290 Type T;
291 std::string N;
292
293public:
294 Variable() : T(Type::getVoid()) {}
295 Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}
296
297 Type getType() const { return T; }
298 std::string getName() const { return "__" + N; }
299};
300
301//===----------------------------------------------------------------------===//
302// Intrinsic
303//===----------------------------------------------------------------------===//
304
305/// The main grunt class. This represents an instantiation of an intrinsic with
306/// a particular typespec and prototype.
307class Intrinsic {
308 /// The Record this intrinsic was created from.
309 const Record *R;
310 /// The unmangled name.
311 std::string Name;
312 /// The input and output typespecs. InTS == OutTS except when
313 /// CartesianProductWith is non-empty - this is the case for vreinterpret.
314 TypeSpec OutTS, InTS;
315 /// The base class kind. Most intrinsics use ClassS, which has full type
316 /// info for integers (s32/u32). Some use ClassI, which doesn't care about
317 /// signedness (i32), while some (ClassB) have no type at all, only a width
318 /// (32).
319 ClassKind CK;
320 /// The list of DAGs for the body. May be empty, in which case we should
321 /// emit a builtin call.
322 const ListInit *Body;
323 /// The architectural ifdef guard.
324 std::string ArchGuard;
325 /// The architectural target() guard.
326 std::string TargetGuard;
327 /// Set if the Unavailable bit is 1. This means we don't generate a body,
328 /// just an "unavailable" attribute on a declaration.
329 bool IsUnavailable;
  /// Is this intrinsic safe for big-endian, or does it need its arguments
  /// reversed?
332 bool BigEndianSafe;
333
  /// The types of the return value [0] and the parameters [1..].
335 std::vector<Type> Types;
336
337 SmallVector<ImmCheck, 2> ImmChecks;
338 /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
339 int PolymorphicKeyType;
340 /// The local variables defined.
341 std::map<std::string, Variable, std::less<>> Variables;
342 /// NeededEarly - set if any other intrinsic depends on this intrinsic.
343 bool NeededEarly;
344 /// UseMacro - set if we should implement using a macro or unset for a
345 /// function.
346 bool UseMacro;
347 /// The set of intrinsics that this intrinsic uses/requires.
348 std::set<Intrinsic *> Dependencies;
349 /// The "base type", which is Type('d', OutTS). InBaseType is only
350 /// different if CartesianProductWith is non-empty (for vreinterpret).
351 Type BaseType, InBaseType;
352 /// The return variable.
353 Variable RetVar;
354 /// A postfix to apply to every variable. Defaults to "".
355 std::string VariablePostfix;
356
357 NeonEmitter &Emitter;
358 std::stringstream OS;
359
360 bool isBigEndianSafe() const {
361 if (BigEndianSafe)
362 return true;
363
364 for (const auto &T : Types){
365 if (T.isVector() && T.getNumElements() > 1)
366 return false;
367 }
368 return true;
369 }
370
371public:
372 Intrinsic(const Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
373 TypeSpec InTS, ClassKind CK, const ListInit *Body,
374 NeonEmitter &Emitter, StringRef ArchGuard, StringRef TargetGuard,
375 bool IsUnavailable, bool BigEndianSafe)
376 : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
377 ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()),
378 IsUnavailable(IsUnavailable), BigEndianSafe(BigEndianSafe),
379 PolymorphicKeyType(0), NeededEarly(false), UseMacro(false),
380 BaseType(OutTS, "."), InBaseType(InTS, "."), Emitter(Emitter) {
381 // Modify the TypeSpec per-argument to get a concrete Type, and create
382 // known variables for each.
383 // Types[0] is the return value.
384 unsigned Pos = 0;
385 Types.emplace_back(args&: OutTS, args: getNextModifiers(Proto, Pos));
386 StringRef Mods = getNextModifiers(Proto, Pos);
387 while (!Mods.empty()) {
388 Types.emplace_back(args&: InTS, args&: Mods);
389 if (Mods.contains(C: '!'))
390 PolymorphicKeyType = Types.size() - 1;
391
392 Mods = getNextModifiers(Proto, Pos);
393 }
394
395 for (const auto &Type : Types) {
396 // If this builtin takes an immediate argument, we need to #define it rather
397 // than use a standard declaration, so that SemaChecking can range check
398 // the immediate passed by the user.
399
400 // Pointer arguments need to use macros to avoid hiding aligned attributes
401 // from the pointer type.
402
403 // It is not permitted to pass or return an __fp16 by value, so intrinsics
404 // taking a scalar float16_t must be implemented as macros.
405 if (Type.isImmediate() || Type.isPointer() ||
406 (Type.isScalar() && Type.isHalf()))
407 UseMacro = true;
408 }
409
410 int ArgIdx, Kind, TypeArgIdx;
411 for (const Record *I : R->getValueAsListOfDefs(FieldName: "ImmChecks")) {
412 unsigned EltSizeInBits = 0, VecSizeInBits = 0;
413
414 ArgIdx = I->getValueAsInt(FieldName: "ImmArgIdx");
415 TypeArgIdx = I->getValueAsInt(FieldName: "TypeContextArgIdx");
416 Kind = I->getValueAsDef(FieldName: "Kind")->getValueAsInt(FieldName: "Value");
417
418 assert((ArgIdx >= 0 && Kind >= 0) &&
419 "ImmArgIdx and Kind must be nonnegative");
420
421 if (TypeArgIdx >= 0) {
422 Type ContextType = getParamType(I: TypeArgIdx);
423
        // Element size cannot be set for intrinsics that map to polymorphic
        // builtins.
426 if (CK != ClassB)
427 EltSizeInBits = ContextType.getElementSizeInBits();
428
429 VecSizeInBits = ContextType.getSizeInBits();
430 }
431
432 ImmChecks.emplace_back(Args&: ArgIdx, Args&: Kind, Args&: EltSizeInBits, Args&: VecSizeInBits);
433 }
434 sort(Start: ImmChecks.begin(), End: ImmChecks.end(),
435 Comp: [](const ImmCheck &a, const ImmCheck &b) {
436 return a.getImmArgIdx() < b.getImmArgIdx();
437 }); // Sort for comparison with other intrinsics which map to the
438 // same builtin
439 }
440
  /// Get the Record that this intrinsic is based on.
442 const Record *getRecord() const { return R; }
  /// Get the set of Intrinsics that this intrinsic calls.
  /// This is the set of immediate dependencies, NOT the
  /// transitive closure.
446 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
447 /// Get the architectural guard string (#ifdef).
448 std::string getArchGuard() const { return ArchGuard; }
449 std::string getTargetGuard() const { return TargetGuard; }
450 ArrayRef<ImmCheck> getImmChecks() const { return ImmChecks; }
451 /// Get the non-mangled name.
452 std::string getName() const { return Name; }
453
454 /// Return true if the intrinsic takes an immediate operand.
455 bool hasImmediate() const {
456 return any_of(Range: Types, P: [](const Type &T) { return T.isImmediate(); });
457 }
458
  // Return true if the supplied argument is an immediate.
460 bool isArgImmediate(unsigned idx) const {
461 return Types[idx + 1].isImmediate();
462 }
463
464 unsigned getNumParams() const { return Types.size() - 1; }
465 Type getReturnType() const { return Types[0]; }
466 Type getParamType(unsigned I) const { return Types[I + 1]; }
467 Type getBaseType() const { return BaseType; }
468 Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; }
469
470 /// Return true if the prototype has a scalar argument.
471 bool protoHasScalar() const;
472
473 /// Return the index that parameter PIndex will sit at
474 /// in a generated function call. This is often just PIndex,
475 /// but may not be as things such as multiple-vector operands
476 /// and sret parameters need to be taken into account.
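  /// For example (a sketch): if the return type is a 2-vector struct (passed
  /// as sret) and parameter 0 is itself a 2-vector struct, parameter 1 lands
  /// at generated index 3 (1 for the sret slot plus 2 subvectors).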
477 unsigned getGeneratedParamIdx(unsigned PIndex) {
478 unsigned Idx = 0;
479 if (getReturnType().getNumVectors() > 1)
480 // Multiple vectors are passed as sret.
481 ++Idx;
482
483 for (unsigned I = 0; I < PIndex; ++I)
484 Idx += std::max(a: 1U, b: getParamType(I).getNumVectors());
485
486 return Idx;
487 }
488
489 bool hasBody() const { return Body && !Body->empty(); }
490
491 void setNeededEarly() { NeededEarly = true; }
492
493 bool operator<(const Intrinsic &Other) const {
494 // Sort lexicographically on a three-tuple (ArchGuard, TargetGuard, Name)
495 return std::tie(args: ArchGuard, args: TargetGuard, args: Name) <
496 std::tie(args: Other.ArchGuard, args: Other.TargetGuard, args: Other.Name);
497 }
498
499 ClassKind getClassKind(bool UseClassBIfScalar = false) {
500 if (UseClassBIfScalar && !protoHasScalar())
501 return ClassB;
502 return CK;
503 }
504
505 /// Return the name, mangled with type information.
506 /// If ForceClassS is true, use ClassS (u32/s32) instead
507 /// of the intrinsic's own type class.
508 std::string getMangledName(bool ForceClassS = false) const;
509 /// Return the type code for a builtin function call.
510 std::string getInstTypeCode(Type T, ClassKind CK) const;
511 /// Return the type string for a BUILTIN() macro in Builtins.def.
512 std::string getBuiltinTypeStr();
513
514 /// Generate the intrinsic, returning code.
515 std::string generate();
516 /// Perform type checking and populate the dependency graph, but
517 /// don't generate code yet.
518 void indexBody();
519
520private:
521 StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const;
522
523 std::string mangleName(std::string Name, ClassKind CK) const;
524
525 void initVariables();
526 std::string replaceParamsIn(std::string S);
527
528 void emitBodyAsBuiltinCall();
529
530 void generateImpl(bool ReverseArguments,
531 StringRef NamePrefix, StringRef CallPrefix);
532 void emitReturn();
533 void emitBody(StringRef CallPrefix);
534 void emitShadowedArgs();
535 void emitArgumentReversal();
536 void emitReturnVarDecl();
537 void emitReturnReversal();
538 void emitReverseVariable(Variable &Dest, Variable &Src);
539 void emitNewLine();
540 void emitClosingBrace();
541 void emitOpeningBrace();
542 void emitPrototype(StringRef NamePrefix);
543
544 class DagEmitter {
545 Intrinsic &Intr;
546 StringRef CallPrefix;
547
548 public:
549 DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
550 Intr(Intr), CallPrefix(CallPrefix) {
551 }
552 std::pair<Type, std::string> emitDagArg(const Init *Arg,
553 std::string ArgName);
554 std::pair<Type, std::string> emitDagSaveTemp(const DagInit *DI);
555 std::pair<Type, std::string> emitDagSplat(const DagInit *DI);
556 std::pair<Type, std::string> emitDagDup(const DagInit *DI);
557 std::pair<Type, std::string> emitDagDupTyped(const DagInit *DI);
558 std::pair<Type, std::string> emitDagShuffle(const DagInit *DI);
559 std::pair<Type, std::string> emitDagCast(const DagInit *DI, bool IsBitCast);
560 std::pair<Type, std::string> emitDagCall(const DagInit *DI,
561 bool MatchMangledName);
562 std::pair<Type, std::string> emitDagNameReplace(const DagInit *DI);
563 std::pair<Type, std::string> emitDagLiteral(const DagInit *DI);
564 std::pair<Type, std::string> emitDagOp(const DagInit *DI);
565 std::pair<Type, std::string> emitDag(const DagInit *DI);
566 };
567};
568
569//===----------------------------------------------------------------------===//
570// NeonEmitter
571//===----------------------------------------------------------------------===//
572
573class NeonEmitter {
574 const RecordKeeper &Records;
575 DenseMap<const Record *, ClassKind> ClassMap;
576 std::map<std::string, std::deque<Intrinsic>, std::less<>> IntrinsicMap;
577 unsigned UniqueNumber;
578
579 void createIntrinsic(const Record *R, SmallVectorImpl<Intrinsic *> &Out);
580 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
581 void genStreamingSVECompatibleList(raw_ostream &OS,
582 SmallVectorImpl<Intrinsic *> &Defs);
583 void genOverloadTypeCheckCode(raw_ostream &OS,
584 SmallVectorImpl<Intrinsic *> &Defs);
585 bool areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
586 const ArrayRef<ImmCheck> ChecksB);
587 void genIntrinsicRangeCheckCode(raw_ostream &OS,
588 SmallVectorImpl<Intrinsic *> &Defs);
589
590public:
591 /// Called by Intrinsic - this attempts to get an intrinsic that takes
592 /// the given types as arguments.
593 Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types,
594 std::optional<std::string> MangledName);
595
596 /// Called by Intrinsic - returns a globally-unique number.
597 unsigned getUniqueNumber() { return UniqueNumber++; }
598
599 NeonEmitter(const RecordKeeper &R) : Records(R), UniqueNumber(0) {
600 const Record *SI = R.getClass(Name: "SInst");
601 const Record *II = R.getClass(Name: "IInst");
602 const Record *WI = R.getClass(Name: "WInst");
603 const Record *VI = R.getClass(Name: "VInst");
604 const Record *SOpI = R.getClass(Name: "SOpInst");
605 const Record *IOpI = R.getClass(Name: "IOpInst");
606 const Record *WOpI = R.getClass(Name: "WOpInst");
607 const Record *LOpI = R.getClass(Name: "LOpInst");
608 const Record *NoTestOpI = R.getClass(Name: "NoTestOpInst");
609
610 ClassMap[SI] = ClassS;
611 ClassMap[II] = ClassI;
612 ClassMap[WI] = ClassW;
613 ClassMap[VI] = ClassV;
614 ClassMap[SOpI] = ClassS;
615 ClassMap[IOpI] = ClassI;
616 ClassMap[WOpI] = ClassW;
617 ClassMap[LOpI] = ClassL;
618 ClassMap[NoTestOpI] = ClassNoTest;
619 }
620
621 // Emit arm_neon.h.inc
622 void run(raw_ostream &o);
623
624 // Emit arm_fp16.h.inc
625 void runFP16(raw_ostream &o);
626
627 // Emit arm_bf16.h.inc
628 void runBF16(raw_ostream &o);
629
630 void runVectorTypes(raw_ostream &o);
631
632 // Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and
633 // arm_bf16.h
634 void runHeader(raw_ostream &o);
635};
636
637} // end anonymous namespace
638
639//===----------------------------------------------------------------------===//
640// Type implementation
641//===----------------------------------------------------------------------===//
642
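// A rough example of the mapping below (not exhaustive): a 128-bit vector of
// unsigned 32-bit elements prints as "uint32x4_t", and with the Constant and
// Pointer bits set it prints as "uint32x4_t const *".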
643std::string Type::str() const {
644 if (isVoid())
645 return "void";
646 if (isFPM())
647 return "fpm_t";
648
649 std::string S;
650
651 if (isInteger() && !isSigned())
652 S += "u";
653
654 if (isPoly())
655 S += "poly";
656 else if (isFloating())
657 S += "float";
658 else if (isBFloat16())
659 S += "bfloat";
660 else if (isMFloat8())
661 S += "mfloat";
662 else
663 S += "int";
664
665 S += utostr(X: ElementBitwidth);
666 if (isVector())
667 S += "x" + utostr(X: getNumElements());
668 if (NumVectors > 1)
669 S += "x" + utostr(X: NumVectors);
670 S += "_t";
671
672 if (Constant)
673 S += " const";
674 if (Pointer)
675 S += " *";
676
677 return S;
678}
679
680std::string Type::builtin_str() const {
681 std::string S;
682 if (isVoid())
683 return "v";
684
685 if (isPointer()) {
686 // All pointers are void pointers.
687 S = "v";
688 if (isConstPointer())
689 S += "C";
690 S += "*";
691 return S;
692 } else if (isInteger())
693 switch (ElementBitwidth) {
694 case 8: S += "c"; break;
695 case 16: S += "s"; break;
696 case 32: S += "i"; break;
697 case 64: S += "Wi"; break;
698 case 128: S += "LLLi"; break;
699 default: llvm_unreachable("Unhandled case!");
700 }
701 else if (isBFloat16()) {
702 assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits");
703 S += "y";
704 } else if (isMFloat8()) {
705 assert(ElementBitwidth == 8 && "MFloat8 can only be 8 bits");
706 S += "m";
707 } else if (isFPM()) {
708 S += "UWi";
709 } else
710 switch (ElementBitwidth) {
711 case 16: S += "h"; break;
712 case 32: S += "f"; break;
713 case 64: S += "d"; break;
714 default: llvm_unreachable("Unhandled case!");
715 }
716
  // FIXME: Is this necessary?
718 if (isChar() && !isPointer() && isSigned())
719 // Make chars explicitly signed.
720 S = "S" + S;
721 else if (isInteger() && !isSigned())
722 S = "U" + S;
723
724 // Constant indices are "int", but have the "constant expression" modifier.
725 if (isImmediate()) {
726 assert(isInteger() && isSigned());
727 S = "I" + S;
728 }
729
730 if (isScalar())
731 return S;
732
733 std::string Ret;
734 for (unsigned I = 0; I < NumVectors; ++I)
735 Ret += "V" + utostr(X: getNumElements()) + S;
736
737 return Ret;
738}
739
740unsigned Type::getNeonEnum() const {
741 unsigned Addend;
742 switch (ElementBitwidth) {
743 case 8: Addend = 0; break;
744 case 16: Addend = 1; break;
745 case 32: Addend = 2; break;
746 case 64: Addend = 3; break;
747 case 128: Addend = 4; break;
748 default: llvm_unreachable("Unhandled element bitwidth!");
749 }
750
751 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
752 if (isPoly()) {
753 // Adjustment needed because Poly32 doesn't exist.
754 if (Addend >= 2)
755 --Addend;
756 Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
757 }
758 if (isFloating()) {
759 assert(Addend != 0 && "Float8 doesn't exist!");
760 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
761 }
762
763 if (isBFloat16()) {
764 assert(Addend == 1 && "BFloat16 is only 16 bit");
765 Base = (unsigned)NeonTypeFlags::BFloat16;
766 }
767
768 if (isMFloat8()) {
769 Base = (unsigned)NeonTypeFlags::MFloat8;
770 }
771
772 if (Bitwidth == 128)
773 Base |= (unsigned)NeonTypeFlags::QuadFlag;
774 if (isInteger() && !isSigned())
775 Base |= (unsigned)NeonTypeFlags::UnsignedFlag;
776
777 return Base;
778}
779
780Type Type::fromTypedefName(StringRef Name) {
781 Type T;
782 T.Kind = SInt;
783
784 if (Name.consume_front(Prefix: "u"))
785 T.Kind = UInt;
786
787 if (Name.consume_front(Prefix: "float")) {
788 T.Kind = Float;
789 } else if (Name.consume_front(Prefix: "poly")) {
790 T.Kind = Poly;
791 } else if (Name.consume_front(Prefix: "bfloat")) {
792 T.Kind = BFloat16;
793 } else if (Name.consume_front(Prefix: "mfloat")) {
794 T.Kind = MFloat8;
795 } else {
796 assert(Name.starts_with("int"));
797 Name = Name.drop_front(N: 3);
798 }
799
800 unsigned I = 0;
801 for (I = 0; I < Name.size(); ++I) {
802 if (!isdigit(Name[I]))
803 break;
804 }
805 Name.substr(Start: 0, N: I).getAsInteger(Radix: 10, Result&: T.ElementBitwidth);
806 Name = Name.drop_front(N: I);
807
808 T.Bitwidth = T.ElementBitwidth;
809 T.NumVectors = 1;
810
811 if (Name.consume_front(Prefix: "x")) {
812 unsigned I = 0;
813 for (I = 0; I < Name.size(); ++I) {
814 if (!isdigit(Name[I]))
815 break;
816 }
817 unsigned NumLanes;
818 Name.substr(Start: 0, N: I).getAsInteger(Radix: 10, Result&: NumLanes);
819 Name = Name.drop_front(N: I);
820 T.Bitwidth = T.ElementBitwidth * NumLanes;
821 } else {
822 // Was scalar.
823 T.NumVectors = 0;
824 }
825 if (Name.consume_front(Prefix: "x")) {
826 unsigned I = 0;
827 for (I = 0; I < Name.size(); ++I) {
828 if (!isdigit(Name[I]))
829 break;
830 }
831 Name.substr(Start: 0, N: I).getAsInteger(Radix: 10, Result&: T.NumVectors);
832 Name = Name.drop_front(N: I);
833 }
834
835 assert(Name.starts_with("_t") && "Malformed typedef!");
836 return T;
837}
838
839void Type::applyTypespec(bool &Quad) {
840 std::string S = TS;
841 ScalarForMangling = false;
842 Kind = SInt;
843 ElementBitwidth = ~0U;
844 NumVectors = 1;
845
846 for (char I : S) {
847 switch (I) {
848 case 'S':
849 ScalarForMangling = true;
850 break;
851 case 'H':
852 NoManglingQ = true;
853 Quad = true;
854 break;
855 case 'Q':
856 Quad = true;
857 break;
858 case 'P':
859 Kind = Poly;
860 break;
861 case 'U':
862 Kind = UInt;
863 break;
864 case 'c':
865 ElementBitwidth = 8;
866 break;
867 case 'h':
868 Kind = Float;
869 [[fallthrough]];
870 case 's':
871 ElementBitwidth = 16;
872 break;
873 case 'f':
874 Kind = Float;
875 [[fallthrough]];
876 case 'i':
877 ElementBitwidth = 32;
878 break;
879 case 'd':
880 Kind = Float;
881 [[fallthrough]];
882 case 'l':
883 ElementBitwidth = 64;
884 break;
885 case 'k':
886 ElementBitwidth = 128;
887 // Poly doesn't have a 128x1 type.
888 if (isPoly())
889 NumVectors = 0;
890 break;
891 case 'b':
892 Kind = BFloat16;
893 ElementBitwidth = 16;
894 break;
895 case 'm':
896 Kind = MFloat8;
897 ElementBitwidth = 8;
898 break;
899 default:
900 llvm_unreachable("Unhandled type code!");
901 }
902 }
903 assert(ElementBitwidth != ~0U && "Bad element bitwidth!");
904
905 Bitwidth = Quad ? 128 : 64;
906}
907
908void Type::applyModifiers(StringRef Mods) {
909 bool AppliedQuad = false;
910 applyTypespec(Quad&: AppliedQuad);
911
912 for (char Mod : Mods) {
913 switch (Mod) {
914 case '.':
915 break;
916 case 'v':
917 Kind = Void;
918 break;
919 case 'S':
920 Kind = SInt;
921 break;
922 case 'U':
923 Kind = UInt;
924 break;
925 case 'B':
926 Kind = BFloat16;
927 ElementBitwidth = 16;
928 break;
929 case 'F':
930 Kind = Float;
931 break;
932 case 'P':
933 Kind = Poly;
934 break;
935 case 'V':
936 Kind = FPM;
937 Bitwidth = ElementBitwidth = 64;
938 NumVectors = 0;
939 Immediate = Constant = Pointer = false;
940 ScalarForMangling = NoManglingQ = true;
941 break;
942 case '>':
943 assert(ElementBitwidth < 128);
944 ElementBitwidth *= 2;
945 break;
946 case '<':
947 assert(ElementBitwidth > 8);
948 ElementBitwidth /= 2;
949 break;
950 case '1':
951 NumVectors = 0;
952 break;
953 case '2':
954 NumVectors = 2;
955 break;
956 case '3':
957 NumVectors = 3;
958 break;
959 case '4':
960 NumVectors = 4;
961 break;
962 case '*':
963 Pointer = true;
964 break;
965 case 'c':
966 Constant = true;
967 break;
968 case 'Q':
969 Bitwidth = 128;
970 break;
971 case 'q':
972 Bitwidth = 64;
973 break;
974 case 'I':
975 Kind = SInt;
976 ElementBitwidth = Bitwidth = 32;
977 NumVectors = 0;
978 Immediate = true;
979 break;
980 case 'p':
981 if (isPoly())
982 Kind = UInt;
983 break;
984 case '!':
985 // Key type, handled elsewhere.
986 break;
987 default:
988 llvm_unreachable("Unhandled character!");
989 }
990 }
991}
992
993//===----------------------------------------------------------------------===//
994// Intrinsic implementation
995//===----------------------------------------------------------------------===//
996
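// Illustrative only: for a prototype string such as "v(c*)I", successive calls
// return "v", then "c*", then "I", and finally an empty StringRef once Pos
// reaches the end of the string. Parenthesised groups come back as one
// combined modifier set.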
997StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const {
998 if (Proto.size() == Pos)
999 return StringRef();
1000 else if (Proto[Pos] != '(')
1001 return Proto.substr(Start: Pos++, N: 1);
1002
1003 size_t Start = Pos + 1;
1004 size_t End = Proto.find(C: ')', From: Start);
1005 assert_with_loc(Assertion: End != StringRef::npos, Str: "unmatched modifier group paren");
1006 Pos = End + 1;
1007 return Proto.slice(Start, End);
1008}
1009
1010std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
1011 char typeCode = '\0';
1012 bool printNumber = true;
1013
1014 if (CK == ClassB && TargetGuard == "neon")
1015 return "";
1016
1017 if (this->CK == ClassV)
1018 return "";
1019
1020 if (T.isBFloat16())
1021 return "bf16";
1022
1023 if (T.isMFloat8())
1024 return "mf8";
1025
1026 if (T.isPoly())
1027 typeCode = 'p';
1028 else if (T.isInteger())
1029 typeCode = T.isSigned() ? 's' : 'u';
1030 else
1031 typeCode = 'f';
1032
1033 if (CK == ClassI) {
1034 switch (typeCode) {
1035 default:
1036 break;
1037 case 's':
1038 case 'u':
1039 case 'p':
1040 typeCode = 'i';
1041 break;
1042 }
1043 }
1044 if (CK == ClassB && TargetGuard == "neon") {
1045 typeCode = '\0';
1046 }
1047
1048 std::string S;
1049 if (typeCode != '\0')
1050 S.push_back(c: typeCode);
1051 if (printNumber)
1052 S += utostr(X: T.getElementSizeInBits());
1053
1054 return S;
1055}
1056
1057std::string Intrinsic::getBuiltinTypeStr() {
1058 ClassKind LocalCK = getClassKind(UseClassBIfScalar: true);
1059 std::string S;
1060
1061 Type RetT = getReturnType();
1062 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
1063 !RetT.isFloating() && !RetT.isBFloat16() && !RetT.isMFloat8())
1064 RetT.makeInteger(ElemWidth: RetT.getElementSizeInBits(), Sign: false);
1065
1066 // Since the return value must be one type, return a vector type of the
1067 // appropriate width which we will bitcast. An exception is made for
1068 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
1069 // fashion, storing them to a pointer arg.
1070 if (RetT.getNumVectors() > 1) {
1071 S += "vv*"; // void result with void* first argument
1072 } else {
1073 if (RetT.isPoly())
1074 RetT.makeInteger(ElemWidth: RetT.getElementSizeInBits(), Sign: false);
1075 if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned())
1076 RetT.makeSigned();
1077
1078 if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar())
1079 // Cast to vector of 8-bit elements.
1080 RetT.makeInteger(ElemWidth: 8, Sign: true);
1081
1082 S += RetT.builtin_str();
1083 }
1084
1085 for (unsigned I = 0; I < getNumParams(); ++I) {
1086 Type T = getParamType(I);
1087 if (T.isPoly())
1088 T.makeInteger(ElemWidth: T.getElementSizeInBits(), Sign: false);
1089
1090 if (LocalCK == ClassB && !T.isScalar())
1091 T.makeInteger(ElemWidth: 8, Sign: true);
1092 // Halves always get converted to 8-bit elements.
1093 if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
1094 T.makeInteger(ElemWidth: 8, Sign: true);
1095
1096 if (LocalCK == ClassI && T.isInteger())
1097 T.makeSigned();
1098
1099 if (isArgImmediate(idx: I))
1100 T.makeImmediate(ElemWidth: 32);
1101
1102 S += T.builtin_str();
1103 }
1104
1105 // Extra constant integer to hold type class enum for this function, e.g. s8
1106 if (LocalCK == ClassB)
1107 S += "i";
1108
1109 return S;
1110}
1111
1112std::string Intrinsic::getMangledName(bool ForceClassS) const {
1113 // Check if the prototype has a scalar operand with the type of the vector
1114 // elements. If not, bitcasting the args will take care of arg checking.
1115 // The actual signedness etc. will be taken care of with special enums.
1116 ClassKind LocalCK = CK;
1117 if (!protoHasScalar())
1118 LocalCK = ClassB;
1119
1120 return mangleName(Name, CK: ForceClassS ? ClassS : LocalCK);
1121}
1122
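// A worked example of the mangling below (hedged, assuming a 128-bit u32 base
// type with ClassS): mangleName("vadd", ClassS) appends the type suffix to get
// "vadd_u32" and then inserts the 'q' before the first '_', yielding
// "vaddq_u32".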
1123std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
1124 std::string typeCode = getInstTypeCode(T: BaseType, CK: LocalCK);
1125 std::string S = Name;
1126
1127 if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||
1128 Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" ||
1129 Name == "vcvt_f32_bf16")
1130 return Name;
1131
1132 if (!typeCode.empty()) {
1133 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
1134 if (Name.size() >= 3 && isdigit(Name.back()) &&
1135 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
1136 S.insert(pos1: S.length() - 3, str: "_" + typeCode);
1137 else
1138 S += "_" + typeCode;
1139 }
1140
1141 if (BaseType != InBaseType) {
    // A reinterpret - write out the input base type at the end.
1143 S += "_" + getInstTypeCode(T: InBaseType, CK: LocalCK);
1144 }
1145
1146 if (LocalCK == ClassB && TargetGuard == "neon")
1147 S += "_v";
1148
1149 // Insert a 'q' before the first '_' character so that it ends up before
1150 // _lane or _n on vector-scalar operations.
1151 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
1152 size_t Pos = S.find(c: '_');
1153 S.insert(pos: Pos, s: "q");
1154 }
1155
1156 char Suffix = '\0';
1157 if (BaseType.isScalarForMangling()) {
1158 switch (BaseType.getElementSizeInBits()) {
1159 case 8: Suffix = 'b'; break;
1160 case 16: Suffix = 'h'; break;
1161 case 32: Suffix = 's'; break;
1162 case 64: Suffix = 'd'; break;
1163 default: llvm_unreachable("Bad suffix!");
1164 }
1165 }
1166 if (Suffix != '\0') {
1167 size_t Pos = S.find(c: '_');
1168 S.insert(pos: Pos, s: &Suffix, n: 1);
1169 }
1170
1171 return S;
1172}
1173
1174std::string Intrinsic::replaceParamsIn(std::string S) {
1175 while (S.find(c: '$') != std::string::npos) {
1176 size_t Pos = S.find(c: '$');
1177 size_t End = Pos + 1;
1178 while (isalpha(S[End]))
1179 ++End;
1180
1181 std::string VarName = S.substr(pos: Pos + 1, n: End - Pos - 1);
1182 assert_with_loc(Assertion: Variables.find(x: VarName) != Variables.end(),
1183 Str: "Variable not defined!");
1184 S.replace(pos: Pos, n: End - Pos, str: Variables.find(x: VarName)->second.getName());
1185 }
1186
1187 return S;
1188}
1189
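// Note (descriptive, not normative): parameters become Variables named "p0",
// "p1", ... and the return value becomes "ret". Variable::getName() prefixes
// these with "__", so generated bodies refer to "__p0", "__ret", etc. and stay
// out of the user's identifier space.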
1190void Intrinsic::initVariables() {
1191 Variables.clear();
1192
1193 // Modify the TypeSpec per-argument to get a concrete Type, and create
1194 // known variables for each.
1195 for (unsigned I = 1; I < Types.size(); ++I) {
1196 char NameC = '0' + (I - 1);
1197 std::string Name = "p";
1198 Name.push_back(c: NameC);
1199
1200 Variables[Name] = Variable(Types[I], Name + VariablePostfix);
1201 }
1202 RetVar = Variable(Types[0], "ret" + VariablePostfix);
1203}
1204
1205void Intrinsic::emitPrototype(StringRef NamePrefix) {
1206 if (UseMacro) {
1207 OS << "#define ";
1208 } else {
1209 OS << "__ai ";
1210 if (TargetGuard != "")
1211 OS << "__attribute__((target(\"" << TargetGuard << "\"))) ";
1212 OS << Types[0].str() << " ";
1213 }
1214
1215 OS << NamePrefix.str() << mangleName(Name, LocalCK: ClassS) << "(";
1216
1217 for (unsigned I = 0; I < getNumParams(); ++I) {
1218 if (I != 0)
1219 OS << ", ";
1220
1221 char NameC = '0' + I;
1222 std::string Name = "p";
1223 Name.push_back(c: NameC);
1224 assert(Variables.find(Name) != Variables.end());
1225 Variable &V = Variables[Name];
1226
1227 if (!UseMacro)
1228 OS << V.getType().str() << " ";
1229 OS << V.getName();
1230 }
1231
1232 OS << ")";
1233}
1234
1235void Intrinsic::emitOpeningBrace() {
1236 if (UseMacro)
1237 OS << " __extension__ ({";
1238 else
1239 OS << " {";
1240 emitNewLine();
1241}
1242
1243void Intrinsic::emitClosingBrace() {
1244 if (UseMacro)
1245 OS << "})";
1246 else
1247 OS << "}";
1248}
1249
1250void Intrinsic::emitNewLine() {
1251 if (UseMacro)
1252 OS << " \\\n";
1253 else
1254 OS << "\n";
1255}
1256
1257void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
1258 if (Dest.getType().getNumVectors() > 1) {
1259 emitNewLine();
1260
1261 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) {
1262 OS << " " << Dest.getName() << ".val[" << K << "] = "
1263 << "__builtin_shufflevector(" << Src.getName() << ".val[" << K << "], "
1264 << Src.getName() << ".val[" << K << "], __lane_reverse_"
1265 << Dest.getType().getSizeInBits() << "_"
1266 << Dest.getType().getElementSizeInBits() << ");";
1267 emitNewLine();
1268 }
1269 } else {
1270 OS << " " << Dest.getName() << " = __builtin_shufflevector("
1271 << Src.getName() << ", " << Src.getName() << ", __lane_reverse_"
1272 << Dest.getType().getSizeInBits() << "_"
1273 << Dest.getType().getElementSizeInBits() << ");";
1274 emitNewLine();
1275 }
1276}
1277
1278void Intrinsic::emitArgumentReversal() {
1279 if (isBigEndianSafe())
1280 return;
1281
1282 // Reverse all vector arguments.
1283 for (unsigned I = 0; I < getNumParams(); ++I) {
1284 std::string Name = "p" + utostr(X: I);
1285 std::string NewName = "rev" + utostr(X: I);
1286
1287 Variable &V = Variables[Name];
1288 Variable NewV(V.getType(), NewName + VariablePostfix);
1289
1290 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
1291 continue;
1292
1293 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";";
1294 emitReverseVariable(Dest&: NewV, Src&: V);
1295 V = NewV;
1296 }
1297}
1298
1299void Intrinsic::emitReturnVarDecl() {
1300 assert(RetVar.getType() == Types[0]);
1301 // Create a return variable, if we're not void.
1302 if (!RetVar.getType().isVoid()) {
1303 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";";
1304 emitNewLine();
1305 }
1306}
1307
1308void Intrinsic::emitReturnReversal() {
1309 if (isBigEndianSafe())
1310 return;
1311 if (!getReturnType().isVector() || getReturnType().isVoid() ||
1312 getReturnType().getNumElements() == 1)
1313 return;
1314 emitReverseVariable(Dest&: RetVar, Src&: RetVar);
1315}
1316
1317void Intrinsic::emitShadowedArgs() {
1318 // Macro arguments are not type-checked like inline function arguments,
1319 // so assign them to local temporaries to get the right type checking.
1320 if (!UseMacro)
1321 return;
1322
1323 for (unsigned I = 0; I < getNumParams(); ++I) {
1324 // Do not create a temporary for an immediate argument.
1325 // That would defeat the whole point of using a macro!
1326 if (getParamType(I).isImmediate())
1327 continue;
1328 // Do not create a temporary for pointer arguments. The input
1329 // pointer may have an alignment hint.
1330 if (getParamType(I).isPointer())
1331 continue;
1332
1333 std::string Name = "p" + utostr(X: I);
1334
1335 assert(Variables.find(Name) != Variables.end());
1336 Variable &V = Variables[Name];
1337
1338 std::string NewName = "s" + utostr(X: I);
1339 Variable V2(V.getType(), NewName + VariablePostfix);
1340
1341 OS << " " << V2.getType().str() << " " << V2.getName() << " = "
1342 << V.getName() << ";";
1343 emitNewLine();
1344
1345 V = V2;
1346 }
1347}
1348
1349bool Intrinsic::protoHasScalar() const {
1350 return any_of(Range: Types,
1351 P: [](const Type &T) { return T.isScalar() && !T.isImmediate(); });
1352}
1353
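// Sketch of the statement this emits (illustrative, not verbatim output): for
// a ClassB intrinsic returning int32x4_t this produces something like
//   __ret = __builtin_bit_cast(int32x4_t,
//       __builtin_neon_vaddq_v(__builtin_bit_cast(int8x16_t, __p0),
//                              __builtin_bit_cast(int8x16_t, __p1), 34));
// where vector arguments are bit_cast to 8-bit element vectors and the
// trailing constant is the NeonTypeFlags value selecting the overload.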
1354void Intrinsic::emitBodyAsBuiltinCall() {
1355 std::string S;
1356
  // If this builtin returns a struct of 2, 3, or 4 vectors, pass it as an
  // implicit sret-like argument.
1359 bool SRet = getReturnType().getNumVectors() >= 2;
1360
1361 StringRef N = Name;
1362 ClassKind LocalCK = CK;
1363 if (!protoHasScalar())
1364 LocalCK = ClassB;
1365
1366 if (!getReturnType().isVoid() && !SRet)
1367 S += "__builtin_bit_cast(" + RetVar.getType().str() + ", ";
1368
1369 S += "__builtin_neon_" + mangleName(Name: std::string(N), LocalCK) + "(";
1370
1371 if (SRet)
1372 S += "&" + RetVar.getName() + ", ";
1373
1374 for (unsigned I = 0; I < getNumParams(); ++I) {
1375 Variable &V = Variables["p" + utostr(X: I)];
1376 Type T = V.getType();
1377
1378 // Handle multiple-vector values specially, emitting each subvector as an
1379 // argument to the builtin.
1380 if (T.getNumVectors() > 1) {
1381 // Check if an explicit cast is needed.
1382 std::string Cast;
1383 if (LocalCK == ClassB) {
1384 Type T2 = T;
1385 T2.makeOneVector();
1386 T2.makeInteger(ElemWidth: 8, /*Sign=*/true);
1387 Cast = "__builtin_bit_cast(" + T2.str() + ", ";
1388 }
1389
1390 for (unsigned J = 0; J < T.getNumVectors(); ++J)
1391 S += Cast + V.getName() + ".val[" + utostr(X: J) + "]" +
1392 (Cast.empty() ? ", " : "), ");
1393 continue;
1394 }
1395
1396 std::string Arg = V.getName();
1397 Type CastToType = T;
1398
1399 // Check if an explicit cast is needed.
1400 if (CastToType.isVector()) {
1401 if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
1402 CastToType.makeInteger(ElemWidth: 8, Sign: true);
1403 Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
1404 } else if (LocalCK == ClassI &&
1405 (CastToType.isInteger() || CastToType.isPoly())) {
1406 CastToType.makeSigned();
1407 Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
1408 }
1409 }
1410 S += Arg + ", ";
1411 }
1412
1413 // Extra constant integer to hold type class enum for this function, e.g. s8
1414 if (getClassKind(UseClassBIfScalar: true) == ClassB) {
1415 S += utostr(X: getPolymorphicKeyType().getNeonEnum());
1416 } else {
1417 // Remove extraneous ", ".
1418 S.pop_back();
1419 S.pop_back();
1420 }
1421
1422 if (!getReturnType().isVoid() && !SRet)
1423 S += ")";
1424 S += ");";
1425
1426 std::string RetExpr;
1427 if (!SRet && !RetVar.getType().isVoid())
1428 RetExpr = RetVar.getName() + " = ";
1429
1430 OS << " " << RetExpr << S;
1431 emitNewLine();
1432}
1433
1434void Intrinsic::emitBody(StringRef CallPrefix) {
1435 std::vector<std::string> Lines;
1436
1437 if (!Body || Body->empty()) {
1438 // Nothing specific to output - must output a builtin.
1439 emitBodyAsBuiltinCall();
1440 return;
1441 }
1442
1443 // We have a list of "things to output". The last should be returned.
1444 for (auto *I : Body->getElements()) {
1445 if (const auto *SI = dyn_cast<StringInit>(Val: I)) {
1446 Lines.push_back(x: replaceParamsIn(S: SI->getAsString()));
1447 } else if (const auto *DI = dyn_cast<DagInit>(Val: I)) {
1448 DagEmitter DE(*this, CallPrefix);
1449 Lines.push_back(x: DE.emitDag(DI).second + ";");
1450 }
1451 }
1452
1453 assert(!Lines.empty() && "Empty def?");
1454 if (!RetVar.getType().isVoid())
1455 Lines.back().insert(pos1: 0, str: RetVar.getName() + " = ");
1456
1457 for (auto &L : Lines) {
1458 OS << " " << L;
1459 emitNewLine();
1460 }
1461}
1462
1463void Intrinsic::emitReturn() {
1464 if (RetVar.getType().isVoid())
1465 return;
1466 if (UseMacro)
1467 OS << " " << RetVar.getName() << ";";
1468 else
1469 OS << " return " << RetVar.getName() << ";";
1470 emitNewLine();
1471}
1472
1473std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(const DagInit *DI) {
1474 // At this point we should only be seeing a def.
1475 const DefInit *DefI = cast<DefInit>(Val: DI->getOperator());
1476 std::string Op = DefI->getAsString();
1477
1478 if (Op == "cast" || Op == "bitcast")
1479 return emitDagCast(DI, IsBitCast: Op == "bitcast");
1480 if (Op == "shuffle")
1481 return emitDagShuffle(DI);
1482 if (Op == "dup")
1483 return emitDagDup(DI);
1484 if (Op == "dup_typed")
1485 return emitDagDupTyped(DI);
1486 if (Op == "splat")
1487 return emitDagSplat(DI);
1488 if (Op == "save_temp")
1489 return emitDagSaveTemp(DI);
1490 if (Op == "op")
1491 return emitDagOp(DI);
1492 if (Op == "call" || Op == "call_mangled")
1493 return emitDagCall(DI, MatchMangledName: Op == "call_mangled");
1494 if (Op == "name_replace")
1495 return emitDagNameReplace(DI);
1496 if (Op == "literal")
1497 return emitDagLiteral(DI);
1498 assert_with_loc(Assertion: false, Str: "Unknown operation!");
1499 return std::make_pair(x: Type::getVoid(), y: "");
1500}
1501
1502std::pair<Type, std::string>
1503Intrinsic::DagEmitter::emitDagOp(const DagInit *DI) {
1504 std::string Op = cast<StringInit>(Val: DI->getArg(Num: 0))->getAsUnquotedString();
1505 if (DI->getNumArgs() == 2) {
1506 // Unary op.
1507 std::pair<Type, std::string> R =
1508 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1509 return std::make_pair(x&: R.first, y: Op + R.second);
1510 } else {
1511 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
1512 std::pair<Type, std::string> R1 =
1513 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1514 std::pair<Type, std::string> R2 =
1515 emitDagArg(Arg: DI->getArg(Num: 2), ArgName: std::string(DI->getArgNameStr(Num: 2)));
1516 assert_with_loc(Assertion: R1.first == R2.first, Str: "Argument type mismatch!");
1517 return std::make_pair(x&: R1.first, y: R1.second + " " + Op + " " + R2.second);
1518 }
1519}
1520
1521std::pair<Type, std::string>
1522Intrinsic::DagEmitter::emitDagCall(const DagInit *DI, bool MatchMangledName) {
1523 std::vector<Type> Types;
1524 std::vector<std::string> Values;
1525 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
1526 std::pair<Type, std::string> R =
1527 emitDagArg(Arg: DI->getArg(Num: I + 1), ArgName: std::string(DI->getArgNameStr(Num: I + 1)));
1528 Types.push_back(x: R.first);
1529 Values.push_back(x: R.second);
1530 }
1531
1532 // Look up the called intrinsic.
1533 std::string N;
1534 if (const auto *SI = dyn_cast<StringInit>(Val: DI->getArg(Num: 0)))
1535 N = SI->getAsUnquotedString();
1536 else
1537 N = emitDagArg(Arg: DI->getArg(Num: 0), ArgName: "").second;
1538 std::optional<std::string> MangledName;
1539 if (MatchMangledName) {
1540 if (Intr.getRecord()->getValueAsString(FieldName: "Name").contains(Other: "laneq"))
1541 N += "q";
1542 MangledName = Intr.mangleName(Name: N, LocalCK: ClassS);
1543 }
1544 Intrinsic &Callee = Intr.Emitter.getIntrinsic(Name: N, Types, MangledName);
1545
1546 // Make sure the callee is known as an early def.
1547 Callee.setNeededEarly();
1548 Intr.Dependencies.insert(x: &Callee);
1549
1550 // Now create the call itself.
1551 std::string S;
1552 if (!Callee.isBigEndianSafe())
1553 S += CallPrefix.str();
1554 S += Callee.getMangledName(ForceClassS: true) + "(";
1555 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
1556 if (I != 0)
1557 S += ", ";
1558 S += Values[I];
1559 }
1560 S += ")";
1561
1562 return std::make_pair(x: Callee.getReturnType(), y&: S);
1563}
1564
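// Illustrative examples of the cast dags handled below (assuming $p0 and $p1
// are parameters): (cast "U", $p0) re-expresses __p0 in the unsigned
// counterpart of its type via a C-style cast, while (bitcast $p1, $p0) emits
// __builtin_bit_cast(<type of __p1>, __p0).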
1565std::pair<Type, std::string>
1566Intrinsic::DagEmitter::emitDagCast(const DagInit *DI, bool IsBitCast) {
1567 // (cast MOD* VAL) -> cast VAL to type given by MOD.
1568 std::pair<Type, std::string> R =
1569 emitDagArg(Arg: DI->getArg(Num: DI->getNumArgs() - 1),
1570 ArgName: std::string(DI->getArgNameStr(Num: DI->getNumArgs() - 1)));
1571 Type castToType = R.first;
1572 for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {
1573
1574 // MOD can take several forms:
1575 // 1. $X - take the type of parameter / variable X.
1576 // 2. The value "R" - take the type of the return type.
1577 // 3. a type string
1578 // 4. The value "U" or "S" to switch the signedness.
1579 // 5. The value "H" or "D" to half or double the bitwidth.
1580 // 6. The value "8" to convert to 8-bit (signed) integer lanes.
1581 if (!DI->getArgNameStr(Num: ArgIdx).empty()) {
1582 assert_with_loc(Assertion: Intr.Variables.find(x: DI->getArgNameStr(Num: ArgIdx)) !=
1583 Intr.Variables.end(),
1584 Str: "Variable not found");
1585 castToType =
1586 Intr.Variables[std::string(DI->getArgNameStr(Num: ArgIdx))].getType();
1587 } else {
1588 const auto *SI = dyn_cast<StringInit>(Val: DI->getArg(Num: ArgIdx));
1589 assert_with_loc(Assertion: SI, Str: "Expected string type or $Name for cast type");
1590
1591 if (SI->getAsUnquotedString() == "R") {
1592 castToType = Intr.getReturnType();
1593 } else if (SI->getAsUnquotedString() == "U") {
1594 castToType.makeUnsigned();
1595 } else if (SI->getAsUnquotedString() == "S") {
1596 castToType.makeSigned();
1597 } else if (SI->getAsUnquotedString() == "H") {
1598 castToType.halveLanes();
1599 } else if (SI->getAsUnquotedString() == "D") {
1600 castToType.doubleLanes();
1601 } else if (SI->getAsUnquotedString() == "8") {
1602 castToType.makeInteger(ElemWidth: 8, Sign: true);
1603 } else if (SI->getAsUnquotedString() == "32") {
1604 castToType.make32BitElement();
1605 } else {
1606 castToType = Type::fromTypedefName(Name: SI->getAsUnquotedString());
1607 assert_with_loc(Assertion: !castToType.isVoid(), Str: "Unknown typedef");
1608 }
1609 }
1610 }
1611
1612 std::string S;
1613 if (IsBitCast)
1614 S = "__builtin_bit_cast(" + castToType.str() + ", " + R.second + ")";
1615 else
1616 S = "(" + castToType.str() + ")(" + R.second + ")";
1617
1618 return std::make_pair(x&: castToType, y&: S);
1619}
1620
1621std::pair<Type, std::string>
1622Intrinsic::DagEmitter::emitDagShuffle(const DagInit *DI) {
1623 // See the documentation in arm_neon.td for a description of these operators.
1624 class LowHalf : public SetTheory::Operator {
1625 public:
1626 void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
1627 ArrayRef<SMLoc> Loc) override {
1628 SetTheory::RecSet Elts2;
1629 ST.evaluate(begin: Expr->arg_begin(), end: Expr->arg_end(), Elts&: Elts2, Loc);
1630 Elts.insert(Start: Elts2.begin(), End: Elts2.begin() + (Elts2.size() / 2));
1631 }
1632 };
1633
1634 class HighHalf : public SetTheory::Operator {
1635 public:
1636 void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
1637 ArrayRef<SMLoc> Loc) override {
1638 SetTheory::RecSet Elts2;
1639 ST.evaluate(begin: Expr->arg_begin(), end: Expr->arg_end(), Elts&: Elts2, Loc);
1640 Elts.insert(Start: Elts2.begin() + (Elts2.size() / 2), End: Elts2.end());
1641 }
1642 };
1643
1644 class Rev : public SetTheory::Operator {
1645 unsigned ElementSize;
1646
1647 public:
1648 Rev(unsigned ElementSize) : ElementSize(ElementSize) {}
1649
1650 void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
1651 ArrayRef<SMLoc> Loc) override {
1652 SetTheory::RecSet Elts2;
1653 ST.evaluate(begin: Expr->arg_begin() + 1, end: Expr->arg_end(), Elts&: Elts2, Loc);
1654
1655 int64_t VectorSize = cast<IntInit>(Val: Expr->getArg(Num: 0))->getValue();
1656 VectorSize /= ElementSize;
1657
1658 std::vector<const Record *> Revved;
1659 for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
1660 for (int LI = VectorSize - 1; LI >= 0; --LI) {
1661 Revved.push_back(x: Elts2[VI + LI]);
1662 }
1663 }
1664
1665 Elts.insert_range(R&: Revved);
1666 }
1667 };
1668
1669 class MaskExpander : public SetTheory::Expander {
1670 unsigned N;
1671
1672 public:
1673 MaskExpander(unsigned N) : N(N) {}
1674
1675 void expand(SetTheory &ST, const Record *R,
1676 SetTheory::RecSet &Elts) override {
1677 unsigned Addend = 0;
1678 if (R->getName() == "mask0")
1679 Addend = 0;
1680 else if (R->getName() == "mask1")
1681 Addend = N;
1682 else
1683 return;
1684 for (unsigned I = 0; I < N; ++I)
1685 Elts.insert(X: R->getRecords().getDef(Name: "sv" + utostr(X: I + Addend)));
1686 }
1687 };
1688
1689 // (shuffle arg1, arg2, sequence)
1690 std::pair<Type, std::string> Arg1 =
1691 emitDagArg(Arg: DI->getArg(Num: 0), ArgName: std::string(DI->getArgNameStr(Num: 0)));
1692 std::pair<Type, std::string> Arg2 =
1693 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1694 assert_with_loc(Assertion: Arg1.first == Arg2.first,
1695 Str: "Different types in arguments to shuffle!");
1696
1697 SetTheory ST;
1698 SetTheory::RecSet Elts;
1699 ST.addOperator(Name: "lowhalf", std::make_unique<LowHalf>());
1700 ST.addOperator(Name: "highhalf", std::make_unique<HighHalf>());
1701 ST.addOperator(Name: "rev",
1702 std::make_unique<Rev>(args: Arg1.first.getElementSizeInBits()));
1703 ST.addExpander(ClassName: "MaskExpand",
1704 std::make_unique<MaskExpander>(args: Arg1.first.getNumElements()));
1705 ST.evaluate(Expr: DI->getArg(Num: 2), Elts, Loc: {});
1706
1707 std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
1708 for (auto &E : Elts) {
1709 StringRef Name = E->getName();
1710 assert_with_loc(Assertion: Name.starts_with(Prefix: "sv"),
1711 Str: "Incorrect element kind in shuffle mask!");
1712 S += ", " + Name.drop_front(N: 2).str();
1713 }
1714 S += ")";
1715
1716 // Recalculate the return type - the shuffle may have halved or doubled it.
1717 Type T(Arg1.first);
1718 if (Elts.size() > T.getNumElements()) {
1719 assert_with_loc(
1720 Assertion: Elts.size() == T.getNumElements() * 2,
1721 Str: "Can only double or half the number of elements in a shuffle!");
1722 T.doubleLanes();
1723 } else if (Elts.size() < T.getNumElements()) {
1724 assert_with_loc(
1725 Assertion: Elts.size() == T.getNumElements() / 2,
1726 Str: "Can only double or half the number of elements in a shuffle!");
1727 T.halveLanes();
1728 }
1729
1730 return std::make_pair(x&: T, y&: S);
1731}
1732
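// For example (a sketch): with a base type of int32x4_t, (dup $p0) expands to
// "(int32x4_t) {__p0, __p0, __p0, __p0}".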
1733std::pair<Type, std::string>
1734Intrinsic::DagEmitter::emitDagDup(const DagInit *DI) {
1735 assert_with_loc(Assertion: DI->getNumArgs() == 1, Str: "dup() expects one argument");
1736 std::pair<Type, std::string> A =
1737 emitDagArg(Arg: DI->getArg(Num: 0), ArgName: std::string(DI->getArgNameStr(Num: 0)));
1738 assert_with_loc(Assertion: A.first.isScalar(), Str: "dup() expects a scalar argument");
1739
1740 Type T = Intr.getBaseType();
1741 assert_with_loc(Assertion: T.isVector(), Str: "dup() used but default type is scalar!");
1742 std::string S = "(" + T.str() + ") {";
1743 for (unsigned I = 0; I < T.getNumElements(); ++I) {
1744 if (I != 0)
1745 S += ", ";
1746 S += A.second;
1747 }
1748 S += "}";
1749
1750 return std::make_pair(x&: T, y&: S);
1751}
1752
1753std::pair<Type, std::string>
1754Intrinsic::DagEmitter::emitDagDupTyped(const DagInit *DI) {
1755 assert_with_loc(Assertion: DI->getNumArgs() == 2, Str: "dup_typed() expects two arguments");
1756 std::pair<Type, std::string> B =
1757 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1758 assert_with_loc(Assertion: B.first.isScalar(),
1759 Str: "dup_typed() requires a scalar as the second argument");
1760 Type T;
1761 // If the type argument is a constant string, construct the type directly.
1762 if (const auto *SI = dyn_cast<StringInit>(Val: DI->getArg(Num: 0))) {
1763 T = Type::fromTypedefName(Name: SI->getAsUnquotedString());
1764 assert_with_loc(Assertion: !T.isVoid(), Str: "Unknown typedef");
1765 } else
1766 T = emitDagArg(Arg: DI->getArg(Num: 0), ArgName: std::string(DI->getArgNameStr(Num: 0))).first;
1767
1768 assert_with_loc(Assertion: T.isVector(), Str: "dup_typed() used but target type is scalar!");
1769 std::string S = "(" + T.str() + ") {";
1770 for (unsigned I = 0; I < T.getNumElements(); ++I) {
1771 if (I != 0)
1772 S += ", ";
1773 S += B.second;
1774 }
1775 S += "}";
1776
1777 return std::make_pair(x&: T, y&: S);
1778}
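// Illustrative sketch: dup_typed names the destination type explicitly, so
// (dup_typed "int16x8_t", x) would expand to roughly
//
//   (int16x8_t) {x, x, x, x, x, x, x, x}
//
// (the type name and scalar here are hypothetical; the type may also be
// supplied by another DAG instead of a string).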
1779
1780std::pair<Type, std::string>
1781Intrinsic::DagEmitter::emitDagSplat(const DagInit *DI) {
1782 assert_with_loc(Assertion: DI->getNumArgs() == 2, Str: "splat() expects two arguments");
1783 std::pair<Type, std::string> A =
1784 emitDagArg(Arg: DI->getArg(Num: 0), ArgName: std::string(DI->getArgNameStr(Num: 0)));
1785 std::pair<Type, std::string> B =
1786 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1787
1788 assert_with_loc(Assertion: B.first.isScalar(),
1789 Str: "splat() requires a scalar int as the second argument");
1790
1791 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
1792 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {
1793 S += ", " + B.second;
1794 }
1795 S += ")";
1796
1797 return std::make_pair(x: Intr.getBaseType(), y&: S);
1798}
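// Illustrative sketch: splat repeats the second (scalar) operand as the lane
// index for every element of the base vector type, e.g. for a hypothetical
// four-lane base type:
//
//   __builtin_shufflevector(__a, __a, __b, __b, __b, __b)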
1799
1800std::pair<Type, std::string>
1801Intrinsic::DagEmitter::emitDagSaveTemp(const DagInit *DI) {
1802 assert_with_loc(Assertion: DI->getNumArgs() == 2, Str: "save_temp() expects two arguments");
1803 std::pair<Type, std::string> A =
1804 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1805
1806 assert_with_loc(Assertion: !A.first.isVoid(),
1807 Str: "Argument to save_temp() must have non-void type!");
1808
1809 std::string N = std::string(DI->getArgNameStr(Num: 0));
1810 assert_with_loc(Assertion: !N.empty(),
1811 Str: "save_temp() expects a name as the first argument");
1812
1813 auto [It, Inserted] =
1814 Intr.Variables.try_emplace(k: N, args&: A.first, args: N + Intr.VariablePostfix);
1815 assert_with_loc(Assertion: Inserted, Str: "Variable already defined!");
1816
1817 std::string S = A.first.str() + " " + It->second.getName() + " = " + A.second;
1818
1819 return std::make_pair(x: Type::getVoid(), y&: S);
1820}
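// Illustrative sketch: (save_temp $t, <expr>) introduces a named temporary of
// the expression's type, yielding a statement along the lines of
//
//   int32x4_t __t_42 = <expr>
//
// where the type, the name, and the uniquifying postfix are hypothetical; the
// postfix is only present when macro expansion requires it.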
1821
1822std::pair<Type, std::string>
1823Intrinsic::DagEmitter::emitDagNameReplace(const DagInit *DI) {
1824 std::string S = Intr.Name;
1825
1826 assert_with_loc(Assertion: DI->getNumArgs() == 2, Str: "name_replace requires 2 arguments!");
1827 std::string ToReplace = cast<StringInit>(Val: DI->getArg(Num: 0))->getAsUnquotedString();
1828 std::string ReplaceWith = cast<StringInit>(Val: DI->getArg(Num: 1))->getAsUnquotedString();
1829
1830 size_t Idx = S.find(str: ToReplace);
1831
1832 assert_with_loc(Assertion: Idx != std::string::npos, Str: "name should contain '" + ToReplace + "'!");
1833 S.replace(pos: Idx, n: ToReplace.size(), str: ReplaceWith);
1834
1835 return std::make_pair(x: Type::getVoid(), y&: S);
1836}
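// Illustrative sketch: name_replace rewrites the intrinsic's own name. For a
// hypothetical intrinsic named "vfoo_high_s16", (name_replace "_high", "")
// produces "vfoo_s16"; it is a fatal error if the name does not contain the
// substring being replaced.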
1837
1838std::pair<Type, std::string>
1839Intrinsic::DagEmitter::emitDagLiteral(const DagInit *DI) {
1840 std::string Ty = cast<StringInit>(Val: DI->getArg(Num: 0))->getAsUnquotedString();
1841 std::string Value = cast<StringInit>(Val: DI->getArg(Num: 1))->getAsUnquotedString();
1842 return std::make_pair(x: Type::fromTypedefName(Name: Ty), y&: Value);
1843}
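// Illustrative sketch: (literal "int32_t", "0") simply yields the token "0"
// with type int32_t (both values hypothetical).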
1844
1845std::pair<Type, std::string>
1846Intrinsic::DagEmitter::emitDagArg(const Init *Arg, std::string ArgName) {
1847 if (!ArgName.empty()) {
1848 assert_with_loc(Assertion: !Arg->isComplete(),
1849 Str: "Arguments must either be DAGs or names, not both!");
1850 assert_with_loc(Assertion: Intr.Variables.find(x: ArgName) != Intr.Variables.end(),
1851 Str: "Variable not defined!");
1852 Variable &V = Intr.Variables[ArgName];
1853 return std::make_pair(x: V.getType(), y: V.getName());
1854 }
1855
1856 assert(Arg && "Neither ArgName nor Arg?!");
1857 const auto *DI = dyn_cast<DagInit>(Val: Arg);
1858 assert_with_loc(Assertion: DI, Str: "Arguments must either be DAGs or names!");
1859
1860 return emitDag(DI);
1861}
1862
1863std::string Intrinsic::generate() {
1864 // Avoid duplicated code for big and little endian
1865 if (isBigEndianSafe()) {
1866 generateImpl(ReverseArguments: false, NamePrefix: "", CallPrefix: "");
1867 return OS.str();
1868 }
1869 // Little endian intrinsics are simple and don't require any argument
1870 // swapping.
1871 OS << "#ifdef __LITTLE_ENDIAN__\n";
1872
1873 generateImpl(ReverseArguments: false, NamePrefix: "", CallPrefix: "");
1874
1875 OS << "#else\n";
1876
1877 // Big endian intrinsics are more complex. The user intended these intrinsics
1878 // to operate on a vector "as-if" loaded by LDR (for AArch64), VLDR (for
1879 // 64-bit vectors on AArch32), or VLDM (for 128-bit vectors on AArch32) but
1880 // we load as-if LD1 (for AArch64) or VLD1 (for AArch32). So we should swap
1881 // all arguments and swap the return value too.
1882 //
1883 // If we call sub-intrinsics, we should call a version that does
1884 // not re-swap the arguments!
1885 generateImpl(ReverseArguments: true, NamePrefix: "", CallPrefix: "__noswap_");
1886
1887 // If we're needed early, create a non-swapping variant for
1888 // big-endian.
1889 if (NeededEarly) {
1890 generateImpl(ReverseArguments: false, NamePrefix: "__noswap_", CallPrefix: "__noswap_");
1891 }
1892 OS << "#endif\n\n";
1893
1894 return OS.str();
1895}
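// Illustrative sketch of the emitted shape for an intrinsic that is not
// big-endian safe (bodies elided):
//
//   #ifdef __LITTLE_ENDIAN__
//   <plain definition>
//   #else
//   <definition that reverses its vector arguments and return value and calls
//    any sub-intrinsics through their __noswap_ variants>
//   #endif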
1896
1897void Intrinsic::generateImpl(bool ReverseArguments,
1898 StringRef NamePrefix, StringRef CallPrefix) {
1899 CurrentRecord = R;
1900
1901 // If we call a macro, our local variables may be corrupted due to
1902 // lack of proper lexical scoping. So, add a globally unique postfix
1903 // to every variable.
1904 //
1905 // indexBody() should have set up the Dependencies set by now.
1906 for (auto *I : Dependencies)
1907 if (I->UseMacro) {
1908 VariablePostfix = "_" + utostr(X: Emitter.getUniqueNumber());
1909 break;
1910 }
1911
1912 initVariables();
1913
1914 emitPrototype(NamePrefix);
1915
1916 if (IsUnavailable) {
1917 OS << " __attribute__((unavailable));";
1918 } else {
1919 emitOpeningBrace();
1920 // Emit return variable declaration first so as not to trigger
1921 // -Wdeclaration-after-statement.
1922 emitReturnVarDecl();
1923 emitShadowedArgs();
1924 if (ReverseArguments)
1925 emitArgumentReversal();
1926 emitBody(CallPrefix);
1927 if (ReverseArguments)
1928 emitReturnReversal();
1929 emitReturn();
1930 emitClosingBrace();
1931 }
1932 OS << "\n";
1933
1934 CurrentRecord = nullptr;
1935}
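// Illustrative sketch: when a dependency is emitted as a macro, every local
// variable gains a unique postfix, so a temporary that would otherwise be
// named __ret might instead be named __ret_42 (the exact names and number
// are hypothetical).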
1936
1937void Intrinsic::indexBody() {
1938 CurrentRecord = R;
1939
1940 initVariables();
1941 // Emit return variable declaration first so as not to trigger
1942 // -Wdeclaration-after-statement.
1943 emitReturnVarDecl();
1944 emitBody(CallPrefix: "");
1945 OS.str(s: "");
1946
1947 CurrentRecord = nullptr;
1948}
1949
1950//===----------------------------------------------------------------------===//
1951// NeonEmitter implementation
1952//===----------------------------------------------------------------------===//
1953
1954Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types,
1955 std::optional<std::string> MangledName) {
1956 // First, look up the name in the intrinsic map.
1957 assert_with_loc(Assertion: IntrinsicMap.find(x: Name) != IntrinsicMap.end(),
1958 Str: ("Intrinsic '" + Name + "' not found!").str());
1959 auto &V = IntrinsicMap.find(x: Name)->second;
1960 std::vector<Intrinsic *> GoodVec;
1961
1962 // Create a string to print if we end up failing.
1963 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
1964 for (unsigned I = 0; I < Types.size(); ++I) {
1965 if (I != 0)
1966 ErrMsg += ", ";
1967 ErrMsg += Types[I].str();
1968 }
1969 ErrMsg += ")'\n";
1970 ErrMsg += "Available overloads:\n";
1971
1972 // Now, look through each intrinsic implementation and see if the types are
1973 // compatible.
1974 for (auto &I : V) {
1975 ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName();
1976 ErrMsg += "(";
1977 for (unsigned A = 0; A < I.getNumParams(); ++A) {
1978 if (A != 0)
1979 ErrMsg += ", ";
1980 ErrMsg += I.getParamType(I: A).str();
1981 }
1982 ErrMsg += ")\n";
1983
1984 if (MangledName && MangledName != I.getMangledName(ForceClassS: true))
1985 continue;
1986
1987 if (I.getNumParams() != Types.size())
1988 continue;
1989
1990 unsigned ArgNum = 0;
1991 bool MatchingArgumentTypes = all_of(Range&: Types, P: [&](const auto &Type) {
1992 return Type == I.getParamType(I: ArgNum++);
1993 });
1994
1995 if (MatchingArgumentTypes)
1996 GoodVec.push_back(x: &I);
1997 }
1998
1999 assert_with_loc(Assertion: !GoodVec.empty(),
2000 Str: "No compatible intrinsic found - " + ErrMsg);
2001 assert_with_loc(Assertion: GoodVec.size() == 1, Str: "Multiple overloads found - " + ErrMsg);
2002
2003 return *GoodVec.front();
2004}
2005
2006void NeonEmitter::createIntrinsic(const Record *R,
2007 SmallVectorImpl<Intrinsic *> &Out) {
2008 std::string Name = std::string(R->getValueAsString(FieldName: "Name"));
2009 std::string Proto = std::string(R->getValueAsString(FieldName: "Prototype"));
2010 std::string Types = std::string(R->getValueAsString(FieldName: "Types"));
2011 const Record *OperationRec = R->getValueAsDef(FieldName: "Operation");
2012 bool BigEndianSafe = R->getValueAsBit(FieldName: "BigEndianSafe");
2013 std::string ArchGuard = std::string(R->getValueAsString(FieldName: "ArchGuard"));
2014 std::string TargetGuard = std::string(R->getValueAsString(FieldName: "TargetGuard"));
2015 bool IsUnavailable = OperationRec->getValueAsBit(FieldName: "Unavailable");
2016 std::string CartesianProductWith = std::string(R->getValueAsString(FieldName: "CartesianProductWith"));
2017
2018 // Set the global current record. This allows assert_with_loc to produce
2019 // decent location information even when highly nested.
2020 CurrentRecord = R;
2021
2022 const ListInit *Body = OperationRec->getValueAsListInit(FieldName: "Ops");
2023
2024 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Str: Types);
2025
2026 ClassKind CK = ClassNone;
2027 if (!R->getDirectSuperClasses().empty())
2028 CK = ClassMap[R->getDirectSuperClasses()[0].first];
2029
2030 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
2031 if (!CartesianProductWith.empty()) {
2032 std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(Str: CartesianProductWith);
2033 for (auto TS : TypeSpecs) {
2034 Type DefaultT(TS, ".");
2035 for (auto SrcTS : ProductTypeSpecs) {
2036 Type DefaultSrcT(SrcTS, ".");
2037 if (TS == SrcTS ||
2038 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
2039 continue;
2040 NewTypeSpecs.push_back(x: std::make_pair(x&: TS, y&: SrcTS));
2041 }
2042 }
2043 } else {
2044 for (auto TS : TypeSpecs) {
2045 NewTypeSpecs.push_back(x: std::make_pair(x&: TS, y&: TS));
2046 }
2047 }
2048
2049 sort(C&: NewTypeSpecs);
2050 NewTypeSpecs.erase(first: llvm::unique(R&: NewTypeSpecs), last: NewTypeSpecs.end());
2051 auto &Entry = IntrinsicMap[Name];
2052
2053 for (auto &I : NewTypeSpecs) {
2054
2055 // The MFloat8 type is only available on AArch64. If encountered, set ArchGuard
2056 // correctly.
2057 std::string NewArchGuard = ArchGuard;
2058 if (Type(I.first, ".").isMFloat8()) {
2059 if (NewArchGuard.empty()) {
2060 NewArchGuard = "defined(__aarch64__)";
2061 } else if (NewArchGuard.find(s: "defined(__aarch64__)") ==
2062 std::string::npos) {
2063 NewArchGuard = "defined(__aarch64__) && (" + NewArchGuard + ")";
2064 }
2065 }
2066 Entry.emplace_back(args&: R, args&: Name, args&: Proto, args&: I.first, args&: I.second, args&: CK, args&: Body, args&: *this,
2067 args&: NewArchGuard, args&: TargetGuard, args&: IsUnavailable, args&: BigEndianSafe);
2068 Out.push_back(Elt: &Entry.back());
2069 }
2070
2071 CurrentRecord = nullptr;
2072}
2073
2074/// genBuiltinsDef: Generate the builtin infos, checking for unique builtin
2075/// declarations.
2076void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2077 SmallVectorImpl<Intrinsic *> &Defs) {
2078 // We only want to emit a builtin once, and in order of its name.
2079 std::map<std::string, Intrinsic *> Builtins;
2080
2081 llvm::StringToOffsetTable Table;
2082 Table.GetOrAddStringOffset(Str: "");
2083 Table.GetOrAddStringOffset(Str: "n");
2084
2085 for (auto *Def : Defs) {
2086 if (Def->hasBody())
2087 continue;
2088
2089 if (Builtins.insert(x: {Def->getMangledName(), Def}).second) {
2090 Table.GetOrAddStringOffset(Str: Def->getMangledName());
2091 Table.GetOrAddStringOffset(Str: Def->getBuiltinTypeStr());
2092 Table.GetOrAddStringOffset(Str: Def->getTargetGuard());
2093 }
2094 }
2095
2096 OS << "#ifdef GET_NEON_BUILTIN_ENUMERATORS\n";
2097 for (const auto &[Name, Def] : Builtins) {
2098 OS << " BI__builtin_neon_" << Name << ",\n";
2099 }
2100 OS << "#endif // GET_NEON_BUILTIN_ENUMERATORS\n\n";
2101
2102 OS << "#ifdef GET_NEON_BUILTIN_STR_TABLE\n";
2103 Table.EmitStringTableDef(OS, Name: "BuiltinStrings");
2104 OS << "#endif // GET_NEON_BUILTIN_STR_TABLE\n\n";
2105
2106 OS << "#ifdef GET_NEON_BUILTIN_INFOS\n";
2107 for (const auto &[Name, Def] : Builtins) {
2108 OS << " Builtin::Info{Builtin::Info::StrOffsets{"
2109 << Table.GetStringOffset(Str: Def->getMangledName()) << " /* "
2110 << Def->getMangledName() << " */, ";
2111 OS << Table.GetStringOffset(Str: Def->getBuiltinTypeStr()) << " /* "
2112 << Def->getBuiltinTypeStr() << " */, ";
2113 OS << Table.GetStringOffset(Str: "n") << " /* n */, ";
2114 OS << Table.GetStringOffset(Str: Def->getTargetGuard()) << " /* "
2115 << Def->getTargetGuard() << " */}, ";
2116 OS << "HeaderDesc::NO_HEADER, ALL_LANGUAGES},\n";
2117 }
2118 OS << "#endif // GET_NEON_BUILTIN_INFOS\n\n";
2119}
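// Illustrative sketch of the generated sections (the builtin name vfoo_v is
// hypothetical, and string offsets are elided):
//
//   #ifdef GET_NEON_BUILTIN_ENUMERATORS
//     BI__builtin_neon_vfoo_v,
//   #endif // GET_NEON_BUILTIN_ENUMERATORS
//
//   #ifdef GET_NEON_BUILTIN_INFOS
//     Builtin::Info{Builtin::Info::StrOffsets{... /* vfoo_v */, ...},
//                   HeaderDesc::NO_HEADER, ALL_LANGUAGES},
//   #endif // GET_NEON_BUILTIN_INFOS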
2120
2121void NeonEmitter::genStreamingSVECompatibleList(
2122 raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
2123 OS << "#ifdef GET_NEON_STREAMING_COMPAT_FLAG\n";
2124
2125 std::set<std::string> Emitted;
2126 for (auto *Def : Defs) {
2127 // If the def has a body (that is, it has Operation DAGs), it won't call
2128 // __builtin_neon_* so we don't need to generate a definition for it.
2129 if (Def->hasBody())
2130 continue;
2131
2132 std::string Name = Def->getMangledName();
2133 if (Emitted.find(x: Name) != Emitted.end())
2134 continue;
2135
2136 // FIXME: We should make exceptions here for some NEON builtins that are
2137 // permitted in streaming mode.
2138 OS << "case NEON::BI__builtin_neon_" << Name
2139 << ": BuiltinType = ArmNonStreaming; break;\n";
2140 Emitted.insert(x: Name);
2141 }
2142 OS << "#endif\n\n";
2143}
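// Illustrative sketch of one generated line (builtin name hypothetical):
//
//   case NEON::BI__builtin_neon_vfoo_v: BuiltinType = ArmNonStreaming; break;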
2144
2145/// Generate the ARM and AArch64 overloaded type checking code for
2146/// SemaChecking.cpp, checking for unique builtin declarations.
2147void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2148 SmallVectorImpl<Intrinsic *> &Defs) {
2149 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
2150
2151 // We record each overload check line before emitting because subsequent Inst
2152 // definitions may extend the number of permitted types (i.e. augment the
2153 // Mask). Use std::map so the table is emitted in name order, not hash order.
2154 struct OverloadInfo {
2155 uint64_t Mask = 0ULL;
2156 int PtrArgNum = 0;
2157 bool HasConstPtr = false;
2158 OverloadInfo() = default;
2159 };
2160 std::map<std::string, OverloadInfo> OverloadMap;
2161
2162 for (auto *Def : Defs) {
2163 // If the def has a body (that is, it has Operation DAGs), it won't call
2164 // __builtin_neon_* so we don't need to generate a definition for it.
2165 if (Def->hasBody())
2166 continue;
2167 // Functions which have a scalar argument cannot be overloaded, no need to
2168 // check them if we are emitting the type checking code.
2169 if (Def->protoHasScalar())
2170 continue;
2171
2172 uint64_t Mask = 0ULL;
2173 Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum();
2174
2175 // Check if the function has a pointer or const pointer argument.
2176 int PtrArgNum = -1;
2177 bool HasConstPtr = false;
2178 for (unsigned I = 0; I < Def->getNumParams(); ++I) {
2179 const auto &Type = Def->getParamType(I);
2180 if (Type.isPointer()) {
2181 PtrArgNum = I;
2182 HasConstPtr = Type.isConstPointer();
2183 }
2184 }
2185
2186 // For sret builtins, adjust the pointer argument index.
2187 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
2188 PtrArgNum += 1;
2189
2190 std::string Name = Def->getName();
2191 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2192 // vst1_lane, vldap1_lane, and vstl1_lane intrinsics. Using a pointer to
2193 // the vector element type with one of those operations causes codegen to
2194 // select an aligned load/store instruction. If you want an unaligned
2195 // operation, the pointer argument needs to have less alignment than the
2196 // element type, so just accept any pointer type.
2197 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane" ||
2198 Name == "vldap1_lane" || Name == "vstl1_lane") {
2199 PtrArgNum = -1;
2200 HasConstPtr = false;
2201 }
2202
2203 if (Mask) {
2204 OverloadInfo &OI = OverloadMap[Def->getMangledName()];
2205 OI.Mask |= Mask;
2206 OI.PtrArgNum |= PtrArgNum;
2207 OI.HasConstPtr = HasConstPtr;
2208 }
2209 }
2210
2211 for (auto &I : OverloadMap) {
2212 OverloadInfo &OI = I.second;
2213
2214 OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
2215 OS << "mask = 0x" << Twine::utohexstr(Val: OI.Mask) << "ULL";
2216 if (OI.PtrArgNum >= 0)
2217 OS << "; PtrArgNum = " << OI.PtrArgNum;
2218 if (OI.HasConstPtr)
2219 OS << "; HasConstPtr = true";
2220 OS << "; break;\n";
2221 }
2222 OS << "#endif\n\n";
2223}
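// Illustrative sketch of one generated case (name and values hypothetical):
// an intrinsic overloaded over a few element types, taking a const pointer as
// its second argument, might produce
//
//   case NEON::BI__builtin_neon_vfoo_v: mask = 0x70ULL; PtrArgNum = 1;
//   HasConstPtr = true; break;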
2224
2225inline bool
2226NeonEmitter::areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
2227 const ArrayRef<ImmCheck> ChecksB) {
2228 // If multiple intrinsics map to the same builtin, we must ensure that the
2229 // intended range checks performed in SemaArm.cpp do not contradict each
2230 // other, as these are emitted once per builtin.
2231 //
2232 // The arguments to be checked and the kind of each check to be performed
2233 // must be the same. The element types may differ, since they are resolved
2234 // per intrinsic as overloaded types by SemaArm.cpp; the vector sizes are
2235 // not, and so must match.
2236 bool compat = llvm::equal(LRange: ChecksA, RRange: ChecksB, P: [](const auto &A, const auto &B) {
2237 return A.getImmArgIdx() == B.getImmArgIdx() && A.getKind() == B.getKind() &&
2238 A.getVecSizeInBits() == B.getVecSizeInBits();
2239 });
2240
2241 return compat;
2242}
2243
2244void NeonEmitter::genIntrinsicRangeCheckCode(
2245 raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
2246 std::unordered_map<std::string, ArrayRef<ImmCheck>> Emitted;
2247
2248 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2249 for (auto &Def : Defs) {
2250 // If the Def has a body (operation DAGs), it does not lower to a __builtin_neon_* call.
2251 if (Def->hasBody() || !Def->hasImmediate())
2252 continue;
2253
2254 // Sorted by immediate argument index
2255 ArrayRef<ImmCheck> Checks = Def->getImmChecks();
2256
2257 auto [It, Inserted] = Emitted.try_emplace(k: Def->getMangledName(), args&: Checks);
2258 if (!Inserted) {
2259 assert(areRangeChecksCompatible(Checks, It->second) &&
2260 "Neon intrinsics with incompatible immediate range checks cannot "
2261 "share a builtin.");
2262 continue; // Ensure this is emitted only once
2263 }
2264
2265 // Emit builtin's range checks
2266 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
2267 for (const auto &Check : Checks) {
2268 OS << " ImmChecks.emplace_back(" << Check.getImmArgIdx() << ", "
2269 << Check.getKind() << ", " << Check.getElementSizeInBits() << ", "
2270 << Check.getVecSizeInBits() << ");\n"
2271 << " break;\n";
2272 }
2273 }
2274
2275 OS << "#endif\n\n";
2276}
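// Illustrative sketch of one generated case (name and values hypothetical):
//
//   case NEON::BI__builtin_neon_vfoo_n_v:
//     ImmChecks.emplace_back(1, 0, 32, 128);
//     break;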
2277
2278/// runHeader - Emit a file with sections defining:
2279/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2280/// 2. the SemaChecking code for the type overload checking.
2281/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
2282void NeonEmitter::runHeader(raw_ostream &OS) {
2283 SmallVector<Intrinsic *, 128> Defs;
2284 for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2285 createIntrinsic(R, Out&: Defs);
2286
2287 // Generate shared BuiltinsXXX.def
2288 genBuiltinsDef(OS, Defs);
2289
2290 // Generate ARM overloaded type checking code for SemaChecking.cpp
2291 genOverloadTypeCheckCode(OS, Defs);
2292
2293 genStreamingSVECompatibleList(OS, Defs);
2294
2295 // Generate ARM range checking code for shift/lane immediates.
2296 genIntrinsicRangeCheckCode(OS, Defs);
2297}
2298
2299static void emitNeonTypeDefs(const std::string &types, raw_ostream &OS) {
2300 std::string TypedefTypes(types);
2301 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(Str: TypedefTypes);
2302
2303 // Emit vector typedefs.
2304 bool InIfdef = false;
2305 for (auto &TS : TDTypeVec) {
2306 bool IsA64 = false;
2307 Type T(TS, ".");
2308 if (T.isDouble() || T.isMFloat8())
2309 IsA64 = true;
2310
2311 if (InIfdef && !IsA64) {
2312 OS << "#endif\n";
2313 InIfdef = false;
2314 }
2315 if (!InIfdef && IsA64) {
2316 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2317 InIfdef = true;
2318 }
2319
2320 if (T.isPoly())
2321 OS << "typedef __attribute__((neon_polyvector_type(";
2322 else
2323 OS << "typedef __attribute__((neon_vector_type(";
2324
2325 Type T2 = T;
2326 T2.makeScalar();
2327 OS << T.getNumElements();
2328 OS << "))) " << T2.str();
2329 OS << " " << T.str() << ";\n";
2330 }
2331 if (InIfdef)
2332 OS << "#endif\n";
2333 OS << "\n";
2334
2335 // Emit struct typedefs.
2336 InIfdef = false;
2337 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
2338 for (auto &TS : TDTypeVec) {
2339 bool IsA64 = false;
2340 Type T(TS, ".");
2341 if (T.isDouble() || T.isMFloat8())
2342 IsA64 = true;
2343
2344 if (InIfdef && !IsA64) {
2345 OS << "#endif\n";
2346 InIfdef = false;
2347 }
2348 if (!InIfdef && IsA64) {
2349 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2350 InIfdef = true;
2351 }
2352
2353 const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};
2354 Type VT(TS, Mods);
2355 OS << "typedef struct " << VT.str() << " {\n";
2356 OS << " " << T.str() << " val";
2357 OS << "[" << NumMembers << "]";
2358 OS << ";\n} ";
2359 OS << VT.str() << ";\n";
2360 OS << "\n";
2361 }
2362 }
2363 if (InIfdef)
2364 OS << "#endif\n";
2365}
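// Illustrative sketch of what this emits for one type spec (the exact set of
// typedefs depends on the Types string passed in):
//
//   typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
//
//   typedef struct int32x4x2_t {
//     int32x4_t val[2];
//   } int32x4x2_t;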
2366
2367/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
2368/// consists of type definitions and function declarations.
2369void NeonEmitter::run(raw_ostream &OS) {
2370 OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
2371 "------------------------------"
2372 "---===\n"
2373 " *\n"
2374 " * Permission is hereby granted, free of charge, to any person "
2375 "obtaining "
2376 "a copy\n"
2377 " * of this software and associated documentation files (the "
2378 "\"Software\"),"
2379 " to deal\n"
2380 " * in the Software without restriction, including without limitation "
2381 "the "
2382 "rights\n"
2383 " * to use, copy, modify, merge, publish, distribute, sublicense, "
2384 "and/or sell\n"
2385 " * copies of the Software, and to permit persons to whom the Software "
2386 "is\n"
2387 " * furnished to do so, subject to the following conditions:\n"
2388 " *\n"
2389 " * The above copyright notice and this permission notice shall be "
2390 "included in\n"
2391 " * all copies or substantial portions of the Software.\n"
2392 " *\n"
2393 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2394 "EXPRESS OR\n"
2395 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2396 "MERCHANTABILITY,\n"
2397 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2398 "SHALL THE\n"
2399 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2400 "OTHER\n"
2401 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2402 "ARISING FROM,\n"
2403 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2404 "DEALINGS IN\n"
2405 " * THE SOFTWARE.\n"
2406 " *\n"
2407 " *===-----------------------------------------------------------------"
2408 "---"
2409 "---===\n"
2410 " */\n\n";
2411
2412 OS << "#ifndef __ARM_NEON_H\n";
2413 OS << "#define __ARM_NEON_H\n\n";
2414
2415 OS << "#if !defined(__arm__) && !defined(__aarch64__) && "
2416 "!defined(__arm64ec__)\n";
2417 OS << "#error \"<arm_neon.h> is intended only for ARM and AArch64 "
2418 "targets\"\n";
2419 OS << "#elif !defined(__ARM_FP)\n";
2420 OS << "#error \"NEON intrinsics not available with the soft-float ABI. "
2421 "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n";
2422 OS << "#else\n\n";
2423
2424 OS << "#include <stdint.h>\n\n";
2425
2426 OS << "#include <arm_bf16.h>\n";
2427
2428 OS << "#include <arm_vector_types.h>\n";
2429
2430 // For now, the signedness of polynomial types depends on the target.
2431 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2432 OS << "typedef uint8_t poly8_t;\n";
2433 OS << "typedef uint16_t poly16_t;\n";
2434 OS << "typedef uint64_t poly64_t;\n";
2435 OS << "typedef __uint128_t poly128_t;\n";
2436 OS << "#else\n";
2437 OS << "typedef int8_t poly8_t;\n";
2438 OS << "typedef int16_t poly16_t;\n";
2439 OS << "typedef int64_t poly64_t;\n";
2440 OS << "#endif\n";
2441 emitNeonTypeDefs(types: "PcQPcPsQPsPlQPl", OS);
2442
2443 OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2444 "__nodebug__))\n\n";
2445
2446 // Shufflevector arguments lists for endian-swapping vectors for big-endian
2447 // targets. For AArch64, we need to reverse every lane in the vector, but for
2448 // AArch32 we need to reverse the lanes within each 64-bit chunk of the
2449 // vector. The naming convention here is __lane_reverse_<n>_<m>, where <n> is
2450 // the length of the vector in bits, and <m> is the length of each lane in bits.
2451 OS << "#if !defined(__LITTLE_ENDIAN__)\n";
2452 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2453 OS << "#define __lane_reverse_64_32 1,0\n";
2454 OS << "#define __lane_reverse_64_16 3,2,1,0\n";
2455 OS << "#define __lane_reverse_64_8 7,6,5,4,3,2,1,0\n";
2456 OS << "#define __lane_reverse_128_64 1,0\n";
2457 OS << "#define __lane_reverse_128_32 3,2,1,0\n";
2458 OS << "#define __lane_reverse_128_16 7,6,5,4,3,2,1,0\n";
2459 OS << "#define __lane_reverse_128_8 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0\n";
2460 OS << "#else\n";
2461 OS << "#define __lane_reverse_64_32 1,0\n";
2462 OS << "#define __lane_reverse_64_16 3,2,1,0\n";
2463 OS << "#define __lane_reverse_64_8 7,6,5,4,3,2,1,0\n";
2464 OS << "#define __lane_reverse_128_64 0,1\n";
2465 OS << "#define __lane_reverse_128_32 1,0,3,2\n";
2466 OS << "#define __lane_reverse_128_16 3,2,1,0,7,6,5,4\n";
2467 OS << "#define __lane_reverse_128_8 7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8\n";
2468 OS << "#endif\n";
2469 OS << "#endif\n";
2470
2471 SmallVector<Intrinsic *, 128> Defs;
2472 for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2473 createIntrinsic(R, Out&: Defs);
2474
2475 for (auto *I : Defs)
2476 I->indexBody();
2477
2478 stable_sort(Range&: Defs, C: deref<std::less<>>());
2479
2480 // Only emit a def when its requirements have been met.
2481 // FIXME: This loop could be made faster, but it's fast enough for now.
2482 bool MadeProgress = true;
2483 std::string InGuard;
2484 while (!Defs.empty() && MadeProgress) {
2485 MadeProgress = false;
2486
2487 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2488 I != Defs.end(); /*No step*/) {
2489 bool DependenciesSatisfied = true;
2490 for (auto *II : (*I)->getDependencies()) {
2491 if (is_contained(Range&: Defs, Element: II))
2492 DependenciesSatisfied = false;
2493 }
2494 if (!DependenciesSatisfied) {
2495 // Try the next one.
2496 ++I;
2497 continue;
2498 }
2499
2500 // Emit #endif/#if pair if needed.
2501 if ((*I)->getArchGuard() != InGuard) {
2502 if (!InGuard.empty())
2503 OS << "#endif\n";
2504 InGuard = (*I)->getArchGuard();
2505 if (!InGuard.empty())
2506 OS << "#if " << InGuard << "\n";
2507 }
2508
2509 // Actually generate the intrinsic code.
2510 OS << (*I)->generate();
2511
2512 MadeProgress = true;
2513 I = Defs.erase(CI: I);
2514 }
2515 }
2516 assert(Defs.empty() && "Some requirements were not satisfied!");
2517 if (!InGuard.empty())
2518 OS << "#endif\n";
2519
2520 OS << "\n";
2521 OS << "#undef __ai\n\n";
2522 OS << "#endif /* if !defined(__ARM_NEON) */\n";
2523 OS << "#endif /* ifndef __ARM_FP */\n";
2524}
2525
2526/// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h
2527/// consists of type definitions and function declarations.
2528void NeonEmitter::runFP16(raw_ostream &OS) {
2529 OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
2530 "------------------------------"
2531 "---===\n"
2532 " *\n"
2533 " * Permission is hereby granted, free of charge, to any person "
2534 "obtaining a copy\n"
2535 " * of this software and associated documentation files (the "
2536 "\"Software\"), to deal\n"
2537 " * in the Software without restriction, including without limitation "
2538 "the rights\n"
2539 " * to use, copy, modify, merge, publish, distribute, sublicense, "
2540 "and/or sell\n"
2541 " * copies of the Software, and to permit persons to whom the Software "
2542 "is\n"
2543 " * furnished to do so, subject to the following conditions:\n"
2544 " *\n"
2545 " * The above copyright notice and this permission notice shall be "
2546 "included in\n"
2547 " * all copies or substantial portions of the Software.\n"
2548 " *\n"
2549 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2550 "EXPRESS OR\n"
2551 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2552 "MERCHANTABILITY,\n"
2553 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2554 "SHALL THE\n"
2555 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2556 "OTHER\n"
2557 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2558 "ARISING FROM,\n"
2559 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2560 "DEALINGS IN\n"
2561 " * THE SOFTWARE.\n"
2562 " *\n"
2563 " *===-----------------------------------------------------------------"
2564 "---"
2565 "---===\n"
2566 " */\n\n";
2567
2568 OS << "#ifndef __ARM_FP16_H\n";
2569 OS << "#define __ARM_FP16_H\n\n";
2570
2571 OS << "#include <stdint.h>\n\n";
2572
2573 OS << "typedef __fp16 float16_t;\n";
2574
2575 OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2576 "__nodebug__))\n\n";
2577
2578 SmallVector<Intrinsic *, 128> Defs;
2579 for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2580 createIntrinsic(R, Out&: Defs);
2581
2582 for (auto *I : Defs)
2583 I->indexBody();
2584
2585 stable_sort(Range&: Defs, C: deref<std::less<>>());
2586
2587 // Only emit a def when its requirements have been met.
2588 // FIXME: This loop could be made faster, but it's fast enough for now.
2589 bool MadeProgress = true;
2590 std::string InGuard;
2591 while (!Defs.empty() && MadeProgress) {
2592 MadeProgress = false;
2593
2594 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2595 I != Defs.end(); /*No step*/) {
2596 bool DependenciesSatisfied = true;
2597 for (auto *II : (*I)->getDependencies()) {
2598 if (is_contained(Range&: Defs, Element: II))
2599 DependenciesSatisfied = false;
2600 }
2601 if (!DependenciesSatisfied) {
2602 // Try the next one.
2603 ++I;
2604 continue;
2605 }
2606
2607 // Emit #endif/#if pair if needed.
2608 if ((*I)->getArchGuard() != InGuard) {
2609 if (!InGuard.empty())
2610 OS << "#endif\n";
2611 InGuard = (*I)->getArchGuard();
2612 if (!InGuard.empty())
2613 OS << "#if " << InGuard << "\n";
2614 }
2615
2616 // Actually generate the intrinsic code.
2617 OS << (*I)->generate();
2618
2619 MadeProgress = true;
2620 I = Defs.erase(CI: I);
2621 }
2622 }
2623 assert(Defs.empty() && "Some requirements were not satisfied!");
2624 if (!InGuard.empty())
2625 OS << "#endif\n";
2626
2627 OS << "\n";
2628 OS << "#undef __ai\n\n";
2629 OS << "#endif /* __ARM_FP16_H */\n";
2630}
2631
2632void NeonEmitter::runVectorTypes(raw_ostream &OS) {
2633 OS << "/*===---- arm_vector_types - ARM vector type "
2634 "------===\n"
2635 " *\n"
2636 " *\n"
2637 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
2638 "Exceptions.\n"
2639 " * See https://llvm.org/LICENSE.txt for license information.\n"
2640 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
2641 " *\n"
2642 " *===-----------------------------------------------------------------"
2643 "------===\n"
2644 " */\n\n";
2645 OS << "#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)\n";
2646 OS << "#error \"This file should not be used standalone. Please include"
2647 " arm_neon.h or arm_sve.h instead\"\n\n";
2648 OS << "#endif\n";
2649 OS << "#ifndef __ARM_NEON_TYPES_H\n";
2650 OS << "#define __ARM_NEON_TYPES_H\n";
2651 OS << "typedef float float32_t;\n";
2652 OS << "typedef __fp16 float16_t;\n";
2653
2654 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2655 OS << "typedef __mfp8 mfloat8_t;\n";
2656 OS << "typedef double float64_t;\n";
2657 OS << "#endif\n\n";
2658
2659 OS << R"(
2660typedef uint64_t fpm_t;
2661
2662enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 };
2663
2664enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE };
2665
2666static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2667__arm_fpm_init(void) {
2668 return 0;
2669}
2670
2671static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2672__arm_set_fpm_src1_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2673 return (__fpm & ~7ull) | (fpm_t)__format;
2674}
2675
2676static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2677__arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2678 return (__fpm & ~0x38ull) | ((fpm_t)__format << 3u);
2679}
2680
2681static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2682__arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2683 return (__fpm & ~0x1c0ull) | ((fpm_t)__format << 6u);
2684}
2685
2686static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2687__arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
2688 return (__fpm & ~0x4000ull) | ((fpm_t)__behaviour << 14u);
2689}
2690
2691static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2692__arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
2693 return (__fpm & ~0x8000ull) | ((fpm_t)__behaviour << 15u);
2694}
2695
2696static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2697__arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) {
2698 return (__fpm & ~0x7f0000ull) | (__scale << 16u);
2699}
2700
2701static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2702__arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) {
2703 return (__fpm & ~0xff000000ull) | (((fpm_t)__scale & 0xffu) << 24u);
2704}
2705
2706static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2707__arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) {
2708 return (uint32_t)__fpm | (__scale << 32u);
2709}
2710
2711)";
2712
2713 emitNeonTypeDefs(types: "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlmQmhQhfQfdQd", OS);
2714
2715 emitNeonTypeDefs(types: "bQb", OS);
2716 OS << "#endif // __ARM_NEON_TYPES_H\n";
2717}
2718
2719void NeonEmitter::runBF16(raw_ostream &OS) {
2720 OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics "
2721 "-----------------------------------===\n"
2722 " *\n"
2723 " *\n"
2724 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
2725 "Exceptions.\n"
2726 " * See https://llvm.org/LICENSE.txt for license information.\n"
2727 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
2728 " *\n"
2729 " *===-----------------------------------------------------------------"
2730 "------===\n"
2731 " */\n\n";
2732
2733 OS << "#ifndef __ARM_BF16_H\n";
2734 OS << "#define __ARM_BF16_H\n\n";
2735
2736 OS << "typedef __bf16 bfloat16_t;\n";
2737
2738 OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2739 "__nodebug__))\n\n";
2740
2741 SmallVector<Intrinsic *, 128> Defs;
2742 for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2743 createIntrinsic(R, Out&: Defs);
2744
2745 for (auto *I : Defs)
2746 I->indexBody();
2747
2748 stable_sort(Range&: Defs, C: deref<std::less<>>());
2749
2750 // Only emit a def when its requirements have been met.
2751 // FIXME: This loop could be made faster, but it's fast enough for now.
2752 bool MadeProgress = true;
2753 std::string InGuard;
2754 while (!Defs.empty() && MadeProgress) {
2755 MadeProgress = false;
2756
2757 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2758 I != Defs.end(); /*No step*/) {
2759 bool DependenciesSatisfied = true;
2760 for (auto *II : (*I)->getDependencies()) {
2761 if (is_contained(Range&: Defs, Element: II))
2762 DependenciesSatisfied = false;
2763 }
2764 if (!DependenciesSatisfied) {
2765 // Try the next one.
2766 ++I;
2767 continue;
2768 }
2769
2770 // Emit #endif/#if pair if needed.
2771 if ((*I)->getArchGuard() != InGuard) {
2772 if (!InGuard.empty())
2773 OS << "#endif\n";
2774 InGuard = (*I)->getArchGuard();
2775 if (!InGuard.empty())
2776 OS << "#if " << InGuard << "\n";
2777 }
2778
2779 // Actually generate the intrinsic code.
2780 OS << (*I)->generate();
2781
2782 MadeProgress = true;
2783 I = Defs.erase(CI: I);
2784 }
2785 }
2786 assert(Defs.empty() && "Some requirements were not satisfied!");
2787 if (!InGuard.empty())
2788 OS << "#endif\n";
2789
2790 OS << "\n";
2791 OS << "#undef __ai\n\n";
2792
2793 OS << "#endif\n";
2794}
2795
2796void clang::EmitNeon(const RecordKeeper &Records, raw_ostream &OS) {
2797 NeonEmitter(Records).run(OS);
2798}
2799
2800void clang::EmitFP16(const RecordKeeper &Records, raw_ostream &OS) {
2801 NeonEmitter(Records).runFP16(OS);
2802}
2803
2804void clang::EmitBF16(const RecordKeeper &Records, raw_ostream &OS) {
2805 NeonEmitter(Records).runBF16(OS);
2806}
2807
2808void clang::EmitNeonSema(const RecordKeeper &Records, raw_ostream &OS) {
2809 NeonEmitter(Records).runHeader(OS);
2810}
2811
2812void clang::EmitVectorTypes(const RecordKeeper &Records, raw_ostream &OS) {
2813 NeonEmitter(Records).runVectorTypes(OS);
2814}
2815
2816void clang::EmitNeonTest(const RecordKeeper &Records, raw_ostream &OS) {
2817 llvm_unreachable("Neon test generation no longer implemented!");
2818}
2819