1//===-- NeonEmitter.cpp - Generate arm_neon.h for use with clang ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This tablegen backend is responsible for emitting arm_neon.h, which includes
10// a declaration and definition of each function specified by the ARM NEON
11// compiler interface. See ARM document DUI0348B.
12//
// Each NEON instruction is implemented in terms of one or more functions which
// are suffixed with the element type of the input vectors. Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc., or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
18//
// Additional validation code can also be generated by this backend when
// runHeader() is called, rather than via the normal run() entry point.
21//
22// See also the documentation in include/clang/Basic/arm_neon.td.
23//
24//===----------------------------------------------------------------------===//
25
26#include "TableGenBackends.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/ADT/StringExtras.h"
32#include "llvm/ADT/StringRef.h"
33#include "llvm/Support/Casting.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/raw_ostream.h"
36#include "llvm/TableGen/AArch64ImmCheck.h"
37#include "llvm/TableGen/Error.h"
38#include "llvm/TableGen/Record.h"
39#include "llvm/TableGen/SetTheory.h"
40#include "llvm/TableGen/StringToOffsetTable.h"
41#include <algorithm>
42#include <cassert>
43#include <cctype>
44#include <cstddef>
45#include <cstdint>
46#include <deque>
47#include <map>
48#include <optional>
49#include <set>
50#include <sstream>
51#include <string>
52#include <unordered_map>
53#include <utility>
54#include <vector>
55
56using namespace llvm;
57
58namespace {
59
60// While globals are generally bad, this one allows us to perform assertions
61// liberally and somehow still trace them back to the def they indirectly
62// came from.
63static const Record *CurrentRecord = nullptr;
static void assert_with_loc(bool Assertion, const std::string &Str) {
  if (!Assertion) {
    if (CurrentRecord)
      PrintFatalError(CurrentRecord->getLoc(), Str);
    else
      PrintFatalError(Str);
  }
}
72
73enum ClassKind {
74 ClassNone,
75 ClassI, // generic integer instruction, e.g., "i8" suffix
76 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
77 ClassW, // width-specific instruction, e.g., "8" suffix
78 ClassV, // void-suffix instruction, no suffix
79 ClassB, // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions,
              // but for which we must not emit any suffix in our
              // tests.
  ClassNoTest // Instructions which we do not test since they are
              // not true instructions.
85};
86
87/// NeonTypeFlags - Flags to identify the types for overloaded Neon
88/// builtins. These must be kept in sync with the flags in
89/// include/clang/Basic/TargetBuiltins.h.
90namespace NeonTypeFlags {
91
92enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 };
93
94enum EltType {
95 Int8,
96 Int16,
97 Int32,
98 Int64,
99 Poly8,
100 Poly16,
101 Poly64,
102 Poly128,
103 Float16,
104 Float32,
105 Float64,
106 BFloat16,
107 MFloat8
108};
109
110} // end namespace NeonTypeFlags
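
// For illustration only (not emitted into any header): a full NeonTypeFlags
// value combines the element type with the Unsigned/Quad bits above. Assuming
// the enum ordering shown, a uint32x4_t would be encoded as
//   NeonTypeFlags::Int32 | NeonTypeFlags::UnsignedFlag | NeonTypeFlags::QuadFlag
// i.e. 2 | 0x10 | 0x20 == 0x32.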
111
112class NeonEmitter;
113
114//===----------------------------------------------------------------------===//
115// TypeSpec
116//===----------------------------------------------------------------------===//
117
118/// A TypeSpec is just a simple wrapper around a string, but gets its own type
119/// for strong typing purposes.
120///
121/// A TypeSpec can be used to create a type.
122class TypeSpec : public std::string {
123public:
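  /// Split a compound typespec string into its per-type components. Each
  /// lowercase base-type character closes one TypeSpec, keeping any modifier
  /// characters that preceded it. As a sketch (not a call this backend makes
  /// verbatim), fromTypeSpecs("csUcUs") yields {"c", "s", "Uc", "Us"}.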
  static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
    std::vector<TypeSpec> Ret;
    TypeSpec Acc;
    for (char I : Str.str()) {
      if (islower(I)) {
        Acc.push_back(I);
        Ret.push_back(TypeSpec(Acc));
        Acc.clear();
      } else {
        Acc.push_back(I);
      }
    }
    return Ret;
  }
138};
139
140//===----------------------------------------------------------------------===//
141// Type
142//===----------------------------------------------------------------------===//
143
144/// A Type. Not much more to say here.
145class Type {
146private:
147 TypeSpec TS;
148
149 enum TypeKind { Void, Float, SInt, UInt, Poly, BFloat16, MFloat8, FPM };
150 TypeKind Kind;
151 bool Immediate, Constant, Pointer;
152 // ScalarForMangling and NoManglingQ are really not suited to live here as
153 // they are not related to the type. But they live in the TypeSpec (not the
154 // prototype), so this is really the only place to store them.
155 bool ScalarForMangling, NoManglingQ;
156 unsigned Bitwidth, ElementBitwidth, NumVectors;
157
158public:
159 Type()
160 : Kind(Void), Immediate(false), Constant(false),
161 Pointer(false), ScalarForMangling(false), NoManglingQ(false),
162 Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}
163
164 Type(TypeSpec TS, StringRef CharMods)
165 : TS(std::move(TS)), Kind(Void), Immediate(false),
166 Constant(false), Pointer(false), ScalarForMangling(false),
167 NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
168 applyModifiers(Mods: CharMods);
169 }
170
171 /// Returns a type representing "void".
172 static Type getVoid() { return Type(); }
173
174 bool operator==(const Type &Other) const { return str() == Other.str(); }
175 bool operator!=(const Type &Other) const { return !operator==(Other); }
176
177 //
178 // Query functions
179 //
180 bool isScalarForMangling() const { return ScalarForMangling; }
181 bool noManglingQ() const { return NoManglingQ; }
182
183 bool isPointer() const { return Pointer; }
184 bool isValue() const { return !isVoid() && !isPointer(); }
185 bool isScalar() const { return isValue() && NumVectors == 0; }
186 bool isVector() const { return isValue() && NumVectors > 0; }
187 bool isConstPointer() const { return Constant; }
188 bool isFloating() const { return Kind == Float; }
189 bool isInteger() const { return Kind == SInt || Kind == UInt; }
190 bool isPoly() const { return Kind == Poly; }
191 bool isSigned() const { return Kind == SInt; }
192 bool isImmediate() const { return Immediate; }
193 bool isFloat() const { return isFloating() && ElementBitwidth == 32; }
194 bool isDouble() const { return isFloating() && ElementBitwidth == 64; }
195 bool isHalf() const { return isFloating() && ElementBitwidth == 16; }
196 bool isChar() const { return ElementBitwidth == 8; }
197 bool isShort() const { return isInteger() && ElementBitwidth == 16; }
198 bool isInt() const { return isInteger() && ElementBitwidth == 32; }
199 bool isLong() const { return isInteger() && ElementBitwidth == 64; }
200 bool isVoid() const { return Kind == Void; }
201 bool isBFloat16() const { return Kind == BFloat16; }
202 bool isMFloat8() const { return Kind == MFloat8; }
203 bool isFPM() const { return Kind == FPM; }
204 unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
205 unsigned getSizeInBits() const { return Bitwidth; }
206 unsigned getElementSizeInBits() const { return ElementBitwidth; }
207 unsigned getNumVectors() const { return NumVectors; }
208
209 //
210 // Mutator functions
211 //
212 void makeUnsigned() {
213 assert(!isVoid() && "not a potentially signed type");
214 Kind = UInt;
215 }
216 void makeSigned() {
217 assert(!isVoid() && "not a potentially signed type");
218 Kind = SInt;
219 }
220
221 void makeInteger(unsigned ElemWidth, bool Sign) {
222 assert(!isVoid() && "converting void to int probably not useful");
223 Kind = Sign ? SInt : UInt;
224 Immediate = false;
225 ElementBitwidth = ElemWidth;
226 }
227
228 void makeImmediate(unsigned ElemWidth) {
229 Kind = SInt;
230 Immediate = true;
231 ElementBitwidth = ElemWidth;
232 }
233
234 void makeScalar() {
235 Bitwidth = ElementBitwidth;
236 NumVectors = 0;
237 }
238
239 void makeOneVector() {
240 assert(isVector());
241 NumVectors = 1;
242 }
243
244 void make32BitElement() {
245 assert_with_loc(Assertion: Bitwidth > 32, Str: "Not enough bits to make it 32!");
246 ElementBitwidth = 32;
247 }
248
249 void doubleLanes() {
250 assert_with_loc(Assertion: Bitwidth != 128, Str: "Can't get bigger than 128!");
251 Bitwidth = 128;
252 }
253
254 void halveLanes() {
255 assert_with_loc(Assertion: Bitwidth != 64, Str: "Can't get smaller than 64!");
256 Bitwidth = 64;
257 }
258
259 /// Return the C string representation of a type, which is the typename
260 /// defined in stdint.h or arm_neon.h.
261 std::string str() const;
262
263 /// Return the string representation of a type, which is an encoded
264 /// string for passing to the BUILTIN() macro in Builtins.def.
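  /// For example (illustrative, given the encodings in builtin_str()):
  /// a uint32x4_t is encoded as "V4Ui" - a vector of 4 ("V4") unsigned ("U")
  /// 32-bit ints ("i").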
265 std::string builtin_str() const;
266
267 /// Return the value in NeonTypeFlags for this type.
268 unsigned getNeonEnum() const;
269
270 /// Parse a type from a stdint.h or arm_neon.h typedef name,
271 /// for example uint32x2_t or int64_t.
272 static Type fromTypedefName(StringRef Name);
273
274private:
275 /// Creates the type based on the typespec string in TS.
276 /// Sets "Quad" to true if the "Q" or "H" modifiers were
277 /// seen. This is needed by applyModifier as some modifiers
278 /// only take effect if the type size was changed by "Q" or "H".
279 void applyTypespec(bool &Quad);
280 /// Applies prototype modifiers to the type.
281 void applyModifiers(StringRef Mods);
282};
283
284//===----------------------------------------------------------------------===//
285// Variable
286//===----------------------------------------------------------------------===//
287
288/// A variable is a simple class that just has a type and a name.
289class Variable {
290 Type T;
291 std::string N;
292
293public:
294 Variable() : T(Type::getVoid()) {}
295 Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}
296
297 Type getType() const { return T; }
298 std::string getName() const { return "__" + N; }
299};
300
301//===----------------------------------------------------------------------===//
302// Intrinsic
303//===----------------------------------------------------------------------===//
304
305/// The main grunt class. This represents an instantiation of an intrinsic with
306/// a particular typespec and prototype.
307class Intrinsic {
308 /// The Record this intrinsic was created from.
309 const Record *R;
310 /// The unmangled name.
311 std::string Name;
312 /// The input and output typespecs. InTS == OutTS except when
313 /// CartesianProductWith is non-empty - this is the case for vreinterpret.
314 TypeSpec OutTS, InTS;
315 /// The base class kind. Most intrinsics use ClassS, which has full type
316 /// info for integers (s32/u32). Some use ClassI, which doesn't care about
317 /// signedness (i32), while some (ClassB) have no type at all, only a width
318 /// (32).
319 ClassKind CK;
320 /// The list of DAGs for the body. May be empty, in which case we should
321 /// emit a builtin call.
322 const ListInit *Body;
323 /// The architectural ifdef guard.
324 std::string ArchGuard;
325 /// The architectural target() guard.
326 std::string TargetGuard;
327 /// Set if the Unavailable bit is 1. This means we don't generate a body,
328 /// just an "unavailable" attribute on a declaration.
329 bool IsUnavailable;
  /// Is this intrinsic safe for big-endian, or does it need its arguments
  /// reversed?
332 bool BigEndianSafe;
333
  /// The types of the return value [0] and the parameters [1..].
335 std::vector<Type> Types;
336
337 SmallVector<ImmCheck, 2> ImmChecks;
338 /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
339 int PolymorphicKeyType;
340 /// The local variables defined.
341 std::map<std::string, Variable, std::less<>> Variables;
342 /// NeededEarly - set if any other intrinsic depends on this intrinsic.
343 bool NeededEarly;
344 /// UseMacro - set if we should implement using a macro or unset for a
345 /// function.
346 bool UseMacro;
347 /// The set of intrinsics that this intrinsic uses/requires.
348 std::set<Intrinsic *> Dependencies;
349 /// The "base type", which is Type('d', OutTS). InBaseType is only
350 /// different if CartesianProductWith is non-empty (for vreinterpret).
351 Type BaseType, InBaseType;
352 /// The return variable.
353 Variable RetVar;
354 /// A postfix to apply to every variable. Defaults to "".
355 std::string VariablePostfix;
356
357 NeonEmitter &Emitter;
358 std::stringstream OS;
359
360 bool isBigEndianSafe() const {
361 if (BigEndianSafe)
362 return true;
363
364 for (const auto &T : Types){
365 if (T.isVector() && T.getNumElements() > 1)
366 return false;
367 }
368 return true;
369 }
370
371public:
372 Intrinsic(const Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
373 TypeSpec InTS, ClassKind CK, const ListInit *Body,
374 NeonEmitter &Emitter, StringRef ArchGuard, StringRef TargetGuard,
375 bool IsUnavailable, bool BigEndianSafe)
376 : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
377 ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()),
378 IsUnavailable(IsUnavailable), BigEndianSafe(BigEndianSafe),
379 PolymorphicKeyType(0), NeededEarly(false), UseMacro(false),
380 BaseType(OutTS, "."), InBaseType(InTS, "."), Emitter(Emitter) {
381 // Modify the TypeSpec per-argument to get a concrete Type, and create
382 // known variables for each.
383 // Types[0] is the return value.
384 unsigned Pos = 0;
385 Types.emplace_back(args&: OutTS, args: getNextModifiers(Proto, Pos));
386 StringRef Mods = getNextModifiers(Proto, Pos);
387 while (!Mods.empty()) {
388 Types.emplace_back(args&: InTS, args&: Mods);
389 if (Mods.contains(C: '!'))
390 PolymorphicKeyType = Types.size() - 1;
391
392 Mods = getNextModifiers(Proto, Pos);
393 }
394
395 for (const auto &Type : Types) {
396 // If this builtin takes an immediate argument, we need to #define it rather
397 // than use a standard declaration, so that SemaChecking can range check
398 // the immediate passed by the user.
399
400 // Pointer arguments need to use macros to avoid hiding aligned attributes
401 // from the pointer type.
402
403 // It is not permitted to pass or return an __fp16 by value, so intrinsics
404 // taking a scalar float16_t must be implemented as macros.
405 if (Type.isImmediate() || Type.isPointer() ||
406 (Type.isScalar() && Type.isHalf()))
407 UseMacro = true;
408 }
409
410 int ArgIdx, Kind, TypeArgIdx;
411 for (const Record *I : R->getValueAsListOfDefs(FieldName: "ImmChecks")) {
412 unsigned EltSizeInBits = 0, VecSizeInBits = 0;
413
414 ArgIdx = I->getValueAsInt(FieldName: "ImmArgIdx");
415 TypeArgIdx = I->getValueAsInt(FieldName: "TypeContextArgIdx");
416 Kind = I->getValueAsDef(FieldName: "Kind")->getValueAsInt(FieldName: "Value");
417
418 assert((ArgIdx >= 0 && Kind >= 0) &&
419 "ImmArgIdx and Kind must be nonnegative");
420
421 if (TypeArgIdx >= 0) {
422 Type ContextType = getParamType(I: TypeArgIdx);
423
        // Element size cannot be set for intrinsics that map to polymorphic
        // builtins.
426 if (CK != ClassB)
427 EltSizeInBits = ContextType.getElementSizeInBits();
428
429 VecSizeInBits = ContextType.getSizeInBits();
430 }
431
432 ImmChecks.emplace_back(Args&: ArgIdx, Args&: Kind, Args&: EltSizeInBits, Args&: VecSizeInBits);
433 }
434 sort(Start: ImmChecks.begin(), End: ImmChecks.end(),
435 Comp: [](const ImmCheck &a, const ImmCheck &b) {
436 return a.getImmArgIdx() < b.getImmArgIdx();
437 }); // Sort for comparison with other intrinsics which map to the
438 // same builtin
439 }
440
  /// Get the Record that this intrinsic is based on.
442 const Record *getRecord() const { return R; }
  /// Get the set of Intrinsics that this intrinsic calls.
  /// This is the set of immediate dependencies, NOT the
  /// transitive closure.
446 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
447 /// Get the architectural guard string (#ifdef).
448 std::string getArchGuard() const { return ArchGuard; }
449 std::string getTargetGuard() const { return TargetGuard; }
450 ArrayRef<ImmCheck> getImmChecks() const { return ImmChecks; }
451 /// Get the non-mangled name.
452 std::string getName() const { return Name; }
453
454 /// Return true if the intrinsic takes an immediate operand.
455 bool hasImmediate() const {
456 return any_of(Range: Types, P: [](const Type &T) { return T.isImmediate(); });
457 }
458
  // Return true if the supplied argument is an immediate.
460 bool isArgImmediate(unsigned idx) const {
461 return Types[idx + 1].isImmediate();
462 }
463
464 unsigned getNumParams() const { return Types.size() - 1; }
465 Type getReturnType() const { return Types[0]; }
466 Type getParamType(unsigned I) const { return Types[I + 1]; }
467 Type getBaseType() const { return BaseType; }
468 Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; }
469
470 /// Return true if the prototype has a scalar argument.
471 bool protoHasScalar() const;
472
473 /// Return the index that parameter PIndex will sit at
474 /// in a generated function call. This is often just PIndex,
475 /// but may not be as things such as multiple-vector operands
476 /// and sret parameters need to be taken into account.
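  /// For example (a sketch): if the return type is a 2-vector struct (passed
  /// as sret) and parameter 0 is itself a 2-vector struct, parameter 1 lands
  /// at generated index 3 (1 for the sret slot plus 2 subvectors).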
477 unsigned getGeneratedParamIdx(unsigned PIndex) {
478 unsigned Idx = 0;
479 if (getReturnType().getNumVectors() > 1)
480 // Multiple vectors are passed as sret.
481 ++Idx;
482
483 for (unsigned I = 0; I < PIndex; ++I)
484 Idx += std::max(a: 1U, b: getParamType(I).getNumVectors());
485
486 return Idx;
487 }
488
489 bool hasBody() const { return Body && !Body->empty(); }
490
491 void setNeededEarly() { NeededEarly = true; }
492
493 bool operator<(const Intrinsic &Other) const {
494 // Sort lexicographically on a three-tuple (ArchGuard, TargetGuard, Name)
495 return std::tie(args: ArchGuard, args: TargetGuard, args: Name) <
496 std::tie(args: Other.ArchGuard, args: Other.TargetGuard, args: Other.Name);
497 }
498
499 ClassKind getClassKind(bool UseClassBIfScalar = false) {
500 if (UseClassBIfScalar && !protoHasScalar())
501 return ClassB;
502 return CK;
503 }
504
505 /// Return the name, mangled with type information.
506 /// If ForceClassS is true, use ClassS (u32/s32) instead
507 /// of the intrinsic's own type class.
508 std::string getMangledName(bool ForceClassS = false) const;
509 /// Return the type code for a builtin function call.
510 std::string getInstTypeCode(Type T, ClassKind CK) const;
511 /// Return the type string for a BUILTIN() macro in Builtins.def.
512 std::string getBuiltinTypeStr();
513
514 /// Generate the intrinsic, returning code.
515 std::string generate();
516 /// Perform type checking and populate the dependency graph, but
517 /// don't generate code yet.
518 void indexBody();
519
520private:
521 StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const;
522
523 std::string mangleName(std::string Name, ClassKind CK) const;
524
525 void initVariables();
526 std::string replaceParamsIn(std::string S);
527
528 void emitBodyAsBuiltinCall();
529
530 void generateImpl(bool ReverseArguments,
531 StringRef NamePrefix, StringRef CallPrefix);
532 void emitReturn();
533 void emitBody(StringRef CallPrefix);
534 void emitShadowedArgs();
535 void emitArgumentReversal();
536 void emitReturnVarDecl();
537 void emitReturnReversal();
538 void emitReverseVariable(Variable &Dest, Variable &Src);
539 void emitNewLine();
540 void emitClosingBrace();
541 void emitOpeningBrace();
542 void emitPrototype(StringRef NamePrefix);
543
544 class DagEmitter {
545 Intrinsic &Intr;
546 StringRef CallPrefix;
547
548 public:
549 DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
550 Intr(Intr), CallPrefix(CallPrefix) {
551 }
552 std::pair<Type, std::string> emitDagArg(const Init *Arg,
553 std::string ArgName);
554 std::pair<Type, std::string> emitDagSaveTemp(const DagInit *DI);
555 std::pair<Type, std::string> emitDagSplat(const DagInit *DI);
556 std::pair<Type, std::string> emitDagDup(const DagInit *DI);
557 std::pair<Type, std::string> emitDagDupTyped(const DagInit *DI);
558 std::pair<Type, std::string> emitDagShuffle(const DagInit *DI);
559 std::pair<Type, std::string> emitDagCast(const DagInit *DI, bool IsBitCast);
560 std::pair<Type, std::string> emitDagCall(const DagInit *DI,
561 bool MatchMangledName);
562 std::pair<Type, std::string> emitDagNameReplace(const DagInit *DI);
563 std::pair<Type, std::string> emitDagLiteral(const DagInit *DI);
564 std::pair<Type, std::string> emitDagOp(const DagInit *DI);
565 std::pair<Type, std::string> emitDag(const DagInit *DI);
566 };
567};
568
569//===----------------------------------------------------------------------===//
570// NeonEmitter
571//===----------------------------------------------------------------------===//
572
573class NeonEmitter {
574 const RecordKeeper &Records;
575 DenseMap<const Record *, ClassKind> ClassMap;
576 std::map<std::string, std::deque<Intrinsic>, std::less<>> IntrinsicMap;
577 unsigned UniqueNumber;
578
579 void createIntrinsic(const Record *R, SmallVectorImpl<Intrinsic *> &Out);
580 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
581 void genStreamingSVECompatibleList(raw_ostream &OS,
582 SmallVectorImpl<Intrinsic *> &Defs);
583 void genOverloadTypeCheckCode(raw_ostream &OS,
584 SmallVectorImpl<Intrinsic *> &Defs);
585 bool areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
586 const ArrayRef<ImmCheck> ChecksB);
587 void genIntrinsicRangeCheckCode(raw_ostream &OS,
588 SmallVectorImpl<Intrinsic *> &Defs);
589
590public:
591 /// Called by Intrinsic - this attempts to get an intrinsic that takes
592 /// the given types as arguments.
593 Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types,
594 std::optional<std::string> MangledName);
595
596 /// Called by Intrinsic - returns a globally-unique number.
597 unsigned getUniqueNumber() { return UniqueNumber++; }
598
599 NeonEmitter(const RecordKeeper &R) : Records(R), UniqueNumber(0) {
600 const Record *SI = R.getClass(Name: "SInst");
601 const Record *II = R.getClass(Name: "IInst");
602 const Record *WI = R.getClass(Name: "WInst");
603 const Record *VI = R.getClass(Name: "VInst");
604 const Record *SOpI = R.getClass(Name: "SOpInst");
605 const Record *IOpI = R.getClass(Name: "IOpInst");
606 const Record *WOpI = R.getClass(Name: "WOpInst");
607 const Record *LOpI = R.getClass(Name: "LOpInst");
608 const Record *NoTestOpI = R.getClass(Name: "NoTestOpInst");
609
610 ClassMap[SI] = ClassS;
611 ClassMap[II] = ClassI;
612 ClassMap[WI] = ClassW;
613 ClassMap[VI] = ClassV;
614 ClassMap[SOpI] = ClassS;
615 ClassMap[IOpI] = ClassI;
616 ClassMap[WOpI] = ClassW;
617 ClassMap[LOpI] = ClassL;
618 ClassMap[NoTestOpI] = ClassNoTest;
619 }
620
621 // Emit arm_neon.h.inc
622 void run(raw_ostream &o);
623
624 // Emit arm_fp16.h.inc
625 void runFP16(raw_ostream &o);
626
627 // Emit arm_bf16.h.inc
628 void runBF16(raw_ostream &o);
629
630 void runVectorTypes(raw_ostream &o);
631
632 // Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and
633 // arm_bf16.h
634 void runHeader(raw_ostream &o);
635};
636
637} // end anonymous namespace
638
639//===----------------------------------------------------------------------===//
640// Type implementation
641//===----------------------------------------------------------------------===//
642
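// A rough example of the mapping below (not exhaustive): a 128-bit vector of
// unsigned 32-bit elements prints as "uint32x4_t", and with the Constant and
// Pointer bits set it prints as "uint32x4_t const *".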
643std::string Type::str() const {
644 if (isVoid())
645 return "void";
646 if (isFPM())
647 return "fpm_t";
648
649 std::string S;
650
651 if (isInteger() && !isSigned())
652 S += "u";
653
654 if (isPoly())
655 S += "poly";
656 else if (isFloating())
657 S += "float";
658 else if (isBFloat16())
659 S += "bfloat";
660 else if (isMFloat8())
661 S += "mfloat";
662 else
663 S += "int";
664
665 S += utostr(X: ElementBitwidth);
666 if (isVector())
667 S += "x" + utostr(X: getNumElements());
668 if (NumVectors > 1)
669 S += "x" + utostr(X: NumVectors);
670 S += "_t";
671
672 if (Constant)
673 S += " const";
674 if (Pointer)
675 S += " *";
676
677 return S;
678}
679
680std::string Type::builtin_str() const {
681 std::string S;
682 if (isVoid())
683 return "v";
684
685 if (isPointer()) {
686 // All pointers are void pointers.
687 S = "v";
688 if (isConstPointer())
689 S += "C";
690 S += "*";
691 return S;
692 } else if (isInteger())
693 switch (ElementBitwidth) {
694 case 8: S += "c"; break;
695 case 16: S += "s"; break;
696 case 32: S += "i"; break;
697 case 64: S += "Wi"; break;
698 case 128: S += "LLLi"; break;
699 default: llvm_unreachable("Unhandled case!");
700 }
701 else if (isBFloat16()) {
702 assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits");
703 S += "y";
704 } else if (isMFloat8()) {
705 assert(ElementBitwidth == 8 && "MFloat8 can only be 8 bits");
706 S += "m";
707 } else if (isFPM()) {
708 S += "UWi";
709 } else
710 switch (ElementBitwidth) {
711 case 16: S += "h"; break;
712 case 32: S += "f"; break;
713 case 64: S += "d"; break;
714 default: llvm_unreachable("Unhandled case!");
715 }
716
  // FIXME: Is this necessary?
718 if (isChar() && !isPointer() && isSigned())
719 // Make chars explicitly signed.
720 S = "S" + S;
721 else if (isInteger() && !isSigned())
722 S = "U" + S;
723
724 // Constant indices are "int", but have the "constant expression" modifier.
725 if (isImmediate()) {
726 assert(isInteger() && isSigned());
727 S = "I" + S;
728 }
729
730 if (isScalar())
731 return S;
732
733 std::string Ret;
734 for (unsigned I = 0; I < NumVectors; ++I)
735 Ret += "V" + utostr(X: getNumElements()) + S;
736
737 return Ret;
738}
739
740unsigned Type::getNeonEnum() const {
741 unsigned Addend;
742 switch (ElementBitwidth) {
743 case 8: Addend = 0; break;
744 case 16: Addend = 1; break;
745 case 32: Addend = 2; break;
746 case 64: Addend = 3; break;
747 case 128: Addend = 4; break;
748 default: llvm_unreachable("Unhandled element bitwidth!");
749 }
750
751 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
752 if (isPoly()) {
753 // Adjustment needed because Poly32 doesn't exist.
754 if (Addend >= 2)
755 --Addend;
756 Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
757 }
758 if (isFloating()) {
759 assert(Addend != 0 && "Float8 doesn't exist!");
760 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
761 }
762
763 if (isBFloat16()) {
764 assert(Addend == 1 && "BFloat16 is only 16 bit");
765 Base = (unsigned)NeonTypeFlags::BFloat16;
766 }
767
768 if (isMFloat8()) {
769 Base = (unsigned)NeonTypeFlags::MFloat8;
770 }
771
772 if (Bitwidth == 128)
773 Base |= (unsigned)NeonTypeFlags::QuadFlag;
774 if (isInteger() && !isSigned())
775 Base |= (unsigned)NeonTypeFlags::UnsignedFlag;
776
777 return Base;
778}
779
780Type Type::fromTypedefName(StringRef Name) {
781 Type T;
782 T.Kind = SInt;
783
784 if (Name.consume_front(Prefix: "u"))
785 T.Kind = UInt;
786
787 if (Name.consume_front(Prefix: "float")) {
788 T.Kind = Float;
789 } else if (Name.consume_front(Prefix: "poly")) {
790 T.Kind = Poly;
791 } else if (Name.consume_front(Prefix: "bfloat")) {
792 T.Kind = BFloat16;
793 } else if (Name.consume_front(Prefix: "mfloat")) {
794 T.Kind = MFloat8;
795 } else {
796 assert(Name.starts_with("int"));
797 Name = Name.drop_front(N: 3);
798 }
799
800 unsigned I = 0;
801 for (I = 0; I < Name.size(); ++I) {
802 if (!isdigit(Name[I]))
803 break;
804 }
805 Name.substr(Start: 0, N: I).getAsInteger(Radix: 10, Result&: T.ElementBitwidth);
806 Name = Name.drop_front(N: I);
807
808 T.Bitwidth = T.ElementBitwidth;
809 T.NumVectors = 1;
810
811 if (Name.consume_front(Prefix: "x")) {
812 unsigned I = 0;
813 for (I = 0; I < Name.size(); ++I) {
814 if (!isdigit(Name[I]))
815 break;
816 }
817 unsigned NumLanes;
818 Name.substr(Start: 0, N: I).getAsInteger(Radix: 10, Result&: NumLanes);
819 Name = Name.drop_front(N: I);
820 T.Bitwidth = T.ElementBitwidth * NumLanes;
821 } else {
822 // Was scalar.
823 T.NumVectors = 0;
824 }
825 if (Name.consume_front(Prefix: "x")) {
826 unsigned I = 0;
827 for (I = 0; I < Name.size(); ++I) {
828 if (!isdigit(Name[I]))
829 break;
830 }
831 Name.substr(Start: 0, N: I).getAsInteger(Radix: 10, Result&: T.NumVectors);
832 Name = Name.drop_front(N: I);
833 }
834
835 assert(Name.starts_with("_t") && "Malformed typedef!");
836 return T;
837}
838
839void Type::applyTypespec(bool &Quad) {
840 std::string S = TS;
841 ScalarForMangling = false;
842 Kind = SInt;
843 ElementBitwidth = ~0U;
844 NumVectors = 1;
845
846 for (char I : S) {
847 switch (I) {
848 case 'S':
849 ScalarForMangling = true;
850 break;
851 case 'H':
852 NoManglingQ = true;
853 Quad = true;
854 break;
855 case 'Q':
856 Quad = true;
857 break;
858 case 'P':
859 Kind = Poly;
860 break;
861 case 'U':
862 Kind = UInt;
863 break;
864 case 'c':
865 ElementBitwidth = 8;
866 break;
867 case 'h':
868 Kind = Float;
869 [[fallthrough]];
870 case 's':
871 ElementBitwidth = 16;
872 break;
873 case 'f':
874 Kind = Float;
875 [[fallthrough]];
876 case 'i':
877 ElementBitwidth = 32;
878 break;
879 case 'd':
880 Kind = Float;
881 [[fallthrough]];
882 case 'l':
883 ElementBitwidth = 64;
884 break;
885 case 'k':
886 ElementBitwidth = 128;
887 // Poly doesn't have a 128x1 type.
888 if (isPoly())
889 NumVectors = 0;
890 break;
891 case 'b':
892 Kind = BFloat16;
893 ElementBitwidth = 16;
894 break;
895 case 'm':
896 Kind = MFloat8;
897 ElementBitwidth = 8;
898 break;
899 default:
900 llvm_unreachable("Unhandled type code!");
901 }
902 }
903 assert(ElementBitwidth != ~0U && "Bad element bitwidth!");
904
905 Bitwidth = Quad ? 128 : 64;
906}
907
908void Type::applyModifiers(StringRef Mods) {
909 bool AppliedQuad = false;
910 applyTypespec(Quad&: AppliedQuad);
911
912 for (char Mod : Mods) {
913 switch (Mod) {
914 case '.':
915 break;
916 case 'v':
917 Kind = Void;
918 break;
919 case 'S':
920 Kind = SInt;
921 break;
922 case 'U':
923 Kind = UInt;
924 break;
925 case 'B':
926 Kind = BFloat16;
927 ElementBitwidth = 16;
928 break;
929 case 'F':
930 Kind = Float;
931 break;
932 case 'P':
933 Kind = Poly;
934 break;
935 case 'V':
936 Kind = FPM;
937 Bitwidth = ElementBitwidth = 64;
938 NumVectors = 0;
939 Immediate = Constant = Pointer = false;
940 ScalarForMangling = NoManglingQ = true;
941 break;
942 case '>':
943 assert(ElementBitwidth < 128);
944 ElementBitwidth *= 2;
945 break;
946 case '<':
947 assert(ElementBitwidth > 8);
948 ElementBitwidth /= 2;
949 break;
950 case '1':
951 NumVectors = 0;
952 break;
953 case '2':
954 NumVectors = 2;
955 break;
956 case '3':
957 NumVectors = 3;
958 break;
959 case '4':
960 NumVectors = 4;
961 break;
962 case '*':
963 Pointer = true;
964 break;
965 case 'c':
966 Constant = true;
967 break;
968 case 'Q':
969 Bitwidth = 128;
970 break;
971 case 'q':
972 Bitwidth = 64;
973 break;
974 case 'I':
975 Kind = SInt;
976 ElementBitwidth = Bitwidth = 32;
977 NumVectors = 0;
978 Immediate = true;
979 break;
980 case 'p':
981 if (isPoly())
982 Kind = UInt;
983 break;
984 case '!':
985 // Key type, handled elsewhere.
986 break;
987 default:
988 llvm_unreachable("Unhandled character!");
989 }
990 }
991}
992
993//===----------------------------------------------------------------------===//
994// Intrinsic implementation
995//===----------------------------------------------------------------------===//
996
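// Illustrative only: for a prototype string such as "v(c*)I", successive calls
// return "v", then "c*", then "I", and finally an empty StringRef once Pos
// reaches the end of the string. Parenthesised groups come back as one
// combined modifier set.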
997StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const {
998 if (Proto.size() == Pos)
999 return StringRef();
1000 else if (Proto[Pos] != '(')
1001 return Proto.substr(Start: Pos++, N: 1);
1002
1003 size_t Start = Pos + 1;
1004 size_t End = Proto.find(C: ')', From: Start);
1005 assert_with_loc(Assertion: End != StringRef::npos, Str: "unmatched modifier group paren");
1006 Pos = End + 1;
1007 return Proto.slice(Start, End);
1008}
1009
1010std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
1011 char typeCode = '\0';
1012 bool printNumber = true;
1013
1014 if (CK == ClassB && TargetGuard == "neon")
1015 return "";
1016
1017 if (this->CK == ClassV)
1018 return "";
1019
1020 if (T.isBFloat16())
1021 return "bf16";
1022
1023 if (T.isMFloat8())
1024 return "mf8";
1025
1026 if (T.isPoly())
1027 typeCode = 'p';
1028 else if (T.isInteger())
1029 typeCode = T.isSigned() ? 's' : 'u';
1030 else
1031 typeCode = 'f';
1032
1033 if (CK == ClassI) {
1034 switch (typeCode) {
1035 default:
1036 break;
1037 case 's':
1038 case 'u':
1039 case 'p':
1040 typeCode = 'i';
1041 break;
1042 }
1043 }
1044 if (CK == ClassB && TargetGuard == "neon") {
1045 typeCode = '\0';
1046 }
1047
1048 std::string S;
1049 if (typeCode != '\0')
1050 S.push_back(c: typeCode);
1051 if (printNumber)
1052 S += utostr(X: T.getElementSizeInBits());
1053
1054 return S;
1055}
1056
1057std::string Intrinsic::getBuiltinTypeStr() {
1058 ClassKind LocalCK = getClassKind(UseClassBIfScalar: true);
1059 std::string S;
1060
1061 Type RetT = getReturnType();
1062 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
1063 !RetT.isFloating() && !RetT.isBFloat16() && !RetT.isMFloat8())
1064 RetT.makeInteger(ElemWidth: RetT.getElementSizeInBits(), Sign: false);
1065
1066 // Since the return value must be one type, return a vector type of the
1067 // appropriate width which we will bitcast. An exception is made for
1068 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
1069 // fashion, storing them to a pointer arg.
1070 if (RetT.getNumVectors() > 1) {
1071 S += "vv*"; // void result with void* first argument
1072 } else {
1073 if (RetT.isPoly())
1074 RetT.makeInteger(ElemWidth: RetT.getElementSizeInBits(), Sign: false);
1075 if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned())
1076 RetT.makeSigned();
1077
1078 if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar())
1079 // Cast to vector of 8-bit elements.
1080 RetT.makeInteger(ElemWidth: 8, Sign: true);
1081
1082 S += RetT.builtin_str();
1083 }
1084
1085 for (unsigned I = 0; I < getNumParams(); ++I) {
1086 Type T = getParamType(I);
1087 if (T.isPoly())
1088 T.makeInteger(ElemWidth: T.getElementSizeInBits(), Sign: false);
1089
1090 if (LocalCK == ClassB && !T.isScalar())
1091 T.makeInteger(ElemWidth: 8, Sign: true);
1092 // Halves always get converted to 8-bit elements.
1093 if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
1094 T.makeInteger(ElemWidth: 8, Sign: true);
1095
1096 if (LocalCK == ClassI && T.isInteger())
1097 T.makeSigned();
1098
1099 if (isArgImmediate(idx: I))
1100 T.makeImmediate(ElemWidth: 32);
1101
1102 S += T.builtin_str();
1103 }
1104
1105 // Extra constant integer to hold type class enum for this function, e.g. s8
1106 if (LocalCK == ClassB)
1107 S += "i";
1108
1109 return S;
1110}
1111
1112std::string Intrinsic::getMangledName(bool ForceClassS) const {
1113 // Check if the prototype has a scalar operand with the type of the vector
1114 // elements. If not, bitcasting the args will take care of arg checking.
1115 // The actual signedness etc. will be taken care of with special enums.
1116 ClassKind LocalCK = CK;
1117 if (!protoHasScalar())
1118 LocalCK = ClassB;
1119
1120 return mangleName(Name, CK: ForceClassS ? ClassS : LocalCK);
1121}
1122
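// A worked example of the mangling below (hedged, assuming a 128-bit u32 base
// type with ClassS): mangleName("vadd", ClassS) appends the type suffix to get
// "vadd_u32" and then inserts the 'q' before the first '_', yielding
// "vaddq_u32".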
1123std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
1124 std::string typeCode = getInstTypeCode(T: BaseType, CK: LocalCK);
1125 std::string S = Name;
1126
1127 if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||
1128 Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" ||
1129 Name == "vcvt_f32_bf16")
1130 return Name;
1131
1132 if (!typeCode.empty()) {
1133 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
1134 if (Name.size() >= 3 && isdigit(Name.back()) &&
1135 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
1136 S.insert(pos1: S.length() - 3, str: "_" + typeCode);
1137 else
1138 S += "_" + typeCode;
1139 }
1140
1141 if (BaseType != InBaseType) {
    // A reinterpret - write out the input base type at the end.
1143 S += "_" + getInstTypeCode(T: InBaseType, CK: LocalCK);
1144 }
1145
1146 if (LocalCK == ClassB && TargetGuard == "neon")
1147 S += "_v";
1148
1149 // Insert a 'q' before the first '_' character so that it ends up before
1150 // _lane or _n on vector-scalar operations.
1151 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
1152 size_t Pos = S.find(c: '_');
1153 S.insert(pos: Pos, s: "q");
1154 }
1155
1156 char Suffix = '\0';
1157 if (BaseType.isScalarForMangling()) {
1158 switch (BaseType.getElementSizeInBits()) {
1159 case 8: Suffix = 'b'; break;
1160 case 16: Suffix = 'h'; break;
1161 case 32: Suffix = 's'; break;
1162 case 64: Suffix = 'd'; break;
1163 default: llvm_unreachable("Bad suffix!");
1164 }
1165 }
1166 if (Suffix != '\0') {
1167 size_t Pos = S.find(c: '_');
1168 S.insert(pos: Pos, s: &Suffix, n: 1);
1169 }
1170
1171 return S;
1172}
1173
1174std::string Intrinsic::replaceParamsIn(std::string S) {
1175 while (S.find(c: '$') != std::string::npos) {
1176 size_t Pos = S.find(c: '$');
1177 size_t End = Pos + 1;
1178 while (isalpha(S[End]))
1179 ++End;
1180
1181 std::string VarName = S.substr(pos: Pos + 1, n: End - Pos - 1);
1182 assert_with_loc(Assertion: Variables.find(x: VarName) != Variables.end(),
1183 Str: "Variable not defined!");
1184 S.replace(pos: Pos, n: End - Pos, str: Variables.find(x: VarName)->second.getName());
1185 }
1186
1187 return S;
1188}
1189
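// Note (descriptive, not normative): parameters become Variables named "p0",
// "p1", ... and the return value becomes "ret". Variable::getName() prefixes
// these with "__", so generated bodies refer to "__p0", "__ret", etc. and stay
// out of the user's identifier space.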
1190void Intrinsic::initVariables() {
1191 Variables.clear();
1192
1193 // Modify the TypeSpec per-argument to get a concrete Type, and create
1194 // known variables for each.
1195 for (unsigned I = 1; I < Types.size(); ++I) {
1196 char NameC = '0' + (I - 1);
1197 std::string Name = "p";
1198 Name.push_back(c: NameC);
1199
1200 Variables[Name] = Variable(Types[I], Name + VariablePostfix);
1201 }
1202 RetVar = Variable(Types[0], "ret" + VariablePostfix);
1203}
1204
1205void Intrinsic::emitPrototype(StringRef NamePrefix) {
1206 if (UseMacro) {
1207 OS << "#define ";
1208 } else {
1209 OS << "__ai ";
1210 if (TargetGuard != "")
1211 OS << "__attribute__((target(\"" << TargetGuard << "\"))) ";
1212 OS << Types[0].str() << " ";
1213 }
1214
1215 OS << NamePrefix.str() << mangleName(Name, LocalCK: ClassS) << "(";
1216
1217 for (unsigned I = 0; I < getNumParams(); ++I) {
1218 if (I != 0)
1219 OS << ", ";
1220
1221 char NameC = '0' + I;
1222 std::string Name = "p";
1223 Name.push_back(c: NameC);
1224 assert(Variables.find(Name) != Variables.end());
1225 Variable &V = Variables[Name];
1226
1227 if (!UseMacro)
1228 OS << V.getType().str() << " ";
1229 OS << V.getName();
1230 }
1231
1232 OS << ")";
1233}
1234
1235void Intrinsic::emitOpeningBrace() {
1236 if (UseMacro)
1237 OS << " __extension__ ({";
1238 else
1239 OS << " {";
1240 emitNewLine();
1241}
1242
1243void Intrinsic::emitClosingBrace() {
1244 if (UseMacro)
1245 OS << "})";
1246 else
1247 OS << "}";
1248}
1249
1250void Intrinsic::emitNewLine() {
1251 if (UseMacro)
1252 OS << " \\\n";
1253 else
1254 OS << "\n";
1255}
1256
1257void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
1258 if (Dest.getType().getNumVectors() > 1) {
1259 emitNewLine();
1260
1261 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) {
1262 OS << " " << Dest.getName() << ".val[" << K << "] = "
1263 << "__builtin_shufflevector(" << Src.getName() << ".val[" << K << "], "
1264 << Src.getName() << ".val[" << K << "], __lane_reverse_"
1265 << Dest.getType().getSizeInBits() << "_"
1266 << Dest.getType().getElementSizeInBits() << ");";
1267 emitNewLine();
1268 }
1269 } else {
1270 OS << " " << Dest.getName() << " = __builtin_shufflevector("
1271 << Src.getName() << ", " << Src.getName() << ", __lane_reverse_"
1272 << Dest.getType().getSizeInBits() << "_"
1273 << Dest.getType().getElementSizeInBits() << ");";
1274 emitNewLine();
1275 }
1276}
1277
1278void Intrinsic::emitArgumentReversal() {
1279 if (isBigEndianSafe())
1280 return;
1281
1282 // Reverse all vector arguments.
1283 for (unsigned I = 0; I < getNumParams(); ++I) {
1284 std::string Name = "p" + utostr(X: I);
1285 std::string NewName = "rev" + utostr(X: I);
1286
1287 Variable &V = Variables[Name];
1288 Variable NewV(V.getType(), NewName + VariablePostfix);
1289
1290 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
1291 continue;
1292
1293 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";";
1294 emitReverseVariable(Dest&: NewV, Src&: V);
1295 V = NewV;
1296 }
1297}
1298
1299void Intrinsic::emitReturnVarDecl() {
1300 assert(RetVar.getType() == Types[0]);
1301 // Create a return variable, if we're not void.
1302 if (!RetVar.getType().isVoid()) {
1303 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";";
1304 emitNewLine();
1305 }
1306}
1307
1308void Intrinsic::emitReturnReversal() {
1309 if (isBigEndianSafe())
1310 return;
1311 if (!getReturnType().isVector() || getReturnType().isVoid() ||
1312 getReturnType().getNumElements() == 1)
1313 return;
1314 emitReverseVariable(Dest&: RetVar, Src&: RetVar);
1315}
1316
1317void Intrinsic::emitShadowedArgs() {
1318 // Macro arguments are not type-checked like inline function arguments,
1319 // so assign them to local temporaries to get the right type checking.
1320 if (!UseMacro)
1321 return;
1322
1323 for (unsigned I = 0; I < getNumParams(); ++I) {
1324 // Do not create a temporary for an immediate argument.
1325 // That would defeat the whole point of using a macro!
1326 if (getParamType(I).isImmediate())
1327 continue;
1328 // Do not create a temporary for pointer arguments. The input
1329 // pointer may have an alignment hint.
1330 if (getParamType(I).isPointer())
1331 continue;
1332
1333 std::string Name = "p" + utostr(X: I);
1334
1335 assert(Variables.find(Name) != Variables.end());
1336 Variable &V = Variables[Name];
1337
1338 std::string NewName = "s" + utostr(X: I);
1339 Variable V2(V.getType(), NewName + VariablePostfix);
1340
1341 OS << " " << V2.getType().str() << " " << V2.getName() << " = "
1342 << V.getName() << ";";
1343 emitNewLine();
1344
1345 V = V2;
1346 }
1347}
1348
1349bool Intrinsic::protoHasScalar() const {
1350 return any_of(Range: Types,
1351 P: [](const Type &T) { return T.isScalar() && !T.isImmediate(); });
1352}
1353
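// Sketch of the statement this emits (illustrative, not verbatim output): for
// a ClassB intrinsic returning int32x4_t this produces something like
//   __ret = __builtin_bit_cast(int32x4_t,
//       __builtin_neon_vaddq_v(__builtin_bit_cast(int8x16_t, __p0),
//                              __builtin_bit_cast(int8x16_t, __p1), 34));
// where vector arguments are bit_cast to 8-bit element vectors and the
// trailing constant is the NeonTypeFlags value selecting the overload.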
1354void Intrinsic::emitBodyAsBuiltinCall() {
1355 std::string S;
1356
  // If this builtin returns a struct of 2, 3, or 4 vectors, pass it as an
  // implicit sret-like argument.
1359 bool SRet = getReturnType().getNumVectors() >= 2;
1360
1361 StringRef N = Name;
1362 ClassKind LocalCK = CK;
1363 if (!protoHasScalar())
1364 LocalCK = ClassB;
1365
1366 if (!getReturnType().isVoid() && !SRet)
1367 S += "__builtin_bit_cast(" + RetVar.getType().str() + ", ";
1368
1369 S += "__builtin_neon_" + mangleName(Name: std::string(N), LocalCK) + "(";
1370
1371 if (SRet)
1372 S += "&" + RetVar.getName() + ", ";
1373
1374 for (unsigned I = 0; I < getNumParams(); ++I) {
1375 Variable &V = Variables["p" + utostr(X: I)];
1376 Type T = V.getType();
1377
1378 // Handle multiple-vector values specially, emitting each subvector as an
1379 // argument to the builtin.
1380 if (T.getNumVectors() > 1) {
1381 // Check if an explicit cast is needed.
1382 std::string Cast;
1383 if (LocalCK == ClassB) {
1384 Type T2 = T;
1385 T2.makeOneVector();
1386 T2.makeInteger(ElemWidth: 8, /*Sign=*/true);
1387 Cast = "__builtin_bit_cast(" + T2.str() + ", ";
1388 }
1389
1390 for (unsigned J = 0; J < T.getNumVectors(); ++J)
1391 S += Cast + V.getName() + ".val[" + utostr(X: J) + "]" +
1392 (Cast.empty() ? ", " : "), ");
1393 continue;
1394 }
1395
1396 std::string Arg = V.getName();
1397 Type CastToType = T;
1398
1399 // Check if an explicit cast is needed.
1400 if (CastToType.isVector()) {
1401 if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
1402 CastToType.makeInteger(ElemWidth: 8, Sign: true);
1403 Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
1404 } else if (LocalCK == ClassI &&
1405 (CastToType.isInteger() || CastToType.isPoly())) {
1406 CastToType.makeSigned();
1407 Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
1408 }
1409 }
1410 S += Arg + ", ";
1411 }
1412
1413 // Extra constant integer to hold type class enum for this function, e.g. s8
1414 if (getClassKind(UseClassBIfScalar: true) == ClassB) {
1415 S += utostr(X: getPolymorphicKeyType().getNeonEnum());
1416 } else {
1417 // Remove extraneous ", ".
1418 S.pop_back();
1419 S.pop_back();
1420 }
1421
1422 if (!getReturnType().isVoid() && !SRet)
1423 S += ")";
1424 S += ");";
1425
1426 std::string RetExpr;
1427 if (!SRet && !RetVar.getType().isVoid())
1428 RetExpr = RetVar.getName() + " = ";
1429
1430 OS << " " << RetExpr << S;
1431 emitNewLine();
1432}
1433
1434void Intrinsic::emitBody(StringRef CallPrefix) {
1435 std::vector<std::string> Lines;
1436
1437 if (!Body || Body->empty()) {
1438 // Nothing specific to output - must output a builtin.
1439 emitBodyAsBuiltinCall();
1440 return;
1441 }
1442
1443 // We have a list of "things to output". The last should be returned.
1444 for (auto *I : Body->getElements()) {
1445 if (const auto *SI = dyn_cast<StringInit>(Val: I)) {
1446 Lines.push_back(x: replaceParamsIn(S: SI->getAsString()));
1447 } else if (const auto *DI = dyn_cast<DagInit>(Val: I)) {
1448 DagEmitter DE(*this, CallPrefix);
1449 Lines.push_back(x: DE.emitDag(DI).second + ";");
1450 }
1451 }
1452
1453 assert(!Lines.empty() && "Empty def?");
1454 if (!RetVar.getType().isVoid())
1455 Lines.back().insert(pos1: 0, str: RetVar.getName() + " = ");
1456
1457 for (auto &L : Lines) {
1458 OS << " " << L;
1459 emitNewLine();
1460 }
1461}
1462
1463void Intrinsic::emitReturn() {
1464 if (RetVar.getType().isVoid())
1465 return;
1466 if (UseMacro)
1467 OS << " " << RetVar.getName() << ";";
1468 else
1469 OS << " return " << RetVar.getName() << ";";
1470 emitNewLine();
1471}
1472
1473std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(const DagInit *DI) {
1474 // At this point we should only be seeing a def.
1475 const DefInit *DefI = cast<DefInit>(Val: DI->getOperator());
1476 std::string Op = DefI->getAsString();
1477
1478 if (Op == "cast" || Op == "bitcast")
1479 return emitDagCast(DI, IsBitCast: Op == "bitcast");
1480 if (Op == "shuffle")
1481 return emitDagShuffle(DI);
1482 if (Op == "dup")
1483 return emitDagDup(DI);
1484 if (Op == "dup_typed")
1485 return emitDagDupTyped(DI);
1486 if (Op == "splat")
1487 return emitDagSplat(DI);
1488 if (Op == "save_temp")
1489 return emitDagSaveTemp(DI);
1490 if (Op == "op")
1491 return emitDagOp(DI);
1492 if (Op == "call" || Op == "call_mangled")
1493 return emitDagCall(DI, MatchMangledName: Op == "call_mangled");
1494 if (Op == "name_replace")
1495 return emitDagNameReplace(DI);
1496 if (Op == "literal")
1497 return emitDagLiteral(DI);
1498 assert_with_loc(Assertion: false, Str: "Unknown operation!");
1499 return std::make_pair(x: Type::getVoid(), y: "");
1500}
1501
1502std::pair<Type, std::string>
1503Intrinsic::DagEmitter::emitDagOp(const DagInit *DI) {
1504 std::string Op = cast<StringInit>(Val: DI->getArg(Num: 0))->getAsUnquotedString();
1505 if (DI->getNumArgs() == 2) {
1506 // Unary op.
1507 std::pair<Type, std::string> R =
1508 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1509 return std::make_pair(x&: R.first, y: Op + R.second);
1510 } else {
1511 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
1512 std::pair<Type, std::string> R1 =
1513 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1514 std::pair<Type, std::string> R2 =
1515 emitDagArg(Arg: DI->getArg(Num: 2), ArgName: std::string(DI->getArgNameStr(Num: 2)));
1516 assert_with_loc(Assertion: R1.first == R2.first, Str: "Argument type mismatch!");
1517 return std::make_pair(x&: R1.first, y: R1.second + " " + Op + " " + R2.second);
1518 }
1519}
1520
1521std::pair<Type, std::string>
1522Intrinsic::DagEmitter::emitDagCall(const DagInit *DI, bool MatchMangledName) {
1523 std::vector<Type> Types;
1524 std::vector<std::string> Values;
1525 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
1526 std::pair<Type, std::string> R =
1527 emitDagArg(Arg: DI->getArg(Num: I + 1), ArgName: std::string(DI->getArgNameStr(Num: I + 1)));
1528 Types.push_back(x: R.first);
1529 Values.push_back(x: R.second);
1530 }
1531
1532 // Look up the called intrinsic.
1533 std::string N;
1534 if (const auto *SI = dyn_cast<StringInit>(Val: DI->getArg(Num: 0)))
1535 N = SI->getAsUnquotedString();
1536 else
1537 N = emitDagArg(Arg: DI->getArg(Num: 0), ArgName: "").second;
1538 std::optional<std::string> MangledName;
1539 if (MatchMangledName) {
1540 if (Intr.getRecord()->getValueAsString(FieldName: "Name").contains(Other: "laneq"))
1541 N += "q";
1542 MangledName = Intr.mangleName(Name: N, LocalCK: ClassS);
1543 }
1544 Intrinsic &Callee = Intr.Emitter.getIntrinsic(Name: N, Types, MangledName);
1545
1546 // Make sure the callee is known as an early def.
1547 Callee.setNeededEarly();
1548 Intr.Dependencies.insert(x: &Callee);
1549
1550 // Now create the call itself.
1551 std::string S;
1552 if (!Callee.isBigEndianSafe())
1553 S += CallPrefix.str();
1554 S += Callee.getMangledName(ForceClassS: true) + "(";
1555 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
1556 if (I != 0)
1557 S += ", ";
1558 S += Values[I];
1559 }
1560 S += ")";
1561
1562 return std::make_pair(x: Callee.getReturnType(), y&: S);
1563}
1564
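// Illustrative examples of the cast dags handled below (assuming $p0 and $p1
// are parameters): (cast "U", $p0) re-expresses __p0 in the unsigned
// counterpart of its type via a C-style cast, while (bitcast $p1, $p0) emits
// __builtin_bit_cast(<type of __p1>, __p0).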
1565std::pair<Type, std::string>
1566Intrinsic::DagEmitter::emitDagCast(const DagInit *DI, bool IsBitCast) {
1567 // (cast MOD* VAL) -> cast VAL to type given by MOD.
1568 std::pair<Type, std::string> R =
1569 emitDagArg(Arg: DI->getArg(Num: DI->getNumArgs() - 1),
1570 ArgName: std::string(DI->getArgNameStr(Num: DI->getNumArgs() - 1)));
1571 Type castToType = R.first;
1572 for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {
1573
1574 // MOD can take several forms:
1575 // 1. $X - take the type of parameter / variable X.
1576 // 2. The value "R" - take the type of the return type.
1577 // 3. a type string
1578 // 4. The value "U" or "S" to switch the signedness.
1579 // 5. The value "H" or "D" to half or double the bitwidth.
1580 // 6. The value "8" to convert to 8-bit (signed) integer lanes.
1581 if (!DI->getArgNameStr(Num: ArgIdx).empty()) {
1582 assert_with_loc(Assertion: Intr.Variables.find(x: DI->getArgNameStr(Num: ArgIdx)) !=
1583 Intr.Variables.end(),
1584 Str: "Variable not found");
1585 castToType =
1586 Intr.Variables[std::string(DI->getArgNameStr(Num: ArgIdx))].getType();
1587 } else {
1588 const auto *SI = dyn_cast<StringInit>(Val: DI->getArg(Num: ArgIdx));
1589 assert_with_loc(Assertion: SI, Str: "Expected string type or $Name for cast type");
1590
1591 if (SI->getAsUnquotedString() == "R") {
1592 castToType = Intr.getReturnType();
1593 } else if (SI->getAsUnquotedString() == "U") {
1594 castToType.makeUnsigned();
1595 } else if (SI->getAsUnquotedString() == "S") {
1596 castToType.makeSigned();
1597 } else if (SI->getAsUnquotedString() == "H") {
1598 castToType.halveLanes();
1599 } else if (SI->getAsUnquotedString() == "D") {
1600 castToType.doubleLanes();
1601 } else if (SI->getAsUnquotedString() == "8") {
1602 castToType.makeInteger(ElemWidth: 8, Sign: true);
1603 } else if (SI->getAsUnquotedString() == "32") {
1604 castToType.make32BitElement();
1605 } else {
1606 castToType = Type::fromTypedefName(Name: SI->getAsUnquotedString());
1607 assert_with_loc(Assertion: !castToType.isVoid(), Str: "Unknown typedef");
1608 }
1609 }
1610 }
1611
1612 std::string S;
1613 if (IsBitCast)
1614 S = "__builtin_bit_cast(" + castToType.str() + ", " + R.second + ")";
1615 else
1616 S = "(" + castToType.str() + ")(" + R.second + ")";
1617
1618 return std::make_pair(x&: castToType, y&: S);
1619}
1620
1621std::pair<Type, std::string>
1622Intrinsic::DagEmitter::emitDagShuffle(const DagInit *DI) {
1623 // See the documentation in arm_neon.td for a description of these operators.
1624 class LowHalf : public SetTheory::Operator {
1625 public:
1626 void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
1627 ArrayRef<SMLoc> Loc) override {
1628 SetTheory::RecSet Elts2;
1629 ST.evaluate(begin: Expr->arg_begin(), end: Expr->arg_end(), Elts&: Elts2, Loc);
1630 Elts.insert(Start: Elts2.begin(), End: Elts2.begin() + (Elts2.size() / 2));
1631 }
1632 };
1633
1634 class HighHalf : public SetTheory::Operator {
1635 public:
1636 void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
1637 ArrayRef<SMLoc> Loc) override {
1638 SetTheory::RecSet Elts2;
1639 ST.evaluate(begin: Expr->arg_begin(), end: Expr->arg_end(), Elts&: Elts2, Loc);
1640 Elts.insert(Start: Elts2.begin() + (Elts2.size() / 2), End: Elts2.end());
1641 }
1642 };
1643
1644 class Rev : public SetTheory::Operator {
1645 unsigned ElementSize;
1646
1647 public:
1648 Rev(unsigned ElementSize) : ElementSize(ElementSize) {}
1649
1650 void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
1651 ArrayRef<SMLoc> Loc) override {
1652 SetTheory::RecSet Elts2;
1653 ST.evaluate(begin: Expr->arg_begin() + 1, end: Expr->arg_end(), Elts&: Elts2, Loc);
1654
1655 int64_t VectorSize = cast<IntInit>(Val: Expr->getArg(Num: 0))->getValue();
1656 VectorSize /= ElementSize;
1657
1658 std::vector<const Record *> Revved;
1659 for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
1660 for (int LI = VectorSize - 1; LI >= 0; --LI) {
1661 Revved.push_back(x: Elts2[VI + LI]);
1662 }
1663 }
1664
1665 Elts.insert_range(R&: Revved);
1666 }
1667 };
1668
1669 class MaskExpander : public SetTheory::Expander {
1670 unsigned N;
1671
1672 public:
1673 MaskExpander(unsigned N) : N(N) {}
1674
1675 void expand(SetTheory &ST, const Record *R,
1676 SetTheory::RecSet &Elts) override {
1677 unsigned Addend = 0;
1678 if (R->getName() == "mask0")
1679 Addend = 0;
1680 else if (R->getName() == "mask1")
1681 Addend = N;
1682 else
1683 return;
1684 for (unsigned I = 0; I < N; ++I)
1685 Elts.insert(X: R->getRecords().getDef(Name: "sv" + utostr(X: I + Addend)));
1686 }
1687 };
1688
1689 // (shuffle arg1, arg2, sequence)
1690 std::pair<Type, std::string> Arg1 =
1691 emitDagArg(Arg: DI->getArg(Num: 0), ArgName: std::string(DI->getArgNameStr(Num: 0)));
1692 std::pair<Type, std::string> Arg2 =
1693 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1694 assert_with_loc(Assertion: Arg1.first == Arg2.first,
1695 Str: "Different types in arguments to shuffle!");
1696
1697 SetTheory ST;
1698 SetTheory::RecSet Elts;
1699 ST.addOperator(Name: "lowhalf", std::make_unique<LowHalf>());
1700 ST.addOperator(Name: "highhalf", std::make_unique<HighHalf>());
1701 ST.addOperator(Name: "rev",
1702 std::make_unique<Rev>(args: Arg1.first.getElementSizeInBits()));
1703 ST.addExpander(ClassName: "MaskExpand",
1704 std::make_unique<MaskExpander>(args: Arg1.first.getNumElements()));
1705 ST.evaluate(Expr: DI->getArg(Num: 2), Elts, Loc: {});
1706
1707 std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
1708 for (auto &E : Elts) {
1709 StringRef Name = E->getName();
1710 assert_with_loc(Assertion: Name.starts_with(Prefix: "sv"),
1711 Str: "Incorrect element kind in shuffle mask!");
1712 S += ", " + Name.drop_front(N: 2).str();
1713 }
1714 S += ")";
1715
1716 // Recalculate the return type - the shuffle may have halved or doubled it.
1717 Type T(Arg1.first);
1718 if (Elts.size() > T.getNumElements()) {
1719 assert_with_loc(
1720 Assertion: Elts.size() == T.getNumElements() * 2,
1721 Str: "Can only double or half the number of elements in a shuffle!");
1722 T.doubleLanes();
1723 } else if (Elts.size() < T.getNumElements()) {
1724 assert_with_loc(
1725 Assertion: Elts.size() == T.getNumElements() / 2,
1726 Str: "Can only double or half the number of elements in a shuffle!");
1727 T.halveLanes();
1728 }
1729
1730 return std::make_pair(x&: T, y&: S);
1731}
1732
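// For example (a sketch): with a base type of int32x4_t, (dup $p0) expands to
// "(int32x4_t) {__p0, __p0, __p0, __p0}".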
1733std::pair<Type, std::string>
1734Intrinsic::DagEmitter::emitDagDup(const DagInit *DI) {
1735 assert_with_loc(Assertion: DI->getNumArgs() == 1, Str: "dup() expects one argument");
1736 std::pair<Type, std::string> A =
1737 emitDagArg(Arg: DI->getArg(Num: 0), ArgName: std::string(DI->getArgNameStr(Num: 0)));
1738 assert_with_loc(Assertion: A.first.isScalar(), Str: "dup() expects a scalar argument");
1739
1740 Type T = Intr.getBaseType();
1741 assert_with_loc(Assertion: T.isVector(), Str: "dup() used but default type is scalar!");
1742 std::string S = "(" + T.str() + ") {";
1743 for (unsigned I = 0; I < T.getNumElements(); ++I) {
1744 if (I != 0)
1745 S += ", ";
1746 S += A.second;
1747 }
1748 S += "}";
1749
1750 return std::make_pair(x&: T, y&: S);
1751}
1752
1753std::pair<Type, std::string>
1754Intrinsic::DagEmitter::emitDagDupTyped(const DagInit *DI) {
1755 assert_with_loc(Assertion: DI->getNumArgs() == 2, Str: "dup_typed() expects two arguments");
1756 std::pair<Type, std::string> B =
1757 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1758 assert_with_loc(Assertion: B.first.isScalar(),
1759 Str: "dup_typed() requires a scalar as the second argument");
1760 Type T;
1761 // If the type argument is a constant string, construct the type directly.
1762 if (const auto *SI = dyn_cast<StringInit>(Val: DI->getArg(Num: 0))) {
1763 T = Type::fromTypedefName(Name: SI->getAsUnquotedString());
1764 assert_with_loc(Assertion: !T.isVoid(), Str: "Unknown typedef");
1765 } else
1766 T = emitDagArg(Arg: DI->getArg(Num: 0), ArgName: std::string(DI->getArgNameStr(Num: 0))).first;
1767
1768 assert_with_loc(Assertion: T.isVector(), Str: "dup_typed() used but target type is scalar!");
1769 std::string S = "(" + T.str() + ") {";
1770 for (unsigned I = 0; I < T.getNumElements(); ++I) {
1771 if (I != 0)
1772 S += ", ";
1773 S += B.second;
1774 }
1775 S += "}";
1776
1777 return std::make_pair(x&: T, y&: S);
1778}
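// Illustrative sketch: dup_typed names the destination type explicitly, so
// (dup_typed "int16x8_t", x) would expand to roughly
//
//   (int16x8_t) {x, x, x, x, x, x, x, x}
//
// (the type name and scalar here are hypothetical; the type may also be
// supplied by another DAG instead of a string).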
1779
1780std::pair<Type, std::string>
1781Intrinsic::DagEmitter::emitDagSplat(const DagInit *DI) {
1782 assert_with_loc(Assertion: DI->getNumArgs() == 2, Str: "splat() expects two arguments");
1783 std::pair<Type, std::string> A =
1784 emitDagArg(Arg: DI->getArg(Num: 0), ArgName: std::string(DI->getArgNameStr(Num: 0)));
1785 std::pair<Type, std::string> B =
1786 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1787
1788 assert_with_loc(Assertion: B.first.isScalar(),
1789 Str: "splat() requires a scalar int as the second argument");
1790
1791 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
1792 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {
1793 S += ", " + B.second;
1794 }
1795 S += ")";
1796
1797 return std::make_pair(x: Intr.getBaseType(), y&: S);
1798}
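// Illustrative sketch: splat repeats the second (scalar) operand as the lane
// index for every element of the base vector type, e.g. for a hypothetical
// four-lane base type:
//
//   __builtin_shufflevector(__a, __a, __b, __b, __b, __b)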
1799
1800std::pair<Type, std::string>
1801Intrinsic::DagEmitter::emitDagSaveTemp(const DagInit *DI) {
1802 assert_with_loc(Assertion: DI->getNumArgs() == 2, Str: "save_temp() expects two arguments");
1803 std::pair<Type, std::string> A =
1804 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1805
1806 assert_with_loc(Assertion: !A.first.isVoid(),
1807 Str: "Argument to save_temp() must have non-void type!");
1808
1809 std::string N = std::string(DI->getArgNameStr(Num: 0));
1810 assert_with_loc(Assertion: !N.empty(),
1811 Str: "save_temp() expects a name as the first argument");
1812
1813 auto [It, Inserted] =
1814 Intr.Variables.try_emplace(k: N, args&: A.first, args: N + Intr.VariablePostfix);
1815 assert_with_loc(Assertion: Inserted, Str: "Variable already defined!");
1816
1817 std::string S = A.first.str() + " " + It->second.getName() + " = " + A.second;
1818
1819 return std::make_pair(x: Type::getVoid(), y&: S);
1820}
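// Illustrative sketch: (save_temp $t, <expr>) introduces a named temporary of
// the expression's type, yielding a statement along the lines of
//
//   int32x4_t __t_42 = <expr>
//
// where the type, the name, and the uniquifying postfix are hypothetical; the
// postfix is only present when macro expansion requires it.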
1821
1822std::pair<Type, std::string>
1823Intrinsic::DagEmitter::emitDagNameReplace(const DagInit *DI) {
1824 std::string S = Intr.Name;
1825
1826 assert_with_loc(Assertion: DI->getNumArgs() == 2, Str: "name_replace requires 2 arguments!");
1827 std::string ToReplace = cast<StringInit>(Val: DI->getArg(Num: 0))->getAsUnquotedString();
1828 std::string ReplaceWith = cast<StringInit>(Val: DI->getArg(Num: 1))->getAsUnquotedString();
1829
1830 size_t Idx = S.find(str: ToReplace);
1831
1832 assert_with_loc(Assertion: Idx != std::string::npos, Str: "name should contain '" + ToReplace + "'!");
1833 S.replace(pos: Idx, n: ToReplace.size(), str: ReplaceWith);
1834
1835 return std::make_pair(x: Type::getVoid(), y&: S);
1836}
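// Illustrative sketch: name_replace rewrites the intrinsic's own name. For a
// hypothetical intrinsic named "vfoo_high_s16", (name_replace "_high", "")
// produces "vfoo_s16"; it is a fatal error if the name does not contain the
// substring being replaced.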
1837
1838std::pair<Type, std::string>
1839Intrinsic::DagEmitter::emitDagLiteral(const DagInit *DI) {
1840 std::string Ty = cast<StringInit>(Val: DI->getArg(Num: 0))->getAsUnquotedString();
1841 std::string Value = cast<StringInit>(Val: DI->getArg(Num: 1))->getAsUnquotedString();
1842 return std::make_pair(x: Type::fromTypedefName(Name: Ty), y&: Value);
1843}
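// Illustrative sketch: (literal "int32_t", "0") simply yields the token "0"
// with type int32_t (both values hypothetical).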
1844
1845std::pair<Type, std::string>
1846Intrinsic::DagEmitter::emitDagArg(const Init *Arg, std::string ArgName) {
1847 if (!ArgName.empty()) {
1848 assert_with_loc(Assertion: !Arg->isComplete(),
1849 Str: "Arguments must either be DAGs or names, not both!");
1850 assert_with_loc(Assertion: Intr.Variables.find(x: ArgName) != Intr.Variables.end(),
1851 Str: "Variable not defined!");
1852 Variable &V = Intr.Variables[ArgName];
1853 return std::make_pair(x: V.getType(), y: V.getName());
1854 }
1855
1856 assert(Arg && "Neither ArgName nor Arg?!");
1857 const auto *DI = dyn_cast<DagInit>(Val: Arg);
1858 assert_with_loc(Assertion: DI, Str: "Arguments must either be DAGs or names!");
1859
1860 return emitDag(DI);
1861}
1862
1863std::string Intrinsic::generate() {
1864 // Avoid duplicated code for big and little endian
1865 if (isBigEndianSafe()) {
1866 generateImpl(ReverseArguments: false, NamePrefix: "", CallPrefix: "");
1867 return OS.str();
1868 }
1869 // Little endian intrinsics are simple and don't require any argument
1870 // swapping.
1871 OS << "#ifdef __LITTLE_ENDIAN__\n";
1872
1873 generateImpl(ReverseArguments: false, NamePrefix: "", CallPrefix: "");
1874
1875 OS << "#else\n";
1876
1877 // Big endian intrinsics are more complex. The user intended these intrinsics
1878 // to operate on a vector "as-if" loaded by LDR (for AArch64), VLDR (for
1879 // 64-bit vectors on AArch32), or VLDM (for 128-bit vectors on AArch32) but
1880 // we load as-if LD1 (for AArch64) or VLD1 (for AArch32). So we should swap
1881 // all arguments and swap the return value too.
1882 //
1883 // If we call sub-intrinsics, we should call a version that does
1884 // not re-swap the arguments!
1885 generateImpl(ReverseArguments: true, NamePrefix: "", CallPrefix: "__noswap_");
1886
1887 // If we're needed early, create a non-swapping variant for
1888 // big-endian.
1889 if (NeededEarly) {
1890 generateImpl(ReverseArguments: false, NamePrefix: "__noswap_", CallPrefix: "__noswap_");
1891 }
1892 OS << "#endif\n\n";
1893
1894 return OS.str();
1895}
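// Illustrative sketch of the emitted shape for an intrinsic that is not
// big-endian safe (bodies elided):
//
//   #ifdef __LITTLE_ENDIAN__
//   <plain definition>
//   #else
//   <definition that reverses its vector arguments and return value and calls
//    any sub-intrinsics through their __noswap_ variants>
//   #endif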
1896
1897void Intrinsic::generateImpl(bool ReverseArguments,
1898 StringRef NamePrefix, StringRef CallPrefix) {
1899 CurrentRecord = R;
1900
1901 // If we call a macro, our local variables may be corrupted due to
1902 // lack of proper lexical scoping. So, add a globally unique postfix
1903 // to every variable.
1904 //
1905 // indexBody() should have set up the Dependencies set by now.
1906 for (auto *I : Dependencies)
1907 if (I->UseMacro) {
1908 VariablePostfix = "_" + utostr(X: Emitter.getUniqueNumber());
1909 break;
1910 }
1911
1912 initVariables();
1913
1914 emitPrototype(NamePrefix);
1915
1916 if (IsUnavailable) {
1917 OS << " __attribute__((unavailable));";
1918 } else {
1919 emitOpeningBrace();
1920 // Emit return variable declaration first so as not to trigger
1921 // -Wdeclaration-after-statement.
1922 emitReturnVarDecl();
1923 emitShadowedArgs();
1924 if (ReverseArguments)
1925 emitArgumentReversal();
1926 emitBody(CallPrefix);
1927 if (ReverseArguments)
1928 emitReturnReversal();
1929 emitReturn();
1930 emitClosingBrace();
1931 }
1932 OS << "\n";
1933
1934 CurrentRecord = nullptr;
1935}
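// Illustrative sketch: when a dependency is emitted as a macro, every local
// variable gains a unique postfix, so a temporary that would otherwise be
// named __ret might instead be named __ret_42 (the exact names and number
// are hypothetical).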
1936
1937void Intrinsic::indexBody() {
1938 CurrentRecord = R;
1939
1940 initVariables();
1941 // Emit return variable declaration first so as not to trigger
1942 // -Wdeclaration-after-statement.
1943 emitReturnVarDecl();
1944 emitBody(CallPrefix: "");
1945 OS.str(s: "");
1946
1947 CurrentRecord = nullptr;
1948}
1949
1950//===----------------------------------------------------------------------===//
1951// NeonEmitter implementation
1952//===----------------------------------------------------------------------===//
1953
1954Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types,
1955 std::optional<std::string> MangledName) {
1956 // First, look up the name in the intrinsic map.
1957 assert_with_loc(Assertion: IntrinsicMap.find(x: Name) != IntrinsicMap.end(),
1958 Str: ("Intrinsic '" + Name + "' not found!").str());
1959 auto &V = IntrinsicMap.find(x: Name)->second;
1960 std::vector<Intrinsic *> GoodVec;
1961
1962 // Create a string to print if we end up failing.
1963 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
1964 for (unsigned I = 0; I < Types.size(); ++I) {
1965 if (I != 0)
1966 ErrMsg += ", ";
1967 ErrMsg += Types[I].str();
1968 }
1969 ErrMsg += ")'\n";
1970 ErrMsg += "Available overloads:\n";
1971
1972 // Now, look through each intrinsic implementation and see if the types are
1973 // compatible.
1974 for (auto &I : V) {
1975 ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName();
1976 ErrMsg += "(";
1977 for (unsigned A = 0; A < I.getNumParams(); ++A) {
1978 if (A != 0)
1979 ErrMsg += ", ";
1980 ErrMsg += I.getParamType(I: A).str();
1981 }
1982 ErrMsg += ")\n";
1983
1984 if (MangledName && MangledName != I.getMangledName(ForceClassS: true))
1985 continue;
1986
1987 if (I.getNumParams() != Types.size())
1988 continue;
1989
1990 unsigned ArgNum = 0;
1991 bool MatchingArgumentTypes = all_of(Range&: Types, P: [&](const auto &Type) {
1992 return Type == I.getParamType(I: ArgNum++);
1993 });
1994
1995 if (MatchingArgumentTypes)
1996 GoodVec.push_back(x: &I);
1997 }
1998
1999 assert_with_loc(Assertion: !GoodVec.empty(),
2000 Str: "No compatible intrinsic found - " + ErrMsg);
2001 assert_with_loc(Assertion: GoodVec.size() == 1, Str: "Multiple overloads found - " + ErrMsg);
2002
2003 return *GoodVec.front();
2004}
2005
2006void NeonEmitter::createIntrinsic(const Record *R,
2007 SmallVectorImpl<Intrinsic *> &Out) {
2008 std::string Name = std::string(R->getValueAsString(FieldName: "Name"));
2009 std::string Proto = std::string(R->getValueAsString(FieldName: "Prototype"));
2010 std::string Types = std::string(R->getValueAsString(FieldName: "Types"));
2011 const Record *OperationRec = R->getValueAsDef(FieldName: "Operation");
2012 bool BigEndianSafe = R->getValueAsBit(FieldName: "BigEndianSafe");
2013 std::string ArchGuard = std::string(R->getValueAsString(FieldName: "ArchGuard"));
2014 std::string TargetGuard = std::string(R->getValueAsString(FieldName: "TargetGuard"));
2015 bool IsUnavailable = OperationRec->getValueAsBit(FieldName: "Unavailable");
2016 std::string CartesianProductWith = std::string(R->getValueAsString(FieldName: "CartesianProductWith"));
2017
2018 // Set the global current record. This allows assert_with_loc to produce
2019 // decent location information even when highly nested.
2020 CurrentRecord = R;
2021
2022 const ListInit *Body = OperationRec->getValueAsListInit(FieldName: "Ops");
2023
2024 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Str: Types);
2025
2026 ClassKind CK = ClassNone;
2027 if (!R->getDirectSuperClasses().empty())
2028 CK = ClassMap[R->getDirectSuperClasses()[0].first];
2029
2030 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
2031 if (!CartesianProductWith.empty()) {
2032 std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(Str: CartesianProductWith);
2033 for (auto TS : TypeSpecs) {
2034 Type DefaultT(TS, ".");
2035 for (auto SrcTS : ProductTypeSpecs) {
2036 Type DefaultSrcT(SrcTS, ".");
2037 if (TS == SrcTS ||
2038 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
2039 continue;
2040 NewTypeSpecs.push_back(x: std::make_pair(x&: TS, y&: SrcTS));
2041 }
2042 }
2043 } else {
2044 for (auto TS : TypeSpecs) {
2045 NewTypeSpecs.push_back(x: std::make_pair(x&: TS, y&: TS));
2046 }
2047 }
2048
2049 sort(C&: NewTypeSpecs);
2050 NewTypeSpecs.erase(first: llvm::unique(R&: NewTypeSpecs), last: NewTypeSpecs.end());
2051 auto &Entry = IntrinsicMap[Name];
2052
2053 for (auto &I : NewTypeSpecs) {
2054
2055 // The MFloat8 type is only available on AArch64. If encountered, set ArchGuard
2056 // correctly.
2057 std::string NewArchGuard = ArchGuard;
2058 if (Type(I.first, ".").isMFloat8()) {
2059 if (NewArchGuard.empty()) {
2060 NewArchGuard = "defined(__aarch64__)";
2061 } else if (NewArchGuard.find(s: "defined(__aarch64__)") ==
2062 std::string::npos) {
2063 NewArchGuard = "defined(__aarch64__) && (" + NewArchGuard + ")";
2064 }
2065 }
2066 Entry.emplace_back(args&: R, args&: Name, args&: Proto, args&: I.first, args&: I.second, args&: CK, args&: Body, args&: *this,
2067 args&: NewArchGuard, args&: TargetGuard, args&: IsUnavailable, args&: BigEndianSafe);
2068 Out.push_back(Elt: &Entry.back());
2069 }
2070
2071 CurrentRecord = nullptr;
2072}
2073
2074/// genBuiltinsDef: Generate the builtin infos, checking for unique builtin
2075/// declarations.
2076void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2077 SmallVectorImpl<Intrinsic *> &Defs) {
2078 // We only want to emit a builtin once, and in order of its name.
2079 std::map<std::string, Intrinsic *> Builtins;
2080
2081 llvm::StringToOffsetTable Table;
2082 Table.GetOrAddStringOffset(Str: "");
2083 Table.GetOrAddStringOffset(Str: "n");
2084
2085 for (auto *Def : Defs) {
2086 if (Def->hasBody())
2087 continue;
2088
2089 if (Builtins.insert(x: {Def->getMangledName(), Def}).second) {
2090 Table.GetOrAddStringOffset(Str: Def->getMangledName());
2091 Table.GetOrAddStringOffset(Str: Def->getBuiltinTypeStr());
2092 Table.GetOrAddStringOffset(Str: Def->getTargetGuard());
2093 }
2094 }
2095
2096 OS << "#ifdef GET_NEON_BUILTIN_ENUMERATORS\n";
2097 for (const auto &[Name, Def] : Builtins) {
2098 OS << " BI__builtin_neon_" << Name << ",\n";
2099 }
2100 OS << "#endif // GET_NEON_BUILTIN_ENUMERATORS\n\n";
2101
2102 OS << "#ifdef GET_NEON_BUILTIN_STR_TABLE\n";
2103 Table.EmitStringTableDef(OS, Name: "BuiltinStrings");
2104 OS << "#endif // GET_NEON_BUILTIN_STR_TABLE\n\n";
2105
2106 OS << "#ifdef GET_NEON_BUILTIN_INFOS\n";
2107 for (const auto &[Name, Def] : Builtins) {
2108 OS << " Builtin::Info{Builtin::Info::StrOffsets{"
2109 << Table.GetStringOffset(Str: Def->getMangledName()) << " /* "
2110 << Def->getMangledName() << " */, ";
2111 OS << Table.GetStringOffset(Str: Def->getBuiltinTypeStr()) << " /* "
2112 << Def->getBuiltinTypeStr() << " */, ";
2113 OS << Table.GetStringOffset(Str: "n") << " /* n */, ";
2114 OS << Table.GetStringOffset(Str: Def->getTargetGuard()) << " /* "
2115 << Def->getTargetGuard() << " */}, ";
2116 OS << "HeaderDesc::NO_HEADER, ALL_LANGUAGES},\n";
2117 }
2118 OS << "#endif // GET_NEON_BUILTIN_INFOS\n\n";
2119}
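// Illustrative sketch of the generated sections (the builtin name vfoo_v is
// hypothetical, and string offsets are elided):
//
//   #ifdef GET_NEON_BUILTIN_ENUMERATORS
//     BI__builtin_neon_vfoo_v,
//   #endif // GET_NEON_BUILTIN_ENUMERATORS
//
//   #ifdef GET_NEON_BUILTIN_INFOS
//     Builtin::Info{Builtin::Info::StrOffsets{... /* vfoo_v */, ...},
//                   HeaderDesc::NO_HEADER, ALL_LANGUAGES},
//   #endif // GET_NEON_BUILTIN_INFOS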
2120
2121void NeonEmitter::genStreamingSVECompatibleList(
2122 raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
2123 OS << "#ifdef GET_NEON_STREAMING_COMPAT_FLAG\n";
2124
2125 std::set<std::string> Emitted;
2126 for (auto *Def : Defs) {
2127 // If the def has a body (that is, it has Operation DAGs), it won't call
2128 // __builtin_neon_* so we don't need to generate a definition for it.
2129 if (Def->hasBody())
2130 continue;
2131
2132 std::string Name = Def->getMangledName();
2133 if (Emitted.find(x: Name) != Emitted.end())
2134 continue;
2135
2136 // FIXME: We should make exceptions here for some NEON builtins that are
2137 // permitted in streaming mode.
2138 OS << "case NEON::BI__builtin_neon_" << Name
2139 << ": BuiltinType = ArmNonStreaming; break;\n";
2140 Emitted.insert(x: Name);
2141 }
2142 OS << "#endif\n\n";
2143}
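// Illustrative sketch of one generated line (builtin name hypothetical):
//
//   case NEON::BI__builtin_neon_vfoo_v: BuiltinType = ArmNonStreaming; break;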
2144
2145/// Generate the ARM and AArch64 overloaded type checking code for
2146/// SemaChecking.cpp, checking for unique builtin declarations.
2147void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2148 SmallVectorImpl<Intrinsic *> &Defs) {
2149 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
2150
2151 // We record each overload check line before emitting because subsequent Inst
2152 // definitions may extend the number of permitted types (i.e. augment the
2153 // Mask). Use std::map so the table is emitted in name order, not hash order.
2154 struct OverloadInfo {
2155 uint64_t Mask = 0ULL;
2156 int PtrArgNum = 0;
2157 bool HasConstPtr = false;
2158 OverloadInfo() = default;
2159 };
2160 std::map<std::string, OverloadInfo> OverloadMap;
2161
2162 for (auto *Def : Defs) {
2163 // If the def has a body (that is, it has Operation DAGs), it won't call
2164 // __builtin_neon_* so we don't need to generate a definition for it.
2165 if (Def->hasBody())
2166 continue;
2167 // Functions which have a scalar argument cannot be overloaded, no need to
2168 // check them if we are emitting the type checking code.
2169 if (Def->protoHasScalar())
2170 continue;
2171
2172 uint64_t Mask = 0ULL;
2173 Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum();
2174
2175 // Check if the function has a pointer or const pointer argument.
2176 int PtrArgNum = -1;
2177 bool HasConstPtr = false;
2178 for (unsigned I = 0; I < Def->getNumParams(); ++I) {
2179 const auto &Type = Def->getParamType(I);
2180 if (Type.isPointer()) {
2181 PtrArgNum = I;
2182 HasConstPtr = Type.isConstPointer();
2183 }
2184 }
2185
2186 // For sret builtins, adjust the pointer argument index.
2187 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
2188 PtrArgNum += 1;
2189
2190 std::string Name = Def->getName();
2191 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2192 // vst1_lane, vldap1_lane, and vstl1_lane intrinsics. Using a pointer to
2193 // the vector element type with one of those operations causes codegen to
2194 // select an aligned load/store instruction. If you want an unaligned
2195 // operation, the pointer argument needs to have less alignment than the
2196 // element type, so just accept any pointer type.
2197 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane" ||
2198 Name == "vldap1_lane" || Name == "vstl1_lane") {
2199 PtrArgNum = -1;
2200 HasConstPtr = false;
2201 }
2202
2203 if (Mask) {
2204 OverloadInfo &OI = OverloadMap[Def->getMangledName()];
2205 OI.Mask |= Mask;
2206 OI.PtrArgNum |= PtrArgNum;
2207 OI.HasConstPtr = HasConstPtr;
2208 }
2209 }
2210
2211 for (auto &I : OverloadMap) {
2212 OverloadInfo &OI = I.second;
2213
2214 OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
2215 OS << "mask = 0x" << Twine::utohexstr(Val: OI.Mask) << "ULL";
2216 if (OI.PtrArgNum >= 0)
2217 OS << "; PtrArgNum = " << OI.PtrArgNum;
2218 if (OI.HasConstPtr)
2219 OS << "; HasConstPtr = true";
2220 OS << "; break;\n";
2221 }
2222 OS << "#endif\n\n";
2223}
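// Illustrative sketch of one generated case (name and values hypothetical):
// an intrinsic overloaded over a few element types, taking a const pointer as
// its second argument, might produce
//
//   case NEON::BI__builtin_neon_vfoo_v: mask = 0x70ULL; PtrArgNum = 1;
//   HasConstPtr = true; break;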
2224
2225inline bool
2226NeonEmitter::areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
2227 const ArrayRef<ImmCheck> ChecksB) {
2228 // If multiple intrinsics map to the same builtin, we must ensure that the
2229 // intended range checks performed in SemaArm.cpp do not contradict each
2230 // other, as these are emitted once per builtin.
2231 //
2232 // The arguments to be checked and the kind of each check to be performed
2233 // must be the same. The element types may differ, since they are resolved
2234 // per intrinsic as overloaded types by SemaArm.cpp; the vector sizes are
2235 // not, and so must match.
2236 bool compat = llvm::equal(LRange: ChecksA, RRange: ChecksB, P: [](const auto &A, const auto &B) {
2237 return A.getImmArgIdx() == B.getImmArgIdx() && A.getKind() == B.getKind() &&
2238 A.getVecSizeInBits() == B.getVecSizeInBits();
2239 });
2240
2241 return compat;
2242}
2243
2244void NeonEmitter::genIntrinsicRangeCheckCode(
2245 raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
2246 std::unordered_map<std::string, ArrayRef<ImmCheck>> Emitted;
2247
2248 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2249 for (auto &Def : Defs) {
2250 // If the Def has a body (operation DAGs), it does not lower to a __builtin_neon_* call.
2251 if (Def->hasBody() || !Def->hasImmediate())
2252 continue;
2253
2254 // Sorted by immediate argument index
2255 ArrayRef<ImmCheck> Checks = Def->getImmChecks();
2256
2257 auto [It, Inserted] = Emitted.try_emplace(k: Def->getMangledName(), args&: Checks);
2258 if (!Inserted) {
2259 assert(areRangeChecksCompatible(Checks, It->second) &&
2260 "Neon intrinsics with incompatible immediate range checks cannot "
2261 "share a builtin.");
2262 continue; // Ensure this is emitted only once
2263 }
2264
2265 // Emit builtin's range checks
2266 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
2267 for (const auto &Check : Checks) {
2268 OS << " ImmChecks.emplace_back(" << Check.getImmArgIdx() << ", "
2269 << Check.getKind() << ", " << Check.getElementSizeInBits() << ", "
2270 << Check.getVecSizeInBits() << ");\n"
2271 << " break;\n";
2272 }
2273 }
2274
2275 OS << "#endif\n\n";
2276}
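// Illustrative sketch of one generated case (name and values hypothetical):
//
//   case NEON::BI__builtin_neon_vfoo_n_v:
//     ImmChecks.emplace_back(1, 0, 32, 128);
//     break;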
2277
2278/// runHeader - Emit a file with sections defining:
2279/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2280/// 2. the SemaChecking code for the type overload checking.
2281/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
2282void NeonEmitter::runHeader(raw_ostream &OS) {
2283 SmallVector<Intrinsic *, 128> Defs;
2284 for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2285 createIntrinsic(R, Out&: Defs);
2286
2287 // Generate shared BuiltinsXXX.def
2288 genBuiltinsDef(OS, Defs);
2289
2290 // Generate ARM overloaded type checking code for SemaChecking.cpp
2291 genOverloadTypeCheckCode(OS, Defs);
2292
2293 genStreamingSVECompatibleList(OS, Defs);
2294
2295 // Generate ARM range checking code for shift/lane immediates.
2296 genIntrinsicRangeCheckCode(OS, Defs);
2297}
2298
2299static void emitNeonTypeDefs(const std::string &types, raw_ostream &OS) {
2300 std::string TypedefTypes(types);
2301 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(Str: TypedefTypes);
2302
2303 // Emit vector typedefs.
2304 bool InIfdef = false;
2305 for (auto &TS : TDTypeVec) {
2306 bool IsA64 = false;
2307 Type T(TS, ".");
2308 if (T.isDouble() || T.isMFloat8())
2309 IsA64 = true;
2310
2311 if (InIfdef && !IsA64) {
2312 OS << "#endif\n";
2313 InIfdef = false;
2314 }
2315 if (!InIfdef && IsA64) {
2316 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2317 InIfdef = true;
2318 }
2319
2320 if (T.isPoly())
2321 OS << "typedef __attribute__((neon_polyvector_type(";
2322 else
2323 OS << "typedef __attribute__((neon_vector_type(";
2324
2325 Type T2 = T;
2326 T2.makeScalar();
2327 OS << T.getNumElements();
2328 OS << "))) " << T2.str();
2329 OS << " " << T.str() << ";\n";
2330 }
2331 if (InIfdef)
2332 OS << "#endif\n";
2333 OS << "\n";
2334
2335 // Emit struct typedefs.
2336 InIfdef = false;
2337 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
2338 for (auto &TS : TDTypeVec) {
2339 bool IsA64 = false;
2340 Type T(TS, ".");
2341 if (T.isDouble() || T.isMFloat8())
2342 IsA64 = true;
2343
2344 if (InIfdef && !IsA64) {
2345 OS << "#endif\n";
2346 InIfdef = false;
2347 }
2348 if (!InIfdef && IsA64) {
2349 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2350 InIfdef = true;
2351 }
2352
2353 const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};
2354 Type VT(TS, Mods);
2355 OS << "typedef struct " << VT.str() << " {\n";
2356 OS << " " << T.str() << " val";
2357 OS << "[" << NumMembers << "]";
2358 OS << ";\n} ";
2359 OS << VT.str() << ";\n";
2360 OS << "\n";
2361 }
2362 }
2363 if (InIfdef)
2364 OS << "#endif\n";
2365}
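// Illustrative sketch of what this emits for one type spec (the exact set of
// typedefs depends on the Types string passed in):
//
//   typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
//
//   typedef struct int32x4x2_t {
//     int32x4_t val[2];
//   } int32x4x2_t;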
2366
2367/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
2368/// consists of type definitions and function declarations.
2369void NeonEmitter::run(raw_ostream &OS) {
2370 OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
2371 "------------------------------"
2372 "---===\n"
2373 " *\n"
2374 " * Permission is hereby granted, free of charge, to any person "
2375 "obtaining "
2376 "a copy\n"
2377 " * of this software and associated documentation files (the "
2378 "\"Software\"),"
2379 " to deal\n"
2380 " * in the Software without restriction, including without limitation "
2381 "the "
2382 "rights\n"
2383 " * to use, copy, modify, merge, publish, distribute, sublicense, "
2384 "and/or sell\n"
2385 " * copies of the Software, and to permit persons to whom the Software "
2386 "is\n"
2387 " * furnished to do so, subject to the following conditions:\n"
2388 " *\n"
2389 " * The above copyright notice and this permission notice shall be "
2390 "included in\n"
2391 " * all copies or substantial portions of the Software.\n"
2392 " *\n"
2393 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2394 "EXPRESS OR\n"
2395 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2396 "MERCHANTABILITY,\n"
2397 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2398 "SHALL THE\n"
2399 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2400 "OTHER\n"
2401 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2402 "ARISING FROM,\n"
2403 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2404 "DEALINGS IN\n"
2405 " * THE SOFTWARE.\n"
2406 " *\n"
2407 " *===-----------------------------------------------------------------"
2408 "---"
2409 "---===\n"
2410 " */\n\n";
2411
2412 OS << "#ifndef __ARM_NEON_H\n";
2413 OS << "#define __ARM_NEON_H\n\n";
2414
2415 OS << "#if !defined(__arm__) && !defined(__aarch64__) && "
2416 "!defined(__arm64ec__)\n";
2417 OS << "#error \"<arm_neon.h> is intended only for ARM and AArch64 "
2418 "targets\"\n";
2419 OS << "#elif !defined(__ARM_FP)\n";
2420 OS << "#error \"NEON intrinsics not available with the soft-float ABI. "
2421 "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n";
2422 OS << "#else\n\n";
2423
2424 OS << "#include <stdint.h>\n\n";
2425
2426 OS << "#include <arm_bf16.h>\n";
2427
2428 OS << "#include <arm_vector_types.h>\n";
2429
2430 // For now, the signedness of polynomial types depends on the target.
2431 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2432 OS << "typedef uint8_t poly8_t;\n";
2433 OS << "typedef uint16_t poly16_t;\n";
2434 OS << "typedef uint64_t poly64_t;\n";
2435 OS << "typedef __uint128_t poly128_t;\n";
2436 OS << "#else\n";
2437 OS << "typedef int8_t poly8_t;\n";
2438 OS << "typedef int16_t poly16_t;\n";
2439 OS << "typedef int64_t poly64_t;\n";
2440 OS << "#endif\n";
2441 emitNeonTypeDefs(types: "PcQPcPsQPsPlQPl", OS);
2442
2443 OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2444 "__nodebug__))\n\n";
2445
2446 // Shufflevector arguments lists for endian-swapping vectors for big-endian
2447 // targets. For AArch64, we need to reverse every lane in the vector, but for
2448 // AArch32 we need to reverse the lanes within each 64-bit chunk of the
2449 // vector. The naming convention here is __lane_reverse_<n>_<m>, where <n> is
2450 // the length of the vector in bits, and <m> is the length of each lane in bits.
2451 OS << "#if !defined(__LITTLE_ENDIAN__)\n";
2452 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2453 OS << "#define __lane_reverse_64_32 1,0\n";
2454 OS << "#define __lane_reverse_64_16 3,2,1,0\n";
2455 OS << "#define __lane_reverse_64_8 7,6,5,4,3,2,1,0\n";
2456 OS << "#define __lane_reverse_128_64 1,0\n";
2457 OS << "#define __lane_reverse_128_32 3,2,1,0\n";
2458 OS << "#define __lane_reverse_128_16 7,6,5,4,3,2,1,0\n";
2459 OS << "#define __lane_reverse_128_8 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0\n";
2460 OS << "#else\n";
2461 OS << "#define __lane_reverse_64_32 1,0\n";
2462 OS << "#define __lane_reverse_64_16 3,2,1,0\n";
2463 OS << "#define __lane_reverse_64_8 7,6,5,4,3,2,1,0\n";
2464 OS << "#define __lane_reverse_128_64 0,1\n";
2465 OS << "#define __lane_reverse_128_32 1,0,3,2\n";
2466 OS << "#define __lane_reverse_128_16 3,2,1,0,7,6,5,4\n";
2467 OS << "#define __lane_reverse_128_8 7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8\n";
2468 OS << "#endif\n";
2469 OS << "#endif\n";
2470
2471 SmallVector<Intrinsic *, 128> Defs;
2472 for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2473 createIntrinsic(R, Out&: Defs);
2474
2475 for (auto *I : Defs)
2476 I->indexBody();
2477
2478 stable_sort(Range&: Defs, C: deref<std::less<>>());
2479
2480 // Only emit a def when its requirements have been met.
2481 // FIXME: This loop could be made faster, but it's fast enough for now.
2482 bool MadeProgress = true;
2483 std::string InGuard;
2484 while (!Defs.empty() && MadeProgress) {
2485 MadeProgress = false;
2486
2487 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2488 I != Defs.end(); /*No step*/) {
2489 bool DependenciesSatisfied = true;
2490 for (auto *II : (*I)->getDependencies()) {
2491 if (is_contained(Range&: Defs, Element: II))
2492 DependenciesSatisfied = false;
2493 }
2494 if (!DependenciesSatisfied) {
2495 // Try the next one.
2496 ++I;
2497 continue;
2498 }
2499
2500 // Emit #endif/#if pair if needed.
2501 if ((*I)->getArchGuard() != InGuard) {
2502 if (!InGuard.empty())
2503 OS << "#endif\n";
2504 InGuard = (*I)->getArchGuard();
2505 if (!InGuard.empty())
2506 OS << "#if " << InGuard << "\n";
2507 }
2508
2509 // Actually generate the intrinsic code.
2510 OS << (*I)->generate();
2511
2512 MadeProgress = true;
2513 I = Defs.erase(CI: I);
2514 }
2515 }
2516 assert(Defs.empty() && "Some requirements were not satisfied!");
2517 if (!InGuard.empty())
2518 OS << "#endif\n";
2519
2520 OS << "\n";
2521 OS << "#undef __ai\n\n";
2522 OS << "#endif /* if !defined(__ARM_NEON) */\n";
2523 OS << "#endif /* ifndef __ARM_FP */\n";
2524}
2525
2526/// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h
2527/// consists of type definitions and function declarations.
2528void NeonEmitter::runFP16(raw_ostream &OS) {
2529 OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
2530 "------------------------------"
2531 "---===\n"
2532 " *\n"
2533 " * Permission is hereby granted, free of charge, to any person "
2534 "obtaining a copy\n"
2535 " * of this software and associated documentation files (the "
2536 "\"Software\"), to deal\n"
2537 " * in the Software without restriction, including without limitation "
2538 "the rights\n"
2539 " * to use, copy, modify, merge, publish, distribute, sublicense, "
2540 "and/or sell\n"
2541 " * copies of the Software, and to permit persons to whom the Software "
2542 "is\n"
2543 " * furnished to do so, subject to the following conditions:\n"
2544 " *\n"
2545 " * The above copyright notice and this permission notice shall be "
2546 "included in\n"
2547 " * all copies or substantial portions of the Software.\n"
2548 " *\n"
2549 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2550 "EXPRESS OR\n"
2551 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2552 "MERCHANTABILITY,\n"
2553 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2554 "SHALL THE\n"
2555 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2556 "OTHER\n"
2557 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2558 "ARISING FROM,\n"
2559 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2560 "DEALINGS IN\n"
2561 " * THE SOFTWARE.\n"
2562 " *\n"
2563 " *===-----------------------------------------------------------------"
2564 "---"
2565 "---===\n"
2566 " */\n\n";
2567
2568 OS << "#ifndef __ARM_FP16_H\n";
2569 OS << "#define __ARM_FP16_H\n\n";
2570
2571 OS << "#include <stdint.h>\n\n";
2572
2573 OS << "typedef __fp16 float16_t;\n";
2574
2575 OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2576 "__nodebug__))\n\n";
2577
2578 SmallVector<Intrinsic *, 128> Defs;
2579 for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2580 createIntrinsic(R, Out&: Defs);
2581
2582 for (auto *I : Defs)
2583 I->indexBody();
2584
2585 stable_sort(Range&: Defs, C: deref<std::less<>>());
2586
2587 // Only emit a def when its requirements have been met.
2588 // FIXME: This loop could be made faster, but it's fast enough for now.
2589 bool MadeProgress = true;
2590 std::string InGuard;
2591 while (!Defs.empty() && MadeProgress) {
2592 MadeProgress = false;
2593
2594 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2595 I != Defs.end(); /*No step*/) {
2596 bool DependenciesSatisfied = true;
2597 for (auto *II : (*I)->getDependencies()) {
2598 if (is_contained(Range&: Defs, Element: II))
2599 DependenciesSatisfied = false;
2600 }
2601 if (!DependenciesSatisfied) {
2602 // Try the next one.
2603 ++I;
2604 continue;
2605 }
2606
2607 // Emit #endif/#if pair if needed.
2608 if ((*I)->getArchGuard() != InGuard) {
2609 if (!InGuard.empty())
2610 OS << "#endif\n";
2611 InGuard = (*I)->getArchGuard();
2612 if (!InGuard.empty())
2613 OS << "#if " << InGuard << "\n";
2614 }
2615
2616 // Actually generate the intrinsic code.
2617 OS << (*I)->generate();
2618
2619 MadeProgress = true;
2620 I = Defs.erase(CI: I);
2621 }
2622 }
2623 assert(Defs.empty() && "Some requirements were not satisfied!");
2624 if (!InGuard.empty())
2625 OS << "#endif\n";
2626
2627 OS << "\n";
2628 OS << "#undef __ai\n\n";
2629 OS << "#endif /* __ARM_FP16_H */\n";
2630}
2631
2632void NeonEmitter::runVectorTypes(raw_ostream &OS) {
2633 OS << "/*===---- arm_vector_types - ARM vector type "
2634 "------===\n"
2635 " *\n"
2636 " *\n"
2637 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
2638 "Exceptions.\n"
2639 " * See https://llvm.org/LICENSE.txt for license information.\n"
2640 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
2641 " *\n"
2642 " *===-----------------------------------------------------------------"
2643 "------===\n"
2644 " */\n\n";
2645 OS << "#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)\n";
2646 OS << "#error \"This file should not be used standalone. Please include"
2647 " arm_neon.h or arm_sve.h instead\"\n\n";
2648 OS << "#endif\n";
2649 OS << "#ifndef __ARM_NEON_TYPES_H\n";
2650 OS << "#define __ARM_NEON_TYPES_H\n";
2651 OS << "typedef float float32_t;\n";
2652 OS << "typedef __fp16 float16_t;\n";
2653
2654 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2655 OS << "typedef __mfp8 mfloat8_t;\n";
2656 OS << "typedef double float64_t;\n";
2657 OS << "#endif\n\n";
2658
2659 OS << R"(
2660typedef uint64_t fpm_t;
2661
2662enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 };
2663
2664enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE };
2665
2666static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2667__arm_fpm_init(void) {
2668 return 0;
2669}
2670
2671static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2672__arm_set_fpm_src1_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2673 return (__fpm & ~7ull) | (fpm_t)__format;
2674}
2675
2676static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2677__arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2678 return (__fpm & ~0x38ull) | ((fpm_t)__format << 3u);
2679}
2680
2681static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2682__arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2683 return (__fpm & ~0x1c0ull) | ((fpm_t)__format << 6u);
2684}
2685
2686static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2687__arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
2688 return (__fpm & ~0x4000ull) | ((fpm_t)__behaviour << 14u);
2689}
2690
2691static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2692__arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
2693 return (__fpm & ~0x8000ull) | ((fpm_t)__behaviour << 15u);
2694}
2695
2696static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2697__arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) {
2698 return (__fpm & ~0x7f0000ull) | (__scale << 16u);
2699}
2700
2701static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2702__arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) {
2703 return (__fpm & ~0xff000000ull) | (((fpm_t)__scale & 0xffu) << 24u);
2704}
2705
2706static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2707__arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) {
2708 return (uint32_t)__fpm | (__scale << 32u);
2709}
2710
2711)";
2712
2713 emitNeonTypeDefs(types: "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlmQmhQhfQfdQd", OS);
2714
2715 emitNeonTypeDefs(types: "bQb", OS);
2716 OS << "#endif // __ARM_NEON_TYPES_H\n";
2717}
2718
2719void NeonEmitter::runBF16(raw_ostream &OS) {
2720 OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics "
2721 "-----------------------------------===\n"
2722 " *\n"
2723 " *\n"
2724 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
2725 "Exceptions.\n"
2726 " * See https://llvm.org/LICENSE.txt for license information.\n"
2727 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
2728 " *\n"
2729 " *===-----------------------------------------------------------------"
2730 "------===\n"
2731 " */\n\n";
2732
2733 OS << "#ifndef __ARM_BF16_H\n";
2734 OS << "#define __ARM_BF16_H\n\n";
2735
2736 OS << "typedef __bf16 bfloat16_t;\n";
2737
2738 OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2739 "__nodebug__))\n\n";
2740
2741 SmallVector<Intrinsic *, 128> Defs;
2742 for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2743 createIntrinsic(R, Out&: Defs);
2744
2745 for (auto *I : Defs)
2746 I->indexBody();
2747
2748 stable_sort(Range&: Defs, C: deref<std::less<>>());
2749
2750 // Only emit a def when its requirements have been met.
2751 // FIXME: This loop could be made faster, but it's fast enough for now.
2752 bool MadeProgress = true;
2753 std::string InGuard;
2754 while (!Defs.empty() && MadeProgress) {
2755 MadeProgress = false;
2756
2757 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2758 I != Defs.end(); /*No step*/) {
2759 bool DependenciesSatisfied = true;
2760 for (auto *II : (*I)->getDependencies()) {
2761 if (is_contained(Range&: Defs, Element: II))
2762 DependenciesSatisfied = false;
2763 }
2764 if (!DependenciesSatisfied) {
2765 // Try the next one.
2766 ++I;
2767 continue;
2768 }
2769
2770 // Emit #endif/#if pair if needed.
2771 if ((*I)->getArchGuard() != InGuard) {
2772 if (!InGuard.empty())
2773 OS << "#endif\n";
2774 InGuard = (*I)->getArchGuard();
2775 if (!InGuard.empty())
2776 OS << "#if " << InGuard << "\n";
2777 }
2778
2779 // Actually generate the intrinsic code.
2780 OS << (*I)->generate();
2781
2782 MadeProgress = true;
2783 I = Defs.erase(CI: I);
2784 }
2785 }
2786 assert(Defs.empty() && "Some requirements were not satisfied!");
2787 if (!InGuard.empty())
2788 OS << "#endif\n";
2789
2790 OS << "\n";
2791 OS << "#undef __ai\n\n";
2792
2793 OS << "#endif\n";
2794}
2795
2796void clang::EmitNeon(const RecordKeeper &Records, raw_ostream &OS) {
2797 NeonEmitter(Records).run(OS);
2798}
2799
2800void clang::EmitFP16(const RecordKeeper &Records, raw_ostream &OS) {
2801 NeonEmitter(Records).runFP16(OS);
2802}
2803
2804void clang::EmitBF16(const RecordKeeper &Records, raw_ostream &OS) {
2805 NeonEmitter(Records).runBF16(OS);
2806}
2807
2808void clang::EmitNeonSema(const RecordKeeper &Records, raw_ostream &OS) {
2809 NeonEmitter(Records).runHeader(OS);
2810}
2811
2812void clang::EmitVectorTypes(const RecordKeeper &Records, raw_ostream &OS) {
2813 NeonEmitter(Records).runVectorTypes(OS);
2814}
2815
2816void clang::EmitNeonTest(const RecordKeeper &Records, raw_ostream &OS) {
2817 llvm_unreachable("Neon test generation no longer implemented!");
2818}
2819