LLLexer.cpp source code [llvm_projects/llvm/lib/AsmParser/LLLexer.cpp]

1	//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Implement the Lexer for .ll files.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/AsmParser/LLLexer.h"
14	#include "llvm/ADT/APInt.h"
15	#include "llvm/ADT/STLExtras.h"
16	#include "llvm/ADT/StringExtras.h"
17	#include "llvm/ADT/Twine.h"
18	#include "llvm/IR/DerivedTypes.h"
19	#include "llvm/IR/Instruction.h"
20	#include "llvm/Support/ErrorHandling.h"
21	#include "llvm/Support/SourceMgr.h"
22	#include <cassert>
23	#include <cctype>
24	#include <cstdio>
25
26	using namespace llvm;
27
28	// Both the lexer and parser can issue error messages. If the lexer issues a
29	// lexer error, since we do not terminate execution immediately, usually that
30	// is followed by the parser issuing a parser error. However, the error issued
31	// by the lexer is more relevant in that case as opposed to potentially more
32	// generic parser error. So instead of always recording the last error message
33	// use the `Priority` to establish a priority, with Lexer > Parser > None. We
34	// record the issued message only if the message has same or higher priority
35	// than the existing one. This prevents lexer errors from being overwritten by
36	// parser errors.
37	void LLLexer::Error(LocTy ErrorLoc, const Twine &Msg,
38	LLLexer::ErrorPriority Priority) {
39	if (Priority < ErrorInfo.Priority)
40	return;
41	ErrorInfo.Error = SM.GetMessage(Loc: ErrorLoc, Kind: SourceMgr::DK_Error, Msg);
42	ErrorInfo.Priority = Priority;
43	}
44
45	void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
46	SM.PrintMessage(Loc: WarningLoc, Kind: SourceMgr::DK_Warning, Msg);
47	}
48
49	//===----------------------------------------------------------------------===//
50	// Helper functions.
51	//===----------------------------------------------------------------------===//
52
53	// atoull - Convert an ascii string of decimal digits into the unsigned long
54	// long representation... this does not have to do input error checking,
55	// because we know that the input will be matched by a suitable regex...
56	//
57	uint64_t LLLexer::atoull(const char Buffer, const* char *End) {
58	uint64_t Result = `0`;
59	for (; Buffer != End; Buffer++) {
60	uint64_t OldRes = Result;
61	Result *= `10`;
62	Result += *Buffer-`'0'`;
63	if (Result < OldRes) { // overflow detected.
64	LexError(Msg: "constant bigger than 64 bits detected");
65	return `0`;
66	}
67	}
68	return Result;
69	}
70
71	uint64_t LLLexer::HexIntToVal(const char Buffer, const* char *End) {
72	uint64_t Result = `0`;
73	for (; Buffer != End; ++Buffer) {
74	uint64_t OldRes = Result;
75	Result *= `16`;
76	Result += hexDigitValue(C: *Buffer);
77
78	if (Result < OldRes) { // overflow detected.
79	LexError(Msg: "constant bigger than 64 bits detected");
80	return `0`;
81	}
82	}
83	return Result;
84	}
85
86	void LLLexer::HexToIntPair(const char Buffer, const* char *End,
87	uint64_t Pair[`2`]) {
88	Pair[`0`] = `0`;
89	if (End - Buffer >= `16`) {
90	for (int i = `0`; i < `16`; i++, Buffer++) {
91	assert(Buffer != End);
92	Pair[`0`] *= `16`;
93	Pair[`0`] += hexDigitValue(C: *Buffer);
94	}
95	}
96	Pair[`1`] = `0`;
97	for (int i = `0`; i < `16` && Buffer != End; i++, Buffer++) {
98	Pair[`1`] *= `16`;
99	Pair[`1`] += hexDigitValue(C: *Buffer);
100	}
101	if (Buffer != End)
102	LexError(Msg: "constant bigger than 128 bits detected");
103	}
104
105	/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
106	/// { low64, high16 } as usual for an APInt.
107	void LLLexer::FP80HexToIntPair(const char Buffer, const* char *End,
108	uint64_t Pair[`2`]) {
109	Pair[`1`] = `0`;
110	for (int i=`0`; i<`4` && Buffer != End; i++, Buffer++) {
111	assert(Buffer != End);
112	Pair[`1`] *= `16`;
113	Pair[`1`] += hexDigitValue(C: *Buffer);
114	}
115	Pair[`0`] = `0`;
116	for (int i = `0`; i < `16` && Buffer != End; i++, Buffer++) {
117	Pair[`0`] *= `16`;
118	Pair[`0`] += hexDigitValue(C: *Buffer);
119	}
120	if (Buffer != End)
121	LexError(Msg: "constant bigger than 128 bits detected");
122	}
123
124	// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
125	// appropriate character.
126	static void UnEscapeLexed(std::string &Str) {
127	if (Str.empty()) return;
128
129	char Buffer = &Str [`0`], EndBuffer = Buffer+Str.size();
130	char *BOut = Buffer;
131	for (char *BIn = Buffer; BIn != EndBuffer; ) {
132	if (BIn[`0`] == `'\\'`) {
133	if (BIn < EndBuffer-`1` && BIn[`1`] == `'\\'`) {
134	BOut++ = `'\\'`; // Two \ becomes one*
135	BIn += `2`;
136	} else if (BIn < EndBuffer-`2` &&
137	isxdigit(static_cast<unsigned char>(BIn[`1`])) &&
138	isxdigit(static_cast<unsigned char>(BIn[`2`]))) {
139	BOut = hexDigitValue(C: BIn[`1`]) `16` + hexDigitValue(C: BIn[`2`]);
140	BIn += `3`; // Skip over handled chars
141	++BOut;
142	} else {
143	BOut++ = BIn++;
144	}
145	} else {
146	BOut++ = BIn++;
147	}
148	}
149	Str.resize(n: BOut-Buffer);
150	}
151
/// isLabelChar - Return true for [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
  // The cast keeps isalnum away from negative char values.
  return isalnum(static_cast<unsigned char>(C)) || C == '-' || C == '$' ||
         C == '.' || C == '_';
}
157
158	/// isLabelTail - Return true if this pointer points to a valid end of a label.
159	static const char isLabelTail(const* char *CurPtr) {
160	while (true) {
161	if (CurPtr[`0`] == `':'`) return CurPtr+`1`;
162	if (!isLabelChar(C: CurPtr[`0`])) return nullptr;
163	++CurPtr;
164	}
165	}
166
167	//===----------------------------------------------------------------------===//
168	// Lexer definition.
169	//===----------------------------------------------------------------------===//
170
171	LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
172	LLVMContext &C)
173	: CurBuf (StartBuf), ErrorInfo (Err), SM(SM), Context(C) {
174	CurPtr = CurBuf.begin();
175	}
176
177	int LLLexer::getNextChar() {
178	char CurChar = *CurPtr++;
179	switch (CurChar) {
180	default: return (unsigned char)CurChar;
181	case `0`:
182	// A nul character in the stream is either the end of the current buffer or
183	// a random nul in the file. Disambiguate that here.
184	if (CurPtr-`1` != CurBuf.end())
185	return `0`; // Just whitespace.
186
187	// Otherwise, return end of file.
188	--CurPtr; // Another call to lex will return EOF again.
189	return EOF;
190	}
191	}
192
193	lltok::Kind LLLexer::LexToken() {
194	// Set token end to next location, since the end is exclusive.
195	PrevTokEnd = CurPtr;
196	while (true) {
197	TokStart = CurPtr;
198
199	int CurChar = getNextChar();
200	switch (CurChar) {
201	default:
202	// Handle letters: [a-zA-Z_]
203	if (isalpha(static_cast<unsigned char>(CurChar)) \|\| CurChar == `'_'`)
204	return LexIdentifier();
205	return lltok::Error;
206	case EOF: return lltok::Eof;
207	case `0`:
208	case `' '`:
209	case `'\t'`:
210	case `'\n'`:
211	case `'\r'`:
212	// Ignore whitespace.
213	continue;
214	case `'+'`: return LexPositive();
215	case `'@'`: return LexAt();
216	case `'$'`: return LexDollar();
217	case `'%'`: return LexPercent();
218	case `'"'`: return LexQuote();
219	case `'.'`:
220	if (const char *Ptr = isLabelTail(CurPtr)) {
221	CurPtr = Ptr;
222	StrVal.assign(first: TokStart, last: CurPtr-`1`);
223	return lltok::LabelStr;
224	}
225	if (CurPtr[`0`] == `'.'` && CurPtr[`1`] == `'.'`) {
226	CurPtr += `2`;
227	return lltok::dotdotdot;
228	}
229	return lltok::Error;
230	case `';'`:
231	SkipLineComment();
232	continue;
233	case `'!'`: return LexExclaim();
234	case `'^'`:
235	return LexCaret();
236	case `':'`:
237	return lltok::colon;
238	case `'#'`: return LexHash();
239	case `'0'`: case `'1'`: case `'2'`: case `'3'`: case `'4'`:
240	case `'5'`: case `'6'`: case `'7'`: case `'8'`: case `'9'`:
241	case `'-'`:
242	return LexDigitOrNegative();
243	case `'='`: return lltok::equal;
244	case `'['`: return lltok::lsquare;
245	case `']'`: return lltok::rsquare;
246	case `'{'`: return lltok::lbrace;
247	case `'}'`: return lltok::rbrace;
248	case `'<'`: return lltok::less;
249	case `'>'`: return lltok::greater;
250	case `'('`: return lltok::lparen;
251	case `')'`: return lltok::rparen;
252	case `','`: return lltok::comma;
253	case `''`: return* lltok::star;
254	case `'\|'`: return lltok::bar;
255	case `'/'`:
256	if (getNextChar() != `'*'`)
257	return lltok::Error;
258	if (SkipCComment())
259	return lltok::Error;
260	continue;
261	}
262	}
263	}
264
265	void LLLexer::SkipLineComment() {
266	while (true) {
267	if (CurPtr[`0`] == `'\n'` \|\| CurPtr[`0`] == `'\r'` \|\| getNextChar() == EOF)
268	return;
269	}
270	}
271
272	/// This skips C-style // comments. Returns true if there
273	/// was an error.
274	bool LLLexer::SkipCComment() {
275	while (true) {
276	int CurChar = getNextChar();
277	switch (CurChar) {
278	case EOF:
279	LexError(Msg: "unterminated comment");
280	return true;
281	case `'*'`:
282	// End of the comment?
283	CurChar = getNextChar();
284	if (CurChar == `'/'`)
285	return false;
286	if (CurChar == EOF) {
287	LexError(Msg: "unterminated comment");
288	return true;
289	}
290	}
291	}
292	}
293
294	/// Lex all tokens that start with an @ character.
295	/// GlobalVar @\"[^\"]\"*
296	/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
297	/// GlobalVarID @[0-9]+
298	lltok::Kind LLLexer::LexAt() {
299	return LexVar(Var: lltok::GlobalVar, VarID: lltok::GlobalID);
300	}
301
302	lltok::Kind LLLexer::LexDollar() {
303	if (const char *Ptr = isLabelTail(CurPtr: TokStart)) {
304	CurPtr = Ptr;
305	StrVal.assign(first: TokStart, last: CurPtr - `1`);
306	return lltok::LabelStr;
307	}
308
309	// Handle DollarStringConstant: $\"[^\"]\"*
310	if (CurPtr[`0`] == `'"'`) {
311	++CurPtr;
312
313	while (true) {
314	int CurChar = getNextChar();
315
316	if (CurChar == EOF) {
317	LexError(Msg: "end of file in COMDAT variable name");
318	return lltok::Error;
319	}
320	if (CurChar == `'"'`) {
321	StrVal.assign(first: TokStart + `2`, last: CurPtr - `1`);
322	UnEscapeLexed(Str&: StrVal);
323	if (StringRef (StrVal).contains(C: `0`)) {
324	LexError(Msg: "NUL character is not allowed in names");
325	return lltok::Error;
326	}
327	return lltok::ComdatVar;
328	}
329	}
330	}
331
332	// Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*
333	if (ReadVarName())
334	return lltok::ComdatVar;
335
336	return lltok::Error;
337	}
338
339	/// ReadString - Read a string until the closing quote.
340	lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
341	const char *Start = CurPtr;
342	while (true) {
343	int CurChar = getNextChar();
344
345	if (CurChar == EOF) {
346	LexError(Msg: "end of file in string constant");
347	return lltok::Error;
348	}
349	if (CurChar == `'"'`) {
350	StrVal.assign(first: Start, last: CurPtr-`1`);
351	UnEscapeLexed(Str&: StrVal);
352	return kind;
353	}
354	}
355	}
356
357	/// ReadVarName - Read the rest of a token containing a variable name.
358	bool LLLexer::ReadVarName() {
359	const char *NameStart = CurPtr;
360	if (isalpha(static_cast<unsigned char>(CurPtr[`0`])) \|\|
361	CurPtr[`0`] == `'-'` \|\| CurPtr[`0`] == `'$'` \|\|
362	CurPtr[`0`] == `'.'` \|\| CurPtr[`0`] == `'_'`) {
363	++CurPtr;
364	while (isalnum(static_cast<unsigned char>(CurPtr[`0`])) \|\|
365	CurPtr[`0`] == `'-'` \|\| CurPtr[`0`] == `'$'` \|\|
366	CurPtr[`0`] == `'.'` \|\| CurPtr[`0`] == `'_'`)
367	++CurPtr;
368
369	StrVal.assign(first: NameStart, last: CurPtr);
370	return true;
371	}
372	return false;
373	}
374
375	// Lex an ID: [0-9]+. On success, the ID is stored in UIntVal and Token is
376	// returned, otherwise the Error token is returned.
377	lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
378	if (!isdigit(static_cast<unsigned char>(CurPtr[`0`])))
379	return lltok::Error;
380
381	for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[`0`])); ++CurPtr)
382	/empty/;
383
384	uint64_t Val = atoull(Buffer: TokStart + `1`, End: CurPtr);
385	if ((unsigned)Val != Val)
386	LexError(Msg: "invalid value number (too large)");
387	UIntVal = unsigned(Val);
388	return Token;
389	}
390
391	lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
392	// Handle StringConstant: \"[^\"]\"*
393	if (CurPtr[`0`] == `'"'`) {
394	++CurPtr;
395
396	while (true) {
397	int CurChar = getNextChar();
398
399	if (CurChar == EOF) {
400	LexError(Msg: "end of file in global variable name");
401	return lltok::Error;
402	}
403	if (CurChar == `'"'`) {
404	StrVal.assign(first: TokStart+`2`, last: CurPtr-`1`);
405	UnEscapeLexed(Str&: StrVal);
406	if (StringRef (StrVal).contains(C: `0`)) {
407	LexError(Msg: "NUL character is not allowed in names");
408	return lltok::Error;
409	}
410	return Var;
411	}
412	}
413	}
414
415	// Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]*
416	if (ReadVarName())
417	return Var;
418
419	// Handle VarID: [0-9]+
420	return LexUIntID(Token: VarID);
421	}
422
423	/// Lex all tokens that start with a % character.
424	/// LocalVar ::= %\"[^\"]\"*
425	/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
426	/// LocalVarID ::= %[0-9]+
427	lltok::Kind LLLexer::LexPercent() {
428	return LexVar(Var: lltok::LocalVar, VarID: lltok::LocalVarID);
429	}
430
431	/// Lex all tokens that start with a " character.
432	/// QuoteLabel "[^"]+":
433	/// StringConstant "[^"]"*
434	lltok::Kind LLLexer::LexQuote() {
435	lltok::Kind kind = ReadString(kind: lltok::StringConstant);
436	if (kind == lltok::Error \|\| kind == lltok::Eof)
437	return kind;
438
439	if (CurPtr[`0`] == `':'`) {
440	++CurPtr;
441	if (StringRef (StrVal).contains(C: `0`)) {
442	LexError(Msg: "NUL character is not allowed in names");
443	kind = lltok::Error;
444	} else {
445	kind = lltok::LabelStr;
446	}
447	}
448
449	return kind;
450	}
451
452	/// Lex all tokens that start with a ! character.
453	/// !foo
454	/// !
455	lltok::Kind LLLexer::LexExclaim() {
456	// Lex a metadata name as a MetadataVar.
457	if (isalpha(static_cast<unsigned char>(CurPtr[`0`])) \|\|
458	CurPtr[`0`] == `'-'` \|\| CurPtr[`0`] == `'$'` \|\|
459	CurPtr[`0`] == `'.'` \|\| CurPtr[`0`] == `'_'` \|\| CurPtr[`0`] == `'\\'`) {
460	++CurPtr;
461	while (isalnum(static_cast<unsigned char>(CurPtr[`0`])) \|\|
462	CurPtr[`0`] == `'-'` \|\| CurPtr[`0`] == `'$'` \|\|
463	CurPtr[`0`] == `'.'` \|\| CurPtr[`0`] == `'_'` \|\| CurPtr[`0`] == `'\\'`)
464	++CurPtr;
465
466	StrVal.assign(first: TokStart+`1`, last: CurPtr); // Skip !
467	UnEscapeLexed(Str&: StrVal);
468	return lltok::MetadataVar;
469	}
470	return lltok::exclaim;
471	}
472
473	/// Lex all tokens that start with a ^ character.
474	/// SummaryID ::= ^[0-9]+
475	lltok::Kind LLLexer::LexCaret() {
476	// Handle SummaryID: ^[0-9]+
477	return LexUIntID(Token: lltok::SummaryID);
478	}
479
480	/// Lex all tokens that start with a # character.
481	/// AttrGrpID ::= #[0-9]+
482	/// Hash ::= #
483	lltok::Kind LLLexer::LexHash() {
484	// Handle AttrGrpID: #[0-9]+
485	if (isdigit(static_cast<unsigned char>(CurPtr[`0`])))
486	return LexUIntID(Token: lltok::AttrGrpID);
487	return lltok::hash;
488	}
489
490	/// Lex a label, integer or byte types, keyword, or hexadecimal integer
491	/// constant.
492	/// Label [-a-zA-Z$._0-9]+:
493	/// ByteType b[0-9]+
494	/// IntegerType i[0-9]+
495	/// Keyword sdiv, float, ...
496	/// HexIntConstant [us]0x[0-9A-Fa-f]+
497	lltok::Kind LLLexer::LexIdentifier() {
498	const char *StartChar = CurPtr;
499	const char IntOrByteIdentifier = CurPtr[-`1`];
500	const char *IntOrByteEnd =
501	(IntOrByteIdentifier == `'i'` \|\| IntOrByteIdentifier == `'b'`) ? nullptr
502	: StartChar;
503	const char KeywordEnd = nullptr*;
504
505	for (; isLabelChar(C: *CurPtr); ++CurPtr) {
506	// If we decide this is a byte or an integer, remember the end of the
507	// sequence.
508	if (!IntOrByteEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
509	IntOrByteEnd = CurPtr;
510	if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
511	*CurPtr != `'_'`)
512	KeywordEnd = CurPtr;
513	}
514
515	// If we stopped due to a colon, unless we were directed to ignore it,
516	// this really is a label.
517	if (!IgnoreColonInIdentifiers && *CurPtr == `':'`) {
518	StrVal.assign(first: StartChar-`1`, last: CurPtr++);
519	return lltok::LabelStr;
520	}
521
522	// Otherwise, this wasn't a label. If this was valid as a byte or an integer
523	// type, return it.
524	if (!IntOrByteEnd)
525	IntOrByteEnd = CurPtr;
526	if (IntOrByteEnd != StartChar) {
527	CurPtr = IntOrByteEnd;
528	uint64_t NumBits = atoull(Buffer: StartChar, End: CurPtr);
529	if (NumBits < IntegerType::MIN_INT_BITS \|\|
530	NumBits > IntegerType::MAX_INT_BITS) {
531	LexError(Msg: "bitwidth for integer or byte type out of range");
532	return lltok::Error;
533	}
534	if (IntOrByteIdentifier == `'i'`)
535	TyVal = IntegerType::get(C&: Context, NumBits);
536	else
537	TyVal = ByteType::get(C&: Context, NumBits);
538
539	return lltok::Type;
540	}
541
542	// Otherwise, this was a letter sequence. See which keyword this is.
543	if (!KeywordEnd) KeywordEnd = CurPtr;
544	CurPtr = KeywordEnd;
545	--StartChar;
546	StringRef Keyword(StartChar, CurPtr - StartChar);
547
548	#define KEYWORD(STR) \
549	do { \
550	if (Keyword == #STR) \
551	return lltok::kw_##STR; \
552	} while (false)
553
554	KEYWORD(true); KEYWORD(false);
555	KEYWORD(declare); KEYWORD(define);
556	KEYWORD(global); KEYWORD(constant);
557	KEYWORD(br);
558
559	KEYWORD(dso_local);
560	KEYWORD(dso_preemptable);
561
562	KEYWORD(private);
563	KEYWORD(internal);
564	KEYWORD(available_externally);
565	KEYWORD(linkonce);
566	KEYWORD(linkonce_odr);
567	KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg".
568	KEYWORD(weak_odr);
569	KEYWORD(appending);
570	KEYWORD(dllimport);
571	KEYWORD(dllexport);
572	KEYWORD(common);
573	KEYWORD(default);
574	KEYWORD(hidden);
575	KEYWORD(protected);
576	KEYWORD(unnamed_addr);
577	KEYWORD(local_unnamed_addr);
578	KEYWORD(externally_initialized);
579	KEYWORD(extern_weak);
580	KEYWORD(external);
581	KEYWORD(thread_local);
582	KEYWORD(localdynamic);
583	KEYWORD(initialexec);
584	KEYWORD(localexec);
585	KEYWORD(zeroinitializer);
586	KEYWORD(undef);
587	KEYWORD(null);
588	KEYWORD(none);
589	KEYWORD(poison);
590	KEYWORD(to);
591	KEYWORD(caller);
592	KEYWORD(within);
593	KEYWORD(from);
594	KEYWORD(tail);
595	KEYWORD(musttail);
596	KEYWORD(notail);
597	KEYWORD(target);
598	KEYWORD(triple);
599	KEYWORD(source_filename);
600	KEYWORD(unwind);
601	KEYWORD(datalayout);
602	KEYWORD(volatile);
603	KEYWORD(atomic);
604	KEYWORD(unordered);
605	KEYWORD(monotonic);
606	KEYWORD(acquire);
607	KEYWORD(release);
608	KEYWORD(acq_rel);
609	KEYWORD(seq_cst);
610	KEYWORD(syncscope);
611
612	KEYWORD(nnan);
613	KEYWORD(ninf);
614	KEYWORD(nsz);
615	KEYWORD(arcp);
616	KEYWORD(contract);
617	KEYWORD(reassoc);
618	KEYWORD(afn);
619	KEYWORD(fast);
620	KEYWORD(nuw);
621	KEYWORD(nsw);
622	KEYWORD(nusw);
623	KEYWORD(exact);
624	KEYWORD(disjoint);
625	KEYWORD(inbounds);
626	KEYWORD(nneg);
627	KEYWORD(samesign);
628	KEYWORD(inrange);
629	KEYWORD(addrspace);
630	KEYWORD(section);
631	KEYWORD(partition);
632	KEYWORD(code_model);
633	KEYWORD(alias);
634	KEYWORD(ifunc);
635	KEYWORD(module);
636	KEYWORD(asm);
637	KEYWORD(sideeffect);
638	KEYWORD(inteldialect);
639	KEYWORD(gc);
640	KEYWORD(prefix);
641	KEYWORD(prologue);
642	KEYWORD(prefalign);
643
644	KEYWORD(no_sanitize_address);
645	KEYWORD(no_sanitize_hwaddress);
646	KEYWORD(sanitize_address_dyninit);
647
648	KEYWORD(ccc);
649	KEYWORD(fastcc);
650	KEYWORD(coldcc);
651	KEYWORD(cfguard_checkcc);
652	KEYWORD(x86_stdcallcc);
653	KEYWORD(x86_fastcallcc);
654	KEYWORD(x86_thiscallcc);
655	KEYWORD(x86_vectorcallcc);
656	KEYWORD(arm_apcscc);
657	KEYWORD(arm_aapcscc);
658	KEYWORD(arm_aapcs_vfpcc);
659	KEYWORD(aarch64_vector_pcs);
660	KEYWORD(aarch64_sve_vector_pcs);
661	KEYWORD(aarch64_sme_preservemost_from_x0);
662	KEYWORD(aarch64_sme_preservemost_from_x1);
663	KEYWORD(aarch64_sme_preservemost_from_x2);
664	KEYWORD(msp430_intrcc);
665	KEYWORD(avr_intrcc);
666	KEYWORD(avr_signalcc);
667	KEYWORD(ptx_kernel);
668	KEYWORD(ptx_device);
669	KEYWORD(spir_kernel);
670	KEYWORD(spir_func);
671	KEYWORD(intel_ocl_bicc);
672	KEYWORD(x86_64_sysvcc);
673	KEYWORD(win64cc);
674	KEYWORD(x86_regcallcc);
675	KEYWORD(swiftcc);
676	KEYWORD(swifttailcc);
677	KEYWORD(anyregcc);
678	KEYWORD(preserve_mostcc);
679	KEYWORD(preserve_allcc);
680	KEYWORD(preserve_nonecc);
681	KEYWORD(ghccc);
682	KEYWORD(x86_intrcc);
683	KEYWORD(hhvmcc);
684	KEYWORD(hhvm_ccc);
685	KEYWORD(cxx_fast_tlscc);
686	KEYWORD(amdgpu_vs);
687	KEYWORD(amdgpu_ls);
688	KEYWORD(amdgpu_hs);
689	KEYWORD(amdgpu_es);
690	KEYWORD(amdgpu_gs);
691	KEYWORD(amdgpu_ps);
692	KEYWORD(amdgpu_cs);
693	KEYWORD(amdgpu_cs_chain);
694	KEYWORD(amdgpu_cs_chain_preserve);
695	KEYWORD(amdgpu_kernel);
696	KEYWORD(amdgpu_gfx);
697	KEYWORD(amdgpu_gfx_whole_wave);
698	KEYWORD(tailcc);
699	KEYWORD(m68k_rtdcc);
700	KEYWORD(graalcc);
701	KEYWORD(riscv_vector_cc);
702	KEYWORD(riscv_vls_cc);
703	KEYWORD(cheriot_compartmentcallcc);
704	KEYWORD(cheriot_compartmentcalleecc);
705	KEYWORD(cheriot_librarycallcc);
706
707	KEYWORD(cc);
708	KEYWORD(c);
709
710	KEYWORD(attributes);
711	KEYWORD(sync);
712	KEYWORD(async);
713
714	#define GET_ATTR_NAMES
715	#define ATTRIBUTE_ENUM(ENUM_NAME, DISPLAY_NAME) \
716	KEYWORD(DISPLAY_NAME);
717	#include "llvm/IR/Attributes.inc"
718
719	KEYWORD(read);
720	KEYWORD(write);
721	KEYWORD(readwrite);
722	KEYWORD(argmem);
723	KEYWORD(target_mem0);
724	KEYWORD(target_mem1);
725	KEYWORD(inaccessiblemem);
726	KEYWORD(errnomem);
727	KEYWORD(argmemonly);
728	KEYWORD(inaccessiblememonly);
729	KEYWORD(inaccessiblemem_or_argmemonly);
730	KEYWORD(nocapture);
731	KEYWORD(address_is_null);
732	KEYWORD(address);
733	KEYWORD(provenance);
734	KEYWORD(read_provenance);
735
736	// denormal_fpenv attribute
737	KEYWORD(ieee);
738	KEYWORD(preservesign);
739	KEYWORD(positivezero);
740	KEYWORD(dynamic);
741
742	// nofpclass attribute
743	KEYWORD(all);
744	KEYWORD(nan);
745	KEYWORD(snan);
746	KEYWORD(qnan);
747	KEYWORD(inf);
748	// ninf already a keyword
749	KEYWORD(pinf);
750	KEYWORD(norm);
751	KEYWORD(nnorm);
752	KEYWORD(pnorm);
753	// sub already a keyword
754	KEYWORD(nsub);
755	KEYWORD(psub);
756	KEYWORD(zero);
757	KEYWORD(nzero);
758	KEYWORD(pzero);
759
760	KEYWORD(type);
761	KEYWORD(opaque);
762
763	KEYWORD(comdat);
764
765	// Comdat types
766	KEYWORD(any);
767	KEYWORD(exactmatch);
768	KEYWORD(largest);
769	KEYWORD(nodeduplicate);
770	KEYWORD(samesize);
771
772	KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
773	KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
774	KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
775	KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
776
777	KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
778	KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin);
779	KEYWORD(fmaximum);
780	KEYWORD(fminimum);
781	KEYWORD(uinc_wrap);
782	KEYWORD(udec_wrap);
783	KEYWORD(usub_cond);
784	KEYWORD(usub_sat);
785
786	KEYWORD(splat);
787	KEYWORD(vscale);
788	KEYWORD(x);
789	KEYWORD(blockaddress);
790	KEYWORD(dso_local_equivalent);
791	KEYWORD(no_cfi);
792	KEYWORD(ptrauth);
793
794	// Metadata types.
795	KEYWORD(distinct);
796
797	// Use-list order directives.
798	KEYWORD(uselistorder);
799	KEYWORD(uselistorder_bb);
800
801	KEYWORD(personality);
802	KEYWORD(cleanup);
803	KEYWORD(catch);
804	KEYWORD(filter);
805
806	// Summary index keywords.
807	KEYWORD(path);
808	KEYWORD(hash);
809	KEYWORD(gv);
810	KEYWORD(guid);
811	KEYWORD(name);
812	KEYWORD(summaries);
813	KEYWORD(flags);
814	KEYWORD(blockcount);
815	KEYWORD(linkage);
816	KEYWORD(visibility);
817	KEYWORD(notEligibleToImport);
818	KEYWORD(live);
819	KEYWORD(dsoLocal);
820	KEYWORD(canAutoHide);
821	KEYWORD(importType);
822	KEYWORD(definition);
823	KEYWORD(declaration);
824	KEYWORD(noRenameOnPromotion);
825	KEYWORD(function);
826	KEYWORD(insts);
827	KEYWORD(funcFlags);
828	KEYWORD(readNone);
829	KEYWORD(readOnly);
830	KEYWORD(noRecurse);
831	KEYWORD(returnDoesNotAlias);
832	KEYWORD(noInline);
833	KEYWORD(alwaysInline);
834	KEYWORD(noUnwind);
835	KEYWORD(mayThrow);
836	KEYWORD(hasUnknownCall);
837	KEYWORD(mustBeUnreachable);
838	KEYWORD(calls);
839	KEYWORD(callee);
840	KEYWORD(params);
841	KEYWORD(param);
842	KEYWORD(hotness);
843	KEYWORD(unknown);
844	KEYWORD(critical);
845	// Deprecated, keep in order to support old files.
846	KEYWORD(relbf);
847	KEYWORD(variable);
848	KEYWORD(vTableFuncs);
849	KEYWORD(virtFunc);
850	KEYWORD(aliasee);
851	KEYWORD(refs);
852	KEYWORD(typeIdInfo);
853	KEYWORD(typeTests);
854	KEYWORD(typeTestAssumeVCalls);
855	KEYWORD(typeCheckedLoadVCalls);
856	KEYWORD(typeTestAssumeConstVCalls);
857	KEYWORD(typeCheckedLoadConstVCalls);
858	KEYWORD(vFuncId);
859	KEYWORD(offset);
860	KEYWORD(args);
861	KEYWORD(typeid);
862	KEYWORD(typeidCompatibleVTable);
863	KEYWORD(summary);
864	KEYWORD(typeTestRes);
865	KEYWORD(kind);
866	KEYWORD(unsat);
867	KEYWORD(byteArray);
868	KEYWORD(inline);
869	KEYWORD(single);
870	KEYWORD(allOnes);
871	KEYWORD(sizeM1BitWidth);
872	KEYWORD(alignLog2);
873	KEYWORD(sizeM1);
874	KEYWORD(bitMask);
875	KEYWORD(inlineBits);
876	KEYWORD(vcall_visibility);
877	KEYWORD(wpdResolutions);
878	KEYWORD(wpdRes);
879	KEYWORD(indir);
880	KEYWORD(singleImpl);
881	KEYWORD(branchFunnel);
882	KEYWORD(singleImplName);
883	KEYWORD(resByArg);
884	KEYWORD(byArg);
885	KEYWORD(uniformRetVal);
886	KEYWORD(uniqueRetVal);
887	KEYWORD(virtualConstProp);
888	KEYWORD(info);
889	KEYWORD(byte);
890	KEYWORD(bit);
891	KEYWORD(varFlags);
892	KEYWORD(callsites);
893	KEYWORD(clones);
894	KEYWORD(stackIds);
895	KEYWORD(allocs);
896	KEYWORD(versions);
897	KEYWORD(memProf);
898	KEYWORD(notcold);
899
900	#undef KEYWORD
901
902	// Keywords for types.
903	#define TYPEKEYWORD(STR, LLVMTY) \
904	do { \
905	if (Keyword == STR) { \
906	TyVal = LLVMTY; \
907	return lltok::Type; \
908	} \
909	} while (false)
910
911	TYPEKEYWORD("void", Type::getVoidTy(Context));
912	TYPEKEYWORD("half", Type::getHalfTy(Context));
913	TYPEKEYWORD("bfloat", Type::getBFloatTy(Context));
914	TYPEKEYWORD("float", Type::getFloatTy(Context));
915	TYPEKEYWORD("double", Type::getDoubleTy(Context));
916	TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context));
917	TYPEKEYWORD("fp128", Type::getFP128Ty(Context));
918	TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
919	TYPEKEYWORD("label", Type::getLabelTy(Context));
920	TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
921	TYPEKEYWORD("x86_amx", Type::getX86_AMXTy(Context));
922	TYPEKEYWORD("token", Type::getTokenTy(Context));
923	TYPEKEYWORD("ptr", PointerType::getUnqual(Context));
924
925	#undef TYPEKEYWORD
926
927	// Keywords for instructions.
928	#define INSTKEYWORD(STR, Enum) \
929	do { \
930	if (Keyword == #STR) { \
931	UIntVal = Instruction::Enum; \
932	return lltok::kw_##STR; \
933	} \
934	} while (false)
935
936	INSTKEYWORD(fneg, FNeg);
937
938	INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
939	INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
940	INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul);
941	INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv);
942	INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem);
943	INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr);
944	INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor);
945	INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp);
946
947	INSTKEYWORD(phi, PHI);
948	INSTKEYWORD(call, Call);
949	INSTKEYWORD(trunc, Trunc);
950	INSTKEYWORD(zext, ZExt);
951	INSTKEYWORD(sext, SExt);
952	INSTKEYWORD(fptrunc, FPTrunc);
953	INSTKEYWORD(fpext, FPExt);
954	INSTKEYWORD(uitofp, UIToFP);
955	INSTKEYWORD(sitofp, SIToFP);
956	INSTKEYWORD(fptoui, FPToUI);
957	INSTKEYWORD(fptosi, FPToSI);
958	INSTKEYWORD(inttoptr, IntToPtr);
959	INSTKEYWORD(ptrtoaddr, PtrToAddr);
960	INSTKEYWORD(ptrtoint, PtrToInt);
961	INSTKEYWORD(bitcast, BitCast);
962	INSTKEYWORD(addrspacecast, AddrSpaceCast);
963	INSTKEYWORD(select, Select);
964	INSTKEYWORD(va_arg, VAArg);
965	INSTKEYWORD(ret, Ret);
966	INSTKEYWORD(switch, Switch);
967	INSTKEYWORD(indirectbr, IndirectBr);
968	INSTKEYWORD(invoke, Invoke);
969	INSTKEYWORD(resume, Resume);
970	INSTKEYWORD(unreachable, Unreachable);
971	INSTKEYWORD(callbr, CallBr);
972
973	INSTKEYWORD(alloca, Alloca);
974	INSTKEYWORD(load, Load);
975	INSTKEYWORD(store, Store);
976	INSTKEYWORD(cmpxchg, AtomicCmpXchg);
977	INSTKEYWORD(atomicrmw, AtomicRMW);
978	INSTKEYWORD(fence, Fence);
979	INSTKEYWORD(getelementptr, GetElementPtr);
980
981	INSTKEYWORD(extractelement, ExtractElement);
982	INSTKEYWORD(insertelement, InsertElement);
983	INSTKEYWORD(shufflevector, ShuffleVector);
984	INSTKEYWORD(extractvalue, ExtractValue);
985	INSTKEYWORD(insertvalue, InsertValue);
986	INSTKEYWORD(landingpad, LandingPad);
987	INSTKEYWORD(cleanupret, CleanupRet);
988	INSTKEYWORD(catchret, CatchRet);
989	INSTKEYWORD(catchswitch, CatchSwitch);
990	INSTKEYWORD(catchpad, CatchPad);
991	INSTKEYWORD(cleanuppad, CleanupPad);
992
993	INSTKEYWORD(freeze, Freeze);
994
995	#undef INSTKEYWORD
996
997	#define DWKEYWORD(TYPE, TOKEN) \
998	do { \
999	if (Keyword.starts_with("DW_" #TYPE "_")) { \
1000	StrVal.assign(Keyword.begin(), Keyword.end()); \
1001	return lltok::TOKEN; \
1002	} \
1003	} while (false)
1004
1005	DWKEYWORD(TAG, DwarfTag);
1006	DWKEYWORD(ATE, DwarfAttEncoding);
1007	DWKEYWORD(VIRTUALITY, DwarfVirtuality);
1008	DWKEYWORD(LANG, DwarfLang);
1009	DWKEYWORD(LNAME, DwarfSourceLangName);
1010	DWKEYWORD(CC, DwarfCC);
1011	DWKEYWORD(OP, DwarfOp);
1012	DWKEYWORD(MACINFO, DwarfMacinfo);
1013	DWKEYWORD(APPLE_ENUM_KIND, DwarfEnumKind);
1014
1015	#undef DWKEYWORD
1016
1017	// Keywords for debug record types.
1018	#define DBGRECORDTYPEKEYWORD(STR) \
1019	do { \
1020	if (Keyword == "dbg_" #STR) { \
1021	StrVal = #STR; \
1022	return lltok::DbgRecordType; \
1023	} \
1024	} while (false)
1025
1026	DBGRECORDTYPEKEYWORD(value);
1027	DBGRECORDTYPEKEYWORD(declare);
1028	DBGRECORDTYPEKEYWORD(assign);
1029	DBGRECORDTYPEKEYWORD(label);
1030	DBGRECORDTYPEKEYWORD(declare_value);
1031	#undef DBGRECORDTYPEKEYWORD
1032
1033	if (Keyword.starts_with(Prefix: "DIFlag")) {
1034	StrVal.assign(first: Keyword.begin(), last: Keyword.end());
1035	return lltok::DIFlag;
1036	}
1037
1038	if (Keyword.starts_with(Prefix: "DISPFlag")) {
1039	StrVal.assign(first: Keyword.begin(), last: Keyword.end());
1040	return lltok::DISPFlag;
1041	}
1042
1043	if (Keyword.starts_with(Prefix: "CSK_")) {
1044	StrVal.assign(first: Keyword.begin(), last: Keyword.end());
1045	return lltok::ChecksumKind;
1046	}
1047
1048	if (Keyword == "NoDebug" \|\| Keyword == "FullDebug" \|\|
1049	Keyword == "LineTablesOnly" \|\| Keyword == "DebugDirectivesOnly") {
1050	StrVal.assign(first: Keyword.begin(), last: Keyword.end());
1051	return lltok::EmissionKind;
1052	}
1053
1054	if (Keyword == "GNU" \|\| Keyword == "Apple" \|\| Keyword == "None" \|\|
1055	Keyword == "Default") {
1056	StrVal.assign(first: Keyword.begin(), last: Keyword.end());
1057	return lltok::NameTableKind;
1058	}
1059
1060	if (Keyword == "Binary" \|\| Keyword == "Decimal" \|\| Keyword == "Rational") {
1061	StrVal.assign(first: Keyword.begin(), last: Keyword.end());
1062	return lltok::FixedPointKind;
1063	}
1064
1065	// Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
1066	// the CFE to avoid forcing it to deal with 64-bit numbers.
1067	if ((TokStart[`0`] == `'u'` \|\| TokStart[`0`] == `'s'`) &&
1068	TokStart[`1`] == `'0'` && TokStart[`2`] == `'x'` &&
1069	isxdigit(static_cast<unsigned char>(TokStart[`3`]))) {
1070	int len = CurPtr-TokStart-`3`;
1071	uint32_t bits = len * `4`;
1072	StringRef HexStr(TokStart + `3`, len);
1073	if (!all_of(Range&: HexStr, P: isxdigit)) {
1074	// Bad token, return it as an error.
1075	CurPtr = TokStart+`3`;
1076	return lltok::Error;
1077	}
1078	APInt Tmp(bits, HexStr, `16`);
1079	uint32_t activeBits = Tmp.getActiveBits();
1080	if (activeBits > `0` && activeBits < bits)
1081	Tmp = Tmp.trunc(width: activeBits);
1082	APSIntVal = APSInt (Tmp, TokStart[`0`] == `'u'`);
1083	return lltok::APSInt;
1084	}
1085
1086	// If this is "cc1234", return this as just "cc".
1087	if (TokStart[`0`] == `'c'` && TokStart[`1`] == `'c'`) {
1088	CurPtr = TokStart+`2`;
1089	return lltok::kw_cc;
1090	}
1091
1092	// Finally, if this isn't known, return an error.
1093	CurPtr = TokStart+`1`;
1094	return lltok::Error;
1095	}
1096
1097	/// Lex all tokens that start with a 0x prefix, knowing they match and are not
1098	/// labels.
1099	/// HexFPConstant 0x[0-9A-Fa-f]+
1100	/// HexFP80Constant 0xK[0-9A-Fa-f]+
1101	/// HexFP128Constant 0xL[0-9A-Fa-f]+
1102	/// HexPPC128Constant 0xM[0-9A-Fa-f]+
1103	/// HexHalfConstant 0xH[0-9A-Fa-f]+
1104	/// HexBFloatConstant 0xR[0-9A-Fa-f]+
1105	lltok::Kind LLLexer::Lex0x() {
1106	CurPtr = TokStart + `2`;
1107
1108	char Kind;
1109	if ((CurPtr[`0`] >= `'K'` && CurPtr[`0`] <= `'M'`) \|\| CurPtr[`0`] == `'H'` \|\|
1110	CurPtr[`0`] == `'R'`) {
1111	Kind = *CurPtr++;
1112	} else {
1113	Kind = `'J'`;
1114	}
1115
1116	if (!isxdigit(static_cast<unsigned char>(CurPtr[`0`]))) {
1117	// Bad token, return it as an error.
1118	CurPtr = TokStart+`1`;
1119	return lltok::Error;
1120	}
1121
1122	while (isxdigit(static_cast<unsigned char>(CurPtr[`0`])))
1123	++CurPtr;
1124
1125	if (Kind == `'J'`) {
1126	// HexFPConstant - Floating point constant represented in IEEE format as a
1127	// hexadecimal number for when exponential notation is not precise enough.
1128	// Half, BFloat, Float, and double only.
1129	APFloatVal = APFloat (APFloat::IEEEdouble(),
1130	APInt (`64`, HexIntToVal(Buffer: TokStart + `2`, End: CurPtr)));
1131	return lltok::APFloat;
1132	}
1133
1134	uint64_t Pair[`2`];
1135	switch (Kind) {
1136	default: llvm_unreachable("Unknown kind!");
1137	case `'K'`:
1138	// F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
1139	FP80HexToIntPair(Buffer: TokStart+`3`, End: CurPtr, Pair);
1140	APFloatVal = APFloat (APFloat::x87DoubleExtended(), APInt (`80`, Pair));
1141	return lltok::APFloat;
1142	case `'L'`:
1143	// F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
1144	HexToIntPair(Buffer: TokStart+`3`, End: CurPtr, Pair);
1145	APFloatVal = APFloat (APFloat::IEEEquad(), APInt (`128`, Pair));
1146	return lltok::APFloat;
1147	case `'M'`:
1148	// PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
1149	HexToIntPair(Buffer: TokStart+`3`, End: CurPtr, Pair);
1150	APFloatVal = APFloat (APFloat::PPCDoubleDouble(), APInt (`128`, Pair));
1151	return lltok::APFloat;
1152	case `'H'`: {
1153	uint64_t Val = HexIntToVal(Buffer: TokStart + `3`, End: CurPtr);
1154	if (!llvm::isUInt<`16`>(x: Val)) {
1155	LexError(Msg: "hexadecimal constant too large for half (16-bit)");
1156	return lltok::Error;
1157	}
1158	APFloatVal = APFloat (APFloat::IEEEhalf(), APInt (`16`, Val));
1159	return lltok::APFloat;
1160	}
1161	case `'R'`: {
1162	// Brain floating point
1163	uint64_t Val = HexIntToVal(Buffer: TokStart + `3`, End: CurPtr);
1164	if (!llvm::isUInt<`16`>(x: Val)) {
1165	LexError(Msg: "hexadecimal constant too large for bfloat (16-bit)");
1166	return lltok::Error;
1167	}
1168	APFloatVal = APFloat (APFloat::BFloat(), APInt (`16`, Val));
1169	return lltok::APFloat;
1170	}
1171	}
1172	}
1173
1174	/// Lex tokens for a label or a numeric constant, possibly starting with -.
1175	/// Label [-a-zA-Z$._0-9]+:
1176	/// NInteger -[0-9]+
1177	/// FPConstant [-+]?[0-9]+[.][0-9]([eE][-+]?[0-9]+)?*
1178	/// PInteger [0-9]+
1179	/// HexFPConstant 0x[0-9A-Fa-f]+
1180	/// HexFP80Constant 0xK[0-9A-Fa-f]+
1181	/// HexFP128Constant 0xL[0-9A-Fa-f]+
1182	/// HexPPC128Constant 0xM[0-9A-Fa-f]+
1183	lltok::Kind LLLexer::LexDigitOrNegative() {
1184	// If the letter after the negative is not a number, this is probably a label.
1185	if (!isdigit(static_cast<unsigned char>(TokStart[`0`])) &&
1186	!isdigit(static_cast<unsigned char>(CurPtr[`0`]))) {
1187	// Okay, this is not a number after the -, it's probably a label.
1188	if (const char *End = isLabelTail(CurPtr)) {
1189	StrVal.assign(first: TokStart, last: End-`1`);
1190	CurPtr = End;
1191	return lltok::LabelStr;
1192	}
1193
1194	return lltok::Error;
1195	}
1196
1197	// At this point, it is either a label, int or fp constant.
1198
1199	// Skip digits, we have at least one.
1200	for (; isdigit(static_cast<unsigned char>(CurPtr[`0`])); ++CurPtr)
1201	/empty/;
1202
1203	// Check if this is a fully-numeric label:
1204	if (isdigit(TokStart[`0`]) && CurPtr[`0`] == `':'`) {
1205	uint64_t Val = atoull(Buffer: TokStart, End: CurPtr);
1206	++CurPtr; // Skip the colon.
1207	if ((unsigned)Val != Val)
1208	LexError(Msg: "invalid value number (too large)");
1209	UIntVal = unsigned(Val);
1210	return lltok::LabelID;
1211	}
1212
1213	// Check to see if this really is a string label, e.g. "-1:".
1214	if (isLabelChar(C: CurPtr[`0`]) \|\| CurPtr[`0`] == `':'`) {
1215	if (const char *End = isLabelTail(CurPtr)) {
1216	StrVal.assign(first: TokStart, last: End-`1`);
1217	CurPtr = End;
1218	return lltok::LabelStr;
1219	}
1220	}
1221
1222	// If the next character is a '.', then it is a fp value, otherwise its
1223	// integer.
1224	if (CurPtr[`0`] != `'.'`) {
1225	if (TokStart[`0`] == `'0'` && TokStart[`1`] == `'x'`)
1226	return Lex0x();
1227	APSIntVal = APSInt (StringRef (TokStart, CurPtr - TokStart));
1228	return lltok::APSInt;
1229	}
1230
1231	++CurPtr;
1232
1233	// Skip over [0-9]([eE][-+]?[0-9]+)?*
1234	while (isdigit(static_cast<unsigned char>(CurPtr[`0`]))) ++CurPtr;
1235
1236	if (CurPtr[`0`] == `'e'` \|\| CurPtr[`0`] == `'E'`) {
1237	if (isdigit(static_cast<unsigned char>(CurPtr[`1`])) \|\|
1238	((CurPtr[`1`] == `'-'` \|\| CurPtr[`1`] == `'+'`) &&
1239	isdigit(static_cast<unsigned char>(CurPtr[`2`])))) {
1240	CurPtr += `2`;
1241	while (isdigit(static_cast<unsigned char>(CurPtr[`0`]))) ++CurPtr;
1242	}
1243	}
1244
1245	APFloatVal = APFloat (APFloat::IEEEdouble(),
1246	StringRef (TokStart, CurPtr - TokStart));
1247	return lltok::APFloat;
1248	}
1249
1250	/// Lex a floating point constant starting with +.
1251	/// FPConstant [-+]?[0-9]+[.][0-9]([eE][-+]?[0-9]+)?*
1252	lltok::Kind LLLexer::LexPositive() {
1253	// If the letter after the negative is a number, this is probably not a
1254	// label.
1255	if (!isdigit(static_cast<unsigned char>(CurPtr[`0`])))
1256	return lltok::Error;
1257
1258	// Skip digits.
1259	for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[`0`])); ++CurPtr)
1260	/empty/;
1261
1262	// At this point, we need a '.'.
1263	if (CurPtr[`0`] != `'.'`) {
1264	CurPtr = TokStart+`1`;
1265	return lltok::Error;
1266	}
1267
1268	++CurPtr;
1269
1270	// Skip over [0-9]([eE][-+]?[0-9]+)?*
1271	while (isdigit(static_cast<unsigned char>(CurPtr[`0`]))) ++CurPtr;
1272
1273	if (CurPtr[`0`] == `'e'` \|\| CurPtr[`0`] == `'E'`) {
1274	if (isdigit(static_cast<unsigned char>(CurPtr[`1`])) \|\|
1275	((CurPtr[`1`] == `'-'` \|\| CurPtr[`1`] == `'+'`) &&
1276	isdigit(static_cast<unsigned char>(CurPtr[`2`])))) {
1277	CurPtr += `2`;
1278	while (isdigit(static_cast<unsigned char>(CurPtr[`0`]))) ++CurPtr;
1279	}
1280	}
1281
1282	APFloatVal = APFloat (APFloat::IEEEdouble(),
1283	StringRef (TokStart, CurPtr - TokStart));
1284	return lltok::APFloat;
1285	}
1286

Browse the source code of llvm_projects/llvm/lib/AsmParser/LLLexer.cpp