1//===- IdentifierTable.cpp - Hash table for identifier lookup -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the IdentifierInfo, IdentifierVisitor, and
10// IdentifierTable interfaces.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/IdentifierTable.h"
15#include "clang/Basic/CharInfo.h"
16#include "clang/Basic/DiagnosticLex.h"
17#include "clang/Basic/LangOptions.h"
18#include "clang/Basic/OperatorKinds.h"
19#include "clang/Basic/Specifiers.h"
20#include "clang/Basic/TargetBuiltins.h"
21#include "clang/Basic/TokenKinds.h"
22#include "llvm/ADT/DenseMapInfo.h"
23#include "llvm/ADT/FoldingSet.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/StringRef.h"
26#include "llvm/Support/Allocator.h"
27#include "llvm/Support/raw_ostream.h"
28#include <cassert>
29#include <cstdio>
30#include <cstring>
31#include <string>
32
33using namespace clang;
34
35// A check to make sure the ObjCOrBuiltinID has sufficient room to store the
36// largest possible target/aux-target combination. If we exceed this, we likely
37// need to just change the ObjCOrBuiltinIDBits value in IdentifierTable.h.
38static_assert(2 * LargestBuiltinID < (2 << (InterestingIdentifierBits - 1)),
39 "Insufficient ObjCOrBuiltinID Bits");
40
41//===----------------------------------------------------------------------===//
42// IdentifierTable Implementation
43//===----------------------------------------------------------------------===//
44
// Out-of-line, compiler-generated destructor for IdentifierIterator.
45IdentifierIterator::~IdentifierIterator() = default;
46
// Out-of-line, compiler-generated destructor for IdentifierInfoLookup.
47IdentifierInfoLookup::~IdentifierInfoLookup() = default;
48
49namespace {
50
51/// A simple identifier lookup iterator that represents an
52/// empty sequence of identifiers.
53class EmptyLookupIterator : public IdentifierIterator {
54public:
55 StringRef Next() override { return StringRef(); }
56};
57
58} // namespace
59
60IdentifierIterator *IdentifierInfoLookup::getIdentifiers() {
61 return new EmptyLookupIterator();
62}
63
64IdentifierTable::IdentifierTable(IdentifierInfoLookup *ExternalLookup)
65 : HashTable(8192), // Start with space for 8K identifiers.
66 ExternalLookup(ExternalLookup) {}
67
68IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
69 IdentifierInfoLookup *ExternalLookup)
70 : IdentifierTable(ExternalLookup) {
71 // Populate the identifier table with info about keywords for the current
72 // language.
73 AddKeywords(LangOpts);
74}
75
76//===----------------------------------------------------------------------===//
77// Language Keyword Implementation
78//===----------------------------------------------------------------------===//
79
// Flag constants used by the entries of TokenKinds.def.
namespace {

/// One bit per language feature / dialect that can gate a keyword.
enum TokenKey : unsigned {
  KEYC99 = 1u << 0,
  KEYCXX = 1u << 1,
  KEYCXX11 = 1u << 2,
  KEYGNU = 1u << 3,
  KEYMS = 1u << 4,
  BOOLSUPPORT = 1u << 5,
  KEYALTIVEC = 1u << 6,
  KEYNOCXX = 1u << 7,
  KEYBORLAND = 1u << 8,
  KEYOPENCLC = 1u << 9,
  KEYC23 = 1u << 10,
  KEYNOMS18 = 1u << 11,
  KEYNOOPENCL = 1u << 12,
  WCHARSUPPORT = 1u << 13,
  HALFSUPPORT = 1u << 14,
  CHAR8SUPPORT = 1u << 15,
  KEYOBJC = 1u << 16,
  KEYZVECTOR = 1u << 17,
  KEYCOROUTINES = 1u << 18,
  KEYMODULES = 1u << 19,
  KEYCXX20 = 1u << 20,
  KEYOPENCLCXX = 1u << 21,
  KEYMSCOMPAT = 1u << 22,
  KEYSYCL = 1u << 23,
  KEYCUDA = 1u << 24,
  KEYZOS = 1u << 25,
  KEYNOZOS = 1u << 26,
  KEYHLSL = 1u << 27,
  KEYFIXEDPOINT = 1u << 28,

  // The highest single-bit key.
  KEYMAX = KEYFIXEDPOINT,

  // Every C++ dialect key.
  KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20,

  // Every key up to and including KEYMAX, minus the "disable" keys
  // KEYNOMS18, KEYNOOPENCL and KEYNOZOS.
  KEYALL = (KEYMAX | (KEYMAX - 1)) & ~(KEYNOMS18 | KEYNOOPENCL | KEYNOZOS)
};

/// How a keyword is treated in the selected standard. The enumerators are
/// deliberately ordered from least to most permissive, so that taking the
/// maximum of two statuses picks the one that 'wins'.
enum KeywordStatus {
  KS_Unknown,   // Not computed yet; used while the status is being derived.
  KS_Disabled,  // Not a keyword.
  KS_Future,    // A keyword in a future standard.
  KS_Extension, // Available as an extension.
  KS_Enabled,   // A keyword.
};

} // namespace
130
131// This works on a single TokenKey flag and checks the LangOpts to get the
132// KeywordStatus based exclusively on this flag, so that it can be merged in
133// getKeywordStatus. Most should be enabled/disabled, but some might imply
134// 'future' versions, or extensions. Returns 'unknown' unless this is KNOWN to
135// be disabled, and the calling function makes it 'disabled' if no other flag
136// changes it. This is necessary for the KEYNOCXX and KEYNOOPENCL flags.
137static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts,
138 TokenKey Flag) {
139 // Flag is a single bit version of TokenKey (that is, not
140 // KEYALL/KEYALLCXX/etc), so we can check with == throughout this function.
141 assert((Flag & ~(Flag - 1)) == Flag && "Multiple bits set?");
142
143 switch (Flag) {
144 case KEYC99:
145 if (LangOpts.C99)
146 return KS_Enabled;
147 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
148 case KEYC23:
149 if (LangOpts.C23)
150 return KS_Enabled;
151 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
152 case KEYCXX:
153 return LangOpts.CPlusPlus ? KS_Enabled : KS_Unknown;
154 case KEYCXX11:
155 if (LangOpts.CPlusPlus11)
156 return KS_Enabled;
157 return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
158 case KEYCXX20:
159 if (LangOpts.CPlusPlus20)
160 return KS_Enabled;
161 return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
162 case KEYGNU:
163 return LangOpts.GNUKeywords ? KS_Extension : KS_Unknown;
164 case KEYMS:
165 return LangOpts.MicrosoftExt ? KS_Extension : KS_Unknown;
166 case BOOLSUPPORT:
167 if (LangOpts.Bool) return KS_Enabled;
168 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
169 case KEYALTIVEC:
170 return LangOpts.AltiVec ? KS_Enabled : KS_Unknown;
171 case KEYBORLAND:
172 return LangOpts.Borland ? KS_Extension : KS_Unknown;
173 case KEYOPENCLC:
174 return LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus ? KS_Enabled
175 : KS_Unknown;
176 case WCHARSUPPORT:
177 return LangOpts.WChar ? KS_Enabled : KS_Unknown;
178 case HALFSUPPORT:
179 return LangOpts.Half ? KS_Enabled : KS_Unknown;
180 case CHAR8SUPPORT:
181 if (LangOpts.Char8) return KS_Enabled;
182 if (LangOpts.CPlusPlus20) return KS_Unknown;
183 if (LangOpts.CPlusPlus) return KS_Future;
184 return KS_Unknown;
185 case KEYOBJC:
186 // We treat bridge casts as objective-C keywords so we can warn on them
187 // in non-arc mode.
188 return LangOpts.ObjC ? KS_Enabled : KS_Unknown;
189 case KEYZVECTOR:
190 return LangOpts.ZVector ? KS_Enabled : KS_Unknown;
191 case KEYCOROUTINES:
192 return LangOpts.Coroutines ? KS_Enabled : KS_Unknown;
193 case KEYMODULES:
194 return KS_Unknown;
195 case KEYOPENCLCXX:
196 return LangOpts.OpenCLCPlusPlus ? KS_Enabled : KS_Unknown;
197 case KEYMSCOMPAT:
198 return LangOpts.MSVCCompat ? KS_Enabled : KS_Unknown;
199 case KEYSYCL:
200 return LangOpts.isSYCL() ? KS_Enabled : KS_Unknown;
201 case KEYCUDA:
202 return LangOpts.CUDA ? KS_Enabled : KS_Unknown;
203 case KEYZOS:
204 return LangOpts.ZOSExt ? KS_Enabled : KS_Unknown;
205 case KEYHLSL:
206 return LangOpts.HLSL ? KS_Enabled : KS_Unknown;
207 case KEYNOCXX:
208 // This is enabled in all non-C++ modes, but might be enabled for other
209 // reasons as well.
210 return LangOpts.CPlusPlus ? KS_Unknown : KS_Enabled;
211 case KEYNOOPENCL:
212 case KEYNOMS18:
213 case KEYNOZOS:
214 // The disable behavior for this is handled in getKeywordStatus.
215 return KS_Unknown;
216 case KEYFIXEDPOINT:
217 return LangOpts.FixedPoint ? KS_Enabled : KS_Disabled;
218 default:
219 llvm_unreachable("Unknown KeywordStatus flag");
220 }
221}
222
223/// Translates flags as specified in TokenKinds.def into keyword status
224/// in the given language standard.
225static KeywordStatus getKeywordStatus(const LangOptions &LangOpts,
226 unsigned Flags) {
227 // KEYALL means always enabled, so special case this one.
228 if (Flags == KEYALL) return KS_Enabled;
229 // These are tests that need to 'always win', as they are special in that they
230 // disable based on certain conditions.
231 if (LangOpts.OpenCL && (Flags & KEYNOOPENCL)) return KS_Disabled;
232 if (LangOpts.MSVCCompat && (Flags & KEYNOMS18) &&
233 !LangOpts.isCompatibleWithMSVC(MajorVersion: LangOptions::MSVC2015))
234 return KS_Disabled;
235 if (LangOpts.ZOSExt && (Flags & KEYNOZOS))
236 return KS_Disabled;
237 KeywordStatus CurStatus = KS_Unknown;
238
239 while (Flags != 0) {
240 unsigned CurFlag = Flags & ~(Flags - 1);
241 Flags = Flags & ~CurFlag;
242 CurStatus = std::max(
243 a: CurStatus,
244 b: getKeywordStatusHelper(LangOpts, Flag: static_cast<TokenKey>(CurFlag)));
245 }
246
247 if (CurStatus == KS_Unknown)
248 return KS_Disabled;
249 return CurStatus;
250}
251
252static bool IsKeywordInCpp(unsigned Flags) {
253 return (Flags & (KEYCXX | KEYCXX11 | KEYCXX20 | BOOLSUPPORT | WCHARSUPPORT |
254 CHAR8SUPPORT)) != 0;
255}
256
257static void MarkIdentifierAsKeywordInCpp(IdentifierTable &Table,
258 StringRef Name) {
259 IdentifierInfo &II = Table.get(Name, TokenCode: tok::identifier);
260 II.setIsKeywordInCPlusPlus();
261 II.setHandleIdentifierCase();
262}
263
264/// AddKeyword - This method is used to associate a token ID with specific
265/// identifiers because they are language keywords. This causes the lexer to
266/// automatically map matching identifiers to specialized token codes.
267static void AddKeyword(StringRef Keyword,
268 tok::TokenKind TokenCode, unsigned Flags,
269 const LangOptions &LangOpts, IdentifierTable &Table) {
270 KeywordStatus AddResult = getKeywordStatus(LangOpts, Flags);
271
272 // Don't add this keyword if disabled in this language and isn't otherwise
273 // special.
274 if (AddResult == KS_Disabled) {
275 // We do not consider any identifiers to be C++ keywords when in
276 // Objective-C because @ effectively introduces a custom grammar where C++
277 // keywords can be used (and similar for selectors). We could enable this
278 // for Objective-C, but it would require more logic to ensure we do not
279 // issue compatibility diagnostics in these cases.
280 if (!LangOpts.ObjC && IsKeywordInCpp(Flags))
281 MarkIdentifierAsKeywordInCpp(Table, Name: Keyword);
282 return;
283 }
284
285 IdentifierInfo &Info =
286 Table.get(Name: Keyword, TokenCode: AddResult == KS_Future ? tok::identifier : TokenCode);
287 Info.setIsExtensionToken(AddResult == KS_Extension);
288 Info.setIsFutureCompatKeyword(AddResult == KS_Future);
289}
290
291/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
292/// representations.
293static void AddCXXOperatorKeyword(StringRef Keyword,
294 tok::TokenKind TokenCode,
295 IdentifierTable &Table) {
296 IdentifierInfo &Info = Table.get(Name: Keyword, TokenCode);
297 Info.setIsCPlusPlusOperatorKeyword();
298}
299
300/// AddObjCKeyword - Register an Objective-C \@keyword like "class" "selector"
301/// or "property".
302static void AddObjCKeyword(StringRef Name,
303 tok::ObjCKeywordKind ObjCID,
304 IdentifierTable &Table) {
305 Table.get(Name).setObjCKeywordID(ObjCID);
306}
307
308static void AddNotableIdentifier(StringRef Name,
309 tok::NotableIdentifierKind BTID,
310 IdentifierTable &Table) {
311 // Don't add 'not_notable' identifier.
312 if (BTID != tok::not_notable) {
313 IdentifierInfo &Info = Table.get(Name, TokenCode: tok::identifier);
314 Info.setNotableIdentifierID(BTID);
315 }
316}
317
318/// AddKeywords - Add all keywords to the symbol table.
319///
320void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
321 // Add keywords and tokens for the current language.
322#define KEYWORD(NAME, FLAGS) \
323 AddKeyword(StringRef(#NAME), tok::kw_ ## NAME, \
324 FLAGS, LangOpts, *this);
325#define ALIAS(NAME, TOK, FLAGS) \
326 AddKeyword(StringRef(NAME), tok::kw_ ## TOK, \
327 FLAGS, LangOpts, *this);
328#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
329 if (LangOpts.CXXOperatorNames) \
330 AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this); \
331 else \
332 MarkIdentifierAsKeywordInCpp(*this, StringRef(#NAME));
333#define OBJC_AT_KEYWORD(NAME) \
334 if (LangOpts.ObjC) \
335 AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this);
336#define NOTABLE_IDENTIFIER(NAME) \
337 AddNotableIdentifier(StringRef(#NAME), tok::NAME, *this);
338
339#define TESTING_KEYWORD(NAME, FLAGS)
340#include "clang/Basic/TokenKinds.def"
341
342 if (LangOpts.ParseUnknownAnytype)
343 AddKeyword(Keyword: "__unknown_anytype", TokenCode: tok::kw___unknown_anytype, Flags: KEYALL,
344 LangOpts, Table&: *this);
345
346 if (LangOpts.DeclSpecKeyword)
347 AddKeyword(Keyword: "__declspec", TokenCode: tok::kw___declspec, Flags: KEYALL, LangOpts, Table&: *this);
348
349 if (LangOpts.IEEE128)
350 AddKeyword(Keyword: "__ieee128", TokenCode: tok::kw___float128, Flags: KEYALL, LangOpts, Table&: *this);
351
352 // Add the 'import' contextual keyword.
353 get(Name: "import").setModulesImport(true);
354}
355
356/// Checks if the specified token kind represents a keyword in the
357/// specified language.
358/// \returns Status of the keyword in the language.
359static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts,
360 tok::TokenKind K) {
361 switch (K) {
362#define KEYWORD(NAME, FLAGS) \
363 case tok::kw_##NAME: return getKeywordStatus(LangOpts, FLAGS);
364#include "clang/Basic/TokenKinds.def"
365 default: return KS_Disabled;
366 }
367}
368
369/// Returns true if the identifier represents a keyword in the
370/// specified language.
371bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const {
372 switch (getTokenKwStatus(LangOpts, K: getTokenID())) {
373 case KS_Enabled:
374 case KS_Extension:
375 return true;
376 default:
377 return false;
378 }
379}
380
381/// Returns true if the identifier represents a C++ keyword in the
382/// specified language.
383bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const {
384 if (!LangOpts.CPlusPlus || !isKeyword(LangOpts))
385 return false;
386 // This is a C++ keyword if this identifier is not a keyword when checked
387 // using LangOptions without C++ support.
388 LangOptions LangOptsNoCPP = LangOpts;
389 LangOptsNoCPP.CPlusPlus = false;
390 LangOptsNoCPP.CPlusPlus11 = false;
391 LangOptsNoCPP.CPlusPlus20 = false;
392 return !isKeyword(LangOpts: LangOptsNoCPP);
393}
394
395ReservedIdentifierStatus
396IdentifierInfo::isReserved(const LangOptions &LangOpts) const {
397 StringRef Name = getName();
398
399 // '_' is a reserved identifier, but its use is so common (e.g. to store
400 // ignored values) that we don't warn on it.
401 if (Name.size() <= 1)
402 return ReservedIdentifierStatus::NotReserved;
403
404 // [lex.name] p3
405 if (Name[0] == '_') {
406
407 // Each name that begins with an underscore followed by an uppercase letter
408 // or another underscore is reserved.
409 if (Name[1] == '_')
410 return ReservedIdentifierStatus::StartsWithDoubleUnderscore;
411
412 if ('A' <= Name[1] && Name[1] <= 'Z')
413 return ReservedIdentifierStatus::
414 StartsWithUnderscoreFollowedByCapitalLetter;
415
416 // This is a bit misleading: it actually means it's only reserved if we're
417 // at global scope because it starts with an underscore.
418 return ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope;
419 }
420
421 // Each name that contains a double underscore (__) is reserved.
422 if (LangOpts.CPlusPlus && Name.contains(Other: "__"))
423 return ReservedIdentifierStatus::ContainsDoubleUnderscore;
424
425 return ReservedIdentifierStatus::NotReserved;
426}
427
428ReservedLiteralSuffixIdStatus
429IdentifierInfo::isReservedLiteralSuffixId() const {
430 StringRef Name = getName();
431
432 // Note: the diag::warn_deprecated_literal_operator_id diagnostic depends on
433 // this being the first check we do, so if this order changes, we have to fix
434 // that as well.
435 if (Name[0] != '_')
436 return ReservedLiteralSuffixIdStatus::NotStartsWithUnderscore;
437
438 if (Name.contains(Other: "__"))
439 return ReservedLiteralSuffixIdStatus::ContainsDoubleUnderscore;
440
441 return ReservedLiteralSuffixIdStatus::NotReserved;
442}
443
444StringRef IdentifierInfo::deuglifiedName() const {
445 StringRef Name = getName();
446 if (Name.size() >= 2 && Name.front() == '_' &&
447 (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z')))
448 return Name.ltrim(Char: '_');
449 return Name;
450}
451
452tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
453 // We use a perfect hash function here involving the length of the keyword,
454 // the first and third character. For preprocessor ID's there are no
455 // collisions (if there were, the switch below would complain about duplicate
456 // case values). Note that this depends on 'if' being null terminated.
457
458#define HASH(LEN, FIRST, THIRD) \
459 (LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63)
460#define CASE(LEN, FIRST, THIRD, NAME) \
461 case HASH(LEN, FIRST, THIRD): \
462 return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
463
464 unsigned Len = getLength();
465 if (Len < 2) return tok::pp_not_keyword;
466 const char *Name = getNameStart();
467 switch (HASH(Len, Name[0], Name[2])) {
468 default: return tok::pp_not_keyword;
469 CASE( 2, 'i', '\0', if);
470 CASE( 4, 'e', 'i', elif);
471 CASE( 4, 'e', 's', else);
472 CASE( 4, 'l', 'n', line);
473 CASE( 4, 's', 'c', sccs);
474 CASE( 5, 'e', 'b', embed);
475 CASE( 5, 'e', 'd', endif);
476 CASE( 5, 'e', 'r', error);
477 CASE( 5, 'i', 'e', ident);
478 CASE( 5, 'i', 'd', ifdef);
479 CASE( 5, 'u', 'd', undef);
480
481 CASE( 6, 'a', 's', assert);
482 CASE( 6, 'd', 'f', define);
483 CASE( 6, 'i', 'n', ifndef);
484 CASE( 6, 'i', 'p', import);
485 CASE( 6, 'p', 'a', pragma);
486
487 CASE( 7, 'd', 'f', defined);
488 CASE( 7, 'e', 'i', elifdef);
489 CASE( 7, 'i', 'c', include);
490 CASE( 7, 'w', 'r', warning);
491
492 CASE( 8, 'e', 'i', elifndef);
493 CASE( 8, 'u', 'a', unassert);
494 CASE(12, 'i', 'c', include_next);
495
496 CASE(14, '_', 'p', __public_macro);
497
498 CASE(15, '_', 'p', __private_macro);
499
500 CASE(16, '_', 'i', __include_macros);
501#undef CASE
502#undef HASH
503 }
504}
505
506//===----------------------------------------------------------------------===//
507// Stats Implementation
508//===----------------------------------------------------------------------===//
509
510/// PrintStats - Print statistics about how well the identifier table is doing
511/// at hashing identifiers.
512void IdentifierTable::PrintStats() const {
513 unsigned NumBuckets = HashTable.getNumBuckets();
514 unsigned NumIdentifiers = HashTable.getNumItems();
515 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
516 unsigned AverageIdentifierSize = 0;
517 unsigned MaxIdentifierLength = 0;
518
519 // TODO: Figure out maximum times an identifier had to probe for -stats.
520 for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
521 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
522 unsigned IdLen = I->getKeyLength();
523 AverageIdentifierSize += IdLen;
524 if (MaxIdentifierLength < IdLen)
525 MaxIdentifierLength = IdLen;
526 }
527
528 fprintf(stderr, format: "\n*** Identifier Table Stats:\n");
529 fprintf(stderr, format: "# Identifiers: %d\n", NumIdentifiers);
530 fprintf(stderr, format: "# Empty Buckets: %d\n", NumEmptyBuckets);
531 fprintf(stderr, format: "Hash density (#identifiers per bucket): %f\n",
532 NumIdentifiers/(double)NumBuckets);
533 fprintf(stderr, format: "Ave identifier length: %f\n",
534 (AverageIdentifierSize/(double)NumIdentifiers));
535 fprintf(stderr, format: "Max identifier length: %d\n", MaxIdentifierLength);
536
537 // Compute statistics about the memory allocated for identifiers.
538 HashTable.getAllocator().PrintStats();
539}
540
541//===----------------------------------------------------------------------===//
542// SelectorTable Implementation
543//===----------------------------------------------------------------------===//
544
545unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
546 return DenseMapInfo<void*>::getHashValue(PtrVal: S.getAsOpaquePtr());
547}
548
549bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const {
550 assert(!Names.empty() && "must have >= 1 selector slots");
551 if (getNumArgs() != Names.size())
552 return false;
553 for (unsigned I = 0, E = Names.size(); I != E; ++I) {
554 if (getNameForSlot(argIndex: I) != Names[I])
555 return false;
556 }
557 return true;
558}
559
560bool Selector::isUnarySelector(StringRef Name) const {
561 return isUnarySelector() && getNameForSlot(argIndex: 0) == Name;
562}
563
564unsigned Selector::getNumArgs() const {
565 unsigned IIF = getIdentifierInfoFlag();
566 if (IIF <= ZeroArg)
567 return 0;
568 if (IIF == OneArg)
569 return 1;
570 // We point to a MultiKeywordSelector.
571 MultiKeywordSelector *SI = getMultiKeywordSelector();
572 return SI->getNumArgs();
573}
574
575const IdentifierInfo *
576Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
577 if (getIdentifierInfoFlag() < MultiArg) {
578 assert(argIndex == 0 && "illegal keyword index");
579 return getAsIdentifierInfo();
580 }
581
582 // We point to a MultiKeywordSelector.
583 MultiKeywordSelector *SI = getMultiKeywordSelector();
584 return SI->getIdentifierInfoForSlot(i: argIndex);
585}
586
587StringRef Selector::getNameForSlot(unsigned int argIndex) const {
588 const IdentifierInfo *II = getIdentifierInfoForSlot(argIndex);
589 return II ? II->getName() : StringRef();
590}
591
592std::string MultiKeywordSelector::getName() const {
593 SmallString<256> Str;
594 llvm::raw_svector_ostream OS(Str);
595 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
596 if (*I)
597 OS << (*I)->getName();
598 OS << ':';
599 }
600
601 return std::string(OS.str());
602}
603
604std::string Selector::getAsString() const {
605 if (isNull())
606 return "<null selector>";
607
608 if (getIdentifierInfoFlag() < MultiArg) {
609 const IdentifierInfo *II = getAsIdentifierInfo();
610
611 if (getNumArgs() == 0) {
612 assert(II && "If the number of arguments is 0 then II is guaranteed to "
613 "not be null.");
614 return std::string(II->getName());
615 }
616
617 if (!II)
618 return ":";
619
620 return II->getName().str() + ":";
621 }
622
623 // We have a multiple keyword selector.
624 return getMultiKeywordSelector()->getName();
625}
626
627void Selector::print(llvm::raw_ostream &OS) const {
628 OS << getAsString();
629}
630
631LLVM_DUMP_METHOD void Selector::dump() const { print(OS&: llvm::errs()); }
632
633/// Interpreting the given string using the normal CamelCase
634/// conventions, determine whether the given string starts with the
635/// given "word", which is assumed to end in a lowercase letter.
636static bool startsWithWord(StringRef name, StringRef word) {
637 if (name.size() < word.size()) return false;
638 return ((name.size() == word.size() || !isLowercase(c: name[word.size()])) &&
639 name.starts_with(Prefix: word));
640}
641
642ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) {
643 const IdentifierInfo *first = sel.getIdentifierInfoForSlot(argIndex: 0);
644 if (!first) return OMF_None;
645
646 StringRef name = first->getName();
647 if (sel.isUnarySelector()) {
648 if (name == "autorelease") return OMF_autorelease;
649 if (name == "dealloc") return OMF_dealloc;
650 if (name == "finalize") return OMF_finalize;
651 if (name == "release") return OMF_release;
652 if (name == "retain") return OMF_retain;
653 if (name == "retainCount") return OMF_retainCount;
654 if (name == "self") return OMF_self;
655 if (name == "initialize") return OMF_initialize;
656 }
657
658 if (name == "performSelector" || name == "performSelectorInBackground" ||
659 name == "performSelectorOnMainThread")
660 return OMF_performSelector;
661
662 // The other method families may begin with a prefix of underscores.
663 name = name.ltrim(Char: '_');
664
665 if (name.empty()) return OMF_None;
666 switch (name.front()) {
667 case 'a':
668 if (startsWithWord(name, word: "alloc")) return OMF_alloc;
669 break;
670 case 'c':
671 if (startsWithWord(name, word: "copy")) return OMF_copy;
672 break;
673 case 'i':
674 if (startsWithWord(name, word: "init")) return OMF_init;
675 break;
676 case 'm':
677 if (startsWithWord(name, word: "mutableCopy")) return OMF_mutableCopy;
678 break;
679 case 'n':
680 if (startsWithWord(name, word: "new")) return OMF_new;
681 break;
682 default:
683 break;
684 }
685
686 return OMF_None;
687}
688
689ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) {
690 const IdentifierInfo *first = sel.getIdentifierInfoForSlot(argIndex: 0);
691 if (!first) return OIT_None;
692
693 StringRef name = first->getName();
694
695 if (name.empty()) return OIT_None;
696 switch (name.front()) {
697 case 'a':
698 if (startsWithWord(name, word: "array")) return OIT_Array;
699 break;
700 case 'd':
701 if (startsWithWord(name, word: "default")) return OIT_ReturnsSelf;
702 if (startsWithWord(name, word: "dictionary")) return OIT_Dictionary;
703 break;
704 case 's':
705 if (startsWithWord(name, word: "shared")) return OIT_ReturnsSelf;
706 if (startsWithWord(name, word: "standard")) return OIT_Singleton;
707 break;
708 case 'i':
709 if (startsWithWord(name, word: "init")) return OIT_Init;
710 break;
711 default:
712 break;
713 }
714 return OIT_None;
715}
716
717ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) {
718 const IdentifierInfo *first = sel.getIdentifierInfoForSlot(argIndex: 0);
719 if (!first) return SFF_None;
720
721 StringRef name = first->getName();
722
723 switch (name.front()) {
724 case 'a':
725 if (name == "appendFormat") return SFF_NSString;
726 break;
727
728 case 'i':
729 if (name == "initWithFormat") return SFF_NSString;
730 break;
731
732 case 'l':
733 if (name == "localizedStringWithFormat") return SFF_NSString;
734 break;
735
736 case 's':
737 if (name == "stringByAppendingFormat" ||
738 name == "stringWithFormat") return SFF_NSString;
739 break;
740 }
741 return SFF_None;
742}
743
744namespace {
745
// Backing state that SelectorTable reaches through its opaque Impl pointer
// (see getSelectorTableImpl).
746struct SelectorTableImpl {
  // Uniquing set of MultiKeywordSelector nodes.
747 llvm::FoldingSet<MultiKeywordSelector> Table;
  // Allocator that getSelector uses for MultiKeywordSelector storage.
748 llvm::BumpPtrAllocator Allocator;
749};
750
751} // namespace
752
753static SelectorTableImpl &getSelectorTableImpl(void *P) {
754 return *static_cast<SelectorTableImpl*>(P);
755}
756
757SmallString<64>
758SelectorTable::constructSetterName(StringRef Name) {
759 SmallString<64> SetterName("set");
760 SetterName += Name;
761 SetterName[3] = toUppercase(c: SetterName[3]);
762 return SetterName;
763}
764
765Selector
766SelectorTable::constructSetterSelector(IdentifierTable &Idents,
767 SelectorTable &SelTable,
768 const IdentifierInfo *Name) {
769 IdentifierInfo *SetterName =
770 &Idents.get(Name: constructSetterName(Name: Name->getName()));
771 return SelTable.getUnarySelector(ID: SetterName);
772}
773
774std::string SelectorTable::getPropertyNameFromSetterSelector(Selector Sel) {
775 StringRef Name = Sel.getNameForSlot(argIndex: 0);
776 assert(Name.starts_with("set") && "invalid setter name");
777 return (Twine(toLowercase(c: Name[3])) + Name.drop_front(N: 4)).str();
778}
779
780size_t SelectorTable::getTotalMemory() const {
781 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(P: Impl);
782 return SelTabImpl.Allocator.getTotalMemory();
783}
784
785Selector SelectorTable::getSelector(unsigned nKeys,
786 const IdentifierInfo **IIV) {
787 if (nKeys < 2)
788 return Selector(IIV[0], nKeys);
789
790 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(P: Impl);
791
792 // Unique selector, to guarantee there is one per name.
793 llvm::FoldingSetNodeID ID;
794 MultiKeywordSelector::Profile(ID, ArgTys: IIV, NumArgs: nKeys);
795
796 void *InsertPos = nullptr;
797 if (MultiKeywordSelector *SI =
798 SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
799 return Selector(SI);
800
801 // MultiKeywordSelector objects are not allocated with new because they have a
802 // variable size array (for parameter types) at the end of them.
803 unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
804 MultiKeywordSelector *SI =
805 (MultiKeywordSelector *)SelTabImpl.Allocator.Allocate(
806 Size, Alignment: alignof(MultiKeywordSelector));
807 new (SI) MultiKeywordSelector(nKeys, IIV);
808 SelTabImpl.Table.InsertNode(N: SI, InsertPos);
809 return Selector(SI);
810}
811
812SelectorTable::SelectorTable() {
813 Impl = new SelectorTableImpl();
814}
815
816SelectorTable::~SelectorTable() {
817 delete &getSelectorTableImpl(P: Impl);
818}
819
820const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
821 switch (Operator) {
822 case OO_None:
823 case NUM_OVERLOADED_OPERATORS:
824 return nullptr;
825
826#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
827 case OO_##Name: return Spelling;
828#include "clang/Basic/OperatorKinds.def"
829 }
830
831 llvm_unreachable("Invalid OverloadedOperatorKind!");
832}
833
834StringRef clang::getNullabilitySpelling(NullabilityKind kind,
835 bool isContextSensitive) {
836 switch (kind) {
837 case NullabilityKind::NonNull:
838 return isContextSensitive ? "nonnull" : "_Nonnull";
839
840 case NullabilityKind::Nullable:
841 return isContextSensitive ? "nullable" : "_Nullable";
842
843 case NullabilityKind::NullableResult:
844 assert(!isContextSensitive &&
845 "_Nullable_result isn't supported as context-sensitive keyword");
846 return "_Nullable_result";
847
848 case NullabilityKind::Unspecified:
849 return isContextSensitive ? "null_unspecified" : "_Null_unspecified";
850 }
851 llvm_unreachable("Unknown nullability kind.");
852}
853
854llvm::raw_ostream &clang::operator<<(llvm::raw_ostream &OS,
855 NullabilityKind NK) {
856 switch (NK) {
857 case NullabilityKind::NonNull:
858 return OS << "NonNull";
859 case NullabilityKind::Nullable:
860 return OS << "Nullable";
861 case NullabilityKind::NullableResult:
862 return OS << "NullableResult";
863 case NullabilityKind::Unspecified:
864 return OS << "Unspecified";
865 }
866 llvm_unreachable("Unknown nullability kind.");
867}
868
869diag::kind
870IdentifierTable::getFutureCompatDiagKind(const IdentifierInfo &II,
871 const LangOptions &LangOpts) {
872 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
873
874 unsigned Flags = llvm::StringSwitch<unsigned>(II.getName())
875#define KEYWORD(NAME, FLAGS) .Case(#NAME, FLAGS)
876#include "clang/Basic/TokenKinds.def"
877#undef KEYWORD
878 ;
879
880 if (LangOpts.CPlusPlus) {
881 if ((Flags & KEYCXX11) == KEYCXX11)
882 return diag::warn_cxx11_keyword;
883
884 // char8_t is not modeled as a CXX20_KEYWORD because it's not
885 // unconditionally enabled in C++20 mode. (It can be disabled
886 // by -fno-char8_t.)
887 if (((Flags & KEYCXX20) == KEYCXX20) ||
888 ((Flags & CHAR8SUPPORT) == CHAR8SUPPORT))
889 return diag::warn_cxx20_keyword;
890 } else {
891 if ((Flags & KEYC99) == KEYC99)
892 return diag::warn_c99_keyword;
893 if ((Flags & KEYC23) == KEYC23)
894 return diag::warn_c23_keyword;
895 }
896
897 llvm_unreachable(
898 "Keyword not known to come from a newer Standard or proposed Standard");
899}
900