1//===- IdentifierTable.cpp - Hash table for identifier lookup -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the IdentifierInfo, IdentifierVisitor, and
10// IdentifierTable interfaces.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/IdentifierTable.h"
15#include "clang/Basic/CharInfo.h"
16#include "clang/Basic/DiagnosticLex.h"
17#include "clang/Basic/LangOptions.h"
18#include "clang/Basic/OperatorKinds.h"
19#include "clang/Basic/Specifiers.h"
20#include "clang/Basic/TargetBuiltins.h"
21#include "clang/Basic/TokenKinds.h"
22#include "llvm/ADT/DenseMapInfo.h"
23#include "llvm/ADT/FoldingSet.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/StringRef.h"
26#include "llvm/Support/Allocator.h"
27#include "llvm/Support/raw_ostream.h"
28#include <cassert>
29#include <cstdio>
30#include <cstring>
31#include <string>
32
33using namespace clang;
34
// A check to make sure the ObjCOrBuiltinID has sufficient room to store the
// largest possible target/aux-target combination. The right-hand side,
// 2 << (InterestingIdentifierBits - 1), equals 2^InterestingIdentifierBits, so
// twice LargestBuiltinID must be representable in InterestingIdentifierBits
// bits. If we exceed this, we likely need to just change the bit count in
// IdentifierTable.h.
// NOTE(review): this comment used to name ObjCOrBuiltinIDBits, while the
// assertion tests InterestingIdentifierBits -- confirm the intended constant.
static_assert(2 * LargestBuiltinID < (2 << (InterestingIdentifierBits - 1)),
              "Insufficient ObjCOrBuiltinID Bits");
40
41//===----------------------------------------------------------------------===//
42// IdentifierTable Implementation
43//===----------------------------------------------------------------------===//
44
// Out-of-line definition of the defaulted IdentifierIterator destructor.
IdentifierIterator::~IdentifierIterator() = default;

// Out-of-line definition of the defaulted IdentifierInfoLookup destructor.
IdentifierInfoLookup::~IdentifierInfoLookup() = default;
48
49namespace {
50
51/// A simple identifier lookup iterator that represents an
52/// empty sequence of identifiers.
53class EmptyLookupIterator : public IdentifierIterator {
54public:
55 StringRef Next() override { return StringRef(); }
56};
57
58} // namespace
59
60IdentifierIterator *IdentifierInfoLookup::getIdentifiers() {
61 return new EmptyLookupIterator();
62}
63
/// Creates an identifier table without registering any keywords.
///
/// \param ExternalLookup Stored as this table's external lookup source.
IdentifierTable::IdentifierTable(IdentifierInfoLookup *ExternalLookup)
    : HashTable(8192), // Start with space for 8K identifiers.
      ExternalLookup(ExternalLookup) {}
67
/// Creates an identifier table and registers the keywords of the language
/// described by \p LangOpts.
///
/// \param LangOpts Language options that decide which keywords are added.
/// \param ExternalLookup Forwarded to the single-argument constructor.
IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
                                 IdentifierInfoLookup *ExternalLookup)
    : IdentifierTable(ExternalLookup) {
  // Populate the identifier table with info about keywords for the current
  // language.
  AddKeywords(LangOpts);
}
75
76//===----------------------------------------------------------------------===//
77// Language Keyword Implementation
78//===----------------------------------------------------------------------===//
79
80// This works on a single TokenKey flag and checks the LangOpts to get the
81// KeywordStatus based exclusively on this flag, so that it can be merged in
82// getKeywordStatus. Most should be enabled/disabled, but some might imply
83// 'future' versions, or extensions. Returns 'unknown' unless this is KNOWN to
84// be disabled, and the calling function makes it 'disabled' if no other flag
85// changes it. This is necessary for the KEYNOCXX and KEYNOOPENCL flags.
86static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts,
87 TokenKey Flag) {
88 // Flag is a single bit version of TokenKey (that is, not
89 // KEYALL/KEYALLCXX/etc), so we can check with == throughout this function.
90 assert((Flag & ~(Flag - 1)) == Flag && "Multiple bits set?");
91
92 switch (Flag) {
93 case KEYC99:
94 if (LangOpts.C99)
95 return KS_Enabled;
96 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
97 case KEYC23:
98 if (LangOpts.C23)
99 return KS_Enabled;
100 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
101 case KEYCXX:
102 return LangOpts.CPlusPlus ? KS_Enabled : KS_Unknown;
103 case KEYCXX11:
104 if (LangOpts.CPlusPlus11)
105 return KS_Enabled;
106 return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
107 case KEYCXX20:
108 if (LangOpts.CPlusPlus20)
109 return KS_Enabled;
110 return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
111 case KEYGNU:
112 return LangOpts.GNUKeywords ? KS_Extension : KS_Unknown;
113 case KEYMS:
114 return LangOpts.MicrosoftExt ? KS_Extension : KS_Unknown;
115 case BOOLSUPPORT:
116 if (LangOpts.Bool) return KS_Enabled;
117 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
118 case KEYALTIVEC:
119 return LangOpts.AltiVec ? KS_Enabled : KS_Unknown;
120 case KEYBORLAND:
121 return LangOpts.Borland ? KS_Extension : KS_Unknown;
122 case KEYOPENCLC:
123 return LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus ? KS_Enabled
124 : KS_Unknown;
125 case WCHARSUPPORT:
126 return LangOpts.WChar ? KS_Enabled : KS_Unknown;
127 case HALFSUPPORT:
128 return LangOpts.Half ? KS_Enabled : KS_Unknown;
129 case CHAR8SUPPORT:
130 if (LangOpts.Char8) return KS_Enabled;
131 if (LangOpts.CPlusPlus20) return KS_Unknown;
132 if (LangOpts.CPlusPlus) return KS_Future;
133 return KS_Unknown;
134 case KEYOBJC:
135 // We treat bridge casts as objective-C keywords so we can warn on them
136 // in non-arc mode.
137 return LangOpts.ObjC ? KS_Enabled : KS_Unknown;
138 case KEYZVECTOR:
139 return LangOpts.ZVector ? KS_Enabled : KS_Unknown;
140 case KEYCOROUTINES:
141 return LangOpts.Coroutines ? KS_Enabled : KS_Unknown;
142 case KEYMODULES:
143 return KS_Unknown;
144 case KEYOPENCLCXX:
145 return LangOpts.OpenCLCPlusPlus ? KS_Enabled : KS_Unknown;
146 case KEYMSCOMPAT:
147 return LangOpts.MSVCCompat ? KS_Enabled : KS_Unknown;
148 case KEYSYCL:
149 return LangOpts.isSYCL() ? KS_Enabled : KS_Unknown;
150 case KEYCUDA:
151 return LangOpts.CUDA ? KS_Enabled : KS_Unknown;
152 case KEYZOS:
153 return LangOpts.ZOSExt ? KS_Enabled : KS_Unknown;
154 case KEYHLSL:
155 return LangOpts.HLSL ? KS_Enabled : KS_Unknown;
156 case KEYNOCXX:
157 // This is enabled in all non-C++ modes, but might be enabled for other
158 // reasons as well.
159 return LangOpts.CPlusPlus ? KS_Unknown : KS_Enabled;
160 case KEYNOOPENCL:
161 case KEYNOMS18:
162 case KEYNOZOS:
163 case KEYNOHLSL:
164 // The disable behavior for this is handled in getKeywordStatus.
165 return KS_Unknown;
166 case KEYFIXEDPOINT:
167 return LangOpts.FixedPoint ? KS_Enabled : KS_Disabled;
168 case KEYDEFERTS:
169 return LangOpts.DeferTS ? KS_Enabled : KS_Disabled;
170 default:
171 llvm_unreachable("Unknown KeywordStatus flag");
172 }
173}
174
175KeywordStatus clang::getKeywordStatus(const LangOptions &LangOpts,
176 unsigned Flags) {
177 // KEYALL means always enabled, so special case this one.
178 if (Flags == KEYALL) return KS_Enabled;
179 // These are tests that need to 'always win', as they are special in that they
180 // disable based on certain conditions.
181 if (LangOpts.OpenCL && (Flags & KEYNOOPENCL)) return KS_Disabled;
182 if (LangOpts.HLSL && (Flags & KEYNOHLSL))
183 return KS_Disabled;
184 if (LangOpts.MSVCCompat && (Flags & KEYNOMS18) &&
185 !LangOpts.isCompatibleWithMSVC(MajorVersion: LangOptions::MSVC2015))
186 return KS_Disabled;
187 if (LangOpts.ZOSExt && (Flags & KEYNOZOS))
188 return KS_Disabled;
189 KeywordStatus CurStatus = KS_Unknown;
190
191 while (Flags != 0) {
192 unsigned CurFlag = Flags & ~(Flags - 1);
193 Flags = Flags & ~CurFlag;
194 CurStatus = std::max(
195 a: CurStatus,
196 b: getKeywordStatusHelper(LangOpts, Flag: static_cast<TokenKey>(CurFlag)));
197 }
198
199 if (CurStatus == KS_Unknown)
200 return KS_Disabled;
201 return CurStatus;
202}
203
204static bool IsKeywordInCpp(unsigned Flags) {
205 return (Flags & (KEYCXX | KEYCXX11 | KEYCXX20 | BOOLSUPPORT | WCHARSUPPORT |
206 CHAR8SUPPORT)) != 0;
207}
208
209static void MarkIdentifierAsKeywordInCpp(IdentifierTable &Table,
210 StringRef Name) {
211 IdentifierInfo &II = Table.get(Name, TokenCode: tok::identifier);
212 II.setIsKeywordInCPlusPlus();
213 II.setHandleIdentifierCase();
214}
215
216/// AddKeyword - This method is used to associate a token ID with specific
217/// identifiers because they are language keywords. This causes the lexer to
218/// automatically map matching identifiers to specialized token codes.
219static void AddKeyword(StringRef Keyword,
220 tok::TokenKind TokenCode, unsigned Flags,
221 const LangOptions &LangOpts, IdentifierTable &Table) {
222 KeywordStatus AddResult = getKeywordStatus(LangOpts, Flags);
223
224 // Don't add this keyword if disabled in this language and isn't otherwise
225 // special.
226 if (AddResult == KS_Disabled) {
227 // We do not consider any identifiers to be C++ keywords when in
228 // Objective-C because @ effectively introduces a custom grammar where C++
229 // keywords can be used (and similar for selectors). We could enable this
230 // for Objective-C, but it would require more logic to ensure we do not
231 // issue compatibility diagnostics in these cases.
232 if (!LangOpts.ObjC && IsKeywordInCpp(Flags))
233 MarkIdentifierAsKeywordInCpp(Table, Name: Keyword);
234 return;
235 }
236
237 IdentifierInfo &Info =
238 Table.get(Name: Keyword, TokenCode: AddResult == KS_Future ? tok::identifier : TokenCode);
239 Info.setIsExtensionToken(AddResult == KS_Extension);
240 Info.setIsFutureCompatKeyword(AddResult == KS_Future);
241}
242
243/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
244/// representations.
245static void AddCXXOperatorKeyword(StringRef Keyword,
246 tok::TokenKind TokenCode,
247 IdentifierTable &Table) {
248 IdentifierInfo &Info = Table.get(Name: Keyword, TokenCode);
249 Info.setIsCPlusPlusOperatorKeyword();
250}
251
252/// AddObjCKeyword - Register an Objective-C \@keyword like "class" "selector"
253/// or "property".
254static void AddObjCKeyword(StringRef Name,
255 tok::ObjCKeywordKind ObjCID,
256 IdentifierTable &Table) {
257 Table.get(Name).setObjCKeywordID(ObjCID);
258}
259
260static void AddNotableIdentifier(StringRef Name,
261 tok::NotableIdentifierKind BTID,
262 IdentifierTable &Table) {
263 // Don't add 'not_notable' identifier.
264 if (BTID != tok::not_notable) {
265 IdentifierInfo &Info = Table.get(Name, TokenCode: tok::identifier);
266 Info.setNotableIdentifierID(BTID);
267 }
268}
269
270/// AddKeywords - Add all keywords to the symbol table.
271///
272void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
273 // Add keywords and tokens for the current language.
274#define KEYWORD(NAME, FLAGS) \
275 AddKeyword(StringRef(#NAME), tok::kw_ ## NAME, \
276 FLAGS, LangOpts, *this);
277#define ALIAS(NAME, TOK, FLAGS) \
278 AddKeyword(StringRef(NAME), tok::kw_ ## TOK, \
279 FLAGS, LangOpts, *this);
280#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
281 if (LangOpts.CXXOperatorNames) \
282 AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this); \
283 else \
284 MarkIdentifierAsKeywordInCpp(*this, StringRef(#NAME));
285#define OBJC_AT_KEYWORD(NAME) \
286 if (LangOpts.ObjC) \
287 AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this);
288#define NOTABLE_IDENTIFIER(NAME) \
289 AddNotableIdentifier(StringRef(#NAME), tok::NAME, *this);
290
291#define TESTING_KEYWORD(NAME, FLAGS)
292#include "clang/Basic/TokenKinds.def"
293
294 if (LangOpts.ParseUnknownAnytype)
295 AddKeyword(Keyword: "__unknown_anytype", TokenCode: tok::kw___unknown_anytype, Flags: KEYALL,
296 LangOpts, Table&: *this);
297
298 if (LangOpts.DeclSpecKeyword)
299 AddKeyword(Keyword: "__declspec", TokenCode: tok::kw___declspec, Flags: KEYALL, LangOpts, Table&: *this);
300
301 if (LangOpts.IEEE128)
302 AddKeyword(Keyword: "__ieee128", TokenCode: tok::kw___float128, Flags: KEYALL, LangOpts, Table&: *this);
303
304 // Add the 'import' and 'module' contextual keywords.
305 get(Name: "import").setKeywordImport(true);
306 get(Name: "module").setModuleKeyword(true);
307 get(Name: "__preprocessed_import").setKeywordImport(true);
308 get(Name: "__preprocessed_module").setModuleKeyword(true);
309}
310
/// Checks if the specified token kind represents a keyword in the
/// specified language.
///
/// \param LangOpts Language options the keyword flags are evaluated against.
/// \param K Token kind to look up.
/// \returns Status of the keyword in the language; KS_Disabled for any token
/// kind that has no generated case below.
static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts,
                                      tok::TokenKind K) {
  switch (K) {
  // Each KEYWORD(NAME, FLAGS) entry pulled in by the include below expands to
  // one case that evaluates that keyword's FLAGS via getKeywordStatus.
#define KEYWORD(NAME, FLAGS) \
  case tok::kw_##NAME: return getKeywordStatus(LangOpts, FLAGS);
#include "clang/Basic/TokenKinds.def"
  default: return KS_Disabled;
  }
}
323
324/// Returns true if the identifier represents a keyword in the
325/// specified language.
326bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const {
327 switch (getTokenKwStatus(LangOpts, K: getTokenID())) {
328 case KS_Enabled:
329 case KS_Extension:
330 return true;
331 default:
332 return false;
333 }
334}
335
336/// Returns true if the identifier represents a C++ keyword in the
337/// specified language.
338bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const {
339 if (!LangOpts.CPlusPlus || !isKeyword(LangOpts))
340 return false;
341 // This is a C++ keyword if this identifier is not a keyword when checked
342 // using LangOptions without C++ support.
343 LangOptions LangOptsNoCPP = LangOpts;
344 LangOptsNoCPP.CPlusPlus = false;
345 LangOptsNoCPP.CPlusPlus11 = false;
346 LangOptsNoCPP.CPlusPlus20 = false;
347 return !isKeyword(LangOpts: LangOptsNoCPP);
348}
349
350ReservedIdentifierStatus
351IdentifierInfo::isReserved(const LangOptions &LangOpts) const {
352 StringRef Name = getName();
353
354 // '_' is a reserved identifier, but its use is so common (e.g. to store
355 // ignored values) that we don't warn on it.
356 if (Name.size() <= 1)
357 return ReservedIdentifierStatus::NotReserved;
358
359 // [lex.name] p3
360 if (Name[0] == '_') {
361
362 // Each name that begins with an underscore followed by an uppercase letter
363 // or another underscore is reserved.
364 if (Name[1] == '_')
365 return ReservedIdentifierStatus::StartsWithDoubleUnderscore;
366
367 if ('A' <= Name[1] && Name[1] <= 'Z')
368 return ReservedIdentifierStatus::
369 StartsWithUnderscoreFollowedByCapitalLetter;
370
371 // This is a bit misleading: it actually means it's only reserved if we're
372 // at global scope because it starts with an underscore.
373 return ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope;
374 }
375
376 // Each name that contains a double underscore (__) is reserved.
377 if (LangOpts.CPlusPlus && Name.contains(Other: "__"))
378 return ReservedIdentifierStatus::ContainsDoubleUnderscore;
379
380 return ReservedIdentifierStatus::NotReserved;
381}
382
383ReservedLiteralSuffixIdStatus
384IdentifierInfo::isReservedLiteralSuffixId() const {
385 StringRef Name = getName();
386
387 // Note: the diag::warn_deprecated_literal_operator_id diagnostic depends on
388 // this being the first check we do, so if this order changes, we have to fix
389 // that as well.
390 if (Name[0] != '_')
391 return ReservedLiteralSuffixIdStatus::NotStartsWithUnderscore;
392
393 if (Name.contains(Other: "__"))
394 return ReservedLiteralSuffixIdStatus::ContainsDoubleUnderscore;
395
396 return ReservedLiteralSuffixIdStatus::NotReserved;
397}
398
399StringRef IdentifierInfo::deuglifiedName() const {
400 StringRef Name = getName();
401 if (Name.size() >= 2 && Name.front() == '_' &&
402 (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z')))
403 return Name.ltrim(Char: '_');
404 return Name;
405}
406
/// Maps this identifier's spelling to the preprocessor keyword it names.
///
/// \returns The matching tok::pp_* kind, or tok::pp_not_keyword when the
/// spelling is shorter than two characters or matches no entry below.
tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
  // We use a perfect hash function here involving the length of the keyword,
  // the first and third character.  For preprocessor ID's there are no
  // collisions (if there were, the switch below would complain about duplicate
  // case values).  Note that this depends on 'if' being null terminated.

  // HASH packs the length into the bits above bit 5 and the difference of the
  // first and third characters, masked to 6 bits, into the low bits.
#define HASH(LEN, FIRST, THIRD) \
  (LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63)
  // CASE emits the case label for one keyword; since the hash only looks at
  // the length and two characters, memcmp confirms the whole spelling.
#define CASE(LEN, FIRST, THIRD, NAME) \
  case HASH(LEN, FIRST, THIRD): \
    return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME

  unsigned Len = getLength();
  if (Len < 2) return tok::pp_not_keyword;
  const char *Name = getNameStart();

  // Handled before the hash switch: an identifier that starts with '_' and
  // reports the import (resp. module) keyword flag is taken to be
  // __preprocessed_import (resp. __preprocessed_module). AddKeywords sets
  // those flags on "import"/"__preprocessed_import" and
  // "module"/"__preprocessed_module".
  if (Name[0] == '_' && isImportKeyword())
    return tok::pp___preprocessed_import;
  if (Name[0] == '_' && isModuleKeyword())
    return tok::pp___preprocessed_module;

  // For a two-character name, Name[2] reads the byte just past the spelling;
  // this is the null terminator the comment at the top relies on (the "if"
  // entry passes '\0' as its third character).
  // clang-format off
  switch (HASH(Len, Name[0], Name[2])) {
  default: return tok::pp_not_keyword;
  CASE( 2, 'i', '\0', if);
  CASE( 4, 'e', 'i', elif);
  CASE( 4, 'e', 's', else);
  CASE( 4, 'l', 'n', line);
  CASE( 4, 's', 'c', sccs);
  CASE( 5, 'e', 'b', embed);
  CASE( 5, 'e', 'd', endif);
  CASE( 5, 'e', 'r', error);
  CASE( 5, 'i', 'e', ident);
  CASE( 5, 'i', 'd', ifdef);
  CASE( 5, 'u', 'd', undef);

  CASE( 6, 'a', 's', assert);
  CASE( 6, 'd', 'f', define);
  CASE( 6, 'i', 'n', ifndef);
  CASE( 6, 'i', 'p', import);
  CASE( 6, 'm', 'd', module);
  CASE( 6, 'p', 'a', pragma);

  CASE( 7, 'd', 'f', defined);
  CASE( 7, 'e', 'i', elifdef);
  CASE( 7, 'i', 'c', include);
  CASE( 7, 'w', 'r', warning);

  CASE( 8, 'e', 'i', elifndef);
  CASE( 8, 'u', 'a', unassert);
  CASE(12, 'i', 'c', include_next);

  CASE(14, '_', 'p', __public_macro);

  CASE(15, '_', 'p', __private_macro);

  CASE(16, '_', 'i', __include_macros);
#undef CASE
#undef HASH
  }
  // clang-format on
}
469
470//===----------------------------------------------------------------------===//
471// Stats Implementation
472//===----------------------------------------------------------------------===//
473
474/// PrintStats - Print statistics about how well the identifier table is doing
475/// at hashing identifiers.
476void IdentifierTable::PrintStats() const {
477 unsigned NumBuckets = HashTable.getNumBuckets();
478 unsigned NumIdentifiers = HashTable.getNumItems();
479 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
480 unsigned AverageIdentifierSize = 0;
481 unsigned MaxIdentifierLength = 0;
482
483 // TODO: Figure out maximum times an identifier had to probe for -stats.
484 for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
485 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
486 unsigned IdLen = I->getKeyLength();
487 AverageIdentifierSize += IdLen;
488 if (MaxIdentifierLength < IdLen)
489 MaxIdentifierLength = IdLen;
490 }
491
492 fprintf(stderr, format: "\n*** Identifier Table Stats:\n");
493 fprintf(stderr, format: "# Identifiers: %d\n", NumIdentifiers);
494 fprintf(stderr, format: "# Empty Buckets: %d\n", NumEmptyBuckets);
495 fprintf(stderr, format: "Hash density (#identifiers per bucket): %f\n",
496 NumIdentifiers/(double)NumBuckets);
497 fprintf(stderr, format: "Ave identifier length: %f\n",
498 (AverageIdentifierSize/(double)NumIdentifiers));
499 fprintf(stderr, format: "Max identifier length: %d\n", MaxIdentifierLength);
500
501 // Compute statistics about the memory allocated for identifiers.
502 HashTable.getAllocator().PrintStats();
503}
504
505//===----------------------------------------------------------------------===//
506// SelectorTable Implementation
507//===----------------------------------------------------------------------===//
508
509unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
510 return DenseMapInfo<void*>::getHashValue(PtrVal: S.getAsOpaquePtr());
511}
512
513bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const {
514 assert(!Names.empty() && "must have >= 1 selector slots");
515 if (getNumArgs() != Names.size())
516 return false;
517 for (unsigned I = 0, E = Names.size(); I != E; ++I) {
518 if (getNameForSlot(argIndex: I) != Names[I])
519 return false;
520 }
521 return true;
522}
523
524bool Selector::isUnarySelector(StringRef Name) const {
525 return isUnarySelector() && getNameForSlot(argIndex: 0) == Name;
526}
527
528unsigned Selector::getNumArgs() const {
529 unsigned IIF = getIdentifierInfoFlag();
530 if (IIF <= ZeroArg)
531 return 0;
532 if (IIF == OneArg)
533 return 1;
534 // We point to a MultiKeywordSelector.
535 MultiKeywordSelector *SI = getMultiKeywordSelector();
536 return SI->getNumArgs();
537}
538
539const IdentifierInfo *
540Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
541 if (getIdentifierInfoFlag() < MultiArg) {
542 assert(argIndex == 0 && "illegal keyword index");
543 return getAsIdentifierInfo();
544 }
545
546 // We point to a MultiKeywordSelector.
547 MultiKeywordSelector *SI = getMultiKeywordSelector();
548 return SI->getIdentifierInfoForSlot(i: argIndex);
549}
550
551StringRef Selector::getNameForSlot(unsigned int argIndex) const {
552 const IdentifierInfo *II = getIdentifierInfoForSlot(argIndex);
553 return II ? II->getName() : StringRef();
554}
555
556std::string MultiKeywordSelector::getName() const {
557 SmallString<256> Str;
558 llvm::raw_svector_ostream OS(Str);
559 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
560 if (*I)
561 OS << (*I)->getName();
562 OS << ':';
563 }
564
565 return std::string(OS.str());
566}
567
568std::string Selector::getAsString() const {
569 if (isNull())
570 return "<null selector>";
571
572 if (getIdentifierInfoFlag() < MultiArg) {
573 const IdentifierInfo *II = getAsIdentifierInfo();
574
575 if (getNumArgs() == 0) {
576 assert(II && "If the number of arguments is 0 then II is guaranteed to "
577 "not be null.");
578 return std::string(II->getName());
579 }
580
581 if (!II)
582 return ":";
583
584 return II->getName().str() + ":";
585 }
586
587 // We have a multiple keyword selector.
588 return getMultiKeywordSelector()->getName();
589}
590
/// Writes the string form of this selector, as produced by getAsString(), to
/// \p OS.
void Selector::print(llvm::raw_ostream &OS) const {
  OS << getAsString();
}
594
595LLVM_DUMP_METHOD void Selector::dump() const { print(OS&: llvm::errs()); }
596
597/// Interpreting the given string using the normal CamelCase
598/// conventions, determine whether the given string starts with the
599/// given "word", which is assumed to end in a lowercase letter.
600static bool startsWithWord(StringRef name, StringRef word) {
601 if (name.size() < word.size()) return false;
602 return ((name.size() == word.size() || !isLowercase(c: name[word.size()])) &&
603 name.starts_with(Prefix: word));
604}
605
606ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) {
607 const IdentifierInfo *first = sel.getIdentifierInfoForSlot(argIndex: 0);
608 if (!first) return OMF_None;
609
610 StringRef name = first->getName();
611 if (sel.isUnarySelector()) {
612 if (name == "autorelease") return OMF_autorelease;
613 if (name == "dealloc") return OMF_dealloc;
614 if (name == "finalize") return OMF_finalize;
615 if (name == "release") return OMF_release;
616 if (name == "retain") return OMF_retain;
617 if (name == "retainCount") return OMF_retainCount;
618 if (name == "self") return OMF_self;
619 if (name == "initialize") return OMF_initialize;
620 }
621
622 if (name == "performSelector" || name == "performSelectorInBackground" ||
623 name == "performSelectorOnMainThread")
624 return OMF_performSelector;
625
626 // The other method families may begin with a prefix of underscores.
627 name = name.ltrim(Char: '_');
628
629 if (name.empty()) return OMF_None;
630 switch (name.front()) {
631 case 'a':
632 if (startsWithWord(name, word: "alloc")) return OMF_alloc;
633 break;
634 case 'c':
635 if (startsWithWord(name, word: "copy")) return OMF_copy;
636 break;
637 case 'i':
638 if (startsWithWord(name, word: "init")) return OMF_init;
639 break;
640 case 'm':
641 if (startsWithWord(name, word: "mutableCopy")) return OMF_mutableCopy;
642 break;
643 case 'n':
644 if (startsWithWord(name, word: "new")) return OMF_new;
645 break;
646 default:
647 break;
648 }
649
650 return OMF_None;
651}
652
653ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) {
654 const IdentifierInfo *first = sel.getIdentifierInfoForSlot(argIndex: 0);
655 if (!first) return OIT_None;
656
657 StringRef name = first->getName();
658
659 if (name.empty()) return OIT_None;
660 switch (name.front()) {
661 case 'a':
662 if (startsWithWord(name, word: "array")) return OIT_Array;
663 break;
664 case 'd':
665 if (startsWithWord(name, word: "default")) return OIT_ReturnsSelf;
666 if (startsWithWord(name, word: "dictionary")) return OIT_Dictionary;
667 break;
668 case 's':
669 if (startsWithWord(name, word: "shared")) return OIT_ReturnsSelf;
670 if (startsWithWord(name, word: "standard")) return OIT_Singleton;
671 break;
672 case 'i':
673 if (startsWithWord(name, word: "init")) return OIT_Init;
674 break;
675 default:
676 break;
677 }
678 return OIT_None;
679}
680
681ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) {
682 const IdentifierInfo *first = sel.getIdentifierInfoForSlot(argIndex: 0);
683 if (!first) return SFF_None;
684
685 StringRef name = first->getName();
686
687 switch (name.front()) {
688 case 'a':
689 if (name == "appendFormat") return SFF_NSString;
690 break;
691
692 case 'i':
693 if (name == "initWithFormat") return SFF_NSString;
694 break;
695
696 case 'l':
697 if (name == "localizedStringWithFormat") return SFF_NSString;
698 break;
699
700 case 's':
701 if (name == "stringByAppendingFormat" ||
702 name == "stringWithFormat") return SFF_NSString;
703 break;
704 }
705 return SFF_None;
706}
707
namespace {

/// Concrete state behind SelectorTable's opaque Impl pointer.
struct SelectorTableImpl {
  /// Uniquing set of the multi-keyword selectors created by getSelector().
  llvm::FoldingSet<MultiKeywordSelector> Table;
  /// Allocator the MultiKeywordSelector objects are carved from.
  llvm::BumpPtrAllocator Allocator;
};

} // namespace
716
717static SelectorTableImpl &getSelectorTableImpl(void *P) {
718 return *static_cast<SelectorTableImpl*>(P);
719}
720
721SmallString<64>
722SelectorTable::constructSetterName(StringRef Name) {
723 SmallString<64> SetterName("set");
724 SetterName += Name;
725 SetterName[3] = toUppercase(c: SetterName[3]);
726 return SetterName;
727}
728
729Selector
730SelectorTable::constructSetterSelector(IdentifierTable &Idents,
731 SelectorTable &SelTable,
732 const IdentifierInfo *Name) {
733 IdentifierInfo *SetterName =
734 &Idents.get(Name: constructSetterName(Name: Name->getName()));
735 return SelTable.getUnarySelector(ID: SetterName);
736}
737
738std::string SelectorTable::getPropertyNameFromSetterSelector(Selector Sel) {
739 StringRef Name = Sel.getNameForSlot(argIndex: 0);
740 assert(Name.starts_with("set") && "invalid setter name");
741 return (Twine(toLowercase(c: Name[3])) + Name.drop_front(N: 4)).str();
742}
743
744size_t SelectorTable::getTotalMemory() const {
745 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(P: Impl);
746 return SelTabImpl.Allocator.getTotalMemory();
747}
748
749Selector SelectorTable::getSelector(unsigned nKeys,
750 const IdentifierInfo **IIV) {
751 if (nKeys < 2)
752 return Selector(IIV[0], nKeys);
753
754 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(P: Impl);
755
756 // Unique selector, to guarantee there is one per name.
757 llvm::FoldingSetNodeID ID;
758 MultiKeywordSelector::Profile(ID, ArgTys: IIV, NumArgs: nKeys);
759
760 void *InsertPos = nullptr;
761 if (MultiKeywordSelector *SI =
762 SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
763 return Selector(SI);
764
765 // MultiKeywordSelector objects are not allocated with new because they have a
766 // variable size array (for parameter types) at the end of them.
767 unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
768 MultiKeywordSelector *SI =
769 (MultiKeywordSelector *)SelTabImpl.Allocator.Allocate(
770 Size, Alignment: alignof(MultiKeywordSelector));
771 new (SI) MultiKeywordSelector(nKeys, IIV);
772 SelTabImpl.Table.InsertNode(N: SI, InsertPos);
773 return Selector(SI);
774}
775
/// Creates an empty selector table by heap-allocating its SelectorTableImpl
/// state and storing it in Impl.
SelectorTable::SelectorTable() {
  Impl = new SelectorTableImpl();
}
779
780SelectorTable::~SelectorTable() {
781 delete &getSelectorTableImpl(P: Impl);
782}
783
/// Returns the spelling of the overloaded operator \p Operator, taken from
/// the Spelling column of OperatorKinds.def.
///
/// \returns nullptr for OO_None and NUM_OVERLOADED_OPERATORS.
const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
  switch (Operator) {
  case OO_None:
  case NUM_OVERLOADED_OPERATORS:
    return nullptr;

  // One case per OVERLOADED_OPERATOR entry pulled in by the include below.
#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
  case OO_##Name: return Spelling;
#include "clang/Basic/OperatorKinds.def"
  }

  llvm_unreachable("Invalid OverloadedOperatorKind!");
}
797
798StringRef clang::getNullabilitySpelling(NullabilityKind kind,
799 bool isContextSensitive) {
800 switch (kind) {
801 case NullabilityKind::NonNull:
802 return isContextSensitive ? "nonnull" : "_Nonnull";
803
804 case NullabilityKind::Nullable:
805 return isContextSensitive ? "nullable" : "_Nullable";
806
807 case NullabilityKind::NullableResult:
808 assert(!isContextSensitive &&
809 "_Nullable_result isn't supported as context-sensitive keyword");
810 return "_Nullable_result";
811
812 case NullabilityKind::Unspecified:
813 return isContextSensitive ? "null_unspecified" : "_Null_unspecified";
814 }
815 llvm_unreachable("Unknown nullability kind.");
816}
817
818llvm::raw_ostream &clang::operator<<(llvm::raw_ostream &OS,
819 NullabilityKind NK) {
820 switch (NK) {
821 case NullabilityKind::NonNull:
822 return OS << "NonNull";
823 case NullabilityKind::Nullable:
824 return OS << "Nullable";
825 case NullabilityKind::NullableResult:
826 return OS << "NullableResult";
827 case NullabilityKind::Unspecified:
828 return OS << "Unspecified";
829 }
830 llvm_unreachable("Unknown nullability kind.");
831}
832
/// Selects the "keyword in a newer standard" warning for \p II.
///
/// \param II Identifier that must be flagged as a future-compat keyword.
/// \param LangOpts Only consulted for CPlusPlus, which selects between the
/// C++ and the C diagnostics.
/// \returns In C++ mode, warn_cxx11_keyword or warn_cxx20_keyword; otherwise
/// warn_c99_keyword or warn_c23_keyword. It is a programming error if the
/// keyword's flags match none of these.
diag::kind
IdentifierTable::getFutureCompatDiagKind(const IdentifierInfo &II,
                                         const LangOptions &LangOpts) {
  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

  // Recover the keyword's FLAGS from TokenKinds.def by spelling: every
  // KEYWORD entry becomes one .Case. There is no .Default, so the name is
  // expected to match an entry.
  unsigned Flags = llvm::StringSwitch<unsigned>(II.getName())
#define KEYWORD(NAME, FLAGS) .Case(#NAME, FLAGS)
#include "clang/Basic/TokenKinds.def"
#undef KEYWORD
      ;

  if (LangOpts.CPlusPlus) {
    // C++11 is tested first, so it wins over the C++20 checks below.
    if ((Flags & KEYCXX11) == KEYCXX11)
      return diag::warn_cxx11_keyword;

    // char8_t is not modeled as a CXX20_KEYWORD because it's not
    // unconditionally enabled in C++20 mode. (It can be disabled
    // by -fno-char8_t.)
    if (((Flags & KEYCXX20) == KEYCXX20) ||
        ((Flags & CHAR8SUPPORT) == CHAR8SUPPORT))
      return diag::warn_cxx20_keyword;
  } else {
    // C99 is tested first, so it wins over C23.
    if ((Flags & KEYC99) == KEYC99)
      return diag::warn_c99_keyword;
    if ((Flags & KEYC23) == KEYC23)
      return diag::warn_c23_keyword;
  }

  llvm_unreachable(
      "Keyword not known to come from a newer Standard or proposed Standard");
}
864