1//===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
11/// clang::Selector interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
16#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17
18#include "clang/Basic/Builtins.h"
19#include "clang/Basic/DiagnosticIDs.h"
20#include "clang/Basic/LLVM.h"
21#include "clang/Basic/TokenKinds.h"
22#include "llvm/ADT/DenseMapInfo.h"
23#include "llvm/ADT/FoldingSet.h"
24#include "llvm/ADT/PointerIntPair.h"
25#include "llvm/ADT/PointerUnion.h"
26#include "llvm/ADT/SmallString.h"
27#include "llvm/ADT/StringMap.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/Support/Allocator.h"
30#include "llvm/Support/PointerLikeTypeTraits.h"
31#include "llvm/Support/type_traits.h"
32#include <cassert>
33#include <cstddef>
34#include <cstdint>
35#include <cstring>
36#include <string>
37#include <utility>
38
39namespace clang {
40
41class DeclarationName;
42class DeclarationNameTable;
43class IdentifierInfo;
44class LangOptions;
45class MultiKeywordSelector;
46class SourceLocation;
47
/// Describes whether an identifier is reserved and, if so, which naming
/// pattern makes it reserved.
///
/// NotReserved is zero; every other enumerator denotes some form of
/// reservation. The enumerator values are spelled out so the numbering stays
/// visible at a glance.
enum class ReservedIdentifierStatus {
  NotReserved = 0,
  StartsWithUnderscoreAtGlobalScope = 1,
  StartsWithUnderscoreAndIsExternC = 2,
  StartsWithDoubleUnderscore = 3,
  StartsWithUnderscoreFollowedByCapitalLetter = 4,
  ContainsDoubleUnderscore = 5,
};
56
/// Describes whether a literal-suffix identifier is reserved and, if so, why.
///
/// NotReserved is zero; the other enumerators name the reason for the
/// reservation.
enum class ReservedLiteralSuffixIdStatus {
  NotReserved = 0,
  NotStartsWithUnderscore = 1,
  ContainsDoubleUnderscore = 2,
};
62
63/// Determine whether an identifier is reserved for use as a name at global
64/// scope. Such identifiers might be implementation-specific global functions
65/// or variables.
66inline bool isReservedAtGlobalScope(ReservedIdentifierStatus Status) {
67 return Status != ReservedIdentifierStatus::NotReserved;
68}
69
70/// Determine whether an identifier is reserved in all contexts. Such
71/// identifiers might be implementation-specific keywords or macros, for
72/// example.
73inline bool isReservedInAllContexts(ReservedIdentifierStatus Status) {
74 return Status != ReservedIdentifierStatus::NotReserved &&
75 Status != ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope &&
76 Status != ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC;
77}
78
/// A simple pair of identifier info and location: \c first is the
/// IdentifierInfo pointer, \c second is the SourceLocation.
using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;
81
/// IdentifierInfo and other related classes are aligned to
/// 8 bytes so that DeclarationName can use the lower 3 bits
/// of a pointer to one of these classes.
///
/// This constant is the argument of \c alignas on those classes.
enum { IdentifierInfoAlignment = 8 };
86
/// Width, in bits, of the IdentifierInfo bitfield that stores an
/// InterestingIdentifier value.
static constexpr int InterestingIdentifierBits{16};
88
/// The "layout" of InterestingIdentifier is:
///  - ObjCKeywordKind enumerators
///  - NotableIdentifierKind enumerators
///  - Builtin::ID enumerators
///  - NotInterestingIdentifier
///
/// The enumerators are produced by X-macro expansion, so their numeric values
/// follow the order of the entries in the included .def/.inc files. The
/// NUM_* and NotBuiltin enumerators mark the boundaries between the ranges
/// and are what IdentifierInfo's getters compare against and offset by.
enum class InterestingIdentifier {
// Objective-C '@' keywords; each entry becomes objc_<name>.
#define OBJC_AT_KEYWORD(X) objc_##X,
#include "clang/Basic/TokenKinds.def"
  NUM_OBJC_KEYWORDS,

// Notable identifiers, spelled exactly as listed in the .def file.
#define NOTABLE_IDENTIFIER(X) X,
#include "clang/Basic/TokenKinds.def"
  NUM_OBJC_KEYWORDS_AND_NOTABLE_IDENTIFIERS,

  // Start of the builtin range; each builtin entry becomes BI<ID>.
  NotBuiltin,
#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
#include "clang/Basic/Builtins.inc"
  FirstTSBuiltin,

  // Sentinel meaning "none of the above". IdentifierInfo stores it by
  // default; the value fits in the 16-bit InterestingIdentifierID bitfield.
  NotInterestingIdentifier = 65534
};
110
111/// One of these records is kept for each identifier that
112/// is lexed. This contains information about whether the token was \#define'd,
113/// is a language keyword, or if it is a front-end token of some sort (e.g. a
114/// variable or function name). The preprocessor keeps this information in a
115/// set, and all tok::identifier tokens have a pointer to one of these.
116/// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits.
117class alignas(IdentifierInfoAlignment) IdentifierInfo {
118 friend class IdentifierTable;
119
120 // Front-end token ID or tok::identifier.
121 LLVM_PREFERRED_TYPE(tok::TokenKind)
122 unsigned TokenID : 9;
123
124 LLVM_PREFERRED_TYPE(InterestingIdentifier)
125 unsigned InterestingIdentifierID : InterestingIdentifierBits;
126
127 // True if there is a #define for this.
128 LLVM_PREFERRED_TYPE(bool)
129 unsigned HasMacro : 1;
130
131 // True if there was a #define for this.
132 LLVM_PREFERRED_TYPE(bool)
133 unsigned HadMacro : 1;
134
135 // True if the identifier is a language extension.
136 LLVM_PREFERRED_TYPE(bool)
137 unsigned IsExtension : 1;
138
139 // True if the identifier is a keyword in a newer or proposed Standard.
140 LLVM_PREFERRED_TYPE(bool)
141 unsigned IsFutureCompatKeyword : 1;
142
143 // True if the identifier is poisoned.
144 LLVM_PREFERRED_TYPE(bool)
145 unsigned IsPoisoned : 1;
146
147 // True if the identifier is a C++ operator keyword.
148 LLVM_PREFERRED_TYPE(bool)
149 unsigned IsCPPOperatorKeyword : 1;
150
151 // Internal bit set by the member function RecomputeNeedsHandleIdentifier.
152 // See comment about RecomputeNeedsHandleIdentifier for more info.
153 LLVM_PREFERRED_TYPE(bool)
154 unsigned NeedsHandleIdentifier : 1;
155
156 // True if the identifier was loaded (at least partially) from an AST file.
157 LLVM_PREFERRED_TYPE(bool)
158 unsigned IsFromAST : 1;
159
160 // True if the identifier has changed from the definition
161 // loaded from an AST file.
162 LLVM_PREFERRED_TYPE(bool)
163 unsigned ChangedAfterLoad : 1;
164
165 // True if the identifier's frontend information has changed from the
166 // definition loaded from an AST file.
167 LLVM_PREFERRED_TYPE(bool)
168 unsigned FEChangedAfterLoad : 1;
169
170 // True if revertTokenIDToIdentifier was called.
171 LLVM_PREFERRED_TYPE(bool)
172 unsigned RevertedTokenID : 1;
173
174 // True if there may be additional information about
175 // this identifier stored externally.
176 LLVM_PREFERRED_TYPE(bool)
177 unsigned OutOfDate : 1;
178
179 // True if this is the 'import' contextual keyword.
180 LLVM_PREFERRED_TYPE(bool)
181 unsigned IsModulesImport : 1;
182
183 // True if this is a mangled OpenMP variant name.
184 LLVM_PREFERRED_TYPE(bool)
185 unsigned IsMangledOpenMPVariantName : 1;
186
187 // True if this is a deprecated macro.
188 LLVM_PREFERRED_TYPE(bool)
189 unsigned IsDeprecatedMacro : 1;
190
191 // True if this macro is unsafe in headers.
192 LLVM_PREFERRED_TYPE(bool)
193 unsigned IsRestrictExpansion : 1;
194
195 // True if this macro is final.
196 LLVM_PREFERRED_TYPE(bool)
197 unsigned IsFinal : 1;
198
199 // 22 bits left in a 64-bit word.
200
201 // Managed by the language front-end.
202 void *FETokenInfo = nullptr;
203
204 llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
205
206 IdentifierInfo()
207 : TokenID(tok::identifier),
208 InterestingIdentifierID(llvm::to_underlying(
209 E: InterestingIdentifier::NotInterestingIdentifier)),
210 HasMacro(false), HadMacro(false), IsExtension(false),
211 IsFutureCompatKeyword(false), IsPoisoned(false),
212 IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false),
213 IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false),
214 RevertedTokenID(false), OutOfDate(false), IsModulesImport(false),
215 IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false),
216 IsRestrictExpansion(false), IsFinal(false) {}
217
218public:
219 IdentifierInfo(const IdentifierInfo &) = delete;
220 IdentifierInfo &operator=(const IdentifierInfo &) = delete;
221 IdentifierInfo(IdentifierInfo &&) = delete;
222 IdentifierInfo &operator=(IdentifierInfo &&) = delete;
223
224 /// Return true if this is the identifier for the specified string.
225 ///
226 /// This is intended to be used for string literals only: II->isStr("foo").
227 template <std::size_t StrLen>
228 bool isStr(const char (&Str)[StrLen]) const {
229 return getLength() == StrLen-1 &&
230 memcmp(getNameStart(), Str, StrLen-1) == 0;
231 }
232
233 /// Return true if this is the identifier for the specified StringRef.
234 bool isStr(llvm::StringRef Str) const {
235 llvm::StringRef ThisStr(getNameStart(), getLength());
236 return ThisStr == Str;
237 }
238
239 /// Return the beginning of the actual null-terminated string for this
240 /// identifier.
241 const char *getNameStart() const { return Entry->getKeyData(); }
242
243 /// Efficiently return the length of this identifier info.
244 unsigned getLength() const { return Entry->getKeyLength(); }
245
246 /// Return the actual identifier string.
247 StringRef getName() const {
248 return StringRef(getNameStart(), getLength());
249 }
250
251 /// Return true if this identifier is \#defined to some other value.
252 /// \note The current definition may be in a module and not currently visible.
253 bool hasMacroDefinition() const {
254 return HasMacro;
255 }
256 void setHasMacroDefinition(bool Val) {
257 if (HasMacro == Val) return;
258
259 HasMacro = Val;
260 if (Val) {
261 NeedsHandleIdentifier = true;
262 HadMacro = true;
263 } else {
264 // If this is a final macro, make the deprecation and header unsafe bits
265 // stick around after the undefinition so they apply to any redefinitions.
266 if (!IsFinal) {
267 // Because calling the setters of these calls recomputes, just set them
268 // manually to avoid recomputing a bunch of times.
269 IsDeprecatedMacro = false;
270 IsRestrictExpansion = false;
271 }
272 RecomputeNeedsHandleIdentifier();
273 }
274 }
275 /// Returns true if this identifier was \#defined to some value at any
276 /// moment. In this case there should be an entry for the identifier in the
277 /// macro history table in Preprocessor.
278 bool hadMacroDefinition() const {
279 return HadMacro;
280 }
281
282 bool isDeprecatedMacro() const { return IsDeprecatedMacro; }
283
284 void setIsDeprecatedMacro(bool Val) {
285 if (IsDeprecatedMacro == Val)
286 return;
287 IsDeprecatedMacro = Val;
288 if (Val)
289 NeedsHandleIdentifier = true;
290 else
291 RecomputeNeedsHandleIdentifier();
292 }
293
294 bool isRestrictExpansion() const { return IsRestrictExpansion; }
295
296 void setIsRestrictExpansion(bool Val) {
297 if (IsRestrictExpansion == Val)
298 return;
299 IsRestrictExpansion = Val;
300 if (Val)
301 NeedsHandleIdentifier = true;
302 else
303 RecomputeNeedsHandleIdentifier();
304 }
305
306 bool isFinal() const { return IsFinal; }
307
308 void setIsFinal(bool Val) { IsFinal = Val; }
309
310 /// If this is a source-language token (e.g. 'for'), this API
311 /// can be used to cause the lexer to map identifiers to source-language
312 /// tokens.
313 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
314
315 /// True if revertTokenIDToIdentifier() was called.
316 bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
317
318 /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
319 /// compatibility.
320 ///
321 /// TokenID is normally read-only but there are 2 instances where we revert it
322 /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
323 /// using this method so we can inform serialization about it.
324 void revertTokenIDToIdentifier() {
325 assert(TokenID != tok::identifier && "Already at tok::identifier");
326 TokenID = tok::identifier;
327 RevertedTokenID = true;
328 }
329 void revertIdentifierToTokenID(tok::TokenKind TK) {
330 assert(TokenID == tok::identifier && "Should be at tok::identifier");
331 TokenID = TK;
332 RevertedTokenID = false;
333 }
334
335 /// Return the preprocessor keyword ID for this identifier.
336 ///
337 /// For example, "define" will return tok::pp_define.
338 tok::PPKeywordKind getPPKeywordID() const;
339
340 /// Return the Objective-C keyword ID for the this identifier.
341 ///
342 /// For example, 'class' will return tok::objc_class if ObjC is enabled.
343 tok::ObjCKeywordKind getObjCKeywordID() const {
344 assert(0 == llvm::to_underlying(InterestingIdentifier::objc_not_keyword));
345 auto Value = static_cast<InterestingIdentifier>(InterestingIdentifierID);
346 if (Value < InterestingIdentifier::NUM_OBJC_KEYWORDS)
347 return static_cast<tok::ObjCKeywordKind>(InterestingIdentifierID);
348 return tok::objc_not_keyword;
349 }
350 void setObjCKeywordID(tok::ObjCKeywordKind ID) {
351 assert(0 == llvm::to_underlying(InterestingIdentifier::objc_not_keyword));
352 InterestingIdentifierID = ID;
353 assert(getObjCKeywordID() == ID && "ID too large for field!");
354 }
355
356 /// Return a value indicating whether this is a builtin function.
357 unsigned getBuiltinID() const {
358 auto Value = static_cast<InterestingIdentifier>(InterestingIdentifierID);
359 if (Value >
360 InterestingIdentifier::NUM_OBJC_KEYWORDS_AND_NOTABLE_IDENTIFIERS &&
361 Value != InterestingIdentifier::NotInterestingIdentifier) {
362 auto FirstBuiltin =
363 llvm::to_underlying(E: InterestingIdentifier::NotBuiltin);
364 return static_cast<Builtin::ID>(InterestingIdentifierID - FirstBuiltin);
365 }
366 return Builtin::ID::NotBuiltin;
367 }
368 void setBuiltinID(unsigned ID) {
369 assert(ID != Builtin::ID::NotBuiltin);
370 auto FirstBuiltin = llvm::to_underlying(E: InterestingIdentifier::NotBuiltin);
371 InterestingIdentifierID = ID + FirstBuiltin;
372 assert(getBuiltinID() == ID && "ID too large for field!");
373 }
374 void clearBuiltinID() {
375 InterestingIdentifierID =
376 llvm::to_underlying(E: InterestingIdentifier::NotInterestingIdentifier);
377 }
378
379 tok::NotableIdentifierKind getNotableIdentifierID() const {
380 auto Value = static_cast<InterestingIdentifier>(InterestingIdentifierID);
381 if (Value > InterestingIdentifier::NUM_OBJC_KEYWORDS &&
382 Value <
383 InterestingIdentifier::NUM_OBJC_KEYWORDS_AND_NOTABLE_IDENTIFIERS) {
384 auto FirstNotableIdentifier =
385 1 + llvm::to_underlying(E: InterestingIdentifier::NUM_OBJC_KEYWORDS);
386 return static_cast<tok::NotableIdentifierKind>(InterestingIdentifierID -
387 FirstNotableIdentifier);
388 }
389 return tok::not_notable;
390 }
391 void setNotableIdentifierID(unsigned ID) {
392 assert(ID != tok::not_notable);
393 auto FirstNotableIdentifier =
394 1 + llvm::to_underlying(E: InterestingIdentifier::NUM_OBJC_KEYWORDS);
395 InterestingIdentifierID = ID + FirstNotableIdentifier;
396 assert(getNotableIdentifierID() == ID && "ID too large for field!");
397 }
398
399 unsigned getObjCOrBuiltinID() const { return InterestingIdentifierID; }
400 void setObjCOrBuiltinID(unsigned ID) { InterestingIdentifierID = ID; }
401
402 /// get/setExtension - Initialize information about whether or not this
403 /// language token is an extension. This controls extension warnings, and is
404 /// only valid if a custom token ID is set.
405 bool isExtensionToken() const { return IsExtension; }
406 void setIsExtensionToken(bool Val) {
407 IsExtension = Val;
408 if (Val)
409 NeedsHandleIdentifier = true;
410 else
411 RecomputeNeedsHandleIdentifier();
412 }
413
414 /// is/setIsFutureCompatKeyword - Initialize information about whether or not
415 /// this language token is a keyword in a newer or proposed Standard. This
416 /// controls compatibility warnings, and is only true when not parsing the
417 /// corresponding Standard. Once a compatibility problem has been diagnosed
418 /// with this keyword, the flag will be cleared.
419 bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
420 void setIsFutureCompatKeyword(bool Val) {
421 IsFutureCompatKeyword = Val;
422 if (Val)
423 NeedsHandleIdentifier = true;
424 else
425 RecomputeNeedsHandleIdentifier();
426 }
427
428 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the
429 /// Preprocessor will emit an error every time this token is used.
430 void setIsPoisoned(bool Value = true) {
431 IsPoisoned = Value;
432 if (Value)
433 NeedsHandleIdentifier = true;
434 else
435 RecomputeNeedsHandleIdentifier();
436 }
437
438 /// Return true if this token has been poisoned.
439 bool isPoisoned() const { return IsPoisoned; }
440
441 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
442 /// this identifier is a C++ alternate representation of an operator.
443 void setIsCPlusPlusOperatorKeyword(bool Val = true) {
444 IsCPPOperatorKeyword = Val;
445 }
446 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
447
448 /// Return true if this token is a keyword in the specified language.
449 bool isKeyword(const LangOptions &LangOpts) const;
450
451 /// Return true if this token is a C++ keyword in the specified
452 /// language.
453 bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
454
455 /// Get and set FETokenInfo. The language front-end is allowed to associate
456 /// arbitrary metadata with this token.
457 void *getFETokenInfo() const { return FETokenInfo; }
458 void setFETokenInfo(void *T) { FETokenInfo = T; }
459
460 /// Return true if the Preprocessor::HandleIdentifier must be called
461 /// on a token of this identifier.
462 ///
463 /// If this returns false, we know that HandleIdentifier will not affect
464 /// the token.
465 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
466
467 /// Return true if the identifier in its current state was loaded
468 /// from an AST file.
469 bool isFromAST() const { return IsFromAST; }
470
471 void setIsFromAST() { IsFromAST = true; }
472
473 /// Determine whether this identifier has changed since it was loaded
474 /// from an AST file.
475 bool hasChangedSinceDeserialization() const {
476 return ChangedAfterLoad;
477 }
478
479 /// Note that this identifier has changed since it was loaded from
480 /// an AST file.
481 void setChangedSinceDeserialization() {
482 ChangedAfterLoad = true;
483 }
484
485 /// Determine whether the frontend token information for this
486 /// identifier has changed since it was loaded from an AST file.
487 bool hasFETokenInfoChangedSinceDeserialization() const {
488 return FEChangedAfterLoad;
489 }
490
491 /// Note that the frontend token information for this identifier has
492 /// changed since it was loaded from an AST file.
493 void setFETokenInfoChangedSinceDeserialization() {
494 FEChangedAfterLoad = true;
495 }
496
497 /// Determine whether the information for this identifier is out of
498 /// date with respect to the external source.
499 bool isOutOfDate() const { return OutOfDate; }
500
501 /// Set whether the information for this identifier is out of
502 /// date with respect to the external source.
503 void setOutOfDate(bool OOD) {
504 OutOfDate = OOD;
505 if (OOD)
506 NeedsHandleIdentifier = true;
507 else
508 RecomputeNeedsHandleIdentifier();
509 }
510
511 /// Determine whether this is the contextual keyword \c import.
512 bool isModulesImport() const { return IsModulesImport; }
513
514 /// Set whether this identifier is the contextual keyword \c import.
515 void setModulesImport(bool I) {
516 IsModulesImport = I;
517 if (I)
518 NeedsHandleIdentifier = true;
519 else
520 RecomputeNeedsHandleIdentifier();
521 }
522
523 /// Determine whether this is the mangled name of an OpenMP variant.
524 bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
525
526 /// Set whether this is the mangled name of an OpenMP variant.
527 void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; }
528
529 /// Return true if this identifier is an editor placeholder.
530 ///
531 /// Editor placeholders are produced by the code-completion engine and are
532 /// represented as characters between '<#' and '#>' in the source code. An
533 /// example of auto-completed call with a placeholder parameter is shown
534 /// below:
535 /// \code
536 /// function(<#int x#>);
537 /// \endcode
538 bool isEditorPlaceholder() const {
539 return getName().starts_with(Prefix: "<#") && getName().ends_with(Suffix: "#>");
540 }
541
542 /// Determine whether \p this is a name reserved for the implementation (C99
543 /// 7.1.3, C++ [lib.global.names]).
544 ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const;
545
546 /// Determine whether \p this is a name reserved for future standardization or
547 /// the implementation (C++ [usrlit.suffix]).
548 ReservedLiteralSuffixIdStatus isReservedLiteralSuffixId() const;
549
550 /// If the identifier is an "uglified" reserved name, return a cleaned form.
551 /// e.g. _Foo => Foo. Otherwise, just returns the name.
552 StringRef deuglifiedName() const;
553 bool isPlaceholder() const {
554 return getLength() == 1 && getNameStart()[0] == '_';
555 }
556
557 /// Provide less than operator for lexicographical sorting.
558 bool operator<(const IdentifierInfo &RHS) const {
559 return getName() < RHS.getName();
560 }
561
562private:
563 /// The Preprocessor::HandleIdentifier does several special (but rare)
564 /// things to identifiers of various sorts. For example, it changes the
565 /// \c for keyword token from tok::identifier to tok::for.
566 ///
567 /// This method is very tied to the definition of HandleIdentifier. Any
568 /// change to it should be reflected here.
569 void RecomputeNeedsHandleIdentifier() {
570 NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
571 isExtensionToken() || isFutureCompatKeyword() ||
572 isOutOfDate() || isModulesImport();
573 }
574};
575
576/// An RAII object for [un]poisoning an identifier within a scope.
577///
578/// \p II is allowed to be null, in which case objects of this type have
579/// no effect.
580class PoisonIdentifierRAIIObject {
581 IdentifierInfo *const II;
582 const bool OldValue;
583
584public:
585 PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
586 : II(II), OldValue(II ? II->isPoisoned() : false) {
587 if(II)
588 II->setIsPoisoned(NewValue);
589 }
590
591 ~PoisonIdentifierRAIIObject() {
592 if(II)
593 II->setIsPoisoned(OldValue);
594 }
595};
596
597/// An iterator that walks over all of the known identifiers
598/// in the lookup table.
599///
600/// Since this iterator uses an abstract interface via virtual
601/// functions, it uses an object-oriented interface rather than the
602/// more standard C++ STL iterator interface. In this OO-style
603/// iteration, the single function \c Next() provides dereference,
604/// advance, and end-of-sequence checking in a single
605/// operation. Subclasses of this iterator type will provide the
606/// actual functionality.
607class IdentifierIterator {
608protected:
609 IdentifierIterator() = default;
610
611public:
612 IdentifierIterator(const IdentifierIterator &) = delete;
613 IdentifierIterator &operator=(const IdentifierIterator &) = delete;
614
615 virtual ~IdentifierIterator();
616
617 /// Retrieve the next string in the identifier table and
618 /// advances the iterator for the following string.
619 ///
620 /// \returns The next string in the identifier table. If there is
621 /// no such string, returns an empty \c StringRef.
622 virtual StringRef Next() = 0;
623};
624
/// Provides lookups to, and iteration over, IdentifierInfo objects.
///
/// This is an abstract interface: \c get() must be overridden by subclasses.
class IdentifierInfoLookup {
public:
  virtual ~IdentifierInfoLookup();

  /// Return the IdentifierInfo for the specified named identifier.
  ///
  /// Unlike the version in IdentifierTable, this returns a pointer instead
  /// of a reference.  If the pointer is null then the IdentifierInfo cannot
  /// be found.
  virtual IdentifierInfo* get(StringRef Name) = 0;

  /// Retrieve an iterator into the set of all identifiers
  /// known to this identifier lookup source.
  ///
  /// This routine provides access to all of the identifiers known to
  /// the identifier lookup, allowing access to the contents of the
  /// identifiers without introducing the overhead of constructing
  /// IdentifierInfo objects for each.
  ///
  /// \returns A new iterator into the set of known identifiers. The
  /// caller is responsible for deleting this iterator.
  virtual IdentifierIterator *getIdentifiers();
};
649
650/// Implements an efficient mapping from strings to IdentifierInfo nodes.
651///
652/// This has no other purpose, but this is an extremely performance-critical
653/// piece of the code, as each occurrence of every identifier goes through
654/// here when lexed.
655class IdentifierTable {
656 // Shark shows that using MallocAllocator is *much* slower than using this
657 // BumpPtrAllocator!
658 using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
659 HashTableTy HashTable;
660
661 IdentifierInfoLookup* ExternalLookup;
662
663public:
664 /// Create the identifier table.
665 explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
666
667 /// Create the identifier table, populating it with info about the
668 /// language keywords for the language specified by \p LangOpts.
669 explicit IdentifierTable(const LangOptions &LangOpts,
670 IdentifierInfoLookup *ExternalLookup = nullptr);
671
672 /// Set the external identifier lookup mechanism.
673 void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
674 ExternalLookup = IILookup;
675 }
676
677 /// Retrieve the external identifier lookup object, if any.
678 IdentifierInfoLookup *getExternalIdentifierLookup() const {
679 return ExternalLookup;
680 }
681
682 llvm::BumpPtrAllocator& getAllocator() {
683 return HashTable.getAllocator();
684 }
685
686 /// Return the identifier token info for the specified named
687 /// identifier.
688 IdentifierInfo &get(StringRef Name) {
689 auto &Entry = *HashTable.try_emplace(Key: Name, Args: nullptr).first;
690
691 IdentifierInfo *&II = Entry.second;
692 if (II) return *II;
693
694 // No entry; if we have an external lookup, look there first.
695 if (ExternalLookup) {
696 II = ExternalLookup->get(Name);
697 if (II)
698 return *II;
699 }
700
701 // Lookups failed, make a new IdentifierInfo.
702 void *Mem = getAllocator().Allocate<IdentifierInfo>();
703 II = new (Mem) IdentifierInfo();
704
705 // Make sure getName() knows how to find the IdentifierInfo
706 // contents.
707 II->Entry = &Entry;
708
709 return *II;
710 }
711
712 IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
713 IdentifierInfo &II = get(Name);
714 II.TokenID = TokenCode;
715 assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
716 return II;
717 }
718
719 /// Gets an IdentifierInfo for the given name without consulting
720 /// external sources.
721 ///
722 /// This is a version of get() meant for external sources that want to
723 /// introduce or modify an identifier. If they called get(), they would
724 /// likely end up in a recursion.
725 IdentifierInfo &getOwn(StringRef Name) {
726 auto &Entry = *HashTable.insert(KV: std::make_pair(x&: Name, y: nullptr)).first;
727
728 IdentifierInfo *&II = Entry.second;
729 if (II)
730 return *II;
731
732 // Lookups failed, make a new IdentifierInfo.
733 void *Mem = getAllocator().Allocate<IdentifierInfo>();
734 II = new (Mem) IdentifierInfo();
735
736 // Make sure getName() knows how to find the IdentifierInfo
737 // contents.
738 II->Entry = &Entry;
739
740 // If this is the 'import' contextual keyword, mark it as such.
741 if (Name == "import")
742 II->setModulesImport(true);
743
744 return *II;
745 }
746
747 using iterator = HashTableTy::const_iterator;
748 using const_iterator = HashTableTy::const_iterator;
749
750 iterator begin() const { return HashTable.begin(); }
751 iterator end() const { return HashTable.end(); }
752 unsigned size() const { return HashTable.size(); }
753
754 iterator find(StringRef Name) const { return HashTable.find(Key: Name); }
755
756 /// Print some statistics to stderr that indicate how well the
757 /// hashing is doing.
758 void PrintStats() const;
759
760 /// Populate the identifier table with info about the language keywords
761 /// for the language specified by \p LangOpts.
762 void AddKeywords(const LangOptions &LangOpts);
763
764 /// Returns the correct diagnostic to issue for a future-compat diagnostic
765 /// warning. Note, this function assumes the identifier passed has already
766 /// been determined to be a future compatible keyword.
767 diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
768 const LangOptions &LangOpts);
769};
770
/// A family of Objective-C methods.
///
/// These families have no inherent meaning in the language, but are
/// nonetheless central enough in the existing implementations to
/// merit direct AST support.  While, in theory, arbitrary methods can
/// be considered to form families, we focus here on the methods
/// involving allocation and retain-count management, as these are the
/// most "core" and the most likely to be useful to diverse clients
/// without extra information.
///
/// Both selectors and actual method declarations may be classified
/// into families.  Method families may impose additional restrictions
/// beyond their selector name; for example, a method called '_init'
/// that returns void is not considered to be in the 'init' family
/// (but would be if it returned 'id').  It is also possible to
/// explicitly change or remove a method's family.  Therefore the
/// method's family should be considered the single source of truth.
///
/// The values are written out explicitly; every enumerator has to be
/// representable in ObjCMethodFamilyBitWidth bits.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None = 0,

  // Selectors in these families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have
  // additional CamelCase "words" in their first selector chunk
  // following the family name.
  OMF_alloc = 1,
  OMF_copy = 2,
  OMF_init = 3,
  OMF_mutableCopy = 4,
  OMF_new = 5,

  // These families are singletons consisting only of the nullary
  // selector with the given name.
  OMF_autorelease = 6,
  OMF_dealloc = 7,
  OMF_finalize = 8,
  OMF_release = 9,
  OMF_retain = 10,
  OMF_retainCount = 11,
  OMF_self = 12,
  OMF_initialize = 13,

  // performSelector families
  OMF_performSelector = 14
};
816
/// Number of bits sufficient to hold any ObjCMethodFamily enumerator as well
/// as InvalidObjCMethodFamily.
enum { ObjCMethodFamilyBitWidth = 4 };

/// An invalid value of ObjCMethodFamily: the largest value that fits in
/// ObjCMethodFamilyBitWidth bits (all of those bits set).
enum {
  InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1
};
823
/// A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// are candidates for having the result type changed to 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None = 0,
  OIT_Array = 1,
  OIT_Dictionary = 2,
  OIT_Singleton = 3,
  OIT_Init = 4,
  OIT_ReturnsSelf = 5
};
836
/// String-format family classification: none, NSString, or CFString.
// NOTE(review): the meaning is inferred from the enumerator names; this
// header does not show where the classification is consumed.
enum ObjCStringFormatFamily {
  SFF_None = 0,
  SFF_NSString = 1,
  SFF_CFString = 2
};
842
843namespace detail {
844
845/// DeclarationNameExtra is used as a base of various uncommon special names.
846/// This class is needed since DeclarationName has not enough space to store
847/// the kind of every possible names. Therefore the kind of common names is
848/// stored directly in DeclarationName, and the kind of uncommon names is
849/// stored in DeclarationNameExtra. It is aligned to 8 bytes because
850/// DeclarationName needs the lower 3 bits to store the kind of common names.
851/// DeclarationNameExtra is tightly coupled to DeclarationName and any change
852/// here is very likely to require changes in DeclarationName(Table).
853class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
854 friend class clang::DeclarationName;
855 friend class clang::DeclarationNameTable;
856
857protected:
858 /// The kind of "extra" information stored in the DeclarationName. See
859 /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
860 /// are used. Note that DeclarationName depends on the numerical values
861 /// of the enumerators in this enum. See DeclarationName::StoredNameKind
862 /// for more info.
863 enum ExtraKind {
864 CXXDeductionGuideName,
865 CXXLiteralOperatorName,
866 CXXUsingDirective,
867 ObjCMultiArgSelector
868 };
869
870 /// ExtraKindOrNumArgs has one of the following meaning:
871 /// * The kind of an uncommon C++ special name. This DeclarationNameExtra
872 /// is in this case in fact either a CXXDeductionGuideNameExtra or
873 /// a CXXLiteralOperatorIdName.
874 ///
875 /// * It may be also name common to C++ using-directives (CXXUsingDirective),
876 ///
877 /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
878 /// the number of arguments in the Objective-C selector, in which
879 /// case the DeclarationNameExtra is also a MultiKeywordSelector.
880 unsigned ExtraKindOrNumArgs;
881
882 DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
883 DeclarationNameExtra(unsigned NumArgs)
884 : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
885
886 /// Return the corresponding ExtraKind.
887 ExtraKind getKind() const {
888 return static_cast<ExtraKind>(ExtraKindOrNumArgs >
889 (unsigned)ObjCMultiArgSelector
890 ? (unsigned)ObjCMultiArgSelector
891 : ExtraKindOrNumArgs);
892 }
893
894 /// Return the number of arguments in an ObjC selector. Only valid when this
895 /// is indeed an ObjCMultiArgSelector.
896 unsigned getNumArgs() const {
897 assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
898 "getNumArgs called but this is not an ObjC selector!");
899 return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
900 }
901};
902
903} // namespace detail
904
905/// One of these variable length records is kept for each
906/// selector containing more than one keyword. We use a folding set
907/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
908/// this class is provided strictly through Selector.
909class alignas(IdentifierInfoAlignment) MultiKeywordSelector
910 : public detail::DeclarationNameExtra,
911 public llvm::FoldingSetNode {
912 MultiKeywordSelector(unsigned nKeys) : DeclarationNameExtra(nKeys) {}
913
914public:
915 // Constructor for keyword selectors.
916 MultiKeywordSelector(unsigned nKeys, const IdentifierInfo **IIV)
917 : DeclarationNameExtra(nKeys) {
918 assert((nKeys > 1) && "not a multi-keyword selector");
919
920 // Fill in the trailing keyword array.
921 const IdentifierInfo **KeyInfo =
922 reinterpret_cast<const IdentifierInfo **>(this + 1);
923 for (unsigned i = 0; i != nKeys; ++i)
924 KeyInfo[i] = IIV[i];
925 }
926
927 // getName - Derive the full selector name and return it.
928 std::string getName() const;
929
930 using DeclarationNameExtra::getNumArgs;
931
932 using keyword_iterator = const IdentifierInfo *const *;
933
934 keyword_iterator keyword_begin() const {
935 return reinterpret_cast<keyword_iterator>(this + 1);
936 }
937
938 keyword_iterator keyword_end() const {
939 return keyword_begin() + getNumArgs();
940 }
941
942 const IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
943 assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index");
944 return keyword_begin()[i];
945 }
946
947 static void Profile(llvm::FoldingSetNodeID &ID, keyword_iterator ArgTys,
948 unsigned NumArgs) {
949 ID.AddInteger(I: NumArgs);
950 for (unsigned i = 0; i != NumArgs; ++i)
951 ID.AddPointer(Ptr: ArgTys[i]);
952 }
953
954 void Profile(llvm::FoldingSetNodeID &ID) {
955 Profile(ID, ArgTys: keyword_begin(), NumArgs: getNumArgs());
956 }
957};
958
959/// Smart pointer class that efficiently represents Objective-C method
960/// names.
961///
962/// This class will either point to an IdentifierInfo or a
963/// MultiKeywordSelector (which is private). This enables us to optimize
964/// selectors that take no arguments and selectors that take 1 argument, which
965/// accounts for 78% of all selectors in Cocoa.h.
966class Selector {
967 friend class Diagnostic;
968 friend class SelectorTable; // only the SelectorTable can create these
969 friend class DeclarationName; // and the AST's DeclarationName.
970
971 enum IdentifierInfoFlag {
972 // Empty selector = 0. Note that these enumeration values must
973 // correspond to the enumeration values of DeclarationName::StoredNameKind
974 ZeroArg = 0x01,
975 OneArg = 0x02,
976 // IMPORTANT NOTE: see comments in InfoPtr (below) about this enumerator
977 // value.
978 MultiArg = 0x07,
979 };
980
981 /// IMPORTANT NOTE: the order of the types in this PointerUnion are
982 /// important! The DeclarationName class has bidirectional conversion
983 /// to/from Selector through an opaque pointer (void *) which corresponds
984 /// to this PointerIntPair. The discriminator bit from the PointerUnion
985 /// corresponds to the high bit in the MultiArg enumerator. So while this
986 /// PointerIntPair only has two bits for the integer (and we mask off the
987 /// high bit in `MultiArg` when it is used), that discrimator bit is
988 /// still necessary for the opaque conversion. The discriminator bit
989 /// from the PointerUnion and the two integer bits from the
990 /// PointerIntPair are also exposed via the DeclarationName::StoredNameKind
991 /// enumeration; see the comments in DeclarationName.h for more details.
992 /// Do not reorder or add any arguments to this template
993 /// without thoroughly understanding how tightly coupled these classes are.
994 llvm::PointerIntPair<
995 llvm::PointerUnion<const IdentifierInfo *, MultiKeywordSelector *>, 2>
996 InfoPtr;
997
998 Selector(const IdentifierInfo *II, unsigned nArgs) {
999 assert(nArgs < 2 && "nArgs not equal to 0/1");
1000 InfoPtr.setPointerAndInt(PtrVal: II, IntVal: nArgs + 1);
1001 }
1002
1003 Selector(MultiKeywordSelector *SI) {
1004 // IMPORTANT NOTE: we mask off the upper bit of this value because we only
1005 // reserve two bits for the integer in the PointerIntPair. See the comments
1006 // in `InfoPtr` for more details.
1007 InfoPtr.setPointerAndInt(PtrVal: SI, IntVal: MultiArg & 0b11);
1008 }
1009
1010 const IdentifierInfo *getAsIdentifierInfo() const {
1011 return InfoPtr.getPointer().dyn_cast<const IdentifierInfo *>();
1012 }
1013
1014 MultiKeywordSelector *getMultiKeywordSelector() const {
1015 return InfoPtr.getPointer().get<MultiKeywordSelector *>();
1016 }
1017
1018 unsigned getIdentifierInfoFlag() const {
1019 unsigned new_flags = InfoPtr.getInt();
1020 // IMPORTANT NOTE: We have to reconstitute this data rather than use the
1021 // value directly from the PointerIntPair. See the comments in `InfoPtr`
1022 // for more details.
1023 if (InfoPtr.getPointer().is<MultiKeywordSelector *>())
1024 new_flags |= MultiArg;
1025 return new_flags;
1026 }
1027
1028 static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
1029
1030 static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
1031
1032public:
1033 /// The default ctor should only be used when creating data structures that
1034 /// will contain selectors.
1035 Selector() = default;
1036 explicit Selector(uintptr_t V) {
1037 InfoPtr.setFromOpaqueValue(reinterpret_cast<void *>(V));
1038 }
1039
1040 /// operator==/!= - Indicate whether the specified selectors are identical.
1041 bool operator==(Selector RHS) const {
1042 return InfoPtr.getOpaqueValue() == RHS.InfoPtr.getOpaqueValue();
1043 }
1044 bool operator!=(Selector RHS) const {
1045 return InfoPtr.getOpaqueValue() != RHS.InfoPtr.getOpaqueValue();
1046 }
1047
1048 void *getAsOpaquePtr() const { return InfoPtr.getOpaqueValue(); }
1049
1050 /// Determine whether this is the empty selector.
1051 bool isNull() const { return InfoPtr.getOpaqueValue() == nullptr; }
1052
1053 // Predicates to identify the selector type.
1054 bool isKeywordSelector() const { return InfoPtr.getInt() != ZeroArg; }
1055
1056 bool isUnarySelector() const { return InfoPtr.getInt() == ZeroArg; }
1057
1058 /// If this selector is the specific keyword selector described by Names.
1059 bool isKeywordSelector(ArrayRef<StringRef> Names) const;
1060
1061 /// If this selector is the specific unary selector described by Name.
1062 bool isUnarySelector(StringRef Name) const;
1063
1064 unsigned getNumArgs() const;
1065
1066 /// Retrieve the identifier at a given position in the selector.
1067 ///
1068 /// Note that the identifier pointer returned may be NULL. Clients that only
1069 /// care about the text of the identifier string, and not the specific,
1070 /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
1071 /// an empty string when the identifier pointer would be NULL.
1072 ///
1073 /// \param argIndex The index for which we want to retrieve the identifier.
1074 /// This index shall be less than \c getNumArgs() unless this is a keyword
1075 /// selector, in which case 0 is the only permissible value.
1076 ///
1077 /// \returns the uniqued identifier for this slot, or NULL if this slot has
1078 /// no corresponding identifier.
1079 const IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
1080
1081 /// Retrieve the name at a given position in the selector.
1082 ///
1083 /// \param argIndex The index for which we want to retrieve the name.
1084 /// This index shall be less than \c getNumArgs() unless this is a keyword
1085 /// selector, in which case 0 is the only permissible value.
1086 ///
1087 /// \returns the name for this slot, which may be the empty string if no
1088 /// name was supplied.
1089 StringRef getNameForSlot(unsigned argIndex) const;
1090
1091 /// Derive the full selector name (e.g. "foo:bar:") and return
1092 /// it as an std::string.
1093 std::string getAsString() const;
1094
1095 /// Prints the full selector name (e.g. "foo:bar:").
1096 void print(llvm::raw_ostream &OS) const;
1097
1098 void dump() const;
1099
1100 /// Derive the conventional family of this method.
1101 ObjCMethodFamily getMethodFamily() const {
1102 return getMethodFamilyImpl(sel: *this);
1103 }
1104
1105 ObjCStringFormatFamily getStringFormatFamily() const {
1106 return getStringFormatFamilyImpl(sel: *this);
1107 }
1108
1109 static Selector getEmptyMarker() {
1110 return Selector(uintptr_t(-1));
1111 }
1112
1113 static Selector getTombstoneMarker() {
1114 return Selector(uintptr_t(-2));
1115 }
1116
1117 static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
1118};
1119
1120/// This table allows us to fully hide how we implement
1121/// multi-keyword caching.
1122class SelectorTable {
1123 // Actually a SelectorTableImpl
1124 void *Impl;
1125
1126public:
1127 SelectorTable();
1128 SelectorTable(const SelectorTable &) = delete;
1129 SelectorTable &operator=(const SelectorTable &) = delete;
1130 ~SelectorTable();
1131
1132 /// Can create any sort of selector.
1133 ///
1134 /// \p NumArgs indicates whether this is a no argument selector "foo", a
1135 /// single argument selector "foo:" or multi-argument "foo:bar:".
1136 Selector getSelector(unsigned NumArgs, const IdentifierInfo **IIV);
1137
1138 Selector getUnarySelector(const IdentifierInfo *ID) {
1139 return Selector(ID, 1);
1140 }
1141
1142 Selector getNullarySelector(const IdentifierInfo *ID) {
1143 return Selector(ID, 0);
1144 }
1145
1146 /// Return the total amount of memory allocated for managing selectors.
1147 size_t getTotalMemory() const;
1148
1149 /// Return the default setter name for the given identifier.
1150 ///
1151 /// This is "set" + \p Name where the initial character of \p Name
1152 /// has been capitalized.
1153 static SmallString<64> constructSetterName(StringRef Name);
1154
1155 /// Return the default setter selector for the given identifier.
1156 ///
1157 /// This is "set" + \p Name where the initial character of \p Name
1158 /// has been capitalized.
1159 static Selector constructSetterSelector(IdentifierTable &Idents,
1160 SelectorTable &SelTable,
1161 const IdentifierInfo *Name);
1162
1163 /// Return the property name for the given setter selector.
1164 static std::string getPropertyNameFromSetterSelector(Selector Sel);
1165};
1166
1167} // namespace clang
1168
1169namespace llvm {
1170
1171/// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
1172/// DenseSets.
1173template <>
1174struct DenseMapInfo<clang::Selector> {
1175 static clang::Selector getEmptyKey() {
1176 return clang::Selector::getEmptyMarker();
1177 }
1178
1179 static clang::Selector getTombstoneKey() {
1180 return clang::Selector::getTombstoneMarker();
1181 }
1182
1183 static unsigned getHashValue(clang::Selector S);
1184
1185 static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
1186 return LHS == RHS;
1187 }
1188};
1189
1190template<>
1191struct PointerLikeTypeTraits<clang::Selector> {
1192 static const void *getAsVoidPointer(clang::Selector P) {
1193 return P.getAsOpaquePtr();
1194 }
1195
1196 static clang::Selector getFromVoidPointer(const void *P) {
1197 return clang::Selector(reinterpret_cast<uintptr_t>(P));
1198 }
1199
1200 static constexpr int NumLowBitsAvailable = 0;
1201};
1202
1203} // namespace llvm
1204
1205#endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
1206