| 1 | //===--- NumericLiteralCaseFixer.cpp ----------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// This file implements NumericLiteralCaseFixer that standardizes character |
| 11 | /// case within numeric literals. |
| 12 | /// |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "NumericLiteralCaseFixer.h" |
| 16 | #include "NumericLiteralInfo.h" |
| 17 | |
| 18 | #include "llvm/ADT/StringExtras.h" |
| 19 | |
| 20 | #include <algorithm> |
| 21 | |
| 22 | namespace clang { |
| 23 | namespace format { |
| 24 | |
| 25 | static bool isNumericLiteralCaseFixerNeeded(const FormatStyle &Style) { |
| 26 | // Check if language is supported. |
| 27 | switch (Style.Language) { |
| 28 | case FormatStyle::LK_C: |
| 29 | case FormatStyle::LK_Cpp: |
| 30 | case FormatStyle::LK_ObjC: |
| 31 | case FormatStyle::LK_CSharp: |
| 32 | case FormatStyle::LK_Java: |
| 33 | case FormatStyle::LK_JavaScript: |
| 34 | break; |
| 35 | default: |
| 36 | return false; |
| 37 | } |
| 38 | |
| 39 | // Check if style options are set. |
| 40 | const auto &Option = Style.NumericLiteralCase; |
| 41 | const auto Leave = FormatStyle::NLCS_Leave; |
| 42 | return Option.Prefix != Leave || Option.HexDigit != Leave || |
| 43 | Option.ExponentLetter != Leave || Option.Suffix != Leave; |
| 44 | } |
| 45 | |
| 46 | static std::string |
| 47 | transformComponent(StringRef Component, |
| 48 | FormatStyle::NumericLiteralComponentStyle ConfigValue) { |
| 49 | switch (ConfigValue) { |
| 50 | case FormatStyle::NLCS_Upper: |
| 51 | return Component.upper(); |
| 52 | case FormatStyle::NLCS_Lower: |
| 53 | return Component.lower(); |
| 54 | default: |
| 55 | // Covers FormatStyle::NLCS_Leave. |
| 56 | return Component.str(); |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | /// Test if Suffix matches a C++ literal reserved by the library. |
| 61 | /// Matches against all suffixes reserved in the C++23 standard. |
| 62 | static bool matchesReservedSuffix(StringRef Suffix) { |
| 63 | static constexpr std::array<StringRef, 11> SortedReservedSuffixes = { |
| 64 | "d" , "h" , "i" , "if" , "il" , "min" , "ms" , "ns" , "s" , "us" , "y" , |
| 65 | }; |
| 66 | |
| 67 | // This can be static_assert when we have access to constexpr is_sorted in |
| 68 | // C++ 20. |
| 69 | assert(llvm::is_sorted(SortedReservedSuffixes) && |
| 70 | "Must be sorted as precondition for lower_bound()." ); |
| 71 | |
| 72 | auto entry = llvm::lower_bound(Range: SortedReservedSuffixes, Value&: Suffix); |
| 73 | if (entry == SortedReservedSuffixes.cend()) |
| 74 | return false; |
| 75 | return *entry == Suffix; |
| 76 | } |
| 77 | |
| 78 | static std::string format(StringRef NumericLiteral, const FormatStyle &Style) { |
| 79 | const char Separator = Style.isCpp() ? '\'' : '_'; |
| 80 | const NumericLiteralInfo Info(NumericLiteral, Separator); |
| 81 | const bool HasBaseLetter = Info.BaseLetterPos != StringRef::npos; |
| 82 | const bool HasExponent = Info.ExponentLetterPos != StringRef::npos; |
| 83 | const bool HasSuffix = Info.SuffixPos != StringRef::npos; |
| 84 | |
| 85 | std::string Formatted; |
| 86 | |
| 87 | if (HasBaseLetter) { |
| 88 | Formatted += |
| 89 | transformComponent(Component: NumericLiteral.take_front(N: 1 + Info.BaseLetterPos), |
| 90 | ConfigValue: Style.NumericLiteralCase.Prefix); |
| 91 | } |
| 92 | // Reformat this slice as HexDigit whether or not the digit has hexadecimal |
| 93 | // characters because binary/decimal/octal digits are unchanged. |
| 94 | Formatted += transformComponent( |
| 95 | Component: NumericLiteral.slice(Start: HasBaseLetter ? 1 + Info.BaseLetterPos : 0, |
| 96 | End: HasExponent ? Info.ExponentLetterPos |
| 97 | : HasSuffix ? Info.SuffixPos |
| 98 | : NumericLiteral.size()), |
| 99 | ConfigValue: Style.NumericLiteralCase.HexDigit); |
| 100 | |
| 101 | if (HasExponent) { |
| 102 | Formatted += transformComponent( |
| 103 | Component: NumericLiteral.slice(Start: Info.ExponentLetterPos, |
| 104 | End: HasSuffix ? Info.SuffixPos |
| 105 | : NumericLiteral.size()), |
| 106 | ConfigValue: Style.NumericLiteralCase.ExponentLetter); |
| 107 | } |
| 108 | |
| 109 | if (HasSuffix) { |
| 110 | StringRef Suffix = NumericLiteral.drop_front(N: Info.SuffixPos); |
| 111 | if (matchesReservedSuffix(Suffix) || Suffix.front() == '_') { |
| 112 | // In C++, it is idiomatic, but NOT standardized to define user-defined |
| 113 | // literals with a leading '_'. Omit user defined literals and standard |
| 114 | // reserved suffixes from transformation. |
| 115 | Formatted += Suffix.str(); |
| 116 | } else { |
| 117 | Formatted += transformComponent(Component: Suffix, ConfigValue: Style.NumericLiteralCase.Suffix); |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | return Formatted; |
| 122 | } |
| 123 | |
| 124 | std::pair<tooling::Replacements, unsigned> |
| 125 | NumericLiteralCaseFixer::process(const Environment &Env, |
| 126 | const FormatStyle &Style) { |
| 127 | if (!isNumericLiteralCaseFixerNeeded(Style)) |
| 128 | return {}; |
| 129 | |
| 130 | const auto &SourceMgr = Env.getSourceManager(); |
| 131 | AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges()); |
| 132 | |
| 133 | const auto ID = Env.getFileID(); |
| 134 | const auto LangOpts = getFormattingLangOpts(Style); |
| 135 | Lexer Lex(ID, SourceMgr.getBufferOrFake(FID: ID), SourceMgr, LangOpts); |
| 136 | Lex.SetCommentRetentionState(true); |
| 137 | |
| 138 | Token Tok; |
| 139 | tooling::Replacements Result; |
| 140 | |
| 141 | for (bool Skip = false; !Lex.LexFromRawLexer(Result&: Tok);) { |
| 142 | // Skip tokens that are too small to contain a formattable literal. |
| 143 | // Size=2 is the smallest possible literal that could contain formattable |
| 144 | // components, for example "1u". |
| 145 | auto Length = Tok.getLength(); |
| 146 | if (Length < 2) |
| 147 | continue; |
| 148 | |
| 149 | // Service clang-format off/on comments. |
| 150 | auto Location = Tok.getLocation(); |
| 151 | auto Text = StringRef(SourceMgr.getCharacterData(SL: Location), Length); |
| 152 | if (Tok.is(K: tok::comment)) { |
| 153 | if (isClangFormatOff(Comment: Text)) |
| 154 | Skip = true; |
| 155 | else if (isClangFormatOn(Comment: Text)) |
| 156 | Skip = false; |
| 157 | continue; |
| 158 | } |
| 159 | |
| 160 | if (Skip || Tok.isNot(K: tok::numeric_constant) || |
| 161 | !AffectedRangeMgr.affectsCharSourceRange( |
| 162 | Range: CharSourceRange::getCharRange(B: Location, E: Tok.getEndLoc()))) { |
| 163 | continue; |
| 164 | } |
| 165 | |
| 166 | const auto Formatted = format(NumericLiteral: Text, Style); |
| 167 | if (Formatted != Text) { |
| 168 | cantFail(Err: Result.add( |
| 169 | R: tooling::Replacement(SourceMgr, Location, Length, Formatted))); |
| 170 | } |
| 171 | } |
| 172 | |
| 173 | return {Result, 0}; |
| 174 | } |
| 175 | |
| 176 | } // namespace format |
| 177 | } // namespace clang |
| 178 | |