//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer
/// literal separators.
///
//===----------------------------------------------------------------------===//
| 14 | |
| 15 | #include "IntegerLiteralSeparatorFixer.h" |
| 16 | |
| 17 | namespace clang { |
| 18 | namespace format { |
| 19 | |
/// Radix category of an integer literal. `Other` covers every literal that is
/// not recognized as binary, decimal, or hexadecimal.
enum class Base { Binary, Decimal, Hex, Other };
| 21 | |
| 22 | static Base getBase(const StringRef IntegerLiteral) { |
| 23 | assert(IntegerLiteral.size() > 1); |
| 24 | |
| 25 | if (IntegerLiteral[0] > '0') { |
| 26 | assert(IntegerLiteral[0] <= '9'); |
| 27 | return Base::Decimal; |
| 28 | } |
| 29 | |
| 30 | assert(IntegerLiteral[0] == '0'); |
| 31 | |
| 32 | switch (IntegerLiteral[1]) { |
| 33 | case 'b': |
| 34 | case 'B': |
| 35 | return Base::Binary; |
| 36 | case 'x': |
| 37 | case 'X': |
| 38 | return Base::Hex; |
| 39 | default: |
| 40 | return Base::Other; |
| 41 | } |
| 42 | } |
| 43 | |
| 44 | std::pair<tooling::Replacements, unsigned> |
| 45 | IntegerLiteralSeparatorFixer::process(const Environment &Env, |
| 46 | const FormatStyle &Style) { |
| 47 | switch (Style.Language) { |
| 48 | case FormatStyle::LK_Cpp: |
| 49 | case FormatStyle::LK_ObjC: |
| 50 | Separator = '\''; |
| 51 | break; |
| 52 | case FormatStyle::LK_CSharp: |
| 53 | case FormatStyle::LK_Java: |
| 54 | case FormatStyle::LK_JavaScript: |
| 55 | Separator = '_'; |
| 56 | break; |
| 57 | default: |
| 58 | return {}; |
| 59 | } |
| 60 | |
| 61 | const auto &Option = Style.IntegerLiteralSeparator; |
| 62 | const auto Binary = Option.Binary; |
| 63 | const auto Decimal = Option.Decimal; |
| 64 | const auto Hex = Option.Hex; |
| 65 | const bool SkipBinary = Binary == 0; |
| 66 | const bool SkipDecimal = Decimal == 0; |
| 67 | const bool SkipHex = Hex == 0; |
| 68 | |
| 69 | if (SkipBinary && SkipDecimal && SkipHex) |
| 70 | return {}; |
| 71 | |
| 72 | const auto BinaryMinDigits = |
| 73 | std::max(a: (int)Option.BinaryMinDigits, b: Binary + 1); |
| 74 | const auto DecimalMinDigits = |
| 75 | std::max(a: (int)Option.DecimalMinDigits, b: Decimal + 1); |
| 76 | const auto HexMinDigits = std::max(a: (int)Option.HexMinDigits, b: Hex + 1); |
| 77 | |
| 78 | const auto &SourceMgr = Env.getSourceManager(); |
| 79 | AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges()); |
| 80 | |
| 81 | const auto ID = Env.getFileID(); |
| 82 | const auto LangOpts = getFormattingLangOpts(Style); |
| 83 | Lexer Lex(ID, SourceMgr.getBufferOrFake(FID: ID), SourceMgr, LangOpts); |
| 84 | Lex.SetCommentRetentionState(true); |
| 85 | |
| 86 | Token Tok; |
| 87 | tooling::Replacements Result; |
| 88 | |
| 89 | for (bool Skip = false; !Lex.LexFromRawLexer(Result&: Tok);) { |
| 90 | auto Length = Tok.getLength(); |
| 91 | if (Length < 2) |
| 92 | continue; |
| 93 | auto Location = Tok.getLocation(); |
| 94 | auto Text = StringRef(SourceMgr.getCharacterData(SL: Location), Length); |
| 95 | if (Tok.is(K: tok::comment)) { |
| 96 | if (isClangFormatOff(Comment: Text)) |
| 97 | Skip = true; |
| 98 | else if (isClangFormatOn(Comment: Text)) |
| 99 | Skip = false; |
| 100 | continue; |
| 101 | } |
| 102 | if (Skip || Tok.isNot(K: tok::numeric_constant) || Text[0] == '.' || |
| 103 | !AffectedRangeMgr.affectsCharSourceRange( |
| 104 | Range: CharSourceRange::getCharRange(B: Location, E: Tok.getEndLoc()))) { |
| 105 | continue; |
| 106 | } |
| 107 | const auto B = getBase(IntegerLiteral: Text); |
| 108 | const bool IsBase2 = B == Base::Binary; |
| 109 | const bool IsBase10 = B == Base::Decimal; |
| 110 | const bool IsBase16 = B == Base::Hex; |
| 111 | if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) || |
| 112 | (IsBase16 && SkipHex) || B == Base::Other) { |
| 113 | continue; |
| 114 | } |
| 115 | if (Style.isCpp()) { |
| 116 | // Hex alpha digits a-f/A-F must be at the end of the string literal. |
| 117 | StringRef Suffixes = "_himnsuyd" ; |
| 118 | if (const auto Pos = |
| 119 | Text.find_first_of(Chars: IsBase16 ? Suffixes.drop_back() : Suffixes); |
| 120 | Pos != StringRef::npos) { |
| 121 | Text = Text.substr(Start: 0, N: Pos); |
| 122 | Length = Pos; |
| 123 | } |
| 124 | } |
| 125 | if ((IsBase10 && Text.find_last_of(Chars: ".eEfFdDmM" ) != StringRef::npos) || |
| 126 | (IsBase16 && Text.find_last_of(Chars: ".pP" ) != StringRef::npos)) { |
| 127 | continue; |
| 128 | } |
| 129 | const auto Start = Text[0] == '0' ? 2 : 0; |
| 130 | auto End = Text.find_first_of(Chars: "uUlLzZn" , From: Start); |
| 131 | if (End == StringRef::npos) |
| 132 | End = Length; |
| 133 | if (Start > 0 || End < Length) { |
| 134 | Length = End - Start; |
| 135 | Text = Text.substr(Start, N: Length); |
| 136 | } |
| 137 | auto DigitsPerGroup = Decimal; |
| 138 | auto MinDigits = DecimalMinDigits; |
| 139 | if (IsBase2) { |
| 140 | DigitsPerGroup = Binary; |
| 141 | MinDigits = BinaryMinDigits; |
| 142 | } else if (IsBase16) { |
| 143 | DigitsPerGroup = Hex; |
| 144 | MinDigits = HexMinDigits; |
| 145 | } |
| 146 | const auto SeparatorCount = Text.count(C: Separator); |
| 147 | const int DigitCount = Length - SeparatorCount; |
| 148 | const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits; |
| 149 | if (RemoveSeparator && SeparatorCount == 0) |
| 150 | continue; |
| 151 | if (!RemoveSeparator && SeparatorCount > 0 && |
| 152 | checkSeparator(IntegerLiteral: Text, DigitsPerGroup)) { |
| 153 | continue; |
| 154 | } |
| 155 | const auto &Formatted = |
| 156 | format(IntegerLiteral: Text, DigitsPerGroup, DigitCount, RemoveSeparator); |
| 157 | assert(Formatted != Text); |
| 158 | if (Start > 0) |
| 159 | Location = Location.getLocWithOffset(Offset: Start); |
| 160 | cantFail(Err: Result.add( |
| 161 | R: tooling::Replacement(SourceMgr, Location, Length, Formatted))); |
| 162 | } |
| 163 | |
| 164 | return {Result, 0}; |
| 165 | } |
| 166 | |
| 167 | bool IntegerLiteralSeparatorFixer::checkSeparator( |
| 168 | const StringRef IntegerLiteral, int DigitsPerGroup) const { |
| 169 | assert(DigitsPerGroup > 0); |
| 170 | |
| 171 | int I = 0; |
| 172 | for (auto C : llvm::reverse(C: IntegerLiteral)) { |
| 173 | if (C == Separator) { |
| 174 | if (I < DigitsPerGroup) |
| 175 | return false; |
| 176 | I = 0; |
| 177 | } else { |
| 178 | if (I == DigitsPerGroup) |
| 179 | return false; |
| 180 | ++I; |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | return true; |
| 185 | } |
| 186 | |
| 187 | std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral, |
| 188 | int DigitsPerGroup, |
| 189 | int DigitCount, |
| 190 | bool RemoveSeparator) const { |
| 191 | assert(DigitsPerGroup != 0); |
| 192 | |
| 193 | std::string Formatted; |
| 194 | |
| 195 | if (RemoveSeparator) { |
| 196 | for (auto C : IntegerLiteral) |
| 197 | if (C != Separator) |
| 198 | Formatted.push_back(c: C); |
| 199 | return Formatted; |
| 200 | } |
| 201 | |
| 202 | int Remainder = DigitCount % DigitsPerGroup; |
| 203 | |
| 204 | int I = 0; |
| 205 | for (auto C : IntegerLiteral) { |
| 206 | if (C == Separator) |
| 207 | continue; |
| 208 | if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) { |
| 209 | Formatted.push_back(c: Separator); |
| 210 | I = 0; |
| 211 | Remainder = 0; |
| 212 | } |
| 213 | Formatted.push_back(c: C); |
| 214 | ++I; |
| 215 | } |
| 216 | |
| 217 | return Formatted; |
| 218 | } |
| 219 | |
| 220 | } // namespace format |
| 221 | } // namespace clang |
| 222 | |