//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer
/// literal separators.
///
//===----------------------------------------------------------------------===//
| 14 | |
| 15 | #include "IntegerLiteralSeparatorFixer.h" |
| 16 | |
| 17 | namespace clang { |
| 18 | namespace format { |
| 19 | |
/// Numeric base of a literal, as classified by getBase().
enum class Base {
  Binary,  ///< Literal starts with "0b" or "0B".
  Decimal, ///< Literal starts with a digit '1'..'9'.
  Hex,     ///< Literal starts with "0x" or "0X".
  Other    ///< Literal starts with '0' followed by anything else.
};
| 21 | |
| 22 | static Base getBase(StringRef IntegerLiteral) { |
| 23 | assert(IntegerLiteral.size() > 1); |
| 24 | |
| 25 | if (IntegerLiteral[0] > '0') { |
| 26 | assert(IntegerLiteral[0] <= '9'); |
| 27 | return Base::Decimal; |
| 28 | } |
| 29 | |
| 30 | assert(IntegerLiteral[0] == '0'); |
| 31 | |
| 32 | switch (IntegerLiteral[1]) { |
| 33 | case 'b': |
| 34 | case 'B': |
| 35 | return Base::Binary; |
| 36 | case 'x': |
| 37 | case 'X': |
| 38 | return Base::Hex; |
| 39 | default: |
| 40 | return Base::Other; |
| 41 | } |
| 42 | } |
| 43 | |
| 44 | std::pair<tooling::Replacements, unsigned> |
| 45 | IntegerLiteralSeparatorFixer::process(const Environment &Env, |
| 46 | const FormatStyle &Style) { |
| 47 | switch (Style.Language) { |
| 48 | case FormatStyle::LK_CSharp: |
| 49 | case FormatStyle::LK_Java: |
| 50 | case FormatStyle::LK_JavaScript: |
| 51 | Separator = '_'; |
| 52 | break; |
| 53 | case FormatStyle::LK_Cpp: |
| 54 | case FormatStyle::LK_ObjC: |
| 55 | if (Style.Standard >= FormatStyle::LS_Cpp14) { |
| 56 | Separator = '\''; |
| 57 | break; |
| 58 | } |
| 59 | [[fallthrough]]; |
| 60 | default: |
| 61 | return {}; |
| 62 | } |
| 63 | |
| 64 | const auto &Option = Style.IntegerLiteralSeparator; |
| 65 | const auto Binary = Option.Binary; |
| 66 | const auto Decimal = Option.Decimal; |
| 67 | const auto Hex = Option.Hex; |
| 68 | const bool SkipBinary = Binary == 0; |
| 69 | const bool SkipDecimal = Decimal == 0; |
| 70 | const bool SkipHex = Hex == 0; |
| 71 | |
| 72 | if (SkipBinary && SkipDecimal && SkipHex) |
| 73 | return {}; |
| 74 | |
| 75 | auto CalcMinAndMax = [](int DigitsPerGroup, int MinDigitsInsert, |
| 76 | int MaxDigitsRemove) { |
| 77 | MinDigitsInsert = std::max(a: MinDigitsInsert, b: DigitsPerGroup + 1); |
| 78 | if (MinDigitsInsert < 1) |
| 79 | MaxDigitsRemove = 0; |
| 80 | else if (MaxDigitsRemove < 1 || MaxDigitsRemove >= MinDigitsInsert) |
| 81 | MaxDigitsRemove = MinDigitsInsert - 1; |
| 82 | return std::pair(MinDigitsInsert, MaxDigitsRemove); |
| 83 | }; |
| 84 | |
| 85 | const auto [BinaryMinDigitsInsert, BinaryMaxDigitsRemove] = CalcMinAndMax( |
| 86 | Binary, Option.BinaryMinDigitsInsert, Option.BinaryMaxDigitsRemove); |
| 87 | const auto [DecimalMinDigitsInsert, DecimalMaxDigitsRemove] = CalcMinAndMax( |
| 88 | Decimal, Option.DecimalMinDigitsInsert, Option.DecimalMaxDigitsRemove); |
| 89 | const auto [HexMinDigitsInsert, HexMaxDigitsRemove] = |
| 90 | CalcMinAndMax(Hex, Option.HexMinDigitsInsert, Option.HexMaxDigitsRemove); |
| 91 | |
| 92 | const auto &SourceMgr = Env.getSourceManager(); |
| 93 | AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges()); |
| 94 | |
| 95 | const auto ID = Env.getFileID(); |
| 96 | const auto LangOpts = getFormattingLangOpts(Style); |
| 97 | Lexer Lex(ID, SourceMgr.getBufferOrFake(FID: ID), SourceMgr, LangOpts); |
| 98 | Lex.SetCommentRetentionState(true); |
| 99 | |
| 100 | Token Tok; |
| 101 | tooling::Replacements Result; |
| 102 | |
| 103 | for (bool Skip = false; !Lex.LexFromRawLexer(Result&: Tok);) { |
| 104 | auto Length = Tok.getLength(); |
| 105 | if (Length < 2) |
| 106 | continue; |
| 107 | auto Location = Tok.getLocation(); |
| 108 | auto Text = StringRef(SourceMgr.getCharacterData(SL: Location), Length); |
| 109 | if (Tok.is(K: tok::comment)) { |
| 110 | if (isClangFormatOff(Comment: Text)) |
| 111 | Skip = true; |
| 112 | else if (isClangFormatOn(Comment: Text)) |
| 113 | Skip = false; |
| 114 | continue; |
| 115 | } |
| 116 | if (Skip || Tok.isNot(K: tok::numeric_constant) || Text[0] == '.' || |
| 117 | !AffectedRangeMgr.affectsCharSourceRange( |
| 118 | Range: CharSourceRange::getCharRange(B: Location, E: Tok.getEndLoc()))) { |
| 119 | continue; |
| 120 | } |
| 121 | const auto B = getBase(IntegerLiteral: Text); |
| 122 | const bool IsBase2 = B == Base::Binary; |
| 123 | const bool IsBase10 = B == Base::Decimal; |
| 124 | const bool IsBase16 = B == Base::Hex; |
| 125 | if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) || |
| 126 | (IsBase16 && SkipHex) || B == Base::Other) { |
| 127 | continue; |
| 128 | } |
| 129 | if (Style.isCpp()) { |
| 130 | // Hex alpha digits a-f/A-F must be at the end of the string literal. |
| 131 | static constexpr StringRef Suffixes("_himnsuyd" ); |
| 132 | if (const auto Pos = |
| 133 | Text.find_first_of(Chars: IsBase16 ? Suffixes.drop_back() : Suffixes); |
| 134 | Pos != StringRef::npos) { |
| 135 | Text = Text.substr(Start: 0, N: Pos); |
| 136 | Length = Pos; |
| 137 | } |
| 138 | } |
| 139 | if ((IsBase10 && Text.find_last_of(Chars: ".eEfFdDmM" ) != StringRef::npos) || |
| 140 | (IsBase16 && Text.find_last_of(Chars: ".pP" ) != StringRef::npos)) { |
| 141 | continue; |
| 142 | } |
| 143 | const auto Start = Text[0] == '0' ? 2 : 0; |
| 144 | auto End = Text.find_first_of(Chars: "uUlLzZn" , From: Start); |
| 145 | if (End == StringRef::npos) |
| 146 | End = Length; |
| 147 | if (Start > 0 || End < Length) { |
| 148 | Length = End - Start; |
| 149 | Text = Text.substr(Start, N: Length); |
| 150 | } |
| 151 | auto DigitsPerGroup = Decimal; |
| 152 | auto MinDigitsInsert = DecimalMinDigitsInsert; |
| 153 | auto MaxDigitsRemove = DecimalMaxDigitsRemove; |
| 154 | if (IsBase2) { |
| 155 | DigitsPerGroup = Binary; |
| 156 | MinDigitsInsert = BinaryMinDigitsInsert; |
| 157 | MaxDigitsRemove = BinaryMaxDigitsRemove; |
| 158 | } else if (IsBase16) { |
| 159 | DigitsPerGroup = Hex; |
| 160 | MinDigitsInsert = HexMinDigitsInsert; |
| 161 | MaxDigitsRemove = HexMaxDigitsRemove; |
| 162 | } |
| 163 | const auto SeparatorCount = Text.count(C: Separator); |
| 164 | const int DigitCount = Length - SeparatorCount; |
| 165 | if (DigitCount > MaxDigitsRemove && DigitCount < MinDigitsInsert) |
| 166 | continue; |
| 167 | const bool RemoveSeparator = |
| 168 | DigitsPerGroup < 0 || DigitCount <= MaxDigitsRemove; |
| 169 | if (RemoveSeparator && SeparatorCount == 0) |
| 170 | continue; |
| 171 | if (!RemoveSeparator && SeparatorCount > 0 && |
| 172 | checkSeparator(IntegerLiteral: Text, DigitsPerGroup)) { |
| 173 | continue; |
| 174 | } |
| 175 | const auto &Formatted = |
| 176 | format(IntegerLiteral: Text, DigitsPerGroup, DigitCount, RemoveSeparator); |
| 177 | assert(Formatted != Text); |
| 178 | if (Start > 0) |
| 179 | Location = Location.getLocWithOffset(Offset: Start); |
| 180 | cantFail(Err: Result.add( |
| 181 | R: tooling::Replacement(SourceMgr, Location, Length, Formatted))); |
| 182 | } |
| 183 | |
| 184 | return {Result, 0}; |
| 185 | } |
| 186 | |
| 187 | bool IntegerLiteralSeparatorFixer::checkSeparator(StringRef IntegerLiteral, |
| 188 | int DigitsPerGroup) const { |
| 189 | assert(DigitsPerGroup > 0); |
| 190 | |
| 191 | int I = 0; |
| 192 | for (auto C : llvm::reverse(C&: IntegerLiteral)) { |
| 193 | if (C == Separator) { |
| 194 | if (I < DigitsPerGroup) |
| 195 | return false; |
| 196 | I = 0; |
| 197 | } else { |
| 198 | if (I == DigitsPerGroup) |
| 199 | return false; |
| 200 | ++I; |
| 201 | } |
| 202 | } |
| 203 | |
| 204 | return true; |
| 205 | } |
| 206 | |
| 207 | std::string IntegerLiteralSeparatorFixer::format(StringRef IntegerLiteral, |
| 208 | int DigitsPerGroup, |
| 209 | int DigitCount, |
| 210 | bool RemoveSeparator) const { |
| 211 | assert(DigitsPerGroup != 0); |
| 212 | |
| 213 | std::string Formatted; |
| 214 | |
| 215 | if (RemoveSeparator) { |
| 216 | for (auto C : IntegerLiteral) |
| 217 | if (C != Separator) |
| 218 | Formatted.push_back(c: C); |
| 219 | return Formatted; |
| 220 | } |
| 221 | |
| 222 | int Remainder = DigitCount % DigitsPerGroup; |
| 223 | |
| 224 | int I = 0; |
| 225 | for (auto C : IntegerLiteral) { |
| 226 | if (C == Separator) |
| 227 | continue; |
| 228 | if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) { |
| 229 | Formatted.push_back(c: Separator); |
| 230 | I = 0; |
| 231 | Remainder = 0; |
| 232 | } |
| 233 | Formatted.push_back(c: C); |
| 234 | ++I; |
| 235 | } |
| 236 | |
| 237 | return Formatted; |
| 238 | } |
| 239 | |
| 240 | } // namespace format |
| 241 | } // namespace clang |
| 242 | |