1//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
/// This file implements IntegerLiteralSeparatorFixer, which fixes digit
/// separators in integer literals for C, C++, Objective-C, C#, Java, and
/// JavaScript.
12///
13//===----------------------------------------------------------------------===//
14
15#include "IntegerLiteralSeparatorFixer.h"
16
17namespace clang {
18namespace format {
19
20enum class Base { Binary, Decimal, Hex, Other };
21
22static Base getBase(StringRef IntegerLiteral) {
23 assert(IntegerLiteral.size() > 1);
24
25 if (IntegerLiteral[0] > '0') {
26 assert(IntegerLiteral[0] <= '9');
27 return Base::Decimal;
28 }
29
30 assert(IntegerLiteral[0] == '0');
31
32 switch (IntegerLiteral[1]) {
33 case 'b':
34 case 'B':
35 return Base::Binary;
36 case 'x':
37 case 'X':
38 return Base::Hex;
39 default:
40 return Base::Other;
41 }
42}
43
44std::pair<tooling::Replacements, unsigned>
45IntegerLiteralSeparatorFixer::process(const Environment &Env,
46 const FormatStyle &Style) {
47 switch (Style.Language) {
48 case FormatStyle::LK_CSharp:
49 case FormatStyle::LK_Java:
50 case FormatStyle::LK_JavaScript:
51 Separator = '_';
52 break;
53 case FormatStyle::LK_C:
54 case FormatStyle::LK_Cpp:
55 case FormatStyle::LK_ObjC:
56 if (Style.Standard >= FormatStyle::LS_Cpp14) {
57 Separator = '\'';
58 break;
59 }
60 [[fallthrough]];
61 default:
62 return {};
63 }
64
65 const auto &Option = Style.IntegerLiteralSeparator;
66 const auto Binary = Option.Binary;
67 const auto Decimal = Option.Decimal;
68 const auto Hex = Option.Hex;
69 const bool SkipBinary = Binary == 0;
70 const bool SkipDecimal = Decimal == 0;
71 const bool SkipHex = Hex == 0;
72
73 if (SkipBinary && SkipDecimal && SkipHex)
74 return {};
75
76 auto CalcMinAndMax = [](int DigitsPerGroup, int MinDigitsInsert,
77 int MaxDigitsRemove) {
78 MinDigitsInsert = std::max(a: MinDigitsInsert, b: DigitsPerGroup + 1);
79 if (MinDigitsInsert < 1)
80 MaxDigitsRemove = 0;
81 else if (MaxDigitsRemove < 1 || MaxDigitsRemove >= MinDigitsInsert)
82 MaxDigitsRemove = MinDigitsInsert - 1;
83 return std::pair(MinDigitsInsert, MaxDigitsRemove);
84 };
85
86 const auto [BinaryMinDigitsInsert, BinaryMaxDigitsRemove] = CalcMinAndMax(
87 Binary, Option.BinaryMinDigitsInsert, Option.BinaryMaxDigitsRemove);
88 const auto [DecimalMinDigitsInsert, DecimalMaxDigitsRemove] = CalcMinAndMax(
89 Decimal, Option.DecimalMinDigitsInsert, Option.DecimalMaxDigitsRemove);
90 const auto [HexMinDigitsInsert, HexMaxDigitsRemove] =
91 CalcMinAndMax(Hex, Option.HexMinDigitsInsert, Option.HexMaxDigitsRemove);
92
93 const auto &SourceMgr = Env.getSourceManager();
94 AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
95
96 const auto ID = Env.getFileID();
97 const auto LangOpts = getFormattingLangOpts(Style);
98 Lexer Lex(ID, SourceMgr.getBufferOrFake(FID: ID), SourceMgr, LangOpts);
99 Lex.SetCommentRetentionState(true);
100
101 Token Tok;
102 tooling::Replacements Result;
103
104 for (bool Skip = false; !Lex.LexFromRawLexer(Result&: Tok);) {
105 auto Length = Tok.getLength();
106 if (Length < 2)
107 continue;
108 auto Location = Tok.getLocation();
109 auto Text = StringRef(SourceMgr.getCharacterData(SL: Location), Length);
110 if (Tok.is(K: tok::comment)) {
111 if (isClangFormatOff(Comment: Text))
112 Skip = true;
113 else if (isClangFormatOn(Comment: Text))
114 Skip = false;
115 continue;
116 }
117 if (Skip || Tok.isNot(K: tok::numeric_constant) || Text[0] == '.' ||
118 !AffectedRangeMgr.affectsCharSourceRange(
119 Range: CharSourceRange::getCharRange(B: Location, E: Tok.getEndLoc()))) {
120 continue;
121 }
122 const auto B = getBase(IntegerLiteral: Text);
123 const bool IsBase2 = B == Base::Binary;
124 const bool IsBase10 = B == Base::Decimal;
125 const bool IsBase16 = B == Base::Hex;
126 if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
127 (IsBase16 && SkipHex) || B == Base::Other) {
128 continue;
129 }
130 if (Style.isCpp()) {
131 // Hex alpha digits a-f/A-F must be at the end of the string literal.
132 static constexpr StringRef Suffixes("_himnsuyd");
133 if (const auto Pos =
134 Text.find_first_of(Chars: IsBase16 ? Suffixes.drop_back() : Suffixes);
135 Pos != StringRef::npos) {
136 Text = Text.substr(Start: 0, N: Pos);
137 Length = Pos;
138 }
139 }
140 if ((IsBase10 && Text.find_last_of(Chars: ".eEfFdDmM") != StringRef::npos) ||
141 (IsBase16 && Text.find_last_of(Chars: ".pP") != StringRef::npos)) {
142 continue;
143 }
144 const auto Start = Text[0] == '0' ? 2 : 0;
145 auto End = Text.find_first_of(Chars: "uUlLzZn", From: Start);
146 if (End == StringRef::npos)
147 End = Length;
148 if (Start > 0 || End < Length) {
149 Length = End - Start;
150 Text = Text.substr(Start, N: Length);
151 }
152 auto DigitsPerGroup = Decimal;
153 auto MinDigitsInsert = DecimalMinDigitsInsert;
154 auto MaxDigitsRemove = DecimalMaxDigitsRemove;
155 if (IsBase2) {
156 DigitsPerGroup = Binary;
157 MinDigitsInsert = BinaryMinDigitsInsert;
158 MaxDigitsRemove = BinaryMaxDigitsRemove;
159 } else if (IsBase16) {
160 DigitsPerGroup = Hex;
161 MinDigitsInsert = HexMinDigitsInsert;
162 MaxDigitsRemove = HexMaxDigitsRemove;
163 }
164 const auto SeparatorCount = Text.count(C: Separator);
165 const int DigitCount = Length - SeparatorCount;
166 if (DigitCount > MaxDigitsRemove && DigitCount < MinDigitsInsert)
167 continue;
168 const bool RemoveSeparator =
169 DigitsPerGroup < 0 || DigitCount <= MaxDigitsRemove;
170 if (RemoveSeparator && SeparatorCount == 0)
171 continue;
172 if (!RemoveSeparator && SeparatorCount > 0 &&
173 checkSeparator(IntegerLiteral: Text, DigitsPerGroup)) {
174 continue;
175 }
176 const auto &Formatted =
177 format(IntegerLiteral: Text, DigitsPerGroup, DigitCount, RemoveSeparator);
178 assert(Formatted != Text);
179 if (Start > 0)
180 Location = Location.getLocWithOffset(Offset: Start);
181 cantFail(Err: Result.add(
182 R: tooling::Replacement(SourceMgr, Location, Length, Formatted)));
183 }
184
185 return {Result, 0};
186}
187
188bool IntegerLiteralSeparatorFixer::checkSeparator(StringRef IntegerLiteral,
189 int DigitsPerGroup) const {
190 assert(DigitsPerGroup > 0);
191
192 int I = 0;
193 for (auto C : llvm::reverse(C&: IntegerLiteral)) {
194 if (C == Separator) {
195 if (I < DigitsPerGroup)
196 return false;
197 I = 0;
198 } else {
199 if (I == DigitsPerGroup)
200 return false;
201 ++I;
202 }
203 }
204
205 return true;
206}
207
208std::string IntegerLiteralSeparatorFixer::format(StringRef IntegerLiteral,
209 int DigitsPerGroup,
210 int DigitCount,
211 bool RemoveSeparator) const {
212 assert(DigitsPerGroup != 0);
213
214 std::string Formatted;
215
216 if (RemoveSeparator) {
217 for (auto C : IntegerLiteral)
218 if (C != Separator)
219 Formatted.push_back(c: C);
220 return Formatted;
221 }
222
223 int Remainder = DigitCount % DigitsPerGroup;
224
225 int I = 0;
226 for (auto C : IntegerLiteral) {
227 if (C == Separator)
228 continue;
229 if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
230 Formatted.push_back(c: Separator);
231 I = 0;
232 Remainder = 0;
233 }
234 Formatted.push_back(c: C);
235 ++I;
236 }
237
238 return Formatted;
239}
240
241} // namespace format
242} // namespace clang
243