1 | //===- FormatVariadic.cpp - Format string parsing and analysis ----*-C++-*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | //===----------------------------------------------------------------------===// |
7 | |
8 | #include "llvm/Support/FormatVariadic.h" |
9 | #include <cassert> |
10 | #include <optional> |
11 | |
12 | using namespace llvm; |
13 | |
14 | static std::optional<AlignStyle> translateLocChar(char C) { |
15 | switch (C) { |
16 | case '-': |
17 | return AlignStyle::Left; |
18 | case '=': |
19 | return AlignStyle::Center; |
20 | case '+': |
21 | return AlignStyle::Right; |
22 | default: |
23 | return std::nullopt; |
24 | } |
25 | LLVM_BUILTIN_UNREACHABLE; |
26 | } |
27 | |
28 | static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where, |
29 | unsigned &Align, char &Pad) { |
30 | Where = AlignStyle::Right; |
31 | Align = 0; |
32 | Pad = ' '; |
33 | if (Spec.empty()) |
34 | return true; |
35 | |
36 | if (Spec.size() > 1) { |
37 | // A maximum of 2 characters at the beginning can be used for something |
38 | // other than the width. |
39 | // If Spec[1] is a loc char, then Spec[0] is a pad char and Spec[2:...] |
40 | // contains the width. |
41 | // Otherwise, if Spec[0] is a loc char, then Spec[1:...] contains the width. |
42 | // Otherwise, Spec[0:...] contains the width. |
43 | if (auto Loc = translateLocChar(C: Spec[1])) { |
44 | Pad = Spec[0]; |
45 | Where = *Loc; |
46 | Spec = Spec.drop_front(N: 2); |
47 | } else if (auto Loc = translateLocChar(C: Spec[0])) { |
48 | Where = *Loc; |
49 | Spec = Spec.drop_front(N: 1); |
50 | } |
51 | } |
52 | |
53 | bool Failed = Spec.consumeInteger(Radix: 0, Result&: Align); |
54 | return !Failed; |
55 | } |
56 | |
57 | static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec) { |
58 | StringRef RepString = Spec.trim(Chars: "{}" ); |
59 | |
60 | // If the replacement sequence does not start with a non-negative integer, |
61 | // this is an error. |
62 | char Pad = ' '; |
63 | unsigned Align = 0; |
64 | AlignStyle Where = AlignStyle::Right; |
65 | StringRef Options; |
66 | unsigned Index = ~0U; |
67 | RepString = RepString.ltrim(); |
68 | |
69 | // If index is not specified, keep it ~0U to indicate unresolved index. |
70 | RepString.consumeInteger(Radix: 0, Result&: Index); |
71 | |
72 | if (RepString.consume_front(Prefix: "," )) { |
73 | if (!consumeFieldLayout(Spec&: RepString, Where, Align, Pad)) { |
74 | assert(false && "Invalid replacement field layout specification!" ); |
75 | return std::nullopt; |
76 | } |
77 | } |
78 | RepString = RepString.ltrim(); |
79 | if (RepString.consume_front(Prefix: ":" )) { |
80 | Options = RepString; |
81 | RepString = StringRef(); |
82 | } |
83 | RepString = RepString.trim(); |
84 | if (!RepString.empty()) { |
85 | assert(0 && "Unexpected characters found in replacement string!" ); |
86 | return std::nullopt; |
87 | } |
88 | |
89 | return ReplacementItem(Spec, Index, Align, Where, Pad, Options); |
90 | } |
91 | |
92 | static std::pair<std::optional<ReplacementItem>, StringRef> |
93 | splitLiteralAndReplacement(StringRef Fmt) { |
94 | assert(!Fmt.empty()); |
95 | // Everything up until the first brace is a literal. |
96 | if (Fmt.front() != '{') { |
97 | size_t BO = Fmt.find_first_of(C: '{'); |
98 | return {ReplacementItem{Fmt.substr(Start: 0, N: BO)}, Fmt.substr(Start: BO)}; |
99 | } |
100 | |
101 | StringRef Braces = Fmt.take_while(F: [](char C) { return C == '{'; }); |
102 | // If there is more than one brace, then some of them are escaped. Treat |
103 | // these as replacements. |
104 | if (Braces.size() > 1) { |
105 | size_t NumEscapedBraces = Braces.size() / 2; |
106 | StringRef Middle = Fmt.take_front(N: NumEscapedBraces); |
107 | StringRef Right = Fmt.drop_front(N: NumEscapedBraces * 2); |
108 | return {ReplacementItem(Middle), Right}; |
109 | } |
110 | // An unterminated open brace is undefined. Assert to indicate that this is |
111 | // undefined and that we consider it an error. When asserts are disabled, |
112 | // build a replacement item with an error message. |
113 | size_t BC = Fmt.find_first_of(C: '}'); |
114 | if (BC == StringRef::npos) { |
115 | assert(false && |
116 | "Unterminated brace sequence. Escape with {{ for a literal brace." ); |
117 | return {ReplacementItem("Unterminated brace sequence. Escape with {{ for a " |
118 | "literal brace." ), |
119 | StringRef()}; |
120 | } |
121 | |
122 | // Even if there is a closing brace, if there is another open brace before |
123 | // this closing brace, treat this portion as literal, and try again with the |
124 | // next one. |
125 | size_t BO2 = Fmt.find_first_of(C: '{', From: 1); |
126 | if (BO2 < BC) |
127 | return {ReplacementItem(Fmt.substr(Start: 0, N: BO2)), Fmt.substr(Start: BO2)}; |
128 | |
129 | StringRef Spec = Fmt.slice(Start: 1, End: BC); |
130 | StringRef Right = Fmt.substr(Start: BC + 1); |
131 | |
132 | return {parseReplacementItem(Spec), Right}; |
133 | } |
134 | |
// ENABLE_VALIDATION controls whether the format-string validation code below
// is compiled in. It is on whenever NDEBUG is not defined.
#ifdef NDEBUG
// Change this 0 to 1 to conveniently enable validation in release mode.
#define ENABLE_VALIDATION 0
#else
#define ENABLE_VALIDATION 1
#endif
140 | |
141 | SmallVector<ReplacementItem, 2> |
142 | formatv_object_base::parseFormatString(StringRef Fmt, size_t NumArgs, |
143 | bool Validate) { |
144 | SmallVector<ReplacementItem, 2> Replacements; |
145 | unsigned NextAutomaticIndex = 0; |
146 | |
147 | #if ENABLE_VALIDATION |
148 | const StringRef SavedFmtStr = Fmt; |
149 | unsigned NumExpectedArgs = 0; |
150 | bool HasExplicitIndex = false; |
151 | #endif |
152 | |
153 | while (!Fmt.empty()) { |
154 | std::optional<ReplacementItem> I; |
155 | std::tie(args&: I, args&: Fmt) = splitLiteralAndReplacement(Fmt); |
156 | if (!I) |
157 | continue; |
158 | if (I->Type == ReplacementType::Format) { |
159 | if (I->Index == ~0U) |
160 | I->Index = NextAutomaticIndex++; |
161 | #if ENABLE_VALIDATION |
162 | else |
163 | HasExplicitIndex = true; |
164 | NumExpectedArgs = std::max(NumExpectedArgs, I->Index + 1); |
165 | #endif |
166 | } |
167 | |
168 | Replacements.emplace_back(Args&: *I); |
169 | } |
170 | |
171 | #if ENABLE_VALIDATION |
172 | if (!Validate) |
173 | return Replacements; |
174 | |
175 | // Perform additional validation. Verify that the number of arguments matches |
176 | // the number of replacement indices and that there are no holes in the |
177 | // replacement indices. |
178 | |
179 | // When validation fails, return an array of replacement items that |
180 | // will print an error message as the outout of this formatv() (used when |
181 | // validation is enabled in release mode). |
182 | auto getErrorReplacements = [SavedFmtStr](StringLiteral ErrorMsg) { |
183 | return SmallVector<ReplacementItem, 2>{ |
184 | ReplacementItem("Invalid formatv() call: " ), ReplacementItem(ErrorMsg), |
185 | ReplacementItem(" for format string: " ), ReplacementItem(SavedFmtStr)}; |
186 | }; |
187 | |
188 | if (NumExpectedArgs != NumArgs) { |
189 | errs() << formatv("Expected {} Args, but got {} for format string '{}'\n" , |
190 | NumExpectedArgs, NumArgs, SavedFmtStr); |
191 | assert(0 && "Invalid formatv() call" ); |
192 | return getErrorReplacements("Unexpected number of arguments" ); |
193 | } |
194 | |
195 | // Find the number of unique indices seen. All replacement indices |
196 | // are < NumExpectedArgs. |
197 | SmallVector<bool> Indices(NumExpectedArgs); |
198 | unsigned Count = 0; |
199 | for (const ReplacementItem &I : Replacements) { |
200 | if (I.Type != ReplacementType::Format || Indices[I.Index]) |
201 | continue; |
202 | Indices[I.Index] = true; |
203 | ++Count; |
204 | } |
205 | |
206 | if (Count != NumExpectedArgs) { |
207 | errs() << formatv( |
208 | "Replacement field indices cannot have holes for format string '{}'\n" , |
209 | SavedFmtStr); |
210 | assert(0 && "Invalid format string" ); |
211 | return getErrorReplacements("Replacement indices have holes" ); |
212 | } |
213 | |
214 | // Fail validation if we see both automatic index and explicit index. |
215 | if (NextAutomaticIndex != 0 && HasExplicitIndex) { |
216 | errs() << formatv( |
217 | "Cannot mix automatic and explicit indices for format string '{}'\n" , |
218 | SavedFmtStr); |
219 | assert(0 && "Invalid format string" ); |
220 | return getErrorReplacements("Cannot mix automatic and explicit indices" ); |
221 | } |
222 | #endif // ENABLE_VALIDATION |
223 | return Replacements; |
224 | } |
225 | |
226 | void support::detail::format_adapter::anchor() {} |
227 | |