1//===- FormatVariadic.cpp - Format string parsing and analysis ----*-C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//===----------------------------------------------------------------------===//
7
8#include "llvm/Support/FormatVariadic.h"
9#include <cassert>
10#include <optional>
11
12using namespace llvm;
13
14static std::optional<AlignStyle> translateLocChar(char C) {
15 switch (C) {
16 case '-':
17 return AlignStyle::Left;
18 case '=':
19 return AlignStyle::Center;
20 case '+':
21 return AlignStyle::Right;
22 default:
23 return std::nullopt;
24 }
25 LLVM_BUILTIN_UNREACHABLE;
26}
27
28static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
29 unsigned &Align, char &Pad) {
30 Where = AlignStyle::Right;
31 Align = 0;
32 Pad = ' ';
33 if (Spec.empty())
34 return true;
35
36 if (Spec.size() > 1) {
37 // A maximum of 2 characters at the beginning can be used for something
38 // other than the width.
39 // If Spec[1] is a loc char, then Spec[0] is a pad char and Spec[2:...]
40 // contains the width.
41 // Otherwise, if Spec[0] is a loc char, then Spec[1:...] contains the width.
42 // Otherwise, Spec[0:...] contains the width.
43 if (auto Loc = translateLocChar(C: Spec[1])) {
44 Pad = Spec[0];
45 Where = *Loc;
46 Spec = Spec.drop_front(N: 2);
47 } else if (auto Loc = translateLocChar(C: Spec[0])) {
48 Where = *Loc;
49 Spec = Spec.drop_front(N: 1);
50 }
51 }
52
53 bool Failed = Spec.consumeInteger(Radix: 0, Result&: Align);
54 return !Failed;
55}
56
57static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec) {
58 StringRef RepString = Spec.trim(Chars: "{}");
59
60 // If the replacement sequence does not start with a non-negative integer,
61 // this is an error.
62 char Pad = ' ';
63 unsigned Align = 0;
64 AlignStyle Where = AlignStyle::Right;
65 StringRef Options;
66 unsigned Index = ~0U;
67 RepString = RepString.ltrim();
68
69 // If index is not specified, keep it ~0U to indicate unresolved index.
70 RepString.consumeInteger(Radix: 0, Result&: Index);
71
72 if (RepString.consume_front(Prefix: ",")) {
73 if (!consumeFieldLayout(Spec&: RepString, Where, Align, Pad)) {
74 assert(false && "Invalid replacement field layout specification!");
75 return std::nullopt;
76 }
77 }
78 RepString = RepString.ltrim();
79 if (RepString.consume_front(Prefix: ":")) {
80 Options = RepString;
81 RepString = StringRef();
82 }
83 RepString = RepString.trim();
84 if (!RepString.empty()) {
85 assert(0 && "Unexpected characters found in replacement string!");
86 return std::nullopt;
87 }
88
89 return ReplacementItem(Spec, Index, Align, Where, Pad, Options);
90}
91
92static std::pair<std::optional<ReplacementItem>, StringRef>
93splitLiteralAndReplacement(StringRef Fmt) {
94 assert(!Fmt.empty());
95 // Everything up until the first brace is a literal.
96 if (Fmt.front() != '{') {
97 size_t BO = Fmt.find_first_of(C: '{');
98 return {ReplacementItem{Fmt.substr(Start: 0, N: BO)}, Fmt.substr(Start: BO)};
99 }
100
101 StringRef Braces = Fmt.take_while(F: [](char C) { return C == '{'; });
102 // If there is more than one brace, then some of them are escaped. Treat
103 // these as replacements.
104 if (Braces.size() > 1) {
105 size_t NumEscapedBraces = Braces.size() / 2;
106 StringRef Middle = Fmt.take_front(N: NumEscapedBraces);
107 StringRef Right = Fmt.drop_front(N: NumEscapedBraces * 2);
108 return {ReplacementItem(Middle), Right};
109 }
110 // An unterminated open brace is undefined. Assert to indicate that this is
111 // undefined and that we consider it an error. When asserts are disabled,
112 // build a replacement item with an error message.
113 size_t BC = Fmt.find_first_of(C: '}');
114 if (BC == StringRef::npos) {
115 assert(false &&
116 "Unterminated brace sequence. Escape with {{ for a literal brace.");
117 return {ReplacementItem("Unterminated brace sequence. Escape with {{ for a "
118 "literal brace."),
119 StringRef()};
120 }
121
122 // Even if there is a closing brace, if there is another open brace before
123 // this closing brace, treat this portion as literal, and try again with the
124 // next one.
125 size_t BO2 = Fmt.find_first_of(C: '{', From: 1);
126 if (BO2 < BC)
127 return {ReplacementItem(Fmt.substr(Start: 0, N: BO2)), Fmt.substr(Start: BO2)};
128
129 StringRef Spec = Fmt.slice(Start: 1, End: BC);
130 StringRef Right = Fmt.substr(Start: BC + 1);
131
132 return {parseReplacementItem(Spec), Right};
133}
134
// ENABLE_VALIDATION gates the format-string validation code in
// parseFormatString(). It is on whenever NDEBUG is not defined.
#ifndef NDEBUG
#define ENABLE_VALIDATION 1
#else
// Validation is off when NDEBUG is defined; set this to 1 to conveniently
// enable validation in release mode.
#define ENABLE_VALIDATION 0
#endif
140
141SmallVector<ReplacementItem, 2>
142formatv_object_base::parseFormatString(StringRef Fmt, size_t NumArgs,
143 bool Validate) {
144 SmallVector<ReplacementItem, 2> Replacements;
145 unsigned NextAutomaticIndex = 0;
146
147#if ENABLE_VALIDATION
148 const StringRef SavedFmtStr = Fmt;
149 unsigned NumExpectedArgs = 0;
150 bool HasExplicitIndex = false;
151#endif
152
153 while (!Fmt.empty()) {
154 std::optional<ReplacementItem> I;
155 std::tie(args&: I, args&: Fmt) = splitLiteralAndReplacement(Fmt);
156 if (!I)
157 continue;
158 if (I->Type == ReplacementType::Format) {
159 if (I->Index == ~0U)
160 I->Index = NextAutomaticIndex++;
161#if ENABLE_VALIDATION
162 else
163 HasExplicitIndex = true;
164 NumExpectedArgs = std::max(NumExpectedArgs, I->Index + 1);
165#endif
166 }
167
168 Replacements.emplace_back(Args&: *I);
169 }
170
171#if ENABLE_VALIDATION
172 if (!Validate)
173 return Replacements;
174
175 // Perform additional validation. Verify that the number of arguments matches
176 // the number of replacement indices and that there are no holes in the
177 // replacement indices.
178
179 // When validation fails, return an array of replacement items that
180 // will print an error message as the outout of this formatv() (used when
181 // validation is enabled in release mode).
182 auto getErrorReplacements = [SavedFmtStr](StringLiteral ErrorMsg) {
183 return SmallVector<ReplacementItem, 2>{
184 ReplacementItem("Invalid formatv() call: "), ReplacementItem(ErrorMsg),
185 ReplacementItem(" for format string: "), ReplacementItem(SavedFmtStr)};
186 };
187
188 if (NumExpectedArgs != NumArgs) {
189 errs() << formatv("Expected {} Args, but got {} for format string '{}'\n",
190 NumExpectedArgs, NumArgs, SavedFmtStr);
191 assert(0 && "Invalid formatv() call");
192 return getErrorReplacements("Unexpected number of arguments");
193 }
194
195 // Find the number of unique indices seen. All replacement indices
196 // are < NumExpectedArgs.
197 SmallVector<bool> Indices(NumExpectedArgs);
198 unsigned Count = 0;
199 for (const ReplacementItem &I : Replacements) {
200 if (I.Type != ReplacementType::Format || Indices[I.Index])
201 continue;
202 Indices[I.Index] = true;
203 ++Count;
204 }
205
206 if (Count != NumExpectedArgs) {
207 errs() << formatv(
208 "Replacement field indices cannot have holes for format string '{}'\n",
209 SavedFmtStr);
210 assert(0 && "Invalid format string");
211 return getErrorReplacements("Replacement indices have holes");
212 }
213
214 // Fail validation if we see both automatic index and explicit index.
215 if (NextAutomaticIndex != 0 && HasExplicitIndex) {
216 errs() << formatv(
217 "Cannot mix automatic and explicit indices for format string '{}'\n",
218 SavedFmtStr);
219 assert(0 && "Invalid format string");
220 return getErrorReplacements("Cannot mix automatic and explicit indices");
221 }
222#endif // ENABLE_VALIDATION
223 return Replacements;
224}
225
// Intentionally empty out-of-line definition.
// NOTE(review): presumably the "virtual method anchor" that LLVM's coding
// standards ask for, so format_adapter's vtable is emitted in this file only
// -- confirm against the class declaration in the header.
void support::detail::format_adapter::anchor() {}
227