| 1 | //===- FormatVariadic.cpp - Format string parsing and analysis ----*-C++-*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | //===----------------------------------------------------------------------===// |
| 7 | |
| 8 | #include "llvm/Support/FormatVariadic.h" |
| 9 | #include <cassert> |
| 10 | #include <optional> |
| 11 | |
| 12 | using namespace llvm; |
| 13 | |
| 14 | static std::optional<AlignStyle> translateLocChar(char C) { |
| 15 | switch (C) { |
| 16 | case '-': |
| 17 | return AlignStyle::Left; |
| 18 | case '=': |
| 19 | return AlignStyle::Center; |
| 20 | case '+': |
| 21 | return AlignStyle::Right; |
| 22 | default: |
| 23 | return std::nullopt; |
| 24 | } |
| 25 | LLVM_BUILTIN_UNREACHABLE; |
| 26 | } |
| 27 | |
| 28 | static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where, |
| 29 | unsigned &Align, char &Pad) { |
| 30 | Where = AlignStyle::Right; |
| 31 | Align = 0; |
| 32 | Pad = ' '; |
| 33 | if (Spec.empty()) |
| 34 | return true; |
| 35 | |
| 36 | if (Spec.size() > 1) { |
| 37 | // A maximum of 2 characters at the beginning can be used for something |
| 38 | // other than the width. |
| 39 | // If Spec[1] is a loc char, then Spec[0] is a pad char and Spec[2:...] |
| 40 | // contains the width. |
| 41 | // Otherwise, if Spec[0] is a loc char, then Spec[1:...] contains the width. |
| 42 | // Otherwise, Spec[0:...] contains the width. |
| 43 | if (auto Loc = translateLocChar(C: Spec[1])) { |
| 44 | Pad = Spec[0]; |
| 45 | Where = *Loc; |
| 46 | Spec = Spec.drop_front(N: 2); |
| 47 | } else if (auto Loc = translateLocChar(C: Spec[0])) { |
| 48 | Where = *Loc; |
| 49 | Spec = Spec.drop_front(N: 1); |
| 50 | } |
| 51 | } |
| 52 | |
| 53 | bool Failed = Spec.consumeInteger(Radix: 0, Result&: Align); |
| 54 | return !Failed; |
| 55 | } |
| 56 | |
| 57 | static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec) { |
| 58 | StringRef RepString = Spec.trim(Chars: "{}" ); |
| 59 | |
| 60 | // If the replacement sequence does not start with a non-negative integer, |
| 61 | // this is an error. |
| 62 | char Pad = ' '; |
| 63 | unsigned Align = 0; |
| 64 | AlignStyle Where = AlignStyle::Right; |
| 65 | StringRef Options; |
| 66 | unsigned Index = ~0U; |
| 67 | RepString = RepString.ltrim(); |
| 68 | |
| 69 | // If index is not specified, keep it ~0U to indicate unresolved index. |
| 70 | RepString.consumeInteger(Radix: 0, Result&: Index); |
| 71 | |
| 72 | if (RepString.consume_front(Prefix: "," )) { |
| 73 | if (!consumeFieldLayout(Spec&: RepString, Where, Align, Pad)) { |
| 74 | assert(false && "Invalid replacement field layout specification!" ); |
| 75 | return std::nullopt; |
| 76 | } |
| 77 | } |
| 78 | RepString = RepString.ltrim(); |
| 79 | if (RepString.consume_front(Prefix: ":" )) { |
| 80 | Options = RepString; |
| 81 | RepString = StringRef(); |
| 82 | } |
| 83 | RepString = RepString.trim(); |
| 84 | if (!RepString.empty()) { |
| 85 | assert(0 && "Unexpected characters found in replacement string!" ); |
| 86 | return std::nullopt; |
| 87 | } |
| 88 | |
| 89 | return ReplacementItem(Spec, Index, Align, Where, Pad, Options); |
| 90 | } |
| 91 | |
| 92 | static std::pair<std::optional<ReplacementItem>, StringRef> |
| 93 | splitLiteralAndReplacement(StringRef Fmt) { |
| 94 | assert(!Fmt.empty()); |
| 95 | // Everything up until the first brace is a literal. |
| 96 | if (Fmt.front() != '{') { |
| 97 | size_t BO = Fmt.find_first_of(C: '{'); |
| 98 | return {ReplacementItem{Fmt.substr(Start: 0, N: BO)}, Fmt.substr(Start: BO)}; |
| 99 | } |
| 100 | |
| 101 | StringRef Braces = Fmt.take_while(F: [](char C) { return C == '{'; }); |
| 102 | // If there is more than one brace, then some of them are escaped. Treat |
| 103 | // these as replacements. |
| 104 | if (Braces.size() > 1) { |
| 105 | size_t NumEscapedBraces = Braces.size() / 2; |
| 106 | StringRef Middle = Fmt.take_front(N: NumEscapedBraces); |
| 107 | StringRef Right = Fmt.drop_front(N: NumEscapedBraces * 2); |
| 108 | return {ReplacementItem(Middle), Right}; |
| 109 | } |
| 110 | // An unterminated open brace is undefined. Assert to indicate that this is |
| 111 | // undefined and that we consider it an error. When asserts are disabled, |
| 112 | // build a replacement item with an error message. |
| 113 | size_t BC = Fmt.find_first_of(C: '}'); |
| 114 | if (BC == StringRef::npos) { |
| 115 | assert(false && |
| 116 | "Unterminated brace sequence. Escape with {{ for a literal brace." ); |
| 117 | return {ReplacementItem("Unterminated brace sequence. Escape with {{ for a " |
| 118 | "literal brace." ), |
| 119 | StringRef()}; |
| 120 | } |
| 121 | |
| 122 | // Even if there is a closing brace, if there is another open brace before |
| 123 | // this closing brace, treat this portion as literal, and try again with the |
| 124 | // next one. |
| 125 | size_t BO2 = Fmt.find_first_of(C: '{', From: 1); |
| 126 | if (BO2 < BC) |
| 127 | return {ReplacementItem(Fmt.substr(Start: 0, N: BO2)), Fmt.substr(Start: BO2)}; |
| 128 | |
| 129 | StringRef Spec = Fmt.slice(Start: 1, End: BC); |
| 130 | StringRef Right = Fmt.substr(Start: BC + 1); |
| 131 | |
| 132 | return {parseReplacementItem(Spec), Right}; |
| 133 | } |
| 134 | |
// Format-string validation is compiled in for assert-enabled builds only.
#ifndef NDEBUG
#define ENABLE_VALIDATION 1
#else
// Change this to 1 to conveniently enable validation in release mode.
#define ENABLE_VALIDATION 0
#endif
| 140 | |
| 141 | SmallVector<ReplacementItem, 2> |
| 142 | formatv_object_base::parseFormatString(StringRef Fmt, size_t NumArgs, |
| 143 | bool Validate) { |
| 144 | SmallVector<ReplacementItem, 2> Replacements; |
| 145 | unsigned NextAutomaticIndex = 0; |
| 146 | |
| 147 | #if ENABLE_VALIDATION |
| 148 | const StringRef SavedFmtStr = Fmt; |
| 149 | unsigned NumExpectedArgs = 0; |
| 150 | bool HasExplicitIndex = false; |
| 151 | #endif |
| 152 | |
| 153 | while (!Fmt.empty()) { |
| 154 | std::optional<ReplacementItem> I; |
| 155 | std::tie(args&: I, args&: Fmt) = splitLiteralAndReplacement(Fmt); |
| 156 | if (!I) |
| 157 | continue; |
| 158 | if (I->Type == ReplacementType::Format) { |
| 159 | if (I->Index == ~0U) |
| 160 | I->Index = NextAutomaticIndex++; |
| 161 | #if ENABLE_VALIDATION |
| 162 | else |
| 163 | HasExplicitIndex = true; |
| 164 | NumExpectedArgs = std::max(NumExpectedArgs, I->Index + 1); |
| 165 | #endif |
| 166 | } |
| 167 | |
| 168 | Replacements.emplace_back(Args&: *I); |
| 169 | } |
| 170 | |
| 171 | #if ENABLE_VALIDATION |
| 172 | if (!Validate) |
| 173 | return Replacements; |
| 174 | |
| 175 | // Perform additional validation. Verify that the number of arguments matches |
| 176 | // the number of replacement indices and that there are no holes in the |
| 177 | // replacement indices. |
| 178 | |
| 179 | // When validation fails, return an array of replacement items that |
| 180 | // will print an error message as the outout of this formatv() (used when |
| 181 | // validation is enabled in release mode). |
| 182 | auto getErrorReplacements = [SavedFmtStr](StringLiteral ErrorMsg) { |
| 183 | return SmallVector<ReplacementItem, 2>{ |
| 184 | ReplacementItem("Invalid formatv() call: " ), ReplacementItem(ErrorMsg), |
| 185 | ReplacementItem(" for format string: " ), ReplacementItem(SavedFmtStr)}; |
| 186 | }; |
| 187 | |
| 188 | if (NumExpectedArgs != NumArgs) { |
| 189 | errs() << formatv("Expected {} Args, but got {} for format string '{}'\n" , |
| 190 | NumExpectedArgs, NumArgs, SavedFmtStr); |
| 191 | assert(0 && "Invalid formatv() call" ); |
| 192 | return getErrorReplacements("Unexpected number of arguments" ); |
| 193 | } |
| 194 | |
| 195 | // Find the number of unique indices seen. All replacement indices |
| 196 | // are < NumExpectedArgs. |
| 197 | SmallVector<bool> Indices(NumExpectedArgs); |
| 198 | unsigned Count = 0; |
| 199 | for (const ReplacementItem &I : Replacements) { |
| 200 | if (I.Type != ReplacementType::Format || Indices[I.Index]) |
| 201 | continue; |
| 202 | Indices[I.Index] = true; |
| 203 | ++Count; |
| 204 | } |
| 205 | |
| 206 | if (Count != NumExpectedArgs) { |
| 207 | errs() << formatv( |
| 208 | "Replacement field indices cannot have holes for format string '{}'\n" , |
| 209 | SavedFmtStr); |
| 210 | assert(0 && "Invalid format string" ); |
| 211 | return getErrorReplacements("Replacement indices have holes" ); |
| 212 | } |
| 213 | |
| 214 | // Fail validation if we see both automatic index and explicit index. |
| 215 | if (NextAutomaticIndex != 0 && HasExplicitIndex) { |
| 216 | errs() << formatv( |
| 217 | "Cannot mix automatic and explicit indices for format string '{}'\n" , |
| 218 | SavedFmtStr); |
| 219 | assert(0 && "Invalid format string" ); |
| 220 | return getErrorReplacements("Cannot mix automatic and explicit indices" ); |
| 221 | } |
| 222 | #endif // ENABLE_VALIDATION |
| 223 | return Replacements; |
| 224 | } |
| 225 | |
| 226 | void support::detail::format_adapter::anchor() {} |
| 227 | |