| 1 | //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "clang/Tooling/Transformer/Parsing.h" |
| 10 | #include "clang/Basic/CharInfo.h" |
| 11 | #include "clang/Tooling/Transformer/RangeSelector.h" |
| 12 | #include "llvm/ADT/StringMap.h" |
| 13 | #include "llvm/ADT/StringRef.h" |
| 14 | #include "llvm/Support/Error.h" |
| 15 | #include <optional> |
| 16 | #include <string> |
| 17 | #include <utility> |
| 18 | |
| 19 | using namespace clang; |
| 20 | using namespace transformer; |
| 21 | |
| 22 | // FIXME: This implementation is entirely separate from that of the AST |
| 23 | // matchers. Given the similarity of the languages and uses of the two parsers, |
| 24 | // the two should share a common parsing infrastructure, as should other |
| 25 | // Transformer types. We intend to unify this implementation soon to share as |
| 26 | // much as possible with the AST Matchers parsing. |
| 27 | |
| 28 | namespace { |
| 29 | using llvm::Expected; |
| 30 | |
| 31 | template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...); |
| 32 | |
| 33 | struct ParseState { |
| 34 | // The remaining input to be processed. |
| 35 | StringRef Input; |
| 36 | // The original input. Not modified during parsing; only for reference in |
| 37 | // error reporting. |
| 38 | StringRef OriginalInput; |
| 39 | }; |
| 40 | |
| 41 | // Represents an intermediate result returned by a parsing function. Functions |
| 42 | // that don't generate values should use `std::nullopt` |
| 43 | template <typename ResultType> struct ParseProgress { |
| 44 | ParseState State; |
| 45 | // Intermediate result generated by the Parser. |
| 46 | ResultType Value; |
| 47 | }; |
| 48 | |
| 49 | template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>; |
| 50 | template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState); |
| 51 | |
| 52 | class ParseError : public llvm::ErrorInfo<ParseError> { |
| 53 | public: |
| 54 | // Required field for all ErrorInfo derivatives. |
| 55 | static char ID; |
| 56 | |
| 57 | ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt) |
| 58 | : Pos(Pos), ErrorMsg(std::move(ErrorMsg)), |
| 59 | Excerpt(std::move(InputExcerpt)) {} |
| 60 | |
| 61 | void log(llvm::raw_ostream &OS) const override { |
| 62 | OS << "parse error at position (" << Pos << "): " << ErrorMsg |
| 63 | << ": " + Excerpt; |
| 64 | } |
| 65 | |
| 66 | std::error_code convertToErrorCode() const override { |
| 67 | return llvm::inconvertibleErrorCode(); |
| 68 | } |
| 69 | |
| 70 | // Position of the error in the input string. |
| 71 | size_t Pos; |
| 72 | std::string ErrorMsg; |
| 73 | // Excerpt of the input starting at the error position. |
| 74 | std::string Excerpt; |
| 75 | }; |
| 76 | |
| 77 | char ParseError::ID; |
| 78 | } // namespace |
| 79 | |
| 80 | static const llvm::StringMap<RangeSelectorOp<std::string>> & |
| 81 | getUnaryStringSelectors() { |
| 82 | static const llvm::StringMap<RangeSelectorOp<std::string>> M = { |
| 83 | {"name" , name}, |
| 84 | {"node" , node}, |
| 85 | {"statement" , statement}, |
| 86 | {"statements" , statements}, |
| 87 | {"member" , member}, |
| 88 | {"callArgs" , callArgs}, |
| 89 | {"elseBranch" , elseBranch}, |
| 90 | {"initListElements" , initListElements}}; |
| 91 | return M; |
| 92 | } |
| 93 | |
| 94 | static const llvm::StringMap<RangeSelectorOp<RangeSelector>> & |
| 95 | getUnaryRangeSelectors() { |
| 96 | static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = { |
| 97 | {"before" , before}, {"after" , after}, {"expansion" , expansion}}; |
| 98 | return M; |
| 99 | } |
| 100 | |
| 101 | static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> & |
| 102 | getBinaryStringSelectors() { |
| 103 | static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = { |
| 104 | {"encloseNodes" , encloseNodes}}; |
| 105 | return M; |
| 106 | } |
| 107 | |
| 108 | static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> & |
| 109 | getBinaryRangeSelectors() { |
| 110 | static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> |
| 111 | M = {{"enclose" , enclose}, {"between" , between}}; |
| 112 | return M; |
| 113 | } |
| 114 | |
| 115 | template <typename Element> |
| 116 | std::optional<Element> findOptional(const llvm::StringMap<Element> &Map, |
| 117 | llvm::StringRef Key) { |
| 118 | auto it = Map.find(Key); |
| 119 | if (it == Map.end()) |
| 120 | return std::nullopt; |
| 121 | return it->second; |
| 122 | } |
| 123 | |
| 124 | template <typename ResultType> |
| 125 | ParseProgress<ResultType> makeParseProgress(ParseState State, |
| 126 | ResultType Result) { |
| 127 | return ParseProgress<ResultType>{State, std::move(Result)}; |
| 128 | } |
| 129 | |
| 130 | static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) { |
| 131 | size_t Pos = S.OriginalInput.size() - S.Input.size(); |
| 132 | return llvm::make_error<ParseError>(Args&: Pos, Args: std::move(ErrorMsg), |
| 133 | Args: S.OriginalInput.substr(Start: Pos, N: 20).str()); |
| 134 | } |
| 135 | |
| 136 | // Returns a new ParseState that advances \c S by \c N characters. |
| 137 | static ParseState advance(ParseState S, size_t N) { |
| 138 | S.Input = S.Input.drop_front(N); |
| 139 | return S; |
| 140 | } |
| 141 | |
| 142 | static StringRef consumeWhitespace(StringRef S) { |
| 143 | return S.drop_while(F: [](char c) { return isASCII(c) && isWhitespace(c); }); |
| 144 | } |
| 145 | |
| 146 | // Parses a single expected character \c c from \c State, skipping preceding |
| 147 | // whitespace. Error if the expected character isn't found. |
| 148 | static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) { |
| 149 | State.Input = consumeWhitespace(S: State.Input); |
| 150 | if (State.Input.empty() || State.Input.front() != c) |
| 151 | return makeParseError(S: State, |
| 152 | ErrorMsg: ("expected char not found: " + llvm::Twine(c)).str()); |
| 153 | return makeParseProgress(State: advance(S: State, N: 1), Result: std::nullopt); |
| 154 | } |
| 155 | |
| 156 | // Parses an identitifer "token" -- handles preceding whitespace. |
| 157 | static ExpectedProgress<std::string> parseId(ParseState State) { |
| 158 | State.Input = consumeWhitespace(S: State.Input); |
| 159 | auto Id = State.Input.take_while( |
| 160 | F: [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); }); |
| 161 | if (Id.empty()) |
| 162 | return makeParseError(S: State, ErrorMsg: "failed to parse name" ); |
| 163 | return makeParseProgress(State: advance(S: State, N: Id.size()), Result: Id.str()); |
| 164 | } |
| 165 | |
| 166 | // For consistency with the AST matcher parser and C++ code, node ids are |
| 167 | // written as strings. However, we do not support escaping in the string. |
| 168 | static ExpectedProgress<std::string> parseStringId(ParseState State) { |
| 169 | State.Input = consumeWhitespace(S: State.Input); |
| 170 | if (State.Input.empty()) |
| 171 | return makeParseError(S: State, ErrorMsg: "unexpected end of input" ); |
| 172 | if (!State.Input.consume_front(Prefix: "\"" )) |
| 173 | return makeParseError( |
| 174 | S: State, |
| 175 | ErrorMsg: "expecting string, but encountered other character or end of input" ); |
| 176 | |
| 177 | StringRef Id = State.Input.take_until(F: [](char c) { return c == '"'; }); |
| 178 | if (State.Input.size() == Id.size()) |
| 179 | return makeParseError(S: State, ErrorMsg: "unterminated string" ); |
| 180 | // Advance past the trailing quote as well. |
| 181 | return makeParseProgress(State: advance(S: State, N: Id.size() + 1), Result: Id.str()); |
| 182 | } |
| 183 | |
| 184 | // Parses a single element surrounded by parens. `Op` is applied to the parsed |
| 185 | // result to create the result of this function call. |
| 186 | template <typename T> |
| 187 | ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement, |
| 188 | RangeSelectorOp<T> Op, |
| 189 | ParseState State) { |
| 190 | auto P = parseChar(c: '(', State); |
| 191 | if (!P) |
| 192 | return P.takeError(); |
| 193 | |
| 194 | auto E = ParseElement(P->State); |
| 195 | if (!E) |
| 196 | return E.takeError(); |
| 197 | |
| 198 | P = parseChar(')', E->State); |
| 199 | if (!P) |
| 200 | return P.takeError(); |
| 201 | |
| 202 | return makeParseProgress(P->State, Op(std::move(E->Value))); |
| 203 | } |
| 204 | |
| 205 | // Parses a pair of elements surrounded by parens and separated by comma. `Op` |
| 206 | // is applied to the parsed results to create the result of this function call. |
| 207 | template <typename T> |
| 208 | ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement, |
| 209 | RangeSelectorOp<T, T> Op, |
| 210 | ParseState State) { |
| 211 | auto P = parseChar(c: '(', State); |
| 212 | if (!P) |
| 213 | return P.takeError(); |
| 214 | |
| 215 | auto Left = ParseElement(P->State); |
| 216 | if (!Left) |
| 217 | return Left.takeError(); |
| 218 | |
| 219 | P = parseChar(',', Left->State); |
| 220 | if (!P) |
| 221 | return P.takeError(); |
| 222 | |
| 223 | auto Right = ParseElement(P->State); |
| 224 | if (!Right) |
| 225 | return Right.takeError(); |
| 226 | |
| 227 | P = parseChar(')', Right->State); |
| 228 | if (!P) |
| 229 | return P.takeError(); |
| 230 | |
| 231 | return makeParseProgress(P->State, |
| 232 | Op(std::move(Left->Value), std::move(Right->Value))); |
| 233 | } |
| 234 | |
| 235 | // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or |
| 236 | // Id operator). Returns StencilType representing the operator on success and |
| 237 | // error if it fails to parse input for an operator. |
| 238 | static ExpectedProgress<RangeSelector> |
| 239 | parseRangeSelectorImpl(ParseState State) { |
| 240 | auto Id = parseId(State); |
| 241 | if (!Id) |
| 242 | return Id.takeError(); |
| 243 | |
| 244 | std::string OpName = std::move(Id->Value); |
| 245 | if (auto Op = findOptional(Map: getUnaryStringSelectors(), Key: OpName)) |
| 246 | return parseSingle(ParseElement: parseStringId, Op: *Op, State: Id->State); |
| 247 | |
| 248 | if (auto Op = findOptional(Map: getUnaryRangeSelectors(), Key: OpName)) |
| 249 | return parseSingle(ParseElement: parseRangeSelectorImpl, Op: *Op, State: Id->State); |
| 250 | |
| 251 | if (auto Op = findOptional(Map: getBinaryStringSelectors(), Key: OpName)) |
| 252 | return parsePair(ParseElement: parseStringId, Op: *Op, State: Id->State); |
| 253 | |
| 254 | if (auto Op = findOptional(Map: getBinaryRangeSelectors(), Key: OpName)) |
| 255 | return parsePair(ParseElement: parseRangeSelectorImpl, Op: *Op, State: Id->State); |
| 256 | |
| 257 | return makeParseError(S: State, ErrorMsg: "unknown selector name: " + OpName); |
| 258 | } |
| 259 | |
| 260 | Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) { |
| 261 | ParseState State = {.Input: Input, .OriginalInput: Input}; |
| 262 | ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State); |
| 263 | if (!Result) |
| 264 | return Result.takeError(); |
| 265 | State = Result->State; |
| 266 | // Discard any potentially trailing whitespace. |
| 267 | State.Input = consumeWhitespace(S: State.Input); |
| 268 | if (State.Input.empty()) |
| 269 | return Result->Value; |
| 270 | return makeParseError(S: State, ErrorMsg: "unexpected input after selector" ); |
| 271 | } |
| 272 | |