| 1 | //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// This file defines the log symbolizer markup data model and parser. |
| 11 | /// |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "llvm/DebugInfo/Symbolize/Markup.h" |
| 15 | |
| 16 | #include "llvm/ADT/STLExtras.h" |
| 17 | |
| 18 | namespace llvm { |
| 19 | namespace symbolize { |
| 20 | |
// Regular expression for the only SGR control codes the parser splits out of
// plain text. It matches exactly:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
// Any other escape sequence is left embedded in the surrounding text.
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
| 26 | |
| 27 | MarkupParser::MarkupParser(StringSet<> MultilineTags) |
| 28 | : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {} |
| 29 | |
| 30 | static StringRef takeTo(StringRef Str, StringRef::iterator Pos) { |
| 31 | return Str.take_front(N: Pos - Str.begin()); |
| 32 | } |
| 33 | static void advanceTo(StringRef &Str, StringRef::iterator Pos) { |
| 34 | Str = Str.drop_front(N: Pos - Str.begin()); |
| 35 | } |
| 36 | |
| 37 | void MarkupParser::parseLine(StringRef Line) { |
| 38 | Buffer.clear(); |
| 39 | NextIdx = 0; |
| 40 | FinishedMultiline.clear(); |
| 41 | this->Line = Line; |
| 42 | } |
| 43 | |
| 44 | std::optional<MarkupNode> MarkupParser::nextNode() { |
| 45 | // Pull something out of the buffer if possible. |
| 46 | if (!Buffer.empty()) { |
| 47 | if (NextIdx < Buffer.size()) |
| 48 | return std::move(Buffer[NextIdx++]); |
| 49 | NextIdx = 0; |
| 50 | Buffer.clear(); |
| 51 | } |
| 52 | |
| 53 | // The buffer is empty, so parse the next bit of the line. |
| 54 | |
| 55 | if (Line.empty()) |
| 56 | return std::nullopt; |
| 57 | |
| 58 | if (!InProgressMultiline.empty()) { |
| 59 | if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) { |
| 60 | llvm::append_range(C&: InProgressMultiline, R&: *MultilineEnd); |
| 61 | assert(FinishedMultiline.empty() && |
| 62 | "At most one multi-line element can be finished at a time." ); |
| 63 | FinishedMultiline.swap(s&: InProgressMultiline); |
| 64 | // Parse the multi-line element as if it were contiguous. |
| 65 | advanceTo(Str&: Line, Pos: MultilineEnd->end()); |
| 66 | return *parseElement(Line: FinishedMultiline); |
| 67 | } |
| 68 | |
| 69 | // The whole line is part of the multi-line element. |
| 70 | llvm::append_range(C&: InProgressMultiline, R&: Line); |
| 71 | Line = Line.drop_front(N: Line.size()); |
| 72 | return std::nullopt; |
| 73 | } |
| 74 | |
| 75 | // Find the first valid markup element, if any. |
| 76 | if (std::optional<MarkupNode> Element = parseElement(Line)) { |
| 77 | parseTextOutsideMarkup(Text: takeTo(Str: Line, Pos: Element->Text.begin())); |
| 78 | Buffer.push_back(Elt: std::move(*Element)); |
| 79 | advanceTo(Str&: Line, Pos: Element->Text.end()); |
| 80 | return nextNode(); |
| 81 | } |
| 82 | |
| 83 | // Since there were no valid elements remaining, see if the line opens a |
| 84 | // multi-line element. |
| 85 | if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) { |
| 86 | // Emit any text before the element. |
| 87 | parseTextOutsideMarkup(Text: takeTo(Str: Line, Pos: MultilineBegin->begin())); |
| 88 | |
| 89 | // Begin recording the multi-line element. |
| 90 | llvm::append_range(C&: InProgressMultiline, R&: *MultilineBegin); |
| 91 | Line = Line.drop_front(N: Line.size()); |
| 92 | return nextNode(); |
| 93 | } |
| 94 | |
| 95 | // The line doesn't contain any more markup elements, so emit it as text. |
| 96 | parseTextOutsideMarkup(Text: Line); |
| 97 | Line = Line.drop_front(N: Line.size()); |
| 98 | return nextNode(); |
| 99 | } |
| 100 | |
| 101 | void MarkupParser::flush() { |
| 102 | Buffer.clear(); |
| 103 | NextIdx = 0; |
| 104 | Line = {}; |
| 105 | if (InProgressMultiline.empty()) |
| 106 | return; |
| 107 | FinishedMultiline.swap(s&: InProgressMultiline); |
| 108 | parseTextOutsideMarkup(Text: FinishedMultiline); |
| 109 | } |
| 110 | |
| 111 | // Finds and returns the next valid markup element in the given line. Returns |
| 112 | // std::nullopt if the line contains no valid elements. |
| 113 | std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) { |
| 114 | while (true) { |
| 115 | // Find next element using begin and end markers. |
| 116 | size_t BeginPos = Line.find(Str: "{{{" ); |
| 117 | if (BeginPos == StringRef::npos) |
| 118 | return std::nullopt; |
| 119 | size_t EndPos = Line.find(Str: "}}}" , From: BeginPos + 3); |
| 120 | if (EndPos == StringRef::npos) |
| 121 | return std::nullopt; |
| 122 | EndPos += 3; |
| 123 | MarkupNode Element; |
| 124 | Element.Text = Line.slice(Start: BeginPos, End: EndPos); |
| 125 | Line = Line.substr(Start: EndPos); |
| 126 | |
| 127 | // Parse tag. |
| 128 | StringRef Content = Element.Text.drop_front(N: 3).drop_back(N: 3); |
| 129 | StringRef FieldsContent; |
| 130 | std::tie(args&: Element.Tag, args&: FieldsContent) = Content.split(Separator: ':'); |
| 131 | if (Element.Tag.empty()) |
| 132 | continue; |
| 133 | |
| 134 | // Parse fields. |
| 135 | if (!FieldsContent.empty()) |
| 136 | FieldsContent.split(A&: Element.Fields, Separator: ":" ); |
| 137 | else if (Content.back() == ':') |
| 138 | Element.Fields.push_back(Elt: FieldsContent); |
| 139 | |
| 140 | return Element; |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | static MarkupNode textNode(StringRef Text) { |
| 145 | MarkupNode Node; |
| 146 | Node.Text = Text; |
| 147 | return Node; |
| 148 | } |
| 149 | |
| 150 | // Parses a region of text known to be outside any markup elements. Such text |
| 151 | // may still contain SGR control codes, so the region is further subdivided into |
| 152 | // control codes and true text regions. |
| 153 | void MarkupParser::parseTextOutsideMarkup(StringRef Text) { |
| 154 | if (Text.empty()) |
| 155 | return; |
| 156 | SmallVector<StringRef> Matches; |
| 157 | while (SGRSyntax.match(String: Text, Matches: &Matches)) { |
| 158 | // Emit any text before the SGR element. |
| 159 | if (Matches.begin()->begin() != Text.begin()) |
| 160 | Buffer.push_back(Elt: textNode(Text: takeTo(Str: Text, Pos: Matches.begin()->begin()))); |
| 161 | |
| 162 | Buffer.push_back(Elt: textNode(Text: *Matches.begin())); |
| 163 | advanceTo(Str&: Text, Pos: Matches.begin()->end()); |
| 164 | } |
| 165 | if (!Text.empty()) |
| 166 | Buffer.push_back(Elt: textNode(Text)); |
| 167 | } |
| 168 | |
| 169 | // Given that a line doesn't contain any valid markup, see if it ends with the |
| 170 | // start of a multi-line element. If so, returns the beginning. |
| 171 | std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) { |
| 172 | // A multi-line begin marker must be the last one on the line. |
| 173 | size_t BeginPos = Line.rfind(Str: "{{{" ); |
| 174 | if (BeginPos == StringRef::npos) |
| 175 | return std::nullopt; |
| 176 | size_t BeginTagPos = BeginPos + 3; |
| 177 | |
| 178 | // If there are any end markers afterwards, the begin marker cannot belong to |
| 179 | // a multi-line element. |
| 180 | size_t EndPos = Line.find(Str: "}}}" , From: BeginTagPos); |
| 181 | if (EndPos != StringRef::npos) |
| 182 | return std::nullopt; |
| 183 | |
| 184 | // Check whether the tag is registered multi-line. |
| 185 | size_t EndTagPos = Line.find(C: ':', From: BeginTagPos); |
| 186 | if (EndTagPos == StringRef::npos) |
| 187 | return std::nullopt; |
| 188 | StringRef Tag = Line.slice(Start: BeginTagPos, End: EndTagPos); |
| 189 | if (!MultilineTags.contains(key: Tag)) |
| 190 | return std::nullopt; |
| 191 | return Line.substr(Start: BeginPos); |
| 192 | } |
| 193 | |
| 194 | // See if the line begins with the ending of an in-progress multi-line element. |
| 195 | // If so, return the ending. |
| 196 | std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) { |
| 197 | size_t EndPos = Line.find(Str: "}}}" ); |
| 198 | if (EndPos == StringRef::npos) |
| 199 | return std::nullopt; |
| 200 | return Line.take_front(N: EndPos + 3); |
| 201 | } |
| 202 | |
| 203 | } // end namespace symbolize |
| 204 | } // end namespace llvm |
| 205 | |