| 1 | //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | /// | 
|---|
| 9 | /// \file | 
|---|
| 10 | /// This file defines the log symbolizer markup data model and parser. | 
|---|
| 11 | /// | 
|---|
| 12 | //===----------------------------------------------------------------------===// | 
|---|
| 13 |  | 
|---|
| 14 | #include "llvm/DebugInfo/Symbolize/Markup.h" | 
|---|
| 15 |  | 
|---|
| 16 | #include "llvm/ADT/STLExtras.h" | 
|---|
| 17 |  | 
|---|
| 18 | namespace llvm { | 
|---|
| 19 | namespace symbolize { | 
|---|
| 20 |  | 
|---|
// Regular expression for the SGR (Select Graphic Rendition) control codes that
// are recognized in text outside of markup elements:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
|---|
| 26 |  | 
|---|
| 27 | MarkupParser::MarkupParser(StringSet<> MultilineTags) | 
|---|
| 28 | : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {} | 
|---|
| 29 |  | 
|---|
| 30 | static StringRef takeTo(StringRef Str, StringRef::iterator Pos) { | 
|---|
| 31 | return Str.take_front(N: Pos - Str.begin()); | 
|---|
| 32 | } | 
|---|
| 33 | static void advanceTo(StringRef &Str, StringRef::iterator Pos) { | 
|---|
| 34 | Str = Str.drop_front(N: Pos - Str.begin()); | 
|---|
| 35 | } | 
|---|
| 36 |  | 
|---|
| 37 | void MarkupParser::parseLine(StringRef Line) { | 
|---|
| 38 | Buffer.clear(); | 
|---|
| 39 | NextIdx = 0; | 
|---|
| 40 | FinishedMultiline.clear(); | 
|---|
| 41 | this->Line = Line; | 
|---|
| 42 | } | 
|---|
| 43 |  | 
|---|
| 44 | std::optional<MarkupNode> MarkupParser::nextNode() { | 
|---|
| 45 | // Pull something out of the buffer if possible. | 
|---|
| 46 | if (!Buffer.empty()) { | 
|---|
| 47 | if (NextIdx < Buffer.size()) | 
|---|
| 48 | return std::move(Buffer[NextIdx++]); | 
|---|
| 49 | NextIdx = 0; | 
|---|
| 50 | Buffer.clear(); | 
|---|
| 51 | } | 
|---|
| 52 |  | 
|---|
| 53 | // The buffer is empty, so parse the next bit of the line. | 
|---|
| 54 |  | 
|---|
| 55 | if (Line.empty()) | 
|---|
| 56 | return std::nullopt; | 
|---|
| 57 |  | 
|---|
| 58 | if (!InProgressMultiline.empty()) { | 
|---|
| 59 | if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) { | 
|---|
| 60 | llvm::append_range(C&: InProgressMultiline, R&: *MultilineEnd); | 
|---|
| 61 | assert(FinishedMultiline.empty() && | 
|---|
| 62 | "At most one multi-line element can be finished at a time."); | 
|---|
| 63 | FinishedMultiline.swap(s&: InProgressMultiline); | 
|---|
| 64 | // Parse the multi-line element as if it were contiguous. | 
|---|
| 65 | advanceTo(Str&: Line, Pos: MultilineEnd->end()); | 
|---|
| 66 | return *parseElement(Line: FinishedMultiline); | 
|---|
| 67 | } | 
|---|
| 68 |  | 
|---|
| 69 | // The whole line is part of the multi-line element. | 
|---|
| 70 | llvm::append_range(C&: InProgressMultiline, R&: Line); | 
|---|
| 71 | Line = Line.drop_front(N: Line.size()); | 
|---|
| 72 | return std::nullopt; | 
|---|
| 73 | } | 
|---|
| 74 |  | 
|---|
| 75 | // Find the first valid markup element, if any. | 
|---|
| 76 | if (std::optional<MarkupNode> Element = parseElement(Line)) { | 
|---|
| 77 | parseTextOutsideMarkup(Text: takeTo(Str: Line, Pos: Element->Text.begin())); | 
|---|
| 78 | Buffer.push_back(Elt: std::move(*Element)); | 
|---|
| 79 | advanceTo(Str&: Line, Pos: Element->Text.end()); | 
|---|
| 80 | return nextNode(); | 
|---|
| 81 | } | 
|---|
| 82 |  | 
|---|
| 83 | // Since there were no valid elements remaining, see if the line opens a | 
|---|
| 84 | // multi-line element. | 
|---|
| 85 | if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) { | 
|---|
| 86 | // Emit any text before the element. | 
|---|
| 87 | parseTextOutsideMarkup(Text: takeTo(Str: Line, Pos: MultilineBegin->begin())); | 
|---|
| 88 |  | 
|---|
| 89 | // Begin recording the multi-line element. | 
|---|
| 90 | llvm::append_range(C&: InProgressMultiline, R&: *MultilineBegin); | 
|---|
| 91 | Line = Line.drop_front(N: Line.size()); | 
|---|
| 92 | return nextNode(); | 
|---|
| 93 | } | 
|---|
| 94 |  | 
|---|
| 95 | // The line doesn't contain any more markup elements, so emit it as text. | 
|---|
| 96 | parseTextOutsideMarkup(Text: Line); | 
|---|
| 97 | Line = Line.drop_front(N: Line.size()); | 
|---|
| 98 | return nextNode(); | 
|---|
| 99 | } | 
|---|
| 100 |  | 
|---|
| 101 | void MarkupParser::flush() { | 
|---|
| 102 | Buffer.clear(); | 
|---|
| 103 | NextIdx = 0; | 
|---|
| 104 | Line = {}; | 
|---|
| 105 | if (InProgressMultiline.empty()) | 
|---|
| 106 | return; | 
|---|
| 107 | FinishedMultiline.swap(s&: InProgressMultiline); | 
|---|
| 108 | parseTextOutsideMarkup(Text: FinishedMultiline); | 
|---|
| 109 | } | 
|---|
| 110 |  | 
|---|
| 111 | // Finds and returns the next valid markup element in the given line. Returns | 
|---|
| 112 | // std::nullopt if the line contains no valid elements. | 
|---|
| 113 | std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) { | 
|---|
| 114 | while (true) { | 
|---|
| 115 | // Find next element using begin and end markers. | 
|---|
| 116 | size_t BeginPos = Line.find(Str: "{{{"); | 
|---|
| 117 | if (BeginPos == StringRef::npos) | 
|---|
| 118 | return std::nullopt; | 
|---|
| 119 | size_t EndPos = Line.find(Str: "}}}", From: BeginPos + 3); | 
|---|
| 120 | if (EndPos == StringRef::npos) | 
|---|
| 121 | return std::nullopt; | 
|---|
| 122 | EndPos += 3; | 
|---|
| 123 | MarkupNode Element; | 
|---|
| 124 | Element.Text = Line.slice(Start: BeginPos, End: EndPos); | 
|---|
| 125 | Line = Line.substr(Start: EndPos); | 
|---|
| 126 |  | 
|---|
| 127 | // Parse tag. | 
|---|
| 128 | StringRef Content = Element.Text.drop_front(N: 3).drop_back(N: 3); | 
|---|
| 129 | StringRef FieldsContent; | 
|---|
| 130 | std::tie(args&: Element.Tag, args&: FieldsContent) = Content.split(Separator: ':'); | 
|---|
| 131 | if (Element.Tag.empty()) | 
|---|
| 132 | continue; | 
|---|
| 133 |  | 
|---|
| 134 | // Parse fields. | 
|---|
| 135 | if (!FieldsContent.empty()) | 
|---|
| 136 | FieldsContent.split(A&: Element.Fields, Separator: ":"); | 
|---|
| 137 | else if (Content.back() == ':') | 
|---|
| 138 | Element.Fields.push_back(Elt: FieldsContent); | 
|---|
| 139 |  | 
|---|
| 140 | return Element; | 
|---|
| 141 | } | 
|---|
| 142 | } | 
|---|
| 143 |  | 
|---|
| 144 | static MarkupNode textNode(StringRef Text) { | 
|---|
| 145 | MarkupNode Node; | 
|---|
| 146 | Node.Text = Text; | 
|---|
| 147 | return Node; | 
|---|
| 148 | } | 
|---|
| 149 |  | 
|---|
| 150 | // Parses a region of text known to be outside any markup elements. Such text | 
|---|
| 151 | // may still contain SGR control codes, so the region is further subdivided into | 
|---|
| 152 | // control codes and true text regions. | 
|---|
| 153 | void MarkupParser::parseTextOutsideMarkup(StringRef Text) { | 
|---|
| 154 | if (Text.empty()) | 
|---|
| 155 | return; | 
|---|
| 156 | SmallVector<StringRef> Matches; | 
|---|
| 157 | while (SGRSyntax.match(String: Text, Matches: &Matches)) { | 
|---|
| 158 | // Emit any text before the SGR element. | 
|---|
| 159 | if (Matches.begin()->begin() != Text.begin()) | 
|---|
| 160 | Buffer.push_back(Elt: textNode(Text: takeTo(Str: Text, Pos: Matches.begin()->begin()))); | 
|---|
| 161 |  | 
|---|
| 162 | Buffer.push_back(Elt: textNode(Text: *Matches.begin())); | 
|---|
| 163 | advanceTo(Str&: Text, Pos: Matches.begin()->end()); | 
|---|
| 164 | } | 
|---|
| 165 | if (!Text.empty()) | 
|---|
| 166 | Buffer.push_back(Elt: textNode(Text)); | 
|---|
| 167 | } | 
|---|
| 168 |  | 
|---|
| 169 | // Given that a line doesn't contain any valid markup, see if it ends with the | 
|---|
| 170 | // start of a multi-line element. If so, returns the beginning. | 
|---|
| 171 | std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) { | 
|---|
| 172 | // A multi-line begin marker must be the last one on the line. | 
|---|
| 173 | size_t BeginPos = Line.rfind(Str: "{{{"); | 
|---|
| 174 | if (BeginPos == StringRef::npos) | 
|---|
| 175 | return std::nullopt; | 
|---|
| 176 | size_t BeginTagPos = BeginPos + 3; | 
|---|
| 177 |  | 
|---|
| 178 | // If there are any end markers afterwards, the begin marker cannot belong to | 
|---|
| 179 | // a multi-line element. | 
|---|
| 180 | size_t EndPos = Line.find(Str: "}}}", From: BeginTagPos); | 
|---|
| 181 | if (EndPos != StringRef::npos) | 
|---|
| 182 | return std::nullopt; | 
|---|
| 183 |  | 
|---|
| 184 | // Check whether the tag is registered multi-line. | 
|---|
| 185 | size_t EndTagPos = Line.find(C: ':', From: BeginTagPos); | 
|---|
| 186 | if (EndTagPos == StringRef::npos) | 
|---|
| 187 | return std::nullopt; | 
|---|
| 188 | StringRef Tag = Line.slice(Start: BeginTagPos, End: EndTagPos); | 
|---|
| 189 | if (!MultilineTags.contains(key: Tag)) | 
|---|
| 190 | return std::nullopt; | 
|---|
| 191 | return Line.substr(Start: BeginPos); | 
|---|
| 192 | } | 
|---|
| 193 |  | 
|---|
| 194 | // See if the line begins with the ending of an in-progress multi-line element. | 
|---|
| 195 | // If so, return the ending. | 
|---|
| 196 | std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) { | 
|---|
| 197 | size_t EndPos = Line.find(Str: "}}}"); | 
|---|
| 198 | if (EndPos == StringRef::npos) | 
|---|
| 199 | return std::nullopt; | 
|---|
| 200 | return Line.take_front(N: EndPos + 3); | 
|---|
| 201 | } | 
|---|
| 202 |  | 
|---|
| 203 | } // end namespace symbolize | 
|---|
| 204 | } // end namespace llvm | 
|---|
| 205 |  | 
|---|