1 | //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file defines the log symbolizer markup data model and parser. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/DebugInfo/Symbolize/Markup.h" |
15 | |
16 | #include "llvm/ADT/STLExtras.h" |
17 | #include "llvm/ADT/StringExtras.h" |
18 | |
19 | namespace llvm { |
20 | namespace symbolize { |
21 | |
// Regular expression for the SGR control sequences recognized in text:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
27 | |
28 | MarkupParser::MarkupParser(StringSet<> MultilineTags) |
29 | : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {} |
30 | |
31 | static StringRef takeTo(StringRef Str, StringRef::iterator Pos) { |
32 | return Str.take_front(N: Pos - Str.begin()); |
33 | } |
34 | static void advanceTo(StringRef &Str, StringRef::iterator Pos) { |
35 | Str = Str.drop_front(N: Pos - Str.begin()); |
36 | } |
37 | |
38 | void MarkupParser::parseLine(StringRef Line) { |
39 | Buffer.clear(); |
40 | NextIdx = 0; |
41 | FinishedMultiline.clear(); |
42 | this->Line = Line; |
43 | } |
44 | |
45 | std::optional<MarkupNode> MarkupParser::nextNode() { |
46 | // Pull something out of the buffer if possible. |
47 | if (!Buffer.empty()) { |
48 | if (NextIdx < Buffer.size()) |
49 | return std::move(Buffer[NextIdx++]); |
50 | NextIdx = 0; |
51 | Buffer.clear(); |
52 | } |
53 | |
54 | // The buffer is empty, so parse the next bit of the line. |
55 | |
56 | if (Line.empty()) |
57 | return std::nullopt; |
58 | |
59 | if (!InProgressMultiline.empty()) { |
60 | if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) { |
61 | llvm::append_range(C&: InProgressMultiline, R&: *MultilineEnd); |
62 | assert(FinishedMultiline.empty() && |
63 | "At most one multi-line element can be finished at a time." ); |
64 | FinishedMultiline.swap(s&: InProgressMultiline); |
65 | // Parse the multi-line element as if it were contiguous. |
66 | advanceTo(Str&: Line, Pos: MultilineEnd->end()); |
67 | return *parseElement(Line: FinishedMultiline); |
68 | } |
69 | |
70 | // The whole line is part of the multi-line element. |
71 | llvm::append_range(C&: InProgressMultiline, R&: Line); |
72 | Line = Line.drop_front(N: Line.size()); |
73 | return std::nullopt; |
74 | } |
75 | |
76 | // Find the first valid markup element, if any. |
77 | if (std::optional<MarkupNode> Element = parseElement(Line)) { |
78 | parseTextOutsideMarkup(Text: takeTo(Str: Line, Pos: Element->Text.begin())); |
79 | Buffer.push_back(Elt: std::move(*Element)); |
80 | advanceTo(Str&: Line, Pos: Element->Text.end()); |
81 | return nextNode(); |
82 | } |
83 | |
84 | // Since there were no valid elements remaining, see if the line opens a |
85 | // multi-line element. |
86 | if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) { |
87 | // Emit any text before the element. |
88 | parseTextOutsideMarkup(Text: takeTo(Str: Line, Pos: MultilineBegin->begin())); |
89 | |
90 | // Begin recording the multi-line element. |
91 | llvm::append_range(C&: InProgressMultiline, R&: *MultilineBegin); |
92 | Line = Line.drop_front(N: Line.size()); |
93 | return nextNode(); |
94 | } |
95 | |
96 | // The line doesn't contain any more markup elements, so emit it as text. |
97 | parseTextOutsideMarkup(Text: Line); |
98 | Line = Line.drop_front(N: Line.size()); |
99 | return nextNode(); |
100 | } |
101 | |
102 | void MarkupParser::flush() { |
103 | Buffer.clear(); |
104 | NextIdx = 0; |
105 | Line = {}; |
106 | if (InProgressMultiline.empty()) |
107 | return; |
108 | FinishedMultiline.swap(s&: InProgressMultiline); |
109 | parseTextOutsideMarkup(Text: FinishedMultiline); |
110 | } |
111 | |
112 | // Finds and returns the next valid markup element in the given line. Returns |
113 | // std::nullopt if the line contains no valid elements. |
114 | std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) { |
115 | while (true) { |
116 | // Find next element using begin and end markers. |
117 | size_t BeginPos = Line.find(Str: "{{{" ); |
118 | if (BeginPos == StringRef::npos) |
119 | return std::nullopt; |
120 | size_t EndPos = Line.find(Str: "}}}" , From: BeginPos + 3); |
121 | if (EndPos == StringRef::npos) |
122 | return std::nullopt; |
123 | EndPos += 3; |
124 | MarkupNode Element; |
125 | Element.Text = Line.slice(Start: BeginPos, End: EndPos); |
126 | Line = Line.substr(Start: EndPos); |
127 | |
128 | // Parse tag. |
129 | StringRef Content = Element.Text.drop_front(N: 3).drop_back(N: 3); |
130 | StringRef FieldsContent; |
131 | std::tie(args&: Element.Tag, args&: FieldsContent) = Content.split(Separator: ':'); |
132 | if (Element.Tag.empty()) |
133 | continue; |
134 | |
135 | // Parse fields. |
136 | if (!FieldsContent.empty()) |
137 | FieldsContent.split(A&: Element.Fields, Separator: ":" ); |
138 | else if (Content.back() == ':') |
139 | Element.Fields.push_back(Elt: FieldsContent); |
140 | |
141 | return Element; |
142 | } |
143 | } |
144 | |
145 | static MarkupNode textNode(StringRef Text) { |
146 | MarkupNode Node; |
147 | Node.Text = Text; |
148 | return Node; |
149 | } |
150 | |
151 | // Parses a region of text known to be outside any markup elements. Such text |
152 | // may still contain SGR control codes, so the region is further subdivided into |
153 | // control codes and true text regions. |
154 | void MarkupParser::parseTextOutsideMarkup(StringRef Text) { |
155 | if (Text.empty()) |
156 | return; |
157 | SmallVector<StringRef> Matches; |
158 | while (SGRSyntax.match(String: Text, Matches: &Matches)) { |
159 | // Emit any text before the SGR element. |
160 | if (Matches.begin()->begin() != Text.begin()) |
161 | Buffer.push_back(Elt: textNode(Text: takeTo(Str: Text, Pos: Matches.begin()->begin()))); |
162 | |
163 | Buffer.push_back(Elt: textNode(Text: *Matches.begin())); |
164 | advanceTo(Str&: Text, Pos: Matches.begin()->end()); |
165 | } |
166 | if (!Text.empty()) |
167 | Buffer.push_back(Elt: textNode(Text)); |
168 | } |
169 | |
170 | // Given that a line doesn't contain any valid markup, see if it ends with the |
171 | // start of a multi-line element. If so, returns the beginning. |
172 | std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) { |
173 | // A multi-line begin marker must be the last one on the line. |
174 | size_t BeginPos = Line.rfind(Str: "{{{" ); |
175 | if (BeginPos == StringRef::npos) |
176 | return std::nullopt; |
177 | size_t BeginTagPos = BeginPos + 3; |
178 | |
179 | // If there are any end markers afterwards, the begin marker cannot belong to |
180 | // a multi-line element. |
181 | size_t EndPos = Line.find(Str: "}}}" , From: BeginTagPos); |
182 | if (EndPos != StringRef::npos) |
183 | return std::nullopt; |
184 | |
185 | // Check whether the tag is registered multi-line. |
186 | size_t EndTagPos = Line.find(C: ':', From: BeginTagPos); |
187 | if (EndTagPos == StringRef::npos) |
188 | return std::nullopt; |
189 | StringRef Tag = Line.slice(Start: BeginTagPos, End: EndTagPos); |
190 | if (!MultilineTags.contains(key: Tag)) |
191 | return std::nullopt; |
192 | return Line.substr(Start: BeginPos); |
193 | } |
194 | |
195 | // See if the line begins with the ending of an in-progress multi-line element. |
196 | // If so, return the ending. |
197 | std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) { |
198 | size_t EndPos = Line.find(Str: "}}}" ); |
199 | if (EndPos == StringRef::npos) |
200 | return std::nullopt; |
201 | return Line.take_front(N: EndPos + 3); |
202 | } |
203 | |
204 | } // end namespace symbolize |
205 | } // end namespace llvm |
206 | |