1 | //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file defines the log symbolizer markup data model and parser. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/DebugInfo/Symbolize/Markup.h" |
15 | |
16 | #include "llvm/ADT/STLExtras.h" |
17 | |
18 | namespace llvm { |
19 | namespace symbolize { |
20 | |
// Regular expression for the SGR (Select Graphic Rendition) control codes the
// parser recognizes inside plain text. It accepts exactly:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
26 | |
27 | MarkupParser::MarkupParser(StringSet<> MultilineTags) |
28 | : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {} |
29 | |
30 | static StringRef takeTo(StringRef Str, StringRef::iterator Pos) { |
31 | return Str.take_front(N: Pos - Str.begin()); |
32 | } |
33 | static void advanceTo(StringRef &Str, StringRef::iterator Pos) { |
34 | Str = Str.drop_front(N: Pos - Str.begin()); |
35 | } |
36 | |
37 | void MarkupParser::parseLine(StringRef Line) { |
38 | Buffer.clear(); |
39 | NextIdx = 0; |
40 | FinishedMultiline.clear(); |
41 | this->Line = Line; |
42 | } |
43 | |
44 | std::optional<MarkupNode> MarkupParser::nextNode() { |
45 | // Pull something out of the buffer if possible. |
46 | if (!Buffer.empty()) { |
47 | if (NextIdx < Buffer.size()) |
48 | return std::move(Buffer[NextIdx++]); |
49 | NextIdx = 0; |
50 | Buffer.clear(); |
51 | } |
52 | |
53 | // The buffer is empty, so parse the next bit of the line. |
54 | |
55 | if (Line.empty()) |
56 | return std::nullopt; |
57 | |
58 | if (!InProgressMultiline.empty()) { |
59 | if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) { |
60 | llvm::append_range(C&: InProgressMultiline, R&: *MultilineEnd); |
61 | assert(FinishedMultiline.empty() && |
62 | "At most one multi-line element can be finished at a time." ); |
63 | FinishedMultiline.swap(s&: InProgressMultiline); |
64 | // Parse the multi-line element as if it were contiguous. |
65 | advanceTo(Str&: Line, Pos: MultilineEnd->end()); |
66 | return *parseElement(Line: FinishedMultiline); |
67 | } |
68 | |
69 | // The whole line is part of the multi-line element. |
70 | llvm::append_range(C&: InProgressMultiline, R&: Line); |
71 | Line = Line.drop_front(N: Line.size()); |
72 | return std::nullopt; |
73 | } |
74 | |
75 | // Find the first valid markup element, if any. |
76 | if (std::optional<MarkupNode> Element = parseElement(Line)) { |
77 | parseTextOutsideMarkup(Text: takeTo(Str: Line, Pos: Element->Text.begin())); |
78 | Buffer.push_back(Elt: std::move(*Element)); |
79 | advanceTo(Str&: Line, Pos: Element->Text.end()); |
80 | return nextNode(); |
81 | } |
82 | |
83 | // Since there were no valid elements remaining, see if the line opens a |
84 | // multi-line element. |
85 | if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) { |
86 | // Emit any text before the element. |
87 | parseTextOutsideMarkup(Text: takeTo(Str: Line, Pos: MultilineBegin->begin())); |
88 | |
89 | // Begin recording the multi-line element. |
90 | llvm::append_range(C&: InProgressMultiline, R&: *MultilineBegin); |
91 | Line = Line.drop_front(N: Line.size()); |
92 | return nextNode(); |
93 | } |
94 | |
95 | // The line doesn't contain any more markup elements, so emit it as text. |
96 | parseTextOutsideMarkup(Text: Line); |
97 | Line = Line.drop_front(N: Line.size()); |
98 | return nextNode(); |
99 | } |
100 | |
101 | void MarkupParser::flush() { |
102 | Buffer.clear(); |
103 | NextIdx = 0; |
104 | Line = {}; |
105 | if (InProgressMultiline.empty()) |
106 | return; |
107 | FinishedMultiline.swap(s&: InProgressMultiline); |
108 | parseTextOutsideMarkup(Text: FinishedMultiline); |
109 | } |
110 | |
111 | // Finds and returns the next valid markup element in the given line. Returns |
112 | // std::nullopt if the line contains no valid elements. |
113 | std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) { |
114 | while (true) { |
115 | // Find next element using begin and end markers. |
116 | size_t BeginPos = Line.find(Str: "{{{" ); |
117 | if (BeginPos == StringRef::npos) |
118 | return std::nullopt; |
119 | size_t EndPos = Line.find(Str: "}}}" , From: BeginPos + 3); |
120 | if (EndPos == StringRef::npos) |
121 | return std::nullopt; |
122 | EndPos += 3; |
123 | MarkupNode Element; |
124 | Element.Text = Line.slice(Start: BeginPos, End: EndPos); |
125 | Line = Line.substr(Start: EndPos); |
126 | |
127 | // Parse tag. |
128 | StringRef Content = Element.Text.drop_front(N: 3).drop_back(N: 3); |
129 | StringRef FieldsContent; |
130 | std::tie(args&: Element.Tag, args&: FieldsContent) = Content.split(Separator: ':'); |
131 | if (Element.Tag.empty()) |
132 | continue; |
133 | |
134 | // Parse fields. |
135 | if (!FieldsContent.empty()) |
136 | FieldsContent.split(A&: Element.Fields, Separator: ":" ); |
137 | else if (Content.back() == ':') |
138 | Element.Fields.push_back(Elt: FieldsContent); |
139 | |
140 | return Element; |
141 | } |
142 | } |
143 | |
144 | static MarkupNode textNode(StringRef Text) { |
145 | MarkupNode Node; |
146 | Node.Text = Text; |
147 | return Node; |
148 | } |
149 | |
150 | // Parses a region of text known to be outside any markup elements. Such text |
151 | // may still contain SGR control codes, so the region is further subdivided into |
152 | // control codes and true text regions. |
153 | void MarkupParser::parseTextOutsideMarkup(StringRef Text) { |
154 | if (Text.empty()) |
155 | return; |
156 | SmallVector<StringRef> Matches; |
157 | while (SGRSyntax.match(String: Text, Matches: &Matches)) { |
158 | // Emit any text before the SGR element. |
159 | if (Matches.begin()->begin() != Text.begin()) |
160 | Buffer.push_back(Elt: textNode(Text: takeTo(Str: Text, Pos: Matches.begin()->begin()))); |
161 | |
162 | Buffer.push_back(Elt: textNode(Text: *Matches.begin())); |
163 | advanceTo(Str&: Text, Pos: Matches.begin()->end()); |
164 | } |
165 | if (!Text.empty()) |
166 | Buffer.push_back(Elt: textNode(Text)); |
167 | } |
168 | |
169 | // Given that a line doesn't contain any valid markup, see if it ends with the |
170 | // start of a multi-line element. If so, returns the beginning. |
171 | std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) { |
172 | // A multi-line begin marker must be the last one on the line. |
173 | size_t BeginPos = Line.rfind(Str: "{{{" ); |
174 | if (BeginPos == StringRef::npos) |
175 | return std::nullopt; |
176 | size_t BeginTagPos = BeginPos + 3; |
177 | |
178 | // If there are any end markers afterwards, the begin marker cannot belong to |
179 | // a multi-line element. |
180 | size_t EndPos = Line.find(Str: "}}}" , From: BeginTagPos); |
181 | if (EndPos != StringRef::npos) |
182 | return std::nullopt; |
183 | |
184 | // Check whether the tag is registered multi-line. |
185 | size_t EndTagPos = Line.find(C: ':', From: BeginTagPos); |
186 | if (EndTagPos == StringRef::npos) |
187 | return std::nullopt; |
188 | StringRef Tag = Line.slice(Start: BeginTagPos, End: EndTagPos); |
189 | if (!MultilineTags.contains(key: Tag)) |
190 | return std::nullopt; |
191 | return Line.substr(Start: BeginPos); |
192 | } |
193 | |
194 | // See if the line begins with the ending of an in-progress multi-line element. |
195 | // If so, return the ending. |
196 | std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) { |
197 | size_t EndPos = Line.find(Str: "}}}" ); |
198 | if (EndPos == StringRef::npos) |
199 | return std::nullopt; |
200 | return Line.take_front(N: EndPos + 3); |
201 | } |
202 | |
203 | } // end namespace symbolize |
204 | } // end namespace llvm |
205 | |