1//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the log symbolizer markup data model and parser.
11///
12//===----------------------------------------------------------------------===//
13
14#include "llvm/DebugInfo/Symbolize/Markup.h"
15
16#include "llvm/ADT/STLExtras.h"
17
18namespace llvm {
19namespace symbolize {
20
// Regular expression source for the SGR control codes that are split out of
// plain text. It accepts exactly these sequences:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
26
27MarkupParser::MarkupParser(StringSet<> MultilineTags)
28 : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
29
30static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
31 return Str.take_front(N: Pos - Str.begin());
32}
33static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
34 Str = Str.drop_front(N: Pos - Str.begin());
35}
36
37void MarkupParser::parseLine(StringRef Line) {
38 Buffer.clear();
39 NextIdx = 0;
40 FinishedMultiline.clear();
41 this->Line = Line;
42}
43
44std::optional<MarkupNode> MarkupParser::nextNode() {
45 // Pull something out of the buffer if possible.
46 if (!Buffer.empty()) {
47 if (NextIdx < Buffer.size())
48 return std::move(Buffer[NextIdx++]);
49 NextIdx = 0;
50 Buffer.clear();
51 }
52
53 // The buffer is empty, so parse the next bit of the line.
54
55 if (Line.empty())
56 return std::nullopt;
57
58 if (!InProgressMultiline.empty()) {
59 if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
60 llvm::append_range(C&: InProgressMultiline, R&: *MultilineEnd);
61 assert(FinishedMultiline.empty() &&
62 "At most one multi-line element can be finished at a time.");
63 FinishedMultiline.swap(s&: InProgressMultiline);
64 // Parse the multi-line element as if it were contiguous.
65 advanceTo(Str&: Line, Pos: MultilineEnd->end());
66 return *parseElement(Line: FinishedMultiline);
67 }
68
69 // The whole line is part of the multi-line element.
70 llvm::append_range(C&: InProgressMultiline, R&: Line);
71 Line = Line.drop_front(N: Line.size());
72 return std::nullopt;
73 }
74
75 // Find the first valid markup element, if any.
76 if (std::optional<MarkupNode> Element = parseElement(Line)) {
77 parseTextOutsideMarkup(Text: takeTo(Str: Line, Pos: Element->Text.begin()));
78 Buffer.push_back(Elt: std::move(*Element));
79 advanceTo(Str&: Line, Pos: Element->Text.end());
80 return nextNode();
81 }
82
83 // Since there were no valid elements remaining, see if the line opens a
84 // multi-line element.
85 if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
86 // Emit any text before the element.
87 parseTextOutsideMarkup(Text: takeTo(Str: Line, Pos: MultilineBegin->begin()));
88
89 // Begin recording the multi-line element.
90 llvm::append_range(C&: InProgressMultiline, R&: *MultilineBegin);
91 Line = Line.drop_front(N: Line.size());
92 return nextNode();
93 }
94
95 // The line doesn't contain any more markup elements, so emit it as text.
96 parseTextOutsideMarkup(Text: Line);
97 Line = Line.drop_front(N: Line.size());
98 return nextNode();
99}
100
101void MarkupParser::flush() {
102 Buffer.clear();
103 NextIdx = 0;
104 Line = {};
105 if (InProgressMultiline.empty())
106 return;
107 FinishedMultiline.swap(s&: InProgressMultiline);
108 parseTextOutsideMarkup(Text: FinishedMultiline);
109}
110
111// Finds and returns the next valid markup element in the given line. Returns
112// std::nullopt if the line contains no valid elements.
113std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
114 while (true) {
115 // Find next element using begin and end markers.
116 size_t BeginPos = Line.find(Str: "{{{");
117 if (BeginPos == StringRef::npos)
118 return std::nullopt;
119 size_t EndPos = Line.find(Str: "}}}", From: BeginPos + 3);
120 if (EndPos == StringRef::npos)
121 return std::nullopt;
122 EndPos += 3;
123 MarkupNode Element;
124 Element.Text = Line.slice(Start: BeginPos, End: EndPos);
125 Line = Line.substr(Start: EndPos);
126
127 // Parse tag.
128 StringRef Content = Element.Text.drop_front(N: 3).drop_back(N: 3);
129 StringRef FieldsContent;
130 std::tie(args&: Element.Tag, args&: FieldsContent) = Content.split(Separator: ':');
131 if (Element.Tag.empty())
132 continue;
133
134 // Parse fields.
135 if (!FieldsContent.empty())
136 FieldsContent.split(A&: Element.Fields, Separator: ":");
137 else if (Content.back() == ':')
138 Element.Fields.push_back(Elt: FieldsContent);
139
140 return Element;
141 }
142}
143
144static MarkupNode textNode(StringRef Text) {
145 MarkupNode Node;
146 Node.Text = Text;
147 return Node;
148}
149
150// Parses a region of text known to be outside any markup elements. Such text
151// may still contain SGR control codes, so the region is further subdivided into
152// control codes and true text regions.
153void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
154 if (Text.empty())
155 return;
156 SmallVector<StringRef> Matches;
157 while (SGRSyntax.match(String: Text, Matches: &Matches)) {
158 // Emit any text before the SGR element.
159 if (Matches.begin()->begin() != Text.begin())
160 Buffer.push_back(Elt: textNode(Text: takeTo(Str: Text, Pos: Matches.begin()->begin())));
161
162 Buffer.push_back(Elt: textNode(Text: *Matches.begin()));
163 advanceTo(Str&: Text, Pos: Matches.begin()->end());
164 }
165 if (!Text.empty())
166 Buffer.push_back(Elt: textNode(Text));
167}
168
169// Given that a line doesn't contain any valid markup, see if it ends with the
170// start of a multi-line element. If so, returns the beginning.
171std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
172 // A multi-line begin marker must be the last one on the line.
173 size_t BeginPos = Line.rfind(Str: "{{{");
174 if (BeginPos == StringRef::npos)
175 return std::nullopt;
176 size_t BeginTagPos = BeginPos + 3;
177
178 // If there are any end markers afterwards, the begin marker cannot belong to
179 // a multi-line element.
180 size_t EndPos = Line.find(Str: "}}}", From: BeginTagPos);
181 if (EndPos != StringRef::npos)
182 return std::nullopt;
183
184 // Check whether the tag is registered multi-line.
185 size_t EndTagPos = Line.find(C: ':', From: BeginTagPos);
186 if (EndTagPos == StringRef::npos)
187 return std::nullopt;
188 StringRef Tag = Line.slice(Start: BeginTagPos, End: EndTagPos);
189 if (!MultilineTags.contains(key: Tag))
190 return std::nullopt;
191 return Line.substr(Start: BeginPos);
192}
193
194// See if the line begins with the ending of an in-progress multi-line element.
195// If so, return the ending.
196std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
197 size_t EndPos = Line.find(Str: "}}}");
198 if (EndPos == StringRef::npos)
199 return std::nullopt;
200 return Line.take_front(N: EndPos + 3);
201}
202
203} // end namespace symbolize
204} // end namespace llvm
205