1//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the log symbolizer markup data model and parser.
11///
12//===----------------------------------------------------------------------===//
13
14#include "llvm/DebugInfo/Symbolize/Markup.h"
15
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/StringExtras.h"
18
19namespace llvm {
20namespace symbolize {
21
// Regular expression for the SGR (Select Graphic Rendition) control codes
// recognized in text outside markup elements. It accepts exactly:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
27
28MarkupParser::MarkupParser(StringSet<> MultilineTags)
29 : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
30
31static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
32 return Str.take_front(N: Pos - Str.begin());
33}
34static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
35 Str = Str.drop_front(N: Pos - Str.begin());
36}
37
38void MarkupParser::parseLine(StringRef Line) {
39 Buffer.clear();
40 NextIdx = 0;
41 FinishedMultiline.clear();
42 this->Line = Line;
43}
44
45std::optional<MarkupNode> MarkupParser::nextNode() {
46 // Pull something out of the buffer if possible.
47 if (!Buffer.empty()) {
48 if (NextIdx < Buffer.size())
49 return std::move(Buffer[NextIdx++]);
50 NextIdx = 0;
51 Buffer.clear();
52 }
53
54 // The buffer is empty, so parse the next bit of the line.
55
56 if (Line.empty())
57 return std::nullopt;
58
59 if (!InProgressMultiline.empty()) {
60 if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
61 llvm::append_range(C&: InProgressMultiline, R&: *MultilineEnd);
62 assert(FinishedMultiline.empty() &&
63 "At most one multi-line element can be finished at a time.");
64 FinishedMultiline.swap(s&: InProgressMultiline);
65 // Parse the multi-line element as if it were contiguous.
66 advanceTo(Str&: Line, Pos: MultilineEnd->end());
67 return *parseElement(Line: FinishedMultiline);
68 }
69
70 // The whole line is part of the multi-line element.
71 llvm::append_range(C&: InProgressMultiline, R&: Line);
72 Line = Line.drop_front(N: Line.size());
73 return std::nullopt;
74 }
75
76 // Find the first valid markup element, if any.
77 if (std::optional<MarkupNode> Element = parseElement(Line)) {
78 parseTextOutsideMarkup(Text: takeTo(Str: Line, Pos: Element->Text.begin()));
79 Buffer.push_back(Elt: std::move(*Element));
80 advanceTo(Str&: Line, Pos: Element->Text.end());
81 return nextNode();
82 }
83
84 // Since there were no valid elements remaining, see if the line opens a
85 // multi-line element.
86 if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
87 // Emit any text before the element.
88 parseTextOutsideMarkup(Text: takeTo(Str: Line, Pos: MultilineBegin->begin()));
89
90 // Begin recording the multi-line element.
91 llvm::append_range(C&: InProgressMultiline, R&: *MultilineBegin);
92 Line = Line.drop_front(N: Line.size());
93 return nextNode();
94 }
95
96 // The line doesn't contain any more markup elements, so emit it as text.
97 parseTextOutsideMarkup(Text: Line);
98 Line = Line.drop_front(N: Line.size());
99 return nextNode();
100}
101
102void MarkupParser::flush() {
103 Buffer.clear();
104 NextIdx = 0;
105 Line = {};
106 if (InProgressMultiline.empty())
107 return;
108 FinishedMultiline.swap(s&: InProgressMultiline);
109 parseTextOutsideMarkup(Text: FinishedMultiline);
110}
111
112// Finds and returns the next valid markup element in the given line. Returns
113// std::nullopt if the line contains no valid elements.
114std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
115 while (true) {
116 // Find next element using begin and end markers.
117 size_t BeginPos = Line.find(Str: "{{{");
118 if (BeginPos == StringRef::npos)
119 return std::nullopt;
120 size_t EndPos = Line.find(Str: "}}}", From: BeginPos + 3);
121 if (EndPos == StringRef::npos)
122 return std::nullopt;
123 EndPos += 3;
124 MarkupNode Element;
125 Element.Text = Line.slice(Start: BeginPos, End: EndPos);
126 Line = Line.substr(Start: EndPos);
127
128 // Parse tag.
129 StringRef Content = Element.Text.drop_front(N: 3).drop_back(N: 3);
130 StringRef FieldsContent;
131 std::tie(args&: Element.Tag, args&: FieldsContent) = Content.split(Separator: ':');
132 if (Element.Tag.empty())
133 continue;
134
135 // Parse fields.
136 if (!FieldsContent.empty())
137 FieldsContent.split(A&: Element.Fields, Separator: ":");
138 else if (Content.back() == ':')
139 Element.Fields.push_back(Elt: FieldsContent);
140
141 return Element;
142 }
143}
144
145static MarkupNode textNode(StringRef Text) {
146 MarkupNode Node;
147 Node.Text = Text;
148 return Node;
149}
150
151// Parses a region of text known to be outside any markup elements. Such text
152// may still contain SGR control codes, so the region is further subdivided into
153// control codes and true text regions.
154void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
155 if (Text.empty())
156 return;
157 SmallVector<StringRef> Matches;
158 while (SGRSyntax.match(String: Text, Matches: &Matches)) {
159 // Emit any text before the SGR element.
160 if (Matches.begin()->begin() != Text.begin())
161 Buffer.push_back(Elt: textNode(Text: takeTo(Str: Text, Pos: Matches.begin()->begin())));
162
163 Buffer.push_back(Elt: textNode(Text: *Matches.begin()));
164 advanceTo(Str&: Text, Pos: Matches.begin()->end());
165 }
166 if (!Text.empty())
167 Buffer.push_back(Elt: textNode(Text));
168}
169
170// Given that a line doesn't contain any valid markup, see if it ends with the
171// start of a multi-line element. If so, returns the beginning.
172std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
173 // A multi-line begin marker must be the last one on the line.
174 size_t BeginPos = Line.rfind(Str: "{{{");
175 if (BeginPos == StringRef::npos)
176 return std::nullopt;
177 size_t BeginTagPos = BeginPos + 3;
178
179 // If there are any end markers afterwards, the begin marker cannot belong to
180 // a multi-line element.
181 size_t EndPos = Line.find(Str: "}}}", From: BeginTagPos);
182 if (EndPos != StringRef::npos)
183 return std::nullopt;
184
185 // Check whether the tag is registered multi-line.
186 size_t EndTagPos = Line.find(C: ':', From: BeginTagPos);
187 if (EndTagPos == StringRef::npos)
188 return std::nullopt;
189 StringRef Tag = Line.slice(Start: BeginTagPos, End: EndTagPos);
190 if (!MultilineTags.contains(key: Tag))
191 return std::nullopt;
192 return Line.substr(Start: BeginPos);
193}
194
195// See if the line begins with the ending of an in-progress multi-line element.
196// If so, return the ending.
197std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
198 size_t EndPos = Line.find(Str: "}}}");
199 if (EndPos == StringRef::npos)
200 return std::nullopt;
201 return Line.take_front(N: EndPos + 3);
202}
203
204} // end namespace symbolize
205} // end namespace llvm
206