1 | //== HTMLRewrite.cpp - Translate source code into prettified HTML --*- C++ -*-// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the HTMLRewriter class, which is used to translate the |
10 | // text of a source file into prettified HTML. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "clang/Rewrite/Core/HTMLRewrite.h" |
15 | #include "clang/Basic/SourceManager.h" |
16 | #include "clang/Lex/Preprocessor.h" |
17 | #include "clang/Lex/TokenConcatenation.h" |
18 | #include "clang/Rewrite/Core/Rewriter.h" |
19 | #include "llvm/ADT/SmallString.h" |
20 | #include "llvm/Support/ErrorHandling.h" |
21 | #include "llvm/Support/MemoryBuffer.h" |
22 | #include "llvm/Support/raw_ostream.h" |
23 | #include <memory> |
24 | |
25 | using namespace clang; |
26 | using namespace llvm; |
27 | using namespace html; |
28 | |
29 | /// HighlightRange - Highlight a range in the source code with the specified |
30 | /// start/end tags. B/E must be in the same file. This ensures that |
31 | /// start/end tags are placed at the start/end of each line if the range is |
32 | /// multiline. |
33 | void html::HighlightRange(Rewriter &R, SourceLocation B, SourceLocation E, |
34 | const char *StartTag, const char *EndTag, |
35 | bool IsTokenRange) { |
36 | SourceManager &SM = R.getSourceMgr(); |
37 | B = SM.getExpansionLoc(Loc: B); |
38 | E = SM.getExpansionLoc(Loc: E); |
39 | FileID FID = SM.getFileID(SpellingLoc: B); |
40 | assert(SM.getFileID(E) == FID && "B/E not in the same file!" ); |
41 | |
42 | unsigned BOffset = SM.getFileOffset(SpellingLoc: B); |
43 | unsigned EOffset = SM.getFileOffset(SpellingLoc: E); |
44 | |
45 | // Include the whole end token in the range. |
46 | if (IsTokenRange) |
47 | EOffset += Lexer::MeasureTokenLength(Loc: E, SM: R.getSourceMgr(), LangOpts: R.getLangOpts()); |
48 | |
49 | bool Invalid = false; |
50 | const char *BufferStart = SM.getBufferData(FID, Invalid: &Invalid).data(); |
51 | if (Invalid) |
52 | return; |
53 | |
54 | HighlightRange(RB&: R.getEditBuffer(FID), B: BOffset, E: EOffset, |
55 | BufferStart, StartTag, EndTag); |
56 | } |
57 | |
58 | /// HighlightRange - This is the same as the above method, but takes |
59 | /// decomposed file locations. |
60 | void html::HighlightRange(RewriteBuffer &RB, unsigned B, unsigned E, |
61 | const char *BufferStart, |
62 | const char *StartTag, const char *EndTag) { |
63 | // Insert the tag at the absolute start/end of the range. |
64 | RB.InsertTextAfter(OrigOffset: B, Str: StartTag); |
65 | RB.InsertTextBefore(OrigOffset: E, Str: EndTag); |
66 | |
67 | // Scan the range to see if there is a \r or \n. If so, and if the line is |
68 | // not blank, insert tags on that line as well. |
69 | bool HadOpenTag = true; |
70 | |
71 | unsigned LastNonWhiteSpace = B; |
72 | for (unsigned i = B; i != E; ++i) { |
73 | switch (BufferStart[i]) { |
74 | case '\r': |
75 | case '\n': |
76 | // Okay, we found a newline in the range. If we have an open tag, we need |
77 | // to insert a close tag at the first non-whitespace before the newline. |
78 | if (HadOpenTag) |
79 | RB.InsertTextBefore(OrigOffset: LastNonWhiteSpace+1, Str: EndTag); |
80 | |
81 | // Instead of inserting an open tag immediately after the newline, we |
82 | // wait until we see a non-whitespace character. This prevents us from |
83 | // inserting tags around blank lines, and also allows the open tag to |
84 | // be put *after* whitespace on a non-blank line. |
85 | HadOpenTag = false; |
86 | break; |
87 | case '\0': |
88 | case ' ': |
89 | case '\t': |
90 | case '\f': |
91 | case '\v': |
92 | // Ignore whitespace. |
93 | break; |
94 | |
95 | default: |
96 | // If there is no tag open, do it now. |
97 | if (!HadOpenTag) { |
98 | RB.InsertTextAfter(OrigOffset: i, Str: StartTag); |
99 | HadOpenTag = true; |
100 | } |
101 | |
102 | // Remember this character. |
103 | LastNonWhiteSpace = i; |
104 | break; |
105 | } |
106 | } |
107 | } |
108 | |
109 | namespace clang::html { |
110 | struct RelexRewriteCache { |
111 | // These structs mimic input arguments of HighlightRange(). |
112 | struct Highlight { |
113 | SourceLocation B, E; |
114 | std::string StartTag, EndTag; |
115 | bool IsTokenRange; |
116 | }; |
117 | struct RawHighlight { |
118 | unsigned B, E; |
119 | std::string StartTag, EndTag; |
120 | }; |
121 | |
122 | // SmallVector isn't appropriate because these vectors are almost never small. |
123 | using HighlightList = std::vector<Highlight>; |
124 | using RawHighlightList = std::vector<RawHighlight>; |
125 | |
126 | DenseMap<FileID, RawHighlightList> SyntaxHighlights; |
127 | DenseMap<FileID, HighlightList> MacroHighlights; |
128 | }; |
129 | } // namespace clang::html |
130 | |
131 | html::RelexRewriteCacheRef html::instantiateRelexRewriteCache() { |
132 | return std::make_shared<RelexRewriteCache>(); |
133 | } |
134 | |
135 | void html::EscapeText(Rewriter &R, FileID FID, |
136 | bool EscapeSpaces, bool ReplaceTabs) { |
137 | |
138 | llvm::MemoryBufferRef Buf = R.getSourceMgr().getBufferOrFake(FID); |
139 | const char* C = Buf.getBufferStart(); |
140 | const char* FileEnd = Buf.getBufferEnd(); |
141 | |
142 | assert (C <= FileEnd); |
143 | |
144 | RewriteBuffer &RB = R.getEditBuffer(FID); |
145 | |
146 | unsigned ColNo = 0; |
147 | for (unsigned FilePos = 0; C != FileEnd ; ++C, ++FilePos) { |
148 | switch (*C) { |
149 | default: ++ColNo; break; |
150 | case '\n': |
151 | case '\r': |
152 | ColNo = 0; |
153 | break; |
154 | |
155 | case ' ': |
156 | if (EscapeSpaces) |
157 | RB.ReplaceText(OrigOffset: FilePos, OrigLength: 1, NewStr: " " ); |
158 | ++ColNo; |
159 | break; |
160 | case '\f': |
161 | RB.ReplaceText(OrigOffset: FilePos, OrigLength: 1, NewStr: "<hr>" ); |
162 | ColNo = 0; |
163 | break; |
164 | |
165 | case '\t': { |
166 | if (!ReplaceTabs) |
167 | break; |
168 | unsigned NumSpaces = 8-(ColNo&7); |
169 | if (EscapeSpaces) |
170 | RB.ReplaceText(OrigOffset: FilePos, OrigLength: 1, |
171 | NewStr: StringRef(" " |
172 | " " , 6*NumSpaces)); |
173 | else |
174 | RB.ReplaceText(OrigOffset: FilePos, OrigLength: 1, NewStr: StringRef(" " , NumSpaces)); |
175 | ColNo += NumSpaces; |
176 | break; |
177 | } |
178 | case '<': |
179 | RB.ReplaceText(OrigOffset: FilePos, OrigLength: 1, NewStr: "<" ); |
180 | ++ColNo; |
181 | break; |
182 | |
183 | case '>': |
184 | RB.ReplaceText(OrigOffset: FilePos, OrigLength: 1, NewStr: ">" ); |
185 | ++ColNo; |
186 | break; |
187 | |
188 | case '&': |
189 | RB.ReplaceText(OrigOffset: FilePos, OrigLength: 1, NewStr: "&" ); |
190 | ++ColNo; |
191 | break; |
192 | } |
193 | } |
194 | } |
195 | |
196 | std::string html::EscapeText(StringRef s, bool EscapeSpaces, bool ReplaceTabs) { |
197 | |
198 | unsigned len = s.size(); |
199 | std::string Str; |
200 | llvm::raw_string_ostream os(Str); |
201 | |
202 | for (unsigned i = 0 ; i < len; ++i) { |
203 | |
204 | char c = s[i]; |
205 | switch (c) { |
206 | default: |
207 | os << c; break; |
208 | |
209 | case ' ': |
210 | if (EscapeSpaces) os << " " ; |
211 | else os << ' '; |
212 | break; |
213 | |
214 | case '\t': |
215 | if (ReplaceTabs) { |
216 | if (EscapeSpaces) |
217 | for (unsigned i = 0; i < 4; ++i) |
218 | os << " " ; |
219 | else |
220 | for (unsigned i = 0; i < 4; ++i) |
221 | os << " " ; |
222 | } |
223 | else |
224 | os << c; |
225 | |
226 | break; |
227 | |
228 | case '<': os << "<" ; break; |
229 | case '>': os << ">" ; break; |
230 | case '&': os << "&" ; break; |
231 | } |
232 | } |
233 | |
234 | return Str; |
235 | } |
236 | |
237 | static void AddLineNumber(RewriteBuffer &RB, unsigned LineNo, |
238 | unsigned B, unsigned E) { |
239 | SmallString<256> Str; |
240 | llvm::raw_svector_ostream OS(Str); |
241 | |
242 | OS << "<tr class=\"codeline\" data-linenumber=\"" << LineNo << "\">" |
243 | << "<td class=\"num\" id=\"LN" << LineNo << "\">" << LineNo |
244 | << "</td><td class=\"line\">" ; |
245 | |
246 | if (B == E) { // Handle empty lines. |
247 | OS << " </td></tr>" ; |
248 | RB.InsertTextBefore(OrigOffset: B, Str: OS.str()); |
249 | } else { |
250 | RB.InsertTextBefore(OrigOffset: B, Str: OS.str()); |
251 | RB.InsertTextBefore(OrigOffset: E, Str: "</td></tr>" ); |
252 | } |
253 | } |
254 | |
255 | void html::AddLineNumbers(Rewriter& R, FileID FID) { |
256 | |
257 | llvm::MemoryBufferRef Buf = R.getSourceMgr().getBufferOrFake(FID); |
258 | const char* FileBeg = Buf.getBufferStart(); |
259 | const char* FileEnd = Buf.getBufferEnd(); |
260 | const char* C = FileBeg; |
261 | RewriteBuffer &RB = R.getEditBuffer(FID); |
262 | |
263 | assert (C <= FileEnd); |
264 | |
265 | unsigned LineNo = 0; |
266 | unsigned FilePos = 0; |
267 | |
268 | while (C != FileEnd) { |
269 | |
270 | ++LineNo; |
271 | unsigned LineStartPos = FilePos; |
272 | unsigned LineEndPos = FileEnd - FileBeg; |
273 | |
274 | assert (FilePos <= LineEndPos); |
275 | assert (C < FileEnd); |
276 | |
277 | // Scan until the newline (or end-of-file). |
278 | |
279 | while (C != FileEnd) { |
280 | char c = *C; |
281 | ++C; |
282 | |
283 | if (c == '\n') { |
284 | LineEndPos = FilePos++; |
285 | break; |
286 | } |
287 | |
288 | ++FilePos; |
289 | } |
290 | |
291 | AddLineNumber(RB, LineNo, B: LineStartPos, E: LineEndPos); |
292 | } |
293 | |
294 | // Add one big table tag that surrounds all of the code. |
295 | std::string s; |
296 | llvm::raw_string_ostream os(s); |
297 | os << "<table class=\"code\" data-fileid=\"" << FID.getHashValue() << "\">\n" ; |
298 | RB.InsertTextBefore(OrigOffset: 0, Str: os.str()); |
299 | RB.InsertTextAfter(OrigOffset: FileEnd - FileBeg, Str: "</table>" ); |
300 | } |
301 | |
302 | void html::(Rewriter &R, FileID FID, |
303 | StringRef title) { |
304 | |
305 | llvm::MemoryBufferRef Buf = R.getSourceMgr().getBufferOrFake(FID); |
306 | const char* FileStart = Buf.getBufferStart(); |
307 | const char* FileEnd = Buf.getBufferEnd(); |
308 | |
309 | SourceLocation StartLoc = R.getSourceMgr().getLocForStartOfFile(FID); |
310 | SourceLocation EndLoc = StartLoc.getLocWithOffset(Offset: FileEnd-FileStart); |
311 | |
312 | std::string s; |
313 | llvm::raw_string_ostream os(s); |
314 | os << "<!doctype html>\n" // Use HTML 5 doctype |
315 | "<html>\n<head>\n" ; |
316 | |
317 | if (!title.empty()) |
318 | os << "<title>" << html::EscapeText(s: title) << "</title>\n" ; |
319 | |
320 | os << R"<<<( |
321 | <style type="text/css"> |
322 | body { color:#000000; background-color:#ffffff } |
323 | body { font-family:Helvetica, sans-serif; font-size:10pt } |
324 | h1 { font-size:14pt } |
325 | .FileName { margin-top: 5px; margin-bottom: 5px; display: inline; } |
326 | .FileNav { margin-left: 5px; margin-right: 5px; display: inline; } |
327 | .FileNav a { text-decoration:none; font-size: larger; } |
328 | .divider { margin-top: 30px; margin-bottom: 30px; height: 15px; } |
329 | .divider { background-color: gray; } |
330 | .code { border-collapse:collapse; width:100%; } |
331 | .code { font-family: "Monospace", monospace; font-size:10pt } |
332 | .code { line-height: 1.2em } |
333 | .comment { color: green; font-style: oblique } |
334 | .keyword { color: blue } |
335 | .string_literal { color: red } |
336 | .directive { color: darkmagenta } |
337 | |
338 | /* Macros and variables could have pop-up notes hidden by default. |
339 | - Macro pop-up: expansion of the macro |
340 | - Variable pop-up: value (table) of the variable */ |
341 | .macro_popup, .variable_popup { display: none; } |
342 | |
343 | /* Pop-up appears on mouse-hover event. */ |
344 | .macro:hover .macro_popup, .variable:hover .variable_popup { |
345 | display: block; |
346 | padding: 2px; |
347 | -webkit-border-radius:5px; |
348 | -webkit-box-shadow:1px 1px 7px #000; |
349 | border-radius:5px; |
350 | box-shadow:1px 1px 7px #000; |
351 | position: absolute; |
352 | top: -1em; |
353 | left:10em; |
354 | z-index: 1 |
355 | } |
356 | |
357 | .macro_popup { |
358 | border: 2px solid red; |
359 | background-color:#FFF0F0; |
360 | font-weight: normal; |
361 | } |
362 | |
363 | .variable_popup { |
364 | border: 2px solid blue; |
365 | background-color:#F0F0FF; |
366 | font-weight: bold; |
367 | font-family: Helvetica, sans-serif; |
368 | font-size: 9pt; |
369 | } |
370 | |
371 | /* Pop-up notes needs a relative position as a base where they pops up. */ |
372 | .macro, .variable { |
373 | background-color: PaleGoldenRod; |
374 | position: relative; |
375 | } |
376 | .macro { color: DarkMagenta; } |
377 | |
378 | #tooltiphint { |
379 | position: fixed; |
380 | width: 50em; |
381 | margin-left: -25em; |
382 | left: 50%; |
383 | padding: 10px; |
384 | border: 1px solid #b0b0b0; |
385 | border-radius: 2px; |
386 | box-shadow: 1px 1px 7px black; |
387 | background-color: #c0c0c0; |
388 | z-index: 2; |
389 | } |
390 | |
391 | .num { width:2.5em; padding-right:2ex; background-color:#eeeeee } |
392 | .num { text-align:right; font-size:8pt } |
393 | .num { color:#444444 } |
394 | .line { padding-left: 1ex; border-left: 3px solid #ccc } |
395 | .line { white-space: pre } |
396 | .msg { -webkit-box-shadow:1px 1px 7px #000 } |
397 | .msg { box-shadow:1px 1px 7px #000 } |
398 | .msg { -webkit-border-radius:5px } |
399 | .msg { border-radius:5px } |
400 | .msg { font-family:Helvetica, sans-serif; font-size:8pt } |
401 | .msg { float:left } |
402 | .msg { position:relative } |
403 | .msg { padding:0.25em 1ex 0.25em 1ex } |
404 | .msg { margin-top:10px; margin-bottom:10px } |
405 | .msg { font-weight:bold } |
406 | .msg { max-width:60em; word-wrap: break-word; white-space: pre-wrap } |
407 | .msgT { padding:0x; spacing:0x } |
408 | .msgEvent { background-color:#fff8b4; color:#000000 } |
409 | .msgControl { background-color:#bbbbbb; color:#000000 } |
410 | .msgNote { background-color:#ddeeff; color:#000000 } |
411 | .mrange { background-color:#dfddf3 } |
412 | .mrange { border-bottom:1px solid #6F9DBE } |
413 | .PathIndex { font-weight: bold; padding:0px 5px; margin-right:5px; } |
414 | .PathIndex { -webkit-border-radius:8px } |
415 | .PathIndex { border-radius:8px } |
416 | .PathIndexEvent { background-color:#bfba87 } |
417 | .PathIndexControl { background-color:#8c8c8c } |
418 | .PathIndexPopUp { background-color: #879abc; } |
419 | .PathNav a { text-decoration:none; font-size: larger } |
420 | .CodeInsertionHint { font-weight: bold; background-color: #10dd10 } |
421 | .CodeRemovalHint { background-color:#de1010 } |
422 | .CodeRemovalHint { border-bottom:1px solid #6F9DBE } |
423 | .msg.selected{ background-color:orange !important; } |
424 | |
425 | table.simpletable { |
426 | padding: 5px; |
427 | font-size:12pt; |
428 | margin:20px; |
429 | border-collapse: collapse; border-spacing: 0px; |
430 | } |
431 | td.rowname { |
432 | text-align: right; |
433 | vertical-align: top; |
434 | font-weight: bold; |
435 | color:#444444; |
436 | padding-right:2ex; |
437 | } |
438 | |
439 | /* Hidden text. */ |
440 | input.spoilerhider + label { |
441 | cursor: pointer; |
442 | text-decoration: underline; |
443 | display: block; |
444 | } |
445 | input.spoilerhider { |
446 | display: none; |
447 | } |
448 | input.spoilerhider ~ .spoiler { |
449 | overflow: hidden; |
450 | margin: 10px auto 0; |
451 | height: 0; |
452 | opacity: 0; |
453 | } |
454 | input.spoilerhider:checked + label + .spoiler{ |
455 | height: auto; |
456 | opacity: 1; |
457 | } |
458 | </style> |
459 | </head> |
460 | <body>)<<<" ; |
461 | |
462 | // Generate header |
463 | R.InsertTextBefore(Loc: StartLoc, Str: os.str()); |
464 | // Generate footer |
465 | |
466 | R.InsertTextAfter(Loc: EndLoc, Str: "</body></html>\n" ); |
467 | } |
468 | |
469 | /// SyntaxHighlight - Relex the specified FileID and annotate the HTML with |
470 | /// information about keywords, macro expansions etc. This uses the macro |
471 | /// table state from the end of the file, so it won't be perfectly perfect, |
472 | /// but it will be reasonably close. |
473 | static void SyntaxHighlightImpl( |
474 | Rewriter &R, FileID FID, const Preprocessor &PP, |
475 | llvm::function_ref<void(RewriteBuffer &, unsigned, unsigned, const char *, |
476 | const char *, const char *)> |
477 | HighlightRangeCallback) { |
478 | |
479 | RewriteBuffer &RB = R.getEditBuffer(FID); |
480 | const SourceManager &SM = PP.getSourceManager(); |
481 | llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID); |
482 | const char *BufferStart = FromFile.getBuffer().data(); |
483 | |
484 | Lexer L(FID, FromFile, SM, PP.getLangOpts()); |
485 | |
486 | // Inform the preprocessor that we want to retain comments as tokens, so we |
487 | // can highlight them. |
488 | L.SetCommentRetentionState(true); |
489 | |
490 | // Lex all the tokens in raw mode, to avoid entering #includes or expanding |
491 | // macros. |
492 | Token Tok; |
493 | L.LexFromRawLexer(Result&: Tok); |
494 | |
495 | while (Tok.isNot(K: tok::eof)) { |
496 | // Since we are lexing unexpanded tokens, all tokens are from the main |
497 | // FileID. |
498 | unsigned TokOffs = SM.getFileOffset(SpellingLoc: Tok.getLocation()); |
499 | unsigned TokLen = Tok.getLength(); |
500 | switch (Tok.getKind()) { |
501 | default: break; |
502 | case tok::identifier: |
503 | llvm_unreachable("tok::identifier in raw lexing mode!" ); |
504 | case tok::raw_identifier: { |
505 | // Fill in Result.IdentifierInfo and update the token kind, |
506 | // looking up the identifier in the identifier table. |
507 | PP.LookUpIdentifierInfo(Identifier&: Tok); |
508 | |
509 | // If this is a pp-identifier, for a keyword, highlight it as such. |
510 | if (Tok.isNot(K: tok::identifier)) |
511 | HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart, |
512 | "<span class='keyword'>" , "</span>" ); |
513 | break; |
514 | } |
515 | case tok::comment: |
516 | HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart, |
517 | "<span class='comment'>" , "</span>" ); |
518 | break; |
519 | case tok::utf8_string_literal: |
520 | // Chop off the u part of u8 prefix |
521 | ++TokOffs; |
522 | --TokLen; |
523 | // FALL THROUGH to chop the 8 |
524 | [[fallthrough]]; |
525 | case tok::wide_string_literal: |
526 | case tok::utf16_string_literal: |
527 | case tok::utf32_string_literal: |
528 | // Chop off the L, u, U or 8 prefix |
529 | ++TokOffs; |
530 | --TokLen; |
531 | [[fallthrough]]; |
532 | case tok::string_literal: |
533 | // FIXME: Exclude the optional ud-suffix from the highlighted range. |
534 | HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart, |
535 | "<span class='string_literal'>" , "</span>" ); |
536 | break; |
537 | case tok::hash: { |
538 | // If this is a preprocessor directive, all tokens to end of line are too. |
539 | if (!Tok.isAtStartOfLine()) |
540 | break; |
541 | |
542 | // Eat all of the tokens until we get to the next one at the start of |
543 | // line. |
544 | unsigned TokEnd = TokOffs+TokLen; |
545 | L.LexFromRawLexer(Result&: Tok); |
546 | while (!Tok.isAtStartOfLine() && Tok.isNot(K: tok::eof)) { |
547 | TokEnd = SM.getFileOffset(SpellingLoc: Tok.getLocation())+Tok.getLength(); |
548 | L.LexFromRawLexer(Result&: Tok); |
549 | } |
550 | |
551 | // Find end of line. This is a hack. |
552 | HighlightRangeCallback(RB, TokOffs, TokEnd, BufferStart, |
553 | "<span class='directive'>" , "</span>" ); |
554 | |
555 | // Don't skip the next token. |
556 | continue; |
557 | } |
558 | } |
559 | |
560 | L.LexFromRawLexer(Result&: Tok); |
561 | } |
562 | } |
563 | void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP, |
564 | RelexRewriteCacheRef Cache) { |
565 | RewriteBuffer &RB = R.getEditBuffer(FID); |
566 | const SourceManager &SM = PP.getSourceManager(); |
567 | llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID); |
568 | const char *BufferStart = FromFile.getBuffer().data(); |
569 | |
570 | if (Cache) { |
571 | auto CacheIt = Cache->SyntaxHighlights.find(Val: FID); |
572 | if (CacheIt != Cache->SyntaxHighlights.end()) { |
573 | for (const RelexRewriteCache::RawHighlight &H : CacheIt->second) { |
574 | HighlightRange(RB, B: H.B, E: H.E, BufferStart, StartTag: H.StartTag.data(), |
575 | EndTag: H.EndTag.data()); |
576 | } |
577 | return; |
578 | } |
579 | } |
580 | |
581 | // "Every time you would call HighlightRange, cache the inputs as well." |
582 | auto HighlightRangeCallback = [&](RewriteBuffer &RB, unsigned B, unsigned E, |
583 | const char *BufferStart, |
584 | const char *StartTag, const char *EndTag) { |
585 | HighlightRange(RB, B, E, BufferStart, StartTag, EndTag); |
586 | |
587 | if (Cache) |
588 | Cache->SyntaxHighlights[FID].push_back(x: {.B: B, .E: E, .StartTag: StartTag, .EndTag: EndTag}); |
589 | }; |
590 | |
591 | SyntaxHighlightImpl(R, FID, PP, HighlightRangeCallback); |
592 | } |
593 | |
594 | static void HighlightMacrosImpl( |
595 | Rewriter &R, FileID FID, const Preprocessor &PP, |
596 | llvm::function_ref<void(Rewriter &, SourceLocation, SourceLocation, |
597 | const char *, const char *, bool)> |
598 | HighlightRangeCallback) { |
599 | |
600 | // Re-lex the raw token stream into a token buffer. |
601 | const SourceManager &SM = PP.getSourceManager(); |
602 | std::vector<Token> TokenStream; |
603 | |
604 | llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID); |
605 | Lexer L(FID, FromFile, SM, PP.getLangOpts()); |
606 | |
607 | // Lex all the tokens in raw mode, to avoid entering #includes or expanding |
608 | // macros. |
609 | while (true) { |
610 | Token Tok; |
611 | L.LexFromRawLexer(Result&: Tok); |
612 | |
613 | // If this is a # at the start of a line, discard it from the token stream. |
614 | // We don't want the re-preprocess step to see #defines, #includes or other |
615 | // preprocessor directives. |
616 | if (Tok.is(K: tok::hash) && Tok.isAtStartOfLine()) |
617 | continue; |
618 | |
619 | // If this is a ## token, change its kind to unknown so that repreprocessing |
620 | // it will not produce an error. |
621 | if (Tok.is(K: tok::hashhash)) |
622 | Tok.setKind(tok::unknown); |
623 | |
624 | // If this raw token is an identifier, the raw lexer won't have looked up |
625 | // the corresponding identifier info for it. Do this now so that it will be |
626 | // macro expanded when we re-preprocess it. |
627 | if (Tok.is(K: tok::raw_identifier)) |
628 | PP.LookUpIdentifierInfo(Identifier&: Tok); |
629 | |
630 | TokenStream.push_back(x: Tok); |
631 | |
632 | if (Tok.is(K: tok::eof)) break; |
633 | } |
634 | |
635 | // Temporarily change the diagnostics object so that we ignore any generated |
636 | // diagnostics from this pass. |
637 | DiagnosticsEngine TmpDiags(PP.getDiagnostics().getDiagnosticIDs(), |
638 | &PP.getDiagnostics().getDiagnosticOptions(), |
639 | new IgnoringDiagConsumer); |
640 | |
641 | // FIXME: This is a huge hack; we reuse the input preprocessor because we want |
642 | // its state, but we aren't actually changing it (we hope). This should really |
643 | // construct a copy of the preprocessor. |
644 | Preprocessor &TmpPP = const_cast<Preprocessor&>(PP); |
645 | DiagnosticsEngine *OldDiags = &TmpPP.getDiagnostics(); |
646 | TmpPP.setDiagnostics(TmpDiags); |
647 | |
648 | // Inform the preprocessor that we don't want comments. |
649 | TmpPP.SetCommentRetentionState(KeepComments: false, KeepMacroComments: false); |
650 | |
651 | // We don't want pragmas either. Although we filtered out #pragma, removing |
652 | // _Pragma and __pragma is much harder. |
653 | bool PragmasPreviouslyEnabled = TmpPP.getPragmasEnabled(); |
654 | TmpPP.setPragmasEnabled(false); |
655 | |
656 | // Enter the tokens we just lexed. This will cause them to be macro expanded |
657 | // but won't enter sub-files (because we removed #'s). |
658 | TmpPP.EnterTokenStream(Toks: TokenStream, DisableMacroExpansion: false, /*IsReinject=*/false); |
659 | |
660 | TokenConcatenation ConcatInfo(TmpPP); |
661 | |
662 | // Lex all the tokens. |
663 | Token Tok; |
664 | TmpPP.Lex(Result&: Tok); |
665 | while (Tok.isNot(K: tok::eof)) { |
666 | // Ignore non-macro tokens. |
667 | if (!Tok.getLocation().isMacroID()) { |
668 | TmpPP.Lex(Result&: Tok); |
669 | continue; |
670 | } |
671 | |
672 | // Okay, we have the first token of a macro expansion: highlight the |
673 | // expansion by inserting a start tag before the macro expansion and |
674 | // end tag after it. |
675 | CharSourceRange LLoc = SM.getExpansionRange(Loc: Tok.getLocation()); |
676 | |
677 | // Ignore tokens whose instantiation location was not the main file. |
678 | if (SM.getFileID(SpellingLoc: LLoc.getBegin()) != FID) { |
679 | TmpPP.Lex(Result&: Tok); |
680 | continue; |
681 | } |
682 | |
683 | assert(SM.getFileID(LLoc.getEnd()) == FID && |
684 | "Start and end of expansion must be in the same ultimate file!" ); |
685 | |
686 | std::string Expansion = EscapeText(s: TmpPP.getSpelling(Tok)); |
687 | unsigned LineLen = Expansion.size(); |
688 | |
689 | Token PrevPrevTok; |
690 | Token PrevTok = Tok; |
691 | // Okay, eat this token, getting the next one. |
692 | TmpPP.Lex(Result&: Tok); |
693 | |
694 | // Skip all the rest of the tokens that are part of this macro |
695 | // instantiation. It would be really nice to pop up a window with all the |
696 | // spelling of the tokens or something. |
697 | while (!Tok.is(K: tok::eof) && |
698 | SM.getExpansionLoc(Loc: Tok.getLocation()) == LLoc.getBegin()) { |
699 | // Insert a newline if the macro expansion is getting large. |
700 | if (LineLen > 60) { |
701 | Expansion += "<br>" ; |
702 | LineLen = 0; |
703 | } |
704 | |
705 | LineLen -= Expansion.size(); |
706 | |
707 | // If the tokens were already space separated, or if they must be to avoid |
708 | // them being implicitly pasted, add a space between them. |
709 | if (Tok.hasLeadingSpace() || |
710 | ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok)) |
711 | Expansion += ' '; |
712 | |
713 | // Escape any special characters in the token text. |
714 | Expansion += EscapeText(s: TmpPP.getSpelling(Tok)); |
715 | LineLen += Expansion.size(); |
716 | |
717 | PrevPrevTok = PrevTok; |
718 | PrevTok = Tok; |
719 | TmpPP.Lex(Result&: Tok); |
720 | } |
721 | |
722 | // Insert the 'macro_popup' as the end tag, so that multi-line macros all |
723 | // get highlighted. |
724 | Expansion = "<span class='macro_popup'>" + Expansion + "</span></span>" ; |
725 | |
726 | HighlightRangeCallback(R, LLoc.getBegin(), LLoc.getEnd(), |
727 | "<span class='macro'>" , Expansion.c_str(), |
728 | LLoc.isTokenRange()); |
729 | } |
730 | |
731 | // Restore the preprocessor's old state. |
732 | TmpPP.setDiagnostics(*OldDiags); |
733 | TmpPP.setPragmasEnabled(PragmasPreviouslyEnabled); |
734 | } |
735 | |
736 | /// HighlightMacros - This uses the macro table state from the end of the |
737 | /// file, to re-expand macros and insert (into the HTML) information about the |
738 | /// macro expansions. This won't be perfectly perfect, but it will be |
739 | /// reasonably close. |
740 | void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor &PP, |
741 | RelexRewriteCacheRef Cache) { |
742 | if (Cache) { |
743 | auto CacheIt = Cache->MacroHighlights.find(Val: FID); |
744 | if (CacheIt != Cache->MacroHighlights.end()) { |
745 | for (const RelexRewriteCache::Highlight &H : CacheIt->second) { |
746 | HighlightRange(R, B: H.B, E: H.E, StartTag: H.StartTag.data(), EndTag: H.EndTag.data(), |
747 | IsTokenRange: H.IsTokenRange); |
748 | } |
749 | return; |
750 | } |
751 | } |
752 | |
753 | // "Every time you would call HighlightRange, cache the inputs as well." |
754 | auto HighlightRangeCallback = [&](Rewriter &R, SourceLocation B, |
755 | SourceLocation E, const char *StartTag, |
756 | const char *EndTag, bool isTokenRange) { |
757 | HighlightRange(R, B, E, StartTag, EndTag, IsTokenRange: isTokenRange); |
758 | |
759 | if (Cache) { |
760 | Cache->MacroHighlights[FID].push_back( |
761 | x: {.B: B, .E: E, .StartTag: StartTag, .EndTag: EndTag, .IsTokenRange: isTokenRange}); |
762 | } |
763 | }; |
764 | |
765 | HighlightMacrosImpl(R, FID, PP, HighlightRangeCallback); |
766 | } |
767 | |