1//===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This tablegen backend emits an efficient function to translate HTML named
10// character references to UTF-8 sequences.
11//
12//===----------------------------------------------------------------------===//
13
14#include "TableGenBackends.h"
15#include "llvm/ADT/SmallString.h"
16#include "llvm/Support/ConvertUTF.h"
17#include "llvm/TableGen/Error.h"
18#include "llvm/TableGen/Record.h"
19#include "llvm/TableGen/StringMatcher.h"
20#include "llvm/TableGen/TableGenBackend.h"
21#include <vector>
22
23using namespace llvm;
24
25/// Convert a code point to the corresponding UTF-8 sequence represented
26/// as a C string literal.
27///
28/// \returns true on success.
29static bool translateCodePointToUTF8(unsigned CodePoint,
30 SmallVectorImpl<char> &CLiteral) {
31 char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
32 char *TranslatedPtr = Translated;
33 if (!ConvertCodePointToUTF8(Source: CodePoint, ResultPtr&: TranslatedPtr))
34 return false;
35
36 StringRef UTF8(Translated, TranslatedPtr - Translated);
37
38 raw_svector_ostream OS(CLiteral);
39 OS << "\"";
40 for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
41 OS << "\\x";
42 OS.write_hex(N: static_cast<unsigned char>(UTF8[i]));
43 }
44 OS << "\"";
45
46 return true;
47}
48
49void clang::EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
50 raw_ostream &OS) {
51 std::vector<Record *> Tags = Records.getAllDerivedDefinitions(ClassName: "NCR");
52 std::vector<StringMatcher::StringPair> NameToUTF8;
53 SmallString<32> CLiteral;
54 for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
55 I != E; ++I) {
56 Record &Tag = **I;
57 std::string Spelling = std::string(Tag.getValueAsString(FieldName: "Spelling"));
58 uint64_t CodePoint = Tag.getValueAsInt(FieldName: "CodePoint");
59 CLiteral.clear();
60 CLiteral.append(RHS: "return ");
61 if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
62 SrcMgr.PrintMessage(Loc: Tag.getLoc().front(),
63 Kind: SourceMgr::DK_Error,
64 Msg: Twine("invalid code point"));
65 continue;
66 }
67 CLiteral.append(RHS: ";");
68
69 StringMatcher::StringPair Match(Spelling, std::string(CLiteral));
70 NameToUTF8.push_back(x: Match);
71 }
72
73 emitSourceFileHeader(Desc: "HTML named character reference to UTF-8 translation",
74 OS, Record: Records);
75
76 OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
77 " StringRef Name) {\n";
78 StringMatcher("Name", NameToUTF8, OS).Emit();
79 OS << " return StringRef();\n"
80 << "}\n\n";
81}
82