1 | //===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/TableGen/StringToOffsetTable.h" |
10 | #include "llvm/Support/FormatVariadic.h" |
11 | #include "llvm/Support/raw_ostream.h" |
12 | #include "llvm/TableGen/Main.h" |
13 | |
14 | using namespace llvm; |
15 | |
16 | unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str, |
17 | bool appendZero) { |
18 | auto [II, Inserted] = StringOffset.insert(KV: {Str, size()}); |
19 | if (Inserted) { |
20 | // Add the string to the aggregate if this is the first time found. |
21 | AggregateString.append(first: Str.begin(), last: Str.end()); |
22 | if (appendZero) |
23 | AggregateString += '\0'; |
24 | } |
25 | |
26 | return II->second; |
27 | } |
28 | |
29 | void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name, |
30 | const Twine &Indent) const { |
31 | OS << formatv(Fmt: R"( |
32 | #ifdef __GNUC__ |
33 | #pragma GCC diagnostic push |
34 | #pragma GCC diagnostic ignored "-Woverlength-strings" |
35 | #endif |
36 | {0}static constexpr char {1}Storage[] = )" , |
37 | Vals: Indent, Vals: Name); |
38 | |
39 | // MSVC silently miscompiles string literals longer than 64k in some |
40 | // circumstances. The build system sets EmitLongStrLiterals to false when it |
41 | // detects that it is targetting MSVC. When that option is false and the |
42 | // string table is longer than 64k, emit it as an array of character |
43 | // literals. |
44 | bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024); |
45 | OS << (UseChars ? "{\n" : "\n" ); |
46 | |
47 | ListSeparator LineSep(UseChars ? ",\n" : "\n" ); |
48 | SmallVector<StringRef> Strings(split(Str: AggregateString, Separator: '\0')); |
49 | // We should always have an empty string at the start, and because these are |
50 | // null terminators rather than separators, we'll have one at the end as |
51 | // well. Skip the end one. |
52 | assert(Strings.front().empty() && "Expected empty initial string!" ); |
53 | assert(Strings.back().empty() && |
54 | "Expected empty string at the end due to terminators!" ); |
55 | Strings.pop_back(); |
56 | for (StringRef Str : Strings) { |
57 | OS << LineSep << Indent << " " ; |
58 | // If we can, just emit this as a string literal to be concatenated. |
59 | if (!UseChars) { |
60 | OS << "\"" ; |
61 | OS.write_escaped(Str); |
62 | OS << "\\0\"" ; |
63 | continue; |
64 | } |
65 | |
66 | ListSeparator CharSep(", " ); |
67 | for (char C : Str) { |
68 | OS << CharSep << "'" ; |
69 | OS.write_escaped(Str: StringRef(&C, 1)); |
70 | OS << "'" ; |
71 | } |
72 | OS << CharSep << "'\\0'" ; |
73 | } |
74 | OS << LineSep << Indent << (UseChars ? "};" : " ;" ); |
75 | |
76 | OS << formatv(Fmt: R"( |
77 | #ifdef __GNUC__ |
78 | #pragma GCC diagnostic pop |
79 | #endif |
80 | |
81 | {0}static constexpr llvm::StringTable {1} = |
82 | {0} {1}Storage; |
83 | )" , |
84 | Vals: Indent, Vals: Name); |
85 | } |
86 | |
87 | void StringToOffsetTable::EmitString(raw_ostream &O) const { |
88 | // Escape the string. |
89 | SmallString<256> EscapedStr; |
90 | raw_svector_ostream(EscapedStr).write_escaped(Str: AggregateString); |
91 | |
92 | O << " \"" ; |
93 | unsigned CharsPrinted = 0; |
94 | for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) { |
95 | if (CharsPrinted > 70) { |
96 | O << "\"\n \"" ; |
97 | CharsPrinted = 0; |
98 | } |
99 | O << EscapedStr[i]; |
100 | ++CharsPrinted; |
101 | |
102 | // Print escape sequences all together. |
103 | if (EscapedStr[i] != '\\') |
104 | continue; |
105 | |
106 | assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!" ); |
107 | if (isDigit(C: EscapedStr[i + 1])) { |
108 | assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) && |
109 | "Expected 3 digit octal escape!" ); |
110 | O << EscapedStr[++i]; |
111 | O << EscapedStr[++i]; |
112 | O << EscapedStr[++i]; |
113 | CharsPrinted += 3; |
114 | } else { |
115 | O << EscapedStr[++i]; |
116 | ++CharsPrinted; |
117 | } |
118 | } |
119 | O << "\"" ; |
120 | } |
121 | |