1//===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/TableGen/StringToOffsetTable.h"
10#include "llvm/Support/FormatVariadic.h"
11#include "llvm/Support/raw_ostream.h"
12#include "llvm/TableGen/Error.h"
13#include "llvm/TableGen/Main.h"
14
15using namespace llvm;
16
17unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str) {
18 auto [II, Inserted] = StringOffset.insert(KV: {Str, size()});
19 if (Inserted) {
20 // Add the string to the aggregate if this is the first time found.
21 AggregateString.append(first: Str.begin(), last: Str.end());
22 if (AppendZero)
23 AggregateString += '\0';
24 }
25
26 return II->second;
27}
28
29void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS,
30 const Twine &Name) const {
31 // This generates a `llvm::StringTable` which expects that entries are null
32 // terminated. So fail with an error if `AppendZero` is false.
33 if (!AppendZero)
34 PrintFatalError(Msg: "llvm::StringTable requires null terminated strings");
35
36 OS << formatv(Fmt: R"(
37#ifdef __GNUC__
38#pragma GCC diagnostic push
39#pragma GCC diagnostic ignored "-Woverlength-strings"
40#endif
41{} constexpr char {}{}Storage[] =)",
42 Vals: ClassPrefix.empty() ? "static" : "",
43 Vals: UsePrefixForStorageMember ? ClassPrefix : "", Vals: Name);
44
45 // MSVC silently miscompiles string literals longer than 64k in some
46 // circumstances. The build system sets EmitLongStrLiterals to false when it
47 // detects that it is targetting MSVC. When that option is false and the
48 // string table is longer than 64k, emit it as an array of character
49 // literals.
50 bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024);
51 OS << (UseChars ? "{\n" : "\n");
52
53 ListSeparator LineSep(UseChars ? ",\n" : "\n");
54 SmallVector<StringRef> Strings(split(Str: AggregateString, Separator: '\0'));
55 // We should always have an empty string at the start, and because these are
56 // null terminators rather than separators, we'll have one at the end as
57 // well. Skip the end one.
58 assert(Strings.front().empty() && "Expected empty initial string!");
59 assert(Strings.back().empty() &&
60 "Expected empty string at the end due to terminators!");
61 Strings.pop_back();
62 for (StringRef Str : Strings) {
63 OS << LineSep << " ";
64 // If we can, just emit this as a string literal to be concatenated.
65 if (!UseChars) {
66 OS << "\"";
67 OS.write_escaped(Str);
68 OS << "\\0\"";
69 continue;
70 }
71
72 ListSeparator CharSep(", ");
73 for (char C : Str) {
74 OS << CharSep << "'";
75 OS.write_escaped(Str: StringRef(&C, 1));
76 OS << "'";
77 }
78 OS << CharSep << "'\\0'";
79 }
80 OS << LineSep << (UseChars ? "};" : " ;");
81
82 OS << formatv(Fmt: R"(
83#ifdef __GNUC__
84#pragma GCC diagnostic pop
85#endif
86
87{1} llvm::StringTable
88{2}{0} = {0}Storage;
89)",
90 Vals: Name, Vals: ClassPrefix.empty() ? "static constexpr" : "const",
91 Vals: ClassPrefix);
92}
93
94void StringToOffsetTable::EmitString(raw_ostream &O) const {
95 // Escape the string.
96 SmallString<256> EscapedStr;
97 raw_svector_ostream(EscapedStr).write_escaped(Str: AggregateString);
98
99 O << " \"";
100 unsigned CharsPrinted = 0;
101 for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
102 if (CharsPrinted > 70) {
103 O << "\"\n \"";
104 CharsPrinted = 0;
105 }
106 O << EscapedStr[i];
107 ++CharsPrinted;
108
109 // Print escape sequences all together.
110 if (EscapedStr[i] != '\\')
111 continue;
112
113 assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
114 if (isDigit(C: EscapedStr[i + 1])) {
115 assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
116 "Expected 3 digit octal escape!");
117 O << EscapedStr[++i];
118 O << EscapedStr[++i];
119 O << EscapedStr[++i];
120 CharsPrinted += 3;
121 } else {
122 O << EscapedStr[++i];
123 ++CharsPrinted;
124 }
125 }
126 O << "\"";
127}
128