1//===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/TableGen/StringToOffsetTable.h"
10#include "llvm/Support/FormatVariadic.h"
11#include "llvm/Support/raw_ostream.h"
12#include "llvm/TableGen/Main.h"
13
14using namespace llvm;
15
16unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str,
17 bool appendZero) {
18 auto [II, Inserted] = StringOffset.insert(KV: {Str, size()});
19 if (Inserted) {
20 // Add the string to the aggregate if this is the first time found.
21 AggregateString.append(first: Str.begin(), last: Str.end());
22 if (appendZero)
23 AggregateString += '\0';
24 }
25
26 return II->second;
27}
28
29void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
30 const Twine &Indent) const {
31 OS << formatv(Fmt: R"(
32#ifdef __GNUC__
33#pragma GCC diagnostic push
34#pragma GCC diagnostic ignored "-Woverlength-strings"
35#endif
36{0}static constexpr char {1}Storage[] = )",
37 Vals: Indent, Vals: Name);
38
39 // MSVC silently miscompiles string literals longer than 64k in some
40 // circumstances. The build system sets EmitLongStrLiterals to false when it
41 // detects that it is targetting MSVC. When that option is false and the
42 // string table is longer than 64k, emit it as an array of character
43 // literals.
44 bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024);
45 OS << (UseChars ? "{\n" : "\n");
46
47 ListSeparator LineSep(UseChars ? ",\n" : "\n");
48 SmallVector<StringRef> Strings(split(Str: AggregateString, Separator: '\0'));
49 // We should always have an empty string at the start, and because these are
50 // null terminators rather than separators, we'll have one at the end as
51 // well. Skip the end one.
52 assert(Strings.front().empty() && "Expected empty initial string!");
53 assert(Strings.back().empty() &&
54 "Expected empty string at the end due to terminators!");
55 Strings.pop_back();
56 for (StringRef Str : Strings) {
57 OS << LineSep << Indent << " ";
58 // If we can, just emit this as a string literal to be concatenated.
59 if (!UseChars) {
60 OS << "\"";
61 OS.write_escaped(Str);
62 OS << "\\0\"";
63 continue;
64 }
65
66 ListSeparator CharSep(", ");
67 for (char C : Str) {
68 OS << CharSep << "'";
69 OS.write_escaped(Str: StringRef(&C, 1));
70 OS << "'";
71 }
72 OS << CharSep << "'\\0'";
73 }
74 OS << LineSep << Indent << (UseChars ? "};" : " ;");
75
76 OS << formatv(Fmt: R"(
77#ifdef __GNUC__
78#pragma GCC diagnostic pop
79#endif
80
81{0}static constexpr llvm::StringTable {1} =
82{0} {1}Storage;
83)",
84 Vals: Indent, Vals: Name);
85}
86
87void StringToOffsetTable::EmitString(raw_ostream &O) const {
88 // Escape the string.
89 SmallString<256> EscapedStr;
90 raw_svector_ostream(EscapedStr).write_escaped(Str: AggregateString);
91
92 O << " \"";
93 unsigned CharsPrinted = 0;
94 for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
95 if (CharsPrinted > 70) {
96 O << "\"\n \"";
97 CharsPrinted = 0;
98 }
99 O << EscapedStr[i];
100 ++CharsPrinted;
101
102 // Print escape sequences all together.
103 if (EscapedStr[i] != '\\')
104 continue;
105
106 assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
107 if (isDigit(C: EscapedStr[i + 1])) {
108 assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
109 "Expected 3 digit octal escape!");
110 O << EscapedStr[++i];
111 O << EscapedStr[++i];
112 O << EscapedStr[++i];
113 CharsPrinted += 3;
114 } else {
115 O << EscapedStr[++i];
116 ++CharsPrinted;
117 }
118 }
119 O << "\"";
120}
121