//===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
| 8 | |
| 9 | #include "llvm/TableGen/StringToOffsetTable.h" |
| 10 | #include "llvm/Support/FormatVariadic.h" |
| 11 | #include "llvm/Support/raw_ostream.h" |
| 12 | #include "llvm/TableGen/Main.h" |
| 13 | |
| 14 | using namespace llvm; |
| 15 | |
| 16 | unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str, |
| 17 | bool appendZero) { |
| 18 | auto [II, Inserted] = StringOffset.insert(KV: {Str, size()}); |
| 19 | if (Inserted) { |
| 20 | // Add the string to the aggregate if this is the first time found. |
| 21 | AggregateString.append(first: Str.begin(), last: Str.end()); |
| 22 | if (appendZero) |
| 23 | AggregateString += '\0'; |
| 24 | } |
| 25 | |
| 26 | return II->second; |
| 27 | } |
| 28 | |
| 29 | void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name, |
| 30 | const Twine &Indent) const { |
| 31 | OS << formatv(Fmt: R"( |
| 32 | #ifdef __GNUC__ |
| 33 | #pragma GCC diagnostic push |
| 34 | #pragma GCC diagnostic ignored "-Woverlength-strings" |
| 35 | #endif |
| 36 | {0}static constexpr char {1}Storage[] = )" , |
| 37 | Vals: Indent, Vals: Name); |
| 38 | |
| 39 | // MSVC silently miscompiles string literals longer than 64k in some |
| 40 | // circumstances. The build system sets EmitLongStrLiterals to false when it |
| 41 | // detects that it is targetting MSVC. When that option is false and the |
| 42 | // string table is longer than 64k, emit it as an array of character |
| 43 | // literals. |
| 44 | bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024); |
| 45 | OS << (UseChars ? "{\n" : "\n" ); |
| 46 | |
| 47 | ListSeparator LineSep(UseChars ? ",\n" : "\n" ); |
| 48 | SmallVector<StringRef> Strings(split(Str: AggregateString, Separator: '\0')); |
| 49 | // We should always have an empty string at the start, and because these are |
| 50 | // null terminators rather than separators, we'll have one at the end as |
| 51 | // well. Skip the end one. |
| 52 | assert(Strings.front().empty() && "Expected empty initial string!" ); |
| 53 | assert(Strings.back().empty() && |
| 54 | "Expected empty string at the end due to terminators!" ); |
| 55 | Strings.pop_back(); |
| 56 | for (StringRef Str : Strings) { |
| 57 | OS << LineSep << Indent << " " ; |
| 58 | // If we can, just emit this as a string literal to be concatenated. |
| 59 | if (!UseChars) { |
| 60 | OS << "\"" ; |
| 61 | OS.write_escaped(Str); |
| 62 | OS << "\\0\"" ; |
| 63 | continue; |
| 64 | } |
| 65 | |
| 66 | ListSeparator CharSep(", " ); |
| 67 | for (char C : Str) { |
| 68 | OS << CharSep << "'" ; |
| 69 | OS.write_escaped(Str: StringRef(&C, 1)); |
| 70 | OS << "'" ; |
| 71 | } |
| 72 | OS << CharSep << "'\\0'" ; |
| 73 | } |
| 74 | OS << LineSep << Indent << (UseChars ? "};" : " ;" ); |
| 75 | |
| 76 | OS << formatv(Fmt: R"( |
| 77 | #ifdef __GNUC__ |
| 78 | #pragma GCC diagnostic pop |
| 79 | #endif |
| 80 | |
| 81 | {0}static constexpr llvm::StringTable {1} = |
| 82 | {0} {1}Storage; |
| 83 | )" , |
| 84 | Vals: Indent, Vals: Name); |
| 85 | } |
| 86 | |
| 87 | void StringToOffsetTable::EmitString(raw_ostream &O) const { |
| 88 | // Escape the string. |
| 89 | SmallString<256> EscapedStr; |
| 90 | raw_svector_ostream(EscapedStr).write_escaped(Str: AggregateString); |
| 91 | |
| 92 | O << " \"" ; |
| 93 | unsigned CharsPrinted = 0; |
| 94 | for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) { |
| 95 | if (CharsPrinted > 70) { |
| 96 | O << "\"\n \"" ; |
| 97 | CharsPrinted = 0; |
| 98 | } |
| 99 | O << EscapedStr[i]; |
| 100 | ++CharsPrinted; |
| 101 | |
| 102 | // Print escape sequences all together. |
| 103 | if (EscapedStr[i] != '\\') |
| 104 | continue; |
| 105 | |
| 106 | assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!" ); |
| 107 | if (isDigit(C: EscapedStr[i + 1])) { |
| 108 | assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) && |
| 109 | "Expected 3 digit octal escape!" ); |
| 110 | O << EscapedStr[++i]; |
| 111 | O << EscapedStr[++i]; |
| 112 | O << EscapedStr[++i]; |
| 113 | CharsPrinted += 3; |
| 114 | } else { |
| 115 | O << EscapedStr[++i]; |
| 116 | ++CharsPrinted; |
| 117 | } |
| 118 | } |
| 119 | O << "\"" ; |
| 120 | } |
| 121 | |