1//===-- SequenceToOffsetTable.h - Compress similar sequences ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// SequenceToOffsetTable can be used to emit a number of sequences as one big
10// array. Uses the same memory when a sequence is a suffix of another.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
15#define LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
16
17#include "llvm/ADT/StringExtras.h"
18#include "llvm/Support/raw_ostream.h"
19#include "llvm/TableGen/Main.h"
20#include <algorithm>
21#include <cassert>
22#include <functional>
23#include <map>
24
25namespace llvm {
26
27inline void printChar(raw_ostream &OS, char C) {
28 unsigned char UC(C);
29 if (isAlnum(C: UC) || isPunct(C: UC)) {
30 OS << '\'';
31 if (C == '\\' || C == '\'')
32 OS << '\\';
33 OS << C << '\'';
34 } else {
35 OS << unsigned(UC);
36 }
37}
38
39/// SequenceToOffsetTable - Collect a number of terminated sequences of T.
40/// Compute the layout of a table that contains all the sequences, possibly by
41/// reusing entries.
42///
43/// @tparam SeqT The sequence container. (vector or string).
44/// @tparam Less A stable comparator for SeqT elements.
45template <typename SeqT, typename Less = std::less<typename SeqT::value_type>>
46class SequenceToOffsetTable {
47 typedef typename SeqT::value_type ElemT;
48
49 // Define a comparator for SeqT that sorts a suffix immediately before a
50 // sequence with that suffix.
51 struct SeqLess {
52 Less L;
53 bool operator()(const SeqT &A, const SeqT &B) const {
54 return std::lexicographical_compare(A.rbegin(), A.rend(), B.rbegin(),
55 B.rend(), L);
56 }
57 };
58
59 // Keep sequences ordered according to SeqLess so suffixes are easy to find.
60 // Map each sequence to its offset in the table.
61 typedef std::map<SeqT, unsigned, SeqLess> SeqMap;
62
63 // Sequences added so far, with suffixes removed.
64 SeqMap Seqs;
65
66 // Terminator element to be appended to each added sequence.
67 std::optional<ElemT> Terminator;
68
69 // True if `layout` method was called.
70 bool IsLaidOut = false;
71
72 // Entries in the final table, or 0 before layout was called.
73 unsigned Entries = 0;
74
75 // isSuffix - Returns true if A is a suffix of B.
76 static bool isSuffix(const SeqT &A, const SeqT &B) {
77 return A.size() <= B.size() && std::equal(A.rbegin(), A.rend(), B.rbegin());
78 }
79
80public:
81 explicit SequenceToOffsetTable(std::optional<ElemT> Terminator = ElemT())
82 : Terminator(Terminator) {}
83
84 /// add - Add a sequence to the table.
85 /// This must be called before layout().
86 void add(const SeqT &Seq) {
87 assert(!IsLaidOut && "Cannot call add() after layout()");
88 typename SeqMap::iterator I = Seqs.lower_bound(Seq);
89
90 // If SeqMap contains a sequence that has Seq as a suffix, I will be
91 // pointing to it.
92 if (I != Seqs.end() && isSuffix(A: Seq, B: I->first))
93 return;
94
95 I = Seqs.insert(I, {Seq, 0u});
96
97 // The entry before I may be a suffix of Seq that can now be erased.
98 if (I != Seqs.begin() && isSuffix(A: (--I)->first, B: Seq))
99 Seqs.erase(I);
100 }
101
102 bool empty() const { return Seqs.empty(); }
103
104 unsigned size() const {
105 assert(IsLaidOut && "Call layout() before size()");
106 return Entries;
107 }
108
109 /// layout - Computes the final table layout.
110 void layout() {
111 assert(!IsLaidOut && "Can only call layout() once");
112 IsLaidOut = true;
113
114 // Lay out the table in Seqs iteration order.
115 for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E;
116 ++I) {
117 I->second = Entries;
118 // Include space for a terminator.
119 Entries += I->first.size() + Terminator.has_value();
120 }
121 }
122
123 /// get - Returns the offset of Seq in the final table.
124 unsigned get(const SeqT &Seq) const {
125 assert(IsLaidOut && "Call layout() before get()");
126 typename SeqMap::const_iterator I = Seqs.lower_bound(Seq);
127 assert(I != Seqs.end() && isSuffix(Seq, I->first) &&
128 "get() called with sequence that wasn't added first");
129 return I->second + (I->first.size() - Seq.size());
130 }
131
132 /// `emitStringLiteralDef` - Print out the table as the body of an array
133 /// initializer, where each element is a C string literal terminated by
134 /// `\0`. Falls back to emitting a comma-separated integer list if
135 /// `EmitLongStrLiterals` is false
136 void emitStringLiteralDef(raw_ostream &OS, const Twine &Decl) const {
137 assert(IsLaidOut && "Call layout() before emitStringLiteralDef()");
138 if (!EmitLongStrLiterals) {
139 OS << Decl << " = {\n";
140 emit(OS, Print: printChar);
141 OS << " 0\n};\n\n";
142 return;
143 }
144
145 OS << "\n#ifdef __GNUC__\n"
146 << "#pragma GCC diagnostic push\n"
147 << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n"
148 << "#endif\n"
149 << Decl << " = {\n";
150 for (const auto &[Seq, Offset] : Seqs) {
151 OS << " /* " << Offset << " */ \"";
152 OS.write_escaped(Str: Seq);
153 if (Terminator)
154 OS.write_escaped(Str: StringRef(&*Terminator, 1));
155 OS << "\"\n";
156 }
157 OS << "};\n"
158 << "#ifdef __GNUC__\n"
159 << "#pragma GCC diagnostic pop\n"
160 << "#endif\n\n";
161 }
162
163 /// emit - Print out the table as the body of an array initializer.
164 /// Use the Print function to print elements.
165 void emit(raw_ostream &OS, void (*Print)(raw_ostream &, ElemT)) const {
166 assert(IsLaidOut && "Call layout() before emit()");
167 for (const auto &[Seq, Offset] : Seqs) {
168 OS << " /* " << Offset << " */ ";
169 for (const ElemT &Element : Seq) {
170 Print(OS, Element);
171 OS << ", ";
172 }
173 if (Terminator) {
174 Print(OS, *Terminator);
175 OS << ',';
176 }
177 OS << '\n';
178 }
179
180 // Print a dummy element if the array would be empty otherwise.
181 if (!Entries) {
182 OS << " /* dummy */ ";
183 Print(OS, ElemT());
184 OS << '\n';
185 }
186 }
187};
188
189} // end namespace llvm
190
191#endif // LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
192