1//===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 memory folding tables.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86InstrFoldTables.h"
14#include "X86InstrInfo.h"
15#include "llvm/ADT/STLExtras.h"
16#include <atomic>
17#include <vector>
18
19using namespace llvm;
20
21// These tables are sorted by their RegOp value allowing them to be binary
22// searched at runtime without the need for additional storage. The enum values
23// are currently emitted in X86GenInstrInfo.inc in alphabetical order. Which
24// makes sorting these tables a simple matter of alphabetizing the table.
25#include "X86GenFoldTables.inc"
26
27// Table to map instructions safe to broadcast using a different width from the
28// element width.
29static const X86FoldTableEntry BroadcastSizeTable2[] = {
30 { .KeyOp: X86::VANDNPDZ128rr, .DstOp: X86::VANDNPSZ128rmb, .Flags: TB_BCAST_SS },
31 { .KeyOp: X86::VANDNPDZ256rr, .DstOp: X86::VANDNPSZ256rmb, .Flags: TB_BCAST_SS },
32 { .KeyOp: X86::VANDNPDZrr, .DstOp: X86::VANDNPSZrmb, .Flags: TB_BCAST_SS },
33 { .KeyOp: X86::VANDNPSZ128rr, .DstOp: X86::VANDNPDZ128rmb, .Flags: TB_BCAST_SD },
34 { .KeyOp: X86::VANDNPSZ256rr, .DstOp: X86::VANDNPDZ256rmb, .Flags: TB_BCAST_SD },
35 { .KeyOp: X86::VANDNPSZrr, .DstOp: X86::VANDNPDZrmb, .Flags: TB_BCAST_SD },
36 { .KeyOp: X86::VANDPDZ128rr, .DstOp: X86::VANDPSZ128rmb, .Flags: TB_BCAST_SS },
37 { .KeyOp: X86::VANDPDZ256rr, .DstOp: X86::VANDPSZ256rmb, .Flags: TB_BCAST_SS },
38 { .KeyOp: X86::VANDPDZrr, .DstOp: X86::VANDPSZrmb, .Flags: TB_BCAST_SS },
39 { .KeyOp: X86::VANDPSZ128rr, .DstOp: X86::VANDPDZ128rmb, .Flags: TB_BCAST_SD },
40 { .KeyOp: X86::VANDPSZ256rr, .DstOp: X86::VANDPDZ256rmb, .Flags: TB_BCAST_SD },
41 { .KeyOp: X86::VANDPSZrr, .DstOp: X86::VANDPDZrmb, .Flags: TB_BCAST_SD },
42 { .KeyOp: X86::VORPDZ128rr, .DstOp: X86::VORPSZ128rmb, .Flags: TB_BCAST_SS },
43 { .KeyOp: X86::VORPDZ256rr, .DstOp: X86::VORPSZ256rmb, .Flags: TB_BCAST_SS },
44 { .KeyOp: X86::VORPDZrr, .DstOp: X86::VORPSZrmb, .Flags: TB_BCAST_SS },
45 { .KeyOp: X86::VORPSZ128rr, .DstOp: X86::VORPDZ128rmb, .Flags: TB_BCAST_SD },
46 { .KeyOp: X86::VORPSZ256rr, .DstOp: X86::VORPDZ256rmb, .Flags: TB_BCAST_SD },
47 { .KeyOp: X86::VORPSZrr, .DstOp: X86::VORPDZrmb, .Flags: TB_BCAST_SD },
48 { .KeyOp: X86::VPANDDZ128rr, .DstOp: X86::VPANDQZ128rmb, .Flags: TB_BCAST_Q },
49 { .KeyOp: X86::VPANDDZ256rr, .DstOp: X86::VPANDQZ256rmb, .Flags: TB_BCAST_Q },
50 { .KeyOp: X86::VPANDDZrr, .DstOp: X86::VPANDQZrmb, .Flags: TB_BCAST_Q },
51 { .KeyOp: X86::VPANDNDZ128rr, .DstOp: X86::VPANDNQZ128rmb, .Flags: TB_BCAST_Q },
52 { .KeyOp: X86::VPANDNDZ256rr, .DstOp: X86::VPANDNQZ256rmb, .Flags: TB_BCAST_Q },
53 { .KeyOp: X86::VPANDNDZrr, .DstOp: X86::VPANDNQZrmb, .Flags: TB_BCAST_Q },
54 { .KeyOp: X86::VPANDNQZ128rr, .DstOp: X86::VPANDNDZ128rmb, .Flags: TB_BCAST_D },
55 { .KeyOp: X86::VPANDNQZ256rr, .DstOp: X86::VPANDNDZ256rmb, .Flags: TB_BCAST_D },
56 { .KeyOp: X86::VPANDNQZrr, .DstOp: X86::VPANDNDZrmb, .Flags: TB_BCAST_D },
57 { .KeyOp: X86::VPANDQZ128rr, .DstOp: X86::VPANDDZ128rmb, .Flags: TB_BCAST_D },
58 { .KeyOp: X86::VPANDQZ256rr, .DstOp: X86::VPANDDZ256rmb, .Flags: TB_BCAST_D },
59 { .KeyOp: X86::VPANDQZrr, .DstOp: X86::VPANDDZrmb, .Flags: TB_BCAST_D },
60 { .KeyOp: X86::VPORDZ128rr, .DstOp: X86::VPORQZ128rmb, .Flags: TB_BCAST_Q },
61 { .KeyOp: X86::VPORDZ256rr, .DstOp: X86::VPORQZ256rmb, .Flags: TB_BCAST_Q },
62 { .KeyOp: X86::VPORDZrr, .DstOp: X86::VPORQZrmb, .Flags: TB_BCAST_Q },
63 { .KeyOp: X86::VPORQZ128rr, .DstOp: X86::VPORDZ128rmb, .Flags: TB_BCAST_D },
64 { .KeyOp: X86::VPORQZ256rr, .DstOp: X86::VPORDZ256rmb, .Flags: TB_BCAST_D },
65 { .KeyOp: X86::VPORQZrr, .DstOp: X86::VPORDZrmb, .Flags: TB_BCAST_D },
66 { .KeyOp: X86::VPXORDZ128rr, .DstOp: X86::VPXORQZ128rmb, .Flags: TB_BCAST_Q },
67 { .KeyOp: X86::VPXORDZ256rr, .DstOp: X86::VPXORQZ256rmb, .Flags: TB_BCAST_Q },
68 { .KeyOp: X86::VPXORDZrr, .DstOp: X86::VPXORQZrmb, .Flags: TB_BCAST_Q },
69 { .KeyOp: X86::VPXORQZ128rr, .DstOp: X86::VPXORDZ128rmb, .Flags: TB_BCAST_D },
70 { .KeyOp: X86::VPXORQZ256rr, .DstOp: X86::VPXORDZ256rmb, .Flags: TB_BCAST_D },
71 { .KeyOp: X86::VPXORQZrr, .DstOp: X86::VPXORDZrmb, .Flags: TB_BCAST_D },
72 { .KeyOp: X86::VXORPDZ128rr, .DstOp: X86::VXORPSZ128rmb, .Flags: TB_BCAST_SS },
73 { .KeyOp: X86::VXORPDZ256rr, .DstOp: X86::VXORPSZ256rmb, .Flags: TB_BCAST_SS },
74 { .KeyOp: X86::VXORPDZrr, .DstOp: X86::VXORPSZrmb, .Flags: TB_BCAST_SS },
75 { .KeyOp: X86::VXORPSZ128rr, .DstOp: X86::VXORPDZ128rmb, .Flags: TB_BCAST_SD },
76 { .KeyOp: X86::VXORPSZ256rr, .DstOp: X86::VXORPDZ256rmb, .Flags: TB_BCAST_SD },
77 { .KeyOp: X86::VXORPSZrr, .DstOp: X86::VXORPDZrmb, .Flags: TB_BCAST_SD },
78};
79
80static const X86FoldTableEntry BroadcastSizeTable3[] = {
81 { .KeyOp: X86::VPTERNLOGDZ128rri, .DstOp: X86::VPTERNLOGQZ128rmbi, .Flags: TB_BCAST_Q },
82 { .KeyOp: X86::VPTERNLOGDZ256rri, .DstOp: X86::VPTERNLOGQZ256rmbi, .Flags: TB_BCAST_Q },
83 { .KeyOp: X86::VPTERNLOGDZrri, .DstOp: X86::VPTERNLOGQZrmbi, .Flags: TB_BCAST_Q },
84 { .KeyOp: X86::VPTERNLOGQZ128rri, .DstOp: X86::VPTERNLOGDZ128rmbi, .Flags: TB_BCAST_D },
85 { .KeyOp: X86::VPTERNLOGQZ256rri, .DstOp: X86::VPTERNLOGDZ256rmbi, .Flags: TB_BCAST_D },
86 { .KeyOp: X86::VPTERNLOGQZrri, .DstOp: X86::VPTERNLOGDZrmbi, .Flags: TB_BCAST_D },
87};
88
89static const X86FoldTableEntry *
90lookupFoldTableImpl(ArrayRef<X86FoldTableEntry> Table, unsigned RegOp) {
91#ifndef NDEBUG
92#define CHECK_SORTED_UNIQUE(TABLE) \
93 assert(llvm::is_sorted(TABLE) && #TABLE " is not sorted"); \
94 assert(std::adjacent_find(std::begin(Table), std::end(Table)) == \
95 std::end(Table) && \
96 #TABLE " is not unique");
97
98 // Make sure the tables are sorted.
99 static std::atomic<bool> FoldTablesChecked(false);
100 if (!FoldTablesChecked.load(std::memory_order_relaxed)) {
101 CHECK_SORTED_UNIQUE(Table2Addr)
102 CHECK_SORTED_UNIQUE(Table0)
103 CHECK_SORTED_UNIQUE(Table1)
104 CHECK_SORTED_UNIQUE(Table2)
105 CHECK_SORTED_UNIQUE(Table3)
106 CHECK_SORTED_UNIQUE(Table4)
107 CHECK_SORTED_UNIQUE(BroadcastTable1)
108 CHECK_SORTED_UNIQUE(BroadcastTable2)
109 CHECK_SORTED_UNIQUE(BroadcastTable3)
110 CHECK_SORTED_UNIQUE(BroadcastTable4)
111 CHECK_SORTED_UNIQUE(BroadcastSizeTable2)
112 CHECK_SORTED_UNIQUE(BroadcastSizeTable3)
113 FoldTablesChecked.store(true, std::memory_order_relaxed);
114 }
115#endif
116
117 const X86FoldTableEntry *Data = llvm::lower_bound(Range&: Table, Value&: RegOp);
118 if (Data != Table.end() && Data->KeyOp == RegOp &&
119 !(Data->Flags & TB_NO_FORWARD))
120 return Data;
121 return nullptr;
122}
123
124const X86FoldTableEntry *llvm::lookupTwoAddrFoldTable(unsigned RegOp) {
125 return lookupFoldTableImpl(Table: Table2Addr, RegOp);
126}
127
128const X86FoldTableEntry *llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) {
129 ArrayRef<X86FoldTableEntry> FoldTable;
130 if (OpNum == 0)
131 FoldTable = ArrayRef(Table0);
132 else if (OpNum == 1)
133 FoldTable = ArrayRef(Table1);
134 else if (OpNum == 2)
135 FoldTable = ArrayRef(Table2);
136 else if (OpNum == 3)
137 FoldTable = ArrayRef(Table3);
138 else if (OpNum == 4)
139 FoldTable = ArrayRef(Table4);
140 else
141 return nullptr;
142
143 return lookupFoldTableImpl(Table: FoldTable, RegOp);
144}
145
146bool llvm::isNonFoldableWithSameMask(unsigned RegOp) {
147 // NonFoldableWithSameMask table stores instruction opcodes that are unsafe
148 // for masked-load folding when the same mask is used.
149 ArrayRef<unsigned> Table(NonFoldableWithSameMaskTable);
150 auto I = llvm::lower_bound(Range&: Table, Value&: RegOp);
151 return I != Table.end() && *I == RegOp;
152}
153
154const X86FoldTableEntry *llvm::lookupBroadcastFoldTable(unsigned RegOp,
155 unsigned OpNum) {
156 ArrayRef<X86FoldTableEntry> FoldTable;
157 if (OpNum == 1)
158 FoldTable = ArrayRef(BroadcastTable1);
159 else if (OpNum == 2)
160 FoldTable = ArrayRef(BroadcastTable2);
161 else if (OpNum == 3)
162 FoldTable = ArrayRef(BroadcastTable3);
163 else if (OpNum == 4)
164 FoldTable = ArrayRef(BroadcastTable4);
165 else
166 return nullptr;
167
168 return lookupFoldTableImpl(Table: FoldTable, RegOp);
169}
170
171namespace {
172
173// This class stores the memory unfolding tables. It is instantiated as a
174// function scope static variable to lazily init the unfolding table.
175struct X86MemUnfoldTable {
176 // Stores memory unfolding tables entries sorted by opcode.
177 std::vector<X86FoldTableEntry> Table;
178
179 X86MemUnfoldTable() {
180 for (const X86FoldTableEntry &Entry : Table2Addr)
181 // Index 0, folded load and store, no alignment requirement.
182 addTableEntry(Entry, ExtraFlags: TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
183
184 for (const X86FoldTableEntry &Entry : Table0)
185 // Index 0, mix of loads and stores.
186 addTableEntry(Entry, ExtraFlags: TB_INDEX_0);
187
188 for (const X86FoldTableEntry &Entry : Table1)
189 // Index 1, folded load
190 addTableEntry(Entry, ExtraFlags: TB_INDEX_1 | TB_FOLDED_LOAD);
191
192 for (const X86FoldTableEntry &Entry : Table2)
193 // Index 2, folded load
194 addTableEntry(Entry, ExtraFlags: TB_INDEX_2 | TB_FOLDED_LOAD);
195
196 for (const X86FoldTableEntry &Entry : Table3)
197 // Index 3, folded load
198 addTableEntry(Entry, ExtraFlags: TB_INDEX_3 | TB_FOLDED_LOAD);
199
200 for (const X86FoldTableEntry &Entry : Table4)
201 // Index 4, folded load
202 addTableEntry(Entry, ExtraFlags: TB_INDEX_4 | TB_FOLDED_LOAD);
203
204 // Broadcast tables.
205 for (const X86FoldTableEntry &Entry : BroadcastTable1)
206 // Index 1, folded broadcast
207 addTableEntry(Entry, ExtraFlags: TB_INDEX_1 | TB_FOLDED_LOAD);
208
209 for (const X86FoldTableEntry &Entry : BroadcastTable2)
210 // Index 2, folded broadcast
211 addTableEntry(Entry, ExtraFlags: TB_INDEX_2 | TB_FOLDED_LOAD);
212
213 for (const X86FoldTableEntry &Entry : BroadcastTable3)
214 // Index 3, folded broadcast
215 addTableEntry(Entry, ExtraFlags: TB_INDEX_3 | TB_FOLDED_LOAD);
216
217 for (const X86FoldTableEntry &Entry : BroadcastTable4)
218 // Index 4, folded broadcast
219 addTableEntry(Entry, ExtraFlags: TB_INDEX_4 | TB_FOLDED_LOAD);
220
221 // Sort the memory->reg unfold table.
222 array_pod_sort(Start: Table.begin(), End: Table.end());
223
224 // Now that it's sorted, ensure its unique.
225 assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() &&
226 "Memory unfolding table is not unique!");
227 }
228
229 void addTableEntry(const X86FoldTableEntry &Entry, uint16_t ExtraFlags) {
230 // NOTE: This swaps the KeyOp and DstOp in the table so we can sort it.
231 if ((Entry.Flags & TB_NO_REVERSE) == 0)
232 Table.push_back(x: {.KeyOp: Entry.DstOp, .DstOp: Entry.KeyOp,
233 .Flags: static_cast<uint16_t>(Entry.Flags | ExtraFlags)});
234 }
235};
236} // namespace
237
238const X86FoldTableEntry *llvm::lookupUnfoldTable(unsigned MemOp) {
239 static X86MemUnfoldTable MemUnfoldTable;
240 auto &Table = MemUnfoldTable.Table;
241 auto I = llvm::lower_bound(Range&: Table, Value&: MemOp);
242 if (I != Table.end() && I->KeyOp == MemOp)
243 return &*I;
244 return nullptr;
245}
246
247namespace {
248
249// This class stores the memory -> broadcast folding tables. It is instantiated
250// as a function scope static variable to lazily init the folding table.
251struct X86BroadcastFoldTable {
252 // Stores memory broadcast folding tables entries sorted by opcode.
253 std::vector<X86FoldTableEntry> Table;
254
255 X86BroadcastFoldTable() {
256 // Broadcast tables.
257 for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable2) {
258 unsigned RegOp = Reg2Bcst.KeyOp;
259 unsigned BcstOp = Reg2Bcst.DstOp;
260 if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, OpNum: 2)) {
261 unsigned MemOp = Reg2Mem->DstOp;
262 uint16_t Flags =
263 Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | TB_FOLDED_LOAD;
264 Table.push_back(x: {.KeyOp: MemOp, .DstOp: BcstOp, .Flags: Flags});
265 }
266 }
267 for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable2) {
268 unsigned RegOp = Reg2Bcst.KeyOp;
269 unsigned BcstOp = Reg2Bcst.DstOp;
270 if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, OpNum: 2)) {
271 unsigned MemOp = Reg2Mem->DstOp;
272 uint16_t Flags =
273 Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | TB_FOLDED_LOAD;
274 Table.push_back(x: {.KeyOp: MemOp, .DstOp: BcstOp, .Flags: Flags});
275 }
276 }
277
278 for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable3) {
279 unsigned RegOp = Reg2Bcst.KeyOp;
280 unsigned BcstOp = Reg2Bcst.DstOp;
281 if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, OpNum: 3)) {
282 unsigned MemOp = Reg2Mem->DstOp;
283 uint16_t Flags =
284 Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | TB_FOLDED_LOAD;
285 Table.push_back(x: {.KeyOp: MemOp, .DstOp: BcstOp, .Flags: Flags});
286 }
287 }
288 for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable3) {
289 unsigned RegOp = Reg2Bcst.KeyOp;
290 unsigned BcstOp = Reg2Bcst.DstOp;
291 if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, OpNum: 3)) {
292 unsigned MemOp = Reg2Mem->DstOp;
293 uint16_t Flags =
294 Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | TB_FOLDED_LOAD;
295 Table.push_back(x: {.KeyOp: MemOp, .DstOp: BcstOp, .Flags: Flags});
296 }
297 }
298
299 for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable4) {
300 unsigned RegOp = Reg2Bcst.KeyOp;
301 unsigned BcstOp = Reg2Bcst.DstOp;
302 if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, OpNum: 4)) {
303 unsigned MemOp = Reg2Mem->DstOp;
304 uint16_t Flags =
305 Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_4 | TB_FOLDED_LOAD;
306 Table.push_back(x: {.KeyOp: MemOp, .DstOp: BcstOp, .Flags: Flags});
307 }
308 }
309
310 // Sort the memory->broadcast fold table.
311 array_pod_sort(Start: Table.begin(), End: Table.end());
312 }
313};
314} // namespace
315
316bool llvm::matchBroadcastSize(const X86FoldTableEntry &Entry,
317 unsigned BroadcastBits) {
318 switch (Entry.Flags & TB_BCAST_MASK) {
319 case TB_BCAST_W:
320 case TB_BCAST_SH:
321 return BroadcastBits == 16;
322 case TB_BCAST_D:
323 case TB_BCAST_SS:
324 return BroadcastBits == 32;
325 case TB_BCAST_Q:
326 case TB_BCAST_SD:
327 return BroadcastBits == 64;
328 }
329 return false;
330}
331
332const X86FoldTableEntry *
333llvm::lookupBroadcastFoldTableBySize(unsigned MemOp, unsigned BroadcastBits) {
334 static X86BroadcastFoldTable BroadcastFoldTable;
335 auto &Table = BroadcastFoldTable.Table;
336 for (auto I = llvm::lower_bound(Range&: Table, Value&: MemOp);
337 I != Table.end() && I->KeyOp == MemOp; ++I) {
338 if (matchBroadcastSize(Entry: *I, BroadcastBits))
339 return &*I;
340 }
341 return nullptr;
342}
343