//===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 memory folding tables.
//
//===----------------------------------------------------------------------===//

#include "X86InstrFoldTables.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/STLExtras.h"
#include <atomic>
#include <vector>

using namespace llvm;
// These tables are sorted by their RegOp value, allowing them to be binary
// searched at runtime without the need for additional storage. The enum values
// are currently emitted in X86GenInstrInfo.inc in alphabetical order, which
// makes sorting these tables a simple matter of alphabetizing them.
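// The generated file defines Table2Addr, Table0 through Table4, and
// BroadcastTable1 through BroadcastTable4, which the lookup helpers below
// reference directly.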
#include "X86GenFoldTables.inc"

// Table mapping instructions that are safe to broadcast using a width
// different from their element width.
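// Each entry here is a bitwise operation (AND/ANDN/OR/XOR, plus the ternary
// logic ops in BroadcastSizeTable3 below), so lane boundaries do not affect
// the result and only the width of the splatted memory operand matters. For
// example, the 64-bit element VANDNPDZ128rr can consume a 32-bit broadcast by
// switching to the VANDNPSZ128rmb form (TB_BCAST_SS).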
static const X86FoldTableEntry BroadcastSizeTable2[] = {
    {X86::VANDNPDZ128rr, X86::VANDNPSZ128rmb, TB_BCAST_SS},
    {X86::VANDNPDZ256rr, X86::VANDNPSZ256rmb, TB_BCAST_SS},
    {X86::VANDNPDZrr, X86::VANDNPSZrmb, TB_BCAST_SS},
    {X86::VANDNPSZ128rr, X86::VANDNPDZ128rmb, TB_BCAST_SD},
    {X86::VANDNPSZ256rr, X86::VANDNPDZ256rmb, TB_BCAST_SD},
    {X86::VANDNPSZrr, X86::VANDNPDZrmb, TB_BCAST_SD},
    {X86::VANDPDZ128rr, X86::VANDPSZ128rmb, TB_BCAST_SS},
    {X86::VANDPDZ256rr, X86::VANDPSZ256rmb, TB_BCAST_SS},
    {X86::VANDPDZrr, X86::VANDPSZrmb, TB_BCAST_SS},
    {X86::VANDPSZ128rr, X86::VANDPDZ128rmb, TB_BCAST_SD},
    {X86::VANDPSZ256rr, X86::VANDPDZ256rmb, TB_BCAST_SD},
    {X86::VANDPSZrr, X86::VANDPDZrmb, TB_BCAST_SD},
    {X86::VORPDZ128rr, X86::VORPSZ128rmb, TB_BCAST_SS},
    {X86::VORPDZ256rr, X86::VORPSZ256rmb, TB_BCAST_SS},
    {X86::VORPDZrr, X86::VORPSZrmb, TB_BCAST_SS},
    {X86::VORPSZ128rr, X86::VORPDZ128rmb, TB_BCAST_SD},
    {X86::VORPSZ256rr, X86::VORPDZ256rmb, TB_BCAST_SD},
    {X86::VORPSZrr, X86::VORPDZrmb, TB_BCAST_SD},
    {X86::VPANDDZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q},
    {X86::VPANDDZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q},
    {X86::VPANDDZrr, X86::VPANDQZrmb, TB_BCAST_Q},
    {X86::VPANDNDZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q},
    {X86::VPANDNDZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q},
    {X86::VPANDNDZrr, X86::VPANDNQZrmb, TB_BCAST_Q},
    {X86::VPANDNQZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D},
    {X86::VPANDNQZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D},
    {X86::VPANDNQZrr, X86::VPANDNDZrmb, TB_BCAST_D},
    {X86::VPANDQZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D},
    {X86::VPANDQZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D},
    {X86::VPANDQZrr, X86::VPANDDZrmb, TB_BCAST_D},
    {X86::VPORDZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q},
    {X86::VPORDZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q},
    {X86::VPORDZrr, X86::VPORQZrmb, TB_BCAST_Q},
    {X86::VPORQZ128rr, X86::VPORDZ128rmb, TB_BCAST_D},
    {X86::VPORQZ256rr, X86::VPORDZ256rmb, TB_BCAST_D},
    {X86::VPORQZrr, X86::VPORDZrmb, TB_BCAST_D},
    {X86::VPXORDZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q},
    {X86::VPXORDZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q},
    {X86::VPXORDZrr, X86::VPXORQZrmb, TB_BCAST_Q},
    {X86::VPXORQZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D},
    {X86::VPXORQZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D},
    {X86::VPXORQZrr, X86::VPXORDZrmb, TB_BCAST_D},
    {X86::VXORPDZ128rr, X86::VXORPSZ128rmb, TB_BCAST_SS},
    {X86::VXORPDZ256rr, X86::VXORPSZ256rmb, TB_BCAST_SS},
    {X86::VXORPDZrr, X86::VXORPSZrmb, TB_BCAST_SS},
    {X86::VXORPSZ128rr, X86::VXORPDZ128rmb, TB_BCAST_SD},
    {X86::VXORPSZ256rr, X86::VXORPDZ256rmb, TB_BCAST_SD},
    {X86::VXORPSZrr, X86::VXORPDZrmb, TB_BCAST_SD},
};

static const X86FoldTableEntry BroadcastSizeTable3[] = {
    {X86::VPTERNLOGDZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q},
    {X86::VPTERNLOGDZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q},
    {X86::VPTERNLOGDZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q},
    {X86::VPTERNLOGQZ128rri, X86::VPTERNLOGDZ128rmbi, TB_BCAST_D},
    {X86::VPTERNLOGQZ256rri, X86::VPTERNLOGDZ256rmbi, TB_BCAST_D},
    {X86::VPTERNLOGQZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D},
};

static const X86FoldTableEntry *
lookupFoldTableImpl(ArrayRef<X86FoldTableEntry> Table, unsigned RegOp) {
#ifndef NDEBUG
#define CHECK_SORTED_UNIQUE(TABLE)                                             \
  assert(llvm::is_sorted(TABLE) && #TABLE " is not sorted");                   \
  assert(std::adjacent_find(std::begin(TABLE), std::end(TABLE)) ==             \
             std::end(TABLE) &&                                                \
         #TABLE " is not unique");

  // Make sure the tables are sorted and contain no duplicate entries.
  static std::atomic<bool> FoldTablesChecked(false);
  if (!FoldTablesChecked.load(std::memory_order_relaxed)) {
    CHECK_SORTED_UNIQUE(Table2Addr)
    CHECK_SORTED_UNIQUE(Table0)
    CHECK_SORTED_UNIQUE(Table1)
    CHECK_SORTED_UNIQUE(Table2)
    CHECK_SORTED_UNIQUE(Table3)
    CHECK_SORTED_UNIQUE(Table4)
    CHECK_SORTED_UNIQUE(BroadcastTable1)
    CHECK_SORTED_UNIQUE(BroadcastTable2)
    CHECK_SORTED_UNIQUE(BroadcastTable3)
    CHECK_SORTED_UNIQUE(BroadcastTable4)
    CHECK_SORTED_UNIQUE(BroadcastSizeTable2)
    CHECK_SORTED_UNIQUE(BroadcastSizeTable3)
    FoldTablesChecked.store(true, std::memory_order_relaxed);
  }
#undef CHECK_SORTED_UNIQUE
#endif

  const X86FoldTableEntry *Data = llvm::lower_bound(Table, RegOp);
  if (Data != Table.end() && Data->KeyOp == RegOp &&
      !(Data->Flags & TB_NO_FORWARD))
    return Data;
  return nullptr;
}

const X86FoldTableEntry *llvm::lookupTwoAddrFoldTable(unsigned RegOp) {
  return lookupFoldTableImpl(Table2Addr, RegOp);
}

const X86FoldTableEntry *llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) {
  ArrayRef<X86FoldTableEntry> FoldTable;
  if (OpNum == 0)
    FoldTable = ArrayRef(Table0);
  else if (OpNum == 1)
    FoldTable = ArrayRef(Table1);
  else if (OpNum == 2)
    FoldTable = ArrayRef(Table2);
  else if (OpNum == 3)
    FoldTable = ArrayRef(Table3);
  else if (OpNum == 4)
    FoldTable = ArrayRef(Table4);
  else
    return nullptr;

  return lookupFoldTableImpl(FoldTable, RegOp);
}
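
// A minimal usage sketch (hypothetical caller; the opcodes are illustrative
// and assumed to be present in the generated tables):
//
//   // Try to fold a load into the second source operand of an ADD32rr.
//   const X86FoldTableEntry *E = lookupFoldTable(X86::ADD32rr, /*OpNum=*/2);
//   if (E)
//     ...; // E->DstOp is the equivalent reg-mem opcode, e.g. X86::ADD32rm.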

const X86FoldTableEntry *llvm::lookupBroadcastFoldTable(unsigned RegOp,
                                                        unsigned OpNum) {
  ArrayRef<X86FoldTableEntry> FoldTable;
  if (OpNum == 1)
    FoldTable = ArrayRef(BroadcastTable1);
  else if (OpNum == 2)
    FoldTable = ArrayRef(BroadcastTable2);
  else if (OpNum == 3)
    FoldTable = ArrayRef(BroadcastTable3);
  else if (OpNum == 4)
    FoldTable = ArrayRef(BroadcastTable4);
  else
    return nullptr;

  return lookupFoldTableImpl(FoldTable, RegOp);
}
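
// The broadcast variant works the same way (illustrative opcodes): a query
// such as lookupBroadcastFoldTable(X86::VADDPSZrr, /*OpNum=*/2) would, on
// success, return an entry whose DstOp is the embedded-broadcast form
// (X86::VADDPSZrmb).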

namespace {

// This class stores the memory unfolding tables. It is instantiated as a
// function scope static variable to lazily init the unfolding table.
struct X86MemUnfoldTable {
  // Stores memory unfolding table entries sorted by opcode.
  std::vector<X86FoldTableEntry> Table;

  X86MemUnfoldTable() {
    for (const X86FoldTableEntry &Entry : Table2Addr)
      // Index 0, folded load and store, no alignment requirement.
      addTableEntry(Entry, TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);

    for (const X86FoldTableEntry &Entry : Table0)
      // Index 0, mix of loads and stores.
      addTableEntry(Entry, TB_INDEX_0);

    for (const X86FoldTableEntry &Entry : Table1)
      // Index 1, folded load
      addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : Table2)
      // Index 2, folded load
      addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : Table3)
      // Index 3, folded load
      addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : Table4)
      // Index 4, folded load
      addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD);

    // Broadcast tables.
    for (const X86FoldTableEntry &Entry : BroadcastTable1)
      // Index 1, folded broadcast
      addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : BroadcastTable2)
      // Index 2, folded broadcast
      addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : BroadcastTable3)
      // Index 3, folded broadcast
      addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD);

    for (const X86FoldTableEntry &Entry : BroadcastTable4)
      // Index 4, folded broadcast
      addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD);

    // Sort the memory->reg unfold table.
    array_pod_sort(Table.begin(), Table.end());

    // Now that it's sorted, ensure it's unique.
    assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() &&
           "Memory unfolding table is not unique!");
  }

  void addTableEntry(const X86FoldTableEntry &Entry, uint16_t ExtraFlags) {
    // NOTE: This swaps the KeyOp and DstOp in the table so we can sort it.
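    // Entries flagged TB_NO_REVERSE cannot be unfolded back to the register
    // form, so they are skipped.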
    if ((Entry.Flags & TB_NO_REVERSE) == 0)
      Table.push_back({Entry.DstOp, Entry.KeyOp,
                       static_cast<uint16_t>(Entry.Flags | ExtraFlags)});
  }
};
} // namespace

const X86FoldTableEntry *llvm::lookupUnfoldTable(unsigned MemOp) {
  static X86MemUnfoldTable MemUnfoldTable;
  auto &Table = MemUnfoldTable.Table;
  auto I = llvm::lower_bound(Table, MemOp);
  if (I != Table.end() && I->KeyOp == MemOp)
    return &*I;
  return nullptr;
}
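
// Sketch of the reverse query (illustrative opcode): given a memory-form
// instruction, recover the register form and how the fold was performed.
//
//   if (const X86FoldTableEntry *E = lookupUnfoldTable(X86::ADD32rm)) {
//     unsigned RegOpc = E->DstOp;                // e.g. X86::ADD32rr
//     unsigned OpNum = E->Flags & TB_INDEX_MASK; // which operand was folded
//     bool WasLoad = (E->Flags & TB_FOLDED_LOAD) != 0;
//   }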

namespace {

// This class stores the memory -> broadcast folding tables. It is instantiated
// as a function scope static variable to lazily init the folding table.
struct X86BroadcastFoldTable {
  // Stores memory broadcast folding table entries sorted by opcode.
  std::vector<X86FoldTableEntry> Table;

  X86BroadcastFoldTable() {
    // Broadcast tables.
    for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable2) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags =
            Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | TB_FOLDED_LOAD;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }
    for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable2) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags =
            Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | TB_FOLDED_LOAD;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }

    for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable3) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags =
            Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | TB_FOLDED_LOAD;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }
    for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable3) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags =
            Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | TB_FOLDED_LOAD;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }

    for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable4) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 4)) {
        unsigned MemOp = Reg2Mem->DstOp;
        uint16_t Flags =
            Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_4 | TB_FOLDED_LOAD;
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }

    // Sort the memory->broadcast fold table.
    array_pod_sort(Table.begin(), Table.end());
  }
};
} // namespace

bool llvm::matchBroadcastSize(const X86FoldTableEntry &Entry,
                              unsigned BroadcastBits) {
  switch (Entry.Flags & TB_BCAST_MASK) {
  case TB_BCAST_W:
  case TB_BCAST_SH:
    return BroadcastBits == 16;
  case TB_BCAST_D:
  case TB_BCAST_SS:
    return BroadcastBits == 32;
  case TB_BCAST_Q:
  case TB_BCAST_SD:
    return BroadcastBits == 64;
  }
  return false;
}
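
// For example, an entry flagged TB_BCAST_SD only matches a 64-bit broadcast
// load, while TB_BCAST_SS and TB_BCAST_D entries match a 32-bit one.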

const X86FoldTableEntry *
llvm::lookupBroadcastFoldTableBySize(unsigned MemOp, unsigned BroadcastBits) {
  static X86BroadcastFoldTable BroadcastFoldTable;
  auto &Table = BroadcastFoldTable.Table;
  for (auto I = llvm::lower_bound(Table, MemOp);
       I != Table.end() && I->KeyOp == MemOp; ++I) {
    if (matchBroadcastSize(*I, BroadcastBits))
      return &*I;
  }
  return nullptr;
}
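
// Illustrative query (assuming both forms exist in the generated tables):
// convert a full-width load fold into a 32-bit embedded-broadcast fold.
//
//   const X86FoldTableEntry *E =
//       lookupBroadcastFoldTableBySize(X86::VADDPSZrm, /*BroadcastBits=*/32);
//   if (E)
//     ...; // E->DstOp is the broadcast form, e.g. X86::VADDPSZrmb.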