1//===- utils/TableGen/X86FoldTablesEmitter.cpp - X86 backend-*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This tablegen backend is responsible for emitting the memory fold tables of
10// the X86 backend instructions.
11//
12//===----------------------------------------------------------------------===//
13
14#include "Common/CodeGenInstruction.h"
15#include "Common/CodeGenTarget.h"
16#include "X86RecognizableInstr.h"
17#include "llvm/ADT/StringSwitch.h"
18#include "llvm/Support/X86FoldTablesUtils.h"
19#include "llvm/TableGen/Record.h"
20#include "llvm/TableGen/TableGenBackend.h"
21#include <set>
22
23using namespace llvm;
24using namespace X86Disassembler;
25
namespace {
// Represents an entry in the manual mapped instructions set.
struct ManualMapEntry {
  const char *RegInstStr; // Name of the register-form instruction record.
  const char *MemInstStr; // Name of the matching memory-form record.
  uint16_t Strategy;      // TB_* flag bits applied verbatim to the entry.
};
} // namespace
34
// List of instructions requiring explicitly aligned memory.
// Matched as substrings of the instruction name (see isExplicitAlign).
static constexpr const char *ExplicitAlign[] = {
    "MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS", "MOVNTPD", "MOVNTDQ", "MOVNTDQA"};
38
// List of instructions NOT requiring explicit memory alignment.
// Matched as substrings of the instruction name (see isExplicitUnalign).
static constexpr const char *ExplicitUnalign[] = {
    "MOVDQU", "MOVUPS", "MOVUPD", "PCMPESTRM",
    "PCMPESTRI", "PCMPISTRM", "PCMPISTRI"};
43
// Reg->mem pairs that must be mapped by hand (with explicit TB_* flags),
// expanded from the ENTRY(...) lines in X86ManualFoldTables.def.
static const ManualMapEntry ManualMapSet[] = {
#define ENTRY(REG, MEM, FLAGS) {#REG, #MEM, FLAGS},
#include "X86ManualFoldTables.def"
};
48
// Instruction names excluded from automatic fold-table generation,
// expanded from the NOFOLD(...) lines in X86ManualFoldTables.def.
static const std::set<StringRef> NoFoldSet = {
#define NOFOLD(INSN) #INSN,
#include "X86ManualFoldTables.def"
};
53
54const std::set<StringRef> NoFoldSameMaskPrefixSet = {
55#define NOFOLD_SAME_MASK_PREFIX(PREFIX) #PREFIX,
56#include "X86ManualFoldTables.def"
57};
58
59const std::set<StringRef> NoFoldSameMaskSet = {
60#define NOFOLD_SAME_MASK(INSN) #INSN,
61#include "X86ManualFoldTables.def"
62};
63
64// Check if instruction is unsafe for masked-load folding.
65static bool isNoFoldMaskedInstruction(const CodeGenInstruction *Inst) {
66 StringRef Name = Inst->getName();
67
68 // First check exact instruction name
69 if (NoFoldSameMaskSet.count(x: Name))
70 return true;
71
72 // Then strip suffixes to get base name for prefix matching
73 // Strip k-register suffix: kz or k
74 if (Name.ends_with(Suffix: "kz"))
75 Name = Name.drop_back(N: 2);
76 else if (Name.ends_with(Suffix: "k"))
77 Name = Name.drop_back(N: 1);
78 else
79 return false; // Not a k-register instruction
80
81 // Strip operand form suffix (check longer patterns first)
82 if (Name.ends_with(Suffix: "rri"))
83 Name = Name.drop_back(N: 3);
84 else if (Name.ends_with(Suffix: "rr") || Name.ends_with(Suffix: "ri"))
85 Name = Name.drop_back(N: 2);
86
87 // Strip vector size suffix: Z128, Z256, or Z
88 if (Name.ends_with(Suffix: "Z128") || Name.ends_with(Suffix: "Z256"))
89 Name = Name.drop_back(N: 4);
90 else if (Name.ends_with(Suffix: "Z"))
91 Name = Name.drop_back(N: 1);
92 else
93 return false; // Not a AVX512 instruction
94
95 return NoFoldSameMaskPrefixSet.count(x: Name);
96}
97
98static bool isExplicitAlign(const CodeGenInstruction *Inst) {
99 return any_of(Range: ExplicitAlign, P: [Inst](const char *InstStr) {
100 return Inst->getName().contains(Other: InstStr);
101 });
102}
103
104static bool isExplicitUnalign(const CodeGenInstruction *Inst) {
105 return any_of(Range: ExplicitUnalign, P: [Inst](const char *InstStr) {
106 return Inst->getName().contains(Other: InstStr);
107 });
108}
109
namespace {
// Emits the X86 memory-folding tables (fold/unfold between register and
// memory forms) as static C++ arrays.
class X86FoldTablesEmitter {
  const RecordKeeper &Records;
  const CodeGenTarget Target;

  // Represents an entry in the folding table: one (register form, memory
  // form) instruction pair plus the TB_* attribute flags printed with it.
  class X86FoldTableEntry {
    const CodeGenInstruction *RegInst;
    const CodeGenInstruction *MemInst;

  public:
    bool NoReverse = false; // Emit TB_NO_REVERSE (do not unfold).
    bool NoForward = false; // Emit TB_NO_FORWARD (do not fold).
    bool FoldLoad = false;  // Emit TB_FOLDED_LOAD.
    bool FoldStore = false; // Emit TB_FOLDED_STORE.
    // Element type broadcast from memory, for broadcast-table entries.
    enum BcastType {
      BCAST_NONE,
      BCAST_W,
      BCAST_D,
      BCAST_Q,
      BCAST_SS,
      BCAST_SD,
      BCAST_SH,
    };
    BcastType BroadcastKind = BCAST_NONE;

    // Required memory alignment; Align(1) means no TB_ALIGN_* flag.
    Align Alignment;

    X86FoldTableEntry() = default;
    X86FoldTableEntry(const CodeGenInstruction *RegInst,
                      const CodeGenInstruction *MemInst)
        : RegInst(RegInst), MemInst(MemInst) {}

    // Print this entry as "  {X86::Reg, X86::Mem, FLAGS}," where FLAGS is a
    // '|'-joined list of the TB_* attributes, or "0" if none apply.
    void print(raw_ostream &OS) const {
      OS.indent(2);
      OS << "{X86::" << RegInst->getName() << ", ";
      OS << "X86::" << MemInst->getName() << ", ";

      // Accumulate "FLAG|" pieces; the trailing '|' is trimmed below.
      std::string Attrs;
      if (FoldLoad)
        Attrs += "TB_FOLDED_LOAD|";
      if (FoldStore)
        Attrs += "TB_FOLDED_STORE|";
      if (NoReverse)
        Attrs += "TB_NO_REVERSE|";
      if (NoForward)
        Attrs += "TB_NO_FORWARD|";
      if (Alignment != Align(1))
        Attrs += "TB_ALIGN_" + std::to_string(Alignment.value()) + "|";
      switch (BroadcastKind) {
      case BCAST_NONE:
        break;
      case BCAST_W:
        Attrs += "TB_BCAST_W|";
        break;
      case BCAST_D:
        Attrs += "TB_BCAST_D|";
        break;
      case BCAST_Q:
        Attrs += "TB_BCAST_Q|";
        break;
      case BCAST_SS:
        Attrs += "TB_BCAST_SS|";
        break;
      case BCAST_SD:
        Attrs += "TB_BCAST_SD|";
        break;
      case BCAST_SH:
        Attrs += "TB_BCAST_SH|";
        break;
      }

      StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("|");
      if (SimplifiedAttrs.empty())
        SimplifiedAttrs = "0";

      OS << SimplifiedAttrs << "},\n";
    }

#ifndef NDEBUG
    // Check that Uses and Defs are same after memory fold; aborts via
    // report_fatal_error on mismatch (debug builds only).
    void checkCorrectness() const {
      auto &RegInstRec = *RegInst->TheDef;
      auto &MemInstRec = *MemInst->TheDef;
      auto ListOfUsesReg = RegInstRec.getValueAsListOfDefs("Uses");
      auto ListOfUsesMem = MemInstRec.getValueAsListOfDefs("Uses");
      auto ListOfDefsReg = RegInstRec.getValueAsListOfDefs("Defs");
      auto ListOfDefsMem = MemInstRec.getValueAsListOfDefs("Defs");
      if (ListOfUsesReg != ListOfUsesMem || ListOfDefsReg != ListOfDefsMem)
        report_fatal_error("Uses/Defs couldn't be changed after folding " +
                           RegInstRec.getName() + " to " +
                           MemInstRec.getName());
    }
#endif
  };

  // NOTE: We check the fold tables are sorted in X86InstrFoldTables.cpp by the
  // enum of the instruction, which is computed in
  // CodeGenTarget::ComputeInstrsByEnum. So we should use the same comparator
  // here (non-pseudo before pseudo, then by record name).
  // FIXME: Could we share the code with CodeGenTarget::ComputeInstrsByEnum?
  struct CompareInstrsByEnum {
    bool operator()(const CodeGenInstruction *LHS,
                    const CodeGenInstruction *RHS) const {
      assert(LHS && RHS && "LHS and RHS shouldn't be nullptr");
      const auto &D1 = *LHS->TheDef;
      const auto &D2 = *RHS->TheDef;
      return std::tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) <
             std::tuple(!D2.getValueAsBit("isPseudo"), D2.getName());
    }
  };

  using FoldTable = std::map<const CodeGenInstruction *, X86FoldTableEntry,
                             CompareInstrsByEnum>;
  // Table2Addr - Holds instructions whose memory form performs
  // load+store (Read-Modify-Write).
  //
  // Table#i - Holds instructions whose memory form
  // performs a load OR a store, and whose #i'th operand is folded.
  //
  // BroadcastTable#i - Holds instructions whose memory form performs
  // a broadcast load and whose #i'th operand is folded.
  FoldTable Table2Addr;
  FoldTable Table0;
  FoldTable Table1;
  FoldTable Table2;
  FoldTable Table3;
  FoldTable Table4;
  FoldTable BroadcastTable1;
  FoldTable BroadcastTable2;
  FoldTable BroadcastTable3;
  FoldTable BroadcastTable4;
  // Instructions flagged by isNoFoldMaskedInstruction; printed as a plain
  // list of opcodes rather than as fold entries.
  std::vector<const CodeGenInstruction *> NonFoldableWithSameMaskTable;

public:
  X86FoldTablesEmitter(const RecordKeeper &R) : Records(R), Target(R) {}

  // run - Generate the X86 memory fold tables and print them to OS.
  void run(raw_ostream &OS);

private:
  // Decides to which table to add the entry with the given instructions.
  // S sets the strategy of adding the TB_NO_REVERSE flag.
  void updateTables(const CodeGenInstruction *RegInst,
                    const CodeGenInstruction *MemInst, uint16_t S = 0,
                    bool IsManual = false, bool IsBroadcast = false);

  // Generates X86FoldTableEntry with the given instructions and fill it with
  // the appropriate flags, then adds it to a memory fold table.
  void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInst,
                         const CodeGenInstruction *MemInst, uint16_t S,
                         unsigned FoldedIdx, bool IsManual);
  // Generates X86FoldTableEntry with the given instructions and adds it to a
  // broadcast table.
  void addBroadcastEntry(FoldTable &Table, const CodeGenInstruction *RegInst,
                         const CodeGenInstruction *MemInst);

  // Print the given table as a static const C++ array of type
  // X86FoldTableEntry.
  void printTable(const FoldTable &Table, StringRef TableName,
                  raw_ostream &OS) {
    OS << "static const X86FoldTableEntry " << TableName << "[] = {\n";

    for (auto &E : Table)
      E.second.print(OS);

    OS << "};\n\n";
  }

  // Print a plain list of instructions as a static const array of opcodes.
  void printTable(const std::vector<const CodeGenInstruction *> &Instructions,
                  StringRef TableName, raw_ostream &OS) {
    OS << "static const unsigned " << TableName << "[] = {\n";
    for (auto Inst : Instructions)
      OS << "  X86::" << Inst->getName() << ",\n";
    OS << "};\n\n";
  }
};
} // namespace
288
289// Return true if one of the instruction's operands is a RST register class
290static bool hasRSTRegClass(const CodeGenInstruction *Inst) {
291 return any_of(Range: Inst->Operands, P: [](const CGIOperandList::OperandInfo &OpIn) {
292 return OpIn.Rec->getName() == "RST" || OpIn.Rec->getName() == "RSTi";
293 });
294}
295
296// Return true if one of the instruction's operands is a ptr_rc_tailcall
297static bool hasPtrTailcallRegClass(const CodeGenInstruction *Inst) {
298 return any_of(Range: Inst->Operands, P: [](const CGIOperandList::OperandInfo &OpIn) {
299 return OpIn.Rec->getName() == "ptr_rc_tailcall";
300 });
301}
302
303static bool mayFoldFromForm(uint8_t Form) {
304 switch (Form) {
305 default:
306 return Form >= X86Local::MRM0r && Form <= X86Local::MRM7r;
307 case X86Local::MRMXr:
308 case X86Local::MRMXrCC:
309 case X86Local::MRMDestReg:
310 case X86Local::MRMSrcReg:
311 case X86Local::MRMSrcReg4VOp3:
312 case X86Local::MRMSrcRegOp4:
313 case X86Local::MRMSrcRegCC:
314 return true;
315 }
316}
317
318static bool mayFoldToForm(uint8_t Form) {
319 switch (Form) {
320 default:
321 return Form >= X86Local::MRM0m && Form <= X86Local::MRM7m;
322 case X86Local::MRMXm:
323 case X86Local::MRMXmCC:
324 case X86Local::MRMDestMem:
325 case X86Local::MRMSrcMem:
326 case X86Local::MRMSrcMem4VOp3:
327 case X86Local::MRMSrcMemOp4:
328 case X86Local::MRMSrcMemCC:
329 return true;
330 }
331}
332
333static bool mayFoldFromLeftToRight(uint8_t LHS, uint8_t RHS) {
334 switch (LHS) {
335 default:
336 llvm_unreachable("Unexpected Form!");
337 case X86Local::MRM0r:
338 return RHS == X86Local::MRM0m;
339 case X86Local::MRM1r:
340 return RHS == X86Local::MRM1m;
341 case X86Local::MRM2r:
342 return RHS == X86Local::MRM2m;
343 case X86Local::MRM3r:
344 return RHS == X86Local::MRM3m;
345 case X86Local::MRM4r:
346 return RHS == X86Local::MRM4m;
347 case X86Local::MRM5r:
348 return RHS == X86Local::MRM5m;
349 case X86Local::MRM6r:
350 return RHS == X86Local::MRM6m;
351 case X86Local::MRM7r:
352 return RHS == X86Local::MRM7m;
353 case X86Local::MRMXr:
354 return RHS == X86Local::MRMXm;
355 case X86Local::MRMXrCC:
356 return RHS == X86Local::MRMXmCC;
357 case X86Local::MRMDestReg:
358 return RHS == X86Local::MRMDestMem;
359 case X86Local::MRMSrcReg:
360 return RHS == X86Local::MRMSrcMem;
361 case X86Local::MRMSrcReg4VOp3:
362 return RHS == X86Local::MRMSrcMem4VOp3;
363 case X86Local::MRMSrcRegOp4:
364 return RHS == X86Local::MRMSrcMemOp4;
365 case X86Local::MRMSrcRegCC:
366 return RHS == X86Local::MRMSrcMemCC;
367 }
368}
369
370static bool isNOREXRegClass(const Record *Op) {
371 return Op->getName().contains(Other: "_NOREX");
372}
373
// Function object - Operator() returns true if the given Reg instruction
// matches the Mem instruction of this object.
namespace {
class IsMatch {
  const CodeGenInstruction *MemInst;
  // Pre-parsed encoding fields of the memory-form instruction.
  const X86Disassembler::RecognizableInstrBase MemRI;
  // True when matching for the broadcast tables (EVEX.b memory form).
  bool IsBroadcast;
  // AsmWriter variant index, used to compare mnemonics.
  const unsigned Variant;

public:
  IsMatch(const CodeGenInstruction *Inst, bool IsBroadcast, unsigned V)
      : MemInst(Inst), MemRI(*MemInst), IsBroadcast(IsBroadcast), Variant(V) {}

  bool operator()(const CodeGenInstruction *RegInst) {
    X86Disassembler::RecognizableInstrBase RegRI(*RegInst);
    const Record *RegRec = RegInst->TheDef;
    const Record *MemRec = MemInst->TheDef;

    // EVEX_B means different things for memory and register forms.
    // register form: rounding control or SAE
    // memory form: broadcast
    if (IsBroadcast && (RegRI.HasEVEX_B || !MemRI.HasEVEX_B))
      return false;
    // EVEX_B indicates NDD for MAP4 instructions, so only then may it be set
    // on either form of a non-broadcast match.
    if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B) &&
        RegRI.OpMap != X86Local::T_MAP4)
      return false;

    // The encoding forms themselves must be a known reg->mem pair.
    if (!mayFoldFromLeftToRight(RegRI.Form, MemRI.Form))
      return false;

    // X86 encoding is crazy, e.g
    //
    //   f3 0f c7 30  vmxon   (%rax)
    //   f3 0f c7 f0  senduipi %rax
    //
    // These two instructions have similar encoding fields but are unrelated,
    // so also require identical mnemonics.
    if (X86Disassembler::getMnemonic(MemInst, Variant) !=
        X86Disassembler::getMnemonic(RegInst, Variant))
      return false;

    // Return false if any of the following encoding fields do not match.
    if (std::tuple(RegRI.Encoding, RegRI.Opcode, RegRI.OpPrefix, RegRI.OpMap,
                   RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W, RegRI.HasVEX_4V,
                   RegRI.HasVEX_L, RegRI.IgnoresVEX_L, RegRI.IgnoresW,
                   RegRI.HasEVEX_K, RegRI.HasEVEX_KZ, RegRI.HasEVEX_L2,
                   RegRI.HasEVEX_NF, RegRec->getValueAsBit("hasEVEX_RC"),
                   RegRec->getValueAsBit("hasLockPrefix"),
                   RegRec->getValueAsBit("hasNoTrackPrefix")) !=
        std::tuple(MemRI.Encoding, MemRI.Opcode, MemRI.OpPrefix, MemRI.OpMap,
                   MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W, MemRI.HasVEX_4V,
                   MemRI.HasVEX_L, MemRI.IgnoresVEX_L, MemRI.IgnoresW,
                   MemRI.HasEVEX_K, MemRI.HasEVEX_KZ, MemRI.HasEVEX_L2,
                   MemRI.HasEVEX_NF, MemRec->getValueAsBit("hasEVEX_RC"),
                   MemRec->getValueAsBit("hasLockPrefix"),
                   MemRec->getValueAsBit("hasNoTrackPrefix")))
      return false;

    // Make sure the sizes of the operands of both instructions suit each other.
    // This is needed for instructions with intrinsic version (_Int).
    // Where the only difference is the size of the operands.
    // For example: VUCOMISDZrm and VUCOMISDrm_Int
    // Also for instructions that their EVEX version was upgraded to work with
    // k-registers. For example VPCMPEQBrm (xmm output register) and
    // VPCMPEQBZ128rm (k register output register).
    unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs();
    unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs();
    unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();
    unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();

    // Instructions with one output in their memory form use the memory folded
    // operand as source and destination (Read-Modify-Write), so the register
    // form has one extra output; skip it when pairing operands below.
    unsigned RegStartIdx =
        (MemOutSize + 1 == RegOutSize) && (MemInSize == RegInSize) ? 1 : 0;

    // Pair up the operands of both forms; exactly one reg->mem mismatch is
    // allowed, and it is the folded operand.
    bool FoundFoldedOp = false;
    for (unsigned I = 0, E = MemInst->Operands.size(); I != E; I++) {
      const Record *MemOpRec = MemInst->Operands[I].Rec;
      const Record *RegOpRec = RegInst->Operands[I + RegStartIdx].Rec;

      if (MemOpRec == RegOpRec)
        continue;

      // Differing register operands must agree in size and NOREX-ness.
      if (isRegisterOperand(MemOpRec) && isRegisterOperand(RegOpRec) &&
          ((getRegOperandSize(MemOpRec) != getRegOperandSize(RegOpRec)) ||
           (isNOREXRegClass(MemOpRec) != isNOREXRegClass(RegOpRec))))
        return false;

      // Differing memory operands must agree in size.
      if (isMemoryOperand(MemOpRec) && isMemoryOperand(RegOpRec) &&
          (getMemOperandSize(MemOpRec) != getMemOperandSize(RegOpRec)))
        return false;

      // Differing immediate operands must agree in type.
      if (isImmediateOperand(MemOpRec) && isImmediateOperand(RegOpRec) &&
          (MemOpRec->getValueAsDef("Type") != RegOpRec->getValueAsDef("Type")))
        return false;

      // Only one operand can be folded.
      if (FoundFoldedOp)
        return false;

      assert(isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec));
      FoundFoldedOp = true;
    }

    return FoundFoldedOp;
  }
};

} // end anonymous namespace
483
// Build a fold-table entry for the RegInst/MemInst pair, decode the strategy
// bits S into its flags, apply the automatic safety/alignment heuristics
// (unless IsManual), and insert it into Table keyed by RegInst.
void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,
                                             const CodeGenInstruction *RegInst,
                                             const CodeGenInstruction *MemInst,
                                             uint16_t S, unsigned FoldedIdx,
                                             bool IsManual) {

  assert((IsManual || Table.find(RegInst) == Table.end()) &&
         "Override entry unexpectedly");
  X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);
  const Record *RegRec = RegInst->TheDef;
  // Decode the strategy bits into the entry's flags.
  Result.NoReverse = S & TB_NO_REVERSE;
  Result.NoForward = S & TB_NO_FORWARD;
  Result.FoldLoad = S & TB_FOLDED_LOAD;
  Result.FoldStore = S & TB_FOLDED_STORE;
  Result.Alignment = Align(1ULL << ((S & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT));
  // Manual entries take their flags exactly as given; skip the heuristics.
  if (IsManual) {
    Table[RegInst] = Result;
    return;
  }

  const Record *RegOpRec = RegInst->Operands[FoldedIdx].Rec;
  const Record *MemOpRec = MemInst->Operands[FoldedIdx].Rec;

  // Unfolding code generates a load/store instruction according to the size of
  // the register in the register form instruction.
  // If the register's size is greater than the memory's operand size, do not
  // allow unfolding.

  // The unfolded load size will be based on the register size. If that's
  // bigger than the memory operand size, the unfolded load will load more
  // memory and potentially cause a memory fault.
  if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))
    Result.NoReverse = true;

  // Check no-kz version's isMoveReg: strip a trailing "k"/"kz" from the
  // register-form name and, if a base record exists, use its isMoveReg bit.
  StringRef RegInstName = RegRec->getName();
  unsigned DropLen =
      RegInstName.ends_with("rkz") ? 2 : (RegInstName.ends_with("rk") ? 1 : 0);
  const Record *BaseDef =
      DropLen ? Records.getDef(RegInstName.drop_back(DropLen)) : nullptr;
  bool IsMoveReg =
      BaseDef ? Target.getInstruction(BaseDef).isMoveReg : RegInst->isMoveReg;
  // A masked load can not be unfolded to a full load, otherwise it would access
  // unexpected memory. A simple store can not be unfolded.
  if (IsMoveReg && (BaseDef || Result.FoldStore))
    Result.NoReverse = true;

  uint8_t Enc = byteFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits"));
  if (isExplicitAlign(RegInst)) {
    // The instruction requires explicitly aligned memory; alignment equals
    // the vector size.
    const BitsInit *VectSize = RegRec->getValueAsBitsInit("VectSize");
    Result.Alignment = Align(byteFromBitsInit(VectSize));
  } else if (!Enc && !isExplicitUnalign(RegInst) &&
             getMemOperandSize(MemOpRec) > 64) {
    // Instructions with XOP/VEX/EVEX encoding do not require alignment while
    // SSE packed vector instructions require a 16 byte alignment.
    Result.Alignment = Align(16);
  }
  // Expand is only ever created as a masked instruction. It is not safe to
  // unfold a masked expand because we don't know if it came from an expand load
  // intrinsic or folding a plain load. If it is from an expand load intrinsic,
  // unfolding to a plain load would read more elements and could trigger a
  // fault.
  if (RegRec->getName().contains("EXPAND"))
    Result.NoReverse = true;

  Table[RegInst] = Result;
}
551
552void X86FoldTablesEmitter::addBroadcastEntry(
553 FoldTable &Table, const CodeGenInstruction *RegInst,
554 const CodeGenInstruction *MemInst) {
555
556 assert(Table.find(RegInst) == Table.end() && "Override entry unexpectedly");
557 X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);
558
559 const DagInit *In = MemInst->TheDef->getValueAsDag(FieldName: "InOperandList");
560 for (unsigned I = 0, E = In->getNumArgs(); I != E; ++I) {
561 Result.BroadcastKind =
562 StringSwitch<X86FoldTableEntry::BcastType>(In->getArg(Num: I)->getAsString())
563 .Case(S: "i16mem", Value: X86FoldTableEntry::BCAST_W)
564 .Case(S: "i32mem", Value: X86FoldTableEntry::BCAST_D)
565 .Case(S: "i64mem", Value: X86FoldTableEntry::BCAST_Q)
566 .Case(S: "f16mem", Value: X86FoldTableEntry::BCAST_SH)
567 .Case(S: "f32mem", Value: X86FoldTableEntry::BCAST_SS)
568 .Case(S: "f64mem", Value: X86FoldTableEntry::BCAST_SD)
569 .Default(Value: X86FoldTableEntry::BCAST_NONE);
570 if (Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE)
571 break;
572 }
573 assert(Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE &&
574 "Unknown memory operand for broadcast");
575
576 Table[RegInst] = Result;
577}
578
579void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInst,
580 const CodeGenInstruction *MemInst,
581 uint16_t S, bool IsManual,
582 bool IsBroadcast) {
583
584 const Record *RegRec = RegInst->TheDef;
585 const Record *MemRec = MemInst->TheDef;
586 unsigned MemOutSize = MemRec->getValueAsDag(FieldName: "OutOperandList")->getNumArgs();
587 unsigned RegOutSize = RegRec->getValueAsDag(FieldName: "OutOperandList")->getNumArgs();
588 unsigned MemInSize = MemRec->getValueAsDag(FieldName: "InOperandList")->getNumArgs();
589 unsigned RegInSize = RegRec->getValueAsDag(FieldName: "InOperandList")->getNumArgs();
590
591 // Instructions which Read-Modify-Write should be added to Table2Addr.
592 if (!MemOutSize && RegOutSize == 1 && MemInSize == RegInSize) {
593 assert(!IsBroadcast && "Read-Modify-Write can not be broadcast");
594 // X86 would not unfold Read-Modify-Write instructions so add TB_NO_REVERSE.
595 addEntryWithFlags(Table&: Table2Addr, RegInst, MemInst, S: S | TB_NO_REVERSE, FoldedIdx: 0,
596 IsManual);
597 return;
598 }
599
600 // Only table0 entries should explicitly specify a load or store flag.
601 // If the instruction writes to the folded operand, it will appear as
602 // an output in the register form instruction and as an input in the
603 // memory form instruction. If the instruction reads from the folded
604 // operand, it will appear as in input in both forms.
605 if (MemInSize == RegInSize && MemOutSize == RegOutSize) {
606 // Load-Folding cases.
607 // If the i'th register form operand is a register and the i'th memory form
608 // operand is a memory operand, add instructions to Table#i.
609 for (unsigned I = RegOutSize, E = RegInst->Operands.size(); I < E; I++) {
610 const Record *RegOpRec = RegInst->Operands[I].Rec;
611 const Record *MemOpRec = MemInst->Operands[I].Rec;
612 // RegClassByHwMode: For instructions like TAILJMPr, TAILJMPr64,
613 // TAILJMPr64_REX
614 if ((isRegisterOperand(Rec: RegOpRec) ||
615 (RegOpRec->isSubClassOf(Name: "RegClassByHwMode"))) &&
616 isMemoryOperand(Rec: MemOpRec)) {
617 switch (I) {
618 case 0:
619 assert(!IsBroadcast && "BroadcastTable0 needs to be added");
620 addEntryWithFlags(Table&: Table0, RegInst, MemInst, S: S | TB_FOLDED_LOAD, FoldedIdx: 0,
621 IsManual);
622 return;
623 case 1:
624 IsBroadcast
625 ? addBroadcastEntry(Table&: BroadcastTable1, RegInst, MemInst)
626 : addEntryWithFlags(Table&: Table1, RegInst, MemInst, S, FoldedIdx: 1, IsManual);
627 return;
628 case 2:
629 IsBroadcast
630 ? addBroadcastEntry(Table&: BroadcastTable2, RegInst, MemInst)
631 : addEntryWithFlags(Table&: Table2, RegInst, MemInst, S, FoldedIdx: 2, IsManual);
632 return;
633 case 3:
634 IsBroadcast
635 ? addBroadcastEntry(Table&: BroadcastTable3, RegInst, MemInst)
636 : addEntryWithFlags(Table&: Table3, RegInst, MemInst, S, FoldedIdx: 3, IsManual);
637 return;
638 case 4:
639 IsBroadcast
640 ? addBroadcastEntry(Table&: BroadcastTable4, RegInst, MemInst)
641 : addEntryWithFlags(Table&: Table4, RegInst, MemInst, S, FoldedIdx: 4, IsManual);
642 return;
643 }
644 }
645 }
646 } else if (MemInSize == RegInSize + 1 && MemOutSize + 1 == RegOutSize) {
647 // Store-Folding cases.
648 // If the memory form instruction performs a store, the *output*
649 // register of the register form instructions disappear and instead a
650 // memory *input* operand appears in the memory form instruction.
651 // For example:
652 // MOVAPSrr => (outs VR128:$dst), (ins VR128:$src)
653 // MOVAPSmr => (outs), (ins f128mem:$dst, VR128:$src)
654 const Record *RegOpRec = RegInst->Operands[RegOutSize - 1].Rec;
655 const Record *MemOpRec = MemInst->Operands[RegOutSize - 1].Rec;
656 if (isRegisterOperand(Rec: RegOpRec) && isMemoryOperand(Rec: MemOpRec) &&
657 getRegOperandSize(RegRec: RegOpRec) == getMemOperandSize(MemRec: MemOpRec)) {
658 assert(!IsBroadcast && "Store can not be broadcast");
659 addEntryWithFlags(Table&: Table0, RegInst, MemInst, S: S | TB_FOLDED_STORE, FoldedIdx: 0,
660 IsManual);
661 }
662 }
663}
664
// Driver: collect candidate instructions, pair each memory form with its
// register form, add the manual map entries, verify (debug), and print all
// tables to OS.
void X86FoldTablesEmitter::run(raw_ostream &OS) {
  // Holds all memory instructions
  std::vector<const CodeGenInstruction *> MemInsts;
  // Holds all register instructions - divided according to opcode.
  std::map<uint8_t, std::vector<const CodeGenInstruction *>> RegInsts;

  ArrayRef<const CodeGenInstruction *> NumberedInstructions =
      Target.getInstructions();

  for (const CodeGenInstruction *Inst : NumberedInstructions) {
    const Record *Rec = Inst->TheDef;
    // Only real X86 instructions participate.
    if (!Rec->isSubClassOf("X86Inst") || Rec->getValueAsBit("isAsmParserOnly"))
      continue;

    // Skip instructions explicitly excluded by the manual NOFOLD list.
    if (NoFoldSet.find(Rec->getName()) != NoFoldSet.end())
      continue;

    // Promoted legacy instruction is in EVEX space, and has REX2-encoding
    // alternative. It's added due to HW design and never emitted by compiler.
    if (byteFromBitsInit(Rec->getValueAsBitsInit("OpMapBits")) ==
            X86Local::T_MAP4 &&
        byteFromBitsInit(Rec->getValueAsBitsInit("explicitOpPrefixBits")) ==
            X86Local::ExplicitEVEX)
      continue;

    // - Instructions including RST register class operands are not relevant
    //   for memory folding (for further details check the explanation in
    //   lib/Target/X86/X86InstrFPStack.td file).
    // - Some instructions (listed in the manual map above) use the register
    //   class ptr_rc_tailcall, which can be of a size 32 or 64, to ensure
    //   safe mapping of these instructions we manually map them and exclude
    //   them from the automation.
    if (hasRSTRegClass(Inst) || hasPtrTailcallRegClass(Inst))
      continue;

    // Check if this instruction has a prefix in NoFoldSameMaskPrefixSet or is
    // in NoFoldSameMaskSet (problematic for masked-load folding) and add to
    // NonFoldableWithSameMaskTable.
    if (isNoFoldMaskedInstruction(Inst)) {
      NonFoldableWithSameMaskTable.push_back(Inst);
    }

    // Add all the memory form instructions to MemInsts, and all the register
    // form instructions to RegInsts[Opc], where Opc is the opcode of each
    // instruction. This helps reduce the runtime of the backend.
    const BitsInit *FormBits = Rec->getValueAsBitsInit("FormBits");
    uint8_t Form = byteFromBitsInit(FormBits);
    if (mayFoldToForm(Form))
      MemInsts.push_back(Inst);
    else if (mayFoldFromForm(Form)) {
      uint8_t Opc = byteFromBitsInit(Rec->getValueAsBitsInit("Opcode"));
      RegInsts[Opc].push_back(Inst);
    }
  }

  // Create a copy b/c the register instruction will be removed when a new
  // entry is added into memory fold tables.
  auto RegInstsForBroadcast = RegInsts;

  const Record *AsmWriter = Target.getAsmWriter();
  unsigned Variant = AsmWriter->getValueAsInt("Variant");
  // _REV/_alt forms: record the base instruction (if its record exists)
  // instead of the alternate form.
  auto FixUp = [&](const CodeGenInstruction *RegInst) {
    StringRef RegInstName = RegInst->getName();
    if (RegInstName.ends_with("_REV") || RegInstName.ends_with("_alt"))
      if (auto *RegAltRec = Records.getDef(RegInstName.drop_back(4)))
        RegInst = &Target.getInstruction(RegAltRec);
    return RegInst;
  };
  // For each memory form instruction, try to find its register form
  // instruction.
  for (const CodeGenInstruction *MemInst : MemInsts) {
    uint8_t Opc =
        byteFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode"));

    auto RegInstsIt = RegInsts.find(Opc);
    if (RegInstsIt == RegInsts.end())
      continue;

    // Two forms (memory & register) of the same instruction must have the same
    // opcode.
    std::vector<const CodeGenInstruction *> &OpcRegInsts = RegInstsIt->second;

    // Memory fold tables
    auto Match =
        find_if(OpcRegInsts, IsMatch(MemInst, /*IsBroadcast=*/false, Variant));
    if (Match != OpcRegInsts.end()) {
      updateTables(FixUp(*Match), MemInst);
      // Remove the matched register form so it cannot pair again.
      OpcRegInsts.erase(Match);
    }

    // Broadcast tables: only memory forms named *mb/*mib can broadcast.
    StringRef MemInstName = MemInst->getName();
    if (!MemInstName.contains("mb") && !MemInstName.contains("mib"))
      continue;
    RegInstsIt = RegInstsForBroadcast.find(Opc);
    assert(RegInstsIt != RegInstsForBroadcast.end() &&
           "Unexpected control flow");
    std::vector<const CodeGenInstruction *> &OpcRegInstsForBroadcast =
        RegInstsIt->second;
    Match = find_if(OpcRegInstsForBroadcast,
                    IsMatch(MemInst, /*IsBroadcast=*/true, Variant));
    if (Match != OpcRegInstsForBroadcast.end()) {
      updateTables(FixUp(*Match), MemInst, 0, /*IsManual=*/false,
                   /*IsBroadcast=*/true);
      OpcRegInstsForBroadcast.erase(Match);
    }
  }

  // Add the manually mapped instructions listed above.
  for (const ManualMapEntry &Entry : ManualMapSet) {
    const Record *RegInstIter = Records.getDef(Entry.RegInstStr);
    const Record *MemInstIter = Records.getDef(Entry.MemInstStr);

    updateTables(&(Target.getInstruction(RegInstIter)),
                 &(Target.getInstruction(MemInstIter)), Entry.Strategy, true);
  }

#ifndef NDEBUG
  // Debug-only sanity check: Uses/Defs must agree across each reg/mem pair.
  auto CheckMemFoldTable = [](const FoldTable &Table) -> void {
    for (const auto &Record : Table) {
      auto &FoldEntry = Record.second;
      FoldEntry.checkCorrectness();
    }
  };
  CheckMemFoldTable(Table2Addr);
  CheckMemFoldTable(Table0);
  CheckMemFoldTable(Table1);
  CheckMemFoldTable(Table2);
  CheckMemFoldTable(Table3);
  CheckMemFoldTable(Table4);
  CheckMemFoldTable(BroadcastTable1);
  CheckMemFoldTable(BroadcastTable2);
  CheckMemFoldTable(BroadcastTable3);
  CheckMemFoldTable(BroadcastTable4);
#endif
#define PRINT_TABLE(TABLE) printTable(TABLE, #TABLE, OS);
  // Print all tables.
  PRINT_TABLE(Table2Addr)
  PRINT_TABLE(Table0)
  PRINT_TABLE(Table1)
  PRINT_TABLE(Table2)
  PRINT_TABLE(Table3)
  PRINT_TABLE(Table4)
  PRINT_TABLE(BroadcastTable1)
  PRINT_TABLE(BroadcastTable2)
  PRINT_TABLE(BroadcastTable3)
  PRINT_TABLE(BroadcastTable4)
  PRINT_TABLE(NonFoldableWithSameMaskTable)
}
814
// Register this emitter as the "gen-x86-fold-tables" tablegen backend.
static TableGen::Emitter::OptClass<X86FoldTablesEmitter>
    X("gen-x86-fold-tables", "Generate X86 fold tables");
817