//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca-instrbuilder"

namespace llvm {
namespace mca {

char RecycledInstErr::ID = 0;

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia,
                           const mca::InstrumentManager &im, unsigned cl)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
      FirstReturnInst(true), CallLatency(cl) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  SmallVector<ResourcePlusCycles, 4> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
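  // For example (hypothetical model): if a divider unit is declared with
  // "let Super = P0" and a write consumes that divider for 3 cycles, then
  // SuperResources[mask(P0)] accumulates those 3 cycles; they are later
  // excluded when subtracting P0's contribution from groups that contain it.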
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->ReleaseAtCycle) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->ReleaseAtCycle, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->ReleaseAtCycle;
    }
  }

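  // An instruction must issue immediately (i.e. it cannot sit in a scheduler
  // buffer) if every consumed resource is in-order (buffer size of at most 1)
  // and at least one of them is unbuffered (buffer size 0, a dispatch hazard).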
  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
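  // For example (hypothetical masks): a single unit 0b0001 sorts before the
  // two-unit group 0b1011 (group bit 0b1000 plus units 0b0011), which in turn
  // sorts before any group with more units.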
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = llvm::popcount(A.first);
    unsigned popcntB = llvm::popcount(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;
  uint64_t UnitsFromResourceGroups = 0;

  // Remove cycles contributed by smaller resources, and check if there
  // are partially overlapping resource groups.
  ID.HasPartiallyOverlappingGroups = false;

  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(llvm::popcount(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= llvm::bit_floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;

    if (llvm::popcount(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= llvm::bit_floor(NormalizedMask);
      if (UnitsFromResourceGroups & NormalizedMask)
        ID.HasPartiallyOverlappingGroups = true;

      UnitsFromResourceGroups |= NormalizedMask;
      UsedResourceGroups |= (A.first ^ NormalizedMask);
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (llvm::popcount(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ReleaseAtCycles = [2, 2, 3];
  //  }
  //
  // This means:
  //  Resource units HWPort0 and HWPort1 are both used for 2cy.
  //  Resource group HWPort01 is the union of HWPort0 and HWPort1.
  //  Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  //  will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
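  // In the example above, the unit count of HWPort01 is bumped once for each
  // of HWPort0 and HWPort1 by the subtraction loop. Once that count exceeds
  // the number of units in the group, the group is marked as reserved for the
  // remaining cycles.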
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ llvm::bit_floor(RPC.first);
      uint64_t MaxResourceUnits = llvm::popcount(Mask);
      if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }

  // Identify extra buffers that are consumed through super resources.
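  // For example, if this write consumed a sub-resource whose "Super" is P0,
  // then any buffered resource whose mask fully contains P0's mask (such as a
  // group P01) also has its buffer marked as used.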
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
    dbgs() << "\t\tHasPartiallyOverlappingGroups="
           << ID.HasPartiallyOverlappingGroups << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI,
                              unsigned CallLatency) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency.
    ID.MaxLatency = CallLatency;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume the MaxLatency set for
  // calls.
  ID.MaxLatency = Latency < 0 ? CallLatency : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  //  1. The number of explicit and implicit register definitions in a MCInst
  //     matches the number of explicit and implicit definitions according to
  //     the opcode descriptor (MCInstrDesc).
  //  2. Uses start at index #(MCDesc.getNumDefs()).
  //  3. There can only be a single optional register definition, and it is
  //     either the last operand of the sequence (excluding extra operands
  //     contributed by variadic opcodes) or one of the explicit register
  //     definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  // vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                            @  <MCOperand Reg:59>
  //                            @  <MCOperand Imm:0>     (!!)
  //                            @  <MCOperand Reg:67>
  //                            @  <MCOperand Imm:0>
  //                            @  <MCOperand Imm:14>
  //                            @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2, register reads start at #(NumExplicitDefs-1).
  // That means, register R1 from the example is both read and written.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.implicit_defs().size();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register or constant register
  // operands. The first NumExplicitDefs register operands are expected to be
  // register definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.operands()[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }
    if (MRI.isConstant(Op.getReg())) {
      CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.implicit_defs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;
    if (MRI.isConstant(Op.getReg()))
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.implicit_uses().size();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;
    if (MRI.isConstant(Op.getReg()))
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.implicit_uses()[I];
    if (MRI.isConstant(Read.RegisterID))
      continue;
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

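// Operands are hashed by kind; register operands additionally contribute the
// register number. Note that immediate values do not contribute to the hash,
// so two instructions that only differ in an immediate produce the same hash.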
hash_code hashMCOperand(const MCOperand &MCO) {
  hash_code TypeHash = hash_combine(MCO.isReg(), MCO.isImm(), MCO.isSFPImm(),
                                    MCO.isDFPImm(), MCO.isExpr(), MCO.isInst());
  if (MCO.isReg())
    return hash_combine(TypeHash, MCO.getReg());

  return TypeHash;
}

hash_code hashMCInst(const MCInst &MCI) {
  hash_code InstructionHash = hash_combine(MCI.getOpcode(), MCI.getFlags());
  for (unsigned I = 0; I < MCI.getNumOperands(); ++I) {
    InstructionHash =
        hash_combine(InstructionHash, hashMCOperand(MCI.getOperand(I)));
  }
  return InstructionHash;
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // in CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<unsigned> InstrBuilder::getVariantSchedClassID(const MCInst &MCI,
                                                        unsigned SchedClassID) {
  const MCSchedModel &SM = STI.getSchedModel();
  unsigned CPUID = SM.getProcessorID();
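  // A variant scheduling class may resolve to another variant; keep resolving
  // until a non-variant class is reached. A resolved class ID of zero means
  // that resolution failed, and an error is reported below.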
  while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
    SchedClassID =
        STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

  if (!SchedClassID) {
    return make_error<InstructionError<MCInst>>(
        "unable to resolve scheduling class for write variant.", MCI);
  }

  return SchedClassID;
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI,
                                  const SmallVector<Instrument *> &IVec) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  // Allow InstrumentManager to override and use a different SchedClassID.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
  if (IsVariant) {
    Expected<unsigned> VariantSchedClassIDOrErr =
        getVariantSchedClassID(MCI, SchedClassID);
    if (!VariantSchedClassIDOrErr) {
      return VariantSchedClassIDOrErr.takeError();
    }

    SchedClassID = *VariantSchedClassIDOrErr;
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence", MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
  LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of " << CallLatency << "cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI, CallLatency);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Validation check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
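  // Descriptors of non-variant, non-variadic opcodes only depend on the pair
  // (opcode, scheduling class), so they can be cached and recycled. Variant or
  // variadic instructions depend on their specific operands, so they are keyed
  // by a hash of the whole MCInst instead.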
  bool IsVariadic = MCDesc.isVariadic();
  if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
    auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
    Descriptors[DKey] = std::move(ID);
    return *Descriptors[DKey];
  }

  auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID);
  assert(
      !VariantDescriptors.contains(VDKey) &&
      "Expected VariantDescriptors to not already have a value for this key.");
  VariantDescriptors[VDKey] = std::move(ID);
  return *VariantDescriptors[VDKey];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
                                   const SmallVector<Instrument *> &IVec) {
  // Cache lookup using SchedClassID from Instrumentation.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);

  auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
  if (Descriptors.find_as(DKey) != Descriptors.end())
    return *Descriptors[DKey];

  Expected<unsigned> VariantSchedClassIDOrErr =
      getVariantSchedClassID(MCI, SchedClassID);
  if (!VariantSchedClassIDOrErr) {
    return VariantSchedClassIDOrErr.takeError();
  }

  SchedClassID = *VariantSchedClassIDOrErr;

  auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID);
  if (VariantDescriptors.contains(VDKey))
    return *VariantDescriptors[VDKey];

  return createInstrDescImpl(MCI, IVec);
}

STATISTIC(NumVariantInst, "Number of MCInsts that don't have a static Desc");

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI,
                                const SmallVector<Instrument *> &IVec) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  Instruction *NewIS = nullptr;
  std::unique_ptr<Instruction> CreatedIS;
  bool IsInstRecycled = false;

  if (!D.IsRecyclable)
    ++NumVariantInst;

  if (D.IsRecyclable && InstRecycleCB) {
    if (auto *I = InstRecycleCB(D)) {
      NewIS = I;
      NewIS->reset();
      IsInstRecycled = true;
    }
  }
  if (!IsInstRecycled) {
    CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
    NewIS = CreatedIS.get();
  }

  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedClassDesc &SCDesc =
      *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);

  NewIS->setMayLoad(MCDesc.mayLoad());
  NewIS->setMayStore(MCDesc.mayStore());
  NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
  NewIS->setBeginGroup(SCDesc.BeginGroup);
  NewIS->setEndGroup(SCDesc.EndGroup);
  NewIS->setRetireOOO(SCDesc.RetireOOO);

  // Check if this is a dependency breaking instruction.
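  // A zero idiom (for example, a register XOR'd with itself on X86) always
  // produces zero regardless of its inputs. A dependency breaking instruction
  // does not have to wait on some (or all) of its input registers; `Mask`
  // describes which input operands are independent.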
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  size_t Idx = 0U;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    ReadState *RS = nullptr;
    if (IsInstRecycled && Idx < NewIS->getUses().size()) {
      NewIS->getUses()[Idx] = ReadState(RD, RegID);
      RS = &NewIS->getUses()[Idx++];
    } else {
      NewIS->getUses().emplace_back(RD, RegID);
      RS = &NewIS->getUses().back();
      ++Idx;
    }

    if (IsDepBreaking) {
      // A mask of all zeroes means: explicit input operands are not
      // independent.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS->setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS->setIndependentFromDef();
        }
      }
    }
  }
  if (IsInstRecycled && Idx < NewIS->getUses().size())
    NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);

  // Early exit if there are no writes.
  if (D.Writes.empty()) {
    if (IsInstRecycled)
      return llvm::make_error<RecycledInstErr>(NewIS);
    else
      return std::move(CreatedIS);
  }

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
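  // For example, on X86-64 a write to a 32-bit GPR such as EAX implicitly
  // zeroes the upper half of the parent 64-bit register (RAX).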
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  Idx = 0U;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg or a
    // write to a constant register.
    if ((WD.IsOptionalDef && !RegID) || MRI.isConstant(RegID)) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
      NewIS->getDefs()[Idx++] =
          WriteState(WD, RegID,
                     /* ClearsSuperRegs */ WriteMask[WriteIndex],
                     /* WritesZero */ IsZeroIdiom);
    } else {
      NewIS->getDefs().emplace_back(WD, RegID,
                                    /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                    /* WritesZero */ IsZeroIdiom);
      ++Idx;
    }
    ++WriteIndex;
  }
  if (IsInstRecycled && Idx < NewIS->getDefs().size())
    NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);

  if (IsInstRecycled)
    return llvm::make_error<RecycledInstErr>(NewIS);
  else
    return std::move(CreatedIS);
}
} // namespace mca
} // namespace llvm