1 | //===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp -------*- C++ -*--===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Common functionality for different debug information format backends. |
10 | // LLVM currently supports DWARF and CodeView. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/CodeGen/DebugHandlerBase.h" |
15 | #include "llvm/CodeGen/AsmPrinter.h" |
16 | #include "llvm/CodeGen/MachineFunction.h" |
17 | #include "llvm/CodeGen/MachineInstr.h" |
18 | #include "llvm/CodeGen/MachineModuleInfo.h" |
19 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
20 | #include "llvm/IR/DebugInfo.h" |
21 | #include "llvm/IR/Module.h" |
22 | #include "llvm/MC/MCStreamer.h" |
23 | #include "llvm/Support/CommandLine.h" |
24 | |
25 | using namespace llvm; |
26 | |
27 | #define DEBUG_TYPE "dwarfdebug" |
28 | |
29 | /// If true, we drop variable location ranges which exist entirely outside the |
30 | /// variable's lexical scope instruction ranges. |
31 | static cl::opt<bool> TrimVarLocs("trim-var-locs" , cl::Hidden, cl::init(Val: true)); |
32 | |
33 | std::optional<DbgVariableLocation> |
34 | DbgVariableLocation::( |
35 | const MachineInstr &Instruction) { |
36 | DbgVariableLocation Location; |
37 | // Variables calculated from multiple locations can't be represented here. |
38 | if (Instruction.getNumDebugOperands() != 1) |
39 | return std::nullopt; |
40 | if (!Instruction.getDebugOperand(Index: 0).isReg()) |
41 | return std::nullopt; |
42 | Location.Register = Instruction.getDebugOperand(Index: 0).getReg(); |
43 | Location.FragmentInfo.reset(); |
44 | // We only handle expressions generated by DIExpression::appendOffset, |
45 | // which doesn't require a full stack machine. |
46 | int64_t Offset = 0; |
47 | const DIExpression *DIExpr = Instruction.getDebugExpression(); |
48 | auto Op = DIExpr->expr_op_begin(); |
49 | // We can handle a DBG_VALUE_LIST iff it has exactly one location operand that |
50 | // appears exactly once at the start of the expression. |
51 | if (Instruction.isDebugValueList()) { |
52 | if (Instruction.getNumDebugOperands() == 1 && |
53 | Op->getOp() == dwarf::DW_OP_LLVM_arg) |
54 | ++Op; |
55 | else |
56 | return std::nullopt; |
57 | } |
58 | while (Op != DIExpr->expr_op_end()) { |
59 | switch (Op->getOp()) { |
60 | case dwarf::DW_OP_constu: { |
61 | int Value = Op->getArg(I: 0); |
62 | ++Op; |
63 | if (Op != DIExpr->expr_op_end()) { |
64 | switch (Op->getOp()) { |
65 | case dwarf::DW_OP_minus: |
66 | Offset -= Value; |
67 | break; |
68 | case dwarf::DW_OP_plus: |
69 | Offset += Value; |
70 | break; |
71 | default: |
72 | continue; |
73 | } |
74 | } |
75 | } break; |
76 | case dwarf::DW_OP_plus_uconst: |
77 | Offset += Op->getArg(I: 0); |
78 | break; |
79 | case dwarf::DW_OP_LLVM_fragment: |
80 | Location.FragmentInfo = {Op->getArg(I: 1), Op->getArg(I: 0)}; |
81 | break; |
82 | case dwarf::DW_OP_deref: |
83 | Location.LoadChain.push_back(Elt: Offset); |
84 | Offset = 0; |
85 | break; |
86 | default: |
87 | return std::nullopt; |
88 | } |
89 | ++Op; |
90 | } |
91 | |
92 | // Do one final implicit DW_OP_deref if this was an indirect DBG_VALUE |
93 | // instruction. |
94 | // FIXME: Replace these with DIExpression. |
95 | if (Instruction.isIndirectDebugValue()) |
96 | Location.LoadChain.push_back(Elt: Offset); |
97 | |
98 | return Location; |
99 | } |
100 | |
101 | DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} |
102 | |
103 | DebugHandlerBase::~DebugHandlerBase() = default; |
104 | |
105 | void DebugHandlerBase::beginModule(Module *M) { |
106 | if (M->debug_compile_units().empty()) |
107 | Asm = nullptr; |
108 | } |
109 | |
110 | // Each LexicalScope has first instruction and last instruction to mark |
111 | // beginning and end of a scope respectively. Create an inverse map that list |
112 | // scopes starts (and ends) with an instruction. One instruction may start (or |
113 | // end) multiple scopes. Ignore scopes that are not reachable. |
114 | void DebugHandlerBase::identifyScopeMarkers() { |
115 | SmallVector<LexicalScope *, 4> WorkList; |
116 | WorkList.push_back(Elt: LScopes.getCurrentFunctionScope()); |
117 | while (!WorkList.empty()) { |
118 | LexicalScope *S = WorkList.pop_back_val(); |
119 | |
120 | const SmallVectorImpl<LexicalScope *> &Children = S->getChildren(); |
121 | if (!Children.empty()) |
122 | WorkList.append(in_start: Children.begin(), in_end: Children.end()); |
123 | |
124 | if (S->isAbstractScope()) |
125 | continue; |
126 | |
127 | for (const InsnRange &R : S->getRanges()) { |
128 | assert(R.first && "InsnRange does not have first instruction!" ); |
129 | assert(R.second && "InsnRange does not have second instruction!" ); |
130 | requestLabelBeforeInsn(MI: R.first); |
131 | requestLabelAfterInsn(MI: R.second); |
132 | } |
133 | } |
134 | } |
135 | |
136 | // Return Label preceding the instruction. |
137 | MCSymbol *DebugHandlerBase::getLabelBeforeInsn(const MachineInstr *MI) { |
138 | MCSymbol *Label = LabelsBeforeInsn.lookup(Val: MI); |
139 | assert(Label && "Didn't insert label before instruction" ); |
140 | return Label; |
141 | } |
142 | |
143 | // Return Label immediately following the instruction. |
144 | MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) { |
145 | return LabelsAfterInsn.lookup(Val: MI); |
146 | } |
147 | |
148 | /// If this type is derived from a base type then return base type size. |
149 | uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) { |
150 | assert(Ty); |
151 | const DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Val: Ty); |
152 | if (!DDTy) |
153 | return Ty->getSizeInBits(); |
154 | |
155 | unsigned Tag = DDTy->getTag(); |
156 | |
157 | if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && |
158 | Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && |
159 | Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type && |
160 | Tag != dwarf::DW_TAG_immutable_type && |
161 | Tag != dwarf::DW_TAG_template_alias) |
162 | return DDTy->getSizeInBits(); |
163 | |
164 | DIType *BaseType = DDTy->getBaseType(); |
165 | |
166 | if (!BaseType) |
167 | return 0; |
168 | |
169 | // If this is a derived type, go ahead and get the base type, unless it's a |
170 | // reference then it's just the size of the field. Pointer types have no need |
171 | // of this since they're a different type of qualification on the type. |
172 | if (BaseType->getTag() == dwarf::DW_TAG_reference_type || |
173 | BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type) |
174 | return Ty->getSizeInBits(); |
175 | |
176 | return getBaseTypeSize(Ty: BaseType); |
177 | } |
178 | |
179 | bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) { |
180 | if (isa<DIStringType>(Val: Ty)) { |
181 | // Some transformations (e.g. instcombine) may decide to turn a Fortran |
182 | // character object into an integer, and later ones (e.g. SROA) may |
183 | // further inject a constant integer in a llvm.dbg.value call to track |
184 | // the object's value. Here we trust the transformations are doing the |
185 | // right thing, and treat the constant as unsigned to preserve that value |
186 | // (i.e. avoid sign extension). |
187 | return true; |
188 | } |
189 | |
190 | if (auto *CTy = dyn_cast<DICompositeType>(Val: Ty)) { |
191 | if (CTy->getTag() == dwarf::DW_TAG_enumeration_type) { |
192 | if (!(Ty = CTy->getBaseType())) |
193 | // FIXME: Enums without a fixed underlying type have unknown signedness |
194 | // here, leading to incorrectly emitted constants. |
195 | return false; |
196 | } else |
197 | // (Pieces of) aggregate types that get hacked apart by SROA may be |
198 | // represented by a constant. Encode them as unsigned bytes. |
199 | return true; |
200 | } |
201 | |
202 | if (auto *DTy = dyn_cast<DIDerivedType>(Val: Ty)) { |
203 | dwarf::Tag T = (dwarf::Tag)Ty->getTag(); |
204 | // Encode pointer constants as unsigned bytes. This is used at least for |
205 | // null pointer constant emission. |
206 | // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed |
207 | // here, but accept them for now due to a bug in SROA producing bogus |
208 | // dbg.values. |
209 | if (T == dwarf::DW_TAG_pointer_type || |
210 | T == dwarf::DW_TAG_ptr_to_member_type || |
211 | T == dwarf::DW_TAG_reference_type || |
212 | T == dwarf::DW_TAG_rvalue_reference_type) |
213 | return true; |
214 | assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || |
215 | T == dwarf::DW_TAG_volatile_type || |
216 | T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type || |
217 | T == dwarf::DW_TAG_immutable_type || |
218 | T == dwarf::DW_TAG_template_alias); |
219 | assert(DTy->getBaseType() && "Expected valid base type" ); |
220 | return isUnsignedDIType(Ty: DTy->getBaseType()); |
221 | } |
222 | |
223 | auto *BTy = cast<DIBasicType>(Val: Ty); |
224 | unsigned Encoding = BTy->getEncoding(); |
225 | assert((Encoding == dwarf::DW_ATE_unsigned || |
226 | Encoding == dwarf::DW_ATE_unsigned_char || |
227 | Encoding == dwarf::DW_ATE_signed || |
228 | Encoding == dwarf::DW_ATE_signed_char || |
229 | Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF || |
230 | Encoding == dwarf::DW_ATE_boolean || |
231 | Encoding == dwarf::DW_ATE_complex_float || |
232 | Encoding == dwarf::DW_ATE_signed_fixed || |
233 | Encoding == dwarf::DW_ATE_unsigned_fixed || |
234 | (Ty->getTag() == dwarf::DW_TAG_unspecified_type && |
235 | Ty->getName() == "decltype(nullptr)" )) && |
236 | "Unsupported encoding" ); |
237 | return Encoding == dwarf::DW_ATE_unsigned || |
238 | Encoding == dwarf::DW_ATE_unsigned_char || |
239 | Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || |
240 | Encoding == llvm::dwarf::DW_ATE_unsigned_fixed || |
241 | Ty->getTag() == dwarf::DW_TAG_unspecified_type; |
242 | } |
243 | |
244 | static bool hasDebugInfo(const MachineModuleInfo *MMI, |
245 | const MachineFunction *MF) { |
246 | if (!MMI->hasDebugInfo()) |
247 | return false; |
248 | auto *SP = MF->getFunction().getSubprogram(); |
249 | if (!SP) |
250 | return false; |
251 | assert(SP->getUnit()); |
252 | auto EK = SP->getUnit()->getEmissionKind(); |
253 | if (EK == DICompileUnit::NoDebug) |
254 | return false; |
255 | return true; |
256 | } |
257 | |
258 | void DebugHandlerBase::beginFunction(const MachineFunction *MF) { |
259 | PrevInstBB = nullptr; |
260 | |
261 | if (!Asm || !hasDebugInfo(MMI, MF)) { |
262 | skippedNonDebugFunction(); |
263 | return; |
264 | } |
265 | |
266 | // Grab the lexical scopes for the function, if we don't have any of those |
267 | // then we're not going to be able to do anything. |
268 | LScopes.initialize(*MF); |
269 | if (LScopes.empty()) { |
270 | beginFunctionImpl(MF); |
271 | return; |
272 | } |
273 | |
274 | // Make sure that each lexical scope will have a begin/end label. |
275 | identifyScopeMarkers(); |
276 | |
277 | // Calculate history for local variables. |
278 | assert(DbgValues.empty() && "DbgValues map wasn't cleaned!" ); |
279 | assert(DbgLabels.empty() && "DbgLabels map wasn't cleaned!" ); |
280 | calculateDbgEntityHistory(MF, TRI: Asm->MF->getSubtarget().getRegisterInfo(), |
281 | DbgValues, DbgLabels); |
282 | InstOrdering.initialize(MF: *MF); |
283 | if (TrimVarLocs) |
284 | DbgValues.trimLocationRanges(MF: *MF, LScopes, Ordering: InstOrdering); |
285 | LLVM_DEBUG(DbgValues.dump(MF->getName())); |
286 | |
287 | // Request labels for the full history. |
288 | for (const auto &I : DbgValues) { |
289 | const auto &Entries = I.second; |
290 | if (Entries.empty()) |
291 | continue; |
292 | |
293 | auto IsDescribedByReg = [](const MachineInstr *MI) { |
294 | return any_of(Range: MI->debug_operands(), |
295 | P: [](auto &MO) { return MO.isReg() && MO.getReg(); }); |
296 | }; |
297 | |
298 | // The first mention of a function argument gets the CurrentFnBegin label, |
299 | // so arguments are visible when breaking at function entry. |
300 | // |
301 | // We do not change the label for values that are described by registers, |
302 | // as that could place them above their defining instructions. We should |
303 | // ideally not change the labels for constant debug values either, since |
304 | // doing that violates the ranges that are calculated in the history map. |
305 | // However, we currently do not emit debug values for constant arguments |
306 | // directly at the start of the function, so this code is still useful. |
307 | const DILocalVariable *DIVar = |
308 | Entries.front().getInstr()->getDebugVariable(); |
309 | if (DIVar->isParameter() && |
310 | getDISubprogram(Scope: DIVar->getScope())->describes(F: &MF->getFunction())) { |
311 | if (!IsDescribedByReg(Entries.front().getInstr())) |
312 | LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin(); |
313 | if (Entries.front().getInstr()->getDebugExpression()->isFragment()) { |
314 | // Mark all non-overlapping initial fragments. |
315 | for (const auto *I = Entries.begin(); I != Entries.end(); ++I) { |
316 | if (!I->isDbgValue()) |
317 | continue; |
318 | const DIExpression *Fragment = I->getInstr()->getDebugExpression(); |
319 | if (std::any_of(first: Entries.begin(), last: I, |
320 | pred: [&](DbgValueHistoryMap::Entry Pred) { |
321 | return Pred.isDbgValue() && |
322 | Fragment->fragmentsOverlap( |
323 | Other: Pred.getInstr()->getDebugExpression()); |
324 | })) |
325 | break; |
326 | // The code that generates location lists for DWARF assumes that the |
327 | // entries' start labels are monotonically increasing, and since we |
328 | // don't change the label for fragments that are described by |
329 | // registers, we must bail out when encountering such a fragment. |
330 | if (IsDescribedByReg(I->getInstr())) |
331 | break; |
332 | LabelsBeforeInsn[I->getInstr()] = Asm->getFunctionBegin(); |
333 | } |
334 | } |
335 | } |
336 | |
337 | for (const auto &Entry : Entries) { |
338 | if (Entry.isDbgValue()) |
339 | requestLabelBeforeInsn(MI: Entry.getInstr()); |
340 | else |
341 | requestLabelAfterInsn(MI: Entry.getInstr()); |
342 | } |
343 | } |
344 | |
345 | // Ensure there is a symbol before DBG_LABEL. |
346 | for (const auto &I : DbgLabels) { |
347 | const MachineInstr *MI = I.second; |
348 | requestLabelBeforeInsn(MI); |
349 | } |
350 | |
351 | PrevInstLoc = DebugLoc(); |
352 | PrevLabel = Asm->getFunctionBegin(); |
353 | beginFunctionImpl(MF); |
354 | } |
355 | |
356 | void DebugHandlerBase::beginInstruction(const MachineInstr *MI) { |
357 | if (!Asm || !MMI->hasDebugInfo()) |
358 | return; |
359 | |
360 | assert(CurMI == nullptr); |
361 | CurMI = MI; |
362 | |
363 | // Insert labels where requested. |
364 | DenseMap<const MachineInstr *, MCSymbol *>::iterator I = |
365 | LabelsBeforeInsn.find(Val: MI); |
366 | |
367 | // No label needed. |
368 | if (I == LabelsBeforeInsn.end()) |
369 | return; |
370 | |
371 | // Label already assigned. |
372 | if (I->second) |
373 | return; |
374 | |
375 | if (!PrevLabel) { |
376 | PrevLabel = MMI->getContext().createTempSymbol(); |
377 | Asm->OutStreamer->emitLabel(Symbol: PrevLabel); |
378 | } |
379 | I->second = PrevLabel; |
380 | } |
381 | |
382 | void DebugHandlerBase::endInstruction() { |
383 | if (!Asm || !MMI->hasDebugInfo()) |
384 | return; |
385 | |
386 | assert(CurMI != nullptr); |
387 | // Don't create a new label after DBG_VALUE and other instructions that don't |
388 | // generate code. |
389 | if (!CurMI->isMetaInstruction()) { |
390 | PrevLabel = nullptr; |
391 | PrevInstBB = CurMI->getParent(); |
392 | } |
393 | |
394 | DenseMap<const MachineInstr *, MCSymbol *>::iterator I = |
395 | LabelsAfterInsn.find(Val: CurMI); |
396 | |
397 | // No label needed or label already assigned. |
398 | if (I == LabelsAfterInsn.end() || I->second) { |
399 | CurMI = nullptr; |
400 | return; |
401 | } |
402 | |
403 | // We need a label after this instruction. With basic block sections, just |
404 | // use the end symbol of the section if this is the last instruction of the |
405 | // section. This reduces the need for an additional label and also helps |
406 | // merging ranges. |
407 | if (CurMI->getParent()->isEndSection() && CurMI->getNextNode() == nullptr) { |
408 | PrevLabel = CurMI->getParent()->getEndSymbol(); |
409 | } else if (!PrevLabel) { |
410 | PrevLabel = MMI->getContext().createTempSymbol(); |
411 | Asm->OutStreamer->emitLabel(Symbol: PrevLabel); |
412 | } |
413 | I->second = PrevLabel; |
414 | CurMI = nullptr; |
415 | } |
416 | |
417 | void DebugHandlerBase::endFunction(const MachineFunction *MF) { |
418 | if (Asm && hasDebugInfo(MMI, MF)) |
419 | endFunctionImpl(MF); |
420 | DbgValues.clear(); |
421 | DbgLabels.clear(); |
422 | LabelsBeforeInsn.clear(); |
423 | LabelsAfterInsn.clear(); |
424 | InstOrdering.clear(); |
425 | } |
426 | |
427 | void DebugHandlerBase::beginBasicBlockSection(const MachineBasicBlock &MBB) { |
428 | EpilogBeginBlock = nullptr; |
429 | if (!MBB.isEntryBlock()) |
430 | PrevLabel = MBB.getSymbol(); |
431 | } |
432 | |
433 | void DebugHandlerBase::endBasicBlockSection(const MachineBasicBlock &MBB) { |
434 | PrevLabel = nullptr; |
435 | } |
436 | |