| 1 | //===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp -------*- C++ -*--===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Common functionality for different debug information format backends. |
| 10 | // LLVM currently supports DWARF and CodeView. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "llvm/CodeGen/DebugHandlerBase.h" |
| 15 | #include "llvm/CodeGen/AsmPrinter.h" |
| 16 | #include "llvm/CodeGen/MachineFunction.h" |
| 17 | #include "llvm/CodeGen/MachineInstr.h" |
| 18 | #include "llvm/CodeGen/MachineModuleInfo.h" |
| 19 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
| 20 | #include "llvm/IR/DebugInfo.h" |
| 21 | #include "llvm/IR/Module.h" |
| 22 | #include "llvm/MC/MCStreamer.h" |
| 23 | #include "llvm/Support/CommandLine.h" |
| 24 | |
| 25 | using namespace llvm; |
| 26 | |
| 27 | #define DEBUG_TYPE "dwarfdebug" |
| 28 | |
| 29 | /// If true, we drop variable location ranges which exist entirely outside the |
| 30 | /// variable's lexical scope instruction ranges. |
| 31 | static cl::opt<bool> TrimVarLocs("trim-var-locs" , cl::Hidden, cl::init(Val: true)); |
| 32 | |
| 33 | std::optional<DbgVariableLocation> |
| 34 | DbgVariableLocation::( |
| 35 | const MachineInstr &Instruction) { |
| 36 | DbgVariableLocation Location; |
| 37 | // Variables calculated from multiple locations can't be represented here. |
| 38 | if (Instruction.getNumDebugOperands() != 1) |
| 39 | return std::nullopt; |
| 40 | if (!Instruction.getDebugOperand(Index: 0).isReg()) |
| 41 | return std::nullopt; |
| 42 | Location.Register = Instruction.getDebugOperand(Index: 0).getReg().asMCReg(); |
| 43 | Location.FragmentInfo.reset(); |
| 44 | // We only handle expressions generated by DIExpression::appendOffset, |
| 45 | // which doesn't require a full stack machine. |
| 46 | int64_t Offset = 0; |
| 47 | const DIExpression *DIExpr = Instruction.getDebugExpression(); |
| 48 | auto Op = DIExpr->expr_op_begin(); |
| 49 | // We can handle a DBG_VALUE_LIST iff it has exactly one location operand that |
| 50 | // appears exactly once at the start of the expression. |
| 51 | if (Instruction.isDebugValueList()) { |
| 52 | if (Instruction.getNumDebugOperands() == 1 && |
| 53 | Op->getOp() == dwarf::DW_OP_LLVM_arg) |
| 54 | ++Op; |
| 55 | else |
| 56 | return std::nullopt; |
| 57 | } |
| 58 | while (Op != DIExpr->expr_op_end()) { |
| 59 | switch (Op->getOp()) { |
| 60 | case dwarf::DW_OP_constu: { |
| 61 | int Value = Op->getArg(I: 0); |
| 62 | ++Op; |
| 63 | if (Op != DIExpr->expr_op_end()) { |
| 64 | switch (Op->getOp()) { |
| 65 | case dwarf::DW_OP_minus: |
| 66 | Offset -= Value; |
| 67 | break; |
| 68 | case dwarf::DW_OP_plus: |
| 69 | Offset += Value; |
| 70 | break; |
| 71 | default: |
| 72 | continue; |
| 73 | } |
| 74 | } |
| 75 | } break; |
| 76 | case dwarf::DW_OP_plus_uconst: |
| 77 | Offset += Op->getArg(I: 0); |
| 78 | break; |
| 79 | case dwarf::DW_OP_LLVM_fragment: |
| 80 | Location.FragmentInfo = {Op->getArg(I: 1), Op->getArg(I: 0)}; |
| 81 | break; |
| 82 | case dwarf::DW_OP_deref: |
| 83 | Location.LoadChain.push_back(Elt: Offset); |
| 84 | Offset = 0; |
| 85 | break; |
| 86 | default: |
| 87 | return std::nullopt; |
| 88 | } |
| 89 | ++Op; |
| 90 | } |
| 91 | |
| 92 | // Do one final implicit DW_OP_deref if this was an indirect DBG_VALUE |
| 93 | // instruction. |
| 94 | // FIXME: Replace these with DIExpression. |
| 95 | if (Instruction.isIndirectDebugValue()) |
| 96 | Location.LoadChain.push_back(Elt: Offset); |
| 97 | |
| 98 | return Location; |
| 99 | } |
| 100 | |
| 101 | DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} |
| 102 | |
| 103 | DebugHandlerBase::~DebugHandlerBase() = default; |
| 104 | |
| 105 | void DebugHandlerBase::beginModule(Module *M) { |
| 106 | if (M->debug_compile_units().empty()) |
| 107 | Asm = nullptr; |
| 108 | else |
| 109 | LScopes.initialize(*M); |
| 110 | } |
| 111 | |
| 112 | // Each LexicalScope has first instruction and last instruction to mark |
| 113 | // beginning and end of a scope respectively. Create an inverse map that list |
| 114 | // scopes starts (and ends) with an instruction. One instruction may start (or |
| 115 | // end) multiple scopes. Ignore scopes that are not reachable. |
| 116 | void DebugHandlerBase::identifyScopeMarkers() { |
| 117 | SmallVector<LexicalScope *, 4> WorkList; |
| 118 | WorkList.push_back(Elt: LScopes.getCurrentFunctionScope()); |
| 119 | while (!WorkList.empty()) { |
| 120 | LexicalScope *S = WorkList.pop_back_val(); |
| 121 | |
| 122 | const SmallVectorImpl<LexicalScope *> &Children = S->getChildren(); |
| 123 | if (!Children.empty()) |
| 124 | WorkList.append(in_start: Children.begin(), in_end: Children.end()); |
| 125 | |
| 126 | if (S->isAbstractScope()) |
| 127 | continue; |
| 128 | |
| 129 | for (const InsnRange &R : S->getRanges()) { |
| 130 | assert(R.first && "InsnRange does not have first instruction!" ); |
| 131 | assert(R.second && "InsnRange does not have second instruction!" ); |
| 132 | requestLabelBeforeInsn(MI: R.first); |
| 133 | requestLabelAfterInsn(MI: R.second); |
| 134 | } |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | // Return Label preceding the instruction. |
| 139 | MCSymbol *DebugHandlerBase::getLabelBeforeInsn(const MachineInstr *MI) { |
| 140 | MCSymbol *Label = LabelsBeforeInsn.lookup(Val: MI); |
| 141 | assert(Label && "Didn't insert label before instruction" ); |
| 142 | return Label; |
| 143 | } |
| 144 | |
| 145 | // Return Label immediately following the instruction. |
| 146 | MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) { |
| 147 | return LabelsAfterInsn.lookup(Val: MI); |
| 148 | } |
| 149 | |
| 150 | /// If this type is derived from a base type then return base type size. |
| 151 | uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) { |
| 152 | assert(Ty); |
| 153 | |
| 154 | unsigned Tag = Ty->getTag(); |
| 155 | |
| 156 | if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && |
| 157 | Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && |
| 158 | Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type && |
| 159 | Tag != dwarf::DW_TAG_immutable_type && |
| 160 | Tag != dwarf::DW_TAG_template_alias) |
| 161 | return Ty->getSizeInBits(); |
| 162 | |
| 163 | DIType *BaseType = nullptr; |
| 164 | if (const DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Val: Ty)) |
| 165 | BaseType = DDTy->getBaseType(); |
| 166 | else if (const DISubrangeType *SRTy = dyn_cast<DISubrangeType>(Val: Ty)) |
| 167 | BaseType = SRTy->getBaseType(); |
| 168 | |
| 169 | if (!BaseType) |
| 170 | return 0; |
| 171 | |
| 172 | // If this is a derived type, go ahead and get the base type, unless it's a |
| 173 | // reference then it's just the size of the field. Pointer types have no need |
| 174 | // of this since they're a different type of qualification on the type. |
| 175 | if (BaseType->getTag() == dwarf::DW_TAG_reference_type || |
| 176 | BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type) |
| 177 | return Ty->getSizeInBits(); |
| 178 | |
| 179 | return getBaseTypeSize(Ty: BaseType); |
| 180 | } |
| 181 | |
| 182 | bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) { |
| 183 | if (isa<DIStringType>(Val: Ty)) { |
| 184 | // Some transformations (e.g. instcombine) may decide to turn a Fortran |
| 185 | // character object into an integer, and later ones (e.g. SROA) may |
| 186 | // further inject a constant integer in a llvm.dbg.value call to track |
| 187 | // the object's value. Here we trust the transformations are doing the |
| 188 | // right thing, and treat the constant as unsigned to preserve that value |
| 189 | // (i.e. avoid sign extension). |
| 190 | return true; |
| 191 | } |
| 192 | |
| 193 | if (auto *SRTy = dyn_cast<DISubrangeType>(Val: Ty)) { |
| 194 | Ty = SRTy->getBaseType(); |
| 195 | if (!Ty) |
| 196 | return false; |
| 197 | } |
| 198 | |
| 199 | if (auto *CTy = dyn_cast<DICompositeType>(Val: Ty)) { |
| 200 | if (CTy->getTag() == dwarf::DW_TAG_enumeration_type) { |
| 201 | if (!(Ty = CTy->getBaseType())) |
| 202 | // FIXME: Enums without a fixed underlying type have unknown signedness |
| 203 | // here, leading to incorrectly emitted constants. |
| 204 | return false; |
| 205 | } else |
| 206 | // (Pieces of) aggregate types that get hacked apart by SROA may be |
| 207 | // represented by a constant. Encode them as unsigned bytes. |
| 208 | return true; |
| 209 | } |
| 210 | |
| 211 | if (auto *DTy = dyn_cast<DIDerivedType>(Val: Ty)) { |
| 212 | dwarf::Tag T = (dwarf::Tag)Ty->getTag(); |
| 213 | // Encode pointer constants as unsigned bytes. This is used at least for |
| 214 | // null pointer constant emission. |
| 215 | // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed |
| 216 | // here, but accept them for now due to a bug in SROA producing bogus |
| 217 | // dbg.values. |
| 218 | if (T == dwarf::DW_TAG_pointer_type || |
| 219 | T == dwarf::DW_TAG_ptr_to_member_type || |
| 220 | T == dwarf::DW_TAG_reference_type || |
| 221 | T == dwarf::DW_TAG_rvalue_reference_type) |
| 222 | return true; |
| 223 | assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || |
| 224 | T == dwarf::DW_TAG_volatile_type || |
| 225 | T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type || |
| 226 | T == dwarf::DW_TAG_immutable_type || |
| 227 | T == dwarf::DW_TAG_template_alias); |
| 228 | assert(DTy->getBaseType() && "Expected valid base type" ); |
| 229 | return isUnsignedDIType(Ty: DTy->getBaseType()); |
| 230 | } |
| 231 | |
| 232 | auto *BTy = cast<DIBasicType>(Val: Ty); |
| 233 | unsigned Encoding = BTy->getEncoding(); |
| 234 | assert((Encoding == dwarf::DW_ATE_unsigned || |
| 235 | Encoding == dwarf::DW_ATE_unsigned_char || |
| 236 | Encoding == dwarf::DW_ATE_signed || |
| 237 | Encoding == dwarf::DW_ATE_signed_char || |
| 238 | Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF || |
| 239 | Encoding == dwarf::DW_ATE_boolean || |
| 240 | Encoding == dwarf::DW_ATE_complex_float || |
| 241 | Encoding == dwarf::DW_ATE_signed_fixed || |
| 242 | Encoding == dwarf::DW_ATE_unsigned_fixed || |
| 243 | (Encoding >= dwarf::DW_ATE_lo_user && |
| 244 | Encoding <= dwarf::DW_ATE_hi_user) || |
| 245 | (Ty->getTag() == dwarf::DW_TAG_unspecified_type && |
| 246 | Ty->getName() == "decltype(nullptr)" )) && |
| 247 | "Unsupported encoding" ); |
| 248 | return Encoding == dwarf::DW_ATE_unsigned || |
| 249 | Encoding == dwarf::DW_ATE_unsigned_char || |
| 250 | Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || |
| 251 | Encoding == llvm::dwarf::DW_ATE_unsigned_fixed || |
| 252 | Ty->getTag() == dwarf::DW_TAG_unspecified_type; |
| 253 | } |
| 254 | |
| 255 | static bool hasDebugInfo(const MachineFunction *MF) { |
| 256 | auto *SP = MF->getFunction().getSubprogram(); |
| 257 | if (!SP) |
| 258 | return false; |
| 259 | assert(SP->getUnit()); |
| 260 | auto EK = SP->getUnit()->getEmissionKind(); |
| 261 | if (EK == DICompileUnit::NoDebug) |
| 262 | return false; |
| 263 | return true; |
| 264 | } |
| 265 | |
| 266 | void DebugHandlerBase::beginFunction(const MachineFunction *MF) { |
| 267 | PrevInstBB = nullptr; |
| 268 | |
| 269 | if (!Asm || !hasDebugInfo(MF)) { |
| 270 | skippedNonDebugFunction(); |
| 271 | return; |
| 272 | } |
| 273 | |
| 274 | // Grab the lexical scopes for the function, if we don't have any of those |
| 275 | // then we're not going to be able to do anything. |
| 276 | LScopes.scanFunction(*MF); |
| 277 | if (LScopes.empty()) { |
| 278 | beginFunctionImpl(MF); |
| 279 | return; |
| 280 | } |
| 281 | |
| 282 | // Make sure that each lexical scope will have a begin/end label. |
| 283 | identifyScopeMarkers(); |
| 284 | |
| 285 | // Calculate history for local variables. |
| 286 | assert(DbgValues.empty() && "DbgValues map wasn't cleaned!" ); |
| 287 | assert(DbgLabels.empty() && "DbgLabels map wasn't cleaned!" ); |
| 288 | calculateDbgEntityHistory(MF, TRI: Asm->MF->getSubtarget().getRegisterInfo(), |
| 289 | DbgValues, DbgLabels); |
| 290 | InstOrdering.initialize(MF: *MF); |
| 291 | if (TrimVarLocs) |
| 292 | DbgValues.trimLocationRanges(MF: *MF, LScopes, Ordering: InstOrdering); |
| 293 | LLVM_DEBUG(DbgValues.dump(MF->getName())); |
| 294 | |
| 295 | // Request labels for the full history. |
| 296 | for (const auto &I : DbgValues) { |
| 297 | const auto &Entries = I.second; |
| 298 | if (Entries.empty()) |
| 299 | continue; |
| 300 | |
| 301 | auto IsDescribedByReg = [](const MachineInstr *MI) { |
| 302 | return any_of(Range: MI->debug_operands(), |
| 303 | P: [](auto &MO) { return MO.isReg() && MO.getReg(); }); |
| 304 | }; |
| 305 | |
| 306 | // The first mention of a function argument gets the CurrentFnBegin label, |
| 307 | // so arguments are visible when breaking at function entry. |
| 308 | // |
| 309 | // We do not change the label for values that are described by registers, |
| 310 | // as that could place them above their defining instructions. We should |
| 311 | // ideally not change the labels for constant debug values either, since |
| 312 | // doing that violates the ranges that are calculated in the history map. |
| 313 | // However, we currently do not emit debug values for constant arguments |
| 314 | // directly at the start of the function, so this code is still useful. |
| 315 | const DILocalVariable *DIVar = |
| 316 | Entries.front().getInstr()->getDebugVariable(); |
| 317 | if (DIVar->isParameter() && |
| 318 | getDISubprogram(Scope: DIVar->getScope())->describes(F: &MF->getFunction())) { |
| 319 | if (!IsDescribedByReg(Entries.front().getInstr())) |
| 320 | LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin(); |
| 321 | if (Entries.front().getInstr()->getDebugExpression()->isFragment()) { |
| 322 | // Mark all non-overlapping initial fragments. |
| 323 | for (const auto *I = Entries.begin(); I != Entries.end(); ++I) { |
| 324 | if (!I->isDbgValue()) |
| 325 | continue; |
| 326 | const DIExpression *Fragment = I->getInstr()->getDebugExpression(); |
| 327 | if (std::any_of(first: Entries.begin(), last: I, |
| 328 | pred: [&](DbgValueHistoryMap::Entry Pred) { |
| 329 | return Pred.isDbgValue() && |
| 330 | Fragment->fragmentsOverlap( |
| 331 | Other: Pred.getInstr()->getDebugExpression()); |
| 332 | })) |
| 333 | break; |
| 334 | // The code that generates location lists for DWARF assumes that the |
| 335 | // entries' start labels are monotonically increasing, and since we |
| 336 | // don't change the label for fragments that are described by |
| 337 | // registers, we must bail out when encountering such a fragment. |
| 338 | if (IsDescribedByReg(I->getInstr())) |
| 339 | break; |
| 340 | LabelsBeforeInsn[I->getInstr()] = Asm->getFunctionBegin(); |
| 341 | } |
| 342 | } |
| 343 | } |
| 344 | |
| 345 | for (const auto &Entry : Entries) { |
| 346 | if (Entry.isDbgValue()) |
| 347 | requestLabelBeforeInsn(MI: Entry.getInstr()); |
| 348 | else |
| 349 | requestLabelAfterInsn(MI: Entry.getInstr()); |
| 350 | } |
| 351 | } |
| 352 | |
| 353 | // Ensure there is a symbol before DBG_LABEL. |
| 354 | for (const auto &I : DbgLabels) { |
| 355 | const MachineInstr *MI = I.second; |
| 356 | requestLabelBeforeInsn(MI); |
| 357 | } |
| 358 | |
| 359 | PrevInstLoc = DebugLoc(); |
| 360 | PrevLabel = Asm->getFunctionBegin(); |
| 361 | beginFunctionImpl(MF); |
| 362 | } |
| 363 | |
| 364 | void DebugHandlerBase::beginInstruction(const MachineInstr *MI) { |
| 365 | if (!Asm || !Asm->hasDebugInfo()) |
| 366 | return; |
| 367 | |
| 368 | assert(CurMI == nullptr); |
| 369 | CurMI = MI; |
| 370 | |
| 371 | // Insert labels where requested. |
| 372 | DenseMap<const MachineInstr *, MCSymbol *>::iterator I = |
| 373 | LabelsBeforeInsn.find(Val: MI); |
| 374 | |
| 375 | // No label needed. |
| 376 | if (I == LabelsBeforeInsn.end()) |
| 377 | return; |
| 378 | |
| 379 | // Label already assigned. |
| 380 | if (I->second) |
| 381 | return; |
| 382 | |
| 383 | if (!PrevLabel) { |
| 384 | PrevLabel = MMI->getContext().createTempSymbol(); |
| 385 | Asm->OutStreamer->emitLabel(Symbol: PrevLabel); |
| 386 | } |
| 387 | I->second = PrevLabel; |
| 388 | } |
| 389 | |
| 390 | void DebugHandlerBase::endInstruction() { |
| 391 | if (!Asm || !Asm->hasDebugInfo()) |
| 392 | return; |
| 393 | |
| 394 | assert(CurMI != nullptr); |
| 395 | // Don't create a new label after DBG_VALUE and other instructions that don't |
| 396 | // generate code. |
| 397 | if (!CurMI->isMetaInstruction()) { |
| 398 | PrevLabel = nullptr; |
| 399 | PrevInstBB = CurMI->getParent(); |
| 400 | } |
| 401 | |
| 402 | DenseMap<const MachineInstr *, MCSymbol *>::iterator I = |
| 403 | LabelsAfterInsn.find(Val: CurMI); |
| 404 | |
| 405 | // No label needed or label already assigned. |
| 406 | if (I == LabelsAfterInsn.end() || I->second) { |
| 407 | CurMI = nullptr; |
| 408 | return; |
| 409 | } |
| 410 | |
| 411 | // We need a label after this instruction. With basic block sections, just |
| 412 | // use the end symbol of the section if this is the last instruction of the |
| 413 | // section. This reduces the need for an additional label and also helps |
| 414 | // merging ranges. |
| 415 | if (CurMI->getParent()->isEndSection() && CurMI->getNextNode() == nullptr) { |
| 416 | PrevLabel = CurMI->getParent()->getEndSymbol(); |
| 417 | } else if (!PrevLabel) { |
| 418 | PrevLabel = MMI->getContext().createTempSymbol(); |
| 419 | Asm->OutStreamer->emitLabel(Symbol: PrevLabel); |
| 420 | } |
| 421 | I->second = PrevLabel; |
| 422 | CurMI = nullptr; |
| 423 | } |
| 424 | |
| 425 | void DebugHandlerBase::endFunction(const MachineFunction *MF) { |
| 426 | if (Asm && hasDebugInfo(MF)) |
| 427 | endFunctionImpl(MF); |
| 428 | DbgValues.clear(); |
| 429 | DbgLabels.clear(); |
| 430 | LabelsBeforeInsn.clear(); |
| 431 | LabelsAfterInsn.clear(); |
| 432 | InstOrdering.clear(); |
| 433 | } |
| 434 | |
| 435 | void DebugHandlerBase::beginBasicBlockSection(const MachineBasicBlock &MBB) { |
| 436 | EpilogBeginBlock = nullptr; |
| 437 | if (!MBB.isEntryBlock()) |
| 438 | PrevLabel = MBB.getSymbol(); |
| 439 | } |
| 440 | |
| 441 | void DebugHandlerBase::endBasicBlockSection(const MachineBasicBlock &MBB) { |
| 442 | PrevLabel = nullptr; |
| 443 | } |
| 444 | |