1 #pragma once 2 #include <torch/csrc/profiler/unwind/dwarf_enums.h> 3 #include <torch/csrc/profiler/unwind/dwarf_symbolize_enums.h> 4 #include <torch/csrc/profiler/unwind/lexer.h> 5 #include <torch/csrc/profiler/unwind/sections.h> 6 #include <torch/csrc/profiler/unwind/unwind_error.h> 7 #include <cstdint> 8 #include <optional> 9 10 namespace torch::unwind { 11 12 struct DebugInfo { DebugInfoDebugInfo13 DebugInfo(Sections& s) : s_(s) {} 14 parseDebugInfo15 void parse(uint64_t offset) { 16 auto L = parseHeader(offset); 17 parseCompileUnit(L); 18 } lineNumberProgramOffsetDebugInfo19 std::optional<uint64_t> lineNumberProgramOffset() { 20 return line_number_program_offset_; 21 } nextOffsetDebugInfo22 uint64_t nextOffset() { 23 return end_ - s_.debug_info.data; 24 } rangesDebugInfo25 std::vector<std::pair<uint64_t, uint64_t>> ranges() { 26 if (range_ptr_) { 27 auto offset = range_ptr_->first; 28 if (range_ptr_->second == DW_FORM_rnglistx) { 29 UNWIND_CHECK(rnglists_base_, "rnglistx but not rnglists_base_ set"); 30 LOG_INFO("index for rnglistx {:x} + {:x}\n", *rnglists_base_, offset); 31 CheckedLexer L = s_.debug_rnglists.lexer( 32 *rnglists_base_ + offset * sec_offset_size_); 33 auto read = readSegmentOffset(L); 34 offset = *rnglists_base_ + read; 35 } 36 return version_ == 4 ? readRanges4(offset) : readRanges5(offset); 37 } 38 if (!highpc_) { 39 return {}; 40 } 41 return {{lowpc_, lowpc_ + *highpc_}}; 42 } 43 is64bitDebugInfo44 bool is64bit() { 45 return is_64bit_; 46 } 47 48 private: parseHeaderDebugInfo49 CheckedLexer parseHeader(uint64_t offset) { 50 offset_ = offset; 51 CheckedLexer L = s_.debug_info.lexer(offset_); 52 std::tie(length_, is_64bit_) = L.readSectionLength(); 53 sec_offset_size_ = is_64bit_ ? 8 : 4; 54 end_ = (const char*)L.loc() + length_; 55 version_ = L.read<uint16_t>(); 56 UNWIND_CHECK( 57 version_ == 5 || version_ == 4, 58 "unexpected dwarf version {}", 59 version_); 60 uint8_t address_size = 0; 61 if (version_ == 5) { 62 auto unit_type = L.read<uint8_t>(); 63 UNWIND_CHECK(unit_type == 0x1, "unexpected unit type {}", unit_type); 64 address_size = L.read<uint8_t>(); 65 debug_abbrev_offset_ = 66 is_64bit_ ? L.read<uint64_t>() : L.read<uint32_t>(); 67 } else { 68 debug_abbrev_offset_ = 69 is_64bit_ ? L.read<uint64_t>() : L.read<uint32_t>(); 70 address_size = L.read<uint8_t>(); 71 } 72 LOG_INFO( 73 "compilation unit at offset {:x} with length {:x} and debug_abbrev_offset {:x}\n", 74 offset, 75 length_, 76 debug_abbrev_offset_); 77 UNWIND_CHECK( 78 address_size == 8, 79 "expected 64-bit dwarf but found address size {}", 80 address_size); 81 return L; 82 } 83 readSegmentOffsetDebugInfo84 uint64_t readSegmentOffset(CheckedLexer& L) { 85 return s_.readSegmentOffset(L, is_64bit_); 86 } 87 readEncodedDebugInfo88 uint64_t readEncoded(CheckedLexer& L, uint64_t encoding) { 89 switch (encoding) { 90 case DW_FORM_data8: 91 case DW_FORM_addr: 92 return L.read<uint64_t>(); 93 case DW_FORM_data4: 94 return L.read<uint32_t>(); 95 case DW_FORM_addrx: { 96 auto idx = L.readULEB128(); 97 return s_.debug_addr.lexer(address_base_ + sizeof(uint64_t) * idx) 98 .read<uint64_t>(); 99 } 100 case DW_FORM_sec_offset: 101 return readSegmentOffset(L); 102 case DW_FORM_rnglistx: { 103 return L.readULEB128(); 104 } 105 default: 106 UNWIND_CHECK(false, "unexpected encoding"); 107 } 108 } 109 parseCompileUnitDebugInfo110 void parseCompileUnit(CheckedLexer& L) { 111 auto entry = L.readULEB128(); 112 auto A = findAbbrev(debug_abbrev_offset_, entry); 113 while (true) { 114 auto attr = A.readULEB128(); 115 auto form = A.readULEB128(); 116 if (attr == 0 && form == 0) { 117 break; 118 } 119 if (form == DW_FORM_implicit_const) { 120 A.readSLEB128(); 121 } 122 if (attr == DW_AT_low_pc) { 123 lowpc_ = readEncoded(L, form); 124 LOG_INFO(" lowpc {:x}\n", lowpc_); 125 } else if (attr == DW_AT_high_pc) { 126 highpc_ = readEncoded(L, form); 127 range_ptr_ = std::nullopt; 128 LOG_INFO(" highpc {:x}\n", *highpc_); 129 } else if (attr == DW_AT_addr_base) { 130 UNWIND_CHECK(form == DW_FORM_sec_offset, "unexpected addr_base form"); 131 address_base_ = readSegmentOffset(L); 132 LOG_INFO(" address base {:x}\n", address_base_); 133 } else if (attr == DW_AT_rnglists_base) { 134 UNWIND_CHECK( 135 form == DW_FORM_sec_offset, "unexpected rnglists_base form"); 136 rnglists_base_ = readSegmentOffset(L); 137 LOG_INFO(" range base {:x}\n", *rnglists_base_); 138 } else if (form == DW_FORM_string) { 139 L.readCString(); 140 } else if (attr == DW_AT_stmt_list) { 141 UNWIND_CHECK(form == DW_FORM_sec_offset, "unexpected stmt_list form"); 142 LOG_INFO(" program table offset {:x}\n", *line_number_program_offset_); 143 line_number_program_offset_ = readSegmentOffset(L); 144 } else if (form == DW_FORM_exprloc) { 145 auto sz = L.readULEB128(); 146 L.skip(int64_t(sz)); 147 } else if (form == DW_FORM_block1) { 148 auto sz = L.read<uint8_t>(); 149 L.skip(int64_t(sz)); 150 } else if (attr == DW_AT_ranges) { 151 auto range_offset = readEncoded(L, form); 152 LOG_INFO("setting range_ptr to {:x} {:x}\n", range_offset, form); 153 range_ptr_.emplace(range_offset, form); 154 } else if ( 155 form == DW_FORM_udata || form == DW_FORM_rnglistx || 156 form == DW_FORM_strx || form == DW_FORM_loclistx || 157 form == DW_FORM_addrx) { 158 L.readULEB128(); 159 } else if (form == DW_FORM_sdata) { 160 L.readSLEB128(); 161 } else { 162 auto sz = formSize(form, sec_offset_size_); 163 UNWIND_CHECK(sz, "unsupported form in compilation unit {:x}", form); 164 L.skip(int64_t(*sz)); 165 } 166 } 167 } 168 readRanges4DebugInfo169 std::vector<std::pair<uint64_t, uint64_t>> readRanges4(uint64_t offset) { 170 CheckedLexer L = s_.debug_ranges.lexer(offset); 171 std::vector<std::pair<uint64_t, uint64_t>> ranges; 172 uint64_t base = lowpc_; 173 while (true) { 174 auto start = L.read<uint64_t>(); 175 auto end = L.read<uint64_t>(); 176 if (start == 0 && end == 0) { 177 break; 178 } 179 if (start == std::numeric_limits<uint64_t>::max()) { 180 base = end; 181 } else { 182 ranges.emplace_back(base + start, base + end); 183 } 184 } 185 return ranges; 186 } 187 readRanges5DebugInfo188 std::vector<std::pair<uint64_t, uint64_t>> readRanges5(uint64_t offset) { 189 CheckedLexer L = s_.debug_rnglists.lexer(offset); 190 uint64_t base = 0; 191 LOG_INFO("BEGIN RANGES {:x}\n", offset); 192 std::vector<std::pair<uint64_t, uint64_t>> ranges; 193 while (true) { 194 auto op = L.read<uint8_t>(); 195 switch (op) { 196 case DW_RLE_end_of_list: 197 LOG_INFO("END RANGES\n"); 198 return ranges; 199 case DW_RLE_base_addressx: { 200 base = readEncoded(L, DW_FORM_addrx); 201 LOG_INFO("BASE ADDRX {:x}\n", base); 202 } break; 203 case DW_RLE_startx_length: { 204 auto s = readEncoded(L, DW_FORM_addrx); 205 auto e = L.readULEB128(); 206 LOG_INFO("startx_length {:x} {:x}\n", s, e); 207 ranges.emplace_back(s, s + e); 208 } break; 209 case DW_RLE_base_address: 210 base = L.read<uint64_t>(); 211 LOG_INFO("BASE ADDR {:x}\n", base); 212 break; 213 case DW_RLE_offset_pair: { 214 auto s = L.readULEB128(); 215 auto e = L.readULEB128(); 216 LOG_INFO("offset_pair {:x} {:x}\n", s, e); 217 ranges.emplace_back(base + s, base + e); 218 } break; 219 case DW_RLE_start_length: { 220 auto s = L.read<uint64_t>(); 221 auto e = L.readULEB128(); 222 LOG_INFO("start_length {:x} {:x}\n", s, e); 223 ranges.emplace_back(s, s + e); 224 } break; 225 default: 226 UNWIND_CHECK(false, "unknown range op: {}", op); 227 } 228 } 229 } 230 findAbbrevDebugInfo231 CheckedLexer findAbbrev(uint64_t offset, uint64_t entry) { 232 CheckedLexer L = s_.debug_abbrev.lexer(offset); 233 while (true) { 234 auto abbrev_code = L.readULEB128(); 235 UNWIND_CHECK( 236 abbrev_code != 0, 237 "could not find entry {} at offset {:x}", 238 entry, 239 offset); 240 auto tag = L.readULEB128(); 241 L.read<uint8_t>(); // has children 242 if (abbrev_code == entry) { 243 UNWIND_CHECK( 244 tag == DW_TAG_compile_unit, 245 "first entry was not a compile unit but {}", 246 tag); 247 return L; 248 } 249 while (true) { 250 auto attr = L.readULEB128(); 251 auto form = L.readULEB128(); 252 if (attr == 0 && form == 0) { 253 break; 254 } 255 if (form == DW_FORM_implicit_const) { 256 L.readSLEB128(); 257 } 258 } 259 } 260 } 261 262 Sections& s_; 263 std::optional<uint64_t> line_number_program_offset_; 264 uint64_t offset_ = 0; 265 uint8_t sec_offset_size_ = 0; 266 uint64_t length_ = 0; 267 const char* end_ = nullptr; 268 uint64_t debug_abbrev_offset_ = 0; 269 bool is_64bit_ = false; 270 271 std::optional<std::pair<uint64_t, uint8_t>> range_ptr_; 272 uint64_t lowpc_ = 0; 273 std::optional<uint64_t> highpc_; 274 uint16_t version_ = 0; 275 uint64_t address_base_ = 0; 276 std::optional<uint64_t> rnglists_base_; 277 }; 278 279 } // namespace torch::unwind 280