xref: /aosp_15_r20/external/pytorch/torch/csrc/profiler/unwind/debug_info.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #pragma once
2 #include <torch/csrc/profiler/unwind/dwarf_enums.h>
3 #include <torch/csrc/profiler/unwind/dwarf_symbolize_enums.h>
4 #include <torch/csrc/profiler/unwind/lexer.h>
5 #include <torch/csrc/profiler/unwind/sections.h>
6 #include <torch/csrc/profiler/unwind/unwind_error.h>
7 #include <cstdint>
8 #include <optional>
9 
10 namespace torch::unwind {
11 
12 struct DebugInfo {
DebugInfoDebugInfo13   DebugInfo(Sections& s) : s_(s) {}
14 
parseDebugInfo15   void parse(uint64_t offset) {
16     auto L = parseHeader(offset);
17     parseCompileUnit(L);
18   }
lineNumberProgramOffsetDebugInfo19   std::optional<uint64_t> lineNumberProgramOffset() {
20     return line_number_program_offset_;
21   }
nextOffsetDebugInfo22   uint64_t nextOffset() {
23     return end_ - s_.debug_info.data;
24   }
rangesDebugInfo25   std::vector<std::pair<uint64_t, uint64_t>> ranges() {
26     if (range_ptr_) {
27       auto offset = range_ptr_->first;
28       if (range_ptr_->second == DW_FORM_rnglistx) {
29         UNWIND_CHECK(rnglists_base_, "rnglistx but not rnglists_base_ set");
30         LOG_INFO("index for rnglistx {:x} + {:x}\n", *rnglists_base_, offset);
31         CheckedLexer L = s_.debug_rnglists.lexer(
32             *rnglists_base_ + offset * sec_offset_size_);
33         auto read = readSegmentOffset(L);
34         offset = *rnglists_base_ + read;
35       }
36       return version_ == 4 ? readRanges4(offset) : readRanges5(offset);
37     }
38     if (!highpc_) {
39       return {};
40     }
41     return {{lowpc_, lowpc_ + *highpc_}};
42   }
43 
is64bitDebugInfo44   bool is64bit() {
45     return is_64bit_;
46   }
47 
48  private:
parseHeaderDebugInfo49   CheckedLexer parseHeader(uint64_t offset) {
50     offset_ = offset;
51     CheckedLexer L = s_.debug_info.lexer(offset_);
52     std::tie(length_, is_64bit_) = L.readSectionLength();
53     sec_offset_size_ = is_64bit_ ? 8 : 4;
54     end_ = (const char*)L.loc() + length_;
55     version_ = L.read<uint16_t>();
56     UNWIND_CHECK(
57         version_ == 5 || version_ == 4,
58         "unexpected dwarf version {}",
59         version_);
60     uint8_t address_size = 0;
61     if (version_ == 5) {
62       auto unit_type = L.read<uint8_t>();
63       UNWIND_CHECK(unit_type == 0x1, "unexpected unit type {}", unit_type);
64       address_size = L.read<uint8_t>();
65       debug_abbrev_offset_ =
66           is_64bit_ ? L.read<uint64_t>() : L.read<uint32_t>();
67     } else {
68       debug_abbrev_offset_ =
69           is_64bit_ ? L.read<uint64_t>() : L.read<uint32_t>();
70       address_size = L.read<uint8_t>();
71     }
72     LOG_INFO(
73         "compilation unit at offset {:x} with length {:x} and debug_abbrev_offset {:x}\n",
74         offset,
75         length_,
76         debug_abbrev_offset_);
77     UNWIND_CHECK(
78         address_size == 8,
79         "expected 64-bit dwarf but found address size {}",
80         address_size);
81     return L;
82   }
83 
readSegmentOffsetDebugInfo84   uint64_t readSegmentOffset(CheckedLexer& L) {
85     return s_.readSegmentOffset(L, is_64bit_);
86   }
87 
readEncodedDebugInfo88   uint64_t readEncoded(CheckedLexer& L, uint64_t encoding) {
89     switch (encoding) {
90       case DW_FORM_data8:
91       case DW_FORM_addr:
92         return L.read<uint64_t>();
93       case DW_FORM_data4:
94         return L.read<uint32_t>();
95       case DW_FORM_addrx: {
96         auto idx = L.readULEB128();
97         return s_.debug_addr.lexer(address_base_ + sizeof(uint64_t) * idx)
98             .read<uint64_t>();
99       }
100       case DW_FORM_sec_offset:
101         return readSegmentOffset(L);
102       case DW_FORM_rnglistx: {
103         return L.readULEB128();
104       }
105       default:
106         UNWIND_CHECK(false, "unexpected encoding");
107     }
108   }
109 
parseCompileUnitDebugInfo110   void parseCompileUnit(CheckedLexer& L) {
111     auto entry = L.readULEB128();
112     auto A = findAbbrev(debug_abbrev_offset_, entry);
113     while (true) {
114       auto attr = A.readULEB128();
115       auto form = A.readULEB128();
116       if (attr == 0 && form == 0) {
117         break;
118       }
119       if (form == DW_FORM_implicit_const) {
120         A.readSLEB128();
121       }
122       if (attr == DW_AT_low_pc) {
123         lowpc_ = readEncoded(L, form);
124         LOG_INFO("  lowpc {:x}\n", lowpc_);
125       } else if (attr == DW_AT_high_pc) {
126         highpc_ = readEncoded(L, form);
127         range_ptr_ = std::nullopt;
128         LOG_INFO("  highpc {:x}\n", *highpc_);
129       } else if (attr == DW_AT_addr_base) {
130         UNWIND_CHECK(form == DW_FORM_sec_offset, "unexpected addr_base form");
131         address_base_ = readSegmentOffset(L);
132         LOG_INFO("  address base {:x}\n", address_base_);
133       } else if (attr == DW_AT_rnglists_base) {
134         UNWIND_CHECK(
135             form == DW_FORM_sec_offset, "unexpected rnglists_base form");
136         rnglists_base_ = readSegmentOffset(L);
137         LOG_INFO("  range base {:x}\n", *rnglists_base_);
138       } else if (form == DW_FORM_string) {
139         L.readCString();
140       } else if (attr == DW_AT_stmt_list) {
141         UNWIND_CHECK(form == DW_FORM_sec_offset, "unexpected stmt_list form");
142         LOG_INFO("  program table offset {:x}\n", *line_number_program_offset_);
143         line_number_program_offset_ = readSegmentOffset(L);
144       } else if (form == DW_FORM_exprloc) {
145         auto sz = L.readULEB128();
146         L.skip(int64_t(sz));
147       } else if (form == DW_FORM_block1) {
148         auto sz = L.read<uint8_t>();
149         L.skip(int64_t(sz));
150       } else if (attr == DW_AT_ranges) {
151         auto range_offset = readEncoded(L, form);
152         LOG_INFO("setting range_ptr to {:x} {:x}\n", range_offset, form);
153         range_ptr_.emplace(range_offset, form);
154       } else if (
155           form == DW_FORM_udata || form == DW_FORM_rnglistx ||
156           form == DW_FORM_strx || form == DW_FORM_loclistx ||
157           form == DW_FORM_addrx) {
158         L.readULEB128();
159       } else if (form == DW_FORM_sdata) {
160         L.readSLEB128();
161       } else {
162         auto sz = formSize(form, sec_offset_size_);
163         UNWIND_CHECK(sz, "unsupported form in compilation unit {:x}", form);
164         L.skip(int64_t(*sz));
165       }
166     }
167   }
168 
readRanges4DebugInfo169   std::vector<std::pair<uint64_t, uint64_t>> readRanges4(uint64_t offset) {
170     CheckedLexer L = s_.debug_ranges.lexer(offset);
171     std::vector<std::pair<uint64_t, uint64_t>> ranges;
172     uint64_t base = lowpc_;
173     while (true) {
174       auto start = L.read<uint64_t>();
175       auto end = L.read<uint64_t>();
176       if (start == 0 && end == 0) {
177         break;
178       }
179       if (start == std::numeric_limits<uint64_t>::max()) {
180         base = end;
181       } else {
182         ranges.emplace_back(base + start, base + end);
183       }
184     }
185     return ranges;
186   }
187 
readRanges5DebugInfo188   std::vector<std::pair<uint64_t, uint64_t>> readRanges5(uint64_t offset) {
189     CheckedLexer L = s_.debug_rnglists.lexer(offset);
190     uint64_t base = 0;
191     LOG_INFO("BEGIN RANGES {:x}\n", offset);
192     std::vector<std::pair<uint64_t, uint64_t>> ranges;
193     while (true) {
194       auto op = L.read<uint8_t>();
195       switch (op) {
196         case DW_RLE_end_of_list:
197           LOG_INFO("END RANGES\n");
198           return ranges;
199         case DW_RLE_base_addressx: {
200           base = readEncoded(L, DW_FORM_addrx);
201           LOG_INFO("BASE ADDRX {:x}\n", base);
202         } break;
203         case DW_RLE_startx_length: {
204           auto s = readEncoded(L, DW_FORM_addrx);
205           auto e = L.readULEB128();
206           LOG_INFO("startx_length {:x} {:x}\n", s, e);
207           ranges.emplace_back(s, s + e);
208         } break;
209         case DW_RLE_base_address:
210           base = L.read<uint64_t>();
211           LOG_INFO("BASE ADDR {:x}\n", base);
212           break;
213         case DW_RLE_offset_pair: {
214           auto s = L.readULEB128();
215           auto e = L.readULEB128();
216           LOG_INFO("offset_pair {:x} {:x}\n", s, e);
217           ranges.emplace_back(base + s, base + e);
218         } break;
219         case DW_RLE_start_length: {
220           auto s = L.read<uint64_t>();
221           auto e = L.readULEB128();
222           LOG_INFO("start_length {:x} {:x}\n", s, e);
223           ranges.emplace_back(s, s + e);
224         } break;
225         default:
226           UNWIND_CHECK(false, "unknown range op: {}", op);
227       }
228     }
229   }
230 
findAbbrevDebugInfo231   CheckedLexer findAbbrev(uint64_t offset, uint64_t entry) {
232     CheckedLexer L = s_.debug_abbrev.lexer(offset);
233     while (true) {
234       auto abbrev_code = L.readULEB128();
235       UNWIND_CHECK(
236           abbrev_code != 0,
237           "could not find entry {} at offset {:x}",
238           entry,
239           offset);
240       auto tag = L.readULEB128();
241       L.read<uint8_t>(); // has children
242       if (abbrev_code == entry) {
243         UNWIND_CHECK(
244             tag == DW_TAG_compile_unit,
245             "first entry was not a compile unit but {}",
246             tag);
247         return L;
248       }
249       while (true) {
250         auto attr = L.readULEB128();
251         auto form = L.readULEB128();
252         if (attr == 0 && form == 0) {
253           break;
254         }
255         if (form == DW_FORM_implicit_const) {
256           L.readSLEB128();
257         }
258       }
259     }
260   }
261 
262   Sections& s_;
263   std::optional<uint64_t> line_number_program_offset_;
264   uint64_t offset_ = 0;
265   uint8_t sec_offset_size_ = 0;
266   uint64_t length_ = 0;
267   const char* end_ = nullptr;
268   uint64_t debug_abbrev_offset_ = 0;
269   bool is_64bit_ = false;
270 
271   std::optional<std::pair<uint64_t, uint8_t>> range_ptr_;
272   uint64_t lowpc_ = 0;
273   std::optional<uint64_t> highpc_;
274   uint16_t version_ = 0;
275   uint64_t address_base_ = 0;
276   std::optional<uint64_t> rnglists_base_;
277 };
278 
279 } // namespace torch::unwind
280