1 // Copyright 2010 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 // CFI reader author: Jim Blandy <[email protected]> <[email protected]>
30
31 // Implementation of LineInfo, CompilationUnit,
32 // and CallFrameInfo. See dwarf2reader.h for details.
33
34 #ifdef HAVE_CONFIG_H
35 #include <config.h> // Must come first
36 #endif
37
38 #include "common/dwarf/dwarf2reader.h"
39
40 #include <stdint.h>
41 #include <stdio.h>
42 #include <string.h>
43
44 #include <algorithm>
45 #include <map>
46 #include <memory>
47 #include <stack>
48 #include <string>
49 #include <utility>
50
51 #include <sys/stat.h>
52
53 #include "common/dwarf/bytereader-inl.h"
54 #include "common/dwarf/bytereader.h"
55 #include "common/dwarf/line_state_machine.h"
56 #include "common/using_std_string.h"
57 #include "google_breakpad/common/breakpad_types.h"
58
59 namespace google_breakpad {
60
// Finds the entry for section |name| in |sections|. |name| must begin with
// '.'. The exact name is tried first; if it is absent, the lookup is repeated
// with the leading '.' replaced by "__" (".debug_info" -> "__debug_info") and
// that second result is returned, which is sections.end() when neither
// spelling is present.
const SectionMap::const_iterator GetSectionByName(const SectionMap& sections,
                                                  const char* name) {
  assert(name[0] == '.');

  const SectionMap::const_iterator exact_match = sections.find(name);
  if (exact_match == sections.end()) {
    // Retry with the "__"-prefixed spelling of the same section name.
    const std::string macho_name = std::string("__") + (name + 1);
    return sections.find(macho_name);
  }
  return exact_match;
}
72
CompilationUnit(const string & path,const SectionMap & sections,uint64_t offset,ByteReader * reader,Dwarf2Handler * handler)73 CompilationUnit::CompilationUnit(const string& path,
74 const SectionMap& sections, uint64_t offset,
75 ByteReader* reader, Dwarf2Handler* handler)
76 : path_(path), offset_from_section_start_(offset), reader_(reader),
77 sections_(sections), handler_(handler), abbrevs_(),
78 string_buffer_(NULL), string_buffer_length_(0),
79 line_string_buffer_(NULL), line_string_buffer_length_(0),
80 str_offsets_buffer_(NULL), str_offsets_buffer_length_(0),
81 addr_buffer_(NULL), addr_buffer_length_(0),
82 is_split_dwarf_(false), is_type_unit_(false), dwo_id_(0), dwo_name_(),
83 skeleton_dwo_id_(0), addr_base_(0),
84 str_offsets_base_(0), have_checked_for_dwp_(false),
85 should_process_split_dwarf_(false), low_pc_(0),
86 has_source_line_info_(false), source_line_offset_(0) {}
87
88 // Initialize a compilation unit from a .dwo or .dwp file.
89 // In this case, we need the .debug_addr section from the
90 // executable file that contains the corresponding skeleton
91 // compilation unit. We also inherit the Dwarf2Handler from
92 // the executable file, and call it as if we were still
93 // processing the original compilation unit.
94
SetSplitDwarf(uint64_t addr_base,uint64_t dwo_id)95 void CompilationUnit::SetSplitDwarf(uint64_t addr_base,
96 uint64_t dwo_id) {
97 is_split_dwarf_ = true;
98 addr_base_ = addr_base;
99 skeleton_dwo_id_ = dwo_id;
100 }
101
102 // Read a DWARF2/3 abbreviation section.
// Each abbrev consists of an abbreviation number, a tag, a byte
104 // specifying whether the tag has children, and a list of
105 // attribute/form pairs.
106 // The list of forms is terminated by a 0 for the attribute, and a
107 // zero for the form. The entire abbreviation section is terminated
108 // by a zero for the code.
109
ReadAbbrevs()110 void CompilationUnit::ReadAbbrevs() {
111 if (abbrevs_)
112 return;
113
114 // First get the debug_abbrev section.
115 SectionMap::const_iterator iter =
116 GetSectionByName(sections_, ".debug_abbrev");
117 assert(iter != sections_.end());
118
119 abbrevs_ = new std::vector<Abbrev>;
120 abbrevs_->resize(1);
121
122 // The only way to check whether we are reading over the end of the
123 // buffer would be to first compute the size of the leb128 data by
124 // reading it, then go back and read it again.
125 const uint8_t* abbrev_start = iter->second.first +
126 header_.abbrev_offset;
127 const uint8_t* abbrevptr = abbrev_start;
128 #ifndef NDEBUG
129 const uint64_t abbrev_length = iter->second.second - header_.abbrev_offset;
130 #endif
131
132 uint64_t highest_number = 0;
133
134 while (1) {
135 CompilationUnit::Abbrev abbrev;
136 size_t len;
137 const uint64_t number = reader_->ReadUnsignedLEB128(abbrevptr, &len);
138 highest_number = std::max(highest_number, number);
139
140 if (number == 0)
141 break;
142 abbrev.number = number;
143 abbrevptr += len;
144
145 assert(abbrevptr < abbrev_start + abbrev_length);
146 const uint64_t tag = reader_->ReadUnsignedLEB128(abbrevptr, &len);
147 abbrevptr += len;
148 abbrev.tag = static_cast<enum DwarfTag>(tag);
149
150 assert(abbrevptr < abbrev_start + abbrev_length);
151 abbrev.has_children = reader_->ReadOneByte(abbrevptr);
152 abbrevptr += 1;
153
154 assert(abbrevptr < abbrev_start + abbrev_length);
155
156 while (1) {
157 const uint64_t nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
158 abbrevptr += len;
159
160 assert(abbrevptr < abbrev_start + abbrev_length);
161 const uint64_t formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
162 abbrevptr += len;
163 if (nametemp == 0 && formtemp == 0)
164 break;
165
166 uint64_t value = 0;
167 if (formtemp == DW_FORM_implicit_const) {
168 value = reader_->ReadUnsignedLEB128(abbrevptr, &len);
169 abbrevptr += len;
170 }
171 AttrForm abbrev_attr(static_cast<enum DwarfAttribute>(nametemp),
172 static_cast<enum DwarfForm>(formtemp),
173 value);
174 abbrev.attributes.push_back(abbrev_attr);
175 }
176 abbrevs_->push_back(abbrev);
177 }
178
179 // Account of cases where entries are out of order.
180 std::sort(abbrevs_->begin(), abbrevs_->end(),
181 [](const CompilationUnit::Abbrev& lhs, const CompilationUnit::Abbrev& rhs) {
182 return lhs.number < rhs.number;
183 });
184
185 // Ensure that there are no missing sections.
186 assert(abbrevs_->size() == highest_number + 1);
187 }
188
189 // Skips a single DIE's attributes.
SkipDIE(const uint8_t * start,const Abbrev & abbrev)190 const uint8_t* CompilationUnit::SkipDIE(const uint8_t* start,
191 const Abbrev& abbrev) {
192 for (AttributeList::const_iterator i = abbrev.attributes.begin();
193 i != abbrev.attributes.end();
194 i++) {
195 start = SkipAttribute(start, i->form_);
196 }
197 return start;
198 }
199
200 // Skips a single attribute form's data.
SkipAttribute(const uint8_t * start,enum DwarfForm form)201 const uint8_t* CompilationUnit::SkipAttribute(const uint8_t* start,
202 enum DwarfForm form) {
203 size_t len;
204
205 switch (form) {
206 case DW_FORM_indirect:
207 form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
208 &len));
209 start += len;
210 return SkipAttribute(start, form);
211
212 case DW_FORM_flag_present:
213 case DW_FORM_implicit_const:
214 return start;
215 case DW_FORM_addrx1:
216 case DW_FORM_data1:
217 case DW_FORM_flag:
218 case DW_FORM_ref1:
219 case DW_FORM_strx1:
220 return start + 1;
221 case DW_FORM_addrx2:
222 case DW_FORM_ref2:
223 case DW_FORM_data2:
224 case DW_FORM_strx2:
225 return start + 2;
226 case DW_FORM_addrx3:
227 case DW_FORM_strx3:
228 return start + 3;
229 case DW_FORM_addrx4:
230 case DW_FORM_ref4:
231 case DW_FORM_data4:
232 case DW_FORM_strx4:
233 case DW_FORM_ref_sup4:
234 return start + 4;
235 case DW_FORM_ref8:
236 case DW_FORM_data8:
237 case DW_FORM_ref_sig8:
238 case DW_FORM_ref_sup8:
239 return start + 8;
240 case DW_FORM_data16:
241 return start + 16;
242 case DW_FORM_string:
243 return start + strlen(reinterpret_cast<const char*>(start)) + 1;
244 case DW_FORM_udata:
245 case DW_FORM_ref_udata:
246 case DW_FORM_strx:
247 case DW_FORM_GNU_str_index:
248 case DW_FORM_GNU_addr_index:
249 case DW_FORM_addrx:
250 case DW_FORM_rnglistx:
251 case DW_FORM_loclistx:
252 reader_->ReadUnsignedLEB128(start, &len);
253 return start + len;
254
255 case DW_FORM_sdata:
256 reader_->ReadSignedLEB128(start, &len);
257 return start + len;
258 case DW_FORM_addr:
259 return start + reader_->AddressSize();
260 case DW_FORM_ref_addr:
261 // DWARF2 and 3/4 differ on whether ref_addr is address size or
262 // offset size.
263 assert(header_.version >= 2);
264 if (header_.version == 2) {
265 return start + reader_->AddressSize();
266 } else if (header_.version >= 3) {
267 return start + reader_->OffsetSize();
268 }
269 break;
270
271 case DW_FORM_block1:
272 return start + 1 + reader_->ReadOneByte(start);
273 case DW_FORM_block2:
274 return start + 2 + reader_->ReadTwoBytes(start);
275 case DW_FORM_block4:
276 return start + 4 + reader_->ReadFourBytes(start);
277 case DW_FORM_block:
278 case DW_FORM_exprloc: {
279 uint64_t size = reader_->ReadUnsignedLEB128(start, &len);
280 return start + size + len;
281 }
282 case DW_FORM_strp:
283 case DW_FORM_line_strp:
284 case DW_FORM_strp_sup:
285 case DW_FORM_sec_offset:
286 return start + reader_->OffsetSize();
287 }
288 fprintf(stderr,"Unhandled form type");
289 return NULL;
290 }
291
292 // Read the abbreviation offset from a compilation unit header.
ReadAbbrevOffset(const uint8_t * headerptr)293 size_t CompilationUnit::ReadAbbrevOffset(const uint8_t* headerptr) {
294 assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
295 header_.abbrev_offset = reader_->ReadOffset(headerptr);
296 return reader_->OffsetSize();
297 }
298
299 // Read the address size from a compilation unit header.
ReadAddressSize(const uint8_t * headerptr)300 size_t CompilationUnit::ReadAddressSize(const uint8_t* headerptr) {
301 // Compare against less than or equal because this may be the last
302 // section in the file.
303 assert(headerptr + 1 <= buffer_ + buffer_length_);
304 header_.address_size = reader_->ReadOneByte(headerptr);
305 reader_->SetAddressSize(header_.address_size);
306 return 1;
307 }
308
309 // Read the DWO id from a split or skeleton compilation unit header.
ReadDwoId(const uint8_t * headerptr)310 size_t CompilationUnit::ReadDwoId(const uint8_t* headerptr) {
311 assert(headerptr + 8 <= buffer_ + buffer_length_);
312 dwo_id_ = reader_->ReadEightBytes(headerptr);
313 return 8;
314 }
315
316 // Read the type signature from a type or split type compilation unit header.
ReadTypeSignature(const uint8_t * headerptr)317 size_t CompilationUnit::ReadTypeSignature(const uint8_t* headerptr) {
318 assert(headerptr + 8 <= buffer_ + buffer_length_);
319 type_signature_ = reader_->ReadEightBytes(headerptr);
320 return 8;
321 }
322
// Read the type offset from a type or split type compilation unit header.
ReadTypeOffset(const uint8_t * headerptr)324 size_t CompilationUnit::ReadTypeOffset(const uint8_t* headerptr) {
325 assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
326 type_offset_ = reader_->ReadOffset(headerptr);
327 return reader_->OffsetSize();
328 }
329
330
331 // Read a DWARF header. The header is variable length in DWARF3 and DWARF4
// (and DWARF2 as extended by most compilers), and consists of a length
333 // field, a version number, the offset in the .debug_abbrev section for our
334 // abbrevs, and an address size. DWARF5 adds a unit_type to distinguish
335 // between partial-, full-, skeleton-, split-, and type- compilation units.
ReadHeader()336 void CompilationUnit::ReadHeader() {
337 const uint8_t* headerptr = buffer_;
338 size_t initial_length_size;
339
340 assert(headerptr + 4 < buffer_ + buffer_length_);
341 const uint64_t initial_length
342 = reader_->ReadInitialLength(headerptr, &initial_length_size);
343 headerptr += initial_length_size;
344 header_.length = initial_length;
345
346 assert(headerptr + 2 < buffer_ + buffer_length_);
347 header_.version = reader_->ReadTwoBytes(headerptr);
348 headerptr += 2;
349
350 if (header_.version <= 4) {
351 // Older versions of dwarf have a relatively simple structure.
352 headerptr += ReadAbbrevOffset(headerptr);
353 headerptr += ReadAddressSize(headerptr);
354 } else {
355 // DWARF5 adds a unit_type field, and various fields based on unit_type.
356 assert(headerptr + 1 < buffer_ + buffer_length_);
357 uint8_t unit_type = reader_->ReadOneByte(headerptr);
358 headerptr += 1;
359 headerptr += ReadAddressSize(headerptr);
360 headerptr += ReadAbbrevOffset(headerptr);
361 switch (unit_type) {
362 case DW_UT_compile:
363 case DW_UT_partial:
364 // nothing else to read
365 break;
366 case DW_UT_skeleton:
367 case DW_UT_split_compile:
368 headerptr += ReadDwoId(headerptr);
369 break;
370 case DW_UT_type:
371 case DW_UT_split_type:
372 is_type_unit_ = true;
373 headerptr += ReadTypeSignature(headerptr);
374 headerptr += ReadTypeOffset(headerptr);
375 break;
376 default:
377 fprintf(stderr, "Unhandled compilation unit type 0x%x", unit_type);
378 break;
379 }
380 }
381 after_header_ = headerptr;
382
383 // This check ensures that we don't have to do checking during the
384 // reading of DIEs. header_.length does not include the size of the
385 // initial length.
386 assert(buffer_ + initial_length_size + header_.length <=
387 buffer_ + buffer_length_);
388 }
389
Start()390 uint64_t CompilationUnit::Start() {
391 // First get the debug_info section.
392 SectionMap::const_iterator iter =
393 GetSectionByName(sections_, ".debug_info");
394 assert(iter != sections_.end());
395
396 // Set up our buffer
397 buffer_ = iter->second.first + offset_from_section_start_;
398 if (is_split_dwarf_) {
399 iter = GetSectionByName(sections_, ".debug_info_offset");
400 assert(iter != sections_.end());
401 buffer_length_ = iter->second.second;
402 } else {
403 buffer_length_ = iter->second.second - offset_from_section_start_;
404 }
405
406 // Read the header
407 ReadHeader();
408
409 // Figure out the real length from the end of the initial length to
410 // the end of the compilation unit, since that is the value we
411 // return.
412 uint64_t ourlength = header_.length;
413 if (reader_->OffsetSize() == 8)
414 ourlength += 12;
415 else
416 ourlength += 4;
417
418 // See if the user wants this compilation unit, and if not, just return.
419 if (!handler_->StartCompilationUnit(offset_from_section_start_,
420 reader_->AddressSize(),
421 reader_->OffsetSize(),
422 header_.length,
423 header_.version))
424 return ourlength;
425 else if (header_.version == 5 && is_type_unit_)
426 return ourlength;
427
428 // Otherwise, continue by reading our abbreviation entries.
429 ReadAbbrevs();
430
431 // Set the string section if we have one.
432 iter = GetSectionByName(sections_, ".debug_str");
433 if (iter != sections_.end()) {
434 string_buffer_ = iter->second.first;
435 string_buffer_length_ = iter->second.second;
436 }
437
438 iter = GetSectionByName(sections_, ".debug_line");
439 if (iter != sections_.end()) {
440 line_buffer_ = iter->second.first;
441 line_buffer_length_ = iter->second.second;
442 }
443
444 // Set the line string section if we have one.
445 iter = GetSectionByName(sections_, ".debug_line_str");
446 if (iter != sections_.end()) {
447 line_string_buffer_ = iter->second.first;
448 line_string_buffer_length_ = iter->second.second;
449 }
450
451 // Set the string offsets section if we have one.
452 iter = GetSectionByName(sections_, ".debug_str_offsets");
453 if (iter != sections_.end()) {
454 str_offsets_buffer_ = iter->second.first;
455 str_offsets_buffer_length_ = iter->second.second;
456 }
457
458 // Set the address section if we have one.
459 iter = GetSectionByName(sections_, ".debug_addr");
460 if (iter != sections_.end()) {
461 addr_buffer_ = iter->second.first;
462 addr_buffer_length_ = iter->second.second;
463 }
464
465 // Now that we have our abbreviations, start processing DIE's.
466 ProcessDIEs();
467
468 // If this is a skeleton compilation unit generated with split DWARF,
469 // and the client needs the full debug info, we need to find the full
470 // compilation unit in a .dwo or .dwp file.
471 should_process_split_dwarf_ =
472 !is_split_dwarf_ && dwo_name_ != NULL && handler_->NeedSplitDebugInfo();
473
474 return ourlength;
475 }
476
ProcessFormStringIndex(uint64_t dieoffset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t str_index)477 void CompilationUnit::ProcessFormStringIndex(
478 uint64_t dieoffset, enum DwarfAttribute attr, enum DwarfForm form,
479 uint64_t str_index) {
480 const size_t kStringOffsetsTableHeaderSize =
481 header_.version >= 5 ? (reader_->OffsetSize() == 8 ? 16 : 8) : 0;
482 const uint8_t* str_offsets_table_after_header = str_offsets_base_ ?
483 str_offsets_buffer_ + str_offsets_base_ :
484 str_offsets_buffer_ + kStringOffsetsTableHeaderSize;
485 const uint8_t* offset_ptr =
486 str_offsets_table_after_header + str_index * reader_->OffsetSize();
487
488 const uint64_t offset = reader_->ReadOffset(offset_ptr);
489 if (offset >= string_buffer_length_) {
490 return;
491 }
492
493 const char* str = reinterpret_cast<const char*>(string_buffer_) + offset;
494 ProcessAttributeString(dieoffset, attr, form, str);
495 }
496
497 // Special function for pre-processing the
498 // DW_AT_str_offsets_base and DW_AT_addr_base in a DW_TAG_compile_unit die (for
499 // DWARF v5). We must make sure to find and process the
500 // DW_AT_str_offsets_base and DW_AT_addr_base attributes before attempting to
501 // read any string and address attribute in the compile unit.
ProcessOffsetBaseAttribute(uint64_t dieoffset,const uint8_t * start,enum DwarfAttribute attr,enum DwarfForm form,uint64_t implicit_const)502 const uint8_t* CompilationUnit::ProcessOffsetBaseAttribute(
503 uint64_t dieoffset, const uint8_t* start, enum DwarfAttribute attr,
504 enum DwarfForm form, uint64_t implicit_const) {
505 size_t len;
506
507 switch (form) {
508 // DW_FORM_indirect is never used because it is such a space
509 // waster.
510 case DW_FORM_indirect:
511 form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
512 &len));
513 start += len;
514 return ProcessOffsetBaseAttribute(dieoffset, start, attr, form,
515 implicit_const);
516
517 case DW_FORM_flag_present:
518 return start;
519 case DW_FORM_data1:
520 case DW_FORM_flag:
521 return start + 1;
522 case DW_FORM_data2:
523 return start + 2;
524 case DW_FORM_data4:
525 return start + 4;
526 case DW_FORM_data8:
527 return start + 8;
528 case DW_FORM_data16:
529 // This form is designed for an md5 checksum inside line tables.
530 return start + 16;
531 case DW_FORM_string: {
532 const char* str = reinterpret_cast<const char*>(start);
533 return start + strlen(str) + 1;
534 }
535 case DW_FORM_udata:
536 reader_->ReadUnsignedLEB128(start, &len);
537 return start + len;
538 case DW_FORM_sdata:
539 reader_->ReadSignedLEB128(start, &len);
540 return start + len;
541 case DW_FORM_addr:
542 reader_->ReadAddress(start);
543 return start + reader_->AddressSize();
544
545 // This is the important one here!
546 case DW_FORM_sec_offset:
547 if (attr == DW_AT_str_offsets_base ||
548 attr == DW_AT_addr_base)
549 ProcessAttributeUnsigned(dieoffset, attr, form,
550 reader_->ReadOffset(start));
551 else
552 reader_->ReadOffset(start);
553 return start + reader_->OffsetSize();
554
555 case DW_FORM_ref1:
556 return start + 1;
557 case DW_FORM_ref2:
558 return start + 2;
559 case DW_FORM_ref4:
560 return start + 4;
561 case DW_FORM_ref8:
562 return start + 8;
563 case DW_FORM_ref_udata:
564 reader_->ReadUnsignedLEB128(start, &len);
565 return start + len;
566 case DW_FORM_ref_addr:
567 // DWARF2 and 3/4 differ on whether ref_addr is address size or
568 // offset size.
569 assert(header_.version >= 2);
570 if (header_.version == 2) {
571 reader_->ReadAddress(start);
572 return start + reader_->AddressSize();
573 } else if (header_.version >= 3) {
574 reader_->ReadOffset(start);
575 return start + reader_->OffsetSize();
576 }
577 break;
578 case DW_FORM_ref_sig8:
579 return start + 8;
580 case DW_FORM_implicit_const:
581 return start;
582 case DW_FORM_block1: {
583 uint64_t datalen = reader_->ReadOneByte(start);
584 return start + 1 + datalen;
585 }
586 case DW_FORM_block2: {
587 uint64_t datalen = reader_->ReadTwoBytes(start);
588 return start + 2 + datalen;
589 }
590 case DW_FORM_block4: {
591 uint64_t datalen = reader_->ReadFourBytes(start);
592 return start + 4 + datalen;
593 }
594 case DW_FORM_block:
595 case DW_FORM_exprloc: {
596 uint64_t datalen = reader_->ReadUnsignedLEB128(start, &len);
597 return start + datalen + len;
598 }
599 case DW_FORM_strp: {
600 reader_->ReadOffset(start);
601 return start + reader_->OffsetSize();
602 }
603 case DW_FORM_line_strp: {
604 reader_->ReadOffset(start);
605 return start + reader_->OffsetSize();
606 }
607 case DW_FORM_strp_sup:
608 return start + 4;
609 case DW_FORM_ref_sup4:
610 return start + 4;
611 case DW_FORM_ref_sup8:
612 return start + 8;
613 case DW_FORM_loclistx:
614 reader_->ReadUnsignedLEB128(start, &len);
615 return start + len;
616 case DW_FORM_strx:
617 case DW_FORM_GNU_str_index: {
618 reader_->ReadUnsignedLEB128(start, &len);
619 return start + len;
620 }
621 case DW_FORM_strx1: {
622 return start + 1;
623 }
624 case DW_FORM_strx2: {
625 return start + 2;
626 }
627 case DW_FORM_strx3: {
628 return start + 3;
629 }
630 case DW_FORM_strx4: {
631 return start + 4;
632 }
633
634 case DW_FORM_addrx:
635 case DW_FORM_GNU_addr_index:
636 reader_->ReadUnsignedLEB128(start, &len);
637 return start + len;
638 case DW_FORM_addrx1:
639 return start + 1;
640 case DW_FORM_addrx2:
641 return start + 2;
642 case DW_FORM_addrx3:
643 return start + 3;
644 case DW_FORM_addrx4:
645 return start + 4;
646 case DW_FORM_rnglistx:
647 reader_->ReadUnsignedLEB128(start, &len);
648 return start + len;
649 }
650 fprintf(stderr, "Unhandled form type\n");
651 return NULL;
652 }
653
654 // If one really wanted, you could merge SkipAttribute and
655 // ProcessAttribute
656 // This is all boring data manipulation and calling of the handler.
ProcessAttribute(uint64_t dieoffset,const uint8_t * start,enum DwarfAttribute attr,enum DwarfForm form,uint64_t implicit_const)657 const uint8_t* CompilationUnit::ProcessAttribute(
658 uint64_t dieoffset, const uint8_t* start, enum DwarfAttribute attr,
659 enum DwarfForm form, uint64_t implicit_const) {
660 size_t len;
661
662 switch (form) {
663 // DW_FORM_indirect is never used because it is such a space
664 // waster.
665 case DW_FORM_indirect:
666 form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
667 &len));
668 start += len;
669 return ProcessAttribute(dieoffset, start, attr, form, implicit_const);
670
671 case DW_FORM_flag_present:
672 ProcessAttributeUnsigned(dieoffset, attr, form, 1);
673 return start;
674 case DW_FORM_data1:
675 case DW_FORM_flag:
676 ProcessAttributeUnsigned(dieoffset, attr, form,
677 reader_->ReadOneByte(start));
678 return start + 1;
679 case DW_FORM_data2:
680 ProcessAttributeUnsigned(dieoffset, attr, form,
681 reader_->ReadTwoBytes(start));
682 return start + 2;
683 case DW_FORM_data4:
684 ProcessAttributeUnsigned(dieoffset, attr, form,
685 reader_->ReadFourBytes(start));
686 return start + 4;
687 case DW_FORM_data8:
688 ProcessAttributeUnsigned(dieoffset, attr, form,
689 reader_->ReadEightBytes(start));
690 return start + 8;
691 case DW_FORM_data16:
692 // This form is designed for an md5 checksum inside line tables.
693 fprintf(stderr, "Unhandled form type: DW_FORM_data16\n");
694 return start + 16;
695 case DW_FORM_string: {
696 const char* str = reinterpret_cast<const char*>(start);
697 ProcessAttributeString(dieoffset, attr, form, str);
698 return start + strlen(str) + 1;
699 }
700 case DW_FORM_udata:
701 ProcessAttributeUnsigned(dieoffset, attr, form,
702 reader_->ReadUnsignedLEB128(start, &len));
703 return start + len;
704
705 case DW_FORM_sdata:
706 ProcessAttributeSigned(dieoffset, attr, form,
707 reader_->ReadSignedLEB128(start, &len));
708 return start + len;
709 case DW_FORM_addr:
710 ProcessAttributeUnsigned(dieoffset, attr, form,
711 reader_->ReadAddress(start));
712 return start + reader_->AddressSize();
713 case DW_FORM_sec_offset:
714 ProcessAttributeUnsigned(dieoffset, attr, form,
715 reader_->ReadOffset(start));
716 return start + reader_->OffsetSize();
717
718 case DW_FORM_ref1:
719 handler_->ProcessAttributeReference(dieoffset, attr, form,
720 reader_->ReadOneByte(start)
721 + offset_from_section_start_);
722 return start + 1;
723 case DW_FORM_ref2:
724 handler_->ProcessAttributeReference(dieoffset, attr, form,
725 reader_->ReadTwoBytes(start)
726 + offset_from_section_start_);
727 return start + 2;
728 case DW_FORM_ref4:
729 handler_->ProcessAttributeReference(dieoffset, attr, form,
730 reader_->ReadFourBytes(start)
731 + offset_from_section_start_);
732 return start + 4;
733 case DW_FORM_ref8:
734 handler_->ProcessAttributeReference(dieoffset, attr, form,
735 reader_->ReadEightBytes(start)
736 + offset_from_section_start_);
737 return start + 8;
738 case DW_FORM_ref_udata:
739 handler_->ProcessAttributeReference(dieoffset, attr, form,
740 reader_->ReadUnsignedLEB128(start,
741 &len)
742 + offset_from_section_start_);
743 return start + len;
744 case DW_FORM_ref_addr:
745 // DWARF2 and 3/4 differ on whether ref_addr is address size or
746 // offset size.
747 assert(header_.version >= 2);
748 if (header_.version == 2) {
749 handler_->ProcessAttributeReference(dieoffset, attr, form,
750 reader_->ReadAddress(start));
751 return start + reader_->AddressSize();
752 } else if (header_.version >= 3) {
753 handler_->ProcessAttributeReference(dieoffset, attr, form,
754 reader_->ReadOffset(start));
755 return start + reader_->OffsetSize();
756 }
757 break;
758 case DW_FORM_ref_sig8:
759 handler_->ProcessAttributeSignature(dieoffset, attr, form,
760 reader_->ReadEightBytes(start));
761 return start + 8;
762 case DW_FORM_implicit_const:
763 handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
764 implicit_const);
765 return start;
766 case DW_FORM_block1: {
767 uint64_t datalen = reader_->ReadOneByte(start);
768 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1,
769 datalen);
770 return start + 1 + datalen;
771 }
772 case DW_FORM_block2: {
773 uint64_t datalen = reader_->ReadTwoBytes(start);
774 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2,
775 datalen);
776 return start + 2 + datalen;
777 }
778 case DW_FORM_block4: {
779 uint64_t datalen = reader_->ReadFourBytes(start);
780 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4,
781 datalen);
782 return start + 4 + datalen;
783 }
784 case DW_FORM_block:
785 case DW_FORM_exprloc: {
786 uint64_t datalen = reader_->ReadUnsignedLEB128(start, &len);
787 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len,
788 datalen);
789 return start + datalen + len;
790 }
791 case DW_FORM_strp: {
792 assert(string_buffer_ != NULL);
793
794 const uint64_t offset = reader_->ReadOffset(start);
795 assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_);
796
797 const char* str = reinterpret_cast<const char*>(string_buffer_ + offset);
798 ProcessAttributeString(dieoffset, attr, form, str);
799 return start + reader_->OffsetSize();
800 }
801 case DW_FORM_line_strp: {
802 assert(line_string_buffer_ != NULL);
803
804 const uint64_t offset = reader_->ReadOffset(start);
805 assert(line_string_buffer_ + offset <
806 line_string_buffer_ + line_string_buffer_length_);
807
808 const char* str =
809 reinterpret_cast<const char*>(line_string_buffer_ + offset);
810 ProcessAttributeString(dieoffset, attr, form, str);
811 return start + reader_->OffsetSize();
812 }
813 case DW_FORM_strp_sup:
814 // No support currently for suplementary object files.
815 fprintf(stderr, "Unhandled form type: DW_FORM_strp_sup\n");
816 return start + 4;
817 case DW_FORM_ref_sup4:
818 // No support currently for suplementary object files.
819 fprintf(stderr, "Unhandled form type: DW_FORM_ref_sup4\n");
820 return start + 4;
821 case DW_FORM_ref_sup8:
822 // No support currently for suplementary object files.
823 fprintf(stderr, "Unhandled form type: DW_FORM_ref_sup8\n");
824 return start + 8;
825 case DW_FORM_loclistx:
826 ProcessAttributeUnsigned(dieoffset, attr, form,
827 reader_->ReadUnsignedLEB128(start, &len));
828 return start + len;
829 case DW_FORM_strx:
830 case DW_FORM_GNU_str_index: {
831 uint64_t str_index = reader_->ReadUnsignedLEB128(start, &len);
832 ProcessFormStringIndex(dieoffset, attr, form, str_index);
833 return start + len;
834 }
835 case DW_FORM_strx1: {
836 uint64_t str_index = reader_->ReadOneByte(start);
837 ProcessFormStringIndex(dieoffset, attr, form, str_index);
838 return start + 1;
839 }
840 case DW_FORM_strx2: {
841 uint64_t str_index = reader_->ReadTwoBytes(start);
842 ProcessFormStringIndex(dieoffset, attr, form, str_index);
843 return start + 2;
844 }
845 case DW_FORM_strx3: {
846 uint64_t str_index = reader_->ReadThreeBytes(start);
847 ProcessFormStringIndex(dieoffset, attr, form, str_index);
848 return start + 3;
849 }
850 case DW_FORM_strx4: {
851 uint64_t str_index = reader_->ReadFourBytes(start);
852 ProcessFormStringIndex(dieoffset, attr, form, str_index);
853 return start + 4;
854 }
855
856 case DW_FORM_addrx:
857 case DW_FORM_GNU_addr_index:
858 ProcessAttributeAddrIndex(
859 dieoffset, attr, form, reader_->ReadUnsignedLEB128(start, &len));
860 return start + len;
861 case DW_FORM_addrx1:
862 ProcessAttributeAddrIndex(
863 dieoffset, attr, form, reader_->ReadOneByte(start));
864 return start + 1;
865 case DW_FORM_addrx2:
866 ProcessAttributeAddrIndex(
867 dieoffset, attr, form, reader_->ReadTwoBytes(start));
868 return start + 2;
869 case DW_FORM_addrx3:
870 ProcessAttributeAddrIndex(
871 dieoffset, attr, form, reader_->ReadThreeBytes(start));
872 return start + 3;
873 case DW_FORM_addrx4:
874 ProcessAttributeAddrIndex(
875 dieoffset, attr, form, reader_->ReadFourBytes(start));
876 return start + 4;
877 case DW_FORM_rnglistx:
878 ProcessAttributeUnsigned(
879 dieoffset, attr, form, reader_->ReadUnsignedLEB128(start, &len));
880 return start + len;
881 }
882 fprintf(stderr, "Unhandled form type\n");
883 return NULL;
884 }
885
ProcessDIE(uint64_t dieoffset,const uint8_t * start,const Abbrev & abbrev)886 const uint8_t* CompilationUnit::ProcessDIE(uint64_t dieoffset,
887 const uint8_t* start,
888 const Abbrev& abbrev) {
889 // With DWARF v5, the compile_unit die may contain a
890 // DW_AT_str_offsets_base or DW_AT_addr_base. If it does, that attribute must
891 // be found and processed before trying to process the other attributes;
892 // otherwise the string or address values will all come out incorrect.
893 if ((abbrev.tag == DW_TAG_compile_unit ||
894 abbrev.tag == DW_TAG_skeleton_unit) &&
895 header_.version == 5) {
896 uint64_t dieoffset_copy = dieoffset;
897 const uint8_t* start_copy = start;
898 for (AttributeList::const_iterator i = abbrev.attributes.begin();
899 i != abbrev.attributes.end();
900 i++) {
901 start_copy = ProcessOffsetBaseAttribute(dieoffset_copy, start_copy,
902 i->attr_, i->form_,
903 i->value_);
904 }
905 }
906
907 for (AttributeList::const_iterator i = abbrev.attributes.begin();
908 i != abbrev.attributes.end();
909 i++) {
910 start = ProcessAttribute(dieoffset, start, i->attr_, i->form_, i->value_);
911 }
912
913 // If this is a compilation unit in a split DWARF object, verify that
914 // the dwo_id matches. If it does not match, we will ignore this
915 // compilation unit.
916 if (abbrev.tag == DW_TAG_compile_unit
917 && is_split_dwarf_
918 && dwo_id_ != skeleton_dwo_id_) {
919 return NULL;
920 }
921
922 return start;
923 }
924
ProcessDIEs()925 void CompilationUnit::ProcessDIEs() {
926 const uint8_t* dieptr = after_header_;
927 size_t len;
928
929 // lengthstart is the place the length field is based on.
930 // It is the point in the header after the initial length field
931 const uint8_t* lengthstart = buffer_;
932
933 // In 64 bit dwarf, the initial length is 12 bytes, because of the
934 // 0xffffffff at the start.
935 if (reader_->OffsetSize() == 8)
936 lengthstart += 12;
937 else
938 lengthstart += 4;
939
940 std::stack<uint64_t> die_stack;
941
942 while (dieptr < (lengthstart + header_.length)) {
943 // We give the user the absolute offset from the beginning of
944 // debug_info, since they need it to deal with ref_addr forms.
945 uint64_t absolute_offset = (dieptr - buffer_) + offset_from_section_start_;
946
947 uint64_t abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len);
948
949 dieptr += len;
950
951 // Abbrev == 0 represents the end of a list of children, or padding
952 // at the end of the compilation unit.
953 if (abbrev_num == 0) {
954 if (die_stack.size() == 0)
955 // If it is padding, then we are done with the compilation unit's DIEs.
956 return;
957 const uint64_t offset = die_stack.top();
958 die_stack.pop();
959 handler_->EndDIE(offset);
960 continue;
961 }
962
963 const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num));
964 const enum DwarfTag tag = abbrev.tag;
965 if (!handler_->StartDIE(absolute_offset, tag)) {
966 dieptr = SkipDIE(dieptr, abbrev);
967 if (!dieptr) {
968 fprintf(stderr,
969 "An error happens when skipping a DIE's attributes at offset "
970 "0x%" PRIx64
971 ". Stopped processing following DIEs in this CU.\n",
972 absolute_offset);
973 exit(1);
974 }
975 } else {
976 dieptr = ProcessDIE(absolute_offset, dieptr, abbrev);
977 if (!dieptr) {
978 fprintf(stderr,
979 "An error happens when processing a DIE at offset 0x%" PRIx64
980 ". Stopped processing following DIEs in this CU.\n",
981 absolute_offset);
982 exit(1);
983 }
984 }
985
986 if (abbrev.has_children) {
987 die_stack.push(absolute_offset);
988 } else {
989 handler_->EndDIE(absolute_offset);
990 }
991 }
992 }
993
994 // Check for a valid ELF file and return the Address size.
995 // Returns 0 if not a valid ELF file.
GetElfWidth(const ElfReader & elf)996 inline int GetElfWidth(const ElfReader& elf) {
997 if (elf.IsElf32File())
998 return 4;
999 if (elf.IsElf64File())
1000 return 8;
1001 return 0;
1002 }
1003
ProcessSplitDwarf(std::string & split_file,SectionMap & sections,ByteReader & split_byte_reader,uint64_t & cu_offset)1004 bool CompilationUnit::ProcessSplitDwarf(std::string& split_file,
1005 SectionMap& sections,
1006 ByteReader& split_byte_reader,
1007 uint64_t& cu_offset) {
1008 if (!should_process_split_dwarf_)
1009 return false;
1010 struct stat statbuf;
1011 bool found_in_dwp = false;
1012 if (!have_checked_for_dwp_) {
1013 // Look for a .dwp file in the same directory as the executable.
1014 have_checked_for_dwp_ = true;
1015 string dwp_suffix(".dwp");
1016 std::string dwp_path = path_ + dwp_suffix;
1017 if (stat(dwp_path.c_str(), &statbuf) != 0) {
1018 // Fall back to a split .debug file in the same directory.
1019 string debug_suffix(".debug");
1020 dwp_path = path_;
1021 size_t found = path_.rfind(debug_suffix);
1022 if (found != string::npos &&
1023 found + debug_suffix.length() == path_.length())
1024 dwp_path = dwp_path.replace(found, debug_suffix.length(), dwp_suffix);
1025 }
1026 if (stat(dwp_path.c_str(), &statbuf) == 0) {
1027 split_elf_reader_ = std::make_unique<ElfReader>(dwp_path);
1028 int width = GetElfWidth(*split_elf_reader_.get());
1029 if (width != 0) {
1030 split_byte_reader = ByteReader(reader_->GetEndianness());
1031 split_byte_reader.SetAddressSize(width);
1032 dwp_reader_ = std::make_unique<DwpReader>(split_byte_reader,
1033 split_elf_reader_.get());
1034 dwp_reader_->Initialize();
1035 // If we have a .dwp file, read the debug sections for the requested CU.
1036 dwp_reader_->ReadDebugSectionsForCU(dwo_id_, §ions);
1037 if (!sections.empty()) {
1038 SectionMap::const_iterator cu_iter =
1039 GetSectionByName(sections, ".debug_info_offset");
1040 SectionMap::const_iterator debug_info_iter =
1041 GetSectionByName(sections, ".debug_info");
1042 assert(cu_iter != sections.end());
1043 assert(debug_info_iter != sections.end());
1044 cu_offset = cu_iter->second.first - debug_info_iter->second.first;
1045 found_in_dwp = true;
1046 split_file = dwp_path;
1047 }
1048 }
1049 }
1050 }
1051 if (!found_in_dwp) {
1052 // If no .dwp file, try to open the .dwo file.
1053 if (stat(dwo_name_, &statbuf) == 0) {
1054 split_elf_reader_ = std::make_unique<ElfReader>(dwo_name_);
1055 int width = GetElfWidth(*split_elf_reader_.get());
1056 if (width != 0) {
1057 split_byte_reader = ByteReader(ENDIANNESS_LITTLE);
1058 split_byte_reader.SetAddressSize(width);
1059 ReadDebugSectionsFromDwo(split_elf_reader_.get(), §ions);
1060 if (!sections.empty()) {
1061 split_file = dwo_name_;
1062 }
1063 }
1064 }
1065 }
1066 return !split_file.empty();
1067 }
1068
ReadDebugSectionsFromDwo(ElfReader * elf_reader,SectionMap * sections)1069 void CompilationUnit::ReadDebugSectionsFromDwo(ElfReader* elf_reader,
1070 SectionMap* sections) {
1071 static const char* const section_names[] = {
1072 ".debug_abbrev",
1073 ".debug_info",
1074 ".debug_str_offsets",
1075 ".debug_str"
1076 };
1077 for (unsigned int i = 0u;
1078 i < sizeof(section_names)/sizeof(*(section_names)); ++i) {
1079 string base_name = section_names[i];
1080 string dwo_name = base_name + ".dwo";
1081 size_t section_size;
1082 const char* section_data = elf_reader->GetSectionByName(dwo_name,
1083 §ion_size);
1084 if (section_data != NULL)
1085 sections->insert(std::make_pair(
1086 base_name, std::make_pair(
1087 reinterpret_cast<const uint8_t*>(section_data),
1088 section_size)));
1089 }
1090 }
1091
DwpReader(const ByteReader & byte_reader,ElfReader * elf_reader)1092 DwpReader::DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader)
1093 : elf_reader_(elf_reader), byte_reader_(byte_reader),
1094 cu_index_(NULL), cu_index_size_(0), string_buffer_(NULL),
1095 string_buffer_size_(0), version_(0), ncolumns_(0), nunits_(0),
1096 nslots_(0), phash_(NULL), pindex_(NULL), shndx_pool_(NULL),
1097 offset_table_(NULL), size_table_(NULL), abbrev_data_(NULL),
1098 abbrev_size_(0), info_data_(NULL), info_size_(0),
1099 str_offsets_data_(NULL), str_offsets_size_(0) {}
1100
Initialize()1101 void DwpReader::Initialize() {
1102 cu_index_ = elf_reader_->GetSectionByName(".debug_cu_index",
1103 &cu_index_size_);
1104 if (cu_index_ == NULL) {
1105 return;
1106 }
1107 // The .debug_str.dwo section is shared by all CUs in the file.
1108 string_buffer_ = elf_reader_->GetSectionByName(".debug_str.dwo",
1109 &string_buffer_size_);
1110
1111 version_ = byte_reader_.ReadFourBytes(
1112 reinterpret_cast<const uint8_t*>(cu_index_));
1113
1114 if (version_ == 1) {
1115 nslots_ = byte_reader_.ReadFourBytes(
1116 reinterpret_cast<const uint8_t*>(cu_index_)
1117 + 3 * sizeof(uint32_t));
1118 phash_ = cu_index_ + 4 * sizeof(uint32_t);
1119 pindex_ = phash_ + nslots_ * sizeof(uint64_t);
1120 shndx_pool_ = pindex_ + nslots_ * sizeof(uint32_t);
1121 if (shndx_pool_ >= cu_index_ + cu_index_size_) {
1122 version_ = 0;
1123 }
1124 } else if (version_ == 2 || version_ == 5) {
1125 ncolumns_ = byte_reader_.ReadFourBytes(
1126 reinterpret_cast<const uint8_t*>(cu_index_) + sizeof(uint32_t));
1127 nunits_ = byte_reader_.ReadFourBytes(
1128 reinterpret_cast<const uint8_t*>(cu_index_) + 2 * sizeof(uint32_t));
1129 nslots_ = byte_reader_.ReadFourBytes(
1130 reinterpret_cast<const uint8_t*>(cu_index_) + 3 * sizeof(uint32_t));
1131 phash_ = cu_index_ + 4 * sizeof(uint32_t);
1132 pindex_ = phash_ + nslots_ * sizeof(uint64_t);
1133 offset_table_ = pindex_ + nslots_ * sizeof(uint32_t);
1134 size_table_ = offset_table_ + ncolumns_ * (nunits_ + 1) * sizeof(uint32_t);
1135 abbrev_data_ = elf_reader_->GetSectionByName(".debug_abbrev.dwo",
1136 &abbrev_size_);
1137 info_data_ = elf_reader_->GetSectionByName(".debug_info.dwo", &info_size_);
1138 str_offsets_data_ = elf_reader_->GetSectionByName(".debug_str_offsets.dwo",
1139 &str_offsets_size_);
1140 rnglist_data_ =
1141 elf_reader_->GetSectionByName(".debug_rnglists.dwo", &rnglist_size_);
1142 if (size_table_ >= cu_index_ + cu_index_size_) {
1143 version_ = 0;
1144 }
1145 }
1146 }
1147
ReadDebugSectionsForCU(uint64_t dwo_id,SectionMap * sections)1148 void DwpReader::ReadDebugSectionsForCU(uint64_t dwo_id,
1149 SectionMap* sections) {
1150 if (version_ == 1) {
1151 int slot = LookupCU(dwo_id);
1152 if (slot == -1) {
1153 return;
1154 }
1155
1156 // The index table points to the section index pool, where we
1157 // can read a list of section indexes for the debug sections
1158 // for the CU whose dwo_id we are looking for.
1159 int index = byte_reader_.ReadFourBytes(
1160 reinterpret_cast<const uint8_t*>(pindex_)
1161 + slot * sizeof(uint32_t));
1162 const char* shndx_list = shndx_pool_ + index * sizeof(uint32_t);
1163 for (;;) {
1164 if (shndx_list >= cu_index_ + cu_index_size_) {
1165 version_ = 0;
1166 return;
1167 }
1168 unsigned int shndx = byte_reader_.ReadFourBytes(
1169 reinterpret_cast<const uint8_t*>(shndx_list));
1170 shndx_list += sizeof(uint32_t);
1171 if (shndx == 0)
1172 break;
1173 const char* section_name = elf_reader_->GetSectionName(shndx);
1174 size_t section_size;
1175 const char* section_data;
1176 // We're only interested in these four debug sections.
1177 // The section names in the .dwo file end with ".dwo", but we
1178 // add them to the sections table with their normal names.
1179 if (!strncmp(section_name, ".debug_abbrev", strlen(".debug_abbrev"))) {
1180 section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size);
1181 sections->insert(std::make_pair(
1182 ".debug_abbrev",
1183 std::make_pair(reinterpret_cast<const uint8_t*> (section_data),
1184 section_size)));
1185 } else if (!strncmp(section_name, ".debug_info", strlen(".debug_info"))) {
1186 section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size);
1187 sections->insert(std::make_pair(
1188 ".debug_info",
1189 std::make_pair(reinterpret_cast<const uint8_t*> (section_data),
1190 section_size)));
1191 } else if (!strncmp(section_name, ".debug_str_offsets",
1192 strlen(".debug_str_offsets"))) {
1193 section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size);
1194 sections->insert(std::make_pair(
1195 ".debug_str_offsets",
1196 std::make_pair(reinterpret_cast<const uint8_t*> (section_data),
1197 section_size)));
1198 }
1199 }
1200 sections->insert(std::make_pair(
1201 ".debug_str",
1202 std::make_pair(reinterpret_cast<const uint8_t*> (string_buffer_),
1203 string_buffer_size_)));
1204 } else if (version_ == 2 || version_ == 5) {
1205 uint32_t index = LookupCUv2(dwo_id);
1206 if (index == 0) {
1207 return;
1208 }
1209
1210 // The index points to a row in each of the section offsets table
1211 // and the section size table, where we can read the offsets and sizes
1212 // of the contributions to each debug section from the CU whose dwo_id
1213 // we are looking for. Row 0 of the section offsets table has the
1214 // section ids for each column of the table. The size table begins
1215 // with row 1.
1216 const char* id_row = offset_table_;
1217 const char* offset_row = offset_table_
1218 + index * ncolumns_ * sizeof(uint32_t);
1219 const char* size_row =
1220 size_table_ + (index - 1) * ncolumns_ * sizeof(uint32_t);
1221 if (size_row + ncolumns_ * sizeof(uint32_t) > cu_index_ + cu_index_size_) {
1222 version_ = 0;
1223 return;
1224 }
1225 for (unsigned int col = 0u; col < ncolumns_; ++col) {
1226 uint32_t section_id =
1227 byte_reader_.ReadFourBytes(reinterpret_cast<const uint8_t*>(id_row)
1228 + col * sizeof(uint32_t));
1229 uint32_t offset = byte_reader_.ReadFourBytes(
1230 reinterpret_cast<const uint8_t*>(offset_row)
1231 + col * sizeof(uint32_t));
1232 uint32_t size = byte_reader_.ReadFourBytes(
1233 reinterpret_cast<const uint8_t*>(size_row) + col * sizeof(uint32_t));
1234 if (section_id == DW_SECT_ABBREV) {
1235 sections->insert(std::make_pair(
1236 ".debug_abbrev",
1237 std::make_pair(reinterpret_cast<const uint8_t*> (abbrev_data_)
1238 + offset, size)));
1239 } else if (section_id == DW_SECT_INFO) {
1240 sections->insert(std::make_pair(
1241 ".debug_info",
1242 std::make_pair(reinterpret_cast<const uint8_t*>(info_data_), 0)));
1243 // .debug_info_offset will points the buffer for the CU with given
1244 // dwo_id.
1245 sections->insert(std::make_pair(
1246 ".debug_info_offset",
1247 std::make_pair(
1248 reinterpret_cast<const uint8_t*>(info_data_) + offset, size)));
1249 } else if (section_id == DW_SECT_STR_OFFSETS) {
1250 sections->insert(std::make_pair(
1251 ".debug_str_offsets",
1252 std::make_pair(reinterpret_cast<const uint8_t*> (str_offsets_data_)
1253 + offset, size)));
1254 } else if (section_id == DW_SECT_RNGLISTS) {
1255 sections->insert(std::make_pair(
1256 ".debug_rnglists",
1257 std::make_pair(
1258 reinterpret_cast<const uint8_t*>(rnglist_data_) + offset,
1259 size)));
1260 }
1261 }
1262 sections->insert(std::make_pair(
1263 ".debug_str",
1264 std::make_pair(reinterpret_cast<const uint8_t*> (string_buffer_),
1265 string_buffer_size_)));
1266 }
1267 }
1268
LookupCU(uint64_t dwo_id)1269 int DwpReader::LookupCU(uint64_t dwo_id) {
1270 uint32_t slot = static_cast<uint32_t>(dwo_id) & (nslots_ - 1);
1271 uint64_t probe = byte_reader_.ReadEightBytes(
1272 reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t));
1273 if (probe != 0 && probe != dwo_id) {
1274 uint32_t secondary_hash =
1275 (static_cast<uint32_t>(dwo_id >> 32) & (nslots_ - 1)) | 1;
1276 do {
1277 slot = (slot + secondary_hash) & (nslots_ - 1);
1278 probe = byte_reader_.ReadEightBytes(
1279 reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t));
1280 } while (probe != 0 && probe != dwo_id);
1281 }
1282 if (probe == 0)
1283 return -1;
1284 return slot;
1285 }
1286
LookupCUv2(uint64_t dwo_id)1287 uint32_t DwpReader::LookupCUv2(uint64_t dwo_id) {
1288 uint32_t slot = static_cast<uint32_t>(dwo_id) & (nslots_ - 1);
1289 uint64_t probe = byte_reader_.ReadEightBytes(
1290 reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t));
1291 uint32_t index = byte_reader_.ReadFourBytes(
1292 reinterpret_cast<const uint8_t*>(pindex_) + slot * sizeof(uint32_t));
1293 if (index != 0 && probe != dwo_id) {
1294 uint32_t secondary_hash =
1295 (static_cast<uint32_t>(dwo_id >> 32) & (nslots_ - 1)) | 1;
1296 do {
1297 slot = (slot + secondary_hash) & (nslots_ - 1);
1298 probe = byte_reader_.ReadEightBytes(
1299 reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t));
1300 index = byte_reader_.ReadFourBytes(
1301 reinterpret_cast<const uint8_t*>(pindex_) + slot * sizeof(uint32_t));
1302 } while (index != 0 && probe != dwo_id);
1303 }
1304 return index;
1305 }
1306
LineInfo(const uint8_t * buffer,uint64_t buffer_length,ByteReader * reader,const uint8_t * string_buffer,size_t string_buffer_length,const uint8_t * line_string_buffer,size_t line_string_buffer_length,LineInfoHandler * handler)1307 LineInfo::LineInfo(const uint8_t* buffer, uint64_t buffer_length,
1308 ByteReader* reader, const uint8_t* string_buffer,
1309 size_t string_buffer_length,
1310 const uint8_t* line_string_buffer,
1311 size_t line_string_buffer_length, LineInfoHandler* handler):
1312 handler_(handler), reader_(reader), buffer_(buffer),
1313 string_buffer_(string_buffer),
1314 line_string_buffer_(line_string_buffer) {
1315 #ifndef NDEBUG
1316 buffer_length_ = buffer_length;
1317 string_buffer_length_ = string_buffer_length;
1318 line_string_buffer_length_ = line_string_buffer_length;
1319 #endif
1320 header_.std_opcode_lengths = NULL;
1321 }
1322
Start()1323 uint64_t LineInfo::Start() {
1324 ReadHeader();
1325 ReadLines();
1326 return after_header_ - buffer_;
1327 }
1328
ReadTypesAndForms(const uint8_t ** lineptr,uint32_t * content_types,uint32_t * content_forms,uint32_t max_types,uint32_t * format_count)1329 void LineInfo::ReadTypesAndForms(const uint8_t** lineptr,
1330 uint32_t* content_types,
1331 uint32_t* content_forms,
1332 uint32_t max_types,
1333 uint32_t* format_count) {
1334 size_t len;
1335
1336 uint32_t count = reader_->ReadUnsignedLEB128(*lineptr, &len);
1337 *lineptr += len;
1338 if (count < 1 || count > max_types) {
1339 return;
1340 }
1341 for (uint32_t col = 0; col < count; ++col) {
1342 content_types[col] = reader_->ReadUnsignedLEB128(*lineptr, &len);
1343 *lineptr += len;
1344 content_forms[col] = reader_->ReadUnsignedLEB128(*lineptr, &len);
1345 *lineptr += len;
1346 }
1347 *format_count = count;
1348 }
1349
ReadStringForm(uint32_t form,const uint8_t ** lineptr)1350 const char* LineInfo::ReadStringForm(uint32_t form, const uint8_t** lineptr) {
1351 const char* name = nullptr;
1352 if (form == DW_FORM_string) {
1353 name = reinterpret_cast<const char*>(*lineptr);
1354 *lineptr += strlen(name) + 1;
1355 return name;
1356 } else if (form == DW_FORM_strp) {
1357 uint64_t offset = reader_->ReadOffset(*lineptr);
1358 assert(offset < string_buffer_length_);
1359 *lineptr += reader_->OffsetSize();
1360 if (string_buffer_ != nullptr) {
1361 name = reinterpret_cast<const char*>(string_buffer_) + offset;
1362 return name;
1363 }
1364 } else if (form == DW_FORM_line_strp) {
1365 uint64_t offset = reader_->ReadOffset(*lineptr);
1366 assert(offset < line_string_buffer_length_);
1367 *lineptr += reader_->OffsetSize();
1368 if (line_string_buffer_ != nullptr) {
1369 name = reinterpret_cast<const char*>(line_string_buffer_) + offset;
1370 return name;
1371 }
1372 }
1373 // Shouldn't be called with a non-string-form, and
1374 // if there is a string form but no string buffer,
1375 // that is a problem too.
1376 assert(0);
1377 return nullptr;
1378 }
1379
ReadUnsignedData(uint32_t form,const uint8_t ** lineptr)1380 uint64_t LineInfo::ReadUnsignedData(uint32_t form, const uint8_t** lineptr) {
1381 size_t len;
1382 uint64_t value;
1383
1384 switch (form) {
1385 case DW_FORM_data1:
1386 value = reader_->ReadOneByte(*lineptr);
1387 *lineptr += 1;
1388 return value;
1389 case DW_FORM_data2:
1390 value = reader_->ReadTwoBytes(*lineptr);
1391 *lineptr += 2;
1392 return value;
1393 case DW_FORM_data4:
1394 value = reader_->ReadFourBytes(*lineptr);
1395 *lineptr += 4;
1396 return value;
1397 case DW_FORM_data8:
1398 value = reader_->ReadEightBytes(*lineptr);
1399 *lineptr += 8;
1400 return value;
1401 case DW_FORM_udata:
1402 value = reader_->ReadUnsignedLEB128(*lineptr, &len);
1403 *lineptr += len;
1404 return value;
1405 default:
1406 fprintf(stderr, "Unrecognized data form.");
1407 return 0;
1408 }
1409 }
1410
ReadFileRow(const uint8_t ** lineptr,const uint32_t * content_types,const uint32_t * content_forms,uint32_t row,uint32_t format_count)1411 void LineInfo::ReadFileRow(const uint8_t** lineptr,
1412 const uint32_t* content_types,
1413 const uint32_t* content_forms, uint32_t row,
1414 uint32_t format_count) {
1415 const char* filename = nullptr;
1416 uint64_t dirindex = 0;
1417 uint64_t mod_time = 0;
1418 uint64_t filelength = 0;
1419
1420 for (uint32_t col = 0; col < format_count; ++col) {
1421 switch (content_types[col]) {
1422 case DW_LNCT_path:
1423 filename = ReadStringForm(content_forms[col], lineptr);
1424 break;
1425 case DW_LNCT_directory_index:
1426 dirindex = ReadUnsignedData(content_forms[col], lineptr);
1427 break;
1428 case DW_LNCT_timestamp:
1429 mod_time = ReadUnsignedData(content_forms[col], lineptr);
1430 break;
1431 case DW_LNCT_size:
1432 filelength = ReadUnsignedData(content_forms[col], lineptr);
1433 break;
1434 case DW_LNCT_MD5:
1435 // MD5 entries help a debugger sort different versions of files with
1436 // the same name. It is always paired with a DW_FORM_data16 and is
1437 // unused in this case.
1438 *lineptr += 16;
1439 break;
1440 default:
1441 fprintf(stderr, "Unrecognized form in line table header. %d\n",
1442 content_types[col]);
1443 assert(false);
1444 break;
1445 }
1446 }
1447 assert(filename != nullptr);
1448 handler_->DefineFile(filename, row, dirindex, mod_time, filelength);
1449 }
1450
1451 // The header for a debug_line section is mildly complicated, because
1452 // the line info is very tightly encoded.
ReadHeader()1453 void LineInfo::ReadHeader() {
1454 const uint8_t* lineptr = buffer_;
1455 size_t initial_length_size;
1456
1457 const uint64_t initial_length
1458 = reader_->ReadInitialLength(lineptr, &initial_length_size);
1459
1460 lineptr += initial_length_size;
1461 header_.total_length = initial_length;
1462 assert(buffer_ + initial_length_size + header_.total_length <=
1463 buffer_ + buffer_length_);
1464
1465
1466 header_.version = reader_->ReadTwoBytes(lineptr);
1467 lineptr += 2;
1468
1469 if (header_.version >= 5) {
1470 uint8_t address_size = reader_->ReadOneByte(lineptr);
1471 reader_->SetAddressSize(address_size);
1472 lineptr += 1;
1473 uint8_t segment_selector_size = reader_->ReadOneByte(lineptr);
1474 if (segment_selector_size != 0) {
1475 fprintf(stderr,"No support for segmented memory.");
1476 }
1477 lineptr += 1;
1478 } else {
1479 // Address size *must* be set by CU ahead of time.
1480 assert(reader_->AddressSize() != 0);
1481 }
1482
1483 header_.prologue_length = reader_->ReadOffset(lineptr);
1484 lineptr += reader_->OffsetSize();
1485
1486 header_.min_insn_length = reader_->ReadOneByte(lineptr);
1487 lineptr += 1;
1488
1489 if (header_.version >= 4) {
1490 __attribute__((unused)) uint8_t max_ops_per_insn =
1491 reader_->ReadOneByte(lineptr);
1492 ++lineptr;
1493 assert(max_ops_per_insn == 1);
1494 }
1495
1496 header_.default_is_stmt = reader_->ReadOneByte(lineptr);
1497 lineptr += 1;
1498
1499 header_.line_base = *reinterpret_cast<const int8_t*>(lineptr);
1500 lineptr += 1;
1501
1502 header_.line_range = reader_->ReadOneByte(lineptr);
1503 lineptr += 1;
1504
1505 header_.opcode_base = reader_->ReadOneByte(lineptr);
1506 lineptr += 1;
1507
1508 header_.std_opcode_lengths = new std::vector<unsigned char>;
1509 header_.std_opcode_lengths->resize(header_.opcode_base + 1);
1510 (*header_.std_opcode_lengths)[0] = 0;
1511 for (int i = 1; i < header_.opcode_base; i++) {
1512 (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr);
1513 lineptr += 1;
1514 }
1515
1516 if (header_.version <= 4) {
1517 // Directory zero is assumed to be the compilation directory and special
1518 // cased where used. It is not actually stored in the dwarf data. But an
1519 // empty entry here avoids off-by-one errors elsewhere in the code.
1520 handler_->DefineDir("", 0);
1521 // It is legal for the directory entry table to be empty.
1522 if (*lineptr) {
1523 uint32_t dirindex = 1;
1524 while (*lineptr) {
1525 const char* dirname = reinterpret_cast<const char*>(lineptr);
1526 handler_->DefineDir(dirname, dirindex);
1527 lineptr += strlen(dirname) + 1;
1528 dirindex++;
1529 }
1530 }
1531 lineptr++;
1532 // It is also legal for the file entry table to be empty.
1533
1534 // Similarly for file zero.
1535 handler_->DefineFile("", 0, 0, 0, 0);
1536 if (*lineptr) {
1537 uint32_t fileindex = 1;
1538 size_t len;
1539 while (*lineptr) {
1540 const char* filename = ReadStringForm(DW_FORM_string, &lineptr);
1541
1542 uint64_t dirindex = reader_->ReadUnsignedLEB128(lineptr, &len);
1543 lineptr += len;
1544
1545 uint64_t mod_time = reader_->ReadUnsignedLEB128(lineptr, &len);
1546 lineptr += len;
1547
1548 uint64_t filelength = reader_->ReadUnsignedLEB128(lineptr, &len);
1549 lineptr += len;
1550 handler_->DefineFile(filename, fileindex,
1551 static_cast<uint32_t>(dirindex), mod_time,
1552 filelength);
1553 fileindex++;
1554 }
1555 }
1556 lineptr++;
1557 } else {
1558 // Read the DWARF-5 directory table.
1559
1560 // Dwarf5 supports five different types and forms per directory- and
1561 // file-table entry. Theoretically, there could be duplicate entries
1562 // in this table, but that would be quite unusual.
1563 static const uint32_t kMaxTypesAndForms = 5;
1564 uint32_t content_types[kMaxTypesAndForms];
1565 uint32_t content_forms[kMaxTypesAndForms];
1566 uint32_t format_count;
1567 size_t len;
1568
1569 ReadTypesAndForms(&lineptr, content_types, content_forms, kMaxTypesAndForms,
1570 &format_count);
1571 uint32_t entry_count = reader_->ReadUnsignedLEB128(lineptr, &len);
1572 lineptr += len;
1573 for (uint32_t row = 0; row < entry_count; ++row) {
1574 const char* dirname = nullptr;
1575 for (uint32_t col = 0; col < format_count; ++col) {
1576 // The path is the only relevant content type for this implementation.
1577 if (content_types[col] == DW_LNCT_path) {
1578 dirname = ReadStringForm(content_forms[col], &lineptr);
1579 }
1580 }
1581 handler_->DefineDir(dirname, row);
1582 }
1583
1584 // Read the DWARF-5 filename table.
1585 ReadTypesAndForms(&lineptr, content_types, content_forms, kMaxTypesAndForms,
1586 &format_count);
1587 entry_count = reader_->ReadUnsignedLEB128(lineptr, &len);
1588 lineptr += len;
1589
1590 for (uint32_t row = 0; row < entry_count; ++row) {
1591 ReadFileRow(&lineptr, content_types, content_forms, row, format_count);
1592 }
1593 }
1594 after_header_ = lineptr;
1595 }
1596
1597 /* static */
ProcessOneOpcode(ByteReader * reader,LineInfoHandler * handler,const struct LineInfoHeader & header,const uint8_t * start,struct LineStateMachine * lsm,size_t * len,uintptr pc,bool * lsm_passes_pc)1598 bool LineInfo::ProcessOneOpcode(ByteReader* reader,
1599 LineInfoHandler* handler,
1600 const struct LineInfoHeader& header,
1601 const uint8_t* start,
1602 struct LineStateMachine* lsm,
1603 size_t* len,
1604 uintptr pc,
1605 bool* lsm_passes_pc) {
1606 size_t oplen = 0;
1607 size_t templen;
1608 uint8_t opcode = reader->ReadOneByte(start);
1609 oplen++;
1610 start++;
1611
1612 // If the opcode is great than the opcode_base, it is a special
1613 // opcode. Most line programs consist mainly of special opcodes.
1614 if (opcode >= header.opcode_base) {
1615 opcode -= header.opcode_base;
1616 const int64_t advance_address = (opcode / header.line_range)
1617 * header.min_insn_length;
1618 const int32_t advance_line = (opcode % header.line_range)
1619 + header.line_base;
1620
1621 // Check if the lsm passes "pc". If so, mark it as passed.
1622 if (lsm_passes_pc &&
1623 lsm->address <= pc && pc < lsm->address + advance_address) {
1624 *lsm_passes_pc = true;
1625 }
1626
1627 lsm->address += advance_address;
1628 lsm->line_num += advance_line;
1629 lsm->basic_block = true;
1630 *len = oplen;
1631 return true;
1632 }
1633
1634 // Otherwise, we have the regular opcodes
1635 switch (opcode) {
1636 case DW_LNS_copy: {
1637 lsm->basic_block = false;
1638 *len = oplen;
1639 return true;
1640 }
1641
1642 case DW_LNS_advance_pc: {
1643 uint64_t advance_address = reader->ReadUnsignedLEB128(start, &templen);
1644 oplen += templen;
1645
1646 // Check if the lsm passes "pc". If so, mark it as passed.
1647 if (lsm_passes_pc && lsm->address <= pc &&
1648 pc < lsm->address + header.min_insn_length * advance_address) {
1649 *lsm_passes_pc = true;
1650 }
1651
1652 lsm->address += header.min_insn_length * advance_address;
1653 }
1654 break;
1655 case DW_LNS_advance_line: {
1656 const int64_t advance_line = reader->ReadSignedLEB128(start, &templen);
1657 oplen += templen;
1658 lsm->line_num += static_cast<int32_t>(advance_line);
1659
1660 // With gcc 4.2.1, we can get the line_no here for the first time
1661 // since DW_LNS_advance_line is called after DW_LNE_set_address is
1662 // called. So we check if the lsm passes "pc" here, not in
1663 // DW_LNE_set_address.
1664 if (lsm_passes_pc && lsm->address == pc) {
1665 *lsm_passes_pc = true;
1666 }
1667 }
1668 break;
1669 case DW_LNS_set_file: {
1670 const uint64_t fileno = reader->ReadUnsignedLEB128(start, &templen);
1671 oplen += templen;
1672 lsm->file_num = static_cast<uint32_t>(fileno);
1673 }
1674 break;
1675 case DW_LNS_set_column: {
1676 const uint64_t colno = reader->ReadUnsignedLEB128(start, &templen);
1677 oplen += templen;
1678 lsm->column_num = static_cast<uint32_t>(colno);
1679 }
1680 break;
1681 case DW_LNS_negate_stmt: {
1682 lsm->is_stmt = !lsm->is_stmt;
1683 }
1684 break;
1685 case DW_LNS_set_basic_block: {
1686 lsm->basic_block = true;
1687 }
1688 break;
1689 case DW_LNS_fixed_advance_pc: {
1690 const uint16_t advance_address = reader->ReadTwoBytes(start);
1691 oplen += 2;
1692
1693 // Check if the lsm passes "pc". If so, mark it as passed.
1694 if (lsm_passes_pc &&
1695 lsm->address <= pc && pc < lsm->address + advance_address) {
1696 *lsm_passes_pc = true;
1697 }
1698
1699 lsm->address += advance_address;
1700 }
1701 break;
1702 case DW_LNS_const_add_pc: {
1703 const int64_t advance_address = header.min_insn_length
1704 * ((255 - header.opcode_base)
1705 / header.line_range);
1706
1707 // Check if the lsm passes "pc". If so, mark it as passed.
1708 if (lsm_passes_pc &&
1709 lsm->address <= pc && pc < lsm->address + advance_address) {
1710 *lsm_passes_pc = true;
1711 }
1712
1713 lsm->address += advance_address;
1714 }
1715 break;
1716 case DW_LNS_extended_op: {
1717 const uint64_t extended_op_len = reader->ReadUnsignedLEB128(start,
1718 &templen);
1719 start += templen;
1720 oplen += templen + extended_op_len;
1721
1722 const uint64_t extended_op = reader->ReadOneByte(start);
1723 start++;
1724
1725 switch (extended_op) {
1726 case DW_LNE_end_sequence: {
1727 lsm->end_sequence = true;
1728 *len = oplen;
1729 return true;
1730 }
1731 break;
1732 case DW_LNE_set_address: {
1733 // With gcc 4.2.1, we cannot tell the line_no here since
1734 // DW_LNE_set_address is called before DW_LNS_advance_line is
1735 // called. So we do not check if the lsm passes "pc" here. See
1736 // also the comment in DW_LNS_advance_line.
1737 uint64_t address = reader->ReadAddress(start);
1738 lsm->address = address;
1739 }
1740 break;
1741 case DW_LNE_define_file: {
1742 const char* filename = reinterpret_cast<const char*>(start);
1743
1744 templen = strlen(filename) + 1;
1745 start += templen;
1746
1747 uint64_t dirindex = reader->ReadUnsignedLEB128(start, &templen);
1748 oplen += templen;
1749
1750 const uint64_t mod_time = reader->ReadUnsignedLEB128(start,
1751 &templen);
1752 oplen += templen;
1753
1754 const uint64_t filelength = reader->ReadUnsignedLEB128(start,
1755 &templen);
1756 oplen += templen;
1757
1758 if (handler) {
1759 handler->DefineFile(filename, -1, static_cast<uint32_t>(dirindex),
1760 mod_time, filelength);
1761 }
1762 }
1763 break;
1764 }
1765 }
1766 break;
1767
1768 default: {
1769 // Ignore unknown opcode silently
1770 if (header.std_opcode_lengths) {
1771 for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) {
1772 reader->ReadUnsignedLEB128(start, &templen);
1773 start += templen;
1774 oplen += templen;
1775 }
1776 }
1777 }
1778 break;
1779 }
1780 *len = oplen;
1781 return false;
1782 }
1783
ReadLines()1784 void LineInfo::ReadLines() {
1785 struct LineStateMachine lsm;
1786
1787 // lengthstart is the place the length field is based on.
1788 // It is the point in the header after the initial length field
1789 const uint8_t* lengthstart = buffer_;
1790
1791 // In 64 bit dwarf, the initial length is 12 bytes, because of the
1792 // 0xffffffff at the start.
1793 if (reader_->OffsetSize() == 8)
1794 lengthstart += 12;
1795 else
1796 lengthstart += 4;
1797
1798 const uint8_t* lineptr = after_header_;
1799 lsm.Reset(header_.default_is_stmt);
1800
1801 // The LineInfoHandler interface expects each line's length along
1802 // with its address, but DWARF only provides addresses (sans
1803 // length), and an end-of-sequence address; one infers the length
1804 // from the next address. So we report a line only when we get the
1805 // next line's address, or the end-of-sequence address.
1806 bool have_pending_line = false;
1807 uint64_t pending_address = 0;
1808 uint32_t pending_file_num = 0, pending_line_num = 0, pending_column_num = 0;
1809
1810 while (lineptr < lengthstart + header_.total_length) {
1811 size_t oplength;
1812 bool add_row = ProcessOneOpcode(reader_, handler_, header_,
1813 lineptr, &lsm, &oplength, (uintptr)-1,
1814 NULL);
1815 if (add_row) {
1816 if (have_pending_line)
1817 handler_->AddLine(pending_address, lsm.address - pending_address,
1818 pending_file_num, pending_line_num,
1819 pending_column_num);
1820 if (lsm.end_sequence) {
1821 lsm.Reset(header_.default_is_stmt);
1822 have_pending_line = false;
1823 } else {
1824 pending_address = lsm.address;
1825 pending_file_num = lsm.file_num;
1826 pending_line_num = lsm.line_num;
1827 pending_column_num = lsm.column_num;
1828 have_pending_line = true;
1829 }
1830 }
1831 lineptr += oplength;
1832 }
1833
1834 after_header_ = lengthstart + header_.total_length;
1835 }
1836
ReadRanges(enum DwarfForm form,uint64_t data)1837 bool RangeListReader::ReadRanges(enum DwarfForm form, uint64_t data) {
1838 if (form == DW_FORM_sec_offset) {
1839 if (cu_info_->version_ <= 4) {
1840 return ReadDebugRanges(data);
1841 } else {
1842 return ReadDebugRngList(data);
1843 }
1844 } else if (form == DW_FORM_rnglistx) {
1845 if (cu_info_->ranges_base_ == 0) {
1846 // In split dwarf, there's no DW_AT_rnglists_base attribute, range_base
1847 // will just be the first byte after the header.
1848 cu_info_->ranges_base_ = reader_->OffsetSize() == 4? 12: 20;
1849 }
1850 offset_array_ = cu_info_->ranges_base_;
1851 uint64_t index_offset = reader_->OffsetSize() * data;
1852 uint64_t range_list_offset =
1853 reader_->ReadOffset(cu_info_->buffer_ + offset_array_ + index_offset);
1854
1855 return ReadDebugRngList(offset_array_ + range_list_offset);
1856 }
1857 return false;
1858 }
1859
ReadDebugRanges(uint64_t offset)1860 bool RangeListReader::ReadDebugRanges(uint64_t offset) {
1861 const uint64_t max_address =
1862 (reader_->AddressSize() == 4) ? 0xffffffffUL
1863 : 0xffffffffffffffffULL;
1864 const uint64_t entry_size = reader_->AddressSize() * 2;
1865 bool list_end = false;
1866
1867 do {
1868 if (offset > cu_info_->size_ - entry_size) {
1869 return false; // Invalid range detected
1870 }
1871
1872 uint64_t start_address = reader_->ReadAddress(cu_info_->buffer_ + offset);
1873 uint64_t end_address = reader_->ReadAddress(
1874 cu_info_->buffer_ + offset + reader_->AddressSize());
1875
1876 if (start_address == max_address) { // Base address selection
1877 cu_info_->base_address_ = end_address;
1878 } else if (start_address == 0 && end_address == 0) { // End-of-list
1879 handler_->Finish();
1880 list_end = true;
1881 } else { // Add a range entry
1882 handler_->AddRange(start_address + cu_info_->base_address_,
1883 end_address + cu_info_->base_address_);
1884 }
1885
1886 offset += entry_size;
1887 } while (!list_end);
1888
1889 return true;
1890 }
1891
ReadDebugRngList(uint64_t offset)1892 bool RangeListReader::ReadDebugRngList(uint64_t offset) {
1893 uint64_t start = 0;
1894 uint64_t end = 0;
1895 uint64_t range_len = 0;
1896 uint64_t index = 0;
1897 // A uleb128's length isn't known until after it has been read, so overruns
1898 // are only caught after an entire entry.
1899 while (offset < cu_info_->size_) {
1900 uint8_t entry_type = reader_->ReadOneByte(cu_info_->buffer_ + offset);
1901 offset += 1;
1902 // Handle each entry type per Dwarf 5 Standard, section 2.17.3.
1903 switch (entry_type) {
1904 case DW_RLE_end_of_list:
1905 handler_->Finish();
1906 return true;
1907 case DW_RLE_base_addressx:
1908 offset += ReadULEB(offset, &index);
1909 cu_info_->base_address_ = GetAddressAtIndex(index);
1910 break;
1911 case DW_RLE_startx_endx:
1912 offset += ReadULEB(offset, &index);
1913 start = GetAddressAtIndex(index);
1914 offset += ReadULEB(offset, &index);
1915 end = GetAddressAtIndex(index);
1916 handler_->AddRange(start, end);
1917 break;
1918 case DW_RLE_startx_length:
1919 offset += ReadULEB(offset, &index);
1920 start = GetAddressAtIndex(index);
1921 offset += ReadULEB(offset, &range_len);
1922 handler_->AddRange(start, start + range_len);
1923 break;
1924 case DW_RLE_offset_pair:
1925 offset += ReadULEB(offset, &start);
1926 offset += ReadULEB(offset, &end);
1927 handler_->AddRange(start + cu_info_->base_address_,
1928 end + cu_info_->base_address_);
1929 break;
1930 case DW_RLE_base_address:
1931 offset += ReadAddress(offset, &cu_info_->base_address_);
1932 break;
1933 case DW_RLE_start_end:
1934 offset += ReadAddress(offset, &start);
1935 offset += ReadAddress(offset, &end);
1936 handler_->AddRange(start, end);
1937 break;
1938 case DW_RLE_start_length:
1939 offset += ReadAddress(offset, &start);
1940 offset += ReadULEB(offset, &end);
1941 handler_->AddRange(start, start + end);
1942 break;
1943 }
1944 }
1945 return false;
1946 }
1947
1948 // A DWARF rule for recovering the address or value of a register, or
1949 // computing the canonical frame address. There is one subclass of this for
1950 // each '*Rule' member function in CallFrameInfo::Handler.
1951 //
1952 // It's annoying that we have to handle Rules using pointers (because
1953 // the concrete instances can have an arbitrary size). They're small,
1954 // so it would be much nicer if we could just handle them by value
1955 // instead of fretting about ownership and destruction.
1956 //
1957 // It seems like all these could simply be instances of std::tr1::bind,
1958 // except that we need instances to be EqualityComparable, too.
1959 //
1960 // This could logically be nested within State, but then the qualified names
1961 // get horrendous.
1962 class CallFrameInfo::Rule {
1963 public:
~Rule()1964 virtual ~Rule() { }
1965
1966 // Tell HANDLER that, at ADDRESS in the program, REG can be recovered using
1967 // this rule. If REG is kCFARegister, then this rule describes how to compute
1968 // the canonical frame address. Return what the HANDLER member function
1969 // returned.
1970 virtual bool Handle(Handler* handler,
1971 uint64_t address, int reg) const = 0;
1972
1973 // Equality on rules. We use these to decide which rules we need
1974 // to report after a DW_CFA_restore_state instruction.
1975 virtual bool operator==(const Rule& rhs) const = 0;
1976
operator !=(const Rule & rhs) const1977 bool operator!=(const Rule& rhs) const { return ! (*this == rhs); }
1978
1979 // Return a pointer to a copy of this rule.
1980 virtual Rule* Copy() const = 0;
1981
1982 // If this is a base+offset rule, change its base register to REG.
1983 // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.)
SetBaseRegister(unsigned reg)1984 virtual void SetBaseRegister(unsigned reg) { }
1985
1986 // If this is a base+offset rule, change its offset to OFFSET. Otherwise,
1987 // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.)
SetOffset(long long offset)1988 virtual void SetOffset(long long offset) { }
1989 };
1990
1991 // Rule: the value the register had in the caller cannot be recovered.
1992 class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule {
1993 public:
UndefinedRule()1994 UndefinedRule() { }
~UndefinedRule()1995 ~UndefinedRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1996 bool Handle(Handler* handler, uint64_t address, int reg) const {
1997 return handler->UndefinedRule(address, reg);
1998 }
operator ==(const Rule & rhs) const1999 bool operator==(const Rule& rhs) const {
2000 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2001 // been carefully considered; cheap RTTI-like workarounds are forbidden.
2002 const UndefinedRule* our_rhs = dynamic_cast<const UndefinedRule*>(&rhs);
2003 return (our_rhs != NULL);
2004 }
Copy() const2005 Rule* Copy() const { return new UndefinedRule(*this); }
2006 };
2007
2008 // Rule: the register's value is the same as that it had in the caller.
2009 class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule {
2010 public:
SameValueRule()2011 SameValueRule() { }
~SameValueRule()2012 ~SameValueRule() { }
Handle(Handler * handler,uint64_t address,int reg) const2013 bool Handle(Handler* handler, uint64_t address, int reg) const {
2014 return handler->SameValueRule(address, reg);
2015 }
operator ==(const Rule & rhs) const2016 bool operator==(const Rule& rhs) const {
2017 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2018 // been carefully considered; cheap RTTI-like workarounds are forbidden.
2019 const SameValueRule* our_rhs = dynamic_cast<const SameValueRule*>(&rhs);
2020 return (our_rhs != NULL);
2021 }
Copy() const2022 Rule* Copy() const { return new SameValueRule(*this); }
2023 };
2024
2025 // Rule: the register is saved at OFFSET from BASE_REGISTER. BASE_REGISTER
2026 // may be CallFrameInfo::Handler::kCFARegister.
2027 class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule {
2028 public:
OffsetRule(int base_register,long offset)2029 OffsetRule(int base_register, long offset)
2030 : base_register_(base_register), offset_(offset) { }
~OffsetRule()2031 ~OffsetRule() { }
Handle(Handler * handler,uint64_t address,int reg) const2032 bool Handle(Handler* handler, uint64_t address, int reg) const {
2033 return handler->OffsetRule(address, reg, base_register_, offset_);
2034 }
operator ==(const Rule & rhs) const2035 bool operator==(const Rule& rhs) const {
2036 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2037 // been carefully considered; cheap RTTI-like workarounds are forbidden.
2038 const OffsetRule* our_rhs = dynamic_cast<const OffsetRule*>(&rhs);
2039 return (our_rhs &&
2040 base_register_ == our_rhs->base_register_ &&
2041 offset_ == our_rhs->offset_);
2042 }
Copy() const2043 Rule* Copy() const { return new OffsetRule(*this); }
2044 // We don't actually need SetBaseRegister or SetOffset here, since they
2045 // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it
2046 // doesn't make sense to use OffsetRule for computing the CFA: it
2047 // computes the address at which a register is saved, not a value.
2048 private:
2049 int base_register_;
2050 long offset_;
2051 };
2052
2053 // Rule: the value the register had in the caller is the value of
2054 // BASE_REGISTER plus offset. BASE_REGISTER may be
2055 // CallFrameInfo::Handler::kCFARegister.
2056 class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule {
2057 public:
ValOffsetRule(int base_register,long offset)2058 ValOffsetRule(int base_register, long offset)
2059 : base_register_(base_register), offset_(offset) { }
~ValOffsetRule()2060 ~ValOffsetRule() { }
Handle(Handler * handler,uint64_t address,int reg) const2061 bool Handle(Handler* handler, uint64_t address, int reg) const {
2062 return handler->ValOffsetRule(address, reg, base_register_, offset_);
2063 }
operator ==(const Rule & rhs) const2064 bool operator==(const Rule& rhs) const {
2065 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2066 // been carefully considered; cheap RTTI-like workarounds are forbidden.
2067 const ValOffsetRule* our_rhs = dynamic_cast<const ValOffsetRule*>(&rhs);
2068 return (our_rhs &&
2069 base_register_ == our_rhs->base_register_ &&
2070 offset_ == our_rhs->offset_);
2071 }
Copy() const2072 Rule* Copy() const { return new ValOffsetRule(*this); }
SetBaseRegister(unsigned reg)2073 void SetBaseRegister(unsigned reg) { base_register_ = reg; }
SetOffset(long long offset)2074 void SetOffset(long long offset) { offset_ = offset; }
2075 private:
2076 int base_register_;
2077 long offset_;
2078 };
2079
2080 // Rule: the register has been saved in another register REGISTER_NUMBER_.
2081 class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule {
2082 public:
RegisterRule(int register_number)2083 explicit RegisterRule(int register_number)
2084 : register_number_(register_number) { }
~RegisterRule()2085 ~RegisterRule() { }
Handle(Handler * handler,uint64_t address,int reg) const2086 bool Handle(Handler* handler, uint64_t address, int reg) const {
2087 return handler->RegisterRule(address, reg, register_number_);
2088 }
operator ==(const Rule & rhs) const2089 bool operator==(const Rule& rhs) const {
2090 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2091 // been carefully considered; cheap RTTI-like workarounds are forbidden.
2092 const RegisterRule* our_rhs = dynamic_cast<const RegisterRule*>(&rhs);
2093 return (our_rhs && register_number_ == our_rhs->register_number_);
2094 }
Copy() const2095 Rule* Copy() const { return new RegisterRule(*this); }
2096 private:
2097 int register_number_;
2098 };
2099
2100 // Rule: EXPRESSION evaluates to the address at which the register is saved.
2101 class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule {
2102 public:
ExpressionRule(const string & expression)2103 explicit ExpressionRule(const string& expression)
2104 : expression_(expression) { }
~ExpressionRule()2105 ~ExpressionRule() { }
Handle(Handler * handler,uint64_t address,int reg) const2106 bool Handle(Handler* handler, uint64_t address, int reg) const {
2107 return handler->ExpressionRule(address, reg, expression_);
2108 }
operator ==(const Rule & rhs) const2109 bool operator==(const Rule& rhs) const {
2110 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2111 // been carefully considered; cheap RTTI-like workarounds are forbidden.
2112 const ExpressionRule* our_rhs = dynamic_cast<const ExpressionRule*>(&rhs);
2113 return (our_rhs && expression_ == our_rhs->expression_);
2114 }
Copy() const2115 Rule* Copy() const { return new ExpressionRule(*this); }
2116 private:
2117 string expression_;
2118 };
2119
2120 // Rule: EXPRESSION evaluates to the address at which the register is saved.
2121 class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule {
2122 public:
ValExpressionRule(const string & expression)2123 explicit ValExpressionRule(const string& expression)
2124 : expression_(expression) { }
~ValExpressionRule()2125 ~ValExpressionRule() { }
Handle(Handler * handler,uint64_t address,int reg) const2126 bool Handle(Handler* handler, uint64_t address, int reg) const {
2127 return handler->ValExpressionRule(address, reg, expression_);
2128 }
operator ==(const Rule & rhs) const2129 bool operator==(const Rule& rhs) const {
2130 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2131 // been carefully considered; cheap RTTI-like workarounds are forbidden.
2132 const ValExpressionRule* our_rhs =
2133 dynamic_cast<const ValExpressionRule*>(&rhs);
2134 return (our_rhs && expression_ == our_rhs->expression_);
2135 }
Copy() const2136 Rule* Copy() const { return new ValExpressionRule(*this); }
2137 private:
2138 string expression_;
2139 };
2140
2141 // A map from register numbers to rules.
2142 class CallFrameInfo::RuleMap {
2143 public:
RuleMap()2144 RuleMap() : cfa_rule_(NULL) { }
RuleMap(const RuleMap & rhs)2145 RuleMap(const RuleMap& rhs) : cfa_rule_(NULL) { *this = rhs; }
~RuleMap()2146 ~RuleMap() { Clear(); }
2147
2148 RuleMap& operator=(const RuleMap& rhs);
2149
2150 // Set the rule for computing the CFA to RULE. Take ownership of RULE.
SetCFARule(Rule * rule)2151 void SetCFARule(Rule* rule) { delete cfa_rule_; cfa_rule_ = rule; }
2152
2153 // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains
2154 // ownership of the rule. We use this for DW_CFA_def_cfa_offset and
2155 // DW_CFA_def_cfa_register, and for detecting references to the CFA before
2156 // a rule for it has been established.
CFARule() const2157 Rule* CFARule() const { return cfa_rule_; }
2158
2159 // Return the rule for REG, or NULL if there is none. The caller takes
2160 // ownership of the result.
2161 Rule* RegisterRule(int reg) const;
2162
2163 // Set the rule for computing REG to RULE. Take ownership of RULE.
2164 void SetRegisterRule(int reg, Rule* rule);
2165
2166 // Make all the appropriate calls to HANDLER as if we were changing from
2167 // this RuleMap to NEW_RULES at ADDRESS. We use this to implement
2168 // DW_CFA_restore_state, where lots of rules can change simultaneously.
2169 // Return true if all handlers returned true; otherwise, return false.
2170 bool HandleTransitionTo(Handler* handler, uint64_t address,
2171 const RuleMap& new_rules) const;
2172
2173 private:
2174 // A map from register numbers to Rules.
2175 typedef std::map<int, Rule*> RuleByNumber;
2176
2177 // Remove all register rules and clear cfa_rule_.
2178 void Clear();
2179
2180 // The rule for computing the canonical frame address. This RuleMap owns
2181 // this rule.
2182 Rule* cfa_rule_;
2183
2184 // A map from register numbers to postfix expressions to recover
2185 // their values. This RuleMap owns the Rules the map refers to.
2186 RuleByNumber registers_;
2187 };
2188
operator =(const RuleMap & rhs)2189 CallFrameInfo::RuleMap& CallFrameInfo::RuleMap::operator=(const RuleMap& rhs) {
2190 Clear();
2191 // Since each map owns the rules it refers to, assignment must copy them.
2192 if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy();
2193 for (RuleByNumber::const_iterator it = rhs.registers_.begin();
2194 it != rhs.registers_.end(); it++)
2195 registers_[it->first] = it->second->Copy();
2196 return *this;
2197 }
2198
RegisterRule(int reg) const2199 CallFrameInfo::Rule* CallFrameInfo::RuleMap::RegisterRule(int reg) const {
2200 assert(reg != Handler::kCFARegister);
2201 RuleByNumber::const_iterator it = registers_.find(reg);
2202 if (it != registers_.end())
2203 return it->second->Copy();
2204 else
2205 return NULL;
2206 }
2207
SetRegisterRule(int reg,Rule * rule)2208 void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule* rule) {
2209 assert(reg != Handler::kCFARegister);
2210 assert(rule);
2211 Rule** slot = ®isters_[reg];
2212 delete *slot;
2213 *slot = rule;
2214 }
2215
HandleTransitionTo(Handler * handler,uint64_t address,const RuleMap & new_rules) const2216 bool CallFrameInfo::RuleMap::HandleTransitionTo(
2217 Handler* handler,
2218 uint64_t address,
2219 const RuleMap& new_rules) const {
2220 // Transition from cfa_rule_ to new_rules.cfa_rule_.
2221 if (cfa_rule_ && new_rules.cfa_rule_) {
2222 if (*cfa_rule_ != *new_rules.cfa_rule_ &&
2223 !new_rules.cfa_rule_->Handle(handler, address,
2224 Handler::kCFARegister))
2225 return false;
2226 } else if (cfa_rule_) {
2227 // this RuleMap has a CFA rule but new_rules doesn't.
2228 // CallFrameInfo::Handler has no way to handle this --- and shouldn't;
2229 // it's garbage input. The instruction interpreter should have
2230 // detected this and warned, so take no action here.
2231 } else if (new_rules.cfa_rule_) {
2232 // This shouldn't be possible: NEW_RULES is some prior state, and
2233 // there's no way to remove entries.
2234 assert(0);
2235 } else {
2236 // Both CFA rules are empty. No action needed.
2237 }
2238
2239 // Traverse the two maps in order by register number, and report
2240 // whatever differences we find.
2241 RuleByNumber::const_iterator old_it = registers_.begin();
2242 RuleByNumber::const_iterator new_it = new_rules.registers_.begin();
2243 while (old_it != registers_.end() && new_it != new_rules.registers_.end()) {
2244 if (old_it->first < new_it->first) {
2245 // This RuleMap has an entry for old_it->first, but NEW_RULES
2246 // doesn't.
2247 //
2248 // This isn't really the right thing to do, but since CFI generally
2249 // only mentions callee-saves registers, and GCC's convention for
2250 // callee-saves registers is that they are unchanged, it's a good
2251 // approximation.
2252 if (!handler->SameValueRule(address, old_it->first))
2253 return false;
2254 old_it++;
2255 } else if (old_it->first > new_it->first) {
2256 // NEW_RULES has entry for new_it->first, but this RuleMap
2257 // doesn't. This shouldn't be possible: NEW_RULES is some prior
2258 // state, and there's no way to remove entries.
2259 assert(0);
2260 } else {
2261 // Both maps have an entry for this register. Report the new
2262 // rule if it is different.
2263 if (*old_it->second != *new_it->second &&
2264 !new_it->second->Handle(handler, address, new_it->first))
2265 return false;
2266 new_it++, old_it++;
2267 }
2268 }
2269 // Finish off entries from this RuleMap with no counterparts in new_rules.
2270 while (old_it != registers_.end()) {
2271 if (!handler->SameValueRule(address, old_it->first))
2272 return false;
2273 old_it++;
2274 }
2275 // Since we only make transitions from a rule set to some previously
2276 // saved rule set, and we can only add rules to the map, NEW_RULES
2277 // must have fewer rules than *this.
2278 assert(new_it == new_rules.registers_.end());
2279
2280 return true;
2281 }
2282
2283 // Remove all register rules and clear cfa_rule_.
Clear()2284 void CallFrameInfo::RuleMap::Clear() {
2285 delete cfa_rule_;
2286 cfa_rule_ = NULL;
2287 for (RuleByNumber::iterator it = registers_.begin();
2288 it != registers_.end(); it++)
2289 delete it->second;
2290 registers_.clear();
2291 }
2292
2293 // The state of the call frame information interpreter as it processes
2294 // instructions from a CIE and FDE.
2295 class CallFrameInfo::State {
2296 public:
2297 // Create a call frame information interpreter state with the given
2298 // reporter, reader, handler, and initial call frame info address.
State(ByteReader * reader,Handler * handler,Reporter * reporter,uint64_t address)2299 State(ByteReader* reader, Handler* handler, Reporter* reporter,
2300 uint64_t address)
2301 : reader_(reader), handler_(handler), reporter_(reporter),
2302 address_(address), entry_(NULL), cursor_(NULL) { }
2303
2304 // Interpret instructions from CIE, save the resulting rule set for
2305 // DW_CFA_restore instructions, and return true. On error, report
2306 // the problem to reporter_ and return false.
2307 bool InterpretCIE(const CIE& cie);
2308
2309 // Interpret instructions from FDE, and return true. On error,
2310 // report the problem to reporter_ and return false.
2311 bool InterpretFDE(const FDE& fde);
2312
2313 private:
2314 // The operands of a CFI instruction, for ParseOperands.
2315 struct Operands {
2316 unsigned register_number; // A register number.
2317 uint64_t offset; // An offset or address.
2318 long signed_offset; // A signed offset.
2319 string expression; // A DWARF expression.
2320 };
2321
2322 // Parse CFI instruction operands from STATE's instruction stream as
2323 // described by FORMAT. On success, populate OPERANDS with the
2324 // results, and return true. On failure, report the problem and
2325 // return false.
2326 //
2327 // Each character of FORMAT should be one of the following:
2328 //
2329 // 'r' unsigned LEB128 register number (OPERANDS->register_number)
2330 // 'o' unsigned LEB128 offset (OPERANDS->offset)
2331 // 's' signed LEB128 offset (OPERANDS->signed_offset)
2332 // 'a' machine-size address (OPERANDS->offset)
2333 // (If the CIE has a 'z' augmentation string, 'a' uses the
2334 // encoding specified by the 'R' argument.)
2335 // '1' a one-byte offset (OPERANDS->offset)
2336 // '2' a two-byte offset (OPERANDS->offset)
2337 // '4' a four-byte offset (OPERANDS->offset)
2338 // '8' an eight-byte offset (OPERANDS->offset)
2339 // 'e' a DW_FORM_block holding a (OPERANDS->expression)
2340 // DWARF expression
2341 bool ParseOperands(const char* format, Operands* operands);
2342
2343 // Interpret one CFI instruction from STATE's instruction stream, update
2344 // STATE, report any rule changes to handler_, and return true. On
2345 // failure, report the problem and return false.
2346 bool DoInstruction();
2347
2348 // The following Do* member functions are subroutines of DoInstruction,
2349 // factoring out the actual work of operations that have several
2350 // different encodings.
2351
2352 // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and
2353 // return true. On failure, report and return false. (Used for
2354 // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.)
2355 bool DoDefCFA(unsigned base_register, long offset);
2356
2357 // Change the offset of the CFA rule to OFFSET, and return true. On
2358 // failure, report and return false. (Subroutine for
2359 // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.)
2360 bool DoDefCFAOffset(long offset);
2361
2362 // Specify that REG can be recovered using RULE, and return true. On
2363 // failure, report and return false.
2364 bool DoRule(unsigned reg, Rule* rule);
2365
2366 // Specify that REG can be found at OFFSET from the CFA, and return true.
2367 // On failure, report and return false. (Subroutine for DW_CFA_offset,
2368 // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.)
2369 bool DoOffset(unsigned reg, long offset);
2370
2371 // Specify that the caller's value for REG is the CFA plus OFFSET,
2372 // and return true. On failure, report and return false. (Subroutine
2373 // for DW_CFA_val_offset and DW_CFA_val_offset_sf.)
2374 bool DoValOffset(unsigned reg, long offset);
2375
2376 // Restore REG to the rule established in the CIE, and return true. On
2377 // failure, report and return false. (Subroutine for DW_CFA_restore and
2378 // DW_CFA_restore_extended.)
2379 bool DoRestore(unsigned reg);
2380
2381 // Return the section offset of the instruction at cursor. For use
2382 // in error messages.
CursorOffset()2383 uint64_t CursorOffset() { return entry_->offset + (cursor_ - entry_->start); }
2384
2385 // Report that entry_ is incomplete, and return false. For brevity.
ReportIncomplete()2386 bool ReportIncomplete() {
2387 reporter_->Incomplete(entry_->offset, entry_->kind);
2388 return false;
2389 }
2390
2391 // For reading multi-byte values with the appropriate endianness.
2392 ByteReader* reader_;
2393
2394 // The handler to which we should report the data we find.
2395 Handler* handler_;
2396
2397 // For reporting problems in the info we're parsing.
2398 Reporter* reporter_;
2399
2400 // The code address to which the next instruction in the stream applies.
2401 uint64_t address_;
2402
2403 // The entry whose instructions we are currently processing. This is
2404 // first a CIE, and then an FDE.
2405 const Entry* entry_;
2406
2407 // The next instruction to process.
2408 const uint8_t* cursor_;
2409
2410 // The current set of rules.
2411 RuleMap rules_;
2412
2413 // The set of rules established by the CIE, used by DW_CFA_restore
2414 // and DW_CFA_restore_extended. We set this after interpreting the
2415 // CIE's instructions.
2416 RuleMap cie_rules_;
2417
2418 // A stack of saved states, for DW_CFA_remember_state and
2419 // DW_CFA_restore_state.
2420 std::stack<RuleMap> saved_rules_;
2421 };
2422
InterpretCIE(const CIE & cie)2423 bool CallFrameInfo::State::InterpretCIE(const CIE& cie) {
2424 entry_ = &cie;
2425 cursor_ = entry_->instructions;
2426 while (cursor_ < entry_->end)
2427 if (!DoInstruction())
2428 return false;
2429 // Note the rules established by the CIE, for use by DW_CFA_restore
2430 // and DW_CFA_restore_extended.
2431 cie_rules_ = rules_;
2432 return true;
2433 }
2434
InterpretFDE(const FDE & fde)2435 bool CallFrameInfo::State::InterpretFDE(const FDE& fde) {
2436 entry_ = &fde;
2437 cursor_ = entry_->instructions;
2438 while (cursor_ < entry_->end)
2439 if (!DoInstruction())
2440 return false;
2441 return true;
2442 }
2443
ParseOperands(const char * format,Operands * operands)2444 bool CallFrameInfo::State::ParseOperands(const char* format,
2445 Operands* operands) {
2446 size_t len;
2447 const char* operand;
2448
2449 for (operand = format; *operand; operand++) {
2450 size_t bytes_left = entry_->end - cursor_;
2451 switch (*operand) {
2452 case 'r':
2453 operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len);
2454 if (len > bytes_left) return ReportIncomplete();
2455 cursor_ += len;
2456 break;
2457
2458 case 'o':
2459 operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len);
2460 if (len > bytes_left) return ReportIncomplete();
2461 cursor_ += len;
2462 break;
2463
2464 case 's':
2465 operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len);
2466 if (len > bytes_left) return ReportIncomplete();
2467 cursor_ += len;
2468 break;
2469
2470 case 'a':
2471 operands->offset =
2472 reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding,
2473 &len);
2474 if (len > bytes_left) return ReportIncomplete();
2475 cursor_ += len;
2476 break;
2477
2478 case '1':
2479 if (1 > bytes_left) return ReportIncomplete();
2480 operands->offset = static_cast<unsigned char>(*cursor_++);
2481 break;
2482
2483 case '2':
2484 if (2 > bytes_left) return ReportIncomplete();
2485 operands->offset = reader_->ReadTwoBytes(cursor_);
2486 cursor_ += 2;
2487 break;
2488
2489 case '4':
2490 if (4 > bytes_left) return ReportIncomplete();
2491 operands->offset = reader_->ReadFourBytes(cursor_);
2492 cursor_ += 4;
2493 break;
2494
2495 case '8':
2496 if (8 > bytes_left) return ReportIncomplete();
2497 operands->offset = reader_->ReadEightBytes(cursor_);
2498 cursor_ += 8;
2499 break;
2500
2501 case 'e': {
2502 size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len);
2503 if (len > bytes_left || expression_length > bytes_left - len)
2504 return ReportIncomplete();
2505 cursor_ += len;
2506 operands->expression = string(reinterpret_cast<const char*>(cursor_),
2507 expression_length);
2508 cursor_ += expression_length;
2509 break;
2510 }
2511
2512 default:
2513 assert(0);
2514 }
2515 }
2516
2517 return true;
2518 }
2519
DoInstruction()2520 bool CallFrameInfo::State::DoInstruction() {
2521 CIE* cie = entry_->cie;
2522 Operands ops;
2523
2524 // Our entry's kind should have been set by now.
2525 assert(entry_->kind != kUnknown);
2526
2527 // We shouldn't have been invoked unless there were more
2528 // instructions to parse.
2529 assert(cursor_ < entry_->end);
2530
2531 unsigned opcode = *cursor_++;
2532 if ((opcode & 0xc0) != 0) {
2533 switch (opcode & 0xc0) {
2534 // Advance the address.
2535 case DW_CFA_advance_loc: {
2536 size_t code_offset = opcode & 0x3f;
2537 address_ += code_offset * cie->code_alignment_factor;
2538 break;
2539 }
2540
2541 // Find a register at an offset from the CFA.
2542 case DW_CFA_offset:
2543 if (!ParseOperands("o", &ops) ||
2544 !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor))
2545 return false;
2546 break;
2547
2548 // Restore the rule established for a register by the CIE.
2549 case DW_CFA_restore:
2550 if (!DoRestore(opcode & 0x3f)) return false;
2551 break;
2552
2553 // The 'if' above should have excluded this possibility.
2554 default:
2555 assert(0);
2556 }
2557
2558 // Return here, so the big switch below won't be indented.
2559 return true;
2560 }
2561
2562 switch (opcode) {
2563 // Set the address.
2564 case DW_CFA_set_loc:
2565 if (!ParseOperands("a", &ops)) return false;
2566 address_ = ops.offset;
2567 break;
2568
2569 // Advance the address.
2570 case DW_CFA_advance_loc1:
2571 if (!ParseOperands("1", &ops)) return false;
2572 address_ += ops.offset * cie->code_alignment_factor;
2573 break;
2574
2575 // Advance the address.
2576 case DW_CFA_advance_loc2:
2577 if (!ParseOperands("2", &ops)) return false;
2578 address_ += ops.offset * cie->code_alignment_factor;
2579 break;
2580
2581 // Advance the address.
2582 case DW_CFA_advance_loc4:
2583 if (!ParseOperands("4", &ops)) return false;
2584 address_ += ops.offset * cie->code_alignment_factor;
2585 break;
2586
2587 // Advance the address.
2588 case DW_CFA_MIPS_advance_loc8:
2589 if (!ParseOperands("8", &ops)) return false;
2590 address_ += ops.offset * cie->code_alignment_factor;
2591 break;
2592
2593 // Compute the CFA by adding an offset to a register.
2594 case DW_CFA_def_cfa:
2595 if (!ParseOperands("ro", &ops) ||
2596 !DoDefCFA(ops.register_number, ops.offset))
2597 return false;
2598 break;
2599
2600 // Compute the CFA by adding an offset to a register.
2601 case DW_CFA_def_cfa_sf:
2602 if (!ParseOperands("rs", &ops) ||
2603 !DoDefCFA(ops.register_number,
2604 ops.signed_offset * cie->data_alignment_factor))
2605 return false;
2606 break;
2607
2608 // Change the base register used to compute the CFA.
2609 case DW_CFA_def_cfa_register: {
2610 if (!ParseOperands("r", &ops)) return false;
2611 Rule* cfa_rule = rules_.CFARule();
2612 if (!cfa_rule) {
2613 if (!DoDefCFA(ops.register_number, ops.offset)) {
2614 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2615 return false;
2616 }
2617 } else {
2618 cfa_rule->SetBaseRegister(ops.register_number);
2619 if (!cfa_rule->Handle(handler_, address_,
2620 Handler::kCFARegister))
2621 return false;
2622 }
2623 break;
2624 }
2625
2626 // Change the offset used to compute the CFA.
2627 case DW_CFA_def_cfa_offset:
2628 if (!ParseOperands("o", &ops) ||
2629 !DoDefCFAOffset(ops.offset))
2630 return false;
2631 break;
2632
2633 // Change the offset used to compute the CFA.
2634 case DW_CFA_def_cfa_offset_sf:
2635 if (!ParseOperands("s", &ops) ||
2636 !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
2637 return false;
2638 break;
2639
2640 // Specify an expression whose value is the CFA.
2641 case DW_CFA_def_cfa_expression: {
2642 if (!ParseOperands("e", &ops))
2643 return false;
2644 Rule* rule = new ValExpressionRule(ops.expression);
2645 rules_.SetCFARule(rule);
2646 if (!rule->Handle(handler_, address_,
2647 Handler::kCFARegister))
2648 return false;
2649 break;
2650 }
2651
2652 // The register's value cannot be recovered.
2653 case DW_CFA_undefined: {
2654 if (!ParseOperands("r", &ops) ||
2655 !DoRule(ops.register_number, new UndefinedRule()))
2656 return false;
2657 break;
2658 }
2659
2660 // The register's value is unchanged from its value in the caller.
2661 case DW_CFA_same_value: {
2662 if (!ParseOperands("r", &ops) ||
2663 !DoRule(ops.register_number, new SameValueRule()))
2664 return false;
2665 break;
2666 }
2667
2668 // Find a register at an offset from the CFA.
2669 case DW_CFA_offset_extended:
2670 if (!ParseOperands("ro", &ops) ||
2671 !DoOffset(ops.register_number,
2672 ops.offset * cie->data_alignment_factor))
2673 return false;
2674 break;
2675
2676 // The register is saved at an offset from the CFA.
2677 case DW_CFA_offset_extended_sf:
2678 if (!ParseOperands("rs", &ops) ||
2679 !DoOffset(ops.register_number,
2680 ops.signed_offset * cie->data_alignment_factor))
2681 return false;
2682 break;
2683
2684 // The register is saved at an offset from the CFA.
2685 case DW_CFA_GNU_negative_offset_extended:
2686 if (!ParseOperands("ro", &ops) ||
2687 !DoOffset(ops.register_number,
2688 -ops.offset * cie->data_alignment_factor))
2689 return false;
2690 break;
2691
2692 // The register's value is the sum of the CFA plus an offset.
2693 case DW_CFA_val_offset:
2694 if (!ParseOperands("ro", &ops) ||
2695 !DoValOffset(ops.register_number,
2696 ops.offset * cie->data_alignment_factor))
2697 return false;
2698 break;
2699
2700 // The register's value is the sum of the CFA plus an offset.
2701 case DW_CFA_val_offset_sf:
2702 if (!ParseOperands("rs", &ops) ||
2703 !DoValOffset(ops.register_number,
2704 ops.signed_offset * cie->data_alignment_factor))
2705 return false;
2706 break;
2707
2708 // The register has been saved in another register.
2709 case DW_CFA_register: {
2710 if (!ParseOperands("ro", &ops) ||
2711 !DoRule(ops.register_number, new RegisterRule(ops.offset)))
2712 return false;
2713 break;
2714 }
2715
2716 // An expression yields the address at which the register is saved.
2717 case DW_CFA_expression: {
2718 if (!ParseOperands("re", &ops) ||
2719 !DoRule(ops.register_number, new ExpressionRule(ops.expression)))
2720 return false;
2721 break;
2722 }
2723
2724 // An expression yields the caller's value for the register.
2725 case DW_CFA_val_expression: {
2726 if (!ParseOperands("re", &ops) ||
2727 !DoRule(ops.register_number, new ValExpressionRule(ops.expression)))
2728 return false;
2729 break;
2730 }
2731
2732 // Restore the rule established for a register by the CIE.
2733 case DW_CFA_restore_extended:
2734 if (!ParseOperands("r", &ops) ||
2735 !DoRestore( ops.register_number))
2736 return false;
2737 break;
2738
2739 // Save the current set of rules on a stack.
2740 case DW_CFA_remember_state:
2741 saved_rules_.push(rules_);
2742 break;
2743
2744 // Pop the current set of rules off the stack.
2745 case DW_CFA_restore_state: {
2746 if (saved_rules_.empty()) {
2747 reporter_->EmptyStateStack(entry_->offset, entry_->kind,
2748 CursorOffset());
2749 return false;
2750 }
2751 const RuleMap& new_rules = saved_rules_.top();
2752 if (rules_.CFARule() && !new_rules.CFARule()) {
2753 reporter_->ClearingCFARule(entry_->offset, entry_->kind,
2754 CursorOffset());
2755 return false;
2756 }
2757 rules_.HandleTransitionTo(handler_, address_, new_rules);
2758 rules_ = new_rules;
2759 saved_rules_.pop();
2760 break;
2761 }
2762
2763 // No operation. (Padding instruction.)
2764 case DW_CFA_nop:
2765 break;
2766
2767 // case DW_CFA_AARCH64_negate_ra_state
2768 case DW_CFA_GNU_window_save: {
2769 if (handler_->Architecture() == "arm64") {
2770 // Indicates that the return address, x30 has been signed.
2771 // Breakpad will speculatively remove pointer-authentication codes when
2772 // interpreting return addresses, regardless of this bit.
2773 } else if (handler_->Architecture() == "sparc" ||
2774 handler_->Architecture() == "sparcv9") {
2775 // A SPARC register window save: Registers 8 through 15 (%o0-%o7)
2776 // are saved in registers 24 through 31 (%i0-%i7), and registers
2777 // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
2778 // (0-15 * the register size). The register numbers must be
2779 // hard-coded. A GNU extension, and not a pretty one.
2780
2781 // Save %o0-%o7 in %i0-%i7.
2782 for (int i = 8; i < 16; i++)
2783 if (!DoRule(i, new RegisterRule(i + 16)))
2784 return false;
2785 // Save %l0-%l7 and %i0-%i7 at the CFA.
2786 for (int i = 16; i < 32; i++)
2787 // Assume that the byte reader's address size is the same as
2788 // the architecture's register size. !@#%*^ hilarious.
2789 if (!DoRule(i, new OffsetRule(Handler::kCFARegister,
2790 (i - 16) * reader_->AddressSize())))
2791 return false;
2792 }
2793 break;
2794 }
2795
2796 // I'm not sure what this is. GDB doesn't use it for unwinding.
2797 case DW_CFA_GNU_args_size:
2798 if (!ParseOperands("o", &ops)) return false;
2799 break;
2800
2801 // An opcode we don't recognize.
2802 default: {
2803 reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
2804 return false;
2805 }
2806 }
2807
2808 return true;
2809 }
2810
DoDefCFA(unsigned base_register,long offset)2811 bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
2812 Rule* rule = new ValOffsetRule(base_register, offset);
2813 rules_.SetCFARule(rule);
2814 return rule->Handle(handler_, address_,
2815 Handler::kCFARegister);
2816 }
2817
DoDefCFAOffset(long offset)2818 bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
2819 Rule* cfa_rule = rules_.CFARule();
2820 if (!cfa_rule) {
2821 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2822 return false;
2823 }
2824 cfa_rule->SetOffset(offset);
2825 return cfa_rule->Handle(handler_, address_,
2826 Handler::kCFARegister);
2827 }
2828
DoRule(unsigned reg,Rule * rule)2829 bool CallFrameInfo::State::DoRule(unsigned reg, Rule* rule) {
2830 rules_.SetRegisterRule(reg, rule);
2831 return rule->Handle(handler_, address_, reg);
2832 }
2833
DoOffset(unsigned reg,long offset)2834 bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
2835 if (!rules_.CFARule()) {
2836 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2837 return false;
2838 }
2839 return DoRule(reg,
2840 new OffsetRule(Handler::kCFARegister, offset));
2841 }
2842
DoValOffset(unsigned reg,long offset)2843 bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
2844 if (!rules_.CFARule()) {
2845 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2846 return false;
2847 }
2848 return DoRule(reg,
2849 new ValOffsetRule(Handler::kCFARegister, offset));
2850 }
2851
DoRestore(unsigned reg)2852 bool CallFrameInfo::State::DoRestore(unsigned reg) {
2853 // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE.
2854 if (entry_->kind == kCIE) {
2855 reporter_->RestoreInCIE(entry_->offset, CursorOffset());
2856 return false;
2857 }
2858 Rule* rule = cie_rules_.RegisterRule(reg);
2859 if (!rule) {
2860 // This isn't really the right thing to do, but since CFI generally
2861 // only mentions callee-saves registers, and GCC's convention for
2862 // callee-saves registers is that they are unchanged, it's a good
2863 // approximation.
2864 rule = new SameValueRule();
2865 }
2866 return DoRule(reg, rule);
2867 }
2868
ReadEntryPrologue(const uint8_t * cursor,Entry * entry)2869 bool CallFrameInfo::ReadEntryPrologue(const uint8_t* cursor, Entry* entry) {
2870 const uint8_t* buffer_end = buffer_ + buffer_length_;
2871
2872 // Initialize enough of ENTRY for use in error reporting.
2873 entry->offset = cursor - buffer_;
2874 entry->start = cursor;
2875 entry->kind = kUnknown;
2876 entry->end = NULL;
2877
2878 // Read the initial length. This sets reader_'s offset size.
2879 size_t length_size;
2880 uint64_t length = reader_->ReadInitialLength(cursor, &length_size);
2881 if (length_size > size_t(buffer_end - cursor))
2882 return ReportIncomplete(entry);
2883 cursor += length_size;
2884
2885 // In a .eh_frame section, a length of zero marks the end of the series
2886 // of entries.
2887 if (length == 0 && eh_frame_) {
2888 entry->kind = kTerminator;
2889 entry->end = cursor;
2890 return true;
2891 }
2892
2893 // Validate the length.
2894 if (length > size_t(buffer_end - cursor))
2895 return ReportIncomplete(entry);
2896
2897 // The length is the number of bytes after the initial length field;
2898 // we have that position handy at this point, so compute the end
2899 // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine,
2900 // and the length didn't fit in a size_t, we would have rejected it
2901 // above.)
2902 entry->end = cursor + length;
2903
2904 // Parse the next field: either the offset of a CIE or a CIE id.
2905 size_t offset_size = reader_->OffsetSize();
2906 if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry);
2907 entry->id = reader_->ReadOffset(cursor);
2908
2909 // Don't advance cursor past id field yet; in .eh_frame data we need
2910 // the id's position to compute the section offset of an FDE's CIE.
2911
2912 // Now we can decide what kind of entry this is.
2913 if (eh_frame_) {
2914 // In .eh_frame data, an ID of zero marks the entry as a CIE, and
2915 // anything else is an offset from the id field of the FDE to the start
2916 // of the CIE.
2917 if (entry->id == 0) {
2918 entry->kind = kCIE;
2919 } else {
2920 entry->kind = kFDE;
2921 // Turn the offset from the id into an offset from the buffer's start.
2922 entry->id = (cursor - buffer_) - entry->id;
2923 }
2924 } else {
2925 // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the
2926 // offset size for the entry) marks the entry as a CIE, and anything
2927 // else is the offset of the CIE from the beginning of the section.
2928 if (offset_size == 4)
2929 entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE;
2930 else {
2931 assert(offset_size == 8);
2932 entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE;
2933 }
2934 }
2935
2936 // Now advance cursor past the id.
2937 cursor += offset_size;
2938
2939 // The fields specific to this kind of entry start here.
2940 entry->fields = cursor;
2941
2942 entry->cie = NULL;
2943
2944 return true;
2945 }
2946
ReadCIEFields(CIE * cie)2947 bool CallFrameInfo::ReadCIEFields(CIE* cie) {
2948 const uint8_t* cursor = cie->fields;
2949 size_t len;
2950
2951 assert(cie->kind == kCIE);
2952
2953 // Prepare for early exit.
2954 cie->version = 0;
2955 cie->augmentation.clear();
2956 cie->code_alignment_factor = 0;
2957 cie->data_alignment_factor = 0;
2958 cie->return_address_register = 0;
2959 cie->has_z_augmentation = false;
2960 cie->pointer_encoding = DW_EH_PE_absptr;
2961 cie->instructions = 0;
2962
2963 // Parse the version number.
2964 if (cie->end - cursor < 1)
2965 return ReportIncomplete(cie);
2966 cie->version = reader_->ReadOneByte(cursor);
2967 cursor++;
2968
2969 // If we don't recognize the version, we can't parse any more fields of the
2970 // CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a
2971 // version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well;
2972 // the difference between those versions seems to be the same as for
2973 // .debug_frame.
2974 if (cie->version < 1 || cie->version > 4) {
2975 reporter_->UnrecognizedVersion(cie->offset, cie->version);
2976 return false;
2977 }
2978
2979 const uint8_t* augmentation_start = cursor;
2980 const uint8_t* augmentation_end =
2981 reinterpret_cast<const uint8_t*>(memchr(augmentation_start, '\0',
2982 cie->end - augmentation_start));
2983 if (! augmentation_end) return ReportIncomplete(cie);
2984 cursor = augmentation_end;
2985 cie->augmentation = string(reinterpret_cast<const char*>(augmentation_start),
2986 cursor - augmentation_start);
2987 // Skip the terminating '\0'.
2988 cursor++;
2989
2990 // Is this CFI augmented?
2991 if (!cie->augmentation.empty()) {
2992 // Is it an augmentation we recognize?
2993 if (cie->augmentation[0] == DW_Z_augmentation_start) {
2994 // Linux C++ ABI 'z' augmentation, used for exception handling data.
2995 cie->has_z_augmentation = true;
2996 } else {
2997 // Not an augmentation we recognize. Augmentations can have arbitrary
2998 // effects on the form of rest of the content, so we have to give up.
2999 reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
3000 return false;
3001 }
3002 }
3003
3004 if (cie->version >= 4) {
3005 cie->address_size = *cursor++;
3006 if (cie->address_size != 8 && cie->address_size != 4) {
3007 reporter_->UnexpectedAddressSize(cie->offset, cie->address_size);
3008 return false;
3009 }
3010
3011 cie->segment_size = *cursor++;
3012 if (cie->segment_size != 0) {
3013 reporter_->UnexpectedSegmentSize(cie->offset, cie->segment_size);
3014 return false;
3015 }
3016 }
3017
3018 // Parse the code alignment factor.
3019 cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
3020 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
3021 cursor += len;
3022
3023 // Parse the data alignment factor.
3024 cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
3025 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
3026 cursor += len;
3027
3028 // Parse the return address register. This is a ubyte in version 1, and
3029 // a ULEB128 in version 3.
3030 if (cie->version == 1) {
3031 if (cursor >= cie->end) return ReportIncomplete(cie);
3032 cie->return_address_register = uint8_t(*cursor++);
3033 } else {
3034 cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
3035 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
3036 cursor += len;
3037 }
3038
3039 // If we have a 'z' augmentation string, find the augmentation data and
3040 // use the augmentation string to parse it.
3041 if (cie->has_z_augmentation) {
3042 uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
3043 if (size_t(cie->end - cursor) < len + data_size)
3044 return ReportIncomplete(cie);
3045 cursor += len;
3046 const uint8_t* data = cursor;
3047 cursor += data_size;
3048 const uint8_t* data_end = cursor;
3049
3050 cie->has_z_lsda = false;
3051 cie->has_z_personality = false;
3052 cie->has_z_signal_frame = false;
3053
3054 // Walk the augmentation string, and extract values from the
3055 // augmentation data as the string directs.
3056 for (size_t i = 1; i < cie->augmentation.size(); i++) {
3057 switch (cie->augmentation[i]) {
3058 case DW_Z_has_LSDA:
3059 // The CIE's augmentation data holds the language-specific data
3060 // area pointer's encoding, and the FDE's augmentation data holds
3061 // the pointer itself.
3062 cie->has_z_lsda = true;
3063 // Fetch the LSDA encoding from the augmentation data.
3064 if (data >= data_end) return ReportIncomplete(cie);
3065 cie->lsda_encoding = DwarfPointerEncoding(*data++);
3066 if (!reader_->ValidEncoding(cie->lsda_encoding)) {
3067 reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
3068 return false;
3069 }
3070 // Don't check if the encoding is usable here --- we haven't
3071 // read the FDE's fields yet, so we're not prepared for
3072 // DW_EH_PE_funcrel, although that's a fine encoding for the
3073 // LSDA to use, since it appears in the FDE.
3074 break;
3075
3076 case DW_Z_has_personality_routine:
3077 // The CIE's augmentation data holds the personality routine
3078 // pointer's encoding, followed by the pointer itself.
3079 cie->has_z_personality = true;
3080 // Fetch the personality routine pointer's encoding from the
3081 // augmentation data.
3082 if (data >= data_end) return ReportIncomplete(cie);
3083 cie->personality_encoding = DwarfPointerEncoding(*data++);
3084 if (!reader_->ValidEncoding(cie->personality_encoding)) {
3085 reporter_->InvalidPointerEncoding(cie->offset,
3086 cie->personality_encoding);
3087 return false;
3088 }
3089 if (!reader_->UsableEncoding(cie->personality_encoding)) {
3090 reporter_->UnusablePointerEncoding(cie->offset,
3091 cie->personality_encoding);
3092 return false;
3093 }
3094 // Fetch the personality routine's pointer itself from the data.
3095 cie->personality_address =
3096 reader_->ReadEncodedPointer(data, cie->personality_encoding,
3097 &len);
3098 if (len > size_t(data_end - data))
3099 return ReportIncomplete(cie);
3100 data += len;
3101 break;
3102
3103 case DW_Z_has_FDE_address_encoding:
3104 // The CIE's augmentation data holds the pointer encoding to use
3105 // for addresses in the FDE.
3106 if (data >= data_end) return ReportIncomplete(cie);
3107 cie->pointer_encoding = DwarfPointerEncoding(*data++);
3108 if (!reader_->ValidEncoding(cie->pointer_encoding)) {
3109 reporter_->InvalidPointerEncoding(cie->offset,
3110 cie->pointer_encoding);
3111 return false;
3112 }
3113 if (!reader_->UsableEncoding(cie->pointer_encoding)) {
3114 reporter_->UnusablePointerEncoding(cie->offset,
3115 cie->pointer_encoding);
3116 return false;
3117 }
3118 break;
3119
3120 case DW_Z_is_signal_trampoline:
3121 // Frames using this CIE are signal delivery frames.
3122 cie->has_z_signal_frame = true;
3123 break;
3124
3125 default:
3126 // An augmentation we don't recognize.
3127 reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
3128 return false;
3129 }
3130 }
3131 }
3132
3133 // The CIE's instructions start here.
3134 cie->instructions = cursor;
3135
3136 return true;
3137 }
3138
ReadFDEFields(FDE * fde)3139 bool CallFrameInfo::ReadFDEFields(FDE* fde) {
3140 const uint8_t* cursor = fde->fields;
3141 size_t size;
3142
3143 fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding,
3144 &size);
3145 if (size > size_t(fde->end - cursor))
3146 return ReportIncomplete(fde);
3147 cursor += size;
3148 reader_->SetFunctionBase(fde->address);
3149
3150 // For the length, we strip off the upper nybble of the encoding used for
3151 // the starting address.
3152 DwarfPointerEncoding length_encoding =
3153 DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f);
3154 fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
3155 if (size > size_t(fde->end - cursor))
3156 return ReportIncomplete(fde);
3157 cursor += size;
3158
3159 // If the CIE has a 'z' augmentation string, then augmentation data
3160 // appears here.
3161 if (fde->cie->has_z_augmentation) {
3162 uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
3163 if (size_t(fde->end - cursor) < size + data_size)
3164 return ReportIncomplete(fde);
3165 cursor += size;
3166
3167 // In the abstract, we should walk the augmentation string, and extract
3168 // items from the FDE's augmentation data as we encounter augmentation
3169 // string characters that specify their presence: the ordering of items
3170 // in the augmentation string determines the arrangement of values in
3171 // the augmentation data.
3172 //
3173 // In practice, there's only ever one value in FDE augmentation data
3174 // that we support --- the LSDA pointer --- and we have to bail if we
3175 // see any unrecognized augmentation string characters. So if there is
3176 // anything here at all, we know what it is, and where it starts.
3177 if (fde->cie->has_z_lsda) {
3178 // Check whether the LSDA's pointer encoding is usable now: only once
3179 // we've parsed the FDE's starting address do we call reader_->
3180 // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
3181 // usable.
3182 if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
3183 reporter_->UnusablePointerEncoding(fde->cie->offset,
3184 fde->cie->lsda_encoding);
3185 return false;
3186 }
3187
3188 fde->lsda_address =
3189 reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
3190 if (size > data_size)
3191 return ReportIncomplete(fde);
3192 // Ideally, we would also complain here if there were unconsumed
3193 // augmentation data.
3194 }
3195
3196 cursor += data_size;
3197 }
3198
3199 // The FDE's instructions start after those.
3200 fde->instructions = cursor;
3201
3202 return true;
3203 }
3204
Start()3205 bool CallFrameInfo::Start() {
3206 const uint8_t* buffer_end = buffer_ + buffer_length_;
3207 const uint8_t* cursor;
3208 bool all_ok = true;
3209 const uint8_t* entry_end;
3210 bool ok;
3211
3212 // Traverse all the entries in buffer_, skipping CIEs and offering
3213 // FDEs to the handler.
3214 for (cursor = buffer_; cursor < buffer_end;
3215 cursor = entry_end, all_ok = all_ok && ok) {
3216 FDE fde;
3217
3218 // Make it easy to skip this entry with 'continue': assume that
3219 // things are not okay until we've checked all the data, and
3220 // prepare the address of the next entry.
3221 ok = false;
3222
3223 // Read the entry's prologue.
3224 if (!ReadEntryPrologue(cursor, &fde)) {
3225 if (!fde.end) {
3226 // If we couldn't even figure out this entry's extent, then we
3227 // must stop processing entries altogether.
3228 all_ok = false;
3229 break;
3230 }
3231 entry_end = fde.end;
3232 continue;
3233 }
3234
3235 // The next iteration picks up after this entry.
3236 entry_end = fde.end;
3237
3238 // Did we see an .eh_frame terminating mark?
3239 if (fde.kind == kTerminator) {
3240 // If there appears to be more data left in the section after the
3241 // terminating mark, warn the user. But this is just a warning;
3242 // we leave all_ok true.
3243 if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
3244 break;
3245 }
3246
3247 // In this loop, we skip CIEs. We only parse them fully when we
3248 // parse an FDE that refers to them. This limits our memory
3249 // consumption (beyond the buffer itself) to that needed to
3250 // process the largest single entry.
3251 if (fde.kind != kFDE) {
3252 ok = true;
3253 continue;
3254 }
3255
3256 // Validate the CIE pointer.
3257 if (fde.id > buffer_length_) {
3258 reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
3259 continue;
3260 }
3261
3262 CIE cie;
3263
3264 // Parse this FDE's CIE header.
3265 if (!ReadEntryPrologue(buffer_ + fde.id, &cie))
3266 continue;
3267 // This had better be an actual CIE.
3268 if (cie.kind != kCIE) {
3269 reporter_->BadCIEId(fde.offset, fde.id);
3270 continue;
3271 }
3272 if (!ReadCIEFields(&cie))
3273 continue;
3274
3275 // TODO(nbilling): This could lead to strange behavior if a single buffer
3276 // contained a mixture of DWARF versions as well as address sizes. Not
3277 // sure if it's worth handling such a case.
3278
3279 // DWARF4 CIE specifies address_size, so use it for this call frame.
3280 if (cie.version >= 4) {
3281 reader_->SetAddressSize(cie.address_size);
3282 }
3283
3284 // We now have the values that govern both the CIE and the FDE.
3285 cie.cie = &cie;
3286 fde.cie = &cie;
3287
3288 // Parse the FDE's header.
3289 if (!ReadFDEFields(&fde))
3290 continue;
3291
3292 // Call Entry to ask the consumer if they're interested.
3293 if (!handler_->Entry(fde.offset, fde.address, fde.size,
3294 cie.version, cie.augmentation,
3295 cie.return_address_register)) {
3296 // The handler isn't interested in this entry. That's not an error.
3297 ok = true;
3298 continue;
3299 }
3300
3301 if (cie.has_z_augmentation) {
3302 // Report the personality routine address, if we have one.
3303 if (cie.has_z_personality) {
3304 if (!handler_
3305 ->PersonalityRoutine(cie.personality_address,
3306 IsIndirectEncoding(cie.personality_encoding)))
3307 continue;
3308 }
3309
3310 // Report the language-specific data area address, if we have one.
3311 if (cie.has_z_lsda) {
3312 if (!handler_
3313 ->LanguageSpecificDataArea(fde.lsda_address,
3314 IsIndirectEncoding(cie.lsda_encoding)))
3315 continue;
3316 }
3317
3318 // If this is a signal-handling frame, report that.
3319 if (cie.has_z_signal_frame) {
3320 if (!handler_->SignalHandler())
3321 continue;
3322 }
3323 }
3324
3325 // Interpret the CIE's instructions, and then the FDE's instructions.
3326 State state(reader_, handler_, reporter_, fde.address);
3327 ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);
3328
3329 // Tell the ByteReader that the function start address from the
3330 // FDE header is no longer valid.
3331 reader_->ClearFunctionBase();
3332
3333 // Report the end of the entry.
3334 handler_->End();
3335 }
3336
3337 return all_ok;
3338 }
3339
KindName(EntryKind kind)3340 const char* CallFrameInfo::KindName(EntryKind kind) {
3341 if (kind == CallFrameInfo::kUnknown)
3342 return "entry";
3343 else if (kind == CallFrameInfo::kCIE)
3344 return "common information entry";
3345 else if (kind == CallFrameInfo::kFDE)
3346 return "frame description entry";
3347 else {
3348 assert (kind == CallFrameInfo::kTerminator);
3349 return ".eh_frame sequence terminator";
3350 }
3351 }
3352
ReportIncomplete(Entry * entry)3353 bool CallFrameInfo::ReportIncomplete(Entry* entry) {
3354 reporter_->Incomplete(entry->offset, entry->kind);
3355 return false;
3356 }
3357
Incomplete(uint64_t offset,CallFrameInfo::EntryKind kind)3358 void CallFrameInfo::Reporter::Incomplete(uint64_t offset,
3359 CallFrameInfo::EntryKind kind) {
3360 fprintf(stderr,
3361 "%s: CFI %s at offset 0x%" PRIx64 " in '%s': entry ends early\n",
3362 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
3363 section_.c_str());
3364 }
3365
EarlyEHTerminator(uint64_t offset)3366 void CallFrameInfo::Reporter::EarlyEHTerminator(uint64_t offset) {
3367 fprintf(stderr,
3368 "%s: CFI at offset 0x%" PRIx64 " in '%s': saw end-of-data marker"
3369 " before end of section contents\n",
3370 filename_.c_str(), offset, section_.c_str());
3371 }
3372
CIEPointerOutOfRange(uint64_t offset,uint64_t cie_offset)3373 void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64_t offset,
3374 uint64_t cie_offset) {
3375 fprintf(stderr,
3376 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3377 " CIE pointer is out of range: 0x%" PRIx64 "\n",
3378 filename_.c_str(), offset, section_.c_str(), cie_offset);
3379 }
3380
BadCIEId(uint64_t offset,uint64_t cie_offset)3381 void CallFrameInfo::Reporter::BadCIEId(uint64_t offset, uint64_t cie_offset) {
3382 fprintf(stderr,
3383 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3384 " CIE pointer does not point to a CIE: 0x%" PRIx64 "\n",
3385 filename_.c_str(), offset, section_.c_str(), cie_offset);
3386 }
3387
UnexpectedAddressSize(uint64_t offset,uint8_t address_size)3388 void CallFrameInfo::Reporter::UnexpectedAddressSize(uint64_t offset,
3389 uint8_t address_size) {
3390 fprintf(stderr,
3391 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3392 " CIE specifies unexpected address size: %d\n",
3393 filename_.c_str(), offset, section_.c_str(), address_size);
3394 }
3395
UnexpectedSegmentSize(uint64_t offset,uint8_t segment_size)3396 void CallFrameInfo::Reporter::UnexpectedSegmentSize(uint64_t offset,
3397 uint8_t segment_size) {
3398 fprintf(stderr,
3399 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3400 " CIE specifies unexpected segment size: %d\n",
3401 filename_.c_str(), offset, section_.c_str(), segment_size);
3402 }
3403
UnrecognizedVersion(uint64_t offset,int version)3404 void CallFrameInfo::Reporter::UnrecognizedVersion(uint64_t offset, int version) {
3405 fprintf(stderr,
3406 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3407 " CIE specifies unrecognized version: %d\n",
3408 filename_.c_str(), offset, section_.c_str(), version);
3409 }
3410
UnrecognizedAugmentation(uint64_t offset,const string & aug)3411 void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64_t offset,
3412 const string& aug) {
3413 fprintf(stderr,
3414 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3415 " CIE specifies unrecognized augmentation: '%s'\n",
3416 filename_.c_str(), offset, section_.c_str(), aug.c_str());
3417 }
3418
InvalidPointerEncoding(uint64_t offset,uint8_t encoding)3419 void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64_t offset,
3420 uint8_t encoding) {
3421 fprintf(stderr,
3422 "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
3423 " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n",
3424 filename_.c_str(), offset, section_.c_str(), encoding);
3425 }
3426
UnusablePointerEncoding(uint64_t offset,uint8_t encoding)3427 void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64_t offset,
3428 uint8_t encoding) {
3429 fprintf(stderr,
3430 "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
3431 " 'z' augmentation specifies a pointer encoding for which"
3432 " we have no base address: 0x%02x\n",
3433 filename_.c_str(), offset, section_.c_str(), encoding);
3434 }
3435
RestoreInCIE(uint64_t offset,uint64_t insn_offset)3436 void CallFrameInfo::Reporter::RestoreInCIE(uint64_t offset, uint64_t insn_offset) {
3437 fprintf(stderr,
3438 "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
3439 " the DW_CFA_restore instruction at offset 0x%" PRIx64
3440 " cannot be used in a common information entry\n",
3441 filename_.c_str(), offset, section_.c_str(), insn_offset);
3442 }
3443
BadInstruction(uint64_t offset,CallFrameInfo::EntryKind kind,uint64_t insn_offset)3444 void CallFrameInfo::Reporter::BadInstruction(uint64_t offset,
3445 CallFrameInfo::EntryKind kind,
3446 uint64_t insn_offset) {
3447 fprintf(stderr,
3448 "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
3449 " the instruction at offset 0x%" PRIx64 " is unrecognized\n",
3450 filename_.c_str(), CallFrameInfo::KindName(kind),
3451 offset, section_.c_str(), insn_offset);
3452 }
3453
NoCFARule(uint64_t offset,CallFrameInfo::EntryKind kind,uint64_t insn_offset)3454 void CallFrameInfo::Reporter::NoCFARule(uint64_t offset,
3455 CallFrameInfo::EntryKind kind,
3456 uint64_t insn_offset) {
3457 fprintf(stderr,
3458 "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
3459 " the instruction at offset 0x%" PRIx64 " assumes that a CFA rule has"
3460 " been set, but none has been set\n",
3461 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
3462 section_.c_str(), insn_offset);
3463 }
3464
EmptyStateStack(uint64_t offset,CallFrameInfo::EntryKind kind,uint64_t insn_offset)3465 void CallFrameInfo::Reporter::EmptyStateStack(uint64_t offset,
3466 CallFrameInfo::EntryKind kind,
3467 uint64_t insn_offset) {
3468 fprintf(stderr,
3469 "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
3470 " the DW_CFA_restore_state instruction at offset 0x%" PRIx64
3471 " should pop a saved state from the stack, but the stack is empty\n",
3472 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
3473 section_.c_str(), insn_offset);
3474 }
3475
ClearingCFARule(uint64_t offset,CallFrameInfo::EntryKind kind,uint64_t insn_offset)3476 void CallFrameInfo::Reporter::ClearingCFARule(uint64_t offset,
3477 CallFrameInfo::EntryKind kind,
3478 uint64_t insn_offset) {
3479 fprintf(stderr,
3480 "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
3481 " the DW_CFA_restore_state instruction at offset 0x%" PRIx64
3482 " would clear the CFA rule in effect\n",
3483 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
3484 section_.c_str(), insn_offset);
3485 }
3486
3487 } // namespace google_breakpad
3488