// -*- mode: C++ -*-

// Copyright 2010 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// CFI reader author: Jim Blandy <[email protected]> <[email protected]>

// This file contains definitions related to the DWARF2/3 reader and
// its handler interfaces.
// The DWARF2/3 specification can be found at
// http://dwarf.freestandards.org and should be considered required
// reading if you wish to modify the implementation.
38 // Only a cursory attempt is made to explain terminology that is 39 // used here, as it is much better explained in the standard documents 40 #ifndef COMMON_DWARF_DWARF2READER_H__ 41 #define COMMON_DWARF_DWARF2READER_H__ 42 43 #include <assert.h> 44 #include <stdint.h> 45 46 #include <list> 47 #include <map> 48 #include <string> 49 #include <utility> 50 #include <vector> 51 #include <memory> 52 53 #include "common/dwarf/bytereader.h" 54 #include "common/dwarf/dwarf2enums.h" 55 #include "common/dwarf/types.h" 56 #include "common/using_std_string.h" 57 #include "common/dwarf/elf_reader.h" 58 59 namespace google_breakpad { 60 struct LineStateMachine; 61 class Dwarf2Handler; 62 class LineInfoHandler; 63 class DwpReader; 64 65 // This maps from a string naming a section to a pair containing a 66 // the data for the section, and the size of the section. 67 typedef std::map<string, std::pair<const uint8_t*, uint64_t> > SectionMap; 68 69 // Abstract away the difference between elf and mach-o section names. 70 // Elf-names use ".section_name, mach-o uses "__section_name". Pass "name" in 71 // the elf form, ".section_name". 72 const SectionMap::const_iterator GetSectionByName(const SectionMap& 73 sections, const char* name); 74 75 // Most of the time, this struct functions as a simple attribute and form pair. 76 // However, Dwarf5 DW_FORM_implicit_const means that a form may have its value 77 // in line in the abbrev table, and that value must be associated with the 78 // pair until the attr's value is needed. 
79 struct AttrForm { AttrFormAttrForm80 AttrForm(enum DwarfAttribute attr, enum DwarfForm form, uint64_t value) : 81 attr_(attr), form_(form), value_(value) { } 82 83 enum DwarfAttribute attr_; 84 enum DwarfForm form_; 85 uint64_t value_; 86 }; 87 typedef std::list<AttrForm> AttributeList; 88 typedef AttributeList::iterator AttributeIterator; 89 typedef AttributeList::const_iterator ConstAttributeIterator; 90 91 struct LineInfoHeader { 92 uint64_t total_length; 93 uint16_t version; 94 uint64_t prologue_length; 95 uint8_t min_insn_length; // insn stands for instructin 96 bool default_is_stmt; // stmt stands for statement 97 int8_t line_base; 98 uint8_t line_range; 99 uint8_t opcode_base; 100 // Use a pointer so that signalsafe_addr2line is able to use this structure 101 // without heap allocation problem. 102 std::vector<unsigned char>* std_opcode_lengths; 103 }; 104 105 class LineInfo { 106 public: 107 108 // Initializes a .debug_line reader. Buffer and buffer length point 109 // to the beginning and length of the line information to read. 110 // Reader is a ByteReader class that has the endianness set 111 // properly. 112 LineInfo(const uint8_t* buffer, uint64_t buffer_length, 113 ByteReader* reader, const uint8_t* string_buffer, 114 size_t string_buffer_length, const uint8_t* line_string_buffer, 115 size_t line_string_buffer_length, LineInfoHandler* handler); 116 ~LineInfo()117 virtual ~LineInfo() { 118 if (header_.std_opcode_lengths) { 119 delete header_.std_opcode_lengths; 120 } 121 } 122 123 // Start processing line info, and calling callbacks in the handler. 124 // Consumes the line number information for a single compilation unit. 125 // Returns the number of bytes processed. 126 uint64_t Start(); 127 128 // Process a single line info opcode at START using the state 129 // machine at LSM. Return true if we should define a line using the 130 // current state of the line state machine. Place the length of the 131 // opcode in LEN. 
132 // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm 133 // passes the address of PC. In other words, LSM_PASSES_PC will be 134 // set to true, if the following condition is met. 135 // 136 // lsm's old address < PC <= lsm's new address 137 static bool ProcessOneOpcode(ByteReader* reader, 138 LineInfoHandler* handler, 139 const struct LineInfoHeader& header, 140 const uint8_t* start, 141 struct LineStateMachine* lsm, 142 size_t* len, 143 uintptr pc, 144 bool* lsm_passes_pc); 145 146 private: 147 // Reads the DWARF2/3 header for this line info. 148 void ReadHeader(); 149 150 // Reads the DWARF2/3 line information 151 void ReadLines(); 152 153 // Read the DWARF5 types and forms for the file and directory tables. 154 void ReadTypesAndForms(const uint8_t** lineptr, uint32_t* content_types, 155 uint32_t* content_forms, uint32_t max_types, 156 uint32_t* format_count); 157 158 // Read a row from the dwarf5 LineInfo file table. 159 void ReadFileRow(const uint8_t** lineptr, const uint32_t* content_types, 160 const uint32_t* content_forms, uint32_t row, 161 uint32_t format_count); 162 163 // Read and return the data at *lineptr according to form. Advance 164 // *lineptr appropriately. 165 uint64_t ReadUnsignedData(uint32_t form, const uint8_t** lineptr); 166 167 // Read and return the data at *lineptr according to form. Advance 168 // *lineptr appropriately. 169 const char* ReadStringForm(uint32_t form, const uint8_t** lineptr); 170 171 // The associated handler to call processing functions in 172 LineInfoHandler* handler_; 173 174 // The associated ByteReader that handles endianness issues for us 175 ByteReader* reader_; 176 177 // A DWARF line info header. This is not the same size as in the actual file, 178 // as the one in the file may have a 32 bit or 64 bit lengths 179 180 struct LineInfoHeader header_; 181 182 // buffer is the buffer for our line info, starting at exactly where 183 // the line info to read is. 
after_header is the place right after 184 // the end of the line information header. 185 const uint8_t* buffer_; 186 #ifndef NDEBUG 187 uint64_t buffer_length_; 188 #endif 189 // Convenience pointers into .debug_str and .debug_line_str. These exactly 190 // correspond to those in the compilation unit. 191 const uint8_t* string_buffer_; 192 #ifndef NDEBUG 193 uint64_t string_buffer_length_; 194 #endif 195 const uint8_t* line_string_buffer_; 196 #ifndef NDEBUG 197 uint64_t line_string_buffer_length_; 198 #endif 199 200 const uint8_t* after_header_; 201 }; 202 203 // This class is the main interface between the line info reader and 204 // the client. The virtual functions inside this get called for 205 // interesting events that happen during line info reading. The 206 // default implementation does nothing 207 208 class LineInfoHandler { 209 public: LineInfoHandler()210 LineInfoHandler() { } 211 ~LineInfoHandler()212 virtual ~LineInfoHandler() { } 213 214 // Called when we define a directory. NAME is the directory name, 215 // DIR_NUM is the directory number DefineDir(const string & name,uint32_t dir_num)216 virtual void DefineDir(const string& name, uint32_t dir_num) { } 217 218 // Called when we define a filename. NAME is the filename, FILE_NUM 219 // is the file number which is -1 if the file index is the next 220 // index after the last numbered index (this happens when files are 221 // dynamically defined by the line program), DIR_NUM is the 222 // directory index for the directory name of this file, MOD_TIME is 223 // the modification time of the file, and LENGTH is the length of 224 // the file DefineFile(const string & name,int32_t file_num,uint32_t dir_num,uint64_t mod_time,uint64_t length)225 virtual void DefineFile(const string& name, int32_t file_num, 226 uint32_t dir_num, uint64_t mod_time, 227 uint64_t length) { } 228 229 // Called when the line info reader has a new line, address pair 230 // ready for us. 
ADDRESS is the address of the code, LENGTH is the 231 // length of its machine code in bytes, FILE_NUM is the file number 232 // containing the code, LINE_NUM is the line number in that file for 233 // the code, and COLUMN_NUM is the column number the code starts at, 234 // if we know it (0 otherwise). AddLine(uint64_t address,uint64_t length,uint32_t file_num,uint32_t line_num,uint32_t column_num)235 virtual void AddLine(uint64_t address, uint64_t length, 236 uint32_t file_num, uint32_t line_num, uint32_t column_num) { } 237 }; 238 239 class RangeListHandler { 240 public: RangeListHandler()241 RangeListHandler() { } 242 ~RangeListHandler()243 virtual ~RangeListHandler() { } 244 245 // Add a range. AddRange(uint64_t begin,uint64_t end)246 virtual void AddRange(uint64_t begin, uint64_t end) { }; 247 248 // Finish processing the range list. Finish()249 virtual void Finish() { }; 250 }; 251 252 class RangeListReader { 253 public: 254 // Reading a range list requires quite a bit of information 255 // from the compilation unit. Package it conveniently. 256 struct CURangesInfo { CURangesInfoCURangesInfo257 CURangesInfo() : 258 version_(0), base_address_(0), ranges_base_(0), 259 buffer_(nullptr), size_(0), addr_buffer_(nullptr), 260 addr_buffer_size_(0), addr_base_(0) { } 261 262 uint16_t version_; 263 // Ranges base address. Ordinarily the CU's low_pc. 264 uint64_t base_address_; 265 // Offset into .debug_rnglists for this CU's rangelists. 266 uint64_t ranges_base_; 267 // Contents of either .debug_ranges or .debug_rnglists. 268 const uint8_t* buffer_; 269 uint64_t size_; 270 // Contents of .debug_addr. 
This cu's contribution starts at 271 // addr_base_ 272 const uint8_t* addr_buffer_; 273 uint64_t addr_buffer_size_; 274 uint64_t addr_base_; 275 }; 276 RangeListReader(ByteReader * reader,CURangesInfo * cu_info,RangeListHandler * handler)277 RangeListReader(ByteReader* reader, CURangesInfo* cu_info, 278 RangeListHandler* handler) : 279 reader_(reader), cu_info_(cu_info), handler_(handler), 280 offset_array_(0) { } 281 282 // Read ranges from cu_info as specified by form and data. 283 bool ReadRanges(enum DwarfForm form, uint64_t data); 284 285 private: 286 // Read dwarf4 .debug_ranges at offset. 287 bool ReadDebugRanges(uint64_t offset); 288 // Read dwarf5 .debug_rngslist at offset. 289 bool ReadDebugRngList(uint64_t offset); 290 291 // Convenience functions to handle the mechanics of reading entries in the 292 // ranges section. ReadULEB(uint64_t offset,uint64_t * value)293 uint64_t ReadULEB(uint64_t offset, uint64_t* value) { 294 size_t len; 295 *value = reader_->ReadUnsignedLEB128(cu_info_->buffer_ + offset, &len); 296 return len; 297 } 298 ReadAddress(uint64_t offset,uint64_t * value)299 uint64_t ReadAddress(uint64_t offset, uint64_t* value) { 300 *value = reader_->ReadAddress(cu_info_->buffer_ + offset); 301 return reader_->AddressSize(); 302 } 303 304 // Read the address at this CU's addr_index in the .debug_addr section. GetAddressAtIndex(uint64_t addr_index)305 uint64_t GetAddressAtIndex(uint64_t addr_index) { 306 assert(cu_info_->addr_buffer_ != nullptr); 307 uint64_t offset = 308 cu_info_->addr_base_ + addr_index * reader_->AddressSize(); 309 assert(offset < cu_info_->addr_buffer_size_); 310 return reader_->ReadAddress(cu_info_->addr_buffer_ + offset); 311 } 312 313 ByteReader* reader_; 314 CURangesInfo* cu_info_; 315 RangeListHandler* handler_; 316 uint64_t offset_array_; 317 }; 318 319 // This class is the main interface between the reader and the 320 // client. 
The virtual functions inside this get called for 321 // interesting events that happen during DWARF2 reading. 322 // The default implementation skips everything. 323 class Dwarf2Handler { 324 public: Dwarf2Handler()325 Dwarf2Handler() { } 326 ~Dwarf2Handler()327 virtual ~Dwarf2Handler() { } 328 329 // Start to process a compilation unit at OFFSET from the beginning of the 330 // .debug_info section. Return false if you would like to skip this 331 // compilation unit. StartCompilationUnit(uint64_t offset,uint8_t address_size,uint8_t offset_size,uint64_t cu_length,uint8_t dwarf_version)332 virtual bool StartCompilationUnit(uint64_t offset, uint8_t address_size, 333 uint8_t offset_size, uint64_t cu_length, 334 uint8_t dwarf_version) { return false; } 335 336 // When processing a skeleton compilation unit, resulting from a split 337 // DWARF compilation, once the skeleton debug info has been read, 338 // the reader will call this function to ask the client if it needs 339 // the full debug info from the .dwo or .dwp file. Return true if 340 // you need it, or false to skip processing the split debug info. NeedSplitDebugInfo()341 virtual bool NeedSplitDebugInfo() { return true; } 342 343 // Start to process a split compilation unit at OFFSET from the beginning of 344 // the debug_info section in the .dwp/.dwo file. Return false if you would 345 // like to skip this compilation unit. StartSplitCompilationUnit(uint64_t offset,uint64_t cu_length)346 virtual bool StartSplitCompilationUnit(uint64_t offset, 347 uint64_t cu_length) { return false; } 348 349 // Start to process a DIE at OFFSET from the beginning of the .debug_info 350 // section. Return false if you would like to skip this DIE. StartDIE(uint64_t offset,enum DwarfTag tag)351 virtual bool StartDIE(uint64_t offset, enum DwarfTag tag) { return false; } 352 353 // Called when we have an attribute with unsigned data to give to our 354 // handler. 
The attribute is for the DIE at OFFSET from the beginning of the 355 // .debug_info section. Its name is ATTR, its form is FORM, and its value is 356 // DATA. ProcessAttributeUnsigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)357 virtual void ProcessAttributeUnsigned(uint64_t offset, 358 enum DwarfAttribute attr, 359 enum DwarfForm form, 360 uint64_t data) { } 361 362 // Called when we have an attribute with signed data to give to our handler. 363 // The attribute is for the DIE at OFFSET from the beginning of the 364 // .debug_info section. Its name is ATTR, its form is FORM, and its value is 365 // DATA. ProcessAttributeSigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,int64_t data)366 virtual void ProcessAttributeSigned(uint64_t offset, 367 enum DwarfAttribute attr, 368 enum DwarfForm form, 369 int64_t data) { } 370 371 // Called when we have an attribute whose value is a reference to 372 // another DIE. The attribute belongs to the DIE at OFFSET from the 373 // beginning of the .debug_info section. Its name is ATTR, its form 374 // is FORM, and the offset of the DIE being referred to from the 375 // beginning of the .debug_info section is DATA. ProcessAttributeReference(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)376 virtual void ProcessAttributeReference(uint64_t offset, 377 enum DwarfAttribute attr, 378 enum DwarfForm form, 379 uint64_t data) { } 380 381 // Called when we have an attribute with a buffer of data to give to our 382 // handler. The attribute is for the DIE at OFFSET from the beginning of the 383 // .debug_info section. Its name is ATTR, its form is FORM, DATA points to 384 // the buffer's contents, and its length in bytes is LENGTH. The buffer is 385 // owned by the caller, not the callee, and may not persist for very long. 386 // If you want the data to be available later, it needs to be copied. 
ProcessAttributeBuffer(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const uint8_t * data,uint64_t len)387 virtual void ProcessAttributeBuffer(uint64_t offset, 388 enum DwarfAttribute attr, 389 enum DwarfForm form, 390 const uint8_t* data, 391 uint64_t len) { } 392 393 // Called when we have an attribute with string data to give to our handler. 394 // The attribute is for the DIE at OFFSET from the beginning of the 395 // .debug_info section. Its name is ATTR, its form is FORM, and its value is 396 // DATA. ProcessAttributeString(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const string & data)397 virtual void ProcessAttributeString(uint64_t offset, 398 enum DwarfAttribute attr, 399 enum DwarfForm form, 400 const string& data) { } 401 402 // Called when we have an attribute whose value is the 64-bit signature 403 // of a type unit in the .debug_types section. OFFSET is the offset of 404 // the DIE whose attribute we're reporting. ATTR and FORM are the 405 // attribute's name and form. SIGNATURE is the type unit's signature. ProcessAttributeSignature(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t signature)406 virtual void ProcessAttributeSignature(uint64_t offset, 407 enum DwarfAttribute attr, 408 enum DwarfForm form, 409 uint64_t signature) { } 410 411 // Called when finished processing the DIE at OFFSET. 412 // Because DWARF2/3 specifies a tree of DIEs, you may get starts 413 // before ends of the previous DIE, as we process children before 414 // ending the parent. EndDIE(uint64_t offset)415 virtual void EndDIE(uint64_t offset) { } 416 417 }; 418 419 // The base of DWARF2/3 debug info is a DIE (Debugging Information 420 // Entry. 421 // DWARF groups DIE's into a tree and calls the root of this tree a 422 // "compilation unit". Most of the time, there is one compilation 423 // unit in the .debug_info section for each file that had debug info 424 // generated. 425 // Each DIE consists of 426 427 // 1. 
a tag specifying a thing that is being described (ie 428 // DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc 429 // 2. attributes (such as DW_AT_location for location in memory, 430 // DW_AT_name for name), and data for each attribute. 431 // 3. A flag saying whether the DIE has children or not 432 433 // In order to gain some amount of compression, the format of 434 // each DIE (tag name, attributes and data forms for the attributes) 435 // are stored in a separate table called the "abbreviation table". 436 // This is done because a large number of DIEs have the exact same tag 437 // and list of attributes, but different data for those attributes. 438 // As a result, the .debug_info section is just a stream of data, and 439 // requires reading of the .debug_abbrev section to say what the data 440 // means. 441 442 // As a warning to the user, it should be noted that the reason for 443 // using absolute offsets from the beginning of .debug_info is that 444 // DWARF2/3 supports referencing DIE's from other DIE's by their offset 445 // from either the current compilation unit start, *or* the beginning 446 // of the .debug_info section. This means it is possible to reference 447 // a DIE in one compilation unit from a DIE in another compilation 448 // unit. This style of reference is usually used to eliminate 449 // duplicated information that occurs across compilation 450 // units, such as base types, etc. GCC 3.4+ support this with 451 // -feliminate-dwarf2-dups. Other toolchains will sometimes do 452 // duplicate elimination in the linker. 453 454 class CompilationUnit { 455 public: 456 457 // Initialize a compilation unit. This requires a map of sections, 458 // the offset of this compilation unit in the .debug_info section, a 459 // ByteReader, and a Dwarf2Handler class to call callbacks in. 
460 CompilationUnit(const string& path, const SectionMap& sections, 461 uint64_t offset, ByteReader* reader, Dwarf2Handler* handler); ~CompilationUnit()462 virtual ~CompilationUnit() { 463 if (abbrevs_) delete abbrevs_; 464 } 465 466 // Initialize a compilation unit from a .dwo or .dwp file. 467 // In this case, we need the .debug_addr section from the 468 // executable file that contains the corresponding skeleton 469 // compilation unit. We also inherit the Dwarf2Handler from 470 // the executable file, and call it as if we were still 471 // processing the original compilation unit. 472 void SetSplitDwarf(uint64_t addr_base, uint64_t dwo_id); 473 474 // Begin reading a Dwarf2 compilation unit, and calling the 475 // callbacks in the Dwarf2Handler 476 477 // Return the full length of the compilation unit, including 478 // headers. This plus the starting offset passed to the constructor 479 // is the offset of the end of the compilation unit --- and the 480 // start of the next compilation unit, if there is one. 481 uint64_t Start(); 482 483 // Process the actual debug information in a split DWARF file. 
484 bool ProcessSplitDwarf(std::string& split_file, 485 SectionMap& sections, 486 ByteReader& split_byte_reader, 487 uint64_t& cu_offset); 488 GetAddrBuffer()489 const uint8_t* GetAddrBuffer() { return addr_buffer_; } 490 GetAddrBufferLen()491 uint64_t GetAddrBufferLen() { return addr_buffer_length_; } 492 GetAddrBase()493 uint64_t GetAddrBase() { return addr_base_; } 494 GetLowPC()495 uint64_t GetLowPC() { return low_pc_; } 496 GetDWOID()497 uint64_t GetDWOID() { return dwo_id_; } 498 GetLineBuffer()499 const uint8_t* GetLineBuffer() { return line_buffer_; } 500 GetLineBufferLen()501 uint64_t GetLineBufferLen() { return line_buffer_length_; } 502 GetLineStrBuffer()503 const uint8_t* GetLineStrBuffer() { return line_string_buffer_; } 504 GetLineStrBufferLen()505 uint64_t GetLineStrBufferLen() { return line_string_buffer_length_; } 506 HasSourceLineInfo()507 bool HasSourceLineInfo() { return has_source_line_info_; } 508 GetSourceLineOffset()509 uint64_t GetSourceLineOffset() { return source_line_offset_; } 510 ShouldProcessSplitDwarf()511 bool ShouldProcessSplitDwarf() { return should_process_split_dwarf_; } 512 513 private: 514 515 // This struct represents a single DWARF2/3 abbreviation 516 // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a 517 // tag and a list of attributes, as well as the data form of each attribute. 518 struct Abbrev { 519 uint64_t number; 520 enum DwarfTag tag; 521 bool has_children; 522 AttributeList attributes; 523 }; 524 525 // A DWARF2/3 compilation unit header. This is not the same size as 526 // in the actual file, as the one in the file may have a 32 bit or 527 // 64 bit length. 528 struct CompilationUnitHeader { 529 uint64_t length; 530 uint16_t version; 531 uint64_t abbrev_offset; 532 uint8_t address_size; 533 } header_; 534 535 // Reads the DWARF2/3 header for this compilation unit. 
536 void ReadHeader(); 537 538 // Reads the DWARF2/3 abbreviations for this compilation unit 539 void ReadAbbrevs(); 540 541 // Read the abbreviation offset for this compilation unit 542 size_t ReadAbbrevOffset(const uint8_t* headerptr); 543 544 // Read the address size for this compilation unit 545 size_t ReadAddressSize(const uint8_t* headerptr); 546 547 // Read the DWO id from a split or skeleton compilation unit header 548 size_t ReadDwoId(const uint8_t* headerptr); 549 550 // Read the type signature from a type or split type compilation unit header 551 size_t ReadTypeSignature(const uint8_t* headerptr); 552 553 // Read the DWO id from a split or skeleton compilation unit header 554 size_t ReadTypeOffset(const uint8_t* headerptr); 555 556 // Processes a single DIE for this compilation unit and return a new 557 // pointer just past the end of it 558 const uint8_t* ProcessDIE(uint64_t dieoffset, 559 const uint8_t* start, 560 const Abbrev& abbrev); 561 562 // Processes a single attribute and return a new pointer just past the 563 // end of it 564 const uint8_t* ProcessAttribute(uint64_t dieoffset, 565 const uint8_t* start, 566 enum DwarfAttribute attr, 567 enum DwarfForm form, 568 uint64_t implicit_const); 569 570 // Special version of ProcessAttribute, for finding str_offsets_base and 571 // DW_AT_addr_base in DW_TAG_compile_unit, for DWARF v5. 572 const uint8_t* ProcessOffsetBaseAttribute(uint64_t dieoffset, 573 const uint8_t* start, 574 enum DwarfAttribute attr, 575 enum DwarfForm form, 576 uint64_t implicit_const); 577 578 // Called when we have an attribute with unsigned data to give to 579 // our handler. The attribute is for the DIE at OFFSET from the 580 // beginning of compilation unit, has a name of ATTR, a form of 581 // FORM, and the actual data of the attribute is in DATA. 582 // If we see a DW_AT_GNU_dwo_id attribute, save the value so that 583 // we can find the debug info in a .dwo or .dwp file. 
ProcessAttributeUnsigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)584 void ProcessAttributeUnsigned(uint64_t offset, 585 enum DwarfAttribute attr, 586 enum DwarfForm form, 587 uint64_t data) { 588 if (attr == DW_AT_GNU_dwo_id) { 589 dwo_id_ = data; 590 } 591 else if (attr == DW_AT_GNU_addr_base || attr == DW_AT_addr_base) { 592 addr_base_ = data; 593 } 594 else if (attr == DW_AT_str_offsets_base) { 595 str_offsets_base_ = data; 596 } 597 else if (attr == DW_AT_low_pc) { 598 low_pc_ = data; 599 } 600 else if (attr == DW_AT_stmt_list) { 601 has_source_line_info_ = true; 602 source_line_offset_ = data; 603 } 604 handler_->ProcessAttributeUnsigned(offset, attr, form, data); 605 } 606 607 // Called when we have an attribute with signed data to give to 608 // our handler. The attribute is for the DIE at OFFSET from the 609 // beginning of compilation unit, has a name of ATTR, a form of 610 // FORM, and the actual data of the attribute is in DATA. ProcessAttributeSigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,int64_t data)611 void ProcessAttributeSigned(uint64_t offset, 612 enum DwarfAttribute attr, 613 enum DwarfForm form, 614 int64_t data) { 615 handler_->ProcessAttributeSigned(offset, attr, form, data); 616 } 617 618 // Called when we have an attribute with a buffer of data to give to 619 // our handler. The attribute is for the DIE at OFFSET from the 620 // beginning of compilation unit, has a name of ATTR, a form of 621 // FORM, and the actual data of the attribute is in DATA, and the 622 // length of the buffer is LENGTH. 
ProcessAttributeBuffer(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const uint8_t * data,uint64_t len)623 void ProcessAttributeBuffer(uint64_t offset, 624 enum DwarfAttribute attr, 625 enum DwarfForm form, 626 const uint8_t* data, 627 uint64_t len) { 628 handler_->ProcessAttributeBuffer(offset, attr, form, data, len); 629 } 630 631 // Handles the common parts of DW_FORM_GNU_str_index, DW_FORM_strx, 632 // DW_FORM_strx1, DW_FORM_strx2, DW_FORM_strx3, and DW_FORM_strx4. 633 // Retrieves the data and calls through to ProcessAttributeString. 634 void ProcessFormStringIndex(uint64_t offset, 635 enum DwarfAttribute attr, 636 enum DwarfForm form, 637 uint64_t str_index); 638 639 // Called when we have an attribute with string data to give to 640 // our handler. The attribute is for the DIE at OFFSET from the 641 // beginning of compilation unit, has a name of ATTR, a form of 642 // FORM, and the actual data of the attribute is in DATA. 643 // If we see a DW_AT_GNU_dwo_name attribute, save the value so 644 // that we can find the debug info in a .dwo or .dwp file. ProcessAttributeString(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const char * data)645 void ProcessAttributeString(uint64_t offset, 646 enum DwarfAttribute attr, 647 enum DwarfForm form, 648 const char* data) { 649 if (attr == DW_AT_GNU_dwo_name || attr == DW_AT_dwo_name) 650 dwo_name_ = data; 651 handler_->ProcessAttributeString(offset, attr, form, data); 652 } 653 654 // Called to handle common portions of DW_FORM_addrx and variations, as well 655 // as DW_FORM_GNU_addr_index. 
ProcessAttributeAddrIndex(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t addr_index)656 void ProcessAttributeAddrIndex(uint64_t offset, 657 enum DwarfAttribute attr, 658 enum DwarfForm form, 659 uint64_t addr_index) { 660 const uint8_t* addr_ptr = 661 addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize(); 662 ProcessAttributeUnsigned( 663 offset, attr, form, reader_->ReadAddress(addr_ptr)); 664 } 665 666 // Processes all DIEs for this compilation unit 667 void ProcessDIEs(); 668 669 // Skips the die with attributes specified in ABBREV starting at 670 // START, and return the new place to position the stream to. 671 const uint8_t* SkipDIE(const uint8_t* start, const Abbrev& abbrev); 672 673 // Skips the attribute starting at START, with FORM, and return the 674 // new place to position the stream to. 675 const uint8_t* SkipAttribute(const uint8_t* start, enum DwarfForm form); 676 677 // Read the debug sections from a .dwo file. 678 void ReadDebugSectionsFromDwo(ElfReader* elf_reader, 679 SectionMap* sections); 680 681 // Path of the file containing the debug information. 682 const string path_; 683 684 // Offset from section start is the offset of this compilation unit 685 // from the beginning of the .debug_info/.debug_info.dwo section. 686 uint64_t offset_from_section_start_; 687 688 // buffer is the buffer for our CU, starting at .debug_info + offset 689 // passed in from constructor. 690 // after_header points to right after the compilation unit header. 691 const uint8_t* buffer_; 692 uint64_t buffer_length_; 693 const uint8_t* after_header_; 694 695 // The associated ByteReader that handles endianness issues for us 696 ByteReader* reader_; 697 698 // The map of sections in our file to buffers containing their data 699 const SectionMap& sections_; 700 701 // The associated handler to call processing functions in 702 Dwarf2Handler* handler_; 703 704 // Set of DWARF2/3 abbreviations for this compilation unit. 
Indexed 705 // by abbreviation number, which means that abbrevs_[0] is not 706 // valid. 707 std::vector<Abbrev>* abbrevs_; 708 709 // String section buffer and length, if we have a string section. 710 // This is here to avoid doing a section lookup for strings in 711 // ProcessAttribute, which is in the hot path for DWARF2 reading. 712 const uint8_t* string_buffer_; 713 uint64_t string_buffer_length_; 714 715 // Similarly for .debug_line_str. 716 const uint8_t* line_string_buffer_; 717 uint64_t line_string_buffer_length_; 718 719 // String offsets section buffer and length, if we have a string offsets 720 // section (.debug_str_offsets or .debug_str_offsets.dwo). 721 const uint8_t* str_offsets_buffer_; 722 uint64_t str_offsets_buffer_length_; 723 724 // Address section buffer and length, if we have an address section 725 // (.debug_addr). 726 const uint8_t* addr_buffer_; 727 uint64_t addr_buffer_length_; 728 729 // .debug_line section buffer and length. 730 const uint8_t* line_buffer_; 731 uint64_t line_buffer_length_; 732 733 // Flag indicating whether this compilation unit is part of a .dwo 734 // or .dwp file. If true, we are reading this unit because a 735 // skeleton compilation unit in an executable file had a 736 // DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute. 737 // In a .dwo file, we expect the string offsets section to 738 // have a ".dwo" suffix, and we will use the ".debug_addr" section 739 // associated with the skeleton compilation unit. 740 bool is_split_dwarf_; 741 742 // Flag indicating if it's a Type Unit (only applicable to DWARF v5). 743 bool is_type_unit_; 744 745 // The value of the DW_AT_GNU_dwo_id attribute, if any. 746 uint64_t dwo_id_; 747 748 // The value of the DW_AT_GNU_type_signature attribute, if any. 749 uint64_t type_signature_; 750 751 // The value of the DW_AT_GNU_type_offset attribute, if any. 752 size_t type_offset_; 753 754 // The value of the DW_AT_GNU_dwo_name attribute, if any. 
const char* dwo_name_;

// If this is a split DWARF CU, the value of the DW_AT_GNU_dwo_id attribute
// from the skeleton CU.
uint64_t skeleton_dwo_id_;

// The value of the DW_AT_GNU_addr_base attribute, if any.
uint64_t addr_base_;

// The value of DW_AT_str_offsets_base attribute, if any.
uint64_t str_offsets_base_;

// True if we have already looked for a .dwp file.
bool have_checked_for_dwp_;

// ElfReader for the .dwo or .dwp file.
std::unique_ptr<ElfReader> split_elf_reader_;

// DWP reader.
std::unique_ptr<DwpReader> dwp_reader_;

// NOTE(review): undocumented in the original; presumably controls whether
// split DWARF (.dwo/.dwp) data is followed and processed --- confirm
// against the constructor and the .cc file.
bool should_process_split_dwarf_;

// The value of the DW_AT_low_pc attribute, if any.
uint64_t low_pc_;

// The value of DW_AT_stmt_list attribute if any.
bool has_source_line_info_;
uint64_t source_line_offset_;
};

// A Reader for a .dwp file. Supports the fetching of DWARF debug
// info for a given dwo_id.
//
// There are two versions of .dwp files. In both versions, the
// .dwp file is an ELF file containing only debug sections.
// In Version 1, the file contains many copies of each debug
// section, one for each .dwo file that is packaged in the .dwp
// file, and the .debug_cu_index section maps from the dwo_id
// to a set of section indexes. In Version 2, the file contains
// one of each debug section, and the .debug_cu_index section
// maps from the dwo_id to a set of offsets and lengths that
// identify each .dwo file's contribution to the larger sections.

class DwpReader {
 public:
  // BYTE_READER is held by reference and ELF_READER by pointer (see the
  // byte_reader_ and elf_reader_ members below), so both must outlive
  // this DwpReader.
  DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader);

  // Read the CU index and initialize data members.
  void Initialize();

  // Read the debug sections for the given dwo_id.
  void ReadDebugSectionsForCU(uint64_t dwo_id, SectionMap* sections);

 private:
  // Search a v1 hash table for "dwo_id". Returns the slot index
  // where the dwo_id was found, or -1 if it was not found.
  int LookupCU(uint64_t dwo_id);

  // Search a v2 hash table for "dwo_id". Returns the row index
  // in the offsets and sizes tables, or 0 if it was not found.
  // (Note the different "not found" value from LookupCU above.)
  uint32_t LookupCUv2(uint64_t dwo_id);

  // The ELF reader for the .dwp file.
  ElfReader* elf_reader_;

  // The ByteReader for the .dwp file.
  const ByteReader& byte_reader_;

  // Pointer to the .debug_cu_index section.
  const char* cu_index_;

  // Size of the .debug_cu_index section.
  size_t cu_index_size_;

  // Pointer to the .debug_str.dwo section.
  const char* string_buffer_;

  // Size of the .debug_str.dwo section.
  size_t string_buffer_size_;

  // Version of the .dwp file. We support versions 1 and 2 currently.
  int version_;

  // Number of columns in the section tables (version 2).
  unsigned int ncolumns_;

  // Number of units in the section tables (version 2).
  unsigned int nunits_;

  // Number of slots in the hash table.
  unsigned int nslots_;

  // Pointer to the beginning of the hash table.
  const char* phash_;

  // Pointer to the beginning of the index table.
  const char* pindex_;

  // Pointer to the beginning of the section index pool (version 1).
  const char* shndx_pool_;

  // Pointer to the beginning of the section offset table (version 2).
  const char* offset_table_;

  // Pointer to the beginning of the section size table (version 2).
  const char* size_table_;

  // Contents of the sections of interest (version 2).
  const char* abbrev_data_;
  size_t abbrev_size_;
  const char* info_data_;
  size_t info_size_;
  const char* str_offsets_data_;
  size_t str_offsets_size_;
  const char* rnglist_data_;
  size_t rnglist_size_;
};

// This class is a reader for DWARF's Call Frame Information.
// CFI describes how to unwind stack frames --- even for functions that
// do not follow fixed conventions for saving registers, whose frame
// size varies as they execute, etc.
//
// CFI describes, at each machine instruction, how to compute the
// stack frame's base address, how to find the return address, and
// where to find the saved values of the caller's registers (if the
// callee has stashed them somewhere to free up the registers for its
// own use).
//
// For example, suppose we have a function whose machine code looks
// like this (imagine an assembly language that looks like C, for a
// machine with 32-bit registers, and a stack that grows towards lower
// addresses):
//
// func:                                ; entry point; return address at sp
// func+0:      sp = sp - 16            ; allocate space for stack frame
// func+1:      sp[12] = r0             ; save r0 at sp+12
// ...                                  ; other code, not frame-related
// func+10:     sp -= 4; *sp = x        ; push some x on the stack
// ...                                  ; other code, not frame-related
// func+20:     r0 = sp[16]             ; restore saved r0
// func+21:     sp += 20                ; pop whole stack frame
// func+22:     pc = *sp; sp += 4       ; pop return address and jump to it
//
// DWARF CFI is (a very compressed representation of) a table with a
// row for each machine instruction address and a column for each
// register showing how to restore it, if possible.
//
// A special column named "CFA", for "Canonical Frame Address", tells how
// to compute the base address of the frame; registers' entries may
// refer to the CFA in describing where the registers are saved.
//
// Another special column, named "RA", represents the return address.
//
// For example, here is a complete (uncompressed) table describing the
// function above:
//
//     insn      cfa      r0      r1 ...  ra
//     =======================================
//     func+0:   sp                       cfa[0]
//     func+1:   sp+16                    cfa[0]
//     func+2:   sp+16    cfa[-4]         cfa[0]
//     func+11:  sp+20    cfa[-4]         cfa[0]
//     func+21:  sp+20                    cfa[0]
//     func+22:  sp                       cfa[0]
//
// Some things to note here:
//
// - Each row describes the state of affairs *before* executing the
//   instruction at the given address. Thus, the row for func+0
//   describes the state before we allocate the stack frame. In the
//   next row, the formula for computing the CFA has changed,
//   reflecting that allocation.
//
// - The other entries are written in terms of the CFA; this allows
//   them to remain unchanged as the stack pointer gets bumped around.
//   For example, the rule for recovering the return address (the "ra"
//   column) remains unchanged throughout the function, even as the
//   stack pointer takes on three different offsets from the return
//   address.
//
// - Although we haven't shown it, most calling conventions designate
//   "callee-saves" and "caller-saves" registers. The callee must
//   preserve the values of callee-saves registers; if it uses them,
//   it must save their original values somewhere, and restore them
//   before it returns. In contrast, the callee is free to trash
//   caller-saves registers; if the callee uses these, it will
//   probably not bother to save them anywhere, and the CFI will
//   probably mark their values as "unrecoverable".
//
//   (However, since the caller cannot assume the callee was going to
//   save them, caller-saves registers are probably dead in the caller
//   anyway, so compilers usually don't generate CFI for caller-saves
//   registers.)
//
// - Exactly where the CFA points is a matter of convention that
//   depends on the architecture and ABI in use. In the example, the
//   CFA is the value the stack pointer had upon entry to the
//   function, pointing at the saved return address. But on the x86,
//   the call frame information generated by GCC follows the
//   convention that the CFA is the address *after* the saved return
//   address.
//
//   But by definition, the CFA remains constant throughout the
//   lifetime of the frame. This makes it a useful value for other
//   columns to refer to. It also gives debuggers a useful handle
//   for identifying a frame.
//
// If you look at the table above, you'll notice that a given entry is
// often the same as the one immediately above it: most instructions
// change only one or two aspects of the stack frame, if they affect
// it at all. The DWARF format takes advantage of this fact, and
// reduces the size of the data by mentioning only the addresses and
// columns at which changes take place. So for the above, DWARF CFI
// data would only actually mention the following:
//
//     insn      cfa      r0      r1 ...  ra
//     =======================================
//     func+0:   sp                       cfa[0]
//     func+1:   sp+16
//     func+2:            cfa[-4]
//     func+11:  sp+20
//     func+21:           r0
//     func+22:  sp
//
// In fact, this is the way the parser reports CFI to the consumer: as
// a series of statements of the form, "At address X, column Y changed
// to Z," and related conventions for describing the initial state.
//
// Naturally, it would be impractical to have to scan the entire
// program's CFI, noting changes as we go, just to recover the
// unwinding rules in effect at one particular instruction. To avoid
// this, CFI data is grouped into "entries", each of which covers a
// specified range of addresses and begins with a complete statement
// of the rules for all recoverable registers at that starting
// address. Each entry typically covers a single function.
//
// Thus, to compute the contents of a given row of the table --- that
// is, rules for recovering the CFA, RA, and registers at a given
// instruction --- the consumer should find the entry that covers that
// instruction's address, start with the initial state supplied at the
// beginning of the entry, and work forward until it has processed all
// the changes up to and including those for the present instruction.
//
// There are seven kinds of rules that can appear in an entry of the
// table:
//
// - "undefined": The given register is not preserved by the callee;
//   its value cannot be recovered.
//
// - "same value": This register has the same value it did in the callee.
//
// - offset(N): The register is saved at offset N from the CFA.
//
// - val_offset(N): The value the register had in the caller is the
//   CFA plus offset N. (This is usually only useful for describing
//   the stack pointer.)
//
// - register(R): The register's value was saved in another register R.
//
// - expression(E): Evaluating the DWARF expression E using the
//   current frame's registers' values yields the address at which the
//   register was saved.
//
// - val_expression(E): Evaluating the DWARF expression E using the
//   current frame's registers' values yields the value the register
//   had in the caller.

class CallFrameInfo {
 public:
  // The different kinds of entries one finds in CFI. Used internally,
  // and for error reporting.
  enum EntryKind { kUnknown, kCIE, kFDE, kTerminator };

  // The handler class to which the parser hands the parsed call frame
  // information. Defined below.
  class Handler;

  // A reporter class, which CallFrameInfo uses to report errors
  // encountered while parsing call frame information. Defined below.
  class Reporter;

  // Create a DWARF CFI parser. BUFFER points to the contents of the
  // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes.
  // REPORTER is an error reporter the parser should use to report
  // problems. READER is a ByteReader instance that has the endianness and
  // address size set properly. Report the data we find to HANDLER.
  //
  // The constructor only stores these pointers; it does not copy the
  // buffer, so BUFFER, READER, HANDLER and REPORTER must remain valid
  // for as long as this object is used.
  //
  // This class can also parse Linux C++ exception handling data, as found
  // in '.eh_frame' sections. This data is a variant of DWARF CFI that is
  // placed in loadable segments so that it is present in the program's
  // address space, and is interpreted by the C++ runtime to search the
  // call stack for a handler interested in the exception being thrown,
  // actually pop the frames, and find cleanup code to run.
  //
  // There are two differences between the call frame information described
  // in the DWARF standard and the exception handling data Linux places in
  // the .eh_frame section:
  //
  // - Exception handling data uses a different format for call frame
  //   information entry headers. The distinguished CIE id, the way FDEs
  //   refer to their CIEs, and the way the end of the series of entries is
  //   determined are all slightly different.
  //
  //   If the constructor's EH_FRAME argument is true, then the
  //   CallFrameInfo parses the entry headers as Linux C++ exception
  //   handling data. If EH_FRAME is false or omitted, the CallFrameInfo
  //   parses standard DWARF call frame information.
  //
  // - Linux C++ exception handling data uses CIE augmentation strings
  //   beginning with 'z' to specify the presence of additional data after
  //   the CIE and FDE headers and special encodings used for addresses in
  //   frame description entries.
  //
  //   CallFrameInfo can handle 'z' augmentations in either DWARF CFI or
  //   exception handling data if you have supplied READER with the base
  //   addresses needed to interpret the pointer encodings that 'z'
  //   augmentations can specify. See the ByteReader interface for details
  //   about the base addresses. See the CallFrameInfo::Handler interface
  //   for details about the additional information one might find in
  //   'z'-augmented data.
  //
  // Thus:
  //
  // - If you are parsing standard DWARF CFI, as found in a .debug_frame
  //   section, you should pass false for the EH_FRAME argument, or omit
  //   it, and you need not worry about providing READER with the
  //   additional base addresses.
  //
  // - If you want to parse Linux C++ exception handling data from a
  //   .eh_frame section, you should pass EH_FRAME as true, and call
  //   READER's Set*Base member functions before calling our Start method.
  //
  // - If you want to parse DWARF CFI that uses the 'z' augmentations
  //   (although I don't think any toolchain ever emits such data), you
  //   could pass false for EH_FRAME, but call READER's Set*Base members.
  //
  // The extensions the Linux C++ ABI makes to DWARF for exception
  // handling are described here, rather poorly:
  // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
  // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
  //
  // The mechanics of C++ exception handling, personality routines,
  // and language-specific data areas are described here, rather nicely:
  // http://www.codesourcery.com/public/cxx-abi/abi-eh.html
  CallFrameInfo(const uint8_t* buffer, size_t buffer_length,
                ByteReader* reader, Handler* handler, Reporter* reporter,
                bool eh_frame = false)
      : buffer_(buffer), buffer_length_(buffer_length),
        reader_(reader), handler_(handler), reporter_(reporter),
        eh_frame_(eh_frame) { }

  ~CallFrameInfo() { }

  // Parse the entries in BUFFER, reporting what we find to HANDLER.
  // Return true if we reach the end of the section successfully, or
  // false if we encounter an error.
  bool Start();

  // Return the textual name of KIND. For error reporting.
  static const char* KindName(EntryKind kind);

 private:

  struct CIE;

  // A CFI entry, either an FDE or a CIE.
  struct Entry {
    // The starting offset of the entry in the section, for error
    // reporting.
    size_t offset;

    // The start of this entry in the buffer.
    const uint8_t* start;

    // Which kind of entry this is.
    //
    // We want to be able to use this for error reporting even while we're
    // in the midst of parsing. Error reporting code may assume that kind,
    // offset, and start fields are valid, although kind may be kUnknown.
    EntryKind kind;

    // The end of this entry's common prologue (initial length and id), and
    // the start of this entry's kind-specific fields.
    const uint8_t* fields;

    // The start of this entry's instructions.
    const uint8_t* instructions;

    // The address past the entry's last byte in the buffer. (Note that
    // since offset points to the entry's initial length field, and the
    // length field is the number of bytes after that field, this is not
    // simply buffer_ + offset + length.)
    const uint8_t* end;

    // For both DWARF CFI and .eh_frame sections, this is the CIE id in a
    // CIE, and the offset of the associated CIE in an FDE.
    uint64_t id;

    // The CIE that applies to this entry, if we've parsed it. If this is a
    // CIE, then this field points to this structure.
    CIE* cie;
  };

  // A common information entry (CIE).
  struct CIE: public Entry {
    uint8_t version;                      // CFI data version number
    string augmentation;                  // vendor format extension markers
    uint64_t code_alignment_factor;       // scale for code address adjustments
    int data_alignment_factor;            // scale for stack pointer adjustments
    unsigned return_address_register;     // which register holds the return addr

    // True if this CIE includes Linux C++ ABI 'z' augmentation data.
    bool has_z_augmentation;

    // Parsed 'z' augmentation data. These are meaningful only if
    // has_z_augmentation is true.
    bool has_z_lsda;                      // The 'z' augmentation included 'L'.
    bool has_z_personality;               // The 'z' augmentation included 'P'.
    bool has_z_signal_frame;              // The 'z' augmentation included 'S'.

    // If has_z_lsda is true, this is the encoding to be used for language-
    // specific data area pointers in FDEs.
    DwarfPointerEncoding lsda_encoding;

    // If has_z_personality is true, this is the encoding used for the
    // personality routine pointer in the augmentation data.
    DwarfPointerEncoding personality_encoding;

    // If has_z_personality is true, this is the address of the personality
    // routine --- or, if personality_encoding & DW_EH_PE_indirect, the
    // address where the personality routine's address is stored.
    uint64_t personality_address;

    // This is the encoding used for addresses in the FDE header and
    // in DW_CFA_set_loc instructions. This is always valid, whether
    // or not we saw a 'z' augmentation string; its default value is
    // DW_EH_PE_absptr, which is what normal DWARF CFI uses.
    DwarfPointerEncoding pointer_encoding;

    // These were only introduced in DWARF4, so will not be set in older
    // versions.
    uint8_t address_size;
    uint8_t segment_size;
  };

  // A frame description entry (FDE).
  struct FDE: public Entry {
    uint64_t address;                     // start address of described code
    uint64_t size;                        // size of described code, in bytes

    // If cie->has_z_lsda is true, then this is the language-specific data
    // area's address --- or its address's address, if cie->lsda_encoding
    // has the DW_EH_PE_indirect bit set.
    uint64_t lsda_address;
  };

  // Internal use.
  class Rule;
  class UndefinedRule;
  class SameValueRule;
  class OffsetRule;
  class ValOffsetRule;
  class RegisterRule;
  class ExpressionRule;
  class ValExpressionRule;
  class RuleMap;
  class State;

  // Parse the initial length and id of a CFI entry, either a CIE, an FDE,
  // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the
  // data to parse. On success, populate ENTRY as appropriate, and return
  // true. On failure, report the problem, and return false. Even if we
  // return false, set ENTRY->end to the first byte after the entry if we
  // were able to figure that out, or NULL if we weren't.
  bool ReadEntryPrologue(const uint8_t* cursor, Entry* entry);

  // Parse the fields of a CIE after the entry prologue, including any 'z'
  // augmentation data. Assume that the 'Entry' fields of CIE are
  // populated; use CIE->fields and CIE->end as the start and limit for
  // parsing. On success, populate the rest of *CIE, and return true; on
  // failure, report the problem and return false.
  bool ReadCIEFields(CIE* cie);

  // Parse the fields of an FDE after the entry prologue, including any 'z'
  // augmentation data. Assume that the 'Entry' fields of *FDE are
  // initialized; use FDE->fields and FDE->end as the start and limit for
  // parsing. Assume that FDE->cie is fully initialized. On success,
  // populate the rest of *FDE, and return true; on failure, report the
  // problem and return false.
  bool ReadFDEFields(FDE* fde);

  // Report that ENTRY is incomplete, and return false. This is just a
  // trivial wrapper for invoking reporter_->Incomplete; it provides a
  // little brevity.
  bool ReportIncomplete(Entry* entry);

  // Return true if ENCODING has the DW_EH_PE_indirect bit set.
  static bool IsIndirectEncoding(DwarfPointerEncoding encoding) {
    // The masked value is implicitly converted to bool: nonzero means set.
    return encoding & DW_EH_PE_indirect;
  }

  // The contents of the CFI section we're parsing (.debug_frame, or
  // .eh_frame when eh_frame_ is true), and its length in bytes.
  const uint8_t* buffer_;
  size_t buffer_length_;

  // For reading multi-byte values with the appropriate endianness.
  ByteReader* reader_;

  // The handler to which we should report the data we find.
  Handler* handler_;

  // For reporting problems in the info we're parsing.
  Reporter* reporter_;

  // True if we are processing .eh_frame-format data.
  bool eh_frame_;
};

// The handler class for CallFrameInfo.
// A CFI parser calls the member functions of a handler object to report
// the data it finds.
class CallFrameInfo::Handler {
 public:
  // The pseudo-register number for the canonical frame address.
  enum { kCFARegister = -1 };

  Handler() { }
  virtual ~Handler() { }

  // The parser has found CFI for the machine code at ADDRESS,
  // extending for LENGTH bytes. OFFSET is the offset of the frame
  // description entry in the section, for use in error messages.
  // VERSION is the version number of the CFI format. AUGMENTATION is
  // a string describing any producer-specific extensions present in
  // the data. RETURN_ADDRESS is the number of the register that holds
  // the address to which the function should return.
  //
  // Entry should return true to process this CFI, or false to skip to
  // the next entry.
  //
  // The parser invokes Entry for each Frame Description Entry (FDE)
  // it finds. The parser doesn't report Common Information Entries
  // to the handler explicitly; instead, if the handler elects to
  // process a given FDE, the parser reiterates the appropriate CIE's
  // contents at the beginning of the FDE's rules.
  virtual bool Entry(size_t offset, uint64_t address, uint64_t length,
                     uint8_t version, const string& augmentation,
                     unsigned return_address) = 0;

  // When the Entry function returns true, the parser calls these
  // handler functions repeatedly to describe the rules for recovering
  // registers at each instruction in the given range of machine code.
  // Immediately after a call to Entry, the handler should assume that
  // the rule for each callee-saves register is "unchanged" --- that
  // is, that the register still has the value it had in the caller.
  //
  // If a *Rule function returns true, we continue processing this entry's
  // instructions. If a *Rule function returns false, we stop evaluating
  // instructions, and skip to the next entry. Either way, we call End
  // before going on to the next entry.
  //
  // In all of these functions, if the REG parameter is kCFARegister, then
  // the rule describes how to find the canonical frame address.
  // kCFARegister may be passed as a BASE_REGISTER argument, meaning that
  // the canonical frame address should be used as the base address for the
  // computation. All other REG values will be positive.

  // At ADDRESS, register REG's value is not recoverable.
  virtual bool UndefinedRule(uint64_t address, int reg) = 0;

  // At ADDRESS, register REG's value is the same as that it had in
  // the caller.
  virtual bool SameValueRule(uint64_t address, int reg) = 0;

  // At ADDRESS, register REG has been saved at offset OFFSET from
  // BASE_REGISTER.
  virtual bool OffsetRule(uint64_t address, int reg,
                          int base_register, long offset) = 0;

  // At ADDRESS, the caller's value of register REG is the current
  // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an
  // address at which the register's value is saved.)
  virtual bool ValOffsetRule(uint64_t address, int reg,
                             int base_register, long offset) = 0;

  // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs
  // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that
  // BASE_REGISTER is the "home" for REG's saved value: if you want to
  // assign to a variable whose home is REG in the calling frame, you
  // should put the value in BASE_REGISTER.
  virtual bool RegisterRule(uint64_t address, int reg, int base_register) = 0;

  // At ADDRESS, the DWARF expression EXPRESSION yields the address at
  // which REG was saved.
  virtual bool ExpressionRule(uint64_t address, int reg,
                              const string& expression) = 0;

  // At ADDRESS, the DWARF expression EXPRESSION yields the caller's
  // value for REG. (This rule doesn't provide an address at which the
  // register's value is saved.)
  virtual bool ValExpressionRule(uint64_t address, int reg,
                                 const string& expression) = 0;

  // Indicate that the rules for the address range reported by the
  // last call to Entry are complete. End should return true if
  // everything is okay, or false if an error has occurred and parsing
  // should stop.
  virtual bool End() = 0;

  // The target architecture for the data.
  virtual string Architecture() = 0;

  // Handler functions for Linux C++ exception handling data. These are
  // only called if the data includes 'z' augmentation strings.

  // The Linux C++ ABI uses an extension of the DWARF CFI format to
  // walk the stack to propagate exceptions from the throw to the
  // appropriate catch, and do the appropriate cleanups along the way.
  // CFI entries used for exception handling have two additional data
  // associated with them:
  //
  // - The "language-specific data area" describes which exception
  //   types the function has 'catch' clauses for, and indicates how
  //   to go about re-entering the function at the appropriate catch
  //   clause. If the exception is not caught, it describes the
  //   destructors that must run before the frame is popped.
  //
  // - The "personality routine" is responsible for interpreting the
  //   language-specific data area's contents, and deciding whether
  //   the exception should continue to propagate down the stack,
  //   perhaps after doing some cleanup for this frame, or whether the
  //   exception will be caught here.
  //
  // In principle, the language-specific data area is opaque to
  // everybody but the personality routine. In practice, these values
  // may be useful or interesting to readers with extra context, and
  // we have to at least skip them anyway, so we might as well report
  // them to the handler.

  // This entry's exception handling personality routine's address is
  // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
  // which the routine's address is stored. The default definition for
  // this handler function simply returns true, allowing parsing of
  // the entry to continue.
  virtual bool PersonalityRoutine(uint64_t address, bool indirect) {
    return true;
  }

  // This entry's language-specific data area (LSDA) is located at
  // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
  // which the area's address is stored. The default definition for
  // this handler function simply returns true, allowing parsing of
  // the entry to continue.
  virtual bool LanguageSpecificDataArea(uint64_t address, bool indirect) {
    return true;
  }

  // This entry describes a signal trampoline --- this frame is the
  // caller of a signal handler. The default definition for this
  // handler function simply returns true, allowing parsing of the
  // entry to continue.
  //
  // The best description of the rationale for and meaning of signal
  // trampoline CFI entries seems to be in the GCC bug database:
  // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208
  virtual bool SignalHandler() { return true; }
};

// The CallFrameInfo class makes calls on an instance of this class to
// report errors or warn about problems in the data it is parsing.
// The default definitions of these methods print a message to stderr, but
// you can make a derived class that overrides them.
class CallFrameInfo::Reporter {
 public:
  // Create an error reporter which attributes troubles to the section
  // named SECTION in FILENAME.
  //
  // Normally SECTION would be .debug_frame, but the Mac puts CFI data
  // in a Mach-O section named __debug_frame. If we support
  // Linux-style exception handling data, we could be reading an
  // .eh_frame section.
  Reporter(const string& filename,
           const string& section = ".debug_frame")
      : filename_(filename), section_(section) { }
  virtual ~Reporter() { }

  // The CFI entry at OFFSET ends too early to be well-formed. KIND
  // indicates what kind of entry it is; KIND can be kUnknown if we
  // haven't parsed enough of the entry to tell yet.
  virtual void Incomplete(uint64_t offset, CallFrameInfo::EntryKind kind);

  // The .eh_frame data has a four-byte zero at OFFSET where the next
  // entry's length would be; this is a terminator. However, the buffer
  // length as given to the CallFrameInfo constructor says there should be
  // more data.
  virtual void EarlyEHTerminator(uint64_t offset);

  // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the
  // section is not that large.
  virtual void CIEPointerOutOfRange(uint64_t offset, uint64_t cie_offset);

  // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry
  // there is not a CIE.
  virtual void BadCIEId(uint64_t offset, uint64_t cie_offset);

  // The FDE at OFFSET refers to a CIE with an address size we don't know how
  // to handle.
  virtual void UnexpectedAddressSize(uint64_t offset, uint8_t address_size);

  // The FDE at OFFSET refers to a CIE with a segment descriptor size we
  // don't know how to handle.
  virtual void UnexpectedSegmentSize(uint64_t offset, uint8_t segment_size);

  // The FDE at OFFSET refers to a CIE with version number VERSION,
  // which we don't recognize. We cannot parse DWARF CFI if it uses
  // a version number we don't recognize.
  virtual void UnrecognizedVersion(uint64_t offset, int version);

  // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION,
  // which we don't recognize. We cannot parse DWARF CFI if it uses
  // augmentations we don't recognize.
  virtual void UnrecognizedAugmentation(uint64_t offset,
                                        const string& augmentation);

  // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not
  // a valid encoding.
  virtual void InvalidPointerEncoding(uint64_t offset, uint8_t encoding);

  // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends
  // on a base address which has not been supplied.
  virtual void UnusablePointerEncoding(uint64_t offset, uint8_t encoding);

  // The CIE at OFFSET contains a DW_CFA_restore instruction at
  // INSN_OFFSET, which may not appear in a CIE.
  virtual void RestoreInCIE(uint64_t offset, uint64_t insn_offset);

  // The entry at OFFSET, of kind KIND, has an unrecognized
  // instruction at INSN_OFFSET.
  virtual void BadInstruction(uint64_t offset, CallFrameInfo::EntryKind kind,
                              uint64_t insn_offset);

  // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
  // KIND, establishes a rule that cites the CFA, but we have not
  // established a CFA rule yet.
  virtual void NoCFARule(uint64_t offset, CallFrameInfo::EntryKind kind,
                         uint64_t insn_offset);

  // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
  // KIND, is a DW_CFA_restore_state instruction, but the stack of
  // saved states is empty.
  virtual void EmptyStateStack(uint64_t offset, CallFrameInfo::EntryKind kind,
                               uint64_t insn_offset);

  // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry
  // at OFFSET, of kind KIND, would restore a state that has no CFA
  // rule, whereas the current state does have a CFA rule. This is
  // bogus input, which the CallFrameInfo::Handler interface doesn't
  // (and shouldn't) have any way to report.
  // NOTE(review): since this describes *restoring* a saved state, the
  // instruction meant here is presumably DW_CFA_restore_state rather
  // than DW_CFA_remember_state --- confirm against the parser.
  virtual void ClearingCFARule(uint64_t offset, CallFrameInfo::EntryKind kind,
                               uint64_t insn_offset);

 protected:
  // The name of the file whose CFI we're reading.
  string filename_;

  // The name of the CFI section in that file.
  string section_;
};

}  // namespace google_breakpad

#endif  // COMMON_DWARF_DWARF2READER_H__