// -*- mode: C++ -*-

// Copyright 2010 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 31 #ifndef COMMON_DWARF_BYTEREADER_H__ 32 #define COMMON_DWARF_BYTEREADER_H__ 33 34 #include <stdint.h> 35 36 #include <string> 37 38 #include "common/dwarf/types.h" 39 #include "common/dwarf/dwarf2enums.h" 40 41 namespace google_breakpad { 42 43 // We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN 44 // because it conflicts with a macro 45 enum Endianness { 46 ENDIANNESS_BIG, 47 ENDIANNESS_LITTLE 48 }; 49 50 // A ByteReader knows how to read single- and multi-byte values of 51 // various endiannesses, sizes, and encodings, as used in DWARF 52 // debugging information and Linux C++ exception handling data. 53 class ByteReader { 54 public: 55 // Construct a ByteReader capable of reading one-, two-, four-, and 56 // eight-byte values according to ENDIANNESS, absolute machine-sized 57 // addresses, DWARF-style "initial length" values, signed and 58 // unsigned LEB128 numbers, and Linux C++ exception handling data's 59 // encoded pointers. 60 explicit ByteReader(enum Endianness endianness); 61 virtual ~ByteReader(); 62 63 // Read a single byte from BUFFER and return it as an unsigned 8 bit 64 // number. 65 uint8_t ReadOneByte(const uint8_t* buffer) const; 66 67 // Read two bytes from BUFFER and return them as an unsigned 16 bit 68 // number, using this ByteReader's endianness. 69 uint16_t ReadTwoBytes(const uint8_t* buffer) const; 70 71 // Read three bytes from BUFFER and return them as an unsigned 64 bit 72 // number, using this ByteReader's endianness. DWARF 5 uses this encoding 73 // for various index-related DW_FORMs. 74 uint64_t ReadThreeBytes(const uint8_t* buffer) const; 75 76 // Read four bytes from BUFFER and return them as an unsigned 32 bit 77 // number, using this ByteReader's endianness. This function returns 78 // a uint64_t so that it is compatible with ReadAddress and 79 // ReadOffset. The number it returns will never be outside the range 80 // of an unsigned 32 bit integer. 
81 uint64_t ReadFourBytes(const uint8_t* buffer) const; 82 83 // Read eight bytes from BUFFER and return them as an unsigned 64 84 // bit number, using this ByteReader's endianness. 85 uint64_t ReadEightBytes(const uint8_t* buffer) const; 86 87 // Read an unsigned LEB128 (Little Endian Base 128) number from 88 // BUFFER and return it as an unsigned 64 bit integer. Set LEN to 89 // the number of bytes read. 90 // 91 // The unsigned LEB128 representation of an integer N is a variable 92 // number of bytes: 93 // 94 // - If N is between 0 and 0x7f, then its unsigned LEB128 95 // representation is a single byte whose value is N. 96 // 97 // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | 98 // 0x80, followed by the unsigned LEB128 representation of N / 99 // 128, rounded towards negative infinity. 100 // 101 // In other words, we break VALUE into groups of seven bits, put 102 // them in little-endian order, and then write them as eight-bit 103 // bytes with the high bit on all but the last. 104 uint64_t ReadUnsignedLEB128(const uint8_t* buffer, size_t* len) const; 105 106 // Read a signed LEB128 number from BUFFER and return it as an 107 // signed 64 bit integer. Set LEN to the number of bytes read. 108 // 109 // The signed LEB128 representation of an integer N is a variable 110 // number of bytes: 111 // 112 // - If N is between -0x40 and 0x3f, then its signed LEB128 113 // representation is a single byte whose value is N in two's 114 // complement. 115 // 116 // - Otherwise, its signed LEB128 representation is (N & 0x7f) | 117 // 0x80, followed by the signed LEB128 representation of N / 128, 118 // rounded towards negative infinity. 119 // 120 // In other words, we break VALUE into groups of seven bits, put 121 // them in little-endian order, and then write them as eight-bit 122 // bytes with the high bit on all but the last. 
123 int64_t ReadSignedLEB128(const uint8_t* buffer, size_t* len) const; 124 125 // Indicate that addresses on this architecture are SIZE bytes long. SIZE 126 // must be either 4 or 8. (DWARF allows addresses to be any number of 127 // bytes in length from 1 to 255, but we only support 32- and 64-bit 128 // addresses at the moment.) You must call this before using the 129 // ReadAddress member function. 130 // 131 // For data in a .debug_info section, or something that .debug_info 132 // refers to like line number or macro data, the compilation unit 133 // header's address_size field indicates the address size to use. Call 134 // frame information doesn't indicate its address size (a shortcoming of 135 // the spec); you must supply the appropriate size based on the 136 // architecture of the target machine. 137 void SetAddressSize(uint8_t size); 138 139 // Return the current address size, in bytes. This is either 4, 140 // indicating 32-bit addresses, or 8, indicating 64-bit addresses. AddressSize()141 uint8_t AddressSize() const { return address_size_; } 142 143 // Read an address from BUFFER and return it as an unsigned 64 bit 144 // integer, respecting this ByteReader's endianness and address size. You 145 // must call SetAddressSize before calling this function. 146 uint64_t ReadAddress(const uint8_t* buffer) const; 147 148 // DWARF actually defines two slightly different formats: 32-bit DWARF 149 // and 64-bit DWARF. This is *not* related to the size of registers or 150 // addresses on the target machine; it refers only to the size of section 151 // offsets and data lengths appearing in the DWARF data. One only needs 152 // 64-bit DWARF when the debugging data itself is larger than 4GiB. 153 // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the 154 // debugging data itself is very large. 
155 // 156 // DWARF information identifies itself as 32-bit or 64-bit DWARF: each 157 // compilation unit and call frame information entry begins with an 158 // "initial length" field, which, in addition to giving the length of the 159 // data, also indicates the size of section offsets and lengths appearing 160 // in that data. The ReadInitialLength member function, below, reads an 161 // initial length and sets the ByteReader's offset size as a side effect. 162 // Thus, in the normal process of reading DWARF data, the appropriate 163 // offset size is set automatically. So, you should only need to call 164 // SetOffsetSize if you are using the same ByteReader to jump from the 165 // midst of one block of DWARF data into another. 166 167 // Read a DWARF "initial length" field from START, and return it as 168 // an unsigned 64 bit integer, respecting this ByteReader's 169 // endianness. Set *LEN to the length of the initial length in 170 // bytes, either four or twelve. As a side effect, set this 171 // ByteReader's offset size to either 4 (if we see a 32-bit DWARF 172 // initial length) or 8 (if we see a 64-bit DWARF initial length). 173 // 174 // A DWARF initial length is either: 175 // 176 // - a byte count stored as an unsigned 32-bit value less than 177 // 0xffffff00, indicating that the data whose length is being 178 // measured uses the 32-bit DWARF format, or 179 // 180 // - The 32-bit value 0xffffffff, followed by a 64-bit byte count, 181 // indicating that the data whose length is being measured uses 182 // the 64-bit DWARF format. 183 uint64_t ReadInitialLength(const uint8_t* start, size_t* len); 184 185 // Read an offset from BUFFER and return it as an unsigned 64 bit 186 // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the 187 // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes 188 // long. You must call ReadInitialLength or SetOffsetSize before calling 189 // this function; see the comments above for details. 
190 uint64_t ReadOffset(const uint8_t* buffer) const; 191 192 // Return the current offset size, in bytes. 193 // A return value of 4 indicates that we are reading 32-bit DWARF. 194 // A return value of 8 indicates that we are reading 64-bit DWARF. OffsetSize()195 uint8_t OffsetSize() const { return offset_size_; } 196 197 // Indicate that section offsets and lengths are SIZE bytes long. SIZE 198 // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF). 199 // Usually, you should not call this function yourself; instead, let a 200 // call to ReadInitialLength establish the data's offset size 201 // automatically. 202 void SetOffsetSize(uint8_t size); 203 204 // The Linux C++ ABI uses a variant of DWARF call frame information 205 // for exception handling. This data is included in the program's 206 // address space as the ".eh_frame" section, and intepreted at 207 // runtime to walk the stack, find exception handlers, and run 208 // cleanup code. The format is mostly the same as DWARF CFI, with 209 // some adjustments made to provide the additional 210 // exception-handling data, and to make the data easier to work with 211 // in memory --- for example, to allow it to be placed in read-only 212 // memory even when describing position-independent code. 213 // 214 // In particular, exception handling data can select a number of 215 // different encodings for pointers that appear in the data, as 216 // described by the DwarfPointerEncoding enum. There are actually 217 // four axes(!) to the encoding: 218 // 219 // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use 220 // the DWARF LEB128 encoding. 221 // 222 // - The pointer's signedness: pointers can be signed or unsigned. 
223 // 224 // - The pointer's base address: the data stored in the exception 225 // handling data can be the actual address (that is, an absolute 226 // pointer), or relative to one of a number of different base 227 // addreses --- including that of the encoded pointer itself, for 228 // a form of "pc-relative" addressing. 229 // 230 // - The pointer may be indirect: it may be the address where the 231 // true pointer is stored. (This is used to refer to things via 232 // global offset table entries, program linkage table entries, or 233 // other tricks used in position-independent code.) 234 // 235 // There are also two options that fall outside that matrix 236 // altogether: the pointer may be omitted, or it may have padding to 237 // align it on an appropriate address boundary. (That last option 238 // may seem like it should be just another axis, but it is not.) 239 240 // Indicate that the exception handling data is loaded starting at 241 // SECTION_BASE, and that the start of its buffer in our own memory 242 // is BUFFER_BASE. This allows us to find the address that a given 243 // byte in our buffer would have when loaded into the program the 244 // data describes. We need this to resolve DW_EH_PE_pcrel pointers. 245 void SetCFIDataBase(uint64_t section_base, const uint8_t* buffer_base); 246 247 // Indicate that the base address of the program's ".text" section 248 // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers. 249 void SetTextBase(uint64_t text_base); 250 251 // Indicate that the base address for DW_EH_PE_datarel pointers is 252 // DATA_BASE. The proper value depends on the ABI; it is usually the 253 // address of the global offset table, held in a designated register in 254 // position-independent code. You will need to look at the startup code 255 // for the target system to be sure. I tried; my eyes bled. 
256 void SetDataBase(uint64_t data_base); 257 258 // Indicate that the base address for the FDE we are processing is 259 // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel 260 // pointers. (This encoding does not seem to be used by the GNU 261 // toolchain.) 262 void SetFunctionBase(uint64_t function_base); 263 264 // Indicate that we are no longer processing any FDE, so any use of 265 // a DW_EH_PE_funcrel encoding is an error. 266 void ClearFunctionBase(); 267 268 // Return true if ENCODING is a valid pointer encoding. 269 bool ValidEncoding(DwarfPointerEncoding encoding) const; 270 271 // Return true if we have all the information we need to read a 272 // pointer that uses ENCODING. This checks that the appropriate 273 // SetFooBase function for ENCODING has been called. 274 bool UsableEncoding(DwarfPointerEncoding encoding) const; 275 276 // Read an encoded pointer from BUFFER using ENCODING; return the 277 // absolute address it represents, and set *LEN to the pointer's 278 // length in bytes, including any padding for aligned pointers. 279 // 280 // This function calls 'abort' if ENCODING is invalid or refers to a 281 // base address this reader hasn't been given, so you should check 282 // with ValidEncoding and UsableEncoding first if you would rather 283 // die in a more helpful way. 284 uint64_t ReadEncodedPointer(const uint8_t* buffer, 285 DwarfPointerEncoding encoding, 286 size_t* len) const; 287 288 Endianness GetEndianness() const; 289 private: 290 291 // Function pointer type for our address and offset readers. 292 typedef uint64_t (ByteReader::*AddressReader)(const uint8_t*) const; 293 294 // Read an offset from BUFFER and return it as an unsigned 64 bit 295 // integer. DWARF2/3 define offsets as either 4 or 8 bytes, 296 // generally depending on the amount of DWARF2/3 info present. 297 // This function pointer gets set by SetOffsetSize. 
298 AddressReader offset_reader_; 299 300 // Read an address from BUFFER and return it as an unsigned 64 bit 301 // integer. DWARF2/3 allow addresses to be any size from 0-255 302 // bytes currently. Internally we support 4 and 8 byte addresses, 303 // and will CHECK on anything else. 304 // This function pointer gets set by SetAddressSize. 305 AddressReader address_reader_; 306 307 Endianness endian_; 308 uint8_t address_size_; 309 uint8_t offset_size_; 310 311 // Base addresses for Linux C++ exception handling data's encoded pointers. 312 bool have_section_base_, have_text_base_, have_data_base_; 313 bool have_function_base_; 314 uint64_t section_base_, text_base_, data_base_, function_base_; 315 const uint8_t* buffer_base_; 316 }; 317 318 } // namespace google_breakpad 319 320 #endif // COMMON_DWARF_BYTEREADER_H__ 321