// -*- mode: C++ -*-

// Copyright 2010 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <[email protected]> <[email protected]>

// macho_reader.h: A class for parsing Mach-O files.
34 35 #ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_ 36 #define BREAKPAD_COMMON_MAC_MACHO_READER_H_ 37 38 #include <mach-o/loader.h> 39 #include <mach-o/fat.h> 40 #include <stdint.h> 41 #include <stdlib.h> 42 #include <unistd.h> 43 44 #include <map> 45 #include <string> 46 #include <vector> 47 48 #include "common/byte_cursor.h" 49 #include "common/mac/super_fat_arch.h" 50 51 namespace google_breakpad { 52 namespace mach_o { 53 54 using std::map; 55 using std::string; 56 using std::vector; 57 58 // The Mac headers don't specify particular types for these groups of 59 // constants, but defining them here provides some documentation 60 // value. We also give them the same width as the fields in which 61 // they appear, which makes them a bit easier to use with ByteCursors. 62 typedef uint32_t Magic; 63 typedef uint32_t FileType; 64 typedef uint32_t FileFlags; 65 typedef uint32_t LoadCommandType; 66 typedef uint32_t SegmentFlags; 67 typedef uint32_t SectionFlags; 68 69 // A parser for fat binary files, used to store universal binaries. 70 // When applied to a (non-fat) Mach-O file, this behaves as if the 71 // file were a fat file containing a single object file. 72 class FatReader { 73 public: 74 75 // A class for reporting errors found while parsing fat binary files. The 76 // default definitions of these methods print messages to stderr. 77 class Reporter { 78 public: 79 // Create a reporter that attributes problems to |filename|. Reporter(const string & filename)80 explicit Reporter(const string& filename) : filename_(filename) { } 81 ~Reporter()82 virtual ~Reporter() { } 83 84 // The data does not begin with a fat binary or Mach-O magic number. 85 // This is a fatal error. 86 virtual void BadHeader(); 87 88 // The Mach-O fat binary file ends abruptly, without enough space 89 // to contain an object file it claims is present. 
90 virtual void MisplacedObjectFile(); 91 92 // The file ends abruptly: either it is not large enough to hold a 93 // complete header, or the header implies that contents are present 94 // beyond the actual end of the file. 95 virtual void TooShort(); 96 97 private: 98 // The filename to which the reader should attribute problems. 99 string filename_; 100 }; 101 102 // Create a fat binary file reader that uses |reporter| to report problems. FatReader(Reporter * reporter)103 explicit FatReader(Reporter* reporter) : reporter_(reporter) { } 104 105 // Read the |size| bytes at |buffer| as a fat binary file. On success, 106 // return true; on failure, report the problem to reporter_ and return 107 // false. 108 // 109 // If the data is a plain Mach-O file, rather than a fat binary file, 110 // then the reader behaves as if it had found a fat binary file whose 111 // single object file is the Mach-O file. 112 bool Read(const uint8_t* buffer, size_t size); 113 114 // Return an array of 'SuperFatArch' structures describing the 115 // object files present in this fat binary file. Set |size| to the 116 // number of elements in the array. 117 // 118 // Assuming Read returned true, the entries are validated: it is safe to 119 // assume that the offsets and sizes in each SuperFatArch refer to subranges 120 // of the bytes passed to Read. 121 // 122 // If there are no object files in this fat binary, then this 123 // function can return NULL. 124 // 125 // The array is owned by this FatReader instance; it will be freed when 126 // this FatReader is destroyed. 127 // 128 // This function returns a C-style array instead of a vector to make it 129 // possible to use the result with OS X functions like NXFindBestFatArch, 130 // so that the symbol dumper will behave consistently with other OS X 131 // utilities that work with fat binaries. 
object_files(size_t * count)132 const SuperFatArch* object_files(size_t* count) const { 133 *count = object_files_.size(); 134 if (object_files_.size() > 0) 135 return &object_files_[0]; 136 return NULL; 137 } 138 139 private: 140 // We use this to report problems parsing the file's contents. (WEAK) 141 Reporter* reporter_; 142 143 // The contents of the fat binary or Mach-O file we're parsing. We do not 144 // own the storage it refers to. 145 ByteBuffer buffer_; 146 147 // The magic number of this binary, in host byte order. 148 Magic magic_; 149 150 // The list of object files in this binary. 151 // object_files_.size() == fat_header.nfat_arch 152 vector<SuperFatArch> object_files_; 153 }; 154 155 // A segment in a Mach-O file. All these fields have been byte-swapped as 156 // appropriate for use by the executing architecture. 157 struct Segment { 158 // The ByteBuffers below point into the bytes passed to the Reader that 159 // created this Segment. 160 161 ByteBuffer section_list; // This segment's section list. 162 ByteBuffer contents; // This segment's contents. 163 164 // This segment's name. 165 string name; 166 167 // The address at which this segment should be loaded in memory. If 168 // bits_64 is false, only the bottom 32 bits of this value are valid. 169 uint64_t vmaddr; 170 171 // The size of this segment when loaded into memory. This may be larger 172 // than contents.Size(), in which case the extra area will be 173 // initialized with zeros. If bits_64 is false, only the bottom 32 bits 174 // of this value are valid. 175 uint64_t vmsize; 176 177 // The file offset and size of the segment in the Mach-O image. 178 uint64_t fileoff; 179 uint64_t filesize; 180 181 // The maximum and initial VM protection of this segment's contents. 182 uint32_t maxprot; 183 uint32_t initprot; 184 185 // The number of sections in section_list. 186 uint32_t nsects; 187 188 // Flags describing this segment, from SegmentFlags. 
189 uint32_t flags; 190 191 // True if this is a 64-bit section; false if it is a 32-bit section. 192 bool bits_64; 193 }; 194 195 // A section in a Mach-O file. All these fields have been byte-swapped as 196 // appropriate for use by the executing architecture. 197 struct Section { 198 // This section's contents. This points into the bytes passed to the 199 // Reader that created this Section. 200 ByteBuffer contents; 201 202 // This section's name. 203 string section_name; // section[_64].sectname 204 // The name of the segment this section belongs to. 205 string segment_name; // section[_64].segname 206 207 // The address at which this section's contents should be loaded in 208 // memory. If bits_64 is false, only the bottom 32 bits of this value 209 // are valid. 210 uint64_t address; 211 212 // The contents of this section should be loaded into memory at an 213 // address which is a multiple of (two raised to this power). 214 uint32_t align; 215 216 // Flags from SectionFlags describing the section's contents. 217 uint32_t flags; 218 219 // We don't support reading relocations yet. 220 221 // True if this is a 64-bit section; false if it is a 32-bit section. 222 bool bits_64; 223 }; 224 225 // A map from section names to Sections. 226 typedef map<string, Section> SectionMap; 227 228 // A reader for a Mach-O file. 229 // 230 // This does not handle fat binaries; see FatReader above. FatReader 231 // provides a friendly interface for parsing data that could be either a 232 // fat binary or a Mach-O file. 233 class Reader { 234 public: 235 236 // A class for reporting errors found while parsing Mach-O files. The 237 // default definitions of these member functions print messages to 238 // stderr. 239 class Reporter { 240 public: 241 // Create a reporter that attributes problems to |filename|. 
Reporter(const string & filename)242 explicit Reporter(const string& filename) : filename_(filename) { } ~Reporter()243 virtual ~Reporter() { } 244 245 // Reporter functions for fatal errors return void; the reader will 246 // definitely return an error to its caller after calling them 247 248 // The data does not begin with a Mach-O magic number, or the magic 249 // number does not match the expected value for the cpu architecture. 250 // This is a fatal error. 251 virtual void BadHeader(); 252 253 // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|) 254 // does not match the expected CPU architecture 255 // (|expected_cpu_type|, |expected_cpu_subtype|). 256 virtual void CPUTypeMismatch(cpu_type_t cpu_type, 257 cpu_subtype_t cpu_subtype, 258 cpu_type_t expected_cpu_type, 259 cpu_subtype_t expected_cpu_subtype); 260 261 // The file ends abruptly: either it is not large enough to hold a 262 // complete header, or the header implies that contents are present 263 // beyond the actual end of the file. 264 virtual void HeaderTruncated(); 265 266 // The file's load command region, as given in the Mach-O header, is 267 // too large for the file. 268 virtual void LoadCommandRegionTruncated(); 269 270 // The file's Mach-O header claims the file contains |claimed| load 271 // commands, but the I'th load command, of type |type|, extends beyond 272 // the end of the load command region, as given by the Mach-O header. 273 // If |type| is zero, the command's type was unreadable. 274 virtual void LoadCommandsOverrun(size_t claimed, size_t i, 275 LoadCommandType type); 276 277 // The contents of the |i|'th load command, of type |type|, extend beyond 278 // the size given in the load command's header. 279 virtual void LoadCommandTooShort(size_t i, LoadCommandType type); 280 281 // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named 282 // |name| is too short to hold the sections that its header says it does. 
283 // (This more specific than LoadCommandTooShort.) 284 virtual void SectionsMissing(const string& name); 285 286 // The segment named |name| claims that its contents lie beyond the end 287 // of the file. 288 virtual void MisplacedSegmentData(const string& name); 289 290 // The section named |section| in the segment named |segment| claims that 291 // its contents do not lie entirely within the segment. 292 virtual void MisplacedSectionData(const string& section, 293 const string& segment); 294 295 // The LC_SYMTAB command claims that symbol table contents are located 296 // beyond the end of the file. 297 virtual void MisplacedSymbolTable(); 298 299 // An attempt was made to read a Mach-O file of the unsupported 300 // CPU architecture |cpu_type|. 301 virtual void UnsupportedCPUType(cpu_type_t cpu_type); 302 303 private: 304 string filename_; 305 }; 306 307 // A handler for sections parsed from a segment. The WalkSegmentSections 308 // member function accepts an instance of this class, and applies it to 309 // each section defined in a given segment. 310 class SectionHandler { 311 public: ~SectionHandler()312 virtual ~SectionHandler() { } 313 314 // Called to report that the segment's section list contains |section|. 315 // This should return true if the iteration should continue, or false 316 // if it should stop. 317 virtual bool HandleSection(const Section& section) = 0; 318 }; 319 320 // A handler for the load commands in a Mach-O file. 321 class LoadCommandHandler { 322 public: LoadCommandHandler()323 LoadCommandHandler() { } ~LoadCommandHandler()324 virtual ~LoadCommandHandler() { } 325 326 // When called from WalkLoadCommands, the following handler functions 327 // should return true if they wish to continue iterating over the load 328 // command list, or false if they wish to stop iterating. 
329 // 330 // When called from LoadCommandIterator::Handle or Reader::Handle, 331 // these functions' return values are simply passed through to Handle's 332 // caller. 333 // 334 // The definitions provided by this base class simply return true; the 335 // default is to silently ignore sections whose member functions the 336 // subclass doesn't override. 337 338 // COMMAND is load command we don't recognize. We provide only the 339 // command type and a ByteBuffer enclosing the command's data (If we 340 // cannot parse the command type or its size, we call 341 // reporter_->IncompleteLoadCommand instead.) UnknownCommand(LoadCommandType type,const ByteBuffer & contents)342 virtual bool UnknownCommand(LoadCommandType type, 343 const ByteBuffer& contents) { 344 return true; 345 } 346 347 // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment 348 // with the properties given in |segment|. SegmentCommand(const Segment & segment)349 virtual bool SegmentCommand(const Segment& segment) { 350 return true; 351 } 352 353 // The load command is LC_SYMTAB. |entries| holds the array of nlist 354 // entries, and |names| holds the strings the entries refer to. SymtabCommand(const ByteBuffer & entries,const ByteBuffer & names)355 virtual bool SymtabCommand(const ByteBuffer& entries, 356 const ByteBuffer& names) { 357 return true; 358 } 359 360 // Add handler functions for more load commands here as needed. 361 }; 362 363 // Create a Mach-O file reader that reports problems to |reporter|. Reader(Reporter * reporter)364 explicit Reader(Reporter* reporter) 365 : reporter_(reporter) { } 366 367 // Read the given data as a Mach-O file. The reader retains pointers 368 // into the data passed, so the data should live as long as the reader 369 // does. On success, return true; on failure, return false. 370 // 371 // At most one of these functions should be invoked once on each Reader 372 // instance. 
373 bool Read(const uint8_t* buffer, 374 size_t size, 375 cpu_type_t expected_cpu_type, 376 cpu_subtype_t expected_cpu_subtype); Read(const ByteBuffer & buffer,cpu_type_t expected_cpu_type,cpu_subtype_t expected_cpu_subtype)377 bool Read(const ByteBuffer& buffer, 378 cpu_type_t expected_cpu_type, 379 cpu_subtype_t expected_cpu_subtype) { 380 return Read(buffer.start, 381 buffer.Size(), 382 expected_cpu_type, 383 expected_cpu_subtype); 384 } 385 386 // Return this file's characteristics, as found in the Mach-O header. cpu_type()387 cpu_type_t cpu_type() const { return cpu_type_; } cpu_subtype()388 cpu_subtype_t cpu_subtype() const { return cpu_subtype_; } file_type()389 FileType file_type() const { return file_type_; } flags()390 FileFlags flags() const { return flags_; } 391 392 // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit 393 // Mach-O file. bits_64()394 bool bits_64() const { return bits_64_; } 395 396 // Return true if this is a big-endian Mach-O file, false if it is 397 // little-endian. big_endian()398 bool big_endian() const { return big_endian_; } 399 400 // Apply |handler| to each load command in this Mach-O file, stopping when 401 // a handler function returns false. If we encounter a malformed load 402 // command, report it via reporter_ and return false. Return true if all 403 // load commands were parseable and all handlers returned true. 404 bool WalkLoadCommands(LoadCommandHandler* handler) const; 405 406 // Set |segment| to describe the segment named |name|, if present. If 407 // found, |segment|'s byte buffers refer to a subregion of the bytes 408 // passed to Read. If we find the section, return true; otherwise, 409 // return false. 410 bool FindSegment(const string& name, Segment* segment) const; 411 412 // Apply |handler| to each section defined in |segment|. If |handler| returns 413 // false, stop iterating and return false. 
If all calls to |handler| return 414 // true and we reach the end of the section list, return true. 415 bool WalkSegmentSections(const Segment& segment, SectionHandler* handler) 416 const; 417 418 // Clear |section_map| and then populate it with a map of the sections 419 // in |segment|, from section names to Section structures. 420 // Each Section's contents refer to bytes in |segment|'s contents. 421 // On success, return true; if a problem occurs, report it and return false. 422 bool MapSegmentSections(const Segment& segment, SectionMap* section_map) 423 const; 424 425 private: 426 // Used internally. 427 class SegmentFinder; 428 class SectionMapper; 429 430 // We use this to report problems parsing the file's contents. (WEAK) 431 Reporter* reporter_; 432 433 // The contents of the Mach-O file we're parsing. We do not own the 434 // storage it refers to. 435 ByteBuffer buffer_; 436 437 // True if this file is big-endian. 438 bool big_endian_; 439 440 // True if this file is a 64-bit Mach-O file. 441 bool bits_64_; 442 443 // This file's cpu type and subtype. 444 cpu_type_t cpu_type_; // mach_header[_64].cputype 445 cpu_subtype_t cpu_subtype_; // mach_header[_64].cpusubtype 446 447 // This file's type. 448 FileType file_type_; // mach_header[_64].filetype 449 450 // The region of buffer_ occupied by load commands. 451 ByteBuffer load_commands_; 452 453 // The number of load commands in load_commands_. 454 uint32_t load_command_count_; // mach_header[_64].ncmds 455 456 // This file's header flags. 457 FileFlags flags_; 458 }; 459 460 } // namespace mach_o 461 } // namespace google_breakpad 462 463 #endif // BREAKPAD_COMMON_MAC_MACHO_READER_H_ 464