xref: /aosp_15_r20/external/google-breakpad/src/common/mac/macho_reader.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // Copyright 2010 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 // Original author: Jim Blandy <[email protected]> <[email protected]>
30 
31 // macho_reader.cc: Implementation of google_breakpad::Mach_O::FatReader and
32 // google_breakpad::Mach_O::Reader. See macho_reader.h for details.
33 
34 #ifdef HAVE_CONFIG_H
35 #include <config.h>  // Must come first
36 #endif
37 
38 #include "common/mac/macho_reader.h"
39 
40 #include <assert.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 
44 #include <limits>
45 
46 // Unfortunately, CPU_TYPE_ARM is not defined for 10.4.
47 #if !defined(CPU_TYPE_ARM)
48 #define CPU_TYPE_ARM 12
49 #endif
50 
51 #if !defined(CPU_TYPE_ARM_64)
52 #define CPU_TYPE_ARM_64 16777228
53 #endif
54 
55 namespace google_breakpad {
56 namespace mach_o {
57 
58 // If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its
59 // arguments, so you can't place expressions that do necessary work in
60 // the argument of an assert. Nor can you assign the result of the
61 // expression to a variable and assert that the variable's value is
62 // true: you'll get unused variable warnings when NDEBUG is #defined.
63 //
64 // ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that
65 // the result is true if NDEBUG is not #defined.
66 #if defined(NDEBUG)
67 #define ASSERT_ALWAYS_EVAL(x) (x)
68 #else
69 #define ASSERT_ALWAYS_EVAL(x) assert(x)
70 #endif
71 
BadHeader()72 void FatReader::Reporter::BadHeader() {
73   fprintf(stderr, "%s: file is neither a fat binary file"
74           " nor a Mach-O object file\n", filename_.c_str());
75 }
76 
TooShort()77 void FatReader::Reporter::TooShort() {
78   fprintf(stderr, "%s: file too short for the data it claims to contain\n",
79           filename_.c_str());
80 }
81 
MisplacedObjectFile()82 void FatReader::Reporter::MisplacedObjectFile() {
83   fprintf(stderr, "%s: file too short for the object files it claims"
84           " to contain\n", filename_.c_str());
85 }
86 
Read(const uint8_t * buffer,size_t size)87 bool FatReader::Read(const uint8_t* buffer, size_t size) {
88   buffer_.start = buffer;
89   buffer_.end = buffer + size;
90   ByteCursor cursor(&buffer_);
91 
92   // Fat binaries always use big-endian, so read the magic number in
93   // that endianness. To recognize Mach-O magic numbers, which can use
94   // either endianness, check for both the proper and reversed forms
95   // of the magic numbers.
96   cursor.set_big_endian(true);
97   if (cursor >> magic_) {
98     if (magic_ == FAT_MAGIC) {
99       // How many object files does this fat binary contain?
100       uint32_t object_files_count;
101       if (!(cursor >> object_files_count)) {  // nfat_arch
102         reporter_->TooShort();
103         return false;
104       }
105 
106       // Read the list of object files.
107       object_files_.resize(object_files_count);
108       for (size_t i = 0; i < object_files_count; i++) {
109         struct fat_arch objfile;
110 
111         // Read this object file entry, byte-swapping as appropriate.
112         cursor >> objfile.cputype
113                >> objfile.cpusubtype
114                >> objfile.offset
115                >> objfile.size
116                >> objfile.align;
117 
118         SuperFatArch super_fat_arch(objfile);
119         object_files_[i] = super_fat_arch;
120 
121         if (!cursor) {
122           reporter_->TooShort();
123           return false;
124         }
125         // Does the file actually have the bytes this entry refers to?
126         size_t fat_size = buffer_.Size();
127         if (objfile.offset > fat_size ||
128             objfile.size > fat_size - objfile.offset) {
129           reporter_->MisplacedObjectFile();
130           return false;
131         }
132       }
133 
134       return true;
135     } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 ||
136                magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) {
137       // If this is a little-endian Mach-O file, fix the cursor's endianness.
138       if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64)
139         cursor.set_big_endian(false);
140       // Record the entire file as a single entry in the object file list.
141       object_files_.resize(1);
142 
143       // Get the cpu type and subtype from the Mach-O header.
144       if (!(cursor >> object_files_[0].cputype
145                    >> object_files_[0].cpusubtype)) {
146         reporter_->TooShort();
147         return false;
148       }
149 
150       object_files_[0].offset = 0;
151       object_files_[0].size = static_cast<uint64_t>(buffer_.Size());
152       // This alignment is correct for 32 and 64-bit x86 and ppc.
153       // See get_align in the lipo source for other architectures:
154       // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
155       object_files_[0].align = 12;  // 2^12 == 4096
156       return true;
157     }
158   }
159   reporter_->BadHeader();
160   return false;
161 }
162 
BadHeader()163 void Reader::Reporter::BadHeader() {
164   fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str());
165 }
166 
CPUTypeMismatch(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,cpu_type_t expected_cpu_type,cpu_subtype_t expected_cpu_subtype)167 void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
168                                        cpu_subtype_t cpu_subtype,
169                                        cpu_type_t expected_cpu_type,
170                                        cpu_subtype_t expected_cpu_subtype) {
171   fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected"
172           " type %d, subtype %d\n",
173           filename_.c_str(), cpu_type, cpu_subtype,
174           expected_cpu_type, expected_cpu_subtype);
175 }
176 
HeaderTruncated()177 void Reader::Reporter::HeaderTruncated() {
178   fprintf(stderr, "%s: file does not contain a complete Mach-O header\n",
179           filename_.c_str());
180 }
181 
LoadCommandRegionTruncated()182 void Reader::Reporter::LoadCommandRegionTruncated() {
183   fprintf(stderr, "%s: file too short to hold load command region"
184           " given in Mach-O header\n", filename_.c_str());
185 }
186 
LoadCommandsOverrun(size_t claimed,size_t i,LoadCommandType type)187 void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
188                                            LoadCommandType type) {
189   fprintf(stderr, "%s: file's header claims there are %zu"
190           " load commands, but load command #%zu",
191           filename_.c_str(), claimed, i);
192   if (type) fprintf(stderr, ", of type %d,", type);
193   fprintf(stderr, " extends beyond the end of the load command region\n");
194 }
195 
LoadCommandTooShort(size_t i,LoadCommandType type)196 void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
197   fprintf(stderr, "%s: the contents of load command #%zu, of type %d,"
198           " extend beyond the size given in the load command's header\n",
199           filename_.c_str(), i, type);
200 }
201 
SectionsMissing(const string & name)202 void Reader::Reporter::SectionsMissing(const string& name) {
203   fprintf(stderr, "%s: the load command for segment '%s'"
204           " is too short to hold the section headers it claims to have\n",
205           filename_.c_str(), name.c_str());
206 }
207 
MisplacedSegmentData(const string & name)208 void Reader::Reporter::MisplacedSegmentData(const string& name) {
209   fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond"
210           " the end of the file\n", filename_.c_str(), name.c_str());
211 }
212 
MisplacedSectionData(const string & section,const string & segment)213 void Reader::Reporter::MisplacedSectionData(const string& section,
214                                             const string& segment) {
215   fprintf(stderr, "%s: the section '%s' in segment '%s'"
216           " claims its contents lie outside the segment's contents\n",
217           filename_.c_str(), section.c_str(), segment.c_str());
218 }
219 
MisplacedSymbolTable()220 void Reader::Reporter::MisplacedSymbolTable() {
221   fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol"
222           " table's contents are located beyond the end of the file\n",
223           filename_.c_str());
224 }
225 
UnsupportedCPUType(cpu_type_t cpu_type)226 void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
227   fprintf(stderr, "%s: CPU type %d is not supported\n",
228           filename_.c_str(), cpu_type);
229 }
230 
Read(const uint8_t * buffer,size_t size,cpu_type_t expected_cpu_type,cpu_subtype_t expected_cpu_subtype)231 bool Reader::Read(const uint8_t* buffer,
232                   size_t size,
233                   cpu_type_t expected_cpu_type,
234                   cpu_subtype_t expected_cpu_subtype) {
235   assert(!buffer_.start);
236   buffer_.start = buffer;
237   buffer_.end = buffer + size;
238   ByteCursor cursor(&buffer_, true);
239   uint32_t magic;
240   if (!(cursor >> magic)) {
241     reporter_->HeaderTruncated();
242     return false;
243   }
244 
245   if (expected_cpu_type != CPU_TYPE_ANY) {
246     uint32_t expected_magic;
247     // validate that magic matches the expected cpu type
248     switch (expected_cpu_type) {
249       case CPU_TYPE_ARM:
250       case CPU_TYPE_I386:
251         expected_magic = MH_CIGAM;
252         break;
253       case CPU_TYPE_POWERPC:
254         expected_magic = MH_MAGIC;
255         break;
256       case CPU_TYPE_ARM_64:
257       case CPU_TYPE_X86_64:
258         expected_magic = MH_CIGAM_64;
259         break;
260       case CPU_TYPE_POWERPC64:
261         expected_magic = MH_MAGIC_64;
262         break;
263       default:
264         reporter_->UnsupportedCPUType(expected_cpu_type);
265         return false;
266     }
267 
268     if (expected_magic != magic) {
269       reporter_->BadHeader();
270       return false;
271     }
272   }
273 
274   // Since the byte cursor is in big-endian mode, a reversed magic number
275   // always indicates a little-endian file, regardless of our own endianness.
276   switch (magic) {
277     case MH_MAGIC:    big_endian_ = true;  bits_64_ = false; break;
278     case MH_CIGAM:    big_endian_ = false; bits_64_ = false; break;
279     case MH_MAGIC_64: big_endian_ = true;  bits_64_ = true;  break;
280     case MH_CIGAM_64: big_endian_ = false; bits_64_ = true;  break;
281     default:
282       reporter_->BadHeader();
283       return false;
284   }
285   cursor.set_big_endian(big_endian_);
286   uint32_t commands_size, reserved;
287   cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_
288          >> commands_size >> flags_;
289   if (bits_64_)
290     cursor >> reserved;
291   if (!cursor) {
292     reporter_->HeaderTruncated();
293     return false;
294   }
295 
296   if (expected_cpu_type != CPU_TYPE_ANY &&
297       (expected_cpu_type != cpu_type_ ||
298        expected_cpu_subtype != cpu_subtype_)) {
299     reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
300                               expected_cpu_type, expected_cpu_subtype);
301     return false;
302   }
303 
304   cursor
305       .PointTo(&load_commands_.start, commands_size)
306       .PointTo(&load_commands_.end, 0);
307   if (!cursor) {
308     reporter_->LoadCommandRegionTruncated();
309     return false;
310   }
311 
312   return true;
313 }
314 
WalkLoadCommands(Reader::LoadCommandHandler * handler) const315 bool Reader::WalkLoadCommands(Reader::LoadCommandHandler* handler) const {
316   ByteCursor list_cursor(&load_commands_, big_endian_);
317 
318   for (size_t index = 0; index < load_command_count_; ++index) {
319     // command refers to this load command alone, so that cursor will
320     // refuse to read past the load command's end. But since we haven't
321     // read the size yet, let command initially refer to the entire
322     // remainder of the load command series.
323     ByteBuffer command(list_cursor.here(), list_cursor.Available());
324     ByteCursor cursor(&command, big_endian_);
325 
326     // Read the command type and size --- fields common to all commands.
327     uint32_t type, size;
328     if (!(cursor >> type)) {
329       reporter_->LoadCommandsOverrun(load_command_count_, index, 0);
330       return false;
331     }
332     if (!(cursor >> size) || size > command.Size()) {
333       reporter_->LoadCommandsOverrun(load_command_count_, index, type);
334       return false;
335     }
336 
337     // Now that we've read the length, restrict command's range to this
338     // load command only.
339     command.end = command.start + size;
340 
341     switch (type) {
342       case LC_SEGMENT:
343       case LC_SEGMENT_64: {
344         Segment segment;
345         segment.bits_64 = (type == LC_SEGMENT_64);
346         size_t word_size = segment.bits_64 ? 8 : 4;
347         cursor.CString(&segment.name, 16);
348         cursor
349             .Read(word_size, false, &segment.vmaddr)
350             .Read(word_size, false, &segment.vmsize)
351             .Read(word_size, false, &segment.fileoff)
352             .Read(word_size, false, &segment.filesize);
353         cursor >> segment.maxprot
354                >> segment.initprot
355                >> segment.nsects
356                >> segment.flags;
357         if (!cursor) {
358           reporter_->LoadCommandTooShort(index, type);
359           return false;
360         }
361         if (segment.fileoff > buffer_.Size() ||
362             segment.filesize > buffer_.Size() - segment.fileoff) {
363           reporter_->MisplacedSegmentData(segment.name);
364           return false;
365         }
366         // Mach-O files in .dSYM bundles have the contents of the loaded
367         // segments removed, and their file offsets and file sizes zeroed
368         // out. To help us handle this special case properly, give such
369         // segments' contents NULL starting and ending pointers.
370         if (segment.fileoff == 0 && segment.filesize == 0) {
371           segment.contents.start = segment.contents.end = NULL;
372         } else {
373           segment.contents.start = buffer_.start + segment.fileoff;
374           segment.contents.end = segment.contents.start + segment.filesize;
375         }
376         // The section list occupies the remainder of this load command's space.
377         segment.section_list.start = cursor.here();
378         segment.section_list.end = command.end;
379 
380         if (!handler->SegmentCommand(segment))
381           return false;
382         break;
383       }
384 
385       case LC_SYMTAB: {
386         uint32_t symoff, nsyms, stroff, strsize;
387         cursor >> symoff >> nsyms >> stroff >> strsize;
388         if (!cursor) {
389           reporter_->LoadCommandTooShort(index, type);
390           return false;
391         }
392         // How big are the entries in the symbol table?
393         // sizeof(struct nlist_64) : sizeof(struct nlist),
394         // but be paranoid about alignment vs. target architecture.
395         size_t symbol_size = bits_64_ ? 16 : 12;
396         // How big is the entire symbol array?
397         size_t symbols_size = nsyms * symbol_size;
398         if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff ||
399             stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) {
400           reporter_->MisplacedSymbolTable();
401           return false;
402         }
403         ByteBuffer entries(buffer_.start + symoff, symbols_size);
404         ByteBuffer names(buffer_.start + stroff, strsize);
405         if (!handler->SymtabCommand(entries, names))
406           return false;
407         break;
408       }
409 
410       default: {
411         if (!handler->UnknownCommand(type, command))
412           return false;
413         break;
414       }
415     }
416 
417     list_cursor.set_here(command.end);
418   }
419 
420   return true;
421 }
422 
423 // A load command handler that looks for a segment of a given name.
424 class Reader::SegmentFinder : public LoadCommandHandler {
425  public:
426   // Create a load command handler that looks for a segment named NAME,
427   // and sets SEGMENT to describe it if found.
SegmentFinder(const string & name,Segment * segment)428   SegmentFinder(const string& name, Segment* segment)
429       : name_(name), segment_(segment), found_() { }
430 
431   // Return true if the traversal found the segment, false otherwise.
found() const432   bool found() const { return found_; }
433 
SegmentCommand(const Segment & segment)434   bool SegmentCommand(const Segment& segment) {
435     if (segment.name == name_) {
436       *segment_ = segment;
437       found_ = true;
438       return false;
439     }
440     return true;
441   }
442 
443  private:
444   // The name of the segment our creator is looking for.
445   const string& name_;
446 
447   // Where we should store the segment if found. (WEAK)
448   Segment* segment_;
449 
450   // True if we found the segment.
451   bool found_;
452 };
453 
FindSegment(const string & name,Segment * segment) const454 bool Reader::FindSegment(const string& name, Segment* segment) const {
455   SegmentFinder finder(name, segment);
456   WalkLoadCommands(&finder);
457   return finder.found();
458 }
459 
WalkSegmentSections(const Segment & segment,SectionHandler * handler) const460 bool Reader::WalkSegmentSections(const Segment& segment,
461                                  SectionHandler* handler) const {
462   size_t word_size = segment.bits_64 ? 8 : 4;
463   ByteCursor cursor(&segment.section_list, big_endian_);
464 
465   for (size_t i = 0; i < segment.nsects; i++) {
466     Section section;
467     section.bits_64 = segment.bits_64;
468     uint64_t size, offset;
469     uint32_t dummy32;
470     cursor
471         .CString(&section.section_name, 16)
472         .CString(&section.segment_name, 16)
473         .Read(word_size, false, &section.address)
474         .Read(word_size, false, &size)
475         .Read(sizeof(uint32_t), false, &offset)  // clears high bits of |offset|
476         >> section.align
477         >> dummy32
478         >> dummy32
479         >> section.flags
480         >> dummy32
481         >> dummy32;
482     if (section.bits_64)
483       cursor >> dummy32;
484     if (!cursor) {
485       reporter_->SectionsMissing(segment.name);
486       return false;
487     }
488 
489     // Even 64-bit Mach-O isn’t a true 64-bit format in that it doesn’t handle
490     // 64-bit file offsets gracefully. Segment load commands do contain 64-bit
491     // file offsets, but sections within do not. Because segments load
492     // contiguously, recompute each section’s file offset on the basis of its
493     // containing segment’s file offset and the difference between the section’s
494     // and segment’s load addresses. If truncation is detected, honor the
495     // recomputed offset.
496     if (segment.bits_64 &&
497         segment.fileoff + segment.filesize >
498             std::numeric_limits<uint32_t>::max()) {
499       const uint64_t section_offset_recomputed =
500           segment.fileoff + section.address - segment.vmaddr;
501       if (offset == static_cast<uint32_t>(section_offset_recomputed)) {
502         offset = section_offset_recomputed;
503       }
504     }
505 
506     const uint32_t section_type = section.flags & SECTION_TYPE;
507     if (section_type == S_ZEROFILL || section_type == S_THREAD_LOCAL_ZEROFILL ||
508             section_type == S_GB_ZEROFILL) {
509       // Zero-fill sections have a size, but no contents.
510       section.contents.start = section.contents.end = NULL;
511     } else if (segment.contents.start == NULL &&
512                segment.contents.end == NULL) {
513       // Mach-O files in .dSYM bundles have the contents of the loaded
514       // segments removed, and their file offsets and file sizes zeroed
515       // out.  However, the sections within those segments still have
516       // non-zero sizes.  There's no reason to call MisplacedSectionData in
517       // this case; the caller may just need the section's load
518       // address. But do set the contents' limits to NULL, for safety.
519       section.contents.start = section.contents.end = NULL;
520     } else {
521       if (offset < size_t(segment.contents.start - buffer_.start) ||
522           offset > size_t(segment.contents.end - buffer_.start) ||
523           size > size_t(segment.contents.end - buffer_.start - offset)) {
524         if (offset > 0) {
525           reporter_->MisplacedSectionData(section.section_name,
526                                           section.segment_name);
527           return false;
528         } else {
529           // Mach-O files in .dSYM bundles have the contents of the loaded
530           // segments partially removed. The removed sections will have zero as
531           // their offset. MisplacedSectionData should not be called in this
532           // case.
533           section.contents.start = section.contents.end = NULL;
534         }
535       } else {
536         section.contents.start = buffer_.start + offset;
537         section.contents.end = section.contents.start + size;
538       }
539     }
540     if (!handler->HandleSection(section))
541       return false;
542   }
543   return true;
544 }
545 
546 // A SectionHandler that builds a SectionMap for the sections within a
547 // given segment.
548 class Reader::SectionMapper: public SectionHandler {
549  public:
550   // Create a SectionHandler that populates MAP with an entry for
551   // each section it is given.
SectionMapper(SectionMap * map)552   SectionMapper(SectionMap* map) : map_(map) { }
HandleSection(const Section & section)553   bool HandleSection(const Section& section) {
554     (*map_)[section.section_name] = section;
555     return true;
556   }
557  private:
558   // The map under construction. (WEAK)
559   SectionMap* map_;
560 };
561 
MapSegmentSections(const Segment & segment,SectionMap * section_map) const562 bool Reader::MapSegmentSections(const Segment& segment,
563                                 SectionMap* section_map) const {
564   section_map->clear();
565   SectionMapper mapper(section_map);
566   return WalkSegmentSections(segment, &mapper);
567 }
568 
569 }  // namespace mach_o
570 }  // namespace google_breakpad
571