1 // Copyright 2010 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 // Original author: Jim Blandy <[email protected]> <[email protected]>
30
// macho_reader.cc: Implementation of google_breakpad::mach_o::FatReader and
// google_breakpad::mach_o::Reader. See macho_reader.h for details.
33
34 #ifdef HAVE_CONFIG_H
35 #include <config.h> // Must come first
36 #endif
37
38 #include "common/mac/macho_reader.h"
39
40 #include <assert.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43
44 #include <limits>
45
// Unfortunately, CPU_TYPE_ARM is not defined for 10.4.
47 #if !defined(CPU_TYPE_ARM)
48 #define CPU_TYPE_ARM 12
49 #endif
50
51 #if !defined(CPU_TYPE_ARM_64)
52 #define CPU_TYPE_ARM_64 16777228
53 #endif
54
55 namespace google_breakpad {
56 namespace mach_o {
57
// When NDEBUG is #defined, 'assert' discards its argument without
// evaluating it, so an expression that performs necessary work cannot be
// written directly inside an assert. Storing the expression's result in a
// variable and asserting on the variable is no better: under NDEBUG that
// variable draws unused-variable warnings.
//
// ASSERT_ALWAYS_EVAL(x) evaluates x in every build configuration, and
// additionally asserts that x is true when NDEBUG is not #defined.
#ifndef NDEBUG
#define ASSERT_ALWAYS_EVAL(x) assert(x)
#else
#define ASSERT_ALWAYS_EVAL(x) (x)
#endif
71
BadHeader()72 void FatReader::Reporter::BadHeader() {
73 fprintf(stderr, "%s: file is neither a fat binary file"
74 " nor a Mach-O object file\n", filename_.c_str());
75 }
76
TooShort()77 void FatReader::Reporter::TooShort() {
78 fprintf(stderr, "%s: file too short for the data it claims to contain\n",
79 filename_.c_str());
80 }
81
MisplacedObjectFile()82 void FatReader::Reporter::MisplacedObjectFile() {
83 fprintf(stderr, "%s: file too short for the object files it claims"
84 " to contain\n", filename_.c_str());
85 }
86
Read(const uint8_t * buffer,size_t size)87 bool FatReader::Read(const uint8_t* buffer, size_t size) {
88 buffer_.start = buffer;
89 buffer_.end = buffer + size;
90 ByteCursor cursor(&buffer_);
91
92 // Fat binaries always use big-endian, so read the magic number in
93 // that endianness. To recognize Mach-O magic numbers, which can use
94 // either endianness, check for both the proper and reversed forms
95 // of the magic numbers.
96 cursor.set_big_endian(true);
97 if (cursor >> magic_) {
98 if (magic_ == FAT_MAGIC) {
99 // How many object files does this fat binary contain?
100 uint32_t object_files_count;
101 if (!(cursor >> object_files_count)) { // nfat_arch
102 reporter_->TooShort();
103 return false;
104 }
105
106 // Read the list of object files.
107 object_files_.resize(object_files_count);
108 for (size_t i = 0; i < object_files_count; i++) {
109 struct fat_arch objfile;
110
111 // Read this object file entry, byte-swapping as appropriate.
112 cursor >> objfile.cputype
113 >> objfile.cpusubtype
114 >> objfile.offset
115 >> objfile.size
116 >> objfile.align;
117
118 SuperFatArch super_fat_arch(objfile);
119 object_files_[i] = super_fat_arch;
120
121 if (!cursor) {
122 reporter_->TooShort();
123 return false;
124 }
125 // Does the file actually have the bytes this entry refers to?
126 size_t fat_size = buffer_.Size();
127 if (objfile.offset > fat_size ||
128 objfile.size > fat_size - objfile.offset) {
129 reporter_->MisplacedObjectFile();
130 return false;
131 }
132 }
133
134 return true;
135 } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 ||
136 magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) {
137 // If this is a little-endian Mach-O file, fix the cursor's endianness.
138 if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64)
139 cursor.set_big_endian(false);
140 // Record the entire file as a single entry in the object file list.
141 object_files_.resize(1);
142
143 // Get the cpu type and subtype from the Mach-O header.
144 if (!(cursor >> object_files_[0].cputype
145 >> object_files_[0].cpusubtype)) {
146 reporter_->TooShort();
147 return false;
148 }
149
150 object_files_[0].offset = 0;
151 object_files_[0].size = static_cast<uint64_t>(buffer_.Size());
152 // This alignment is correct for 32 and 64-bit x86 and ppc.
153 // See get_align in the lipo source for other architectures:
154 // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
155 object_files_[0].align = 12; // 2^12 == 4096
156 return true;
157 }
158 }
159 reporter_->BadHeader();
160 return false;
161 }
162
BadHeader()163 void Reader::Reporter::BadHeader() {
164 fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str());
165 }
166
CPUTypeMismatch(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,cpu_type_t expected_cpu_type,cpu_subtype_t expected_cpu_subtype)167 void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
168 cpu_subtype_t cpu_subtype,
169 cpu_type_t expected_cpu_type,
170 cpu_subtype_t expected_cpu_subtype) {
171 fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected"
172 " type %d, subtype %d\n",
173 filename_.c_str(), cpu_type, cpu_subtype,
174 expected_cpu_type, expected_cpu_subtype);
175 }
176
HeaderTruncated()177 void Reader::Reporter::HeaderTruncated() {
178 fprintf(stderr, "%s: file does not contain a complete Mach-O header\n",
179 filename_.c_str());
180 }
181
LoadCommandRegionTruncated()182 void Reader::Reporter::LoadCommandRegionTruncated() {
183 fprintf(stderr, "%s: file too short to hold load command region"
184 " given in Mach-O header\n", filename_.c_str());
185 }
186
LoadCommandsOverrun(size_t claimed,size_t i,LoadCommandType type)187 void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
188 LoadCommandType type) {
189 fprintf(stderr, "%s: file's header claims there are %zu"
190 " load commands, but load command #%zu",
191 filename_.c_str(), claimed, i);
192 if (type) fprintf(stderr, ", of type %d,", type);
193 fprintf(stderr, " extends beyond the end of the load command region\n");
194 }
195
LoadCommandTooShort(size_t i,LoadCommandType type)196 void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
197 fprintf(stderr, "%s: the contents of load command #%zu, of type %d,"
198 " extend beyond the size given in the load command's header\n",
199 filename_.c_str(), i, type);
200 }
201
SectionsMissing(const string & name)202 void Reader::Reporter::SectionsMissing(const string& name) {
203 fprintf(stderr, "%s: the load command for segment '%s'"
204 " is too short to hold the section headers it claims to have\n",
205 filename_.c_str(), name.c_str());
206 }
207
MisplacedSegmentData(const string & name)208 void Reader::Reporter::MisplacedSegmentData(const string& name) {
209 fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond"
210 " the end of the file\n", filename_.c_str(), name.c_str());
211 }
212
MisplacedSectionData(const string & section,const string & segment)213 void Reader::Reporter::MisplacedSectionData(const string& section,
214 const string& segment) {
215 fprintf(stderr, "%s: the section '%s' in segment '%s'"
216 " claims its contents lie outside the segment's contents\n",
217 filename_.c_str(), section.c_str(), segment.c_str());
218 }
219
MisplacedSymbolTable()220 void Reader::Reporter::MisplacedSymbolTable() {
221 fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol"
222 " table's contents are located beyond the end of the file\n",
223 filename_.c_str());
224 }
225
UnsupportedCPUType(cpu_type_t cpu_type)226 void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
227 fprintf(stderr, "%s: CPU type %d is not supported\n",
228 filename_.c_str(), cpu_type);
229 }
230
Read(const uint8_t * buffer,size_t size,cpu_type_t expected_cpu_type,cpu_subtype_t expected_cpu_subtype)231 bool Reader::Read(const uint8_t* buffer,
232 size_t size,
233 cpu_type_t expected_cpu_type,
234 cpu_subtype_t expected_cpu_subtype) {
235 assert(!buffer_.start);
236 buffer_.start = buffer;
237 buffer_.end = buffer + size;
238 ByteCursor cursor(&buffer_, true);
239 uint32_t magic;
240 if (!(cursor >> magic)) {
241 reporter_->HeaderTruncated();
242 return false;
243 }
244
245 if (expected_cpu_type != CPU_TYPE_ANY) {
246 uint32_t expected_magic;
247 // validate that magic matches the expected cpu type
248 switch (expected_cpu_type) {
249 case CPU_TYPE_ARM:
250 case CPU_TYPE_I386:
251 expected_magic = MH_CIGAM;
252 break;
253 case CPU_TYPE_POWERPC:
254 expected_magic = MH_MAGIC;
255 break;
256 case CPU_TYPE_ARM_64:
257 case CPU_TYPE_X86_64:
258 expected_magic = MH_CIGAM_64;
259 break;
260 case CPU_TYPE_POWERPC64:
261 expected_magic = MH_MAGIC_64;
262 break;
263 default:
264 reporter_->UnsupportedCPUType(expected_cpu_type);
265 return false;
266 }
267
268 if (expected_magic != magic) {
269 reporter_->BadHeader();
270 return false;
271 }
272 }
273
274 // Since the byte cursor is in big-endian mode, a reversed magic number
275 // always indicates a little-endian file, regardless of our own endianness.
276 switch (magic) {
277 case MH_MAGIC: big_endian_ = true; bits_64_ = false; break;
278 case MH_CIGAM: big_endian_ = false; bits_64_ = false; break;
279 case MH_MAGIC_64: big_endian_ = true; bits_64_ = true; break;
280 case MH_CIGAM_64: big_endian_ = false; bits_64_ = true; break;
281 default:
282 reporter_->BadHeader();
283 return false;
284 }
285 cursor.set_big_endian(big_endian_);
286 uint32_t commands_size, reserved;
287 cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_
288 >> commands_size >> flags_;
289 if (bits_64_)
290 cursor >> reserved;
291 if (!cursor) {
292 reporter_->HeaderTruncated();
293 return false;
294 }
295
296 if (expected_cpu_type != CPU_TYPE_ANY &&
297 (expected_cpu_type != cpu_type_ ||
298 expected_cpu_subtype != cpu_subtype_)) {
299 reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
300 expected_cpu_type, expected_cpu_subtype);
301 return false;
302 }
303
304 cursor
305 .PointTo(&load_commands_.start, commands_size)
306 .PointTo(&load_commands_.end, 0);
307 if (!cursor) {
308 reporter_->LoadCommandRegionTruncated();
309 return false;
310 }
311
312 return true;
313 }
314
WalkLoadCommands(Reader::LoadCommandHandler * handler) const315 bool Reader::WalkLoadCommands(Reader::LoadCommandHandler* handler) const {
316 ByteCursor list_cursor(&load_commands_, big_endian_);
317
318 for (size_t index = 0; index < load_command_count_; ++index) {
319 // command refers to this load command alone, so that cursor will
320 // refuse to read past the load command's end. But since we haven't
321 // read the size yet, let command initially refer to the entire
322 // remainder of the load command series.
323 ByteBuffer command(list_cursor.here(), list_cursor.Available());
324 ByteCursor cursor(&command, big_endian_);
325
326 // Read the command type and size --- fields common to all commands.
327 uint32_t type, size;
328 if (!(cursor >> type)) {
329 reporter_->LoadCommandsOverrun(load_command_count_, index, 0);
330 return false;
331 }
332 if (!(cursor >> size) || size > command.Size()) {
333 reporter_->LoadCommandsOverrun(load_command_count_, index, type);
334 return false;
335 }
336
337 // Now that we've read the length, restrict command's range to this
338 // load command only.
339 command.end = command.start + size;
340
341 switch (type) {
342 case LC_SEGMENT:
343 case LC_SEGMENT_64: {
344 Segment segment;
345 segment.bits_64 = (type == LC_SEGMENT_64);
346 size_t word_size = segment.bits_64 ? 8 : 4;
347 cursor.CString(&segment.name, 16);
348 cursor
349 .Read(word_size, false, &segment.vmaddr)
350 .Read(word_size, false, &segment.vmsize)
351 .Read(word_size, false, &segment.fileoff)
352 .Read(word_size, false, &segment.filesize);
353 cursor >> segment.maxprot
354 >> segment.initprot
355 >> segment.nsects
356 >> segment.flags;
357 if (!cursor) {
358 reporter_->LoadCommandTooShort(index, type);
359 return false;
360 }
361 if (segment.fileoff > buffer_.Size() ||
362 segment.filesize > buffer_.Size() - segment.fileoff) {
363 reporter_->MisplacedSegmentData(segment.name);
364 return false;
365 }
366 // Mach-O files in .dSYM bundles have the contents of the loaded
367 // segments removed, and their file offsets and file sizes zeroed
368 // out. To help us handle this special case properly, give such
369 // segments' contents NULL starting and ending pointers.
370 if (segment.fileoff == 0 && segment.filesize == 0) {
371 segment.contents.start = segment.contents.end = NULL;
372 } else {
373 segment.contents.start = buffer_.start + segment.fileoff;
374 segment.contents.end = segment.contents.start + segment.filesize;
375 }
376 // The section list occupies the remainder of this load command's space.
377 segment.section_list.start = cursor.here();
378 segment.section_list.end = command.end;
379
380 if (!handler->SegmentCommand(segment))
381 return false;
382 break;
383 }
384
385 case LC_SYMTAB: {
386 uint32_t symoff, nsyms, stroff, strsize;
387 cursor >> symoff >> nsyms >> stroff >> strsize;
388 if (!cursor) {
389 reporter_->LoadCommandTooShort(index, type);
390 return false;
391 }
392 // How big are the entries in the symbol table?
393 // sizeof(struct nlist_64) : sizeof(struct nlist),
394 // but be paranoid about alignment vs. target architecture.
395 size_t symbol_size = bits_64_ ? 16 : 12;
396 // How big is the entire symbol array?
397 size_t symbols_size = nsyms * symbol_size;
398 if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff ||
399 stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) {
400 reporter_->MisplacedSymbolTable();
401 return false;
402 }
403 ByteBuffer entries(buffer_.start + symoff, symbols_size);
404 ByteBuffer names(buffer_.start + stroff, strsize);
405 if (!handler->SymtabCommand(entries, names))
406 return false;
407 break;
408 }
409
410 default: {
411 if (!handler->UnknownCommand(type, command))
412 return false;
413 break;
414 }
415 }
416
417 list_cursor.set_here(command.end);
418 }
419
420 return true;
421 }
422
423 // A load command handler that looks for a segment of a given name.
424 class Reader::SegmentFinder : public LoadCommandHandler {
425 public:
426 // Create a load command handler that looks for a segment named NAME,
427 // and sets SEGMENT to describe it if found.
SegmentFinder(const string & name,Segment * segment)428 SegmentFinder(const string& name, Segment* segment)
429 : name_(name), segment_(segment), found_() { }
430
431 // Return true if the traversal found the segment, false otherwise.
found() const432 bool found() const { return found_; }
433
SegmentCommand(const Segment & segment)434 bool SegmentCommand(const Segment& segment) {
435 if (segment.name == name_) {
436 *segment_ = segment;
437 found_ = true;
438 return false;
439 }
440 return true;
441 }
442
443 private:
444 // The name of the segment our creator is looking for.
445 const string& name_;
446
447 // Where we should store the segment if found. (WEAK)
448 Segment* segment_;
449
450 // True if we found the segment.
451 bool found_;
452 };
453
FindSegment(const string & name,Segment * segment) const454 bool Reader::FindSegment(const string& name, Segment* segment) const {
455 SegmentFinder finder(name, segment);
456 WalkLoadCommands(&finder);
457 return finder.found();
458 }
459
WalkSegmentSections(const Segment & segment,SectionHandler * handler) const460 bool Reader::WalkSegmentSections(const Segment& segment,
461 SectionHandler* handler) const {
462 size_t word_size = segment.bits_64 ? 8 : 4;
463 ByteCursor cursor(&segment.section_list, big_endian_);
464
465 for (size_t i = 0; i < segment.nsects; i++) {
466 Section section;
467 section.bits_64 = segment.bits_64;
468 uint64_t size, offset;
469 uint32_t dummy32;
470 cursor
471 .CString(§ion.section_name, 16)
472 .CString(§ion.segment_name, 16)
473 .Read(word_size, false, §ion.address)
474 .Read(word_size, false, &size)
475 .Read(sizeof(uint32_t), false, &offset) // clears high bits of |offset|
476 >> section.align
477 >> dummy32
478 >> dummy32
479 >> section.flags
480 >> dummy32
481 >> dummy32;
482 if (section.bits_64)
483 cursor >> dummy32;
484 if (!cursor) {
485 reporter_->SectionsMissing(segment.name);
486 return false;
487 }
488
489 // Even 64-bit Mach-O isn’t a true 64-bit format in that it doesn’t handle
490 // 64-bit file offsets gracefully. Segment load commands do contain 64-bit
491 // file offsets, but sections within do not. Because segments load
492 // contiguously, recompute each section’s file offset on the basis of its
493 // containing segment’s file offset and the difference between the section’s
494 // and segment’s load addresses. If truncation is detected, honor the
495 // recomputed offset.
496 if (segment.bits_64 &&
497 segment.fileoff + segment.filesize >
498 std::numeric_limits<uint32_t>::max()) {
499 const uint64_t section_offset_recomputed =
500 segment.fileoff + section.address - segment.vmaddr;
501 if (offset == static_cast<uint32_t>(section_offset_recomputed)) {
502 offset = section_offset_recomputed;
503 }
504 }
505
506 const uint32_t section_type = section.flags & SECTION_TYPE;
507 if (section_type == S_ZEROFILL || section_type == S_THREAD_LOCAL_ZEROFILL ||
508 section_type == S_GB_ZEROFILL) {
509 // Zero-fill sections have a size, but no contents.
510 section.contents.start = section.contents.end = NULL;
511 } else if (segment.contents.start == NULL &&
512 segment.contents.end == NULL) {
513 // Mach-O files in .dSYM bundles have the contents of the loaded
514 // segments removed, and their file offsets and file sizes zeroed
515 // out. However, the sections within those segments still have
516 // non-zero sizes. There's no reason to call MisplacedSectionData in
517 // this case; the caller may just need the section's load
518 // address. But do set the contents' limits to NULL, for safety.
519 section.contents.start = section.contents.end = NULL;
520 } else {
521 if (offset < size_t(segment.contents.start - buffer_.start) ||
522 offset > size_t(segment.contents.end - buffer_.start) ||
523 size > size_t(segment.contents.end - buffer_.start - offset)) {
524 if (offset > 0) {
525 reporter_->MisplacedSectionData(section.section_name,
526 section.segment_name);
527 return false;
528 } else {
529 // Mach-O files in .dSYM bundles have the contents of the loaded
530 // segments partially removed. The removed sections will have zero as
531 // their offset. MisplacedSectionData should not be called in this
532 // case.
533 section.contents.start = section.contents.end = NULL;
534 }
535 } else {
536 section.contents.start = buffer_.start + offset;
537 section.contents.end = section.contents.start + size;
538 }
539 }
540 if (!handler->HandleSection(section))
541 return false;
542 }
543 return true;
544 }
545
546 // A SectionHandler that builds a SectionMap for the sections within a
547 // given segment.
548 class Reader::SectionMapper: public SectionHandler {
549 public:
550 // Create a SectionHandler that populates MAP with an entry for
551 // each section it is given.
SectionMapper(SectionMap * map)552 SectionMapper(SectionMap* map) : map_(map) { }
HandleSection(const Section & section)553 bool HandleSection(const Section& section) {
554 (*map_)[section.section_name] = section;
555 return true;
556 }
557 private:
558 // The map under construction. (WEAK)
559 SectionMap* map_;
560 };
561
MapSegmentSections(const Segment & segment,SectionMap * section_map) const562 bool Reader::MapSegmentSections(const Segment& segment,
563 SectionMap* section_map) const {
564 section_map->clear();
565 SectionMapper mapper(section_map);
566 return WalkSegmentSections(segment, &mapper);
567 }
568
569 } // namespace mach_o
570 } // namespace google_breakpad
571