xref: /aosp_15_r20/external/google-breakpad/src/client/mac/handler/dynamic_images.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // Copyright 2007 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>  // Must come first
31 #endif
32 
33 #include "client/mac/handler/dynamic_images.h"
34 
35 extern "C" { // needed to compile on Leopard
36   #include <mach-o/nlist.h>
37   #include <stdlib.h>
38   #include <stdio.h>
39 }
40 
41 #include <assert.h>
42 #include <AvailabilityMacros.h>
43 #include <dlfcn.h>
44 #include <mach/task_info.h>
45 #include <sys/sysctl.h>
46 #include <TargetConditionals.h>
47 #include <unistd.h>
48 
49 #include <algorithm>
50 #include <string>
51 #include <vector>
52 
53 #include "breakpad_nlist_64.h"
54 
55 #if !TARGET_OS_IPHONE
56 #include <CoreServices/CoreServices.h>
57 
58 #ifndef MAC_OS_X_VERSION_10_6
59 #define MAC_OS_X_VERSION_10_6 1060
60 #endif
61 
62 #if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6
63 
64 // Fallback declarations for TASK_DYLD_INFO and friends, introduced in
65 // <mach/task_info.h> in the Mac OS X 10.6 SDK.
66 #define TASK_DYLD_INFO 17
// Local stand-in for the SDK's task_dyld_info record. In this file it is the
// buffer handed to task_info(TASK_DYLD_INFO) by
// DynamicImages::GetDyldAllImageInfosPointer().
struct task_dyld_info {
  // Returned by GetDyldAllImageInfosPointer() as the address of dyld's
  // all-image-infos structure in the inspected task.
  mach_vm_address_t all_image_info_addr;
  // NOTE(review): presumably the byte size of that structure -- confirm
  // against <mach/task_info.h>.
  mach_vm_size_t all_image_info_size;
};
71 typedef struct task_dyld_info task_dyld_info_data_t;
72 typedef struct task_dyld_info* task_dyld_info_t;
73 #define TASK_DYLD_INFO_COUNT (sizeof(task_dyld_info_data_t) / sizeof(natural_t))
74 
75 #endif
76 
77 #endif  // !TARGET_OS_IPHONE
78 
79 namespace google_breakpad {
80 
81 using std::string;
82 using std::vector;
83 
84 //==============================================================================
85 // Returns the size of the memory region containing |address| and the
86 // number of bytes from |address| to the end of the region.
87 // We potentially, will extend the size of the original
88 // region by the size of the following region if it's contiguous with the
89 // first in order to handle cases when we're reading strings and they
90 // straddle two vm regions.
91 //
GetMemoryRegionSize(task_port_t target_task,const uint64_t address,mach_vm_size_t * size_to_end)92 static mach_vm_size_t GetMemoryRegionSize(task_port_t target_task,
93                                           const uint64_t address,
94                                           mach_vm_size_t* size_to_end) {
95   mach_vm_address_t region_base = (mach_vm_address_t)address;
96   mach_vm_size_t region_size;
97   natural_t nesting_level = 0;
98   vm_region_submap_info_64 submap_info;
99   mach_msg_type_number_t info_count = VM_REGION_SUBMAP_INFO_COUNT_64;
100 
101   // Get information about the vm region containing |address|
102   vm_region_recurse_info_t region_info;
103   region_info = reinterpret_cast<vm_region_recurse_info_t>(&submap_info);
104 
105   kern_return_t result =
106     mach_vm_region_recurse(target_task,
107                            &region_base,
108                            &region_size,
109                            &nesting_level,
110                            region_info,
111                            &info_count);
112 
113   if (result == KERN_SUCCESS) {
114     // Get distance from |address| to the end of this region
115     *size_to_end = region_base + region_size -(mach_vm_address_t)address;
116 
117     // If we want to handle strings as long as 4096 characters we may need
118     // to check if there's a vm region immediately following the first one.
119     // If so, we need to extend |*size_to_end| to go all the way to the end
120     // of the second region.
121     if (*size_to_end < 4096) {
122       // Second region starts where the first one ends
123       mach_vm_address_t region_base2 =
124         (mach_vm_address_t)(region_base + region_size);
125       mach_vm_size_t region_size2;
126 
127       // Get information about the following vm region
128       result =
129         mach_vm_region_recurse(target_task,
130                                &region_base2,
131                                &region_size2,
132                                &nesting_level,
133                                region_info,
134                                &info_count);
135 
136       // Extend region_size to go all the way to the end of the 2nd region
137       if (result == KERN_SUCCESS
138           && region_base2 == region_base + region_size) {
139         region_size += region_size2;
140       }
141     }
142 
143     *size_to_end = region_base + region_size -(mach_vm_address_t)address;
144   } else {
145     region_size = 0;
146     *size_to_end = 0;
147   }
148 
149   return region_size;
150 }
151 
152 #define kMaxStringLength 8192
153 //==============================================================================
154 // Reads a NULL-terminated string from another task.
155 //
156 // Warning!  This will not read any strings longer than kMaxStringLength-1
157 //
ReadTaskString(task_port_t target_task,const uint64_t address)158 static string ReadTaskString(task_port_t target_task,
159                              const uint64_t address) {
160   // The problem is we don't know how much to read until we know how long
161   // the string is. And we don't know how long the string is, until we've read
162   // the memory!  So, we'll try to read kMaxStringLength bytes
163   // (or as many bytes as we can until we reach the end of the vm region).
164   mach_vm_size_t size_to_end;
165   GetMemoryRegionSize(target_task, address, &size_to_end);
166 
167   if (size_to_end > 0) {
168     mach_vm_size_t size_to_read =
169       size_to_end > kMaxStringLength ? kMaxStringLength : size_to_end;
170 
171     vector<uint8_t> bytes;
172     if (ReadTaskMemory(target_task, address, (size_t)size_to_read, bytes) !=
173         KERN_SUCCESS)
174       return string();
175 
176     return string(reinterpret_cast<const char*>(&bytes[0]));
177   }
178 
179   return string();
180 }
181 
182 //==============================================================================
183 // Reads an address range from another task. The bytes read will be returned
184 // in bytes, which will be resized as necessary.
ReadTaskMemory(task_port_t target_task,const uint64_t address,size_t length,vector<uint8_t> & bytes)185 kern_return_t ReadTaskMemory(task_port_t target_task,
186                              const uint64_t address,
187                              size_t length,
188                              vector<uint8_t>& bytes) {
189   int systemPageSize = getpagesize();
190 
191   // use the negative of the page size for the mask to find the page address
192   mach_vm_address_t page_address = address & (-systemPageSize);
193 
194   mach_vm_address_t last_page_address =
195       (address + length + (systemPageSize - 1)) & (-systemPageSize);
196 
197   mach_vm_size_t page_size = last_page_address - page_address;
198   uint8_t* local_start;
199   uint32_t local_length;
200 
201   kern_return_t r = mach_vm_read(target_task,
202                                  page_address,
203                                  page_size,
204                                  reinterpret_cast<vm_offset_t*>(&local_start),
205                                  &local_length);
206 
207   if (r != KERN_SUCCESS)
208     return r;
209 
210   bytes.resize(length);
211   memcpy(&bytes[0],
212          &local_start[(mach_vm_address_t)address - page_address],
213          length);
214   mach_vm_deallocate(mach_task_self(), (uintptr_t)local_start, local_length);
215   return KERN_SUCCESS;
216 }
217 
218 #pragma mark -
219 
//==============================================================================
// Traits structs for specializing function templates to handle
// 32-bit/64-bit Mach-O files.

// Traits selected by the function templates in this file when the inspected
// image/task is not 64-bit (see the Is64Bit() dispatch sites).
struct MachO32 {
  // Header type placed at the start of an image's header bytes.
  typedef mach_header mach_header_type;
  // Load-command type used to interpret segment commands.
  typedef segment_command mach_segment_command_type;
  // Per-image record read from the task's image info array.
  typedef dyld_image_info32 dyld_image_info;
  // Top-level dyld structure read from the all-image-infos address.
  typedef dyld_all_image_infos32 dyld_all_image_infos;
  // Symbol table entry type handed to breakpad_nlist() by LookupSymbol().
  typedef struct nlist nlist_type;
  // Value FindTextSection() requires in the header's magic field.
  static const uint32_t magic = MH_MAGIC;
  // Load-command id that FindTextSection() treats as a segment command.
  static const uint32_t segment_load_command = LC_SEGMENT;
};
232 
// Traits selected by the function templates in this file when the inspected
// image/task is 64-bit (see the Is64Bit() dispatch sites).
struct MachO64 {
  // Header type placed at the start of an image's header bytes.
  typedef mach_header_64 mach_header_type;
  // Load-command type used to interpret segment commands.
  typedef segment_command_64 mach_segment_command_type;
  // Per-image record read from the task's image info array.
  typedef dyld_image_info64 dyld_image_info;
  // Top-level dyld structure read from the all-image-infos address.
  typedef dyld_all_image_infos64 dyld_all_image_infos;
  // Symbol table entry type handed to breakpad_nlist() by LookupSymbol().
  typedef struct nlist_64 nlist_type;
  // Value FindTextSection() requires in the header's magic field.
  static const uint32_t magic = MH_MAGIC_64;
  // Load-command id that FindTextSection() treats as a segment command.
  static const uint32_t segment_load_command = LC_SEGMENT_64;
};
242 
243 template<typename MachBits>
FindTextSection(DynamicImage & image)244 bool FindTextSection(DynamicImage& image) {
245   typedef typename MachBits::mach_header_type mach_header_type;
246   typedef typename MachBits::mach_segment_command_type
247       mach_segment_command_type;
248 
249   const mach_header_type* header =
250       reinterpret_cast<const mach_header_type*>(&image.header_[0]);
251 
252   if(header->magic != MachBits::magic) {
253     return false;
254   }
255 
256   const struct load_command* cmd =
257       reinterpret_cast<const struct load_command*>(header + 1);
258 
259   bool found_text_section = false;
260   bool found_dylib_id_command = false;
261   for (unsigned int i = 0; cmd && (i < header->ncmds); ++i) {
262     if (!found_text_section) {
263       if (cmd->cmd == MachBits::segment_load_command) {
264         const mach_segment_command_type* seg =
265             reinterpret_cast<const mach_segment_command_type*>(cmd);
266 
267         if (!strcmp(seg->segname, "__TEXT")) {
268           image.vmaddr_ = static_cast<mach_vm_address_t>(seg->vmaddr);
269           image.vmsize_ = static_cast<mach_vm_size_t>(seg->vmsize);
270           image.slide_ = 0;
271 
272           if (seg->fileoff == 0 && seg->filesize != 0) {
273             image.slide_ =
274                 (uintptr_t)image.GetLoadAddress() - (uintptr_t)seg->vmaddr;
275           }
276           found_text_section = true;
277         }
278       }
279     }
280 
281     if (!found_dylib_id_command) {
282       if (cmd->cmd == LC_ID_DYLIB) {
283         const struct dylib_command* dc =
284             reinterpret_cast<const struct dylib_command*>(cmd);
285 
286         image.version_ = dc->dylib.current_version;
287         found_dylib_id_command = true;
288       }
289     }
290 
291     if (found_dylib_id_command && found_text_section) {
292       return true;
293     }
294 
295     cmd = reinterpret_cast<const struct load_command*>
296         (reinterpret_cast<const char*>(cmd) + cmd->cmdsize);
297   }
298 
299   return false;
300 }
301 
302 //==============================================================================
303 // Initializes vmaddr_, vmsize_, and slide_
CalculateMemoryAndVersionInfo()304 void DynamicImage::CalculateMemoryAndVersionInfo() {
305   // unless we can process the header, ensure that calls to
306   // IsValid() will return false
307   vmaddr_ = 0;
308   vmsize_ = 0;
309   slide_ = 0;
310   version_ = 0;
311 
312   // The function template above does all the real work.
313   if (Is64Bit())
314     FindTextSection<MachO64>(*this);
315   else
316     FindTextSection<MachO32>(*this);
317 }
318 
319 //==============================================================================
320 // The helper function template abstracts the 32/64-bit differences.
321 template<typename MachBits>
GetFileTypeFromHeader(DynamicImage & image)322 uint32_t GetFileTypeFromHeader(DynamicImage& image) {
323   typedef typename MachBits::mach_header_type mach_header_type;
324 
325   const mach_header_type* header =
326       reinterpret_cast<const mach_header_type*>(&image.header_[0]);
327   return header->filetype;
328 }
329 
GetFileType()330 uint32_t DynamicImage::GetFileType() {
331   if (Is64Bit())
332     return GetFileTypeFromHeader<MachO64>(*this);
333 
334   return GetFileTypeFromHeader<MachO32>(*this);
335 }
336 
337 #pragma mark -
338 
//==============================================================================
// Loads information about dynamically loaded code in the given task.
//
// Stores |task|, derives the task's CPU type via DetermineTaskCPUType(), and
// then fills the (initially empty) image list by calling
// ReadImageInfoForTask().
DynamicImages::DynamicImages(mach_port_t task)
    : task_(task),
      cpu_type_(DetermineTaskCPUType(task)),
      image_list_() {
  ReadImageInfoForTask();
}
347 
348 template<typename MachBits>
LookupSymbol(const char * symbol_name,const char * filename,cpu_type_t cpu_type)349 static uint64_t LookupSymbol(const char* symbol_name,
350                              const char* filename,
351                              cpu_type_t cpu_type) {
352   typedef typename MachBits::nlist_type nlist_type;
353 
354   nlist_type symbol_info[8] = {};
355   const char* symbolNames[2] = { symbol_name, "\0" };
356   nlist_type& list = symbol_info[0];
357   int invalidEntriesCount = breakpad_nlist(filename,
358                                            &list,
359                                            symbolNames,
360                                            cpu_type);
361 
362   if(invalidEntriesCount != 0) {
363     return 0;
364   }
365 
366   assert(list.n_value);
367   return list.n_value;
368 }
369 
370 #if TARGET_OS_IPHONE || MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
// Variant compiled for iOS or when the minimum deployment target is
// Mac OS X 10.6 or later: no runtime check is made and TASK_DYLD_INFO is
// always reported as usable.
static bool HasTaskDyldInfo() {
  return true;
}
374 #else
GetOSVersionInternal()375 static SInt32 GetOSVersionInternal() {
376   SInt32 os_version = 0;
377   Gestalt(gestaltSystemVersion, &os_version);
378   return os_version;
379 }
380 
GetOSVersion()381 static SInt32 GetOSVersion() {
382   static SInt32 os_version = GetOSVersionInternal();
383   return os_version;
384 }
385 
HasTaskDyldInfo()386 static bool HasTaskDyldInfo() {
387   return GetOSVersion() >= 0x1060;
388 }
389 #endif  // TARGET_OS_IPHONE || MAC_OS_X_VERSION_MIN_REQUIRED >= 10_6
390 
GetDyldAllImageInfosPointer()391 uint64_t DynamicImages::GetDyldAllImageInfosPointer() {
392   if (HasTaskDyldInfo()) {
393     task_dyld_info_data_t task_dyld_info;
394     mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
395     if (task_info(task_, TASK_DYLD_INFO, (task_info_t)&task_dyld_info,
396                   &count) != KERN_SUCCESS) {
397       return 0;
398     }
399 
400     return (uint64_t)task_dyld_info.all_image_info_addr;
401   } else {
402     const char* imageSymbolName = "_dyld_all_image_infos";
403     const char* dyldPath = "/usr/lib/dyld";
404 
405     if (Is64Bit())
406       return LookupSymbol<MachO64>(imageSymbolName, dyldPath, cpu_type_);
407     return LookupSymbol<MachO32>(imageSymbolName, dyldPath, cpu_type_);
408   }
409 }
410 
411 //==============================================================================
412 // This code was written using dyld_debug.c (from Darwin) as a guide.
413 
414 template<typename MachBits>
ReadImageInfo(DynamicImages & images,uint64_t image_list_address)415 void ReadImageInfo(DynamicImages& images,
416                    uint64_t image_list_address) {
417   typedef typename MachBits::dyld_image_info dyld_image_info;
418   typedef typename MachBits::dyld_all_image_infos dyld_all_image_infos;
419   typedef typename MachBits::mach_header_type mach_header_type;
420 
421   // Read the structure inside of dyld that contains information about
422   // loaded images.  We're reading from the desired task's address space.
423 
424   // Here we make the assumption that dyld loaded at the same address in
425   // the crashed process vs. this one.  This is an assumption made in
426   // "dyld_debug.c" and is said to be nearly always valid.
427   vector<uint8_t> dyld_all_info_bytes;
428   if (ReadTaskMemory(images.task_,
429                      image_list_address,
430                      sizeof(dyld_all_image_infos),
431                      dyld_all_info_bytes) != KERN_SUCCESS)
432     return;
433 
434   dyld_all_image_infos* dyldInfo =
435     reinterpret_cast<dyld_all_image_infos*>(&dyld_all_info_bytes[0]);
436 
437   // number of loaded images
438   int count = dyldInfo->infoArrayCount;
439 
440   // Read an array of dyld_image_info structures each containing
441   // information about a loaded image.
442   vector<uint8_t> dyld_info_array_bytes;
443     if (ReadTaskMemory(images.task_,
444                        dyldInfo->infoArray,
445                        count * sizeof(dyld_image_info),
446                        dyld_info_array_bytes) != KERN_SUCCESS)
447       return;
448 
449     dyld_image_info* infoArray =
450         reinterpret_cast<dyld_image_info*>(&dyld_info_array_bytes[0]);
451     images.image_list_.reserve(count);
452 
453     for (int i = 0; i < count; ++i) {
454       dyld_image_info& info = infoArray[i];
455 
456       // First read just the mach_header from the image in the task.
457       vector<uint8_t> mach_header_bytes;
458       if (ReadTaskMemory(images.task_,
459                          info.load_address_,
460                          sizeof(mach_header_type),
461                          mach_header_bytes) != KERN_SUCCESS)
462         continue;  // bail on this dynamic image
463 
464       mach_header_type* header =
465           reinterpret_cast<mach_header_type*>(&mach_header_bytes[0]);
466 
467       // Now determine the total amount necessary to read the header
468       // plus all of the load commands.
469       size_t header_size =
470           sizeof(mach_header_type) + header->sizeofcmds;
471 
472       if (ReadTaskMemory(images.task_,
473                          info.load_address_,
474                          header_size,
475                          mach_header_bytes) != KERN_SUCCESS)
476         continue;
477 
478       // Read the file name from the task's memory space.
479       string file_path;
480       if (info.file_path_) {
481         // Although we're reading kMaxStringLength bytes, it's copied in the
482         // the DynamicImage constructor below with the correct string length,
483         // so it's not really wasting memory.
484         file_path = ReadTaskString(images.task_, info.file_path_);
485       }
486 
487       // Create an object representing this image and add it to our list.
488       DynamicImage* new_image;
489       new_image = new DynamicImage(&mach_header_bytes[0],
490                                    header_size,
491                                    info.load_address_,
492                                    file_path,
493                                    static_cast<uintptr_t>(info.file_mod_date_),
494                                    images.task_,
495                                    images.cpu_type_);
496 
497       if (new_image->IsValid()) {
498         images.image_list_.push_back(DynamicImageRef(new_image));
499       } else {
500         delete new_image;
501       }
502     }
503 
504     // sorts based on loading address
505     sort(images.image_list_.begin(), images.image_list_.end());
506     // remove duplicates - this happens in certain strange cases
507     // You can see it in DashboardClient when Google Gadgets plugin
508     // is installed.  Apple's crash reporter log and gdb "info shared"
509     // both show the same library multiple times at the same address
510 
511     vector<DynamicImageRef>::iterator it = unique(images.image_list_.begin(),
512                                                   images.image_list_.end());
513     images.image_list_.erase(it, images.image_list_.end());
514 }
515 
ReadImageInfoForTask()516 void DynamicImages::ReadImageInfoForTask() {
517   uint64_t imageList = GetDyldAllImageInfosPointer();
518 
519   if (imageList) {
520     if (Is64Bit())
521       ReadImageInfo<MachO64>(*this, imageList);
522     else
523       ReadImageInfo<MachO32>(*this, imageList);
524   }
525 }
526 
527 //==============================================================================
GetExecutableImage()528 DynamicImage* DynamicImages::GetExecutableImage() {
529   int executable_index = GetExecutableImageIndex();
530 
531   if (executable_index >= 0) {
532     return GetImage(executable_index);
533   }
534 
535   return NULL;
536 }
537 
538 //==============================================================================
539 // returns -1 if failure to find executable
GetExecutableImageIndex()540 int DynamicImages::GetExecutableImageIndex() {
541   int image_count = GetImageCount();
542 
543   for (int i = 0; i < image_count; ++i) {
544     DynamicImage* image = GetImage(i);
545     if (image->GetFileType() == MH_EXECUTE) {
546       return i;
547     }
548   }
549 
550   return -1;
551 }
552 
553 //==============================================================================
554 // static
DetermineTaskCPUType(task_t task)555 cpu_type_t DynamicImages::DetermineTaskCPUType(task_t task) {
556   if (task == mach_task_self())
557     return GetNativeCPUType();
558 
559   int mib[CTL_MAXNAME];
560   size_t mibLen = CTL_MAXNAME;
561   int err = sysctlnametomib("sysctl.proc_cputype", mib, &mibLen);
562   if (err == 0) {
563     assert(mibLen < CTL_MAXNAME);
564     pid_for_task(task, &mib[mibLen]);
565     mibLen += 1;
566 
567     cpu_type_t cpu_type;
568     size_t cpuTypeSize = sizeof(cpu_type);
569     sysctl(mib, static_cast<u_int>(mibLen), &cpu_type, &cpuTypeSize, 0, 0);
570     return cpu_type;
571   }
572 
573   return GetNativeCPUType();
574 }
575 
576 }  // namespace google_breakpad
577