// -*- mode: c++ -*-

// Copyright 2011 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: Jim Blandy <[email protected]> <[email protected]>

// dump_syms.h: Declaration of google_breakpad::DumpSymbols, a class for
// reading debugging information from Mach-O files and writing it out as a
// Breakpad symbol file.
36 37 #include <mach-o/loader.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 41 #include <ostream> 42 #include <string> 43 #include <vector> 44 45 #include "common/byte_cursor.h" 46 #include "common/dwarf/dwarf2reader.h" 47 #include "common/mac/arch_utilities.h" 48 #include "common/mac/macho_reader.h" 49 #include "common/mac/super_fat_arch.h" 50 #include "common/module.h" 51 #include "common/scoped_ptr.h" 52 #include "common/symbol_data.h" 53 54 namespace google_breakpad { 55 56 class DumpSymbols { 57 public: 58 DumpSymbols(SymbolData symbol_data, 59 bool handle_inter_cu_refs, 60 bool enable_multiple = false, 61 const std::string& module_name = "", 62 bool prefer_extern_name = false) symbol_data_(symbol_data)63 : symbol_data_(symbol_data), 64 handle_inter_cu_refs_(handle_inter_cu_refs), 65 object_filename_(), 66 contents_(), 67 size_(0), 68 from_disk_(false), 69 object_files_(), 70 selected_object_file_(), 71 selected_object_name_(), 72 enable_multiple_(enable_multiple), 73 module_name_(module_name), 74 prefer_extern_name_(prefer_extern_name) {} 75 ~DumpSymbols() = default; 76 77 // Prepare to read debugging information from |filename|. |filename| may be 78 // the name of a fat file, a Mach-O file, or a dSYM bundle containing either 79 // of the above. 80 // 81 // If |module_name_| is empty, uses the basename of |filename| as the module 82 // name. Otherwise, uses |module_name_| as the module name. 83 // 84 // On success, return true; if there is a problem reading 85 // |filename|, report it and return false. 86 bool Read(const std::string& filename); 87 88 // Prepare to read debugging information from |contents|. |contents| is 89 // expected to be the data obtained from reading a fat file, or a Mach-O file. 90 // |filename| is used to determine the object filename in the generated 91 // output; there will not be an attempt to open this file as the data 92 // is already expected to be in memory. 
On success, return true; if there is a 93 // problem reading |contents|, report it and return false. 94 bool ReadData(uint8_t* contents, size_t size, const std::string& filename); 95 96 // If this dumper's file includes an object file for `info`, then select that 97 // object file for dumping, and return true. Otherwise, return false, and 98 // leave this dumper's selected architecture unchanged. 99 // 100 // By default, if this dumper's file contains only one object file, then 101 // the dumper will dump those symbols; and if it contains more than one 102 // object file, then the dumper will dump the object file whose 103 // architecture matches that of this dumper program. 104 bool SetArchitecture(const ArchInfo& info); 105 106 // Return a pointer to an array of SuperFatArch structures describing the 107 // object files contained in this dumper's file. Set *|count| to the number 108 // of elements in the array. The returned array is owned by this DumpSymbols 109 // instance. 110 // 111 // If there are no available architectures, this function 112 // may return NULL. AvailableArchitectures(size_t * count)113 const SuperFatArch* AvailableArchitectures(size_t* count) { 114 *count = object_files_.size(); 115 if (object_files_.size() > 0) 116 return &object_files_[0]; 117 return NULL; 118 } 119 120 // Read the selected object file's debugging information, and write out the 121 // header only to |stream|. Return true on success; if an error occurs, report 122 // it and return false. 123 bool WriteSymbolFileHeader(std::ostream& stream); 124 125 // Read the selected object file's debugging information and store it in 126 // `module`. The caller owns the resulting module object and must delete 127 // it when finished. 128 bool ReadSymbolData(Module** module); 129 130 // Return an identifier string for the file this DumpSymbols is dumping. 131 std::string Identifier(); 132 133 private: 134 // Used internally. 
135 class DumperLineToModule; 136 class DumperRangesHandler; 137 class LoadCommandDumper; 138 139 // This method behaves similarly to NXFindBestFatArch, but it supports 140 // SuperFatArch. 141 SuperFatArch* FindBestMatchForArchitecture( 142 cpu_type_t cpu_type, cpu_subtype_t cpu_subtype); 143 144 // Creates an empty module object. 145 bool CreateEmptyModule(scoped_ptr<Module>& module); 146 147 // Process the split dwarf file referenced by reader. 148 void StartProcessSplitDwarf(google_breakpad::CompilationUnit* reader, 149 Module* module, 150 google_breakpad::Endianness endianness, 151 bool handle_inter_cu_refs, 152 bool handle_inline) const; 153 154 // Read debugging information from |dwarf_sections|, which was taken from 155 // |macho_reader|, and add it to |module|. 156 void ReadDwarf(google_breakpad::Module* module, 157 const mach_o::Reader& macho_reader, 158 const mach_o::SectionMap& dwarf_sections, 159 bool handle_inter_cu_refs) const; 160 161 // Read DWARF CFI or .eh_frame data from |section|, belonging to 162 // |macho_reader|, and record it in |module|. If |eh_frame| is true, 163 // then the data is .eh_frame-format data; otherwise, it is standard DWARF 164 // .debug_frame data. On success, return true; on failure, report 165 // the problem and return false. 166 bool ReadCFI(google_breakpad::Module* module, 167 const mach_o::Reader& macho_reader, 168 const mach_o::Section& section, 169 bool eh_frame) const; 170 171 // The selection of what type of symbol data to read/write. 172 const SymbolData symbol_data_; 173 174 // Whether to handle references between compilation units. 175 const bool handle_inter_cu_refs_; 176 177 // The name of the file this DumpSymbols will actually read debugging 178 // information from. If the filename passed to Read refers to a dSYM bundle, 179 // then this is the resource file within that bundle. 180 std::string object_filename_; 181 182 // The complete contents of object_filename_, mapped into memory. 
183 scoped_array<uint8_t> contents_; 184 185 // The size of contents_. 186 size_t size_; 187 188 // Indicates which entry point to DumpSymbols was used, i.e. Read vs ReadData. 189 // This is used to indicate that downstream code paths can/should also read 190 // from disk or not. 191 bool from_disk_; 192 193 // A vector of SuperFatArch structures describing the object files 194 // object_filename_ contains. If object_filename_ refers to a fat binary, 195 // this may have more than one element; if it refers to a Mach-O file, this 196 // has exactly one element. 197 vector<SuperFatArch> object_files_; 198 199 // The object file in object_files_ selected to dump, or NULL if 200 // SetArchitecture hasn't been called yet. 201 const SuperFatArch* selected_object_file_; 202 203 // A string that identifies the selected object file, for use in error 204 // messages. This is usually object_filename_, but if that refers to a 205 // fat binary, it includes an indication of the particular architecture 206 // within that binary. 207 string selected_object_name_; 208 209 // Whether symbols sharing an address should be collapsed into a single entry 210 // and marked with an `m` in the output. 211 // See: https://crbug.com/google-breakpad/751 and docs at 212 // docs/symbol_files.md#records-3 213 bool enable_multiple_; 214 215 // If non-empty, used as the module name. Otherwise, the basename of 216 // |object_filename_| is used as the module name. 217 const std::string module_name_; 218 219 // If a Function and an Extern share the same address but have a different 220 // name, prefer the name of the Extern. 221 // 222 // Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables), 223 // as the Function's fully-qualified name will only be present in the STABS 224 // (which are placed in the Extern), not in the DWARF symbols (which are 225 // placed in the Function). 226 bool prefer_extern_name_; 227 }; 228 229 } // namespace google_breakpad 230