// -*- mode: c++ -*-

// Copyright 2010 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <[email protected]> <[email protected]>

// module.h: Define google_breakpad::Module. A Module holds debugging
// information, and can write that information out as a Breakpad
// symbol file.
36 37 #ifndef COMMON_LINUX_MODULE_H__ 38 #define COMMON_LINUX_MODULE_H__ 39 40 #include <functional> 41 #include <iostream> 42 #include <limits> 43 #include <map> 44 #include <memory> 45 #include <set> 46 #include <string> 47 #include <vector> 48 49 #include "common/string_view.h" 50 #include "common/symbol_data.h" 51 #include "common/unordered.h" 52 #include "common/using_std_string.h" 53 #include "google_breakpad/common/breakpad_types.h" 54 55 namespace google_breakpad { 56 57 using std::set; 58 using std::vector; 59 using std::map; 60 61 // A Module represents the contents of a module, and supports methods 62 // for adding information produced by parsing STABS or DWARF data 63 // --- possibly both from the same file --- and then writing out the 64 // unified contents as a Breakpad-format symbol file. 65 class Module { 66 public: 67 // The type of addresses and sizes in a symbol table. 68 typedef uint64_t Address; 69 static constexpr uint64_t kMaxAddress = std::numeric_limits<Address>::max(); 70 struct File; 71 struct Function; 72 struct InlineOrigin; 73 struct Inline; 74 struct Line; 75 struct Extern; 76 77 // Addresses appearing in File, Function, and Line structures are 78 // absolute, not relative to the the module's load address. That 79 // is, if the module were loaded at its nominal load address, the 80 // addresses would be correct. 81 82 // A source file. 83 struct File { FileFile84 explicit File(const string& name_input) : name(name_input), source_id(0) {} 85 86 // The name of the source file. 87 const string name; 88 89 // The file's source id. The Write member function clears this 90 // field and assigns source ids a fresh, so any value placed here 91 // before calling Write will be lost. 92 int source_id; 93 }; 94 95 // An address range. 96 struct Range { RangeRange97 Range(const Address address_input, const Address size_input) : 98 address(address_input), size(size_input) { } 99 100 Address address; 101 Address size; 102 }; 103 104 // A function. 
105 struct Function { FunctionFunction106 Function(StringView name_input, const Address& address_input) : 107 name(name_input), address(address_input), parameter_size(0) {} 108 109 // For sorting by address. (Not style-guide compliant, but it's 110 // stupid not to put this in the struct.) CompareByAddressFunction111 static bool CompareByAddress(const Function* x, const Function* y) { 112 return x->address < y->address; 113 } 114 115 // The function's name. 116 StringView name; 117 118 // The start address and the address ranges covered by the function. 119 const Address address; 120 vector<Range> ranges; 121 122 // The function's parameter size. 123 Address parameter_size; 124 125 // Source lines belonging to this function, sorted by increasing 126 // address. 127 vector<Line> lines; 128 129 // Inlined call sites belonging to this functions. 130 vector<std::unique_ptr<Inline>> inlines; 131 132 // If this symbol has been folded with other symbols in the linked binary. 133 bool is_multiple = false; 134 135 // If the function's name should be filled out from a matching Extern, 136 // should they not match. 137 bool prefer_extern_name = false; 138 }; 139 140 struct InlineOrigin { InlineOriginInlineOrigin141 explicit InlineOrigin(StringView name) : id(-1), name(name) {} 142 143 // A unique id for each InlineOrigin object. INLINE records use the id to 144 // refer to its INLINE_ORIGIN record. 145 int id; 146 147 // The inlined function's name. 148 StringView name; 149 }; 150 151 // A inlined call site. 
152 struct Inline { InlineInline153 Inline(InlineOrigin* origin, 154 const vector<Range>& ranges, 155 int call_site_line, 156 int call_site_file_id, 157 int inline_nest_level, 158 vector<std::unique_ptr<Inline>> child_inlines) 159 : origin(origin), 160 ranges(ranges), 161 call_site_line(call_site_line), 162 call_site_file_id(call_site_file_id), 163 call_site_file(nullptr), 164 inline_nest_level(inline_nest_level), 165 child_inlines(std::move(child_inlines)) {} 166 167 InlineOrigin* origin; 168 169 // The list of addresses and sizes. 170 vector<Range> ranges; 171 172 int call_site_line; 173 174 // The id is only meanful inside a CU. It's only used for looking up real 175 // File* after scanning a CU. 176 int call_site_file_id; 177 178 File* call_site_file; 179 180 int inline_nest_level; 181 182 // A list of inlines which are children of this inline. 183 vector<std::unique_ptr<Inline>> child_inlines; 184 getCallSiteFileIDInline185 int getCallSiteFileID() const { 186 return call_site_file ? call_site_file->source_id : -1; 187 } 188 InlineDFSInline189 static void InlineDFS( 190 vector<std::unique_ptr<Module::Inline>>& inlines, 191 std::function<void(std::unique_ptr<Module::Inline>&)> const& forEach) { 192 for (std::unique_ptr<Module::Inline>& in : inlines) { 193 forEach(in); 194 InlineDFS(in->child_inlines, forEach); 195 } 196 } 197 }; 198 199 typedef map<uint64_t, InlineOrigin*> InlineOriginByOffset; 200 201 class InlineOriginMap { 202 public: 203 // Add INLINE ORIGIN to the module. Return a pointer to origin . 204 InlineOrigin* GetOrCreateInlineOrigin(uint64_t offset, StringView name); 205 206 // offset is the offset of a DW_TAG_subprogram. specification_offset is the 207 // value of its DW_AT_specification or equals to offset if 208 // DW_AT_specification doesn't exist in that DIE. 
209 void SetReference(uint64_t offset, uint64_t specification_offset); 210 ~InlineOriginMap()211 ~InlineOriginMap() { 212 for (const auto& iter : inline_origins_) { 213 delete iter.second; 214 } 215 } 216 217 private: 218 // A map from a DW_TAG_subprogram's offset to the DW_TAG_subprogram. 219 InlineOriginByOffset inline_origins_; 220 221 // A map from a DW_TAG_subprogram's offset to the offset of its 222 // specification or abstract origin subprogram. The set of values in this 223 // map should always be the same set of keys in inline_origins_. 224 map<uint64_t, uint64_t> references_; 225 }; 226 227 map<std::string, InlineOriginMap> inline_origin_maps; 228 229 // A source line. 230 struct Line { 231 // For sorting by address. (Not style-guide compliant, but it's 232 // stupid not to put this in the struct.) CompareByAddressLine233 static bool CompareByAddress(const Module::Line& x, const Module::Line& y) { 234 return x.address < y.address; 235 } 236 237 Address address, size; // The address and size of the line's code. 238 File* file; // The source file. 239 int number; // The source line number. 240 }; 241 242 // An exported symbol. 243 struct Extern { ExternExtern244 explicit Extern(const Address& address_input) : address(address_input) {} 245 const Address address; 246 string name; 247 // If this symbol has been folded with other symbols in the linked binary. 248 bool is_multiple = false; 249 }; 250 251 // A map from register names to postfix expressions that recover 252 // their their values. This can represent a complete set of rules to 253 // follow at some address, or a set of changes to be applied to an 254 // extant set of rules. 255 typedef map<string, string> RuleMap; 256 257 // A map from addresses to RuleMaps, representing changes that take 258 // effect at given addresses. 259 typedef map<Address, RuleMap> RuleChangeMap; 260 261 // A range of 'STACK CFI' stack walking information. 
An instance of 262 // this structure corresponds to a 'STACK CFI INIT' record and the 263 // subsequent 'STACK CFI' records that fall within its range. 264 struct StackFrameEntry { 265 // The starting address and number of bytes of machine code this 266 // entry covers. 267 Address address, size; 268 269 // The initial register recovery rules, in force at the starting 270 // address. 271 RuleMap initial_rules; 272 273 // A map from addresses to rule changes. To find the rules in 274 // force at a given address, start with initial_rules, and then 275 // apply the changes given in this map for all addresses up to and 276 // including the address you're interested in. 277 RuleChangeMap rule_changes; 278 }; 279 280 struct FunctionCompare { operatorFunctionCompare281 bool operator() (const Function* lhs, const Function* rhs) const { 282 if (lhs->address == rhs->address) 283 return lhs->name < rhs->name; 284 return lhs->address < rhs->address; 285 } 286 }; 287 288 struct InlineOriginCompare { operatorInlineOriginCompare289 bool operator()(const InlineOrigin* lhs, const InlineOrigin* rhs) const { 290 return lhs->name < rhs->name; 291 } 292 }; 293 294 struct ExternCompare { 295 // Defining is_transparent allows 296 // std::set<std::unique_ptr<Extern>, ExternCompare>::find() to be called 297 // with an Extern* and have set use the overloads below. 298 using is_transparent = void; operatorExternCompare299 bool operator() (const std::unique_ptr<Extern>& lhs, 300 const std::unique_ptr<Extern>& rhs) const { 301 return lhs->address < rhs->address; 302 } operatorExternCompare303 bool operator() (const Extern* lhs, const std::unique_ptr<Extern>& rhs) const { 304 return lhs->address < rhs->address; 305 } operatorExternCompare306 bool operator() (const std::unique_ptr<Extern>& lhs, const Extern* rhs) const { 307 return lhs->address < rhs->address; 308 } 309 }; 310 311 // Create a new module with the given name, operating system, 312 // architecture, and ID string. 
313 // NB: `enable_multiple_field` is temporary while transitioning to enabling 314 // writing the multiple field permanently. 315 Module(const string& name, 316 const string& os, 317 const string& architecture, 318 const string& id, 319 const string& code_id = "", 320 bool enable_multiple_field = false, 321 bool prefer_extern_name = false); 322 ~Module(); 323 324 // Set the module's load address to LOAD_ADDRESS; addresses given 325 // for functions and lines will be written to the Breakpad symbol 326 // file as offsets from this address. Construction initializes this 327 // module's load address to zero: addresses written to the symbol 328 // file will be the same as they appear in the Function, Line, and 329 // StackFrameEntry structures. 330 // 331 // Note that this member function has no effect on addresses stored 332 // in the data added to this module; the Write member function 333 // simply subtracts off the load address from addresses before it 334 // prints them. Only the last load address given before calling 335 // Write is used. 336 void SetLoadAddress(Address load_address); 337 338 // Sets address filtering on elements added to the module. This allows 339 // libraries with extraneous debug symbols to generate symbol files containing 340 // only relevant symbols. For example, an LLD-generated partition library may 341 // contain debug information pertaining to all partitions derived from a 342 // single "combined" library. Filtering applies only to elements added after 343 // this method is called. 344 void SetAddressRanges(const vector<Range>& ranges); 345 346 // Add FUNCTION to the module. FUNCTION's name must not be empty. 347 // This module owns all Function objects added with this function: 348 // destroying the module destroys them as well. 349 // Return false if the function is duplicate and needs to be freed. 350 bool AddFunction(Function* function); 351 352 // Add STACK_FRAME_ENTRY to the module. 
353 // This module owns all StackFrameEntry objects added with this 354 // function: destroying the module destroys them as well. 355 void AddStackFrameEntry(std::unique_ptr<StackFrameEntry> stack_frame_entry); 356 357 // Add PUBLIC to the module. 358 // This module owns all Extern objects added with this function: 359 // destroying the module destroys them as well. 360 void AddExtern(std::unique_ptr<Extern> ext); 361 362 // If this module has a file named NAME, return a pointer to it. If 363 // it has none, then create one and return a pointer to the new 364 // file. This module owns all File objects created using these 365 // functions; destroying the module destroys them as well. 366 File* FindFile(const string& name); 367 File* FindFile(const char* name); 368 369 // If this module has a file named NAME, return a pointer to it. 370 // Otherwise, return NULL. 371 File* FindExistingFile(const string& name); 372 373 // Insert pointers to the functions added to this module at I in 374 // VEC. The pointed-to Functions are still owned by this module. 375 // (Since this is effectively a copy of the function list, this is 376 // mostly useful for testing; other uses should probably get a more 377 // appropriate interface.) 378 void GetFunctions(vector<Function*>* vec, vector<Function*>::iterator i); 379 380 // Insert pointers to the externs added to this module at I in 381 // VEC. The pointed-to Externs are still owned by this module. 382 // (Since this is effectively a copy of the extern list, this is 383 // mostly useful for testing; other uses should probably get a more 384 // appropriate interface.) 385 void GetExterns(vector<Extern*>* vec, vector<Extern*>::iterator i); 386 387 // Clear VEC and fill it with pointers to the Files added to this 388 // module, sorted by name. The pointed-to Files are still owned by 389 // this module. 
(Since this is effectively a copy of the file list, 390 // this is mostly useful for testing; other uses should probably get 391 // a more appropriate interface.) 392 void GetFiles(vector<File*>* vec); 393 394 // Clear VEC and fill it with pointers to the StackFrameEntry 395 // objects that have been added to this module. (Since this is 396 // effectively a copy of the stack frame entry list, this is mostly 397 // useful for testing; other uses should probably get 398 // a more appropriate interface.) 399 void GetStackFrameEntries(vector<StackFrameEntry*>* vec) const; 400 401 // Find those files in this module that are actually referred to by 402 // functions' line number data, and assign them source id numbers. 403 // Set the source id numbers for all other files --- unused by the 404 // source line data --- to -1. We do this before writing out the 405 // symbol file, at which point we omit any unused files. 406 void AssignSourceIds(); 407 408 // This function should be called before AssignSourceIds() to get the set of 409 // valid InlineOrigins*. 410 void CreateInlineOrigins( 411 set<InlineOrigin*, InlineOriginCompare>& inline_origins); 412 413 // Call AssignSourceIds, and write this module to STREAM in the 414 // breakpad symbol format. Return true if all goes well, or false if 415 // an error occurs. This method writes out: 416 // - a header based on the values given to the constructor, 417 // If symbol_data is not CFI then: 418 // - the source files added via FindFile, 419 // - the functions added via AddFunctions, each with its lines, 420 // - all public records, 421 // If symbol_data is CFI then: 422 // - all CFI records. 423 // Addresses in the output are all relative to the load address 424 // established by SetLoadAddress. 425 bool Write(std::ostream& stream, SymbolData symbol_data); 426 427 // Place the name in the global set of strings. Return a StringView points to 428 // a string inside the pool. 
AddStringToPool(const string & str)429 StringView AddStringToPool(const string& str) { 430 auto result = common_strings_.insert(str); 431 return *(result.first); 432 } 433 name()434 string name() const { return name_; } os()435 string os() const { return os_; } architecture()436 string architecture() const { return architecture_; } identifier()437 string identifier() const { return id_; } code_identifier()438 string code_identifier() const { return code_id_; } 439 440 private: 441 // Report an error that has occurred writing the symbol file, using 442 // errno to find the appropriate cause. Return false. 443 static bool ReportError(); 444 445 // Write RULE_MAP to STREAM, in the form appropriate for 'STACK CFI' 446 // records, without a final newline. Return true if all goes well; 447 // if an error occurs, return false, and leave errno set. 448 static bool WriteRuleMap(const RuleMap& rule_map, std::ostream& stream); 449 450 // Returns true of the specified address resides with an specified address 451 // range, or if no ranges have been specified. 452 bool AddressIsInModule(Address address) const; 453 454 // Module header entries. 455 string name_, os_, architecture_, id_, code_id_; 456 457 // The module's nominal load address. Addresses for functions and 458 // lines are absolute, assuming the module is loaded at this 459 // address. 460 Address load_address_; 461 462 // The set of valid address ranges of the module. If specified, attempts to 463 // add elements residing outside these ranges will be silently filtered. 464 vector<Range> address_ranges_; 465 466 // Relation for maps whose keys are strings shared with some other 467 // structure. 468 struct CompareStringPtrs { operatorCompareStringPtrs469 bool operator()(const string* x, const string* y) const { return *x < *y; } 470 }; 471 472 // A map from filenames to File structures. The map's keys are 473 // pointers to the Files' names. 
474 typedef map<const string*, File*, CompareStringPtrs> FileByNameMap; 475 476 // A set containing Function structures, sorted by address. 477 typedef set<Function*, FunctionCompare> FunctionSet; 478 479 // A set containing Extern structures, sorted by address. 480 typedef set<std::unique_ptr<Extern>, ExternCompare> ExternSet; 481 482 // The module owns all the files and functions that have been added 483 // to it; destroying the module frees the Files and Functions these 484 // point to. 485 FileByNameMap files_; // This module's source files. 486 FunctionSet functions_; // This module's functions. 487 // Used to quickly look up whether a function exists at a particular address. 488 unordered_set<Address> function_addresses_; 489 490 // The module owns all the call frame info entries that have been 491 // added to it. 492 vector<std::unique_ptr<StackFrameEntry>> stack_frame_entries_; 493 494 // The module owns all the externs that have been added to it; 495 // destroying the module frees the Externs these point to. 496 ExternSet externs_; 497 498 unordered_set<string> common_strings_; 499 500 // Whether symbols sharing an address should be collapsed into a single entry 501 // and marked with an `m` in the output. See 502 // https://bugs.chromium.org/p/google-breakpad/issues/detail?id=751 and docs 503 // at 504 // https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md#records-3 505 bool enable_multiple_field_; 506 507 // If a Function and an Extern share the same address but have a different 508 // name, prefer the name of the Extern. 509 // 510 // Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables), 511 // as the Function's fully-qualified name will only be present in the STABS 512 // (which are placed in the Extern), not in the DWARF symbols (which are 513 // placed in the Function). 514 bool prefer_extern_name_; 515 }; 516 517 } // namespace google_breakpad 518 519 #endif // COMMON_LINUX_MODULE_H__ 520