xref: /aosp_15_r20/external/google-breakpad/src/common/dwarf_cu_to_module.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // Copyright 2010 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 // Original author: Jim Blandy <[email protected]> <[email protected]>
30 
31 // Implement the DwarfCUToModule class; see dwarf_cu_to_module.h.
32 
33 // For <inttypes.h> PRI* macros, before anything else might #include it.
34 #ifndef __STDC_FORMAT_MACROS
35 #define __STDC_FORMAT_MACROS
36 #endif  /* __STDC_FORMAT_MACROS */
37 
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>  // Must come first
40 #endif
41 
42 #include "common/dwarf_cu_to_module.h"
43 
44 #include <assert.h>
45 #include <inttypes.h>
46 #include <stdint.h>
47 #include <stdio.h>
48 
49 #include <algorithm>
50 #include <memory>
51 #include <numeric>
52 #include <utility>
53 
54 #include "common/string_view.h"
55 #include "common/dwarf_line_to_module.h"
56 #include "google_breakpad/common/breakpad_types.h"
57 
58 namespace google_breakpad {
59 
60 using std::accumulate;
61 using std::map;
62 using std::pair;
63 using std::sort;
64 using std::vector;
65 using std::unique_ptr;
66 
67 // Data provided by a DWARF specification DIE.
68 //
69 // In DWARF, the DIE for a definition may contain a DW_AT_specification
70 // attribute giving the offset of the corresponding declaration DIE, and
71 // the definition DIE may omit information given in the declaration. For
72 // example, it's common for a function's address range to appear only in
73 // its definition DIE, but its name to appear only in its declaration
74 // DIE.
75 //
76 // The dumper needs to be able to follow DW_AT_specification links to
77 // bring all this information together in a FUNC record. Conveniently,
78 // DIEs that are the target of such links have a DW_AT_declaration flag
79 // set, so we can identify them when we first see them, and record their
80 // contents for later reference.
81 //
82 // A Specification holds information gathered from a declaration DIE that
83 // we may need if we find a DW_AT_specification link pointing to it.
84 struct DwarfCUToModule::Specification {
85   // The qualified name that can be found by demangling DW_AT_MIPS_linkage_name.
86   StringView qualified_name;
87 
88   // The name of the enclosing scope, or the empty string if there is none.
89   StringView enclosing_name;
90 
91   // The name for the specification DIE itself, without any enclosing
92   // name components.
93   StringView unqualified_name;
94 };
95 
96 // An abstract origin -- base definition of an inline function.
97 struct AbstractOrigin {
AbstractOrigingoogle_breakpad::AbstractOrigin98   explicit AbstractOrigin(StringView name) : name(name) {}
99 
100   StringView name;
101 };
102 
103 typedef map<uint64_t, AbstractOrigin> AbstractOriginByOffset;
104 
105 // Data global to the DWARF-bearing file that is private to the
106 // DWARF-to-Module process.
107 struct DwarfCUToModule::FilePrivate {
108   // A map from offsets of DIEs within the .debug_info section to
109   // Specifications describing those DIEs. Specification references can
110   // cross compilation unit boundaries.
111   SpecificationByOffset specifications;
112 
113   AbstractOriginByOffset origins;
114 
115   // Keep a list of forward references from DW_AT_abstract_origin and
116   // DW_AT_specification attributes so names can be fixed up.
117   std::map<uint64_t, Module::Function*> forward_ref_die_to_func;
118 };
119 
FileContext(const string & filename,Module * module,bool handle_inter_cu_refs)120 DwarfCUToModule::FileContext::FileContext(const string& filename,
121                                           Module* module,
122                                           bool handle_inter_cu_refs)
123     : filename_(filename),
124       module_(module),
125       handle_inter_cu_refs_(handle_inter_cu_refs),
126       file_private_(new FilePrivate()) {
127 }
128 
~FileContext()129 DwarfCUToModule::FileContext::~FileContext() {
130   for (std::vector<uint8_t *>::iterator i = uncompressed_sections_.begin();
131         i != uncompressed_sections_.end(); ++i) {
132     delete[] *i;
133   }
134 }
135 
AddSectionToSectionMap(const string & name,const uint8_t * contents,uint64_t length)136 void DwarfCUToModule::FileContext::AddSectionToSectionMap(
137     const string& name, const uint8_t* contents, uint64_t length) {
138   section_map_[name] = std::make_pair(contents, length);
139 }
140 
AddManagedSectionToSectionMap(const string & name,uint8_t * contents,uint64_t length)141 void DwarfCUToModule::FileContext::AddManagedSectionToSectionMap(
142     const string& name, uint8_t* contents, uint64_t length) {
143   section_map_[name] = std::make_pair(contents, length);
144   uncompressed_sections_.push_back(contents);
145 }
146 
ClearSectionMapForTest()147 void DwarfCUToModule::FileContext::ClearSectionMapForTest() {
148   section_map_.clear();
149 }
150 
151 const SectionMap&
section_map() const152 DwarfCUToModule::FileContext::section_map() const {
153   return section_map_;
154 }
155 
ClearSpecifications()156 void DwarfCUToModule::FileContext::ClearSpecifications() {
157   if (!handle_inter_cu_refs_)
158     file_private_->specifications.clear();
159 }
160 
IsUnhandledInterCUReference(uint64_t offset,uint64_t compilation_unit_start) const161 bool DwarfCUToModule::FileContext::IsUnhandledInterCUReference(
162     uint64_t offset, uint64_t compilation_unit_start) const {
163   if (handle_inter_cu_refs_)
164     return false;
165   return offset < compilation_unit_start;
166 }
167 
168 // Information global to the particular compilation unit we're
169 // parsing. This is for data shared across the CU's entire DIE tree,
170 // and parameters from the code invoking the CU parser.
171 struct DwarfCUToModule::CUContext {
CUContextgoogle_breakpad::DwarfCUToModule::CUContext172   CUContext(FileContext* file_context_arg,
173             WarningReporter* reporter_arg,
174             RangesHandler* ranges_handler_arg,
175             uint64_t low_pc,
176             uint64_t addr_base)
177       : version(0),
178         file_context(file_context_arg),
179         reporter(reporter_arg),
180         ranges_handler(ranges_handler_arg),
181         language(Language::CPlusPlus),
182         low_pc(low_pc),
183         high_pc(0),
184         ranges_form(DW_FORM_sec_offset),
185         ranges_data(0),
186         ranges_base(0),
187         addr_base(addr_base),
188         str_offsets_base(0) {}
189 
~CUContextgoogle_breakpad::DwarfCUToModule::CUContext190   ~CUContext() {
191     for (vector<Module::Function*>::iterator it = functions.begin();
192          it != functions.end(); ++it) {
193       delete *it;
194     }
195   };
196 
197   // Dwarf version of the source CU.
198   uint8_t version;
199 
200   // The DWARF-bearing file into which this CU was incorporated.
201   FileContext* file_context;
202 
203   // For printing error messages.
204   WarningReporter* reporter;
205 
206   // For reading ranges from the .debug_ranges section
207   RangesHandler* ranges_handler;
208 
209   // The source language of this compilation unit.
210   const Language* language;
211 
212   // Addresses covered by this CU. If high_pc_ is non-zero then the CU covers
213   // low_pc to high_pc, otherwise ranges_data is non-zero and low_pc represents
214   // the base address of the ranges covered by the CU. ranges_data will define
215   // the CU's actual ranges.
216   uint64_t low_pc;
217   uint64_t high_pc;
218 
219   // Ranges for this CU are read according to this form.
220   enum DwarfForm ranges_form;
221   uint64_t ranges_data;
222 
223   // Offset into .debug_rngslists where this CU's ranges are stored.
224   // Data in DW_FORM_rnglistx is relative to this offset.
225   uint64_t ranges_base;
226 
227   // Offset into .debug_addr where this CU's addresses are stored. Data in
228   // form DW_FORM_addrxX is relative to this offset.
229   uint64_t addr_base;
230 
231   // Offset into this CU's contribution to .debug_str_offsets.
232   uint64_t str_offsets_base;
233 
234   // Collect all the data from the CU that a RangeListReader needs to read a
235   // range.
AssembleRangeListInfogoogle_breakpad::DwarfCUToModule::CUContext236   bool AssembleRangeListInfo(
237       RangeListReader::CURangesInfo* info) {
238     const SectionMap& section_map
239         = file_context->section_map();
240     info->version_ = version;
241     info->base_address_ = low_pc;
242     info->ranges_base_ = ranges_base;
243     const char* section_name = (version <= 4 ?
244                                 ".debug_ranges" : ".debug_rnglists");
245     SectionMap::const_iterator map_entry
246         = GetSectionByName(section_map, section_name);
247     if (map_entry == section_map.end()) {
248       return false;
249     }
250     info->buffer_ = map_entry->second.first;
251     info->size_ = map_entry->second.second;
252     if (version > 4) {
253       SectionMap::const_iterator map_entry
254           = GetSectionByName(section_map, ".debug_addr");
255       if (map_entry == section_map.end()) {
256         return false;
257       }
258       info->addr_buffer_ = map_entry->second.first;
259       info->addr_buffer_size_ = map_entry->second.second;
260       info->addr_base_ = addr_base;
261     }
262     return true;
263   }
264 
265   // The functions defined in this compilation unit. We accumulate
266   // them here during parsing. Then, in DwarfCUToModule::Finish, we
267   // assign them lines and add them to file_context->module.
268   //
269   // Destroying this destroys all the functions this vector points to.
270   vector<Module::Function*> functions;
271 
272   // A map of function pointers to the its forward specification DIE's offset.
273   map<Module::Function*, uint64_t> spec_function_offsets;
274 };
275 
276 // Information about the context of a particular DIE. This is for
277 // information that changes as we descend the tree towards the leaves:
278 // the containing classes/namespaces, etc.
279 struct DwarfCUToModule::DIEContext {
280   // The fully-qualified name of the context. For example, for a
281   // tree like:
282   //
283   // DW_TAG_namespace Foo
284   //   DW_TAG_class Bar
285   //     DW_TAG_subprogram Baz
286   //
287   // in a C++ compilation unit, the DIEContext's name for the
288   // DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's
289   // name for the DW_TAG_namespace DIE would be "".
290   StringView name;
291 };
292 
293 // An abstract base class for all the dumper's DIE handlers.
294 class DwarfCUToModule::GenericDIEHandler: public DIEHandler {
295  public:
296   // Create a handler for the DIE at OFFSET whose compilation unit is
297   // described by CU_CONTEXT, and whose immediate context is described
298   // by PARENT_CONTEXT.
GenericDIEHandler(CUContext * cu_context,DIEContext * parent_context,uint64_t offset)299   GenericDIEHandler(CUContext* cu_context, DIEContext* parent_context,
300                     uint64_t offset)
301       : cu_context_(cu_context),
302         parent_context_(parent_context),
303         offset_(offset),
304         declaration_(false),
305         specification_(NULL),
306         no_specification(false),
307         abstract_origin_(NULL),
308         forward_ref_die_offset_(0), specification_offset_(0) { }
309 
310   // Derived classes' ProcessAttributeUnsigned can defer to this to
311   // handle DW_AT_declaration, or simply not override it.
312   void ProcessAttributeUnsigned(enum DwarfAttribute attr,
313                                 enum DwarfForm form,
314                                 uint64_t data);
315 
316   // Derived classes' ProcessAttributeReference can defer to this to
317   // handle DW_AT_specification, or simply not override it.
318   void ProcessAttributeReference(enum DwarfAttribute attr,
319                                  enum DwarfForm form,
320                                  uint64_t data);
321 
322   // Derived classes' ProcessAttributeReference can defer to this to
323   // handle DW_AT_specification, or simply not override it.
324   void ProcessAttributeString(enum DwarfAttribute attr,
325                               enum DwarfForm form,
326                               const string& data);
327 
328  protected:
329   // Compute and return the fully-qualified name of the DIE. If this
330   // DIE is a declaration DIE, to be cited by other DIEs'
331   // DW_AT_specification attributes, record its enclosing name and
332   // unqualified name in the specification table.
333   //
334   // Use this from EndAttributes member functions, not ProcessAttribute*
335   // functions; only the former can be sure that all the DIE's attributes
336   // have been seen.
337   //
338   // On return, if has_qualified_name is non-NULL, *has_qualified_name is set to
339   // true if the DIE includes a fully-qualified name, false otherwise.
340   StringView ComputeQualifiedName(bool* has_qualified_name);
341 
342   CUContext* cu_context_;
343   DIEContext* parent_context_;
344   uint64_t offset_;
345 
346   // If this DIE has a DW_AT_declaration attribute, this is its value.
347   // It is false on DIEs with no DW_AT_declaration attribute.
348   bool declaration_;
349 
350   // If this DIE has a DW_AT_specification attribute, this is the
351   // Specification structure for the DIE the attribute refers to.
352   // Otherwise, this is NULL.
353   Specification* specification_;
354 
355   // If this DIE has DW_AT_specification with offset smaller than this DIE and
356   // we can't find that in the specification map.
357   bool no_specification;
358 
359   // If this DIE has a DW_AT_abstract_origin attribute, this is the
360   // AbstractOrigin structure for the DIE the attribute refers to.
361   // Otherwise, this is NULL.
362   const AbstractOrigin* abstract_origin_;
363 
364   // If this DIE has a DW_AT_specification or DW_AT_abstract_origin and it is a
365   // forward reference, no Specification will be available. Track the reference
366   // to be fixed up when the DIE is parsed.
367   uint64_t forward_ref_die_offset_;
368 
369   // The root offset of Specification or abstract origin.
370   uint64_t specification_offset_;
371 
372   // The value of the DW_AT_name attribute, or the empty string if the
373   // DIE has no such attribute.
374   StringView name_attribute_;
375 
376   // The demangled value of the DW_AT_MIPS_linkage_name attribute, or the empty
377   // string if the DIE has no such attribute or its content could not be
378   // demangled.
379   StringView demangled_name_;
380 
381   // The non-demangled value of the DW_AT_MIPS_linkage_name attribute,
382   // it its content count not be demangled.
383   StringView raw_name_;
384 };
385 
ProcessAttributeUnsigned(enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)386 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned(
387     enum DwarfAttribute attr,
388     enum DwarfForm form,
389     uint64_t data) {
390   switch (attr) {
391     case DW_AT_declaration: declaration_ = (data != 0); break;
392     default: break;
393   }
394 }
395 
ProcessAttributeReference(enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)396 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference(
397     enum DwarfAttribute attr,
398     enum DwarfForm form,
399     uint64_t data) {
400   switch (attr) {
401     case DW_AT_specification: {
402       FileContext* file_context = cu_context_->file_context;
403       if (file_context->IsUnhandledInterCUReference(
404               data, cu_context_->reporter->cu_offset())) {
405         cu_context_->reporter->UnhandledInterCUReference(offset_, data);
406         break;
407       }
408       // Find the Specification to which this attribute refers, and
409       // set specification_ appropriately. We could do more processing
410       // here, but it's better to leave the real work to our
411       // EndAttribute member function, at which point we know we have
412       // seen all the DIE's attributes.
413       SpecificationByOffset* specifications =
414           &file_context->file_private_->specifications;
415       SpecificationByOffset::iterator spec = specifications->find(data);
416       if (spec != specifications->end()) {
417         specification_ = &spec->second;
418       } else if (data > offset_) {
419         forward_ref_die_offset_ = data;
420       } else {
421         no_specification = true;
422       }
423       specification_offset_ = data;
424       break;
425     }
426     case DW_AT_abstract_origin: {
427       const AbstractOriginByOffset& origins =
428           cu_context_->file_context->file_private_->origins;
429       AbstractOriginByOffset::const_iterator origin = origins.find(data);
430       if (origin != origins.end()) {
431         abstract_origin_ = &(origin->second);
432       } else if (data > offset_) {
433         forward_ref_die_offset_ = data;
434       }
435       specification_offset_ = data;
436       break;
437     }
438     default: break;
439   }
440 }
441 
ProcessAttributeString(enum DwarfAttribute attr,enum DwarfForm form,const string & data)442 void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
443     enum DwarfAttribute attr,
444     enum DwarfForm form,
445     const string& data) {
446   switch (attr) {
447     case DW_AT_name:
448       name_attribute_ =
449           cu_context_->file_context->module_->AddStringToPool(data);
450       break;
451     case DW_AT_MIPS_linkage_name:
452     case DW_AT_linkage_name: {
453       string demangled;
454       Language::DemangleResult result =
455           cu_context_->language->DemangleName(data, &demangled);
456       switch (result) {
457         case Language::kDemangleSuccess:
458           demangled_name_ =
459               cu_context_->file_context->module_->AddStringToPool(demangled);
460           break;
461 
462         case Language::kDemangleFailure:
463           cu_context_->reporter->DemangleError(data);
464           // fallthrough
465         case Language::kDontDemangle:
466           demangled_name_ = StringView();
467           raw_name_ = cu_context_->file_context->module_->AddStringToPool(data);
468           break;
469       }
470       break;
471     }
472     default: break;
473   }
474 }
475 
ComputeQualifiedName(bool * has_qualified_name)476 StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName(
477     bool* has_qualified_name) {
478   // Use the demangled name, if one is available. Demangled names are
479   // preferable to those inferred from the DWARF structure because they
480   // include argument types.
481   StringView* qualified_name = nullptr;
482   if (!demangled_name_.empty()) {
483     // Found it is this DIE.
484     qualified_name = &demangled_name_;
485   } else if (specification_ && !specification_->qualified_name.empty()) {
486     // Found it on the specification.
487     qualified_name = &specification_->qualified_name;
488   }
489 
490   StringView* unqualified_name = nullptr;
491   StringView* enclosing_name = nullptr;
492   if (!qualified_name) {
493     if (has_qualified_name) {
494       // dSYMs built with -gmlt do not include the DW_AT_linkage_name
495       // with the unmangled symbol, but rather include it in the
496       // LC_SYMTAB STABS, which end up in the externs of the module.
497       //
498       // Remember this so the Module can copy over the extern name later.
499       *has_qualified_name = false;
500     }
501 
502     // Find the unqualified name. If the DIE has its own DW_AT_name
503     // attribute, then use that; otherwise, check the specification.
504     if (!name_attribute_.empty()) {
505       unqualified_name = &name_attribute_;
506     } else if (specification_) {
507       unqualified_name = &specification_->unqualified_name;
508     } else if (!raw_name_.empty()) {
509       unqualified_name = &raw_name_;
510     }
511 
512     // Find the name of the enclosing context. If this DIE has a
513     // specification, it's the specification's enclosing context that
514     // counts; otherwise, use this DIE's context.
515     if (specification_) {
516       enclosing_name = &specification_->enclosing_name;
517     } else if (parent_context_) {
518       enclosing_name = &parent_context_->name;
519     }
520   } else {
521     if (has_qualified_name) {
522       *has_qualified_name = true;
523     }
524   }
525 
526   // Prepare the return value before upcoming mutations possibly invalidate the
527   // existing pointers.
528   string return_value;
529   if (qualified_name) {
530     return_value = qualified_name->str();
531   } else if (unqualified_name && enclosing_name) {
532     // Combine the enclosing name and unqualified name to produce our
533     // own fully-qualified name.
534     return_value = cu_context_->language->MakeQualifiedName(
535         enclosing_name->str(), unqualified_name->str());
536   }
537 
538   // If this DIE was marked as a declaration, record its names in the
539   // specification table.
540   if ((declaration_ && qualified_name) ||
541       (unqualified_name && enclosing_name)) {
542     Specification spec;
543     if (qualified_name) {
544       spec.qualified_name = *qualified_name;
545     } else {
546       spec.enclosing_name = *enclosing_name;
547       spec.unqualified_name = *unqualified_name;
548     }
549     cu_context_->file_context->file_private_->specifications[offset_] = spec;
550   }
551 
552   return cu_context_->file_context->module_->AddStringToPool(return_value);
553 }
554 
IsEmptyRange(const vector<Module::Range> & ranges)555 static bool IsEmptyRange(const vector<Module::Range>& ranges) {
556   uint64_t size = accumulate(ranges.cbegin(), ranges.cend(), 0,
557     [](uint64_t total, Module::Range entry) {
558       return total + entry.size;
559     }
560   );
561 
562   return size == 0;
563 }
564 
565 
566 // A handler for DW_TAG_inlined_subroutine DIEs.
567 class DwarfCUToModule::InlineHandler : public GenericDIEHandler {
568  public:
InlineHandler(CUContext * cu_context,DIEContext * parent_context,uint64_t offset,int inline_nest_level,vector<unique_ptr<Module::Inline>> & inlines)569   InlineHandler(CUContext* cu_context,
570                 DIEContext* parent_context,
571                 uint64_t offset,
572                 int inline_nest_level,
573                 vector<unique_ptr<Module::Inline>>& inlines)
574       : GenericDIEHandler(cu_context, parent_context, offset),
575         low_pc_(0),
576         high_pc_(0),
577         high_pc_form_(DW_FORM_addr),
578         ranges_form_(DW_FORM_sec_offset),
579         ranges_data_(0),
580         call_site_line_(0),
581         inline_nest_level_(inline_nest_level),
582         has_range_data_(false),
583         inlines_(inlines) {}
584 
585   void ProcessAttributeUnsigned(enum DwarfAttribute attr,
586                                 enum DwarfForm form,
587                                 uint64_t data);
588   DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag);
589   bool EndAttributes();
590   void Finish();
591 
592  private:
593   // The fully-qualified name, as derived from name_attribute_,
594   // specification_, parent_context_. Computed in EndAttributes.
595   StringView name_;
596   uint64_t low_pc_;            // DW_AT_low_pc
597   uint64_t high_pc_;           // DW_AT_high_pc
598   DwarfForm high_pc_form_;     // DW_AT_high_pc can be length or address.
599   DwarfForm ranges_form_;      // DW_FORM_sec_offset or DW_FORM_rnglistx
600   uint64_t ranges_data_;       // DW_AT_ranges
601   int call_site_line_;         // DW_AT_call_line
602   int call_site_file_id_;      // DW_AT_call_file
603   int inline_nest_level_;
604   bool has_range_data_;
605   // A vector of inlines in the same nest level. It's owned by its parent
606   // function/inline. At Finish(), add this inline into the vector.
607   vector<unique_ptr<Module::Inline>>& inlines_;
608   // A vector of child inlines.
609   vector<unique_ptr<Module::Inline>> child_inlines_;
610 };
611 
ProcessAttributeUnsigned(enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)612 void DwarfCUToModule::InlineHandler::ProcessAttributeUnsigned(
613     enum DwarfAttribute attr,
614     enum DwarfForm form,
615     uint64_t data) {
616   switch (attr) {
617     case DW_AT_low_pc:
618       low_pc_ = data;
619       break;
620     case DW_AT_high_pc:
621       high_pc_form_ = form;
622       high_pc_ = data;
623       break;
624     case DW_AT_ranges:
625       has_range_data_ = true;
626       ranges_data_ = data;
627       ranges_form_ = form;
628       break;
629     case DW_AT_call_line:
630       call_site_line_ = data;
631       break;
632     case DW_AT_call_file:
633       call_site_file_id_ = data;
634       break;
635     default:
636       GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data);
637       break;
638   }
639 }
640 
FindChildHandler(uint64_t offset,enum DwarfTag tag)641 DIEHandler* DwarfCUToModule::InlineHandler::FindChildHandler(
642     uint64_t offset,
643     enum DwarfTag tag) {
644   switch (tag) {
645     case DW_TAG_inlined_subroutine:
646       return new InlineHandler(cu_context_, nullptr, offset,
647                                inline_nest_level_ + 1, child_inlines_);
648     default:
649       return NULL;
650   }
651 }
652 
EndAttributes()653 bool DwarfCUToModule::InlineHandler::EndAttributes() {
654   if (abstract_origin_)
655     name_ = abstract_origin_->name;
656   if (name_.empty()) {
657     // We haven't seen the abstract origin yet, which might appears later and we
658     // will fix the name after calling
659     // InlineOriginMap::GetOrCreateInlineOrigin with right name.
660     name_ =
661         cu_context_->file_context->module_->AddStringToPool("<name omitted>");
662   }
663   return true;
664 }
665 
Finish()666 void DwarfCUToModule::InlineHandler::Finish() {
667   vector<Module::Range> ranges;
668 
669   if (!has_range_data_) {
670     if (high_pc_form_ != DW_FORM_addr &&
671         high_pc_form_ != DW_FORM_GNU_addr_index &&
672         high_pc_form_ != DW_FORM_addrx &&
673         high_pc_form_ != DW_FORM_addrx1 &&
674         high_pc_form_ != DW_FORM_addrx2 &&
675         high_pc_form_ != DW_FORM_addrx3 &&
676         high_pc_form_ != DW_FORM_addrx4) {
677       high_pc_ += low_pc_;
678     }
679 
680     Module::Range range(low_pc_, high_pc_ - low_pc_);
681     ranges.push_back(range);
682   } else {
683     RangesHandler* ranges_handler = cu_context_->ranges_handler;
684     if (ranges_handler) {
685       RangeListReader::CURangesInfo cu_info;
686       if (cu_context_->AssembleRangeListInfo(&cu_info)) {
687         if (!ranges_handler->ReadRanges(ranges_form_, ranges_data_,
688                                         &cu_info, &ranges)) {
689           ranges.clear();
690           cu_context_->reporter->MalformedRangeList(ranges_data_);
691         }
692       } else {
693         cu_context_->reporter->MissingRanges();
694       }
695     }
696   }
697 
698   // Ignore DW_TAG_inlined_subroutine with empty range.
699   if (ranges.empty()) {
700     return;
701   }
702 
703   // Every DW_TAG_inlined_subroutine should have a DW_AT_abstract_origin.
704   assert(specification_offset_ != 0);
705 
706   Module::InlineOriginMap& inline_origin_map =
707       cu_context_->file_context->module_
708           ->inline_origin_maps[cu_context_->file_context->filename_];
709   inline_origin_map.SetReference(specification_offset_, specification_offset_);
710   Module::InlineOrigin* origin =
711       inline_origin_map.GetOrCreateInlineOrigin(specification_offset_, name_);
712   unique_ptr<Module::Inline> in(
713       new Module::Inline(origin, ranges, call_site_line_, call_site_file_id_,
714                          inline_nest_level_, std::move(child_inlines_)));
715   inlines_.push_back(std::move(in));
716 }
717 
718 // A handler for DIEs that contain functions and contribute a
719 // component to their names: namespaces, classes, etc.
720 class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler {
721  public:
NamedScopeHandler(CUContext * cu_context,DIEContext * parent_context,uint64_t offset,bool handle_inline)722   NamedScopeHandler(CUContext* cu_context,
723                     DIEContext* parent_context,
724                     uint64_t offset,
725                     bool handle_inline)
726       : GenericDIEHandler(cu_context, parent_context, offset),
727         handle_inline_(handle_inline) {}
728   bool EndAttributes();
729   DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag);
730 
731  private:
732   DIEContext child_context_; // A context for our children.
733   bool handle_inline_;
734 };
735 
736 // A handler class for DW_TAG_subprogram DIEs.
737 class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
738  public:
FuncHandler(CUContext * cu_context,DIEContext * parent_context,uint64_t offset,bool handle_inline)739   FuncHandler(CUContext* cu_context,
740               DIEContext* parent_context,
741               uint64_t offset,
742               bool handle_inline)
743       : GenericDIEHandler(cu_context, parent_context, offset),
744         low_pc_(0),
745         high_pc_(0),
746         high_pc_form_(DW_FORM_addr),
747         ranges_form_(DW_FORM_sec_offset),
748         ranges_data_(0),
749         inline_(false),
750         handle_inline_(handle_inline),
751         has_qualified_name_(false),
752         has_range_data_(false) {}
753 
754   void ProcessAttributeUnsigned(enum DwarfAttribute attr,
755                                 enum DwarfForm form,
756                                 uint64_t data);
757   void ProcessAttributeSigned(enum DwarfAttribute attr,
758                               enum DwarfForm form,
759                               int64_t data);
760   DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag);
761   bool EndAttributes();
762   void Finish();
763 
764  private:
765   // The fully-qualified name, as derived from name_attribute_,
766   // specification_, parent_context_.  Computed in EndAttributes.
767   StringView name_;
768   uint64_t low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc
769   DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address.
770   DwarfForm ranges_form_; // DW_FORM_sec_offset or DW_FORM_rnglistx
771   uint64_t ranges_data_;  // DW_AT_ranges
772   bool inline_;
773   vector<unique_ptr<Module::Inline>> child_inlines_;
774   bool handle_inline_;
775   bool has_qualified_name_;
776   bool has_range_data_;
777   DIEContext child_context_; // A context for our children.
778 };
779 
ProcessAttributeUnsigned(enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)780 void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned(
781     enum DwarfAttribute attr,
782     enum DwarfForm form,
783     uint64_t data) {
784   switch (attr) {
785     // If this attribute is present at all --- even if its value is
786     // DW_INL_not_inlined --- then GCC may cite it as someone else's
787     // DW_AT_abstract_origin attribute.
788     case DW_AT_inline:      inline_  = true; break;
789 
790     case DW_AT_low_pc:      low_pc_  = data; break;
791     case DW_AT_high_pc:
792       high_pc_form_ = form;
793       high_pc_ = data;
794       break;
795     case DW_AT_ranges:
796       has_range_data_ = true;
797       ranges_data_ = data;
798       ranges_form_ = form;
799       break;
800     default:
801       GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data);
802       break;
803   }
804 }
805 
ProcessAttributeSigned(enum DwarfAttribute attr,enum DwarfForm form,int64_t data)806 void DwarfCUToModule::FuncHandler::ProcessAttributeSigned(
807     enum DwarfAttribute attr,
808     enum DwarfForm form,
809     int64_t data) {
810   switch (attr) {
811     // If this attribute is present at all --- even if its value is
812     // DW_INL_not_inlined --- then GCC may cite it as someone else's
813     // DW_AT_abstract_origin attribute.
814     case DW_AT_inline:      inline_  = true; break;
815 
816     default:
817       break;
818   }
819 }
820 
FindChildHandler(uint64_t offset,enum DwarfTag tag)821 DIEHandler* DwarfCUToModule::FuncHandler::FindChildHandler(
822     uint64_t offset,
823     enum DwarfTag tag) {
824   switch (tag) {
825     case DW_TAG_inlined_subroutine:
826       if (handle_inline_)
827         return new InlineHandler(cu_context_, nullptr, offset, 0,
828                                  child_inlines_);
829     case DW_TAG_class_type:
830     case DW_TAG_structure_type:
831     case DW_TAG_union_type:
832       return new NamedScopeHandler(cu_context_, &child_context_, offset,
833                                    handle_inline_);
834     default:
835       return NULL;
836   }
837 }
838 
EndAttributes()839 bool DwarfCUToModule::FuncHandler::EndAttributes() {
840   // Compute our name, and record a specification, if appropriate.
841   name_ = ComputeQualifiedName(&has_qualified_name_);
842   if (name_.empty() && abstract_origin_) {
843     name_ = abstract_origin_->name;
844   }
845   child_context_.name = name_;
846   if (name_.empty() && no_specification) {
847     cu_context_->reporter->UnknownSpecification(offset_, specification_offset_);
848   }
849   return true;
850 }
851 
Finish()852 void DwarfCUToModule::FuncHandler::Finish() {
853   vector<Module::Range> ranges;
854 
855   // Check if this DIE was one of the forward references that was not able
856   // to be processed, and fix up the name of the appropriate Module::Function.
857   // "name_" will have already been fixed up in EndAttributes().
858   if (!name_.empty()) {
859     auto iter =
860         cu_context_->file_context->file_private_->forward_ref_die_to_func.find(
861             offset_);
862     if (iter !=
863         cu_context_->file_context->file_private_->forward_ref_die_to_func.end())
864       iter->second->name = name_;
865   }
866 
867   if (!has_range_data_) {
868     // Make high_pc_ an address, if it isn't already.
869     if (high_pc_form_ != DW_FORM_addr &&
870         high_pc_form_ != DW_FORM_GNU_addr_index &&
871         high_pc_form_ != DW_FORM_addrx &&
872         high_pc_form_ != DW_FORM_addrx1 &&
873         high_pc_form_ != DW_FORM_addrx2 &&
874         high_pc_form_ != DW_FORM_addrx3 &&
875         high_pc_form_ != DW_FORM_addrx4) {
876       high_pc_ += low_pc_;
877     }
878 
879     Module::Range range(low_pc_, high_pc_ - low_pc_);
880     ranges.push_back(range);
881   } else {
882     RangesHandler* ranges_handler = cu_context_->ranges_handler;
883     if (ranges_handler) {
884       RangeListReader::CURangesInfo cu_info;
885       if (cu_context_->AssembleRangeListInfo(&cu_info)) {
886         if (!ranges_handler->ReadRanges(ranges_form_, ranges_data_,
887                                         &cu_info, &ranges)) {
888           ranges.clear();
889           cu_context_->reporter->MalformedRangeList(ranges_data_);
890         }
891       } else {
892         cu_context_->reporter->MissingRanges();
893       }
894     }
895   }
896 
897   StringView name_omitted =
898       cu_context_->file_context->module_->AddStringToPool("<name omitted>");
899   bool empty_range = IsEmptyRange(ranges);
900   // Did we collect the information we need?  Not all DWARF function
901   // entries are non-empty (for example, inlined functions that were never
902   // used), but all the ones we're interested in cover a non-empty range of
903   // bytes.
904   if (!empty_range) {
905     low_pc_ = ranges.front().address;
906     // Malformed DWARF may omit the name, but all Module::Functions must
907     // have names.
908     StringView name = name_.empty() ? name_omitted : name_;
909     // Create a Module::Function based on the data we've gathered, and
910     // add it to the functions_ list.
911     scoped_ptr<Module::Function> func(new Module::Function(name, low_pc_));
912     func->ranges = ranges;
913     func->parameter_size = 0;
914     // If the name was unqualified, prefer the Extern name if there's a mismatch
915     // (the Extern name will be fully-qualified in that case).
916     func->prefer_extern_name = !has_qualified_name_;
917     if (func->address) {
918       // If the function address is zero this is a sign that this function
919       // description is just empty debug data and should just be discarded.
920       cu_context_->functions.push_back(func.release());
921       if (forward_ref_die_offset_ != 0) {
922         cu_context_->file_context->file_private_
923             ->forward_ref_die_to_func[forward_ref_die_offset_] =
924             cu_context_->functions.back();
925 
926         cu_context_->spec_function_offsets[cu_context_->functions.back()] =
927             forward_ref_die_offset_;
928       }
929 
930       cu_context_->functions.back()->inlines.swap(child_inlines_);
931     }
932   } else if (inline_) {
933     AbstractOrigin origin(name_);
934     cu_context_->file_context->file_private_->origins.insert({offset_, origin});
935   }
936 
937   // Only keep track of DW_TAG_subprogram which have the attributes we are
938   // interested.
939   if (handle_inline_ && (!empty_range || inline_)) {
940     StringView name = name_.empty() ? name_omitted : name_;
941     uint64_t offset =
942         specification_offset_ != 0 ? specification_offset_ : offset_;
943     Module::InlineOriginMap& inline_origin_map =
944         cu_context_->file_context->module_
945             ->inline_origin_maps[cu_context_->file_context->filename_];
946     inline_origin_map.SetReference(offset_, offset);
947     inline_origin_map.GetOrCreateInlineOrigin(offset_, name);
948   }
949 }
950 
EndAttributes()951 bool DwarfCUToModule::NamedScopeHandler::EndAttributes() {
952   child_context_.name = ComputeQualifiedName(NULL);
953   if (child_context_.name.empty() && no_specification) {
954     cu_context_->reporter->UnknownSpecification(offset_, specification_offset_);
955   }
956   return true;
957 }
958 
FindChildHandler(uint64_t offset,enum DwarfTag tag)959 DIEHandler* DwarfCUToModule::NamedScopeHandler::FindChildHandler(
960     uint64_t offset,
961     enum DwarfTag tag) {
962   switch (tag) {
963     case DW_TAG_subprogram:
964       return new FuncHandler(cu_context_, &child_context_, offset,
965                              handle_inline_);
966     case DW_TAG_namespace:
967     case DW_TAG_class_type:
968     case DW_TAG_structure_type:
969     case DW_TAG_union_type:
970       return new NamedScopeHandler(cu_context_, &child_context_, offset,
971                                    handle_inline_);
972     default:
973       return NULL;
974   }
975 }
976 
CUHeading()977 void DwarfCUToModule::WarningReporter::CUHeading() {
978   if (printed_cu_header_)
979     return;
980   fprintf(stderr, "%s: in compilation unit '%s' (offset 0x%" PRIx64 "):\n",
981           filename_.c_str(), cu_name_.c_str(), cu_offset_);
982   printed_cu_header_ = true;
983 }
984 
UnknownSpecification(uint64_t offset,uint64_t target)985 void DwarfCUToModule::WarningReporter::UnknownSpecification(uint64_t offset,
986                                                             uint64_t target) {
987   CUHeading();
988   fprintf(stderr, "%s: the DIE at offset 0x%" PRIx64 " has a "
989           "DW_AT_specification attribute referring to the DIE at offset 0x%"
990           PRIx64 ", which was not marked as a declaration\n",
991           filename_.c_str(), offset, target);
992 }
993 
UnknownAbstractOrigin(uint64_t offset,uint64_t target)994 void DwarfCUToModule::WarningReporter::UnknownAbstractOrigin(uint64_t offset,
995                                                              uint64_t target) {
996   CUHeading();
997   fprintf(stderr, "%s: the DIE at offset 0x%" PRIx64 " has a "
998           "DW_AT_abstract_origin attribute referring to the DIE at offset 0x%"
999           PRIx64 ", which was not marked as an inline\n",
1000           filename_.c_str(), offset, target);
1001 }
1002 
MissingSection(const string & name)1003 void DwarfCUToModule::WarningReporter::MissingSection(const string& name) {
1004   CUHeading();
1005   fprintf(stderr, "%s: warning: couldn't find DWARF '%s' section\n",
1006           filename_.c_str(), name.c_str());
1007 }
1008 
BadLineInfoOffset(uint64_t offset)1009 void DwarfCUToModule::WarningReporter::BadLineInfoOffset(uint64_t offset) {
1010   CUHeading();
1011   fprintf(stderr, "%s: warning: line number data offset beyond end"
1012           " of '.debug_line' section\n",
1013           filename_.c_str());
1014 }
1015 
UncoveredHeading()1016 void DwarfCUToModule::WarningReporter::UncoveredHeading() {
1017   if (printed_unpaired_header_)
1018     return;
1019   CUHeading();
1020   fprintf(stderr, "%s: warning: skipping unpaired lines/functions:\n",
1021           filename_.c_str());
1022   printed_unpaired_header_ = true;
1023 }
1024 
UncoveredFunction(const Module::Function & function)1025 void DwarfCUToModule::WarningReporter::UncoveredFunction(
1026     const Module::Function& function) {
1027   if (!uncovered_warnings_enabled_)
1028     return;
1029   UncoveredHeading();
1030   fprintf(stderr, "    function%s: %s\n",
1031           IsEmptyRange(function.ranges) ? " (zero-length)" : "",
1032           function.name.str().c_str());
1033 }
1034 
UncoveredLine(const Module::Line & line)1035 void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line& line) {
1036   if (!uncovered_warnings_enabled_)
1037     return;
1038   UncoveredHeading();
1039   fprintf(stderr, "    line%s: %s:%d at 0x%" PRIx64 "\n",
1040           (line.size == 0 ? " (zero-length)" : ""),
1041           line.file->name.c_str(), line.number, line.address);
1042 }
1043 
UnnamedFunction(uint64_t offset)1044 void DwarfCUToModule::WarningReporter::UnnamedFunction(uint64_t offset) {
1045   CUHeading();
1046   fprintf(stderr, "%s: warning: function at offset 0x%" PRIx64 " has no name\n",
1047           filename_.c_str(), offset);
1048 }
1049 
DemangleError(const string & input)1050 void DwarfCUToModule::WarningReporter::DemangleError(const string& input) {
1051   CUHeading();
1052   fprintf(stderr, "%s: warning: failed to demangle %s\n",
1053           filename_.c_str(), input.c_str());
1054 }
1055 
UnhandledInterCUReference(uint64_t offset,uint64_t target)1056 void DwarfCUToModule::WarningReporter::UnhandledInterCUReference(
1057     uint64_t offset, uint64_t target) {
1058   CUHeading();
1059   fprintf(stderr, "%s: warning: the DIE at offset 0x%" PRIx64 " has a "
1060                   "DW_FORM_ref_addr attribute with an inter-CU reference to "
1061                   "0x%" PRIx64 ", but inter-CU reference handling is turned "
1062                   " off.\n", filename_.c_str(), offset, target);
1063 }
1064 
MalformedRangeList(uint64_t offset)1065 void DwarfCUToModule::WarningReporter::MalformedRangeList(uint64_t offset) {
1066   CUHeading();
1067   fprintf(stderr, "%s: warning: the range list at offset 0x%" PRIx64 " falls "
1068                   " out of the .debug_ranges section.\n",
1069                   filename_.c_str(), offset);
1070 }
1071 
MissingRanges()1072 void DwarfCUToModule::WarningReporter::MissingRanges() {
1073   CUHeading();
1074   fprintf(stderr, "%s: warning: A DW_AT_ranges attribute was encountered but "
1075                   "the .debug_ranges section is missing.\n", filename_.c_str());
1076 }
1077 
DwarfCUToModule(FileContext * file_context,LineToModuleHandler * line_reader,RangesHandler * ranges_handler,WarningReporter * reporter,bool handle_inline,uint64_t low_pc,uint64_t addr_base,bool has_source_line_info,uint64_t source_line_offset)1078 DwarfCUToModule::DwarfCUToModule(FileContext* file_context,
1079                                  LineToModuleHandler* line_reader,
1080                                  RangesHandler* ranges_handler,
1081                                  WarningReporter* reporter,
1082                                  bool handle_inline,
1083                                  uint64_t low_pc,
1084                                  uint64_t addr_base,
1085                                  bool has_source_line_info,
1086                                  uint64_t source_line_offset)
1087     : RootDIEHandler(handle_inline),
1088       line_reader_(line_reader),
1089       cu_context_(new CUContext(file_context,
1090                                 reporter,
1091                                 ranges_handler,
1092                                 low_pc,
1093                                 addr_base)),
1094       child_context_(new DIEContext()),
1095       has_source_line_info_(has_source_line_info),
1096       source_line_offset_(source_line_offset) {}
1097 
~DwarfCUToModule()1098 DwarfCUToModule::~DwarfCUToModule() {
1099 }
1100 
ProcessAttributeSigned(enum DwarfAttribute attr,enum DwarfForm form,int64_t data)1101 void DwarfCUToModule::ProcessAttributeSigned(enum DwarfAttribute attr,
1102                                              enum DwarfForm form,
1103                                              int64_t data) {
1104   switch (attr) {
1105     case DW_AT_language: // source language of this CU
1106       SetLanguage(static_cast<DwarfLanguage>(data));
1107       break;
1108     default:
1109       break;
1110   }
1111 }
1112 
ProcessAttributeUnsigned(enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)1113 void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr,
1114                                                enum DwarfForm form,
1115                                                uint64_t data) {
1116   switch (attr) {
1117     case DW_AT_stmt_list: // Line number information.
1118       has_source_line_info_ = true;
1119       source_line_offset_ = data;
1120       break;
1121     case DW_AT_language: // source language of this CU
1122       SetLanguage(static_cast<DwarfLanguage>(data));
1123       break;
1124     case DW_AT_low_pc:
1125       cu_context_->low_pc  = data;
1126       break;
1127     case DW_AT_high_pc:
1128       cu_context_->high_pc  = data;
1129       break;
1130     case DW_AT_ranges:
1131       cu_context_->ranges_data = data;
1132       cu_context_->ranges_form = form;
1133       break;
1134     case DW_AT_rnglists_base:
1135       cu_context_->ranges_base = data;
1136       break;
1137     case DW_AT_addr_base:
1138     case DW_AT_GNU_addr_base:
1139       cu_context_->addr_base = data;
1140       break;
1141     case DW_AT_str_offsets_base:
1142       cu_context_->str_offsets_base = data;
1143       break;
1144     default:
1145       break;
1146   }
1147 }
1148 
ProcessAttributeString(enum DwarfAttribute attr,enum DwarfForm form,const string & data)1149 void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr,
1150                                              enum DwarfForm form,
1151                                             const string& data) {
1152   switch (attr) {
1153     case DW_AT_name:
1154       cu_context_->reporter->SetCUName(data);
1155       break;
1156     case DW_AT_comp_dir:
1157       line_reader_->StartCompilationUnit(data);
1158       break;
1159     default:
1160       break;
1161   }
1162 }
1163 
EndAttributes()1164 bool DwarfCUToModule::EndAttributes() {
1165   return true;
1166 }
1167 
FindChildHandler(uint64_t offset,enum DwarfTag tag)1168 DIEHandler* DwarfCUToModule::FindChildHandler(
1169     uint64_t offset,
1170     enum DwarfTag tag) {
1171   switch (tag) {
1172     case DW_TAG_subprogram:
1173       return new FuncHandler(cu_context_.get(), child_context_.get(), offset,
1174                              handle_inline);
1175     case DW_TAG_namespace:
1176     case DW_TAG_class_type:
1177     case DW_TAG_structure_type:
1178     case DW_TAG_union_type:
1179     case DW_TAG_module:
1180       return new NamedScopeHandler(cu_context_.get(), child_context_.get(),
1181                                    offset, handle_inline);
1182     default:
1183       return NULL;
1184   }
1185 }
1186 
SetLanguage(DwarfLanguage language)1187 void DwarfCUToModule::SetLanguage(DwarfLanguage language) {
1188   switch (language) {
1189     case DW_LANG_Java:
1190       cu_context_->language = Language::Java;
1191       break;
1192 
1193     case DW_LANG_Swift:
1194       cu_context_->language = Language::Swift;
1195       break;
1196 
1197     case DW_LANG_Rust:
1198       cu_context_->language = Language::Rust;
1199       break;
1200 
1201     // DWARF has no generic language code for assembly language; this is
1202     // what the GNU toolchain uses.
1203     case DW_LANG_Mips_Assembler:
1204       cu_context_->language = Language::Assembler;
1205       break;
1206 
1207     // C++ covers so many cases that it probably has some way to cope
1208     // with whatever the other languages throw at us. So make it the
1209     // default.
1210     //
1211     // Objective C and Objective C++ seem to create entries for
1212     // methods whose DW_AT_name values are already fully-qualified:
1213     // "-[Classname method:]".  These appear at the top level.
1214     //
1215     // DWARF data for C should never include namespaces or functions
1216     // nested in struct types, but if it ever does, then C++'s
1217     // notation is probably not a bad choice for that.
1218     default:
1219     case DW_LANG_ObjC:
1220     case DW_LANG_ObjC_plus_plus:
1221     case DW_LANG_C:
1222     case DW_LANG_C89:
1223     case DW_LANG_C99:
1224     case DW_LANG_C_plus_plus:
1225       cu_context_->language = Language::CPlusPlus;
1226       break;
1227   }
1228 }
1229 
ReadSourceLines(uint64_t offset)1230 void DwarfCUToModule::ReadSourceLines(uint64_t offset) {
1231   const SectionMap& section_map
1232       = cu_context_->file_context->section_map();
1233   SectionMap::const_iterator map_entry
1234       = GetSectionByName(section_map, ".debug_line");
1235   if (map_entry == section_map.end()) {
1236     cu_context_->reporter->MissingSection(".debug_line");
1237     return;
1238   }
1239   const uint8_t* line_section_start = map_entry->second.first + offset;
1240   uint64_t line_section_length = map_entry->second.second;
1241   if (offset >= line_section_length) {
1242     cu_context_->reporter->BadLineInfoOffset(offset);
1243     return;
1244   }
1245   line_section_length -= offset;
1246   // When reading line tables, string sections are never needed for dwarf4, and
1247   // may or may not be needed by dwarf5, so no error if they are missing.
1248   const uint8_t* string_section_start = nullptr;
1249   uint64_t string_section_length = 0;
1250   map_entry = GetSectionByName(section_map, ".debug_str");
1251   if (map_entry != section_map.end()) {
1252     string_section_start = map_entry->second.first;
1253     string_section_length = map_entry->second.second;
1254   }
1255   const uint8_t* line_string_section_start = nullptr;
1256   uint64_t line_string_section_length = 0;
1257   map_entry = GetSectionByName(section_map, ".debug_line_str");
1258   if (map_entry != section_map.end()) {
1259     line_string_section_start = map_entry->second.first;
1260     line_string_section_length = map_entry->second.second;
1261   }
1262   line_reader_->ReadProgram(
1263       line_section_start, line_section_length,
1264       string_section_start, string_section_length,
1265       line_string_section_start, line_string_section_length,
1266       cu_context_->file_context->module_, &lines_, &files_);
1267 }
1268 
1269 namespace {
1270 class FunctionRange {
1271  public:
FunctionRange(const Module::Range & range,Module::Function * function)1272   FunctionRange(const Module::Range& range, Module::Function* function) :
1273       address(range.address), size(range.size), function(function) { }
1274 
AddLine(Module::Line & line)1275   void AddLine(Module::Line& line) {
1276     function->lines.push_back(line);
1277   }
1278 
1279   Module::Address address;
1280   Module::Address size;
1281   Module::Function* function;
1282 };
1283 
1284 // Fills an array of ranges with pointers to the functions which owns
1285 // them. The array is sorted in ascending order and the ranges are non
1286 // empty and non-overlapping.
1287 
FillSortedFunctionRanges(vector<FunctionRange> & dest_ranges,vector<Module::Function * > * functions)1288 static void FillSortedFunctionRanges(vector<FunctionRange>& dest_ranges,
1289                                      vector<Module::Function*>* functions) {
1290   for (vector<Module::Function*>::const_iterator func_it = functions->cbegin();
1291        func_it != functions->cend();
1292        func_it++)
1293   {
1294     Module::Function* func = *func_it;
1295     vector<Module::Range>& ranges = func->ranges;
1296     for (vector<Module::Range>::const_iterator ranges_it = ranges.cbegin();
1297          ranges_it != ranges.cend();
1298          ++ranges_it) {
1299       FunctionRange range(*ranges_it, func);
1300       if (range.size != 0) {
1301           dest_ranges.push_back(range);
1302       }
1303     }
1304   }
1305 
1306   sort(dest_ranges.begin(), dest_ranges.end(),
1307     [](const FunctionRange& fr1, const FunctionRange& fr2) {
1308       return fr1.address < fr2.address;
1309     }
1310   );
1311 }
1312 
1313 // Return true if ADDRESS falls within the range of ITEM.
1314 template <class T>
within(const T & item,Module::Address address)1315 inline bool within(const T& item, Module::Address address) {
1316   // Because Module::Address is unsigned, and unsigned arithmetic
1317   // wraps around, this will be false if ADDRESS falls before the
1318   // start of ITEM, or if it falls after ITEM's end.
1319   return address - item.address < item.size;
1320 }
1321 }
1322 
AssignLinesToFunctions()1323 void DwarfCUToModule::AssignLinesToFunctions() {
1324   vector<Module::Function*>* functions = &cu_context_->functions;
1325   WarningReporter* reporter = cu_context_->reporter;
1326 
1327   // This would be simpler if we assumed that source line entries
1328   // don't cross function boundaries.  However, there's no real reason
1329   // to assume that (say) a series of function definitions on the same
1330   // line wouldn't get coalesced into one line number entry.  The
1331   // DWARF spec certainly makes no such promises.
1332   //
1333   // So treat the functions and lines as peers, and take the trouble
1334   // to compute their ranges' intersections precisely.  In any case,
1335   // the hair here is a constant factor for performance; the
1336   // complexity from here on out is linear.
1337 
1338   // Put both our functions and lines in order by address.
1339   std::sort(functions->begin(), functions->end(),
1340             Module::Function::CompareByAddress);
1341   std::sort(lines_.begin(), lines_.end(), Module::Line::CompareByAddress);
1342 
1343   // The last line that we used any piece of.  We use this only for
1344   // generating warnings.
1345   const Module::Line* last_line_used = NULL;
1346 
1347   // The last function and line we warned about --- so we can avoid
1348   // doing so more than once.
1349   const Module::Function* last_function_cited = NULL;
1350   const Module::Line* last_line_cited = NULL;
1351 
1352   // Prepare a sorted list of ranges with range-to-function mapping
1353   vector<FunctionRange> sorted_ranges;
1354   FillSortedFunctionRanges(sorted_ranges, functions);
1355 
1356   // Make a single pass through both the range and line vectors from lower to
1357   // higher addresses, populating each range's function lines vector with lines
1358   // from our lines_ vector that fall within the range.
1359   vector<FunctionRange>::iterator range_it = sorted_ranges.begin();
1360   vector<Module::Line>::const_iterator line_it = lines_.begin();
1361 
1362   Module::Address current;
1363 
1364   // Pointers to the referents of func_it and line_it, or NULL if the
1365   // iterator is at the end of the sequence.
1366   FunctionRange* range;
1367   const Module::Line* line;
1368 
1369   // Start current at the beginning of the first line or function,
1370   // whichever is earlier.
1371   if (range_it != sorted_ranges.end() && line_it != lines_.end()) {
1372     range = &*range_it;
1373     line = &*line_it;
1374     current = std::min(range->address, line->address);
1375   } else if (line_it != lines_.end()) {
1376     range = NULL;
1377     line = &*line_it;
1378     current = line->address;
1379   } else if (range_it != sorted_ranges.end()) {
1380     range = &*range_it;
1381     line = NULL;
1382     current = range->address;
1383   } else {
1384     return;
1385   }
1386 
1387   // Some dwarf producers handle linker-removed functions by using -1 as a
1388   // tombstone in the line table. So the end marker can be -1.
1389   if (current == Module::kMaxAddress)
1390     return;
1391 
1392   while (range || line) {
1393     // This loop has two invariants that hold at the top.
1394     //
1395     // First, at least one of the iterators is not at the end of its
1396     // sequence, and those that are not refer to the earliest
1397     // range or line that contains or starts after CURRENT.
1398     //
1399     // Note that every byte is in one of four states: it is covered
1400     // or not covered by a range, and, independently, it is
1401     // covered or not covered by a line.
1402     //
1403     // The second invariant is that CURRENT refers to a byte whose
1404     // state is different from its predecessor, or it refers to the
1405     // first byte in the address space. In other words, CURRENT is
1406     // always the address of a transition.
1407     //
1408     // Note that, although each iteration advances CURRENT from one
1409     // transition address to the next in each iteration, it might
1410     // not advance the iterators. Suppose we have a range that
1411     // starts with a line, has a gap, and then a second line, and
1412     // suppose that we enter an iteration with CURRENT at the end of
1413     // the first line. The next transition address is the start of
1414     // the second line, after the gap, so the iteration should
1415     // advance CURRENT to that point. At the head of that iteration,
1416     // the invariants require that the line iterator be pointing at
1417     // the second line. But this is also true at the head of the
1418     // next. And clearly, the iteration must not change the range
1419     // iterator. So neither iterator moves.
1420 
1421     // Assert the first invariant (see above).
1422     assert(!range || current < range->address || within(*range, current));
1423     assert(!line || current < line->address || within(*line, current));
1424 
1425     // The next transition after CURRENT.
1426     Module::Address next_transition;
1427 
1428     // Figure out which state we're in, add lines or warn, and compute
1429     // the next transition address.
1430     if (range && current >= range->address) {
1431       if (line && current >= line->address) {
1432         // Covered by both a line and a range.
1433         Module::Address range_left = range->size - (current - range->address);
1434         Module::Address line_left = line->size - (current - line->address);
1435         // This may overflow, but things work out.
1436         next_transition = current + std::min(range_left, line_left);
1437         Module::Line l = *line;
1438         l.address = current;
1439         l.size = next_transition - current;
1440         range->AddLine(l);
1441         last_line_used = line;
1442       } else {
1443         // Covered by a range, but no line.
1444         if (range->function != last_function_cited) {
1445           reporter->UncoveredFunction(*(range->function));
1446           last_function_cited = range->function;
1447         }
1448         if (line && within(*range, line->address))
1449           next_transition = line->address;
1450         else
1451           // If this overflows, we'll catch it below.
1452           next_transition = range->address + range->size;
1453       }
1454     } else {
1455       if (line && current >= line->address) {
1456         // Covered by a line, but no range.
1457         //
1458         // If GCC emits padding after one function to align the start
1459         // of the next, then it will attribute the padding
1460         // instructions to the last source line of function (to reduce
1461         // the size of the line number info), but omit it from the
1462         // DW_AT_{low,high}_pc range given in .debug_info (since it
1463         // costs nothing to be precise there). If we did use at least
1464         // some of the line we're about to skip, and it ends at the
1465         // start of the next function, then assume this is what
1466         // happened, and don't warn.
1467         if (line != last_line_cited
1468             && !(range
1469                  && line == last_line_used
1470                  && range->address - line->address == line->size)) {
1471           reporter->UncoveredLine(*line);
1472           last_line_cited = line;
1473         }
1474         if (range && within(*line, range->address))
1475           next_transition = range->address;
1476         else
1477           // If this overflows, we'll catch it below.
1478           next_transition = line->address + line->size;
1479       } else {
1480         // Covered by neither a range nor a line. By the invariant,
1481         // both range and line begin after CURRENT. The next transition
1482         // is the start of the next range or next line, whichever
1483         // is earliest.
1484         assert(range || line);
1485         if (range && line)
1486           next_transition = std::min(range->address, line->address);
1487         else if (range)
1488           next_transition = range->address;
1489         else
1490           next_transition = line->address;
1491       }
1492     }
1493 
1494     // If a function or line abuts the end of the address space, then
1495     // next_transition may end up being zero, in which case we've completed
1496     // our pass. Handle that here, instead of trying to deal with it in
1497     // each place we compute next_transition.
1498 
1499     // Some dwarf producers handle linker-removed functions by using -1 as a
1500     // tombstone in the line table. So the end marker can be -1.
1501     if (!next_transition || next_transition == Module::kMaxAddress)
1502       break;
1503 
1504     // Advance iterators as needed. If lines overlap or functions overlap,
1505     // then we could go around more than once. We don't worry too much
1506     // about what result we produce in that case, just as long as we don't
1507     // hang or crash.
1508     while (range_it != sorted_ranges.end()
1509            && next_transition >= range_it->address
1510            && !within(*range_it, next_transition))
1511       range_it++;
1512     range = (range_it != sorted_ranges.end()) ? &(*range_it) : NULL;
1513     while (line_it != lines_.end()
1514            && next_transition >= line_it->address
1515            && !within(*line_it, next_transition))
1516       line_it++;
1517     line = (line_it != lines_.end()) ? &*line_it : NULL;
1518 
1519     // We must make progress.
1520     assert(next_transition > current);
1521     current = next_transition;
1522   }
1523 }
1524 
AssignFilesToInlines()1525 void DwarfCUToModule::AssignFilesToInlines() {
1526   // Assign File* to Inlines inside this CU.
1527   auto assignFile = [this](unique_ptr<Module::Inline>& in) {
1528     in->call_site_file = files_[in->call_site_file_id];
1529   };
1530   for (auto func : cu_context_->functions) {
1531     Module::Inline::InlineDFS(func->inlines, assignFile);
1532   }
1533 }
1534 
Finish()1535 void DwarfCUToModule::Finish() {
1536   // Assembly language files have no function data, and that gives us
1537   // no place to store our line numbers (even though the GNU toolchain
1538   // will happily produce source line info for assembly language
1539   // files).  To avoid spurious warnings about lines we can't assign
1540   // to functions, skip CUs in languages that lack functions.
1541   if (!cu_context_->language->HasFunctions())
1542     return;
1543 
1544   // Read source line info, if we have any.
1545   if (has_source_line_info_)
1546     ReadSourceLines(source_line_offset_);
1547 
1548   vector<Module::Function*>* functions = &cu_context_->functions;
1549 
1550   // Dole out lines to the appropriate functions.
1551   AssignLinesToFunctions();
1552 
1553   AssignFilesToInlines();
1554 
1555   // Add our functions, which now have source lines assigned to them,
1556   // to module_, and remove duplicate functions.
1557   for (Module::Function* func : *functions)
1558     if (!cu_context_->file_context->module_->AddFunction(func)) {
1559       auto iter = cu_context_->spec_function_offsets.find(func);
1560       if (iter != cu_context_->spec_function_offsets.end())
1561         cu_context_->file_context->file_private_->forward_ref_die_to_func.erase(
1562             iter->second);
1563       delete func;
1564     }
1565 
1566   // Ownership of the function objects has shifted from cu_context to
1567   // the Module.
1568   functions->clear();
1569 
1570   cu_context_->file_context->ClearSpecifications();
1571 }
1572 
StartCompilationUnit(uint64_t offset,uint8_t address_size,uint8_t offset_size,uint64_t cu_length,uint8_t dwarf_version)1573 bool DwarfCUToModule::StartCompilationUnit(uint64_t offset,
1574                                            uint8_t address_size,
1575                                            uint8_t offset_size,
1576                                            uint64_t cu_length,
1577                                            uint8_t dwarf_version) {
1578   cu_context_->version = dwarf_version;
1579   return dwarf_version >= 2;
1580 }
1581 
StartRootDIE(uint64_t offset,enum DwarfTag tag)1582 bool DwarfCUToModule::StartRootDIE(uint64_t offset, enum DwarfTag tag) {
1583   // We don't deal with partial compilation units (the only other tag
1584   // likely to be used for root DIE).
1585   return (tag == DW_TAG_compile_unit
1586 	  || tag == DW_TAG_skeleton_unit);
1587 }
1588 
1589 } // namespace google_breakpad
1590