xref: /aosp_15_r20/external/google-breakpad/src/common/dwarf_line_to_module.h (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // -*- mode: c++ -*-
2 
3 // Copyright 2010 Google LLC
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google LLC nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Original author: Jim Blandy <[email protected]> <[email protected]>
32 
33 // The DwarfLineToModule class accepts line number information from a
34 // DWARF parser and adds it to a google_breakpad::Module. The Module
35 // can write that data out as a Breakpad symbol file.
36 
37 #ifndef COMMON_LINUX_DWARF_LINE_TO_MODULE_H
38 #define COMMON_LINUX_DWARF_LINE_TO_MODULE_H
39 
40 #include <string>
41 
42 #include "common/module.h"
43 #include "common/dwarf/dwarf2reader.h"
44 #include "common/using_std_string.h"
45 
46 namespace google_breakpad {
47 
48 // A class for producing a vector of google_breakpad::Module::Line
49 // instances from parsed DWARF line number data.
50 //
51 // An instance of this class can be provided as a handler to a
52 // LineInfo DWARF line number information parser. The
53 // handler accepts source location information from the parser and
54 // uses it to produce a vector of google_breakpad::Module::Line
55 // objects, referring to google_breakpad::Module::File objects added
56 // to a particular google_breakpad::Module.
57 //
58 // GNU toolchain omitted sections support:
59 // ======================================
60 //
61 // Given the right options, the GNU toolchain will omit unreferenced
62 // functions from the final executable. Unfortunately, when it does so, it
63 // does not remove the associated portions of the DWARF line number
64 // program; instead, it gives the DW_LNE_set_address instructions referring
65 // to the now-deleted code addresses of zero. Given this input, the DWARF
66 // line parser will call AddLine with a series of lines starting at address
67 // zero. For example, here is the output from 'readelf -wl' for a program
68 // with four functions, the first three of which have been omitted:
69 //
70 //   Line Number Statements:
71 //    Extended opcode 2: set Address to 0x0
72 //    Advance Line by 14 to 15
73 //    Copy
74 //    Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 16
75 //    Special opcode 119: advance Address by 8 to 0xb and Line by 2 to 18
76 //    Advance PC by 2 to 0xd
77 //    Extended opcode 1: End of Sequence
78 //
79 //    Extended opcode 2: set Address to 0x0
80 //    Advance Line by 14 to 15
81 //    Copy
82 //    Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 16
83 //    Special opcode 119: advance Address by 8 to 0xb and Line by 2 to 18
84 //    Advance PC by 2 to 0xd
85 //    Extended opcode 1: End of Sequence
86 //
87 //    Extended opcode 2: set Address to 0x0
88 //    Advance Line by 19 to 20
89 //    Copy
90 //    Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 21
91 //    Special opcode 76: advance Address by 5 to 0x8 and Line by 1 to 22
92 //    Advance PC by 2 to 0xa
93 //    Extended opcode 1: End of Sequence
94 //
95 //    Extended opcode 2: set Address to 0x80483a4
96 //    Advance Line by 23 to 24
97 //    Copy
98 //    Special opcode 202: advance Address by 14 to 0x80483b2 and Line by 1 to 25
99 //    Special opcode 76: advance Address by 5 to 0x80483b7 and Line by 1 to 26
100 //    Advance PC by 6 to 0x80483bd
101 //    Extended opcode 1: End of Sequence
102 //
103 // Instead of collecting runs of lines describing code that is not there,
104 // we try to recognize and drop them. Since the linker doesn't explicitly
105 // distinguish references to dropped sections from genuine references to
106 // code at address zero, we must use a heuristic. We have chosen:
107 //
108 // - If a line starts at address zero, omit it. (On the platforms
109 //   breakpad targets, it is extremely unlikely that there will be code
110 //   at address zero.)
111 //
112 // - If a line starts immediately after an omitted line, omit it too.
113 class DwarfLineToModule: public LineInfoHandler {
114  public:
115   // As the DWARF line info parser passes us line records, add source
116   // files to MODULE, and add all lines to the end of LINES. LINES
117   // need not be empty. If the parser hands us a zero-length line, we
118   // omit it. If the parser hands us a line that extends beyond the
119   // end of the address space, we clip it. It's up to our client to
120   // sort out which lines belong to which functions; we don't add them
121   // to any particular function in MODULE ourselves.
DwarfLineToModule(Module * module,const string & compilation_dir,vector<Module::Line> * lines,std::map<uint32_t,Module::File * > * files)122   DwarfLineToModule(Module* module,
123                     const string& compilation_dir,
124                     vector<Module::Line>* lines,
125                     std::map<uint32_t, Module::File*>* files)
126       : module_(module),
127         compilation_dir_(compilation_dir),
128         lines_(lines),
129         files_(files),
130         highest_file_number_(-1),
131         omitted_line_end_(0),
132         warned_bad_file_number_(false),
133         warned_bad_directory_number_(false) { }
134 
~DwarfLineToModule()135   ~DwarfLineToModule() { }
136 
137   void DefineDir(const string& name, uint32_t dir_num);
138   void DefineFile(const string& name, int32_t file_num,
139                   uint32_t dir_num, uint64_t mod_time,
140                   uint64_t length);
141   void AddLine(uint64_t address, uint64_t length,
142                uint32_t file_num, uint32_t line_num, uint32_t column_num);
143 
144  private:
145 
146   typedef std::map<uint32_t, string> DirectoryTable;
147   typedef std::map<uint32_t, Module::File*> FileTable;
148 
149   // The module we're contributing debugging info to. Owned by our
150   // client.
151   Module *module_;
152 
153   // The compilation directory for the current compilation unit whose
154   // lines are being accumulated.
155   string compilation_dir_;
156 
157   // The vector of lines we're accumulating. Owned by our client.
158   //
159   // In a Module, as in a breakpad symbol file, lines belong to
160   // specific functions, but DWARF simply assigns lines to addresses;
161   // one must infer the line/function relationship using the
162   // functions' beginning and ending addresses. So we can't add these
163   // to the appropriate function from module_ until we've read the
164   // function info as well. Instead, we accumulate lines here, and let
165   // whoever constructed this sort it all out.
166   vector<Module::Line>* lines_;
167 
168   // A table mapping directory numbers to paths.
169   DirectoryTable directories_;
170 
171   // A table mapping file numbers to Module::File pointers.
172   FileTable* files_;
173 
174   // The highest file number we've seen so far, or -1 if we've seen
175   // none.  Used for dynamically defined file numbers.
176   int32_t highest_file_number_;
177 
178   // This is the ending address of the last line we omitted, or zero if we
179   // didn't omit the previous line. It is zero before we have received any
180   // AddLine calls.
181   uint64_t omitted_line_end_;
182 
183   // True if we've warned about:
184   bool warned_bad_file_number_; // bad file numbers
185   bool warned_bad_directory_number_; // bad directory numbers
186 };
187 
188 } // namespace google_breakpad
189 
190 #endif // COMMON_LINUX_DWARF_LINE_TO_MODULE_H
191