xref: /aosp_15_r20/external/google-breakpad/src/common/windows/pdb_source_line_writer.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // Copyright 2006 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>  // Must come first
31 #endif
32 
33 #include "common/windows/pdb_source_line_writer.h"
34 
35 #include <windows.h>
36 #include <winnt.h>
37 #include <atlbase.h>
38 #include <dia2.h>
39 #include <diacreate.h>
40 #include <ImageHlp.h>
41 #include <stdio.h>
42 
43 #include <algorithm>
44 #include <limits>
45 #include <map>
46 #include <memory>
47 #include <set>
48 #include <utility>
49 
50 #include "common/windows/dia_util.h"
51 #include "common/windows/guid_string.h"
52 #include "common/windows/pe_util.h"
53 #include "common/windows/string_utils-inl.h"
54 
// UNDNAME_NO_ECSU may be missing from DbgHelp.h, so it is defined here when
// the SDK headers did not provide it.  See the documentation for
// IDiaSymbol::get_undecoratedNameEx.
#ifndef UNDNAME_NO_ECSU
#define UNDNAME_NO_ECSU 0x8000  // Suppresses enum/class/struct/union.
#endif  // UNDNAME_NO_ECSU
60 
61 namespace google_breakpad {
62 
63 namespace {
64 
65 using std::set;
66 using std::unique_ptr;
67 using std::vector;
68 
69 // The symbol (among possibly many) selected to represent an rva.
70 struct SelectedSymbol {
SelectedSymbolgoogle_breakpad::__anon4ddd43a00111::SelectedSymbol71   SelectedSymbol(const CComPtr<IDiaSymbol>& symbol, bool is_public)
72       : symbol(symbol), is_public(is_public), is_multiple(false) {}
73 
74   // The symbol to use for an rva.
75   CComPtr<IDiaSymbol> symbol;
76   // Whether this is a public or function symbol.
77   bool is_public;
78   // Whether the rva has multiple associated symbols. An rva will correspond to
79   // multiple symbols in the case of linker identical symbol folding.
80   bool is_multiple;
81 };
82 
83 // Maps rva to the symbol to use for that address.
84 typedef std::map<DWORD, SelectedSymbol> SymbolMap;
85 
86 // Record this in the map as the selected symbol for the rva if it satisfies the
87 // necessary conditions.
MaybeRecordSymbol(DWORD rva,const CComPtr<IDiaSymbol> symbol,bool is_public,SymbolMap * map)88 void MaybeRecordSymbol(DWORD rva,
89                        const CComPtr<IDiaSymbol> symbol,
90                        bool is_public,
91                        SymbolMap* map) {
92   SymbolMap::iterator loc = map->find(rva);
93   if (loc == map->end()) {
94     map->insert(std::make_pair(rva, SelectedSymbol(symbol, is_public)));
95     return;
96   }
97 
98   // Prefer function symbols to public symbols.
99   if (is_public && !loc->second.is_public) {
100     return;
101   }
102 
103   loc->second.is_multiple = true;
104 
105   // Take the 'least' symbol by lexicographical order of the decorated name. We
106   // use the decorated rather than undecorated name because computing the latter
107   // is expensive.
108   BSTR current_name, new_name;
109   loc->second.symbol->get_name(&current_name);
110   symbol->get_name(&new_name);
111   if (wcscmp(new_name, current_name) < 0) {
112     loc->second.symbol = symbol;
113     loc->second.is_public = is_public;
114   }
115 }
116 
117 
118 
SymbolsMatch(IDiaSymbol * a,IDiaSymbol * b)119 bool SymbolsMatch(IDiaSymbol* a, IDiaSymbol* b) {
120   DWORD a_section, a_offset, b_section, b_offset;
121   if (FAILED(a->get_addressSection(&a_section)) ||
122       FAILED(a->get_addressOffset(&a_offset)) ||
123       FAILED(b->get_addressSection(&b_section)) ||
124       FAILED(b->get_addressOffset(&b_offset)))
125     return false;
126   return a_section == b_section && a_offset == b_offset;
127 }
128 
CreateDiaDataSourceInstance(CComPtr<IDiaDataSource> & data_source)129 bool CreateDiaDataSourceInstance(CComPtr<IDiaDataSource>& data_source) {
130   if (SUCCEEDED(data_source.CoCreateInstance(CLSID_DiaSource))) {
131     return true;
132   }
133 
134   class DECLSPEC_UUID("B86AE24D-BF2F-4ac9-B5A2-34B14E4CE11D") DiaSource100;
135   class DECLSPEC_UUID("761D3BCD-1304-41D5-94E8-EAC54E4AC172") DiaSource110;
136   class DECLSPEC_UUID("3BFCEA48-620F-4B6B-81F7-B9AF75454C7D") DiaSource120;
137   class DECLSPEC_UUID("E6756135-1E65-4D17-8576-610761398C3C") DiaSource140;
138 
139   // If the CoCreateInstance call above failed, msdia*.dll is not registered.
140   // We can try loading the DLL corresponding to the #included DIA SDK, but
141   // the DIA headers don't provide a version. Lets try to figure out which DIA
142   // version we're compiling against by comparing CLSIDs.
143   const wchar_t* msdia_dll = nullptr;
144   if (CLSID_DiaSource == _uuidof(DiaSource100)) {
145     msdia_dll = L"msdia100.dll";
146   } else if (CLSID_DiaSource == _uuidof(DiaSource110)) {
147     msdia_dll = L"msdia110.dll";
148   } else if (CLSID_DiaSource == _uuidof(DiaSource120)) {
149     msdia_dll = L"msdia120.dll";
150   } else if (CLSID_DiaSource == _uuidof(DiaSource140)) {
151     msdia_dll = L"msdia140.dll";
152   }
153 
154   if (msdia_dll &&
155       SUCCEEDED(NoRegCoCreate(msdia_dll, CLSID_DiaSource, IID_IDiaDataSource,
156                               reinterpret_cast<void**>(&data_source)))) {
157     return true;
158   }
159 
160   return false;
161 }
162 
// Flags passed to UnDecorateSymbolNameW() when demangling symbol names.
const DWORD kUndecorateOptions = UNDNAME_NO_MS_KEYWORDS |
                                 UNDNAME_NO_FUNCTION_RETURNS |
                                 UNDNAME_NO_ALLOCATION_MODEL |
                                 UNDNAME_NO_ALLOCATION_LANGUAGE |
                                 UNDNAME_NO_THISTYPE |
                                 UNDNAME_NO_ACCESS_SPECIFIERS |
                                 UNDNAME_NO_THROW_SIGNATURES |
                                 UNDNAME_NO_MEMBER_TYPE |
                                 UNDNAME_NO_RETURN_UDT_MODEL |
                                 UNDNAME_NO_ECSU;

// Evaluates to the number of elements in the array |f|.  |f| must be a real
// array; for a pointer the result is meaningless.
#define arraysize(f) (sizeof(f) / sizeof(*f))
175 
StripLlvmSuffixAndUndecorate(BSTR * name)176 void StripLlvmSuffixAndUndecorate(BSTR* name) {
177   // LLVM sometimes puts a suffix on symbols to give them a globally unique
178   // name. The suffix is either some string preceded by a period (like in the
179   // Itanium ABI; also on Windows this is safe since periods are otherwise
180   // never part of mangled names), or a dollar sign followed by a 32-char hex
181   // string (this should go away in future LLVM versions). Strip such suffixes
182   // and try demangling again.
183   //
184   //
185   // Example symbol names with such suffixes:
186   //
187   //   ?foo@@YAXXZ$5520c83448162c04f2b239db4b5a2c61
188   //   ?foo@@YAXXZ.llvm.13040715209719948753
189 
190   if (**name != L'?')
191     return;  // The name is already demangled.
192 
193   for (size_t i = 0, len = wcslen(*name); i < len; i++) {
194     wchar_t c = (*name)[i];
195 
196     if (c == L'.' || (c == L'$' && len - i == 32 + 1)) {
197       (*name)[i] = L'\0';
198       wchar_t undecorated[1024];
199       DWORD res = UnDecorateSymbolNameW(*name, undecorated,
200                                         arraysize(undecorated),
201                                         kUndecorateOptions);
202       if (res == 0 || undecorated[0] == L'?') {
203         // Demangling failed; restore the symbol name and return.
204         (*name)[i] = c;
205         return;
206       }
207 
208       SysFreeString(*name);
209       *name = SysAllocString(undecorated);
210       return;
211     }
212   }
213 }
214 
215 // Prints the error message related to the error code as seen in
216 // Microsoft's MSVS documentation for loadDataFromPdb and loadDataForExe.
PrintOpenError(HRESULT hr,const char * fn_name,const wchar_t * file)217 void PrintOpenError(HRESULT hr, const char* fn_name, const wchar_t* file) {
218   switch (hr) {
219     case E_PDB_NOT_FOUND:
220       fprintf(stderr, "%s: Failed to open %ws, or the file has an "
221               "invalid format.\n", fn_name, file);
222       break;
223     case E_PDB_FORMAT:
224       fprintf(stderr, "%s: Attempted to access %ws with an obsolete "
225               "format.\n", fn_name, file);
226       break;
227     case E_PDB_INVALID_SIG:
228       fprintf(stderr, "%s: Signature does not match for %ws.\n", fn_name,
229               file);
230       break;
231     case E_PDB_INVALID_AGE:
232       fprintf(stderr, "%s: Age does not match for %ws.\n", fn_name, file);
233       break;
234     case E_INVALIDARG:
235       fprintf(stderr, "%s: Invalid parameter for %ws.\n", fn_name, file);
236       break;
237     case E_UNEXPECTED:
238       fprintf(stderr, "%s: Data source has already been prepared for %ws.\n",
239               fn_name, file);
240       break;
241     default:
242       fprintf(stderr, "%s: Unexpected error 0x%lx, file: %ws.\n",
243               fn_name, hr, file);
244       break;
245   }
246 }
247 
248 }  // namespace
249 
// Creates an inline record at nesting depth |inline_nest_level|.  Only the
// nesting level is set here; the other fields are supplied through the
// setters and ExtendRanges().
PDBSourceLineWriter::Inline::Inline(int inline_nest_level)
    : inline_nest_level_(inline_nest_level) {}
252 
// Stores |origin_id|, which Print() emits as the last numeric field before
// the address ranges of the INLINE record.
void PDBSourceLineWriter::Inline::SetOriginId(int origin_id) {
  origin_id_ = origin_id;
}
256 
ExtendRanges(const Line & line)257 void PDBSourceLineWriter::Inline::ExtendRanges(const Line& line) {
258   if (ranges_.empty()) {
259     ranges_[line.rva] = line.length;
260     return;
261   }
262   auto iter = ranges_.lower_bound(line.rva);
263   // There is no overlap if this function is called with inlinee lines from
264   // the same callsite.
265   if (iter == ranges_.begin()) {
266     return;
267   }
268   if (line.rva + line.length == iter->first) {
269     // If they are connected, merge their ranges into one.
270     DWORD length = line.length + iter->second;
271     ranges_.erase(iter);
272     ranges_[line.rva] = length;
273   } else {
274     --iter;
275     if (iter->first + iter->second == line.rva) {
276       ranges_[iter->first] = iter->second + line.length;
277     } else {
278       ranges_[line.rva] = line.length;
279     }
280   }
281 }
282 
// Stores |call_site_line|, which Print() emits as the second field of the
// INLINE record.
void PDBSourceLineWriter::Inline::SetCallSiteLine(DWORD call_site_line) {
  call_site_line_ = call_site_line;
}
286 
// Stores |call_site_file_id|, which Print() emits as the third field of the
// INLINE record.
void PDBSourceLineWriter::Inline::SetCallSiteFileId(DWORD call_site_file_id) {
  call_site_file_id_ = call_site_file_id;
}
290 
// Takes ownership of |child_inlines|, replacing any children held before.
// Print() prints them right after this inline's own record.
void PDBSourceLineWriter::Inline::SetChildInlines(
    vector<unique_ptr<Inline>> child_inlines) {
  child_inlines_ = std::move(child_inlines);
}
295 
Print(FILE * output) const296 void PDBSourceLineWriter::Inline::Print(FILE* output) const {
297   // Ignore INLINE record that doesn't have any range.
298   if (ranges_.empty())
299     return;
300   fprintf(output, "INLINE %d %lu %lu %d", inline_nest_level_, call_site_line_,
301           call_site_file_id_, origin_id_);
302   for (const auto& r : ranges_) {
303     fprintf(output, " %lx %lx", r.first, r.second);
304   }
305   fprintf(output, "\n");
306   for (const unique_ptr<Inline>& in : child_inlines_) {
307     in->Print(output);
308   }
309 }
310 
GetLine(DWORD rva) const311 const PDBSourceLineWriter::Line* PDBSourceLineWriter::Lines::GetLine(
312     DWORD rva) const {
313   auto iter = line_map_.find(rva);
314   if (iter == line_map_.end()) {
315     // If not found exact rva, check if it's within any range.
316     iter = line_map_.lower_bound(rva);
317     if (iter == line_map_.begin())
318       return nullptr;
319     --iter;
320     auto l = iter->second;
321     // This happens when there is no top level lines cover this rva (e.g. empty
322     // lines found for the function). Then we don't know the call site line
323     // number for this inlined function.
324     if (rva >= l.rva + l.length)
325       return nullptr;
326   }
327   return &iter->second;
328 }
329 
GetLineNum(DWORD rva) const330 DWORD PDBSourceLineWriter::Lines::GetLineNum(DWORD rva) const {
331   const Line* line = GetLine(rva);
332   return line ? line->line_num : 0;
333 }
334 
GetFileId(DWORD rva) const335 DWORD PDBSourceLineWriter::Lines::GetFileId(DWORD rva) const {
336   const Line* line = GetLine(rva);
337   return line ? line->file_id : 0;
338 }
339 
// Inserts |line| into line_map_ (keyed by start rva).  Any part of an already
// recorded line that overlaps |line| is given up to |line|: recorded lines
// are erased, shortened, or split as needed so that |line| can be stored for
// its whole range.
void PDBSourceLineWriter::Lines::AddLine(const Line& line) {
  if (line_map_.empty()) {
    line_map_[line.rva] = line;
    return;
  }

  // Given an existing line in line_map_, remove it from line_map_ if it
  // overlaps with the line and add a new line for the non-overlap range. Return
  // true if there is an overlap.
  // old_line is taken by value on purpose: the map entry it was copied from
  // may be erased below while the copy is still being used.
  auto intercept = [&](Line old_line) {
    DWORD end = old_line.rva + old_line.length;
    // No overlap.
    if (old_line.rva >= line.rva + line.length || line.rva >= end)
      return false;
    // old_line is within the line.
    if (old_line.rva >= line.rva && end <= line.rva + line.length) {
      line_map_.erase(old_line.rva);
      return true;
    }
    // Then there is a partial overlap.
    if (old_line.rva < line.rva) {
      // old_line starts before the line: keep only its leading part...
      old_line.length -= end - line.rva;
      if (end > line.rva + line.length) {
        // ...and, if it also extends past the end of the line, keep its
        // trailing part as a separate entry.
        Line new_line = old_line;
        new_line.rva = line.rva + line.length;
        new_line.length = end - new_line.rva;
        line_map_[new_line.rva] = new_line;
      }
    } else {
      // old_line starts inside the line and ends after it: re-key it so that
      // it starts where the line ends.
      line_map_.erase(old_line.rva);
      old_line.length -= line.rva + line.length - old_line.rva;
      old_line.rva = line.rva + line.length;
    }
    line_map_[old_line.rva] = old_line;
    return true;
  };

  bool is_intercept;
  // Use a loop in cases that there are multiple lines within the given line.
  do {
    auto iter = line_map_.lower_bound(line.rva);
    if (iter == line_map_.end()) {
      // Every recorded line starts below the given line, so only the last one
      // can overlap it.  The map may have been emptied by earlier iterations.
      if (!line_map_.empty()) {
        --iter;
        intercept(iter->second);
      }
      break;
    }
    is_intercept = false;
    if (iter != line_map_.begin()) {
      // Check if the given line overlaps a line with smaller rva in the map.
      auto prev = line_map_.lower_bound(line.rva);
      --prev;
      is_intercept = intercept(prev->second);
    }
    // Check if the given line overlaps a line with greater or equal rva in the
    // map. Using operator |= here since it's possible that there are multiple
    // lines with greater rva in the map overlap with the given line.
    is_intercept |= intercept(iter->second);
  } while (is_intercept);
  line_map_[line.rva] = line;
}
402 
PDBSourceLineWriter(bool handle_inline)403 PDBSourceLineWriter::PDBSourceLineWriter(bool handle_inline)
404     : output_(NULL), handle_inline_(handle_inline) {}
405 
// Calls Close() when the writer is destroyed.
PDBSourceLineWriter::~PDBSourceLineWriter() {
  Close();
}
409 
SetCodeFile(const wstring & exe_file)410 bool PDBSourceLineWriter::SetCodeFile(const wstring& exe_file) {
411   if (code_file_.empty()) {
412     code_file_ = exe_file;
413     return true;
414   }
415   // Setting a different code file path is an error.  It is success only if the
416   // file paths are the same.
417   return exe_file == code_file_;
418 }
419 
Open(const wstring & file,FileFormat format)420 bool PDBSourceLineWriter::Open(const wstring& file, FileFormat format) {
421   Close();
422   code_file_.clear();
423 
424   if (FAILED(CoInitialize(NULL))) {
425     fprintf(stderr, "CoInitialize failed\n");
426     return false;
427   }
428 
429   CComPtr<IDiaDataSource> data_source;
430   if (!CreateDiaDataSourceInstance(data_source)) {
431     const int kGuidSize = 64;
432     wchar_t classid[kGuidSize] = {0};
433     StringFromGUID2(CLSID_DiaSource, classid, kGuidSize);
434     fprintf(stderr, "CoCreateInstance CLSID_DiaSource %S failed "
435             "(msdia*.dll unregistered?)\n", classid);
436     return false;
437   }
438 
439   HRESULT from_pdb_result;
440   HRESULT for_exe_result;
441   const wchar_t* file_name = file.c_str();
442   switch (format) {
443     case PDB_FILE:
444       from_pdb_result = data_source->loadDataFromPdb(file_name);
445       if (FAILED(from_pdb_result)) {
446         PrintOpenError(from_pdb_result, "loadDataFromPdb", file_name);
447         return false;
448       }
449       break;
450     case EXE_FILE:
451       for_exe_result = data_source->loadDataForExe(file_name, NULL, NULL);
452       if (FAILED(for_exe_result)) {
453         PrintOpenError(for_exe_result, "loadDataForExe", file_name);
454         return false;
455       }
456       code_file_ = file;
457       break;
458     case ANY_FILE:
459       from_pdb_result = data_source->loadDataFromPdb(file_name);
460       if (FAILED(from_pdb_result)) {
461         for_exe_result = data_source->loadDataForExe(file_name, NULL, NULL);
462         if (FAILED(for_exe_result)) {
463           PrintOpenError(from_pdb_result, "loadDataFromPdb", file_name);
464           PrintOpenError(for_exe_result, "loadDataForExe", file_name);
465           return false;
466         }
467         code_file_ = file;
468       }
469       break;
470     default:
471       fprintf(stderr, "Unknown file format\n");
472       return false;
473   }
474 
475   if (FAILED(data_source->openSession(&session_))) {
476     fprintf(stderr, "openSession failed\n");
477   }
478 
479   return true;
480 }
481 
GetLine(IDiaLineNumber * dia_line,Line * line) const482 bool PDBSourceLineWriter::GetLine(IDiaLineNumber* dia_line, Line* line) const {
483   if (FAILED(dia_line->get_relativeVirtualAddress(&line->rva))) {
484     fprintf(stderr, "failed to get line rva\n");
485     return false;
486   }
487 
488   if (FAILED(dia_line->get_length(&line->length))) {
489     fprintf(stderr, "failed to get line code length\n");
490     return false;
491   }
492 
493   DWORD dia_source_id;
494   if (FAILED(dia_line->get_sourceFileId(&dia_source_id))) {
495     fprintf(stderr, "failed to get line source file id\n");
496     return false;
497   }
498   // duplicate file names are coalesced to share one ID
499   line->file_id = GetRealFileID(dia_source_id);
500 
501   if (FAILED(dia_line->get_lineNumber(&line->line_num))) {
502     fprintf(stderr, "failed to get line number\n");
503     return false;
504   }
505   return true;
506 }
507 
GetLines(IDiaEnumLineNumbers * lines,Lines * line_list) const508 bool PDBSourceLineWriter::GetLines(IDiaEnumLineNumbers* lines,
509                                    Lines* line_list) const {
510   CComPtr<IDiaLineNumber> line;
511   ULONG count;
512 
513   while (SUCCEEDED(lines->Next(1, &line, &count)) && count == 1) {
514     Line l;
515     if (!GetLine(line, &l))
516       return false;
517     // Silently ignore zero-length lines.
518     if (l.length != 0)
519       line_list->AddLine(l);
520     line.Release();
521   }
522   return true;
523 }
524 
PrintLines(const Lines & lines) const525 void PDBSourceLineWriter::PrintLines(const Lines& lines) const {
526   // The line number format is:
527   // <rva> <line number> <source file id>
528   for (const auto& kv : lines.GetLineMap()) {
529     const Line& l = kv.second;
530     AddressRangeVector ranges;
531     MapAddressRange(image_map_, AddressRange(l.rva, l.length), &ranges);
532     for (auto& range : ranges) {
533       fprintf(output_, "%lx %lx %lu %lu\n", range.rva, range.length, l.line_num,
534               l.file_id);
535     }
536   }
537 }
538 
PrintFunction(IDiaSymbol * function,IDiaSymbol * block,bool has_multiple_symbols)539 bool PDBSourceLineWriter::PrintFunction(IDiaSymbol* function,
540                                         IDiaSymbol* block,
541                                         bool has_multiple_symbols) {
542   // The function format is:
543   // FUNC <address> <length> <param_stack_size> <function>
544   DWORD rva;
545   if (FAILED(block->get_relativeVirtualAddress(&rva))) {
546     fprintf(stderr, "couldn't get rva\n");
547     return false;
548   }
549 
550   ULONGLONG length;
551   if (FAILED(block->get_length(&length))) {
552     fprintf(stderr, "failed to get function length\n");
553     return false;
554   }
555 
556   if (length == 0) {
557     // Silently ignore zero-length functions, which can infrequently pop up.
558     return true;
559   }
560 
561   CComBSTR name;
562   int stack_param_size;
563   if (!GetSymbolFunctionName(function, &name, &stack_param_size)) {
564     return false;
565   }
566 
567   // If the decorated name didn't give the parameter size, try to
568   // calculate it.
569   if (stack_param_size < 0) {
570     stack_param_size = GetFunctionStackParamSize(function);
571   }
572 
573   AddressRangeVector ranges;
574   MapAddressRange(image_map_, AddressRange(rva, static_cast<DWORD>(length)),
575                   &ranges);
576   for (size_t i = 0; i < ranges.size(); ++i) {
577     const char* optional_multiple_field = has_multiple_symbols ? "m " : "";
578     fprintf(output_, "FUNC %s%lx %lx %x %ws\n", optional_multiple_field,
579             ranges[i].rva, ranges[i].length, stack_param_size, name.m_str);
580   }
581 
582   CComPtr<IDiaEnumLineNumbers> lines;
583   if (FAILED(session_->findLinesByRVA(rva, DWORD(length), &lines))) {
584     return false;
585   }
586 
587   // Get top level lines first, which later may be split into multiple smaller
588   // lines if any inline exists in their ranges if we want to handle inline.
589   Lines line_list;
590   if (!GetLines(lines, &line_list)) {
591     return false;
592   }
593   if (handle_inline_) {
594     vector<unique_ptr<Inline>> inlines;
595     if (!GetInlines(block, &line_list, 0, &inlines)) {
596       return false;
597     }
598     PrintInlines(inlines);
599   }
600   PrintLines(line_list);
601   return true;
602 }
603 
PrintSourceFiles()604 bool PDBSourceLineWriter::PrintSourceFiles() {
605   CComPtr<IDiaSymbol> global;
606   if (FAILED(session_->get_globalScope(&global))) {
607     fprintf(stderr, "get_globalScope failed\n");
608     return false;
609   }
610 
611   CComPtr<IDiaEnumSymbols> compilands;
612   if (FAILED(global->findChildren(SymTagCompiland, NULL,
613                                   nsNone, &compilands))) {
614     fprintf(stderr, "findChildren failed\n");
615     return false;
616   }
617 
618   // Print a dummy file with id equals 0 to represent unknown file, because
619   // inline records might have unknown call site.
620   fwprintf(output_, L"FILE %d unknown file\n", 0);
621 
622   CComPtr<IDiaSymbol> compiland;
623   ULONG count;
624   while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
625     CComPtr<IDiaEnumSourceFiles> source_files;
626     if (FAILED(session_->findFile(compiland, NULL, nsNone, &source_files))) {
627       return false;
628     }
629     CComPtr<IDiaSourceFile> file;
630     while (SUCCEEDED(source_files->Next(1, &file, &count)) && count == 1) {
631       DWORD file_id;
632       if (FAILED(file->get_uniqueId(&file_id))) {
633         return false;
634       }
635 
636       CComBSTR file_name;
637       if (FAILED(file->get_fileName(&file_name))) {
638         return false;
639       }
640 
641       wstring file_name_string(file_name);
642       if (!FileIDIsCached(file_name_string)) {
643         // this is a new file name, cache it and output a FILE line.
644         CacheFileID(file_name_string, file_id);
645         fwprintf(output_, L"FILE %d %ws\n", file_id, file_name_string.c_str());
646       } else {
647         // this file name has already been seen, just save this
648         // ID for later lookup.
649         StoreDuplicateFileID(file_name_string, file_id);
650       }
651       file.Release();
652     }
653     compiland.Release();
654   }
655   return true;
656 }
657 
PrintFunctions()658 bool PDBSourceLineWriter::PrintFunctions() {
659   ULONG count = 0;
660   DWORD rva = 0;
661   CComPtr<IDiaSymbol> global;
662   HRESULT hr;
663 
664   if (FAILED(session_->get_globalScope(&global))) {
665     fprintf(stderr, "get_globalScope failed\n");
666     return false;
667   }
668 
669   CComPtr<IDiaEnumSymbols> symbols = NULL;
670 
671   // Find all function symbols first.
672   SymbolMap rva_symbol;
673   hr = global->findChildren(SymTagFunction, NULL, nsNone, &symbols);
674 
675   if (SUCCEEDED(hr)) {
676     CComPtr<IDiaSymbol> symbol = NULL;
677 
678     while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) {
679       if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) {
680         // Potentially record this as the canonical symbol for this rva.
681         MaybeRecordSymbol(rva, symbol, false, &rva_symbol);
682       } else {
683         fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n");
684         return false;
685       }
686 
687       symbol.Release();
688     }
689 
690     symbols.Release();
691   }
692 
693   // Find all public symbols and record public symbols that are not also private
694   // symbols.
695   hr = global->findChildren(SymTagPublicSymbol, NULL, nsNone, &symbols);
696 
697   if (SUCCEEDED(hr)) {
698     CComPtr<IDiaSymbol> symbol = NULL;
699 
700     while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) {
701       if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) {
702         // Potentially record this as the canonical symbol for this rva.
703         MaybeRecordSymbol(rva, symbol, true, &rva_symbol);
704       } else {
705         fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n");
706         return false;
707       }
708 
709       symbol.Release();
710     }
711 
712     symbols.Release();
713   }
714 
715   // For each rva, dump the selected symbol at the address.
716   SymbolMap::iterator it;
717   for (it = rva_symbol.begin(); it != rva_symbol.end(); ++it) {
718     CComPtr<IDiaSymbol> symbol = it->second.symbol;
719     // Only print public symbols if there is no function symbol for the address.
720     if (!it->second.is_public) {
721       if (!PrintFunction(symbol, symbol, it->second.is_multiple))
722         return false;
723     } else {
724       if (!PrintCodePublicSymbol(symbol, it->second.is_multiple))
725         return false;
726     }
727   }
728 
729   // When building with PGO, the compiler can split functions into
730   // "hot" and "cold" blocks, and move the "cold" blocks out to separate
731   // pages, so the function can be noncontiguous. To find these blocks,
732   // we have to iterate over all the compilands, and then find blocks
733   // that are children of them. We can then find the lexical parents
734   // of those blocks and print out an extra FUNC line for blocks
735   // that are not contained in their parent functions.
736   CComPtr<IDiaEnumSymbols> compilands;
737   if (FAILED(global->findChildren(SymTagCompiland, NULL,
738                                   nsNone, &compilands))) {
739     fprintf(stderr, "findChildren failed on the global\n");
740     return false;
741   }
742 
743   CComPtr<IDiaSymbol> compiland;
744   while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
745     CComPtr<IDiaEnumSymbols> blocks;
746     if (FAILED(compiland->findChildren(SymTagBlock, NULL,
747                                        nsNone, &blocks))) {
748       fprintf(stderr, "findChildren failed on a compiland\n");
749       return false;
750     }
751 
752     CComPtr<IDiaSymbol> block;
753     while (SUCCEEDED(blocks->Next(1, &block, &count)) && count == 1) {
754       // find this block's lexical parent function
755       CComPtr<IDiaSymbol> parent;
756       DWORD tag;
757       if (SUCCEEDED(block->get_lexicalParent(&parent)) &&
758           SUCCEEDED(parent->get_symTag(&tag)) &&
759           tag == SymTagFunction) {
760         // now get the block's offset and the function's offset and size,
761         // and determine if the block is outside of the function
762         DWORD func_rva, block_rva;
763         ULONGLONG func_length;
764         if (SUCCEEDED(block->get_relativeVirtualAddress(&block_rva)) &&
765             SUCCEEDED(parent->get_relativeVirtualAddress(&func_rva)) &&
766             SUCCEEDED(parent->get_length(&func_length))) {
767           if (block_rva < func_rva || block_rva > (func_rva + func_length)) {
768             if (!PrintFunction(parent, block, false)) {
769               return false;
770             }
771           }
772         }
773       }
774       parent.Release();
775       block.Release();
776     }
777     blocks.Release();
778     compiland.Release();
779   }
780 
781   global.Release();
782   return true;
783 }
784 
PrintInlineOrigins() const785 void PDBSourceLineWriter::PrintInlineOrigins() const {
786   struct OriginCompare {
787     bool operator()(const InlineOrigin lhs, const InlineOrigin rhs) const {
788       return lhs.id < rhs.id;
789     }
790   };
791   set<InlineOrigin, OriginCompare> origins;
792   // Sort by origin id.
793   for (auto const& origin : inline_origins_)
794     origins.insert(origin.second);
795   for (auto o : origins) {
796     fprintf(output_, "INLINE_ORIGIN %d %ls\n", o.id, o.name.c_str());
797   }
798 }
799 
GetInlines(IDiaSymbol * block,Lines * line_list,int inline_nest_level,vector<unique_ptr<Inline>> * inlines)800 bool PDBSourceLineWriter::GetInlines(IDiaSymbol* block,
801                                      Lines* line_list,
802                                      int inline_nest_level,
803                                      vector<unique_ptr<Inline>>* inlines) {
804   CComPtr<IDiaEnumSymbols> inline_callsites;
805   if (FAILED(block->findChildrenEx(SymTagInlineSite, nullptr, nsNone,
806                                    &inline_callsites))) {
807     return false;
808   }
809   ULONG count;
810   CComPtr<IDiaSymbol> callsite;
811   while (SUCCEEDED(inline_callsites->Next(1, &callsite, &count)) &&
812          count == 1) {
813     unique_ptr<Inline> new_inline(new Inline(inline_nest_level));
814     CComPtr<IDiaEnumLineNumbers> lines;
815     // All inlinee lines have the same file id.
816     DWORD file_id = 0;
817     DWORD call_site_line = 0;
818     if (FAILED(session_->findInlineeLines(callsite, &lines))) {
819       return false;
820     }
821     CComPtr<IDiaLineNumber> dia_line;
822     while (SUCCEEDED(lines->Next(1, &dia_line, &count)) && count == 1) {
823       Line line;
824       if (!GetLine(dia_line, &line)) {
825         return false;
826       }
827       // Silently ignore zero-length lines.
828       if (line.length != 0) {
829         // Use the first line num and file id at rva as this inline's call site
830         // line number, because after adding lines it may be changed to inner
831         // line number and inner file id.
832         if (call_site_line == 0)
833           call_site_line = line_list->GetLineNum(line.rva);
834         if (file_id == 0)
835           file_id = line_list->GetFileId(line.rva);
836         line_list->AddLine(line);
837         new_inline->ExtendRanges(line);
838       }
839       dia_line.Release();
840     }
841     BSTR name;
842     callsite->get_name(&name);
843     if (SysStringLen(name) == 0) {
844       name = SysAllocString(L"<name omitted>");
845     }
846     auto iter = inline_origins_.find(name);
847     if (iter == inline_origins_.end()) {
848       InlineOrigin origin;
849       origin.id = inline_origins_.size();
850       origin.name = name;
851       inline_origins_[name] = origin;
852     }
853     new_inline->SetOriginId(inline_origins_[name].id);
854     new_inline->SetCallSiteLine(call_site_line);
855     new_inline->SetCallSiteFileId(file_id);
856     // Go to next level.
857     vector<unique_ptr<Inline>> child_inlines;
858     if (!GetInlines(callsite, line_list, inline_nest_level + 1,
859                     &child_inlines)) {
860       return false;
861     }
862     new_inline->SetChildInlines(std::move(child_inlines));
863     inlines->push_back(std::move(new_inline));
864     callsite.Release();
865   }
866   return true;
867 }
868 
PrintInlines(const vector<unique_ptr<Inline>> & inlines) const869 void PDBSourceLineWriter::PrintInlines(
870     const vector<unique_ptr<Inline>>& inlines) const {
871   for (const unique_ptr<Inline>& in : inlines) {
872     in->Print(output_);
873   }
874 }
875 
876 #undef max
877 
PrintFrameDataUsingPDB()878 bool PDBSourceLineWriter::PrintFrameDataUsingPDB() {
879   // It would be nice if it were possible to output frame data alongside the
880   // associated function, as is done with line numbers, but the DIA API
881   // doesn't make it possible to get the frame data in that way.
882 
883   CComPtr<IDiaEnumFrameData> frame_data_enum;
884   if (!FindTable(session_, &frame_data_enum))
885     return false;
886 
887   DWORD last_type = std::numeric_limits<DWORD>::max();
888   DWORD last_rva = std::numeric_limits<DWORD>::max();
889   DWORD last_code_size = 0;
890   DWORD last_prolog_size = std::numeric_limits<DWORD>::max();
891 
892   CComPtr<IDiaFrameData> frame_data;
893   ULONG count = 0;
894   while (SUCCEEDED(frame_data_enum->Next(1, &frame_data, &count)) &&
895          count == 1) {
896     DWORD type;
897     if (FAILED(frame_data->get_type(&type)))
898       return false;
899 
900     DWORD rva;
901     if (FAILED(frame_data->get_relativeVirtualAddress(&rva)))
902       return false;
903 
904     DWORD code_size;
905     if (FAILED(frame_data->get_lengthBlock(&code_size)))
906       return false;
907 
908     DWORD prolog_size;
909     if (FAILED(frame_data->get_lengthProlog(&prolog_size)))
910       return false;
911 
912     // parameter_size is the size of parameters passed on the stack.  If any
913     // parameters are not passed on the stack (such as in registers), their
914     // sizes will not be included in parameter_size.
915     DWORD parameter_size;
916     if (FAILED(frame_data->get_lengthParams(&parameter_size)))
917       return false;
918 
919     DWORD saved_register_size;
920     if (FAILED(frame_data->get_lengthSavedRegisters(&saved_register_size)))
921       return false;
922 
923     DWORD local_size;
924     if (FAILED(frame_data->get_lengthLocals(&local_size)))
925       return false;
926 
927     // get_maxStack can return S_FALSE, just use 0 in that case.
928     DWORD max_stack_size = 0;
929     if (FAILED(frame_data->get_maxStack(&max_stack_size)))
930       return false;
931 
932     // get_programString can return S_FALSE, indicating that there is no
933     // program string.  In that case, check whether %ebp is used.
934     HRESULT program_string_result;
935     CComBSTR program_string;
936     if (FAILED(program_string_result = frame_data->get_program(
937         &program_string))) {
938       return false;
939     }
940 
941     // get_allocatesBasePointer can return S_FALSE, treat that as though
942     // %ebp is not used.
943     BOOL allocates_base_pointer = FALSE;
944     if (program_string_result != S_OK) {
945       if (FAILED(frame_data->get_allocatesBasePointer(
946           &allocates_base_pointer))) {
947         return false;
948       }
949     }
950 
951     // Only print out a line if type, rva, code_size, or prolog_size have
952     // changed from the last line.  It is surprisingly common (especially in
953     // system library PDBs) for DIA to return a series of identical
954     // IDiaFrameData objects.  For kernel32.pdb from Windows XP SP2 on x86,
955     // this check reduces the size of the dumped symbol file by a third.
956     if (type != last_type || rva != last_rva || code_size != last_code_size ||
957         prolog_size != last_prolog_size) {
958       // The prolog and the code portions of the frame have to be treated
959       // independently as they may have independently changed in size, or may
960       // even have been split.
961       // NOTE: If epilog size is ever non-zero, we have to do something
962       //     similar with it.
963 
964       // Figure out where the prolog bytes have landed.
965       AddressRangeVector prolog_ranges;
966       if (prolog_size > 0) {
967         MapAddressRange(image_map_, AddressRange(rva, prolog_size),
968                         &prolog_ranges);
969       }
970 
971       // And figure out where the code bytes have landed.
972       AddressRangeVector code_ranges;
973       MapAddressRange(image_map_,
974                       AddressRange(rva + prolog_size,
975                                    code_size - prolog_size),
976                       &code_ranges);
977 
978       struct FrameInfo {
979         DWORD rva;
980         DWORD code_size;
981         DWORD prolog_size;
982       };
983       std::vector<FrameInfo> frame_infos;
984 
985       // Special case: The prolog and the code bytes remain contiguous. This is
986       // only done for compactness of the symbol file, and we could actually
987       // be outputting independent frame info for the prolog and code portions.
988       if (prolog_ranges.size() == 1 && code_ranges.size() == 1 &&
989           prolog_ranges[0].end() == code_ranges[0].rva) {
990         FrameInfo fi = { prolog_ranges[0].rva,
991                          prolog_ranges[0].length + code_ranges[0].length,
992                          prolog_ranges[0].length };
993         frame_infos.push_back(fi);
994       } else {
995         // Otherwise we output the prolog and code frame info independently.
996         for (size_t i = 0; i < prolog_ranges.size(); ++i) {
997           FrameInfo fi = { prolog_ranges[i].rva,
998                            prolog_ranges[i].length,
999                            prolog_ranges[i].length };
1000           frame_infos.push_back(fi);
1001         }
1002         for (size_t i = 0; i < code_ranges.size(); ++i) {
1003           FrameInfo fi = { code_ranges[i].rva, code_ranges[i].length, 0 };
1004           frame_infos.push_back(fi);
1005         }
1006       }
1007 
1008       for (size_t i = 0; i < frame_infos.size(); ++i) {
1009         const FrameInfo& fi(frame_infos[i]);
1010         fprintf(output_, "STACK WIN %lx %lx %lx %lx %x %lx %lx %lx %lx %d ",
1011                 type, fi.rva, fi.code_size, fi.prolog_size,
1012                 0 /* epilog_size */, parameter_size, saved_register_size,
1013                 local_size, max_stack_size, program_string_result == S_OK);
1014         if (program_string_result == S_OK) {
1015           fprintf(output_, "%ws\n", program_string.m_str);
1016         } else {
1017           fprintf(output_, "%d\n", allocates_base_pointer);
1018         }
1019       }
1020 
1021       last_type = type;
1022       last_rva = rva;
1023       last_code_size = code_size;
1024       last_prolog_size = prolog_size;
1025     }
1026 
1027     frame_data.Release();
1028   }
1029 
1030   return true;
1031 }
1032 
PrintFrameDataUsingEXE()1033 bool PDBSourceLineWriter::PrintFrameDataUsingEXE() {
1034   if (code_file_.empty() && !FindPEFile()) {
1035     fprintf(stderr, "Couldn't locate EXE or DLL file.\n");
1036     return false;
1037   }
1038 
1039   return PrintPEFrameData(code_file_, output_);
1040 }
1041 
PrintFrameData()1042 bool PDBSourceLineWriter::PrintFrameData() {
1043   PDBModuleInfo info;
1044   if (GetModuleInfo(&info) && info.cpu == L"x86_64") {
1045     return PrintFrameDataUsingEXE();
1046   }
1047   return PrintFrameDataUsingPDB();
1048 }
1049 
PrintCodePublicSymbol(IDiaSymbol * symbol,bool has_multiple_symbols)1050 bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol* symbol,
1051                                                 bool has_multiple_symbols) {
1052   BOOL is_code;
1053   if (FAILED(symbol->get_code(&is_code))) {
1054     return false;
1055   }
1056   if (!is_code) {
1057     return true;
1058   }
1059 
1060   DWORD rva;
1061   if (FAILED(symbol->get_relativeVirtualAddress(&rva))) {
1062     return false;
1063   }
1064 
1065   CComBSTR name;
1066   int stack_param_size;
1067   if (!GetSymbolFunctionName(symbol, &name, &stack_param_size)) {
1068     return false;
1069   }
1070 
1071   AddressRangeVector ranges;
1072   MapAddressRange(image_map_, AddressRange(rva, 1), &ranges);
1073   for (size_t i = 0; i < ranges.size(); ++i) {
1074     const char* optional_multiple_field = has_multiple_symbols ? "m " : "";
1075     fprintf(output_, "PUBLIC %s%lx %x %ws\n", optional_multiple_field,
1076             ranges[i].rva, stack_param_size > 0 ? stack_param_size : 0,
1077             name.m_str);
1078   }
1079 
1080   // Now walk the function in the original untranslated space, asking DIA
1081   // what function is at that location, stepping through OMAP blocks. If
1082   // we're still in the same function, emit another entry, because the
1083   // symbol could have been split into multiple pieces. If we've gotten to
1084   // another symbol in the original address space, then we're done for
1085   // this symbol. See https://crbug.com/678874.
1086   for (;;) {
1087     // This steps to the next block in the original image. Simply doing
1088     // rva++ would also be correct, but would emit tons of unnecessary
1089     // entries.
1090     rva = image_map_.subsequent_rva_block[rva];
1091     if (rva == 0)
1092       break;
1093 
1094     CComPtr<IDiaSymbol> next_sym = NULL;
1095     LONG displacement;
1096     if (FAILED(session_->findSymbolByRVAEx(rva, SymTagPublicSymbol, &next_sym,
1097                                            &displacement))) {
1098       break;
1099     }
1100 
1101     if (!SymbolsMatch(symbol, next_sym))
1102       break;
1103 
1104     AddressRangeVector next_ranges;
1105     MapAddressRange(image_map_, AddressRange(rva, 1), &next_ranges);
1106     for (size_t i = 0; i < next_ranges.size(); ++i) {
1107       fprintf(output_, "PUBLIC %lx %x %ws\n", next_ranges[i].rva,
1108               stack_param_size > 0 ? stack_param_size : 0, name.m_str);
1109     }
1110   }
1111 
1112   return true;
1113 }
1114 
PrintPDBInfo()1115 bool PDBSourceLineWriter::PrintPDBInfo() {
1116   PDBModuleInfo info;
1117   if (!GetModuleInfo(&info)) {
1118     return false;
1119   }
1120 
1121   // Hard-code "windows" for the OS because that's the only thing that makes
1122   // sense for PDB files.  (This might not be strictly correct for Windows CE
1123   // support, but we don't care about that at the moment.)
1124   fprintf(output_, "MODULE windows %ws %ws %ws\n",
1125           info.cpu.c_str(), info.debug_identifier.c_str(),
1126           info.debug_file.c_str());
1127 
1128   return true;
1129 }
1130 
PrintPEInfo()1131 bool PDBSourceLineWriter::PrintPEInfo() {
1132   PEModuleInfo info;
1133   if (!GetPEInfo(&info)) {
1134     return false;
1135   }
1136 
1137   fprintf(output_, "INFO CODE_ID %ws %ws\n",
1138           info.code_identifier.c_str(),
1139           info.code_file.c_str());
1140   return true;
1141 }
1142 
// wcstol_positive_strict is sort of like wcstol, but much stricter.  string
// should point to a null-terminated string containing only decimal digits.
// If every character is a decimal digit, there are no leading zeroes (the
// string "0" itself is fine), and the value fits in an int without
// overflowing, *result is set to the value and this function returns true.
// Otherwise, this function returns false and *result is left untouched.  An
// empty string is accepted and converts to 0.  This is an alternative to the
// strtol, atoi, and scanf families, which are not as strict about input and
// in some cases don't provide a good way for the caller to determine if a
// conversion was successful.
static bool wcstol_positive_strict(wchar_t* string, int* result) {
  int value = 0;
  for (const wchar_t* c = string; *c != L'\0'; ++c) {
    // Validate the character before doing any arithmetic with it.
    if (*c < L'0' || *c > L'9') {
      return false;
    }
    const int digit = *c - L'0';
    // Detect overflow before it happens: signed overflow is undefined
    // behavior, so it can't reliably be spotted after the fact.  The
    // condition is equivalent to value * 10 + digit > INT_MAX.
    if (value > (INT_MAX - digit) / 10) {
      return false;
    }
    value = value * 10 + digit;
    // Forbid leading zeroes unless the string is just "0".
    if (value == 0 && *(c + 1) != L'\0') {
      return false;
    }
  }
  *result = value;
  return true;
}
1179 
FindPEFile()1180 bool PDBSourceLineWriter::FindPEFile() {
1181   CComPtr<IDiaSymbol> global;
1182   if (FAILED(session_->get_globalScope(&global))) {
1183     fprintf(stderr, "get_globalScope failed\n");
1184     return false;
1185   }
1186 
1187   CComBSTR symbols_file;
1188   if (SUCCEEDED(global->get_symbolsFileName(&symbols_file))) {
1189     wstring file(symbols_file);
1190 
1191     // Look for an EXE or DLL file.
1192     const wchar_t* extensions[] = { L"exe", L"dll" };
1193     for (size_t i = 0; i < sizeof(extensions) / sizeof(extensions[0]); i++) {
1194       size_t dot_pos = file.find_last_of(L".");
1195       if (dot_pos != wstring::npos) {
1196         file.replace(dot_pos + 1, wstring::npos, extensions[i]);
1197         // Check if this file exists.
1198         if (GetFileAttributesW(file.c_str()) != INVALID_FILE_ATTRIBUTES) {
1199           code_file_ = file;
1200           return true;
1201         }
1202       }
1203     }
1204   }
1205 
1206   return false;
1207 }
1208 
1209 // static
GetSymbolFunctionName(IDiaSymbol * function,BSTR * name,int * stack_param_size)1210 bool PDBSourceLineWriter::GetSymbolFunctionName(IDiaSymbol* function,
1211                                                 BSTR* name,
1212                                                 int* stack_param_size) {
1213   *stack_param_size = -1;
1214 
1215   // Use get_undecoratedNameEx to get readable C++ names with arguments.
1216   if (function->get_undecoratedNameEx(kUndecorateOptions, name) != S_OK) {
1217     if (function->get_name(name) != S_OK) {
1218       fprintf(stderr, "failed to get function name\n");
1219       return false;
1220     }
1221 
1222     // It's possible for get_name to return an empty string, so
1223     // special-case that.
1224     if (wcscmp(*name, L"") == 0) {
1225       SysFreeString(*name);
1226       // dwarf_cu_to_module.cc uses "<name omitted>", so match that.
1227       *name = SysAllocString(L"<name omitted>");
1228       return true;
1229     }
1230 
1231     // If a name comes from get_name because no undecorated form existed,
1232     // it's already formatted properly to be used as output.  Don't do any
1233     // additional processing.
1234     //
1235     // MSVC7's DIA seems to not undecorate names in as many cases as MSVC8's.
1236     // This will result in calling get_name for some C++ symbols, so
1237     // all of the parameter and return type information may not be included in
1238     // the name string.
1239   } else {
1240     StripLlvmSuffixAndUndecorate(name);
1241 
1242     // C++ uses a bogus "void" argument for functions and methods that don't
1243     // take any parameters.  Take it out of the undecorated name because it's
1244     // ugly and unnecessary.
1245     const wchar_t* replace_string = L"(void)";
1246     const size_t replace_length = wcslen(replace_string);
1247     const wchar_t* replacement_string = L"()";
1248     size_t length = wcslen(*name);
1249     if (length >= replace_length) {
1250       wchar_t* name_end = *name + length - replace_length;
1251       if (wcscmp(name_end, replace_string) == 0) {
1252         WindowsStringUtils::safe_wcscpy(name_end, replace_length,
1253                                         replacement_string);
1254         length = wcslen(*name);
1255       }
1256     }
1257 
1258     // Undecorate names used for stdcall and fastcall.  These names prefix
1259     // the identifier with '_' (stdcall) or '@' (fastcall) and suffix it
1260     // with '@' followed by the number of bytes of parameters, in decimal.
1261     // If such a name is found, take note of the size and undecorate it.
1262     // Only do this for names that aren't C++, which is determined based on
1263     // whether the undecorated name contains any ':' or '(' characters.
1264     if (!wcschr(*name, ':') && !wcschr(*name, '(') &&
1265         (*name[0] == '_' || *name[0] == '@')) {
1266       wchar_t* last_at = wcsrchr(*name + 1, '@');
1267       if (last_at && wcstol_positive_strict(last_at + 1, stack_param_size)) {
1268         // If this function adheres to the fastcall convention, it accepts up
1269         // to the first 8 bytes of parameters in registers (%ecx and %edx).
1270         // We're only interested in the stack space used for parameters, so
1271         // so subtract 8 and don't let the size go below 0.
1272         if (*name[0] == '@') {
1273           if (*stack_param_size > 8) {
1274             *stack_param_size -= 8;
1275           } else {
1276             *stack_param_size = 0;
1277           }
1278         }
1279 
1280         // Undecorate the name by moving it one character to the left in its
1281         // buffer, and terminating it where the last '@' had been.
1282         WindowsStringUtils::safe_wcsncpy(*name, length,
1283                                          *name + 1, last_at - *name - 1);
1284      } else if (*name[0] == '_') {
1285         // This symbol's name is encoded according to the cdecl rules.  The
1286         // name doesn't end in a '@' character followed by a decimal positive
1287         // integer, so it's not a stdcall name.  Strip off the leading
1288         // underscore.
1289         WindowsStringUtils::safe_wcsncpy(*name, length, *name + 1, length);
1290       }
1291     }
1292   }
1293 
1294   return true;
1295 }
1296 
1297 // static
GetFunctionStackParamSize(IDiaSymbol * function)1298 int PDBSourceLineWriter::GetFunctionStackParamSize(IDiaSymbol* function) {
1299   // This implementation is highly x86-specific.
1300 
1301   // Gather the symbols corresponding to data.
1302   CComPtr<IDiaEnumSymbols> data_children;
1303   if (FAILED(function->findChildren(SymTagData, NULL, nsNone,
1304                                     &data_children))) {
1305     return 0;
1306   }
1307 
1308   // lowest_base is the lowest %ebp-relative byte offset used for a parameter.
1309   // highest_end is one greater than the highest offset (i.e. base + length).
1310   // Stack parameters are assumed to be contiguous, because in reality, they
1311   // are.
1312   int lowest_base = INT_MAX;
1313   int highest_end = INT_MIN;
1314 
1315   CComPtr<IDiaSymbol> child;
1316   DWORD count;
1317   while (SUCCEEDED(data_children->Next(1, &child, &count)) && count == 1) {
1318     // If any operation fails at this point, just proceed to the next child.
1319     // Use the next_child label instead of continue because child needs to
1320     // be released before it's reused.  Declare constructable/destructable
1321     // types early to avoid gotos that cross initializations.
1322     CComPtr<IDiaSymbol> child_type;
1323 
1324     // DataIsObjectPtr is only used for |this|.  Because |this| can be passed
1325     // as a stack parameter, look for it in addition to traditional
1326     // parameters.
1327     DWORD child_kind;
1328     if (FAILED(child->get_dataKind(&child_kind)) ||
1329         (child_kind != DataIsParam && child_kind != DataIsObjectPtr)) {
1330       goto next_child;
1331     }
1332 
1333     // Only concentrate on register-relative parameters.  Parameters may also
1334     // be enregistered (passed directly in a register), but those don't
1335     // consume any stack space, so they're not of interest.
1336     DWORD child_location_type;
1337     if (FAILED(child->get_locationType(&child_location_type)) ||
1338         child_location_type != LocIsRegRel) {
1339       goto next_child;
1340     }
1341 
1342     // Of register-relative parameters, the only ones that make any sense are
1343     // %ebp- or %esp-relative.  Note that MSVC's debugging information always
1344     // gives parameters as %ebp-relative even when a function doesn't use a
1345     // traditional frame pointer and stack parameters are accessed relative to
1346     // %esp, so just look for %ebp-relative parameters.  If you wanted to
1347     // access parameters, you'd probably want to treat these %ebp-relative
1348     // offsets as if they were relative to %esp before a function's prolog
1349     // executed.
1350     DWORD child_register;
1351     if (FAILED(child->get_registerId(&child_register)) ||
1352         child_register != CV_REG_EBP) {
1353       goto next_child;
1354     }
1355 
1356     LONG child_register_offset;
1357     if (FAILED(child->get_offset(&child_register_offset))) {
1358       goto next_child;
1359     }
1360 
1361     // IDiaSymbol::get_type can succeed but still pass back a NULL value.
1362     if (FAILED(child->get_type(&child_type)) || !child_type) {
1363       goto next_child;
1364     }
1365 
1366     ULONGLONG child_length;
1367     if (FAILED(child_type->get_length(&child_length))) {
1368       goto next_child;
1369     }
1370 
1371     // Extra scope to avoid goto jumping over variable initialization
1372     {
1373       int child_end = child_register_offset + static_cast<ULONG>(child_length);
1374       if (child_register_offset < lowest_base) {
1375         lowest_base = child_register_offset;
1376       }
1377       if (child_end > highest_end) {
1378         highest_end = child_end;
1379       }
1380     }
1381 
1382 next_child:
1383     child.Release();
1384   }
1385 
1386   int param_size = 0;
1387   // Make sure lowest_base isn't less than 4, because [%esp+4] is the lowest
1388   // possible address to find a stack parameter before executing a function's
1389   // prolog (see above).  Some optimizations cause parameter offsets to be
1390   // lower than 4, but we're not concerned with those because we're only
1391   // looking for parameters contained in addresses higher than where the
1392   // return address is stored.
1393   if (lowest_base < 4) {
1394     lowest_base = 4;
1395   }
1396   if (highest_end > lowest_base) {
1397     // All stack parameters are pushed as at least 4-byte quantities.  If the
1398     // last type was narrower than 4 bytes, promote it.  This assumes that all
1399     // parameters' offsets are 4-byte-aligned, which is always the case.  Only
1400     // worry about the last type, because we're not summing the type sizes,
1401     // just looking at the lowest and highest offsets.
1402     int remainder = highest_end % 4;
1403     if (remainder) {
1404       highest_end += 4 - remainder;
1405     }
1406 
1407     param_size = highest_end - lowest_base;
1408   }
1409 
1410   return param_size;
1411 }
1412 
WriteSymbols(FILE * symbol_file)1413 bool PDBSourceLineWriter::WriteSymbols(FILE* symbol_file) {
1414   output_ = symbol_file;
1415 
1416   // Load the OMAP information, and disable auto-translation of addresses in
1417   // preference of doing it ourselves.
1418   OmapData omap_data;
1419   if (!GetOmapDataAndDisableTranslation(session_, &omap_data))
1420     return false;
1421   BuildImageMap(omap_data, &image_map_);
1422 
1423   bool ret = PrintPDBInfo();
1424   // This is not a critical piece of the symbol file.
1425   PrintPEInfo();
1426   ret = ret && PrintSourceFiles() && PrintFunctions() && PrintFrameData();
1427   PrintInlineOrigins();
1428 
1429   output_ = NULL;
1430   return ret;
1431 }
1432 
Close()1433 void PDBSourceLineWriter::Close() {
1434   if (session_ != nullptr) {
1435     session_.Release();
1436   }
1437 }
1438 
GetModuleInfo(PDBModuleInfo * info)1439 bool PDBSourceLineWriter::GetModuleInfo(PDBModuleInfo* info) {
1440   if (!info) {
1441     return false;
1442   }
1443 
1444   info->debug_file.clear();
1445   info->debug_identifier.clear();
1446   info->cpu.clear();
1447 
1448   CComPtr<IDiaSymbol> global;
1449   if (FAILED(session_->get_globalScope(&global))) {
1450     return false;
1451   }
1452 
1453   DWORD machine_type;
1454   // get_machineType can return S_FALSE.
1455   if (global->get_machineType(&machine_type) == S_OK) {
1456     // The documentation claims that get_machineType returns a value from
1457     // the CV_CPU_TYPE_e enumeration, but that's not the case.
1458     // Instead, it returns one of the IMAGE_FILE_MACHINE values as
1459     // defined here:
1460     // http://msdn.microsoft.com/en-us/library/ms680313%28VS.85%29.aspx
1461     info->cpu = FileHeaderMachineToCpuString(static_cast<WORD>(machine_type));
1462   } else {
1463     // Unexpected, but handle gracefully.
1464     info->cpu = L"unknown";
1465   }
1466 
1467   // DWORD* and int* are not compatible.  This is clean and avoids a cast.
1468   DWORD age;
1469   if (FAILED(global->get_age(&age))) {
1470     return false;
1471   }
1472 
1473   bool uses_guid;
1474   if (!UsesGUID(&uses_guid)) {
1475     return false;
1476   }
1477 
1478   if (uses_guid) {
1479     GUID guid;
1480     if (FAILED(global->get_guid(&guid))) {
1481       return false;
1482     }
1483 
1484     info->debug_identifier = GenerateDebugIdentifier(age, guid);
1485   } else {
1486     DWORD signature;
1487     if (FAILED(global->get_signature(&signature))) {
1488       return false;
1489     }
1490 
1491     info->debug_identifier = GenerateDebugIdentifier(age, signature);
1492   }
1493 
1494   CComBSTR debug_file_string;
1495   if (FAILED(global->get_symbolsFileName(&debug_file_string))) {
1496     return false;
1497   }
1498   info->debug_file =
1499       WindowsStringUtils::GetBaseName(wstring(debug_file_string));
1500 
1501   return true;
1502 }
1503 
GetPEInfo(PEModuleInfo * info)1504 bool PDBSourceLineWriter::GetPEInfo(PEModuleInfo* info) {
1505   if (!info) {
1506     return false;
1507   }
1508 
1509   if (code_file_.empty() && !FindPEFile()) {
1510     fprintf(stderr, "Couldn't locate EXE or DLL file.\n");
1511     return false;
1512   }
1513 
1514   return ReadPEInfo(code_file_, info);
1515 }
1516 
UsesGUID(bool * uses_guid)1517 bool PDBSourceLineWriter::UsesGUID(bool* uses_guid) {
1518   if (!uses_guid)
1519     return false;
1520 
1521   CComPtr<IDiaSymbol> global;
1522   if (FAILED(session_->get_globalScope(&global)))
1523     return false;
1524 
1525   GUID guid;
1526   if (FAILED(global->get_guid(&guid)))
1527     return false;
1528 
1529   DWORD signature;
1530   if (FAILED(global->get_signature(&signature)))
1531     return false;
1532 
1533   // There are two possibilities for guid: either it's a real 128-bit GUID
1534   // as identified in a code module by a new-style CodeView record, or it's
1535   // a 32-bit signature (timestamp) as identified by an old-style record.
1536   // See MDCVInfoPDB70 and MDCVInfoPDB20 in minidump_format.h.
1537   //
1538   // Because DIA doesn't provide a way to directly determine whether a module
1539   // uses a GUID or a 32-bit signature, this code checks whether the first 32
1540   // bits of guid are the same as the signature, and if the rest of guid is
1541   // zero.  If so, then with a pretty high degree of certainty, there's an
1542   // old-style CodeView record in use.  This method will only falsely find an
1543   // an old-style CodeView record if a real 128-bit GUID has its first 32
1544   // bits set the same as the module's signature (timestamp) and the rest of
1545   // the GUID is set to 0.  This is highly unlikely.
1546 
1547   GUID signature_guid = {signature};  // 0-initializes other members
1548   *uses_guid = !IsEqualGUID(guid, signature_guid);
1549   return true;
1550 }
1551 
1552 }  // namespace google_breakpad
1553