1 // Copyright 2006 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 #ifdef HAVE_CONFIG_H
30 #include <config.h> // Must come first
31 #endif
32
33 #include "common/windows/pdb_source_line_writer.h"
34
35 #include <windows.h>
36 #include <winnt.h>
37 #include <atlbase.h>
38 #include <dia2.h>
39 #include <diacreate.h>
40 #include <ImageHlp.h>
41 #include <stdio.h>
42
43 #include <algorithm>
44 #include <limits>
45 #include <map>
46 #include <memory>
47 #include <set>
48 #include <utility>
49
50 #include "common/windows/dia_util.h"
51 #include "common/windows/guid_string.h"
52 #include "common/windows/pe_util.h"
53 #include "common/windows/string_utils-inl.h"
54
55 // This constant may be missing from DbgHelp.h. See the documentation for
56 // IDiaSymbol::get_undecoratedNameEx.
57 #ifndef UNDNAME_NO_ECSU
58 #define UNDNAME_NO_ECSU 0x8000 // Suppresses enum/class/struct/union.
59 #endif // UNDNAME_NO_ECSU
60
61 namespace google_breakpad {
62
63 namespace {
64
65 using std::set;
66 using std::unique_ptr;
67 using std::vector;
68
69 // The symbol (among possibly many) selected to represent an rva.
70 struct SelectedSymbol {
SelectedSymbolgoogle_breakpad::__anon4ddd43a00111::SelectedSymbol71 SelectedSymbol(const CComPtr<IDiaSymbol>& symbol, bool is_public)
72 : symbol(symbol), is_public(is_public), is_multiple(false) {}
73
74 // The symbol to use for an rva.
75 CComPtr<IDiaSymbol> symbol;
76 // Whether this is a public or function symbol.
77 bool is_public;
78 // Whether the rva has multiple associated symbols. An rva will correspond to
79 // multiple symbols in the case of linker identical symbol folding.
80 bool is_multiple;
81 };
82
83 // Maps rva to the symbol to use for that address.
84 typedef std::map<DWORD, SelectedSymbol> SymbolMap;
85
86 // Record this in the map as the selected symbol for the rva if it satisfies the
87 // necessary conditions.
MaybeRecordSymbol(DWORD rva,const CComPtr<IDiaSymbol> symbol,bool is_public,SymbolMap * map)88 void MaybeRecordSymbol(DWORD rva,
89 const CComPtr<IDiaSymbol> symbol,
90 bool is_public,
91 SymbolMap* map) {
92 SymbolMap::iterator loc = map->find(rva);
93 if (loc == map->end()) {
94 map->insert(std::make_pair(rva, SelectedSymbol(symbol, is_public)));
95 return;
96 }
97
98 // Prefer function symbols to public symbols.
99 if (is_public && !loc->second.is_public) {
100 return;
101 }
102
103 loc->second.is_multiple = true;
104
105 // Take the 'least' symbol by lexicographical order of the decorated name. We
106 // use the decorated rather than undecorated name because computing the latter
107 // is expensive.
108 BSTR current_name, new_name;
109 loc->second.symbol->get_name(¤t_name);
110 symbol->get_name(&new_name);
111 if (wcscmp(new_name, current_name) < 0) {
112 loc->second.symbol = symbol;
113 loc->second.is_public = is_public;
114 }
115 }
116
117
118
SymbolsMatch(IDiaSymbol * a,IDiaSymbol * b)119 bool SymbolsMatch(IDiaSymbol* a, IDiaSymbol* b) {
120 DWORD a_section, a_offset, b_section, b_offset;
121 if (FAILED(a->get_addressSection(&a_section)) ||
122 FAILED(a->get_addressOffset(&a_offset)) ||
123 FAILED(b->get_addressSection(&b_section)) ||
124 FAILED(b->get_addressOffset(&b_offset)))
125 return false;
126 return a_section == b_section && a_offset == b_offset;
127 }
128
CreateDiaDataSourceInstance(CComPtr<IDiaDataSource> & data_source)129 bool CreateDiaDataSourceInstance(CComPtr<IDiaDataSource>& data_source) {
130 if (SUCCEEDED(data_source.CoCreateInstance(CLSID_DiaSource))) {
131 return true;
132 }
133
134 class DECLSPEC_UUID("B86AE24D-BF2F-4ac9-B5A2-34B14E4CE11D") DiaSource100;
135 class DECLSPEC_UUID("761D3BCD-1304-41D5-94E8-EAC54E4AC172") DiaSource110;
136 class DECLSPEC_UUID("3BFCEA48-620F-4B6B-81F7-B9AF75454C7D") DiaSource120;
137 class DECLSPEC_UUID("E6756135-1E65-4D17-8576-610761398C3C") DiaSource140;
138
139 // If the CoCreateInstance call above failed, msdia*.dll is not registered.
140 // We can try loading the DLL corresponding to the #included DIA SDK, but
141 // the DIA headers don't provide a version. Lets try to figure out which DIA
142 // version we're compiling against by comparing CLSIDs.
143 const wchar_t* msdia_dll = nullptr;
144 if (CLSID_DiaSource == _uuidof(DiaSource100)) {
145 msdia_dll = L"msdia100.dll";
146 } else if (CLSID_DiaSource == _uuidof(DiaSource110)) {
147 msdia_dll = L"msdia110.dll";
148 } else if (CLSID_DiaSource == _uuidof(DiaSource120)) {
149 msdia_dll = L"msdia120.dll";
150 } else if (CLSID_DiaSource == _uuidof(DiaSource140)) {
151 msdia_dll = L"msdia140.dll";
152 }
153
154 if (msdia_dll &&
155 SUCCEEDED(NoRegCoCreate(msdia_dll, CLSID_DiaSource, IID_IDiaDataSource,
156 reinterpret_cast<void**>(&data_source)))) {
157 return true;
158 }
159
160 return false;
161 }
162
// Flags passed to UnDecorateSymbolNameW() when a symbol name is demangled in
// StripLlvmSuffixAndUndecorate().
const DWORD kUndecorateOptions = UNDNAME_NO_MS_KEYWORDS |
                                 UNDNAME_NO_FUNCTION_RETURNS |
                                 UNDNAME_NO_ALLOCATION_MODEL |
                                 UNDNAME_NO_ALLOCATION_LANGUAGE |
                                 UNDNAME_NO_THISTYPE |
                                 UNDNAME_NO_ACCESS_SPECIFIERS |
                                 UNDNAME_NO_THROW_SIGNATURES |
                                 UNDNAME_NO_MEMBER_TYPE |
                                 UNDNAME_NO_RETURN_UDT_MODEL |
                                 UNDNAME_NO_ECSU;

// Evaluates to the number of elements in the array |f| (total size divided by
// the size of the first element).
#define arraysize(f) (sizeof(f) / sizeof(*f))
175
StripLlvmSuffixAndUndecorate(BSTR * name)176 void StripLlvmSuffixAndUndecorate(BSTR* name) {
177 // LLVM sometimes puts a suffix on symbols to give them a globally unique
178 // name. The suffix is either some string preceded by a period (like in the
179 // Itanium ABI; also on Windows this is safe since periods are otherwise
180 // never part of mangled names), or a dollar sign followed by a 32-char hex
181 // string (this should go away in future LLVM versions). Strip such suffixes
182 // and try demangling again.
183 //
184 //
185 // Example symbol names with such suffixes:
186 //
187 // ?foo@@YAXXZ$5520c83448162c04f2b239db4b5a2c61
188 // ?foo@@YAXXZ.llvm.13040715209719948753
189
190 if (**name != L'?')
191 return; // The name is already demangled.
192
193 for (size_t i = 0, len = wcslen(*name); i < len; i++) {
194 wchar_t c = (*name)[i];
195
196 if (c == L'.' || (c == L'$' && len - i == 32 + 1)) {
197 (*name)[i] = L'\0';
198 wchar_t undecorated[1024];
199 DWORD res = UnDecorateSymbolNameW(*name, undecorated,
200 arraysize(undecorated),
201 kUndecorateOptions);
202 if (res == 0 || undecorated[0] == L'?') {
203 // Demangling failed; restore the symbol name and return.
204 (*name)[i] = c;
205 return;
206 }
207
208 SysFreeString(*name);
209 *name = SysAllocString(undecorated);
210 return;
211 }
212 }
213 }
214
215 // Prints the error message related to the error code as seen in
216 // Microsoft's MSVS documentation for loadDataFromPdb and loadDataForExe.
PrintOpenError(HRESULT hr,const char * fn_name,const wchar_t * file)217 void PrintOpenError(HRESULT hr, const char* fn_name, const wchar_t* file) {
218 switch (hr) {
219 case E_PDB_NOT_FOUND:
220 fprintf(stderr, "%s: Failed to open %ws, or the file has an "
221 "invalid format.\n", fn_name, file);
222 break;
223 case E_PDB_FORMAT:
224 fprintf(stderr, "%s: Attempted to access %ws with an obsolete "
225 "format.\n", fn_name, file);
226 break;
227 case E_PDB_INVALID_SIG:
228 fprintf(stderr, "%s: Signature does not match for %ws.\n", fn_name,
229 file);
230 break;
231 case E_PDB_INVALID_AGE:
232 fprintf(stderr, "%s: Age does not match for %ws.\n", fn_name, file);
233 break;
234 case E_INVALIDARG:
235 fprintf(stderr, "%s: Invalid parameter for %ws.\n", fn_name, file);
236 break;
237 case E_UNEXPECTED:
238 fprintf(stderr, "%s: Data source has already been prepared for %ws.\n",
239 fn_name, file);
240 break;
241 default:
242 fprintf(stderr, "%s: Unexpected error 0x%lx, file: %ws.\n",
243 fn_name, hr, file);
244 break;
245 }
246 }
247
248 } // namespace
249
Inline(int inline_nest_level)250 PDBSourceLineWriter::Inline::Inline(int inline_nest_level)
251 : inline_nest_level_(inline_nest_level) {}
252
SetOriginId(int origin_id)253 void PDBSourceLineWriter::Inline::SetOriginId(int origin_id) {
254 origin_id_ = origin_id;
255 }
256
ExtendRanges(const Line & line)257 void PDBSourceLineWriter::Inline::ExtendRanges(const Line& line) {
258 if (ranges_.empty()) {
259 ranges_[line.rva] = line.length;
260 return;
261 }
262 auto iter = ranges_.lower_bound(line.rva);
263 // There is no overlap if this function is called with inlinee lines from
264 // the same callsite.
265 if (iter == ranges_.begin()) {
266 return;
267 }
268 if (line.rva + line.length == iter->first) {
269 // If they are connected, merge their ranges into one.
270 DWORD length = line.length + iter->second;
271 ranges_.erase(iter);
272 ranges_[line.rva] = length;
273 } else {
274 --iter;
275 if (iter->first + iter->second == line.rva) {
276 ranges_[iter->first] = iter->second + line.length;
277 } else {
278 ranges_[line.rva] = line.length;
279 }
280 }
281 }
282
SetCallSiteLine(DWORD call_site_line)283 void PDBSourceLineWriter::Inline::SetCallSiteLine(DWORD call_site_line) {
284 call_site_line_ = call_site_line;
285 }
286
SetCallSiteFileId(DWORD call_site_file_id)287 void PDBSourceLineWriter::Inline::SetCallSiteFileId(DWORD call_site_file_id) {
288 call_site_file_id_ = call_site_file_id;
289 }
290
SetChildInlines(vector<unique_ptr<Inline>> child_inlines)291 void PDBSourceLineWriter::Inline::SetChildInlines(
292 vector<unique_ptr<Inline>> child_inlines) {
293 child_inlines_ = std::move(child_inlines);
294 }
295
Print(FILE * output) const296 void PDBSourceLineWriter::Inline::Print(FILE* output) const {
297 // Ignore INLINE record that doesn't have any range.
298 if (ranges_.empty())
299 return;
300 fprintf(output, "INLINE %d %lu %lu %d", inline_nest_level_, call_site_line_,
301 call_site_file_id_, origin_id_);
302 for (const auto& r : ranges_) {
303 fprintf(output, " %lx %lx", r.first, r.second);
304 }
305 fprintf(output, "\n");
306 for (const unique_ptr<Inline>& in : child_inlines_) {
307 in->Print(output);
308 }
309 }
310
GetLine(DWORD rva) const311 const PDBSourceLineWriter::Line* PDBSourceLineWriter::Lines::GetLine(
312 DWORD rva) const {
313 auto iter = line_map_.find(rva);
314 if (iter == line_map_.end()) {
315 // If not found exact rva, check if it's within any range.
316 iter = line_map_.lower_bound(rva);
317 if (iter == line_map_.begin())
318 return nullptr;
319 --iter;
320 auto l = iter->second;
321 // This happens when there is no top level lines cover this rva (e.g. empty
322 // lines found for the function). Then we don't know the call site line
323 // number for this inlined function.
324 if (rva >= l.rva + l.length)
325 return nullptr;
326 }
327 return &iter->second;
328 }
329
GetLineNum(DWORD rva) const330 DWORD PDBSourceLineWriter::Lines::GetLineNum(DWORD rva) const {
331 const Line* line = GetLine(rva);
332 return line ? line->line_num : 0;
333 }
334
GetFileId(DWORD rva) const335 DWORD PDBSourceLineWriter::Lines::GetFileId(DWORD rva) const {
336 const Line* line = GetLine(rva);
337 return line ? line->file_id : 0;
338 }
339
// Inserts |line| into line_map_, keyed by its rva. Lines already in the map
// that overlap |line|'s [rva, rva + length) range are first removed, trimmed
// or split so that the parts of them outside |line| are kept and |line|
// itself ends up owning its whole range.
void PDBSourceLineWriter::Lines::AddLine(const Line& line) {
  if (line_map_.empty()) {
    line_map_[line.rva] = line;
    return;
  }

  // Given an existing line in line_map_, remove it from line_map_ if it
  // overlaps with the line and add a new line for the non-overlap range. Return
  // true if there is an overlap.
  // The existing line is taken by value because its map entry may be erased
  // while it is still being used here.
  auto intercept = [&](Line old_line) {
    DWORD end = old_line.rva + old_line.length;
    // No overlap.
    if (old_line.rva >= line.rva + line.length || line.rva >= end)
      return false;
    // old_line is within the line.
    if (old_line.rva >= line.rva && end <= line.rva + line.length) {
      line_map_.erase(old_line.rva);
      return true;
    }
    // Then there is a overlap.
    if (old_line.rva < line.rva) {
      // old_line starts before the line: keep its head, which now ends where
      // the line begins.
      old_line.length -= end - line.rva;
      if (end > line.rva + line.length) {
        // old_line also extends past the end of the line: keep that tail as a
        // separate entry starting right after the line.
        Line new_line = old_line;
        new_line.rva = line.rva + line.length;
        new_line.length = end - new_line.rva;
        line_map_[new_line.rva] = new_line;
      }
    } else {
      // old_line starts inside the line and ends after it: re-key the part
      // that lies past the end of the line.
      line_map_.erase(old_line.rva);
      old_line.length -= line.rva + line.length - old_line.rva;
      old_line.rva = line.rva + line.length;
    }
    line_map_[old_line.rva] = old_line;
    return true;
  };

  bool is_intercept;
  // Use a loop in cases that there are multiple lines within the given line.
  do {
    auto iter = line_map_.lower_bound(line.rva);
    if (iter == line_map_.end()) {
      // Every remaining entry starts before the line, so only the last one
      // can still overlap it.
      if (!line_map_.empty()) {
        --iter;
        intercept(iter->second);
      }
      break;
    }
    is_intercept = false;
    if (iter != line_map_.begin()) {
      // Check if the given line overlaps a line with smaller in the map.
      auto prev = line_map_.lower_bound(line.rva);
      --prev;
      is_intercept = intercept(prev->second);
    }
    // Check if the given line overlaps a line with greater or equal rva in the
    // map. Using operator |= here since it's possible that there are multiple
    // lines with greater rva in the map overlap with the given line.
    is_intercept |= intercept(iter->second);
  } while (is_intercept);
  line_map_[line.rva] = line;
}
402
PDBSourceLineWriter(bool handle_inline)403 PDBSourceLineWriter::PDBSourceLineWriter(bool handle_inline)
404 : output_(NULL), handle_inline_(handle_inline) {}
405
~PDBSourceLineWriter()406 PDBSourceLineWriter::~PDBSourceLineWriter() {
407 Close();
408 }
409
SetCodeFile(const wstring & exe_file)410 bool PDBSourceLineWriter::SetCodeFile(const wstring& exe_file) {
411 if (code_file_.empty()) {
412 code_file_ = exe_file;
413 return true;
414 }
415 // Setting a different code file path is an error. It is success only if the
416 // file paths are the same.
417 return exe_file == code_file_;
418 }
419
Open(const wstring & file,FileFormat format)420 bool PDBSourceLineWriter::Open(const wstring& file, FileFormat format) {
421 Close();
422 code_file_.clear();
423
424 if (FAILED(CoInitialize(NULL))) {
425 fprintf(stderr, "CoInitialize failed\n");
426 return false;
427 }
428
429 CComPtr<IDiaDataSource> data_source;
430 if (!CreateDiaDataSourceInstance(data_source)) {
431 const int kGuidSize = 64;
432 wchar_t classid[kGuidSize] = {0};
433 StringFromGUID2(CLSID_DiaSource, classid, kGuidSize);
434 fprintf(stderr, "CoCreateInstance CLSID_DiaSource %S failed "
435 "(msdia*.dll unregistered?)\n", classid);
436 return false;
437 }
438
439 HRESULT from_pdb_result;
440 HRESULT for_exe_result;
441 const wchar_t* file_name = file.c_str();
442 switch (format) {
443 case PDB_FILE:
444 from_pdb_result = data_source->loadDataFromPdb(file_name);
445 if (FAILED(from_pdb_result)) {
446 PrintOpenError(from_pdb_result, "loadDataFromPdb", file_name);
447 return false;
448 }
449 break;
450 case EXE_FILE:
451 for_exe_result = data_source->loadDataForExe(file_name, NULL, NULL);
452 if (FAILED(for_exe_result)) {
453 PrintOpenError(for_exe_result, "loadDataForExe", file_name);
454 return false;
455 }
456 code_file_ = file;
457 break;
458 case ANY_FILE:
459 from_pdb_result = data_source->loadDataFromPdb(file_name);
460 if (FAILED(from_pdb_result)) {
461 for_exe_result = data_source->loadDataForExe(file_name, NULL, NULL);
462 if (FAILED(for_exe_result)) {
463 PrintOpenError(from_pdb_result, "loadDataFromPdb", file_name);
464 PrintOpenError(for_exe_result, "loadDataForExe", file_name);
465 return false;
466 }
467 code_file_ = file;
468 }
469 break;
470 default:
471 fprintf(stderr, "Unknown file format\n");
472 return false;
473 }
474
475 if (FAILED(data_source->openSession(&session_))) {
476 fprintf(stderr, "openSession failed\n");
477 }
478
479 return true;
480 }
481
GetLine(IDiaLineNumber * dia_line,Line * line) const482 bool PDBSourceLineWriter::GetLine(IDiaLineNumber* dia_line, Line* line) const {
483 if (FAILED(dia_line->get_relativeVirtualAddress(&line->rva))) {
484 fprintf(stderr, "failed to get line rva\n");
485 return false;
486 }
487
488 if (FAILED(dia_line->get_length(&line->length))) {
489 fprintf(stderr, "failed to get line code length\n");
490 return false;
491 }
492
493 DWORD dia_source_id;
494 if (FAILED(dia_line->get_sourceFileId(&dia_source_id))) {
495 fprintf(stderr, "failed to get line source file id\n");
496 return false;
497 }
498 // duplicate file names are coalesced to share one ID
499 line->file_id = GetRealFileID(dia_source_id);
500
501 if (FAILED(dia_line->get_lineNumber(&line->line_num))) {
502 fprintf(stderr, "failed to get line number\n");
503 return false;
504 }
505 return true;
506 }
507
GetLines(IDiaEnumLineNumbers * lines,Lines * line_list) const508 bool PDBSourceLineWriter::GetLines(IDiaEnumLineNumbers* lines,
509 Lines* line_list) const {
510 CComPtr<IDiaLineNumber> line;
511 ULONG count;
512
513 while (SUCCEEDED(lines->Next(1, &line, &count)) && count == 1) {
514 Line l;
515 if (!GetLine(line, &l))
516 return false;
517 // Silently ignore zero-length lines.
518 if (l.length != 0)
519 line_list->AddLine(l);
520 line.Release();
521 }
522 return true;
523 }
524
PrintLines(const Lines & lines) const525 void PDBSourceLineWriter::PrintLines(const Lines& lines) const {
526 // The line number format is:
527 // <rva> <line number> <source file id>
528 for (const auto& kv : lines.GetLineMap()) {
529 const Line& l = kv.second;
530 AddressRangeVector ranges;
531 MapAddressRange(image_map_, AddressRange(l.rva, l.length), &ranges);
532 for (auto& range : ranges) {
533 fprintf(output_, "%lx %lx %lu %lu\n", range.rva, range.length, l.line_num,
534 l.file_id);
535 }
536 }
537 }
538
PrintFunction(IDiaSymbol * function,IDiaSymbol * block,bool has_multiple_symbols)539 bool PDBSourceLineWriter::PrintFunction(IDiaSymbol* function,
540 IDiaSymbol* block,
541 bool has_multiple_symbols) {
542 // The function format is:
543 // FUNC <address> <length> <param_stack_size> <function>
544 DWORD rva;
545 if (FAILED(block->get_relativeVirtualAddress(&rva))) {
546 fprintf(stderr, "couldn't get rva\n");
547 return false;
548 }
549
550 ULONGLONG length;
551 if (FAILED(block->get_length(&length))) {
552 fprintf(stderr, "failed to get function length\n");
553 return false;
554 }
555
556 if (length == 0) {
557 // Silently ignore zero-length functions, which can infrequently pop up.
558 return true;
559 }
560
561 CComBSTR name;
562 int stack_param_size;
563 if (!GetSymbolFunctionName(function, &name, &stack_param_size)) {
564 return false;
565 }
566
567 // If the decorated name didn't give the parameter size, try to
568 // calculate it.
569 if (stack_param_size < 0) {
570 stack_param_size = GetFunctionStackParamSize(function);
571 }
572
573 AddressRangeVector ranges;
574 MapAddressRange(image_map_, AddressRange(rva, static_cast<DWORD>(length)),
575 &ranges);
576 for (size_t i = 0; i < ranges.size(); ++i) {
577 const char* optional_multiple_field = has_multiple_symbols ? "m " : "";
578 fprintf(output_, "FUNC %s%lx %lx %x %ws\n", optional_multiple_field,
579 ranges[i].rva, ranges[i].length, stack_param_size, name.m_str);
580 }
581
582 CComPtr<IDiaEnumLineNumbers> lines;
583 if (FAILED(session_->findLinesByRVA(rva, DWORD(length), &lines))) {
584 return false;
585 }
586
587 // Get top level lines first, which later may be split into multiple smaller
588 // lines if any inline exists in their ranges if we want to handle inline.
589 Lines line_list;
590 if (!GetLines(lines, &line_list)) {
591 return false;
592 }
593 if (handle_inline_) {
594 vector<unique_ptr<Inline>> inlines;
595 if (!GetInlines(block, &line_list, 0, &inlines)) {
596 return false;
597 }
598 PrintInlines(inlines);
599 }
600 PrintLines(line_list);
601 return true;
602 }
603
PrintSourceFiles()604 bool PDBSourceLineWriter::PrintSourceFiles() {
605 CComPtr<IDiaSymbol> global;
606 if (FAILED(session_->get_globalScope(&global))) {
607 fprintf(stderr, "get_globalScope failed\n");
608 return false;
609 }
610
611 CComPtr<IDiaEnumSymbols> compilands;
612 if (FAILED(global->findChildren(SymTagCompiland, NULL,
613 nsNone, &compilands))) {
614 fprintf(stderr, "findChildren failed\n");
615 return false;
616 }
617
618 // Print a dummy file with id equals 0 to represent unknown file, because
619 // inline records might have unknown call site.
620 fwprintf(output_, L"FILE %d unknown file\n", 0);
621
622 CComPtr<IDiaSymbol> compiland;
623 ULONG count;
624 while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
625 CComPtr<IDiaEnumSourceFiles> source_files;
626 if (FAILED(session_->findFile(compiland, NULL, nsNone, &source_files))) {
627 return false;
628 }
629 CComPtr<IDiaSourceFile> file;
630 while (SUCCEEDED(source_files->Next(1, &file, &count)) && count == 1) {
631 DWORD file_id;
632 if (FAILED(file->get_uniqueId(&file_id))) {
633 return false;
634 }
635
636 CComBSTR file_name;
637 if (FAILED(file->get_fileName(&file_name))) {
638 return false;
639 }
640
641 wstring file_name_string(file_name);
642 if (!FileIDIsCached(file_name_string)) {
643 // this is a new file name, cache it and output a FILE line.
644 CacheFileID(file_name_string, file_id);
645 fwprintf(output_, L"FILE %d %ws\n", file_id, file_name_string.c_str());
646 } else {
647 // this file name has already been seen, just save this
648 // ID for later lookup.
649 StoreDuplicateFileID(file_name_string, file_id);
650 }
651 file.Release();
652 }
653 compiland.Release();
654 }
655 return true;
656 }
657
PrintFunctions()658 bool PDBSourceLineWriter::PrintFunctions() {
659 ULONG count = 0;
660 DWORD rva = 0;
661 CComPtr<IDiaSymbol> global;
662 HRESULT hr;
663
664 if (FAILED(session_->get_globalScope(&global))) {
665 fprintf(stderr, "get_globalScope failed\n");
666 return false;
667 }
668
669 CComPtr<IDiaEnumSymbols> symbols = NULL;
670
671 // Find all function symbols first.
672 SymbolMap rva_symbol;
673 hr = global->findChildren(SymTagFunction, NULL, nsNone, &symbols);
674
675 if (SUCCEEDED(hr)) {
676 CComPtr<IDiaSymbol> symbol = NULL;
677
678 while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) {
679 if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) {
680 // Potentially record this as the canonical symbol for this rva.
681 MaybeRecordSymbol(rva, symbol, false, &rva_symbol);
682 } else {
683 fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n");
684 return false;
685 }
686
687 symbol.Release();
688 }
689
690 symbols.Release();
691 }
692
693 // Find all public symbols and record public symbols that are not also private
694 // symbols.
695 hr = global->findChildren(SymTagPublicSymbol, NULL, nsNone, &symbols);
696
697 if (SUCCEEDED(hr)) {
698 CComPtr<IDiaSymbol> symbol = NULL;
699
700 while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) {
701 if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) {
702 // Potentially record this as the canonical symbol for this rva.
703 MaybeRecordSymbol(rva, symbol, true, &rva_symbol);
704 } else {
705 fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n");
706 return false;
707 }
708
709 symbol.Release();
710 }
711
712 symbols.Release();
713 }
714
715 // For each rva, dump the selected symbol at the address.
716 SymbolMap::iterator it;
717 for (it = rva_symbol.begin(); it != rva_symbol.end(); ++it) {
718 CComPtr<IDiaSymbol> symbol = it->second.symbol;
719 // Only print public symbols if there is no function symbol for the address.
720 if (!it->second.is_public) {
721 if (!PrintFunction(symbol, symbol, it->second.is_multiple))
722 return false;
723 } else {
724 if (!PrintCodePublicSymbol(symbol, it->second.is_multiple))
725 return false;
726 }
727 }
728
729 // When building with PGO, the compiler can split functions into
730 // "hot" and "cold" blocks, and move the "cold" blocks out to separate
731 // pages, so the function can be noncontiguous. To find these blocks,
732 // we have to iterate over all the compilands, and then find blocks
733 // that are children of them. We can then find the lexical parents
734 // of those blocks and print out an extra FUNC line for blocks
735 // that are not contained in their parent functions.
736 CComPtr<IDiaEnumSymbols> compilands;
737 if (FAILED(global->findChildren(SymTagCompiland, NULL,
738 nsNone, &compilands))) {
739 fprintf(stderr, "findChildren failed on the global\n");
740 return false;
741 }
742
743 CComPtr<IDiaSymbol> compiland;
744 while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
745 CComPtr<IDiaEnumSymbols> blocks;
746 if (FAILED(compiland->findChildren(SymTagBlock, NULL,
747 nsNone, &blocks))) {
748 fprintf(stderr, "findChildren failed on a compiland\n");
749 return false;
750 }
751
752 CComPtr<IDiaSymbol> block;
753 while (SUCCEEDED(blocks->Next(1, &block, &count)) && count == 1) {
754 // find this block's lexical parent function
755 CComPtr<IDiaSymbol> parent;
756 DWORD tag;
757 if (SUCCEEDED(block->get_lexicalParent(&parent)) &&
758 SUCCEEDED(parent->get_symTag(&tag)) &&
759 tag == SymTagFunction) {
760 // now get the block's offset and the function's offset and size,
761 // and determine if the block is outside of the function
762 DWORD func_rva, block_rva;
763 ULONGLONG func_length;
764 if (SUCCEEDED(block->get_relativeVirtualAddress(&block_rva)) &&
765 SUCCEEDED(parent->get_relativeVirtualAddress(&func_rva)) &&
766 SUCCEEDED(parent->get_length(&func_length))) {
767 if (block_rva < func_rva || block_rva > (func_rva + func_length)) {
768 if (!PrintFunction(parent, block, false)) {
769 return false;
770 }
771 }
772 }
773 }
774 parent.Release();
775 block.Release();
776 }
777 blocks.Release();
778 compiland.Release();
779 }
780
781 global.Release();
782 return true;
783 }
784
PrintInlineOrigins() const785 void PDBSourceLineWriter::PrintInlineOrigins() const {
786 struct OriginCompare {
787 bool operator()(const InlineOrigin lhs, const InlineOrigin rhs) const {
788 return lhs.id < rhs.id;
789 }
790 };
791 set<InlineOrigin, OriginCompare> origins;
792 // Sort by origin id.
793 for (auto const& origin : inline_origins_)
794 origins.insert(origin.second);
795 for (auto o : origins) {
796 fprintf(output_, "INLINE_ORIGIN %d %ls\n", o.id, o.name.c_str());
797 }
798 }
799
GetInlines(IDiaSymbol * block,Lines * line_list,int inline_nest_level,vector<unique_ptr<Inline>> * inlines)800 bool PDBSourceLineWriter::GetInlines(IDiaSymbol* block,
801 Lines* line_list,
802 int inline_nest_level,
803 vector<unique_ptr<Inline>>* inlines) {
804 CComPtr<IDiaEnumSymbols> inline_callsites;
805 if (FAILED(block->findChildrenEx(SymTagInlineSite, nullptr, nsNone,
806 &inline_callsites))) {
807 return false;
808 }
809 ULONG count;
810 CComPtr<IDiaSymbol> callsite;
811 while (SUCCEEDED(inline_callsites->Next(1, &callsite, &count)) &&
812 count == 1) {
813 unique_ptr<Inline> new_inline(new Inline(inline_nest_level));
814 CComPtr<IDiaEnumLineNumbers> lines;
815 // All inlinee lines have the same file id.
816 DWORD file_id = 0;
817 DWORD call_site_line = 0;
818 if (FAILED(session_->findInlineeLines(callsite, &lines))) {
819 return false;
820 }
821 CComPtr<IDiaLineNumber> dia_line;
822 while (SUCCEEDED(lines->Next(1, &dia_line, &count)) && count == 1) {
823 Line line;
824 if (!GetLine(dia_line, &line)) {
825 return false;
826 }
827 // Silently ignore zero-length lines.
828 if (line.length != 0) {
829 // Use the first line num and file id at rva as this inline's call site
830 // line number, because after adding lines it may be changed to inner
831 // line number and inner file id.
832 if (call_site_line == 0)
833 call_site_line = line_list->GetLineNum(line.rva);
834 if (file_id == 0)
835 file_id = line_list->GetFileId(line.rva);
836 line_list->AddLine(line);
837 new_inline->ExtendRanges(line);
838 }
839 dia_line.Release();
840 }
841 BSTR name;
842 callsite->get_name(&name);
843 if (SysStringLen(name) == 0) {
844 name = SysAllocString(L"<name omitted>");
845 }
846 auto iter = inline_origins_.find(name);
847 if (iter == inline_origins_.end()) {
848 InlineOrigin origin;
849 origin.id = inline_origins_.size();
850 origin.name = name;
851 inline_origins_[name] = origin;
852 }
853 new_inline->SetOriginId(inline_origins_[name].id);
854 new_inline->SetCallSiteLine(call_site_line);
855 new_inline->SetCallSiteFileId(file_id);
856 // Go to next level.
857 vector<unique_ptr<Inline>> child_inlines;
858 if (!GetInlines(callsite, line_list, inline_nest_level + 1,
859 &child_inlines)) {
860 return false;
861 }
862 new_inline->SetChildInlines(std::move(child_inlines));
863 inlines->push_back(std::move(new_inline));
864 callsite.Release();
865 }
866 return true;
867 }
868
PrintInlines(const vector<unique_ptr<Inline>> & inlines) const869 void PDBSourceLineWriter::PrintInlines(
870 const vector<unique_ptr<Inline>>& inlines) const {
871 for (const unique_ptr<Inline>& in : inlines) {
872 in->Print(output_);
873 }
874 }
875
876 #undef max
877
PrintFrameDataUsingPDB()878 bool PDBSourceLineWriter::PrintFrameDataUsingPDB() {
879 // It would be nice if it were possible to output frame data alongside the
880 // associated function, as is done with line numbers, but the DIA API
881 // doesn't make it possible to get the frame data in that way.
882
883 CComPtr<IDiaEnumFrameData> frame_data_enum;
884 if (!FindTable(session_, &frame_data_enum))
885 return false;
886
887 DWORD last_type = std::numeric_limits<DWORD>::max();
888 DWORD last_rva = std::numeric_limits<DWORD>::max();
889 DWORD last_code_size = 0;
890 DWORD last_prolog_size = std::numeric_limits<DWORD>::max();
891
892 CComPtr<IDiaFrameData> frame_data;
893 ULONG count = 0;
894 while (SUCCEEDED(frame_data_enum->Next(1, &frame_data, &count)) &&
895 count == 1) {
896 DWORD type;
897 if (FAILED(frame_data->get_type(&type)))
898 return false;
899
900 DWORD rva;
901 if (FAILED(frame_data->get_relativeVirtualAddress(&rva)))
902 return false;
903
904 DWORD code_size;
905 if (FAILED(frame_data->get_lengthBlock(&code_size)))
906 return false;
907
908 DWORD prolog_size;
909 if (FAILED(frame_data->get_lengthProlog(&prolog_size)))
910 return false;
911
912 // parameter_size is the size of parameters passed on the stack. If any
913 // parameters are not passed on the stack (such as in registers), their
914 // sizes will not be included in parameter_size.
915 DWORD parameter_size;
916 if (FAILED(frame_data->get_lengthParams(¶meter_size)))
917 return false;
918
919 DWORD saved_register_size;
920 if (FAILED(frame_data->get_lengthSavedRegisters(&saved_register_size)))
921 return false;
922
923 DWORD local_size;
924 if (FAILED(frame_data->get_lengthLocals(&local_size)))
925 return false;
926
927 // get_maxStack can return S_FALSE, just use 0 in that case.
928 DWORD max_stack_size = 0;
929 if (FAILED(frame_data->get_maxStack(&max_stack_size)))
930 return false;
931
932 // get_programString can return S_FALSE, indicating that there is no
933 // program string. In that case, check whether %ebp is used.
934 HRESULT program_string_result;
935 CComBSTR program_string;
936 if (FAILED(program_string_result = frame_data->get_program(
937 &program_string))) {
938 return false;
939 }
940
941 // get_allocatesBasePointer can return S_FALSE, treat that as though
942 // %ebp is not used.
943 BOOL allocates_base_pointer = FALSE;
944 if (program_string_result != S_OK) {
945 if (FAILED(frame_data->get_allocatesBasePointer(
946 &allocates_base_pointer))) {
947 return false;
948 }
949 }
950
951 // Only print out a line if type, rva, code_size, or prolog_size have
952 // changed from the last line. It is surprisingly common (especially in
953 // system library PDBs) for DIA to return a series of identical
954 // IDiaFrameData objects. For kernel32.pdb from Windows XP SP2 on x86,
955 // this check reduces the size of the dumped symbol file by a third.
956 if (type != last_type || rva != last_rva || code_size != last_code_size ||
957 prolog_size != last_prolog_size) {
958 // The prolog and the code portions of the frame have to be treated
959 // independently as they may have independently changed in size, or may
960 // even have been split.
961 // NOTE: If epilog size is ever non-zero, we have to do something
962 // similar with it.
963
964 // Figure out where the prolog bytes have landed.
965 AddressRangeVector prolog_ranges;
966 if (prolog_size > 0) {
967 MapAddressRange(image_map_, AddressRange(rva, prolog_size),
968 &prolog_ranges);
969 }
970
971 // And figure out where the code bytes have landed.
972 AddressRangeVector code_ranges;
973 MapAddressRange(image_map_,
974 AddressRange(rva + prolog_size,
975 code_size - prolog_size),
976 &code_ranges);
977
978 struct FrameInfo {
979 DWORD rva;
980 DWORD code_size;
981 DWORD prolog_size;
982 };
983 std::vector<FrameInfo> frame_infos;
984
985 // Special case: The prolog and the code bytes remain contiguous. This is
986 // only done for compactness of the symbol file, and we could actually
987 // be outputting independent frame info for the prolog and code portions.
988 if (prolog_ranges.size() == 1 && code_ranges.size() == 1 &&
989 prolog_ranges[0].end() == code_ranges[0].rva) {
990 FrameInfo fi = { prolog_ranges[0].rva,
991 prolog_ranges[0].length + code_ranges[0].length,
992 prolog_ranges[0].length };
993 frame_infos.push_back(fi);
994 } else {
995 // Otherwise we output the prolog and code frame info independently.
996 for (size_t i = 0; i < prolog_ranges.size(); ++i) {
997 FrameInfo fi = { prolog_ranges[i].rva,
998 prolog_ranges[i].length,
999 prolog_ranges[i].length };
1000 frame_infos.push_back(fi);
1001 }
1002 for (size_t i = 0; i < code_ranges.size(); ++i) {
1003 FrameInfo fi = { code_ranges[i].rva, code_ranges[i].length, 0 };
1004 frame_infos.push_back(fi);
1005 }
1006 }
1007
1008 for (size_t i = 0; i < frame_infos.size(); ++i) {
1009 const FrameInfo& fi(frame_infos[i]);
1010 fprintf(output_, "STACK WIN %lx %lx %lx %lx %x %lx %lx %lx %lx %d ",
1011 type, fi.rva, fi.code_size, fi.prolog_size,
1012 0 /* epilog_size */, parameter_size, saved_register_size,
1013 local_size, max_stack_size, program_string_result == S_OK);
1014 if (program_string_result == S_OK) {
1015 fprintf(output_, "%ws\n", program_string.m_str);
1016 } else {
1017 fprintf(output_, "%d\n", allocates_base_pointer);
1018 }
1019 }
1020
1021 last_type = type;
1022 last_rva = rva;
1023 last_code_size = code_size;
1024 last_prolog_size = prolog_size;
1025 }
1026
1027 frame_data.Release();
1028 }
1029
1030 return true;
1031 }
1032
PrintFrameDataUsingEXE()1033 bool PDBSourceLineWriter::PrintFrameDataUsingEXE() {
1034 if (code_file_.empty() && !FindPEFile()) {
1035 fprintf(stderr, "Couldn't locate EXE or DLL file.\n");
1036 return false;
1037 }
1038
1039 return PrintPEFrameData(code_file_, output_);
1040 }
1041
PrintFrameData()1042 bool PDBSourceLineWriter::PrintFrameData() {
1043 PDBModuleInfo info;
1044 if (GetModuleInfo(&info) && info.cpu == L"x86_64") {
1045 return PrintFrameDataUsingEXE();
1046 }
1047 return PrintFrameDataUsingPDB();
1048 }
1049
PrintCodePublicSymbol(IDiaSymbol * symbol,bool has_multiple_symbols)1050 bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol* symbol,
1051 bool has_multiple_symbols) {
1052 BOOL is_code;
1053 if (FAILED(symbol->get_code(&is_code))) {
1054 return false;
1055 }
1056 if (!is_code) {
1057 return true;
1058 }
1059
1060 DWORD rva;
1061 if (FAILED(symbol->get_relativeVirtualAddress(&rva))) {
1062 return false;
1063 }
1064
1065 CComBSTR name;
1066 int stack_param_size;
1067 if (!GetSymbolFunctionName(symbol, &name, &stack_param_size)) {
1068 return false;
1069 }
1070
1071 AddressRangeVector ranges;
1072 MapAddressRange(image_map_, AddressRange(rva, 1), &ranges);
1073 for (size_t i = 0; i < ranges.size(); ++i) {
1074 const char* optional_multiple_field = has_multiple_symbols ? "m " : "";
1075 fprintf(output_, "PUBLIC %s%lx %x %ws\n", optional_multiple_field,
1076 ranges[i].rva, stack_param_size > 0 ? stack_param_size : 0,
1077 name.m_str);
1078 }
1079
1080 // Now walk the function in the original untranslated space, asking DIA
1081 // what function is at that location, stepping through OMAP blocks. If
1082 // we're still in the same function, emit another entry, because the
1083 // symbol could have been split into multiple pieces. If we've gotten to
1084 // another symbol in the original address space, then we're done for
1085 // this symbol. See https://crbug.com/678874.
1086 for (;;) {
1087 // This steps to the next block in the original image. Simply doing
1088 // rva++ would also be correct, but would emit tons of unnecessary
1089 // entries.
1090 rva = image_map_.subsequent_rva_block[rva];
1091 if (rva == 0)
1092 break;
1093
1094 CComPtr<IDiaSymbol> next_sym = NULL;
1095 LONG displacement;
1096 if (FAILED(session_->findSymbolByRVAEx(rva, SymTagPublicSymbol, &next_sym,
1097 &displacement))) {
1098 break;
1099 }
1100
1101 if (!SymbolsMatch(symbol, next_sym))
1102 break;
1103
1104 AddressRangeVector next_ranges;
1105 MapAddressRange(image_map_, AddressRange(rva, 1), &next_ranges);
1106 for (size_t i = 0; i < next_ranges.size(); ++i) {
1107 fprintf(output_, "PUBLIC %lx %x %ws\n", next_ranges[i].rva,
1108 stack_param_size > 0 ? stack_param_size : 0, name.m_str);
1109 }
1110 }
1111
1112 return true;
1113 }
1114
PrintPDBInfo()1115 bool PDBSourceLineWriter::PrintPDBInfo() {
1116 PDBModuleInfo info;
1117 if (!GetModuleInfo(&info)) {
1118 return false;
1119 }
1120
1121 // Hard-code "windows" for the OS because that's the only thing that makes
1122 // sense for PDB files. (This might not be strictly correct for Windows CE
1123 // support, but we don't care about that at the moment.)
1124 fprintf(output_, "MODULE windows %ws %ws %ws\n",
1125 info.cpu.c_str(), info.debug_identifier.c_str(),
1126 info.debug_file.c_str());
1127
1128 return true;
1129 }
1130
PrintPEInfo()1131 bool PDBSourceLineWriter::PrintPEInfo() {
1132 PEModuleInfo info;
1133 if (!GetPEInfo(&info)) {
1134 return false;
1135 }
1136
1137 fprintf(output_, "INFO CODE_ID %ws %ws\n",
1138 info.code_identifier.c_str(),
1139 info.code_file.c_str());
1140 return true;
1141 }
1142
// wcstol_positive_strict is sort of like wcstol, but much stricter.  string
// should be a buffer pointing to a null-terminated string containing only
// decimal digits.  If the entire string can be converted to an integer
// without overflowing, and it contains no non-digit characters, *result is
// set to the value and this function returns true.  Otherwise, this function
// returns false and leaves *result untouched.  The empty string and strings
// with leading zeroes (other than "0" itself) are rejected.  This is an
// alternative to the strtol, atoi, and scanf families, which are not as
// strict about input and in some cases don't provide a good way for the
// caller to determine if a conversion was successful.
static bool wcstol_positive_strict(const wchar_t* string, int* result) {
  // An empty string contains no digits, so there is nothing to convert.
  if (*string == L'\0') {
    return false;
  }

  const int kMaxValue = std::numeric_limits<int>::max();
  int value = 0;
  for (const wchar_t* c = string; *c != L'\0'; ++c) {
    if (*c < L'0' || *c > L'9') {
      return false;
    }
    const int digit = static_cast<int>(*c - L'0');

    // Detect overflow before it happens: signed overflow is undefined
    // behavior, so it cannot be reliably detected after the fact.
    // value * 10 + digit fits iff value <= (kMaxValue - digit) / 10.
    if (value > (kMaxValue - digit) / 10) {
      return false;
    }
    value = value * 10 + digit;

    // Forbid leading zeroes unless the string is just "0".
    if (value == 0 && *(c + 1) != L'\0') {
      return false;
    }
  }
  *result = value;
  return true;
}
1179
FindPEFile()1180 bool PDBSourceLineWriter::FindPEFile() {
1181 CComPtr<IDiaSymbol> global;
1182 if (FAILED(session_->get_globalScope(&global))) {
1183 fprintf(stderr, "get_globalScope failed\n");
1184 return false;
1185 }
1186
1187 CComBSTR symbols_file;
1188 if (SUCCEEDED(global->get_symbolsFileName(&symbols_file))) {
1189 wstring file(symbols_file);
1190
1191 // Look for an EXE or DLL file.
1192 const wchar_t* extensions[] = { L"exe", L"dll" };
1193 for (size_t i = 0; i < sizeof(extensions) / sizeof(extensions[0]); i++) {
1194 size_t dot_pos = file.find_last_of(L".");
1195 if (dot_pos != wstring::npos) {
1196 file.replace(dot_pos + 1, wstring::npos, extensions[i]);
1197 // Check if this file exists.
1198 if (GetFileAttributesW(file.c_str()) != INVALID_FILE_ATTRIBUTES) {
1199 code_file_ = file;
1200 return true;
1201 }
1202 }
1203 }
1204 }
1205
1206 return false;
1207 }
1208
1209 // static
GetSymbolFunctionName(IDiaSymbol * function,BSTR * name,int * stack_param_size)1210 bool PDBSourceLineWriter::GetSymbolFunctionName(IDiaSymbol* function,
1211 BSTR* name,
1212 int* stack_param_size) {
1213 *stack_param_size = -1;
1214
1215 // Use get_undecoratedNameEx to get readable C++ names with arguments.
1216 if (function->get_undecoratedNameEx(kUndecorateOptions, name) != S_OK) {
1217 if (function->get_name(name) != S_OK) {
1218 fprintf(stderr, "failed to get function name\n");
1219 return false;
1220 }
1221
1222 // It's possible for get_name to return an empty string, so
1223 // special-case that.
1224 if (wcscmp(*name, L"") == 0) {
1225 SysFreeString(*name);
1226 // dwarf_cu_to_module.cc uses "<name omitted>", so match that.
1227 *name = SysAllocString(L"<name omitted>");
1228 return true;
1229 }
1230
1231 // If a name comes from get_name because no undecorated form existed,
1232 // it's already formatted properly to be used as output. Don't do any
1233 // additional processing.
1234 //
1235 // MSVC7's DIA seems to not undecorate names in as many cases as MSVC8's.
1236 // This will result in calling get_name for some C++ symbols, so
1237 // all of the parameter and return type information may not be included in
1238 // the name string.
1239 } else {
1240 StripLlvmSuffixAndUndecorate(name);
1241
1242 // C++ uses a bogus "void" argument for functions and methods that don't
1243 // take any parameters. Take it out of the undecorated name because it's
1244 // ugly and unnecessary.
1245 const wchar_t* replace_string = L"(void)";
1246 const size_t replace_length = wcslen(replace_string);
1247 const wchar_t* replacement_string = L"()";
1248 size_t length = wcslen(*name);
1249 if (length >= replace_length) {
1250 wchar_t* name_end = *name + length - replace_length;
1251 if (wcscmp(name_end, replace_string) == 0) {
1252 WindowsStringUtils::safe_wcscpy(name_end, replace_length,
1253 replacement_string);
1254 length = wcslen(*name);
1255 }
1256 }
1257
1258 // Undecorate names used for stdcall and fastcall. These names prefix
1259 // the identifier with '_' (stdcall) or '@' (fastcall) and suffix it
1260 // with '@' followed by the number of bytes of parameters, in decimal.
1261 // If such a name is found, take note of the size and undecorate it.
1262 // Only do this for names that aren't C++, which is determined based on
1263 // whether the undecorated name contains any ':' or '(' characters.
1264 if (!wcschr(*name, ':') && !wcschr(*name, '(') &&
1265 (*name[0] == '_' || *name[0] == '@')) {
1266 wchar_t* last_at = wcsrchr(*name + 1, '@');
1267 if (last_at && wcstol_positive_strict(last_at + 1, stack_param_size)) {
1268 // If this function adheres to the fastcall convention, it accepts up
1269 // to the first 8 bytes of parameters in registers (%ecx and %edx).
1270 // We're only interested in the stack space used for parameters, so
1271 // so subtract 8 and don't let the size go below 0.
1272 if (*name[0] == '@') {
1273 if (*stack_param_size > 8) {
1274 *stack_param_size -= 8;
1275 } else {
1276 *stack_param_size = 0;
1277 }
1278 }
1279
1280 // Undecorate the name by moving it one character to the left in its
1281 // buffer, and terminating it where the last '@' had been.
1282 WindowsStringUtils::safe_wcsncpy(*name, length,
1283 *name + 1, last_at - *name - 1);
1284 } else if (*name[0] == '_') {
1285 // This symbol's name is encoded according to the cdecl rules. The
1286 // name doesn't end in a '@' character followed by a decimal positive
1287 // integer, so it's not a stdcall name. Strip off the leading
1288 // underscore.
1289 WindowsStringUtils::safe_wcsncpy(*name, length, *name + 1, length);
1290 }
1291 }
1292 }
1293
1294 return true;
1295 }
1296
1297 // static
GetFunctionStackParamSize(IDiaSymbol * function)1298 int PDBSourceLineWriter::GetFunctionStackParamSize(IDiaSymbol* function) {
1299 // This implementation is highly x86-specific.
1300
1301 // Gather the symbols corresponding to data.
1302 CComPtr<IDiaEnumSymbols> data_children;
1303 if (FAILED(function->findChildren(SymTagData, NULL, nsNone,
1304 &data_children))) {
1305 return 0;
1306 }
1307
1308 // lowest_base is the lowest %ebp-relative byte offset used for a parameter.
1309 // highest_end is one greater than the highest offset (i.e. base + length).
1310 // Stack parameters are assumed to be contiguous, because in reality, they
1311 // are.
1312 int lowest_base = INT_MAX;
1313 int highest_end = INT_MIN;
1314
1315 CComPtr<IDiaSymbol> child;
1316 DWORD count;
1317 while (SUCCEEDED(data_children->Next(1, &child, &count)) && count == 1) {
1318 // If any operation fails at this point, just proceed to the next child.
1319 // Use the next_child label instead of continue because child needs to
1320 // be released before it's reused. Declare constructable/destructable
1321 // types early to avoid gotos that cross initializations.
1322 CComPtr<IDiaSymbol> child_type;
1323
1324 // DataIsObjectPtr is only used for |this|. Because |this| can be passed
1325 // as a stack parameter, look for it in addition to traditional
1326 // parameters.
1327 DWORD child_kind;
1328 if (FAILED(child->get_dataKind(&child_kind)) ||
1329 (child_kind != DataIsParam && child_kind != DataIsObjectPtr)) {
1330 goto next_child;
1331 }
1332
1333 // Only concentrate on register-relative parameters. Parameters may also
1334 // be enregistered (passed directly in a register), but those don't
1335 // consume any stack space, so they're not of interest.
1336 DWORD child_location_type;
1337 if (FAILED(child->get_locationType(&child_location_type)) ||
1338 child_location_type != LocIsRegRel) {
1339 goto next_child;
1340 }
1341
1342 // Of register-relative parameters, the only ones that make any sense are
1343 // %ebp- or %esp-relative. Note that MSVC's debugging information always
1344 // gives parameters as %ebp-relative even when a function doesn't use a
1345 // traditional frame pointer and stack parameters are accessed relative to
1346 // %esp, so just look for %ebp-relative parameters. If you wanted to
1347 // access parameters, you'd probably want to treat these %ebp-relative
1348 // offsets as if they were relative to %esp before a function's prolog
1349 // executed.
1350 DWORD child_register;
1351 if (FAILED(child->get_registerId(&child_register)) ||
1352 child_register != CV_REG_EBP) {
1353 goto next_child;
1354 }
1355
1356 LONG child_register_offset;
1357 if (FAILED(child->get_offset(&child_register_offset))) {
1358 goto next_child;
1359 }
1360
1361 // IDiaSymbol::get_type can succeed but still pass back a NULL value.
1362 if (FAILED(child->get_type(&child_type)) || !child_type) {
1363 goto next_child;
1364 }
1365
1366 ULONGLONG child_length;
1367 if (FAILED(child_type->get_length(&child_length))) {
1368 goto next_child;
1369 }
1370
1371 // Extra scope to avoid goto jumping over variable initialization
1372 {
1373 int child_end = child_register_offset + static_cast<ULONG>(child_length);
1374 if (child_register_offset < lowest_base) {
1375 lowest_base = child_register_offset;
1376 }
1377 if (child_end > highest_end) {
1378 highest_end = child_end;
1379 }
1380 }
1381
1382 next_child:
1383 child.Release();
1384 }
1385
1386 int param_size = 0;
1387 // Make sure lowest_base isn't less than 4, because [%esp+4] is the lowest
1388 // possible address to find a stack parameter before executing a function's
1389 // prolog (see above). Some optimizations cause parameter offsets to be
1390 // lower than 4, but we're not concerned with those because we're only
1391 // looking for parameters contained in addresses higher than where the
1392 // return address is stored.
1393 if (lowest_base < 4) {
1394 lowest_base = 4;
1395 }
1396 if (highest_end > lowest_base) {
1397 // All stack parameters are pushed as at least 4-byte quantities. If the
1398 // last type was narrower than 4 bytes, promote it. This assumes that all
1399 // parameters' offsets are 4-byte-aligned, which is always the case. Only
1400 // worry about the last type, because we're not summing the type sizes,
1401 // just looking at the lowest and highest offsets.
1402 int remainder = highest_end % 4;
1403 if (remainder) {
1404 highest_end += 4 - remainder;
1405 }
1406
1407 param_size = highest_end - lowest_base;
1408 }
1409
1410 return param_size;
1411 }
1412
WriteSymbols(FILE * symbol_file)1413 bool PDBSourceLineWriter::WriteSymbols(FILE* symbol_file) {
1414 output_ = symbol_file;
1415
1416 // Load the OMAP information, and disable auto-translation of addresses in
1417 // preference of doing it ourselves.
1418 OmapData omap_data;
1419 if (!GetOmapDataAndDisableTranslation(session_, &omap_data))
1420 return false;
1421 BuildImageMap(omap_data, &image_map_);
1422
1423 bool ret = PrintPDBInfo();
1424 // This is not a critical piece of the symbol file.
1425 PrintPEInfo();
1426 ret = ret && PrintSourceFiles() && PrintFunctions() && PrintFrameData();
1427 PrintInlineOrigins();
1428
1429 output_ = NULL;
1430 return ret;
1431 }
1432
Close()1433 void PDBSourceLineWriter::Close() {
1434 if (session_ != nullptr) {
1435 session_.Release();
1436 }
1437 }
1438
GetModuleInfo(PDBModuleInfo * info)1439 bool PDBSourceLineWriter::GetModuleInfo(PDBModuleInfo* info) {
1440 if (!info) {
1441 return false;
1442 }
1443
1444 info->debug_file.clear();
1445 info->debug_identifier.clear();
1446 info->cpu.clear();
1447
1448 CComPtr<IDiaSymbol> global;
1449 if (FAILED(session_->get_globalScope(&global))) {
1450 return false;
1451 }
1452
1453 DWORD machine_type;
1454 // get_machineType can return S_FALSE.
1455 if (global->get_machineType(&machine_type) == S_OK) {
1456 // The documentation claims that get_machineType returns a value from
1457 // the CV_CPU_TYPE_e enumeration, but that's not the case.
1458 // Instead, it returns one of the IMAGE_FILE_MACHINE values as
1459 // defined here:
1460 // http://msdn.microsoft.com/en-us/library/ms680313%28VS.85%29.aspx
1461 info->cpu = FileHeaderMachineToCpuString(static_cast<WORD>(machine_type));
1462 } else {
1463 // Unexpected, but handle gracefully.
1464 info->cpu = L"unknown";
1465 }
1466
1467 // DWORD* and int* are not compatible. This is clean and avoids a cast.
1468 DWORD age;
1469 if (FAILED(global->get_age(&age))) {
1470 return false;
1471 }
1472
1473 bool uses_guid;
1474 if (!UsesGUID(&uses_guid)) {
1475 return false;
1476 }
1477
1478 if (uses_guid) {
1479 GUID guid;
1480 if (FAILED(global->get_guid(&guid))) {
1481 return false;
1482 }
1483
1484 info->debug_identifier = GenerateDebugIdentifier(age, guid);
1485 } else {
1486 DWORD signature;
1487 if (FAILED(global->get_signature(&signature))) {
1488 return false;
1489 }
1490
1491 info->debug_identifier = GenerateDebugIdentifier(age, signature);
1492 }
1493
1494 CComBSTR debug_file_string;
1495 if (FAILED(global->get_symbolsFileName(&debug_file_string))) {
1496 return false;
1497 }
1498 info->debug_file =
1499 WindowsStringUtils::GetBaseName(wstring(debug_file_string));
1500
1501 return true;
1502 }
1503
GetPEInfo(PEModuleInfo * info)1504 bool PDBSourceLineWriter::GetPEInfo(PEModuleInfo* info) {
1505 if (!info) {
1506 return false;
1507 }
1508
1509 if (code_file_.empty() && !FindPEFile()) {
1510 fprintf(stderr, "Couldn't locate EXE or DLL file.\n");
1511 return false;
1512 }
1513
1514 return ReadPEInfo(code_file_, info);
1515 }
1516
UsesGUID(bool * uses_guid)1517 bool PDBSourceLineWriter::UsesGUID(bool* uses_guid) {
1518 if (!uses_guid)
1519 return false;
1520
1521 CComPtr<IDiaSymbol> global;
1522 if (FAILED(session_->get_globalScope(&global)))
1523 return false;
1524
1525 GUID guid;
1526 if (FAILED(global->get_guid(&guid)))
1527 return false;
1528
1529 DWORD signature;
1530 if (FAILED(global->get_signature(&signature)))
1531 return false;
1532
1533 // There are two possibilities for guid: either it's a real 128-bit GUID
1534 // as identified in a code module by a new-style CodeView record, or it's
1535 // a 32-bit signature (timestamp) as identified by an old-style record.
1536 // See MDCVInfoPDB70 and MDCVInfoPDB20 in minidump_format.h.
1537 //
1538 // Because DIA doesn't provide a way to directly determine whether a module
1539 // uses a GUID or a 32-bit signature, this code checks whether the first 32
1540 // bits of guid are the same as the signature, and if the rest of guid is
1541 // zero. If so, then with a pretty high degree of certainty, there's an
1542 // old-style CodeView record in use. This method will only falsely find an
1543 // an old-style CodeView record if a real 128-bit GUID has its first 32
1544 // bits set the same as the module's signature (timestamp) and the rest of
1545 // the GUID is set to 0. This is highly unlikely.
1546
1547 GUID signature_guid = {signature}; // 0-initializes other members
1548 *uses_guid = !IsEqualGUID(guid, signature_guid);
1549 return true;
1550 }
1551
1552 } // namespace google_breakpad
1553