xref: /aosp_15_r20/external/google-breakpad/src/processor/basic_source_line_resolver.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // Copyright 2010 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 //
29 // basic_source_line_resolver.cc: BasicSourceLineResolver implementation.
30 //
31 // See basic_source_line_resolver.h and basic_source_line_resolver_types.h
32 // for documentation.
33 
34 #ifdef HAVE_CONFIG_H
35 #include <config.h>  // Must come first
36 #endif
37 
38 #include <assert.h>
39 #include <stdio.h>
40 #include <string.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 
44 #include <limits>
45 #include <map>
46 #include <memory>
47 #include <utility>
48 #include <vector>
49 
50 #include "google_breakpad/processor/basic_source_line_resolver.h"
51 #include "processor/basic_source_line_resolver_types.h"
52 #include "processor/module_factory.h"
53 
54 #include "processor/tokenize.h"
55 
56 using std::deque;
57 using std::make_pair;
58 using std::map;
59 using std::unique_ptr;
60 using std::vector;
61 
62 namespace google_breakpad {
63 
64 #ifdef _WIN32
65 #ifdef _MSC_VER
66 #define strtok_r strtok_s
67 #endif
68 #define strtoull _strtoui64
69 #endif
70 
71 namespace {
72 
73 // Utility function to tokenize given the presence of an optional initial
74 // field. In this case, optional_field is the expected string for the optional
75 // field, and max_tokens is the maximum number of tokens including the optional
76 // field. Refer to the documentation for Tokenize for descriptions of the other
77 // arguments.
TokenizeWithOptionalField(char * line,const char * optional_field,const char * separators,int max_tokens,vector<char * > * tokens)78 bool TokenizeWithOptionalField(char* line,
79                                const char* optional_field,
80                                const char* separators,
81                                int max_tokens,
82                                vector<char*>* tokens) {
83   // First tokenize assuming the optional field is not present.  If we then see
84   // the optional field, additionally tokenize the last token into two tokens.
85   if (!Tokenize(line, separators, max_tokens - 1, tokens)) {
86     return false;
87   }
88 
89   if (strcmp(tokens->front(), optional_field) == 0) {
90     // The optional field is present. Split the last token in two to recover the
91     // field prior to the last.
92     vector<char*> last_tokens;
93     if (!Tokenize(tokens->back(), separators, 2, &last_tokens)) {
94       return false;
95     }
96     // Replace the previous last token with the two new tokens.
97     tokens->pop_back();
98     tokens->push_back(last_tokens[0]);
99     tokens->push_back(last_tokens[1]);
100   }
101 
102   return true;
103 }
104 
105 }  // namespace
106 
// Separator characters used when splitting symbol file records into tokens.
// Both the pointer and the characters are immutable.
static const char* const kWhitespace = " \r\n";
// LogParseError only prints the first kMaxErrorsPrinted errors it counts.
static const int kMaxErrorsPrinted = 5;
// LoadMapFromMemory stops parsing once its error count exceeds this value.
static const int kMaxErrorsBeforeBailing = 100;
110 
BasicSourceLineResolver()111 BasicSourceLineResolver::BasicSourceLineResolver() :
112     SourceLineResolverBase(new BasicModuleFactory) { }
113 
114 // static
LogParseError(const string & message,int line_number,int * num_errors)115 void BasicSourceLineResolver::Module::LogParseError(
116    const string& message,
117    int line_number,
118    int* num_errors) {
119   if (++(*num_errors) <= kMaxErrorsPrinted) {
120     if (line_number > 0) {
121       BPLOG(ERROR) << "Line " << line_number << ": " << message;
122     } else {
123       BPLOG(ERROR) << message;
124     }
125   }
126 }
127 
LoadMapFromMemory(char * memory_buffer,size_t memory_buffer_size)128 bool BasicSourceLineResolver::Module::LoadMapFromMemory(
129     char* memory_buffer,
130     size_t memory_buffer_size) {
131   linked_ptr<Function> cur_func;
132   int line_number = 0;
133   int num_errors = 0;
134   int inline_num_errors = 0;
135   char* save_ptr;
136 
137   // If the length is 0, we can still pretend we have a symbol file. This is
138   // for scenarios that want to test symbol lookup, but don't necessarily care
139   // if certain modules do not have any information, like system libraries.
140   if (memory_buffer_size == 0) {
141     return true;
142   }
143 
144   // Make sure the last character is null terminator.
145   size_t last_null_terminator = memory_buffer_size - 1;
146   if (memory_buffer[last_null_terminator] != '\0') {
147     memory_buffer[last_null_terminator] = '\0';
148   }
149 
150   // Skip any null terminators at the end of the memory buffer, and make sure
151   // there are no other null terminators in the middle of the memory buffer.
152   bool has_null_terminator_in_the_middle = false;
153   while (last_null_terminator > 0 &&
154          memory_buffer[last_null_terminator - 1] == '\0') {
155     last_null_terminator--;
156   }
157   for (size_t i = 0; i < last_null_terminator; i++) {
158     if (memory_buffer[i] == '\0') {
159       memory_buffer[i] = '_';
160       has_null_terminator_in_the_middle = true;
161     }
162   }
163   if (has_null_terminator_in_the_middle) {
164     LogParseError(
165        "Null terminator is not expected in the middle of the symbol data",
166        line_number,
167        &num_errors);
168   }
169 
170   char* buffer;
171   buffer = strtok_r(memory_buffer, "\r\n", &save_ptr);
172 
173   while (buffer != NULL) {
174     ++line_number;
175 
176     if (strncmp(buffer, "FILE ", 5) == 0) {
177       if (!ParseFile(buffer)) {
178         LogParseError("ParseFile on buffer failed", line_number, &num_errors);
179       }
180     } else if (strncmp(buffer, "STACK ", 6) == 0) {
181       if (!ParseStackInfo(buffer)) {
182         LogParseError("ParseStackInfo failed", line_number, &num_errors);
183       }
184     } else if (strncmp(buffer, "FUNC ", 5) == 0) {
185       cur_func.reset(ParseFunction(buffer));
186       if (!cur_func.get()) {
187         LogParseError("ParseFunction failed", line_number, &num_errors);
188       } else {
189         // StoreRange will fail if the function has an invalid address or size.
190         // We'll silently ignore this, the function and any corresponding lines
191         // will be destroyed when cur_func is released.
192         functions_.StoreRange(cur_func->address, cur_func->size, cur_func);
193       }
194     } else if (strncmp(buffer, "PUBLIC ", 7) == 0) {
195       // Clear cur_func: public symbols don't contain line number information.
196       cur_func.reset();
197 
198       if (!ParsePublicSymbol(buffer)) {
199         LogParseError("ParsePublicSymbol failed", line_number, &num_errors);
200       }
201     } else if (strncmp(buffer, "MODULE ", 7) == 0) {
202       // Ignore these.  They're not of any use to BasicSourceLineResolver,
203       // which is fed modules by a SymbolSupplier.  These lines are present to
204       // aid other tools in properly placing symbol files so that they can
205       // be accessed by a SymbolSupplier.
206       //
207       // MODULE <guid> <age> <filename>
208     } else if (strncmp(buffer, "INFO ", 5) == 0) {
209       // Ignore these as well, they're similarly just for housekeeping.
210       //
211       // INFO CODE_ID <code id> <filename>
212     } else if (strncmp(buffer, "INLINE ", 7) == 0) {
213       linked_ptr<Inline> in = ParseInline(buffer);
214       if (!in.get())
215         LogParseError("ParseInline failed", line_number, &inline_num_errors);
216       else
217         cur_func->AppendInline(in);
218     } else if (strncmp(buffer, "INLINE_ORIGIN ", 14) == 0) {
219       if (!ParseInlineOrigin(buffer)) {
220         LogParseError("ParseInlineOrigin failed", line_number,
221                       &inline_num_errors);
222       }
223     } else {
224       if (!cur_func.get()) {
225         LogParseError("Found source line data without a function",
226                        line_number, &num_errors);
227       } else {
228         Line* line = ParseLine(buffer);
229         if (!line) {
230           LogParseError("ParseLine failed", line_number, &num_errors);
231         } else {
232           cur_func->lines.StoreRange(line->address, line->size,
233                                      linked_ptr<Line>(line));
234         }
235       }
236     }
237     if (num_errors > kMaxErrorsBeforeBailing) {
238       break;
239     }
240     buffer = strtok_r(NULL, "\r\n", &save_ptr);
241   }
242   is_corrupt_ = num_errors > 0;
243   return true;
244 }
245 
ConstructInlineFrames(StackFrame * frame,MemAddr address,const ContainedRangeMap<uint64_t,linked_ptr<Inline>> & inline_map,deque<unique_ptr<StackFrame>> * inlined_frames) const246 void BasicSourceLineResolver::Module::ConstructInlineFrames(
247     StackFrame* frame,
248     MemAddr address,
249     const ContainedRangeMap<uint64_t, linked_ptr<Inline>>& inline_map,
250     deque<unique_ptr<StackFrame>>* inlined_frames) const {
251   vector<const linked_ptr<Inline>*> inlines;
252   if (!inline_map.RetrieveRanges(address, inlines)) {
253     return;
254   }
255 
256   for (const linked_ptr<Inline>* const in : inlines) {
257     unique_ptr<StackFrame> new_frame =
258         unique_ptr<StackFrame>(new StackFrame(*frame));
259     auto origin = inline_origins_.find(in->get()->origin_id);
260     if (origin != inline_origins_.end()) {
261       new_frame->function_name = origin->second->name;
262     } else {
263       new_frame->function_name = "<name omitted>";
264     }
265 
266     // Store call site file and line in current frame, which will be updated
267     // later.
268     new_frame->source_line = in->get()->call_site_line;
269     if (in->get()->has_call_site_file_id) {
270       auto file = files_.find(in->get()->call_site_file_id);
271       if (file != files_.end()) {
272         new_frame->source_file_name = file->second;
273       }
274     }
275 
276     // Use the starting address of the inlined range as inlined function base.
277     new_frame->function_base = new_frame->module->base_address();
278     for (const auto& range : in->get()->inline_ranges) {
279       if (address >= range.first && address < range.first + range.second) {
280         new_frame->function_base += range.first;
281         break;
282       }
283     }
284     new_frame->trust = StackFrame::FRAME_TRUST_INLINE;
285 
286     // The inlines vector has an order from innermost entry to outermost entry.
287     // By push_back, we will have inlined_frames with the same order.
288     inlined_frames->push_back(std::move(new_frame));
289   }
290 
291   // Update the source file and source line for each inlined frame.
292   if (!inlined_frames->empty()) {
293     string parent_frame_source_file_name = frame->source_file_name;
294     int parent_frame_source_line = frame->source_line;
295     frame->source_file_name = inlined_frames->back()->source_file_name;
296     frame->source_line = inlined_frames->back()->source_line;
297     for (unique_ptr<StackFrame>& inlined_frame : *inlined_frames) {
298       std::swap(inlined_frame->source_file_name, parent_frame_source_file_name);
299       std::swap(inlined_frame->source_line, parent_frame_source_line);
300     }
301   }
302 }
303 
LookupAddress(StackFrame * frame,deque<unique_ptr<StackFrame>> * inlined_frames) const304 void BasicSourceLineResolver::Module::LookupAddress(
305     StackFrame* frame,
306     deque<unique_ptr<StackFrame>>* inlined_frames) const {
307   MemAddr address = frame->instruction - frame->module->base_address();
308 
309   // First, look for a FUNC record that covers address. Use
310   // RetrieveNearestRange instead of RetrieveRange so that, if there
311   // is no such function, we can use the next function to bound the
312   // extent of the PUBLIC symbol we find, below. This does mean we
313   // need to check that address indeed falls within the function we
314   // find; do the range comparison in an overflow-friendly way.
315   linked_ptr<Function> func;
316   linked_ptr<PublicSymbol> public_symbol;
317   MemAddr function_base;
318   MemAddr function_size;
319   MemAddr public_address;
320   if (functions_.RetrieveNearestRange(address, &func, &function_base,
321                                       NULL /* delta */, &function_size) &&
322       address >= function_base && address - function_base < function_size) {
323     frame->function_name = func->name;
324     frame->function_base = frame->module->base_address() + function_base;
325     frame->is_multiple = func->is_multiple;
326 
327     linked_ptr<Line> line;
328     MemAddr line_base;
329     if (func->lines.RetrieveRange(address, &line, &line_base, NULL /* delta */,
330                                   NULL /* size */)) {
331       FileMap::const_iterator it = files_.find(line->source_file_id);
332       if (it != files_.end()) {
333         frame->source_file_name = files_.find(line->source_file_id)->second;
334       }
335       frame->source_line = line->line;
336       frame->source_line_base = frame->module->base_address() + line_base;
337     }
338 
339     // Check if this is inlined function call.
340     if (inlined_frames) {
341       ConstructInlineFrames(frame, address, func->inlines, inlined_frames);
342     }
343   } else if (public_symbols_.Retrieve(address,
344                                       &public_symbol, &public_address) &&
345              (!func.get() || public_address > function_base)) {
346     frame->function_name = public_symbol->name;
347     frame->function_base = frame->module->base_address() + public_address;
348     frame->is_multiple = public_symbol->is_multiple;
349   }
350 }
351 
FindWindowsFrameInfo(const StackFrame * frame) const352 WindowsFrameInfo* BasicSourceLineResolver::Module::FindWindowsFrameInfo(
353     const StackFrame* frame) const {
354   MemAddr address = frame->instruction - frame->module->base_address();
355   scoped_ptr<WindowsFrameInfo> result(new WindowsFrameInfo());
356 
357   // We only know about WindowsFrameInfo::STACK_INFO_FRAME_DATA and
358   // WindowsFrameInfo::STACK_INFO_FPO. Prefer them in this order.
359   // WindowsFrameInfo::STACK_INFO_FRAME_DATA is the newer type that
360   // includes its own program string.
361   // WindowsFrameInfo::STACK_INFO_FPO is the older type
362   // corresponding to the FPO_DATA struct. See stackwalker_x86.cc.
363   linked_ptr<WindowsFrameInfo> frame_info;
364   if ((windows_frame_info_[WindowsFrameInfo::STACK_INFO_FRAME_DATA]
365        .RetrieveRange(address, &frame_info))
366       || (windows_frame_info_[WindowsFrameInfo::STACK_INFO_FPO]
367           .RetrieveRange(address, &frame_info))) {
368     result->CopyFrom(*frame_info.get());
369     return result.release();
370   }
371 
372   // Even without a relevant STACK line, many functions contain
373   // information about how much space their parameters consume on the
374   // stack. Use RetrieveNearestRange instead of RetrieveRange, so that
375   // we can use the function to bound the extent of the PUBLIC symbol,
376   // below. However, this does mean we need to check that ADDRESS
377   // falls within the retrieved function's range; do the range
378   // comparison in an overflow-friendly way.
379   linked_ptr<Function> function;
380   MemAddr function_base, function_size;
381   if (functions_.RetrieveNearestRange(address, &function, &function_base,
382                                       NULL /* delta */, &function_size) &&
383       address >= function_base && address - function_base < function_size) {
384     result->parameter_size = function->parameter_size;
385     result->valid |= WindowsFrameInfo::VALID_PARAMETER_SIZE;
386     return result.release();
387   }
388 
389   // PUBLIC symbols might have a parameter size. Use the function we
390   // found above to limit the range the public symbol covers.
391   linked_ptr<PublicSymbol> public_symbol;
392   MemAddr public_address;
393   if (public_symbols_.Retrieve(address, &public_symbol, &public_address) &&
394       (!function.get() || public_address > function_base)) {
395     result->parameter_size = public_symbol->parameter_size;
396   }
397 
398   return NULL;
399 }
400 
FindCFIFrameInfo(const StackFrame * frame) const401 CFIFrameInfo* BasicSourceLineResolver::Module::FindCFIFrameInfo(
402     const StackFrame* frame) const {
403   MemAddr address = frame->instruction - frame->module->base_address();
404   MemAddr initial_base, initial_size;
405   string initial_rules;
406 
407   // Find the initial rule whose range covers this address. That
408   // provides an initial set of register recovery rules. Then, walk
409   // forward from the initial rule's starting address to frame's
410   // instruction address, applying delta rules.
411   if (!cfi_initial_rules_.RetrieveRange(address, &initial_rules, &initial_base,
412                                         NULL /* delta */, &initial_size)) {
413     return NULL;
414   }
415 
416   // Create a frame info structure, and populate it with the rules from
417   // the STACK CFI INIT record.
418   scoped_ptr<CFIFrameInfo> rules(new CFIFrameInfo());
419   if (!ParseCFIRuleSet(initial_rules, rules.get()))
420     return NULL;
421 
422   // Find the first delta rule that falls within the initial rule's range.
423   map<MemAddr, string>::const_iterator delta =
424     cfi_delta_rules_.lower_bound(initial_base);
425 
426   // Apply delta rules up to and including the frame's address.
427   while (delta != cfi_delta_rules_.end() && delta->first <= address) {
428     ParseCFIRuleSet(delta->second, rules.get());
429     delta++;
430   }
431 
432   return rules.release();
433 }
434 
ParseFile(char * file_line)435 bool BasicSourceLineResolver::Module::ParseFile(char* file_line) {
436   long index;
437   char* filename;
438   if (SymbolParseHelper::ParseFile(file_line, &index, &filename)) {
439     files_.insert(make_pair(index, string(filename)));
440     return true;
441   }
442   return false;
443 }
444 
ParseInlineOrigin(char * inline_origin_line)445 bool BasicSourceLineResolver::Module::ParseInlineOrigin(
446   char* inline_origin_line) {
447   bool has_file_id;
448   long origin_id;
449   long source_file_id;
450   char* origin_name;
451   if (SymbolParseHelper::ParseInlineOrigin(inline_origin_line, &has_file_id,
452                                            &origin_id, &source_file_id,
453                                            &origin_name)) {
454     inline_origins_.insert(make_pair(
455         origin_id,
456         new InlineOrigin(has_file_id, source_file_id, origin_name)));
457     return true;
458   }
459   return false;
460 }
461 
462 linked_ptr<BasicSourceLineResolver::Inline>
ParseInline(char * inline_line)463 BasicSourceLineResolver::Module::ParseInline(char* inline_line) {
464   bool has_call_site_file_id;
465   long inline_nest_level;
466   long call_site_line;
467   long call_site_file_id;
468   long origin_id;
469   vector<std::pair<MemAddr, MemAddr>> ranges;
470   if (SymbolParseHelper::ParseInline(inline_line, &has_call_site_file_id,
471                                      &inline_nest_level, &call_site_line,
472                                      &call_site_file_id, &origin_id, &ranges)) {
473     return linked_ptr<Inline>(new Inline(has_call_site_file_id,
474                                          inline_nest_level, call_site_line,
475                                          call_site_file_id, origin_id, ranges));
476   }
477   return linked_ptr<Inline>();
478 }
479 
480 BasicSourceLineResolver::Function*
ParseFunction(char * function_line)481 BasicSourceLineResolver::Module::ParseFunction(char* function_line) {
482   bool is_multiple;
483   uint64_t address;
484   uint64_t size;
485   long stack_param_size;
486   char* name;
487   if (SymbolParseHelper::ParseFunction(function_line, &is_multiple, &address,
488                                        &size, &stack_param_size, &name)) {
489     return new Function(name, address, size, stack_param_size, is_multiple);
490   }
491   return NULL;
492 }
493 
ParseLine(char * line_line)494 BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine(
495     char* line_line) {
496   uint64_t address;
497   uint64_t size;
498   long line_number;
499   long source_file;
500 
501   if (SymbolParseHelper::ParseLine(line_line, &address, &size, &line_number,
502                                    &source_file)) {
503     return new Line(address, size, source_file, line_number);
504   }
505   return NULL;
506 }
507 
ParsePublicSymbol(char * public_line)508 bool BasicSourceLineResolver::Module::ParsePublicSymbol(char* public_line) {
509   bool is_multiple;
510   uint64_t address;
511   long stack_param_size;
512   char* name;
513 
514   if (SymbolParseHelper::ParsePublicSymbol(public_line, &is_multiple, &address,
515                                            &stack_param_size, &name)) {
516     // A few public symbols show up with an address of 0.  This has been seen
517     // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow,
518     // RtlDescribeChunkLZNT1, and RtlReserveChunkLZNT1.  They would conflict
519     // with one another if they were allowed into the public_symbols_ map,
520     // but since the address is obviously invalid, gracefully accept them
521     // as input without putting them into the map.
522     if (address == 0) {
523       return true;
524     }
525 
526     linked_ptr<PublicSymbol> symbol(new PublicSymbol(name, address,
527                                                      stack_param_size,
528                                                      is_multiple));
529     return public_symbols_.Store(address, symbol);
530   }
531   return false;
532 }
533 
ParseStackInfo(char * stack_info_line)534 bool BasicSourceLineResolver::Module::ParseStackInfo(char* stack_info_line) {
535   // Skip "STACK " prefix.
536   stack_info_line += 6;
537 
538   // Find the token indicating what sort of stack frame walking
539   // information this is.
540   while (*stack_info_line == ' ')
541     stack_info_line++;
542   const char* platform = stack_info_line;
543   while (!strchr(kWhitespace, *stack_info_line))
544     stack_info_line++;
545   *stack_info_line++ = '\0';
546 
547   // MSVC stack frame info.
548   if (strcmp(platform, "WIN") == 0) {
549     int type = 0;
550     uint64_t rva, code_size;
551     linked_ptr<WindowsFrameInfo>
552       stack_frame_info(WindowsFrameInfo::ParseFromString(stack_info_line,
553                                                          type,
554                                                          rva,
555                                                          code_size));
556     if (stack_frame_info == NULL)
557       return false;
558 
559     // TODO(mmentovai): I wanted to use StoreRange's return value as this
560     // method's return value, but MSVC infrequently outputs stack info that
561     // violates the containment rules.  This happens with a section of code
562     // in strncpy_s in test_app.cc (testdata/minidump2).  There, problem looks
563     // like this:
564     //   STACK WIN 4 4242 1a a 0 ...  (STACK WIN 4 base size prolog 0 ...)
565     //   STACK WIN 4 4243 2e 9 0 ...
566     // ContainedRangeMap treats these two blocks as conflicting.  In reality,
567     // when the prolog lengths are taken into account, the actual code of
568     // these blocks doesn't conflict.  However, we can't take the prolog lengths
569     // into account directly here because we'd wind up with a different set
570     // of range conflicts when MSVC outputs stack info like this:
571     //   STACK WIN 4 1040 73 33 0 ...
572     //   STACK WIN 4 105a 59 19 0 ...
573     // because in both of these entries, the beginning of the code after the
574     // prolog is at 0x1073, and the last byte of contained code is at 0x10b2.
575     // Perhaps we could get away with storing ranges by rva + prolog_size
576     // if ContainedRangeMap were modified to allow replacement of
577     // already-stored values.
578 
579     windows_frame_info_[type].StoreRange(rva, code_size, stack_frame_info);
580     return true;
581   } else if (strcmp(platform, "CFI") == 0) {
582     // DWARF CFI stack frame info
583     return ParseCFIFrameInfo(stack_info_line);
584   } else {
585     // Something unrecognized.
586     return false;
587   }
588 }
589 
ParseCFIFrameInfo(char * stack_info_line)590 bool BasicSourceLineResolver::Module::ParseCFIFrameInfo(
591     char* stack_info_line) {
592   char* cursor;
593 
594   // Is this an INIT record or a delta record?
595   char* init_or_address = strtok_r(stack_info_line, " \r\n", &cursor);
596   if (!init_or_address)
597     return false;
598 
599   if (strcmp(init_or_address, "INIT") == 0) {
600     // This record has the form "STACK INIT <address> <size> <rules...>".
601     char* address_field = strtok_r(NULL, " \r\n", &cursor);
602     if (!address_field) return false;
603 
604     char* size_field = strtok_r(NULL, " \r\n", &cursor);
605     if (!size_field) return false;
606 
607     char* initial_rules = strtok_r(NULL, "\r\n", &cursor);
608     if (!initial_rules) return false;
609 
610     MemAddr address = strtoul(address_field, NULL, 16);
611     MemAddr size    = strtoul(size_field,    NULL, 16);
612     cfi_initial_rules_.StoreRange(address, size, initial_rules);
613     return true;
614   }
615 
616   // This record has the form "STACK <address> <rules...>".
617   char* address_field = init_or_address;
618   char* delta_rules = strtok_r(NULL, "\r\n", &cursor);
619   if (!delta_rules) return false;
620   MemAddr address = strtoul(address_field, NULL, 16);
621   cfi_delta_rules_[address] = delta_rules;
622   return true;
623 }
624 
AppendInline(linked_ptr<Inline> in)625 bool BasicSourceLineResolver::Function::AppendInline(linked_ptr<Inline> in) {
626   // This happends if in's parent wasn't added due to a malformed INLINE record.
627   if (in->inline_nest_level > last_added_inline_nest_level + 1)
628     return false;
629 
630   last_added_inline_nest_level = in->inline_nest_level;
631 
632   // Store all ranges into current level of inlines.
633   for (auto range : in->inline_ranges)
634     inlines.StoreRange(range.first, range.second, in);
635   return true;
636 }
637 
638 // static
ParseFile(char * file_line,long * index,char ** filename)639 bool SymbolParseHelper::ParseFile(char* file_line, long* index,
640                                   char** filename) {
641   // FILE <id> <filename>
642   assert(strncmp(file_line, "FILE ", 5) == 0);
643   file_line += 5;  // skip prefix
644 
645   vector<char*> tokens;
646   if (!Tokenize(file_line, kWhitespace, 2, &tokens)) {
647     return false;
648   }
649 
650   char* after_number;
651   *index = strtol(tokens[0], &after_number, 10);
652   if (!IsValidAfterNumber(after_number) || *index < 0 ||
653       *index == std::numeric_limits<long>::max()) {
654     return false;
655   }
656 
657   *filename = tokens[1];
658   if (!*filename) {
659     return false;
660   }
661 
662   return true;
663 }
664 
665 // static
ParseInlineOrigin(char * inline_origin_line,bool * has_file_id,long * origin_id,long * file_id,char ** name)666 bool SymbolParseHelper::ParseInlineOrigin(char* inline_origin_line,
667                                           bool* has_file_id,
668                                           long* origin_id,
669                                           long* file_id,
670                                           char** name) {
671   // Old INLINE_ORIGIN format:
672   // INLINE_ORIGIN <origin_id> <file_id> <name>
673   // New INLINE_ORIGIN format:
674   // INLINE_ORIGIN <origin_id> <name>
675   assert(strncmp(inline_origin_line, "INLINE_ORIGIN ", 14) == 0);
676   inline_origin_line += 14;  // skip prefix
677   vector<char*> tokens;
678   // Split the line into two parts so that the first token is "<origin_id>", and
679   // second token is either "<file_id> <name>"" or "<name>"" depending on the
680   // format version.
681   if (!Tokenize(inline_origin_line, kWhitespace, 2, &tokens)) {
682     return false;
683   }
684 
685   char* after_number;
686   *origin_id = strtol(tokens[0], &after_number, 10);
687   if (!IsValidAfterNumber(after_number) || *origin_id < 0 ||
688       *origin_id == std::numeric_limits<long>::max()) {
689     return false;
690   }
691 
692   // If the field after origin_id is a number, then it's old format.
693   char* remaining_line = tokens[1];
694   *has_file_id = true;
695   for (size_t i = 0;
696        i < strlen(remaining_line) && remaining_line[i] != ' ' && *has_file_id;
697        ++i) {
698     // If the file id is -1, it might be an artificial function that doesn't
699     // have file id. So, we consider -1 as a valid special case.
700     if (remaining_line[i] == '-' && i == 0) {
701       continue;
702     }
703     *has_file_id = isdigit(remaining_line[i]);
704   }
705 
706   if (*has_file_id) {
707     // If it's old format, split "<file_id> <name>" to {"<field_id>", "<name>"}.
708     if (!Tokenize(remaining_line, kWhitespace, 2, &tokens)) {
709       return false;
710     }
711     *file_id = strtol(tokens[0], &after_number, 10);
712     // If the file id is -1, it might be an artificial function that doesn't
713     // have file id. So, we consider -1 as a valid special case.
714     if (!IsValidAfterNumber(after_number) || *file_id < -1 ||
715         *file_id == std::numeric_limits<long>::max()) {
716       return false;
717     }
718   }
719 
720   *name = tokens[1];
721   if (!*name) {
722     return false;
723   }
724 
725   return true;
726 }
727 
728 // static
ParseInline(char * inline_line,bool * has_call_site_file_id,long * inline_nest_level,long * call_site_line,long * call_site_file_id,long * origin_id,vector<std::pair<MemAddr,MemAddr>> * ranges)729 bool SymbolParseHelper::ParseInline(
730     char* inline_line,
731     bool* has_call_site_file_id,
732     long* inline_nest_level,
733     long* call_site_line,
734     long* call_site_file_id,
735     long* origin_id,
736     vector<std::pair<MemAddr, MemAddr>>* ranges) {
737   // Old INLINE format:
738   // INLINE <inline_nest_level> <call_site_line> <origin_id> [<address> <size>]+
739   // New INLINE format:
740   // INLINE <inline_nest_level> <call_site_line> <call_site_file_id> <origin_id>
741   // [<address> <size>]+
742   assert(strncmp(inline_line, "INLINE ", 7) == 0);
743   inline_line += 7; // skip prefix
744 
745   vector<char*> tokens;
746   // Increase max_tokens if necessary.
747   Tokenize(inline_line, kWhitespace, 512, &tokens);
748 
749   // Determine the version of INLINE record by parity of the vector length.
750   *has_call_site_file_id = tokens.size() % 2 == 0;
751 
752   // The length of the vector should be at least 5.
753   if (tokens.size() < 5) {
754     return false;
755   }
756 
757   char* after_number;
758   size_t next_idx = 0;
759 
760   *inline_nest_level = strtol(tokens[next_idx++], &after_number, 10);
761   if (!IsValidAfterNumber(after_number) || *inline_nest_level < 0 ||
762       *inline_nest_level == std::numeric_limits<long>::max()) {
763     return false;
764   }
765 
766   *call_site_line = strtol(tokens[next_idx++], &after_number, 10);
767   if (!IsValidAfterNumber(after_number) || *call_site_line < 0 ||
768       *call_site_line == std::numeric_limits<long>::max()) {
769     return false;
770   }
771 
772   if (*has_call_site_file_id) {
773     *call_site_file_id = strtol(tokens[next_idx++], &after_number, 10);
774     // If the file id is -1, it might be an artificial function that doesn't
775     // have file id. So, we consider -1 as a valid special case.
776     if (!IsValidAfterNumber(after_number) || *call_site_file_id < -1 ||
777         *call_site_file_id == std::numeric_limits<long>::max()) {
778       return false;
779     }
780   }
781 
782   *origin_id = strtol(tokens[next_idx++], &after_number, 10);
783   if (!IsValidAfterNumber(after_number) || *origin_id < 0 ||
784       *origin_id == std::numeric_limits<long>::max()) {
785     return false;
786   }
787 
788   while (next_idx < tokens.size()) {
789     MemAddr address = strtoull(tokens[next_idx++], &after_number, 16);
790     if (!IsValidAfterNumber(after_number) ||
791         address == std::numeric_limits<unsigned long long>::max()) {
792       return false;
793     }
794     MemAddr size = strtoull(tokens[next_idx++], &after_number, 16);
795     if (!IsValidAfterNumber(after_number) ||
796         size == std::numeric_limits<unsigned long long>::max()) {
797       return false;
798     }
799     ranges->push_back({address, size});
800   }
801 
802   return true;
803 }
804 
805 // static
ParseFunction(char * function_line,bool * is_multiple,uint64_t * address,uint64_t * size,long * stack_param_size,char ** name)806 bool SymbolParseHelper::ParseFunction(char* function_line, bool* is_multiple,
807                                       uint64_t* address, uint64_t* size,
808                                       long* stack_param_size, char** name) {
809   // FUNC [<multiple>] <address> <size> <stack_param_size> <name>
810   assert(strncmp(function_line, "FUNC ", 5) == 0);
811   function_line += 5;  // skip prefix
812 
813   vector<char*> tokens;
814   if (!TokenizeWithOptionalField(function_line, "m", kWhitespace, 5, &tokens)) {
815     return false;
816   }
817 
818   *is_multiple = strcmp(tokens[0], "m") == 0;
819   int next_token = *is_multiple ? 1 : 0;
820 
821   char* after_number;
822   *address = strtoull(tokens[next_token++], &after_number, 16);
823   if (!IsValidAfterNumber(after_number) ||
824       *address == std::numeric_limits<unsigned long long>::max()) {
825     return false;
826   }
827   *size = strtoull(tokens[next_token++], &after_number, 16);
828   if (!IsValidAfterNumber(after_number) ||
829       *size == std::numeric_limits<unsigned long long>::max()) {
830     return false;
831   }
832   *stack_param_size = strtol(tokens[next_token++], &after_number, 16);
833   if (!IsValidAfterNumber(after_number) ||
834       *stack_param_size == std::numeric_limits<long>::max() ||
835       *stack_param_size < 0) {
836     return false;
837   }
838   *name = tokens[next_token++];
839 
840   return true;
841 }
842 
843 // static
ParseLine(char * line_line,uint64_t * address,uint64_t * size,long * line_number,long * source_file)844 bool SymbolParseHelper::ParseLine(char* line_line, uint64_t* address,
845                                   uint64_t* size, long* line_number,
846                                   long* source_file) {
847   // <address> <size> <line number> <source file id>
848   vector<char*> tokens;
849   if (!Tokenize(line_line, kWhitespace, 4, &tokens)) {
850     return false;
851   }
852 
853   char* after_number;
854   *address  = strtoull(tokens[0], &after_number, 16);
855   if (!IsValidAfterNumber(after_number) ||
856       *address == std::numeric_limits<unsigned long long>::max()) {
857     return false;
858   }
859   *size = strtoull(tokens[1], &after_number, 16);
860   if (!IsValidAfterNumber(after_number) ||
861       *size == std::numeric_limits<unsigned long long>::max()) {
862     return false;
863   }
864   *line_number = strtol(tokens[2], &after_number, 10);
865   if (!IsValidAfterNumber(after_number) ||
866       *line_number == std::numeric_limits<long>::max()) {
867     return false;
868   }
869   *source_file = strtol(tokens[3], &after_number, 10);
870   if (!IsValidAfterNumber(after_number) || *source_file < 0 ||
871       *source_file == std::numeric_limits<long>::max()) {
872     return false;
873   }
874 
875   // Valid line numbers normally start from 1, however there are functions that
876   // are associated with a source file but not associated with any line number
877   // (block helper function) and for such functions the symbol file contains 0
878   // for the line numbers.  Hence, 0 should be treated as a valid line number.
879   // For more information on block helper functions, please, take a look at:
880   // http://clang.llvm.org/docs/Block-ABI-Apple.html
881   if (*line_number < 0) {
882     return false;
883   }
884 
885   return true;
886 }
887 
888 // static
ParsePublicSymbol(char * public_line,bool * is_multiple,uint64_t * address,long * stack_param_size,char ** name)889 bool SymbolParseHelper::ParsePublicSymbol(char* public_line, bool* is_multiple,
890                                           uint64_t* address,
891                                           long* stack_param_size,
892                                           char** name) {
893   // PUBLIC [<multiple>] <address> <stack_param_size> <name>
894   assert(strncmp(public_line, "PUBLIC ", 7) == 0);
895   public_line += 7;  // skip prefix
896 
897   vector<char*> tokens;
898   if (!TokenizeWithOptionalField(public_line, "m", kWhitespace, 4, &tokens)) {
899     return false;
900   }
901 
902   *is_multiple = strcmp(tokens[0], "m") == 0;
903   int next_token = *is_multiple ? 1 : 0;
904 
905   char* after_number;
906   *address = strtoull(tokens[next_token++], &after_number, 16);
907   if (!IsValidAfterNumber(after_number) ||
908       *address == std::numeric_limits<unsigned long long>::max()) {
909     return false;
910   }
911   *stack_param_size = strtol(tokens[next_token++], &after_number, 16);
912   if (!IsValidAfterNumber(after_number) ||
913       *stack_param_size == std::numeric_limits<long>::max() ||
914       *stack_param_size < 0) {
915     return false;
916   }
917   *name = tokens[next_token++];
918 
919   return true;
920 }
921 
922 // static
IsValidAfterNumber(char * after_number)923 bool SymbolParseHelper::IsValidAfterNumber(char* after_number) {
924   if (after_number != NULL && strchr(kWhitespace, *after_number) != NULL) {
925     return true;
926   }
927   return false;
928 }
929 
930 }  // namespace google_breakpad
931