xref: /aosp_15_r20/external/google-breakpad/src/processor/disassembler_objdump.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // Copyright (c) 2022, Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
// disassembler_objdump.cc: Disassembler that invokes objdump for disassembly.
30 //
31 // Author: Mark Brand
32 
33 #ifdef HAVE_CONFIG_H
34 #include <config.h>  // Must come first
35 #endif
36 
37 #include "processor/disassembler_objdump.h"
38 
39 #include <unistd.h>
40 #include <sys/wait.h>
41 
42 #include <array>
43 #include <fstream>
44 #include <iostream>
45 #include <iterator>
46 #include <regex>
47 #include <sstream>
48 #include <vector>
49 
50 #include "common/linux/eintr_wrapper.h"
51 #include "common/linux/scoped_pipe.h"
52 #include "common/linux/scoped_tmpfile.h"
53 #include "processor/logging.h"
54 
55 namespace google_breakpad {
56 namespace {
57 
// Upper bound, in bytes, on the length of a single x86 instruction. Used to
// size the buffer of instruction bytes handed to the disassembler.
constexpr size_t kMaxX86InstructionLength = 15;
59 
// Returns true when `token` is one of the instruction prefixes ("lock",
// "rep", "repz", "repnz") that may precede the actual mnemonic in a
// disassembled instruction. The comparison is case-sensitive.
bool IsInstructionPrefix(const string& token) {
  static const char* const kPrefixes[] = {"lock", "rep", "repz", "repnz"};
  for (const char* prefix : kPrefixes) {
    if (token == prefix) {
      return true;
    }
  }
  return false;
}
67 
// Returns true when `token` is one of the keywords that make up an
// Intel-syntax memory operand size specifier (e.g. the "DWORD" and "PTR" in
// "DWORD PTR [esi+0x10]"). These tokens carry no addressing information and
// are skipped when splitting an instruction into operands.
//
// Besides the general-purpose sizes, the wider specifiers used for x87, far
// pointer and SSE/AVX memory operands are recognised too, so that operands
// such as "XMMWORD PTR [rax]" are not mistaken for two separate operands.
// The comparison is case-sensitive.
bool IsOperandSize(const string& token) {
  static const char* const kSizeKeywords[] = {
      "BYTE",  "WORD",  "DWORD",   "QWORD",   "PTR",    "TBYTE",
      "FWORD", "OWORD", "XMMWORD", "YMMWORD", "ZMMWORD"};
  for (const char* keyword : kSizeKeywords) {
    if (token == keyword) {
      return true;
    }
  }
  return false;
}
75 
GetSegmentAddressX86(const DumpContext & context,string segment_name,uint64_t & address)76 bool GetSegmentAddressX86(const DumpContext& context, string segment_name,
77                           uint64_t& address) {
78   if (segment_name == "ds") {
79     address = context.GetContextX86()->ds;
80   } else if (segment_name == "es") {
81     address = context.GetContextX86()->es;
82   } else if (segment_name == "fs") {
83     address = context.GetContextX86()->fs;
84   } else if (segment_name == "gs") {
85     address = context.GetContextX86()->gs;
86   } else {
87     BPLOG(ERROR) << "Unsupported segment register: " << segment_name;
88     return false;
89   }
90 
91   return true;
92 }
93 
GetSegmentAddressAMD64(const DumpContext & context,string segment_name,uint64_t & address)94 bool GetSegmentAddressAMD64(const DumpContext& context, string segment_name,
95                             uint64_t& address) {
96   if (segment_name == "ds") {
97     address = 0;
98   } else if (segment_name == "es") {
99     address = 0;
100   } else {
101     BPLOG(ERROR) << "Unsupported segment register: " << segment_name;
102     return false;
103   }
104 
105   return true;
106 }
107 
GetSegmentAddress(const DumpContext & context,string segment_name,uint64_t & address)108 bool GetSegmentAddress(const DumpContext& context, string segment_name,
109                        uint64_t& address) {
110   if (context.GetContextCPU() == MD_CONTEXT_X86) {
111     return GetSegmentAddressX86(context, segment_name, address);
112   } else if (context.GetContextCPU() == MD_CONTEXT_AMD64) {
113     return GetSegmentAddressAMD64(context, segment_name, address);
114   } else {
115     BPLOG(ERROR) << "Unsupported architecture for GetSegmentAddress\n";
116     return false;
117   }
118 }
119 
GetRegisterValueX86(const DumpContext & context,string register_name,uint64_t & value)120 bool GetRegisterValueX86(const DumpContext& context, string register_name,
121                          uint64_t& value) {
122   if (register_name == "eax") {
123     value = context.GetContextX86()->eax;
124   } else if (register_name == "ebx") {
125     value = context.GetContextX86()->ebx;
126   } else if (register_name == "ecx") {
127     value = context.GetContextX86()->ecx;
128   } else if (register_name == "edx") {
129     value = context.GetContextX86()->edx;
130   } else if (register_name == "edi") {
131     value = context.GetContextX86()->edi;
132   } else if (register_name == "esi") {
133     value = context.GetContextX86()->esi;
134   } else if (register_name == "ebp") {
135     value = context.GetContextX86()->ebp;
136   } else if (register_name == "esp") {
137     value = context.GetContextX86()->esp;
138   } else if (register_name == "eip") {
139     value = context.GetContextX86()->eip;
140   } else {
141     BPLOG(ERROR) << "Unsupported register: " << register_name;
142     return false;
143   }
144 
145   return true;
146 }
147 
GetRegisterValueAMD64(const DumpContext & context,string register_name,uint64_t & value)148 bool GetRegisterValueAMD64(const DumpContext& context, string register_name,
149                            uint64_t& value) {
150   if (register_name == "rax") {
151     value = context.GetContextAMD64()->rax;
152   } else if (register_name == "rbx") {
153     value = context.GetContextAMD64()->rbx;
154   } else if (register_name == "rcx") {
155     value = context.GetContextAMD64()->rcx;
156   } else if (register_name == "rdx") {
157     value = context.GetContextAMD64()->rdx;
158   } else if (register_name == "rdi") {
159     value = context.GetContextAMD64()->rdi;
160   } else if (register_name == "rsi") {
161     value = context.GetContextAMD64()->rsi;
162   } else if (register_name == "rbp") {
163     value = context.GetContextAMD64()->rbp;
164   } else if (register_name == "rsp") {
165     value = context.GetContextAMD64()->rsp;
166   } else if (register_name == "r8") {
167     value = context.GetContextAMD64()->r8;
168   } else if (register_name == "r9") {
169     value = context.GetContextAMD64()->r9;
170   } else if (register_name == "r10") {
171     value = context.GetContextAMD64()->r10;
172   } else if (register_name == "r11") {
173     value = context.GetContextAMD64()->r11;
174   } else if (register_name == "r12") {
175     value = context.GetContextAMD64()->r12;
176   } else if (register_name == "r13") {
177     value = context.GetContextAMD64()->r13;
178   } else if (register_name == "r14") {
179     value = context.GetContextAMD64()->r14;
180   } else if (register_name == "r15") {
181     value = context.GetContextAMD64()->r15;
182   } else if (register_name == "rip") {
183     value = context.GetContextAMD64()->rip;
184   } else {
185     BPLOG(ERROR) << "Unsupported register: " << register_name;
186     return false;
187   }
188 
189   return true;
190 }
191 
192 // Lookup the value of `register_name` in `context`, store it into `value` on
193 // success.
194 // Support for non-full-size registers not implemented, since we're only using
195 // this to evaluate address expressions.
GetRegisterValue(const DumpContext & context,string register_name,uint64_t & value)196 bool GetRegisterValue(const DumpContext& context, string register_name,
197                       uint64_t& value) {
198   if (context.GetContextCPU() == MD_CONTEXT_X86) {
199     return GetRegisterValueX86(context, register_name, value);
200   } else if (context.GetContextCPU() == MD_CONTEXT_AMD64) {
201     return GetRegisterValueAMD64(context, register_name, value);
202   } else {
203     BPLOG(ERROR) << "Unsupported architecture for GetRegisterValue\n";
204     return false;
205   }
206 }
207 }  // namespace
208 
209 // static
DisassembleInstruction(uint32_t cpu,const uint8_t * raw_bytes,unsigned int raw_bytes_len,string & instruction)210 bool DisassemblerObjdump::DisassembleInstruction(uint32_t cpu,
211                                                  const uint8_t* raw_bytes,
212                                                  unsigned int raw_bytes_len,
213                                                  string& instruction) {
214   // Always initialize outputs
215   instruction = "";
216 
217   if (!raw_bytes || raw_bytes_len == 0) {
218     // There's no need to perform any operation in this case, as there's
219     // clearly no instruction there.
220     return false;
221   }
222 
223   string architecture;
224   if (cpu == MD_CONTEXT_X86) {
225     architecture = "i386";
226   } else if (cpu == MD_CONTEXT_AMD64) {
227     architecture = "i386:x86-64";
228   } else {
229     BPLOG(ERROR) << "Unsupported architecture.";
230     return false;
231   }
232 
233   // Create a temporary file for the raw instruction bytes to pass to
234   // objdump, and write the bytes to the input file.
235   ScopedTmpFile raw_bytes_file;
236   if (!raw_bytes_file.InitData(raw_bytes, raw_bytes_len)) {
237     BPLOG(ERROR) << "Failed creating temporary file.";
238     return false;
239   }
240 
241   // Create a pipe to use to read the disassembly back from objdump.
242   ScopedPipe disassembly_pipe;
243   if (!disassembly_pipe.Init()) {
244     BPLOG(ERROR) << "Failed creating pipe for output.";
245     return false;
246   }
247 
248   pid_t child_pid = fork();
249   if (child_pid < 0) {
250     BPLOG(ERROR) << "Fork failed.";
251     return false;
252   }
253 
254   if (child_pid == 0) {
255     // In the child process, set up the input and output file descriptors.
256     if (dup2(raw_bytes_file.GetFd(), STDIN_FILENO) < 0 ||
257         disassembly_pipe.Dup2WriteFd(STDOUT_FILENO) < 0 ||
258         disassembly_pipe.Dup2WriteFd(STDERR_FILENO) < 0) {
259       BPLOG(ERROR) << "Failed dup'ing file descriptors.";
260       exit(-1);
261     }
262 
263     // We need to close the read end of the pipe in the child process so that
264     // when the parent closes it, the pipe is disconnected.
265     disassembly_pipe.CloseReadFd();
266 
267     // We use "/proc/self/fd/0" here to allow objdump to parse an unnamed file,
268     // since objdump does not have a mode to read from stdin. This cannot be
269     // used with a pipe, since objdump requires that the input is a standard
270     // file.
271     execlp("objdump", "objdump", "-D", "--no-show-raw-insn", "-b", "binary",
272            "-M", "intel", "-m", architecture.c_str(), "/proc/self/fd/0",
273            nullptr);
274 
275     BPLOG(ERROR) << "Failed to exec objdump.";
276     exit(-1);
277   } else {
278     // In the parent process, parse the objdump output.
279 
280     // Match the instruction line, from:
281     //    0:        lock cmpxchg DWORD PTR [esi+0x10],eax
282     // extract the string "lock cmpxchg DWORD PTR [esi+0x10],eax"
283     std::regex instruction_regex(
284         "^\\s+[0-9a-f]+:\\s+"  // "   0:"
285         "((?:\\s*\\S*)+)$");   // "lock cmpxchg..."
286 
287     std::string line;
288     std::smatch match;
289     while (disassembly_pipe.ReadLine(line)) {
290       if (std::regex_match(line, match, instruction_regex)) {
291         instruction = match[1].str();
292         break;
293       }
294     }
295 
296     // Close the read pipe so that objdump will exit (in case we broke out of
297     // the loop above before reading all of the output).
298     disassembly_pipe.CloseReadFd();
299 
300     // Now wait for objdump to exit.
301     int status = 0;
302     HANDLE_EINTR(waitpid(child_pid, &status, 0));
303 
304     if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
305       BPLOG(ERROR) << "objdump didn't run successfully.";
306       return false;
307     }
308 
309     if (instruction == "") {
310       BPLOG(ERROR) << "Failed to find instruction in objdump output.";
311       return false;
312     }
313   }
314 
315   return true;
316 }
317 
318 // static
TokenizeInstruction(const string & instruction,string & operation,string & dest,string & src)319 bool DisassemblerObjdump::TokenizeInstruction(const string& instruction,
320                                               string& operation, string& dest,
321                                               string& src) {
322   // Always initialize outputs.
323   operation = "";
324   dest = "";
325   src = "";
326 
327   // Split the instruction into tokens by either whitespace or comma.
328   std::regex token_regex("((?:[^\\s,]+)|,)(?:\\s)*");
329   std::sregex_iterator tokens_begin(instruction.begin(), instruction.end(),
330                                     token_regex);
331 
332   bool found_comma = false;
333   for (auto tokens_iter = tokens_begin; tokens_iter != std::sregex_iterator();
334        ++tokens_iter) {
335     auto token = (*tokens_iter)[1].str();
336     if (operation.size() == 0) {
337       if (IsInstructionPrefix(token))
338         continue;
339       operation = token;
340     } else if (dest.size() == 0) {
341       if (IsOperandSize(token))
342         continue;
343       dest = token;
344     } else if (!found_comma) {
345       if (token == ",") {
346         found_comma = true;
347       } else {
348         BPLOG(ERROR) << "Failed to parse operands from objdump output, expected"
349                         " comma but found \""
350                      << token << "\"";
351         return false;
352       }
353     } else if (src.size() == 0) {
354       if (IsOperandSize(token))
355         continue;
356       src = token;
357     } else {
358       if (token == ",") {
359         BPLOG(ERROR) << "Failed to parse operands from objdump output, found "
360                         "unexpected comma after last operand.";
361         return false;
362       } else {
363         // We just ignore other junk after the last operand unless it's a
364         // comma, which would indicate we're probably still in the middle
365         // of the operands and something has gone wrong
366       }
367     }
368   }
369 
370   if (found_comma && src.size() == 0) {
371     BPLOG(ERROR) << "Failed to parse operands from objdump output, found comma "
372                     "but no src operand.";
373     return false;
374   }
375 
376   return true;
377 }
378 
379 // static
CalculateAddress(const DumpContext & context,const string & expression,uint64_t & address)380 bool DisassemblerObjdump::CalculateAddress(const DumpContext& context,
381                                            const string& expression,
382                                            uint64_t& address) {
383   address = 0;
384 
385   // Extract the components of the expression.
386   // fs:[esi+edi*4+0x80] -> ["fs", "esi", "edi", "4", "-", "0x80"]
387   std::regex expression_regex(
388       "^(?:(\\ws):)?"                // "fs:"
389       "\\[(\\w+)"                    // "[esi"
390       "(?:\\+(\\w+)(?:\\*(\\d+)))?"  // "+edi*4"
391       "(?:([\\+-])(0x[0-9a-f]+))?"   // "-0x80"
392       "\\]$");                       // "]"
393 
394   std::smatch match;
395   if (!std::regex_match(expression, match, expression_regex) ||
396       match.size() != 7) {
397     return false;
398   }
399 
400   string segment_name = match[1].str();
401   string register_name = match[2].str();
402   string index_name = match[3].str();
403   string index_stride = match[4].str();
404   string offset_sign = match[5].str();
405   string offset = match[6].str();
406 
407   uint64_t segment_address = 0;
408   uint64_t register_value = 0;
409   uint64_t index_value = 0;
410   uint64_t index_stride_value = 1;
411   uint64_t offset_value = 0;
412 
413   if (segment_name.size() &&
414       !GetSegmentAddress(context, segment_name, segment_address)) {
415     return false;
416   }
417 
418   if (!GetRegisterValue(context, register_name, register_value)) {
419     return false;
420   }
421 
422   if (index_name.size() &&
423       !GetRegisterValue(context, index_name, index_value)) {
424     return false;
425   }
426 
427   if (index_stride.size()) {
428     index_stride_value = strtoull(index_stride.c_str(), nullptr, 0);
429   }
430 
431   if (offset.size()) {
432     offset_value = strtoull(offset.c_str(), nullptr, 0);
433   }
434 
435   address =
436       segment_address + register_value + (index_value * index_stride_value);
437   if (offset_sign == "+") {
438     address += offset_value;
439   } else if (offset_sign == "-") {
440     address -= offset_value;
441   }
442 
443   return true;
444 }
445 
DisassemblerObjdump(const uint32_t cpu,const MemoryRegion * memory_region,uint64_t address)446 DisassemblerObjdump::DisassemblerObjdump(const uint32_t cpu,
447                                          const MemoryRegion* memory_region,
448                                          uint64_t address) {
449   if (address < memory_region->GetBase() ||
450       memory_region->GetBase() + memory_region->GetSize() <= address) {
451     return;
452   }
453 
454   uint8_t ip_bytes[kMaxX86InstructionLength] = {0};
455   size_t ip_bytes_length;
456   for (ip_bytes_length = 0; ip_bytes_length < kMaxX86InstructionLength;
457        ++ip_bytes_length) {
458     // We have to read byte-by-byte here, since we still want to try and
459     // disassemble an instruction even if we don't have enough bytes.
460     if (!memory_region->GetMemoryAtAddress(address + ip_bytes_length,
461                                            &ip_bytes[ip_bytes_length])) {
462       break;
463     }
464   }
465 
466   string instruction;
467   if (!DisassembleInstruction(cpu, ip_bytes, kMaxX86InstructionLength,
468                               instruction)) {
469     return;
470   }
471 
472   if (!TokenizeInstruction(instruction, operation_, dest_, src_)) {
473     return;
474   }
475 }
476 
CalculateSrcAddress(const DumpContext & context,uint64_t & address)477 bool DisassemblerObjdump::CalculateSrcAddress(const DumpContext& context,
478                                               uint64_t& address) {
479   return CalculateAddress(context, src_, address);
480 }
481 
CalculateDestAddress(const DumpContext & context,uint64_t & address)482 bool DisassemblerObjdump::CalculateDestAddress(const DumpContext& context,
483                                                uint64_t& address) {
484   return CalculateAddress(context, dest_, address);
485 }
486 
487 }  // namespace google_breakpad