xref: /aosp_15_r20/external/google-breakpad/src/processor/disassembler_objdump.h (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1*9712c20fSFrederick Mayle // Copyright (c) 2022, Google LLC
2*9712c20fSFrederick Mayle //
3*9712c20fSFrederick Mayle // Redistribution and use in source and binary forms, with or without
4*9712c20fSFrederick Mayle // modification, are permitted provided that the following conditions are
5*9712c20fSFrederick Mayle // met:
6*9712c20fSFrederick Mayle //
7*9712c20fSFrederick Mayle //     * Redistributions of source code must retain the above copyright
8*9712c20fSFrederick Mayle // notice, this list of conditions and the following disclaimer.
9*9712c20fSFrederick Mayle //     * Redistributions in binary form must reproduce the above
10*9712c20fSFrederick Mayle // copyright notice, this list of conditions and the following disclaimer
11*9712c20fSFrederick Mayle // in the documentation and/or other materials provided with the
12*9712c20fSFrederick Mayle // distribution.
13*9712c20fSFrederick Mayle //     * Neither the name of Google LLC nor the names of its
14*9712c20fSFrederick Mayle // contributors may be used to endorse or promote products derived from
15*9712c20fSFrederick Mayle // this software without specific prior written permission.
16*9712c20fSFrederick Mayle //
17*9712c20fSFrederick Mayle // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18*9712c20fSFrederick Mayle // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19*9712c20fSFrederick Mayle // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20*9712c20fSFrederick Mayle // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21*9712c20fSFrederick Mayle // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22*9712c20fSFrederick Mayle // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23*9712c20fSFrederick Mayle // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24*9712c20fSFrederick Mayle // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25*9712c20fSFrederick Mayle // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26*9712c20fSFrederick Mayle // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27*9712c20fSFrederick Mayle // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*9712c20fSFrederick Mayle 
29*9712c20fSFrederick Mayle // disassembler_objdump.h: Disassembler that invokes objdump for disassembly.
30*9712c20fSFrederick Mayle //
31*9712c20fSFrederick Mayle // Author: Mark Brand
32*9712c20fSFrederick Mayle 
33*9712c20fSFrederick Mayle #ifndef GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_
34*9712c20fSFrederick Mayle #define GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_
35*9712c20fSFrederick Mayle 
36*9712c20fSFrederick Mayle #include <string>
37*9712c20fSFrederick Mayle 
38*9712c20fSFrederick Mayle #include "common/using_std_string.h"
39*9712c20fSFrederick Mayle #include "google_breakpad/common/breakpad_types.h"
40*9712c20fSFrederick Mayle #include "google_breakpad/processor/dump_context.h"
41*9712c20fSFrederick Mayle #include "google_breakpad/processor/memory_region.h"
42*9712c20fSFrederick Mayle 
43*9712c20fSFrederick Mayle namespace google_breakpad {
44*9712c20fSFrederick Mayle 
45*9712c20fSFrederick Mayle // Uses objdump to disassemble a single instruction.
46*9712c20fSFrederick Mayle //
47*9712c20fSFrederick Mayle // Currently supports disassembly for x86 and x86_64 on linux hosts only; on
48*9712c20fSFrederick Mayle // unsupported platform or for unsupported architectures disassembly will fail.
49*9712c20fSFrederick Mayle //
50*9712c20fSFrederick Mayle // If disassembly is successful, then this allows extracting the instruction
51*9712c20fSFrederick Mayle // opcode, source and destination operands, and computing the source and
52*9712c20fSFrederick Mayle // destination addresses for instructions that operate on memory.
53*9712c20fSFrederick Mayle //
54*9712c20fSFrederick Mayle // Example:
55*9712c20fSFrederick Mayle //   DisassemblerObjdump disassembler(context->GetContextCPU(), memory_region,
56*9712c20fSFrederick Mayle //                                    instruction_ptr);
57*9712c20fSFrederick Mayle //   if (disassembler.IsValid()) {
58*9712c20fSFrederick Mayle //     uint64_t src_address = 0;
59*9712c20fSFrederick Mayle //     std::cerr << disassembler.operation() << " " << disassembler.src()
60*9712c20fSFrederick Mayle //               << ", " << disassembler.dest() << std::endl;
61*9712c20fSFrederick Mayle //     if (disassembler.CalculateSrcAddress(*context, src_address)) {
62*9712c20fSFrederick Mayle //       std::cerr << "[src_address = " << std::hex << src_address << "]\n";
63*9712c20fSFrederick Mayle //     }
64*9712c20fSFrederick Mayle //   }
65*9712c20fSFrederick Mayle class DisassemblerObjdump {
66*9712c20fSFrederick Mayle  public:
67*9712c20fSFrederick Mayle   // Construct an ObjdumpDisassembler for the provided `cpu` type, where this is
68*9712c20fSFrederick Mayle   // one of MD_CONTEXT_X86 or MD_CONTEXT_AMD64. Provided that `address` is
69*9712c20fSFrederick Mayle   // within `memory_region`, and the memory referenced is a valid instruction,
70*9712c20fSFrederick Mayle   // this will then be initialized with the disassembly for that instruction.
71*9712c20fSFrederick Mayle   DisassemblerObjdump(uint32_t cpu,
72*9712c20fSFrederick Mayle                       const MemoryRegion* memory_region,
73*9712c20fSFrederick Mayle                       uint64_t address);
74*9712c20fSFrederick Mayle   ~DisassemblerObjdump() = default;
75*9712c20fSFrederick Mayle 
76*9712c20fSFrederick Mayle   // If the source operand of the instruction is a memory operand, compute the
77*9712c20fSFrederick Mayle   // address referred to by the operand, and store this in `address`. On success
78*9712c20fSFrederick Mayle   // returns true, otherwise (if computation fails, or if the source operand is
79*9712c20fSFrederick Mayle   // not a memory operand) returns false and sets `address` to 0.
80*9712c20fSFrederick Mayle   bool CalculateSrcAddress(const DumpContext& context, uint64_t& address);
81*9712c20fSFrederick Mayle 
82*9712c20fSFrederick Mayle   // If the destination operand of the instruction is a memory operand, compute
83*9712c20fSFrederick Mayle   // the address referred to by the operand, and store this in `address`. On
84*9712c20fSFrederick Mayle   // success returns true, otherwise (if computation fails, or if the source
85*9712c20fSFrederick Mayle   // operand is not a memory operand) returns false and sets `address` to 0.
86*9712c20fSFrederick Mayle   bool CalculateDestAddress(const DumpContext& context, uint64_t& address);
87*9712c20fSFrederick Mayle 
88*9712c20fSFrederick Mayle   // If the instruction was disassembled successfully, this will be true.
IsValid()89*9712c20fSFrederick Mayle   bool IsValid() const { return operation_.size() != 0; }
90*9712c20fSFrederick Mayle 
91*9712c20fSFrederick Mayle   // Returns the operation part of the disassembly, without any prefixes:
92*9712c20fSFrederick Mayle   //   "pop" eax
93*9712c20fSFrederick Mayle   //   lock "xchg" eax, edx
operation()94*9712c20fSFrederick Mayle   const string& operation() const { return operation_; }
95*9712c20fSFrederick Mayle 
96*9712c20fSFrederick Mayle   // Returns the destination operand of the disassembly, without memory operand
97*9712c20fSFrederick Mayle   // size prefixes:
98*9712c20fSFrederick Mayle   //   mov DWORD PTR "[rax + 16]", edx
dest()99*9712c20fSFrederick Mayle   const string& dest() const { return dest_; }
100*9712c20fSFrederick Mayle 
101*9712c20fSFrederick Mayle   // Returns the source operand of the disassembly, without memory operand
102*9712c20fSFrederick Mayle   // size prefixes:
103*9712c20fSFrederick Mayle   //   mov rax, QWORD PTR "[rdx]"
src()104*9712c20fSFrederick Mayle   const string& src() const { return src_; }
105*9712c20fSFrederick Mayle 
106*9712c20fSFrederick Mayle  private:
107*9712c20fSFrederick Mayle   friend class DisassemblerObjdumpForTest;
108*9712c20fSFrederick Mayle 
109*9712c20fSFrederick Mayle   // Writes out the provided `raw_bytes` to a temporary file, and executes objdump
110*9712c20fSFrederick Mayle   // to disassemble according to `cpu`, which must be either MD_CONTEXT_X86 or
111*9712c20fSFrederick Mayle   // MD_CONTEXT_AMD64. Once objdump has completed, parses out the instruction
112*9712c20fSFrederick Mayle   // string from the first instruction in the output and stores it in
113*9712c20fSFrederick Mayle   // `instruction`.
114*9712c20fSFrederick Mayle   static bool DisassembleInstruction(uint32_t cpu, const uint8_t* raw_bytes,
115*9712c20fSFrederick Mayle                                      unsigned int raw_bytes_len,
116*9712c20fSFrederick Mayle                                      string& instruction);
117*9712c20fSFrederick Mayle 
118*9712c20fSFrederick Mayle   // Splits an `instruction` into three parts, the "main" `operation` and
119*9712c20fSFrederick Mayle   // the `dest` and `src` operands.
120*9712c20fSFrederick Mayle   // Example:
121*9712c20fSFrederick Mayle   //   instruction = "lock cmpxchg QWORD PTR [rdi], rsi"
122*9712c20fSFrederick Mayle   //   operation = "cmpxchg", dest = "[rdi]", src = "rsi"
123*9712c20fSFrederick Mayle   static bool TokenizeInstruction(const string& instruction, string& operation,
124*9712c20fSFrederick Mayle                                   string& dest, string& src);
125*9712c20fSFrederick Mayle 
126*9712c20fSFrederick Mayle   // Compute the address referenced by `expression` in `context`.
127*9712c20fSFrederick Mayle   // Supports memory operands in the form
128*9712c20fSFrederick Mayle   //   (segment:)[base_reg(+index_reg*index_stride)(+-offset)]
129*9712c20fSFrederick Mayle   // Returns false if evaluation fails, or if the operand is not a supported
130*9712c20fSFrederick Mayle   // memory operand.
131*9712c20fSFrederick Mayle   static bool CalculateAddress(const DumpContext& context,
132*9712c20fSFrederick Mayle                                const string& expression,
133*9712c20fSFrederick Mayle                                uint64_t& address);
134*9712c20fSFrederick Mayle 
135*9712c20fSFrederick Mayle   // The parsed components of the disassembly for the instruction.
136*9712c20fSFrederick Mayle   string operation_ = "";
137*9712c20fSFrederick Mayle   string dest_ = "";
138*9712c20fSFrederick Mayle   string src_ = "";
139*9712c20fSFrederick Mayle };
140*9712c20fSFrederick Mayle }  // namespace google_breakpad
141*9712c20fSFrederick Mayle 
142*9712c20fSFrederick Mayle #endif  // GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_