// Copyright (c) 2022, Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 21*9712c20fSFrederick Mayle // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22*9712c20fSFrederick Mayle // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23*9712c20fSFrederick Mayle // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24*9712c20fSFrederick Mayle // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25*9712c20fSFrederick Mayle // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26*9712c20fSFrederick Mayle // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27*9712c20fSFrederick Mayle // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28*9712c20fSFrederick Mayle 29*9712c20fSFrederick Mayle // disassembler_objdump.h: Disassembler that invokes objdump for disassembly. 30*9712c20fSFrederick Mayle // 31*9712c20fSFrederick Mayle // Author: Mark Brand 32*9712c20fSFrederick Mayle 33*9712c20fSFrederick Mayle #ifndef GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_ 34*9712c20fSFrederick Mayle #define GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_ 35*9712c20fSFrederick Mayle 36*9712c20fSFrederick Mayle #include <string> 37*9712c20fSFrederick Mayle 38*9712c20fSFrederick Mayle #include "common/using_std_string.h" 39*9712c20fSFrederick Mayle #include "google_breakpad/common/breakpad_types.h" 40*9712c20fSFrederick Mayle #include "google_breakpad/processor/dump_context.h" 41*9712c20fSFrederick Mayle #include "google_breakpad/processor/memory_region.h" 42*9712c20fSFrederick Mayle 43*9712c20fSFrederick Mayle namespace google_breakpad { 44*9712c20fSFrederick Mayle 45*9712c20fSFrederick Mayle // Uses objdump to disassemble a single instruction. 46*9712c20fSFrederick Mayle // 47*9712c20fSFrederick Mayle // Currently supports disassembly for x86 and x86_64 on linux hosts only; on 48*9712c20fSFrederick Mayle // unsupported platform or for unsupported architectures disassembly will fail. 
49*9712c20fSFrederick Mayle // 50*9712c20fSFrederick Mayle // If disassembly is successful, then this allows extracting the instruction 51*9712c20fSFrederick Mayle // opcode, source and destination operands, and computing the source and 52*9712c20fSFrederick Mayle // destination addresses for instructions that operate on memory. 53*9712c20fSFrederick Mayle // 54*9712c20fSFrederick Mayle // Example: 55*9712c20fSFrederick Mayle // DisassemblerObjdump disassembler(context->GetContextCPU(), memory_region, 56*9712c20fSFrederick Mayle // instruction_ptr); 57*9712c20fSFrederick Mayle // if (disassembler.IsValid()) { 58*9712c20fSFrederick Mayle // uint64_t src_address = 0; 59*9712c20fSFrederick Mayle // std::cerr << disassembler.operation() << " " << disassembler.src() 60*9712c20fSFrederick Mayle // << ", " << disassembler.dest() << std::endl; 61*9712c20fSFrederick Mayle // if (disassembler.CalculateSrcAddress(*context, src_address)) { 62*9712c20fSFrederick Mayle // std::cerr << "[src_address = " << std::hex << src_address << "]\n"; 63*9712c20fSFrederick Mayle // } 64*9712c20fSFrederick Mayle // } 65*9712c20fSFrederick Mayle class DisassemblerObjdump { 66*9712c20fSFrederick Mayle public: 67*9712c20fSFrederick Mayle // Construct an ObjdumpDisassembler for the provided `cpu` type, where this is 68*9712c20fSFrederick Mayle // one of MD_CONTEXT_X86 or MD_CONTEXT_AMD64. Provided that `address` is 69*9712c20fSFrederick Mayle // within `memory_region`, and the memory referenced is a valid instruction, 70*9712c20fSFrederick Mayle // this will then be initialized with the disassembly for that instruction. 
71*9712c20fSFrederick Mayle DisassemblerObjdump(uint32_t cpu, 72*9712c20fSFrederick Mayle const MemoryRegion* memory_region, 73*9712c20fSFrederick Mayle uint64_t address); 74*9712c20fSFrederick Mayle ~DisassemblerObjdump() = default; 75*9712c20fSFrederick Mayle 76*9712c20fSFrederick Mayle // If the source operand of the instruction is a memory operand, compute the 77*9712c20fSFrederick Mayle // address referred to by the operand, and store this in `address`. On success 78*9712c20fSFrederick Mayle // returns true, otherwise (if computation fails, or if the source operand is 79*9712c20fSFrederick Mayle // not a memory operand) returns false and sets `address` to 0. 80*9712c20fSFrederick Mayle bool CalculateSrcAddress(const DumpContext& context, uint64_t& address); 81*9712c20fSFrederick Mayle 82*9712c20fSFrederick Mayle // If the destination operand of the instruction is a memory operand, compute 83*9712c20fSFrederick Mayle // the address referred to by the operand, and store this in `address`. On 84*9712c20fSFrederick Mayle // success returns true, otherwise (if computation fails, or if the source 85*9712c20fSFrederick Mayle // operand is not a memory operand) returns false and sets `address` to 0. 86*9712c20fSFrederick Mayle bool CalculateDestAddress(const DumpContext& context, uint64_t& address); 87*9712c20fSFrederick Mayle 88*9712c20fSFrederick Mayle // If the instruction was disassembled successfully, this will be true. 
IsValid()89*9712c20fSFrederick Mayle bool IsValid() const { return operation_.size() != 0; } 90*9712c20fSFrederick Mayle 91*9712c20fSFrederick Mayle // Returns the operation part of the disassembly, without any prefixes: 92*9712c20fSFrederick Mayle // "pop" eax 93*9712c20fSFrederick Mayle // lock "xchg" eax, edx operation()94*9712c20fSFrederick Mayle const string& operation() const { return operation_; } 95*9712c20fSFrederick Mayle 96*9712c20fSFrederick Mayle // Returns the destination operand of the disassembly, without memory operand 97*9712c20fSFrederick Mayle // size prefixes: 98*9712c20fSFrederick Mayle // mov DWORD PTR "[rax + 16]", edx dest()99*9712c20fSFrederick Mayle const string& dest() const { return dest_; } 100*9712c20fSFrederick Mayle 101*9712c20fSFrederick Mayle // Returns the source operand of the disassembly, without memory operand 102*9712c20fSFrederick Mayle // size prefixes: 103*9712c20fSFrederick Mayle // mov rax, QWORD PTR "[rdx]" src()104*9712c20fSFrederick Mayle const string& src() const { return src_; } 105*9712c20fSFrederick Mayle 106*9712c20fSFrederick Mayle private: 107*9712c20fSFrederick Mayle friend class DisassemblerObjdumpForTest; 108*9712c20fSFrederick Mayle 109*9712c20fSFrederick Mayle // Writes out the provided `raw_bytes` to a temporary file, and executes objdump 110*9712c20fSFrederick Mayle // to disassemble according to `cpu`, which must be either MD_CONTEXT_X86 or 111*9712c20fSFrederick Mayle // MD_CONTEXT_AMD64. Once objdump has completed, parses out the instruction 112*9712c20fSFrederick Mayle // string from the first instruction in the output and stores it in 113*9712c20fSFrederick Mayle // `instruction`. 
114*9712c20fSFrederick Mayle static bool DisassembleInstruction(uint32_t cpu, const uint8_t* raw_bytes, 115*9712c20fSFrederick Mayle unsigned int raw_bytes_len, 116*9712c20fSFrederick Mayle string& instruction); 117*9712c20fSFrederick Mayle 118*9712c20fSFrederick Mayle // Splits an `instruction` into three parts, the "main" `operation` and 119*9712c20fSFrederick Mayle // the `dest` and `src` operands. 120*9712c20fSFrederick Mayle // Example: 121*9712c20fSFrederick Mayle // instruction = "lock cmpxchg QWORD PTR [rdi], rsi" 122*9712c20fSFrederick Mayle // operation = "cmpxchg", dest = "[rdi]", src = "rsi" 123*9712c20fSFrederick Mayle static bool TokenizeInstruction(const string& instruction, string& operation, 124*9712c20fSFrederick Mayle string& dest, string& src); 125*9712c20fSFrederick Mayle 126*9712c20fSFrederick Mayle // Compute the address referenced by `expression` in `context`. 127*9712c20fSFrederick Mayle // Supports memory operands in the form 128*9712c20fSFrederick Mayle // (segment:)[base_reg(+index_reg*index_stride)(+-offset)] 129*9712c20fSFrederick Mayle // Returns false if evaluation fails, or if the operand is not a supported 130*9712c20fSFrederick Mayle // memory operand. 131*9712c20fSFrederick Mayle static bool CalculateAddress(const DumpContext& context, 132*9712c20fSFrederick Mayle const string& expression, 133*9712c20fSFrederick Mayle uint64_t& address); 134*9712c20fSFrederick Mayle 135*9712c20fSFrederick Mayle // The parsed components of the disassembly for the instruction. 136*9712c20fSFrederick Mayle string operation_ = ""; 137*9712c20fSFrederick Mayle string dest_ = ""; 138*9712c20fSFrederick Mayle string src_ = ""; 139*9712c20fSFrederick Mayle }; 140*9712c20fSFrederick Mayle } // namespace google_breakpad 141*9712c20fSFrederick Mayle 142*9712c20fSFrederick Mayle #endif // GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_