xref: /aosp_15_r20/tools/dexter/slicer/dex_bytecode.cc (revision f0dffb02cdb5c647d21204e89a92a1ffae2dad87)
1*f0dffb02SXin Li /*
2*f0dffb02SXin Li  * Copyright (C) 2017 The Android Open Source Project
3*f0dffb02SXin Li  *
4*f0dffb02SXin Li  * Licensed under the Apache License, Version 2.0 (the "License");
5*f0dffb02SXin Li  * you may not use this file except in compliance with the License.
6*f0dffb02SXin Li  * You may obtain a copy of the License at
7*f0dffb02SXin Li  *
8*f0dffb02SXin Li  *      http://www.apache.org/licenses/LICENSE-2.0
9*f0dffb02SXin Li  *
10*f0dffb02SXin Li  * Unless required by applicable law or agreed to in writing, software
11*f0dffb02SXin Li  * distributed under the License is distributed on an "AS IS" BASIS,
12*f0dffb02SXin Li  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*f0dffb02SXin Li  * See the License for the specific language governing permissions and
14*f0dffb02SXin Li  * limitations under the License.
15*f0dffb02SXin Li  */
16*f0dffb02SXin Li 
17*f0dffb02SXin Li #include "slicer/dex_bytecode.h"
18*f0dffb02SXin Li 
19*f0dffb02SXin Li #include "slicer/common.h"
20*f0dffb02SXin Li 
21*f0dffb02SXin Li #include <array>
22*f0dffb02SXin Li #include <iomanip>
23*f0dffb02SXin Li #include <sstream>
24*f0dffb02SXin Li 
25*f0dffb02SXin Li namespace dex {
26*f0dffb02SXin Li 
OpcodeFromBytecode(u2 bytecode)27*f0dffb02SXin Li Opcode OpcodeFromBytecode(u2 bytecode) {
28*f0dffb02SXin Li   Opcode opcode = Opcode(bytecode & 0xff);
29*f0dffb02SXin Li   return opcode;
30*f0dffb02SXin Li }
31*f0dffb02SXin Li 
32*f0dffb02SXin Li // Table that maps each opcode to the index type implied by that opcode
33*f0dffb02SXin Li static constexpr std::array<InstructionDescriptor, kNumPackedOpcodes>
34*f0dffb02SXin Li     gInstructionDescriptors = {{
35*f0dffb02SXin Li #define INSTRUCTION_DESCR(o, c, p, format, index, flags, e, vflags) \
36*f0dffb02SXin Li   {                                                                 \
37*f0dffb02SXin Li       vflags,                                                       \
38*f0dffb02SXin Li       format,                                                       \
39*f0dffb02SXin Li       index,                                                        \
40*f0dffb02SXin Li       flags,                                                        \
41*f0dffb02SXin Li   },
42*f0dffb02SXin Li #include "export/slicer/dex_instruction_list.h"
43*f0dffb02SXin Li         DEX_INSTRUCTION_LIST(INSTRUCTION_DESCR)
44*f0dffb02SXin Li #undef DEX_INSTRUCTION_LIST
45*f0dffb02SXin Li #undef INSTRUCTION_DESCR
46*f0dffb02SXin Li     }};
47*f0dffb02SXin Li 
GetIndexTypeFromOpcode(Opcode opcode)48*f0dffb02SXin Li InstructionIndexType GetIndexTypeFromOpcode(Opcode opcode) {
49*f0dffb02SXin Li   return gInstructionDescriptors[opcode].index_type;
50*f0dffb02SXin Li }
51*f0dffb02SXin Li 
GetFormatFromOpcode(Opcode opcode)52*f0dffb02SXin Li InstructionFormat GetFormatFromOpcode(Opcode opcode) {
53*f0dffb02SXin Li   return gInstructionDescriptors[opcode].format;
54*f0dffb02SXin Li }
55*f0dffb02SXin Li 
GetFlagsFromOpcode(Opcode opcode)56*f0dffb02SXin Li OpcodeFlags GetFlagsFromOpcode(Opcode opcode) {
57*f0dffb02SXin Li   return gInstructionDescriptors[opcode].flags;
58*f0dffb02SXin Li }
59*f0dffb02SXin Li 
GetVerifyFlagsFromOpcode(Opcode opcode)60*f0dffb02SXin Li VerifyFlags GetVerifyFlagsFromOpcode(Opcode opcode) {
61*f0dffb02SXin Li   return gInstructionDescriptors[opcode].verify_flags;
62*f0dffb02SXin Li }
63*f0dffb02SXin Li 
GetWidthFromFormat(InstructionFormat format)64*f0dffb02SXin Li size_t GetWidthFromFormat(InstructionFormat format) {
65*f0dffb02SXin Li   switch (format) {
66*f0dffb02SXin Li     case k10x:
67*f0dffb02SXin Li     case k12x:
68*f0dffb02SXin Li     case k11n:
69*f0dffb02SXin Li     case k11x:
70*f0dffb02SXin Li     case k10t:
71*f0dffb02SXin Li       return 1;
72*f0dffb02SXin Li     case k20t:
73*f0dffb02SXin Li     case k20bc:
74*f0dffb02SXin Li     case k21c:
75*f0dffb02SXin Li     case k22x:
76*f0dffb02SXin Li     case k21s:
77*f0dffb02SXin Li     case k21t:
78*f0dffb02SXin Li     case k21h:
79*f0dffb02SXin Li     case k23x:
80*f0dffb02SXin Li     case k22b:
81*f0dffb02SXin Li     case k22s:
82*f0dffb02SXin Li     case k22t:
83*f0dffb02SXin Li     case k22c:
84*f0dffb02SXin Li     case k22cs:
85*f0dffb02SXin Li       return 2;
86*f0dffb02SXin Li     case k30t:
87*f0dffb02SXin Li     case k31t:
88*f0dffb02SXin Li     case k31c:
89*f0dffb02SXin Li     case k32x:
90*f0dffb02SXin Li     case k31i:
91*f0dffb02SXin Li     case k35c:
92*f0dffb02SXin Li     case k35ms:
93*f0dffb02SXin Li     case k35mi:
94*f0dffb02SXin Li     case k3rc:
95*f0dffb02SXin Li     case k3rms:
96*f0dffb02SXin Li     case k3rmi:
97*f0dffb02SXin Li       return 3;
98*f0dffb02SXin Li     case k45cc:
99*f0dffb02SXin Li     case k4rcc:
100*f0dffb02SXin Li       return 4;
101*f0dffb02SXin Li     case k51l:
102*f0dffb02SXin Li       return 5;
103*f0dffb02SXin Li   }
104*f0dffb02SXin Li }
105*f0dffb02SXin Li 
GetWidthFromBytecode(const u2 * bytecode)106*f0dffb02SXin Li size_t GetWidthFromBytecode(const u2* bytecode) {
107*f0dffb02SXin Li   size_t width = 0;
108*f0dffb02SXin Li   if (*bytecode == kPackedSwitchSignature) {
109*f0dffb02SXin Li     width = 4 + bytecode[1] * 2;
110*f0dffb02SXin Li   } else if (*bytecode == kSparseSwitchSignature) {
111*f0dffb02SXin Li     width = 2 + bytecode[1] * 4;
112*f0dffb02SXin Li   } else if (*bytecode == kArrayDataSignature) {
113*f0dffb02SXin Li     u2 elemWidth = bytecode[1];
114*f0dffb02SXin Li     u4 len = bytecode[2] | (((u4)bytecode[3]) << 16);
115*f0dffb02SXin Li     // The plus 1 is to round up for odd size and width.
116*f0dffb02SXin Li     width = 4 + (elemWidth * len + 1) / 2;
117*f0dffb02SXin Li   } else {
118*f0dffb02SXin Li     width = GetWidthFromFormat(
119*f0dffb02SXin Li         GetFormatFromOpcode(OpcodeFromBytecode(bytecode[0])));
120*f0dffb02SXin Li   }
121*f0dffb02SXin Li   return width;
122*f0dffb02SXin Li }
123*f0dffb02SXin Li 
124*f0dffb02SXin Li // Dalvik opcode names.
125*f0dffb02SXin Li static constexpr std::array<const char*, kNumPackedOpcodes> gOpcodeNames = {
126*f0dffb02SXin Li #define INSTRUCTION_NAME(o, c, pname, f, i, a, e, v) pname,
127*f0dffb02SXin Li #include "export/slicer/dex_instruction_list.h"
128*f0dffb02SXin Li     DEX_INSTRUCTION_LIST(INSTRUCTION_NAME)
129*f0dffb02SXin Li #undef DEX_INSTRUCTION_LIST
130*f0dffb02SXin Li #undef INSTRUCTION_NAME
131*f0dffb02SXin Li };
132*f0dffb02SXin Li 
GetOpcodeName(Opcode opcode)133*f0dffb02SXin Li const char* GetOpcodeName(Opcode opcode) { return gOpcodeNames[opcode]; }
134*f0dffb02SXin Li 
135*f0dffb02SXin Li // Helpers for DecodeInstruction()
InstA(u2 inst)136*f0dffb02SXin Li static u4 InstA(u2 inst) { return (inst >> 8) & 0x0f; }
InstB(u2 inst)137*f0dffb02SXin Li static u4 InstB(u2 inst) { return inst >> 12; }
InstAA(u2 inst)138*f0dffb02SXin Li static u4 InstAA(u2 inst) { return inst >> 8; }
139*f0dffb02SXin Li 
140*f0dffb02SXin Li // Helper for DecodeInstruction()
FetchU4(const u2 * ptr)141*f0dffb02SXin Li static u4 FetchU4(const u2* ptr) { return ptr[0] | (u4(ptr[1]) << 16); }
142*f0dffb02SXin Li 
143*f0dffb02SXin Li // Helper for DecodeInstruction()
FetchU8(const u2 * ptr)144*f0dffb02SXin Li static u8 FetchU8(const u2* ptr) {
145*f0dffb02SXin Li   return FetchU4(ptr) | (u8(FetchU4(ptr + 2)) << 32);
146*f0dffb02SXin Li }
147*f0dffb02SXin Li 
148*f0dffb02SXin Li // Decode a Dalvik bytecode and extract the individual fields
DecodeInstruction(const u2 * bytecode)149*f0dffb02SXin Li Instruction DecodeInstruction(const u2* bytecode) {
150*f0dffb02SXin Li   u2 inst = bytecode[0];
151*f0dffb02SXin Li   Opcode opcode = OpcodeFromBytecode(inst);
152*f0dffb02SXin Li   InstructionFormat format = GetFormatFromOpcode(opcode);
153*f0dffb02SXin Li 
154*f0dffb02SXin Li   Instruction dec = {};
155*f0dffb02SXin Li   dec.opcode = opcode;
156*f0dffb02SXin Li 
157*f0dffb02SXin Li   switch (format) {
158*f0dffb02SXin Li     case k10x:  // op
159*f0dffb02SXin Li       return dec;
160*f0dffb02SXin Li     case k12x:  // op vA, vB
161*f0dffb02SXin Li       dec.vA = InstA(inst);
162*f0dffb02SXin Li       dec.vB = InstB(inst);
163*f0dffb02SXin Li       return dec;
164*f0dffb02SXin Li     case k11n:  // op vA, #+B
165*f0dffb02SXin Li       dec.vA = InstA(inst);
166*f0dffb02SXin Li       dec.vB = s4(InstB(inst) << 28) >> 28;  // sign extend 4-bit value
167*f0dffb02SXin Li       return dec;
168*f0dffb02SXin Li     case k11x:  // op vAA
169*f0dffb02SXin Li       dec.vA = InstAA(inst);
170*f0dffb02SXin Li       return dec;
171*f0dffb02SXin Li     case k10t:                    // op +AA
172*f0dffb02SXin Li       dec.vA = s1(InstAA(inst));  // sign-extend 8-bit value
173*f0dffb02SXin Li       return dec;
174*f0dffb02SXin Li     case k20t:                   // op +AAAA
175*f0dffb02SXin Li       dec.vA = s2(bytecode[1]);  // sign-extend 16-bit value
176*f0dffb02SXin Li       return dec;
177*f0dffb02SXin Li     case k20bc:  // [opt] op AA, thing@BBBB
178*f0dffb02SXin Li     case k21c:   // op vAA, thing@BBBB
179*f0dffb02SXin Li     case k22x:   // op vAA, vBBBB
180*f0dffb02SXin Li       dec.vA = InstAA(inst);
181*f0dffb02SXin Li       dec.vB = bytecode[1];
182*f0dffb02SXin Li       return dec;
183*f0dffb02SXin Li     case k21s:  // op vAA, #+BBBB
184*f0dffb02SXin Li     case k21t:  // op vAA, +BBBB
185*f0dffb02SXin Li       dec.vA = InstAA(inst);
186*f0dffb02SXin Li       dec.vB = s2(bytecode[1]);  // sign-extend 16-bit value
187*f0dffb02SXin Li       return dec;
188*f0dffb02SXin Li     case k21h:  // op vAA, #+BBBB0000[00000000]
189*f0dffb02SXin Li       dec.vA = InstAA(inst);
190*f0dffb02SXin Li       // The value should be treated as right-zero-extended, but we don't
191*f0dffb02SXin Li       // actually do that here. Among other things, we don't know if it's
192*f0dffb02SXin Li       // the top bits of a 32- or 64-bit value.
193*f0dffb02SXin Li       dec.vB = bytecode[1];
194*f0dffb02SXin Li       return dec;
195*f0dffb02SXin Li     case k23x:  // op vAA, vBB, vCC
196*f0dffb02SXin Li       dec.vA = InstAA(inst);
197*f0dffb02SXin Li       dec.vB = bytecode[1] & 0xff;
198*f0dffb02SXin Li       dec.vC = bytecode[1] >> 8;
199*f0dffb02SXin Li       return dec;
200*f0dffb02SXin Li     case k22b:  // op vAA, vBB, #+CC
201*f0dffb02SXin Li       dec.vA = InstAA(inst);
202*f0dffb02SXin Li       dec.vB = bytecode[1] & 0xff;
203*f0dffb02SXin Li       dec.vC = s1(bytecode[1] >> 8);  // sign-extend 8-bit value
204*f0dffb02SXin Li       return dec;
205*f0dffb02SXin Li     case k22s:  // op vA, vB, #+CCCC
206*f0dffb02SXin Li     case k22t:  // op vA, vB, +CCCC
207*f0dffb02SXin Li       dec.vA = InstA(inst);
208*f0dffb02SXin Li       dec.vB = InstB(inst);
209*f0dffb02SXin Li       dec.vC = s2(bytecode[1]);  // sign-extend 16-bit value
210*f0dffb02SXin Li       return dec;
211*f0dffb02SXin Li     case k22c:   // op vA, vB, thing@CCCC
212*f0dffb02SXin Li     case k22cs:  // [opt] op vA, vB, field offset CCCC
213*f0dffb02SXin Li       dec.vA = InstA(inst);
214*f0dffb02SXin Li       dec.vB = InstB(inst);
215*f0dffb02SXin Li       dec.vC = bytecode[1];
216*f0dffb02SXin Li       return dec;
217*f0dffb02SXin Li     case k30t:  // op +AAAAAAAA
218*f0dffb02SXin Li       dec.vA = FetchU4(bytecode + 1);
219*f0dffb02SXin Li       return dec;
220*f0dffb02SXin Li     case k31t:  // op vAA, +BBBBBBBB
221*f0dffb02SXin Li     case k31c:  // op vAA, string@BBBBBBBB
222*f0dffb02SXin Li       dec.vA = InstAA(inst);
223*f0dffb02SXin Li       dec.vB = FetchU4(bytecode + 1);
224*f0dffb02SXin Li       return dec;
225*f0dffb02SXin Li     case k32x:  // op vAAAA, vBBBB
226*f0dffb02SXin Li       dec.vA = bytecode[1];
227*f0dffb02SXin Li       dec.vB = bytecode[2];
228*f0dffb02SXin Li       return dec;
229*f0dffb02SXin Li     case k31i:  // op vAA, #+BBBBBBBB
230*f0dffb02SXin Li       dec.vA = InstAA(inst);
231*f0dffb02SXin Li       dec.vB = FetchU4(bytecode + 1);
232*f0dffb02SXin Li       return dec;
233*f0dffb02SXin Li     case k35c:               // op {vC, vD, vE, vF, vG}, thing@BBBB
234*f0dffb02SXin Li     case k35ms:              // [opt] invoke-virtual+super
235*f0dffb02SXin Li     case k35mi: {            // [opt] inline invoke
236*f0dffb02SXin Li       dec.vA = InstB(inst);  // This is labeled A in the spec.
237*f0dffb02SXin Li       dec.vB = bytecode[1];
238*f0dffb02SXin Li 
239*f0dffb02SXin Li       u2 regList = bytecode[2];
240*f0dffb02SXin Li 
241*f0dffb02SXin Li       // Copy the argument registers into the arg[] array, and
242*f0dffb02SXin Li       // also copy the first argument (if any) into vC. (The
243*f0dffb02SXin Li       // Instruction structure doesn't have separate
244*f0dffb02SXin Li       // fields for {vD, vE, vF, vG}, so there's no need to make
245*f0dffb02SXin Li       // copies of those.) Note that cases 5..2 fall through.
246*f0dffb02SXin Li       switch (dec.vA) {
247*f0dffb02SXin Li         case 5:
248*f0dffb02SXin Li           // A fifth arg is verboten for inline invokes
249*f0dffb02SXin Li           SLICER_CHECK_NE(format, k35mi);
250*f0dffb02SXin Li 
251*f0dffb02SXin Li           // Per note at the top of this format decoder, the
252*f0dffb02SXin Li           // fifth argument comes from the A field in the
253*f0dffb02SXin Li           // instruction, but it's labeled G in the spec.
254*f0dffb02SXin Li           dec.arg[4] = InstA(inst);
255*f0dffb02SXin Li           FALLTHROUGH_INTENDED;
256*f0dffb02SXin Li         case 4:
257*f0dffb02SXin Li           dec.arg[3] = (regList >> 12) & 0x0f;
258*f0dffb02SXin Li           FALLTHROUGH_INTENDED;
259*f0dffb02SXin Li         case 3:
260*f0dffb02SXin Li           dec.arg[2] = (regList >> 8) & 0x0f;
261*f0dffb02SXin Li           FALLTHROUGH_INTENDED;
262*f0dffb02SXin Li         case 2:
263*f0dffb02SXin Li           dec.arg[1] = (regList >> 4) & 0x0f;
264*f0dffb02SXin Li           FALLTHROUGH_INTENDED;
265*f0dffb02SXin Li         case 1:
266*f0dffb02SXin Li           dec.vC = dec.arg[0] = regList & 0x0f;
267*f0dffb02SXin Li           FALLTHROUGH_INTENDED;
268*f0dffb02SXin Li         case 0:
269*f0dffb02SXin Li           // Valid, but no need to do anything
270*f0dffb02SXin Li           return dec;
271*f0dffb02SXin Li       }
272*f0dffb02SXin Li     }
273*f0dffb02SXin Li       SLICER_CHECK(!"Invalid arg count in 35c/35ms/35mi");
274*f0dffb02SXin Li     case k3rc:   // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB
275*f0dffb02SXin Li     case k3rms:  // [opt] invoke-virtual+super/range
276*f0dffb02SXin Li     case k3rmi:  // [opt] execute-inline/range
277*f0dffb02SXin Li       dec.vA = InstAA(inst);
278*f0dffb02SXin Li       dec.vB = bytecode[1];
279*f0dffb02SXin Li       dec.vC = bytecode[2];
280*f0dffb02SXin Li       return dec;
281*f0dffb02SXin Li     case k45cc: {
282*f0dffb02SXin Li       // AG op BBBB FEDC HHHH
283*f0dffb02SXin Li       dec.vA = InstB(inst);  // This is labelled A in the spec.
284*f0dffb02SXin Li       dec.vB = bytecode[1];  // vB meth@BBBB
285*f0dffb02SXin Li 
286*f0dffb02SXin Li       u2 regList = bytecode[2];
287*f0dffb02SXin Li       dec.vC = regList & 0xf;
288*f0dffb02SXin Li       dec.arg[0] = (regList >> 4) & 0xf;  // vD
289*f0dffb02SXin Li       dec.arg[1] = (regList >> 8) & 0xf;  // vE
290*f0dffb02SXin Li       dec.arg[2] = (regList >> 12);       // vF
291*f0dffb02SXin Li       dec.arg[3] = InstA(inst);           // vG
292*f0dffb02SXin Li       dec.arg[4] = bytecode[3];           // vH proto@HHHH
293*f0dffb02SXin Li     }
294*f0dffb02SXin Li       return dec;
295*f0dffb02SXin Li     case k4rcc:
296*f0dffb02SXin Li       // AA op BBBB CCCC HHHH
297*f0dffb02SXin Li       dec.vA = InstAA(inst);
298*f0dffb02SXin Li       dec.vB = bytecode[1];
299*f0dffb02SXin Li       dec.vC = bytecode[2];
300*f0dffb02SXin Li       dec.arg[4] = bytecode[3];  // vH proto@HHHH
301*f0dffb02SXin Li       return dec;
302*f0dffb02SXin Li     case k51l:  // op vAA, #+BBBBBBBBBBBBBBBB
303*f0dffb02SXin Li       dec.vA = InstAA(inst);
304*f0dffb02SXin Li       dec.vB_wide = FetchU8(bytecode + 1);
305*f0dffb02SXin Li       return dec;
306*f0dffb02SXin Li   }
307*f0dffb02SXin Li 
308*f0dffb02SXin Li   std::stringstream ss;
309*f0dffb02SXin Li   ss << "Can't decode unexpected format " << format << " for " << opcode;
310*f0dffb02SXin Li   SLICER_FATAL(ss.str());
311*f0dffb02SXin Li }
312*f0dffb02SXin Li 
// Formats |value| as "0x" followed by its lowercase hexadecimal digits,
// zero-padded on the left to at least two digits.
static inline std::string HexByte(int value) {
  std::ostringstream out;
  out << "0x";
  // The width applies only to the next insertion, i.e. the number itself.
  out << std::setfill('0') << std::setw(2) << std::hex << value;
  return out.str();
}
318*f0dffb02SXin Li 
operator <<(std::ostream & os,Opcode opcode)319*f0dffb02SXin Li std::ostream& operator<<(std::ostream& os, Opcode opcode) {
320*f0dffb02SXin Li   return os << "[" << HexByte(opcode) << "] " << gOpcodeNames[opcode];
321*f0dffb02SXin Li }
322*f0dffb02SXin Li 
operator <<(std::ostream & os,InstructionFormat format)323*f0dffb02SXin Li std::ostream& operator<<(std::ostream& os, InstructionFormat format) {
324*f0dffb02SXin Li   switch (format) {
325*f0dffb02SXin Li   #define EMIT_INSTRUCTION_FORMAT_NAME(name) \
326*f0dffb02SXin Li     case InstructionFormat::k##name: return os << #name;
327*f0dffb02SXin Li   #include "export/slicer/dex_instruction_list.h"
328*f0dffb02SXin Li   DEX_INSTRUCTION_FORMAT_LIST(EMIT_INSTRUCTION_FORMAT_NAME)
329*f0dffb02SXin Li   #undef EMIT_INSTRUCTION_FORMAT_NAME
330*f0dffb02SXin Li   #undef DEX_INSTRUCTION_FORMAT_LIST
331*f0dffb02SXin Li   #undef DEX_INSTRUCTION_LIST
332*f0dffb02SXin Li   }
333*f0dffb02SXin Li   return os << "[" << HexByte(format) << "] " << "Unknown";
334*f0dffb02SXin Li }
335*f0dffb02SXin Li 
336*f0dffb02SXin Li }  // namespace dex
337