xref: /aosp_15_r20/external/angle/build/android/convert_dex_profile.py (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1#!/usr/bin/env vpython3
2#
3# Copyright 2018 The Chromium Authors
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7import argparse
8import collections
9import functools
10import logging
11import re
12import subprocess
13import sys
14
# ---------------------------------------------------------------------------
# Patterns applied to individual lines of dexdump output.
# ---------------------------------------------------------------------------

# Quoted class descriptor; captures the name without the leading 'L' and the
# trailing ';'.
DEX_CLASS_NAME_RE = re.compile(r'\'L(?P<class_name>[^;]+);\'')

# Quoted method name.
DEX_METHOD_NAME_RE = re.compile(r'\'(?P<method_name>[^\']+)\'')

# Quoted method signature in type descriptor form: '(<params>)<return type>'.
DEX_METHOD_TYPE_RE = re.compile(
    r'\'\((?P<method_params>[^)]*)\)'
    r'(?P<method_return_type>[^\']+)\'')

# Source line number attached to a position entry.
DEX_METHOD_LINE_NR_RE = re.compile(r'line=(?P<line_number>\d+)')

# ---------------------------------------------------------------------------
# Pattern applied to method lines of a profile.
# ---------------------------------------------------------------------------

# <tags><class descriptor>-><method name>(<params>)<return type>
PROFILE_METHOD_RE = re.compile(
    # One or more of the H/S/P tags followed by the class name in type
    # descriptor format.
    r'(?P<tags>[HSP]+)(?P<class_name>L[^;]+;)'
    # Method name and its parameter list.
    r'->(?P<method_name>[^(]+)\((?P<method_params>[^)]*)\)'
    # Everything that is left is the return type.
    r'(?P<method_return_type>.+)')

# ---------------------------------------------------------------------------
# Patterns applied to lines of a proguard mapping file.
# ---------------------------------------------------------------------------

# <original class name> -> <obfuscated class name>:
PROGUARD_CLASS_MAPPING_RE = re.compile(
    r'(?P<original_name>[^ ]+) -> (?P<obfuscated_name>[^:]+):')

PROGUARD_METHOD_MAPPING_RE = re.compile(
    # Optional "line_start:line_end:" prefix.
    r'((?P<line_start>\d+):(?P<line_end>\d+):)?'
    # Return type of the original method.
    r'(?P<return_type>[^ ]+)'
    # Optional class qualifier of the original method.
    r' (?:(?P<original_method_class>[a-zA-Z_\d.$]+)\.)?'
    # Name and parameter list of the original method.
    r'(?P<original_method_name>[^.\(]+)'
    r'\((?P<params>[^\)]*)\)'
    # Original line numbers (ignored), the arrow, and the obfuscated name.
    r'(?:[^ ]*) -> (?P<obfuscated_name>.+)')

# ---------------------------------------------------------------------------
# Type descriptor helpers.
# ---------------------------------------------------------------------------

# A single type descriptor: any number of array brackets followed by either a
# class descriptor (captured) or a one-letter primitive code.
TYPE_DESCRIPTOR_RE = re.compile(
    r'(?P<brackets>\[*)'
    r'(?:(?P<class_name>L[^;]+;)|[VZBSCIJFD])')

# Dot notation spelling of each primitive type (and of "no type at all")
# mapped to its type descriptor code.
DOT_NOTATION_MAP = {
    '': '',
    'boolean': 'Z',
    'byte': 'B',
    'void': 'V',
    'short': 'S',
    'char': 'C',
    'int': 'I',
    'long': 'J',
    'float': 'F',
    'double': 'D',
}
69
70
@functools.total_ordering
class Method:
  """A method identified by its class, name and type signature.

  All fields are strings in type descriptor format. param_types and
  return_type may be left as None at construction time and filled in later.

  Methods compare and sort by (class_name, name, param_types, return_type).
  Comparing a Method with any other kind of object is not an error: equality
  is simply False and ordering falls back to Python's default handling.
  """

  def __init__(self, name, class_name, param_types=None, return_type=None):
    """Creates a method.

    Args:
      name: name of the method
      class_name: name of the class the method belongs to
      param_types: concatenated parameter types (optional)
      return_type: return type (optional)
    """
    self.name = name
    self.class_name = class_name
    self.param_types = param_types
    self.return_type = return_type

  def __str__(self):
    return '{}->{}({}){}'.format(self.class_name, self.name,
        self.param_types or '', self.return_type or '')

  def __repr__(self):
    return 'Method<{}->{}({}){}>'.format(self.class_name, self.name,
        self.param_types or '', self.return_type or '')

  @staticmethod
  def serialize(method):
    """Returns the tuple of fields used to compare and order methods."""
    return (method.class_name, method.name, method.param_types,
            method.return_type)

  def __eq__(self, other):
    # Returning NotImplemented (instead of touching attributes the other object
    # may not have) lets Python fall back to its default comparison.
    if not isinstance(other, Method):
      return NotImplemented
    return self.serialize(self) == self.serialize(other)

  def __lt__(self, other):
    if not isinstance(other, Method):
      return NotImplemented
    return self.serialize(self) < self.serialize(other)

  def __hash__(self):
    # only hash name and class_name since other fields may not be set yet.
    return hash((self.name, self.class_name))
101
102
class Class:
  """A dex class together with its methods and the lines they are mapped to."""

  def __init__(self, name):
    """Creates an empty class.

    Args:
      name: name of the class
    """
    self.name = name
    # List of (method, set(int)) pairs: each method with its line numbers.
    self._methods = []

  def AddMethod(self, method, line_numbers):
    """Registers a method and the line numbers it is mapped to.

    Args:
      method: object with a "name" attribute describing the method
      line_numbers: iterable of line numbers mapped to the method
    """
    self._methods.append((method, set(line_numbers)))

  def _WarnIfAmbiguous(self, found_methods, hint_lines):
    """Logs a warning when more than one method matched the line hints."""
    if len(found_methods) > 1:
      logging.warning('ambigous methods in dex %s at lines %s in class "%s"',
          found_methods, hint_lines, self.name)

  def FindMethodsAtLine(self, method_name, line_start, line_end=None):
    """Searches through dex class for a method given a name and line numbers

    The dex maps methods to line numbers. Given a method name in this class as
    well as a start line and an optional end line (which act as hints as to
    which function in the class is being looked for), this returns a list of
    possible matches (or None if none are found).

    When exactly one method has the requested name it is returned without
    looking at the hints. Otherwise methods that share at least one line with
    the hint range are preferred; if there are none, methods whose
    [min line, max line] span overlaps the hint range are returned.

    An empty hint range (line_end < line_start) and methods registered without
    any line number never match by line; they do not raise.

    Args:
      method_name: name of method being searched for
      line_start: start of hint range for lines in this method
      line_end: end of hint range for lines in this method (optional)

    Returns:
      A list of Method objects that could match the hints given, or None if no
      method is found.
    """
    if line_end is None:
      hint_lines = {line_start}
    else:
      hint_lines = set(range(line_start, line_end + 1))

    named_methods = [(method, l) for method, l in self._methods
                     if method.name == method_name]

    if not named_methods:
      return None
    if len(named_methods) == 1:
      return [named_methods[0][0]]

    # First pass: methods sharing at least one exact line with the hints.
    found_methods = [method for method, line_numbers in named_methods
                     if not hint_lines.isdisjoint(line_numbers)]

    # Second pass: methods whose line span overlaps the hint span. min()/max()
    # reject empty collections, so empty hints / line sets are excluded here.
    if not found_methods and hint_lines:
      hint_low = min(hint_lines)
      hint_high = max(hint_lines)
      found_methods = [method for method, line_numbers in named_methods
                       if line_numbers
                       and hint_high >= min(line_numbers)
                       and hint_low <= max(line_numbers)]

    if found_methods:
      self._WarnIfAmbiguous(found_methods, hint_lines)
      return found_methods

    logging.warning(
        'No method named "%s" in class "%s" is '
        'mapped to lines %s', method_name, self.name, hint_lines)
    return None
166
167
class Profile:
  """Collects the classes and tagged methods that make up a profile."""

  def __init__(self):
    self._classes = []
    # {Method: set(char)}
    self._methods = collections.defaultdict(set)

  def AddMethod(self, method, tags):
    """Adds every tag in |tags| to the set of tags recorded for |method|."""
    for single_tag in tags:
      self._methods[method].add(single_tag)

  def AddClass(self, cls):
    """Records a class entry."""
    self._classes.append(cls)

  def WriteToFile(self, path):
    """Writes the profile to |path|.

    Classes are written first, one per line and sorted; they are followed by
    the sorted methods, each prefixed with its sorted tags.
    """
    with open(path, 'w') as out:
      out.writelines(class_entry + '\n'
                     for class_entry in sorted(self._classes))
      for method_entry in sorted(self._methods):
        tag_prefix = ''.join(sorted(self._methods[method_entry]))
        out.write('{}{}\n'.format(tag_prefix, str(method_entry)))
189
190
class ProguardMapping:
  """One direction of a proguard mapping, for both classes and methods."""

  def __init__(self):
    # {String: String} String is class name in type descriptor format
    self._class_mapping = {}
    # {Method: set(Method)}
    self._method_mapping = collections.defaultdict(set)

  def AddMethodMapping(self, from_method, to_method):
    """Records that |from_method| maps to |to_method| (possibly among others)."""
    targets = self._method_mapping[from_method]
    targets.add(to_method)

  def AddClassMapping(self, from_class, to_class):
    """Records that |from_class| maps to |to_class|."""
    self._class_mapping[from_class] = to_class

  def GetMethodMapping(self, from_method):
    """Returns the set of methods |from_method| maps to, or None if unknown."""
    if from_method in self._method_mapping:
      return self._method_mapping[from_method]
    return None

  def GetClassMapping(self, from_class):
    """Returns the class |from_class| maps to, or |from_class| if unknown."""
    try:
      return self._class_mapping[from_class]
    except KeyError:
      return from_class

  def MapTypeDescriptor(self, type_descriptor):
    """Maps the first type descriptor found in |type_descriptor|.

    Array brackets are preserved and class names are translated through the
    class mapping. Anything that is not a class is returned as matched.
    """
    found = TYPE_DESCRIPTOR_RE.search(type_descriptor)
    assert found is not None
    descriptor_class = found.group('class_name')
    if descriptor_class is None:
      # just a native type, return as is
      return found.group()
    return found.group('brackets') + self.GetClassMapping(descriptor_class)

  def MapTypeDescriptorList(self, type_descriptor_list):
    """Maps every type descriptor inside a concatenated descriptor list."""
    def _MapOne(found):
      return self.MapTypeDescriptor(found.group())

    return TYPE_DESCRIPTOR_RE.sub(_MapOne, type_descriptor_list)
223
224
class MalformedLineException(Exception):
  """Error raised for an input line that cannot be parsed.

  Attributes are "message" (the description) and "line_number" (the value the
  raiser supplied to locate the line).
  """

  def __init__(self, message, line_number):
    super().__init__(message)
    self.line_number = line_number
    self.message = message

  def __str__(self):
    location = ' at line {}'.format(self.line_number)
    return self.message + location


class MalformedProguardMappingException(MalformedLineException):
  """Malformed line in a proguard mapping file."""


class MalformedProfileException(MalformedLineException):
  """Malformed line in a profile."""
241
242
def _RunDexDump(dexdump_path, dex_file_path):
  """Runs dexdump on a dex file and returns its output as a list of lines.

  Args:
    dexdump_path: path to the dexdump binary
    dex_file_path: path to the dex file passed as the only argument

  Returns:
    The standard output of the command, decoded as UTF-8 and split into lines
    (without line terminators).
  """
  command = [dexdump_path, dex_file_path]
  raw_output = subprocess.check_output(command)
  return raw_output.decode('utf-8').splitlines()
246
247
def _ReadFile(file_path):
  """Returns the lines of the text file at |file_path|, terminators included."""
  with open(file_path, 'r') as input_file:
    lines = input_file.readlines()
  return lines
251
252
def _ToTypeDescriptor(dot_notation):
  """Converts a single dot notation type into type descriptor format

  eg:
  org.chromium.browser.ChromeActivity -> Lorg/chromium/browser/ChromeActivity;
  boolean -> Z
  int[] -> [I

  Args:
    dot_notation: string with a single type in dot notation format
      (surrounding whitespace is ignored)

  Returns:
    A string with the type in type descriptor format
  """
  type_name = dot_notation.strip()

  # Every trailing "[]" is one array dimension, i.e. one leading '['.
  array_depth = 0
  while type_name.endswith('[]'):
    type_name = type_name[:-2]
    array_depth += 1
  brackets = '[' * array_depth

  primitive_code = DOT_NOTATION_MAP.get(type_name)
  if primitive_code is not None:
    return brackets + primitive_code
  return '{}L{};'.format(brackets, type_name.replace('.', '/'))
275
276
def _DotNotationListToTypeDescriptorList(dot_notation_list_string):
  """Converts a comma separated list of dot notation types into type
  descriptor format

  eg:
  org.chromium.browser.ChromeActivity,boolean,int[] ->
      Lorg/chromium/browser/ChromeActivity;Z[I

  Args:
    dot_notation_list_string: single string with multiple comma separated types
                              in dot notation format

  Returns:
    A string with the types converted one by one and concatenated
  """
  dot_notation_types = dot_notation_list_string.split(',')
  return ''.join(map(_ToTypeDescriptor, dot_notation_types))
294
295
def ProcessDex(dex_dump):
  """Builds Class objects out of the lines printed by dexdump

  Walks the dump line by line, tracking the class and method currently being
  described, and records for every method the line numbers listed in its
  "positions" section.

  Methods that are not mapped to any line number are ignored and not listed
  inside their respective Class objects.

  Args:
    dex_dump: An array of lines of dexdump output

  Returns:
    A dict that maps from class names in type descriptor format (but without the
    surrounding 'L' and ';') to Class objects.
  """
  classes = {}
  dex_class = None
  dex_method = None
  position_lines = []
  in_methods = False
  in_positions = False

  for raw_line in dex_dump:
    text = raw_line.strip()

    if text.startswith('Class descriptor'):
      # A new class begins, which ends any method section of the previous one.
      in_methods = False
      dex_class = Class(DEX_CLASS_NAME_RE.search(text).group('class_name'))
      classes[dex_class.name] = dex_class
      continue

    if text.startswith(('Direct methods', 'Virtual methods')):
      in_methods = True
      continue

    if in_methods and text.startswith('name'):
      assert dex_class is not None
      dex_method = Method(
          DEX_METHOD_NAME_RE.search(text).group('method_name'),
          'L' + dex_class.name + ';')
      continue

    if in_methods and text.startswith('type'):
      assert dex_method is not None
      signature = DEX_METHOD_TYPE_RE.search(text)
      dex_method.param_types = signature.group('method_params')
      dex_method.return_type = signature.group('method_return_type')
      continue

    if text.startswith('positions'):
      assert in_methods
      in_positions = True
      position_lines = []
      continue

    # Everything below only matters inside a "positions" section.
    if not in_positions:
      continue

    if text.startswith('0x'):
      number = DEX_METHOD_LINE_NR_RE.search(text).group('line_number')
      position_lines.append(int(number))
    elif text.startswith('locals'):
      # "locals" closes the section; keep the method only if it had lines.
      if position_lines:
        dex_class.AddMethod(dex_method, position_lines)
      in_positions = False

  return classes
353
354
def ProcessProguardMapping(proguard_mapping_lines, dex):
  """Parses a proguard mapping file

  This takes proguard mapping file lines and then uses the obfuscated dex to
  create a mapping of unobfuscated methods to obfuscated ones and vice versa.

  The dex is used because the proguard mapping file only has the name of the
  obfuscated methods but not their signature, thus the dex is read to look up
  which method with a specific name was mapped to the lines mentioned in the
  proguard mapping file.

  Blank lines and comment lines (first non-blank character is '#') are
  skipped. Member lines that are not method mappings (ie field mappings) are
  ignored.

  Args:
    proguard_mapping_lines: Array of strings, each is a line from the proguard
                            mapping file (in order).
    dex: a dict of class name (in type descriptor format but without the
         enclosing 'L' and ';') to a Class object.

  Returns:
    A tuple (mapping, reverse_mapping) of ProguardMapping objects. The first
    maps obfuscated methods to the set of non-obfuscated ones and obfuscated
    class names to original class names; the second maps in the opposite
    direction. Class names are in type descriptor format (with the enclosing
    'L' and ';').

  Raises:
    MalformedProguardMappingException: if a class line cannot be parsed or a
      member line appears before any class line. The reported line number is
      1-based.
    KeyError: if a method mapping with line numbers belongs to an obfuscated
      class that is not present in dex.
  """
  mapping = ProguardMapping()
  reverse_mapping = ProguardMapping()
  # (original Method, obfuscated name) pairs for methods listed without line
  # numbers. Their obfuscated signature is derived from the class mappings, so
  # they are resolved once every class line has been read.
  to_be_obfuscated = []
  current_class_orig = None
  current_class_obfs = None
  for index, line in enumerate(proguard_mapping_lines):
    stripped_line = line.strip()
    if not stripped_line or stripped_line.startswith('#'):
      continue
    # enumerate() counts from 0, people reading the error count from 1.
    line_number = index + 1

    # Lines that do not start with a space introduce a new class.
    if not line.startswith(' '):
      match = PROGUARD_CLASS_MAPPING_RE.search(line)
      if match is None:
        raise MalformedProguardMappingException(
            'Malformed class mapping', line_number)
      current_class_orig = match.group('original_name')
      current_class_obfs = match.group('obfuscated_name')
      mapping.AddClassMapping(_ToTypeDescriptor(current_class_obfs),
                              _ToTypeDescriptor(current_class_orig))
      reverse_mapping.AddClassMapping(_ToTypeDescriptor(current_class_orig),
                                      _ToTypeDescriptor(current_class_obfs))
      continue

    # Raised explicitly rather than asserted: this validates the input file
    # and must keep working when assertions are disabled.
    if current_class_orig is None or current_class_obfs is None:
      raise MalformedProguardMappingException(
          'Member mapping outside of a class mapping', line_number)

    match = PROGUARD_METHOD_MAPPING_RE.search(stripped_line)
    # check if is a method mapping (we ignore field mappings)
    if match is None:
      continue

    # check if this line is an inlining by reading ahead 1 line.
    if index + 1 < len(proguard_mapping_lines):
      next_match = PROGUARD_METHOD_MAPPING_RE.search(
          proguard_mapping_lines[index + 1].strip())
      if (next_match and match.group('line_start') is not None
          and next_match.group('line_start') == match.group('line_start')
          and next_match.group('line_end') == match.group('line_end')):
        continue # This is an inlining, skip

    original_method = Method(
        match.group('original_method_name'),
        _ToTypeDescriptor(
            match.group('original_method_class') or current_class_orig),
        _DotNotationListToTypeDescriptorList(match.group('params')),
        _ToTypeDescriptor(match.group('return_type')))

    if match.group('line_start') is None:
      to_be_obfuscated.append(
          (original_method, match.group('obfuscated_name')))
      continue

    # Use the line range to pick the method(s) with that name in the dex.
    obfs_methods = (dex[current_class_obfs.replace('.', '/')]
        .FindMethodsAtLine(
            match.group('obfuscated_name'),
            int(match.group('line_start')),
            int(match.group('line_end'))))

    if obfs_methods is None:
      continue

    for obfs_method in obfs_methods:
      mapping.AddMethodMapping(obfs_method, original_method)
      reverse_mapping.AddMethodMapping(original_method, obfs_method)

  for original_method, obfuscated_name in to_be_obfuscated:
    obfuscated_method = Method(
        obfuscated_name,
        reverse_mapping.GetClassMapping(original_method.class_name),
        reverse_mapping.MapTypeDescriptorList(original_method.param_types),
        reverse_mapping.MapTypeDescriptor(original_method.return_type))
    mapping.AddMethodMapping(obfuscated_method, original_method)
    reverse_mapping.AddMethodMapping(original_method, obfuscated_method)
  return mapping, reverse_mapping
445
446
def ProcessProfile(input_profile, proguard_mapping):
  """Parses an android profile and uses the proguard mapping to (de)obfuscate it

  This takes the android profile lines and for each method or class for the
  profile, it uses the mapping to either obfuscate or deobfuscate (based on the
  provided mapping) and returns a Profile object that stores this information.

  Blank lines and comment lines (starting with '#') are skipped. Methods that
  have no entry in the mapping are dropped with a warning.

  Args:
    input_profile: array of lines of the input profile
    proguard_mapping: a proguard mapping that would map from the classes and
                      methods in the input profile to the classes and methods
                      that should be in the output profile.

  Returns:
    A Profile object that stores the information (ie list of mapped classes and
    methods + tags)

  Raises:
    MalformedProfileException: if a line is neither a class nor a method entry.
      The reported line number is 1-based.
  """
  profile = Profile()
  # Count from 1 so that the number in an error matches what an editor shows.
  for line_number, line in enumerate(input_profile, start=1):
    line = line.strip()
    if not line or line.startswith('#'):
      continue
    # Class entries start directly with the class descriptor; method entries
    # start with their tags.
    if line.startswith('L'):
      profile.AddClass(proguard_mapping.GetClassMapping(line))
      continue
    match = PROFILE_METHOD_RE.search(line)
    if not match:
      raise MalformedProfileException("Malformed line", line_number)

    method = Method(
        match.group('method_name'),
        match.group('class_name'),
        match.group('method_params'),
        match.group('method_return_type'))

    mapped_methods = proguard_mapping.GetMethodMapping(method)
    if mapped_methods is None:
      logging.warning('No method matching "%s" has been found in the proguard '
                      'mapping file', method)
      continue

    # One input method may map to several methods; each gets the same tags.
    for mapped_method in mapped_methods:
      profile.AddMethod(mapped_method, match.group('tags'))

  return profile
490
491
def ObfuscateProfile(nonobfuscated_profile, dex_file, proguard_mapping,
                     dexdump_path, output_filename):
  """Obfuscates a profile and writes the result to a file.

  Args:
    nonobfuscated_profile: path to a profile with nonobfuscated symbols.
    dex_file: path to the dex file matching the mapping.
    proguard_mapping: path to a mapping from nonobfuscated to obfuscated
      symbols used in the dex file.
    dexdump_path: path to the dexdump utility.
    output_filename: output filename in which to write the obfuscated profile.
  """
  dex_dump_lines = _RunDexDump(dexdump_path, dex_file)
  dexinfo = ProcessDex(dex_dump_lines)

  mapping_lines = _ReadFile(proguard_mapping)
  # Only the original -> obfuscated direction is needed here.
  _unused_mapping, to_obfuscated = ProcessProguardMapping(mapping_lines,
                                                          dexinfo)

  profile_lines = _ReadFile(nonobfuscated_profile)
  result = ProcessProfile(profile_lines, to_obfuscated)
  result.WriteToFile(output_filename)
510
511
def main(args):
  """Command line entry point: converts a profile and writes the result.

  Reads the dex (through dexdump), the proguard mapping and the input profile,
  then writes the obfuscated (--obfuscate) or deobfuscated (--deobfuscate)
  profile to the output path.

  Args:
    args: list of command line arguments, without the program name.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--dexdump-path',
      required=True,
      help='Path to dexdump binary.')
  parser.add_argument(
      '--dex-path',
      required=True,
      help='Path to dex file corresponding to the proguard mapping file.')
  parser.add_argument(
      '--proguard-mapping-path',
      required=True,
      help='Path to input proguard mapping file corresponding to the dex file.')
  parser.add_argument(
      '--output-profile-path',
      required=True,
      help='Path to output profile.')
  parser.add_argument(
      '--input-profile-path',
      required=True,
      help='Path to input profile.')
  parser.add_argument(
      '--verbose',
      action='store_true',
      default=False,
      help='Print verbose output.')
  # Exactly one of the two flags is required; both write to options.obfuscate.
  obfuscation = parser.add_mutually_exclusive_group(required=True)
  obfuscation.add_argument('--obfuscate', action='store_true',
      help='Indicates to output an obfuscated profile given a deobfuscated '
     'one.')
  obfuscation.add_argument('--deobfuscate', dest='obfuscate',
      action='store_false', help='Indicates to output a deobfuscated profile '
      'given an obfuscated one.')
  options = parser.parse_args(args)

  # Warnings are only shown when --verbose is given.
  log_level = logging.WARNING if options.verbose else logging.ERROR
  logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level)

  dex = ProcessDex(_RunDexDump(options.dexdump_path, options.dex_path))
  proguard_mapping, reverse_proguard_mapping = ProcessProguardMapping(
      _ReadFile(options.proguard_mapping_path), dex)
  # The reverse mapping goes from original to obfuscated names.
  if options.obfuscate:
    profile = ProcessProfile(
        _ReadFile(options.input_profile_path),
        reverse_proguard_mapping)
  else:
    profile = ProcessProfile(
        _ReadFile(options.input_profile_path),
        proguard_mapping)
  profile.WriteToFile(options.output_profile_path)
566
567
568if __name__ == '__main__':
569  main(sys.argv[1:])
570