#!/usr/bin/env vpython3
#
# Copyright 2018 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""(De)obfuscates an Android profile using a proguard mapping and a dex file.

The dex file (read through the dexdump utility) is needed because the proguard
mapping only lists the names of obfuscated methods, not their signatures.
"""

import argparse
import collections
import functools
import logging
import re
import subprocess
import sys

DEX_CLASS_NAME_RE = re.compile(r'\'L(?P<class_name>[^;]+);\'')
DEX_METHOD_NAME_RE = re.compile(r'\'(?P<method_name>[^\']+)\'')
DEX_METHOD_TYPE_RE = re.compile(  # type descriptor method signature re
    r'\''
    r'\('
    r'(?P<method_params>[^)]*)'
    r'\)'
    r'(?P<method_return_type>[^\']+)'
    r'\'')
DEX_METHOD_LINE_NR_RE = re.compile(r'line=(?P<line_number>\d+)')

PROFILE_METHOD_RE = re.compile(
    r'(?P<tags>[HSP]+)'  # tags such as H/S/P
    r'(?P<class_name>L[^;]+;)'  # class name in type descriptor format
    r'->(?P<method_name>[^(]+)'
    r'\((?P<method_params>[^)]*)\)'
    r'(?P<method_return_type>.+)')

PROGUARD_CLASS_MAPPING_RE = re.compile(
    r'(?P<original_name>[^ ]+)'
    r' -> '
    r'(?P<obfuscated_name>[^:]+):')
PROGUARD_METHOD_MAPPING_RE = re.compile(
    # line_start:line_end: (optional)
    r'((?P<line_start>\d+):(?P<line_end>\d+):)?'
    r'(?P<return_type>[^ ]+)'  # original method return type
    # original method class name (if exists)
    r' (?:(?P<original_method_class>[a-zA-Z_\d.$]+)\.)?'
    r'(?P<original_method_name>[^.\(]+)'
    r'\((?P<params>[^\)]*)\)'  # original method params
    r'(?:[^ ]*)'  # original method line numbers (ignored)
    r' -> '
    r'(?P<obfuscated_name>.+)')  # obfuscated method name

TYPE_DESCRIPTOR_RE = re.compile(
    r'(?P<brackets>\[*)'
    r'(?:'
    r'(?P<class_name>L[^;]+;)'
    r'|'
    r'[VZBSCIJFD]'
    r')')

# Maps primitive types in dot notation to their type descriptor letter. The
# empty entry lets an empty parameter list convert to an empty descriptor.
DOT_NOTATION_MAP = {
    '': '',
    'boolean': 'Z',
    'byte': 'B',
    'void': 'V',
    'short': 'S',
    'char': 'C',
    'int': 'I',
    'long': 'J',
    'float': 'F',
    'double': 'D'
}


@functools.total_ordering
class Method:
  """A method identified by class, name, parameter types and return type.

  All type information is stored in type descriptor format. param_types and
  return_type may be None until they are filled in (ProcessDex sets them after
  construction).
  """

  def __init__(self, name, class_name, param_types=None, return_type=None):
    self.name = name
    self.class_name = class_name
    self.param_types = param_types
    self.return_type = return_type

  def __str__(self):
    return '{}->{}({}){}'.format(self.class_name, self.name,
                                 self.param_types or '',
                                 self.return_type or '')

  def __repr__(self):
    return 'Method<{}->{}({}){}>'.format(self.class_name, self.name,
                                         self.param_types or '',
                                         self.return_type or '')

  @staticmethod
  def serialize(method):
    """Returns the tuple used to compare and order Method objects."""
    return (method.class_name, method.name, method.param_types,
            method.return_type)

  def __eq__(self, other):
    # Let Python fall back to its default handling for foreign types instead
    # of failing with AttributeError inside serialize().
    if not isinstance(other, Method):
      return NotImplemented
    return self.serialize(self) == self.serialize(other)

  def __lt__(self, other):
    if not isinstance(other, Method):
      return NotImplemented
    return self.serialize(self) < self.serialize(other)

  def __hash__(self):
    # only hash name and class_name since other fields may not be set yet.
    return hash((self.name, self.class_name))


class Class:
  """A dex class together with its methods and the lines they map to."""

  def __init__(self, name):
    self.name = name
    # List of (Method, set(int)) pairs.
    self._methods = []

  def AddMethod(self, method, line_numbers):
    """Registers a method and the source line numbers it is mapped to."""
    self._methods.append((method, set(line_numbers)))

  def _WarnIfAmbiguous(self, found_methods, hint_lines):
    """Logs a warning when more than one method matched the line hints."""
    if len(found_methods) > 1:
      logging.warning('ambiguous methods in dex %s at lines %s in class "%s"',
                      found_methods, hint_lines, self.name)

  def FindMethodsAtLine(self, method_name, line_start, line_end=None):
    """Searches through dex class for a method given a name and line numbers

    The dex maps methods to line numbers. Given a method name in this class as
    well as a start line and an optional end line (which act as hints as to
    which function in the class is being looked for), this returns a list of
    possible matches (or None if none are found).

    If exactly one method has the given name it is returned regardless of the
    line hints. Otherwise methods sharing at least one line with the hints are
    preferred; failing that, methods whose line range overlaps the hint range
    are returned.

    Args:
      method_name: name of method being searched for
      line_start: start of hint range for lines in this method
      line_end: end of hint range for lines in this method (optional)

    Returns:
      A list of Method objects that could match the hints given, or None if no
      method is found.
    """
    if line_end is None:
      hint_lines = {line_start}
    else:
      hint_lines = set(range(line_start, line_end + 1))

    named_methods = [(method, lines) for method, lines in self._methods
                     if method.name == method_name]
    if not named_methods:
      return None
    if len(named_methods) == 1:
      return [named_methods[0][0]]

    # First pass: methods that share at least one exact line with the hints.
    found_methods = [method for method, lines in named_methods
                     if not hint_lines.isdisjoint(lines)]
    if found_methods:
      self._WarnIfAmbiguous(found_methods, hint_lines)
      return found_methods

    # Second pass: methods whose [min, max] line range overlaps the hint
    # range. Empty line sets cannot overlap anything and min()/max() would
    # raise on them, so they are skipped.
    if hint_lines:
      hint_min = min(hint_lines)
      hint_max = max(hint_lines)
      found_methods = [
          method for method, lines in named_methods
          if lines and hint_max >= min(lines) and hint_min <= max(lines)
      ]
    if found_methods:
      self._WarnIfAmbiguous(found_methods, hint_lines)
      return found_methods

    logging.warning(
        'No method named "%s" in class "%s" is '
        'mapped to lines %s', method_name, self.name, hint_lines)
    return None


class Profile:
  """An in-memory profile: a list of classes plus tagged methods."""

  def __init__(self):
    # {Method: set(char)}
    self._methods = collections.defaultdict(set)
    self._classes = []

  def AddMethod(self, method, tags):
    """Adds every tag in |tags| to the set of tags stored for |method|."""
    self._methods[method].update(tags)

  def AddClass(self, cls):
    """Appends a class name to the profile."""
    self._classes.append(cls)

  def WriteToFile(self, path):
    """Writes sorted classes, then sorted methods prefixed by sorted tags."""
    with open(path, 'w') as output_profile:
      for cls in sorted(self._classes):
        output_profile.write(cls + '\n')
      for method in sorted(self._methods):
        tags = sorted(self._methods[method])
        line = '{}{}\n'.format(''.join(tags), str(method))
        output_profile.write(line)


class ProguardMapping:
  """One direction of a proguard mapping, for both methods and classes."""

  def __init__(self):
    # {Method: set(Method)}
    self._method_mapping = collections.defaultdict(set)
    # {String: String} String is class name in type descriptor format
    self._class_mapping = {}

  def AddMethodMapping(self, from_method, to_method):
    self._method_mapping[from_method].add(to_method)

  def AddClassMapping(self, from_class, to_class):
    self._class_mapping[from_class] = to_class

  def GetMethodMapping(self, from_method):
    """Returns the set of methods |from_method| maps to, or None."""
    return self._method_mapping.get(from_method)

  def GetClassMapping(self, from_class):
    """Returns the mapped class name, or |from_class| itself if unmapped."""
    return self._class_mapping.get(from_class, from_class)

  def MapTypeDescriptor(self, type_descriptor):
    """Maps a single type descriptor, keeping any array brackets."""
    match = TYPE_DESCRIPTOR_RE.search(type_descriptor)
    assert match is not None
    class_name = match.group('class_name')
    if class_name is not None:
      return match.group('brackets') + self.GetClassMapping(class_name)
    # just a native type, return as is
    return match.group()

  def MapTypeDescriptorList(self, type_descriptor_list):
    """Maps every type descriptor found in a concatenated descriptor list."""
    return TYPE_DESCRIPTOR_RE.sub(
        lambda match: self.MapTypeDescriptor(match.group()),
        type_descriptor_list)


class MalformedLineException(Exception):
  """Raised when an input line cannot be parsed.

  Attributes:
    message: description of the problem.
    line_number: 1-based number of the offending line in its input.
  """

  def __init__(self, message, line_number):
    super().__init__(message)
    self.message = message
    self.line_number = line_number

  def __str__(self):
    return self.message + ' at line {}'.format(self.line_number)


class MalformedProguardMappingException(MalformedLineException):
  pass


class MalformedProfileException(MalformedLineException):
  pass


def _RunDexDump(dexdump_path, dex_file_path):
  """Runs dexdump on a dex file and returns its output as a list of lines."""
  return subprocess.check_output([dexdump_path,
                                  dex_file_path]).decode('utf-8').splitlines()


def _ReadFile(file_path):
  """Returns the lines of a text file, line endings included."""
  with open(file_path, 'r') as f:
    return f.readlines()


def _ToTypeDescriptor(dot_notation):
  """Parses a dot notation type and returns it in type descriptor format

  eg:
  org.chromium.browser.ChromeActivity -> Lorg/chromium/browser/ChromeActivity;
  boolean -> Z
  int[] -> [I

  Args:
    dot_notation: trimmed string with a single type in dot notation format

  Returns:
    A string with the type in type descriptor format
  """
  dot_notation = dot_notation.strip()
  prefix = ''
  while dot_notation.endswith('[]'):
    prefix += '['
    dot_notation = dot_notation[:-2]
  if dot_notation in DOT_NOTATION_MAP:
    return prefix + DOT_NOTATION_MAP[dot_notation]
  return prefix + 'L' + dot_notation.replace('.', '/') + ';'


def _DotNotationListToTypeDescriptorList(dot_notation_list_string):
  """Parses a param list of dot notation format and returns it in type
  descriptor format

  eg:
  org.chromium.browser.ChromeActivity,boolean,int[] ->
      Lorg/chromium/browser/ChromeActivity;Z[I

  Args:
    dot_notation_list_string: single string with multiple comma separated types
                              in dot notation format

  Returns:
    A string with the param list in type descriptor format
  """
  return ''.join(_ToTypeDescriptor(param) for param in
                 dot_notation_list_string.split(','))


def ProcessDex(dex_dump):
  """Parses dexdump output returning a dict of class names to Class objects

  Parses output of the dexdump command on a dex file and extracts information
  about classes and their respective methods and which line numbers a method is
  mapped to.

  Methods that are not mapped to any line number are ignored and not listed
  inside their respective Class objects.

  Args:
    dex_dump: An array of lines of dexdump output

  Returns:
    A dict that maps from class names in type descriptor format (but without the
    surrounding 'L' and ';') to Class objects.
  """
  # class_name: Class
  classes_by_name = {}
  current_class = None
  current_method = None
  reading_positions = False
  reading_methods = False
  method_line_numbers = []
  for line in dex_dump:
    line = line.strip()
    if line.startswith('Class descriptor'):
      # New class started, no longer reading methods.
      reading_methods = False
      current_class = Class(DEX_CLASS_NAME_RE.search(line).group('class_name'))
      classes_by_name[current_class.name] = current_class
    elif line.startswith(('Direct methods', 'Virtual methods')):
      reading_methods = True
    elif reading_methods and line.startswith('name'):
      assert current_class is not None
      current_method = Method(
          DEX_METHOD_NAME_RE.search(line).group('method_name'),
          "L" + current_class.name + ";")
    elif reading_methods and line.startswith('type'):
      assert current_method is not None
      match = DEX_METHOD_TYPE_RE.search(line)
      current_method.param_types = match.group('method_params')
      current_method.return_type = match.group('method_return_type')
    elif line.startswith('positions'):
      assert reading_methods
      reading_positions = True
      method_line_numbers = []
    elif reading_positions and line.startswith('0x'):
      line_number = DEX_METHOD_LINE_NR_RE.search(line).group('line_number')
      method_line_numbers.append(int(line_number))
    elif reading_positions and line.startswith('locals'):
      # A method is only recorded if it is mapped to at least one line.
      if method_line_numbers:
        current_class.AddMethod(current_method, method_line_numbers)
      # finished reading method line numbers
      reading_positions = False
  return classes_by_name


def ProcessProguardMapping(proguard_mapping_lines, dex):
  """Parses a proguard mapping file

  This takes proguard mapping file lines and then uses the obfuscated dex to
  create a mapping of unobfuscated methods to obfuscated ones and vice versa.

  The dex is used because the proguard mapping file only has the name of the
  obfuscated methods but not their signature, thus the dex is read to look up
  which method with a specific name was mapped to the lines mentioned in the
  proguard mapping file.

  Blank lines and '#' comment lines are ignored, as are member lines that do
  not look like method mappings (e.g. field mappings).

  Args:
    proguard_mapping_lines: Array of strings, each is a line from the proguard
                            mapping file (in order).
    dex: a dict of class name (in type descriptor format but without the
         enclosing 'L' and ';') to a Class object.
  Returns:
    A (mapping, reverse_mapping) tuple of ProguardMapping objects. The first
    maps obfuscated methods to sets of non-obfuscated ones and obfuscated class
    names to original class names; the second maps in the opposite direction.
    Class names are in type descriptor format (with the enclosing 'L' and ';').

  Raises:
    MalformedProguardMappingException: a class line cannot be parsed, or a
      member line appears before any class line.
    KeyError: a method mapping with line numbers refers to an obfuscated class
      that is not present in |dex|.
  """
  mapping = ProguardMapping()
  reverse_mapping = ProguardMapping()
  to_be_obfuscated = []
  current_class_orig = None
  current_class_obfs = None
  for index, line in enumerate(proguard_mapping_lines):
    stripped = line.strip()
    # Comment lines (such as the header of an R8 mapping file) carry no
    # mapping information.
    if not stripped or stripped.startswith('#'):
      continue
    if not line.startswith(' '):
      match = PROGUARD_CLASS_MAPPING_RE.search(line)
      if match is None:
        raise MalformedProguardMappingException(
            'Malformed class mapping', index + 1)
      current_class_orig = match.group('original_name')
      current_class_obfs = match.group('obfuscated_name')
      mapping.AddClassMapping(_ToTypeDescriptor(current_class_obfs),
                              _ToTypeDescriptor(current_class_orig))
      reverse_mapping.AddClassMapping(_ToTypeDescriptor(current_class_orig),
                                      _ToTypeDescriptor(current_class_obfs))
      continue

    if current_class_orig is None or current_class_obfs is None:
      raise MalformedProguardMappingException(
          'Member mapping before any class mapping', index + 1)
    match = PROGUARD_METHOD_MAPPING_RE.search(stripped)
    # check if is a method mapping (we ignore field mappings)
    if match is None:
      continue

    # check if this line is an inlining by reading ahead 1 line.
    if index + 1 < len(proguard_mapping_lines):
      next_match = PROGUARD_METHOD_MAPPING_RE.search(
          proguard_mapping_lines[index + 1].strip())
      if (next_match and match.group('line_start') is not None
          and next_match.group('line_start') == match.group('line_start')
          and next_match.group('line_end') == match.group('line_end')):
        continue  # This is an inlining, skip

    original_method = Method(
        match.group('original_method_name'),
        _ToTypeDescriptor(
            match.group('original_method_class') or current_class_orig),
        _DotNotationListToTypeDescriptorList(match.group('params')),
        _ToTypeDescriptor(match.group('return_type')))

    if match.group('line_start') is None:
      # Without line numbers the obfuscated signature is derived from the
      # original one once every class mapping has been read.
      to_be_obfuscated.append(
          (original_method, match.group('obfuscated_name')))
      continue

    obfs_methods = (dex[current_class_obfs.replace('.', '/')]
                    .FindMethodsAtLine(
                        match.group('obfuscated_name'),
                        int(match.group('line_start')),
                        int(match.group('line_end'))))
    if obfs_methods is None:
      continue

    for obfs_method in obfs_methods:
      mapping.AddMethodMapping(obfs_method, original_method)
      reverse_mapping.AddMethodMapping(original_method, obfs_method)

  for original_method, obfuscated_name in to_be_obfuscated:
    obfuscated_method = Method(
        obfuscated_name,
        reverse_mapping.GetClassMapping(original_method.class_name),
        reverse_mapping.MapTypeDescriptorList(original_method.param_types),
        reverse_mapping.MapTypeDescriptor(original_method.return_type))
    mapping.AddMethodMapping(obfuscated_method, original_method)
    reverse_mapping.AddMethodMapping(original_method, obfuscated_method)
  return mapping, reverse_mapping


def ProcessProfile(input_profile, proguard_mapping):
  """Parses an android profile and uses the proguard mapping to (de)obfuscate it

  This takes the android profile lines and for each method or class for the
  profile, it uses the mapping to either obfuscate or deobfuscate (based on the
  provided mapping) and returns a Profile object that stores this information.

  Args:
    input_profile: array of lines of the input profile
    proguard_mapping: a proguard mapping that would map from the classes and
                      methods in the input profile to the classes and methods
                      that should be in the output profile.

  Returns:
    A Profile object that stores the information (ie list of mapped classes and
    methods + tags)

  Raises:
    MalformedProfileException: a line is neither a class line nor a method
      line.
  """
  profile = Profile()
  # Lines are numbered from 1 so that error messages match what editors show.
  for line_number, line in enumerate(input_profile, start=1):
    line = line.strip()
    if line.startswith('L'):
      profile.AddClass(proguard_mapping.GetClassMapping(line))
      continue
    match = PROFILE_METHOD_RE.search(line)
    if not match:
      raise MalformedProfileException("Malformed line", line_number)

    method = Method(
        match.group('method_name'),
        match.group('class_name'),
        match.group('method_params'),
        match.group('method_return_type'))

    mapped_methods = proguard_mapping.GetMethodMapping(method)
    if mapped_methods is None:
      logging.warning('No method matching "%s" has been found in the proguard '
                      'mapping file', method)
      continue

    for original_method in mapped_methods:
      profile.AddMethod(original_method, match.group('tags'))

  return profile


def ObfuscateProfile(nonobfuscated_profile, dex_file, proguard_mapping,
                     dexdump_path, output_filename):
  """Helper method for obfuscating a profile.

  Args:
    nonobfuscated_profile: a profile with nonobfuscated symbols.
    dex_file: path to the dex file matching the mapping.
    proguard_mapping: a mapping from nonobfuscated to obfuscated symbols used
      in the dex file.
    dexdump_path: path to the dexdump utility.
    output_filename: output filename in which to write the obfuscated profile.
  """
  dexinfo = ProcessDex(_RunDexDump(dexdump_path, dex_file))
  _, reverse_mapping = ProcessProguardMapping(
      _ReadFile(proguard_mapping), dexinfo)
  obfuscated_profile = ProcessProfile(
      _ReadFile(nonobfuscated_profile), reverse_mapping)
  obfuscated_profile.WriteToFile(output_filename)


def main(args):
  """Parses command line |args| and writes the converted profile."""
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--dexdump-path',
      required=True,
      help='Path to dexdump binary.')
  parser.add_argument(
      '--dex-path',
      required=True,
      help='Path to dex file corresponding to the proguard mapping file.')
  parser.add_argument(
      '--proguard-mapping-path',
      required=True,
      help='Path to input proguard mapping file corresponding to the dex file.')
  parser.add_argument(
      '--output-profile-path',
      required=True,
      help='Path to output profile.')
  parser.add_argument(
      '--input-profile-path',
      required=True,
      help='Path to input profile.')
  parser.add_argument(
      '--verbose',
      action='store_true',
      default=False,
      help='Print verbose output.')
  obfuscation = parser.add_mutually_exclusive_group(required=True)
  obfuscation.add_argument('--obfuscate', action='store_true',
      help='Indicates to output an obfuscated profile given a deobfuscated '
     'one.')
  obfuscation.add_argument('--deobfuscate', dest='obfuscate',
      action='store_false', help='Indicates to output a deobfuscated profile '
      'given an obfuscated one.')
  options = parser.parse_args(args)

  if options.verbose:
    log_level = logging.WARNING
  else:
    log_level = logging.ERROR
  logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level)

  dex = ProcessDex(_RunDexDump(options.dexdump_path, options.dex_path))
  proguard_mapping, reverse_proguard_mapping = ProcessProguardMapping(
      _ReadFile(options.proguard_mapping_path), dex)
  # Obfuscating maps original -> obfuscated names, i.e. the reverse mapping.
  if options.obfuscate:
    profile = ProcessProfile(
        _ReadFile(options.input_profile_path),
        reverse_proguard_mapping)
  else:
    profile = ProcessProfile(
        _ReadFile(options.input_profile_path),
        proguard_mapping)
  profile.WriteToFile(options.output_profile_path)


if __name__ == '__main__':
  main(sys.argv[1:])