1# Lint as: python3 2# Copyright 2021 The Chromium Authors 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5'''Helper script to use GN's JSON interface to make changes.''' 6 7from __future__ import annotations 8 9import contextlib 10import copy 11import dataclasses 12import json 13import logging 14import os 15import pathlib 16import re 17import shutil 18import subprocess 19import sys 20 21from typing import Dict, Iterator, List, Optional, Tuple 22 23_SRC_PATH = pathlib.Path(__file__).resolve().parents[2] 24 25_BUILD_ANDROID_GYP_PATH = _SRC_PATH / 'build/android/gyp' 26if str(_BUILD_ANDROID_GYP_PATH) not in sys.path: 27 sys.path.append(str(_BUILD_ANDROID_GYP_PATH)) 28 29from util import build_utils 30 31# Refer to parse_tree.cc for GN AST implementation details: 32# https://gn.googlesource.com/gn/+/refs/heads/main/src/gn/parse_tree.cc 33# These constants should match corresponding entries in parse_tree.cc. 34# TODO: Add high-level details for the expected data structure. 35NODE_CHILD = 'child' 36NODE_TYPE = 'type' 37NODE_VALUE = 'value' 38BEFORE_COMMENT = 'before_comment' 39SUFFIX_COMMENT = 'suffix_comment' 40AFTER_COMMENT = 'after_comment' 41 42 43@contextlib.contextmanager 44def _backup_and_restore_file_contents(path: str): 45 with open(path) as f: 46 contents = f.read() 47 try: 48 yield 49 finally: 50 # Ensure that the timestamp is updated since otherwise ninja will not 51 # re-build relevant targets with the original file. 52 with open(path, 'w') as f: 53 f.write(contents) 54 55 56def _build_targets_output( 57 out_dir: str, 58 targets: List[str], 59 should_print: Optional[bool] = None) -> Optional[str]: 60 env = os.environ.copy() 61 if should_print is None: 62 should_print = logging.getLogger().isEnabledFor(logging.DEBUG) 63 # Ensuring ninja does not attempt to summarize the build results in slightly 64 # faster builds. This script does many builds so this time can add up. 65 if 'NINJA_SUMMARIZE_BUILD' in env: 66 del env['NINJA_SUMMARIZE_BUILD'] 67 proc = subprocess.Popen(['autoninja', '-C', out_dir] + targets, 68 stdout=subprocess.PIPE, 69 stderr=subprocess.STDOUT, 70 env=env, 71 text=True) 72 lines = [] 73 prev_line = '' 74 width = shutil.get_terminal_size().columns 75 while proc.poll() is None: 76 line = proc.stdout.readline() 77 lines.append(line) 78 if should_print: 79 if prev_line.startswith('[') and line.startswith('['): 80 # Shrink the line according to terminal size. 81 msg = line.rstrip() 82 if len(msg) > width: 83 # 5 = 3 (Ellipsis) + 2 (header) 84 length_to_show = width - 5 85 msg = f'{msg[:2]}...{msg[-length_to_show:]}' 86 # \r to return the carriage to the beginning of line, \033[K to 87 # replace the normal \n to erase until the end of the line. This 88 # allows ninja output for successful targets to overwrite each 89 # other. 90 msg = f'\r{msg}\033[K' 91 elif prev_line.startswith('['): 92 # Since the previous line likely did not include a newline, an 93 # extra newline is needed to avoid the current line being 94 # appended to the previous line. 95 msg = f'\n{line}' 96 else: 97 msg = line 98 print(msg, end='') 99 prev_line = line 100 if proc.returncode != 0: 101 return None 102 return ''.join(lines) 103 104 105def _generate_project_json_content(out_dir: str) -> str: 106 build_utils.CheckOutput(['gn', 'gen', '--ide=json', out_dir]) 107 with open(os.path.join(out_dir, 'project.json')) as f: 108 return f.read() 109 110 111@dataclasses.dataclass 112class DepList: 113 """Represents a dep list assignment in GN.""" 114 target_name: Optional[str] # The name of the target containing the list. 115 variable_name: str # Left-hand side variable name the list is assigned to. 116 child_nodes: List[dict] # Right-hand side list of nodes. 117 operation: str # The assignment operation, whether += or =. 118 119 120class BuildFile: 121 """Represents the contents of a BUILD.gn file.""" 122 def __init__(self, 123 build_gn_path: str, 124 root_gn_path: pathlib.Path, 125 *, 126 dryrun: bool = False): 127 self._root = root_gn_path 128 self._rel_path = os.path.relpath(build_gn_path, root_gn_path) 129 self._gn_rel_path = '//' + os.path.dirname(self._rel_path) 130 self._full_path = os.path.abspath(build_gn_path) 131 self._skip_write_content = dryrun 132 133 def __enter__(self): 134 output = build_utils.CheckOutput( 135 ['gn', 'format', '--dump-tree=json', self._full_path]) 136 self._content = json.loads(output) 137 self._original_content = json.dumps(self._content) 138 return self 139 140 def __exit__(self, exc, value, tb): 141 if not self._skip_write_content: 142 self.write_content_to_file() 143 144 # See: https://gist.github.com/sgraham/bd9ffee312f307d5f417019a9c0f0777 145 def _find_all(self, match_fn): 146 results = [] 147 148 def get_target_name(node) -> Optional[str]: 149 """Example format (with irrelevant fields omitted): 150 { 151 "child": [ { 152 "child": [ { 153 "type": "LITERAL", 154 "value": "\"hello_world_java\"" 155 } ], 156 "type": "LIST" 157 }, { 158 ... 159 } ], 160 "type": "FUNCTION", 161 "value": "java_library" 162 } 163 164 Example return: hello_world_java 165 """ 166 if node.get(NODE_TYPE) != 'FUNCTION': 167 return None 168 children = node.get(NODE_CHILD) 169 if not children: 170 return None 171 first_child = children[0] 172 if first_child.get(NODE_TYPE) != 'LIST': 173 return None 174 grand_children = first_child.get(NODE_CHILD) 175 if not grand_children: 176 return None 177 grand_child = grand_children[0] 178 if grand_child.get(NODE_TYPE) != 'LITERAL': 179 return None 180 name = grand_child.get(NODE_VALUE) 181 if name.startswith('"'): 182 return name[1:-1] 183 return name 184 185 def recursive_find(root, last_known_target=None): 186 target_name = get_target_name(root) or last_known_target 187 matched = match_fn(root) 188 if matched is not None: 189 results.append((target_name, matched)) 190 return 191 children = root.get(NODE_CHILD) 192 if children: 193 for child in children: 194 recursive_find(child, last_known_target=target_name) 195 196 recursive_find(self._content) 197 return results 198 199 def _normalize(self, name: Optional[str], abs_path: bool = True): 200 """Returns the absolute GN path to the target with |name|. 201 202 This method normalizes target names, assuming that relative targets are 203 referenced based on the current file, allowing targets to be compared 204 by name to determine whether they are the same or not. 205 206 Given the current file is chrome/android/BUILD.gn: 207 208 # Removes surrounding quotation marks. 209 "//chrome/android:chrome_java" -> //chrome/android:chrome_java 210 211 # Makes relative paths absolute. 212 :chrome_java -> //chrome/android:chrome_java 213 214 # Spells out GN shorthands for basenames. 215 //chrome/android -> //chrome/android:android 216 """ 217 if not name: 218 return '' 219 if name.startswith('"'): 220 name = name[1:-1] 221 if not name.startswith('//') and abs_path: 222 name = self._gn_rel_path + name 223 if not ':' in name: 224 name += ':' + os.path.basename(name) 225 return name 226 227 def _find_all_list_assignments(self): 228 def match_list_assignments(node): 229 r"""Matches and returns the list being assigned. 230 231 Binary node (with an operation such as = or +=) 232 / \ 233 / \ 234 name list of nodes 235 236 Returns (name, list of nodes, op) 237 """ 238 if node.get(NODE_TYPE) != 'BINARY': 239 return None 240 operation = node.get(NODE_VALUE) 241 children = node.get(NODE_CHILD) 242 assert len(children) == 2, ( 243 'Binary nodes should have two child nodes, but the node is: ' 244 f'{node}') 245 left_child, right_child = children 246 if left_child.get(NODE_TYPE) != 'IDENTIFIER': 247 return None 248 name = left_child.get(NODE_VALUE) 249 if right_child.get(NODE_TYPE) != 'LIST': 250 return None 251 list_of_nodes = right_child.get(NODE_CHILD) 252 return name, list_of_nodes, operation 253 254 return self._find_all(match_list_assignments) 255 256 def _find_all_deps_lists(self) -> Iterator[DepList]: 257 list_tuples = self._find_all_list_assignments() 258 for target_name, (var_name, node_list, operation) in list_tuples: 259 if (var_name == 'deps' or var_name.startswith('deps_') 260 or var_name.endswith('_deps') or '_deps_' in var_name): 261 yield DepList(target_name=target_name, 262 variable_name=var_name, 263 child_nodes=node_list, 264 operation=operation) 265 266 def _new_literal_node(self, value: str, begin_line: int = 1): 267 return { 268 'location': { 269 'begin_column': 1, 270 'begin_line': begin_line, 271 'end_column': 2, 272 'end_line': begin_line, 273 }, 274 'type': 'LITERAL', 275 'value': f'"{value}"' 276 } 277 278 def _clone_replacing_value(self, node_to_copy: Dict, new_dep_name: str): 279 """Clone the existing node to preserve line numbers and update name. 280 281 It is easier to clone an existing node around the same location, as the 282 actual dict looks like this: 283 { 284 'location': { 285 'begin_column': 5, 286 'begin_line': 137, 287 'end_column': 27, 288 'end_line': 137 289 }, 290 'type': 'LITERAL', 291 'value': '":anr_data_proto_java"' 292 } 293 294 Thus the new node to return should keep the same 'location' value (the 295 parser is tolerant as long as it's roughly in the correct spot) but 296 update the 'value' to the new dependency name. 297 """ 298 new_dep = copy.deepcopy(node_to_copy) 299 # Any comments associated with the previous dep would not apply. 300 for comment_key in (BEFORE_COMMENT, AFTER_COMMENT, SUFFIX_COMMENT): 301 new_dep.pop(comment_key, None) # Remove if exists. 302 new_dep[NODE_VALUE] = f'"{new_dep_name}"' 303 return new_dep 304 305 def add_deps(self, target: str, deps: List[str]) -> bool: 306 added_new_dep = False 307 normalized_target = self._normalize(target) 308 for dep_list in self._find_all_deps_lists(): 309 if dep_list.target_name is None: 310 continue 311 # Only modify the first assignment operation to the deps variable, 312 # otherwise if there are += operations, then the list of deps will 313 # be added multiple times to the same target's deps. 314 if dep_list.operation != '=': 315 continue 316 full_target_name = f'{self._gn_rel_path}:{dep_list.target_name}' 317 # Support both the exact name and the absolute GN target names 318 # starting with //. 319 if (target != dep_list.target_name 320 and normalized_target != full_target_name): 321 continue 322 if dep_list.variable_name != 'deps': 323 continue 324 existing_dep_names = set( 325 self._normalize(child.get(NODE_VALUE), abs_path=False) 326 for child in dep_list.child_nodes) 327 for new_dep_name in deps: 328 if new_dep_name in existing_dep_names: 329 logging.info( 330 f'Skipping existing {new_dep_name} in {target}.deps') 331 continue 332 logging.info(f'Adding {new_dep_name} to {target}.deps') 333 # If there are no existing child nodes, then create a new one. 334 # Otherwise clone an existing child node to ensure more accurate 335 # line numbers and possible better preserve comments. 336 if not dep_list.child_nodes: 337 new_dep = self._new_literal_node(new_dep_name) 338 else: 339 new_dep = self._clone_replacing_value( 340 dep_list.child_nodes[0], new_dep_name) 341 dep_list.child_nodes.append(new_dep) 342 added_new_dep = True 343 if not added_new_dep: 344 # This should match the string in bytecode_processor.py. 345 print(f'Unable to find {target}') 346 return added_new_dep 347 348 def search_deps(self, name_query: Optional[str], 349 path_query: Optional[str]) -> bool: 350 if path_query: 351 if not re.search(path_query, self._rel_path): 352 return False 353 elif not name_query: 354 print(self._rel_path) 355 return True 356 for dep_list in self._find_all_deps_lists(): 357 for child in dep_list.child_nodes: 358 # Typically searches run on non-absolute dep paths. 359 dep_name = self._normalize(child.get(NODE_VALUE), 360 abs_path=False) 361 if name_query and re.search(name_query, dep_name): 362 print(f'{self._rel_path}: {dep_name} in ' 363 f'{dep_list.target_name}.{dep_list.variable_name}') 364 return True 365 return False 366 367 def split_deps(self, original_dep_name: str, 368 new_dep_names: List[str]) -> bool: 369 split = False 370 for new_dep_name in new_dep_names: 371 if self._split_dep(original_dep_name, new_dep_name): 372 split = True 373 return split 374 375 def _split_dep(self, original_dep_name: str, new_dep_name: str) -> bool: 376 """Add |new_dep_name| to GN deps that contains |original_dep_name|. 377 378 Supports deps, public_deps, and other deps variables. 379 380 Works for explicitly assigning a list to deps: 381 deps = [ ..., "original_dep", ...] 382 # Becomes 383 deps = [ ..., "original_dep", "new_dep", ...] 384 Also works for appending a list to deps: 385 public_deps += [ ..., "original_dep", ...] 386 # Becomes 387 public_deps += [ ..., "original_dep", "new_dep", ...] 388 389 Does not work for assigning or appending variables to deps: 390 deps = other_list_of_deps # Does NOT check other_list_of_deps. 391 # Becomes (no changes) 392 deps = other_list_of_deps 393 394 Does not work with parameter expansion, i.e. $variables. 395 396 Returns whether the new dep was added one or more times. 397 """ 398 for dep_name in (original_dep_name, new_dep_name): 399 assert dep_name.startswith('//'), ( 400 f'Absolute GN path required, starting with //: {dep_name}') 401 402 added_new_dep = False 403 normalized_original_dep_name = self._normalize(original_dep_name) 404 normalized_new_dep_name = self._normalize(new_dep_name) 405 for dep_list in self._find_all_deps_lists(): 406 original_dep_idx = None 407 new_dep_already_exists = False 408 for idx, child in enumerate(dep_list.child_nodes): 409 dep_name = self._normalize(child.get(NODE_VALUE)) 410 if dep_name == normalized_original_dep_name: 411 original_dep_idx = idx 412 if dep_name == normalized_new_dep_name: 413 new_dep_already_exists = True 414 if original_dep_idx is not None and not new_dep_already_exists: 415 if dep_list.target_name is None: 416 target_str = self._gn_rel_path 417 else: 418 target_str = f'{self._gn_rel_path}:{dep_list.target_name}' 419 location = f"{target_str}'s {dep_list.variable_name} variable" 420 logging.info(f'Adding {new_dep_name} to {location}') 421 new_dep = self._clone_replacing_value( 422 dep_list.child_nodes[original_dep_idx], new_dep_name) 423 # Add the new dep after the existing dep to preserve comments 424 # before the existing dep. 425 dep_list.child_nodes.insert(original_dep_idx + 1, new_dep) 426 added_new_dep = True 427 428 return added_new_dep 429 430 def remove_deps(self, 431 dep_names: List[str], 432 out_dir: str, 433 targets: List[str], 434 target_name_filter: Optional[str], 435 inline_mode: bool = False) -> Tuple[bool, str]: 436 if not inline_mode: 437 deps_to_remove = dep_names 438 else: 439 # If the first dep cannot be removed (or is not found) then in the 440 # case of inlining we can skip this file for the rest of the deps. 441 first_dep = dep_names[0] 442 if not self._remove_deps([first_dep], out_dir, targets, 443 target_name_filter): 444 return False 445 deps_to_remove = dep_names[1:] 446 return self._remove_deps(deps_to_remove, out_dir, targets, 447 target_name_filter) 448 449 def _remove_deps(self, dep_names: List[str], out_dir: str, 450 targets: List[str], 451 target_name_filter: Optional[str]) -> Tuple[bool, str]: 452 """Remove |dep_names| if the target can still be built in |out_dir|. 453 454 Supports deps, public_deps, and other deps variables. 455 456 Works for explicitly assigning a list to deps: 457 deps = [ ..., "original_dep", ...] 458 # Becomes 459 deps = [ ..., ...] 460 461 Does not work with parameter expansion, i.e. $variables. 462 463 Returns whether any deps were removed. 464 """ 465 normalized_dep_names = set() 466 for dep_name in dep_names: 467 assert dep_name.startswith('//'), ( 468 f'Absolute GN path required, starting with //: {dep_name}') 469 normalized_dep_names.add(self._normalize(dep_name)) 470 471 removed_dep = False 472 for dep_list in self._find_all_deps_lists(): 473 child_deps_to_remove = [ 474 c for c in dep_list.child_nodes 475 if self._normalize(c.get(NODE_VALUE)) in normalized_dep_names 476 ] 477 if not child_deps_to_remove: 478 continue 479 480 if dep_list.target_name is None: 481 target_name_str = self._gn_rel_path 482 else: 483 target_name_str = f'{self._gn_rel_path}:{dep_list.target_name}' 484 if (target_name_filter is not None and 485 re.search(target_name_filter, target_name_str) is None): 486 logging.info(f'Skip: Since re.search("{target_name_filter}", ' 487 f'"{target_name_str}") is None.') 488 continue 489 490 location = f"{target_name_str}'s {dep_list.variable_name} variable" 491 expected_json = _generate_project_json_content(out_dir) 492 num_to_remove = len(child_deps_to_remove) 493 for remove_idx, child_dep in enumerate(child_deps_to_remove): 494 child_dep_name = self._normalize(child_dep.get(NODE_VALUE)) 495 idx_to_remove = dep_list.child_nodes.index(child_dep) 496 logging.info(f'({remove_idx + 1}/{num_to_remove}) Found ' 497 f'{child_dep_name} in {location}.') 498 child_to_remove = dep_list.child_nodes[idx_to_remove] 499 can_remove_dep = False 500 with _backup_and_restore_file_contents(self._full_path): 501 dep_list.child_nodes.remove(child_to_remove) 502 self.write_content_to_file() 503 # Immediately restore deps_list's original value in case the 504 # following build is interrupted. We don't want the 505 # intermediate untested value to be written as the final 506 # build file. 507 dep_list.child_nodes.insert(idx_to_remove, child_to_remove) 508 if expected_json is not None: 509 # If no changes to project.json was detected, this means 510 # the current target is not part of out_dir's build and 511 # cannot be removed even if the build succeeds. 512 after_json = _generate_project_json_content(out_dir) 513 if expected_json == after_json: 514 # If one change in this list isn't part of the 515 # build, no need to try any other in this list. 516 logging.info('Skip: No changes to project.json.') 517 break 518 519 # Avoids testing every dep removal for the same list. 520 expected_json = None 521 if self._can_still_build_everything(out_dir, targets): 522 can_remove_dep = True 523 if not can_remove_dep: 524 continue 525 526 dep_list.child_nodes.remove(child_to_remove) 527 # Comments before a target can apply to the targets after. 528 if (BEFORE_COMMENT in child_to_remove 529 and idx_to_remove < len(dep_list.child_nodes)): 530 child_after = dep_list.child_nodes[idx_to_remove] 531 if BEFORE_COMMENT not in child_after: 532 child_after[BEFORE_COMMENT] = [] 533 child_after[BEFORE_COMMENT][:] = ( 534 child_to_remove[BEFORE_COMMENT] + 535 child_after[BEFORE_COMMENT]) 536 # Comments after or behind a target don't make sense to re- 537 # position, simply ignore AFTER_COMMENT and SUFFIX_COMMENT. 538 removed_dep = True 539 logging.info(f'Removed {child_dep_name} from {location}.') 540 return removed_dep 541 542 def _can_still_build_everything(self, out_dir: str, 543 targets: List[str]) -> bool: 544 output = _build_targets_output(out_dir, targets) 545 if output is None: 546 logging.info('Ninja failed to build all targets') 547 return False 548 # If ninja did not re-build anything, then the target changed is not 549 # among the targets being built. Avoid this change as it's not been 550 # tested/used. 551 if 'ninja: no work to do.' in output: 552 logging.info('Ninja did not find any targets to build') 553 return False 554 return True 555 556 def write_content_to_file(self) -> None: 557 current_content = json.dumps(self._content) 558 if current_content != self._original_content: 559 subprocess.run( 560 ['gn', 'format', '--read-tree=json', self._full_path], 561 text=True, 562 check=True, 563 input=current_content) 564