xref: /aosp_15_r20/external/angle/build/gn_ast/json_gn_editor.py (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1# Lint as: python3
2# Copyright 2021 The Chromium Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5'''Helper script to use GN's JSON interface to make changes.'''
6
7from __future__ import annotations
8
9import contextlib
10import copy
11import dataclasses
12import json
13import logging
14import os
15import pathlib
16import re
17import shutil
18import subprocess
19import sys
20
21from typing import Dict, Iterator, List, Optional, Tuple
22
23_SRC_PATH = pathlib.Path(__file__).resolve().parents[2]
24
25_BUILD_ANDROID_GYP_PATH = _SRC_PATH / 'build/android/gyp'
26if str(_BUILD_ANDROID_GYP_PATH) not in sys.path:
27    sys.path.append(str(_BUILD_ANDROID_GYP_PATH))
28
29from util import build_utils
30
31# Refer to parse_tree.cc for GN AST implementation details:
32# https://gn.googlesource.com/gn/+/refs/heads/main/src/gn/parse_tree.cc
33# These constants should match corresponding entries in parse_tree.cc.
34# TODO: Add high-level details for the expected data structure.
35NODE_CHILD = 'child'
36NODE_TYPE = 'type'
37NODE_VALUE = 'value'
38BEFORE_COMMENT = 'before_comment'
39SUFFIX_COMMENT = 'suffix_comment'
40AFTER_COMMENT = 'after_comment'
41
42
43@contextlib.contextmanager
44def _backup_and_restore_file_contents(path: str):
45    with open(path) as f:
46        contents = f.read()
47    try:
48        yield
49    finally:
50        # Ensure that the timestamp is updated since otherwise ninja will not
51        # re-build relevant targets with the original file.
52        with open(path, 'w') as f:
53            f.write(contents)
54
55
56def _build_targets_output(
57        out_dir: str,
58        targets: List[str],
59        should_print: Optional[bool] = None) -> Optional[str]:
60    env = os.environ.copy()
61    if should_print is None:
62        should_print = logging.getLogger().isEnabledFor(logging.DEBUG)
63    # Ensuring ninja does not attempt to summarize the build results in slightly
64    # faster builds. This script does many builds so this time can add up.
65    if 'NINJA_SUMMARIZE_BUILD' in env:
66        del env['NINJA_SUMMARIZE_BUILD']
67    proc = subprocess.Popen(['autoninja', '-C', out_dir] + targets,
68                            stdout=subprocess.PIPE,
69                            stderr=subprocess.STDOUT,
70                            env=env,
71                            text=True)
72    lines = []
73    prev_line = ''
74    width = shutil.get_terminal_size().columns
75    while proc.poll() is None:
76        line = proc.stdout.readline()
77        lines.append(line)
78        if should_print:
79            if prev_line.startswith('[') and line.startswith('['):
80                # Shrink the line according to terminal size.
81                msg = line.rstrip()
82                if len(msg) > width:
83                    # 5 = 3 (Ellipsis) + 2 (header)
84                    length_to_show = width - 5
85                    msg = f'{msg[:2]}...{msg[-length_to_show:]}'
86                # \r to return the carriage to the beginning of line, \033[K to
87                # replace the normal \n to erase until the end of the line. This
88                # allows ninja output for successful targets to overwrite each
89                # other.
90                msg = f'\r{msg}\033[K'
91            elif prev_line.startswith('['):
92                # Since the previous line likely did not include a newline, an
93                # extra newline is needed to avoid the current line being
94                # appended to the previous line.
95                msg = f'\n{line}'
96            else:
97                msg = line
98            print(msg, end='')
99        prev_line = line
100    if proc.returncode != 0:
101        return None
102    return ''.join(lines)
103
104
105def _generate_project_json_content(out_dir: str) -> str:
106    build_utils.CheckOutput(['gn', 'gen', '--ide=json', out_dir])
107    with open(os.path.join(out_dir, 'project.json')) as f:
108        return f.read()
109
110
111@dataclasses.dataclass
112class DepList:
113    """Represents a dep list assignment in GN."""
114    target_name: Optional[str]  # The name of the target containing the list.
115    variable_name: str  # Left-hand side variable name the list is assigned to.
116    child_nodes: List[dict]  # Right-hand side list of nodes.
117    operation: str  # The assignment operation, whether += or =.
118
119
120class BuildFile:
121    """Represents the contents of a BUILD.gn file."""
122    def __init__(self,
123                 build_gn_path: str,
124                 root_gn_path: pathlib.Path,
125                 *,
126                 dryrun: bool = False):
127        self._root = root_gn_path
128        self._rel_path = os.path.relpath(build_gn_path, root_gn_path)
129        self._gn_rel_path = '//' + os.path.dirname(self._rel_path)
130        self._full_path = os.path.abspath(build_gn_path)
131        self._skip_write_content = dryrun
132
133    def __enter__(self):
134        output = build_utils.CheckOutput(
135            ['gn', 'format', '--dump-tree=json', self._full_path])
136        self._content = json.loads(output)
137        self._original_content = json.dumps(self._content)
138        return self
139
140    def __exit__(self, exc, value, tb):
141        if not self._skip_write_content:
142            self.write_content_to_file()
143
144    # See: https://gist.github.com/sgraham/bd9ffee312f307d5f417019a9c0f0777
145    def _find_all(self, match_fn):
146        results = []
147
148        def get_target_name(node) -> Optional[str]:
149            """Example format (with irrelevant fields omitted):
150            {
151                "child": [ {
152                    "child": [ {
153                        "type": "LITERAL",
154                        "value": "\"hello_world_java\""
155                    } ],
156                    "type": "LIST"
157                }, {
158                    ...
159                } ],
160                "type": "FUNCTION",
161                "value": "java_library"
162            }
163
164            Example return: hello_world_java
165            """
166            if node.get(NODE_TYPE) != 'FUNCTION':
167                return None
168            children = node.get(NODE_CHILD)
169            if not children:
170                return None
171            first_child = children[0]
172            if first_child.get(NODE_TYPE) != 'LIST':
173                return None
174            grand_children = first_child.get(NODE_CHILD)
175            if not grand_children:
176                return None
177            grand_child = grand_children[0]
178            if grand_child.get(NODE_TYPE) != 'LITERAL':
179                return None
180            name = grand_child.get(NODE_VALUE)
181            if name.startswith('"'):
182                return name[1:-1]
183            return name
184
185        def recursive_find(root, last_known_target=None):
186            target_name = get_target_name(root) or last_known_target
187            matched = match_fn(root)
188            if matched is not None:
189                results.append((target_name, matched))
190                return
191            children = root.get(NODE_CHILD)
192            if children:
193                for child in children:
194                    recursive_find(child, last_known_target=target_name)
195
196        recursive_find(self._content)
197        return results
198
199    def _normalize(self, name: Optional[str], abs_path: bool = True):
200        """Returns the absolute GN path to the target with |name|.
201
202        This method normalizes target names, assuming that relative targets are
203        referenced based on the current file, allowing targets to be compared
204        by name to determine whether they are the same or not.
205
206        Given the current file is chrome/android/BUILD.gn:
207
208        # Removes surrounding quotation marks.
209        "//chrome/android:chrome_java" -> //chrome/android:chrome_java
210
211        # Makes relative paths absolute.
212        :chrome_java -> //chrome/android:chrome_java
213
214        # Spells out GN shorthands for basenames.
215        //chrome/android -> //chrome/android:android
216        """
217        if not name:
218            return ''
219        if name.startswith('"'):
220            name = name[1:-1]
221        if not name.startswith('//') and abs_path:
222            name = self._gn_rel_path + name
223        if not ':' in name:
224            name += ':' + os.path.basename(name)
225        return name
226
227    def _find_all_list_assignments(self):
228        def match_list_assignments(node):
229            r"""Matches and returns the list being assigned.
230
231            Binary node (with an operation such as = or +=)
232             /       \
233            /         \
234            name      list of nodes
235
236            Returns (name, list of nodes, op)
237            """
238            if node.get(NODE_TYPE) != 'BINARY':
239                return None
240            operation = node.get(NODE_VALUE)
241            children = node.get(NODE_CHILD)
242            assert len(children) == 2, (
243                'Binary nodes should have two child nodes, but the node is: '
244                f'{node}')
245            left_child, right_child = children
246            if left_child.get(NODE_TYPE) != 'IDENTIFIER':
247                return None
248            name = left_child.get(NODE_VALUE)
249            if right_child.get(NODE_TYPE) != 'LIST':
250                return None
251            list_of_nodes = right_child.get(NODE_CHILD)
252            return name, list_of_nodes, operation
253
254        return self._find_all(match_list_assignments)
255
256    def _find_all_deps_lists(self) -> Iterator[DepList]:
257        list_tuples = self._find_all_list_assignments()
258        for target_name, (var_name, node_list, operation) in list_tuples:
259            if (var_name == 'deps' or var_name.startswith('deps_')
260                    or var_name.endswith('_deps') or '_deps_' in var_name):
261                yield DepList(target_name=target_name,
262                              variable_name=var_name,
263                              child_nodes=node_list,
264                              operation=operation)
265
266    def _new_literal_node(self, value: str, begin_line: int = 1):
267        return {
268            'location': {
269                'begin_column': 1,
270                'begin_line': begin_line,
271                'end_column': 2,
272                'end_line': begin_line,
273            },
274            'type': 'LITERAL',
275            'value': f'"{value}"'
276        }
277
278    def _clone_replacing_value(self, node_to_copy: Dict, new_dep_name: str):
279        """Clone the existing node to preserve line numbers and update name.
280
281        It is easier to clone an existing node around the same location, as the
282        actual dict looks like this:
283        {
284            'location': {
285                'begin_column': 5,
286                'begin_line': 137,
287                'end_column': 27,
288                'end_line': 137
289            },
290            'type': 'LITERAL',
291            'value': '":anr_data_proto_java"'
292        }
293
294        Thus the new node to return should keep the same 'location' value (the
295        parser is tolerant as long as it's roughly in the correct spot) but
296        update the 'value' to the new dependency name.
297        """
298        new_dep = copy.deepcopy(node_to_copy)
299        # Any comments associated with the previous dep would not apply.
300        for comment_key in (BEFORE_COMMENT, AFTER_COMMENT, SUFFIX_COMMENT):
301            new_dep.pop(comment_key, None)  # Remove if exists.
302        new_dep[NODE_VALUE] = f'"{new_dep_name}"'
303        return new_dep
304
305    def add_deps(self, target: str, deps: List[str]) -> bool:
306        added_new_dep = False
307        normalized_target = self._normalize(target)
308        for dep_list in self._find_all_deps_lists():
309            if dep_list.target_name is None:
310                continue
311            # Only modify the first assignment operation to the deps variable,
312            # otherwise if there are += operations, then the list of deps will
313            # be added multiple times to the same target's deps.
314            if dep_list.operation != '=':
315                continue
316            full_target_name = f'{self._gn_rel_path}:{dep_list.target_name}'
317            # Support both the exact name and the absolute GN target names
318            # starting with //.
319            if (target != dep_list.target_name
320                    and normalized_target != full_target_name):
321                continue
322            if dep_list.variable_name != 'deps':
323                continue
324            existing_dep_names = set(
325                self._normalize(child.get(NODE_VALUE), abs_path=False)
326                for child in dep_list.child_nodes)
327            for new_dep_name in deps:
328                if new_dep_name in existing_dep_names:
329                    logging.info(
330                        f'Skipping existing {new_dep_name} in {target}.deps')
331                    continue
332                logging.info(f'Adding {new_dep_name} to {target}.deps')
333                # If there are no existing child nodes, then create a new one.
334                # Otherwise clone an existing child node to ensure more accurate
335                # line numbers and possible better preserve comments.
336                if not dep_list.child_nodes:
337                    new_dep = self._new_literal_node(new_dep_name)
338                else:
339                    new_dep = self._clone_replacing_value(
340                        dep_list.child_nodes[0], new_dep_name)
341                dep_list.child_nodes.append(new_dep)
342                added_new_dep = True
343        if not added_new_dep:
344            # This should match the string in bytecode_processor.py.
345            print(f'Unable to find {target}')
346        return added_new_dep
347
348    def search_deps(self, name_query: Optional[str],
349                    path_query: Optional[str]) -> bool:
350        if path_query:
351            if not re.search(path_query, self._rel_path):
352                return False
353            elif not name_query:
354                print(self._rel_path)
355                return True
356        for dep_list in self._find_all_deps_lists():
357            for child in dep_list.child_nodes:
358                # Typically searches run on non-absolute dep paths.
359                dep_name = self._normalize(child.get(NODE_VALUE),
360                                           abs_path=False)
361                if name_query and re.search(name_query, dep_name):
362                    print(f'{self._rel_path}: {dep_name} in '
363                          f'{dep_list.target_name}.{dep_list.variable_name}')
364                    return True
365        return False
366
367    def split_deps(self, original_dep_name: str,
368                   new_dep_names: List[str]) -> bool:
369        split = False
370        for new_dep_name in new_dep_names:
371            if self._split_dep(original_dep_name, new_dep_name):
372                split = True
373        return split
374
375    def _split_dep(self, original_dep_name: str, new_dep_name: str) -> bool:
376        """Add |new_dep_name| to GN deps that contains |original_dep_name|.
377
378        Supports deps, public_deps, and other deps variables.
379
380        Works for explicitly assigning a list to deps:
381        deps = [ ..., "original_dep", ...]
382        # Becomes
383        deps = [ ..., "original_dep", "new_dep", ...]
384        Also works for appending a list to deps:
385        public_deps += [ ..., "original_dep", ...]
386        # Becomes
387        public_deps += [ ..., "original_dep", "new_dep", ...]
388
389        Does not work for assigning or appending variables to deps:
390        deps = other_list_of_deps # Does NOT check other_list_of_deps.
391        # Becomes (no changes)
392        deps = other_list_of_deps
393
394        Does not work with parameter expansion, i.e. $variables.
395
396        Returns whether the new dep was added one or more times.
397        """
398        for dep_name in (original_dep_name, new_dep_name):
399            assert dep_name.startswith('//'), (
400                f'Absolute GN path required, starting with //: {dep_name}')
401
402        added_new_dep = False
403        normalized_original_dep_name = self._normalize(original_dep_name)
404        normalized_new_dep_name = self._normalize(new_dep_name)
405        for dep_list in self._find_all_deps_lists():
406            original_dep_idx = None
407            new_dep_already_exists = False
408            for idx, child in enumerate(dep_list.child_nodes):
409                dep_name = self._normalize(child.get(NODE_VALUE))
410                if dep_name == normalized_original_dep_name:
411                    original_dep_idx = idx
412                if dep_name == normalized_new_dep_name:
413                    new_dep_already_exists = True
414            if original_dep_idx is not None and not new_dep_already_exists:
415                if dep_list.target_name is None:
416                    target_str = self._gn_rel_path
417                else:
418                    target_str = f'{self._gn_rel_path}:{dep_list.target_name}'
419                location = f"{target_str}'s {dep_list.variable_name} variable"
420                logging.info(f'Adding {new_dep_name} to {location}')
421                new_dep = self._clone_replacing_value(
422                    dep_list.child_nodes[original_dep_idx], new_dep_name)
423                # Add the new dep after the existing dep to preserve comments
424                # before the existing dep.
425                dep_list.child_nodes.insert(original_dep_idx + 1, new_dep)
426                added_new_dep = True
427
428        return added_new_dep
429
430    def remove_deps(self,
431                    dep_names: List[str],
432                    out_dir: str,
433                    targets: List[str],
434                    target_name_filter: Optional[str],
435                    inline_mode: bool = False) -> Tuple[bool, str]:
436        if not inline_mode:
437            deps_to_remove = dep_names
438        else:
439            # If the first dep cannot be removed (or is not found) then in the
440            # case of inlining we can skip this file for the rest of the deps.
441            first_dep = dep_names[0]
442            if not self._remove_deps([first_dep], out_dir, targets,
443                                     target_name_filter):
444                return False
445            deps_to_remove = dep_names[1:]
446        return self._remove_deps(deps_to_remove, out_dir, targets,
447                                 target_name_filter)
448
449    def _remove_deps(self, dep_names: List[str], out_dir: str,
450                     targets: List[str],
451                     target_name_filter: Optional[str]) -> Tuple[bool, str]:
452        """Remove |dep_names| if the target can still be built in |out_dir|.
453
454        Supports deps, public_deps, and other deps variables.
455
456        Works for explicitly assigning a list to deps:
457        deps = [ ..., "original_dep", ...]
458        # Becomes
459        deps = [ ..., ...]
460
461        Does not work with parameter expansion, i.e. $variables.
462
463        Returns whether any deps were removed.
464        """
465        normalized_dep_names = set()
466        for dep_name in dep_names:
467            assert dep_name.startswith('//'), (
468                f'Absolute GN path required, starting with //: {dep_name}')
469            normalized_dep_names.add(self._normalize(dep_name))
470
471        removed_dep = False
472        for dep_list in self._find_all_deps_lists():
473            child_deps_to_remove = [
474                c for c in dep_list.child_nodes
475                if self._normalize(c.get(NODE_VALUE)) in normalized_dep_names
476            ]
477            if not child_deps_to_remove:
478                continue
479
480            if dep_list.target_name is None:
481                target_name_str = self._gn_rel_path
482            else:
483                target_name_str = f'{self._gn_rel_path}:{dep_list.target_name}'
484            if (target_name_filter is not None and
485                    re.search(target_name_filter, target_name_str) is None):
486                logging.info(f'Skip: Since re.search("{target_name_filter}", '
487                             f'"{target_name_str}") is None.')
488                continue
489
490            location = f"{target_name_str}'s {dep_list.variable_name} variable"
491            expected_json = _generate_project_json_content(out_dir)
492            num_to_remove = len(child_deps_to_remove)
493            for remove_idx, child_dep in enumerate(child_deps_to_remove):
494                child_dep_name = self._normalize(child_dep.get(NODE_VALUE))
495                idx_to_remove = dep_list.child_nodes.index(child_dep)
496                logging.info(f'({remove_idx + 1}/{num_to_remove}) Found '
497                             f'{child_dep_name} in {location}.')
498                child_to_remove = dep_list.child_nodes[idx_to_remove]
499                can_remove_dep = False
500                with _backup_and_restore_file_contents(self._full_path):
501                    dep_list.child_nodes.remove(child_to_remove)
502                    self.write_content_to_file()
503                    # Immediately restore deps_list's original value in case the
504                    # following build is interrupted. We don't want the
505                    # intermediate untested value to be written as the final
506                    # build file.
507                    dep_list.child_nodes.insert(idx_to_remove, child_to_remove)
508                    if expected_json is not None:
509                        # If no changes to project.json was detected, this means
510                        # the current target is not part of out_dir's build and
511                        # cannot be removed even if the build succeeds.
512                        after_json = _generate_project_json_content(out_dir)
513                        if expected_json == after_json:
514                            # If one change in this list isn't part of the
515                            # build, no need to try any other in this list.
516                            logging.info('Skip: No changes to project.json.')
517                            break
518
519                        # Avoids testing every dep removal for the same list.
520                        expected_json = None
521                    if self._can_still_build_everything(out_dir, targets):
522                        can_remove_dep = True
523                if not can_remove_dep:
524                    continue
525
526                dep_list.child_nodes.remove(child_to_remove)
527                # Comments before a target can apply to the targets after.
528                if (BEFORE_COMMENT in child_to_remove
529                        and idx_to_remove < len(dep_list.child_nodes)):
530                    child_after = dep_list.child_nodes[idx_to_remove]
531                    if BEFORE_COMMENT not in child_after:
532                        child_after[BEFORE_COMMENT] = []
533                    child_after[BEFORE_COMMENT][:] = (
534                        child_to_remove[BEFORE_COMMENT] +
535                        child_after[BEFORE_COMMENT])
536                # Comments after or behind a target don't make sense to re-
537                # position, simply ignore AFTER_COMMENT and SUFFIX_COMMENT.
538                removed_dep = True
539                logging.info(f'Removed {child_dep_name} from {location}.')
540        return removed_dep
541
542    def _can_still_build_everything(self, out_dir: str,
543                                    targets: List[str]) -> bool:
544        output = _build_targets_output(out_dir, targets)
545        if output is None:
546            logging.info('Ninja failed to build all targets')
547            return False
548        # If ninja did not re-build anything, then the target changed is not
549        # among the targets being built. Avoid this change as it's not been
550        # tested/used.
551        if 'ninja: no work to do.' in output:
552            logging.info('Ninja did not find any targets to build')
553            return False
554        return True
555
556    def write_content_to_file(self) -> None:
557        current_content = json.dumps(self._content)
558        if current_content != self._original_content:
559            subprocess.run(
560                ['gn', 'format', '--read-tree=json', self._full_path],
561                text=True,
562                check=True,
563                input=current_content)
564