xref: /aosp_15_r20/external/pigweed/pw_build/py/pw_build/bazel_to_gn.py (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1# Copyright 2023 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""Generates BUILD.gn files from rules in Bazel workspace."""
15
16import argparse
17import json
18import os
19import re
20
21from collections import defaultdict, deque
22from pathlib import Path, PurePath, PurePosixPath
23from typing import (
24    Deque,
25    IO,
26    Iterable,
27    Iterator,
28    Set,
29)
30
31from pw_build.bazel_query import (
32    ParseError,
33    BazelLabel,
34    BazelRule,
35    BazelWorkspace,
36)
37from pw_build.gn_target import GnTarget
38from pw_build.gn_writer import GnFile
39
40
41class BazelToGnConverter:
42    """Manages the conversion of Bazel rules into GN targets."""
43
44    def __init__(self, pw_root: Path) -> None:
45        """Instantiates a Bazel workspace.
46
47        Args:
48            pw_root: Path to Pigweed directory, e.g. "$PW_ROOT".
49        """
50        self._names_by_repo: dict[str, str] = {}
51        self._names_by_build_arg: dict[str, str] = {}
52        self._pending: Deque[BazelLabel] = deque()
53        self._loaded: Set[str] = set()
54        self._source_dirs: dict[str, PurePath] = {
55            'pigweed': pw_root,
56        }
57        self._workspaces: dict[str, BazelWorkspace] = {
58            'pigweed': BazelWorkspace(
59                'com_google_pigweed', pw_root, fetch=False
60            ),
61        }
62        self._revisions: dict[str, str] = {}
63
64    def get_name(
65        self,
66        label: BazelLabel | None = None,
67        repo: str | None = None,
68        build_arg: str | None = None,
69    ) -> str:
70        """Returns the name of a third-party module.
71
72        Exactly one of the "label", "repo" or "build_arg" keyword arguments must
73        be provided.
74
75        Args:
76            label:      Bazel label referring to the third party module.
77            repo:       Bazel repository of the third party module,
78                        e.g. "com_google_foo".
79            build_arg:  GN build argument of the third party module,
80                        e.g. "$dir_pw_third_party_foo".
81        """
82        if label:
83            assert not repo, 'multiple keyword arguments provided'
84            repo = label.repo()
85        assert not repo or not build_arg, 'multiple keyword arguments provided'
86        try:
87            if repo:
88                return self._names_by_repo[repo]
89            if build_arg:
90                return self._names_by_build_arg[build_arg]
91            raise AssertionError('no keyword arguments provided')
92        except KeyError as e:
93            raise ParseError(
94                f'unrecognized third party module: "{e.args[0]}"; '
95                'does it have a bazel_to_gn.json file?'
96            )
97
98    def get_source_dir(self, name: str) -> PurePath:
99        """Returns the source directory for a third-party module.
100
101        Args:
102            name: Name of the third party module.
103        """
104        build_arg = self._build_arg(name)
105        source_dir = self._source_dirs.get(build_arg)
106        if not source_dir:
107            raise KeyError(f'GN build argument not set: "{build_arg}"')
108        return source_dir
109
110    def parse_args_gn(self, file: IO) -> None:
111        """Reads third party build arguments from args.gn.
112
113        Args:
114            file: File-like object to read from.
115        """
116        build_arg_pat = r'(dir_pw_third_party_\S*)\s*=\s*"([^"]*)"'
117        for line in file:
118            match = re.search(build_arg_pat, line)
119            if match:
120                build_arg = f'${match.group(1)}'
121                source_dir = PurePath(match.group(2))
122                self._source_dirs[build_arg] = source_dir
123
124    def load_workspace(self, name: str, bazel_to_gn: IO) -> None:
125        """Parses a bazel_to_gn.json file and loads the workspace it describes.
126
127        Recognized fields include:
128            repo:     The Bazel name of the repository.
129            generate: Disables generating GN if present and set to `False`.
130            targets:  A list of Bazel labels to generate GN for.
131            options:  A dictionary of mapping Bazel flags to build settings.
132
133        Args:
134            name: Name of a third party module.
135            bazel_to_gn: A file-like object describing the Bazel workspace.
136        """
137        json_data = json.load(bazel_to_gn)
138        generate = json_data.get('generate', True)
139        source_dir = None
140        if generate:
141            source_dir = self.get_source_dir(name)
142        repo = json_data['repo']
143        workspace = BazelWorkspace(repo, source_dir)
144        workspace.generate = generate
145        workspace.targets = json_data.get('targets', [])
146        workspace.options = json_data.get('options', {})
147        self._names_by_repo[repo] = name
148        self._workspaces[name] = workspace
149
150    def get_initial_targets(self, name: str) -> list[BazelLabel]:
151        """Adds labels from a third party module to the converter queue.
152
153        Returns the number of labels added.
154
155        Args:
156            name: Name of a previously loaded repo.
157        """
158        workspace = self._workspaces[name]
159        repo = workspace.repo()
160        self._loaded = set(workspace.targets)
161        return [BazelLabel(short, repo=repo) for short in self._loaded]
162
163    def pending(self) -> Iterable[BazelLabel]:
164        """Returns the label for the next rule that needs to be loaed."""
165        while self._pending:
166            label = self._pending.popleft()
167            if str(label) in self._loaded:
168                continue
169            self._loaded.add(str(label))
170            yield label
171
172    def load_rules(self, labels: list[BazelLabel]) -> Iterable[BazelRule]:
173        """Queries a Bazel workspace to instantiate a rule.
174
175        Return `None` if the GN files for the workspace are manually
176        generated, otherwise returns the rule. Adds the rules deps to the queue
177        of pending labels to be loaded.
178
179        Args:
180            label:  The Bazel label indicating the workspace and target.
181        """
182        by_repo: dict[str, list[BazelLabel]] = defaultdict(list)
183        deps: Set[str] = set()
184        for label in labels:
185            by_repo[label.repo()].append(label)
186        for repo, labels_for_repo in by_repo.items():
187            name = self.get_name(repo=repo)
188            workspace = self._workspaces[name]
189            for rule in workspace.get_rules(labels_for_repo):
190                label = rule.label()
191                package = label.package()
192                for attr_name in ['deps', 'implementation_deps']:
193                    for dep in rule.get_list(attr_name):
194                        label = BazelLabel(dep, repo=repo, package=package)
195                        deps.add(str(label))
196                yield rule
197        self._pending.extend([BazelLabel(dep) for dep in deps])
198
199    def package(self, rule: BazelRule) -> str:
200        """Returns the relative path to the BUILD.gn corresponding to a rule.
201
202        The relative path is relative to $dir_pw_third_party, and consists of
203        the third party module name and the package portion of the Bazel label.
204
205        Args:
206            rule: The rule to get the relative path for.
207        """
208        label = rule.label()
209        name = self.get_name(label=label)
210        return f'{name}/{label.package()}'
211
212    def convert_rule(self, rule: BazelRule) -> GnTarget:
213        """Creates a GN target from a Bazel rule.
214
215        Args:
216            rule: The rule to convert into a GnTarget.
217        """
218        label = rule.label()
219        name = self.get_name(label=label)
220        if rule.kind() == 'cc_library':
221            if rule.get_bool('linkstatic'):
222                target_type = f'{name}_static_library'.replace('-', '_')
223            else:
224                target_type = f'{name}_source_set'.replace('-', '_')
225        else:
226            raise ParseError(f'unsupported Bazel kind: {rule.kind()}')
227        gn_target = GnTarget(target_type, label.target())
228        gn_target.origin = str(label)
229        gn_target.attrs = {
230            'public': list(self._source_relative(name, rule, 'hdrs')),
231            'sources': list(self._source_relative(name, rule, 'srcs')),
232            'inputs': list(
233                self._source_relative(name, rule, 'additional_linker_inputs')
234            ),
235            'include_dirs': list(self._source_relative(name, rule, 'includes')),
236            'cflags': rule.get_list('copts'),
237            'public_defines': rule.get_list('defines'),
238            'ldflags': rule.get_list('linkopts'),
239            'defines': rule.get_list('local_defines'),
240            'public_deps': list(self._build_relative(name, rule, 'deps')),
241            'deps': list(
242                self._build_relative(name, rule, 'implementation_deps')
243            ),
244        }
245
246        return gn_target
247
248    def num_loaded(self) -> int:
249        """Returns the number of rules loaded thus far."""
250        return len(self._loaded)
251
252    def get_workspace_revisions(self) -> Iterable[str]:
253        """Returns the revisions needed by each generated workspace."""
254        for name, workspace in self._workspaces.items():
255            if name == 'pigweed':
256                continue
257            if workspace.generate:
258                yield f'{name:<16}: {workspace.revision()}'
259
260    def update_pw_package(
261        self, name: str, lines: Iterator[str]
262    ) -> Iterable[str]:
263        """Updates the third party package revision in the pw_package module.
264
265        Args:
266            lines: Contents of the existing pw_package package file.
267        """
268        workspace = self._workspaces[name]
269        if name in self._revisions:
270            revision = self._revisions[name]
271        else:
272            revision = workspace.revision('HEAD')
273        for line in lines:
274            line = line.rstrip()
275            m = re.match(r'(.*commit=[\'"])([a-z0-9]*)([\'"],.*)', line)
276            if not m:
277                yield line
278                continue
279            current = m.group(2)
280            if workspace.timestamp(current) < workspace.timestamp(revision):
281                yield f'{m.group(1)}{revision}{m.group(3)}'
282            else:
283                yield line
284        yield ''
285
286    def get_imports(self, gn_target: GnTarget) -> Iterable[str]:
287        """Returns the GNI files needed by the given target."""
288        for build_arg in gn_target.build_args():
289            name = self.get_name(build_arg=build_arg)
290            yield f'$dir_pw_third_party/{name}/{name}.gni'
291
292    def update_doc_rst(self, name: str, lines: Iterator[str]) -> Iterable[str]:
293        """Replaces the "Version" part of docs.rst with the latest revision.
294
295        This will truncate everything after the "generated section" comment and
296        add the comment and version information. If the file does not have the
297        comment, the comment and information will appended to the end of the
298        file.
299
300        Args:
301            lines: Iterator of lines.
302        """
303        workspace = self._workspaces[name]
304        comment = '.. DO NOT EDIT BELOW THIS LINE. Generated section.'
305        url = workspace.url().rstrip('.git')
306        revision = workspace.revision()
307        short = revision[:8]
308        for line in lines:
309            line = line.rstrip()
310            if line == comment:
311                break
312            yield line
313        yield comment
314        yield ''
315        yield 'Version'
316        yield '======='
317        yield f'The update script was last run for revision `{short}`_.'
318        yield ''
319        yield f'.. _{short}: {url}/tree/{revision}'
320        yield ''
321
322    def _build_arg(self, name: str) -> str:
323        """Returns the GN build argument for a third party module."""
324        build_arg = f'$dir_pw_third_party_{name}'.replace('-', '_')
325        if build_arg not in self._names_by_build_arg:
326            self._names_by_build_arg[build_arg] = name
327        return build_arg
328
329    def _source_relative(
330        self, name: str, rule: BazelRule, attr_name: str
331    ) -> Iterable[str]:
332        """Provides GN paths relative to the third party source directory."""
333        if not rule.has_attr(attr_name):
334            return
335        attr_type = rule.attr_type(attr_name)
336        build_arg = self._build_arg(name)
337        repo = rule.label().repo()
338        if attr_type == 'string_list':
339            for item in rule.get_list(attr_name):
340                yield f'{build_arg}/{item}'
341        elif attr_type == 'label_list':
342            for item in rule.get_list(attr_name):
343                label = BazelLabel(item, repo=repo)
344                yield f'{build_arg}/{label.package()}/{label.target()}'
345        else:
346            raise ParseError(f'unknown attribute type: {attr_type}')
347
348    def _build_relative(
349        self, name: str, rule: BazelRule, attr_name: str
350    ) -> Iterable[str]:
351        """Provides GN labels relative to the directory under //third_party."""
352        label = rule.label()
353        repo = label.repo()
354        for other_str in rule.get_list(attr_name):
355            other = BazelLabel(other_str, repo=repo, package=label.package())
356            package = f'{name}/{label.package()}'
357            other_package = f'{self.get_name(label=other)}/{other.package()}'
358
359            # Abbreviate the label only if it is part of the same repo.
360            if label.repo() != other.repo():
361                path = PurePosixPath('$dir_pw_third_party', other_package)
362            elif other_package == package:
363                path = None
364            else:
365                path = PurePosixPath(package)
366                other_path = PurePosixPath(other_package)
367                common = PurePosixPath(
368                    *os.path.commonprefix([path.parts, other_path.parts])
369                )
370                walk_up = PurePosixPath(
371                    *(['..'] * (len(path.parts) - len(common.parts)))
372                )
373                walk_down = other_path.relative_to(common)
374                path = PurePosixPath(walk_up, walk_down)
375
376            if not path:
377                yield f':{other.target()}'
378            elif path.name == other.target():
379                yield f'{path}'
380            else:
381                yield f'{path}:{other.target()}'
382
    def _get_http_archives(self) -> dict[str, BazelRule]:
        """Returns a mapping of third party modules to rules.

        The returned rules describe the most recently required version of the
        third party module. The mapping is keyed by the Bazel repository name
        taken from each `http_archive` rule's label target.

        As a side effect, `self._revisions` is updated with the revision kept
        for each module, and rules from the Pigweed workspace may have their
        `strip_prefix`, `url` and `urls` attributes rewritten.

        Raises:
            RuntimeError: If a workspace's 'HEAD' timestamp is earlier than the
                timestamp of the revision recorded for it.
        """
        # First, examine http_archives in the third_party workspaces.
        http_archives = {}
        for name, workspace in self._workspaces.items():
            if name == 'pigweed':
                continue
            if not workspace.generate:
                continue
            for rule in workspace.get_http_archives():
                repo = rule.label().target()
                # Ignore archives for repos with no loaded workspace.
                if repo not in self._names_by_repo:
                    continue
                other_name = self._names_by_repo[repo]
                other = self._workspaces[other_name]
                if not other.generate:
                    continue
                # The tag is `strip_prefix` with every "<module name>-"
                # occurrence removed.
                # NOTE(review): presumably yields a tag or commit that
                # `revision()` can resolve -- confirm against BazelWorkspace.
                tag = rule.get_str('strip_prefix').replace(f'{other_name}-', '')
                revision = other.revision(tag)
                timestamp = other.timestamp(revision)
                # Keep this rule if nothing is recorded yet, or if the
                # recorded revision has a strictly smaller timestamp.
                if other_name in self._revisions:
                    strictest = other.timestamp(self._revisions[other_name])
                    keep = strictest < timestamp
                else:
                    keep = True
                if keep:
                    http_archives[repo] = rule
                    self._revisions[other_name] = revision

        # Next, compare them to those in the WORKSPACE file.
        pigweed = self._workspaces['pigweed']
        for rule in pigweed.get_http_archives():
            repo = rule.label().target()
            if repo not in self._names_by_repo:
                continue
            name = self._names_by_repo[repo]
            workspace = self._workspaces[name]
            if not workspace.generate:
                continue
            if name not in self._revisions:
                # No third party workspace required a revision: rewrite the
                # rule to refer to the workspace's 'HEAD' revision instead.
                old_rev = rule.get_str('strip_prefix').replace(f'{name}-', '')
                new_rev = workspace.revision('HEAD')
                rule.set_attr('strip_prefix', f'{name}-{new_rev}')
                if rule.has_attr('url'):
                    url = rule.get_str('url')
                    rule.set_attr('url', url.replace(old_rev, new_rev))
                if rule.has_attr('urls'):
                    urls = rule.get_list('urls')
                    urls = [url.replace(old_rev, new_rev) for url in urls]
                    rule.set_attr('urls', urls)
                keep = True
            else:
                # A revision is already recorded: this rule replaces it only
                # if its own revision has a strictly larger timestamp.
                tag = rule.get_str('strip_prefix').replace(f'{name}-', '')
                new_rev = workspace.revision(tag)
                timestamp = workspace.timestamp(new_rev)
                strictest = workspace.timestamp(self._revisions[name])
                keep = strictest < timestamp
            if keep:
                http_archives[repo] = rule
                self._revisions[name] = new_rev

        # Next, check that the current revisions satisfy the strict revisions.
        for name, workspace in self._workspaces.items():
            if name not in self._revisions:
                continue
            needed = workspace.timestamp(self._revisions[name])
            actual = workspace.timestamp('HEAD')
            if actual < needed:
                raise RuntimeError(f'{name} must be from after {needed}.')

        # Finally, return the mapping.
        return http_archives
459
460
def _parse_args() -> argparse.Namespace:
    """Parses command line arguments and normalizes the build directory.

    When `--build_dir` is omitted it defaults to "out" under `$PW_ROOT`; when
    it is a relative path it is resolved against `$PW_ROOT`.

    Returns:
        The parsed arguments, with `build_dir` set as described above and
        `names` holding the requested third-party module names.

    Raises:
        RuntimeError: If `$PW_ROOT` is needed to resolve the build directory
            but is not set.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '-b',
        '--build_dir',
        type=PurePath,
        help=('Build output directory, which must contain "args.gn"'),
    )
    parser.add_argument(
        'names',
        type=str,
        nargs='+',
        help=(
            'Third-party dependencies to generate GN for. '
            'Must match a subdirectoy of $PW_ROOT/third_party'
        ),
    )
    args = parser.parse_args()

    build_dir = args.build_dir
    if build_dir is None or not build_dir.is_absolute():
        # No `pw_root` argument is defined on the parser, so the root must
        # come from the environment rather than from `args`.
        pw_root = os.getenv('PW_ROOT')
        if not pw_root:
            raise RuntimeError('PW_ROOT is not set')
        relative = 'out' if build_dir is None else build_dir
        args.build_dir = PurePath(pw_root, relative)

    return args
491
492
def _overprint(msg: str) -> None:
    """Prints a status line that the next output will overwrite.

    The message is padded to 80 columns and ends with a carriage return
    instead of a newline.
    """
    print(f'{msg:<80}', end='\r', flush=True)
496
497
def _bazel_to_gn(args: argparse.Namespace) -> None:
    """Generates BUILD.gn files from rules in Bazel workspace.

    This script is intended to be as unit-testable as possible. As a result,
    most functionality has been pushed into testable methods of
    BazelToGnConverter.

    This method primarily consists of three things:
      1. Print statements to provide feedback to the user.
      2. File operations, to make subroutines more unit testable.
      3. Control flow and loops around the two previous categories.

    The Pigweed root is taken to be the parent of the build directory.

    Args:
        args: Script arguments. See `_parse_args`.
    """
    build_dir = Path(args.build_dir)
    pw_root = build_dir.parent
    b2g = BazelToGnConverter(pw_root)

    args_gn_path = build_dir.joinpath('args.gn')
    print(f'Reading build arguments from {args_gn_path}...')
    with open(args_gn_path) as args_gn:
        b2g.parse_args_gn(args_gn)

    print('Converting Bazel rules and their dependencies to GN targets...')
    third_party_path = Path(pw_root, 'third_party')
    for child in third_party_path.iterdir():
        # Directories without a bazel_to_gn.json are deliberately skipped.
        try:
            if child.is_dir():
                with open(child.joinpath('bazel_to_gn.json')) as file:
                    b2g.load_workspace(child.name, file)
                print(f'Bazel workspace loaded for //third_party/{child.name}')
        except FileNotFoundError:
            pass

    print('Starting from:')
    # Collect the initial targets of every requested module. Rebinding the
    # list per module would convert only the last module named.
    labels: list[BazelLabel] = []
    for name in args.names:
        try:
            initial = b2g.get_initial_targets(name)
        except KeyError:
            print(f'E: Unable to get initial targets for "{name}".')
            print(f'E: Is "//third_party/{name}/bazel_to_gn.json" missing?')
            return
        print(f'  {len(initial)} initial rule(s) in {name}')
        labels.extend(initial)

    # Convert rules in rounds: each round loads the current labels, and the
    # next round is whatever the converter still reports as pending.
    by_package: dict[str, list[GnTarget]] = defaultdict(list)
    while labels:
        for rule in b2g.load_rules(labels):
            by_package[b2g.package(rule)].append(b2g.convert_rule(rule))
            _overprint(f'[{b2g.num_loaded()}] {rule.label()}')
        labels = list(b2g.pending())
    print(f'[{b2g.num_loaded()}] Conversion complete!'.ljust(80))

    for package, gn_targets in sorted(by_package.items()):
        build_gn_path = third_party_path.joinpath(package, 'BUILD.gn')
        imports = set().union(
            *[b2g.get_imports(gn_target) for gn_target in gn_targets]
        )
        _overprint(f'Writing {build_gn_path}...')
        with GnFile(build_gn_path) as build_gn:
            build_gn.write_file(imports, gn_targets)

    # The module name is the first component of each package path. Sorting
    # keeps the order of updates and log output stable between runs.
    names = sorted({package.split('/')[0] for package in by_package})

    for name in names:
        update_path = pw_root.joinpath(
            'pw_package',
            'py',
            'pw_package',
            'packages',
            name.replace('-', '_') + '.py',
        )
        _overprint(f'Updating {update_path}...')
        # Read the whole file before reopening it for writing.
        with open(update_path, 'r') as pkg_file:
            contents = '\n'.join(b2g.update_pw_package(name, pkg_file))
        with open(update_path, 'w') as pkg_file:
            pkg_file.write(contents)
        print(f'Updated {update_path} with current revision.')

    for name in names:
        update_path = third_party_path.joinpath(name, 'docs.rst')
        _overprint(f'Updating {update_path}...')
        with open(update_path, 'r') as docs_rst:
            contents = '\n'.join(b2g.update_doc_rst(name, docs_rst))
        with open(update_path, 'w') as docs_rst:
            docs_rst.write(contents)
        print(f'Updated {update_path} with current revision.')

    print('Done!')

    print(
        'Make sure to update your WORKSPACE file to fetch the following '
        + 'revisions or later:'
    )
    for revision in b2g.get_workspace_revisions():
        print(revision)
593
594
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    _bazel_to_gn(_parse_args())
597