xref: /aosp_15_r20/external/executorch/build/extract_sources.py (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1#!/usr/bin/env python3
2# Copyright (c) Meta Platforms, Inc. and affiliates.
3# All rights reserved.
4#
5# This source code is licensed under the BSD-style license found in the
6# LICENSE file in the root directory of this source tree.
7
8import argparse
9import copy
10import os
11import re
12
13from enum import Enum
14from typing import Any, List, Optional, Sequence
15
16from buck_util import Buck2Runner
17
18try:
19    import tomllib  # Standard in 3.11 and later
20except ModuleNotFoundError:
21    import tomli as tomllib
22
23"""Extracts source lists from the buck2 build system and writes them to a file.
24
The config file is in TOML format and should contain one or more
26`[targets.<target-name>]` entries, along with an optional `[target_base]` entry.
27
28All of these may have the following lists of strings:
29- buck_targets: The list of buck targets that map to `<target-name>`.
30- deps: A list of other `<target-name>` entries that this target depends on.
31  Used to prune sources that are provided by those other targets.
32- filters: A list of regular expressions. This tool will only emit source files
33  whose relative paths match all entries.
34- excludes: A list of regular expressions. This tool will not emit source files
35  whose relative paths match any entry.
36
The special `[target_base]` entry provides default lists that are inherited by
the `[targets.<target-name>]` entries. When a `[targets.<target-name>]` entry
defines a key that is already present in `[target_base]`, the target-specific
entries are appended to the base list.
41
42Example config:
43
44    [target_base]
45    excludes = [
46    "^third-party",
47    ]
48
49    [targets.schema]
50    buck_targets = [
51    "//schema:schema",
52    ]
53    filters = [
54    ".fbs$",
55    ]
56
57    [targets.executorch]
58    buck_targets = [
59    "//runtime/executor:program",
60    ]
61    deps = [
62    "schema",
63    ]
64    filters = [
65    ".cpp$",
66    ]
67"""
68
69
class Target:
    """Parsed [targets.*] entry from the TOML file.

    Can query buck for its list of source files. The computed list is cached
    on the instance, so a successful `get_sources()` call is only evaluated
    once per target.
    """

    class _InitState(Enum):
        # Sources have not been computed yet.
        UNINITIALIZED = 0
        # get_sources() is currently running for this target. Re-entering
        # get_sources() in this state means the `deps` lists form a cycle.
        INITIALIZING = 1
        # Sources have been computed and are cached in `_sources`.
        READY = 2

    def __init__(
        self,
        name: str,
        target_dict: dict[str, Sequence[str]],
        base_dict: Optional[dict] = None,
    ) -> None:
        """Builds the effective config for one target.

        Args:
            name: The `<target-name>` of the `[targets.<target-name>]` entry.
            target_dict: The target-specific lists (`buck_targets`, `deps`,
                `filters`, `excludes`).
            base_dict: Optional default lists. Keys that are also present in
                `target_dict` are extended with the target-specific entries.
        """
        self._state: Target._InitState = Target._InitState.UNINITIALIZED
        self._sources = frozenset()

        self.name = name
        # Extend the base lists with the target-specific entries. The base is
        # deep-copied so that extending a list never modifies `base_dict`.
        self._config = copy.deepcopy(base_dict or {})
        for k, v in target_dict.items():
            if k in self._config:
                self._config[k].extend(v)
            else:
                self._config[k] = v

    def get_sources(
        self, graph: "Graph", runner: "Buck2Runner", buck_args: Optional[List[str]]
    ) -> frozenset[str]:
        """Returns the source files owned by this target.

        Args:
            graph: Graph used to look up the targets named in `deps` (via
                `graph.by_name`).
            runner: Object whose `run()` method executes the buck query.
            buck_args: Extra arguments appended to the `cquery` command, or
                None for no extra arguments.

        Returns:
            The files reported by buck that match all `filters`, match none of
            the `excludes`, and are not already provided by a target in `deps`.

        Raises:
            ValueError: If the `deps` entries form a cycle that includes this
                target.
        """
        if buck_args is None:
            buck_args = []

        if self._state == Target._InitState.READY:
            return self._sources
        # Detect cycles. An explicit raise is used instead of `assert` so the
        # check still runs when Python is started with -O.
        if self._state == Target._InitState.INITIALIZING:
            raise ValueError(
                f"Dependency cycle detected while resolving target '{self.name}'"
            )

        # Mark this target as in progress *before* recursing into its deps;
        # otherwise a cycle would recurse without bound.
        self._state = Target._InitState.INITIALIZING
        try:
            sources = self._compute_sources(graph, runner, buck_args)
        except BaseException:
            # Do not leave the target stuck in INITIALIZING, which a later
            # call would misreport as a cycle.
            self._state = Target._InitState.UNINITIALIZED
            raise

        self._sources = frozenset(sources)
        self._state = Target._InitState.READY
        return self._sources

    def _compute_sources(
        self, graph: "Graph", runner: "Buck2Runner", buck_args: List[str]
    ) -> set[str]:
        """Runs the buck query and applies filters, excludes and deps pruning."""
        # Assemble the query.
        query = "inputs({})".format(
            "+".join(
                [
                    "deps('{}')".format(target)
                    for target in self._config.get("buck_targets", [])
                ]
            )
        )

        # Get the complete list of source files that this target depends on.
        sources: set[str] = set(runner.run(["cquery", query] + buck_args))

        # Keep entries that match all of the filters.
        filters = [re.compile(p) for p in self._config.get("filters", [])]
        sources = {s for s in sources if all(p.search(s) for p in filters)}

        # Remove entries that match any of the excludes.
        excludes = [re.compile(p) for p in self._config.get("excludes", [])]
        sources = {s for s in sources if not any(p.search(s) for p in excludes)}

        # The buck query will give us the complete list of sources that this
        # target depends on, but that list includes sources that are owned by
        # its deps. Remove entries that are already covered by the transitive
        # set of dependencies.
        for dep in self._config.get("deps", []):
            sources.difference_update(
                graph.by_name[dep].get_sources(graph, runner, buck_args)
            )

        return sources
143
144
class Graph:
    """Collection of every configured target, addressable by target name."""

    def __init__(self, config_dict: dict[str, Any]) -> None:
        """Creates one Target per entry under the `targets` key.

        Args:
            config_dict: Parsed configuration. The optional `target_base`
                entry is handed to every Target as its base lists.
        """
        shared_defaults = config_dict.get("target_base", {})
        entries = config_dict.get("targets", {})

        # Maps `<target-name>` to its Target.
        self.by_name = {
            target_name: Target(target_name, entry, shared_defaults)
            for target_name, entry in entries.items()
        }
155
156
def parse_args() -> argparse.Namespace:
    """Parses the command-line arguments of this script.

    Returns:
        A namespace with the attributes `buck2`, `config`, `format`, `out`
        and `target_platforms`.
    """
    parser = argparse.ArgumentParser(
        description="Extracts deps from the buck2 build system",
    )
    parser.add_argument(
        "--buck2",
        default="buck2",
        help="'buck2' command to use",
    )
    parser.add_argument(
        "--config",
        metavar="config.toml",
        required=True,
        help="Path to the input TOML configuration file",
    )
    parser.add_argument(
        "--format",
        default="cmake",
        choices=["cmake"],
        help="Format to generate.",
    )
    parser.add_argument(
        "--out",
        metavar="file",
        # The output is always written to this path, so reject a missing
        # value here instead of failing on open(None) after the queries ran.
        required=True,
        help="Path to the file to generate.",
    )
    parser.add_argument(
        "--target-platforms", help="--target-platforms to pass to buck cquery, if any."
    )
    return parser.parse_args()
187
188
def generate_cmake(target_to_srcs: dict[str, list[str]]) -> bytes:
    """Renders the source lists as CMake `set()` commands.

    Args:
        target_to_srcs: Maps each target name to its list of source paths.

    Returns:
        UTF-8 encoded text: a header comment naming this script, followed by
        one `set(_<target>__srcs ...)` command per target, each preceded by a
        blank line. Lines are joined with newlines; none is appended at the end.
    """
    script_name = os.path.basename(__file__)
    # The marker is assembled from two literals, as in the original code;
    # presumably so this script itself is not detected as a generated file.
    chunks: list[str] = ["# @" + f"generated by {script_name}"]
    for name, files in target_to_srcs.items():
        chunks += [
            "",
            f"set(_{name}__srcs",
            *(f"    {path}" for path in files),
            ")",
        ]
    text = "\n".join(chunks)
    return text.encode("utf-8")
199
200
def main():
    """Reads the config, queries buck for every target and writes the output."""
    args = parse_args()

    # Read the TOML configuration and build the target graph from it.
    with open(args.config, mode="rb") as config_file:
        parsed_config = tomllib.load(config_file)
    graph = Graph(parsed_config)

    # Only forward --target-platforms to buck when a value was given.
    runner: Buck2Runner = Buck2Runner(args.buck2)
    extra_buck_args = (
        ["--target-platforms", args.target_platforms]
        if args.target_platforms
        else []
    )

    # Query each target and keep its sources in sorted order.
    target_to_srcs: dict[str, list[str]] = {
        name: sorted(target.get_sources(graph, runner, extra_buck_args))
        for name, target in graph.by_name.items()
    }

    # Render the requested format.
    if args.format != "cmake":
        raise ValueError("Unknown format: {}".format(args.format))
    rendered: bytes = generate_cmake(target_to_srcs)

    # Write the rendered bytes to the output path.
    with open(args.out, "wb") as out_file:
        out_file.write(rendered)
229
230
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
233