xref: /aosp_15_r20/external/toolchain-utils/llvm_tools/fetch_cq_size_diff.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1#!/usr/bin/env python3
2# Copyright 2024 The ChromiumOS Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Fetches the size diff between two images on gs://.
7
8If given a CL, this will autodetect a passing CQ builder on that CL and find
9a corresponding release build for said CQ builder. The sizes of these images
10will be compared.
11
12**Please note** that there's often version skew between release builds and CQ
13builds. While this skew shouldn't result in _huge_ binary size differences,
14it can still account for a few MB of diff in an average case.
15"""
16
17import abc
18import argparse
19import dataclasses
20import json
21import logging
22import os
23from pathlib import Path
24import subprocess
25import sys
26import tempfile
27from typing import List, Optional, Tuple
28
29import cros_cls
30
31
@dataclasses.dataclass(frozen=True)
class SizeDiffInfo:
    """Immutable pair of sizes produced by comparing two artifacts."""

    # Size measured from the baseline artifact, in bytes.
    baseline_size_bytes: int
    # Size measured from the new artifact, in bytes.
    new_size_bytes: int
38
39
class ComparableArtifact(abc.ABC):
    """Base class for CQ-produced artifacts whose sizes can be compared.

    Subclasses name the artifact and define how a local copy of it is
    measured; this class takes care of fetching copies from gs:// and
    pairing up the measurements.
    """

    @property
    @abc.abstractmethod
    def artifact_name(self) -> str:
        """The artifact's file name in gs://, e.g., "image.zip"."""

    @abc.abstractmethod
    def _measure_artifact_size(self, file: Path) -> int:
        """Computes the size of interest from a local copy of the artifact.

        Implementations are free to modify the directory containing `file`,
        and callers may not rely on that directory's contents afterward. The
        one promise is that `file` itself is left unmodified.
        """

    def _download_and_measure_size(self, gs_url: str) -> int:
        """Copies `gs_url` into a scratch directory and measures it there.

        The scratch directory only lives for the duration of this call.
        """
        with tempfile.TemporaryDirectory(prefix="fetch_size_diff_") as tmp:
            destination = Path(tmp) / os.path.basename(gs_url)
            copy_cmd = ["gsutil", "cp", gs_url, destination]
            subprocess.run(copy_cmd, check=True, stdin=subprocess.DEVNULL)
            return self._measure_artifact_size(destination)

    def compare_size_from_gs(self, baseline: str, new: str) -> SizeDiffInfo:
        """Downloads and measures both artifacts, baseline first."""
        baseline_size = self._download_and_measure_size(baseline)
        new_size = self._download_and_measure_size(new)
        return SizeDiffInfo(
            baseline_size_bytes=baseline_size,
            new_size_bytes=new_size,
        )
75
76
class DebugInfoArtifact(ComparableArtifact):
    """Measures debuginfo size via Chrome's debug file in `debug.tgz`."""

    @property
    def artifact_name(self) -> str:
        return "debug.tgz"

    def _measure_artifact_size(self, file: Path) -> int:
        """Extracts chrome.debug next to `file` and returns its byte size."""
        member = "./opt/google/chrome/chrome.debug"
        workdir = file.parent
        logging.info("Unpacking debuginfo...")
        # Only the one member of interest is pulled out of the tarball.
        extract_cmd = ["tar", "xaf", file, member]
        subprocess.run(
            extract_cmd, check=True, cwd=workdir, stdin=subprocess.DEVNULL
        )
        return (workdir / member).stat().st_size
94
95
class ImageSizeArtifact(ComparableArtifact):
    """Measures image size using the package-size report in `image.zip`."""

    @property
    def artifact_name(self) -> str:
        return "image.zip"

    def _measure_artifact_size(self, file: Path) -> int:
        """Returns `total_size` from the image's bin-package-sizes JSON.

        Raises:
            ValueError: if `total_size` is absent or isn't an int.
        """
        report_name = "chromiumos_base_image.bin-package-sizes.json"
        workdir = file.parent
        # Only the size report is extracted. The archive is addressed by its
        # bare name because unzip runs from the directory that contains it.
        unzip_cmd = ["unzip", file.name, report_name]
        subprocess.run(
            unzip_cmd, check=True, cwd=workdir, stdin=subprocess.DEVNULL
        )
        with (workdir / report_name).open(encoding="utf-8") as report:
            sizes = json.load(report)

        try:
            total = sizes["total_size"]
        except KeyError:
            raise ValueError(f"Missing total_size in {sizes.keys()}")

        if isinstance(total, int):
            return total
        raise ValueError(f"total_size was unexpectedly {type(total)}: {total}")
127
128
def is_probably_non_production_builder(builder_name: str) -> bool:
    """Heuristically flags builders that aren't for a production board.

    The check is a plain substring match against known non-production
    variant markers, so treat the answer as likely rather than certain.
    """
    non_production_markers = (
        "-asan-",
        "-buildtest-",
        "-fuzzer-",
        "-kernelnext-",
        "-ubsan-",
        "-vmtest-",
    )
    for marker in non_production_markers:
        if marker in builder_name:
            return True
    return False
145
146
def guess_release_artifact_path(artifact_link: str) -> Optional[str]:
    """Maps a CQ artifact URL to a close-enough release-builder equivalent.

    For example, an `artifact_link` of
        gs://chromeos-image-archive/brya-cq/
            R121-15677.0.0-90523-8764532770258575633/image.zip
    is expected to map to
        gs://chromeos-image-archive/brya-release/R121-15677.0.0/image.zip

    Returns:
        The guessed gs:// path of the release artifact, or None if the
        builder directory in `artifact_link` doesn't end in "-cq".
    """
    cq_suffix = "-cq"
    artifacts_dir, artifact_file = os.path.split(artifact_link)
    release_version = cros_cls.parse_release_from_builder_artifacts_link(
        artifacts_dir
    )
    # The builder's name is the directory that holds the artifacts directory.
    builder_name = os.path.basename(os.path.dirname(artifacts_dir))
    if not builder_name.endswith(cq_suffix):
        return None
    board_name = builder_name[: -len(cq_suffix)]
    release_dir = f"gs://chromeos-image-archive/{board_name}-release"
    return f"{release_dir}/{release_version}/{artifact_file}"
171
172
def try_gsutil_ls(paths: List[str]) -> List[str]:
    """Returns all of the paths `gsutil` matches from `paths`.

    Errors from gsutil about paths not existing are ignored; any other
    gsutil failure is logged and raised.

    Args:
        paths: gs:// URLs to pass to `gsutil ls`.

    Returns:
        The whitespace-stripped lines gsutil printed to stdout. Always empty
        if `paths` is empty.

    Raises:
        subprocess.CalledProcessError: if gsutil fails for a reason other
            than URLs matching no objects.
    """
    if not paths:
        # With no URL arguments, `gsutil ls` lists the default project's
        # buckets instead of nothing. Those must never be reported as matches,
        # so skip the invocation entirely.
        return []

    result = subprocess.run(
        ["gsutil", "-m", "ls"] + paths,
        # If any URI doesn't exist, gsutil will fail. Ignore the failure.
        check=False,
        encoding="utf-8",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if result.returncode:
        # Ensure the error message is what's expected, rather than e.g.,
        # invalid credentials.
        err_msg = "CommandException: One or more URLs matched no objects"
        if err_msg not in result.stderr:
            logging.error(
                "gsutil had unexpected output; stderr: %r", result.stderr
            )
            result.check_returncode()
    return [x.strip() for x in result.stdout.splitlines()]
197
198
def find_size_diffable_cq_artifacts(
    cq_build_ids: List[cros_cls.BuildID],
    artifact_name: str,
) -> Optional[Tuple[str, str]]:
    """Searches the cq-orchestrator builds for candidates for size comparison.

    Builds are inspected in the order given, and the search stops at the
    first build that has a CQ artifact with an existing release counterpart.

    Args:
        cq_build_ids: cq-orchestrator builds to inspect, in priority order.
        artifact_name: File name of the artifact to look for in each child
            builder's artifacts directory, e.g., "image.zip".

    Returns:
        None if no candidates are found. Otherwise, returns a two-tuple: index
        0 is the baseline (release) artifact, index 1 is the corresponding
        artifact generated by the CQ.
    """
    for cq_build_id in cq_build_ids:
        logging.info("Inspecting CQ build %d...", cq_build_id)
        orch_output = cros_cls.CQOrchestratorOutput.fetch(cq_build_id)
        child_builder_values = cros_cls.CQBoardBuilderOutput.fetch_many(
            [
                val
                for name, val in orch_output.child_builders.items()
                if not is_probably_non_production_builder(name)
            ]
        )
        artifacts_links = [
            x.artifacts_link
            for x in child_builder_values
            if x.artifacts_link is not None
        ]
        if not artifacts_links:
            logging.info("No children of CQ run %d had artifacts", cq_build_id)
            continue

        potential_artifacts = try_gsutil_ls(
            [os.path.join(x, artifact_name) for x in artifacts_links]
        )
        if not potential_artifacts:
            logging.info(
                "No children of CQ run %d produced a(n) %s",
                cq_build_id,
                artifact_name,
            )
            continue

        logging.debug(
            "Found candidate %s files: %s", artifact_name, potential_artifacts
        )
        guessed_paths = [
            (x, guess_release_artifact_path(x)) for x in potential_artifacts
        ]
        logging.debug("Guessed corresponding artifact files: %s", guessed_paths)

        # Map each guessed release path back to the first CQ artifact it was
        # derived from, skipping artifacts with no release guess.
        release_to_cq = {}
        for cq_path, release_path in guessed_paths:
            if release_path and release_path not in release_to_cq:
                release_to_cq[release_path] = cq_path
        if not release_to_cq:
            # Don't query gsutil with an empty URL list; there is nothing
            # that could match.
            logging.info(
                "No %s artifacts of CQ run %d came from a CQ builder",
                artifact_name,
                cq_build_id,
            )
            continue

        # Only keep listed paths that are known guesses, so the lookup of the
        # matching CQ artifact below cannot fail.
        release_artifacts = [
            x for x in try_gsutil_ls(list(release_to_cq)) if x in release_to_cq
        ]
        if not release_artifacts:
            logging.info(
                "No release %s artifacts could be found for CQ builder %d.",
                artifact_name,
                cq_build_id,
            )
            continue

        # `try_gsutil_ls` makes no ordering guarantees; always pick the min()
        # artifact here for consistency across reruns.
        selected_release_artifact = min(release_artifacts)
        logging.info("Selected release artifact: %s", selected_release_artifact)
        return selected_release_artifact, release_to_cq[selected_release_artifact]
    return None
267
268
def inspect_gs_impl(
    baseline_gs_url: str, new_gs_url: str, artifact: ComparableArtifact
) -> None:
    """Compares the artifacts at the given URLs, logging the results.

    Args:
        baseline_gs_url: gs:// URL of the baseline artifact.
        new_gs_url: gs:// URL of the artifact to compare against the baseline.
        artifact: Knows how to download and measure this kind of artifact.
    """
    size_diff = artifact.compare_size_from_gs(baseline_gs_url, new_gs_url)
    baseline_size = size_diff.baseline_size_bytes
    new_size = size_diff.new_size_bytes
    # `%d` doesn't support `,` as a modifier, and commas make these numbers
    # much easier to read. Prefer to keep strings interpreted as format strings
    # constant.
    logging.info("Baseline size: %s", f"{baseline_size:,}")
    logging.info("New size: %s", f"{new_size:,}")

    if not baseline_size:
        # A relative diff against zero is undefined; report that rather than
        # dying with a ZeroDivisionError after the sizes were already logged.
        logging.warning("Baseline size is 0; can't compute a percentage diff")
        return

    # Signed on purpose: negative when the new artifact is smaller.
    diff_pct = new_size / baseline_size - 1
    logging.info("Diff: %.2f%%", diff_pct * 100)
282
283
def inspect_cl(opts: argparse.Namespace, artifact: ComparableArtifact) -> None:
    """Implements the `cl` subcommand of this script.

    Finds a CQ artifact for `opts.cl` along with a matching release artifact,
    then logs the size difference between the two. Exits the script with an
    error message if no CQ runs or no diffable artifacts can be found.
    """
    cq_build_ids = cros_cls.fetch_cq_orchestrator_ids(opts.cl)
    if not cq_build_ids:
        sys.exit(f"No completed cq-orchestrators found for {opts.cl}")

    # Reverse cq_build_ids so we try the newest first.
    # NOTE(review): this assumes fetch_cq_orchestrator_ids returns builds
    # oldest-first, as the intent stated above implies -- confirm.
    newest_first = list(reversed(cq_build_ids))
    diffable_artifacts = find_size_diffable_cq_artifacts(
        newest_first, artifact.artifact_name
    )
    if not diffable_artifacts:
        sys.exit("No diffable artifacts were found")

    baseline, new = diffable_artifacts
    logging.info("Comparing %s (baseline) to %s (new)", baseline, new)
    inspect_gs_impl(baseline, new, artifact)
    logging.warning(
        "Friendly reminder: CL inspection diffs between your CL and a "
        "corresponding release build. Size differences up to a few megabytes "
        "are expected and do not necessarily indicate a size difference "
        "attributable to your CL."
    )
306
307
def inspect_gs(opts: argparse.Namespace, artifact: ComparableArtifact) -> None:
    """Runs the `gs` subcommand: compares two explicitly given gs:// files."""
    baseline_url = opts.baseline
    new_url = opts.new
    inspect_gs_impl(baseline_url, new_url, artifact)
311
312
def main(argv: List[str]) -> None:
    """Parses `argv`, configures logging, and runs the chosen subcommand."""
    arg_parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # Exactly one kind of artifact must be picked for the comparison.
    artifact_kind = arg_parser.add_mutually_exclusive_group(required=True)
    artifact_kind.add_argument(
        "--image", action="store_true", help="Compare image.zip sizes."
    )
    artifact_kind.add_argument(
        "--debuginfo", action="store_true", help="Compare debuginfo sizes."
    )
    arg_parser.add_argument(
        "--debug", action="store_true", help="Enable debug logging"
    )

    # Each subcommand records its handler in `func`.
    subcommands = arg_parser.add_subparsers(required=True)

    cl_cmd = subcommands.add_parser(
        "cl", help="Inspect a CL's CQ runs to find artifacts to compare."
    )
    cl_cmd.set_defaults(func=inspect_cl)
    cl_cmd.add_argument(
        "cl",
        type=cros_cls.ChangeListURL.parse_with_patch_set,
        help="CL to inspect CQ runs of. This must contain a patchset number.",
    )

    gs_cmd = subcommands.add_parser(
        "gs", help="Directly compare two zip files from gs://."
    )
    gs_cmd.add_argument("baseline", help="Baseline file to compare.")
    gs_cmd.add_argument("new", help="New file to compare.")
    gs_cmd.set_defaults(func=inspect_gs)

    opts = arg_parser.parse_args(argv)

    log_level = logging.DEBUG if opts.debug else logging.INFO
    logging.basicConfig(
        format=">> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: "
        "%(message)s",
        level=log_level,
    )

    assert getattr(opts, "func", None), "Unknown subcommand?"
    if not opts.image:
        assert opts.debuginfo
        artifact: ComparableArtifact = DebugInfoArtifact()
    else:
        artifact = ImageSizeArtifact()

    opts.func(opts, artifact)
363
364
# Script entry point: hand the command-line arguments (minus the program
# name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])
367