#!/usr/bin/env python3
# Copyright 2024 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Fetches the size diff between two images on gs://.

If given a CL, this will autodetect a passing CQ builder on that CL and find
a corresponding release build for said CQ builder. The sizes of these images
will be compared.

**Please note** that there's often version skew between release builds and CQ
builds. While this skew shouldn't result in _huge_ binary size differences,
it can still account for a few MB of diff in an average case.
"""

import abc
import argparse
import dataclasses
import json
import logging
import os
from pathlib import Path
import subprocess
import sys
import tempfile
from typing import List, Optional, Tuple

import cros_cls


@dataclasses.dataclass(frozen=True)
class SizeDiffInfo:
    """Holds information about a size difference."""

    baseline_size_bytes: int
    new_size_bytes: int


class ComparableArtifact(abc.ABC):
    """Artifacts from CQ runs that can be compared."""

    @property
    @abc.abstractmethod
    def artifact_name(self) -> str:
        """Returns the name of the artifact in gs:// e.g., "image.zip"."""

    @abc.abstractmethod
    def _measure_artifact_size(self, file: Path) -> int:
        """Given a path to the artifact, extract the relevant size info.

        The directory that `file` is in may be mutated by this function. No
        guarantees are made about the state of said directory after execution
        finishes, except that `file` should remain unmodified.
        """

    def _download_and_measure_size(self, gs_url: str) -> int:
        """Downloads `gs_url` into a scratch directory and measures it.

        Args:
            gs_url: gs:// URL of the artifact to download with `gsutil cp`.

        Returns:
            Whatever `_measure_artifact_size` reports for the downloaded file.

        Raises:
            subprocess.CalledProcessError: if `gsutil cp` fails.
        """
        # The temporary directory (and everything unpacked into it) is removed
        # as soon as the measurement is done.
        with tempfile.TemporaryDirectory(
            prefix="fetch_size_diff_"
        ) as tempdir_str:
            into = Path(tempdir_str)
            local_file = into / os.path.basename(gs_url)
            subprocess.run(
                ["gsutil", "cp", gs_url, local_file],
                check=True,
                stdin=subprocess.DEVNULL,
            )
            return self._measure_artifact_size(local_file)

    def compare_size_from_gs(self, baseline: str, new: str) -> SizeDiffInfo:
        """Downloads and measures both artifacts, returning their sizes.

        Args:
            baseline: gs:// URL of the baseline artifact.
            new: gs:// URL of the artifact to compare against the baseline.

        Returns:
            A SizeDiffInfo holding the measured size of each artifact.
        """
        return SizeDiffInfo(
            baseline_size_bytes=self._download_and_measure_size(baseline),
            new_size_bytes=self._download_and_measure_size(new),
        )


class DebugInfoArtifact(ComparableArtifact):
    """ComparableArtifact instance for debuginfo."""

    @property
    def artifact_name(self) -> str:
        return "debug.tgz"

    def _measure_artifact_size(self, file: Path) -> int:
        """Extracts chrome.debug from the tarball and returns its file size."""
        chrome_debug = "./opt/google/chrome/chrome.debug"
        logging.info("Unpacking debuginfo...")
        # Only the single member of interest is extracted, next to `file`.
        subprocess.run(
            ["tar", "xaf", file, chrome_debug],
            check=True,
            cwd=file.parent,
            stdin=subprocess.DEVNULL,
        )
        return os.path.getsize(file.parent / chrome_debug)


class ImageSizeArtifact(ComparableArtifact):
    """ComparableArtifact instance for image files."""

    @property
    def artifact_name(self) -> str:
        return "image.zip"

    def _measure_artifact_size(self, file: Path) -> int:
        """Reads `total_size` from the package-sizes JSON inside the zip.

        Raises:
            subprocess.CalledProcessError: if `unzip` fails.
            ValueError: if the JSON is not an object, lacks `total_size`, or
                `total_size` is not an int.
        """
        binpkg_sizes_name = "chromiumos_base_image.bin-package-sizes.json"
        subprocess.run(
            [
                "unzip",
                file.name,
                binpkg_sizes_name,
            ],
            check=True,
            cwd=file.parent,
            stdin=subprocess.DEVNULL,
        )
        with (file.parent / binpkg_sizes_name).open(encoding="utf-8") as f:
            loaded = json.load(f)

        if not isinstance(loaded, dict):
            raise ValueError(
                f"Expected a JSON object in {binpkg_sizes_name}; "
                f"got {type(loaded)}"
            )

        try:
            size = loaded["total_size"]
        except KeyError:
            # The KeyError itself carries no extra information; the message
            # below already lists the keys that were present.
            raise ValueError(
                f"Missing total_size in {loaded.keys()}"
            ) from None

        if not isinstance(size, int):
            raise ValueError(
                f"total_size was unexpectedly {type(size)}: {size}"
            )
        return size


def is_probably_non_production_builder(builder_name: str) -> bool:
    """Quickly determine if a builder doesn't represent a board in production.

    Note that this is a heuristic; results should be taken as mostly accurate.
    """
    return any(
        x in builder_name
        for x in (
            "-asan-",
            "-buildtest-",
            "-fuzzer-",
            "-kernelnext-",
            "-ubsan-",
            "-vmtest-",
        )
    )


def guess_release_artifact_path(artifact_link: str) -> Optional[str]:
    """Guesses a close-enough release path for a CQ artifact.

    Args:
        artifact_link: gs:// URL of an artifact inside a builder's artifacts
            directory.

    Returns:
        A path to the release artifact. Returns None if the directory above
        the artifacts directory does not end in "-cq", i.e., the artifact
        does not appear to come from a CQ builder.

    >>> guess_release_artifact_path(
    ...     "gs://chromeos-image-archive/brya-cq/"
    ...     "R121-15677.0.0-90523-8764532770258575633/image.zip"
    ... )
    'gs://chromeos-image-archive/brya-release/R121-15677.0.0/image.zip'
    """
    artifacts_link = os.path.dirname(artifact_link)
    release_version = cros_cls.parse_release_from_builder_artifacts_link(
        artifacts_link
    )
    # Scrape the board name from a level above the artifacts directory.
    builder = os.path.basename(os.path.dirname(artifacts_link))
    cq_suffix = "-cq"
    if not builder.endswith(cq_suffix):
        return None
    board = builder[: -len(cq_suffix)]
    return (
        f"gs://chromeos-image-archive/{board}-release/{release_version}/"
        f"{os.path.basename(artifact_link)}"
    )


def try_gsutil_ls(paths: List[str]) -> List[str]:
    """Returns all of the paths `gsutil` matches from `paths`.

    Ignores errors from gsutil about paths not existing.

    Args:
        paths: gs:// URLs to list. An empty list yields an empty result
            without invoking gsutil.

    Returns:
        The non-blank lines gsutil printed, stripped of whitespace.

    Raises:
        subprocess.CalledProcessError: if gsutil fails for any reason other
            than some URLs matching no objects.
    """
    if not paths:
        # `gsutil ls` without any URL lists the project's buckets instead of
        # listing nothing, which would hand callers unrelated results.
        return []

    result = subprocess.run(
        ["gsutil", "-m", "ls"] + paths,
        # If any URI doesn't exist, gsutil will fail. Ignore the failure.
        check=False,
        encoding="utf-8",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if result.returncode:
        # Ensure the error message is what's expected, rather than e.g.,
        # invalid credentials.
        err_msg = "CommandException: One or more URLs matched no objects"
        if err_msg not in result.stderr:
            logging.error(
                "gsutil had unexpected output; stderr: %r", result.stderr
            )
            result.check_returncode()
    stripped_lines = (x.strip() for x in result.stdout.splitlines())
    return [x for x in stripped_lines if x]


def find_size_diffable_cq_artifacts(
    cq_build_ids: List[cros_cls.BuildID],
    artifact_name: str,
) -> Optional[Tuple[str, str]]:
    """Searches the cq-orchestrator builds for candidates for size comparison.

    Args:
        cq_build_ids: cq-orchestrator build IDs, inspected in the given order.
        artifact_name: file name of the artifact to look for in each child
            builder's artifacts directory.

    Returns:
        None if no candidates are found. Otherwise, returns a two-tuple: index
        0 is the baseline (release) artifact, index 1 is the corresponding
        artifact generated by the CQ.
    """
    for cq_build_id in cq_build_ids:
        logging.info("Inspecting CQ build %d...", cq_build_id)
        orch_output = cros_cls.CQOrchestratorOutput.fetch(cq_build_id)
        child_builder_values = cros_cls.CQBoardBuilderOutput.fetch_many(
            [
                val
                for name, val in orch_output.child_builders.items()
                if not is_probably_non_production_builder(name)
            ]
        )
        artifacts_links = [
            x.artifacts_link
            for x in child_builder_values
            if x.artifacts_link is not None
        ]
        if not artifacts_links:
            logging.info("No children of CQ run %d had artifacts", cq_build_id)
            continue

        potential_artifacts = try_gsutil_ls(
            [os.path.join(x, artifact_name) for x in artifacts_links]
        )
        if not potential_artifacts:
            logging.info(
                "No children of CQ run %d produced a(n) %s",
                cq_build_id,
                artifact_name,
            )
            continue

        logging.debug(
            "Found candidate %s files: %s", artifact_name, potential_artifacts
        )
        # Pairs of (CQ artifact, guessed release artifact or None).
        guessed_paths = [
            (x, guess_release_artifact_path(x)) for x in potential_artifacts
        ]
        logging.debug("Guessed corresponding artifact files: %s", guessed_paths)
        release_artifacts = try_gsutil_ls([x for _, x in guessed_paths if x])
        if not release_artifacts:
            logging.info(
                "No release %s artifacts could be found for CQ builder %d.",
                artifact_name,
                cq_build_id,
            )
            continue

        # `try_gsutil_ls` makes no ordering guarantees; always pick the min()
        # artifact here for consistency across reruns.
        selected_release_artifact = min(release_artifacts)
        logging.info("Selected release artifact: %s", selected_release_artifact)
        # Map the chosen release artifact back to the CQ artifact it was
        # guessed from.
        cq_artifact = next(
            cq_path
            for cq_path, guessed_path in guessed_paths
            if guessed_path == selected_release_artifact
        )
        return selected_release_artifact, cq_artifact
    return None


def inspect_gs_impl(
    baseline_gs_url: str, new_gs_url: str, artifact: ComparableArtifact
) -> None:
    """Compares the artifacts at the given URLs, logging the results.

    Args:
        baseline_gs_url: gs:// URL of the baseline artifact.
        new_gs_url: gs:// URL of the new artifact.
        artifact: determines how each downloaded artifact is measured.
    """
    size_diff = artifact.compare_size_from_gs(baseline_gs_url, new_gs_url)
    # `%d` doesn't support `,` as a modifier, and commas make these numbers
    # much easier to read. Prefer to keep strings interpreted as format strings
    # constant.
    logging.info("Baseline size: %s", f"{size_diff.baseline_size_bytes:,}")
    logging.info("New size: %s", f"{size_diff.new_size_bytes:,}")

    if not size_diff.baseline_size_bytes:
        # A relative diff against a zero-sized baseline is undefined.
        logging.warning("Baseline size is 0; cannot compute a relative diff")
        return

    # Signed relative change: negative means the new artifact is smaller.
    diff_pct = size_diff.new_size_bytes / size_diff.baseline_size_bytes - 1
    logging.info("Diff: %.2f%%", diff_pct * 100)


def inspect_cl(opts: argparse.Namespace, artifact: ComparableArtifact) -> None:
    """Implements the `cl` subcommand of this script."""
    cq_build_ids = cros_cls.fetch_cq_orchestrator_ids(opts.cl)
    if not cq_build_ids:
        sys.exit(f"No completed cq-orchestrators found for {opts.cl}")

    # NOTE(review): the build IDs are tried in exactly the order returned by
    # `cros_cls.fetch_cq_orchestrator_ids`. A previous comment here said the
    # list is reversed so the newest is tried first, but no reversal is done;
    # confirm which ordering that function provides.
    diffable_artifacts = find_size_diffable_cq_artifacts(
        cq_build_ids, artifact.artifact_name
    )
    if not diffable_artifacts:
        sys.exit("No diffable artifacts were found")

    baseline, new = diffable_artifacts
    logging.info("Comparing %s (baseline) to %s (new)", baseline, new)
    inspect_gs_impl(baseline, new, artifact)
    logging.warning(
        "Friendly reminder: CL inspection diffs between your CL and a "
        "corresponding release build. Size differences up to a few megabytes "
        "are expected and do not necessarily indicate a size difference "
        "attributable to your CL."
    )


def inspect_gs(opts: argparse.Namespace, artifact: ComparableArtifact) -> None:
    """Implements the `gs` subcommand of this script."""
    inspect_gs_impl(opts.baseline, opts.new, artifact)


def main(argv: List[str]) -> None:
    """Parses `argv`, sets up logging, and runs the selected subcommand.

    Args:
        argv: command-line arguments, excluding the program name.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    what_to_compare = parser.add_mutually_exclusive_group(required=True)
    what_to_compare.add_argument(
        "--image", action="store_true", help="Compare image.zip sizes."
    )
    what_to_compare.add_argument(
        "--debuginfo", action="store_true", help="Compare debuginfo sizes."
    )

    parser.add_argument(
        "--debug", action="store_true", help="Enable debug logging"
    )
    subparsers = parser.add_subparsers(required=True)

    cl_parser = subparsers.add_parser(
        "cl", help="Inspect a CL's CQ runs to find artifacts to compare."
    )
    cl_parser.set_defaults(func=inspect_cl)
    cl_parser.add_argument(
        "cl",
        type=cros_cls.ChangeListURL.parse_with_patch_set,
        help="CL to inspect CQ runs of. This must contain a patchset number.",
    )

    gs_parser = subparsers.add_parser(
        "gs", help="Directly compare two zip files from gs://."
    )
    gs_parser.add_argument("baseline", help="Baseline file to compare.")
    gs_parser.add_argument("new", help="New file to compare.")
    gs_parser.set_defaults(func=inspect_gs)
    opts = parser.parse_args(argv)

    logging.basicConfig(
        format=">> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: "
        "%(message)s",
        level=logging.DEBUG if opts.debug else logging.INFO,
    )

    # Each subparser registers its handler via set_defaults(func=...).
    assert getattr(opts, "func", None), "Unknown subcommand?"
    if opts.image:
        artifact: ComparableArtifact = ImageSizeArtifact()
    else:
        assert opts.debuginfo
        artifact = DebugInfoArtifact()

    opts.func(opts, artifact)


if __name__ == "__main__":
    main(sys.argv[1:])