#!/usr/bin/env python3
# Copyright 2024 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Helps reason about -Werror logs emitted by the compiler wrapper.

Specifically, this works with the -Werror reports produced by the compiler
wrapper in FORCE_DISABLE_WERROR mode. It's intended to be run on trees of these
reports, so devs can run roughly the following commands:

$ apply_force_disable_werror # (There's no actual script to do this today.)
$ build_packages --board=foo --nousepkg
$ ./werror_logs.py aggregate --directory=/build/foo/var/lib/chromeos

And see a full aggregation of all warnings that were suppressed in that
`build_packages` invocation.

It can also be used to fetch warnings reports from CQ runs, for instance,
$ ./werror_logs.py fetch-cq --cq-orchestrator-id=123456

In this case, it downloads _all -Werror logs_ from children of the given
cq-orchestrator, and prints the parent directory of all of these reports. If
you run `aggregate` on this directory, it's highly recommended to use the
`--canonicalize-board-roots` flag.
"""

import argparse
import collections
import dataclasses
import json
import logging
import multiprocessing.pool
import os
from pathlib import Path
import re
import shutil
import subprocess
import sys
import tempfile
import threading
from typing import Any, Counter, DefaultDict, Dict, IO, Iterable, List, Optional

import cros_cls


_DEFAULT_FETCH_DIRECTORY = Path("/tmp/werror_logs")


def canonicalize_file_path_board_root(file_path: str) -> str:
    """Normalizes `file_path`, replacing any board root with `{board}`.

    For example, `/build/atlas//usr/include/foo.h` becomes
    `/build/{board}/usr/include/foo.h`, so reports from different boards
    aggregate together.
    """
    # Get rid of double slashes, unnecessary directory traversal
    # (foo/../bar/..), etc. Easier to read this way.
    file_path = os.path.normpath(file_path)
    if file_path.startswith("/build/"):
        i = file_path.find("/", len("/build/"))
        if i != -1:
            return f"/build/{{board}}/{file_path[i+1:]}"
    return file_path


@dataclasses.dataclass(frozen=True, eq=True, order=True)
class ClangWarningLocation:
    """Represents a location at which a Clang warning was emitted."""

    file: str
    line: int
    column: int

    @classmethod
    def parse(
        cls, location: str, canonicalize_board_root: bool = False
    ) -> "ClangWarningLocation":
        """Parses a `file:line:col` string into a ClangWarningLocation.

        Raises:
            ValueError if `location` is not of the form `file:line:col`.
        """
        split = location.rsplit(":", 2)
        if len(split) == 3:
            file = split[0]
            if canonicalize_board_root:
                file = canonicalize_file_path_board_root(file)
            return cls(file=file, line=int(split[1]), column=int(split[2]))
        raise ValueError(f"Invalid location: {location!r}")


@dataclasses.dataclass(frozen=True, eq=True)
class ClangWarning:
    """Represents a Clang warning at a specific location (if applicable)."""

    # The name of the warning, e.g., -Wunused-variable
    name: str
    # The message of the warning, e.g., "'allocate' is deprecated."
    message: str
    # The location of this warning. Not present for frontend diagnostics.
    location: Optional[ClangWarningLocation]

    # This parses two kinds of errors:
    # 1. `clang-17: error: foo [-W...]`
    # 2. `/file/path:123:45: error: foo [-W...]"
    _WARNING_RE = re.compile(
        # Capture the location on its own, since `clang-\d+` is unused below.
        r"^(?:([^:]*:\d+:\d+)|clang-\d+)"
        r": error: "
        # Capture the message
        r"(.*?)\s+"
        r"\[(-W[^\][]+)]\s*$"
    )

    @classmethod
    def try_parse_line(
        cls, line: str, canonicalize_board_root: bool = False
    ) -> Optional["ClangWarning"]:
        """Parses one compiler output line into a ClangWarning, if possible.

        Returns:
            The parsed warning, or None if `line` isn't a -Werror diagnostic.

        Raises:
            ValueError if the line carries more than one warning flag after
            `-Werror` is removed; that's unexpected in practice.
        """
        # Fast path: we can expect "error: " in interesting lines. Break early
        # if that's not present.
        if "error: " not in line:
            return None

        m = cls._WARNING_RE.fullmatch(line)
        if not m:
            return None

        location, message, warning_flags = m.groups()
        individual_warning_flags = [
            x for x in warning_flags.split(",") if x != "-Werror"
        ]

        # This isn't impossible to handle in theory, just unexpected. Complain
        # about it.
        if len(individual_warning_flags) != 1:
            raise ValueError(
                f"Weird: parsed warnings {individual_warning_flags} out "
                f"of {line}"
            )

        if location is None:
            parsed_location = None
        else:
            parsed_location = ClangWarningLocation.parse(
                location, canonicalize_board_root
            )
        return cls(
            name=individual_warning_flags[0],
            message=message,
            location=parsed_location,
        )


@dataclasses.dataclass(frozen=True, eq=True)
class WarningInfo:
    """Carries information about a ClangWarning."""

    # Mapping of package name -> number of times the warning was seen there.
    packages: DefaultDict[str, int] = dataclasses.field(
        default_factory=lambda: collections.defaultdict(int)
    )


class UnknownPackageNameError(ValueError):
    """Raised when a package name can't be determined from a warning report."""


@dataclasses.dataclass
class AggregatedWarnings:
    """Aggregates warning reports incrementally."""

    num_reports: int = 0
    # Mapping of warning -> list of packages that emitted it. Warnings in
    # headers may be referred to by multiple packages.
    warnings: DefaultDict[ClangWarning, WarningInfo] = dataclasses.field(
        default_factory=lambda: collections.defaultdict(WarningInfo)
    )

    # Matches the portage build directory of a package, with or without a
    # board root, e.g., `/build/atlas/var/tmp/portage/cat/pkg-1.0/...`.
    _CWD_PACKAGE_RE = re.compile(
        r"^(?:/build/[^/]+)?/var/(?:cache|tmp)/portage/([^/]+/[^/]+)/"
    )

    @classmethod
    def _guess_package_name(cls, report: Dict[str, Any]) -> str:
        """Tries to guess what package `report` is from.

        Raises:
            UnknownPackageNameError if the package's name couldn't be
            determined.
        """
        m = cls._CWD_PACKAGE_RE.match(report.get("cwd", ""))
        if not m:
            raise UnknownPackageNameError()
        return m.group(1)

    def add_report_json(
        self, report_json: Dict[str, Any], canonicalize_board_root: bool = False
    ) -> int:
        """Adds the given report, returning the number of warnings parsed.

        Raises:
            UnknownPackageNameError if the package's name couldn't be
            determined.
        """
        self.num_reports += 1
        package_name = self._guess_package_name(report_json)

        num_warnings = 0
        for line in report_json.get("stdout", "").splitlines():
            if parsed := ClangWarning.try_parse_line(
                line, canonicalize_board_root
            ):
                self.warnings[parsed].packages[package_name] += 1
                num_warnings += 1

        return num_warnings

    def add_report(
        self, report_file: Path, canonicalize_board_root: bool = False
    ) -> None:
        """Loads a JSON report file and folds it into this aggregation.

        Reports with an undeterminable package name, or with no parseable
        warnings, are logged and otherwise skipped.
        """
        with report_file.open(encoding="utf-8") as f:
            report = json.load(f)

        try:
            n = self.add_report_json(report, canonicalize_board_root)
        except UnknownPackageNameError:
            logging.warning(
                "Failed guessing package name for report at %r; ignoring file",
                report_file,
            )
            return

        if not n:
            logging.warning(
                "Report at %r had no parseable warnings", report_file
            )


def print_aligned_counts(
    name_count_map: Dict[str, int], file: Optional[IO[str]] = None
) -> None:
    """Prints `name: count` lines, right-aligned, highest count first.

    `name_count_map` must be nonempty; callers are expected to check.
    """
    assert name_count_map
    # Sort on value, highest first. Name breaks ties.
    summary = sorted(name_count_map.items(), key=lambda x: (-x[1], x[0]))
    # The first entry has the largest count, so its width fits all counts.
    num_col_width = len(f"{summary[0][1]:,}")
    name_col_width = max(len(x) for x in name_count_map)
    for name, count in summary:
        fmt_name = name.rjust(name_col_width)
        fmt_count = f"{count:,}".rjust(num_col_width)
        print(f"\t{fmt_name}: {fmt_count}", file=file)


def summarize_per_package_warnings(
    warning_infos: Iterable[WarningInfo],
    file: Optional[IO[str]] = None,
) -> None:
    """Prints a per-package total of suppressed warnings, if any exist."""
    warnings_per_package: DefaultDict[str, int] = collections.defaultdict(int)
    for info in warning_infos:
        for package_name, warning_count in info.packages.items():
            warnings_per_package[package_name] += warning_count

    if not warnings_per_package:
        return

    print("## Per-package warning counts:", file=file)
    print_aligned_counts(warnings_per_package, file=file)


def summarize_warnings_by_flag(
    warnings: Dict[ClangWarning, WarningInfo],
    file: Optional[IO[str]] = None,
) -> None:
    """Prints a per-`-W` flag total of suppressed warnings, if any exist."""
    if not warnings:
        return

    warnings_per_flag: Counter[str] = collections.Counter()
    for warning, info in warnings.items():
        warnings_per_flag[warning.name] += sum(info.packages.values())

    print("## Instances of each fatal warning:", file=file)
    print_aligned_counts(warnings_per_flag, file=file)


def aggregate_reports(opts: argparse.Namespace) -> None:
    """Implements the `aggregate` subcommand.

    Raises:
        ValueError if no report files are found under `opts.directory`.
    """
    directory = opts.directory
    aggregated = AggregatedWarnings()
    for report in directory.glob("**/warnings_report*.json"):
        logging.debug("Discovered report %s", report)
        aggregated.add_report(report, opts.canonicalize_board_roots)

    if not aggregated.num_reports:
        raise ValueError(f"Found no warnings report under {directory}")

    logging.info("Discovered %d report files in total", aggregated.num_reports)
    summarize_per_package_warnings(aggregated.warnings.values())
    summarize_warnings_by_flag(aggregated.warnings)


def fetch_werror_tarball_links(
    child_builders: Dict[str, cros_cls.BuildID]
) -> List[str]:
    """Returns gs:// URLs of -Werror tarballs from the given child builders.

    Builders without output artifacts are logged and skipped.
    """
    outputs = cros_cls.CQBoardBuilderOutput.fetch_many(child_builders.values())
    artifacts_links = []
    for builder_name, out in zip(child_builders, outputs):
        if out.artifacts_link:
            artifacts_links.append(out.artifacts_link)
        else:
            logging.info("%s had no output artifacts; ignoring", builder_name)

    gsutil_stdout = subprocess.run(
        ["gsutil", "-m", "ls"] + artifacts_links,
        check=True,
        encoding="utf-8",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
    ).stdout

    return [
        x
        for x in gsutil_stdout.splitlines()
        if x.endswith(".fatal_clang_warnings.tar.xz")
    ]


def cq_builder_name_from_werror_logs_path(werror_logs: str) -> str:
    """Returns the CQ builder given a -Werror logs path.

    >>> cq_builder_name_from_werror_logs_path(
            "gs://chromeos-image-archive/staryu-cq/"
            "R123-15771.0.0-94466-8756713501925941617/"
            "staryu.20240207.fatal_clang_warnings.tar.xz"
        )
    "staryu-cq"
    """
    return os.path.basename(os.path.dirname(os.path.dirname(werror_logs)))


def download_and_unpack_werror_tarballs(
    unpack_dir: Path, download_dir: Path, gs_urls: List[str]
):
    """Downloads all `gs_urls` and unpacks them, one subdir per CQ builder.

    Tarballs land in `download_dir`; their extracted contents land in
    `unpack_dir`. Both directories are created by this function.

    Raises:
        ValueError if any download or extraction failed.
    """
    # This is necessary below when we're untarring files. It should trivially
    # always be the case, and assuming it makes testing easier.
    assert download_dir.is_absolute(), download_dir

    unpack_dir.mkdir()
    download_dir.mkdir()

    logging.info(
        "Fetching and unpacking %d -Werror reports; this may take a bit",
        len(gs_urls),
    )
    # Run the download in a threadpool since we can have >100 logs, and all of
    # this is heavily I/O-bound.
    # Max 8 downloads at a time is arbitrary, but should minimize the chance of
    # rate-limiting. Don't limit `tar xaf`, since those should be short-lived.
    download_limiter = threading.BoundedSemaphore(8)

    def download_one_url(
        unpack_dir: Path, download_dir: Path, gs_url: str
    ) -> Optional[subprocess.CalledProcessError]:
        """Downloads and unpacks -Werror logs from the given gs_url.

        Leaves the tarball in `download_dir`, and the unpacked version in
        `unpack_dir`.

        Returns:
            None if all went well; otherwise, returns the command that failed.
            All commands have stderr data piped in.
        """
        file_targ = download_dir / os.path.basename(gs_url)
        try:
            with download_limiter:
                subprocess.run(
                    ["gsutil", "cp", gs_url, file_targ],
                    check=True,
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.PIPE,
                    encoding="utf-8",
                    errors="replace",
                )

            # N.B., file_targ is absolute, so running with `file_targ` while
            # changing `cwd` is safe.
            subprocess.run(
                ["tar", "xaf", file_targ],
                check=True,
                cwd=unpack_dir,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                encoding="utf-8",
                errors="replace",
            )
        except subprocess.CalledProcessError as e:
            return e
        return None

    with multiprocessing.pool.ThreadPool() as thread_pool:
        download_futures = []
        for gs_url in gs_urls:
            name = cq_builder_name_from_werror_logs_path(gs_url)
            unpack_to = unpack_dir / name
            unpack_to.mkdir()
            download_to = download_dir / name
            download_to.mkdir()
            download_futures.append(
                (
                    name,
                    thread_pool.apply_async(
                        download_one_url, (unpack_to, download_to, gs_url)
                    ),
                )
            )

        num_failures = 0
        for name, future in download_futures:
            result = future.get()
            if not result:
                continue

            num_failures += 1
            logging.error(
                "Downloading %s failed: running %r. Stderr: %r",
                name,
                result.cmd,
                result.stderr,
            )
    if num_failures:
        raise ValueError(f"{num_failures} download(s) failed.")


def fetch_cq_reports(opts: argparse.Namespace) -> None:
    """Implements the `fetch-cq` subcommand.

    Resolves a cq-orchestrator run (from `--cl` or `--cq-orchestrator-id`),
    downloads all -Werror tarballs from its child builders, and unpacks them
    into the output directory.

    Raises:
        ValueError if no cq-orchestrators, child builders, or -Werror logs
        could be found.
    """
    if opts.cl:
        logging.info(
            "Fetching most recent completed CQ orchestrator from %s", opts.cl
        )
        all_ids = cros_cls.fetch_cq_orchestrator_ids(opts.cl)
        if not all_ids:
            raise ValueError(
                f"No CQ orchestrators found under {opts.cl}. See --help for "
                "how to pass a build ID directly."
            )
        # Note that these cq-orchestrator runs are returned in oldest-to-newest
        # order. The user probably wants the newest run.
        cq_orchestrator_id = all_ids[-1]
        cq_orchestrator_url = cros_cls.builder_url(cq_orchestrator_id)
        logging.info("Checking CQ run %s", cq_orchestrator_url)
    else:
        cq_orchestrator_id = opts.cq_orchestrator_id
        cq_orchestrator_url = cros_cls.builder_url(cq_orchestrator_id)

    # This is the earliest point at which we can compute this directory with
    # certainty. Figure it out now and fail early if it exists.
    output_directory = opts.directory
    if not output_directory:
        output_directory = _DEFAULT_FETCH_DIRECTORY / str(cq_orchestrator_id)

    if output_directory.exists():
        if not opts.force:
            sys.exit(
                f"Directory at {output_directory} exists; not overwriting. "
                "Pass --force to overwrite."
            )
        # Actually _remove_ it when we have all logs unpacked and are able to
        # create the output directory with confidence.

    logging.info("Fetching info on child builders of %s", cq_orchestrator_url)
    child_builders = cros_cls.CQOrchestratorOutput.fetch(
        cq_orchestrator_id
    ).child_builders
    if not child_builders:
        raise ValueError(f"No child builders found for {cq_orchestrator_url}")

    logging.info(
        "%d child builders found; finding associated tarball links",
        len(child_builders),
    )
    werror_links = fetch_werror_tarball_links(child_builders)
    if not werror_links:
        raise ValueError(
            f"No -Werror logs found in children of {cq_orchestrator_url}"
        )

    logging.info("%d -Werror logs found", len(werror_links))
    # The label is a `prefix` so the temp dir is recognizable at a glance;
    # passing it positionally would make it a suffix instead.
    with tempfile.TemporaryDirectory(prefix="werror_logs_fetch_cq") as t:
        tempdir = Path(t)
        unpack_dir = tempdir / "unpacked"
        download_and_unpack_werror_tarballs(
            unpack_dir=unpack_dir,
            download_dir=tempdir / "tarballs",
            gs_urls=werror_links,
        )

        # `unpack_dir` lives inside `tempdir`, so it must be moved out before
        # the TemporaryDirectory context exits and deletes it.
        if output_directory.exists():
            logging.info("Removing output directory at %s", output_directory)
            shutil.rmtree(output_directory)
        output_directory.parent.mkdir(parents=True, exist_ok=True)
        # (Convert these to strs to keep mypy happy.)
        shutil.move(str(unpack_dir), str(output_directory))
    logging.info(
        "CQ logs from %s stored in %s",
        cq_orchestrator_url,
        output_directory,
    )


def main(argv: List[str]) -> None:
    """Parses `argv` and dispatches to the selected subcommand."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--debug", action="store_true", help="Enable debug logging"
    )
    subparsers = parser.add_subparsers(required=True)
    aggregate = subparsers.add_parser(
        "aggregate",
        help="""
        Aggregate all -Werror reports beneath a directory. Note that this will
        traverse all children of the directory, so can be used either on
        unpacked -Werror reports from CQ builders, or can be used on e.g.,
        /build/cherry/var/lib/chromeos.
        """,
    )
    aggregate.set_defaults(func=aggregate_reports)
    aggregate.add_argument(
        "--canonicalize-board-roots",
        action="store_true",
        help="""
        Converts warnings paths starting with a board root (e.g., /build/atlas)
        to a form consistent across many boards.
        """,
    )
    aggregate.add_argument(
        "--directory", type=Path, required=True, help="Directory to inspect."
    )

    fetch_cq = subparsers.add_parser(
        "fetch-cq",
        help="Fetch all -Werror reports for a CQ run.",
    )
    fetch_cq.set_defaults(func=fetch_cq_reports)
    cl_or_cq_orchestrator = fetch_cq.add_mutually_exclusive_group(required=True)
    cl_or_cq_orchestrator.add_argument(
        "--cl",
        type=cros_cls.ChangeListURL.parse_with_patch_set,
        help="Link to a CL to get the most recent cq-orchestrator from",
    )
    cl_or_cq_orchestrator.add_argument(
        "--cq-orchestrator-id",
        type=cros_cls.BuildID,
        help="""
        Build number for a cq-orchestrator run. Builders invoked by this are
        examined for -Werror logs.
        """,
    )
    fetch_cq.add_argument(
        "--directory",
        type=Path,
        help=f"""
        Directory to put downloaded -Werror logs in. Default is a subdirectory
        of {_DEFAULT_FETCH_DIRECTORY}.
        """,
    )
    fetch_cq.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="Remove the directory at `--directory` if it exists",
    )

    opts = parser.parse_args(argv)

    logging.basicConfig(
        format=">> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: "
        "%(message)s",
        level=logging.DEBUG if opts.debug else logging.INFO,
    )

    assert getattr(opts, "func", None), "Unknown subcommand?"
    opts.func(opts)


if __name__ == "__main__":
    main(sys.argv[1:])