xref: /aosp_15_r20/external/toolchain-utils/llvm_tools/nightly_revert_checker.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1#!/usr/bin/env python3
2# Copyright 2020 The ChromiumOS Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Checks for new reverts in LLVM on a nightly basis.
7
8If any reverts are found that were previously unknown, this cherry-picks them or
9fires off an email. All LLVM SHAs to monitor are autodetected.
10"""
11
12import argparse
13import dataclasses
14import json
15import logging
16import os
17from pathlib import Path
18import pprint
19import subprocess
20import sys
21import time
22from typing import Any, Callable, Dict, List, NamedTuple, Set, Tuple
23
24from cros_utils import email_sender
25from cros_utils import tiny_render
26import get_llvm_hash
27import get_upstream_patch
28import git_llvm_rev
29import revert_checker
30
31
# Number of seconds in a single day.
ONE_DAY_SECS = 60 * 60 * 24
# Gap between repeated emails about a HEAD that still has not moved.
HEAD_STALENESS_ALERT_INTERVAL_SECS = ONE_DAY_SECS * 21
# Delay after a HEAD changes before the first 'update' email about it is sent.
HEAD_STALENESS_ALERT_INITIAL_SECS = ONE_DAY_SECS * 60
37
38
# Left mutable on purpose: `next_notification_timestamp` gets reassigned in
# place after a staleness email goes out.
@dataclasses.dataclass(eq=True, frozen=False)
class HeadInfo:
    """Tracking record for a single HEAD that this script monitors."""

    # SHA most recently observed for this HEAD.
    last_sha: str
    # Timestamp at which the current value of this HEAD was first observed.
    first_seen_timestamp: int
    # Timestamp from which users should next be notified if this HEAD has not
    # moved.
    next_notification_timestamp: int

    @classmethod
    def from_json(cls, json_object: Any) -> "HeadInfo":
        """Builds a HeadInfo, using the keys of `json_object` as field names."""
        return cls(**json_object)

    def to_json(self) -> Any:
        """Returns this record as a plain dict keyed by field name."""
        as_dict = dataclasses.asdict(self)
        return as_dict
57
58
@dataclasses.dataclass(eq=True, frozen=True)
class State:
    """State that this script persists between runs.

    The dataclass is frozen, so its two attributes cannot be rebound; the
    dicts they hold are still filled in place by the rest of the script.
    """

    # LLVM SHA -> SHAs of the reverts already seen across it.
    seen_reverts: Dict[str, List[str]] = dataclasses.field(default_factory=dict)
    # Friendly HEAD name (e.g., main-legacy) -> last-known info about that
    # HEAD.
    heads: Dict[str, HeadInfo] = dataclasses.field(default_factory=dict)

    @classmethod
    def from_json(cls, json_object: Any) -> "State":
        """Builds a State from decoded JSON, upgrading the old layout.

        Input without a "heads" key is treated as the older format, in which
        the whole object is the `seen_reverts` mapping.
        """
        if "heads" in json_object:
            raw_reverts = json_object["seen_reverts"]
            raw_heads = json_object["heads"]
        else:
            # Old-style file: everything in it is the seen-reverts mapping.
            raw_reverts = json_object
            raw_heads = {}

        parsed_heads = {}
        for head_name, raw_info in raw_heads.items():
            parsed_heads[head_name] = HeadInfo.from_json(raw_info)
        return cls(seen_reverts=raw_reverts, heads=parsed_heads)

    def to_json(self) -> Any:
        """Returns a JSON-serializable dict in the current on-disk layout."""
        serialized_heads = {}
        for head_name, head_info in self.heads.items():
            serialized_heads[head_name] = head_info.to_json()
        return {
            "seen_reverts": self.seen_reverts,
            "heads": serialized_heads,
        }
91
92
def _find_interesting_android_shas(
    android_llvm_toolchain_dir: str,
) -> List[Tuple[str, str]]:
    """Returns (friendly name, SHA) pairs for the Android branches to check.

    Each SHA is the `git merge-base` of an AOSP branch with
    aosp/upstream-main, computed inside toolchain/llvm-project under
    `android_llvm_toolchain_dir`. The testing-upstream entry is left out when
    it resolves to the same SHA as main-legacy.
    """
    llvm_project = os.path.join(
        android_llvm_toolchain_dir, "toolchain/llvm-project"
    )

    def git_output(*git_args: str) -> str:
        # Runs `git <git_args>` in the LLVM checkout; returns stripped stdout.
        raw = subprocess.check_output(
            ["git", *git_args],
            cwd=llvm_project,
            encoding="utf-8",
        )
        return raw.strip()

    def get_llvm_merge_base(branch: str) -> str:
        # The branch's own HEAD is resolved only so it can be logged.
        head_sha = git_output("rev-parse", branch)
        merge_base = git_output("merge-base", branch, "aosp/upstream-main")
        logging.info(
            "Merge-base for %s (HEAD == %s) and upstream-main is %s",
            branch,
            head_sha,
            merge_base,
        )
        return merge_base

    main_legacy = get_llvm_merge_base("aosp/master-legacy")  # nocheck
    testing_upstream = get_llvm_merge_base("aosp/testing-upstream")

    # Identical SHAs mean there's nothing to gain from tracking both.
    if main_legacy == testing_upstream:
        logging.info(
            "main-legacy and testing-upstream are identical; ignoring "
            "the latter."
        )
        return [("main-legacy", main_legacy)]

    return [
        ("main-legacy", main_legacy),
        ("testing-upstream", testing_upstream),
    ]
132
133
def _find_interesting_chromeos_shas(
    chromeos_base: str,
) -> List[Tuple[str, str]]:
    """Returns (friendly name, SHA) pairs for the ChromeOS LLVM hashes.

    The "llvm" entry is always present. "llvm-next" is only included when it
    differs from the current LLVM hash.
    """
    cros_root = Path(chromeos_base)
    hash_lookup = get_llvm_hash.LLVMHash()

    llvm_sha = hash_lookup.GetCrOSCurrentLLVMHash(cros_root)
    llvm_next_sha = hash_lookup.GetCrOSLLVMNextHash()

    if llvm_sha == llvm_next_sha:
        return [("llvm", llvm_sha)]
    return [("llvm", llvm_sha), ("llvm-next", llvm_next_sha)]
146
147
class _Email(NamedTuple):
    """An email to send: a subject line plus a renderable body."""

    # Subject line of the email.
    subject: str
    # Body, as a tiny_render piece that is rendered to text and to HTML when
    # the email is sent.
    body: tiny_render.Piece
155
156
def _generate_revert_email(
    repository_name: str,
    friendly_name: str,
    sha: str,
    prettify_sha: Callable[[str], tiny_render.Piece],
    get_sha_description: Callable[[str], tiny_render.Piece],
    new_reverts: List[revert_checker.Revert],
) -> _Email:
    """Builds the email announcing `new_reverts` found across `sha`.

    Args:
        repository_name: Repository name placed in the subject tag.
        friendly_name: Friendly name of the HEAD that `sha` belongs to.
        sha: SHA the reverts were detected across.
        prettify_sha: Turns a SHA into a renderable piece.
        get_sha_description: Turns a SHA into a description piece.
        new_reverts: Reverts to list; they are listed sorted by revert SHA.

    Returns:
        The subject and body of the email. Wording is singular when there is
        exactly one revert and plural otherwise.
    """
    is_single = len(new_reverts) == 1
    intro_noun = "a new revert" if is_single else "new reverts"
    subject_noun = "revert" if is_single else "reverts"

    intro = [
        f"It looks like there may be {intro_noun} across {friendly_name} (",
        prettify_sha(sha),
        ").",
        tiny_render.line_break,
        tiny_render.line_break,
        "That is:" if is_single else "These are:",
    ]

    # One list of pieces per revert; each becomes one bullet in the email.
    bullets = [
        [
            prettify_sha(revert.sha),
            " (appears to revert ",
            prettify_sha(revert.reverted_sha),
            "): ",
            get_sha_description(revert.sha),
        ]
        for revert in sorted(new_reverts, key=lambda r: r.sha)
    ]

    body = [
        *intro,
        tiny_render.UnorderedList(items=bullets),
        tiny_render.line_break,
        "PTAL and consider reverting them locally.",
    ]
    subject = (
        f"[revert-checker/{repository_name}] new {subject_noun} "
        f"discovered across {friendly_name}"
    )
    return _Email(subject=subject, body=body)
204
205
206_EmailRecipients = NamedTuple(
207    "_EmailRecipients",
208    [
209        ("well_known", List[str]),
210        ("direct", List[str]),
211    ],
212)
213
214
def _send_revert_email(recipients: _EmailRecipients, email: _Email) -> None:
    """Sends `email` to `recipients` through email_sender's X20 path.

    The body is rendered twice, once as plain text and once as HTML.
    """
    sender = email_sender.EmailSender()
    # NOTE(review): the literal below looks like an address that was redacted
    # when this source was extracted -- confirm the intended recipient.
    direct_recipients = ["[email protected]"] + recipients.direct
    sender.SendX20Email(
        subject=email.subject,
        identifier="revert-checker",
        well_known_recipients=recipients.well_known,
        direct_recipients=direct_recipients,
        text_body=tiny_render.render_text_pieces(email.body),
        html_body=tiny_render.render_html_pieces(email.body),
    )
224
225
def _write_state(state_file: str, new_state: State) -> None:
    """Serializes `new_state` as JSON and stores it at `state_file`.

    The JSON is written to `state_file + ".new"` first and then renamed over
    `state_file`. On any failure the temporary file is removed (if it exists)
    and the original exception propagates.
    """
    scratch_path = state_file + ".new"
    try:
        with open(scratch_path, "w", encoding="utf-8") as out:
            serialized = json.dumps(
                new_state.to_json(),
                sort_keys=True,
                indent=2,
                separators=(",", ": "),
            )
            out.write(serialized)
        os.rename(scratch_path, state_file)
    except BaseException:
        # Clean up after *any* failure (interrupts included), then re-raise.
        try:
            os.remove(scratch_path)
        except FileNotFoundError:
            pass
        raise
244
245
def _read_state(state_file: str) -> State:
    """Loads the persisted State from `state_file`.

    A missing file is not an error: it is logged and an empty State is
    returned instead.
    """
    try:
        with open(state_file, encoding="utf-8") as state_in:
            raw_state = json.load(state_in)
            return State.from_json(raw_state)
    except FileNotFoundError:
        logging.info(
            "No state file found at %r; starting with an empty slate",
            state_file,
        )
    return State()
255        return State()
256
257
@dataclasses.dataclass(eq=True, frozen=True)
class NewRevertInfo:
    """The not-yet-reported reverts found across one tracked SHA."""

    # Friendly name of the HEAD the SHA belongs to.
    friendly_name: str
    # SHA the reverts were found across.
    sha: str
    # Reverts across `sha` that were absent from the previously stored state.
    new_reverts: List[revert_checker.Revert]
265
266
def locate_new_reverts_across_shas(
    llvm_dir: str,
    interesting_shas: List[Tuple[str, str]],
    state: State,
) -> Tuple[State, List[NewRevertInfo]]:
    """Locates and returns yet-unseen reverts across `interesting_shas`.

    Args:
        llvm_dir: LLVM checkout handed to `revert_checker.find_reverts`.
        interesting_shas: (friendly name, SHA) pairs to look for reverts
            across.
        state: State loaded from the previous run.

    Returns:
        A tuple of:
        - a fresh State holding every revert seen across each SHA, plus a
          HeadInfo for *every* entry of `interesting_shas`, and
        - one NewRevertInfo per SHA that has reverts missing from `state`.
    """
    new_state = State()
    revert_infos = []
    for friendly_name, sha in interesting_shas:
        logging.info("Finding reverts across %s (%s)", friendly_name, sha)
        all_reverts = revert_checker.find_reverts(
            llvm_dir, sha, root="origin/" + git_llvm_rev.MAIN_BRANCH
        )
        logging.info(
            "Detected the following revert(s) across %s:\n%s",
            friendly_name,
            pprint.pformat(all_reverts),
        )

        new_state.seen_reverts[sha] = [r.sha for r in all_reverts]

        # Track the HEAD whether or not it has anything new to report.
        # Doing this only for HEADs with new reverts would drop a quiet HEAD
        # from the saved state and restart its staleness clock the next time
        # it showed up, so staleness notifications would never come due.
        old_head_info = state.heads.get(friendly_name)
        if old_head_info is not None and old_head_info.last_sha == sha:
            # HEAD hasn't moved: carry the existing record (the same object,
            # not a copy) forward so its timestamps are preserved.
            new_head_info = old_head_info
        else:
            now = int(time.time())
            new_head_info = HeadInfo(
                last_sha=sha,
                first_seen_timestamp=now,
                next_notification_timestamp=(
                    HEAD_STALENESS_ALERT_INITIAL_SECS + now
                ),
            )
        new_state.heads[friendly_name] = new_head_info

        if sha not in state.seen_reverts:
            logging.info("SHA %s is new to me", sha)
            existing_reverts = set()
        else:
            existing_reverts = set(state.seen_reverts[sha])

        new_reverts = [r for r in all_reverts if r.sha not in existing_reverts]
        if not new_reverts:
            logging.info("...All of which have been reported.")
            continue

        revert_infos.append(
            NewRevertInfo(
                friendly_name=friendly_name,
                sha=sha,
                new_reverts=new_reverts,
            )
        )
    return new_state, revert_infos
322
323
def do_cherrypick(
    chroot_path: str,
    llvm_dir: str,
    repository: str,
    interesting_shas: List[Tuple[str, str]],
    state: State,
    reviewers: List[str],
    cc: List[str],
) -> State:
    """Requests cherry-pick CLs for new reverts, then checks HEAD staleness.

    Args:
        chroot_path: Forwarded to `get_upstream_patch.get_from_upstream`.
        llvm_dir: LLVM checkout used to find reverts and to look up revision
            numbers.
        repository: Repository name used in the stale-HEAD email subject.
        interesting_shas: (friendly name, SHA) pairs to check.
        state: State loaded from the previous run.
        reviewers: Reviewers for the CLs; also emailed about stale HEADs.
        cc: CC list for the CLs; also emailed about stale HEADs.

    Returns:
        The new State to persist.
    """

    def prettify_sha(sha: str) -> tiny_render.Piece:
        rev = get_llvm_hash.GetVersionFrom(llvm_dir, sha)
        return prettify_sha_for_email(sha, rev)

    new_state, new_reverts = locate_new_reverts_across_shas(
        llvm_dir, interesting_shas, state
    )

    # Friendly names that have already been handled in this run.
    seen: Set[str] = set()
    for revert_info in new_reverts:
        if revert_info.friendly_name in seen:
            continue
        seen.add(revert_info.friendly_name)
        for sha, reverted_sha in revert_info.new_reverts:
            try:
                # We upload reverts for all platforms by default, since there's
                # no real reason for them to be CrOS-specific.
                get_upstream_patch.get_from_upstream(
                    chroot_path=chroot_path,
                    create_cl=True,
                    start_sha=reverted_sha,
                    patches=[sha],
                    reviewers=reviewers,
                    cc=cc,
                    platforms=(),
                )
            except get_upstream_patch.CherrypickError as e:
                # A failed cherry-pick only skips this one revert.
                logging.info("%s, skipping...", str(e))

    maybe_email_about_stale_heads(
        new_state,
        repository,
        recipients=_EmailRecipients(
            well_known=[],
            direct=reviewers + cc,
        ),
        prettify_sha=prettify_sha,
        is_dry_run=False,
    )
    return new_state
375
376
def prettify_sha_for_email(
    sha: str,
    rev: int,
) -> tiny_render.Piece:
    """Builds the email piece that represents `sha` and its revision `rev`.

    The text form is "r<rev> (<first 12 chars of sha>)". The HTML form is a
    link labelled "r<rev>" pointing at the commit on GitHub.
    """
    rev_label = f"r{rev}"
    # 12 characters is an arbitrary cutoff, but should be unambiguous enough.
    abbreviated = sha[:12]
    commit_link = tiny_render.Link(
        href=f"https://github.com/llvm/llvm-project/commit/{sha}",
        inner=rev_label,
    )
    return tiny_render.Switch(
        text=f"{rev_label} ({abbreviated})",
        html=commit_link,
    )
391
392
def maybe_email_about_stale_heads(
    new_state: State,
    repository_name: str,
    recipients: _EmailRecipients,
    prettify_sha: Callable[[str], tiny_render.Piece],
    is_dry_run: bool,
) -> bool:
    """Potentially send an email about stale HEADs in `new_state`.

    These emails are sent to notify users of the current HEADs detected by this
    script. They:
    - aren't meant to hurry LLVM rolls along,
    - are worded to avoid the implication that an LLVM roll is taking an
      excessive amount of time, and
    - are initially sent at the 2 month point of seeing the same HEAD.

    We've had multiple instances in the past of upstream changes (e.g., moving
    to other git branches or repos) leading to this revert checker silently
    checking a very old HEAD for months. The intent is to send emails when the
    correctness of the HEADs we're working with _might_ be wrong.

    Args:
        new_state: State whose `heads` are inspected. The
            `next_notification_timestamp` of every stale HEAD is updated in
            place.
        repository_name: Repository name placed in the subject tag.
        recipients: Who to send the email to.
        prettify_sha: Turns a SHA into a renderable piece.
        is_dry_run: If True, the email is logged instead of sent. The
            notification timestamps are still advanced.

    Returns:
        True if at least one HEAD was stale (an email was sent, or would have
        been in a dry run); False otherwise.
    """
    logging.info("Checking HEAD freshness...")
    now = int(time.time())
    stale = sorted(
        (name, info)
        for name, info in new_state.heads.items()
        if info.next_notification_timestamp <= now
    )
    if not stale:
        logging.info("All HEADs are fresh-enough; no need to send an email.")
        return False

    stale_listings = []

    for name, info in stale:
        days = (now - info.first_seen_timestamp) // ONE_DAY_SECS
        # `prettify_sha` returns a render piece, not a string, so it has to
        # stay a separate element of the listing. Interpolating it into an
        # f-string would put the object's string form in the email instead of
        # its rendered text/link.
        stale_listings.append(
            [
                f"{name} at ",
                prettify_sha(info.last_sha),
                f", which was last updated ~{days} days ago.",
            ]
        )

    shas_are = "SHAs are" if len(stale_listings) > 1 else "SHA is"
    email_body = [
        "Hi! This is a friendly notification that the current upstream LLVM "
        f"{shas_are} being tracked by the LLVM revert checker:",
        tiny_render.UnorderedList(stale_listings),
        tiny_render.line_break,
        "If that's still correct, great! If it looks wrong, the revert "
        "checker's SHA autodetection may need an update. Please file a bug "
        "at go/crostc-bug if an update is needed. Thanks!",
    ]

    email = _Email(
        subject=f"[revert-checker/{repository_name}] Tracked branch update",
        body=email_body,
    )
    if is_dry_run:
        logging.info("Dry-run specified; would otherwise send email %s", email)
    else:
        _send_revert_email(recipients, email)

    # Push the next reminder for every HEAD we just reported on.
    next_notification = now + HEAD_STALENESS_ALERT_INTERVAL_SECS
    for _, info in stale:
        info.next_notification_timestamp = next_notification
    return True
458
459
def do_email(
    is_dry_run: bool,
    llvm_dir: str,
    repository: str,
    interesting_shas: List[Tuple[str, str]],
    state: State,
    recipients: _EmailRecipients,
) -> State:
    """Emails (or, in a dry run, logs) newly found reverts; returns new state.

    One email is generated per SHA that has unreported reverts. Afterwards the
    stale-HEAD check runs with the same dry-run setting.
    """

    def prettify_sha(sha: str) -> tiny_render.Piece:
        rev = get_llvm_hash.GetVersionFrom(llvm_dir, sha)
        return prettify_sha_for_email(sha, rev)

    def get_sha_description(sha: str) -> tiny_render.Piece:
        # Subject line of the commit, taken from `git log`.
        subject_line = subprocess.check_output(
            ["git", "log", "-n1", "--format=%s", sha],
            cwd=llvm_dir,
            encoding="utf-8",
        )
        return subject_line.strip()

    new_state, new_reverts = locate_new_reverts_across_shas(
        llvm_dir, interesting_shas, state
    )

    for revert_info in new_reverts:
        email = _generate_revert_email(
            repository,
            revert_info.friendly_name,
            revert_info.sha,
            prettify_sha,
            get_sha_description,
            revert_info.new_reverts,
        )
        if not is_dry_run:
            logging.info("Sending email with subject %r...", email.subject)
            _send_revert_email(recipients, email)
            logging.info("Email sent.")
            continue

        logging.info(
            "Would send email:\nSubject: %s\nBody:\n%s\n",
            email.subject,
            tiny_render.render_text_pieces(email.body),
        )

    maybe_email_about_stale_heads(
        new_state, repository, recipients, prettify_sha, is_dry_run
    )
    return new_state
507
508
def parse_args(argv: List[str]) -> argparse.Namespace:
    """Parses this script's command line.

    The command line is made of global options, an `action` positional, and a
    required repository subcommand (`chromeos` or `android`) that carries its
    own required directory flag.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    parser.add_argument(
        "action",
        help="Automatically cherry-pick upstream reverts, send an email, or "
        "write to stdout.",
        choices=["cherry-pick", "email", "dry-run"],
    )
    parser.add_argument(
        "--state_file",
        help="File to store persistent state in.",
        required=True,
    )
    parser.add_argument(
        "--llvm_dir",
        help="Up-to-date LLVM directory to use.",
        required=True,
    )
    parser.add_argument("--debug", action="store_true")
    parser.add_argument(
        "--reviewers",
        nargs="*",
        type=str,
        help="""
        Requests reviews from REVIEWERS. All REVIEWERS must have existing
        accounts.
        """,
    )
    parser.add_argument(
        "--cc",
        nargs="*",
        type=str,
        help="""
        CCs the CL or email to the recipients. If in cherry-pick mode, all
        recipients must have Gerrit accounts.
        """,
    )

    # The chosen subcommand's name lands in `opts.repository`.
    repo_parsers = parser.add_subparsers(dest="repository", required=True)

    cros_parser = repo_parsers.add_parser("chromeos")
    cros_parser.add_argument(
        "--chromeos_dir",
        help="Up-to-date CrOS directory to use.",
        required=True,
    )

    android_parser = repo_parsers.add_parser("android")
    android_parser.add_argument(
        "--android_llvm_toolchain_dir",
        help="Up-to-date android-llvm-toolchain directory to use.",
        required=True,
    )

    return parser.parse_args(argv)
564
565
def find_chroot(
    opts: argparse.Namespace, cc: List[str]
) -> Tuple[str, List[Tuple[str, str]], _EmailRecipients]:
    """Resolves the repository-specific inputs for this run.

    Returns:
        A tuple of (directory given for the selected repository, the
        (friendly name, SHA) pairs to check, the email recipients).

    Raises:
        RuntimeError: if cherry-picking was requested for android.
        ValueError: if `opts.repository` is not a known repository.
    """
    if opts.repository == "chromeos":
        cros_dir = opts.chromeos_dir
        cros_shas = _find_interesting_chromeos_shas(cros_dir)
        return (
            cros_dir,
            cros_shas,
            _EmailRecipients(well_known=["mage"], direct=cc),
        )

    if opts.repository != "android":
        raise ValueError(f"Unknown repository {opts.repository}")

    if opts.action == "cherry-pick":
        raise RuntimeError(
            "android doesn't currently support automatic cherry-picking."
        )

    android_dir = opts.android_llvm_toolchain_dir
    android_shas = _find_interesting_android_shas(android_dir)
    # NOTE(review): the literal below looks like an address that was redacted
    # when this source was extracted -- confirm the intended recipient.
    android_recipients = _EmailRecipients(
        well_known=[],
        direct=["[email protected]"] + cc,
    )
    return (android_dir, android_shas, android_recipients)
593
594
def main(argv: List[str]) -> int:
    """Runs the revert checker for `argv`; returns the process exit code.

    Arguments are parsed, the SHAs to check are detected, and the previous
    state is loaded before the requested action runs. The resulting state is
    then written back to the state file (for every action, dry-run included).
    """
    opts = parse_args(argv)

    log_level = logging.DEBUG if opts.debug else logging.INFO
    logging.basicConfig(
        format="%(asctime)s: %(levelname)s: "
        "%(filename)s:%(lineno)d: %(message)s",
        level=log_level,
    )

    # argparse leaves these as None when the flags are absent.
    reviewers = opts.reviewers or []
    cc = opts.cc or []

    chroot_path, interesting_shas, recipients = find_chroot(opts, cc)
    logging.info("Interesting SHAs were %r", interesting_shas)

    state = _read_state(opts.state_file)
    logging.info("Loaded state\n%s", pprint.pformat(state))

    # Side effects are deliberately pushed as late as possible, so that a
    # failure in any of the steps above happens before anything is sent.
    if opts.action != "cherry-pick":
        new_state = do_email(
            is_dry_run=opts.action == "dry-run",
            llvm_dir=opts.llvm_dir,
            interesting_shas=interesting_shas,
            repository=opts.repository,
            state=state,
            recipients=recipients,
        )
    else:
        new_state = do_cherrypick(
            chroot_path=chroot_path,
            llvm_dir=opts.llvm_dir,
            repository=opts.repository,
            interesting_shas=interesting_shas,
            state=state,
            reviewers=reviewers,
            cc=cc,
        )

    _write_state(opts.state_file, new_state)
    return 0
641
642
if __name__ == "__main__":
    # Everything after the program name goes to main(); its return value
    # becomes the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
645