#!/usr/bin/env python3
# Copyright 2022 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import os
import subprocess
import sys
import traceback
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from datetime import datetime, timedelta
from fnmatch import fnmatch
from pathlib import Path
from time import sleep
from typing import Callable, List, Sequence, NamedTuple, Optional, Union

from impl.common import (
    Command,
    ParallelCommands,
    all_tracked_files,
    cmd,
    console,
    rich,
    strip_ansi_escape_sequences,
    verbose,
)

git = cmd("git")


@dataclass
class CheckContext(object):
    "Information passed to each check when it's called."

    # Whether or not --fix was set and checks should attempt to fix problems they encounter.
    fix: bool

    # All files that this check should cover (e.g. all python files on a python check).
    all_files: List[Path]

    # Those files of all_files that were modified locally.
    modified_files: List[Path]

    # Files that do not exist upstream and have been added locally.
    new_files: List[Path]


class Check(NamedTuple):
    "Metadata for each check, defining which files it should run on."

    # Function to call for this check
    check_function: Callable[[CheckContext], Union[Command, None, List[Command]]]

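    # Optional display name; defaults to the check function's name without its "check_" prefix.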
    custom_name: Optional[str] = None

    # List of globs that this check should be triggered on
    files: List[str] = []

    python_tools: bool = False

    # List of globs to exclude from this check
    exclude: List[str] = []

    # Whether or not this check can fix issues.
    can_fix: bool = False

    # Which groups this check belongs to.
    groups: List[str] = []

    # Priority tasks usually take longer and are started first, and will show preliminary output.
    priority: bool = False

    @property
    def name(self):
        if self.custom_name:
            return self.custom_name
        name = self.check_function.__name__
        if name.startswith("check_"):
            return name[len("check_") :]
        return name

    @property
    def doc(self):
        if self.check_function.__doc__:
            return self.check_function.__doc__.strip()
        else:
            return None


class Group(NamedTuple):
    "Metadata for a group of checks"

    name: str

    doc: str

    checks: List[str]


def list_file_diff():
    """
    Lists files that were modified compared to the upstream branch.

    Falls back to all files tracked by git if there is no upstream branch.
    """
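    # "@{u}" is git shorthand for the current branch's upstream tracking branch; when no
    # upstream is configured the rev-parse output is empty and we take the fallback below
    # (check=False lets the failed command return instead of raising).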
    upstream = git("rev-parse @{u}").stdout(check=False)
    if upstream:
        for line in git("diff --name-status", upstream).lines():
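            # Each line is "<status letter>\t<path>", e.g. "M\ttools/presubmit".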
            parts = line.split("\t", 1)
            file = Path(parts[1].strip())
            if file.is_file():
                yield (parts[0].strip(), file)
    else:
        print("WARNING: Not tracking a branch. Checking all files.")
        for file in all_tracked_files():
            yield ("M", file)


def should_run_check_on_file(check: Check, file: Path):
    "Returns true if `check` should be run on `file`."

    # Skip third_party except vmm_vhost.
    if str(file).startswith("third_party") and not str(file).startswith("third_party/vmm_vhost"):
        return False

    # Skip excluded files
    for glob in check.exclude:
        if fnmatch(str(file), glob):
            return False

    # Match python tools (no file-extension, but with a python shebang line)
    if check.python_tools:
        if fnmatch(str(file), "tools/*") and file.suffix == "" and file.is_file():
            if file.open(errors="ignore").read(32).startswith("#!/usr/bin/env python3"):
                return True

    # If no constraint is specified, match all files.
    if not check.files and not check.python_tools:
        return True

    # Otherwise, match only those specified by `files`.
    for glob in check.files:
        if fnmatch(str(file), glob):
            return True

    return False


class Task(object):
    """
    Represents a task that needs to be executed to perform a `Check`.

    The task can be executed via `Task.execute`, which will update the state variables with
    status and progress information.

    This information can then be rendered from a separate thread via `Task.status_widget()`
    """

    def __init__(self, title: str, commands: Sequence[Command], priority: bool):
        "Display title."
        self.title = title
        "Commands to execute."
        self.commands = commands
        "Task is a priority check."
        self.priority = priority
        "List of log lines (stdout+stderr) produced by the task."
        self.log_lines: List[str] = []
        "Task was completed, but may or may not have been successful."
        self.done = False
        "True if the task completed successfully."
        self.success = False
        "Time the task was started."
        self.start_time = datetime.min
        "Duration the task took to execute. Only filled after completion."
        self.duration = timedelta.max
        "Spinner object for status_widget UI."
        self.spinner = rich.spinner.Spinner("point", title)

    def status_widget(self):
        "Returns a rich console object showing the current status of the task."
        duration = self.duration if self.done else datetime.now() - self.start_time
        title = f"[{duration.total_seconds():6.2f}s] [bold]{self.title}[/bold]"

        if self.done:
            status: str = "[green]OK [/green]" if self.success else "[red]ERR[/red]"
            title_widget = rich.text.Text.from_markup(f"{status} {title}")
        else:
            self.spinner.text = rich.text.Text.from_markup(title)
            title_widget = self.spinner

        if not self.priority:
            return title_widget

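        # Priority tasks additionally render their last three log lines; pad with empty
        # strings so the widget keeps the same height while the task is running.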
        last_lines = [
            self.log_lines[-3] if len(self.log_lines) >= 3 else "",
            self.log_lines[-2] if len(self.log_lines) >= 2 else "",
            self.log_lines[-1] if len(self.log_lines) >= 1 else "",
        ]

        return rich.console.Group(
            *(
                # Print the last log lines without their original colors
                rich.text.Text(
                    "│ " + strip_ansi_escape_sequences(log_line),
                    style="light_slate_grey",
                    overflow="ellipsis",
                    no_wrap=True,
                )
                for log_line in last_lines
            ),
            rich.text.Text("└ ", end="", style="light_slate_grey"),
            title_widget,
            rich.text.Text(),
        )

    def execute(self):
        "Execute the task while updating the status variables."
        try:
            self.start_time = datetime.now()
            success = True
            if verbose():
                for command in self.commands:
                    self.log_lines.append(f"$ {command}")

            # Spawn all commands as separate processes
            processes = [
                command.popen(stdout=subprocess.PIPE, stderr=subprocess.STDOUT, errors="replace")
                for command in self.commands
            ]

            # The stdout is collected before we wait for the processes to exit so that the UI is
            # at least real-time for the first process. Note that the output of processes other
            # than the first one is not real-time. In addition, we can't proactively kill the
            # other processes in the same task if any process fails.
            for process in processes:
                assert process.stdout
                for line in iter(process.stdout.readline, ""):
                    self.log_lines.append(line.strip())

            # Wait for all processes to finish and check return code
            for process in processes:
                if process.wait() != 0:
                    success = False

            self.duration = datetime.now() - self.start_time
            self.success = success
            self.done = True
        except Exception:
            self.log_lines.append(traceback.format_exc())


def print_logs(tasks: List[Task]):
    "Prints logs of all failed or unfinished tasks."
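    # A task counts as unfinished if execution was interrupted (e.g. via CTRL-C) or if
    # Task.execute raised before it could mark the task as done.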
    for task in tasks:
        if not task.done:
            print()
            console.rule(f"{task.title} did not finish", style="yellow")
            for line in task.log_lines:
                print(line)
            if not task.log_lines:
                print(f"{task.title} did not output any logs")
    for task in tasks:
        if task.done and not task.success:
            console.rule(f"{task.title} failed", style="red")
            for line in task.log_lines:
                print(line)
            if not task.log_lines:
                print(f"{task.title} did not output any logs")


def print_summary(tasks: List[Task]):
    "Prints a summary of all task results."
    console.rule("Summary")
    tasks.sort(key=lambda t: t.duration)
    for task in tasks:
        title = f"[{task.duration.total_seconds():6.2f}s] [bold]{task.title}[/bold]"
        status: str = "[green]OK [/green]" if task.success else "[red]ERR[/red]"
        console.print(f"{status} {title}")


def execute_tasks_parallel(tasks: List[Task]):
    "Executes the list of tasks in parallel, while rendering live status updates."
    with ThreadPoolExecutor() as executor:
        try:
            # Since tasks are executed in subprocesses, we can use a thread pool to parallelize
            # despite the GIL.
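            # submit() returns one Future per task, polled by the render loop below. Passing the
            # bound method (rather than a lambda) ties each future to its own task.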
            task_futures = [executor.submit(t.execute) for t in tasks]

            # Render task updates while they are executing in the background.
            with rich.live.Live(refresh_per_second=30) as live:
                while True:
                    live.update(
                        rich.console.Group(
                            *(t.status_widget() for t in tasks),
                            rich.text.Text(),
                            rich.text.Text.from_markup(
                                "[green]Tip:[/green] Press CTRL-C to abort execution and see all logs."
                            ),
                        )
                    )
                    if all(future.done() for future in task_futures):
                        break
                    sleep(0.1)
        except KeyboardInterrupt:
            print_logs(tasks)
            # Force exit to skip waiting for the executor to shut down. This will kill all
            # running subprocesses.
            os._exit(1)  # type: ignore

    # Render error logs and summary after execution
    print_logs(tasks)
    print_summary(tasks)

    if any(not t.success for t in tasks):
        raise Exception("Some checks failed")


def execute_tasks_serial(tasks: List[Task]):
    "Executes the list of tasks one-by-one"
    for task in tasks:
        console.rule(task.title)
        for command in task.commands:
            command.fg()
        console.print()


def generate_plan(
    checks_list: List[Check],
    fix: bool,
    run_on_all_files: bool,
):
    "Generates a list of `Task`s to execute the checks provided in `checks_list`"
    all_files = [*all_tracked_files()]
    file_diff = [*list_file_diff()]
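    # file_diff contains (status, path) tuples from `git diff --name-status`, where
    # "A" marks newly added files and "M" marks modified files.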
    new_files = [f for (s, f) in file_diff if s == "A"]
    if run_on_all_files:
        modified_files = all_files
    else:
        modified_files = [f for (s, f) in file_diff if s in ("M", "A")]
    tasks: List[Task] = []
    unsupported_checks: List[str] = []
    for check in checks_list:
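        # In --fix mode, only schedule checks that can actually apply fixes.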
        if fix and not check.can_fix:
            continue
        context = CheckContext(
            fix=fix,
            all_files=[f for f in all_files if should_run_check_on_file(check, f)],
            modified_files=[f for f in modified_files if should_run_check_on_file(check, f)],
            new_files=[f for f in new_files if should_run_check_on_file(check, f)],
        )
        if context.modified_files:
            maybe_commands = check.check_function(context)
            if maybe_commands is None:
                unsupported_checks.append(check.name)
                continue
            commands_list = maybe_commands if isinstance(maybe_commands, list) else [maybe_commands]
            title = f"fixing {check.name}" if fix else check.name
            tasks.append(Task(title, commands_list, check.priority))

    if unsupported_checks:
        console.print("[yellow]Warning:[/yellow] The following checks cannot be run:")
        for unsupported_check in unsupported_checks:
            console.print(f" - {unsupported_check}")
        console.print()
        console.print("[green]Tip:[/green] Use the dev container to run presubmits:")
        console.print()
        console.print(
            f"  [blue] $ tools/dev_container tools/presubmit {' '.join(sys.argv[1:])}[/blue]"
        )
        console.print()

    if not os.access("/dev/kvm", os.W_OK):
        console.print("[yellow]Warning:[/yellow] Cannot access KVM. Integration tests are not run.")

    # Sort so that priority tasks are launched (and rendered) first
    tasks.sort(key=lambda t: (t.priority, t.title), reverse=True)
    return tasks


def run_checks(
    checks_list: List[Check],
    fix: bool,
    run_on_all_files: bool,
    parallel: bool,
):
    """
    Runs all checks in checks_list.

    Arguments:
        fix: Run fixes instead of checks on `Check`s that support it.
        run_on_all_files: Do not use git delta, but run on all files.
        parallel: Run tasks in parallel.
    """
    tasks = generate_plan(checks_list, fix, run_on_all_files)
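    # With only one task there is nothing to run in parallel; use the simpler serial output.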
    if len(tasks) == 1:
        parallel = False

    if parallel:
        execute_tasks_parallel(list(tasks))
    else:
        execute_tasks_serial(list(tasks))
404