#!/bin/sh
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# This test script is to be used by the build server.
# It is supposed to be executed from the Trusty root directory
# and expects the following environment variables:
#
22""":" # Shell script (in docstring to appease pylint)
23
24# Find and invoke hermetic python3 interpreter
25. "`dirname $0`/envsetup.sh"; exec "$PY3" "$0" "$@"
26# Shell script end
27Run tests for a project.
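
Example invocation (the script name, project name, and test filter below are
illustrative; use any project and test regex defined in the build config):

    ./run_tests.py qemu-generic-arm64-test-debug --test "boot-test.*"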
28"""
29
30import argparse
31from enum import Enum
32import importlib
33import os
34import re
35import subprocess
36import sys
37import time
38from typing import Optional
39
40from trusty_build_config import PortType, TrustyCompositeTest, TrustyTest
41from trusty_build_config import TrustyAndroidTest, TrustyBuildConfig
42from trusty_build_config import TrustyHostTest, TrustyRebootCommand
43from trusty_build_config import TrustyPrintCommand
44
45
46TEST_STATUS = Enum("TEST_STATUS", ["PASSED", "FAILED", "SKIPPED"])

class TestResult:
    """Stores results for a single test.

    Attributes:
        test: Name of the test.
        status: Test's integer return code, or None if this test was skipped.
        retried: True if this test was retried.
    """
    test: str
    status: Optional[int]
    retried: bool

    def __init__(self, test: str, status: Optional[int], retried: bool):
        self.test = test
        self.status = status
        self.retried = retried

    def test_status(self) -> TEST_STATUS:
        if self.status is None:
            return TEST_STATUS.SKIPPED
        return TEST_STATUS.PASSED if self.status == 0 else TEST_STATUS.FAILED

    def failed(self) -> bool:
        return self.test_status() == TEST_STATUS.FAILED

    def __format__(self, _format_spec: str) -> str:
        # status may be None for skipped tests, so avoid the ":d" format spec.
        return f"{self.test} returned {self.status}"


class TestResults:
    """Stores test results.

    Attributes:
        project: Name of project that tests were run on.
        passed: True if all tests passed, False if one or more tests failed.
        passed_count: Number of tests passed.
        failed_count: Number of tests failed.
        skipped_count: Number of tests skipped.
        flaked_count: Number of tests that failed then passed on second try.
        retried_count: Number of tests that were given a second try.
        test_results: List of TestResult objects storing test name and status.
    """

    def __init__(self, project):
        """Inits TestResults with project name and empty test results."""
        self.project = project
        self.passed = True
        self.passed_count = 0
        self.failed_count = 0
        self.skipped_count = 0
        self.flaked_count = 0
        self.retried_count = 0
        self.test_results = []

    def add_result(self, result: TestResult):
        """Add a test result."""
        self.test_results.append(result)
        if result.test_status() == TEST_STATUS.PASSED:
            self.passed_count += 1
            if result.retried:
                self.flaked_count += 1
        elif result.test_status() == TEST_STATUS.FAILED:
            self.failed_count += 1
            self.passed = False
        elif result.test_status() == TEST_STATUS.SKIPPED:
            self.skipped_count += 1

        if result.retried:
            self.retried_count += 1

    def print_results(self, print_failed_only=False):
        """Print test results."""
        if print_failed_only:
            if self.passed:
                return
            sys.stdout.flush()
            out = sys.stderr
        else:
            out = sys.stdout
        test_count = self.passed_count + self.failed_count + self.skipped_count
        test_attempted = self.passed_count + self.failed_count
        out.write(
            "\n"
            f"There were {test_count} tests defined for project "
            f"{self.project}.\n"
            f"{test_attempted} tests ran and {self.skipped_count} were "
            "skipped.\n"
        )
        if test_count:
            for result in self.test_results:
                match (result.test_status(), result.retried, print_failed_only):
                    case (TEST_STATUS.FAILED, _, _):
                        out.write(f"[  FAILED  ] {result.test}\n")
                    case (TEST_STATUS.SKIPPED, _, False):
                        out.write(f"[  SKIPPED ] {result.test}\n")
                    case (TEST_STATUS.PASSED, retried, False):
                        out.write(f"[       OK ] {result.test}\n")
                        if retried:
                            out.write(
                                f"WARNING: {result.test} was re-run and "
                                "passed on second try; it may be flaky\n"
                            )

            out.write(
                f"[==========] {test_count} tests ran for project "
                f"{self.project}.\n"
            )
            if self.passed_count and not print_failed_only:
                out.write(f"[  PASSED  ] {self.passed_count} tests.\n")
            if self.failed_count:
                out.write(f"[  FAILED  ] {self.failed_count} tests.\n")
            if self.skipped_count:
                out.write(f"[  SKIPPED ] {self.skipped_count} tests.\n")
            if self.flaked_count > 0:
                out.write(
                    f"WARNING: {self.flaked_count} tests passed when "
                    "re-run which indicates that they may be flaky.\n"
                )
            if self.retried_count == MAX_RETRIES:
                out.write(
                    f"WARNING: hit MAX_RETRIES({MAX_RETRIES}) during "
                    "testing after which point, no tests were retried.\n"
                )


class MultiProjectTestResults:
    """Stores results from testing multiple projects.

    Attributes:
        test_results: List containing the results for each project.
        failed_projects: List of projects with test failures.
        tests_passed: Count of test passes across all projects.
        tests_failed: Count of test failures across all projects.
        tests_skipped: Count of tests skipped across all projects.
        had_passes: Count of all projects with any test passes.
        had_failures: Count of all projects with any test failures.
        had_skip: Count of all projects with any skipped tests.
    """

    def __init__(self, test_results: list[TestResults]):
        self.test_results = test_results
        self.failed_projects = []
        self.tests_passed = 0
        self.tests_failed = 0
        self.tests_skipped = 0
        self.had_passes = 0
        self.had_failures = 0
        self.had_skip = 0

        for result in self.test_results:
            if not result.passed:
                self.failed_projects.append(result.project)
            self.tests_passed += result.passed_count
            self.tests_failed += result.failed_count
            self.tests_skipped += result.skipped_count
            if result.passed_count:
                self.had_passes += 1
            if result.failed_count:
                self.had_failures += 1
            if result.skipped_count:
                self.had_skip += 1

    def print_results(self):
        """Prints the test results to stdout and stderr."""
        for test_result in self.test_results:
            test_result.print_results()

        sys.stdout.write("\n")
        if self.had_passes:
            sys.stdout.write(
                f"[  PASSED  ] {self.tests_passed} tests in "
                f"{self.had_passes} projects.\n"
            )
        if self.had_failures:
            sys.stdout.write(
                f"[  FAILED  ] {self.tests_failed} tests in "
                f"{self.had_failures} projects.\n"
            )
            sys.stdout.flush()

            # Print the failed tests again to stderr as the build server will
            # store this in a separate file with a direct link from the build
            # status page. The full build log page on the build server buffers
            # stdout and stderr and interleaves them at random. By printing
            # the summary to both stderr and stdout, we get at least one of
            # them at the bottom of that file.
            for test_result in self.test_results:
                test_result.print_results(print_failed_only=True)
            sys.stderr.write(
                f"[  FAILED  ] {self.tests_failed} tests in "
                f"{self.had_failures} projects.\n"
            )
        if self.had_skip:
            sys.stdout.write(
                f"[  SKIPPED ] {self.tests_skipped} tests in "
                f"{self.had_skip} projects.\n"
            )
            sys.stdout.flush()


def test_should_run(testname: str, test_filters: Optional[list[re.Pattern]]):
    """Check if test should run.

    Args:
        testname: Name of test to check.
        test_filters: Regex list that limits the tests to run.

    Returns:
        True if test_filters is empty or None, or if testname matches any
        regex in test_filters; False otherwise.
253    """
254    if not test_filters:
255        return True
256    for r in test_filters:
257        if r.search(testname):
258            return True
259    return False
260
261
262def projects_to_test(
263    build_config: TrustyBuildConfig,
264    projects: list[str],
265    test_filters: list[re.Pattern],
266    run_disabled_tests: bool = False,
267) -> list[str]:
268    """Checks which projects have any of the specified tests.
269
270    Args:
271        build_config: TrustyBuildConfig object.
272        projects: Names of the projects to search for active tests.
273        test_filters: List that limits the tests to run. Projects without any
274          tests that match a filter will be skipped.
275        run_disabled_tests: Also run disabled tests from config file.
276
277    Returns:
278        A list of projects with tests that should be run
279    """
280
281    def has_test(name: str):
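        """Return True if the named project has at least one test to run."""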
        project = build_config.get_project(name)
        for test in project.tests:
            if not test.enabled and not run_disabled_tests:
                continue
            if test_should_run(test.name, test_filters):
                return True
        return False

    return [project for project in projects if has_test(project)]


# Put a global cap on the number of retries to detect flaky tests such that we
# do not risk increasing the time to try all tests substantially. This should be
# fine since *most* tests are not flaky.
# TODO: would it be better to put a cap on the time spent retrying tests? We may
#       not want to retry long running tests.
MAX_RETRIES = 10


def run_tests(
    build_config: TrustyBuildConfig,
    root: os.PathLike,
    project: str,
    qemu_instance_id: Optional[str],
    run_disabled_tests: bool = False,
    test_filters: Optional[list[re.Pattern]] = None,
    verbose: bool = False,
    debug_on_error: bool = False,
    emulator: bool = True,
) -> TestResults:
312    """Run tests for a project.
313
314    Args:
315        build_config: TrustyBuildConfig object.
316        root: Trusty build root output directory.
317        project: Project name.
318        qemu_instance_id: name of the QEmu instance to use. If the instance
319            doesn't already exist, a new fresh instance will be created. If
320            None, use the default instance.
321        run_disabled_tests: Also run disabled tests from config file.
322        test_filters: Optional list that limits the tests to run.
323        verbose: Enable debug output.
324        debug_on_error: Wait for debugger connection on errors.
325
326    Returns:
327        TestResults object listing overall and detailed test results.
328    """
    project_config = build_config.get_project(project=project)
    project_root = f"{root}/build-{project}"

    test_results = TestResults(project)
    test_env = None
    test_runner = None

    if not qemu_instance_id:
        qemu_instance_id = "default"
    qemu_instance_dir = f"{project_root}/qemu-instances/{qemu_instance_id}"

    def load_test_environment():
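        """Import the project-specific `run` module from project_root.

        If a `run` module from a different project was already imported in
        this interpreter, the qemu helper modules it depends on are reloaded
        first and then the `run` module itself is reloaded so the current
        project's version is used. Returns the module, or None if it cannot
        be imported.
        """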
        sys.path.append(project_root)
        try:
            if run := sys.modules.get("run"):
                if not run.__file__.startswith(project_root):
                    # Reload qemu and its dependencies because run.py uses them
                    # We do this in topological sort order
                    if qemu_error := sys.modules.get("qemu_error"):
                        importlib.reload(qemu_error)
                    if qemu_options := sys.modules.get("qemu_options"):
                        importlib.reload(qemu_options)
                    if qemu := sys.modules.get("qemu"):
                        importlib.reload(qemu)

                    # run module was imported for another project and needs
                    # to be replaced with the one for the current project.
                    run = importlib.reload(run)
            else:
                # first import in this interpreter instance, we use importlib
                # rather than a regular import statement since it avoids
                # linter warnings.
                run = importlib.import_module("run")
            sys.path.pop()
        except ImportError:
            return None

        return run

    def print_test_command(name, cmd: Optional[list[str]] = None):
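        """Print which test is about to run, and its command line if any."""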
        print()
        print("Running", name, "on", test_results.project)
        if cmd:
            print(
                "Command line:", " ".join([s.replace(" ", "\\ ") for s in cmd])
            )
        sys.stdout.flush()

    def run_test(
        test, parent_test: Optional[TrustyCompositeTest] = None, retry=True
    ) -> Optional[TestResult]:
        """Execute a single test and print out helpful information

        Returns:
            The results of running this test, or None for non-tests, like
            reboots or tests that don't work in this environment.
        """
        nonlocal test_env, test_runner
        cmd = test.command[1:]
        disable_rpmb = True if "--disable_rpmb" in cmd else None

        test_start_time = time.time()

        if not emulator and not isinstance(test, TrustyHostTest):
            return None

        match test:
            case TrustyHostTest():
                # append nice and expand path to command
                cmd = ["nice", f"{project_root}/{test.command[0]}"] + cmd
                print_test_command(test.name, cmd)
                cmd_status = subprocess.call(cmd)
                result = TestResult(test.name, cmd_status, False)
            case TrustyCompositeTest():
                status_code: Optional[int] = 0
                for subtest in test.sequence:
                    subtest_result = run_test(subtest, test, retry)
                    if subtest_result and subtest_result.failed():
                        status_code = subtest_result.status
                        # fail the composite test with the same status code as
                        # the first failing subtest
                        break
                result = TestResult(test.name, status_code, False)

            case TrustyTest():
                # Benchmark runs on QEMU are meaningless and take a lot of
                # CI time. One can still run the bootport test manually
                # if desired
                if test.port_type == PortType.BENCHMARK:
                    return TestResult(test.name, None, False)
                else:
                    if isinstance(test, TrustyAndroidTest):
                        print_test_command(test.name, [test.shell_command])
                    else:
                        # port tests are identified by their port name,
                        # no command
                        print_test_command(test.name)

                    if not test_env:
                        test_env = load_test_environment()
                    if test_env:
                        if not test_runner:
                            test_runner = test_env.init(
                                android=build_config.android,
                                instance_dir=qemu_instance_dir,
                                disable_rpmb=disable_rpmb,
                                verbose=verbose,
                                debug_on_error=debug_on_error,
                            )
                        cmd_status = test_env.run_test(test_runner, cmd)
                        result = TestResult(test.name, cmd_status, False)
                    else:
                        return TestResult(test.name, None, False)
            case TrustyRebootCommand() if parent_test:
                assert isinstance(parent_test, TrustyCompositeTest)
                if test_env:
                    test_env.shutdown(test_runner, test.mode.factory_reset(),
                                      full_wipe=test.mode.full_wipe())
                    test_runner = None
                    print(f"Shutting down to {test.mode} test environment on "
                          f"{test_results.project}")
                # return early so we do not report the time to reboot or try to
                # add the reboot command to test results.
                return None
            case TrustyRebootCommand():
                raise RuntimeError(
                    "Reboot may only be used inside compositetest"
                )
            case TrustyPrintCommand() if parent_test:
                print(test.msg())
                return None
            case TrustyPrintCommand():
                raise RuntimeError(
                    "Print may only be used inside compositetest"
                )
            case _:
                raise NotImplementedError(f"Don't know how to run {test.name}")

        elapsed = time.time() - test_start_time
        print(f"{result} after {elapsed:.3f} seconds")

        can_retry = retry and test_results.retried_count < MAX_RETRIES
        if result and result.failed() and can_retry:
            print(
                f"retrying potentially flaky test {test.name} on",
                test_results.project,
            )
            # TODO: first retry the test without restarting the test
            #       environment and if that fails, restart and then
            #       retry if < MAX_RETRIES.
            if test_env:
                test_env.shutdown(test_runner)
                test_runner = None
            retried_result = run_test(test, parent_test, retry=False)
            # We know this kind of test returns a status because it failed.
            assert retried_result is not None
            retried_result.retried = True
            return retried_result
        else:
            # Test passed, was skipped, or we're not retrying it.
            return result

    # the retry mechanism is intended to allow a batch run of all tests to pass
    # even if a small handful of tests exhibit flaky behavior. If a test filter
    # was provided or debug on error is set, we are most likely not doing a
    # batch run (as is the case for presubmit testing) meaning that it is
    # not all that helpful to retry failing tests vs. finishing the run faster.
    retry = test_filters is None and not debug_on_error
    try:
        for test in project_config.tests:
            if not test.enabled and not run_disabled_tests:
                continue
            if not test_should_run(test.name, test_filters):
                continue

            if result := run_test(test, None, retry):
                test_results.add_result(result)
    finally:
        # finally is used here to make sure that we attempt to shutdown the
        # test environment no matter whether an exception was raised or not
        # and no matter what kind of test caused an exception to be raised.
        if test_env:
            test_env.shutdown(test_runner)
        # any saved exception from the try block will be re-raised here

    return test_results


def test_projects(
    build_config: TrustyBuildConfig,
    root: os.PathLike,
    projects: list[str],
    qemu_instance_id: Optional[str] = None,
    run_disabled_tests: bool = False,
    test_filters: Optional[list[re.Pattern]] = None,
    verbose: bool = False,
    debug_on_error: bool = False,
    emulator: bool = True,
) -> MultiProjectTestResults:
528    """Run tests for multiple project.
529
530    Args:
531        build_config: TrustyBuildConfig object.
532        root: Trusty build root output directory.
533        projects: Names of the projects to run tests for.
534        qemu_instance_id: name of the QEmu instance to use. If the instance
535            doesn't already exist, a new fresh instance will be created. If
536            None, use the default instance.
537        run_disabled_tests: Also run disabled tests from config file.
538        test_filters: Optional list that limits the tests to run. Projects
539          without any tests that match a filter will be skipped.
540        verbose: Enable debug output.
541        debug_on_error: Wait for debugger connection on errors.
542
543    Returns:
544        MultiProjectTestResults listing overall and detailed test results.
545    """
    if test_filters:
        projects = projects_to_test(
            build_config,
            projects,
            test_filters,
            run_disabled_tests=run_disabled_tests,
        )

    results = []
    for project in projects:
        results.append(
            run_tests(
                build_config,
                root,
                project,
                qemu_instance_id=qemu_instance_id,
                run_disabled_tests=run_disabled_tests,
                test_filters=test_filters,
                verbose=verbose,
                debug_on_error=debug_on_error,
                emulator=emulator,
            )
        )
    return MultiProjectTestResults(results)


def default_root() -> str:
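    """Returns the default build-root directory.

    The path is derived from this script's location on disk, assuming the
    standard Trusty source tree layout with "build-root" at the top of the
    tree.
    """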
    script_dir = os.path.dirname(os.path.abspath(__file__))
    top = os.path.abspath(os.path.join(script_dir, "../../../../.."))
    return os.path.join(top, "build-root")


def main():
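    """Parse arguments, run tests for the requested projects, and print a
    summary. Exits with a non-zero status if any project had test failures.
    """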
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "project", type=str, nargs="+", help="Project(s) to test."
    )
    parser.add_argument(
        "--instance-id",
        type=str,
        default=None,
587        help=("ID of a QEmu instance to use for the tests. A fresh instance "
588              "will be created if no instance with this ID already exists."
589              "'default' will be used if no value is provided.")
    )
    parser.add_argument(
        "--build-root",
        type=str,
        default=default_root(),
        help="Root of intermediate build directory.",
    )
    parser.add_argument(
        "--run_disabled_tests",
        help="Also run disabled tests from config file.",
        action="store_true",
    )
    parser.add_argument(
        "--test",
        type=str,
        action="append",
        help="Only run tests that match the provided regexes.",
    )
    parser.add_argument(
        "--verbose", help="Enable debug output.", action="store_true"
    )
    parser.add_argument(
        "--debug_on_error",
        help="Wait for debugger connection on errors.",
        action="store_true",
    )
    parser.add_argument(
        "--android",
        type=str,
        help="Path to an Android build to run tests against.",
    )
    args = parser.parse_args()

    build_config = TrustyBuildConfig(android=args.android)

    test_filters = (
        [re.compile(test) for test in args.test] if args.test else None
    )
    test_results = test_projects(
        build_config,
        args.build_root,
        args.project,
        qemu_instance_id=args.instance_id,
        run_disabled_tests=args.run_disabled_tests,
        test_filters=test_filters,
        verbose=args.verbose,
        debug_on_error=args.debug_on_error,
    )
    test_results.print_results()

    if test_results.failed_projects:
        sys.exit(1)


if __name__ == "__main__":
    main()