toolchain-utils/crosperf/generate_report.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2016 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Given a specially-formatted JSON object, generates results report(s).

The JSON object should look like:
{"data": BenchmarkData, "platforms": BenchmarkPlatforms}

BenchmarkPlatforms is a [str], each of which names a platform the benchmark
  was run on (e.g. peppy, shamu, ...). Note that the order of this list is
  related with the order of items in BenchmarkData.

BenchmarkData is a {str: [PlatformData]}. The str is the name of the benchmark,
and a PlatformData is a set of data for a given platform. There must be one
PlatformData for each benchmark, for each element in BenchmarkPlatforms.

A PlatformData is a [{str: float}], where each str names a metric we recorded,
and the float is the value for that metric. Each element is considered to be
the metrics collected from an independent run of this benchmark. NOTE: Each
PlatformData is expected to have a "retval" key, with the return value of
the benchmark. If the benchmark is successful, said return value should be 0.
Otherwise, this will break some of our JSON functionality.

Putting it all together, a JSON object will end up looking like:
  { "platforms": ["peppy", "peppy-new-crosstool"],
    "data": {
      "bench_draw_line": [
        [{"time (ms)": 1.321, "memory (mb)": 128.1, "retval": 0},
         {"time (ms)": 1.920, "memory (mb)": 128.4, "retval": 0}],
        [{"time (ms)": 1.221, "memory (mb)": 124.3, "retval": 0},
         {"time (ms)": 1.423, "memory (mb)": 123.9, "retval": 0}]
      ]
    }
  }

Which says that we ran a benchmark on platforms named peppy, and
  peppy-new-crosstool.
We ran one benchmark, named bench_draw_line.
It was run twice on each platform.
Peppy's runs took 1.321ms and 1.920ms, while peppy-new-crosstool's took 1.221ms
  and 1.423ms. None of the runs failed to complete.
"""


import argparse
import functools
import json
import os
import sys
import traceback

from results_report import BenchmarkResults
from results_report import HTMLResultsReport
from results_report import JSONResultsReport
from results_report import TextResultsReport


def CountBenchmarks(benchmark_runs):
    """Counts the number of iterations for each benchmark in benchmark_runs."""

    # Example input for benchmark_runs:
    # {"bench": [[run1, run2, run3], [run1, run2, run3, run4]]}
    def _MaxLen(results):
        return 0 if not results else max(len(r) for r in results)

    return [
        (name, _MaxLen(results)) for name, results in benchmark_runs.items()
    ]


def CutResultsInPlace(results, max_keys=50, complain_on_update=True):
    """Limits the given benchmark results to max_keys keys in-place.

    This takes the `data` field from the benchmark input, and mutates each
    benchmark run to contain `max_keys` elements (ignoring special elements, like
    "retval"). At the moment, it just selects the first `max_keys` keyvals,
    alphabetically.

    If complain_on_update is true, this will print a message noting that a
    truncation occurred.

    This returns the `results` object that was passed in, for convenience.

    e.g.
    >>> benchmark_data = {
    ...   "bench_draw_line": [
    ...     [{"time (ms)": 1.321, "memory (mb)": 128.1, "retval": 0},
    ...      {"time (ms)": 1.920, "memory (mb)": 128.4, "retval": 0}],
    ...     [{"time (ms)": 1.221, "memory (mb)": 124.3, "retval": 0},
    ...      {"time (ms)": 1.423, "memory (mb)": 123.9, "retval": 0}]
    ...   ]
    ... }
    >>> CutResultsInPlace(benchmark_data, max_keys=1, complain_on_update=False)
    {
      'bench_draw_line': [
        [{'memory (mb)': 128.1, 'retval': 0},
         {'memory (mb)': 128.4, 'retval': 0}],
        [{'memory (mb)': 124.3, 'retval': 0},
         {'memory (mb)': 123.9, 'retval': 0}]
      ]
    }
    """
    actually_updated = False
    for bench_results in results.values():
        for platform_results in bench_results:
            for i, result in enumerate(platform_results):
                # Keep the keys that come earliest when sorted alphabetically.
                # Forcing alphabetical order is arbitrary, but necessary; otherwise,
                # the keyvals we'd emit would depend on our iteration order through a
                # map.
                removable_keys = sorted(k for k in result if k != "retval")
                retained_keys = removable_keys[:max_keys]
                platform_results[i] = {k: result[k] for k in retained_keys}
                # retval needs to be passed through all of the time.
                retval = result.get("retval")
                if retval is not None:
                    platform_results[i]["retval"] = retval
                actually_updated = actually_updated or len(
                    retained_keys
                ) != len(removable_keys)

    if actually_updated and complain_on_update:
        print(
            "Warning: Some benchmark keyvals have been truncated.",
            file=sys.stderr,
        )
    return results


def _PositiveInt(s):
    i = int(s)
    if i < 0:
        raise argparse.ArgumentTypeError("%d is not a positive integer." % (i,))
    return i


def _AccumulateActions(args):
    """Given program arguments, determines what actions we want to run.

    Returns [(ResultsReportCtor, str)], where ResultsReportCtor can construct a
    ResultsReport, and the str is the file extension for the given report.
    """
    results = []
    # The order of these is arbitrary.
    if args.json:
        results.append((JSONResultsReport, "json"))
    if args.text:
        results.append((TextResultsReport, "txt"))
    if args.email:
        email_ctor = functools.partial(TextResultsReport, email=True)
        results.append((email_ctor, "email"))
    # We emit HTML if nothing else was specified.
    if args.html or not results:
        results.append((HTMLResultsReport, "html"))
    return results


# Note: get_contents is a function, because it may be expensive (generating some
# HTML reports takes O(seconds) on my machine, depending on the size of the
# input data).
def WriteFile(output_prefix, extension, get_contents, overwrite, verbose):
    """Writes `contents` to a file named "${output_prefix}.${extension}".

    get_contents should be a zero-args function that returns a string (of the
    contents to write).
    If output_prefix == '-', this writes to stdout.
    If overwrite is False, this will not overwrite files.
    """
    if output_prefix == "-":
        if verbose:
            print("Writing %s report to stdout" % (extension,), file=sys.stderr)
        sys.stdout.write(get_contents())
        return

    file_name = "%s.%s" % (output_prefix, extension)
    if not overwrite and os.path.exists(file_name):
        raise IOError(
            "Refusing to write %s -- it already exists" % (file_name,)
        )

    with open(file_name, "w") as out_file:
        if verbose:
            print(
                "Writing %s report to %s" % (extension, file_name),
                file=sys.stderr,
            )
        out_file.write(get_contents())


def RunActions(actions, benchmark_results, output_prefix, overwrite, verbose):
    """Runs `actions`, returning True if all succeeded."""
    failed = False

    report_ctor = None  # Make the linter happy
    for report_ctor, extension in actions:
        try:
            get_contents = lambda: report_ctor(benchmark_results).GetReport()
            WriteFile(
                output_prefix, extension, get_contents, overwrite, verbose
            )
        except Exception:
            # Complain and move along; we may have more actions that might complete
            # successfully.
            failed = True
            traceback.print_exc()
    return not failed


def PickInputFile(input_name):
    """Given program arguments, returns file to read for benchmark input."""
    return sys.stdin if input_name == "-" else open(input_name)


def _NoPerfReport(_label_name, _benchmark_name, _benchmark_iteration):
    return {}


def _ParseArgs(argv):
    parser = argparse.ArgumentParser(
        description="Turns JSON into results " "report(s)."
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Be a tiny bit more verbose.",
    )
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="Overwrite existing results files.",
    )
    parser.add_argument(
        "-o",
        "--output",
        default="report",
        type=str,
        help="Prefix of the output filename (default: report). "
        "- means stdout.",
    )
    parser.add_argument(
        "-i",
        "--input",
        required=True,
        type=str,
        help="Where to read the JSON from. - means stdin.",
    )
    parser.add_argument(
        "-l",
        "--statistic-limit",
        default=0,
        type=_PositiveInt,
        help="The maximum number of benchmark statistics to "
        "display from a single run. 0 implies unlimited.",
    )
    parser.add_argument(
        "--json", action="store_true", help="Output a JSON report."
    )
    parser.add_argument(
        "--text", action="store_true", help="Output a text report."
    )
    parser.add_argument(
        "--email",
        action="store_true",
        help="Output a text report suitable for email.",
    )
    parser.add_argument(
        "--html",
        action="store_true",
        help="Output an HTML report (this is the default if no "
        "other output format is specified).",
    )
    return parser.parse_args(argv)


def Main(argv):
    args = _ParseArgs(argv)
    with PickInputFile(args.input) as in_file:
        raw_results = json.load(in_file)

    platform_names = raw_results["platforms"]
    results = raw_results["data"]
    if args.statistic_limit:
        results = CutResultsInPlace(results, max_keys=args.statistic_limit)
    benches = CountBenchmarks(results)
    # In crosperf, a label is essentially a platform+configuration. So, a name of
    # a label and a name of a platform are equivalent for our purposes.
    bench_results = BenchmarkResults(
        label_names=platform_names,
        benchmark_names_and_iterations=benches,
        run_keyvals=results,
        read_perf_report=_NoPerfReport,
    )
    actions = _AccumulateActions(args)
    ok = RunActions(
        actions, bench_results, args.output, args.force, args.verbose
    )
    return 0 if ok else 1


if __name__ == "__main__":
    sys.exit(Main(sys.argv[1:]))