xref: /aosp_15_r20/external/toolchain-utils/crosperf/results_organizer.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1*760c253cSXin Li# -*- coding: utf-8 -*-
2*760c253cSXin Li# Copyright 2013 The ChromiumOS Authors
3*760c253cSXin Li# Use of this source code is governed by a BSD-style license that can be
4*760c253cSXin Li# found in the LICENSE file.
5*760c253cSXin Li
6*760c253cSXin Li"""Parse data from benchmark_runs for tabulator."""
7*760c253cSXin Li
8*760c253cSXin Li
9*760c253cSXin Liimport errno
10*760c253cSXin Liimport json
11*760c253cSXin Liimport os
12*760c253cSXin Liimport re
13*760c253cSXin Liimport sys
14*760c253cSXin Li
15*760c253cSXin Lifrom cros_utils import misc
16*760c253cSXin Li
17*760c253cSXin Li
18*760c253cSXin Li_TELEMETRY_RESULT_DEFAULTS_FILE = "default-telemetry-results.json"
19*760c253cSXin Li_DUP_KEY_REGEX = re.compile(r"(\w+)\{(\d+)\}")
20*760c253cSXin Li
21*760c253cSXin Li
22*760c253cSXin Lidef _AdjustIteration(benchmarks, max_dup, bench):
23*760c253cSXin Li    """Adjust the interation numbers if they have keys like ABCD{i}."""
24*760c253cSXin Li    for benchmark in benchmarks:
25*760c253cSXin Li        if benchmark.name != bench or benchmark.iteration_adjusted:
26*760c253cSXin Li            continue
27*760c253cSXin Li        benchmark.iteration_adjusted = True
28*760c253cSXin Li        benchmark.iterations *= max_dup + 1
29*760c253cSXin Li
30*760c253cSXin Li
def _GetMaxDup(data):
    """Find the maximum i inside ABCD{i}.

    data should be a [[[Key]]], where Key is a string that may look like
    ABCD{i}.  Returns 0 when no key carries a {i} suffix.
    """
    matches = (
        _DUP_KEY_REGEX.match(key)
        for label in data
        for run in label
        for key in run
    )
    return max(
        (int(m.group(2)) for m in matches if m),
        default=0,
    )
45*760c253cSXin Li
46*760c253cSXin Li
47*760c253cSXin Lidef _Repeat(func, times):
48*760c253cSXin Li    """Returns the result of running func() n times."""
49*760c253cSXin Li    return [func() for _ in range(times)]
50*760c253cSXin Li
51*760c253cSXin Li
52*760c253cSXin Lidef _DictWithReturnValues(retval, pass_fail):
53*760c253cSXin Li    """Create a new dictionary pre-populated with success/fail values."""
54*760c253cSXin Li    new_dict = {}
55*760c253cSXin Li    # Note: 0 is a valid retval; test to make sure it's not None.
56*760c253cSXin Li    if retval is not None:
57*760c253cSXin Li        new_dict["retval"] = retval
58*760c253cSXin Li    if pass_fail:
59*760c253cSXin Li        new_dict[""] = pass_fail
60*760c253cSXin Li    return new_dict
61*760c253cSXin Li
62*760c253cSXin Li
def _GetNonDupLabel(max_dup, runs):
    """Expand duplicated keys (e.g. foo{1}) into separate runs.

    For each run, `max_dup` extra runs are created, each pre-seeded with
    the original run's retval and pass/fail marker.  A key of the form
    name{i} is stripped of its suffix and stored (as a string) in the
    (i-1)th extra run; all other keys stay with the original run.

    NOTE(review): indices are effectively assumed to start at 1 -- an
    index of 0 wraps to the *last* extra run via negative indexing.

    Returns the transformed original runs and all extra runs, concatenated.
    """
    expanded = []
    for run in runs:
        retval = run.get("retval", None)
        pass_fail = run.get("", None)
        base_run = {}
        # pylint: disable=cell-var-from-loop
        extra_runs = _Repeat(
            lambda: _DictWithReturnValues(retval, pass_fail), max_dup
        )
        for key, value in run.items():
            match = _DUP_KEY_REGEX.match(key)
            if match:
                stripped_key, index = match.groups()
                extra_runs[int(index) - 1][stripped_key] = str(value)
            else:
                base_run[key] = value
        expanded.append(base_run)
        expanded.extend(extra_runs)
    return expanded
94*760c253cSXin Li
95*760c253cSXin Li
def _DuplicatePass(result, benchmarks):
    """Properly expands keys like `foo{1}` in `result`."""
    for bench_name, label_data in result.items():
        dup_count = _GetMaxDup(label_data)
        # No foo{i}-style keys means nothing to expand for this benchmark.
        if not dup_count:
            continue
        # Replace each label's run list in place with its expanded form.
        label_data[:] = [
            _GetNonDupLabel(dup_count, runs) for runs in label_data
        ]
        _AdjustIteration(benchmarks, dup_count, bench_name)
106*760c253cSXin Li
107*760c253cSXin Li
def _ReadSummaryFile(filename):
    """Reads the summary file next to filename.

    Returns a dict whose keys are benchmark names and whose values are
    lists of the result fields that should appear in a 'default' report.
    Returns {} when no summary file exists.
    """
    dirname, _ = misc.GetRoot(filename)
    fullname = os.path.join(dirname, _TELEMETRY_RESULT_DEFAULTS_FILE)
    try:
        # Slurp the summary file into a dictionary. The keys in the dictionary
        # are the benchmark names. The value for a key is a list containing the
        # names of all the result fields that should be returned in a 'default'
        # report.
        with open(fullname) as in_file:
            return json.load(in_file)
    except FileNotFoundError:
        # FileNotFoundError is exactly IOError-with-ENOENT, so a missing
        # defaults file means "no defaults"; any other I/O error propagates.
        return {}
123*760c253cSXin Li
124*760c253cSXin Li
def _MakeOrganizeResultOutline(benchmark_runs, labels):
    """Creates the "outline" of the OrganizeResults result for a set of runs.

    Report generation returns lists of different sizes, depending on the input
    data. Depending on the order in which we iterate through said input data,
    we may populate the Nth index of a list, then the N-1st, then the N+Mth...

    It's cleaner to figure out the "skeleton"/"outline" ahead of time, so we
    don't have to worry about resizing while computing results.
    """
    # Highest iteration number seen per benchmark.  We can't simply count
    # runs, since the set of iterations may be sparse (e.g.
    # [r.iteration for r in benchmark_runs] == [1, 3]); iterations are
    # 1-based, so the maximum is also the slot count we need.
    max_iteration = {}
    for run in benchmark_runs:
        bench_name = run.benchmark.name
        if run.iteration > max_iteration.get(bench_name, -1):
            max_iteration[bench_name] = run.iteration

    # Result structure: {benchmark_name: [[{key: val}]]} -- one inner list
    # per label, one fresh dict per iteration slot.
    outline = {}
    num_labels = len(labels)
    for run in benchmark_runs:
        bench_name = run.benchmark.name
        slots = max_iteration[bench_name]
        outline[bench_name] = [
            [{} for _ in range(slots)] for _ in range(num_labels)
        ]
    return outline
155*760c253cSXin Li
156*760c253cSXin Li
def OrganizeResults(benchmark_runs, labels, benchmarks=None, json_report=False):
    """Create a dict from benchmark_runs.

    The structure of the output dict is as follows:
    {"benchmark_1":[
      [{"key1":"v1", "key2":"v2"},{"key1":"v1", "key2":"v2"}]
      #one label
      []
      #the other label
      ]
     "benchmark_2":
      [
      ]}.
    """
    result = _MakeOrganizeResultOutline(benchmark_runs, labels)
    label_names = [label.name for label in labels]
    label_indices = {name: i for i, name in enumerate(label_names)}
    # Default-report field lists, keyed by benchmark name; {} if the
    # defaults file next to this script (located via sys.argv[0]) is absent.
    summary_file = _ReadSummaryFile(sys.argv[0])

    if benchmarks is None:
        benchmarks = []

    for benchmark_run in benchmark_runs:
        # Runs with no result object contribute nothing to the report.
        if not benchmark_run.result:
            continue
        benchmark = benchmark_run.benchmark
        label_index = label_indices[benchmark_run.label.name]
        cur_label_list = result[benchmark.name][label_index]
        # run.iteration is 1-based; the outline lists are 0-based.
        cur_dict = cur_label_list[benchmark_run.iteration - 1]

        show_all_results = json_report or benchmark.show_all_results
        if not show_all_results:
            summary_list = summary_file.get(benchmark.name)
            if summary_list:
                # Always report retval and cpu frequency/temperature keys in
                # addition to whatever the defaults file asks for.  (This
                # mutates the list held by summary_file, so later runs of the
                # same benchmark see the already-extended list.)
                for key in benchmark_run.result.keyvals.keys():
                    if any(
                        key.startswith(added_key)
                        for added_key in ["retval", "cpufreq", "cputemp"]
                    ):
                        summary_list.append(key)
            else:
                # Did not find test_name in json file; show everything.
                show_all_results = True
        if benchmark_run.result.cwp_dso:
            # If we are in cwp approximation mode, we only care about samples
            if "samples" in benchmark_run.result.keyvals:
                cur_dict["samples"] = benchmark_run.result.keyvals["samples"]
            cur_dict["retval"] = benchmark_run.result.keyvals["retval"]
            # cpufreq/cputemp diagnostics are still carried along.
            for key, value in benchmark_run.result.keyvals.items():
                if any(
                    key.startswith(cpustat_keyword)
                    for cpustat_keyword in ["cpufreq", "cputemp"]
                ):
                    cur_dict[key] = value
        else:
            # Copy either everything, or only the keys in summary_list.
            for test_key in benchmark_run.result.keyvals:
                if show_all_results or test_key in summary_list:
                    cur_dict[test_key] = benchmark_run.result.keyvals[test_key]
        # Occasionally Telemetry tests will not fail but they will not return a
        # result, either.  Look for those cases, and force them to be a fail.
        # (This can happen if, for example, the test has been disabled.)
        # NOTE(review): this assumes a lone entry in cur_dict is always
        # "retval"; a single non-retval key would raise KeyError -- confirm.
        if len(cur_dict) == 1 and cur_dict["retval"] == 0:
            cur_dict["retval"] = 1
            benchmark_run.result.keyvals["retval"] = 1
            # TODO: This output should be sent via logger.
            print(
                "WARNING: Test '%s' appears to have succeeded but returned"
                " no results." % benchmark.name,
                file=sys.stderr,
            )
        if json_report and benchmark_run.machine:
            # JSON reports also record which machine produced each iteration.
            cur_dict["machine"] = benchmark_run.machine.name
            cur_dict["machine_checksum"] = benchmark_run.machine.checksum
            cur_dict["machine_string"] = benchmark_run.machine.checksum_string
    # Expand any foo{i}-style duplicate keys into separate iterations.
    _DuplicatePass(result, benchmarks)
    return result
233