#! /usr/bin/env vpython3
#
# Copyright 2021 The ANGLE Project Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# run_perf_tests.py:
#   Runs ANGLE perf tests using some statistical averaging.
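#
# Example invocation (flag values are illustrative; see the argparse options in main() below):
#   vpython3 run_perf_tests.py --test-suite angle_perftests --filter 'TracePerfTest*' \
#       --samples-per-test 4 --trials-per-sample 2 --log debug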

import argparse
import contextlib
import glob
import importlib
import io
import json
import logging
import tempfile
import time
import os
import pathlib
import re
import subprocess
import shutil
import sys

SCRIPT_DIR = str(pathlib.Path(__file__).resolve().parent)
PY_UTILS = str(pathlib.Path(SCRIPT_DIR) / 'py_utils')
if PY_UTILS not in sys.path:
    os.stat(PY_UTILS)  # raises if py_utils is missing
    sys.path.insert(0, PY_UTILS)
import android_helper
import angle_metrics
import angle_path_util
import angle_test_util

angle_path_util.AddDepsDirToPath('testing/scripts')
import common

angle_path_util.AddDepsDirToPath('third_party/catapult/tracing')
from tracing.value import histogram
from tracing.value import histogram_set
from tracing.value import merge_histograms

DEFAULT_TEST_SUITE = 'angle_perftests'
DEFAULT_LOG = 'info'
DEFAULT_SAMPLES = 10
DEFAULT_TRIALS = 4
DEFAULT_MAX_ERRORS = 3
DEFAULT_TRIAL_TIME = 3

# Test expectations
FAIL = 'FAIL'
PASS = 'PASS'
SKIP = 'SKIP'

EXIT_FAILURE = 1
EXIT_SUCCESS = 0


@contextlib.contextmanager
def temporary_dir(prefix=''):
    path = tempfile.mkdtemp(prefix=prefix)
    try:
        yield path
    finally:
        shutil.rmtree(path)


def _shard_tests(tests, shard_count, shard_index):
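    """Round-robin sharding: returns tests[shard_index::shard_count]."""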
    return [tests[index] for index in range(shard_index, len(tests), shard_count)]


def _get_results_from_output(output, result):
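    """Returns the values reported for 'result' (one float per trial), or None if missing."""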
    m = re.search(r'Running (\d+) tests', output)
    if m and int(m.group(1)) > 1:
        raise Exception('Found more than one test result in output')

    # Results are reported in the format:
    # name_backend.result: story= value units.
    pattern = r'\.' + result + r':.*= ([0-9.]+)'
    logging.debug('Searching for %s in output' % pattern)
    m = re.findall(pattern, output)
    if not m:
        logging.warning('Did not find the result "%s" in the test output:\n%s' % (result, output))
        return None

    return [float(value) for value in m]


def _truncated_list(data, n):
    """Returns data sorted with the n smallest and n largest values removed."""
    if len(data) < n * 2:
        raise ValueError('list not large enough to truncate')
    return sorted(data)[n:-n]


def _mean(data):
    """Return the sample arithmetic mean of data."""
    n = len(data)
    if n < 1:
        raise ValueError('mean requires at least one data point')
    return float(sum(data)) / float(n)


def _sum_of_square_deviations(data, c):
    """Return sum of square deviations of sequence data."""
    ss = sum((float(x) - c)**2 for x in data)
    return ss


def _coefficient_of_variation(data):
    """Calculates the population coefficient of variation."""
    n = len(data)
    if n < 2:
        raise ValueError('variance requires at least two data points')
    c = _mean(data)
    ss = _sum_of_square_deviations(data, c)
    pvar = ss / n  # the population variance
    stddev = (pvar**0.5)  # population standard deviation
    return stddev / c


def _save_extra_output_files(args, results, histograms, metrics):
    isolated_out_dir = os.path.dirname(args.isolated_script_test_output)
    if not os.path.isdir(isolated_out_dir):
        return
    benchmark_path = os.path.join(isolated_out_dir, args.test_suite)
    if not os.path.isdir(benchmark_path):
        os.makedirs(benchmark_path)
    test_output_path = os.path.join(benchmark_path, 'test_results.json')
    results.save_to_json_file(test_output_path)
    perf_output_path = os.path.join(benchmark_path, 'perf_results.json')
    logging.info('Saving perf histograms to %s.' % perf_output_path)
    with open(perf_output_path, 'w') as out_file:
        out_file.write(json.dumps(histograms.AsDicts(), indent=2))

    angle_metrics_path = os.path.join(benchmark_path, 'angle_metrics.json')
    with open(angle_metrics_path, 'w') as f:
        f.write(json.dumps(metrics, indent=2))

    # Calling here to catch errors earlier (fail shard instead of merge script)
    assert angle_metrics.ConvertToSkiaPerf([angle_metrics_path])


class Results:
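    """Accumulates per-test outcomes in the JSON Test Results format (version 3)."""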

    def __init__(self, suffix):
        self._results = {
            'tests': {},
            'interrupted': False,
            'seconds_since_epoch': time.time(),
            'path_delimiter': '.',
            'version': 3,
            'num_failures_by_type': {
                FAIL: 0,
                PASS: 0,
                SKIP: 0,
            },
        }
        self._test_results = {}
        self._suffix = suffix

    def _testname(self, name):
        return name + self._suffix

    def has_failures(self):
        return self._results['num_failures_by_type'][FAIL] > 0

    def has_result(self, test):
        return self._testname(test) in self._test_results

    def result_skip(self, test):
        self._test_results[self._testname(test)] = {'expected': SKIP, 'actual': SKIP}
        self._results['num_failures_by_type'][SKIP] += 1

    def result_pass(self, test):
        self._test_results[self._testname(test)] = {'expected': PASS, 'actual': PASS}
        self._results['num_failures_by_type'][PASS] += 1

    def result_fail(self, test):
        self._test_results[self._testname(test)] = {
            'expected': PASS,
            'actual': FAIL,
            'is_unexpected': True
        }
        self._results['num_failures_by_type'][FAIL] += 1

    def save_to_output_file(self, test_suite, fname):
        self._update_results(test_suite)
        with open(fname, 'w') as out_file:
            out_file.write(json.dumps(self._results, indent=2))

    def save_to_json_file(self, fname):
        logging.info('Saving test results to %s.' % fname)
        with open(fname, 'w') as out_file:
            out_file.write(json.dumps(self._results, indent=2))

    def _update_results(self, test_suite):
        if self._test_results:
            self._results['tests'][test_suite] = self._test_results
            self._test_results = {}


def _read_histogram(histogram_file_path):
    with open(histogram_file_path) as histogram_file:
        histogram = histogram_set.HistogramSet()
        histogram.ImportDicts(json.load(histogram_file))
        return histogram


def _read_metrics(metrics_file_path):
    try:
        with open(metrics_file_path) as f:
            return [json.loads(l) for l in f]
    except FileNotFoundError:
        return []


def _merge_into_one_histogram(test_histogram_set):
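    """Merges the per-sample histograms into one HistogramSet, grouped by histogram name."""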
    with common.temporary_file() as merge_histogram_path:
        logging.info('Writing merged histograms to %s.' % merge_histogram_path)
        with open(merge_histogram_path, 'w') as merge_histogram_file:
            json.dump(test_histogram_set.AsDicts(), merge_histogram_file)
            merge_histogram_file.close()
        merged_dicts = merge_histograms.MergeHistograms(merge_histogram_path, groupby=['name'])
        merged_histogram = histogram_set.HistogramSet()
        merged_histogram.ImportDicts(merged_dicts)
        return merged_histogram


def _wall_times_stats(wall_times):
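    """Returns a truncated mean / coefficient-of-variation summary, or None if too few samples."""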
    if len(wall_times) > 7:
        truncation_n = len(wall_times) >> 3
        logging.debug('Truncation: Removing the %d highest and lowest times from wall_times.' %
                      truncation_n)
        wall_times = _truncated_list(wall_times, truncation_n)

    if len(wall_times) > 1:
        return ('truncated mean wall_time = %.2f, cov = %.2f%%' %
                (_mean(wall_times), _coefficient_of_variation(wall_times) * 100.0))

    return None


def _run_test_suite(args, cmd_args, env):
    return angle_test_util.RunTestSuite(
        args.test_suite,
        cmd_args,
        env,
        use_xvfb=args.xvfb,
        show_test_stdout=args.show_test_stdout)


def _run_perf(args, common_args, env, steps_per_trial=None):
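    """Runs one sample of the suite; returns (status, angle_metrics, histogram)."""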
    run_args = common_args + [
        '--trials',
        str(args.trials_per_sample),
    ]

    if steps_per_trial:
        run_args += ['--steps-per-trial', str(steps_per_trial)]
    else:
        run_args += ['--trial-time', str(args.trial_time)]

    if not args.smoke_test_mode:
        run_args += ['--warmup']  # Render each frame once with glFinish

    if args.perf_counters:
        run_args += ['--perf-counters', args.perf_counters]

    with temporary_dir() as render_output_dir:
        histogram_file_path = os.path.join(render_output_dir, 'histogram')
        run_args += ['--isolated-script-test-perf-output=%s' % histogram_file_path]
        run_args += ['--render-test-output-dir=%s' % render_output_dir]

        exit_code, output, json_results = _run_test_suite(args, run_args, env)
        if exit_code != EXIT_SUCCESS:
            raise RuntimeError('%s failed. Output:\n%s' % (args.test_suite, output))
        if SKIP in json_results['num_failures_by_type']:
            return SKIP, None, None

        # Extract debug files for https://issuetracker.google.com/296921272
        if args.isolated_script_test_output:
            isolated_out_dir = os.path.dirname(args.isolated_script_test_output)
            for path in glob.glob(os.path.join(render_output_dir, '*gzdbg*')):
                shutil.move(path, isolated_out_dir)

        sample_metrics = _read_metrics(os.path.join(render_output_dir, 'angle_metrics'))

        if sample_metrics:
            sample_histogram = _read_histogram(histogram_file_path)
            return PASS, sample_metrics, sample_histogram

    return FAIL, None, None


class _MaxErrorsException(Exception):
    pass


def _skipped_or_glmark2(test, test_status):
    if test_status == SKIP:
        logging.info('Test skipped by suite: %s' % test)
        return True

    # GLMark2Benchmark logs .fps/.score instead of our perf metrics.
    if test.startswith('GLMark2Benchmark.Run/'):
        logging.info('GLMark2Benchmark missing metrics (as expected, skipping): %s' % test)
        return True

    return False


def _sleep_until_temps_below(limit_temp):
    while True:
        max_temp = max(android_helper.GetTemps())
        if max_temp < limit_temp:
            break
        logging.info('Waiting for device temps below %.1f, currently %.1f', limit_temp, max_temp)
        time.sleep(10)


def _maybe_throttle_or_log_temps(custom_throttling_temp):
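    """Waits for temps below the custom limit on Android, or just logs temps at debug level."""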
    is_debug = logging.getLogger().isEnabledFor(logging.DEBUG)

    if angle_test_util.IsAndroid():
        if custom_throttling_temp:
            _sleep_until_temps_below(custom_throttling_temp)
        elif is_debug:
            android_helper.GetTemps()  # calls log.debug
    elif sys.platform == 'linux' and is_debug:
        out = subprocess.check_output('cat /sys/class/hwmon/hwmon*/temp*_input', shell=True)
        logging.debug('hwmon temps: %s',
                      ','.join([str(int(n) // 1000) for n in out.decode().split('\n') if n]))


def _run_tests(tests, args, extra_flags, env):
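    """Runs each test samples_per_test times; returns (Results, merged histograms, metrics)."""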
    if args.split_shard_samples and args.shard_index is not None:
        test_suffix = '_shard%d' % args.shard_index
    else:
        test_suffix = ''

    results = Results(test_suffix)

    histograms = histogram_set.HistogramSet()
    metrics = []
    total_errors = 0
    prepared_traces = set()

    for test_index in range(len(tests)):
        if total_errors >= args.max_errors:
            raise _MaxErrorsException()

        test = tests[test_index]

        if angle_test_util.IsAndroid():
            trace = android_helper.GetTraceFromTestName(test)
            if trace and trace not in prepared_traces:
                android_helper.PrepareRestrictedTraces([trace])
                prepared_traces.add(trace)

        common_args = [
            '--gtest_filter=%s' % test,
            '--verbose',
        ] + extra_flags

        if args.steps_per_trial:
            steps_per_trial = args.steps_per_trial
            trial_limit = 'steps_per_trial=%d' % steps_per_trial
        else:
            steps_per_trial = None
            trial_limit = 'trial_time=%d' % args.trial_time

        logging.info('Test %d/%d: %s (samples=%d trials_per_sample=%d %s)' %
                     (test_index + 1, len(tests), test, args.samples_per_test,
                      args.trials_per_sample, trial_limit))

        wall_times = []
        test_histogram_set = histogram_set.HistogramSet()
        for sample in range(args.samples_per_test):
            try:
                _maybe_throttle_or_log_temps(args.custom_throttling_temp)
                test_status, sample_metrics, sample_histogram = _run_perf(
                    args, common_args, env, steps_per_trial)
            except RuntimeError as e:
                logging.error(e)
                results.result_fail(test)
                total_errors += 1
                break

            if _skipped_or_glmark2(test, test_status):
                results.result_skip(test)
                break

            if not sample_metrics:
                logging.error('Test %s failed to produce a sample output' % test)
                results.result_fail(test)
                break

            sample_wall_times = [
                float(m['value']) for m in sample_metrics if m['metric'] == '.wall_time'
            ]

            logging.info('Test %d/%d Sample %d/%d wall_times: %s' %
                         (test_index + 1, len(tests), sample + 1, args.samples_per_test,
                          str(sample_wall_times)))

            if len(sample_wall_times) != args.trials_per_sample:
                logging.error('Test %s failed to record some wall_times (expected %d, got %d)' %
                              (test, args.trials_per_sample, len(sample_wall_times)))
                results.result_fail(test)
                break

            wall_times += sample_wall_times
            test_histogram_set.Merge(sample_histogram)
            metrics.append(sample_metrics)

        if not results.has_result(test):
            assert len(wall_times) == (args.samples_per_test * args.trials_per_sample)
            stats = _wall_times_stats(wall_times)
            if stats:
                logging.info('Test %d/%d: %s: %s' % (test_index + 1, len(tests), test, stats))
            histograms.Merge(_merge_into_one_histogram(test_histogram_set))
            results.result_pass(test)

    return results, histograms, metrics


def _find_test_suite_directory(test_suite):
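    """Returns the directory with the most recently built test_suite binary, or None."""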
    if os.path.exists(angle_test_util.ExecutablePathInCurrentDir(test_suite)):
        return '.'

    if angle_test_util.IsWindows():
        test_suite += '.exe'

    # Find most recent binary in search paths.
    newest_binary = None
    newest_mtime = None

    for path in glob.glob('out/*'):
        binary_path = str(pathlib.Path(SCRIPT_DIR).parent.parent / path / test_suite)
        if os.path.exists(binary_path):
            binary_mtime = os.path.getmtime(binary_path)
            if (newest_binary is None) or (binary_mtime > newest_mtime):
                newest_binary = binary_path
                newest_mtime = binary_mtime

    if newest_binary:
        logging.info('Found %s in %s' % (test_suite, os.path.dirname(newest_binary)))
        return os.path.dirname(newest_binary)
    return None


def _split_shard_samples(tests, samples_per_test, shard_count, shard_index):
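    """Shards at (test, sample) granularity; each returned test is then run with one sample."""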
    test_samples = [(test, sample) for test in tests for sample in range(samples_per_test)]
    shard_test_samples = _shard_tests(test_samples, shard_count, shard_index)
    return [test for (test, sample) in shard_test_samples]


def _should_lock_gpu_clocks():
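    """Returns True only on Windows bots with a GeForce GTX 1660 (per nvidia-smi)."""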
    if not angle_test_util.IsWindows():
        return False

    try:
        gpu_info = subprocess.check_output(
            ['nvidia-smi', '--query-gpu=gpu_name', '--format=csv,noheader']).decode()
    except FileNotFoundError:
        # expected in some cases, e.g. non-nvidia bots
        return False

    logging.info('nvidia-smi --query-gpu=gpu_name output: %s' % gpu_info)

    return gpu_info.strip() == 'GeForce GTX 1660'


def _log_nvidia_gpu_temperature():
    t = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=temperature.gpu', '--format=csv,noheader']).decode().strip()
    logging.info('Current GPU temperature: %s' % t)


@contextlib.contextmanager
def _maybe_lock_gpu_clocks():
    if not _should_lock_gpu_clocks():
        yield
        return

    # Lock to 1410MHz (`nvidia-smi --query-supported-clocks=gr --format=csv`)
    lgc_out = subprocess.check_output(['nvidia-smi', '--lock-gpu-clocks=1410,1410']).decode()
    logging.info('Lock GPU clocks output: %s' % lgc_out)
    _log_nvidia_gpu_temperature()
    try:
        yield
    finally:
        rgc_out = subprocess.check_output(['nvidia-smi', '--reset-gpu-clocks']).decode()
        logging.info('Reset GPU clocks output: %s' % rgc_out)
        _log_nvidia_gpu_temperature()


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--isolated-script-test-output', type=str)
    parser.add_argument('--isolated-script-test-perf-output', type=str)
    parser.add_argument(
        '-f', '--filter', '--isolated-script-test-filter', type=str, help='Test filter.')
    suite_group = parser.add_mutually_exclusive_group()
    suite_group.add_argument(
        '--test-suite', '--suite', help='Test suite to run.', default=DEFAULT_TEST_SUITE)
    suite_group.add_argument(
        '-T',
        '--trace-tests',
        help='Run with the angle_trace_tests test suite.',
        action='store_true')
    parser.add_argument('--xvfb', help='Use xvfb.', action='store_true')
    parser.add_argument(
        '--shard-count',
        help='Number of shards for test splitting. Default is 1.',
        type=int,
        default=1)
    parser.add_argument(
        '--shard-index',
        help='Index of the current shard for test splitting. Default is 0.',
        type=int,
        default=0)
    parser.add_argument(
        '-l', '--log', help='Log output level. Default is %s.' % DEFAULT_LOG, default=DEFAULT_LOG)
    parser.add_argument(
        '-s',
        '--samples-per-test',
        help='Number of samples to run per test. Default is %d.' % DEFAULT_SAMPLES,
        type=int,
        default=DEFAULT_SAMPLES)
    parser.add_argument(
        '-t',
        '--trials-per-sample',
        help='Number of trials to run per sample. Default is %d.' % DEFAULT_TRIALS,
        type=int,
        default=DEFAULT_TRIALS)
    trial_group = parser.add_mutually_exclusive_group()
    trial_group.add_argument(
        '--steps-per-trial', help='Fixed number of steps to run per trial.', type=int)
    trial_group.add_argument(
        '--trial-time',
        help='Number of seconds to run per trial. Default is %d.' % DEFAULT_TRIAL_TIME,
        type=int,
        default=DEFAULT_TRIAL_TIME)
    parser.add_argument(
        '--max-errors',
        help='After this many errors, abort the run. Default is %d.' % DEFAULT_MAX_ERRORS,
        type=int,
        default=DEFAULT_MAX_ERRORS)
    parser.add_argument(
        '--smoke-test-mode', help='Do a quick run to validate correctness.', action='store_true')
    parser.add_argument(
        '--show-test-stdout', help='Prints all test stdout during execution.', action='store_true')
    parser.add_argument(
        '--perf-counters', help='Colon-separated list of extra perf counter metrics.')
    parser.add_argument(
        '-a',
        '--auto-dir',
        help='Run with the most recent test suite found in the build directories.',
        action='store_true')
    parser.add_argument(
        '--split-shard-samples',
        help='Attempt to mitigate variance between machines by splitting samples between shards.',
        action='store_true')
    parser.add_argument(
        '--custom-throttling-temp',
        help='Android: custom thermal throttling with limit set to this temperature (off by default)',
        type=float)

    args, extra_flags = parser.parse_known_args()

    if args.trace_tests:
        args.test_suite = angle_test_util.ANGLE_TRACE_TEST_SUITE

    angle_test_util.SetupLogging(args.log.upper())

    start_time = time.time()

    # Use fast execution for smoke test mode.
    if args.smoke_test_mode:
        args.steps_per_trial = 1
        args.trials_per_sample = 1
        args.samples_per_test = 1

    env = os.environ.copy()

    if angle_test_util.HasGtestShardsAndIndex(env):
        args.shard_count, args.shard_index = angle_test_util.PopGtestShardsAndIndex(env)

    if args.auto_dir:
        test_suite_dir = _find_test_suite_directory(args.test_suite)
        if not test_suite_dir:
            logging.fatal('Could not find test suite: %s' % args.test_suite)
            return EXIT_FAILURE
        else:
            os.chdir(test_suite_dir)

    angle_test_util.Initialize(args.test_suite)

    # Get test list
    exit_code, output, _ = _run_test_suite(args, ['--list-tests', '--verbose'] + extra_flags, env)
    if exit_code != EXIT_SUCCESS:
        logging.fatal('Could not find test list from test output:\n%s' % output)
        sys.exit(EXIT_FAILURE)
    tests = angle_test_util.GetTestsFromOutput(output)

    if args.filter:
        tests = angle_test_util.FilterTests(tests, args.filter)

    # Get tests for this shard (if using sharding args)
    if args.split_shard_samples and args.shard_count >= args.samples_per_test:
        tests = _split_shard_samples(tests, args.samples_per_test, args.shard_count,
                                     args.shard_index)
        assert (len(set(tests)) == len(tests))
        args.samples_per_test = 1
    else:
        tests = _shard_tests(tests, args.shard_count, args.shard_index)

    if not tests:
        logging.error('No tests to run.')
        return EXIT_FAILURE

    logging.info('Running %d test%s' % (len(tests), 's' if len(tests) > 1 else ''))

    try:
        with _maybe_lock_gpu_clocks():
            results, histograms, metrics = _run_tests(tests, args, extra_flags, env)
    except _MaxErrorsException:
        logging.error('Error count exceeded max errors (%d). Aborting.' % args.max_errors)
        return EXIT_FAILURE

    for test in tests:
        assert results.has_result(test)

    if args.isolated_script_test_output:
        results.save_to_output_file(args.test_suite, args.isolated_script_test_output)

        # Uses special output files to match the merge script.
        _save_extra_output_files(args, results, histograms, metrics)

    if args.isolated_script_test_perf_output:
        with open(args.isolated_script_test_perf_output, 'w') as out_file:
            out_file.write(json.dumps(histograms.AsDicts(), indent=2))

    end_time = time.time()
    logging.info('Elapsed time: %.2lf seconds.' % (end_time - start_time))

    if results.has_failures():
        return EXIT_FAILURE
    return EXIT_SUCCESS


if __name__ == '__main__':
    sys.exit(main())