#! /usr/bin/env vpython3
#
# Copyright 2021 The ANGLE Project Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# run_perf_test.py:
#   Runs ANGLE perf tests using some statistical averaging.

import argparse
import contextlib
import glob
import importlib
import io
import json
import logging
import tempfile
import time
import os
import pathlib
import re
import subprocess
import shutil
import sys

SCRIPT_DIR = str(pathlib.Path(__file__).resolve().parent)
PY_UTILS = str(pathlib.Path(SCRIPT_DIR) / 'py_utils')
if PY_UTILS not in sys.path:
    os.stat(PY_UTILS) and sys.path.insert(0, PY_UTILS)
import android_helper
import angle_metrics
import angle_path_util
import angle_test_util

angle_path_util.AddDepsDirToPath('testing/scripts')
import common

angle_path_util.AddDepsDirToPath('third_party/catapult/tracing')
from tracing.value import histogram
from tracing.value import histogram_set
from tracing.value import merge_histograms

DEFAULT_TEST_SUITE = 'angle_perftests'
DEFAULT_LOG = 'info'
DEFAULT_SAMPLES = 10
DEFAULT_TRIALS = 4
DEFAULT_MAX_ERRORS = 3
DEFAULT_TRIAL_TIME = 3

# Test expectations
FAIL = 'FAIL'
PASS = 'PASS'
SKIP = 'SKIP'

EXIT_FAILURE = 1
EXIT_SUCCESS = 0


@contextlib.contextmanager
def temporary_dir(prefix=''):
    path = tempfile.mkdtemp(prefix=prefix)
    try:
        yield path
    finally:
        shutil.rmtree(path)


def _shard_tests(tests, shard_count, shard_index):
    return [tests[index] for index in range(shard_index, len(tests), shard_count)]
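
# Illustrative example (not called anywhere in this script): _shard_tests() deals the
# test list out round-robin, so with shard_count=2:
#   _shard_tests(['t0', 't1', 't2', 't3', 't4'], 2, 0) -> ['t0', 't2', 't4']
#   _shard_tests(['t0', 't1', 't2', 't3', 't4'], 2, 1) -> ['t1', 't3']
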
def _get_results_from_output(output, result):
    m = re.search(r'Running (\d+) tests', output)
    if m and int(m.group(1)) > 1:
        raise Exception('Found more than one test result in output')

    # Results are reported in the format:
    # name_backend.result: story= value units.
    pattern = r'\.' + result + r':.*= ([0-9.]+)'
    logging.debug('Searching for %s in output' % pattern)
    m = re.findall(pattern, output)
    if not m:
        logging.warning('Did not find the result "%s" in the test output:\n%s' % (result, output))
        return None

    return [float(value) for value in m]


def _truncated_list(data, n):
    """Compute a truncated list, n is truncation size"""
    if len(data) < n * 2:
        raise ValueError('list not large enough to truncate')
    return sorted(data)[n:-n]


def _mean(data):
    """Return the sample arithmetic mean of data."""
    n = len(data)
    if n < 1:
        raise ValueError('mean requires at least one data point')
    return float(sum(data)) / float(n)  # in Python 2 use sum(data)/float(n)


def _sum_of_square_deviations(data, c):
    """Return sum of square deviations of sequence data."""
    ss = sum((float(x) - c)**2 for x in data)
    return ss


def _coefficient_of_variation(data):
    """Calculates the population coefficient of variation."""
    n = len(data)
    if n < 2:
        raise ValueError('variance requires at least two data points')
    c = _mean(data)
    ss = _sum_of_square_deviations(data, c)
    pvar = ss / n  # the population variance
    stddev = (pvar**0.5)  # population standard deviation
    return stddev / c
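
# Illustrative example (not called anywhere in this script): how the statistics helpers
# above combine. For data = [10.0, 11.0, 12.0, 13.0]:
#   _mean(data) = 11.5
#   population variance = (1.5**2 + 0.5**2 + 0.5**2 + 1.5**2) / 4 = 1.25
#   _coefficient_of_variation(data) = 1.25**0.5 / 11.5 ~= 0.097, logged as ~9.7%
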
def _save_extra_output_files(args, results, histograms, metrics):
    isolated_out_dir = os.path.dirname(args.isolated_script_test_output)
    if not os.path.isdir(isolated_out_dir):
        return
    benchmark_path = os.path.join(isolated_out_dir, args.test_suite)
    if not os.path.isdir(benchmark_path):
        os.makedirs(benchmark_path)
    test_output_path = os.path.join(benchmark_path, 'test_results.json')
    results.save_to_json_file(test_output_path)
    perf_output_path = os.path.join(benchmark_path, 'perf_results.json')
    logging.info('Saving perf histograms to %s.' % perf_output_path)
    with open(perf_output_path, 'w') as out_file:
        out_file.write(json.dumps(histograms.AsDicts(), indent=2))

    angle_metrics_path = os.path.join(benchmark_path, 'angle_metrics.json')
    with open(angle_metrics_path, 'w') as f:
        f.write(json.dumps(metrics, indent=2))

    # Calling here to catch errors earlier (fail shard instead of merge script)
    assert angle_metrics.ConvertToSkiaPerf([angle_metrics_path])


class Results:

    def __init__(self, suffix):
        self._results = {
            'tests': {},
            'interrupted': False,
            'seconds_since_epoch': time.time(),
            'path_delimiter': '.',
            'version': 3,
            'num_failures_by_type': {
                FAIL: 0,
                PASS: 0,
                SKIP: 0,
            },
        }
        self._test_results = {}
        self._suffix = suffix

    def _testname(self, name):
        return name + self._suffix

    def has_failures(self):
        return self._results['num_failures_by_type'][FAIL] > 0

    def has_result(self, test):
        return self._testname(test) in self._test_results

    def result_skip(self, test):
        self._test_results[self._testname(test)] = {'expected': SKIP, 'actual': SKIP}
        self._results['num_failures_by_type'][SKIP] += 1

    def result_pass(self, test):
        self._test_results[self._testname(test)] = {'expected': PASS, 'actual': PASS}
        self._results['num_failures_by_type'][PASS] += 1

    def result_fail(self, test):
        self._test_results[self._testname(test)] = {
            'expected': PASS,
            'actual': FAIL,
            'is_unexpected': True
        }
        self._results['num_failures_by_type'][FAIL] += 1

    def save_to_output_file(self, test_suite, fname):
        self._update_results(test_suite)
        with open(fname, 'w') as out_file:
            out_file.write(json.dumps(self._results, indent=2))

    def save_to_json_file(self, fname):
        logging.info('Saving test results to %s.' % fname)
        with open(fname, 'w') as out_file:
            out_file.write(json.dumps(self._results, indent=2))

    def _update_results(self, test_suite):
        if self._test_results:
            self._results['tests'][test_suite] = self._test_results
            self._test_results = {}


def _read_histogram(histogram_file_path):
    with open(histogram_file_path) as histogram_file:
        histogram = histogram_set.HistogramSet()
        histogram.ImportDicts(json.load(histogram_file))
        return histogram


def _read_metrics(metrics_file_path):
    try:
        with open(metrics_file_path) as f:
            return [json.loads(l) for l in f]
    except FileNotFoundError:
        return []


def _merge_into_one_histogram(test_histogram_set):
    with common.temporary_file() as merge_histogram_path:
        logging.info('Writing merged histograms to %s.' % merge_histogram_path)
        with open(merge_histogram_path, 'w') as merge_histogram_file:
            json.dump(test_histogram_set.AsDicts(), merge_histogram_file)
            merge_histogram_file.close()
        merged_dicts = merge_histograms.MergeHistograms(merge_histogram_path, groupby=['name'])
        merged_histogram = histogram_set.HistogramSet()
        merged_histogram.ImportDicts(merged_dicts)
        return merged_histogram


def _wall_times_stats(wall_times):
    if len(wall_times) > 7:
        truncation_n = len(wall_times) >> 3
        logging.debug('Truncation: Removing the %d highest and lowest times from wall_times.' %
                      truncation_n)
        wall_times = _truncated_list(wall_times, truncation_n)

    if len(wall_times) > 1:
        return ('truncated mean wall_time = %.2f, cov = %.2f%%' %
                (_mean(wall_times), _coefficient_of_variation(wall_times) * 100.0))

    return None
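
# Illustrative note (not used by the script): with the defaults above, a passing test
# collects DEFAULT_SAMPLES * DEFAULT_TRIALS = 10 * 4 = 40 wall_time values, so
# _wall_times_stats() drops the 40 >> 3 = 5 highest and 5 lowest values before reporting
# the truncated mean and coefficient of variation.
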
def _run_test_suite(args, cmd_args, env):
    return angle_test_util.RunTestSuite(
        args.test_suite,
        cmd_args,
        env,
        use_xvfb=args.xvfb,
        show_test_stdout=args.show_test_stdout)


def _run_perf(args, common_args, env, steps_per_trial=None):
    run_args = common_args + [
        '--trials',
        str(args.trials_per_sample),
    ]

    if steps_per_trial:
        run_args += ['--steps-per-trial', str(steps_per_trial)]
    else:
        run_args += ['--trial-time', str(args.trial_time)]

    if not args.smoke_test_mode:
        run_args += ['--warmup']  # Render each frame once with glFinish

    if args.perf_counters:
        run_args += ['--perf-counters', args.perf_counters]

    with temporary_dir() as render_output_dir:
        histogram_file_path = os.path.join(render_output_dir, 'histogram')
        run_args += ['--isolated-script-test-perf-output=%s' % histogram_file_path]
        run_args += ['--render-test-output-dir=%s' % render_output_dir]

        exit_code, output, json_results = _run_test_suite(args, run_args, env)
        if exit_code != EXIT_SUCCESS:
            raise RuntimeError('%s failed. Output:\n%s' % (args.test_suite, output))
        if SKIP in json_results['num_failures_by_type']:
            return SKIP, None, None

        # Extract debug files for https://issuetracker.google.com/296921272
        if args.isolated_script_test_output:
            isolated_out_dir = os.path.dirname(args.isolated_script_test_output)
            for path in glob.glob(os.path.join(render_output_dir, '*gzdbg*')):
                shutil.move(path, isolated_out_dir)

        sample_metrics = _read_metrics(os.path.join(render_output_dir, 'angle_metrics'))

        if sample_metrics:
            sample_histogram = _read_histogram(histogram_file_path)
            return PASS, sample_metrics, sample_histogram

    return FAIL, None, None
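
# Illustrative sketch (assumed shape, not part of the script): the 'angle_metrics' file
# read above contains one JSON object per line. Only the 'metric' and 'value' keys are
# relied on by _run_tests() below; other keys may also be present, e.g.:
#   {"metric": ".wall_time", "value": "16.7"}
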
class _MaxErrorsException(Exception):
    pass


def _skipped_or_glmark2(test, test_status):
    if test_status == SKIP:
        logging.info('Test skipped by suite: %s' % test)
        return True

    # GLMark2Benchmark logs .fps/.score instead of our perf metrics.
    if test.startswith('GLMark2Benchmark.Run/'):
        logging.info('GLMark2Benchmark missing metrics (as expected, skipping): %s' % test)
        return True

    return False


def _sleep_until_temps_below(limit_temp):
    while True:
        max_temp = max(android_helper.GetTemps())
        if max_temp < limit_temp:
            break
        logging.info('Waiting for device temps below %.1f, currently %.1f', limit_temp, max_temp)
        time.sleep(10)


def _maybe_throttle_or_log_temps(custom_throttling_temp):
    is_debug = logging.getLogger().isEnabledFor(logging.DEBUG)

    if angle_test_util.IsAndroid():
        if custom_throttling_temp:
            _sleep_until_temps_below(custom_throttling_temp)
        elif is_debug:
            android_helper.GetTemps()  # calls log.debug
    elif sys.platform == 'linux' and is_debug:
        out = subprocess.check_output('cat /sys/class/hwmon/hwmon*/temp*_input', shell=True)
        logging.debug('hwmon temps: %s',
                      ','.join([str(int(n) // 1000) for n in out.decode().split('\n') if n]))


def _run_tests(tests, args, extra_flags, env):
    if args.split_shard_samples and args.shard_index is not None:
        test_suffix = '_shard%d' % args.shard_index
    else:
        test_suffix = ''

    results = Results(test_suffix)

    histograms = histogram_set.HistogramSet()
    metrics = []
    total_errors = 0
    prepared_traces = set()

    for test_index in range(len(tests)):
        if total_errors >= args.max_errors:
            raise _MaxErrorsException()

        test = tests[test_index]

        if angle_test_util.IsAndroid():
            trace = android_helper.GetTraceFromTestName(test)
            if trace and trace not in prepared_traces:
                android_helper.PrepareRestrictedTraces([trace])
                prepared_traces.add(trace)

        common_args = [
            '--gtest_filter=%s' % test,
            '--verbose',
        ] + extra_flags

        if args.steps_per_trial:
            steps_per_trial = args.steps_per_trial
            trial_limit = 'steps_per_trial=%d' % steps_per_trial
        else:
            steps_per_trial = None
            trial_limit = 'trial_time=%d' % args.trial_time

        logging.info('Test %d/%d: %s (samples=%d trials_per_sample=%d %s)' %
                     (test_index + 1, len(tests), test, args.samples_per_test,
                      args.trials_per_sample, trial_limit))

        wall_times = []
        test_histogram_set = histogram_set.HistogramSet()
        for sample in range(args.samples_per_test):
            try:
                _maybe_throttle_or_log_temps(args.custom_throttling_temp)
                test_status, sample_metrics, sample_histogram = _run_perf(
                    args, common_args, env, steps_per_trial)
            except RuntimeError as e:
                logging.error(e)
                results.result_fail(test)
                total_errors += 1
                break

            if _skipped_or_glmark2(test, test_status):
                results.result_skip(test)
                break

            if not sample_metrics:
                logging.error('Test %s failed to produce a sample output' % test)
                results.result_fail(test)
                break

            sample_wall_times = [
                float(m['value']) for m in sample_metrics if m['metric'] == '.wall_time'
            ]

            logging.info('Test %d/%d Sample %d/%d wall_times: %s' %
                         (test_index + 1, len(tests), sample + 1, args.samples_per_test,
                          str(sample_wall_times)))

            if len(sample_wall_times) != args.trials_per_sample:
                logging.error('Test %s failed to record some wall_times (expected %d, got %d)' %
                              (test, args.trials_per_sample, len(sample_wall_times)))
                results.result_fail(test)
                break

            wall_times += sample_wall_times
            test_histogram_set.Merge(sample_histogram)
            metrics.append(sample_metrics)

        if not results.has_result(test):
            assert len(wall_times) == (args.samples_per_test * args.trials_per_sample)
            stats = _wall_times_stats(wall_times)
            if stats:
                logging.info('Test %d/%d: %s: %s' % (test_index + 1, len(tests), test, stats))
            histograms.Merge(_merge_into_one_histogram(test_histogram_set))
            results.result_pass(test)

    return results, histograms, metrics
def _find_test_suite_directory(test_suite):
    if os.path.exists(angle_test_util.ExecutablePathInCurrentDir(test_suite)):
        return '.'

    if angle_test_util.IsWindows():
        test_suite += '.exe'

    # Find most recent binary in search paths.
    newest_binary = None
    newest_mtime = None

    for path in glob.glob('out/*'):
        binary_path = str(pathlib.Path(SCRIPT_DIR).parent.parent / path / test_suite)
        if os.path.exists(binary_path):
            binary_mtime = os.path.getmtime(binary_path)
            if (newest_binary is None) or (binary_mtime > newest_mtime):
                newest_binary = binary_path
                newest_mtime = binary_mtime

    if newest_binary:
        logging.info('Found %s in %s' % (test_suite, os.path.dirname(newest_binary)))
        return os.path.dirname(newest_binary)
    return None


def _split_shard_samples(tests, samples_per_test, shard_count, shard_index):
    test_samples = [(test, sample) for test in tests for sample in range(samples_per_test)]
    shard_test_samples = _shard_tests(test_samples, shard_count, shard_index)
    return [test for (test, sample) in shard_test_samples]
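
# Illustrative example (not called anywhere in this script): with --split-shard-samples,
# every (test, sample) pair becomes its own sharding unit. For tests=['t0', 't1'],
# samples_per_test=2 and shard_count=4:
#   shard 0 -> ['t0'], shard 1 -> ['t0'], shard 2 -> ['t1'], shard 3 -> ['t1']
# main() then forces samples_per_test to 1 on each shard.
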
def _should_lock_gpu_clocks():
    if not angle_test_util.IsWindows():
        return False

    try:
        gpu_info = subprocess.check_output(
            ['nvidia-smi', '--query-gpu=gpu_name', '--format=csv,noheader']).decode()
    except FileNotFoundError:
        # expected in some cases, e.g. non-nvidia bots
        return False

    logging.info('nvidia-smi --query-gpu=gpu_name output: %s' % gpu_info)

    return gpu_info.strip() == 'GeForce GTX 1660'


def _log_nvidia_gpu_temperature():
    t = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=temperature.gpu', '--format=csv,noheader']).decode().strip()
    logging.info('Current GPU temperature: %s ' % t)


@contextlib.contextmanager
def _maybe_lock_gpu_clocks():
    if not _should_lock_gpu_clocks():
        yield
        return

    # Lock to 1410Mhz (`nvidia-smi --query-supported-clocks=gr --format=csv`)
    lgc_out = subprocess.check_output(['nvidia-smi', '--lock-gpu-clocks=1410,1410']).decode()
    logging.info('Lock GPU clocks output: %s' % lgc_out)
    _log_nvidia_gpu_temperature()
    try:
        yield
    finally:
        rgc_out = subprocess.check_output(['nvidia-smi', '--reset-gpu-clocks']).decode()
        logging.info('Reset GPU clocks output: %s' % rgc_out)
        _log_nvidia_gpu_temperature()
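
# Note: clock locking only engages on Windows hosts whose nvidia-smi reports exactly
# 'GeForce GTX 1660' (see _should_lock_gpu_clocks above); on every other configuration
# _maybe_lock_gpu_clocks() acts as a no-op context manager.
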
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--isolated-script-test-output', type=str)
    parser.add_argument('--isolated-script-test-perf-output', type=str)
    parser.add_argument(
        '-f', '--filter', '--isolated-script-test-filter', type=str, help='Test filter.')
    suite_group = parser.add_mutually_exclusive_group()
    suite_group.add_argument(
        '--test-suite', '--suite', help='Test suite to run.', default=DEFAULT_TEST_SUITE)
    suite_group.add_argument(
        '-T',
        '--trace-tests',
        help='Run with the angle_trace_tests test suite.',
        action='store_true')
    parser.add_argument('--xvfb', help='Use xvfb.', action='store_true')
    parser.add_argument(
        '--shard-count',
        help='Number of shards for test splitting. Default is 1.',
        type=int,
        default=1)
    parser.add_argument(
        '--shard-index',
        help='Index of the current shard for test splitting. Default is 0.',
        type=int,
        default=0)
    parser.add_argument(
        '-l', '--log', help='Log output level. Default is %s.' % DEFAULT_LOG, default=DEFAULT_LOG)
    parser.add_argument(
        '-s',
        '--samples-per-test',
        help='Number of samples to run per test. Default is %d.' % DEFAULT_SAMPLES,
        type=int,
        default=DEFAULT_SAMPLES)
    parser.add_argument(
        '-t',
        '--trials-per-sample',
        help='Number of trials to run per sample. Default is %d.' % DEFAULT_TRIALS,
        type=int,
        default=DEFAULT_TRIALS)
    trial_group = parser.add_mutually_exclusive_group()
    trial_group.add_argument(
        '--steps-per-trial', help='Fixed number of steps to run per trial.', type=int)
    trial_group.add_argument(
        '--trial-time',
        help='Number of seconds to run per trial. Default is %d.' % DEFAULT_TRIAL_TIME,
        type=int,
        default=DEFAULT_TRIAL_TIME)
    parser.add_argument(
        '--max-errors',
        help='After this many errors, abort the run. Default is %d.' % DEFAULT_MAX_ERRORS,
        type=int,
        default=DEFAULT_MAX_ERRORS)
    parser.add_argument(
        '--smoke-test-mode', help='Do a quick run to validate correctness.', action='store_true')
    parser.add_argument(
        '--show-test-stdout', help='Prints all test stdout during execution.', action='store_true')
    parser.add_argument(
        '--perf-counters', help='Colon-separated list of extra perf counter metrics.')
    parser.add_argument(
        '-a',
        '--auto-dir',
        help='Run with the most recent test suite found in the build directories.',
        action='store_true')
    parser.add_argument(
        '--split-shard-samples',
        help='Attempt to mitigate variance between machines by splitting samples between shards.',
        action='store_true')
    parser.add_argument(
        '--custom-throttling-temp',
        help='Android: custom thermal throttling with limit set to this temperature (off by default)',
        type=float)

    args, extra_flags = parser.parse_known_args()

    if args.trace_tests:
        args.test_suite = angle_test_util.ANGLE_TRACE_TEST_SUITE

    angle_test_util.SetupLogging(args.log.upper())

    start_time = time.time()

    # Use fast execution for smoke test mode.
    if args.smoke_test_mode:
        args.steps_per_trial = 1
        args.trials_per_sample = 1
        args.samples_per_test = 1

    env = os.environ.copy()

    if angle_test_util.HasGtestShardsAndIndex(env):
        args.shard_count, args.shard_index = angle_test_util.PopGtestShardsAndIndex(env)

    if args.auto_dir:
        test_suite_dir = _find_test_suite_directory(args.test_suite)
        if not test_suite_dir:
            logging.fatal('Could not find test suite: %s' % args.test_suite)
            return EXIT_FAILURE
        else:
            os.chdir(test_suite_dir)

    angle_test_util.Initialize(args.test_suite)

    # Get test list
    exit_code, output, _ = _run_test_suite(args, ['--list-tests', '--verbose'] + extra_flags, env)
    if exit_code != EXIT_SUCCESS:
        logging.fatal('Could not find test list from test output:\n%s' % output)
        sys.exit(EXIT_FAILURE)
    tests = angle_test_util.GetTestsFromOutput(output)

    if args.filter:
        tests = angle_test_util.FilterTests(tests, args.filter)

    # Get tests for this shard (if using sharding args)
    if args.split_shard_samples and args.shard_count >= args.samples_per_test:
        tests = _split_shard_samples(tests, args.samples_per_test, args.shard_count,
                                     args.shard_index)
        assert (len(set(tests)) == len(tests))
        args.samples_per_test = 1
    else:
        tests = _shard_tests(tests, args.shard_count, args.shard_index)

    if not tests:
        logging.error('No tests to run.')
        return EXIT_FAILURE

    logging.info('Running %d test%s' % (len(tests), 's' if len(tests) > 1 else ''))

    try:
        with _maybe_lock_gpu_clocks():
            results, histograms, metrics = _run_tests(tests, args, extra_flags, env)
    except _MaxErrorsException:
        logging.error('Error count exceeded max errors (%d). Aborting.' % args.max_errors)
        return EXIT_FAILURE

    for test in tests:
        assert results.has_result(test)

    if args.isolated_script_test_output:
        results.save_to_output_file(args.test_suite, args.isolated_script_test_output)

        # Uses special output files to match the merge script.
        _save_extra_output_files(args, results, histograms, metrics)

    if args.isolated_script_test_perf_output:
        with open(args.isolated_script_test_perf_output, 'w') as out_file:
            out_file.write(json.dumps(histograms.AsDicts(), indent=2))

    end_time = time.time()
    logging.info('Elapsed time: %.2lf seconds.' % (end_time - start_time))

    if results.has_failures():
        return EXIT_FAILURE
    return EXIT_SUCCESS
if __name__ == '__main__':
    sys.exit(main())
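
# Example invocations (illustrative only; the gtest filter below is a placeholder, all
# flags are defined in main() above):
#
#   vpython3 run_perf_test.py --test-suite angle_perftests -f 'DrawCallPerf*' \
#       --samples-per-test 10 --trials-per-sample 4
#
#   vpython3 run_perf_test.py -T --smoke-test-mode    # quick pass over the trace tests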