#!/usr/bin/env vpython3
# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Runs telemetry benchmarks and gtest perf tests.

If the optional argument --isolated-script-test-output=[FILENAME] is passed
to the script, json is written to that file in the format detailed in
//docs/testing/json-test-results-format.md.

If the optional argument --isolated-script-test-filter=[TEST_NAMES] is passed
to the script, it should be a double-colon-separated ("::") list of test names,
to run just that subset of tests.

This script is intended to be the base command invoked by the isolate,
followed by a subsequent Python script. It could be generalized to
invoke an arbitrary executable.
It currently runs several benchmarks. The benchmarks it will execute are
based on the shard it is running on and the sharding_map_path.

If this is executed with a gtest perf test, the flag --non-telemetry
has to be passed in to the script so the script knows it is running
an executable and not the run_benchmark command.

This script merges test results from all the benchmarks into one
output.json file. The test results and perf results are also put in separate
directories per benchmark. Two files will be present in each directory:
perf_results.json, which contains the perf-specific results (in an unenforced
format that may be histogram or graph JSON), and test_results.json.
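
For example (illustrative; 'speedometer2' stands in for whatever benchmark or
executable name is used), the per-benchmark directory looks like:
  <isolated_out_dir>/speedometer2/perf_results.json
  <isolated_out_dir>/speedometer2/test_results.json
  <isolated_out_dir>/speedometer2/benchmark_log.txt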

TESTING:
To test changes to this script, please run
cd tools/perf
./run_tests ScriptsSmokeTest.testRunPerformanceTests
"""

from __future__ import print_function

import argparse
import json
import os
import requests
import shutil
import sys
import time
import tempfile
import traceback
import six

from collections import OrderedDict

CHROMIUM_SRC_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__),
                 os.path.pardir, os.path.pardir))

PERF_DIR = os.path.join(CHROMIUM_SRC_DIR, 'tools', 'perf')
sys.path.append(PERF_DIR)
import generate_legacy_perf_dashboard_json
from core import path_util

PERF_CORE_DIR = os.path.join(PERF_DIR, 'core')
sys.path.append(PERF_CORE_DIR)
import results_merger

# Add src/testing/ into sys.path for importing xvfb, test_env, and common.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))
import xvfb
import test_env
from scripts import common

SHARD_MAPS_DIRECTORY = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir,
                 'tools', 'perf', 'core', 'shard_maps'))

# See https://crbug.com/923564.
# We want to switch over to using histograms for everything, but converting
# from the format output by gtest perf tests to histograms has introduced
# several problems. So, only perform the conversion on tests that are
# whitelisted and are okay with potentially encountering issues.
GTEST_CONVERSION_WHITELIST = [
  'angle_perftests',
  'base_perftests',
  'blink_heap_perftests',
  'blink_platform_perftests',
  'cc_perftests',
  'components_perftests',
  'command_buffer_perftests',
  'dawn_perf_tests',
  'gpu_perftests',
  'load_library_perf_tests',
  'net_perftests',
  'browser_tests',
  'services_perftests',
  # TODO(jmadill): Remove once migrated. http://anglebug.com/5124
  'standalone_angle_perftests',
  'sync_performance_tests',
  'tracing_perftests',
  'views_perftests',
  'viz_perftests',
  'wayland_client_perftests',
  'xr.vr.common_perftests',
]

# pylint: disable=useless-object-inheritance


class OutputFilePaths(object):
  """Provide paths to where results outputs should be written.

  The process_perf_results.py merge script will later pull all of these
  together, which is why they are not in the standard locations. Also,
  note that because of the OBBS (One Build Bot Step), Telemetry
  has multiple tests running on a single shard, so we need to prefix
  these locations with a directory named by the benchmark name.
  """

  def __init__(self, isolated_out_dir, perf_test_name):
    self.name = perf_test_name
    self.benchmark_path = os.path.join(isolated_out_dir, perf_test_name)

  def SetUp(self):
    os.makedirs(self.benchmark_path)
    return self

  @property
  def perf_results(self):
    return os.path.join(self.benchmark_path, 'perf_results.json')

  @property
  def test_results(self):
    return os.path.join(self.benchmark_path, 'test_results.json')

  @property
  def logs(self):
    return os.path.join(self.benchmark_path, 'benchmark_log.txt')

  @property
  def csv_perf_results(self):
    """Path for csv perf results.

    Note that the chrome.perf waterfall uses the json histogram perf results
    exclusively. csv_perf_results are implemented here in case a user script
    passes --output-format=csv.
    """
    return os.path.join(self.benchmark_path, 'perf_results.csv')


def print_duration(step, start):
  print('Duration of %s: %d seconds' % (step, time.time() - start))


def IsWindows():
  return sys.platform == 'cygwin' or sys.platform.startswith('win')


class GtestCommandGenerator(object):
  def __init__(self, options, override_executable=None, additional_flags=None,
               ignore_shard_env_vars=False):
    self._options = options
    self._override_executable = override_executable
    self._additional_flags = additional_flags or []
    self._ignore_shard_env_vars = ignore_shard_env_vars

  def generate(self, output_dir):
    """Generate the command to run to start the gtest perf test.

    Returns:
      list of strings, the executable and its arguments.
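
    For example (illustrative; assumes the executable is 'base_perftests' and
    the shard environment variables are being ignored), the returned command
    is roughly:
      ['./base_perftests', '--verbose',
       '--test-launcher-print-test-stdio=always',
       '--test-launcher-total-shards=1', '--test-launcher-shard-index=0']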
    """
    return ([self._get_executable()] +
            self._generate_filter_args() +
            self._generate_repeat_args() +
            self._generate_also_run_disabled_tests_args() +
            self._generate_output_args(output_dir) +
            self._generate_shard_args() +
            self._get_additional_flags()
           )

  @property
  def executable_name(self):
    """Gets the platform-independent name of the executable."""
    return self._override_executable or self._options.executable

  def _get_executable(self):
    executable = str(self.executable_name)
    if IsWindows():
      return r'.\%s.exe' % executable
    return './%s' % executable

  def _get_additional_flags(self):
    return self._additional_flags

  def _generate_shard_args(self):
    """Teach the gtest to ignore the sharding environment variables.

    GTEST_SHARD_INDEX and GTEST_TOTAL_SHARDS would confuse the gtest and
    convince it to run only some of its tests. Instead, run all of them.
    """
    if self._ignore_shard_env_vars:
      return ['--test-launcher-total-shards=1', '--test-launcher-shard-index=0']
    return []

  def _generate_filter_args(self):
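    # Example (illustrative): an --isolated-script-test-filter value of
    # 'Suite.TestA::Suite.TestB' becomes
    # ['--gtest_filter=Suite.TestA:Suite.TestB'].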
    if self._options.isolated_script_test_filter:
      filter_list = common.extract_filter_list(
        self._options.isolated_script_test_filter)
      return ['--gtest_filter=' + ':'.join(filter_list)]
    return []

  def _generate_repeat_args(self):
    # TODO(crbug.com/920002): Support --isolated-script-test-repeat.
    return []

  def _generate_also_run_disabled_tests_args(self):
    # TODO(crbug.com/920002): Support
    # --isolated-script-test-also-run-disabled-tests.
    return []

  def _generate_output_args(self, output_dir):
    output_args = []
    if self._options.use_gtest_benchmark_script:
      output_args.append('--output-dir=' + output_dir)
    # These flags make sure that the test outputs perf metrics in the log.
    if '--verbose' not in self._get_additional_flags():
      output_args.append('--verbose')
    if ('--test-launcher-print-test-stdio=always'
        not in self._get_additional_flags()):
      output_args.append('--test-launcher-print-test-stdio=always')
    return output_args


def write_simple_test_results(return_code, output_filepath, benchmark_name):
  # TODO(crbug.com/1115658): Fix to output
  # https://chromium.googlesource.com/chromium/src/+/main/docs/testing/json_test_results_format.md
  # for each test rather than this summary.
  # Append the shard index to the end of the name so that the merge script
  # doesn't blow up trying to merge unmergeable results.
  benchmark_name += '_shard_%s' % os.environ.get('GTEST_SHARD_INDEX', '0')
  output_json = {
      'tests': {
          benchmark_name: {
              'expected': 'PASS',
              'actual': 'FAIL' if return_code else 'PASS',
              'is_unexpected': bool(return_code),
          },
      },
      'interrupted': False,
      'path_delimiter': '/',
      'version': 3,
      'seconds_since_epoch': time.time(),
      'num_failures_by_type': {
          'FAIL': 1 if return_code else 0,
          'PASS': 0 if return_code else 1,
      },
  }
  with open(output_filepath, 'w') as fh:
    json.dump(output_json, fh)


def upload_simple_test_results(return_code, benchmark_name):
  # TODO(crbug.com/1115658): Fix to upload results for each test rather than
  # this summary.
  try:
    with open(os.environ['LUCI_CONTEXT']) as f:
      sink = json.load(f)['result_sink']
  except KeyError:
    return

  if return_code:
    summary = '<p>Benchmark failed with status code %d</p>' % return_code
  else:
    summary = '<p>Benchmark passed</p>'

  result_json = {
      'testResults': [{
        'testId': benchmark_name,
        'expected': not return_code,
        'status': 'FAIL' if return_code else 'PASS',
        'summaryHtml': summary,
        'tags': [{'key': 'exit_code', 'value': str(return_code)}],
      }]
  }

  res = requests.post(
      url='http://%s/prpc/luci.resultsink.v1.Sink/ReportTestResults' %
      sink['address'],
      headers={
          'Content-Type': 'application/json',
          'Accept': 'application/json',
          'Authorization': 'ResultSink %s' % sink['auth_token'],
      },
      data=json.dumps(result_json))
  res.raise_for_status()


def execute_gtest_perf_test(command_generator, output_paths, use_xvfb=False,
                            is_unittest=False, results_label=None):
  start = time.time()

  env = os.environ.copy()
  env['CHROME_HEADLESS'] = '1'
  # TODO(crbug/1138988): Some gtests do not implement unit_test_launcher.cc.
  # As a result, they will not respect the arguments added by
  # _generate_shard_args() and will still use the values of GTEST_SHARD_INDEX
  # and GTEST_TOTAL_SHARDS to run part of the tests.
  # Remove those environment variables as a workaround.
  if command_generator._ignore_shard_env_vars:
    if 'GTEST_TOTAL_SHARDS' in env:
      env.pop('GTEST_TOTAL_SHARDS')
    if 'GTEST_SHARD_INDEX' in env:
      env.pop('GTEST_SHARD_INDEX')

  return_code = 0
  try:
    command = command_generator.generate(output_paths.benchmark_path)
    if use_xvfb:
      # When running with xvfb, we currently output both to stdout and to the
      # file. It would be better to only output to the file to keep the logs
      # clean.
      return_code = xvfb.run_executable(
          command, env, stdoutfile=output_paths.logs)
    else:
      with open(output_paths.logs, 'w') as handle:
        try:
          return_code = test_env.run_command_output_to_handle(
              command, handle, env=env)
        except OSError as e:
          print('Command to run gtest perf test %s failed with an OSError: %s' %
                (output_paths.name, e))
          return_code = 1
    if (not os.path.exists(output_paths.perf_results) and
        os.path.exists(output_paths.logs)):
      # Get the correct json format from the stdout to write to the perf
      # results file if gtest does not generate one.
      results_processor = (
          generate_legacy_perf_dashboard_json.LegacyResultsProcessor())
      graph_json_string = results_processor.GenerateJsonResults(
          output_paths.logs)
      with open(output_paths.perf_results, 'w') as fh:
        fh.write(graph_json_string)
  except Exception:
    traceback.print_exc()
    return_code = 1
  if os.path.exists(output_paths.perf_results):
    executable_name = command_generator.executable_name
    if executable_name.startswith('bin/run_'):
      # The executable is a wrapper used by Fuchsia. Remove the prefix to get
      # the actual executable name.
      executable_name = executable_name[8:]
    if executable_name in GTEST_CONVERSION_WHITELIST:
      with path_util.SysPath(path_util.GetTracingDir()):
        # pylint: disable=no-name-in-module,import-outside-toplevel
        from tracing.value import gtest_json_converter
        # pylint: enable=no-name-in-module,import-outside-toplevel
      gtest_json_converter.ConvertGtestJsonFile(output_paths.perf_results,
                                                label=results_label)
  else:
    print('ERROR: gtest perf test %s did not generate perf output' %
          output_paths.name)
    return_code = 1
  write_simple_test_results(return_code, output_paths.test_results,
                            output_paths.name)
  if not is_unittest:
    upload_simple_test_results(return_code, output_paths.name)

  print_duration(
      'executing gtest %s' % command_generator.executable_name, start)

  return return_code


class _TelemetryFilterArgument(object):
  def __init__(self, filter_string):
    self.benchmark, self.story = filter_string.split('/')


class TelemetryCommandGenerator(object):
  def __init__(self, benchmark, options,
               story_selection_config=None, is_reference=False):
    self.benchmark = benchmark
    self._options = options
    self._story_selection_config = story_selection_config
    self._is_reference = is_reference

  def generate(self, output_dir):
    """Generate the command to run to start the benchmark.

    Args:
      output_dir: The directory the command should be configured to put its
        output files into.

    Returns:
      list of strings, the executable and its arguments.
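
    For example (illustrative; assumes options.executable is 'run_benchmark'
    and the benchmark is 'speedometer2'), the returned command is roughly:
      [sys.executable, 'run_benchmark', 'speedometer2',
       '--output-format=json-test-results', '--output-format=histograms',
       '--output-dir=<output_dir>']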
    """
    return ([sys.executable] + self._options.executable.split(' ') +
            [self.benchmark] +
            self._generate_filter_args() +
            self._generate_also_run_disabled_tests_args() +
            self._generate_output_args(output_dir) +
            self._generate_story_selection_args() +
            # passthrough args must be before reference args and repeat args:
            # crbug.com/928928, crbug.com/894254#c78
            self._get_passthrough_args() +
            self._generate_syslog_args() +
            self._generate_repeat_args() +
            self._generate_reference_build_args() +
            self._generate_results_label_args()
           )

  def _get_passthrough_args(self):
    return self._options.passthrough_args

  def _generate_filter_args(self):
    if self._options.isolated_script_test_filter:
      filter_list = common.extract_filter_list(
          self._options.isolated_script_test_filter)
      filter_arguments = [_TelemetryFilterArgument(f) for f in filter_list]
      applicable_stories = [
          f.story for f in filter_arguments if f.benchmark == self.benchmark]
      # Need to convert this to a valid regex.
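      # Example (illustrative): for this benchmark, filter entries
      # 'my_benchmark/story_1' and 'my_benchmark/story_2' yield
      # '--story-filter=(story_1|story_2)'.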
      filter_regex = '(' + '|'.join(applicable_stories) + ')'
      return ['--story-filter=' + filter_regex]
    return []

  def _generate_repeat_args(self):
    if self._options.isolated_script_test_repeat:
      return ['--pageset-repeat=' + str(
          self._options.isolated_script_test_repeat)]
    return []

  def _generate_also_run_disabled_tests_args(self):
    if self._options.isolated_script_test_also_run_disabled_tests:
      return ['--also-run-disabled-tests']
    return []

  def _generate_output_args(self, output_dir):
    return ['--output-format=json-test-results',
            '--output-format=histograms',
            '--output-dir=' + output_dir]

  def _generate_story_selection_args(self):
    """Returns arguments that limit the stories to be run inside the benchmark.
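
    The story selection config comes from the shard map. Illustrative
    example (keys inferred from the checks below; any subset may appear):
      {'begin': 0, 'end': 10, 'abridged': False}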
    """
    selection_args = []
    if self._story_selection_config:
      if 'begin' in self._story_selection_config:
        selection_args.append('--story-shard-begin-index=%d' % (
            self._story_selection_config['begin']))
      if 'end' in self._story_selection_config:
        selection_args.append('--story-shard-end-index=%d' % (
            self._story_selection_config['end']))
      if 'sections' in self._story_selection_config:
        range_string = self._generate_story_index_ranges(
            self._story_selection_config['sections'])
        if range_string:
          selection_args.append('--story-shard-indexes=%s' % range_string)
      if self._story_selection_config.get('abridged', True):
        selection_args.append('--run-abridged-story-set')
    return selection_args

  def _generate_syslog_args(self):
    if self._options.per_test_logs_dir:
      isolated_out_dir = os.path.dirname(
          self._options.isolated_script_test_output)
      return ['--logs-dir', os.path.join(
          isolated_out_dir,
          self.benchmark)]
    return []

  def _generate_story_index_ranges(self, sections):
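    # Example (illustrative): sections [{'begin': 0, 'end': 5},
    # {'begin': 10, 'end': 11}] produce the range string '0-5,10'.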
    range_string = ''
    for section in sections:
      begin = section.get('begin', '')
      end = section.get('end', '')
      # If there is only one story in the range, we keep only its index.
      # In general, we expect either begin or end, or both.
      if begin != '' and end != '' and end - begin == 1:
        new_range = str(begin)
      elif begin != '' or end != '':
        new_range = '%s-%s' % (str(begin), str(end))
      else:
        raise ValueError('Index ranges in "sections" in shard map should have '
                         'at least one of "begin" and "end": %s' % str(section))
      if range_string:
        range_string += ',%s' % new_range
      else:
        range_string = new_range
    return range_string

  def _generate_reference_build_args(self):
    if self._is_reference:
      reference_browser_flag = '--browser=reference'
      # TODO(crbug.com/1038137): Make the logic generic once more reference
      # settings are added.
      if '--browser=android-chrome-bundle' in self._get_passthrough_args():
        reference_browser_flag = '--browser=reference-android-chrome-bundle'
      return [reference_browser_flag,
              '--max-failures=5']
    return []

  def _generate_results_label_args(self):
    if self._options.results_label:
      return ['--results-label=' + self._options.results_label]
    return []


def execute_telemetry_benchmark(
    command_generator, output_paths, use_xvfb=False,
    return_exit_code_zero=False):
  start = time.time()

  env = os.environ.copy()
  env['CHROME_HEADLESS'] = '1'

  return_code = 1
  temp_dir = tempfile.mkdtemp('telemetry')
  infra_failure = False
  try:
    command = command_generator.generate(temp_dir)
    if use_xvfb:
      # When running with xvfb, we currently output both to stdout and to the
      # file. It would be better to only output to the file to keep the logs
      # clean.
      return_code = xvfb.run_executable(
          command, env=env, stdoutfile=output_paths.logs)
    else:
      with open(output_paths.logs, 'w') as handle:
        return_code = test_env.run_command_output_to_handle(
            command, handle, env=env)
    expected_results_filename = os.path.join(temp_dir, 'test-results.json')
    if os.path.exists(expected_results_filename):
      shutil.move(expected_results_filename, output_paths.test_results)
    else:
      common.write_interrupted_test_results_to(output_paths.test_results, start)
    expected_perf_filename = os.path.join(temp_dir, 'histograms.json')
    shutil.move(expected_perf_filename, output_paths.perf_results)

    csv_file_path = os.path.join(temp_dir, 'results.csv')
    if os.path.isfile(csv_file_path):
      shutil.move(csv_file_path, output_paths.csv_perf_results)
  except Exception:
    print('The following exception may have prevented the code from '
          'outputting structured test results and perf results:')
    print(traceback.format_exc())
    infra_failure = True
  finally:
    # On swarming bots, don't remove the output directory, since Result Sink
    # might still be uploading files to Result DB. Also, swarming bots
    # automatically clean up at the end of each task.
    if 'SWARMING_TASK_ID' not in os.environ:
      # Pass ignore_errors=True because otherwise rmtree may fail when leaky
      # test processes are still holding open handles to files under
      # |temp_dir|. For example, see crbug.com/865896.
      shutil.rmtree(temp_dir, ignore_errors=True)

  print_duration('executing benchmark %s' % command_generator.benchmark, start)

  if infra_failure:
    print('There was an infrastructure error encountered during the run. '
          'Please check the logs above for details.')
    return 1

  # Telemetry sets the exit code to -1 to indicate that no stories were run.
  # This becomes 255 on Linux because exit codes are taken modulo 256:
  # -1 % 256 == 255.
  # TODO(crbug.com/1019139): Make 111 be the exit code that means
  # "no stories were run".
  if return_code in (111, -1, 255):
    print('Exit code %s indicates that no stories were run, so we are marking '
          'this as a success.' % return_code)
    return 0
  if return_code:
    if return_exit_code_zero:
      print('run_benchmark returned exit code ' + str(return_code) +
            ' which indicates there were test failures in the run.')
      return 0
    return return_code
  return 0


def parse_arguments(args):
  parser = argparse.ArgumentParser()
  parser.add_argument('executable', help='The name of the executable to run.')
  parser.add_argument(
      '--isolated-script-test-output', required=True)
  # The following two flags may be passed in sometimes by Pinpoint
  # or by the recipe, but they don't do anything. crbug.com/927482.
  parser.add_argument(
      '--isolated-script-test-chartjson-output', required=False)
  parser.add_argument(
      '--isolated-script-test-perf-output', required=False)

  parser.add_argument(
      '--isolated-script-test-filter', type=str, required=False)

  # Note that the following three arguments are only supported by Telemetry
  # tests right now. See crbug.com/920002.
  parser.add_argument(
      '--isolated-script-test-repeat', type=int, required=False)
  parser.add_argument(
      '--isolated-script-test-launcher-retry-limit', type=int, required=False,
      choices=[0])  # Telemetry does not support retries. crbug.com/894254#c21
  parser.add_argument(
      '--isolated-script-test-also-run-disabled-tests',
      default=False, action='store_true', required=False)
  parser.add_argument('--xvfb', help='Start xvfb.', action='store_true')
  parser.add_argument('--non-telemetry',
                      help='Type of perf test', type=bool, default=False)
  parser.add_argument('--gtest-benchmark-name',
                      help='Name of the gtest benchmark', type=str,
                      required=False)
  parser.add_argument('--use-gtest-benchmark-script',
                      help='Whether gtest is invoked via benchmark script.',
                      default=False, action='store_true')

  parser.add_argument('--benchmarks',
                      help='Comma-separated list of benchmark names'
                      ' to run in lieu of indexing into our benchmark bot maps',
                      required=False)
  # crbug.com/1236245: This allows for per-benchmark device logs.
  parser.add_argument('--per-test-logs-dir',
                      help='Require --logs-dir args for test', required=False,
                      default=False, action='store_true')
  # Some executions may have a different sharding scheme and/or set of tests.
  # These files must live in src/tools/perf/core/shard_maps.
  parser.add_argument('--test-shard-map-filename', type=str, required=False)
  parser.add_argument('--run-ref-build',
                      help='Run test on reference browser', action='store_true')
  parser.add_argument('--passthrough-arg',
                      help='Arguments to pass directly through to the test '
                      'executable.', action='append',
                      dest='passthrough_args',
                      default=[])
  parser.add_argument('--use-dynamic-shards',
                      help='If set, use dynamic shardmap instead of the file.',
                      action='store_true',
                      required=False
                      )
  parser.add_argument('--dynamic-shardmap',
                      help='The dynamically generated shardmap string used to '
                      'replace the static shardmap file.',
                      type=str,
                      required=False)
  parser.add_argument('--ignore-benchmark-exit-code',
                      help='If set, return exit code 0 even if there '
                      'are benchmark failures.',
                      action='store_true',
                      required=False
                      )
  parser.add_argument('--results-label',
                      help='If set for a non-telemetry test, adds a label to '
                      'the result histograms.',
                      type=str,
                      required=False
                      )
  options, leftover_args = parser.parse_known_args(args)
  options.passthrough_args.extend(leftover_args)
  return options


def main(sys_args):
  args = sys_args[1:]  # Skip program name.
  options = parse_arguments(args)
  isolated_out_dir = os.path.dirname(options.isolated_script_test_output)
  overall_return_code = 0
  # This is a list of test results files to be merged into a standard
  # output.json file for use by infrastructure including FindIt.
  # This list should not contain reference build runs
  # since we do not monitor those. Also, merging reference build test results
  # with standard build results may not work properly.
  test_results_files = []

  print('Running a series of performance test subprocesses. Logs, performance\n'
        'results, and test results JSON will be saved in a subfolder of the\n'
        'isolated output directory. Inside the hash marks in the following\n'
        'lines is the name of the subfolder to find results in.\n')

  if options.non_telemetry:
    benchmark_name = options.gtest_benchmark_name
    passthrough_args = options.passthrough_args
    # crbug/1146949#c15
    # When Pinpoint passes all arguments to Swarming through an HTTP request,
    # the passthrough_args are converted into a comma-separated string.
    if passthrough_args and isinstance(passthrough_args, six.text_type):
      passthrough_args = passthrough_args.split(',')
    # With --non-telemetry, the gtest executable file path is passed in as
    # options.executable, unlike when running from a shard map. Thus, we don't
    # override the executable as we do when running from a shard map.
    command_generator = GtestCommandGenerator(
        options, additional_flags=passthrough_args, ignore_shard_env_vars=True)
    # Fall back to the name of the executable if the flag isn't set.
    # TODO(crbug.com/870899): remove fallback logic and raise parser error if
    # --non-telemetry is set but --gtest-benchmark-name is not set once Pinpoint
    # is converted to always pass the --gtest-benchmark-name flag.
    if not benchmark_name:
      benchmark_name = options.executable
    output_paths = OutputFilePaths(isolated_out_dir, benchmark_name).SetUp()
    print('\n### {folder} ###'.format(folder=benchmark_name))
    overall_return_code = execute_gtest_perf_test(
        command_generator, output_paths, options.xvfb,
        results_label=options.results_label)
    test_results_files.append(output_paths.test_results)
  else:
    if options.use_dynamic_shards:
      shard_map_str = options.dynamic_shardmap
      shard_map = json.loads(shard_map_str, object_pairs_hook=OrderedDict)
      shard_map_path = os.path.join(SHARD_MAPS_DIRECTORY,
                                    options.test_shard_map_filename)
      with open(shard_map_path, 'w') as f:
        json.dump(shard_map, f, indent=4, separators=(',', ': '))
      shutil.copyfile(
          shard_map_path,
          os.path.join(isolated_out_dir, 'benchmarks_shard_map.json'))
      overall_return_code = _run_benchmarks_on_shardmap(
          shard_map, options, isolated_out_dir, test_results_files
      )
    # If the user has supplied a list of benchmark names, execute those instead
    # of using the shard map.
    elif options.benchmarks:
      benchmarks = options.benchmarks.split(',')
      for benchmark in benchmarks:
        output_paths = OutputFilePaths(isolated_out_dir, benchmark).SetUp()
        command_generator = TelemetryCommandGenerator(
            benchmark, options)
        print('\n### {folder} ###'.format(folder=benchmark))
        return_code = execute_telemetry_benchmark(
            command_generator, output_paths, options.xvfb,
            options.ignore_benchmark_exit_code)
        overall_return_code = return_code or overall_return_code
        test_results_files.append(output_paths.test_results)
      if options.run_ref_build:
        print('Not running reference build. --run-ref-build argument is only '
              'supported for sharded benchmarks. It is simple to support '
              'this for unsharded --benchmarks if needed.')
    elif options.test_shard_map_filename:
      # First determine what shard we are running on to know how to
      # index into the bot map to get the list of telemetry benchmarks to run.
      shard_map_path = os.path.join(SHARD_MAPS_DIRECTORY,
                                    options.test_shard_map_filename)
      # Copy the sharding map file to isolated_out_dir so that the merge script
      # can collect it later.
      shutil.copyfile(
          shard_map_path,
          os.path.join(isolated_out_dir, 'benchmarks_shard_map.json'))
      with open(shard_map_path) as f:
        shard_map = json.load(f)
      overall_return_code = _run_benchmarks_on_shardmap(
          shard_map, options, isolated_out_dir, test_results_files
      )
    else:
      raise Exception('Telemetry tests must provide either a shard map or a '
                      '--benchmarks list so that we know which stories to run.')

  test_results_list = []
  for test_results_file in test_results_files:
    if os.path.exists(test_results_file):
      with open(test_results_file, 'r') as fh:
        test_results_list.append(json.load(fh))
  merged_test_results = results_merger.merge_test_results(test_results_list)
  with open(options.isolated_script_test_output, 'w') as f:
    json.dump(merged_test_results, f)

  return overall_return_code


def _run_benchmarks_on_shardmap(
    shard_map, options, isolated_out_dir, test_results_files):
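  # Illustrative shard map shape (inferred from how this function reads it;
  # benchmark names and executable paths are hypothetical):
  #   {
  #     "0": {"benchmarks": {"speedometer2": {"abridged": false}}},
  #     "1": {"executables": {"my_perftests": {
  #         "path": "./my_perftests", "arguments": ["--foo"]}}}
  #   }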
  overall_return_code = 0
  shard_index = None
  env = os.environ.copy()
  if 'GTEST_SHARD_INDEX' in env:
    shard_index = env['GTEST_SHARD_INDEX']
  # TODO(crbug.com/972844): shard environment variables are not specified
  # for single-shard runs.
  if not shard_index:
    shard_map_has_multiple_shards = bool(shard_map.get('1', False))
    if not shard_map_has_multiple_shards:
      shard_index = '0'
  if not shard_index:
    raise Exception(
        'Sharded Telemetry perf tests must either specify a --benchmarks '
        'list or have the GTEST_SHARD_INDEX environment variable present.')
  shard_configuration = shard_map[shard_index]
  assert ('benchmarks' in shard_configuration or
          'executables' in shard_configuration), (
      'Every shard must have benchmarks or executables associated '
      'with it.')
  if 'benchmarks' in shard_configuration:
    benchmarks_and_configs = shard_configuration['benchmarks']
    for (benchmark, story_selection_config) in benchmarks_and_configs.items():
      # Need to run the benchmark on both the latest browser and the
      # reference build.
      output_paths = OutputFilePaths(isolated_out_dir, benchmark).SetUp()
      command_generator = TelemetryCommandGenerator(
          benchmark, options,
          story_selection_config=story_selection_config)
      print('\n### {folder} ###'.format(folder=benchmark))
      return_code = execute_telemetry_benchmark(
          command_generator, output_paths, options.xvfb,
          options.ignore_benchmark_exit_code)
      overall_return_code = return_code or overall_return_code
      test_results_files.append(output_paths.test_results)
      if options.run_ref_build:
        reference_benchmark_foldername = benchmark + '.reference'
        reference_output_paths = OutputFilePaths(
            isolated_out_dir, reference_benchmark_foldername).SetUp()
        reference_command_generator = TelemetryCommandGenerator(
            benchmark, options,
            story_selection_config=story_selection_config,
            is_reference=True)
        print('\n### {folder} ###'.format(
            folder=reference_benchmark_foldername))
        # We intentionally ignore the return code and test results of the
        # reference build.
        execute_telemetry_benchmark(
            reference_command_generator, reference_output_paths,
            options.xvfb, options.ignore_benchmark_exit_code)
  if 'executables' in shard_configuration:
    names_and_configs = shard_configuration['executables']
    for (name, configuration) in names_and_configs.items():
      additional_flags = []
      if 'arguments' in configuration:
        additional_flags = configuration['arguments']
      command_generator = GtestCommandGenerator(
          options, override_executable=configuration['path'],
          additional_flags=additional_flags, ignore_shard_env_vars=True)
      output_paths = OutputFilePaths(isolated_out_dir, name).SetUp()
      print('\n### {folder} ###'.format(folder=name))
      return_code = execute_gtest_perf_test(
          command_generator, output_paths, options.xvfb)
      overall_return_code = return_code or overall_return_code
      test_results_files.append(output_paths.test_results)

  return overall_return_code


# This is not really a "script test" so it does not need to manually add
# any additional compile targets.
def main_compile_targets(args):
  json.dump([], args.output)


if __name__ == '__main__':
  # Conform minimally to the protocol defined by ScriptTest.
  if 'compile_targets' in sys.argv:
    funcs = {
      'run': None,
      'compile_targets': main_compile_targets,
    }
    sys.exit(common.run_script(sys.argv[1:], funcs))

  sys.exit(main(sys.argv))