xref: /aosp_15_r20/external/cronet/testing/unexpected_passes_common/queries.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1*6777b538SAndroid Build Coastguard Worker# Copyright 2020 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker# Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker# found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker"""Methods related to querying the ResultDB BigQuery tables."""
5*6777b538SAndroid Build Coastguard Worker
6*6777b538SAndroid Build Coastguard Workerimport concurrent.futures
7*6777b538SAndroid Build Coastguard Workerimport json
8*6777b538SAndroid Build Coastguard Workerimport logging
9*6777b538SAndroid Build Coastguard Workerimport math
10*6777b538SAndroid Build Coastguard Workerimport multiprocessing.pool
11*6777b538SAndroid Build Coastguard Workerimport os
12*6777b538SAndroid Build Coastguard Workerimport subprocess
13*6777b538SAndroid Build Coastguard Workerimport threading
14*6777b538SAndroid Build Coastguard Workerimport time
15*6777b538SAndroid Build Coastguard Workerfrom typing import Any, Collection, Dict, Iterable, List, Optional, Tuple, Union
16*6777b538SAndroid Build Coastguard Worker
17*6777b538SAndroid Build Coastguard Workerimport six
18*6777b538SAndroid Build Coastguard Worker
19*6777b538SAndroid Build Coastguard Workerfrom typ import expectations_parser
20*6777b538SAndroid Build Coastguard Workerfrom typ import json_results
21*6777b538SAndroid Build Coastguard Workerfrom unexpected_passes_common import builders as builders_module
22*6777b538SAndroid Build Coastguard Workerfrom unexpected_passes_common import constants
23*6777b538SAndroid Build Coastguard Workerfrom unexpected_passes_common import data_types
24*6777b538SAndroid Build Coastguard Workerfrom unexpected_passes_common import expectations
25*6777b538SAndroid Build Coastguard Worker
# Number of builds to sample per builder when the caller does not provide a
# truthy sample count of its own.
DEFAULT_NUM_SAMPLES = 100
# Largest value that fits in a signed 32-bit integer.
# NOTE(review): presumably handed to BigQuery as the row cap so output is never
# truncated - confirm against the command-generation code.
MAX_ROWS = (2**31) - 1
# NOTE(review): presumably the number of attempts made for a query before
# giving up - confirm against the retry logic.
MAX_QUERY_TRIES = 3
# Used to prevent us from triggering too many queries simultaneously and causing
# a bunch of rate limit errors. Anything below 1.5 seemed to result in enough
# rate limit errors to cause problems. Raising above that for safety.
# NOTE(review): the unit is presumably seconds - confirm where it is consumed.
QUERY_DELAY = 2
# The target number of results/rows per query when running in large query mode.
# Higher values = longer individual query times and higher chances of running
# out of memory in BigQuery. Lower values = more parallelization overhead and
# more issues with rate limit errors.
TARGET_RESULTS_PER_QUERY = 20000

# Subquery for getting all try builds that were used for CL submission. 30 days
# is chosen because the ResultDB tables we pull data from only keep data around
# for 30 days. The {project_view} placeholder must be filled in before the
# template can be embedded in a query.
SUBMITTED_BUILDS_TEMPLATE = """\
    SELECT
      CONCAT("build-", CAST(unnested_builds.id AS STRING)) as id
    FROM
      `commit-queue.{project_view}.attempts`,
      UNNEST(builds) as unnested_builds,
      UNNEST(gerrit_changes) as unnested_changes
    WHERE
      unnested_builds.host = "cr-buildbucket.appspot.com"
      AND unnested_changes.submit_status = "SUCCESS"
      AND start_time > TIMESTAMP_SUB(CURRENT_TIMESTAMP(),
                                     INTERVAL 30 DAY)"""

# A single row/result from a BigQuery query, decoded from its JSON form.
QueryResult = Dict[str, Any]
# Query parameters in the format {type: {key: value}}, e.g.
# {'INT64': {'num_builds': 5}}.
QueryParameters = Dict[str, Dict[str, Any]]
57*6777b538SAndroid Build Coastguard Worker
58*6777b538SAndroid Build Coastguard Worker# pylint: disable=super-with-arguments,useless-object-inheritance
59*6777b538SAndroid Build Coastguard Worker
60*6777b538SAndroid Build Coastguard Worker
61*6777b538SAndroid Build Coastguard Workerclass BigQueryQuerier(object):
62*6777b538SAndroid Build Coastguard Worker  """Class to handle all BigQuery queries for a script invocation."""
63*6777b538SAndroid Build Coastguard Worker
64*6777b538SAndroid Build Coastguard Worker  def __init__(self,
65*6777b538SAndroid Build Coastguard Worker               suite: Optional[str],
66*6777b538SAndroid Build Coastguard Worker               project: str,
67*6777b538SAndroid Build Coastguard Worker               num_samples: int,
68*6777b538SAndroid Build Coastguard Worker               large_query_mode: bool,
69*6777b538SAndroid Build Coastguard Worker               num_jobs: Optional[int],
70*6777b538SAndroid Build Coastguard Worker               use_batching: bool = True):
71*6777b538SAndroid Build Coastguard Worker    """
72*6777b538SAndroid Build Coastguard Worker    Args:
73*6777b538SAndroid Build Coastguard Worker      suite: A string containing the name of the suite that is being queried
74*6777b538SAndroid Build Coastguard Worker          for. Can be None if there is no differentiation between different
75*6777b538SAndroid Build Coastguard Worker          suites.
76*6777b538SAndroid Build Coastguard Worker      project: A string containing the billing project to use for BigQuery.
77*6777b538SAndroid Build Coastguard Worker      num_samples: An integer containing the number of builds to pull results
78*6777b538SAndroid Build Coastguard Worker          from.
79*6777b538SAndroid Build Coastguard Worker      large_query_mode: A boolean indicating whether large query mode should be
80*6777b538SAndroid Build Coastguard Worker          used. In this mode, an initial, smaller query is made and its results
81*6777b538SAndroid Build Coastguard Worker          are used to perform additional filtering on a second, larger query in
82*6777b538SAndroid Build Coastguard Worker          BigQuery. This works around hitting a hard memory limit when running
83*6777b538SAndroid Build Coastguard Worker          the ORDER BY clause.
84*6777b538SAndroid Build Coastguard Worker      num_jobs: An integer specifying how many jobs to run in parallel. If None,
85*6777b538SAndroid Build Coastguard Worker          all jobs will be run in parallel at the same time.
86*6777b538SAndroid Build Coastguard Worker      use_batching: Whether to use batching when running queries. Batching
87*6777b538SAndroid Build Coastguard Worker          allows a much greater amount of parallelism due to avoiding usage
88*6777b538SAndroid Build Coastguard Worker          limits, but also adds a variable amount of overhead since there need
89*6777b538SAndroid Build Coastguard Worker          to be free resources.
90*6777b538SAndroid Build Coastguard Worker    """
91*6777b538SAndroid Build Coastguard Worker    self._suite = suite
92*6777b538SAndroid Build Coastguard Worker    self._project = project
93*6777b538SAndroid Build Coastguard Worker    self._num_samples = num_samples or DEFAULT_NUM_SAMPLES
94*6777b538SAndroid Build Coastguard Worker    self._large_query_mode = large_query_mode
95*6777b538SAndroid Build Coastguard Worker    self._num_jobs = num_jobs
96*6777b538SAndroid Build Coastguard Worker    self._use_batching = use_batching
97*6777b538SAndroid Build Coastguard Worker
98*6777b538SAndroid Build Coastguard Worker    assert self._num_samples > 0
99*6777b538SAndroid Build Coastguard Worker    assert (self._num_jobs is None or self._num_jobs > 0)
100*6777b538SAndroid Build Coastguard Worker
101*6777b538SAndroid Build Coastguard Worker  def FillExpectationMapForBuilders(
102*6777b538SAndroid Build Coastguard Worker      self, expectation_map: data_types.TestExpectationMap,
103*6777b538SAndroid Build Coastguard Worker      builders: Collection[data_types.BuilderEntry]
104*6777b538SAndroid Build Coastguard Worker  ) -> Dict[str, data_types.ResultListType]:
105*6777b538SAndroid Build Coastguard Worker    """Fills |expectation_map| with results from |builders|.
106*6777b538SAndroid Build Coastguard Worker
107*6777b538SAndroid Build Coastguard Worker    Args:
108*6777b538SAndroid Build Coastguard Worker      expectation_map: A data_types.TestExpectationMap. Will be modified
109*6777b538SAndroid Build Coastguard Worker          in-place.
110*6777b538SAndroid Build Coastguard Worker      builders: An iterable of data_types.BuilderEntry containing the builders
111*6777b538SAndroid Build Coastguard Worker          to query.
112*6777b538SAndroid Build Coastguard Worker
113*6777b538SAndroid Build Coastguard Worker    Returns:
114*6777b538SAndroid Build Coastguard Worker      A dict containing any results that were retrieved that did not have a
115*6777b538SAndroid Build Coastguard Worker      matching expectation in |expectation_map| in the following format:
116*6777b538SAndroid Build Coastguard Worker      {
117*6777b538SAndroid Build Coastguard Worker        |builder_type|:|builder_name| (str): [
118*6777b538SAndroid Build Coastguard Worker          result1 (data_types.Result),
119*6777b538SAndroid Build Coastguard Worker          result2 (data_types.Result),
120*6777b538SAndroid Build Coastguard Worker          ...
121*6777b538SAndroid Build Coastguard Worker        ],
122*6777b538SAndroid Build Coastguard Worker      }
123*6777b538SAndroid Build Coastguard Worker    """
124*6777b538SAndroid Build Coastguard Worker    start_time = time.time()
125*6777b538SAndroid Build Coastguard Worker    logging.debug('Starting to fill expectation map for %d builders',
126*6777b538SAndroid Build Coastguard Worker                  len(builders))
127*6777b538SAndroid Build Coastguard Worker    assert isinstance(expectation_map, data_types.TestExpectationMap)
128*6777b538SAndroid Build Coastguard Worker    # Ensure that all the builders are of the same type since we make some
129*6777b538SAndroid Build Coastguard Worker    # assumptions about that later on.
130*6777b538SAndroid Build Coastguard Worker    assert builders
131*6777b538SAndroid Build Coastguard Worker    builder_type = None
132*6777b538SAndroid Build Coastguard Worker    for b in builders:
133*6777b538SAndroid Build Coastguard Worker      if builder_type is None:
134*6777b538SAndroid Build Coastguard Worker        builder_type = b.builder_type
135*6777b538SAndroid Build Coastguard Worker      else:
136*6777b538SAndroid Build Coastguard Worker        assert b.builder_type == builder_type
137*6777b538SAndroid Build Coastguard Worker
138*6777b538SAndroid Build Coastguard Worker    # Filter out any builders that we can easily determine do not currently
139*6777b538SAndroid Build Coastguard Worker    # produce data we care about.
140*6777b538SAndroid Build Coastguard Worker    builders = self._FilterOutInactiveBuilders(builders, builder_type)
141*6777b538SAndroid Build Coastguard Worker
142*6777b538SAndroid Build Coastguard Worker    # If we don't have an explicit number of jobs set, spin up a separate
143*6777b538SAndroid Build Coastguard Worker    # process for each query/add step. This is wasteful in the sense that we'll
144*6777b538SAndroid Build Coastguard Worker    # have a bunch of idle processes once faster steps start finishing, but
145*6777b538SAndroid Build Coastguard Worker    # ensures that we start slow queries early and avoids the overhead of
146*6777b538SAndroid Build Coastguard Worker    # passing large amounts of data between processes. See crbug.com/1182459 for
147*6777b538SAndroid Build Coastguard Worker    # more information on performance considerations.
148*6777b538SAndroid Build Coastguard Worker    num_jobs = self._num_jobs or len(builders)
149*6777b538SAndroid Build Coastguard Worker    args = [(b, expectation_map) for b in builders]
150*6777b538SAndroid Build Coastguard Worker
151*6777b538SAndroid Build Coastguard Worker    tmp_expectation_map = data_types.TestExpectationMap()
152*6777b538SAndroid Build Coastguard Worker    all_unmatched_results = {}
153*6777b538SAndroid Build Coastguard Worker
154*6777b538SAndroid Build Coastguard Worker    with concurrent.futures.ThreadPoolExecutor(max_workers=num_jobs) as pool:
155*6777b538SAndroid Build Coastguard Worker      for result in pool.map(self._QueryAddCombined, args):
156*6777b538SAndroid Build Coastguard Worker        unmatched_results, prefixed_builder_name, merge_map = result
157*6777b538SAndroid Build Coastguard Worker        tmp_expectation_map.Merge(merge_map, expectation_map)
158*6777b538SAndroid Build Coastguard Worker        if unmatched_results:
159*6777b538SAndroid Build Coastguard Worker          all_unmatched_results[prefixed_builder_name] = unmatched_results
160*6777b538SAndroid Build Coastguard Worker
161*6777b538SAndroid Build Coastguard Worker    expectation_map.clear()
162*6777b538SAndroid Build Coastguard Worker    expectation_map.update(tmp_expectation_map)
163*6777b538SAndroid Build Coastguard Worker
164*6777b538SAndroid Build Coastguard Worker    logging.debug('Filling expectation map took %f', time.time() - start_time)
165*6777b538SAndroid Build Coastguard Worker    return all_unmatched_results
166*6777b538SAndroid Build Coastguard Worker
167*6777b538SAndroid Build Coastguard Worker  def _FilterOutInactiveBuilders(self,
168*6777b538SAndroid Build Coastguard Worker                                 builders: Iterable[data_types.BuilderEntry],
169*6777b538SAndroid Build Coastguard Worker                                 builder_type: str
170*6777b538SAndroid Build Coastguard Worker                                 ) -> List[data_types.BuilderEntry]:
171*6777b538SAndroid Build Coastguard Worker    """Filters out any builders that are not producing data.
172*6777b538SAndroid Build Coastguard Worker
173*6777b538SAndroid Build Coastguard Worker    This helps save time on querying, as querying for the builder names is cheap
174*6777b538SAndroid Build Coastguard Worker    while querying for individual results from a builder is expensive. Filtering
175*6777b538SAndroid Build Coastguard Worker    out inactive builders lets us preemptively remove builders that we know we
176*6777b538SAndroid Build Coastguard Worker    won't get any data from, and thus don't need to waste time querying.
177*6777b538SAndroid Build Coastguard Worker
178*6777b538SAndroid Build Coastguard Worker    Args:
179*6777b538SAndroid Build Coastguard Worker      builders: An iterable of data_types.BuilderEntry containing the builders
180*6777b538SAndroid Build Coastguard Worker          to query.
181*6777b538SAndroid Build Coastguard Worker      builder_type: A string containing the type of builder to query, either
182*6777b538SAndroid Build Coastguard Worker          "ci" or "try".
183*6777b538SAndroid Build Coastguard Worker
184*6777b538SAndroid Build Coastguard Worker    Returns:
185*6777b538SAndroid Build Coastguard Worker      A copy of |builders| with any inactive builders removed.
186*6777b538SAndroid Build Coastguard Worker    """
187*6777b538SAndroid Build Coastguard Worker    include_internal_builders = any(b.is_internal_builder for b in builders)
188*6777b538SAndroid Build Coastguard Worker    query = self._GetActiveBuilderQuery(
189*6777b538SAndroid Build Coastguard Worker        builder_type, include_internal_builders).encode('utf-8')
190*6777b538SAndroid Build Coastguard Worker    cmd = GenerateBigQueryCommand(self._project, {}, batch=False)
191*6777b538SAndroid Build Coastguard Worker    with open(os.devnull, 'w', newline='', encoding='utf-8') as devnull:
192*6777b538SAndroid Build Coastguard Worker      p = subprocess.Popen(cmd,
193*6777b538SAndroid Build Coastguard Worker                           stdout=subprocess.PIPE,
194*6777b538SAndroid Build Coastguard Worker                           stderr=devnull,
195*6777b538SAndroid Build Coastguard Worker                           stdin=subprocess.PIPE)
196*6777b538SAndroid Build Coastguard Worker    stdout, _ = p.communicate(query)
197*6777b538SAndroid Build Coastguard Worker    if not isinstance(stdout, six.string_types):
198*6777b538SAndroid Build Coastguard Worker      stdout = stdout.decode('utf-8')
199*6777b538SAndroid Build Coastguard Worker    results = json.loads(stdout)
200*6777b538SAndroid Build Coastguard Worker
201*6777b538SAndroid Build Coastguard Worker    # We filter from an initial list instead of directly using the returned
202*6777b538SAndroid Build Coastguard Worker    # builders since there are cases where they aren't equivalent, such as for
203*6777b538SAndroid Build Coastguard Worker    # GPU tests if a particular builder doesn't run a particular suite. This
204*6777b538SAndroid Build Coastguard Worker    # could be encapsulated in the query, but this would cause the query to take
205*6777b538SAndroid Build Coastguard Worker    # longer. Since generating the initial list locally is basically
206*6777b538SAndroid Build Coastguard Worker    # instantenous and we're optimizing for runtime, filtering is the better
207*6777b538SAndroid Build Coastguard Worker    # option.
208*6777b538SAndroid Build Coastguard Worker    active_builders = {r['builder_name'] for r in results}
209*6777b538SAndroid Build Coastguard Worker    filtered_builders = [b for b in builders if b.name in active_builders]
210*6777b538SAndroid Build Coastguard Worker    return filtered_builders
211*6777b538SAndroid Build Coastguard Worker
212*6777b538SAndroid Build Coastguard Worker  def _QueryAddCombined(
213*6777b538SAndroid Build Coastguard Worker      self,
214*6777b538SAndroid Build Coastguard Worker      inputs: Tuple[data_types.BuilderEntry, data_types.TestExpectationMap]
215*6777b538SAndroid Build Coastguard Worker  ) -> Tuple[data_types.ResultListType, str, data_types.TestExpectationMap]:
216*6777b538SAndroid Build Coastguard Worker    """Combines the query and add steps for use in a process pool.
217*6777b538SAndroid Build Coastguard Worker
218*6777b538SAndroid Build Coastguard Worker    Args:
219*6777b538SAndroid Build Coastguard Worker      inputs: An iterable of inputs for QueryBuilder() and
220*6777b538SAndroid Build Coastguard Worker          data_types.TestExpectationMap.AddResultList(). Should be in the order:
221*6777b538SAndroid Build Coastguard Worker          builder expectation_map
222*6777b538SAndroid Build Coastguard Worker
223*6777b538SAndroid Build Coastguard Worker    Returns:
224*6777b538SAndroid Build Coastguard Worker      The output of data_types.TestExpectationMap.AddResultList().
225*6777b538SAndroid Build Coastguard Worker    """
226*6777b538SAndroid Build Coastguard Worker    start_time = time.time()
227*6777b538SAndroid Build Coastguard Worker    builder, expectation_map = inputs
228*6777b538SAndroid Build Coastguard Worker    logging.debug('Starting query for builder %s', builder.name)
229*6777b538SAndroid Build Coastguard Worker    results, expectation_files = self.QueryBuilder(builder)
230*6777b538SAndroid Build Coastguard Worker    logging.debug('Query for builder %s took %f', builder.name,
231*6777b538SAndroid Build Coastguard Worker                  time.time() - start_time)
232*6777b538SAndroid Build Coastguard Worker
233*6777b538SAndroid Build Coastguard Worker    start_time = time.time()
234*6777b538SAndroid Build Coastguard Worker    prefixed_builder_name = '%s/%s:%s' % (builder.project, builder.builder_type,
235*6777b538SAndroid Build Coastguard Worker                                          builder.name)
236*6777b538SAndroid Build Coastguard Worker    logging.debug('Starting data processing for builder %s', builder.name)
237*6777b538SAndroid Build Coastguard Worker    unmatched_results = expectation_map.AddResultList(prefixed_builder_name,
238*6777b538SAndroid Build Coastguard Worker                                                      results,
239*6777b538SAndroid Build Coastguard Worker                                                      expectation_files)
240*6777b538SAndroid Build Coastguard Worker    logging.debug('Data processing for builder %s took %f', builder.name,
241*6777b538SAndroid Build Coastguard Worker                  time.time() - start_time)
242*6777b538SAndroid Build Coastguard Worker
243*6777b538SAndroid Build Coastguard Worker    return unmatched_results, prefixed_builder_name, expectation_map
244*6777b538SAndroid Build Coastguard Worker
245*6777b538SAndroid Build Coastguard Worker  def QueryBuilder(self, builder: data_types.BuilderEntry
246*6777b538SAndroid Build Coastguard Worker                   ) -> Tuple[data_types.ResultListType, Optional[List[str]]]:
247*6777b538SAndroid Build Coastguard Worker    """Queries ResultDB for results from |builder|.
248*6777b538SAndroid Build Coastguard Worker
249*6777b538SAndroid Build Coastguard Worker    Args:
250*6777b538SAndroid Build Coastguard Worker      builder: A data_types.BuilderEntry containing the builder to query.
251*6777b538SAndroid Build Coastguard Worker
252*6777b538SAndroid Build Coastguard Worker    Returns:
253*6777b538SAndroid Build Coastguard Worker      A tuple (results, expectation_files). |results| is the results returned by
254*6777b538SAndroid Build Coastguard Worker      the query converted into a list of data_types.Result objects.
255*6777b538SAndroid Build Coastguard Worker      |expectation_files| is a set of strings denoting which expectation files
256*6777b538SAndroid Build Coastguard Worker      are relevant to |results|, or None if all should be used.
257*6777b538SAndroid Build Coastguard Worker    """
258*6777b538SAndroid Build Coastguard Worker
259*6777b538SAndroid Build Coastguard Worker    query_generator = self._GetQueryGeneratorForBuilder(builder)
260*6777b538SAndroid Build Coastguard Worker    if not query_generator:
261*6777b538SAndroid Build Coastguard Worker      # No affected tests on this builder, so early return.
262*6777b538SAndroid Build Coastguard Worker      return [], None
263*6777b538SAndroid Build Coastguard Worker
264*6777b538SAndroid Build Coastguard Worker    # Query for the test data from the builder, splitting the query if we run
265*6777b538SAndroid Build Coastguard Worker    # into the BigQuery hard memory limit. Even if we keep failing, this will
266*6777b538SAndroid Build Coastguard Worker    # eventually stop due to getting a QuerySplitError when we can't split the
267*6777b538SAndroid Build Coastguard Worker    # query any further.
268*6777b538SAndroid Build Coastguard Worker    query_results = None
269*6777b538SAndroid Build Coastguard Worker    while query_results is None:
270*6777b538SAndroid Build Coastguard Worker      try:
271*6777b538SAndroid Build Coastguard Worker        query_results = self._RunBigQueryCommandsForJsonOutput(
272*6777b538SAndroid Build Coastguard Worker            query_generator.GetQueries(), {
273*6777b538SAndroid Build Coastguard Worker                '': {
274*6777b538SAndroid Build Coastguard Worker                    'builder_name': builder.name
275*6777b538SAndroid Build Coastguard Worker                },
276*6777b538SAndroid Build Coastguard Worker                'INT64': {
277*6777b538SAndroid Build Coastguard Worker                    'num_builds': self._num_samples
278*6777b538SAndroid Build Coastguard Worker                }
279*6777b538SAndroid Build Coastguard Worker            })
280*6777b538SAndroid Build Coastguard Worker      except MemoryLimitError:
281*6777b538SAndroid Build Coastguard Worker        logging.warning(
282*6777b538SAndroid Build Coastguard Worker            'Query to builder %s hit BigQuery hard memory limit, trying again '
283*6777b538SAndroid Build Coastguard Worker            'with more query splitting.', builder.name)
284*6777b538SAndroid Build Coastguard Worker        query_generator.SplitQuery()
285*6777b538SAndroid Build Coastguard Worker
286*6777b538SAndroid Build Coastguard Worker    results = []
287*6777b538SAndroid Build Coastguard Worker    if not query_results:
288*6777b538SAndroid Build Coastguard Worker      # Don't bother logging if we know this is a fake CI builder.
289*6777b538SAndroid Build Coastguard Worker      if not (builder.builder_type == constants.BuilderTypes.CI
290*6777b538SAndroid Build Coastguard Worker              and builder in builders_module.GetInstance().GetFakeCiBuilders()):
291*6777b538SAndroid Build Coastguard Worker        logging.warning(
292*6777b538SAndroid Build Coastguard Worker            'Did not get results for "%s", but this may be because its '
293*6777b538SAndroid Build Coastguard Worker            'results do not apply to any expectations for this suite.',
294*6777b538SAndroid Build Coastguard Worker            builder.name)
295*6777b538SAndroid Build Coastguard Worker      return results, None
296*6777b538SAndroid Build Coastguard Worker
297*6777b538SAndroid Build Coastguard Worker    # It's possible that a builder runs multiple versions of a test with
298*6777b538SAndroid Build Coastguard Worker    # different expectation files for each version. So, find a result for each
299*6777b538SAndroid Build Coastguard Worker    # unique step and get the expectation files from all of them.
300*6777b538SAndroid Build Coastguard Worker    results_for_each_step = {}
301*6777b538SAndroid Build Coastguard Worker    for qr in query_results:
302*6777b538SAndroid Build Coastguard Worker      step_name = qr['step_name']
303*6777b538SAndroid Build Coastguard Worker      if step_name not in results_for_each_step:
304*6777b538SAndroid Build Coastguard Worker        results_for_each_step[step_name] = qr
305*6777b538SAndroid Build Coastguard Worker
306*6777b538SAndroid Build Coastguard Worker    expectation_files = set()
307*6777b538SAndroid Build Coastguard Worker    for qr in results_for_each_step.values():
308*6777b538SAndroid Build Coastguard Worker      # None is a special value indicating "use all expectation files", so
309*6777b538SAndroid Build Coastguard Worker      # handle that.
310*6777b538SAndroid Build Coastguard Worker      ef = self._GetRelevantExpectationFilesForQueryResult(qr)
311*6777b538SAndroid Build Coastguard Worker      if ef is None:
312*6777b538SAndroid Build Coastguard Worker        expectation_files = None
313*6777b538SAndroid Build Coastguard Worker        break
314*6777b538SAndroid Build Coastguard Worker      expectation_files |= set(ef)
315*6777b538SAndroid Build Coastguard Worker    if expectation_files is not None:
316*6777b538SAndroid Build Coastguard Worker      expectation_files = list(expectation_files)
317*6777b538SAndroid Build Coastguard Worker
318*6777b538SAndroid Build Coastguard Worker    # The query result list is potentially very large, so reduce the list as we
319*6777b538SAndroid Build Coastguard Worker    # iterate over it instead of using a standard for/in so that we don't
320*6777b538SAndroid Build Coastguard Worker    # temporarily end up with a ~2x increase in memory.
321*6777b538SAndroid Build Coastguard Worker    while query_results:
322*6777b538SAndroid Build Coastguard Worker      r = query_results.pop()
323*6777b538SAndroid Build Coastguard Worker      if self._ShouldSkipOverResult(r):
324*6777b538SAndroid Build Coastguard Worker        continue
325*6777b538SAndroid Build Coastguard Worker      results.append(self._ConvertJsonResultToResultObject(r))
326*6777b538SAndroid Build Coastguard Worker    logging.debug('Got %d results for %s builder %s', len(results),
327*6777b538SAndroid Build Coastguard Worker                  builder.builder_type, builder.name)
328*6777b538SAndroid Build Coastguard Worker    return results, expectation_files
329*6777b538SAndroid Build Coastguard Worker
330*6777b538SAndroid Build Coastguard Worker  def _ConvertJsonResultToResultObject(self, json_result: QueryResult
331*6777b538SAndroid Build Coastguard Worker                                       ) -> data_types.Result:
332*6777b538SAndroid Build Coastguard Worker    """Converts a single BigQuery JSON result to a data_types.Result.
333*6777b538SAndroid Build Coastguard Worker
334*6777b538SAndroid Build Coastguard Worker    Args:
335*6777b538SAndroid Build Coastguard Worker      json_result: A single row/result from BigQuery in JSON format.
336*6777b538SAndroid Build Coastguard Worker
337*6777b538SAndroid Build Coastguard Worker    Returns:
338*6777b538SAndroid Build Coastguard Worker      A data_types.Result object containing the information from |json_result|.
339*6777b538SAndroid Build Coastguard Worker    """
340*6777b538SAndroid Build Coastguard Worker    build_id = _StripPrefixFromBuildId(json_result['id'])
341*6777b538SAndroid Build Coastguard Worker    test_name = self._StripPrefixFromTestId(json_result['test_id'])
342*6777b538SAndroid Build Coastguard Worker    actual_result = _ConvertActualResultToExpectationFileFormat(
343*6777b538SAndroid Build Coastguard Worker        json_result['status'])
344*6777b538SAndroid Build Coastguard Worker    tags = expectations.GetInstance().FilterToKnownTags(json_result['typ_tags'])
345*6777b538SAndroid Build Coastguard Worker    step = json_result['step_name']
346*6777b538SAndroid Build Coastguard Worker    return data_types.Result(test_name, tags, actual_result, step, build_id)
347*6777b538SAndroid Build Coastguard Worker
  def _GetRelevantExpectationFilesForQueryResult(self, query_result: QueryResult
                                                 ) -> Optional[Iterable[str]]:
    """Gets the relevant expectation file names for a given query result.

    This base implementation always raises, so it has to be overridden before
    QueryBuilder() can process a non-empty set of rows. QueryBuilder() calls it
    with one row per unique step name.

    Args:
      query_result: A dict containing single row/result from a BigQuery query.

    Returns:
      An iterable of strings containing expectation file names that are
      relevant to |query_result|, or None if all expectation files should be
      considered relevant.

    Raises:
      NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError()
361*6777b538SAndroid Build Coastguard Worker
  def _ShouldSkipOverResult(self, result: QueryResult) -> bool:
    """Whether |result| should be ignored and skipped over.

    This base implementation never skips anything. QueryBuilder() drops every
    row for which this returns True before converting rows to Result objects.

    Args:
      result: A dict containing a single BigQuery result row.

    Returns:
      True if the result should be skipped over/ignored, otherwise False.
    """
    # The argument is intentionally unused here; it is part of the hook's
    # signature.
    del result
    return False
373*6777b538SAndroid Build Coastguard Worker
  def _GetQueryGeneratorForBuilder(self, builder: data_types.BuilderEntry
                                   ) -> Optional['BaseQueryGenerator']:
    """Returns a BaseQueryGenerator instance to only include relevant tests.

    This base implementation always raises, so it has to be overridden before
    QueryBuilder() can be used. QueryBuilder() treats a falsy return value as
    "nothing to query" and returns an empty result list.

    Args:
      builder: A data_types.BuilderEntry containing the builder to query.

    Returns:
      None if the query returned no results. Otherwise, some instance of a
      BaseQueryGenerator.

    Raises:
      NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError()
386*6777b538SAndroid Build Coastguard Worker
def _RunBigQueryCommandsForJsonOutput(self, queries: Union[str, List[str]],
                                      parameters: QueryParameters
                                      ) -> List[QueryResult]:
  """Runs the given BigQuery queries and returns their outputs as JSON.

  Each query is run in its own `bq` subprocess, driven from its own thread
  so that all queries proceed in parallel. Process starts are staggered by
  QUERY_DELAY seconds. If any query reports a rate limit error, all queries
  are re-run, up to MAX_QUERY_TRIES attempts in total.

  Args:
    queries: Either a single string containing a valid BigQuery query or a
        list of such strings.
    parameters: A dict specifying parameters to substitute in the query in
        the format {type: {key: value}}. For example, the dict:
        {'INT64': {'num_builds': 5}}
        would result in --parameter=num_builds:INT64:5 being passed to
        BigQuery.

  Returns:
    The combined results of |queries| in JSON, i.e. the rows of every query
    concatenated into one list. Empty if |queries| is an empty list.

  Raises:
    MemoryLimitError: A query failed and its output mentions 'memory'.
    RuntimeError: A query failed for another reason, or the rate limit was
        hit on every one of the MAX_QUERY_TRIES attempts.
  """
  if isinstance(queries, str):
    queries = [queries]
  assert isinstance(queries, list)

  # A thread pool cannot be created with zero workers, and there is nothing
  # to run anyways, so bail out before spawning anything.
  if not queries:
    return []

  # Every subprocess that gets started, so they can be terminated on
  # failure. Guarded by |processes_lock| when adding.
  processes = set()
  processes_lock = threading.Lock()

  def run_cmd_in_thread(inputs: Tuple[List[str], str]) -> str:
    """Runs a single query in a subprocess and returns its decoded stdout."""
    cmd, query = inputs
    query = query.encode('utf-8')
    # stderr is discarded; failures are diagnosed from stdout below.
    with open(os.devnull, 'w', newline='', encoding='utf-8') as devnull:
      with processes_lock:
        # Starting many queries at once causes us to hit rate limits much more
        # frequently, so stagger query starts to help avoid that.
        time.sleep(QUERY_DELAY)
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=devnull,
                             stdin=subprocess.PIPE)
        processes.add(p)

      # We pass in the query via stdin instead of including it on the
      # commandline because we can run into command length issues in large
      # query mode.
      stdout, _ = p.communicate(query)
      if not isinstance(stdout, six.string_types):
        stdout = stdout.decode('utf-8')
      if p.returncode:
        # When running many queries in parallel, it's possible to hit the
        # rate limit for the account if we're unlucky, so try again if we do.
        if 'Exceeded rate limits' in stdout:
          raise RateLimitError()
        error_msg = 'Error running command %s. stdout: %s' % (cmd, stdout)
        if 'memory' in stdout:
          raise MemoryLimitError(error_msg)
        raise RuntimeError(error_msg)
      return stdout

  def run_cmd(cmd: List[str], tries: int) -> List[str]:
    """Runs |cmd| once per query in parallel, retrying on rate limiting."""
    if tries >= MAX_QUERY_TRIES:
      raise RuntimeError('Query failed too many times, aborting')

    # We use a thread pool with a thread for each query/process instead of
    # just creating the processes due to guidance from the Python docs:
    # https://docs.python.org/3/library/subprocess.html#subprocess.Popen.stderr
    # We need to write to stdin to pass the query in, but using
    # stdout/stderr/stdin directly is discouraged due to the potential for
    # deadlocks. The suggested method (using .communicate()) blocks, so we
    # need the thread pool to maintain parallelism.
    pool = multiprocessing.pool.ThreadPool(len(queries))

    def cleanup():
      """Stops the pool and terminates every tracked subprocess."""
      pool.terminate()
      for p in processes:
        try:
          p.terminate()
        except OSError:
          # We can fail to terminate if the process is already finished, so
          # ignore such failures.
          pass
      processes.clear()

    args = [(cmd, q) for q in queries]
    try:
      return pool.map(run_cmd_in_thread, args)
    except RateLimitError:
      logging.warning('Query hit rate limit, retrying')
      # Tear down this attempt before starting the next one; the retry
      # re-runs every query, not just the one that was rate limited.
      cleanup()
      return run_cmd(cmd, tries + 1)
    finally:
      cleanup()
    raise RuntimeError('Hit branch that should  be unreachable')

  bq_cmd = GenerateBigQueryCommand(self._project,
                                   parameters,
                                   batch=self._use_batching)
  stdouts = run_cmd(bq_cmd, 0)
  # Each stdout is expected to be a JSON array of rows; flatten them all into
  # a single list.
  combined_json = []
  for result in [json.loads(s) for s in stdouts]:
    combined_json.extend(result)
  return combined_json
486*6777b538SAndroid Build Coastguard Worker
487*6777b538SAndroid Build Coastguard Worker  def _StripPrefixFromTestId(self, test_id: str) -> str:
488*6777b538SAndroid Build Coastguard Worker    """Strips the prefix from a test ID, leaving only the test case name.
489*6777b538SAndroid Build Coastguard Worker
490*6777b538SAndroid Build Coastguard Worker    Args:
491*6777b538SAndroid Build Coastguard Worker      test_id: A string containing a full ResultDB test ID, e.g.
492*6777b538SAndroid Build Coastguard Worker          ninja://target/directory.suite.class.test_case
493*6777b538SAndroid Build Coastguard Worker
494*6777b538SAndroid Build Coastguard Worker    Returns:
495*6777b538SAndroid Build Coastguard Worker      A string containing the test cases name extracted from |test_id|.
496*6777b538SAndroid Build Coastguard Worker    """
497*6777b538SAndroid Build Coastguard Worker    raise NotImplementedError()
498*6777b538SAndroid Build Coastguard Worker
499*6777b538SAndroid Build Coastguard Worker  def _GetActiveBuilderQuery(self, builder_type: str,
500*6777b538SAndroid Build Coastguard Worker                             include_internal_builders: bool) -> str:
501*6777b538SAndroid Build Coastguard Worker    """Gets the SQL query for determining which builders actually produce data.
502*6777b538SAndroid Build Coastguard Worker
503*6777b538SAndroid Build Coastguard Worker    Args:
504*6777b538SAndroid Build Coastguard Worker      builder_type: A string containing the type of builders to query, either
505*6777b538SAndroid Build Coastguard Worker          "ci" or "try".
506*6777b538SAndroid Build Coastguard Worker      include_internal_builders: A boolean indicating whether internal builders
507*6777b538SAndroid Build Coastguard Worker          should be included in the data that the query will access.
508*6777b538SAndroid Build Coastguard Worker
509*6777b538SAndroid Build Coastguard Worker    Returns:
510*6777b538SAndroid Build Coastguard Worker      A string containing a SQL query that will get all the names of all
511*6777b538SAndroid Build Coastguard Worker      relevant builders that are active/producing data.
512*6777b538SAndroid Build Coastguard Worker    """
513*6777b538SAndroid Build Coastguard Worker    raise NotImplementedError()
514*6777b538SAndroid Build Coastguard Worker
515*6777b538SAndroid Build Coastguard Worker
class BaseQueryGenerator:
  """Abstract base class for query generators.

  Stores the builder being queried and declares the interface that concrete
  generators are expected to provide. Every interface method raises
  NotImplementedError here.
  """

  def __init__(self, builder: data_types.BuilderEntry):
    """
    Args:
      builder: The data_types.BuilderEntry that queries are generated for.
    """
    self._builder = builder

  def SplitQuery(self) -> None:
    """Splits the query into more clauses/queries.

    Raises:
      NotImplementedError: Always, unless overridden.
    """
    raise NotImplementedError('SplitQuery must be overridden in a child class')

  def GetClauses(self) -> List[str]:
    """Gets string representations of the test filters.

    Returns:
      A list of strings, each string being a valid SQL clause that applies a
      portion of the test filter to a query.

    Raises:
      NotImplementedError: Always, unless overridden.
    """
    raise NotImplementedError('GetClauses must be overridden in a child class')

  def GetQueries(self) -> List[str]:
    """Gets string representations of the queries to run.

    Returns:
      A list of strings, each string being a valid SQL query that queries a
      portion of the tests of interest.

    Raises:
      NotImplementedError: Always, unless overridden.
    """
    raise NotImplementedError('GetQueries must be overridden in a child class')
543*6777b538SAndroid Build Coastguard Worker
544*6777b538SAndroid Build Coastguard Worker
# GetQueries is intentionally left for further subclasses to implement.
# pylint: disable=abstract-method
class FixedQueryGenerator(BaseQueryGenerator):
  """Concrete test filter that cannot be split."""

  def __init__(self, builder: data_types.BuilderEntry, test_filter: str):
    """
    Args:
      builder: The data_types.BuilderEntry that queries are generated for.
      test_filter: A string containing the test filter SQL clause to use.
    """
    super().__init__(builder)
    self._test_filter = test_filter

  def SplitQuery(self) -> None:
    """Always fails, since a fixed filter has no test IDs to divide up.

    Raises:
      QuerySplitError: Always.
    """
    raise QuerySplitError('Tried to split a query without any test IDs to use, '
                          'use --large-query-mode')

  def GetClauses(self) -> List[str]:
    """Returns a single-element list containing the fixed test filter."""
    return [self._test_filter]
# pylint: enable=abstract-method
564*6777b538SAndroid Build Coastguard Worker
565*6777b538SAndroid Build Coastguard Worker
# GetQueries is intentionally left for further subclasses to implement.
# pylint: disable=abstract-method
class SplitQueryGenerator(BaseQueryGenerator):
  """Concrete test filter that can be split to a desired size."""

  def __init__(self, builder: data_types.BuilderEntry, test_ids: List[str],
               target_num_samples: int):
    """
    Args:
      builder: The data_types.BuilderEntry that queries are generated for.
      test_ids: A non-empty list of strings containing the test IDs to use in
          the test filter.
      target_num_samples: The target/max number of samples to get from each
          query that uses clauses from this test filter.
    """
    super().__init__(builder)
    self._test_id_lists = []
    self._target_num_samples = target_num_samples
    self._clauses = []
    self._PerformInitialSplit(test_ids)

  def _PerformInitialSplit(self, test_ids: List[str]) -> None:
    """Evenly splits |test_ids| into lists that are ~|_target_num_samples| long.

    Only to be called from the constructor.

    Args:
      test_ids: A non-empty list of test IDs to split and assign to the
          _test_id_lists member.
    """
    assert isinstance(test_ids[0], six.string_types)

    # First work out how many lists are needed to stay at or below the target
    # size, then size the lists so the IDs are spread evenly across them.
    num_lists = int(math.ceil(len(test_ids) / self._target_num_samples))
    list_size = int(math.ceil(len(test_ids) / num_lists))

    # Slicing clamps at the end of the list, so the final chunk simply ends up
    # shorter when the IDs do not divide evenly.
    self._test_id_lists = [
        test_ids[i * list_size:(i + 1) * list_size] for i in range(num_lists)
    ]
    self._GenerateClauses()

  def _GenerateClauses(self) -> None:
    """Rebuilds |_clauses| with one SQL clause per list in |_test_id_lists|."""
    # The IDs are joined verbatim, so presumably the caller passes them
    # already quoted as SQL string literals - TODO confirm against callers.
    self._clauses = [
        'AND test_id IN UNNEST([%s])' % ', '.join(id_list)
        for id_list in self._test_id_lists
    ]

  def SplitQuery(self) -> None:
    """Splits every test ID list in half, doubling the number of clauses.

    The generator is left unmodified if the split fails.

    Raises:
      QuerySplitError: Some list has one or fewer test IDs and thus cannot be
          split any further.
    """
    halved_lists = []
    for id_list in self._test_id_lists:
      if len(id_list) <= 1:
        raise QuerySplitError(
            'Cannot split query any further, try lowering --num-samples')
      midpoint = len(id_list) // 2
      halved_lists.append(id_list[:midpoint])
      halved_lists.append(id_list[midpoint:])
    self._test_id_lists = halved_lists
    self._GenerateClauses()

  def GetClauses(self) -> List[str]:
    """Returns the current list of SQL test filter clauses."""
    return self._clauses
# pylint: enable=abstract-method
636*6777b538SAndroid Build Coastguard Worker
637*6777b538SAndroid Build Coastguard Worker
def GenerateBigQueryCommand(project: str,
                            parameters: QueryParameters,
                            batch: bool = True) -> List[str]:
  """Generate a BigQuery commandline.

  Does not contain the actual query, as that is passed in via stdin.

  Args:
    project: A string containing the billing project to use for BigQuery.
    parameters: A dict specifying parameters to substitute in the query in
        the format {type: {key: value}}. For example, the dict:
        {'INT64': {'num_builds': 5}}
        would result in --parameter=num_builds:INT64:5 being passed to BigQuery.
        Values are formatted as-is without any escaping.
    batch: Whether to run the query in batch mode or not. Batching adds some
        random amount of overhead since it means the query has to wait for idle
        resources, but also allows for much better parallelism.

  Returns:
    A list containing the BigQuery commandline, suitable to be passed to a
    method from the subprocess module.
  """
  cmd = [
      'bq',
      'query',
      '--max_rows=%d' % MAX_ROWS,
      '--format=json',
      '--project_id=%s' % project,
      '--use_legacy_sql=false',
  ]

  if batch:
    cmd.append('--batch')

  # One --parameter flag per (name, type, value) triple.
  for parameter_type, parameter_pairs in parameters.items():
    cmd.extend('--parameter=%s:%s:%s' % (k, parameter_type, v)
               for k, v in parameter_pairs.items())
  return cmd
675*6777b538SAndroid Build Coastguard Worker
676*6777b538SAndroid Build Coastguard Worker
677*6777b538SAndroid Build Coastguard Workerdef _StripPrefixFromBuildId(build_id: str) -> str:
678*6777b538SAndroid Build Coastguard Worker  # Build IDs provided by ResultDB are prefixed with "build-"
679*6777b538SAndroid Build Coastguard Worker  split_id = build_id.split('-')
680*6777b538SAndroid Build Coastguard Worker  assert len(split_id) == 2
681*6777b538SAndroid Build Coastguard Worker  return split_id[-1]
682*6777b538SAndroid Build Coastguard Worker
683*6777b538SAndroid Build Coastguard Worker
def _ConvertActualResultToExpectationFileFormat(actual_result: str) -> str:
  """Converts a ResultDB result string to its expectation file equivalent.

  Args:
    actual_result: A string containing a result as reported to ResultDB, e.g.
        "PASS" or "ABORT".

  Returns:
    The corresponding entry from expectations_parser.RESULT_TAGS.

  Raises:
    KeyError: The (possibly remapped) result has no entry in RESULT_TAGS.
  """
  # Web tests use ResultDB's ABORT value for both test timeouts and device
  # failures, but Abort is not defined in typ. So, map it to timeout now.
  if actual_result == 'ABORT':
    actual_result = json_results.ResultType.Timeout
  # The result reported to ResultDB is in the format PASS/FAIL, while the
  # expected results in an expectation file are in the format Pass/Failure.
  return expectations_parser.RESULT_TAGS[actual_result]
692*6777b538SAndroid Build Coastguard Worker
693*6777b538SAndroid Build Coastguard Worker
class RateLimitError(Exception):
  """Exception raised when BigQuery hits a rate limit error."""
696*6777b538SAndroid Build Coastguard Worker
697*6777b538SAndroid Build Coastguard Worker
class MemoryLimitError(Exception):
  """Exception raised when BigQuery hits its hard memory limit."""
700*6777b538SAndroid Build Coastguard Worker
701*6777b538SAndroid Build Coastguard Worker
class QuerySplitError(Exception):
  """Exception raised when a query cannot be split any further."""
704