1# Copyright 2021 The Chromium Authors 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4"""Module for querying BigQuery.""" 5 6import collections 7import json 8import os 9import subprocess 10from typing import List 11 12from flake_suppressor_common import common_typing as ct 13from flake_suppressor_common import results as results_module 14from flake_suppressor_common import tag_utils 15 16from unexpected_passes_common import queries as upc_queries 17 18MAX_ROWS = (2**31) - 1 19 20# A note about the try version of the queries: The submitted builds subquery is 21# included in each query instead of running it once by itself and including the 22# returned data in other queries because we can end up getting a very large 23# number of build IDs, which can push the query over BigQuery's hard query size 24# limit. The query runs pretty quickly (within a couple of seconds), so 25# duplicating it does not add much runtime. 26 27# Subquery for getting all builds used for CL submission in the past 28# |sample_period| days. Will be inserted into other queries. 29SUBMITTED_BUILDS_TEMPLATE = """\ 30 SELECT 31 CONCAT("build-", CAST(unnested_builds.id AS STRING)) as id 32 FROM 33 `commit-queue.chromium.attempts`, 34 UNNEST(builds) as unnested_builds, 35 UNNEST(gerrit_changes) as unnested_changes 36 WHERE 37 unnested_builds.host = "cr-buildbucket.appspot.com" 38 AND unnested_changes.submit_status = "SUCCESS" 39 AND start_time > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), 40 INTERVAL @sample_period DAY) 41""" 42 43# Subquery for getting all ci builds that are in sheriff rotations in the past 44# |sample_period| days. Will be inserted into other queries. 45SHERIFF_ROTATIONS_CI_BUILDS_TEMPLATE = """\ 46 SELECT DISTINCT builder.builder, 47 FROM 48 `cr-buildbucket.chrome.builds_30d` 49 WHERE 50 input.properties LIKE '%sheriff_rotations%' 51 AND JSON_VALUE_ARRAY(input.properties, '$.sheriff_rotations')[OFFSET(0)] 52 IN ("chromium", "android") 53 AND start_time > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), 54 INTERVAL @sample_period DAY) 55""" 56 57 58class BigQueryQuerier(): 59 def __init__(self, sample_period: int, billing_project: str, 60 result_processor: results_module.ResultProcessor): 61 """Class for making calls to BigQuery. 62 63 Args: 64 sample_period: An int denoting the number of days that data should be 65 queried over. 66 billing_project: A string containing the billing project to use for 67 BigQuery queries. 68 """ 69 self._sample_period = sample_period 70 self._billing_project = billing_project 71 self._result_processor = result_processor 72 73 def GetFlakyOrFailingCiTests(self) -> ct.QueryJsonType: 74 """Gets all flaky or failing tests from CI. 75 76 Returns: 77 A JSON representation of the BigQuery results containing all found flaky 78 or failing test results that came from CI bots. 79 """ 80 return self._GetJsonResultsFromBigQuery(self.GetFlakyOrFailingCiQuery()) 81 82 def GetFailingCiBuildCulpritTests(self) -> ct.QueryJsonType: 83 """Gets all failing build culprit tests from CI builders. 84 85 Returns: 86 A JSON representation of the BigQuery results containing all found 87 all failing build culprit results that came from CI bots. 88 """ 89 return self._GetJsonResultsFromBigQuery( 90 self.GetFailingBuildCulpritFromCiQuery()) 91 92 def GetFlakyOrFailingTestsFromCiBuilders( 93 self, builder_names: List[str]) -> ct.QueryJsonType: 94 """Gets all flaky or failing tests from input CI builders. 95 96 Returns: 97 A JSON representation of the BigQuery results containing all found 98 all failing results that came from input CI builders. 99 """ 100 return self._GetJsonResultsFromBigQuery( 101 self.GetFlakyOrFailingFromCIBuildersQuery(builder_names)) 102 103 def GetFailingBuildCulpritFromCiBuilders( 104 self, builder_names: List[str]) -> ct.QueryJsonType: 105 """Gets all failing build culprit tests from input CI builders. 106 107 Returns: 108 A JSON representation of the BigQuery results containing all found 109 all failing results that came from input CI builders. 110 """ 111 return self._GetJsonResultsFromBigQuery( 112 self.GetFailingBuildCulpritFromCIBuildersQuery(builder_names)) 113 114 def GetFlakyOrFailingTryTests(self) -> ct.QueryJsonType: 115 """Gets all flaky or failing tests from the trybots. 116 117 Limits results to those that came from builds used for CL submission. 118 119 Returns: 120 A JSON representation of the BigQuery results containing all found flaky 121 or failing test results that came from trybots AND came from builds that 122 were used for CL submission. 123 """ 124 return self._GetJsonResultsFromBigQuery(self.GetFlakyOrFailingTryQuery()) 125 126 def GetResultCounts(self) -> ct.ResultCountType: 127 """Gets the result count for each test/config combination. 128 129 Returns: 130 A dict in the format: 131 { 132 typ_tags (tuple): { 133 test_name (str): result_count (int) 134 } 135 } 136 """ 137 # A default dict of default dicts of ints. 138 result_counts = collections.defaultdict(lambda: collections.defaultdict(int) 139 ) 140 self._GetResultCountWithQuery(self.GetResultCountCIQuery(), result_counts) 141 self._GetResultCountWithQuery(self.GetResultCountTryQuery(), result_counts) 142 return result_counts 143 144 def GetResultCountFromCiBuilders( 145 self, builder_names: List[str]) -> ct.ResultCountType: 146 """Gets the result count for the input CI builders. 147 148 Returns: 149 A dict in the format: 150 { 151 typ_tags (tuple): { 152 test_name (str): result_count (int) 153 } 154 } 155 """ 156 result_counts = collections.defaultdict( 157 lambda: collections.defaultdict(int)) 158 self._GetResultCountWithQuery( 159 self.GetResultCountFromCIBuildersQuery(builder_names), result_counts) 160 return result_counts 161 162 def GetFlakyOrFailingCiQuery(self) -> str: 163 """ 164 Returns: 165 Query string to get all the failing or flaky results from CI bots. 166 """ 167 raise NotImplementedError 168 169 def GetFailingBuildCulpritFromCiQuery(self) -> str: 170 """ 171 Returns: 172 Query string to get all failing build culprit results from CI bots. 173 """ 174 raise NotImplementedError 175 176 def GetFlakyOrFailingFromCIBuildersQuery(self, 177 builder_names: List[str]) -> str: 178 """ 179 Returns: 180 Query string to get all the failing or flaky results from input CI 181 builders. 182 """ 183 raise NotImplementedError 184 185 def GetFailingBuildCulpritFromCIBuildersQuery( 186 self, builder_names: List[str]) -> str: 187 """ 188 Returns: 189 Query string to get all failing build culprit results from input CI 190 builders. 191 """ 192 raise NotImplementedError 193 194 def GetFlakyOrFailingTryQuery(self) -> str: 195 """ 196 Returns: 197 Query string to get all the failing or flaky results from Try bots. 198 """ 199 raise NotImplementedError 200 201 def GetResultCountCIQuery(self) -> str: 202 """ 203 Returns: 204 Query string to get the result count for test/tag combination from CI 205 bots. 206 """ 207 raise NotImplementedError 208 209 def GetResultCountTryQuery(self) -> str: 210 """ 211 Returns: 212 Query string to get result count for test/tag combination from Try 213 bots. 214 """ 215 raise NotImplementedError 216 217 def GetResultCountFromCIBuildersQuery(self, builder_names: List[str]) -> str: 218 """ 219 Returns: 220 Query string to get the result count for test/tag combination from input 221 CI builders. 222 """ 223 raise NotImplementedError 224 225 def _GetJsonResultsFromBigQuery(self, query: str) -> ct.QueryJsonType: 226 """Gets the JSON results from a BigQuery query. 227 228 Automatically passes in the "@sample_period" parameterized argument to 229 BigQuery. 230 231 Args: 232 query: A string containing the SQL query to run in BigQuery. 233 234 Returns: 235 The loaded JSON results from running |query|. 236 """ 237 cmd = upc_queries.GenerateBigQueryCommand( 238 self._billing_project, 239 {'INT64': { 240 'sample_period': self._sample_period 241 }}, 242 batch=False) 243 244 with open(os.devnull, 'w') as devnull: 245 completed_process = subprocess.run(cmd, 246 input=query, 247 stdout=subprocess.PIPE, 248 stderr=devnull, 249 check=True, 250 text=True) 251 252 return json.loads(completed_process.stdout) 253 254 def _GetResultCountWithQuery(self, query: str, 255 result_counts: ct.ResultCountType) -> None: 256 """Helper to get result counts using a particular query. 257 258 Args: 259 query: A string containing a SQL query to run. 260 result_counts: A defaultdict of defaultdict of ints that will be modified 261 in place to tally result counts. 262 """ 263 json_results = self._GetJsonResultsFromBigQuery(query) 264 265 for r in json_results: 266 typ_tags = tuple(tag_utils.TagUtils.RemoveIgnoredTags(r['typ_tags'])) 267 test_name = r['test_name'] 268 _, test_name = self._result_processor.GetTestSuiteAndNameFromResultDbName( 269 test_name) 270 count = int(r['result_count']) 271 result_counts[typ_tags][test_name] += count 272