xref: /aosp_15_r20/external/autotest/utils/frozen_chromite/lib/failures_lib.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1# -*- coding: utf-8 -*-
2# Copyright 2014 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Classes of failure types."""
7
8from __future__ import print_function
9
10import collections
11import json
12import sys
13import traceback
14
15from autotest_lib.utils.frozen_chromite.lib import constants
16from autotest_lib.utils.frozen_chromite.lib import cros_build_lib
17from autotest_lib.utils.frozen_chromite.lib import failure_message_lib
18from autotest_lib.utils.frozen_chromite.lib import metrics
19
20
class StepFailure(Exception):
  """Base exception signalling that a cbuildbot step failed.

  Subclasses are expected to satisfy all of the following:
    1) The failure means that a cbuildbot step failed.
    2) Everything needed to debug the problem has already been printed
       in the logs of the failing stage.
    3) __str__() is short enough to embed in a Commit Queue failure
       message.
  """

  # One of the constants.EXCEPTION_CATEGORY_ALL_CATEGORIES values. Subclasses
  # override this class attribute to report a different category.
  EXCEPTION_CATEGORY = constants.EXCEPTION_CATEGORY_UNKNOWN

  def EncodeExtraInfo(self):
    """Return extra_info as a json string; subclasses may override.

    Returns:
      None in this base implementation (there is no extra info to encode).
    """
    return None

  def ConvertToStageFailureMessage(self, build_stage_id, stage_name,
                                   stage_prefix_name=None):
    """Build a StageFailureMessage describing this failure.

    Args:
      build_stage_id: The id of the build stage.
      stage_name: The name (string) of the failed stage.
      stage_prefix_name: The prefix name (string) of the failed stage,
          default to None.

    Returns:
      An instance of failure_message_lib.StageFailureMessage.
    """
    # StageFailure takes 19 positional fields; only the ones known here are
    # filled in, the trailing ten are padded with None.
    unknown_tail = [None] * 10
    failure_record = failure_message_lib.StageFailure(
        None,
        build_stage_id,
        None,
        self.__class__.__name__,
        str(self),
        self.EXCEPTION_CATEGORY,
        self.EncodeExtraInfo(),
        None,
        stage_name,
        *unknown_tail)
    return failure_message_lib.StageFailureMessage(
        failure_record, stage_prefix_name=stage_prefix_name)
60
61
# Record describing one exception: its class ('type'), its string form
# ('str') and its textual traceback ('traceback').
ExceptInfo = collections.namedtuple('ExceptInfo', 'type str traceback')
65
66
def CreateExceptInfo(exception, tb):
  """Return a list of ExceptInfo objects describing |exception|.

  Normally the result is a one-element list wrapping |exception| and |tb|.
  If |exception| is a CompoundFailure that already carries a non-empty
  exc_infos list, that list is simply returned as is; the type of
  |exception| itself is not preserved in that case.

  Args:
    exception: The exception.
    tb: The textual traceback.

  Returns:
    A list of ExceptInfo objects.
  """
  if not isinstance(exception, CompoundFailure) or not exception.exc_infos:
    single = ExceptInfo(exception.__class__, str(exception), tb)
    return [single]

  # Already a populated compound failure: reuse its stored list.
  return exception.exc_infos
86
87
class CompoundFailure(StepFailure):
  """A StepFailure that carries a list of ExceptInfo objects."""

  def __init__(self, message='', exc_infos=None):
    """Set up the failure from a message and/or stored exception infos.

    Args:
      message: A string describing the failure. When empty, a message is
        generated from |exc_infos|.
      exc_infos: A list of ExceptInfo objects.
    """
    self.exc_infos = exc_infos or []
    if not message:
      # Default to dumping every stored ExceptInfo, tracebacks included, so
      # the full details are always available when debugging.
      rendered = ('{e.type}: {e.str}\n{e.traceback}'.format(e=info)
                  for info in self.exc_infos)
      message = '\n'.join(rendered)
    self.msg = message

    super(CompoundFailure, self).__init__(message)

  def ToSummaryString(self):
    """Return one 'type: str' line per stored ExceptInfo.

    Tracebacks are deliberately left out so that the text stays readable
    on the waterfall. With no stored infos, self.msg is returned instead.
    """
    if self.HasEmptyList():
      return self.msg

    summaries = ['%s: %s' % (info.type, info.str) for info in self.exc_infos]
    return '\n'.join(summaries)

  def HasEmptyList(self):
    """Returns True if self.exc_infos is empty."""
    return not self.exc_infos

  def HasFailureType(self, cls):
    """Returns True if at least one stored failure type is a |cls|."""
    for info in self.exc_infos:
      if issubclass(info.type, cls):
        return True
    return False

  def MatchesFailureType(self, cls):
    """Returns True if there are failures and every one of them is a |cls|."""
    if self.HasEmptyList():
      return False
    return all(issubclass(info.type, cls) for info in self.exc_infos)

  def HasFatalFailure(self, whitelist=None):
    """Determine if there are non-whitelisted failures.

    Args:
      whitelist: A list of whitelisted exception types.

    Returns:
      True if any failure is not covered by |whitelist|. With an empty or
      missing |whitelist|, True whenever any failure is stored.
    """
    if not whitelist:
      return not self.HasEmptyList()

    # A failure is fatal when its type matches none of the allowed classes.
    return any(
        not any(issubclass(info.type, allowed) for allowed in whitelist)
        for info in self.exc_infos)

  def ConvertToStageFailureMessage(self, build_stage_id, stage_name,
                                   stage_prefix_name=None):
    """Build a CompoundFailureMessage for this failure and its inner ones.

    Args:
      build_stage_id: The id of the build stage.
      stage_name: The name (string) of the failed stage.
      stage_prefix_name: The prefix name (string) of the failed stage,
          default to None.

    Returns:
      An instance of failure_message_lib.StageFailureMessage.
    """
    def _MakeRecord(type_name, text, category, extra_info):
      # Only the fields known here are set; the ten trailing positional
      # StageFailure fields are padded with None.
      return failure_message_lib.StageFailure(
          None, build_stage_id, None, type_name, text, category, extra_info,
          None, stage_name, *([None] * 10))

    outer_record = _MakeRecord(
        self.__class__.__name__, str(self), self.EXCEPTION_CATEGORY,
        self.EncodeExtraInfo())
    compound_message = failure_message_lib.CompoundFailureMessage(
        outer_record, stage_prefix_name=stage_prefix_name)

    # Attach one message per stored exception; tracebacks are not included.
    for inner_class, inner_text, _ in self.exc_infos:
      inner_record = _MakeRecord(
          inner_class.__name__, inner_text,
          _GetExceptionCategory(inner_class), None)
      inner_message = failure_message_lib.StageFailureMessage(
          inner_record, stage_prefix_name=stage_prefix_name)
      compound_message.inner_failures.append(inner_message)

    return compound_message
182
183
class ExitEarlyException(Exception):
  """Exception when a stage finishes and exits early.

  Derives directly from Exception, not from StepFailure, and defines no
  members of its own.
  """
186
# ExitEarlyException simulates sys.exit(0). SystemExit derives from
# BaseException, so it is never caught by an `except Exception` clause.
# To get the same effect, ExitEarlyException must not be caught as an
# Exception and re-raised as a different type.
EXCEPTIONS_TO_EXCLUDE = (ExitEarlyException,)
191
class SetFailureType(object):
  """A decorator that re-raises exceptions as a pre-set CompoundFailure type.

  An exception escaping the decorated function is handled as follows:
    * It propagates unchanged if it does not match |source_exception|, if it
      matches one of |exclude_exceptions|, or if it already is an instance
      of |category_exception|.
    * Otherwise a new |category_exception| is raised whose exc_infos describe
      the original exception and its formatted traceback.
  """

  def __init__(self, category_exception, source_exception=None,
               exclude_exceptions=EXCEPTIONS_TO_EXCLUDE):
    """Initializes the decorator.

    Args:
      category_exception: The exception type to re-raise as. It must be
        a subclass of CompoundFailure.
      source_exception: The exception types to re-raise. By default, re-raise
        all Exception classes.
      exclude_exceptions: Do not set the type of the exception if it's a
        subclass of one exception in exclude_exceptions. Defaults to
        EXCEPTIONS_TO_EXCLUDE; pass None to exclude nothing.
    """
    assert issubclass(category_exception, CompoundFailure)
    self.category_exception = category_exception
    self.source_exception = source_exception
    if self.source_exception is None:
      self.source_exception = Exception
    self.exclude_exceptions = exclude_exceptions

  def __call__(self, functor):
    """Returns a wrapped function.

    Args:
      functor: The callable to decorate.

    Returns:
      A callable with the same signature as |functor| that applies the
      re-raise policy described on the class. The wrapper keeps |functor|'s
      name and docstring.
    """
    # Imported locally so this block does not depend on a file-level import.
    import functools

    @functools.wraps(functor)
    def wrapped_functor(*args, **kwargs):
      try:
        return functor(*args, **kwargs)
      except self.source_exception:
        # Get the information about the original exception.
        exc_type, exc_value, _ = sys.exc_info()
        if self.exclude_exceptions is not None:
          for exclude_exception in self.exclude_exceptions:
            if issubclass(exc_type, exclude_exception):
              raise
        if issubclass(exc_type, self.category_exception):
          # Do not re-raise if the exception is a subclass of the set
          # exception type because it offers more information.
          raise
        # Only format the traceback when the exception is actually wrapped.
        exc_infos = CreateExceptInfo(exc_value, traceback.format_exc())
        raise self.category_exception(exc_infos=exc_infos)

    return wrapped_functor
236
237
class RetriableStepFailure(StepFailure):
  """This exception is thrown when a step failed, but should be retried.

  Defines no members of its own; EXCEPTION_CATEGORY is inherited from
  StepFailure.
  """
240
241
# TODO(nxia): Every time the class name is changed, add the new class name to
# BUILD_SCRIPT_FAILURE_TYPES.
class BuildScriptFailure(StepFailure):
  """Raised when a build command fails.

  Its purpose is to give a short summary of which command failed, for use
  in Commit Queue failure messages, so that developers are not flooded
  with huge error messages when common commands (e.g. build_packages)
  fail.
  """

  EXCEPTION_CATEGORY = constants.EXCEPTION_CATEGORY_BUILD

  def __init__(self, exception, shortname):
    """Construct a BuildScriptFailure object.

    Args:
      exception: A RunCommandError object.
      shortname: Short name for the command we're running.
    """
    StepFailure.__init__(self)
    assert isinstance(exception, cros_build_lib.RunCommandError)
    self.exception, self.shortname = exception, shortname
    # Keep the constructor arguments in args, replacing the empty tuple set
    # by the base initializer.
    self.args = (exception, shortname)

  def __str__(self):
    """Return a brief summary of the build command failure."""
    returncode = self.exception.result.returncode
    if not returncode:
      # No usable exit code: fall back to the wrapped exception's message.
      return self.exception.msg
    return '%s failed (code=%s)' % (self.shortname, returncode)

  def EncodeExtraInfo(self):
    """Encode extra_info into a json string.

    Returns:
      A json string containing shortname.
    """
    return json.dumps({'shortname': self.shortname})
286
287
# TODO(nxia): Every time the class name is changed, add the new class name to
# PACKAGE_BUILD_FAILURE_TYPES.
class PackageBuildFailure(BuildScriptFailure):
  """This exception is thrown when packages fail to build.

  The failed packages are stored as a set. Every textual or JSON rendering
  lists them in sorted order so the output is reproducible across runs.
  """

  def __init__(self, exception, shortname, failed_packages):
    """Construct a PackageBuildFailure object.

    Args:
      exception: The underlying exception.
      shortname: Short name for the command we're running.
      failed_packages: List of packages that failed to build.
    """
    BuildScriptFailure.__init__(self, exception, shortname)
    self.failed_packages = set(failed_packages)
    self.args = (exception, shortname, failed_packages)

  def __str__(self):
    """Summarize which packages failed in which command."""
    return ('Packages failed in %s: %s'
            % (self.shortname, ' '.join(sorted(self.failed_packages))))

  def EncodeExtraInfo(self):
    """Encode extra_info into a json string.

    Returns:
      A json string containing shortname and failed_packages (sorted).
    """
    extra_info_dict = {
        'shortname': self.shortname,
        # Sets iterate in arbitrary order; sort for a stable encoding.
        'failed_packages': sorted(self.failed_packages),
    }
    return json.dumps(extra_info_dict)

  def BuildCompileFailureOutputJson(self):
    """Build proto BuildCompileFailureOutput compatible JSON output.

    Returns:
      A json string with BuildCompileFailureOutput proto as json, holding
      one 'emerge' failure entry per failed package in sorted order.
    """
    failures = [{'rule': 'emerge', 'output_targets': pkg}
                for pkg in sorted(self.failed_packages)]
    wrapper = {'failures': failures}
    return json.dumps(wrapper, indent=2)
332
class InfrastructureFailure(CompoundFailure):
  """Raised if a stage fails due to infrastructure issues.

  Only overrides the exception category; all behavior comes from
  CompoundFailure.
  """

  # Subclasses inherit this category unless they override it themselves.
  EXCEPTION_CATEGORY = constants.EXCEPTION_CATEGORY_INFRA
337
338
339# ChromeOS Test Lab failures.
class TestLabFailure(InfrastructureFailure):
  """Raised if a stage fails due to hardware lab infrastructure issues.

  Replaces the infrastructure category inherited from InfrastructureFailure
  with the lab category.
  """

  EXCEPTION_CATEGORY = constants.EXCEPTION_CATEGORY_LAB
344
345
class SuiteTimedOut(TestLabFailure):
  """Raised if a test suite timed out with no test failures.

  Defines no members of its own; EXCEPTION_CATEGORY is inherited from
  TestLabFailure.
  """
348
349
class BoardNotAvailable(TestLabFailure):
  """Raised if the board is not available in the lab.

  Defines no members of its own; EXCEPTION_CATEGORY is inherited from
  TestLabFailure.
  """
352
353
class SwarmingProxyFailure(TestLabFailure):
  """Raised when an error related to the swarming proxy occurs.

  Defines no members of its own; EXCEPTION_CATEGORY is inherited from
  TestLabFailure.
  """
356
357
358# Gerrit-on-Borg failures.
class GoBFailure(InfrastructureFailure):
  """Raised if a stage fails due to Gerrit-on-Borg (GoB) issues.

  Base class of the GoB failure types in this module. Defines no members of
  its own; EXCEPTION_CATEGORY is inherited from InfrastructureFailure.
  """
361
362
class GoBQueryFailure(GoBFailure):
  """Raised if a stage fails due to Gerrit-on-Borg (GoB) query errors.

  Defines no members of its own; everything is inherited from GoBFailure.
  """
365
366
class GoBSubmitFailure(GoBFailure):
  """Raised if a stage fails due to Gerrit-on-Borg (GoB) submission errors.

  Defines no members of its own; everything is inherited from GoBFailure.
  """
369
370
class GoBFetchFailure(GoBFailure):
  """Raised if a stage fails due to Gerrit-on-Borg (GoB) fetch errors.

  Defines no members of its own; everything is inherited from GoBFailure.
  """
373
374
375# Google Storage failures.
class GSFailure(InfrastructureFailure):
  """Raised if a stage fails due to Google Storage (GS) issues.

  Base class of the GS failure types in this module. Defines no members of
  its own; EXCEPTION_CATEGORY is inherited from InfrastructureFailure.
  """
378
379
class GSUploadFailure(GSFailure):
  """Raised if a stage fails due to Google Storage (GS) upload issues.

  Defines no members of its own; everything is inherited from GSFailure.
  """
382
383
class GSDownloadFailure(GSFailure):
  """Raised if a stage fails due to Google Storage (GS) download issues.

  Defines no members of its own; everything is inherited from GSFailure.
  """
386
387
388# Builder failures.
class BuilderFailure(InfrastructureFailure):
  """Raised if a stage fails due to builder issues.

  Defines no members of its own; EXCEPTION_CATEGORY is inherited from
  InfrastructureFailure.
  """
391
392
class MasterSlaveVersionMismatchFailure(BuilderFailure):
  """Raised if a slave build has a different full_version than its master.

  Defines no members of its own; everything is inherited from
  BuilderFailure.
  """
395
396# Crash collection service failures.
class CrashCollectionFailure(InfrastructureFailure):
  """Raised if a stage fails due to crash collection services.

  Defines no members of its own; EXCEPTION_CATEGORY is inherited from
  InfrastructureFailure.
  """
399
400
class TestFailure(StepFailure):
  """Raised if a test stage (e.g. VMTest) fails.

  Only overrides the exception category; it is a plain StepFailure, not a
  CompoundFailure.
  """

  EXCEPTION_CATEGORY = constants.EXCEPTION_CATEGORY_TEST
405
406
class TestWarning(StepFailure):
  """Raised if a test stage (e.g. VMTest) returns a warning code.

  Does not override EXCEPTION_CATEGORY, so it keeps the default inherited
  from StepFailure.
  """
409
410
def ReportStageFailure(exception, metrics_fields=None):
  """Report a stage failure, and any inner exceptions, to Monarch.

  Args:
    exception: The failure exception to report.
    metrics_fields: (Optional) Fields for ts_mon metric.
  """
  # The outer exception is always reported first.
  outer_category = _GetExceptionCategory(type(exception))
  _InsertFailureToMonarch(exception_category=outer_category,
                          metrics_fields=metrics_fields)

  if not isinstance(exception, CompoundFailure):
    return

  # Only one level is unpacked: CompoundFailure is assumed not to be nested.
  for inner_class, _, _ in exception.exc_infos:
    inner_category = _GetExceptionCategory(inner_class)
    _InsertFailureToMonarch(exception_category=inner_category,
                            metrics_fields=metrics_fields)
428
429
def _InsertFailureToMonarch(
    exception_category=constants.EXCEPTION_CATEGORY_UNKNOWN,
    metrics_fields=None):
  """Report a single stage failure to Monarch if needed.

  Nothing is reported when |metrics_fields| is None or when
  |exception_category| is constants.EXCEPTION_CATEGORY_UNKNOWN.

  Args:
    exception_category: (Optional) one of
                        constants.EXCEPTION_CATEGORY_ALL_CATEGORIES,
                        Default: 'unknown'.
    metrics_fields: (Optional) Fields for ts_mon metric. The mapping is
                    copied, so the caller's object is never modified.
  """
  if (metrics_fields is None or
      exception_category == constants.EXCEPTION_CATEGORY_UNKNOWN):
    return

  # Work on a copy: writing the category into the caller's dict would leak
  # an 'exception_category' key into whatever the caller reports next.
  fields = dict(metrics_fields)
  fields['exception_category'] = exception_category
  counter = metrics.Counter(constants.MON_STAGE_FAILURE_COUNT)
  counter.increment(fields=fields)
446
447
def GetStageFailureMessageFromException(stage_name, build_stage_id,
                                        exception, stage_prefix_name=None):
  """Convert an arbitrary exception into a StageFailureMessage.

  Args:
    stage_name: The name (string) of the failed stage.
    build_stage_id: The id of the failed build stage.
    exception: The BaseException instance to convert to StageFailureMessage.
    stage_prefix_name: The prefix name (string) of the failed stage,
        default to None.

  Returns:
    An instance of failure_message_lib.StageFailureMessage.
  """
  # StepFailure subclasses know how to describe themselves.
  if isinstance(exception, StepFailure):
    return exception.ConvertToStageFailureMessage(
        build_stage_id, stage_name, stage_prefix_name=stage_prefix_name)

  # Any other exception gets a generic record: no extra_info, and the ten
  # trailing positional StageFailure fields padded with None.
  exception_class = type(exception)
  failure_record = failure_message_lib.StageFailure(
      None,
      build_stage_id,
      None,
      exception_class.__name__,
      str(exception),
      _GetExceptionCategory(exception_class),
      None,
      None,
      stage_name,
      *([None] * 10))
  return failure_message_lib.StageFailureMessage(
      failure_record, stage_prefix_name=stage_prefix_name)
473
474
def _GetExceptionCategory(exception_class):
  """Return the exception category that |exception_class| maps to.

  Args:
    exception_class: An exception class (not an instance).

  Returns:
    The class's EXCEPTION_CATEGORY if it is a StepFailure subclass,
    otherwise constants.EXCEPTION_CATEGORY_UNKNOWN.
  """
  # Deliberately no try/except here: a StepFailure subclass without a valid
  # EXCEPTION_CATEGORY is a programming error, not a runtime error.
  return (exception_class.EXCEPTION_CATEGORY
          if issubclass(exception_class, StepFailure)
          else constants.EXCEPTION_CATEGORY_UNKNOWN)
482