xref: /aosp_15_r20/external/cronet/testing/unexpected_passes_common/expectations.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1# Copyright 2020 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Methods related to test expectations/expectation files."""
5
6from __future__ import print_function
7
8import collections
9import copy
10import datetime
11import logging
12import os
13import re
14import subprocess
15import sys
16from typing import Dict, FrozenSet, Iterable, List, Optional, Set, Tuple, Union
17
18import six
19
20from typ import expectations_parser
21from unexpected_passes_common import data_types
22from unexpected_passes_common import result_output
23
# Base strings for finder annotations; a suffix is appended to scope a
# disable/enable annotation to a particular check type.
FINDER_DISABLE_COMMENT_BASE = 'finder:disable'
FINDER_ENABLE_COMMENT_BASE = 'finder:enable'
FINDER_COMMENT_SUFFIX_GENERAL = '-general'
FINDER_COMMENT_SUFFIX_STALE = '-stale'
FINDER_COMMENT_SUFFIX_UNUSED = '-unused'
FINDER_COMMENT_SUFFIX_NARROWING = '-narrowing'

# Annotations that mark the start/end of a group of expectations that must be
# removed together or not at all.
FINDER_GROUP_COMMENT_START = 'finder:group-start'
FINDER_GROUP_COMMENT_END = 'finder:group-end'

ALL_FINDER_START_ANNOTATION_BASES = frozenset([
    FINDER_DISABLE_COMMENT_BASE,
    FINDER_GROUP_COMMENT_START,
])

ALL_FINDER_END_ANNOTATION_BASES = frozenset([
    FINDER_ENABLE_COMMENT_BASE,
    FINDER_GROUP_COMMENT_END,
])

ALL_FINDER_DISABLE_SUFFIXES = frozenset([
    FINDER_COMMENT_SUFFIX_GENERAL,
    FINDER_COMMENT_SUFFIX_STALE,
    FINDER_COMMENT_SUFFIX_UNUSED,
    FINDER_COMMENT_SUFFIX_NARROWING,
])

FINDER_DISABLE_COMMENT_GENERAL = (FINDER_DISABLE_COMMENT_BASE +
                                  FINDER_COMMENT_SUFFIX_GENERAL)
FINDER_DISABLE_COMMENT_STALE = (FINDER_DISABLE_COMMENT_BASE +
                                FINDER_COMMENT_SUFFIX_STALE)
FINDER_DISABLE_COMMENT_UNUSED = (FINDER_DISABLE_COMMENT_BASE +
                                 FINDER_COMMENT_SUFFIX_UNUSED)
FINDER_DISABLE_COMMENT_NARROWING = (FINDER_DISABLE_COMMENT_BASE +
                                    FINDER_COMMENT_SUFFIX_NARROWING)
FINDER_ENABLE_COMMENT_GENERAL = (FINDER_ENABLE_COMMENT_BASE +
                                 FINDER_COMMENT_SUFFIX_GENERAL)
FINDER_ENABLE_COMMENT_STALE = (FINDER_ENABLE_COMMENT_BASE +
                               FINDER_COMMENT_SUFFIX_STALE)
FINDER_ENABLE_COMMENT_UNUSED = (FINDER_ENABLE_COMMENT_BASE +
                                FINDER_COMMENT_SUFFIX_UNUSED)
FINDER_ENABLE_COMMENT_NARROWING = (FINDER_ENABLE_COMMENT_BASE +
                                   FINDER_COMMENT_SUFFIX_NARROWING)

FINDER_DISABLE_COMMENTS = frozenset([
    FINDER_DISABLE_COMMENT_GENERAL,
    FINDER_DISABLE_COMMENT_STALE,
    FINDER_DISABLE_COMMENT_UNUSED,
    FINDER_DISABLE_COMMENT_NARROWING,
])

FINDER_ENABLE_COMMENTS = frozenset([
    FINDER_ENABLE_COMMENT_GENERAL,
    FINDER_ENABLE_COMMENT_STALE,
    FINDER_ENABLE_COMMENT_UNUSED,
    FINDER_ENABLE_COMMENT_NARROWING,
])

# Each disable comment paired with the enable comment that closes it.
FINDER_ENABLE_DISABLE_PAIRS = frozenset([
    (FINDER_DISABLE_COMMENT_GENERAL, FINDER_ENABLE_COMMENT_GENERAL),
    (FINDER_DISABLE_COMMENT_STALE, FINDER_ENABLE_COMMENT_STALE),
    (FINDER_DISABLE_COMMENT_UNUSED, FINDER_ENABLE_COMMENT_UNUSED),
    (FINDER_DISABLE_COMMENT_NARROWING, FINDER_ENABLE_COMMENT_NARROWING),
])

FINDER_GROUP_COMMENTS = frozenset([
    FINDER_GROUP_COMMENT_START,
    FINDER_GROUP_COMMENT_END,
])

# Union of frozensets is already a frozenset, so no extra wrapping is needed.
ALL_FINDER_COMMENTS = (FINDER_DISABLE_COMMENTS
                       | FINDER_ENABLE_COMMENTS
                       | FINDER_GROUP_COMMENTS)

GIT_BLAME_REGEX = re.compile(
    r'^[\w\s]+\(.+(?P<date>\d\d\d\d-\d\d-\d\d)[^\)]+\)(?P<content>.*)$',
    re.DOTALL)
TAG_GROUP_REGEX = re.compile(r'# tags: \[([^\]]*)\]', re.MULTILINE | re.DOTALL)

# Annotation comment start (with optional leading whitespace) pattern.
ANNOTATION_COMMENT_START_PATTERN = r' *# '
# Pattern for matching optional description text after an annotation.
ANNOTATION_OPTIONAL_TRAILING_TEXT_PATTERN = r'[^\n]*\n'
# Pattern for matching required description text after an annotation.
ANNOTATION_REQUIRED_TRAILING_TEXT_PATTERN = r'[^\n]+\n'
# Pattern for matching blank or comment lines.
BLANK_OR_COMMENT_LINES_PATTERN = r'(?:\s*| *#[^\n]*\n)*'
# Regexes matching annotation pairs (disable/enable, or group start/end) with
# nothing but blank lines or other comments between them, i.e. annotations
# that no longer guard any expectations. Built via comprehensions so the loop
# variables do not leak into the module namespace and the result is frozen
# from the start.
ALL_STALE_COMMENT_REGEXES = frozenset(
    {
        re.compile(
            ANNOTATION_COMMENT_START_PATTERN + start_comment +
            ANNOTATION_OPTIONAL_TRAILING_TEXT_PATTERN +
            BLANK_OR_COMMENT_LINES_PATTERN +
            ANNOTATION_COMMENT_START_PATTERN + end_comment + r'\n',
            re.MULTILINE | re.DOTALL)
        for start_comment, end_comment in FINDER_ENABLE_DISABLE_PAIRS
    } | {
        re.compile(
            ANNOTATION_COMMENT_START_PATTERN + FINDER_GROUP_COMMENT_START +
            ANNOTATION_REQUIRED_TRAILING_TEXT_PATTERN +
            BLANK_OR_COMMENT_LINES_PATTERN +
            ANNOTATION_COMMENT_START_PATTERN + FINDER_GROUP_COMMENT_END +
            r'\n', re.MULTILINE | re.DOTALL)
    })
128
# pylint: disable=useless-object-inheritance

# Module-level Expectations singleton, set via RegisterInstance() and read via
# GetInstance(). None until an instance is registered.
_registered_instance = None


def GetInstance() -> Optional['Expectations']:
  """Returns the registered Expectations instance, or None if none is set."""
  return _registered_instance


def RegisterInstance(instance: 'Expectations') -> None:
  """Registers |instance| as the module-wide Expectations singleton.

  Args:
    instance: An Expectations instance to register. Only one instance may be
        registered at a time; call ClearInstance() first to replace it.
  """
  global _registered_instance
  assert _registered_instance is None
  assert isinstance(instance, Expectations)
  _registered_instance = instance


def ClearInstance() -> None:
  """Clears the registered Expectations singleton, if any."""
  global _registered_instance
  _registered_instance = None
148
149
class RemovalType(object):
  """Enum-like constants describing why an expectation is being removed.

  Values intentionally reuse the finder comment suffixes so a removal type can
  be compared directly against disable-annotation suffixes.
  """
  STALE = FINDER_COMMENT_SUFFIX_STALE
  UNUSED = FINDER_COMMENT_SUFFIX_UNUSED
  NARROWING = FINDER_COMMENT_SUFFIX_NARROWING
154
155
class Expectations(object):
  """Base class for working with test expectation files.

  Subclasses are expected to override _GetExpectationFileTagHeader() and
  _GetKnownTags(), which raise NotImplementedError here.
  """

  def __init__(self):
    # Maps an expectation filepath to the tag groups parsed from its header,
    # each group being a list of tags ordered least to most specific. Filled
    # lazily by _FilterToMostSpecificTypTags().
    self._cached_tag_groups: Dict[str, List[List[str]]] = {}
159
160  def CreateTestExpectationMap(
161      self, expectation_files: Optional[Union[str, List[str]]],
162      tests: Optional[Iterable[str]],
163      grace_period: datetime.timedelta) -> data_types.TestExpectationMap:
164    """Creates an expectation map based off a file or list of tests.
165
166    Args:
167      expectation_files: A filepath or list of filepaths to expectation files to
168          read from, or None. If a filepath is specified, |tests| must be None.
169      tests: An iterable of strings containing test names to check. If
170          specified, |expectation_file| must be None.
171      grace_period: A datetime.timedelta specifying how many days old an
172          expectation must be in order to be parsed, i.e. how many days old an
173          expectation must be before it is a candidate for removal/modification.
174
175    Returns:
176      A data_types.TestExpectationMap, although all its BuilderStepMap contents
177      will be empty.
178    """
179
180    def AddContentToMap(content: str, ex_map: data_types.TestExpectationMap,
181                        expectation_file_name: str) -> None:
182      list_parser = expectations_parser.TaggedTestListParser(content)
183      expectations_for_file = ex_map.setdefault(
184          expectation_file_name, data_types.ExpectationBuilderMap())
185      logging.debug('Parsed %d expectations', len(list_parser.expectations))
186      for e in list_parser.expectations:
187        if 'Skip' in e.raw_results:
188          continue
189        # Expectations that only have a Pass expectation (usually used to
190        # override a broader, failing expectation) are not handled by the
191        # unexpected pass finder, so ignore those.
192        if e.raw_results == ['Pass']:
193          continue
194        expectation = data_types.Expectation(e.test, e.tags, e.raw_results,
195                                             e.reason)
196        assert expectation not in expectations_for_file
197        expectations_for_file[expectation] = data_types.BuilderStepMap()
198
199    logging.info('Creating test expectation map')
200    assert expectation_files or tests
201    assert not (expectation_files and tests)
202
203    expectation_map = data_types.TestExpectationMap()
204
205    if expectation_files:
206      if not isinstance(expectation_files, list):
207        expectation_files = [expectation_files]
208      for ef in expectation_files:
209        # Normalize to '/' as the path separator.
210        expectation_file_name = os.path.normpath(ef).replace(os.path.sep, '/')
211        content = self._GetNonRecentExpectationContent(expectation_file_name,
212                                                       grace_period)
213        AddContentToMap(content, expectation_map, expectation_file_name)
214    else:
215      expectation_file_name = ''
216      content = '# results: [ RetryOnFailure ]\n'
217      for t in tests:
218        content += '%s [ RetryOnFailure ]\n' % t
219      AddContentToMap(content, expectation_map, expectation_file_name)
220
221    return expectation_map
222
223  def _GetNonRecentExpectationContent(self, expectation_file_path: str,
224                                      num_days: datetime.timedelta) -> str:
225    """Gets content from |expectation_file_path| older than |num_days| days.
226
227    Args:
228      expectation_file_path: A string containing a filepath pointing to an
229          expectation file.
230      num_days: A datetime.timedelta containing how old an expectation in the
231          given expectation file must be to be included.
232
233    Returns:
234      The contents of the expectation file located at |expectation_file_path|
235      as a string with any recent expectations removed.
236    """
237    content = ''
238    # `git blame` output is normally in the format:
239    # revision optional_filename (author date time timezone lineno) line_content
240    # The --porcelain option is meant to be more machine readable, but is much
241    # more difficult to parse for what we need to do here. In order to
242    # guarantee that the filename won't be included in the output (by default,
243    # it will be shown if there is content from a renamed file), pass -c to
244    # use the same format as `git annotate`, which is:
245    # revision (author date time timezone lineno)line_content
246    # (Note the lack of space between the ) and the content).
247    cmd = ['git', 'blame', '-c', expectation_file_path]
248    with open(os.devnull, 'w', newline='', encoding='utf-8') as devnull:
249      blame_output = subprocess.check_output(cmd,
250                                             stderr=devnull).decode('utf-8')
251    for line in blame_output.splitlines(True):
252      match = GIT_BLAME_REGEX.match(line)
253      assert match
254      date = match.groupdict()['date']
255      line_content = match.groupdict()['content']
256      stripped_line_content = line_content.strip()
257      # Auto-add comments and blank space, otherwise only add if the grace
258      # period has expired.
259      if not stripped_line_content or stripped_line_content.startswith('#'):
260        content += line_content
261      else:
262        if six.PY2:
263          date_parts = date.split('-')
264          date = datetime.date(year=int(date_parts[0]),
265                               month=int(date_parts[1]),
266                               day=int(date_parts[2]))
267        else:
268          date = datetime.date.fromisoformat(date)
269        date_diff = datetime.date.today() - date
270        if date_diff > num_days:
271          content += line_content
272        else:
273          logging.debug('Omitting expectation %s because it is too new',
274                        line_content.rstrip())
275    return content
276
  def RemoveExpectationsFromFile(self,
                                 expectations: List[data_types.Expectation],
                                 expectation_file: str,
                                 removal_type: str) -> Set[str]:
    """Removes lines corresponding to |expectations| from |expectation_file|.

    Ignores any lines that match but are within a disable block or have an
    inline disable comment.

    Args:
      expectations: A list of data_types.Expectations to remove.
      expectation_file: A filepath pointing to an expectation file to remove
          lines from.
      removal_type: A RemovalType enum corresponding to the type of expectations
          being removed.

    Returns:
      A set of strings containing URLs of bugs associated with the removed
      expectations.
    """

    with open(expectation_file, encoding='utf-8') as f:
      input_contents = f.read()

    # Precompute group membership and disable-annotation information so the
    # per-line loop below only needs dict lookups.
    group_to_expectations, expectation_to_group = (
        self._GetExpectationGroupsFromFileContent(expectation_file,
                                                  input_contents))
    disable_annotated_expectations = (
        self._GetDisableAnnotatedExpectationsFromFile(expectation_file,
                                                      input_contents))

    output_contents = ''
    removed_urls = set()
    # Line numbers (relative to the output content) of removed expectations;
    # used later to find finder annotations left guarding nothing.
    removed_lines = set()
    num_removed_lines = 0
    for line_number, line in enumerate(input_contents.splitlines(True)):
      # Auto-add any comments or empty lines
      stripped_line = line.strip()
      if _IsCommentOrBlankLine(stripped_line):
        output_contents += line
        continue

      current_expectation = self._CreateExpectationFromExpectationFileLine(
          line, expectation_file)

      # Add any lines containing expectations that don't match any of the given
      # expectations to remove.
      if any(e for e in expectations if e == current_expectation):
        # Skip any expectations that match if we're in a disable block or there
        # is an inline disable comment.
        disable_block_suffix, disable_block_reason = (
            disable_annotated_expectations.get(current_expectation,
                                               (None, None)))
        if disable_block_suffix and _DisableSuffixIsRelevant(
            disable_block_suffix, removal_type):
          # Disabled for this removal type - keep the line.
          output_contents += line
          logging.info(
              'Would have removed expectation %s, but it is inside a disable '
              'block or has an inline disable with reason %s', stripped_line,
              disable_block_reason)
        elif _ExpectationPartOfNonRemovableGroup(current_expectation,
                                                 group_to_expectations,
                                                 expectation_to_group,
                                                 expectations):
          # Groups are all-or-nothing - keep the line if any group member
          # is not being removed.
          output_contents += line
          logging.info(
              'Would have removed expectation %s, but it is part of group "%s" '
              'whose members are not all removable.', stripped_line,
              expectation_to_group[current_expectation])
        else:
          bug = current_expectation.bug
          if bug:
            # It's possible to have multiple whitespace-separated bugs per
            # expectation, so treat each one separately.
            removed_urls |= set(bug.split())
          # Record that we've removed this line. By subtracting the number of
          # lines we've already removed, we keep the line numbers relative to
          # the content we're outputting rather than relative to the input
          # content. This also has the effect of automatically compressing
          # contiguous blocks of removal into a single line number.
          removed_lines.add(line_number - num_removed_lines)
          num_removed_lines += 1
      else:
        output_contents += line

    # Clean up any finder annotations that no longer guard an expectation now
    # that the matching lines are gone.
    header_length = len(
        self._GetExpectationFileTagHeader(expectation_file).splitlines(True))
    output_contents = _RemoveStaleComments(output_contents, removed_lines,
                                           header_length)

    with open(expectation_file, 'w', newline='', encoding='utf-8') as f:
      f.write(output_contents)

    return removed_urls
371
  def _GetDisableAnnotatedExpectationsFromFile(
      self, expectation_file: str,
      content: str) -> Dict[data_types.Expectation, Tuple[str, str]]:
    """Extracts expectations which are affected by disable annotations.

    Args:
      expectation_file: A filepath pointing to an expectation file.
      content: A string containing the contents of |expectation_file|.

    Returns:
      A dict mapping data_types.Expectation to (disable_suffix, disable_reason).
      If an expectation is present in this dict, it is affected by a disable
      annotation of some sort. |disable_suffix| is a string specifying which
      type of annotation is applicable, while |disable_reason| is a string
      containing the comment/reason why the disable annotation is present.

    Raises:
      RuntimeError: If disable/enable annotations in the file are not properly
          paired (nested disables or an enable without a disable).
    """
    # Parser state: whether we are currently between a disable comment and its
    # matching enable comment, plus that block's suffix/reason.
    in_disable_block = False
    disable_block_reason = ''
    disable_block_suffix = ''
    disable_annotated_expectations = {}
    for line in content.splitlines(True):
      stripped_line = line.strip()
      # Look for cases of disable/enable blocks.
      if _IsCommentOrBlankLine(stripped_line):
        # Only allow one enable/disable per line.
        assert len([c for c in ALL_FINDER_COMMENTS if c in line]) <= 1
        if _LineContainsDisableComment(line):
          if in_disable_block:
            raise RuntimeError(
                'Invalid expectation file %s - contains a disable comment "%s" '
                'that is in another disable block.' %
                (expectation_file, stripped_line))
          in_disable_block = True
          disable_block_reason = _GetDisableReasonFromComment(line)
          disable_block_suffix = _GetFinderCommentSuffix(line)
        elif _LineContainsEnableComment(line):
          if not in_disable_block:
            raise RuntimeError(
                'Invalid expectation file %s - contains an enable comment "%s" '
                'that is outside of a disable block.' %
                (expectation_file, stripped_line))
          in_disable_block = False
        continue

      current_expectation = self._CreateExpectationFromExpectationFileLine(
          line, expectation_file)

      # An expectation inside a disable block inherits the block's suffix and
      # reason.
      if in_disable_block:
        disable_annotated_expectations[current_expectation] = (
            disable_block_suffix, disable_block_reason)
      # An expectation line can also carry its own trailing inline disable
      # comment, which only applies to that line.
      elif _LineContainsDisableComment(line):
        disable_block_reason = _GetDisableReasonFromComment(line)
        disable_block_suffix = _GetFinderCommentSuffix(line)
        disable_annotated_expectations[current_expectation] = (
            disable_block_suffix, disable_block_reason)
    return disable_annotated_expectations
428
  def _GetExpectationGroupsFromFileContent(
      self, expectation_file: str, content: str
  ) -> Tuple[Dict[str, Set[data_types.Expectation]], Dict[data_types.
                                                          Expectation, str]]:
    """Extracts all groups of expectations from an expectation file.

    Args:
      expectation_file: A filepath pointing to an expectation file.
      content: A string containing the contents of |expectation_file|.

    Returns:
      A tuple (group_to_expectations, expectation_to_group).
      |group_to_expectations| is a dict of group names to sets of
      data_type.Expectations that belong to that group. |expectation_to_group|
      is the same, but mapped the other way from data_type.Expectations to group
      names.

    Raises:
      RuntimeError: If group start/end annotations in the file are not properly
          paired (nested groups or an end without a start).
    """
    group_to_expectations = collections.defaultdict(set)
    expectation_to_group = {}
    # Name of the group currently being parsed, or None when outside any group
    # block.
    group_name = None

    for line in content.splitlines():
      stripped_line = line.strip()
      # Possibly starting/ending a group.
      if _IsCommentOrBlankLine(stripped_line):
        if _LineContainsGroupStartComment(stripped_line):
          # Start of a new group. Nested groups are not supported.
          if group_name:
            raise RuntimeError(
                'Invalid expectation file %s - contains a group comment "%s" '
                'that is inside another group block.' %
                (expectation_file, stripped_line))
          group_name = _GetGroupNameFromCommentLine(stripped_line)
        elif _LineContainsGroupEndComment(stripped_line):
          # End of current group.
          if not group_name:
            raise RuntimeError(
                'Invalid expectation file %s - contains a group comment "%s" '
                'without a group start comment.' %
                (expectation_file, stripped_line))
          group_name = None
      elif group_name:
        # Currently in a group.
        e = self._CreateExpectationFromExpectationFileLine(
            stripped_line, expectation_file)
        group_to_expectations[group_name].add(e)
        expectation_to_group[e] = group_name
      # If we aren't in a group, do nothing.
    return group_to_expectations, expectation_to_group
478
479  def _CreateExpectationFromExpectationFileLine(self, line: str,
480                                                expectation_file: str
481                                                ) -> data_types.Expectation:
482    """Creates a data_types.Expectation from |line|.
483
484    Args:
485      line: A string containing a single line from an expectation file.
486      expectation_file: A filepath pointing to an expectation file |line| came
487          from.
488
489    Returns:
490      A data_types.Expectation containing the same information as |line|.
491    """
492    header = self._GetExpectationFileTagHeader(expectation_file)
493    single_line_content = header + line
494    list_parser = expectations_parser.TaggedTestListParser(single_line_content)
495    assert len(list_parser.expectations) == 1
496    typ_expectation = list_parser.expectations[0]
497    return data_types.Expectation(typ_expectation.test, typ_expectation.tags,
498                                  typ_expectation.raw_results,
499                                  typ_expectation.reason)
500
  def _GetExpectationFileTagHeader(self, expectation_file: str) -> str:
    """Gets the tag header used for expectation files.

    Args:
      expectation_file: A filepath pointing to an expectation file to get the
          tag header from.

    Returns:
      A string containing an expectation file header, i.e. the comment block at
      the top of the file defining possible tags and expected results.

    Raises:
      NotImplementedError: Always; subclasses must override this method.
    """
    raise NotImplementedError()
513
  def ParseTaggedTestListContent(self, content: str
                                 ) -> expectations_parser.TaggedTestListParser:
    """Helper to parse typ expectation files.

    This allows subclasses to avoid adding typ to PYTHONPATH.

    Args:
      content: A string containing the contents of an expectation file.

    Returns:
      A typ expectations_parser.TaggedTestListParser for |content|.
    """
    return expectations_parser.TaggedTestListParser(content)
521
522  def FilterToKnownTags(self, tags: Iterable[str]) -> Set[str]:
523    """Filters |tags| to only include tags known to expectation files.
524
525    Args:
526      tags: An iterable of strings containing tags.
527
528    Returns:
529      A set containing the elements of |tags| with any tags that are not defined
530      in any expectation files removed.
531    """
532    return self._GetKnownTags() & set(tags)
533
  def _GetKnownTags(self) -> Set[str]:
    """Gets all known/defined tags from expectation files.

    Returns:
      A set of strings containing all known/defined tags from expectation files.

    Raises:
      NotImplementedError: Always; subclasses must override this method.
    """
    raise NotImplementedError()
541
542  def _FilterToMostSpecificTypTags(self, typ_tags: FrozenSet[str],
543                                   expectation_file: str) -> FrozenSet[str]:
544    """Filters |typ_tags| to the most specific set.
545
546    Assumes that the tags in |expectation_file| are ordered from least specific
547    to most specific within each tag group.
548
549    Args:
550      typ_tags: A frozenset of strings containing the typ tags to filter.
551      expectations_file: A string containing a filepath pointing to the
552          expectation file to filter tags with.
553
554    Returns:
555      A frozenset containing the contents of |typ_tags| with only the most
556      specific tag from each group remaining.
557    """
558    # The logic for this function was lifted from the GPU/Blink flake finders,
559    # so there may be room to share code between the two.
560
561    if expectation_file not in self._cached_tag_groups:
562      with open(expectation_file, encoding='utf-8') as infile:
563        contents = infile.read()
564      tag_groups = []
565      for match in TAG_GROUP_REGEX.findall(contents):
566        tag_groups.append(match.lower().strip().replace('#', '').split())
567      self._cached_tag_groups[expectation_file] = tag_groups
568    tag_groups = self._cached_tag_groups[expectation_file]
569
570    num_matches = 0
571    tags_in_same_group = collections.defaultdict(list)
572    for tag in typ_tags:
573      for index, tag_group in enumerate(tag_groups):
574        if tag in tag_group:
575          tags_in_same_group[index].append(tag)
576          num_matches += 1
577          break
578    if num_matches != len(typ_tags):
579      all_tags = set()
580      for group in tag_groups:
581        all_tags |= set(group)
582      raise RuntimeError('Found tags not in expectation file %s: %s' %
583                         (expectation_file, ' '.join(set(typ_tags) - all_tags)))
584
585    filtered_tags = set()
586    for index, tags in tags_in_same_group.items():
587      if len(tags) == 1:
588        filtered_tags.add(tags[0])
589      else:
590        tag_group = tag_groups[index]
591        best_index = -1
592        for t in tags:
593          i = tag_group.index(t)
594          if i > best_index:
595            best_index = i
596        filtered_tags.add(tag_group[best_index])
597    return frozenset(filtered_tags)
598
  def _ConsolidateKnownOverlappingTags(self, typ_tags: FrozenSet[str]
                                       ) -> FrozenSet[str]:
    """Consolidates tags that are known to overlap/cause issues.

    One known example of this would be dual GPU machines that report tags for
    both GPUs.

    Args:
      typ_tags: A frozenset of strings containing typ tags.

    Returns:
      A frozenset of typ tags with known-overlapping tags consolidated. This
      base implementation returns |typ_tags| unchanged; subclasses with
      platform-specific knowledge may override it.
    """
    return typ_tags
607
608  def NarrowSemiStaleExpectationScope(
609      self, stale_expectation_map: data_types.TestExpectationMap) -> Set[str]:
610    """Narrows the scope of expectations in |stale_expectation_map|.
611
612    Expectations are modified such that they only apply to configurations that
613    need them, to the best extent possible. If scope narrowing is not possible,
614    e.g. the same hardware/software combination reports fully passing on one bot
615    but reports some failures on another bot, the expectation will not be
616    modified.
617
618    Args:
619      stale_expectation_map: A data_types.TestExpectationMap containing
620          semi-stale expectations.
621
622    Returns:
623      A set of strings containing URLs of bugs associated with the modified
624      expectations.
625    """
626    modified_urls = set()
627    cached_disable_annotated_expectations = {}
628    for expectation_file, e, builder_map in (
629        stale_expectation_map.IterBuilderStepMaps()):
630      # Check if the current annotation has scope narrowing disabled.
631      if expectation_file not in cached_disable_annotated_expectations:
632        with open(expectation_file, encoding='utf-8') as infile:
633          disable_annotated_expectations = (
634              self._GetDisableAnnotatedExpectationsFromFile(
635                  expectation_file, infile.read()))
636          cached_disable_annotated_expectations[
637              expectation_file] = disable_annotated_expectations
638      disable_block_suffix, disable_block_reason = (
639          cached_disable_annotated_expectations[expectation_file].get(
640              e, ('', '')))
641      if _DisableSuffixIsRelevant(disable_block_suffix, RemovalType.NARROWING):
642        logging.info(
643            'Skipping semi-stale narrowing check for expectation %s since it '
644            'has a narrowing disable annotation with reason %s',
645            e.AsExpectationFileString(), disable_block_reason)
646        continue
647
648      skip_to_next_expectation = False
649
650      pass_tag_sets = set()
651      fail_tag_sets = set()
652      # Determine which tags sets failures can occur on vs. tag sets that
653      # don't have any failures.
654      for builder, step, build_stats in builder_map.IterBuildStats():
655        if len(build_stats.tag_sets) > 1:
656          # This shouldn't really be happening during normal operation, but is
657          # expected to happen if a configuration changes, e.g. an OS was
658          # upgraded. In these cases, the old data will eventually age out and
659          # we will stop getting multiple tag sets.
660          logging.warning(
661              'Step %s on builder %s produced multiple tag sets: %s. Not '
662              'narrowing expectation scope for expectation %s.', step, builder,
663              build_stats.tag_sets, e.AsExpectationFileString())
664          skip_to_next_expectation = True
665          break
666        if build_stats.NeverNeededExpectation(e):
667          pass_tag_sets |= build_stats.tag_sets
668        else:
669          fail_tag_sets |= build_stats.tag_sets
670      if skip_to_next_expectation:
671        continue
672
673      # Remove all instances of tags that are shared between all sets other than
674      # the tags that were used by the expectation, as they are redundant.
675      common_tags = set()
676      for ts in pass_tag_sets:
677        common_tags |= ts
678        # We only need one initial tag set, but sets do not have a way of
679        # retrieving a single element other than pop(), which removes the
680        # element, which we don't want.
681        break
682      for ts in pass_tag_sets | fail_tag_sets:
683        common_tags &= ts
684      common_tags -= e.tags
685      pass_tag_sets = {ts - common_tags for ts in pass_tag_sets}
686      fail_tag_sets = {ts - common_tags for ts in fail_tag_sets}
687
688      # Calculate new tag sets that should be functionally equivalent to the
689      # single, more broad tag set that we are replacing. This is done by
690      # checking if the intersection between any pairs of fail tag sets are
691      # still distinct from any pass tag sets, i.e. if the intersection between
692      # fail tag sets is still a valid fail tag set. If so, the original sets
693      # are replaced by the intersection.
694      new_tag_sets = set()
695      covered_fail_tag_sets = set()
696      for fail_tags in fail_tag_sets:
697        if any(fail_tags <= pt for pt in pass_tag_sets):
698          logging.warning(
699              'Unable to determine what makes failing configs unique for %s, '
700              'not narrowing expectation scope.', e.AsExpectationFileString())
701          skip_to_next_expectation = True
702          break
703        if fail_tags in covered_fail_tag_sets:
704          continue
705        tag_set_to_add = fail_tags
706        for ft in fail_tag_sets:
707          if ft in covered_fail_tag_sets:
708            continue
709          intersection = tag_set_to_add & ft
710          if any(intersection <= pt for pt in pass_tag_sets):
711            # Intersection is too small, as it also covers a passing tag set.
712            continue
713          if any(intersection <= cft for cft in covered_fail_tag_sets):
714            # Both the intersection and some tag set from new_tag_sets
715            # apply to the same original failing tag set,
716            # which means if we add the intersection to new_tag_sets,
717            # they will conflict on the bot from the original failing tag set.
718            # The above check works because new_tag_sets and
719            # covered_fail_tag_sets are updated together below.
720            continue
721          tag_set_to_add = intersection
722        new_tag_sets.add(tag_set_to_add)
723        covered_fail_tag_sets.update(cft for cft in fail_tag_sets
724                                     if tag_set_to_add <= cft)
725      if skip_to_next_expectation:
726        continue
727
728      # Remove anything we know could be problematic, e.g. causing expectation
729      # file parsing errors.
730      new_tag_sets = {
731          self._ConsolidateKnownOverlappingTags(nts)
732          for nts in new_tag_sets
733      }
734      new_tag_sets = {
735          self._FilterToMostSpecificTypTags(nts, expectation_file)
736          for nts in new_tag_sets
737      }
738
739      # Replace the existing expectation with our new ones.
740      with open(expectation_file, encoding='utf-8') as infile:
741        file_contents = infile.read()
742      line, _ = self._GetExpectationLine(e, file_contents, expectation_file)
743      modified_urls |= set(e.bug.split())
744      expectation_strs = []
745      for new_tags in new_tag_sets:
746        expectation_copy = copy.copy(e)
747        expectation_copy.tags = new_tags
748        expectation_strs.append(expectation_copy.AsExpectationFileString())
749      expectation_strs.sort()
750      replacement_lines = '\n'.join(expectation_strs)
751      file_contents = file_contents.replace(line, replacement_lines)
752      with open(expectation_file, 'w', newline='', encoding='utf-8') as outfile:
753        outfile.write(file_contents)
754
755    return modified_urls
756
757  def _GetExpectationLine(self, expectation: data_types.Expectation,
758                          file_contents: str, expectation_file: str
759                          ) -> Union[Tuple[None, None], Tuple[str, int]]:
760    """Gets the line and line number of |expectation| in |file_contents|.
761
762    Args:
763      expectation: A data_types.Expectation.
764      file_contents: A string containing the contents read from an expectation
765          file.
766      expectation_file: A string containing the path to the expectation file
767          that |file_contents| came from.
768
769    Returns:
770      A tuple (line, line_number). |line| is a string containing the exact line
771      in |file_contents| corresponding to |expectation|. |line_number| is an int
772      corresponding to where |line| is in |file_contents|. |line_number| may be
773      off if the file on disk has changed since |file_contents| was read. If a
774      corresponding line cannot be found, both |line| and |line_number| are
775      None.
776    """
777    # We have all the information necessary to recreate the expectation line and
778    # line number can be pulled during the initial expectation parsing. However,
779    # the information we have is not necessarily in the same order as the
780    # text file (e.g. tag ordering), and line numbers can change pretty
781    # dramatically between the initial parse and now due to stale expectations
782    # being removed. So, parse this way in order to improve the user experience.
783    file_lines = file_contents.splitlines()
784    for line_number, line in enumerate(file_lines):
785      if _IsCommentOrBlankLine(line.strip()):
786        continue
787      current_expectation = self._CreateExpectationFromExpectationFileLine(
788          line, expectation_file)
789      if expectation == current_expectation:
790        return line, line_number + 1
791    return None, None
792
793  def FindOrphanedBugs(self, affected_urls: Iterable[str]) -> Set[str]:
794    """Finds cases where expectations for bugs no longer exist.
795
796    Args:
797      affected_urls: An iterable of affected bug URLs, as returned by functions
798          such as RemoveExpectationsFromFile.
799
800    Returns:
801      A set containing a subset of |affected_urls| who no longer have any
802      associated expectations in any expectation files.
803    """
804    seen_bugs = set()
805
806    expectation_files = self.GetExpectationFilepaths()
807
808    for ef in expectation_files:
809      with open(ef, encoding='utf-8') as infile:
810        contents = infile.read()
811      for url in affected_urls:
812        if url in seen_bugs:
813          continue
814        if url in contents:
815          seen_bugs.add(url)
816    return set(affected_urls) - seen_bugs
817
818  def GetExpectationFilepaths(self) -> List[str]:
819    """Gets all the filepaths to expectation files of interest.
820
821    Returns:
822      A list of strings, each element being a filepath pointing towards an
823      expectation file.
824    """
825    raise NotImplementedError()
826
827
def _LineContainsGroupStartComment(line: str) -> bool:
  """Returns whether |line| carries a finder group-start annotation."""
  return line.find(FINDER_GROUP_COMMENT_START) != -1
830
831
def _LineContainsGroupEndComment(line: str) -> bool:
  """Returns whether |line| carries a finder group-end annotation."""
  return line.find(FINDER_GROUP_COMMENT_END) != -1
834
835
def _LineContainsDisableComment(line: str) -> bool:
  """Returns whether |line| carries a finder:disable annotation (any suffix)."""
  return line.find(FINDER_DISABLE_COMMENT_BASE) != -1
838
839
def _LineContainsEnableComment(line: str) -> bool:
  """Returns whether |line| carries a finder:enable annotation (any suffix)."""
  return line.find(FINDER_ENABLE_COMMENT_BASE) != -1
842
843
def _GetGroupNameFromCommentLine(line: str) -> str:
  """Gets the group name from the finder comment on the given line.

  Raises:
    RuntimeError: If the annotation is not followed by a group name.
  """
  assert FINDER_GROUP_COMMENT_START in line
  # Strip the leading comment marker(s), then separate the annotation token
  # from everything after it (the group name).
  annotation_and_name = line.lstrip('#').strip().split(maxsplit=1)
  # maxsplit=1 yields at most two pieces; fewer means the name is missing.
  if len(annotation_and_name) < 2:
    raise RuntimeError('Given line %s did not have a group name.' % line)
  return annotation_and_name[1]
852
853
def _GetFinderCommentSuffix(line: str) -> str:
  """Gets the suffix of the finder comment on the given line.

  Examples:
    'foo  # finder:disable-general some_reason' -> '-general'
    'foo  # finder:disable-stale some_reason' -> '-stale'

  Note that a bare annotation with no suffix (e.g. 'finder:disable') is not
  supported: the suffix must be one of ALL_FINDER_DISABLE_SUFFIXES, which an
  empty string is not, so such lines trip the assert below.

  Raises:
    RuntimeError: If |line| does not contain a finder disable/enable comment.
  """
  target_str = None
  if _LineContainsDisableComment(line):
    target_str = FINDER_DISABLE_COMMENT_BASE
  elif _LineContainsEnableComment(line):
    target_str = FINDER_ENABLE_COMMENT_BASE
  else:
    raise RuntimeError('Given line %s did not have a finder comment.' % line)
  # Isolate the annotation token itself, e.g. 'finder:disable-stale', by
  # cutting everything before it and taking the first whitespace-separated
  # word.
  line = line[line.find(target_str):]
  line = line.split()[0]
  suffix = line.replace(target_str, '')
  # Enable comments share the same suffix set as disable comments.
  assert suffix in ALL_FINDER_DISABLE_SUFFIXES
  return suffix
873
874
def _LineContainsRelevantDisableComment(line: str, removal_type: str) -> bool:
  """Returns whether the given line contains a relevant disable comment.

  Args:
    line: A string containing the line to check.
    removal_type: A RemovalType enum corresponding to the type of expectations
        being removed.

  Returns:
    A bool denoting whether |line| contains a relevant disable comment given
    |removal_type|.
  """
  # A general disable applies to every removal type; otherwise the disable
  # suffix must match the removal type being performed.
  return (FINDER_DISABLE_COMMENT_GENERAL in line
          or (FINDER_DISABLE_COMMENT_BASE + removal_type) in line)
892
893
def _DisableSuffixIsRelevant(suffix: str, removal_type: str) -> bool:
  """Returns whether the given suffix is relevant given the removal type.

  Args:
    suffix: A string containing a disable comment suffix.
    removal_type: A RemovalType enum corresponding to the type of expectations
        being removed.

  Returns:
    True if suffix is relevant and its disable request should be honored.
  """
  # '-general' disables apply regardless of removal type.
  return suffix in (FINDER_COMMENT_SUFFIX_GENERAL, removal_type)
910
911
def _GetDisableReasonFromComment(line: str) -> str:
  """Extracts the human-provided reason following a finder disable comment."""
  # Rebuild the full annotation (base + suffix) and return whatever trails it.
  annotation = FINDER_DISABLE_COMMENT_BASE + _GetFinderCommentSuffix(line)
  return line.split(annotation, 1)[1].strip()
915
916
917def _IsCommentOrBlankLine(line: str) -> bool:
918  return (not line or line.startswith('#'))
919
920
def _ExpectationPartOfNonRemovableGroup(
    current_expectation: data_types.Expectation,
    group_to_expectations: Dict[str, Set[data_types.Expectation]],
    expectation_to_group: Dict[data_types.Expectation, str],
    removable_expectations: List[data_types.Expectation]):
  """Determines if the given expectation is part of a non-removable group.

  This is the case if the expectation is part of a group, but not all
  expectations in that group are marked as removable.

  Args:
    current_expectation: A data_types.Expectation that is being checked.
    group_to_expectations: A dict mapping group names to sets of expectations
        contained within that group.
    expectation_to_group: A dict mapping an expectation to the group name it
        belongs to.
    removable_expectations: A list of all expectations that are removable.
  """
  group_name = expectation_to_group.get(current_expectation)
  # Expectations outside of any group are never blocked by this check.
  if not group_name:
    return False

  # Membership checks against a set are O(1), so convert the list once.
  removable = set(removable_expectations)
  group_members = group_to_expectations[group_name]
  # The group is removable only if every member is individually removable.
  return not group_members.issubset(removable)
949
950
def _RemoveStaleComments(content: str, removed_lines: Set[int],
                         header_length: int) -> str:
  """Attempts to remove stale contents from the given expectation file content.

  Args:
    content: A string containing the contents of an expectation file.
    removed_lines: A set of ints denoting which line numbers were removed in
        the process of creating |content|. NOTE(review): these appear to be
        1-based line numbers — |split_content[rl]| below addresses the line
        *after* the removed one — confirm against the caller.
    header_length: An int denoting how many lines long the tag header is.

  Returns:
    A copy of |content| with various stale comments removed, e.g. group blocks
    if the group has been removed.
  """
  # Look for the case where we've removed an entire block of expectations that
  # were preceded by a comment, which we should remove.
  comment_line_numbers_to_remove = []
  # keepends=True so that ''.join() below reconstructs the file verbatim.
  split_content = content.splitlines(True)
  for rl in removed_lines:
    found_trailing_annotation = False
    found_starting_annotation = False
    # Check for the end of the file, a blank line, or a comment after the block
    # we've removed.
    if rl < len(split_content):
      stripped_line = split_content[rl].strip()
      if stripped_line and not stripped_line.startswith('#'):
        # We found an expectation, so the entire expectation block wasn't
        # removed.
        continue
      if any(annotation in stripped_line
             for annotation in ALL_FINDER_END_ANNOTATION_BASES):
        found_trailing_annotation = True
    # Look for a comment block immediately preceding the block we removed.
    # Walk upwards one line at a time, stopping before the tag header.
    comment_line_number = rl - 1
    while comment_line_number != header_length - 1:
      stripped_line = split_content[comment_line_number].strip()
      if stripped_line.startswith('#'):
        # If we find what should be a trailing annotation, stop immediately so
        # we don't accidentally remove it and create an orphan earlier in the
        # file.
        if any(annotation in stripped_line
               for annotation in ALL_FINDER_END_ANNOTATION_BASES):
          break
        if any(annotation in stripped_line
               for annotation in ALL_FINDER_START_ANNOTATION_BASES):
          # If we've already found a starting annotation, skip past this line.
          # This is to handle the case of nested annotations, e.g. a
          # disable-narrowing block inside of a group block. We'll find the
          # inner-most block here and remove it. Any outer blocks will be
          # removed as part of the lingering stale annotation removal later on.
          # If we don't skip past these outer annotations, then we get left with
          # orphaned trailing annotations.
          if found_starting_annotation:
            comment_line_number -= 1
            continue
          found_starting_annotation = True
          # If we found a starting annotation but not a trailing annotation, we
          # shouldn't remove the starting one, as that would cause the trailing
          # one that is later in the file to be orphaned. We also don't want to
          # continue and remove comments above that since it is assumedly still
          # valid.
          if found_starting_annotation and not found_trailing_annotation:
            break
        comment_line_numbers_to_remove.append(comment_line_number)
        comment_line_number -= 1
      else:
        # Hit an expectation or a blank line, so the comment block (if any)
        # has ended.
        break
    # In the event that we found both a start and trailing annotation, we need
    # to also remove the trailing one.
    if found_trailing_annotation and found_starting_annotation:
      comment_line_numbers_to_remove.append(rl)

  # Actually remove the comments we found above. Blanking the entries (rather
  # than deleting them) keeps the remaining indices valid during the loop.
  for i in comment_line_numbers_to_remove:
    split_content[i] = ''
  if comment_line_numbers_to_remove:
    content = ''.join(split_content)

  # Remove any lingering cases of stale annotations that we can easily detect.
  for regex in ALL_STALE_COMMENT_REGEXES:
    for match in regex.findall(content):
      content = content.replace(match, '')

  return content
1035