xref: /aosp_15_r20/external/cronet/testing/libfuzzer/gen_fuzzer_owners.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1#!/usr/bin/env python3
2#
3# Copyright 2018 The Chromium Authors
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6"""Generates a `foo.owners` file for a `fuzzer_test("foo", ...)` GN target.
7
8By default, the closest `OWNERS` file is located and copied, except for
9`//OWNERS` and `//third_party/OWNERS` for fear of spamming top-level owners with
10fuzzer bugs they know nothing about.
11
12If no such file can be located, then we attempt to use `git blame` to identify
13the author of the main fuzzer `.cc` file. Note that this does not work for code
14in git submodules (e.g. most code in `third_party/`), in which case we generate
15an empty file.
16
17Invoked by GN from `fuzzer_test.gni`.
18"""
19
20import argparse
21import os
22import re
23import subprocess
24import sys
25
26from typing import Optional
27
# Matches the `author-mail <...>` line of `git blame --porcelain` output and
# captures the address between the angle brackets.
AUTHOR_REGEX = re.compile('author-mail <(.+)>')
# Absolute path two directory levels above the directory containing this
# script; the rest of the file treats it as the chromium src directory.
CHROMIUM_SRC_DIR = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Name of the file that is searched for in each ancestor directory.
OWNERS_FILENAME = 'OWNERS'
# Directory name at which the upward OWNERS search stops.
THIRD_PARTY = 'third_party'
# `THIRD_PARTY` followed by the platform path separator.
# NOTE(review): not referenced elsewhere in the visible part of this file.
THIRD_PARTY_SEARCH_STRING = THIRD_PARTY + os.path.sep
34
35
def GetAuthorFromGitBlame(blame_output):
  """Extracts the author's e-mail address from git blame output.

  Args:
    blame_output: Raw bytes printed by git; decoded here as UTF-8.

  Returns:
    The address captured by `AUTHOR_REGEX` on the first line that matches, or
    None when no line matches.
  """
  decoded_lines = blame_output.decode('utf-8').splitlines()
  candidates = (AUTHOR_REGEX.match(text) for text in decoded_lines)
  return next((found.group(1) for found in candidates if found), None)
44
45
def GetGitCommand():
  """Returns the name of the git executable for the current platform.

  The name can be passed to `subprocess` without `shell=True`: 'git.bat' on
  Windows and 'git' everywhere else.
  """
  if sys.platform == 'win32':
    return 'git.bat'
  return 'git'
51
52
def GetOwnersFromOwnersFile(source: str) -> Optional[str]:
  """Finds the owners of `source` from the closest OWNERS file.

  Both //OWNERS or */third_party/OWNERS are ignored so as not to spam top-level
  owners with unowned fuzzer bugs.

  Args:
    source: Relative path from the chromium src directory to the target source
      file.

  Returns:
    The entire contents of the closest OWNERS file. That is, the first OWNERS
    file encountered while walking up through the ancestor directories of the
    target source file. None if the walk reaches a `third_party` directory or
    the top of the tree without finding one.
  """
  # TODO(https://crbug.com/1513729): Use `pathlib` instead of `os.path` for
  # better ergonomics and robustness.
  dirs = source.split(os.path.sep)[:-1]

  # Note: We never test for //OWNERS, i.e. when `dirs` is empty.
  while dirs:
    # Never return the contents of */third_party/OWNERS, and stop searching.
    if dirs[-1] == THIRD_PARTY:
      break

    owners_file_path = os.path.join(CHROMIUM_SRC_DIR, *dirs, OWNERS_FILENAME)
    if os.path.exists(owners_file_path):
      # TODO(https://crbug.com/1513729): OWNERS files can reference others,
      # have per-file directives, etc. We should be cleverer than this.
      # Read inside a `with` block so the handle is closed deterministically
      # rather than whenever the file object happens to be collected.
      with open(owners_file_path) as owners_file:
        return owners_file.read()

    dirs.pop()

  return None
87
def GetOwnersForFuzzer(sources):
  """Returns the owners of the fuzzer found among `sources`.

  The first existing source that mentions a fuzzer entry point (`FuzzOneInput`,
  `LLVMFuzzerTestOneInput` or `PROTO_FUZZER`) is treated as the fuzzer's main
  file. Its owners come from the closest OWNERS file if there is one, and
  otherwise from `git blame` on line 3 of that file.

  Args:
    sources: Paths relative to the chromium src directory, or None.

  Returns:
    The OWNERS file contents or the blamed author's e-mail address. None when
    `sources` is empty, no fuzzer source is found, the fuzzer source is not
    listed by `git ls-files`, or the blame output has no author line.

  Raises:
    subprocess.CalledProcessError: If a git invocation exits with a non-zero
      status.
  """
  if not sources:
    return None

  for source in sources:
    full_source_path = os.path.join(CHROMIUM_SRC_DIR, source)
    if not os.path.exists(full_source_path):
      continue

    # The content is only scanned for ASCII markers, so decode explicitly as
    # UTF-8 and replace undecodable bytes instead of letting the locale's
    # default encoding raise UnicodeDecodeError on an unusual source file.
    with open(full_source_path, 'r', encoding='utf-8',
              errors='replace') as source_file_handle:
      source_content = source_file_handle.read()

    if not SubStringExistsIn(
        ['FuzzOneInput', 'LLVMFuzzerTestOneInput', 'PROTO_FUZZER'],
        source_content):
      # A dependency of the fuzzer rather than the fuzzer source itself.
      continue

    # Found the fuzzer source. Try finding the closest OWNERS file first.
    owners = GetOwnersFromOwnersFile(source)
    if owners:
      return owners

    git_dir = os.path.join(CHROMIUM_SRC_DIR, '.git')
    git_command = GetGitCommand()
    # `git ls-files` prints nothing for a path it does not know about.
    is_git_file = bool(subprocess.check_output(
        [git_command, '--git-dir', git_dir, 'ls-files', source],
        cwd=CHROMIUM_SRC_DIR))
    if not is_git_file:
      # File is not in working tree. If no OWNERS file was found, we cannot
      # tell who it belongs to.
      return None

    # `git log --follow` and `--reverse` don't work together and using just
    # `--follow` is too slow. Make a best estimate with an assumption that the
    # original author has authored the copyright block, which (generally) does
    # not change even with file rename/move. Look at the last line of the
    # block, as a copyright block update sweep in late 2022 made one person
    # responsible for changing the first line of every copyright block in the
    # repo, and it would be best to avoid assigning ownership of every fuzz
    # issue predating that year to that one person.
    blame_output = subprocess.check_output(
        [git_command, '--git-dir', git_dir, 'blame', '--porcelain', '-L3,3',
         source], cwd=CHROMIUM_SRC_DIR)
    return GetAuthorFromGitBlame(blame_output)

  return None
135
def FindGroupsAndDepsInDeps(deps_list, build_dir):
  """Finds which targets in `deps_list` are GN groups, and their direct deps.

  Runs `gn desc` once per target and scans the textual output.

  Args:
    deps_list: Iterable of GN target labels.
    build_dir: Build directory passed to `gn desc`.

  Returns:
    A `(groups, deps_for_groups)` tuple. `groups` lists the labels whose
    description contains the line `Type: group`; `deps_for_groups` maps each of
    them to the stripped lines found under `Direct dependencies`.
  """
  groups = []
  deps_for_groups = {}
  for target in deps_list:
    description = subprocess.check_output(
        [GNPath(), 'desc', '--fail-on-unused-args', build_dir, target]).decode(
            'utf8')
    # One shared iterator, so each phase resumes where the previous stopped.
    remaining_lines = iter(description.splitlines())

    # Phase 1: only the first `Type: ` line decides whether this is a group.
    type_line = next(
        (text for text in remaining_lines if text.startswith('Type: ')), None)
    if type_line != 'Type: group':
      continue
    groups.append(target)
    assert target not in deps_for_groups
    direct_deps = deps_for_groups[target] = []

    # Phase 2: skip ahead to the `Direct dependencies` heading.
    for text in remaining_lines:
      if text.startswith('Direct dependencies'):
        break

    # Phase 3: every line up to the next empty line is one dependency.
    for text in remaining_lines:
      if text == '':
        break
      direct_deps.append(text.strip())

  return groups, deps_for_groups
164
165
def TraverseGroups(deps_list, build_dir):
  """Replaces every group in `deps_list` with its direct dependencies.

  Groups are expanded repeatedly, so a group reached through another group is
  expanded as well.

  Args:
    deps_list: Iterable of GN target labels.
    build_dir: Build directory forwarded to `FindGroupsAndDepsInDeps`.

  Returns:
    A list of the labels that remain once all discovered groups are removed
    and their direct dependencies added.
  """
  resolved = set(deps_list)
  frontier = set(resolved)

  # Remember expanded groups so circular dependencies cannot loop forever.
  expanded_groups = set()

  while frontier:
    # Identify the groups among the labels that still need checking.
    found, members_by_group = FindGroupsAndDepsInDeps(frontier, build_dir)
    new_groups = set(found) - expanded_groups
    if not new_groups:
      break

    # Groups themselves never appear in the result.
    resolved -= new_groups
    expanded_groups |= new_groups

    # The next round checks the members of the groups just expanded, except
    # for any member that is itself an already-expanded group.
    frontier = set()
    for group in new_groups:
      frontier.update(members_by_group[group])
    frontier -= expanded_groups
    resolved |= frontier
  return list(resolved)
192
193
def GetSourcesFromDeps(deps_list, build_dir):
  """Collects the source files of every target in `deps_list`.

  Groups are expanded through `TraverseGroups` first, then `gn desc ... sources`
  is run for each remaining target.

  Args:
    deps_list: GN target labels, or None.
    build_dir: Build directory passed to `gn desc`.

  Returns:
    A list of source paths with any leading `//` removed, or None when
    `deps_list` is empty or None.
  """
  if not deps_list:
    return None

  collected = []
  for target in TraverseGroups(deps_list, build_dir):
    raw = subprocess.check_output(
        [GNPath(), 'desc', '--fail-on-unused-args', build_dir, target,
         'sources'])
    collected.extend(
        entry[2:] if entry.startswith('//') else entry
        for entry in raw.decode('utf8').splitlines())

  return collected
210
211
def GNPath():
  """Returns the path of the `gn` binary under `buildtools` for this platform.

  Linux uses `linux64/gn`, macOS uses `mac/gn`, and every other platform falls
  back to `win/gn.exe`.
  """
  buildtools_dir = os.path.join(CHROMIUM_SRC_DIR, 'buildtools')
  if sys.platform == 'darwin':
    return os.path.join(buildtools_dir, 'mac', 'gn')
  if sys.platform.startswith('linux'):
    return os.path.join(buildtools_dir, 'linux64', 'gn')
  return os.path.join(buildtools_dir, 'win', 'gn.exe')
221
222
def SubStringExistsIn(substring_list, string):
  """Returns True if any entry of |substring_list| occurs in |string|."""
  for candidate in substring_list:
    if candidate in string:
      return True
  return False
226
227
def main():
  """Parses the command line and writes the fuzzer's owners file.

  The file named by `--owners` is opened for writing before any lookup, so it
  always exists afterwards; it stays empty when no owner is found, which keeps
  ninja happy. Owners are looked up from `--sources` first and, failing that,
  from the sources of the targets given by `--deps`.
  """
  arg_parser = argparse.ArgumentParser(
      description='Generate fuzzer owners file.')
  arg_parser.add_argument('--owners', required=True)
  arg_parser.add_argument('--build-dir')
  arg_parser.add_argument('--deps', nargs='+')
  arg_parser.add_argument('--sources', nargs='+')
  options = arg_parser.parse_args()

  with open(options.owners, 'w') as owners_file:
    owners = GetOwnersForFuzzer(options.sources)
    if not owners:
      # Nothing found via |options.sources|; fall back to the sources of the
      # targets listed in |options.deps|.
      owners = GetOwnersForFuzzer(
          GetSourcesFromDeps(options.deps, options.build_dir))
    if owners:
      owners_file.write(owners)


if __name__ == '__main__':
  main()
255