xref: /aosp_15_r20/external/cronet/build/ios/update_bundle_filelist.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Updates .filelist files using data from corresponding .globlist files (or
checks whether they are up to date).

bundle_data targets require an explicit source list, but maintaining these large
lists can be cumbersome. This script aims to simplify the process of updating
these lists by either expanding globs to update file lists or checking that an
existing file list matches such an expansion (i.e., checking during presubmit).

The .globlist file contains a list of globs that will be expanded to either
compare or replace a corresponding .filelist. It is possible to exclude items
from the file list with globs as well. These lines are prefixed with '-' and are
processed in order, so be sure that exclusions succeed inclusions in the list of
globs. Comments and empty lines are permitted in .globlist files; comments are
prefixed with '#'.

By convention, the base name of the .globlist and .filelist files matches the
label of their corresponding bundle_data from the .gn file. In order to ensure
that these filelists don't get stale, there should also be a PRESUBMIT.py
which uses this script to check that the list is up to date.

By default, the script will update the file list to match the expanded globs.
"""
28
29import argparse
30import datetime
31import difflib
32import glob
33import os.path
34import re
35import subprocess
36import sys
37
# ANSI escape sequences used to colorize error output in a terminal. These,
# along with the printing routine below, are taken from update_deps.py.
TERMINAL_ERROR_COLOR = '\033[91m'
TERMINAL_RESET_COLOR = '\033[0m'

# Header written at the top of a generated .filelist. The copyright year is
# the year in which the script is run.
_HEADER = f"""# Copyright {datetime.datetime.now().year} The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# NOTE: this file is generated by build/ios/update_bundle_filelist.py
#       If it requires updating, you should get a presubmit error with
#       instructions on how to regenerate. Otherwise, do not edit.
"""

# Recognizes a header like the one above, whatever its copyright year.
_HEADER_PATTERN = re.compile(r"""# Copyright [0-9]+ The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# NOTE: this file is generated by build/ios/update_bundle_filelist.py
#       If it requires updating, you should get a presubmit error with
#       instructions on how to regenerate. Otherwise, do not edit.
""")

# Number of lines occupied by the header at the top of a .filelist.
_HEADER_HEIGHT = 6

# Directive comments recognized in a .globlist. Between the two, expansions
# are not checked for falling outside the directory holding the .globlist.
_START_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR = '# push(ignore-relative)'
_STOP_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR = '# pop(ignore-relative)'
63
64
def parse_filelist(filelist_name):
  """Reads an existing .filelist and splits it into entries and header.

  The first _HEADER_HEIGHT lines are returned verbatim as the header. Every
  remaining line is stripped of surrounding whitespace, and the stripped lines
  are returned sorted.

  Args:
    filelist_name: Path of the .filelist file to read.

  Returns:
    A (files, header) tuple. When the file cannot be read, the failure is
    reported through print_error() and ([], '') is returned, so callers that
    unpack the result keep working (e.g. when the file list does not exist
    yet).
  """
  try:
    # File lists are written as UTF-8 by write_filelist(), so read them back
    # the same way instead of relying on the platform's default encoding.
    with open(filelist_name, encoding='utf-8') as filelist:
      lines = filelist.readlines()
  except Exception as e:
    print_error(f'Could not read file list: {filelist_name}', f'{type(e)}: {e}')
    return ([], '')

  header = ''.join(lines[:_HEADER_HEIGHT])
  files = sorted(line.strip() for line in lines[_HEADER_HEIGHT:])
  return (files, header)
75
76
def get_git_command_name():
  """Returns the name of the git executable for the current platform.

  'git.bat' is used when sys.platform starts with 'win', 'git' otherwise.
  """
  on_windows = sys.platform.startswith('win')
  return 'git.bat' if on_windows else 'git'
81
82
def get_tracked_files(directory, globroot, repository_root_relative, verbose):
  """Returns the set of files under |directory| that git tracks.

  Runs `git ls-files --error-unmatch <directory>` with |globroot| as the
  working directory.

  Args:
    directory: Directory to list, passed to git as given.
    globroot: Working directory for the git invocation.
    repository_root_relative: If true, every returned path is prefixed with
      '//'.
    verbose: If true, failures are reported through print_error().

  Returns:
    A set of paths using '/' as separator. The set is empty when git cannot be
    run, exits with a non-zero status, or produces output that cannot be
    decoded as UTF-8.
  """
  try:
    git_cmd = get_git_command_name()
    with subprocess.Popen([git_cmd, 'ls-files', '--error-unmatch', directory],
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE,
                          cwd=globroot) as p:
      (stdout, stderr) = p.communicate()
      if p.returncode != 0:
        if verbose:
          # The pipes carry bytes; decode so that the message shows git's
          # diagnostics rather than a b'...' literal.
          print_error(
              f'Could not gather a list of tracked files in {directory}',
              stderr.decode('utf-8', errors='replace').strip())
        return set()

      files = [f.decode('utf-8') for f in stdout.splitlines()]

      # Need paths to be relative to directory in order to match expansions.
      # This should happen naturally due to cwd above, but we need to take
      # special care if relative to the repository root.
      if repository_root_relative:
        files = ['//' + f for f in files]

      # Handle Windows backslashes.
      return {f.replace('\\', '/') for f in files}

  except Exception as e:
    # Best effort: any failure is treated as "nothing is tracked".
    if verbose:
      print_error(f'Could not gather a list of tracked files in {directory}',
                  f'{type(e)}: {e}')
    return set()
116
117
def combine_potentially_repository_root_relative_paths(a, b):
  """Joins |b| onto |a|, allowing |b| to be repository-root relative.

  Returns:
    A (combined, root_relative) tuple, where root_relative tells whether |b|
    started with '//'.
  """
  root_relative = b.startswith('//')
  if not root_relative:
    return (os.path.join(a, b), False)
  # os.path treats a path starting with '//' as absolute, so os.path.join
  # would not produce the wanted result; plain concatenation does.
  return (a + b, True)
126
127
128def parse_and_expand_globlist(globlist_name, glob_root):
129  # The following expects glob_root not to end in a trailing slash.
130  if glob_root.endswith('/'):
131    glob_root = glob_root[:-1]
132
133  check_expansions_outside_globlist_dir = True
134  globlist_dir = os.path.dirname(globlist_name)
135
136  with open(globlist_name) as globlist:
137    # Paths in |files| and |to_check| must use unix separators. Using a set
138    # ensures no unwanted duplicates. The files in |to_check| must be in the
139    # globroot or a subdirectory.
140    files = set()
141    to_check = set()
142    for g in globlist:
143      g = g.strip()
144
145      # Ignore blank lines
146      if not g:
147        continue
148
149      # Toggle error checking.
150      if g == _START_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR:
151        check_expansions_outside_globlist_dir = False
152      elif g == _STOP_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR:
153        check_expansions_outside_globlist_dir = True
154
155      # Ignore comments.
156      if not g or g.startswith('#'):
157        continue
158
159      # Exclusions are prefixed with '-'.
160      is_exclusion = g.startswith('-')
161      if is_exclusion:
162        g = g[1:]
163
164      (combined,
165       root_relative) = combine_potentially_repository_root_relative_paths(
166           glob_root, g)
167
168      prefix_size = len(glob_root)
169      if not root_relative:
170        # We need to account for the separator.
171        prefix_size += 1
172
173      expansion = glob.glob(combined, recursive=True)
174
175      # Filter out directories.
176      expansion = [f for f in expansion if os.path.isfile(f)]
177
178      if check_expansions_outside_globlist_dir:
179        for f in expansion:
180          relative = os.path.relpath(f, globlist_dir)
181          if relative.startswith('..'):
182            raise Exception(f'Globlist expansion outside globlist dir: {f}')
183
184      # Make relative to |glob_root|.
185      expansion = [f[prefix_size:] for f in expansion]
186
187      # Handle Windows backslashes
188      expansion = [f.replace('\\', '/') for f in expansion]
189
190      # Since paths in |expansion| only use unix separators, it is safe to
191      # compare for both the purpose of exclusion and addition.
192      if is_exclusion:
193        files = files.difference(expansion)
194      else:
195        files = files.union(expansion)
196
197    # Return a sorted list.
198    return sorted(files)
199
200
def compare_lists(a, b):
  """Diffs two lists of strings with difflib.Differ.

  Returns:
    An (additions, removals) tuple: the entries the diff marks with '+ ' and
    those it marks with '- ', each in diff order and without the marker.
  """
  additions = []
  removals = []
  for entry in difflib.Differ().compare(a, b):
    marker, text = entry[:2], entry[2:]
    if marker == '+ ':
      additions.append(text)
    elif marker == '- ':
      removals.append(text)
    # Unchanged ('  ') and hint ('? ') entries are of no interest.
  return (additions, removals)
208
209
def write_filelist(filelist_name, files, header):
  """Writes |header| followed by one entry of |files| per line.

  If |header| does not match _HEADER_PATTERN, _HEADER is written in its place.
  The file is written as UTF-8 with '\\n' line endings on every platform.

  Returns:
    None on success. If anything goes wrong, the failure is reported through
    print_error() and an empty list is returned.
  """
  try:
    with open(filelist_name, 'w', encoding='utf-8', newline='') as out:
      has_generated_header = _HEADER_PATTERN.search(header) is not None
      out.write(header if has_generated_header else _HEADER)
      out.writelines(f'{path}\n' for path in files)
  except Exception as e:
    print_error(f'Could not write file list: {filelist_name}',
                f'{type(e)}: {e}')
    return []
222
223
def process_filelist(filelist, globlist, globroot, check=False, verbose=False):
  """Checks |filelist| against the expansion of |globlist|, or rewrites it.

  Files that the expansion would add but that git does not track are ignored,
  both when checking and when writing.

  Args:
    filelist: Path of the .filelist file.
    globlist: Path of the .globlist file.
    globroot: Directory the globs are relative to.
    check: If true, only compare and never modify |filelist|.
    verbose: If true, report details of failures through print_error().

  Returns:
    0 if the file list is up to date (check mode) or was written (update
    mode). 1 if the glob list could not be expanded, if the check found an
    unexpected header or a mismatch, or if the file list could not be written.
  """
  try:
    files_from_globlist = parse_and_expand_globlist(globlist, globroot)
  except Exception as e:
    if verbose:
      print_error(f'Could not read glob list: {globlist}', f'{type(e)}: {e}')
    return 1

  (files, header) = parse_filelist(filelist)

  (additions, removals) = compare_lists(files, files_from_globlist)
  to_ignore = set()

  # Ignore additions of untracked files.
  if additions:
    directories = {os.path.dirname(f) for f in additions}
    tracked_files = set()
    for d in directories:
      (combined,
       root_relative) = combine_potentially_repository_root_relative_paths(
           globroot, d)
      relative = os.path.relpath(combined, globroot)
      tracked_files |= get_tracked_files(relative, globroot, root_relative,
                                         verbose)
    to_ignore = set(additions) - tracked_files
    additions = [f for f in additions if f in tracked_files]

  files_from_globlist = [f for f in files_from_globlist if f not in to_ignore]

  if check:
    if not _HEADER_PATTERN.search(header):
      if verbose:
        print_error(f'Unexpected header for {filelist}', f'{header}')
      return 1
    if not additions and not removals:
      return 0
    if verbose:
      pretty_additions = ['+ ' + f for f in additions]
      pretty_removals = ['- ' + f for f in removals]
      pretty_diff = '\n'.join(pretty_additions + pretty_removals)
      print_error('File list does not match glob expansion', f'{pretty_diff}')
    return 1

  # write_filelist() returns None when it succeeds and a (non-None) empty list
  # after reporting a failure; do not claim success if nothing was written.
  if write_filelist(filelist, files_from_globlist, header) is not None:
    return 1
  return 0
270
271
def main(args):
  """Parses the command line and processes the requested file list.

  Args:
    args: Command-line arguments, without the program name.

  Returns:
    The exit status computed by process_filelist().
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
  parser.add_argument('filelist', help='Contains one file per line')
  parser.add_argument('globlist',
                      help='Contains globs that, when expanded, '
                      'should match the filelist. Use '
                      '--help for details on syntax')
  parser.add_argument('globroot',
                      help='Directory from which globs are relative')
  parser.add_argument('-c',
                      '--check',
                      action='store_true',
                      help='Prevents modifying the file list')
  parser.add_argument('-v',
                      '--verbose',
                      action='store_true',
                      help='Use this to print details on differences')
  # Parse the arguments that were handed in rather than implicitly reading
  # sys.argv, so that callers other than the command line get what they asked
  # for.
  options = parser.parse_args(args)
  return process_filelist(options.filelist,
                          options.globlist,
                          options.globroot,
                          check=options.check,
                          verbose=options.verbose)
296
297
def print_error(error_message, error_info):
  """ Writes `error_message`, tagged 'ERROR: ', to stderr; a non-empty
    `error_info` follows on the next line """
  color_start, color_end = adapted_color_for_output(TERMINAL_ERROR_COLOR,
                                                    TERMINAL_RESET_COLOR)

  # Only the tagged message is wrapped in the color codes, not the details.
  headline = ''.join([color_start, 'ERROR: ', error_message, color_end])
  details = '\n' + error_info if len(error_info) > 0 else ''
  print(headline + details, file=sys.stderr)
307
308
def adapted_color_for_output(color_start, color_end):
  """ Returns the `color_start`, `color_end` tuple if the error output is a
    terminal, or empty strings otherwise.

    The colors are used by print_error(), which writes to stderr, so stderr is
    the stream that gets probed. A stream that is missing, or that has no
    isatty(), is treated as not being a terminal. """
  isatty = getattr(sys.stderr, 'isatty', None)
  if isatty is None or not isatty():
    return '', ''
  return color_start, color_end
315
316
if __name__ == '__main__':
  # Hand main() everything after the program name; its return value becomes
  # the exit status of the process.
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)
319