#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Updates .filelist files using data from corresponding .globlist files (or
checks whether they are up to date).

bundle_data targets require an explicit source list, but maintaining these large
lists can be cumbersome. This script aims to simplify the process of updating
these lists by either expanding globs to update file lists or checking that an
existing file list matches such an expansion (i.e., checking during presubmit).

The .globlist file contains a list of globs that will be expanded to either
compare or replace a corresponding .filelist. It is possible to exclude items
from the file list with globs as well. These lines are prefixed with '-' and are
processed in order, so be sure that exclusions succeed inclusions in the list of
globs. Comments and empty lines are permitted in .globlist files; comments are
prefixed with '#'.

By convention, the base name of the .globlist and .filelist files matches the
label of their corresponding bundle_data from the .gn file. In order to ensure
that these filelists don't get stale, there should also be a PRESUBMIT.py
which uses this script to check that the list is up to date.

By default, the script will update the file list to match the expanded globs.
"""

import argparse
import datetime
import difflib
import glob
import os.path
import re
import subprocess
import sys

# Character to set colors in terminal. Taken, along with the printing routine
# below, from update_deps.py.
TERMINAL_ERROR_COLOR = '\033[91m'
TERMINAL_RESET_COLOR = '\033[0m'

# Header written at the top of every generated .filelist.
# NOTE(review): confirm the whitespace after '#' on the two lines following
# "NOTE:" against the checked-in .filelist files; _HEADER_PATTERN below accepts
# any run of spaces or tabs there, so either layout validates.
_HEADER = """# Copyright %d The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# NOTE: this file is generated by build/ios/update_bundle_filelist.py
# If it requires updating, you should get a presubmit error with
# instructions on how to regenerate. Otherwise, do not edit.
""" % (datetime.datetime.now().year)

# Matches a generated header regardless of the copyright year.
_HEADER_PATTERN = re.compile(
    r"""# Copyright [0-9]+ The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# NOTE: this file is generated by build/ios/update_bundle_filelist.py
#[ \t]+If it requires updating, you should get a presubmit error with
#[ \t]+instructions on how to regenerate. Otherwise, do not edit.
""")

# Number of lines occupied by the header in a .filelist.
_HEADER_HEIGHT = 6

# Comment markers in a .globlist that switch off / on the check that every
# expanded path lives under the directory containing the .globlist.
_START_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR = '# push(ignore-relative)'
_STOP_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR = '# pop(ignore-relative)'


def parse_filelist(filelist_name):
  """Reads a .filelist and splits it into its entries and its header.

  Args:
    filelist_name: Path of the .filelist to read.

  Returns:
    A `(files, header)` tuple. `files` is the sorted list of stripped lines
    following the first `_HEADER_HEIGHT` lines; `header` is those first lines
    joined into one string. If the file cannot be read, the error is printed
    and `([], '')` is returned so that callers can always unpack the result.
  """
  try:
    with open(filelist_name, encoding='utf-8') as filelist:
      lines = list(filelist)
  except Exception as e:
    print_error(f'Could not read file list: {filelist_name}', f'{type(e)}: {e}')
    return ([], '')

  header = ''.join(lines[:_HEADER_HEIGHT])
  files = sorted(l.strip() for l in lines[_HEADER_HEIGHT:])
  return (files, header)


def get_git_command_name():
  """Returns the name of the git executable for the current platform."""
  if sys.platform.startswith('win'):
    return 'git.bat'
  return 'git'


def get_tracked_files(directory, globroot, repository_root_relative, verbose):
  """Lists the files under `directory` that git tracks.

  Args:
    directory: Path passed to `git ls-files`, interpreted relative to
      `globroot`.
    globroot: Working directory in which git is run.
    repository_root_relative: If true, every returned path is prefixed with
      '//' so that it matches repository-root-relative glob expansions.
    verbose: Whether to print an error when the list cannot be gathered.

  Returns:
    A set of paths using '/' separators. The set is empty if git fails or
    cannot be run.
  """
  try:
    result = subprocess.run(
        [get_git_command_name(), 'ls-files', '--error-unmatch', directory],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=globroot,
        check=False)
    if result.returncode != 0:
      if verbose:
        # git's stderr is bytes; decode it so the message is readable.
        print_error(f'Could not gather a list of tracked files in {directory}',
                    result.stderr.decode('utf-8', errors='replace'))
      return set()

    files = [f.decode('utf-8') for f in result.stdout.splitlines()]

    # Need paths to be relative to directory in order to match expansions.
    # This should happen naturally due to cwd above, but we need to take
    # special care if relative to the repository root.
    if repository_root_relative:
      files = ['//' + f for f in files]

    # Handle Windows backslashes
    return {f.replace('\\', '/') for f in files}

  except Exception as e:
    if verbose:
      print_error(f'Could not gather a list of tracked files in {directory}',
                  f'{type(e)}: {e}')
    return set()


def combine_potentially_repository_root_relative_paths(a, b):
  """Joins `a` and `b`, where `b` may start with '//'.

  Returns:
    A `(path, root_relative)` tuple. `root_relative` is True when `b` starts
    with '//', in which case `path` is the plain concatenation `a + b`;
    otherwise `path` is `os.path.join(a, b)`.
  """
  if b.startswith('//'):
    # If b is relative to the repository root, os.path will consider it absolute
    # and os.path.join will fail. In this case, we can simply concatenate the
    # paths.
    return (a + b, True)
  return (os.path.join(a, b), False)


def parse_and_expand_globlist(globlist_name, glob_root):
  """Expands the globs of a .globlist into a sorted list of files.

  Lines are processed in order: a plain line adds the files its glob matches,
  a line prefixed with '-' removes them. Blank lines and '#' comments are
  skipped, except for the push/pop(ignore-relative) comment markers, which
  disable/enable the check that expansions stay inside the .globlist's
  directory.

  Args:
    globlist_name: Path of the .globlist to read.
    glob_root: Directory the globs are relative to.

  Returns:
    The sorted list of matched files, relative to `glob_root` (globs starting
    with '//' keep that prefix), using '/' separators.

  Raises:
    Exception: If an expansion falls outside the .globlist's directory while
      that check is enabled. Errors from opening the file propagate as well.
  """
  # The following expects glob_root not to end in a trailing slash.
  if glob_root.endswith('/'):
    glob_root = glob_root[:-1]

  check_expansions_outside_globlist_dir = True
  globlist_dir = os.path.dirname(globlist_name)

  # Paths in |files| must use unix separators. Using a set ensures no unwanted
  # duplicates.
  files = set()

  with open(globlist_name, encoding='utf-8') as globlist:
    for g in globlist:
      g = g.strip()

      # Ignore blank lines
      if not g:
        continue

      # Toggle error checking.
      if g == _START_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR:
        check_expansions_outside_globlist_dir = False
      elif g == _STOP_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR:
        check_expansions_outside_globlist_dir = True

      # Ignore comments (this includes the two markers handled above).
      if g.startswith('#'):
        continue

      # Exclusions are prefixed with '-'.
      is_exclusion = g.startswith('-')
      if is_exclusion:
        g = g[1:]

      (combined,
       root_relative) = combine_potentially_repository_root_relative_paths(
           glob_root, g)

      prefix_size = len(glob_root)
      if not root_relative:
        # We need to account for the separator.
        prefix_size += 1

      # Filter out directories.
      expansion = [
          f for f in glob.glob(combined, recursive=True) if os.path.isfile(f)
      ]

      if check_expansions_outside_globlist_dir:
        for f in expansion:
          relative = os.path.relpath(f, globlist_dir)
          if relative.startswith('..'):
            raise Exception(f'Globlist expansion outside globlist dir: {f}')

      # Make relative to |glob_root| and handle Windows backslashes.
      expansion = [f[prefix_size:].replace('\\', '/') for f in expansion]

      # Since paths in |expansion| only use unix separators, it is safe to
      # compare for both the purpose of exclusion and addition.
      if is_exclusion:
        files.difference_update(expansion)
      else:
        files.update(expansion)

  # Return a sorted list.
  return sorted(files)


def compare_lists(a, b):
  """Diffs two lists of strings with difflib.

  Returns:
    An `(additions, removals)` tuple: the entries the diff reports as present
    only in `b` and only in `a`, respectively.
  """
  additions = []
  removals = []
  for line in difflib.Differ().compare(a, b):
    # Lines starting with '  ' (common) or '? ' (hints) are not of interest.
    if line.startswith('+ '):
      additions.append(line[2:])
    elif line.startswith('- '):
      removals.append(line[2:])
  return (additions, removals)


def write_filelist(filelist_name, files, header):
  """Writes `header` followed by one entry of `files` per line.

  If `header` does not match the generated-header pattern, a fresh header with
  the current year is written instead.

  Returns:
    None on success. On failure the error is printed and an empty list is
    returned.
  """
  try:
    # newline='' keeps '\n' line endings on every platform.
    with open(filelist_name, 'w', encoding='utf-8', newline='') as filelist:
      if not _HEADER_PATTERN.search(header):
        header = _HEADER
      filelist.write(header)
      filelist.writelines(f'{file}\n' for file in files)
  except Exception as e:
    print_error(f'Could not write file list: {filelist_name}',
                f'{type(e)}: {e}')
    return []
  return None


def process_filelist(filelist, globlist, globroot, check=False, verbose=False):
  """Updates `filelist` from `globlist`, or checks that it is up to date.

  Files matched by the globs that are not already listed and are not tracked by
  git are ignored.

  Args:
    filelist: Path of the .filelist.
    globlist: Path of the .globlist.
    globroot: Directory the globs are relative to.
    check: If true, only compare; never modify `filelist`.
    verbose: If true, print details about failures and differences.

  Returns:
    0 on success (list up to date in check mode, or written in update mode),
    1 otherwise.
  """
  try:
    files_from_globlist = parse_and_expand_globlist(globlist, globroot)
  except Exception as e:
    if verbose:
      print_error(f'Could not read glob list: {globlist}', f'{type(e)}: {e}')
    return 1

  (files, header) = parse_filelist(filelist)

  (additions, removals) = compare_lists(files, files_from_globlist)
  to_ignore = set()

  # Ignore additions of untracked files.
  if additions:
    directories = {os.path.dirname(f) for f in additions}
    tracked_files = set()
    for d in directories:
      (combined,
       root_relative) = combine_potentially_repository_root_relative_paths(
           globroot, d)
      relative = os.path.relpath(combined, globroot)
      tracked_files.update(
          get_tracked_files(relative, globroot, root_relative, verbose))
    to_ignore = set(additions).difference(tracked_files)
    additions = [f for f in additions if f in tracked_files]

  files_from_globlist = [f for f in files_from_globlist if f not in to_ignore]

  if check:
    if not _HEADER_PATTERN.search(header):
      if verbose:
        print_error(f'Unexpected header for {filelist}', f'{header}')
      return 1
    if not additions and not removals:
      return 0
    if verbose:
      pretty_additions = ['+ ' + f for f in additions]
      pretty_removals = ['- ' + f for f in removals]
      pretty_diff = '\n'.join(pretty_additions + pretty_removals)
      print_error('File list does not match glob expansion', f'{pretty_diff}')
    return 1

  # write_filelist() returns None on success and a (falsy) empty list after
  # reporting a failure, so test identity rather than truthiness.
  if write_filelist(filelist, files_from_globlist, header) is not None:
    return 1
  return 0


def main(args):
  """Parses the command line arguments in `args` and runs the script.

  Returns:
    The process exit code from process_filelist().
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
  parser.add_argument('filelist', help='Contains one file per line')
  parser.add_argument('globlist',
                      help='Contains globs that, when expanded, '
                      'should match the filelist. Use '
                      '--help for details on syntax')
  parser.add_argument('globroot',
                      help='Directory from which globs are relative')
  parser.add_argument('-c',
                      '--check',
                      action='store_true',
                      help='Prevents modifying the file list')
  parser.add_argument('-v',
                      '--verbose',
                      action='store_true',
                      help='Use this to print details on differences')
  # Parse the arguments handed to main() rather than implicitly re-reading
  # sys.argv.
  args = parser.parse_args(args)
  return process_filelist(args.filelist,
                          args.globlist,
                          args.globroot,
                          check=args.check,
                          verbose=args.verbose)


def print_error(error_message, error_info):
  """ Print the `error_message` with additional `error_info` to stderr """
  color_start, color_end = adapted_color_for_output(TERMINAL_ERROR_COLOR,
                                                    TERMINAL_RESET_COLOR)

  error_message = color_start + 'ERROR: ' + error_message + color_end
  if error_info:
    error_message = error_message + '\n' + error_info
  print(error_message, file=sys.stderr)


def adapted_color_for_output(color_start, color_end):
  """ Returns the `color_start`, `color_end` tuple if the error output is a
  terminal, or empty strings otherwise """
  # Errors are printed to stderr, so that is the stream that must be a tty.
  if not sys.stderr.isatty():
    return '', ''
  return color_start, color_end


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))