#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Updates .filelist files using data from corresponding .globlist files (or
checks whether they are up to date).

bundle_data targets require an explicit source list, but maintaining these large
lists can be cumbersome. This script aims to simplify the process of updating
these lists by either expanding globs to update file lists or check that an
existing file list matches such an expansion (i.e., checking during presubmit).

The .globlist file contains a list of globs that will be expanded to either
compare or replace a corresponding .filelist. It is possible to exclude items
from the file list with globs as well. These lines are prefixed with '-' and are
processed in order, so be sure that exclusions succeed inclusions in the list of
globs. Comments and empty lines are permitted in .globlist files; comments are
prefixed with '#'.

By convention, the base name of the .globlist and .filelist files matches the
label of their corresponding bundle_data from the .gn file. In order to ensure
that these filelists don't get stale, there should also be a PRESUBMIT.py
which uses this script to check that the list is up to date.

By default, the script will update the file list to match the expanded globs.
"""

import argparse
import datetime
import difflib
import glob
import os.path
import re
import subprocess
import sys

# Character to set colors in terminal. Taken, along with the printing routine
# below, from update_deps.py.
TERMINAL_ERROR_COLOR = '\033[91m'
TERMINAL_RESET_COLOR = '\033[0m'

# Header written at the top of a generated .filelist when the existing header
# is missing or not recognised.
_HEADER = """# Copyright %d The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# NOTE: this file is generated by build/ios/update_bundle_filelist.py
# If it requires updating, you should get a presubmit error with
# instructions on how to regenerate. Otherwise, do not edit.
""" % (datetime.datetime.now().year)

# Recognises a previously generated header regardless of the copyright year.
# Runs of spaces after '#' on the two continuation lines are matched loosely so
# that headers whose continuation lines are indented for alignment are still
# accepted.
_HEADER_PATTERN = re.compile(
    r"""# Copyright [0-9]+ The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# NOTE: this file is generated by build/ios/update_bundle_filelist.py
#[ ]+If it requires updating, you should get a presubmit error with
#[ ]+instructions on how to regenerate. Otherwise, do not edit.
""")

# Number of lines occupied by the header at the top of a .filelist.
_HEADER_HEIGHT = 6

# Directives (written as comments in a .globlist) that switch off / back on the
# check that glob expansions stay inside the .globlist's own directory.
_START_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR = '# push(ignore-relative)'
_STOP_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR = '# pop(ignore-relative)'


def parse_filelist(filelist_name):
  """Reads a .filelist and splits it into its entries and its header.

  Args:
    filelist_name: Path of the .filelist to read.

  Returns:
    A `(files, header)` tuple. `files` is the sorted list of stripped lines
    following the first `_HEADER_HEIGHT` lines, and `header` is those first
    lines joined into one string. If the file cannot be read, an error is
    printed and `([], '')` is returned so that callers can always unpack the
    result.
  """
  try:
    # The file is written as UTF-8 by write_filelist(); read it the same way.
    with open(filelist_name, encoding='utf-8') as filelist:
      lines = list(filelist)
  except Exception as e:
    print_error(f'Could not read file list: {filelist_name}', f'{type(e)}: {e}')
    return ([], '')

  header = ''.join(lines[:_HEADER_HEIGHT])
  files = sorted(line.strip() for line in lines[_HEADER_HEIGHT:])
  return (files, header)


def get_git_command_name():
  """Returns the name of the git executable for the current platform."""
  if sys.platform.startswith('win'):
    return 'git.bat'
  return 'git'


def get_tracked_files(directory, globroot, repository_root_relative, verbose):
  """Returns the set of files in `directory` that git reports as tracked.

  Args:
    directory: Directory to list, as passed to `git ls-files`.
    globroot: Working directory in which git is run.
    repository_root_relative: If true, every returned path is prefixed with
      '//' so that it compares equal to repository-root-relative expansions.
    verbose: If true, failures are reported with print_error().

  Returns:
    A set of paths using '/' as separator. The set is empty if git fails or
    any other exception occurs; this function never raises.
  """
  try:
    # NOTE(review): presumably git cannot be used in this kind of workspace,
    # so every file found on disk is treated as tracked -- confirm.
    if os.getcwd().startswith('/google/cog/cloud'):
      files = []
      for root, _, filenames in os.walk(directory):
        files.extend(os.path.join(root, f) for f in filenames)
      return set(files)

    cmd = [get_git_command_name(), 'ls-files', '--error-unmatch', directory]
    result = subprocess.run(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            cwd=globroot,
                            check=False)
    if result.returncode != 0:
      if verbose:
        print_error(f'Could not gather a list of tracked files in {directory}',
                    result.stderr.decode('utf-8', errors='replace'))
      return set()

    files = [f.decode('utf-8') for f in result.stdout.splitlines()]

    # Need paths to be relative to directory in order to match expansions.
    # This should happen naturally due to cwd above, but we need to take
    # special care if relative to the repository root.
    if repository_root_relative:
      files = ['//' + f for f in files]

    # Handle Windows backslashes
    return {f.replace('\\', '/') for f in files}

  except Exception as e:
    # Best effort: an unusable git simply means no file is considered tracked.
    if verbose:
      print_error(f'Could not gather a list of tracked files in {directory}',
                  f'{type(e)}: {e}')
    return set()


def combine_potentially_repository_root_relative_paths(a, b):
  """Joins `a` and `b`, where `b` may be repository-root-relative ('//...').

  Returns:
    A `(path, root_relative)` tuple, where `root_relative` tells whether `b`
    started with '//'.
  """
  if b.startswith('//'):
    # If b is relative to the repository root, os.path will consider it absolute
    # and os.path.join will fail. In this case, we can simply concatenate the
    # paths.
    return (a + b, True)
  return (os.path.join(a, b), False)


def parse_and_expand_globlist(globlist_name, glob_root):
  """Expands the globs of a .globlist into a sorted list of files.

  Lines are processed in order: a plain glob adds its matches, a glob prefixed
  with '-' removes its matches. Blank lines and lines starting with '#' are
  skipped, except that the push/pop(ignore-relative) directives toggle the
  check that expansions stay inside the .globlist's directory.

  Args:
    globlist_name: Path of the .globlist file.
    glob_root: Directory the globs are relative to.

  Returns:
    Sorted list of matched files, relative to `glob_root`, using '/' as
    separator.

  Raises:
    Exception: If an expansion lies outside the .globlist's directory while
      that check is enabled. Errors from opening the file propagate as well.
  """
  # The following expects glob_root not to end in a trailing slash.
  if glob_root.endswith('/'):
    glob_root = glob_root[:-1]

  check_expansions_outside_globlist_dir = True
  globlist_dir = os.path.dirname(globlist_name)

  # Paths in |files| must use unix separators. Using a set ensures no unwanted
  # duplicates.
  files = set()

  with open(globlist_name, encoding='utf-8') as globlist:
    for line in globlist:
      g = line.strip()

      # Ignore blank lines
      if not g:
        continue

      # Toggle error checking. The directives are themselves comments, so they
      # are skipped by the comment test right below.
      if g == _START_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR:
        check_expansions_outside_globlist_dir = False
      elif g == _STOP_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR:
        check_expansions_outside_globlist_dir = True

      # Ignore comments.
      if g.startswith('#'):
        continue

      # Exclusions are prefixed with '-'.
      is_exclusion = g.startswith('-')
      if is_exclusion:
        g = g[1:]

      (combined,
       root_relative) = combine_potentially_repository_root_relative_paths(
           glob_root, g)

      prefix_size = len(glob_root)
      if not root_relative:
        # We need to account for the separator.
        prefix_size += 1

      # Filter out directories.
      expansion = [
          f for f in glob.glob(combined, recursive=True) if os.path.isfile(f)
      ]

      if check_expansions_outside_globlist_dir:
        for f in expansion:
          relative = os.path.relpath(f, globlist_dir)
          if relative.startswith('..'):
            raise Exception(f'Globlist expansion outside globlist dir: {f}')

      # Make relative to |glob_root| and handle Windows backslashes.
      expansion = [f[prefix_size:].replace('\\', '/') for f in expansion]

      # Since paths in |expansion| only use unix separators, it is safe to
      # compare for both the purpose of exclusion and addition.
      if is_exclusion:
        files.difference_update(expansion)
      else:
        files.update(expansion)

  # Return a sorted list.
  return sorted(files)


def compare_lists(a, b):
  """Diffs two lists of strings.

  Returns:
    An `(additions, removals)` tuple: the entries difflib reports as present
    only in `b`, and the entries it reports as present only in `a`.
  """
  additions = []
  removals = []
  for entry in difflib.Differ().compare(a, b):
    # Unchanged ('  ') and hint ('? ') lines are of no interest.
    if entry.startswith('+ '):
      additions.append(entry[2:])
    elif entry.startswith('- '):
      removals.append(entry[2:])
  return (additions, removals)


def write_filelist(filelist_name, files, header):
  """Writes `header` followed by one entry of `files` per line.

  If `header` is not a recognised generated header, the default `_HEADER` is
  written instead.

  Returns:
    None on success. If writing fails, an error is printed and `[]` is
    returned.
  """
  try:
    with open(filelist_name, 'w', encoding='utf-8', newline='') as filelist:
      if not _HEADER_PATTERN.search(header):
        header = _HEADER
      filelist.write(header)
      filelist.writelines(f'{file}\n' for file in files)
  except Exception as e:
    print_error(f'Could not write file list: {filelist_name}',
                f'{type(e)}: {e}')
    return []
  return None


def process_filelist(filelist, globlist, globroot, check=False, verbose=False):
  """Updates `filelist` from `globlist`, or checks that it is up to date.

  Files matched by the globs but not tracked by git are ignored, both when
  checking and when writing.

  Args:
    filelist: Path of the .filelist.
    globlist: Path of the .globlist.
    globroot: Directory the globs are relative to.
    check: If true, only compare; never modify `filelist`.
    verbose: If true, print details about failures and differences.

  Returns:
    0 on success (list up to date, or list written), 1 otherwise.
  """
  try:
    files_from_globlist = parse_and_expand_globlist(globlist, globroot)
  except Exception as e:
    if verbose:
      print_error(f'Could not read glob list: {globlist}', f'{type(e)}: {e}')
    return 1

  (files, header) = parse_filelist(filelist)

  (additions, removals) = compare_lists(files, files_from_globlist)
  to_ignore = set()

  # Ignore additions of untracked files.
  if additions:
    directories = {os.path.dirname(f) for f in additions}
    tracked_files = set()
    for d in directories:
      (combined,
       root_relative) = combine_potentially_repository_root_relative_paths(
           globroot, d)
      relative = os.path.relpath(combined, globroot)
      tracked_files.update(
          get_tracked_files(relative, globroot, root_relative, verbose))
    to_ignore = set(additions).difference(tracked_files)
    additions = [f for f in additions if f in tracked_files]

  files_from_globlist = [f for f in files_from_globlist if f not in to_ignore]

  if check:
    if not _HEADER_PATTERN.search(header):
      if verbose:
        print_error(f'Unexpected header for {filelist}', f'{header}')
      return 1
    if not additions and not removals:
      return 0
    if verbose:
      pretty_additions = ['+ ' + f for f in additions]
      pretty_removals = ['- ' + f for f in removals]
      pretty_diff = '\n'.join(pretty_additions + pretty_removals)
      print_error('File list does not match glob expansion', f'{pretty_diff}')
    return 1

  # write_filelist() returns None on success and [] after reporting a failure;
  # a failed write must not be reported as success.
  if write_filelist(filelist, files_from_globlist, header) is not None:
    return 1
  return 0


def main(args):
  """Parses the command line in `args` and runs process_filelist().

  Args:
    args: Command-line arguments, without the program name.

  Returns:
    The process exit code (0 on success, 1 on failure).
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
  parser.add_argument('filelist', help='Contains one file per line')
  parser.add_argument('globlist',
                      help='Contains globs that, when expanded, '
                      'should match the filelist. Use '
                      '--help for details on syntax')
  parser.add_argument('globroot',
                      help='Directory from which globs are relative')
  parser.add_argument('-c',
                      '--check',
                      action='store_true',
                      help='Prevents modifying the file list')
  parser.add_argument('-v',
                      '--verbose',
                      action='store_true',
                      help='Use this to print details on differences')
  # Parse the arguments we were given rather than implicitly re-reading
  # sys.argv, so that main() can be called programmatically.
  args = parser.parse_args(args)
  return process_filelist(args.filelist,
                          args.globlist,
                          args.globroot,
                          check=args.check,
                          verbose=args.verbose)


def print_error(error_message, error_info):
  """ Print the `error_message` with additional `error_info` to stderr """
  # The message goes to stderr, so that is the stream whose terminal-ness
  # decides whether color codes are emitted.
  color_start, color_end = adapted_color_for_output(TERMINAL_ERROR_COLOR,
                                                    TERMINAL_RESET_COLOR,
                                                    stream=sys.stderr)

  error_message = color_start + 'ERROR: ' + error_message + color_end
  if error_info:
    error_message = error_message + '\n' + error_info
  print(error_message, file=sys.stderr)


def adapted_color_for_output(color_start, color_end, stream=None):
  """ Returns the `color_start`, `color_end` tuple if the output is a
  terminal, or empty strings otherwise.

  `stream` is the output stream to test; it defaults to `sys.stdout`. """
  if stream is None:
    stream = sys.stdout
  if not stream.isatty():
    return '', ''
  return color_start, color_end


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))