# -*- coding: utf-8 -*-
# Copyright 2020 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# This script is used to help the compiler wrapper in the ChromeOS and
# Android build systems bisect for bad object files.

"""Utilities for bisection of ChromeOS and Android object files.

This module contains a set of utilities to allow bisection between
two sets (good and bad) of object files. Mostly used to find compiler
bugs.

Reference page:
https://sites.google.com/a/google.com/chromeos-toolchain-team-home2/home/team-tools-and-scripts/bisecting-chromeos-compiler-problems/bisection-compiler-wrapper

Design doc:
https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
"""


import contextlib
import fcntl
import os
import shutil
import stat
import subprocess
import sys


VALID_MODES = ("POPULATE_GOOD", "POPULATE_BAD", "TRIAGE")
GOOD_CACHE = "good"
BAD_CACHE = "bad"
LIST_FILE = os.path.join(GOOD_CACHE, "_LIST")

# Behavior switches, read once from the environment at import time. Each is
# enabled only when the variable is set to exactly "1".
CONTINUE_ON_MISSING = os.environ.get("BISECT_CONTINUE_ON_MISSING", None) == "1"
CONTINUE_ON_REDUNDANCY = (
    os.environ.get("BISECT_CONTINUE_ON_REDUNDANCY", None) == "1"
)
WRAPPER_SAFE_MODE = os.environ.get("BISECT_WRAPPER_SAFE_MODE", None) == "1"


class Error(Exception):
    """The general compiler wrapper error class."""


@contextlib.contextmanager
def lock_file(path, mode):
    """Lock file and block if other process has lock on file.

    Acquire a lock on the file. Only blocks other processes if they attempt
    to also acquire the lock through this method. If only reading (modes 'r'
    and 'rb') then the lock is shared (i.e. many reads can happen
    concurrently, but only one process may write at a time).

    This function is a contextmanager, meaning it's meant to be used with the
    "with" statement in Python. This is so cleanup and setup happens
    automatically and cleanly. Execution of the outer "with" statement happens
    at the "yield" statement. Execution resumes after the yield when the outer
    "with" statement ends.

    Args:
        path: path to file being locked
        mode: mode to open file with ('w', 'r', etc.)

    Yields:
        The opened file object, locked for the duration of the "with" body.
    """
    with open(path, mode) as f:
        # Apply FD_CLOEXEC argument to fd. This ensures that the file
        # descriptor won't be leaked to any child processes.
        current_args = fcntl.fcntl(f.fileno(), fcntl.F_GETFD)
        fcntl.fcntl(f.fileno(), fcntl.F_SETFD, current_args | fcntl.FD_CLOEXEC)

        # Reads can share the lock as no race conditions exist. If write is
        # needed, give writing process exclusive access to the file.
        if f.mode in ("r", "rb"):
            lock_type = fcntl.LOCK_SH
        else:
            lock_type = fcntl.LOCK_EX

        fcntl.lockf(f, lock_type)
        try:
            yield f
        finally:
            # Flush while the lock is still held, even when the "with" body
            # raised; otherwise buffered data would only be written on
            # close(), after the lock has already been released.
            try:
                f.flush()
            finally:
                fcntl.lockf(f, fcntl.LOCK_UN)


def log_to_file(path, execargs, link_from=None, link_to=None):
    """Common logging function.

    Append the current working directory and the current execargs to the log
    and, when both link_from and link_to are given, a from-to relationship
    between files.

    Args:
        path: path of the log file (opened in append mode under a lock).
        execargs: compiler execution arguments.
        link_from: optional source side of the relationship to record.
        link_to: optional destination side of the relationship to record.
    """
    with lock_file(path, "a") as log:
        log.write("cd: %s; %s\n" % (os.getcwd(), " ".join(execargs)))
        if link_from and link_to:
            log.write("%s -> %s\n" % (link_from, link_to))


def exec_and_return(execargs):
    """Execute process and return.

    Execute according to execargs and return once the process has finished.
    Don't inspect stderr or stdout.

    Returns:
        The return code of the executed process.
    """
    return subprocess.call(execargs)


def which_cache(obj_file):
    """Determine which cache an object belongs to.

    The binary search tool creates two files for each search iteration listing
    the full set of bad objects and full set of good objects. We use this to
    determine where an object file should be linked from (good or bad).

    Args:
        obj_file: path of the object file to look up.

    Returns:
        BAD_CACHE if obj_file is listed in the file named by the
        BISECT_BAD_SET environment variable, GOOD_CACHE otherwise.

    Raises:
        Error: if an object has to be looked up but BISECT_BAD_SET is unset
            or empty.
    """
    bad_set_file = os.environ.get("BISECT_BAD_SET")
    # An empty obj_file never needs the list (in_object_list returns False
    # before opening it), so only insist on the variable for real lookups.
    if obj_file and not bad_set_file:
        raise Error(
            "BISECT_BAD_SET is not set; cannot determine which cache %s "
            "belongs to." % obj_file
        )
    if in_object_list(obj_file, bad_set_file):
        return BAD_CACHE
    return GOOD_CACHE


def makedirs(path):
    """Create all directories in path, tolerating an existing directory.

    Raises:
        OSError: if the directories cannot be created, including when path
            already exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)


def _replace_suffix(path, new_suffix):
    """Return path with its final extension swapped for new_suffix."""
    return os.path.splitext(path)[0] + new_suffix


def get_obj_path(execargs):
    """Get the object path for the object file in the list of arguments.

    Returns:
        Absolute object path from execution args (-o argument). If no object
        being outputted, then return empty string. -o argument is checked only
        if -c is also present.
    """
    try:
        i = execargs.index("-o")
        execargs.index("-c")
        # A trailing "-o" with no value is a malformed command line; treat it
        # as "no object" so the compiler itself gets to report the problem.
        obj_path = execargs[i + 1]
    except (ValueError, IndexError):
        return ""

    # Ignore args that do not create a file.
    if obj_path in ("-", "/dev/null"):
        return ""
    # Ignore files ending in .tmp.
    if obj_path.endswith(".tmp"):
        return ""
    # Ignore configuration files generated by Automake/Autoconf/CMake etc.
    if (
        obj_path.endswith(("conftest.o", "CMakeFiles/test.o"))
        or "CMakeTmp" in obj_path
        or "CMakeTmp" in os.path.abspath(obj_path)
    ):
        return ""

    return os.path.abspath(obj_path)


def get_dep_path(execargs):
    """Get the dep file path for the dep file in the list of arguments.

    Returns:
        Absolute path of the dependency file: the -MF argument when given,
        otherwise the object path (-o argument) with its extension replaced
        by .d. If no dependency being outputted (neither -MD nor -MMD is
        present, or there is no usable object path) then return empty string.
    """
    if "-MD" not in execargs and "-MMD" not in execargs:
        return ""

    # If -MF is given this is the path of the dependency file. Otherwise the
    # dependency file is the value of -o but with a .d extension
    if "-MF" in execargs:
        i = execargs.index("-MF")
        dep_path = execargs[i + 1]
        return os.path.abspath(dep_path)

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ""

    # Swap the real extension rather than chopping two characters, so outputs
    # not named "*.o" do not yield a mangled path.
    return _replace_suffix(full_obj_path, ".d")


def get_dwo_path(execargs):
    """Get the dwo file path for the dwo file in the list of arguments.

    Returns:
        Absolute dwo file path: the object path (-o argument) with its
        extension replaced by .dwo, when -gsplit-dwarf is present. If no dwo
        file being outputted then return empty string.
    """
    if "-gsplit-dwarf" not in execargs:
        return ""

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ""

    return _replace_suffix(full_obj_path, ".dwo")


def in_object_list(obj_name, list_filename):
    """Check if object file name exist in file with object list.

    Args:
        obj_name: object path to look for; an empty value is never found.
        list_filename: path of a file with one object path per line.

    Returns:
        True if a line of the file, stripped of surrounding whitespace,
        equals obj_name; False otherwise.
    """
    if not obj_name:
        return False

    with lock_file(list_filename, "r") as list_file:
        return any(line.strip() == obj_name for line in list_file)


def get_side_effects(execargs):
    """Determine side effects generated by compiler

    Returns:
        List of paths of objects that the compiler generates as side effects
        (dependency file first, then dwo file, each only if applicable).
    """
    candidates = (
        get_dep_path(execargs),  # Cache dependency files
        get_dwo_path(execargs),  # Cache dwo files
    )
    return [path for path in candidates if path]


def cache_file(execargs, bisect_dir, cache, abs_file_path):
    """Cache compiler output file (.o/.d/.dwo).

    Args:
        execargs: compiler execution arguments.
        bisect_dir: The directory where bisection caches live.
        cache: Which cache the file will be cached to (GOOD/BAD).
        abs_file_path: Absolute path to file being cached.

    Returns:
        True if caching was successful, False otherwise.

    Raises:
        Error: if the file is already cached and
            BISECT_CONTINUE_ON_REDUNDANCY is not enabled.
    """
    population_dir = os.path.join(bisect_dir, cache)
    # os.path.join fails with absolute paths, use + instead
    bisect_path = population_dir + abs_file_path
    makedirs(os.path.dirname(bisect_path))
    pop_log = os.path.join(population_dir, "_POPULATE_LOG")
    log_to_file(pop_log, execargs, abs_file_path, bisect_path)

    try:
        if not os.path.exists(abs_file_path):
            # File not found (happens when compilation fails but error code
            # is still 0)
            return False

        if os.path.exists(bisect_path):
            # File was already cached: record the duplicate, then either
            # carry on or abort depending on CONTINUE_ON_REDUNDANCY.
            dups_path = os.path.join(population_dir, "_DUPS")
            with lock_file(dups_path, "a") as dup_object_list:
                dup_object_list.write("%s\n" % abs_file_path)
            if CONTINUE_ON_REDUNDANCY:
                return True
            raise Error(
                "Trying to cache file %s multiple times. To avoid the error, "
                "set BISECT_CONTINUE_ON_REDUNDANCY to 1. For reference, the "
                "list of such files will be written to %s"
                % (abs_file_path, dups_path)
            )

        shutil.copy2(abs_file_path, bisect_path)
        # Set cache object to be read-only so later compilations can't
        # accidentally overwrite it.
        os.chmod(bisect_path, 0o444)
        return True
    except Exception:
        print("Could not cache file %s" % abs_file_path, file=sys.stderr)
        raise


def restore_file(bisect_dir, cache, abs_file_path):
    """Restore file from cache (.o/.d/.dwo).

    Args:
        bisect_dir: The directory where bisection caches live.
        cache: Which cache the file will be restored from (GOOD/BAD).
        abs_file_path: Absolute path to file being restored.

    Raises:
        Error: if the file is not present in the requested cache.
    """
    # os.path.join fails with absolute paths, use + instead
    cached_path = os.path.join(bisect_dir, cache) + abs_file_path
    if not os.path.exists(cached_path):
        raise Error(
            "%s is missing from %s cache! Unsure how to proceed. Make "
            "will now crash." % (cached_path, cache)
        )

    if os.path.exists(abs_file_path):
        os.remove(abs_file_path)
    shutil.copy2(cached_path, abs_file_path)
    # Add write permission to the restored object files as some packages
    # (such as kernels) may need write permission to delete files.
    os.chmod(abs_file_path, os.stat(abs_file_path).st_mode | stat.S_IWUSR)


def bisect_populate(execargs, bisect_dir, population_name):
    """Add necessary information to the bisect cache for the given execution.

    Extract the necessary information for bisection from the compiler
    execution arguments and put it into the bisection cache. This
    includes copying the created object file, adding the object
    file path to the cache list and keeping a log of the execution.

    Args:
        execargs: compiler execution arguments.
        bisect_dir: bisection directory.
        population_name: name of the cache being populated (good/bad).

    Returns:
        The return code of the compiler invocation.
    """
    retval = exec_and_return(execargs)
    if retval:
        return retval

    full_obj_path = get_obj_path(execargs)
    # This is not a normal compiler call because it doesn't have a -o
    # argument, or the -o argument has an unusable output file.
    # It's likely that this compiler call was actually made to invoke the
    # linker, or as part of a configuration test. In this case we want to
    # simply call the compiler and return.
    if not full_obj_path:
        return retval

    # Return if not able to cache the object file
    if not cache_file(execargs, bisect_dir, population_name, full_obj_path):
        return retval

    population_dir = os.path.join(bisect_dir, population_name)
    with lock_file(os.path.join(population_dir, "_LIST"), "a") as object_list:
        object_list.write("%s\n" % full_obj_path)

    # Side effects are cached on a best-effort basis: a side effect file that
    # was not produced (cache_file returns False) is not an error.
    for side_effect in get_side_effects(execargs):
        cache_file(execargs, bisect_dir, population_name, side_effect)

    return retval


def bisect_triage(execargs, bisect_dir):
    """Use object file from appropriate cache (good/bad).

    Given a populated bisection directory, use the object file saved
    into one of the caches (good/bad) according to what is specified
    in the good/bad sets. The good/bad sets are generated by the
    high level binary search tool. Additionally restore any possible
    side effects of compiler.

    Args:
        execargs: compiler execution arguments.
        bisect_dir: populated bisection directory.

    Returns:
        The compiler's return code whenever the compiler is actually run,
        otherwise 0 once the cached files have been restored.

    Raises:
        Error: if the object is not in the good cache's object list and
            BISECT_CONTINUE_ON_MISSING is not enabled, or if a file to be
            restored is missing from the selected cache.
    """
    full_obj_path = get_obj_path(execargs)
    obj_list = os.path.join(bisect_dir, LIST_FILE)

    # If the output isn't an object file just call compiler
    if not full_obj_path:
        return exec_and_return(execargs)

    # If this isn't a bisected object just call compiler
    # This shouldn't happen!
    if not in_object_list(full_obj_path, obj_list):
        if not CONTINUE_ON_MISSING:
            raise Error(
                "%s is missing from cache! To ignore export "
                "BISECT_CONTINUE_ON_MISSING=1. See documentation for more "
                "details on this option." % full_obj_path
            )
        log_file = os.path.join(bisect_dir, "_MISSING_CACHED_OBJ_LOG")
        log_to_file(log_file, execargs, "? compiler", full_obj_path)
        return exec_and_return(execargs)

    cache = which_cache(full_obj_path)

    # If using safe WRAPPER_SAFE_MODE option call compiler and overwrite the
    # result from the good/bad cache. This option is safe and covers all
    # compiler side effects, but is very slow!
    if WRAPPER_SAFE_MODE:
        retval = exec_and_return(execargs)
        if retval:
            return retval
        # The compiler can exit 0 without having written the object; only
        # remove what is actually there instead of crashing on a missing file.
        if os.path.exists(full_obj_path):
            os.remove(full_obj_path)
        restore_file(bisect_dir, cache, full_obj_path)
        return retval

    # Generate compiler side effects. Trick Make into thinking compiler was
    # actually executed.
    for side_effect in get_side_effects(execargs):
        restore_file(bisect_dir, cache, side_effect)

    # If generated object file happened to be pruned/cleaned by Make then link
    # it over from cache again.
    if not os.path.exists(full_obj_path):
        restore_file(bisect_dir, cache, full_obj_path)

    return 0


def bisect_driver(bisect_stage, bisect_dir, execargs):
    """Call appropriate bisection stage according to value in bisect_stage.

    Args:
        bisect_stage: one of the values in VALID_MODES.
        bisect_dir: bisection directory.
        execargs: compiler execution arguments.

    Returns:
        The return value of the selected stage.

    Raises:
        ValueError: if bisect_stage is not a known stage.
    """
    if bisect_stage == "POPULATE_GOOD":
        return bisect_populate(execargs, bisect_dir, GOOD_CACHE)
    elif bisect_stage == "POPULATE_BAD":
        return bisect_populate(execargs, bisect_dir, BAD_CACHE)
    elif bisect_stage == "TRIAGE":
        return bisect_triage(execargs, bisect_dir)
    else:
        raise ValueError("wrong value for BISECT_STAGE: %s" % bisect_stage)