# -*- coding: utf-8 -*-
# Copyright 2020 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# This script is used to help the compiler wrapper in the ChromeOS and
# Android build systems bisect for bad object files.

"""Utilities for bisection of ChromeOS and Android object files.

This module contains a set of utilities to allow bisection between
two sets (good and bad) of object files. Mostly used to find compiler
bugs.

Reference page:
https://sites.google.com/a/google.com/chromeos-toolchain-team-home2/home/team-tools-and-scripts/bisecting-chromeos-compiler-problems/bisection-compiler-wrapper

Design doc:
https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
"""


import contextlib
import fcntl
import os
import shutil
import stat
import subprocess
import sys


VALID_MODES = ("POPULATE_GOOD", "POPULATE_BAD", "TRIAGE")
GOOD_CACHE = "good"
BAD_CACHE = "bad"
LIST_FILE = os.path.join(GOOD_CACHE, "_LIST")

CONTINUE_ON_MISSING = os.environ.get("BISECT_CONTINUE_ON_MISSING", None) == "1"
CONTINUE_ON_REDUNDANCY = (
    os.environ.get("BISECT_CONTINUE_ON_REDUNDANCY", None) == "1"
)
WRAPPER_SAFE_MODE = os.environ.get("BISECT_WRAPPER_SAFE_MODE", None) == "1"


class Error(Exception):
    """The general compiler wrapper error class."""


@contextlib.contextmanager
def lock_file(path, mode):
    """Lock file and block if other process has lock on file.

    Acquire a lock on the file with fcntl.lockf. Only blocks other processes
    if they attempt to also acquire the lock through this method. If only
    reading (modes 'r' and 'rb') then the lock is shared (i.e. many reads can
    happen concurrently, but only one process may write at a time).

    This function is a contextmanager, meaning it's meant to be used with the
    "with" statement in Python. The body of the "with" statement runs at the
    "yield"; when it ends (normally or with an exception) the file is flushed,
    unlocked and closed.

    Args:
        path: path to file being locked
        mode: mode to open file with ('w', 'r', etc.)

    Yields:
        The opened, locked file object.
    """
    with open(path, mode) as f:
        # Apply FD_CLOEXEC argument to fd. This ensures that the file
        # descriptor won't be leaked to any child processes.
        current_args = fcntl.fcntl(f.fileno(), fcntl.F_GETFD)
        fcntl.fcntl(f.fileno(), fcntl.F_SETFD, current_args | fcntl.FD_CLOEXEC)

        # Reads can share the lock as no race conditions exist. If write is
        # needed, give writing process exclusive access to the file.
        if f.mode in ("r", "rb"):
            lock_type = fcntl.LOCK_SH
        else:
            lock_type = fcntl.LOCK_EX

        fcntl.lockf(f, lock_type)
        try:
            yield f
        finally:
            # Flush while the lock is still held, even when the "with" body
            # raised, so buffered writes never reach the file after another
            # process has been allowed in.
            try:
                f.flush()
            finally:
                fcntl.lockf(f, fcntl.LOCK_UN)


def log_to_file(path, execargs, link_from=None, link_to=None):
    """Common logging function.

    Append the current working directory and the current execargs to the log
    at path and, when both link_from and link_to are given, a from-to
    relationship between files.

    Args:
        path: path of the log file to append to.
        execargs: compiler execution arguments (list of strings).
        link_from: optional source of the from-to relationship.
        link_to: optional destination of the from-to relationship.
    """
    with lock_file(path, "a") as log:
        log.write("cd: %s; %s\n" % (os.getcwd(), " ".join(execargs)))
        if link_from and link_to:
            log.write("%s -> %s\n" % (link_from, link_to))


def exec_and_return(execargs):
    """Execute process and return.

    Execute according to execargs and return its exit status. Don't inspect
    stderr or stdout.
    """
    return subprocess.call(execargs)


def which_cache(obj_file):
    """Determine which cache an object belongs to.

    The binary search tool creates two files for each search iteration listing
    the full set of bad objects and full set of good objects. We use this to
    determine where an object file should be linked from (good or bad).

    Args:
        obj_file: absolute path of the object file.

    Returns:
        BAD_CACHE if obj_file is listed in the file named by the
        BISECT_BAD_SET environment variable, GOOD_CACHE otherwise.

    Raises:
        Error: if obj_file is non-empty and BISECT_BAD_SET is not set.
    """
    bad_set_file = os.environ.get("BISECT_BAD_SET")
    # Without this check the missing variable surfaces as an obscure
    # TypeError from open(None).
    if obj_file and bad_set_file is None:
        raise Error(
            "BISECT_BAD_SET is not set; unable to determine which cache "
            "%s belongs to." % obj_file
        )
    if in_object_list(obj_file, bad_set_file):
        return BAD_CACHE
    return GOOD_CACHE


def makedirs(path):
    """Create directories in path; an already existing directory is fine."""
    # exist_ok=True still raises if path exists but is not a directory.
    os.makedirs(path, exist_ok=True)


def get_obj_path(execargs):
    """Get the object path for the object file in the list of arguments.

    Returns:
        Absolute object path from execution args (-o argument). If no object
        being outputted, then return empty string. -o argument is checked only
        if -c is also present.
    """
    if "-o" not in execargs or "-c" not in execargs:
        return ""

    value_index = execargs.index("-o") + 1
    # A trailing "-o" has no value; treat it like a call with no output and
    # let the compiler itself report the malformed command line.
    if value_index >= len(execargs):
        return ""

    obj_path = execargs[value_index]
    # Ignore args that do not create a file.
    if obj_path in ("-", "/dev/null"):
        return ""
    # Ignore files ending in .tmp.
    if obj_path.endswith(".tmp"):
        return ""

    abs_obj_path = os.path.abspath(obj_path)
    # Ignore configuration files generated by Automake/Autoconf/CMake etc.
    if (
        obj_path.endswith(("conftest.o", "CMakeFiles/test.o"))
        or "CMakeTmp" in obj_path
        or "CMakeTmp" in abs_obj_path
    ):
        return ""

    return abs_obj_path


def get_dep_path(execargs):
    """Get the dep file path for the dep file in the list of arguments.

    Returns:
        Absolute path of the dependency file: the -MF argument when given,
        otherwise the -o argument with its extension replaced by .d. If no
        dependency being outputted then return empty string.
    """
    if "-MD" not in execargs and "-MMD" not in execargs:
        return ""

    # If -MF is given this is the path of the dependency file. Otherwise the
    # dependency file is the value of -o but with a .d extension
    if "-MF" in execargs:
        value_index = execargs.index("-MF") + 1
        # A trailing "-MF" names no file.
        if value_index >= len(execargs):
            return ""
        return os.path.abspath(execargs[value_index])

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ""

    # Replace the real extension rather than assuming a two-character ".o".
    return os.path.splitext(full_obj_path)[0] + ".d"


def get_dwo_path(execargs):
    """Get the dwo file path for the dwo file in the list of arguments.

    Returns:
        Absolute dwo file path: the -o argument with its extension replaced by
        .dwo, only when -gsplit-dwarf is present. If no dwo file being
        outputted then return empty string.
    """
    if "-gsplit-dwarf" not in execargs:
        return ""

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ""

    # Replace the real extension rather than assuming a two-character ".o".
    return os.path.splitext(full_obj_path)[0] + ".dwo"


def in_object_list(obj_name, list_filename):
    """Check if object file name exist in file with object list.

    Args:
        obj_name: object path to look for; an empty name is never found.
        list_filename: path of a file listing one object path per line.

    Returns:
        True if a line of the file, stripped of surrounding whitespace, equals
        obj_name; False otherwise.
    """
    if not obj_name:
        return False

    with lock_file(list_filename, "r") as list_file:
        return any(line.strip() == obj_name for line in list_file)


def get_side_effects(execargs):
    """Determine side effects generated by compiler

    Returns:
        List of paths of objects that the compiler generates as side effects
        (dependency file first, then dwo file, each only when applicable).
    """
    candidates = (
        get_dep_path(execargs),  # Cache dependency files
        get_dwo_path(execargs),  # Cache dwo files
    )
    return [path for path in candidates if path]


def cache_file(execargs, bisect_dir, cache, abs_file_path):
    """Cache compiler output file (.o/.d/.dwo).

    Args:
        execargs: compiler execution arguments.
        bisect_dir: The directory where bisection caches live.
        cache: Which cache the file will be cached to (GOOD/BAD).
        abs_file_path: Absolute path to file being cached.

    Returns:
        True if caching was successful (or the file was already cached and
        CONTINUE_ON_REDUNDANCY is set), False if abs_file_path does not exist.

    Raises:
        Error: if the file is already in the cache and CONTINUE_ON_REDUNDANCY
            is not set.
    """
    population_dir = os.path.join(bisect_dir, cache)
    # os.path.join fails with absolute paths, use + instead
    bisect_path = population_dir + abs_file_path
    makedirs(os.path.dirname(bisect_path))
    pop_log = os.path.join(population_dir, "_POPULATE_LOG")
    log_to_file(pop_log, execargs, abs_file_path, bisect_path)

    try:
        if not os.path.exists(abs_file_path):
            # File not found (happens when compilation fails but error code is
            # still 0)
            return False

        if os.path.exists(bisect_path):
            # File was already cached: record the duplicate.
            dups_path = os.path.join(population_dir, "_DUPS")
            with lock_file(dups_path, "a") as dup_object_list:
                dup_object_list.write("%s\n" % abs_file_path)
            if CONTINUE_ON_REDUNDANCY:
                return True
            raise Error(
                "Trying to cache file %s multiple times. To avoid the error, set "
                "BISECT_CONTINUE_ON_REDUNDANCY to 1. For reference, the list of "
                "such files will be written to %s" % (abs_file_path, dups_path)
            )

        shutil.copy2(abs_file_path, bisect_path)
        # Set cache object to be read-only so later compilations can't
        # accidentally overwrite it.
        os.chmod(bisect_path, 0o444)
        return True
    except Exception:
        print("Could not cache file %s" % abs_file_path, file=sys.stderr)
        raise


def restore_file(bisect_dir, cache, abs_file_path):
    """Restore file from cache (.o/.d/.dwo).

    Args:
        bisect_dir: The directory where bisection caches live.
        cache: Which cache the file will be restored from (GOOD/BAD).
        abs_file_path: Absolute path to file being restored.

    Raises:
        Error: if the file is not present in the requested cache.
    """
    # os.path.join fails with absolute paths, use + instead
    cached_path = os.path.join(bisect_dir, cache) + abs_file_path
    if not os.path.exists(cached_path):
        raise Error(
            "%s is missing from %s cache! Unsure how to proceed. Make "
            "will now crash." % (cached_path, cache)
        )

    if os.path.exists(abs_file_path):
        os.remove(abs_file_path)
    shutil.copy2(cached_path, abs_file_path)
    # Add write permission to the restored object files as some packages
    # (such as kernels) may need write permission to delete files.
    os.chmod(abs_file_path, os.stat(abs_file_path).st_mode | stat.S_IWUSR)


def bisect_populate(execargs, bisect_dir, population_name):
    """Add necessary information to the bisect cache for the given execution.

    Extract the necessary information for bisection from the compiler
    execution arguments and put it into the bisection cache. This
    includes copying the created object file, adding the object
    file path to the cache list and keeping a log of the execution.

    Args:
        execargs: compiler execution arguments.
        bisect_dir: bisection directory.
        population_name: name of the cache being populated (good/bad).

    Returns:
        The exit status of the compiler invocation.
    """
    retval = exec_and_return(execargs)
    if retval:
        return retval

    full_obj_path = get_obj_path(execargs)
    # This is not a normal compiler call because it doesn't have a -o argument,
    # or the -o argument has an unusable output file.
    # It's likely that this compiler call was actually made to invoke the
    # linker, or as part of a configuration test. In this case we want to
    # simply call the compiler and return.
    if not full_obj_path:
        return retval

    # Return if not able to cache the object file
    if not cache_file(execargs, bisect_dir, population_name, full_obj_path):
        return retval

    population_dir = os.path.join(bisect_dir, population_name)
    with lock_file(os.path.join(population_dir, "_LIST"), "a") as object_list:
        object_list.write("%s\n" % full_obj_path)

    # Side effects are cached on a best-effort basis; a missing one is not an
    # error.
    for side_effect in get_side_effects(execargs):
        cache_file(execargs, bisect_dir, population_name, side_effect)

    return retval


def bisect_triage(execargs, bisect_dir):
    """Use object file from appropriate cache (good/bad).

    Given a populated bisection directory, use the object file saved
    into one of the caches (good/bad) according to what is specified
    in the good/bad sets. The good/bad sets are generated by the
    high level binary search tool. Additionally restore any possible
    side effects of compiler.

    Args:
        execargs: compiler execution arguments.
        bisect_dir: populated bisection directory.

    Returns:
        The compiler's exit status when the compiler is actually run,
        otherwise 0.

    Raises:
        Error: if the object is not in the good cache list and
            CONTINUE_ON_MISSING is not set, or if a file to restore is
            missing from the cache.
    """
    full_obj_path = get_obj_path(execargs)
    obj_list = os.path.join(bisect_dir, LIST_FILE)

    # If the output isn't an object file just call compiler
    if not full_obj_path:
        return exec_and_return(execargs)

    # If this isn't a bisected object just call compiler
    # This shouldn't happen!
    if not in_object_list(full_obj_path, obj_list):
        if not CONTINUE_ON_MISSING:
            raise Error(
                "%s is missing from cache! To ignore export "
                "BISECT_CONTINUE_ON_MISSING=1. See documentation for more "
                "details on this option." % full_obj_path
            )
        log_file = os.path.join(bisect_dir, "_MISSING_CACHED_OBJ_LOG")
        log_to_file(log_file, execargs, "? compiler", full_obj_path)
        return exec_and_return(execargs)

    cache = which_cache(full_obj_path)

    # If using safe WRAPPER_SAFE_MODE option call compiler and overwrite the
    # result from the good/bad cache. This option is safe and covers all
    # compiler side effects, but is very slow!
    if WRAPPER_SAFE_MODE:
        retval = exec_and_return(execargs)
        if retval:
            return retval
        # The compiler may exit 0 without producing the object; only remove
        # what is actually there.
        if os.path.exists(full_obj_path):
            os.remove(full_obj_path)
        restore_file(bisect_dir, cache, full_obj_path)
        return retval

    # Generate compiler side effects. Trick Make into thinking compiler was
    # actually executed.
    for side_effect in get_side_effects(execargs):
        restore_file(bisect_dir, cache, side_effect)

    # If generated object file happened to be pruned/cleaned by Make then link
    # it over from cache again.
    if not os.path.exists(full_obj_path):
        restore_file(bisect_dir, cache, full_obj_path)

    return 0


def bisect_driver(bisect_stage, bisect_dir, execargs):
    """Call appropriate bisection stage according to value in bisect_stage.

    Args:
        bisect_stage: one of "POPULATE_GOOD", "POPULATE_BAD" or "TRIAGE".
        bisect_dir: bisection directory.
        execargs: compiler execution arguments.

    Returns:
        The return value of the selected stage.

    Raises:
        ValueError: if bisect_stage is not a known stage.
    """
    if bisect_stage == "POPULATE_GOOD":
        return bisect_populate(execargs, bisect_dir, GOOD_CACHE)
    if bisect_stage == "POPULATE_BAD":
        return bisect_populate(execargs, bisect_dir, BAD_CACHE)
    if bisect_stage == "TRIAGE":
        return bisect_triage(execargs, bisect_dir)
    raise ValueError("wrong value for BISECT_STAGE: %s" % bisect_stage)