xref: /aosp_15_r20/external/toolchain-utils/binary_search_tool/bisect_driver.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1# -*- coding: utf-8 -*-
2# Copyright 2020 The ChromiumOS Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5#
6# This script is used to help the compiler wrapper in the ChromeOS and
7# Android build systems bisect for bad object files.
8
9"""Utilities for bisection of ChromeOS and Android object files.
10
11This module contains a set of utilities to allow bisection between
12two sets (good and bad) of object files. Mostly used to find compiler
13bugs.
14
15Reference page:
16https://sites.google.com/a/google.com/chromeos-toolchain-team-home2/home/team-tools-and-scripts/bisecting-chromeos-compiler-problems/bisection-compiler-wrapper
17
18Design doc:
19https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
20"""
21
22
23import contextlib
24import fcntl
25import os
26import shutil
27import stat
28import subprocess
29import sys
30
31
# Stage names accepted by the wrapper.
VALID_MODES = ("POPULATE_GOOD", "POPULATE_BAD", "TRIAGE")

# Names of the two cache directories kept under the bisection directory.
GOOD_CACHE = "good"
BAD_CACHE = "bad"

# Path (relative to the bisection directory) of the list of cached objects.
LIST_FILE = os.path.join(GOOD_CACHE, "_LIST")

# Behaviour switches: each one is enabled only when its environment variable
# is set to exactly "1".
CONTINUE_ON_MISSING = os.environ.get("BISECT_CONTINUE_ON_MISSING") == "1"
CONTINUE_ON_REDUNDANCY = os.environ.get("BISECT_CONTINUE_ON_REDUNDANCY") == "1"
WRAPPER_SAFE_MODE = os.environ.get("BISECT_WRAPPER_SAFE_MODE") == "1"
42
43
class Error(Exception):
    """Exception type raised by this module for compiler wrapper failures."""
46
47
@contextlib.contextmanager
def lock_file(path, mode):
    """Open a file and hold a lock on it for the duration of a "with" block.

    The lock is taken with fcntl.lockf, so it only coordinates with other
    processes that also lock the file. Files opened read-only take a shared
    lock (many readers may hold it at once); any writable mode takes an
    exclusive lock. The call blocks until the lock can be acquired.

    Buffered data is always flushed before the lock is released, including when
    the "with" body raises, so no write can reach the file after another
    process has been allowed in.

    Args:
      path: path to file being locked
      mode: mode to open file with ('w', 'r', etc.)

    Yields:
      The open file object, locked.
    """
    with open(path, mode) as f:
        # Mark the descriptor close-on-exec so it is not leaked to any child
        # process spawned while the lock is held.
        fd = f.fileno()
        fd_flags = fcntl.fcntl(fd, fcntl.F_GETFD)
        fcntl.fcntl(fd, fcntl.F_SETFD, fd_flags | fcntl.FD_CLOEXEC)

        # Decide from the file's capabilities rather than the literal mode
        # string: an exclusive lock cannot be taken on a read-only descriptor,
        # so modes such as "rt" must get a shared lock just like "r" and "rb".
        lock_type = fcntl.LOCK_EX if f.writable() else fcntl.LOCK_SH

        fcntl.lockf(f, lock_type)
        try:
            yield f
        finally:
            try:
                # Push buffered writes out while the lock is still held.
                f.flush()
            finally:
                fcntl.lockf(f, fcntl.LOCK_UN)
86
87
def log_to_file(path, execargs, link_from=None, link_to=None):
    """Append one invocation record to a log file.

    The record holds the current working directory and the space-joined
    execargs. When both link_from and link_to are given, a second line
    recording the "from -> to" relationship is appended as well.

    Args:
      path: log file to append to (locked while writing).
      execargs: compiler execution arguments.
      link_from: optional source side of the relationship.
      link_to: optional destination side of the relationship.
    """
    with lock_file(path, "a") as log:
        cwd = os.getcwd()
        command_line = " ".join(execargs)
        log.write("cd: %s; %s\n" % (cwd, command_line))
        if not (link_from and link_to):
            return
        log.write("%s -> %s\n" % (link_from, link_to))
98
99
def exec_and_return(execargs):
    """Run a command and hand back its exit status.

    The command's stdout and stderr are left untouched (not captured or
    inspected).

    Args:
      execargs: the command and its arguments.

    Returns:
      The exit status of the command.
    """
    exit_status = subprocess.call(execargs)
    return exit_status
107
108
def which_cache(obj_file):
    """Pick the cache (good or bad) an object file should be taken from.

    The file named by the BISECT_BAD_SET environment variable lists the objects
    that are currently in the bad set. An object listed there comes from the
    bad cache; every other object comes from the good cache.

    Args:
      obj_file: path of the object file to look up.

    Returns:
      BAD_CACHE if the object is listed in the bad set, GOOD_CACHE otherwise.
    """
    bad_set_path = os.environ.get("BISECT_BAD_SET")
    listed_as_bad = in_object_list(obj_file, bad_set_path)
    return BAD_CACHE if listed_as_bad else GOOD_CACHE
121
122
def makedirs(path):
    """Create path and any missing parents, tolerating an existing directory.

    Args:
      path: directory to create.

    Raises:
      OSError: creation failed and path is not a directory afterwards.
    """
    try:
        os.makedirs(path)
    except OSError:
        # A failure is harmless as long as the directory is there.
        if os.path.isdir(path):
            return
        raise
130
131
def get_obj_path(execargs):
    """Get the object path for the object file in the list of arguments.

    The -o argument is only considered when -c is also present. Outputs that
    are not real object files (stdout, /dev/null, *.tmp files and the probe
    objects produced by Automake/Autoconf/CMake configuration checks) are
    ignored.

    Args:
      execargs: compiler execution arguments.

    Returns:
      Absolute object path taken from the -o argument, or an empty string if
      no usable object file is being produced (including a -o that has no
      operand following it).
    """
    try:
        output_flag_index = execargs.index("-o")
        execargs.index("-c")
    except ValueError:
        return ""

    # A dangling "-o" at the end of the command line names no file; treat it
    # like any other invocation that produces no object instead of crashing.
    operand_index = output_flag_index + 1
    if operand_index >= len(execargs):
        return ""

    obj_path = execargs[operand_index]
    # Ignore args that do not create a file.
    if obj_path in ("-", "/dev/null"):
        return ""
    # Ignore files ending in .tmp.
    if obj_path.endswith(".tmp"):
        return ""

    abs_obj_path = os.path.abspath(obj_path)
    # Ignore configuration files generated by Automake/Autoconf/CMake etc.
    if (
        obj_path.endswith(("conftest.o", "CMakeFiles/test.o"))
        or "CMakeTmp" in obj_path
        or "CMakeTmp" in abs_obj_path
    ):
        return ""

    return abs_obj_path
166
167
def get_dep_path(execargs):
    """Get the dep file path for the dep file in the list of arguments.

    A dependency file is only produced when -MD or -MMD is given. Its path is
    the operand of -MF when present; otherwise it is the object path with its
    extension replaced by ".d".

    Args:
      execargs: compiler execution arguments.

    Returns:
      Absolute path of the dependency file, or an empty string if no
      dependency file is being outputted (including a -MF that has no operand
      following it).
    """
    if "-MD" not in execargs and "-MMD" not in execargs:
        return ""

    # If -MF is given this is the path of the dependency file.
    if "-MF" in execargs:
        operand_index = execargs.index("-MF") + 1
        # A dangling "-MF" names no file, so there is nothing to report.
        if operand_index >= len(execargs):
            return ""
        return os.path.abspath(execargs[operand_index])

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ""

    # Replace whatever extension the object has (".o", ".obj", ".lo", or none
    # at all) rather than blindly chopping the last two characters.
    return os.path.splitext(full_obj_path)[0] + ".d"
190
191
def get_dwo_path(execargs):
    """Get the dwo file path for the dwo file in the list of arguments.

    A dwo file is only expected when -gsplit-dwarf is given; its path is the
    object path with its extension replaced by ".dwo".

    Args:
      execargs: compiler execution arguments.

    Returns:
      Absolute dwo file path, or an empty string if no dwo file is being
      outputted.
    """
    if "-gsplit-dwarf" not in execargs:
        return ""

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ""

    # Replace whatever extension the object has (".o", ".obj", ".lo", or none
    # at all) rather than blindly chopping the last two characters.
    return os.path.splitext(full_obj_path)[0] + ".dwo"
207
208
def in_object_list(obj_name, list_filename):
    """Tell whether an object file name appears in an object list file.

    The list file holds one name per line; surrounding whitespace on each line
    is ignored when comparing.

    Args:
      obj_name: object file name to look for. An empty name is never found.
      list_filename: path of the list file (locked while it is read).

    Returns:
      True if some line of the file equals obj_name, False otherwise.
    """
    if not obj_name:
        return False

    with lock_file(list_filename, "r") as listing:
        return any(entry.strip() == obj_name for entry in listing)
220
221
def get_side_effects(execargs):
    """Collect the extra files the compiler writes besides the object file.

    Args:
      execargs: compiler execution arguments.

    Returns:
      List of paths of the side-effect files: the dependency file first and
      the dwo file second, each included only when the arguments request it.
    """
    candidates = (get_dep_path(execargs), get_dwo_path(execargs))
    # An empty path means that kind of file is not produced.
    return [path for path in candidates if path]
241
242
def cache_file(execargs, bisect_dir, cache, abs_file_path):
    """Copy a compiler output file (.o/.d/.dwo) into a bisection cache.

    The attempt is recorded in the cache's _POPULATE_LOG. A file that is
    already present in the cache is recorded in the cache's _DUPS list and is
    an error unless CONTINUE_ON_REDUNDANCY is set.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: The directory where bisection caches live.
      cache: Which cache the file will be cached to (GOOD/BAD).
      abs_file_path: Absolute path to file being cached.

    Returns:
      True if the file is in the cache afterwards, False if the file to cache
      does not exist.

    Raises:
      Exception: the file was already cached and CONTINUE_ON_REDUNDANCY is not
        set, or copying failed.
    """
    population_dir = os.path.join(bisect_dir, cache)
    # abs_file_path is absolute, so os.path.join would throw the cache prefix
    # away; plain concatenation mirrors the full path under the cache.
    bisect_path = population_dir + abs_file_path
    makedirs(os.path.dirname(bisect_path))
    log_to_file(
        os.path.join(population_dir, "_POPULATE_LOG"),
        execargs,
        abs_file_path,
        bisect_path,
    )

    try:
        if not os.path.exists(abs_file_path):
            # File not found (happens when compilation fails but error code is
            # still 0)
            return False

        if os.path.exists(bisect_path):
            # Already cached: note the duplicate, then either carry on or
            # fail.
            dups_path = os.path.join(population_dir, "_DUPS")
            with lock_file(dups_path, "a") as dup_object_list:
                dup_object_list.write("%s\n" % abs_file_path)
            if CONTINUE_ON_REDUNDANCY:
                return True
            raise Exception(
                "Trying to cache file %s multiple times. To avoid the error, set "
                "BISECT_CONTINUE_ON_REDUNDANCY to 1. For reference, the list of "
                "such files will be written to %s" % (abs_file_path, dups_path)
            )

        shutil.copy2(abs_file_path, bisect_path)
        # Make the cached copy read-only so later compilations can't
        # accidentally overwrite it.
        os.chmod(bisect_path, 0o444)
        return True
    except Exception:
        print("Could not cache file %s" % abs_file_path, file=sys.stderr)
        raise
292
293
def restore_file(bisect_dir, cache, abs_file_path):
    """Restore file from cache (.o/.d/.dwo).

    Any existing file at abs_file_path is removed and replaced by the cached
    copy, which is then made user-writable.

    Args:
      bisect_dir: The directory where bisection caches live.
      cache: Which cache the file will be restored from (GOOD/BAD).
      abs_file_path: Absolute path to file being restored.

    Raises:
      Error: the file is not present in the requested cache.
    """
    # abs_file_path is absolute, so os.path.join would throw the cache prefix
    # away; use + instead.
    cached_path = os.path.join(bisect_dir, cache) + abs_file_path
    if not os.path.exists(cached_path):
        # Name the missing file first and the cache second, matching the
        # wording of the message.
        raise Error(
            "%s is missing from %s cache! Unsure how to proceed. Make "
            "will now crash." % (cached_path, cache)
        )

    if os.path.exists(abs_file_path):
        os.remove(abs_file_path)
    shutil.copy2(cached_path, abs_file_path)
    # Add write permission to the restored object files as some packages
    # (such as kernels) may need write permission to delete files.
    os.chmod(abs_file_path, os.stat(abs_file_path).st_mode | stat.S_IWUSR)
318
319
def bisect_populate(execargs, bisect_dir, population_name):
    """Run the compiler and record its outputs in a bisection cache.

    After a successful compilation the produced object file is copied into the
    cache, its path is appended to the cache's _LIST file, and any side-effect
    files are cached as well.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: bisection directory.
      population_name: name of the cache being populated (good/bad).

    Returns:
      The exit status of the compiler.
    """
    status = exec_and_return(execargs)
    if status:
        return status

    obj_path = get_obj_path(execargs)
    # An empty path means this is not a normal compiler call: there is no -o
    # argument, or its output file is unusable. Most likely the compiler was
    # invoked to link, or as part of a configuration test, so running it was
    # all that needed doing.
    if not obj_path:
        return status

    # Nothing more to record if the object file could not be cached.
    if not cache_file(execargs, bisect_dir, population_name, obj_path):
        return status

    list_path = os.path.join(bisect_dir, population_name, "_LIST")
    with lock_file(list_path, "a") as object_list:
        object_list.write("%s\n" % obj_path)

    for side_effect in get_side_effects(execargs):
        cache_file(execargs, bisect_dir, population_name, side_effect)

    return status
358
359
def bisect_triage(execargs, bisect_dir):
    """Use the object file from the appropriate cache (good/bad).

    Given a populated bisection directory, take the object file from whichever
    cache (good/bad) the current good/bad sets select for it, instead of
    trusting a fresh compilation. The compiler's side-effect files are
    restored from the same cache.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: populated bisection directory.

    Returns:
      The compiler's exit status when the compiler was actually run, 0 when
      the outputs were served from the cache.

    Raises:
      Error: the object is not in the cached object list and
        CONTINUE_ON_MISSING is not set, or a needed file is missing from the
        cache.
    """
    obj_path = get_obj_path(execargs)
    obj_list = os.path.join(bisect_dir, LIST_FILE)

    # Not producing an object file: just run the compiler.
    if not obj_path:
        return exec_and_return(execargs)

    # An object that was never cached should not happen. Either fail loudly
    # or, if allowed, log it and fall back to the real compiler.
    if not in_object_list(obj_path, obj_list):
        if not CONTINUE_ON_MISSING:
            raise Error(
                (
                    "%s is missing from cache! To ignore export "
                    "BISECT_CONTINUE_ON_MISSING=1. See documentation for more "
                    "details on this option." % obj_path
                )
            )
        missing_log = os.path.join(bisect_dir, "_MISSING_CACHED_OBJ_LOG")
        log_to_file(missing_log, execargs, "? compiler", obj_path)
        return exec_and_return(execargs)

    cache = which_cache(obj_path)

    # Safe mode: really run the compiler so every side effect is produced,
    # then replace the object it wrote with the cached one. Covers all
    # compiler side effects, but is very slow!
    if WRAPPER_SAFE_MODE:
        status = exec_and_return(execargs)
        if not status:
            os.remove(obj_path)
            restore_file(bisect_dir, cache, obj_path)
        return status

    # Recreate the compiler's side effects from the cache so Make believes the
    # compiler actually ran.
    for side_effect in get_side_effects(execargs):
        restore_file(bisect_dir, cache, side_effect)

    # If the object file happened to be pruned/cleaned by Make, bring it back
    # from the cache.
    if not os.path.exists(obj_path):
        restore_file(bisect_dir, cache, obj_path)

    return 0
420
421
def bisect_driver(bisect_stage, bisect_dir, execargs):
    """Dispatch to the bisection stage named by bisect_stage.

    Args:
      bisect_stage: "POPULATE_GOOD", "POPULATE_BAD" or "TRIAGE".
      bisect_dir: bisection directory.
      execargs: compiler execution arguments.

    Returns:
      Whatever the selected stage returns.

    Raises:
      ValueError: bisect_stage is not one of the known stages.
    """
    if bisect_stage == "TRIAGE":
        return bisect_triage(execargs, bisect_dir)

    # Both populate stages share one implementation and differ only in the
    # cache they fill.
    if bisect_stage == "POPULATE_GOOD":
        population = GOOD_CACHE
    elif bisect_stage == "POPULATE_BAD":
        population = BAD_CACHE
    else:
        raise ValueError("wrong value for BISECT_STAGE: %s" % bisect_stage)

    return bisect_populate(execargs, bisect_dir, population)
432