xref: /aosp_15_r20/external/bazelbuild-rules_python/python/private/stage2_bootstrap_template.py (revision 60517a1edbc8ecf509223e9af94a7adec7d736b8)
# This is a "stage 2" bootstrap. We can assume we're running under the desired
# interpreter, with some of the basic interpreter options/envvars set.
# However, more setup is required to make the app's real main file runnable.
4
5import sys
6
# Unless "safe path" mode is active (-P / PYTHONSAFEPATH, Python 3.11+), the
# Python interpreter prepends the directory containing this script (following
# symlinks) to the import path. This is the cause of #9239, and is a special
# case of #7091. We therefore explicitly delete that entry -- but only when it
# was actually added: with safe path enabled, sys.path[0] is a legitimate
# entry that must be kept.
# getattr() is used because sys.flags has no safe_path attribute on older
# interpreters.
# TODO(#7091): Remove this hack when no longer necessary.
if not getattr(sys.flags, "safe_path", False):
    del sys.path[0]
14
15import contextlib
16import os
17import re
18import runpy
19import subprocess
20import uuid
21
# ===== Template substitutions start =====
# All placeholders are gathered in one place so it's easy to tell which ones
# are used.

# Runfiles-relative path of the main Python source file.
MAIN = "%main%"
# Colon-delimited string of runfiles-relative import paths to add.
IMPORTS_STR = "%imports%"
WORKSPACE_NAME = "%workspace_name%"
# The substituted value is already a valid Python literal, but it is kept
# quoted so that this template remains parsable by tools.
IMPORT_ALL = "%import_all%" == "True"
# Runfiles-relative path of the coverage tool entry point, if any.
COVERAGE_TOOL = "%coverage_tool%"

# ===== Template substitutions end =====
37
38
def is_windows():
    """Return True when the interpreter reports a Windows platform."""
    return os.name == "nt"
42
43
def get_windows_path_with_unc_prefix(path):
    """Return `path`, adding the Windows extended-length prefix when needed.

    The path is always stripped of surrounding whitespace. It is returned
    otherwise unchanged when not running on Windows, when the Windows build
    is 10.0.14393 or newer, when the platform is mingw, or when the prefix is
    already present. In the remaining case the path is made absolute and the
    prefix is prepended.

    Args:
      path: The filesystem path to adjust.

    Returns:
      The stripped path, possibly absolute and prefixed.
    """
    path = path.strip()

    # No need to add prefix for non-Windows platforms.
    if not is_windows():
        return path

    # Starting in Windows 10, version 1607(OS build 14393), MAX_PATH limitations have been
    # removed from common Win32 file and directory functions.
    # Related doc: https://docs.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=cmd#enable-long-paths-in-windows-10-version-1607-and-later
    # Imported lazily: only needed on the Windows code path.
    import platform

    # Compare numerically. A plain string comparison would rank "6.1.7601"
    # above "10.0.14393" and wrongly skip the prefix on older Windows.
    try:
        windows_version = tuple(
            int(piece) for piece in platform.win32_ver()[1].split(".")
        )
    except ValueError:
        # Unknown/unparsable version: fall through and add the prefix.
        windows_version = ()
    if windows_version >= (10, 0, 14393):
        return path

    # Imported lazily: only needed on the Windows code path.
    import sysconfig

    if sysconfig.get_platform() == "mingw":
        return path

    # Lets start the unicode fun
    unicode_prefix = "\\\\?\\"
    if path.startswith(unicode_prefix):
        return path

    # os.path.abspath returns a normalized absolute path
    return unicode_prefix + os.path.abspath(path)
71
72
def search_path(name):
    """Look up `name` in the directories listed in the PATH variable.

    Falls back to os.defpath when PATH is unset. Empty PATH entries are
    skipped.

    Returns:
      The path of the first regular, executable file found, or None.
    """
    for directory in os.getenv("PATH", os.defpath).split(os.pathsep):
        if not directory:
            continue
        candidate = os.path.join(directory, name)
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate
    return None
82
83
def is_verbose():
    """Return True if RULES_PYTHON_BOOTSTRAP_VERBOSE is non-empty in the environment."""
    if os.environ.get("RULES_PYTHON_BOOTSTRAP_VERBOSE"):
        return True
    return False
86
87
def print_verbose(*args, mapping=None, values=None):
    """Write a debug message to stderr when bootstrap verbosity is enabled.

    Args:
      *args: Leading values printed on every emitted line.
      mapping: If not None, one line is printed per item, sorted by key, with
        "key=repr(value)" appended after `args`.
      values: If not None (and `mapping` is None), one line is printed per
        element, with "[index] repr(element)" appended after `args`.

    With neither `mapping` nor `values`, a single line with `args` is printed.
    """
    if not is_verbose():
        return

    prefix = "bootstrap: stage 2:"
    if mapping is not None:
        suffixes = (
            f"{key}={value!r}" for key, value in sorted((mapping or {}).items())
        )
    elif values is not None:
        suffixes = (f"[{i}] {v!r}" for i, v in enumerate(values))
    else:
        print(prefix, *args, file=sys.stderr, flush=True)
        return

    for suffix in suffixes:
        print(prefix, *args, suffix, file=sys.stderr, flush=True)
110
111
def print_verbose_coverage(*args):
    """Write `args` to stderr, but only when VERBOSE_COVERAGE is non-empty."""
    if not os.environ.get("VERBOSE_COVERAGE"):
        return
    print(*args, file=sys.stderr, flush=True)
116
117
def is_verbose_coverage():
    """Return True if verbose coverage output is enabled.

    That is the case when VERBOSE_COVERAGE is non-empty in the environment, or
    when general bootstrap verbosity is on (see is_verbose()).

    Returns:
      A bool (never the raw environment string).
    """
    return bool(os.environ.get("VERBOSE_COVERAGE")) or is_verbose()
121
122
def find_coverage_entry_point(module_space):
    """Locate the coverage tool entry point, if one is configured.

    The toolchain-provided COVERAGE_TOOL takes precedence; otherwise the
    PYTHON_COVERAGE environment variable is consulted.

    Args:
      module_space: The runfiles root, used to resolve relative tool paths.

    Returns:
      The result of find_binary() for the configured tool, or None when no
      tool is configured.
    """
    if COVERAGE_TOOL:
        message, tool = "Using toolchain coverage_tool %r", COVERAGE_TOOL
    else:
        message, tool = "PYTHON_COVERAGE: %r", os.environ.get("PYTHON_COVERAGE")

    if not tool:
        return None
    print_verbose_coverage(message % tool)
    return find_binary(module_space, tool)
134
135
def find_binary(module_space, bin_name):
    """Resolve `bin_name` to the path of the real binary.

    Args:
      module_space: The runfiles root, used for repo-relative paths.
      bin_name: A label, an absolute path, a repo-relative path, or a bare
        program name.

    Returns:
      None for an empty `bin_name`; otherwise the resolved path (which may be
      None if a bare name is not found in the search path).

    Raises:
      AssertionError: If `bin_name` is a label (starts with "//").
    """
    if not bin_name:
        return None

    # Case 1: Path is a label. Not supported yet.
    if bin_name.startswith("//"):
        raise AssertionError(
            "Bazel does not support execution of Python interpreters via labels yet"
        )

    # Case 2: Absolute path.
    if os.path.isabs(bin_name):
        return bin_name

    # Case 3: Path is relative to the repo root.
    # normpath() converts slashes to os.sep on Windows.
    if os.sep in os.path.normpath(bin_name):
        return os.path.join(module_space, bin_name)

    # Case 4: Path has to be looked up in the search path.
    return search_path(bin_name)
155
156
def create_python_path_entries(python_imports, module_space):
    """Build the import path entries rooted in the runfiles tree.

    Args:
      python_imports: Colon-delimited string of runfiles-relative import
        paths. May be empty.
      module_space: The runfiles root.

    Returns:
      A list starting with `module_space`, followed by one
      "module_space/<import>" entry per non-empty import path, in order.
    """
    entries = [module_space]
    # Skip empty segments: "".split(":") yields [""], which would otherwise
    # produce a redundant "module_space/" entry that deduplication cannot
    # recognise as a duplicate of module_space.
    entries.extend(
        "%s/%s" % (module_space, rel_path)
        for rel_path in python_imports.split(":")
        if rel_path
    )
    return entries
160
161
def find_runfiles_root(main_rel_path):
    """Locate the runfiles tree that contains `main_rel_path`.

    Args:
      main_rel_path: Runfiles-relative path of the main entry point.

    Returns:
      The path of the runfiles directory.

    Raises:
      AssertionError: If no runfiles directory can be found.
    """
    # The caller may have resolved this stub through the runfiles manifest, in
    # which case argv[0] can point outside the runfiles directory. Honor
    # RUNFILES_DIR and RUNFILES_MANIFEST_FILE from the caller first.
    env_root = os.environ.get("RUNFILES_DIR", None)
    if not env_root:
        manifest = os.environ.get("RUNFILES_MANIFEST_FILE", "")
        if manifest.endswith((".runfiles_manifest", ".runfiles/MANIFEST")):
            # Both "_manifest" and "/MANIFEST" are 9 characters long.
            env_root = manifest[:-9]
    # Be defensive: only trust the directory if it holds our main entry point.
    if env_root and os.path.exists(os.path.join(env_root, main_rel_path)):
        return env_root

    stub = sys.argv[0]
    if not os.path.isabs(stub):
        stub = os.path.join(os.getcwd(), stub)

    on_windows = is_windows()
    exe_suffix = ".exe" if on_windows else ""
    runfiles_pattern = r"(.*\.runfiles)" + (r"\\" if on_windows else "/") + ".*"

    while True:
        # A runfiles directory sitting next to the stub.
        sibling = stub + exe_suffix + ".runfiles"
        if os.path.isdir(sibling):
            return sibling

        # The stub itself lives inside a runfiles directory.
        inside = re.match(runfiles_pattern, stub)
        if inside:
            return inside.group(1)

        # Otherwise follow one level of symlink and try again.
        if not os.path.islink(stub):
            raise AssertionError(
                "Cannot find .runfiles directory for %s" % sys.argv[0]
            )
        link_target = os.readlink(stub)
        if os.path.isabs(link_target):
            stub = link_target
        else:
            stub = os.path.join(os.path.dirname(stub), link_target)
204
205
def get_repositories_imports(module_space, import_all):
    """Return the repository roots to add to the import path.

    Args:
      module_space: The runfiles root.
      import_all: When true, every directory directly under `module_space` is
        returned (sorted by path); otherwise only the main workspace's
        directory is returned.

    Returns:
      A list of directory paths.
    """
    if not import_all:
        return [os.path.join(module_space, WORKSPACE_NAME)]

    candidates = sorted(
        os.path.join(module_space, entry) for entry in os.listdir(module_space)
    )
    return [candidate for candidate in candidates if os.path.isdir(candidate)]
213
214
def runfiles_envvar(module_space):
    """Determine which runfiles environment variable to set, and its value.

    Args:
      module_space: The runfiles directory found for this binary.

    Returns:
      A tuple of (var_name, var_value) where var_name is either 'RUNFILES_DIR' or
      'RUNFILES_MANIFEST_FILE' and var_value is the path to that directory or
      file, or (None, None) if runfiles couldn't be found.
    """
    # If this binary is the data-dependency of another one, the other sets
    # RUNFILES_MANIFEST_FILE or RUNFILES_DIR for our sake.
    for var_name in ("RUNFILES_MANIFEST_FILE", "RUNFILES_DIR"):
        inherited = os.environ.get(var_name, None)
        if inherited:
            return (var_name, inherited)

    manifest_candidates = (
        # The runfiles "output" manifest, argv[0] + ".runfiles_manifest".
        module_space + "_manifest",
        # The runfiles "input" manifest, argv[0] + ".runfiles/MANIFEST".
        # Normally both manifests are present, but the former will be missing
        # for zip-based builds or if someone copies the runfiles tree
        # elsewhere.
        os.path.join(module_space, "MANIFEST"),
    )
    for manifest in manifest_candidates:
        if os.path.exists(manifest):
            return ("RUNFILES_MANIFEST_FILE", manifest)

    # If running in a sandbox and no environment variables are set, then
    # look for the runfiles next to the binary.
    if module_space.endswith(".runfiles") and os.path.isdir(module_space):
        return ("RUNFILES_DIR", module_space)

    return (None, None)
252
253
def deduplicate(items):
    """Lazily yield each distinct item once, in first-occurrence order."""
    already_yielded = set()
    for item in items:
        if item in already_yielded:
            continue
        already_yielded.add(item)
        yield item
261
262
def instrumented_file_paths():
    """Yield (realpath, listed path) pairs for instrumented files.

    The file list is read from the manifest named by the COVERAGE_MANIFEST
    environment variable; nothing is yielded when it is unset or empty. Blank
    lines are skipped, and only files whose real path differs from the listed
    path are yielded.
    """
    manifest_path = os.environ.get("COVERAGE_MANIFEST")
    if not manifest_path:
        return

    with open(manifest_path, "r") as manifest:
        for raw_line in manifest:
            listed = raw_line.strip()
            if not listed:
                continue
            try:
                resolved = os.path.realpath(listed)
            except OSError:
                print(
                    "Could not find instrumented file {}".format(listed),
                    file=sys.stderr,
                    flush=True,
                )
                continue
            if resolved == listed:
                continue
            print_verbose_coverage("Fixing up {} -> {}".format(resolved, listed))
            yield (resolved, listed)
285
286
def unresolve_symlinks(output_filename):
    # type: (str) -> None
    """Rewrite the lcov report to use the listed paths of instrumented files.

    Although coveragepy is asked to use relative file names, it currently
    ignores that when generating the lcov report (and other reports which are
    not the XML report), so the report is fixed up afterwards: on every "SF:"
    line, each instrumented file's real path is replaced with the path listed
    in the coverage manifest.

    This is a workaround. Once the issue is fixed upstream and the updated
    version is widely in use, this function should be removed.

    See https://github.com/nedbat/coveragepy/issues/963.
    """
    replacements = list(instrumented_file_paths())
    if not replacements:
        return

    # Move the report aside, then stream it back to its original name.
    scratch_name = output_filename + ".tmp"
    os.rename(output_filename, scratch_name)
    with open(scratch_name, "r") as source, open(output_filename, "w") as fixed:
        for record in source:
            if record.startswith("SF:"):
                for resolved, listed in replacements:
                    record = record.replace(resolved, listed)
            fixed.write(record)
    os.unlink(scratch_name)
312
313
def _run_py(main_filename, *, args, cwd=None):
    """Run the given Python file as `__main__` in this process.

    sys.argv is set to the file name followed by `args` for the duration of
    the run. Both sys.argv and the working directory are restored afterwards,
    even if the program raises.

    Args:
      main_filename: Path of the Python file to execute.
      args: List of command line arguments to pass to the program.
      cwd: Optional directory to change into before running.
    """
    saved_argv, saved_cwd = sys.argv, os.getcwd()
    try:
        sys.argv = [main_filename] + args
        if cwd:
            os.chdir(cwd)
        print_verbose("run_py: cwd:", os.getcwd())
        print_verbose("run_py: sys.argv: ", values=sys.argv)
        print_verbose("run_py: os.environ:", mapping=os.environ)
        print_verbose("run_py: sys.path:", values=sys.path)
        runpy.run_path(main_filename, run_name="__main__")
    finally:
        os.chdir(saved_cwd)
        sys.argv = saved_argv
332
333
@contextlib.contextmanager
def _maybe_collect_coverage(enable):
    """Context manager that optionally records coverage for the wrapped code.

    When `enable` is false, the body runs with no coverage setup at all.
    Otherwise a temporary rc file is written into the directory named by the
    COVERAGE_DIR environment variable, coverage is started, and when the body
    finishes (normally or by raising) coverage is stopped and an lcov report
    is written to "pylcov.dat" in that directory. The temporary rc file is
    removed at the end on a best-effort basis.

    Args:
      enable: Whether to collect coverage.

    Raises:
      KeyError: If `enable` is true and COVERAGE_DIR is not set.
    """
    if not enable:
        yield
        return

    import uuid

    # Imported lazily: only required when coverage collection is enabled.
    import coverage

    coverage_dir = os.environ["COVERAGE_DIR"]
    unique_id = uuid.uuid4()

    # We need coveragepy to use relative paths. That is requested here by
    # writing a uniquely named, throwaway rc file with `relative_files = True`.
    rcfile_name = os.path.join(coverage_dir, ".coveragerc_{}".format(unique_id))
    with open(rcfile_name, "w") as rcfile:
        rcfile.write(
            """[run]
relative_files = True
"""
        )
    try:
        cov = coverage.Coverage(
            config_file=rcfile_name,
            branch=True,
            # NOTE: The messages arg controls what coverage prints to stdout/stderr,
            # which can interfere with the Bazel coverage command. Enabling message
            # output is only useful for debugging coverage support.
            messages=is_verbose_coverage(),
            omit=[
                # Pipes can't be read back later, which can cause coverage to
                # throw an error when trying to get its source code.
                "/dev/fd/*",
                # The mechanism for finding third-party packages in coverage-py
                # only works for installed packages, not for runfiles. e.g:
                #'$HOME/.local/lib/python3.10/site-packages',
                # '/usr/lib/python',
                # '/usr/lib/python3.10/site-packages',
                # '/usr/local/lib/python3.10/dist-packages'
                # see https://github.com/nedbat/coveragepy/blob/bfb0c708fdd8182b2a9f0fc403596693ef65e475/coverage/inorout.py#L153-L164
                "*/external/*",
            ],
        )
        cov.start()
        try:
            # Hand control back to the `with` body while coverage is recording.
            yield
        finally:
            # Runs even when the body raises (including SystemExit), so the
            # report is still produced.
            cov.stop()
            lcov_path = os.path.join(coverage_dir, "pylcov.dat")
            cov.lcov_report(
                outfile=lcov_path,
                # Ignore errors because sometimes instrumented files aren't
                # readable afterwards. e.g. if they come from /dev/fd or if
                # they were transient code-under-test in /tmp
                ignore_errors=True,
            )
            # Only post-process the report if one was actually written.
            if os.path.isfile(lcov_path):
                unresolve_symlinks(lcov_path)
    finally:
        try:
            os.unlink(rcfile_name)
        except OSError as err:
            # It's possible that the profiled program might execute another Python
            # binary through a wrapper that would then delete the rcfile.  Not much
            # we can do about that, besides ignore the failure here.
            print_verbose_coverage("Error removing temporary coverage rc file:", err)
400
401
def main():
    """Set up the environment and sys.path, then run the real main file.

    Steps, in order: locate the runfiles root, compute the import path
    entries, publish the runfiles location via the environment, verify that
    the main file exists and is readable, optionally prepare coverage
    collection (when COVERAGE_DIR is set), update sys.path, and finally
    execute the main file in this process. Exits with status 0 once the main
    file returns.

    Raises:
      AssertionError: If the runfiles root or the main file cannot be found,
        or the main file is not readable.
      EnvironmentError: If a coverage tool is configured but does not exist.
    """
    print_verbose("initial argv:", values=sys.argv)
    print_verbose("initial cwd:", os.getcwd())
    print_verbose("initial environ:", mapping=os.environ)
    print_verbose("initial sys.path:", values=sys.path)

    # MAIN uses forward slashes; convert to the native separator on Windows.
    main_rel_path = MAIN
    if is_windows():
        main_rel_path = main_rel_path.replace("/", os.sep)

    module_space = find_runfiles_root(main_rel_path)
    print_verbose("runfiles root:", module_space)

    # Recreate the "add main's dir to sys.path[0]" behavior to match the
    # system-python bootstrap / typical Python behavior.
    #
    # Without safe path enabled, when `python foo/bar.py` is run, python will
    # resolve the foo/bar.py symlink to its real path, then add the directory
    # of that path to sys.path. But, the resolved directory for the symlink
    # depends on if the file is generated or not.
    #
    # When foo/bar.py is a source file, then it's a symlink pointing
    # back to the client source directory. This means anything from that source
    # directory becomes importable, i.e. most code is importable.
    #
    # When foo/bar.py is a generated file, then it's a symlink pointing to
    # somewhere under bazel-out/.../bin, i.e. where generated files are. This
    # means only other generated files are importable (not source files).
    #
    # To replicate this behavior, we add main's directory within the runfiles
    # when safe path isn't enabled.
    if not getattr(sys.flags, "safe_path", False):
        prepend_path_entries = [
            os.path.join(module_space, os.path.dirname(main_rel_path))
        ]
    else:
        prepend_path_entries = []
    python_path_entries = create_python_path_entries(IMPORTS_STR, module_space)
    python_path_entries += get_repositories_imports(module_space, IMPORT_ALL)
    python_path_entries = [
        get_windows_path_with_unc_prefix(d) for d in python_path_entries
    ]

    # Remove duplicates to avoid overly long PYTHONPATH (#10977). Preserve order,
    # keep first occurrence only.
    python_path_entries = deduplicate(python_path_entries)

    if is_windows():
        python_path_entries = [p.replace("/", os.sep) for p in python_path_entries]
    else:
        # deduplicate returns a generator, but we need a list after this.
        python_path_entries = list(python_path_entries)

    # Publish the runfiles location (directory or manifest) via the
    # environment, if one could be determined.
    runfiles_envkey, runfiles_envvalue = runfiles_envvar(module_space)
    if runfiles_envkey:
        os.environ[runfiles_envkey] = runfiles_envvalue

    main_filename = os.path.join(module_space, main_rel_path)
    main_filename = get_windows_path_with_unc_prefix(main_filename)
    # NOTE(review): these checks use `assert`, so they are skipped when the
    # interpreter runs with optimizations (-O) enabled.
    assert os.path.exists(main_filename), (
        "Cannot exec() %r: file not found." % main_filename
    )
    assert os.access(main_filename, os.R_OK), (
        "Cannot exec() %r: file not readable." % main_filename
    )

    # COVERAGE_DIR is set if coverage is enabled and instrumentation is configured
    # for something, though it could be another program executing this one or
    # one executed by this one (e.g. an extension module).
    if os.environ.get("COVERAGE_DIR"):
        cov_tool = find_coverage_entry_point(module_space)
        if cov_tool is None:
            # NOTE(review): the concatenated message below has no space
            # between "configured." and "To enable".
            print_verbose_coverage(
                "Coverage was enabled, but python coverage tool was not configured."
                + "To enable coverage, consult the docs at "
                + "https://rules-python.readthedocs.io/en/latest/coverage.html"
            )
        else:
            # Inhibit infinite recursion:
            if "PYTHON_COVERAGE" in os.environ:
                del os.environ["PYTHON_COVERAGE"]

            if not os.path.exists(cov_tool):
                raise EnvironmentError(
                    "Python coverage tool %r not found. "
                    "Try running with VERBOSE_COVERAGE=1 to collect more information."
                    % cov_tool
                )

            # coverage library expects sys.path[0] to contain the library, and replaces
            # it with the directory of the program it starts. Our actual sys.path[0] is
            # the runfiles directory, which must not be replaced.
            # CoverageScript.do_execute() undoes this sys.path[0] setting.
            #
            # Update sys.path such that python finds the coverage package. The coverage
            # entry point is coverage.coverage_main, so we need to do twice the dirname.
            coverage_dir = os.path.dirname(os.path.dirname(cov_tool))
            print_verbose("coverage: adding to sys.path:", coverage_dir)
            python_path_entries.append(coverage_dir)
            # From here on python_path_entries is a generator again; it is
            # only consumed once below, by slice assignment or extend().
            python_path_entries = deduplicate(python_path_entries)
    else:
        cov_tool = None

    sys.stdout.flush()

    # Add the user imports after the stdlib, but before the runtime's
    # site-packages directory. This gives the stdlib precedence, while allowing
    # users to override non-stdlib packages that may have been bundled with
    # the runtime (usually pip).
    # NOTE: There isn't a good way to identify the stdlib paths, so we just
    # expect site-packages comes after it, per
    # https://docs.python.org/3/library/sys_path_init.html#sys-path-init
    for i, path in enumerate(sys.path):
        # dist-packages is a debian convention, see
        # https://wiki.debian.org/Python#Deviations_from_upstream
        if os.path.basename(path) in ("site-packages", "dist-packages"):
            sys.path[i:i] = python_path_entries
            break
    else:
        # Otherwise, no site-packages directory was found, which is odd but ok.
        sys.path.extend(python_path_entries)

    # NOTE: The sys.path must be modified before coverage is imported/activated
    # NOTE: Perform this after the user imports are appended. This avoids a
    # user import accidentally triggering the site-packages logic above.
    sys.path[0:0] = prepend_path_entries

    with _maybe_collect_coverage(enable=cov_tool is not None):
        # The first arg is this bootstrap, so drop that for the re-invocation.
        _run_py(main_filename, args=sys.argv[1:])
        # Exiting inside the `with` block still runs the context manager's
        # cleanup, because SystemExit propagates through it.
        sys.exit(0)
536
537
# Run the bootstrap unconditionally when this file is executed; main() ends
# by calling sys.exit(0) once the wrapped program returns.
main()
539