# This is a "stage 2" bootstrap. We can assume we're running under the desired
# interpreter, with some of the basic interpreter options/envvars set.
# However, more setup is required to make the app's real main file runnable.

import sys

# The Python interpreter unconditionally prepends the directory containing this
# script (following symlinks) to the import path. This is the cause of #9239,
# and is a special case of #7091. We therefore explicitly delete that entry.
# TODO(#7091): Remove this hack when no longer necessary.
# TODO: Use sys.flags.safe_path to determine whether this removal should be
# performed
del sys.path[0]

import contextlib
import os
import re
import runpy
import subprocess
import uuid

# ===== Template substitutions start =====
# We just put them in one place so it's easy to tell which are used.

# Runfiles-relative path to the main Python source file.
MAIN = "%main%"
# Colon-delimited string of runfiles-relative import paths to add
IMPORTS_STR = "%imports%"
WORKSPACE_NAME = "%workspace_name%"
# Though the import all value is the correct literal, we quote it
# so this file is parsable by tools.
IMPORT_ALL = True if "%import_all%" == "True" else False
# Runfiles-relative path to the coverage tool entry point, if any.
COVERAGE_TOOL = "%coverage_tool%"

# ===== Template substitutions end =====


def is_windows():
    """Returns True if running on Windows."""
    return os.name == "nt"


def _version_tuple(version):
    """Converts a dotted version string into a tuple of ints.

    Parsing stops at the first component that is not a plain decimal number,
    so an empty or malformed string yields a (possibly empty) prefix tuple.
    Comparing tuples avoids the lexicographic pitfalls of comparing version
    strings (e.g. "6.3.9600" >= "10.0.14393" is True for strings).
    """
    numbers = []
    for component in version.split("."):
        if not component.isdecimal():
            break
        numbers.append(int(component))
    return tuple(numbers)


def get_windows_path_with_unc_prefix(path):
    """Adds the Windows long-path prefix to `path` when it is needed.

    Args:
        path: The path to process; surrounding whitespace is stripped.

    Returns:
        The stripped path unchanged when not on Windows, when running under
        Python 2, on Windows 10 build 14393 or later, on mingw, or when the
        path already carries the prefix. Otherwise the prefix followed by the
        normalized absolute path.
    """
    path = path.strip()

    # No need to add prefix for non-Windows platforms.
    if not is_windows() or sys.version_info[0] < 3:
        return path

    # Starting in Windows 10, version 1607(OS build 14393), MAX_PATH limitations have been
    # removed from common Win32 file and directory functions.
    # Related doc: https://docs.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=cmd#enable-long-paths-in-windows-10-version-1607-and-later
    import platform

    # Compare numerically; a plain string comparison misorders versions.
    if _version_tuple(platform.win32_ver()[1]) >= (10, 0, 14393):
        return path

    # import sysconfig only now to maintain python 2.6 compatibility
    import sysconfig

    if sysconfig.get_platform() == "mingw":
        return path

    # Lets start the unicode fun
    unicode_prefix = "\\\\?\\"
    if path.startswith(unicode_prefix):
        return path

    # os.path.abspath returns a normalized absolute path
    return unicode_prefix + os.path.abspath(path)


def search_path(name):
    """Finds an executable file named `name` in the PATH search path.

    Returns:
        The path of the first match that is a file and is executable, or None
        if no directory on PATH (or os.defpath when PATH is unset) has one.
    """
    directories = os.getenv("PATH", os.defpath).split(os.pathsep)
    for directory in directories:
        if directory:
            path = os.path.join(directory, name)
            if os.path.isfile(path) and os.access(path, os.X_OK):
                return path
    return None


def is_verbose():
    """Returns True if RULES_PYTHON_BOOTSTRAP_VERBOSE is non-empty in the environment."""
    return bool(os.environ.get("RULES_PYTHON_BOOTSTRAP_VERBOSE"))


def print_verbose(*args, mapping=None, values=None):
    """Prints a debug message to stderr when verbose mode is enabled.

    Args:
        *args: Values printed after the "bootstrap: stage 2:" prefix.
        mapping: Optional mapping; when given, one line is printed per
            key (in sorted key order) as `key=value`.
        values: Optional iterable; when given (and mapping is None), one line
            is printed per element as `[index] value`.
    """
    if not is_verbose():
        return
    if mapping is not None:
        for key, value in sorted(mapping.items()):
            print(
                "bootstrap: stage 2:",
                *args,
                f"{key}={value!r}",
                file=sys.stderr,
                flush=True,
            )
    elif values is not None:
        for i, v in enumerate(values):
            print(
                "bootstrap: stage 2:",
                *args,
                f"[{i}] {v!r}",
                file=sys.stderr,
                flush=True,
            )
    else:
        print("bootstrap: stage 2:", *args, file=sys.stderr, flush=True)


def print_verbose_coverage(*args):
    """Print output if VERBOSE_COVERAGE is non-empty in the environment."""
    if os.environ.get("VERBOSE_COVERAGE"):
        print(*args, file=sys.stderr, flush=True)


def is_verbose_coverage():
    """Returns True if VERBOSE_COVERAGE is non-empty or bootstrap verbosity is on."""
    return bool(os.environ.get("VERBOSE_COVERAGE")) or is_verbose()


def find_coverage_entry_point(module_space):
    """Locates the coverage tool entry point.

    The toolchain-provided COVERAGE_TOOL takes precedence; otherwise the
    PYTHON_COVERAGE environment variable is consulted.

    Returns:
        The result of find_binary() for the configured tool, or None when no
        coverage tool is configured.
    """
    cov_tool = COVERAGE_TOOL
    if cov_tool:
        print_verbose_coverage("Using toolchain coverage_tool %r" % cov_tool)
    else:
        cov_tool = os.environ.get("PYTHON_COVERAGE")
        if cov_tool:
            print_verbose_coverage("PYTHON_COVERAGE: %r" % cov_tool)
    if cov_tool:
        return find_binary(module_space, cov_tool)
    return None


def find_binary(module_space, bin_name):
    """Finds the real binary if it's not a normal absolute path.

    Args:
        module_space: The runfiles root that relative paths are joined to.
        bin_name: A label, absolute path, runfiles-relative path, or bare name.

    Returns:
        The resolved path, or None if `bin_name` is empty or a bare name that
        cannot be found on the search path.

    Raises:
        AssertionError: If `bin_name` is a label (starts with "//").
    """
    if not bin_name:
        return None
    if bin_name.startswith("//"):
        # Case 1: Path is a label. Not supported yet.
        raise AssertionError(
            "Bazel does not support execution of Python interpreters via labels yet"
        )
    elif os.path.isabs(bin_name):
        # Case 2: Absolute path.
        return bin_name
    # Use normpath() to convert slashes to os.sep on Windows.
    elif os.sep in os.path.normpath(bin_name):
        # Case 3: Path is relative to the repo root.
        return os.path.join(module_space, bin_name)
    else:
        # Case 4: Path has to be looked up in the search path.
        return search_path(bin_name)


def create_python_path_entries(python_imports, module_space):
    """Returns `module_space` followed by each colon-separated import under it."""
    parts = python_imports.split(":")
    return [module_space] + ["%s/%s" % (module_space, path) for path in parts]


def find_runfiles_root(main_rel_path):
    """Finds the runfiles tree.

    Args:
        main_rel_path: Runfiles-relative path of the main entry point, used to
            sanity check a runfiles directory taken from the environment.

    Returns:
        The path of the runfiles directory.

    Raises:
        AssertionError: If no runfiles directory can be located.
    """
    # When the calling process used the runfiles manifest to resolve the
    # location of this stub script, the path may be expanded. This means
    # argv[0] may no longer point to a location inside the runfiles
    # directory. We should therefore respect RUNFILES_DIR and
    # RUNFILES_MANIFEST_FILE set by the caller.
    runfiles_dir = os.environ.get("RUNFILES_DIR", None)
    if not runfiles_dir:
        runfiles_manifest_file = os.environ.get("RUNFILES_MANIFEST_FILE", "")
        if runfiles_manifest_file.endswith(
            (".runfiles_manifest", ".runfiles/MANIFEST")
        ):
            # "_manifest" and "/MANIFEST" have the same length, so stripping
            # that many characters leaves the ".runfiles" directory either way.
            runfiles_dir = runfiles_manifest_file[: -len("_manifest")]
    # Be defensive: the runfiles dir should contain our main entry point. If
    # it doesn't, then it must not be our runfiles directory.
    if runfiles_dir and os.path.exists(os.path.join(runfiles_dir, main_rel_path)):
        return runfiles_dir

    stub_filename = sys.argv[0]
    if not os.path.isabs(stub_filename):
        stub_filename = os.path.join(os.getcwd(), stub_filename)

    # Follow the chain of symlinks from argv[0] until a runfiles directory is
    # found next to, or as an ancestor of, one of the paths in the chain.
    while True:
        module_space = stub_filename + (".exe" if is_windows() else "") + ".runfiles"
        if os.path.isdir(module_space):
            return module_space

        runfiles_pattern = r"(.*\.runfiles)" + (r"\\" if is_windows() else "/") + ".*"
        matchobj = re.match(runfiles_pattern, stub_filename)
        if matchobj:
            return matchobj.group(1)

        if not os.path.islink(stub_filename):
            break
        target = os.readlink(stub_filename)
        if os.path.isabs(target):
            stub_filename = target
        else:
            stub_filename = os.path.join(os.path.dirname(stub_filename), target)

    raise AssertionError("Cannot find .runfiles directory for %s" % sys.argv[0])


def get_repositories_imports(module_space, import_all):
    """Returns repository roots to add to the import path.

    When `import_all` is true, every directory directly under `module_space`
    is returned in sorted order; otherwise only the main workspace directory.
    """
    if import_all:
        repo_dirs = [os.path.join(module_space, d) for d in os.listdir(module_space)]
        repo_dirs.sort()
        return [d for d in repo_dirs if os.path.isdir(d)]
    return [os.path.join(module_space, WORKSPACE_NAME)]


def runfiles_envvar(module_space):
    """Finds the runfiles manifest or the runfiles directory.

    Returns:
      A tuple of (var_name, var_value) where var_name is either 'RUNFILES_DIR' or
      'RUNFILES_MANIFEST_FILE' and var_value is the path to that directory or
      file, or (None, None) if runfiles couldn't be found.
    """
    # If this binary is the data-dependency of another one, the other sets
    # RUNFILES_MANIFEST_FILE or RUNFILES_DIR for our sake.
    runfiles = os.environ.get("RUNFILES_MANIFEST_FILE", None)
    if runfiles:
        return ("RUNFILES_MANIFEST_FILE", runfiles)

    runfiles = os.environ.get("RUNFILES_DIR", None)
    if runfiles:
        return ("RUNFILES_DIR", runfiles)

    # Look for the runfiles "output" manifest, argv[0] + ".runfiles_manifest"
    runfiles = module_space + "_manifest"
    if os.path.exists(runfiles):
        return ("RUNFILES_MANIFEST_FILE", runfiles)

    # Look for the runfiles "input" manifest, argv[0] + ".runfiles/MANIFEST"
    # Normally .runfiles_manifest and MANIFEST are both present, but the
    # former will be missing for zip-based builds or if someone copies the
    # runfiles tree elsewhere.
    runfiles = os.path.join(module_space, "MANIFEST")
    if os.path.exists(runfiles):
        return ("RUNFILES_MANIFEST_FILE", runfiles)

    # If running in a sandbox and no environment variables are set, then
    # Look for the runfiles next to the binary.
    if module_space.endswith(".runfiles") and os.path.isdir(module_space):
        return ("RUNFILES_DIR", module_space)

    return (None, None)


def deduplicate(items):
    """Efficiently filter out duplicates, keeping the first element only.

    This is a generator: the result can be iterated only once.
    """
    seen = set()
    for it in items:
        if it not in seen:
            seen.add(it)
            yield it


def instrumented_file_paths():
    """Yields tuples of realpath of each instrumented file with the relative path.

    The file names are read, one per line, from the manifest named by the
    COVERAGE_MANIFEST environment variable. Nothing is yielded when that
    variable is unset or empty, and only files whose realpath differs from
    the listed name are yielded.
    """
    manifest_filename = os.environ.get("COVERAGE_MANIFEST")
    if not manifest_filename:
        return
    with open(manifest_filename, "r") as manifest:
        for line in manifest:
            filename = line.strip()
            if not filename:
                continue
            try:
                realpath = os.path.realpath(filename)
            except OSError:
                print(
                    "Could not find instrumented file {}".format(filename),
                    file=sys.stderr,
                    flush=True,
                )
                continue
            if realpath != filename:
                print_verbose_coverage("Fixing up {} -> {}".format(realpath, filename))
                yield (realpath, filename)


def unresolve_symlinks(output_filename):
    # type: (str) -> None
    """Replace realpath of instrumented files with the relative path in the lcov output.

    Though we are asking coveragepy to use relative file names, it currently
    ignores that for purposes of generating the lcov report (and other reports
    which are not the XML report), so we need to go and fix up the report.

    This function is a workaround for that issue. Once that issue is fixed
    upstream and the updated version is widely in use, this should be removed.

    See https://github.com/nedbat/coveragepy/issues/963.
    """
    substitutions = list(instrumented_file_paths())
    if substitutions:
        # Move the report aside, then rewrite it line by line into place.
        unfixed_file = output_filename + ".tmp"
        os.rename(output_filename, unfixed_file)
        with open(unfixed_file, "r") as unfixed, open(
            output_filename, "w"
        ) as output_file:
            for line in unfixed:
                # Only "SF:" (source file) records carry paths to rewrite.
                if line.startswith("SF:"):
                    for realpath, filename in substitutions:
                        line = line.replace(realpath, filename)
                output_file.write(line)
        os.unlink(unfixed_file)


def _run_py(main_filename, *, args, cwd=None):
    """Executes the given Python file as `__main__` in this process.

    Args:
        main_filename: Path of the Python file to run.
        args: List of arguments; sys.argv becomes [main_filename] + args
            for the duration of the run.
        cwd: Optional directory to change into before running.

    sys.argv and the working directory are restored afterwards, even when the
    program raises.
    """
    orig_argv = sys.argv
    orig_cwd = os.getcwd()
    try:
        sys.argv = [main_filename] + args
        if cwd:
            os.chdir(cwd)
        print_verbose("run_py: cwd:", os.getcwd())
        print_verbose("run_py: sys.argv: ", values=sys.argv)
        print_verbose("run_py: os.environ:", mapping=os.environ)
        print_verbose("run_py: sys.path:", values=sys.path)
        runpy.run_path(main_filename, run_name="__main__")
    finally:
        os.chdir(orig_cwd)
        sys.argv = orig_argv


@contextlib.contextmanager
def _maybe_collect_coverage(enable):
    """Context manager that collects coverage for its body when `enable` is true.

    When enabled, the COVERAGE_DIR environment variable must be set; the lcov
    report is written to "pylcov.dat" inside that directory when the body
    finishes (normally or with an exception). When disabled, the body simply
    runs.
    """
    if not enable:
        yield
        return

    # Imported lazily: sys.path must already be set up so that the coverage
    # package can be found.
    import coverage

    coverage_dir = os.environ["COVERAGE_DIR"]
    unique_id = uuid.uuid4()

    # We need for coveragepy to use relative paths. This is configured through
    # an rc file, so write a uniquely named temporary one.
    rcfile_name = os.path.join(coverage_dir, ".coveragerc_{}".format(unique_id))
    with open(rcfile_name, "w") as rcfile:
        rcfile.write("[run]\nrelative_files = True\n")
    try:
        cov = coverage.Coverage(
            config_file=rcfile_name,
            branch=True,
            # NOTE: The messages arg controls what coverage prints to stdout/stderr,
            # which can interfere with the Bazel coverage command. Enabling message
            # output is only useful for debugging coverage support.
            messages=is_verbose_coverage(),
            omit=[
                # Pipes can't be read back later, which can cause coverage to
                # throw an error when trying to get its source code.
                "/dev/fd/*",
                # The mechanism for finding third-party packages in coverage-py
                # only works for installed packages, not for runfiles. e.g:
                #'$HOME/.local/lib/python3.10/site-packages',
                # '/usr/lib/python',
                # '/usr/lib/python3.10/site-packages',
                # '/usr/local/lib/python3.10/dist-packages'
                # see https://github.com/nedbat/coveragepy/blob/bfb0c708fdd8182b2a9f0fc403596693ef65e475/coverage/inorout.py#L153-L164
                "*/external/*",
            ],
        )
        cov.start()
        try:
            yield
        finally:
            cov.stop()
            lcov_path = os.path.join(coverage_dir, "pylcov.dat")
            cov.lcov_report(
                outfile=lcov_path,
                # Ignore errors because sometimes instrumented files aren't
                # readable afterwards. e.g. if they come from /dev/fd or if
                # they were transient code-under-test in /tmp
                ignore_errors=True,
            )
            if os.path.isfile(lcov_path):
                unresolve_symlinks(lcov_path)
    finally:
        try:
            os.unlink(rcfile_name)
        except OSError as err:
            # It's possible that the profiled program might execute another Python
            # binary through a wrapper that would then delete the rcfile.  Not much
            # we can do about that, besides ignore the failure here.
            print_verbose_coverage("Error removing temporary coverage rc file:", err)


def main():
    """Sets up sys.path and the runfiles environment, then runs the main file.

    Raises:
        AssertionError: If the runfiles tree cannot be found, or the main file
            does not exist or is not readable.
        EnvironmentError: If coverage is enabled and the configured coverage
            tool does not exist.
    """
    print_verbose("initial argv:", values=sys.argv)
    print_verbose("initial cwd:", os.getcwd())
    print_verbose("initial environ:", mapping=os.environ)
    print_verbose("initial sys.path:", values=sys.path)

    main_rel_path = MAIN
    if is_windows():
        main_rel_path = main_rel_path.replace("/", os.sep)

    module_space = find_runfiles_root(main_rel_path)
    print_verbose("runfiles root:", module_space)

    # Recreate the "add main's dir to sys.path[0]" behavior to match the
    # system-python bootstrap / typical Python behavior.
    #
    # Without safe path enabled, when `python foo/bar.py` is run, python will
    # resolve the foo/bar.py symlink to its real path, then add the directory
    # of that path to sys.path. But, the resolved directory for the symlink
    # depends on if the file is generated or not.
    #
    # When foo/bar.py is a source file, then it's a symlink pointing
    # back to the client source directory. This means anything from that source
    # directory becomes importable, i.e. most code is importable.
    #
    # When foo/bar.py is a generated file, then it's a symlink pointing to
    # somewhere under bazel-out/.../bin, i.e. where generated files are. This
    # means only other generated files are importable (not source files).
    #
    # To replicate this behavior, we add main's directory within the runfiles
    # when safe path isn't enabled.
    if not getattr(sys.flags, "safe_path", False):
        prepend_path_entries = [
            os.path.join(module_space, os.path.dirname(main_rel_path))
        ]
    else:
        prepend_path_entries = []
    python_path_entries = create_python_path_entries(IMPORTS_STR, module_space)
    python_path_entries += get_repositories_imports(module_space, IMPORT_ALL)
    python_path_entries = [
        get_windows_path_with_unc_prefix(d) for d in python_path_entries
    ]

    # Remove duplicates to avoid overly long PYTHONPATH (#10977). Preserve order,
    # keep first occurrence only.
    python_path_entries = deduplicate(python_path_entries)

    if is_windows():
        python_path_entries = [p.replace("/", os.sep) for p in python_path_entries]
    else:
        # deduplicate returns a generator, but we need a list after this.
        python_path_entries = list(python_path_entries)

    # Export the runfiles location through the environment when it is known.
    runfiles_envkey, runfiles_envvalue = runfiles_envvar(module_space)
    if runfiles_envkey:
        os.environ[runfiles_envkey] = runfiles_envvalue

    main_filename = os.path.join(module_space, main_rel_path)
    main_filename = get_windows_path_with_unc_prefix(main_filename)
    # Raise explicitly rather than using `assert`, which is skipped under -O.
    if not os.path.exists(main_filename):
        raise AssertionError("Cannot exec() %r: file not found." % main_filename)
    if not os.access(main_filename, os.R_OK):
        raise AssertionError("Cannot exec() %r: file not readable." % main_filename)

    # COVERAGE_DIR is set if coverage is enabled and instrumentation is configured
    # for something, though it could be another program executing this one or
    # one executed by this one (e.g. an extension module).
    if os.environ.get("COVERAGE_DIR"):
        cov_tool = find_coverage_entry_point(module_space)
        if cov_tool is None:
            print_verbose_coverage(
                "Coverage was enabled, but python coverage tool was not configured. "
                + "To enable coverage, consult the docs at "
                + "https://rules-python.readthedocs.io/en/latest/coverage.html"
            )
        else:
            # Inhibit infinite recursion:
            if "PYTHON_COVERAGE" in os.environ:
                del os.environ["PYTHON_COVERAGE"]

            if not os.path.exists(cov_tool):
                raise EnvironmentError(
                    "Python coverage tool %r not found. "
                    "Try running with VERBOSE_COVERAGE=1 to collect more information."
                    % cov_tool
                )

            # coverage library expects sys.path[0] to contain the library, and replaces
            # it with the directory of the program it starts. Our actual sys.path[0] is
            # the runfiles directory, which must not be replaced.
            # CoverageScript.do_execute() undoes this sys.path[0] setting.
            #
            # Update sys.path such that python finds the coverage package. The coverage
            # entry point is coverage.coverage_main, so we need to do twice the dirname.
            coverage_dir = os.path.dirname(os.path.dirname(cov_tool))
            print_verbose("coverage: adding to sys.path:", coverage_dir)
            python_path_entries.append(coverage_dir)
            # Materialize the result: deduplicate() returns a one-shot generator.
            python_path_entries = list(deduplicate(python_path_entries))
    else:
        cov_tool = None

    sys.stdout.flush()

    # Add the user imports after the stdlib, but before the runtime's
    # site-packages directory. This gives the stdlib precedence, while allowing
    # users to override non-stdlib packages that may have been bundled with
    # the runtime (usually pip).
    # NOTE: There isn't a good way to identify the stdlib paths, so we just
    # expect site-packages comes after it, per
    # https://docs.python.org/3/library/sys_path_init.html#sys-path-init
    for i, path in enumerate(sys.path):
        # dist-packages is a debian convention, see
        # https://wiki.debian.org/Python#Deviations_from_upstream
        if os.path.basename(path) in ("site-packages", "dist-packages"):
            sys.path[i:i] = python_path_entries
            break
    else:
        # Otherwise, no site-packages directory was found, which is odd but ok.
        sys.path.extend(python_path_entries)

    # NOTE: The sys.path must be modified before coverage is imported/activated
    # NOTE: Perform this after the user imports are appended. This avoids a
    # user import accidentally triggering the site-packages logic above.
    sys.path[0:0] = prepend_path_entries

    with _maybe_collect_coverage(enable=cov_tool is not None):
        # The first arg is this bootstrap, so drop that for the re-invocation.
        _run_py(main_filename, args=sys.argv[1:])
        sys.exit(0)


main()