# Copyright 2022 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Various things common to Bazel and Google rule implementations."""

load("//python/private:reexports.bzl", "BuiltinPyInfo")
load(":cc_helper.bzl", "cc_helper")
load(":providers.bzl", "PyInfo")
load(":py_internal.bzl", "py_internal")
load(
    ":semantics.bzl",
    "NATIVE_RULES_MIGRATION_FIX_CMD",
    "NATIVE_RULES_MIGRATION_HELP_URL",
)

# Bind the ambient Bazel top-level symbols to private names so uses below are
# explicit about where they come from.
_testing = testing
_platform_common = platform_common
_coverage_common = coverage_common
_py_builtins = py_internal

# PackageSpecificationInfo may not exist on older Bazel versions; fall back to
# None so the allowlist check below can degrade gracefully.
PackageSpecificationInfo = getattr(py_internal, "PackageSpecificationInfo", None)

# Extensions without the dot
_PYTHON_SOURCE_EXTENSIONS = ["py"]

# NOTE: Must stay in sync with the value used in rules_python
_MIGRATION_TAG = "__PYTHON_RULES_MIGRATION_DO_NOT_USE_WILL_BREAK__"

def create_binary_semantics_struct(
        *,
        create_executable,
        get_cc_details_for_binary,
        get_central_uncachable_version_file,
        get_coverage_deps,
        get_debugger_deps,
        get_extra_common_runfiles_for_binary,
        get_extra_providers,
        get_extra_write_build_data_env,
        get_interpreter_path,
        get_imports,
        get_native_deps_dso_name,
        get_native_deps_user_link_flags,
        get_stamp_flag,
        maybe_precompile,
        should_build_native_deps_dso,
        should_create_init_files,
        should_include_build_data):
    """Helper to ensure a semantics struct has all necessary fields.

    Call this instead of a raw call to `struct(...)`; it'll help ensure all
    the necessary functions are being correctly provided.

    Args:
        create_executable: Callable; creates a binary's executable output. See
            py_executable.bzl#py_executable_base_impl for details.
        get_cc_details_for_binary: Callable that returns a `CcDetails` struct; see
            `create_cc_detail_struct`.
        get_central_uncachable_version_file: Callable that returns an optional
            Artifact; this artifact is special: it is never cached and is a copy
            of `ctx.version_file`; see py_builtins.copy_without_caching
        get_coverage_deps: Callable that returns a list of Targets for making
            coverage work; only called if coverage is enabled.
        get_debugger_deps: Callable that returns a list of Targets that provide
            custom debugger support; only called for target-configuration.
        get_extra_common_runfiles_for_binary: Callable that returns a runfiles
            object of extra runfiles a binary should include.
        get_extra_providers: Callable that returns extra providers; see
            py_executable.bzl#_create_providers for details.
        get_extra_write_build_data_env: Callable that returns a dict[str, str]
            of additional environment variables to pass to build data generation.
        get_interpreter_path: Callable that returns an optional string, which is
            the path to the Python interpreter to use for running the binary.
        get_imports: Callable that returns a list of the target's import
            paths (from the `imports` attribute, so just the target's own import
            path strings, not from dependencies).
        get_native_deps_dso_name: Callable that returns a string, which is the
            basename (with extension) of the native deps DSO library.
        get_native_deps_user_link_flags: Callable that returns a list of strings,
            which are any extra linker flags to pass onto the native deps DSO
            linking action.
        get_stamp_flag: Callable that returns bool of if the --stamp flag was
            enabled or not.
        maybe_precompile: Callable that may optionally precompile the input `.py`
            sources and returns the full set of desired outputs derived from
            the source files (e.g., both py and pyc, only one of them, etc).
        should_build_native_deps_dso: Callable that returns bool; True if
            building a native deps DSO is supported, False if not.
        should_create_init_files: Callable that returns bool; True if
            `__init__.py` files should be generated, False if not.
        should_include_build_data: Callable that returns bool; True if
            build data should be generated, False if not.
    Returns:
        A "BinarySemantics" struct.
    """
    return struct(
        # keep-sorted
        create_executable = create_executable,
        get_cc_details_for_binary = get_cc_details_for_binary,
        get_central_uncachable_version_file = get_central_uncachable_version_file,
        get_coverage_deps = get_coverage_deps,
        get_debugger_deps = get_debugger_deps,
        get_extra_common_runfiles_for_binary = get_extra_common_runfiles_for_binary,
        get_extra_providers = get_extra_providers,
        get_extra_write_build_data_env = get_extra_write_build_data_env,
        get_imports = get_imports,
        get_interpreter_path = get_interpreter_path,
        get_native_deps_dso_name = get_native_deps_dso_name,
        get_native_deps_user_link_flags = get_native_deps_user_link_flags,
        get_stamp_flag = get_stamp_flag,
        maybe_precompile = maybe_precompile,
        should_build_native_deps_dso = should_build_native_deps_dso,
        should_create_init_files = should_create_init_files,
        should_include_build_data = should_include_build_data,
    )

def create_library_semantics_struct(
        *,
        get_cc_info_for_library,
        get_imports,
        maybe_precompile):
    """Create a `LibrarySemantics` struct.

    Call this instead of a raw call to `struct(...)`; it'll help ensure all
    the necessary functions are being correctly provided.

    Args:
        get_cc_info_for_library: Callable that returns a CcInfo for the library;
            see py_library_impl for arg details.
        get_imports: Callable; see create_binary_semantics_struct.
        maybe_precompile: Callable; see create_binary_semantics_struct.
    Returns:
        a `LibrarySemantics` struct.
    """
    return struct(
        # keep-sorted
        get_cc_info_for_library = get_cc_info_for_library,
        get_imports = get_imports,
        maybe_precompile = maybe_precompile,
    )

def create_cc_details_struct(
        *,
        cc_info_for_propagating,
        cc_info_for_self_link,
        cc_info_with_extra_link_time_libraries,
        extra_runfiles,
        cc_toolchain,
        feature_config,
        **kwargs):
    """Creates a CcDetails struct.

    Args:
        cc_info_for_propagating: CcInfo that is propagated out of the target
            by returning it within a PyCcLinkParamsProvider object.
        cc_info_for_self_link: CcInfo that is used when linking for the
            binary (or its native deps DSO) itself. This may include extra
            information that isn't propagating (e.g. a custom malloc)
        cc_info_with_extra_link_time_libraries: CcInfo of extra link time
            libraries that MUST come after `cc_info_for_self_link` (or possibly
            always last; not entirely clear) when passed to
            `link.linking_contexts`.
        extra_runfiles: runfiles of extra files needed at runtime, usually as
            part of `cc_info_with_extra_link_time_libraries`; should be added to
            runfiles.
        cc_toolchain: CcToolchain that should be used when building.
        feature_config: struct from cc_configure_features(); see
            //python/private/common:py_executable.bzl%cc_configure_features.
        **kwargs: Additional keys/values to set in the returned struct. This is to
            facilitate extensions with less patching. Any added fields should
            pick names that are unlikely to collide if the CcDetails API has
            additional fields added.

    Returns:
        A `CcDetails` struct.
    """
    return struct(
        cc_info_for_propagating = cc_info_for_propagating,
        cc_info_for_self_link = cc_info_for_self_link,
        cc_info_with_extra_link_time_libraries = cc_info_with_extra_link_time_libraries,
        extra_runfiles = extra_runfiles,
        cc_toolchain = cc_toolchain,
        feature_config = feature_config,
        **kwargs
    )

def create_executable_result_struct(*, extra_files_to_build, output_groups, extra_runfiles = None):
    """Creates a `CreateExecutableResult` struct.

    This is the return value type of the semantics create_executable function.

    Args:
        extra_files_to_build: depset of File; additional files that should be
            included as default outputs.
        output_groups: dict[str, depset[File]]; additional output groups that
            should be returned.
        extra_runfiles: A runfiles object of additional runfiles to include.

    Returns:
        A `CreateExecutableResult` struct.
    """
    return struct(
        extra_files_to_build = extra_files_to_build,
        output_groups = output_groups,
        extra_runfiles = extra_runfiles,
    )

def union_attrs(*attr_dicts, allow_none = False):
    """Helper for combining and building attribute dicts for rules.

    Similar to dict.update, except:
      * Duplicate keys raise an error if they aren't equal. This is to prevent
        unintentionally replacing an attribute with a potentially incompatible
        definition.
      * None values are special: They mean the attribute is required, but the
        value should be provided by another attribute dict (depending on the
        `allow_none` arg).
    Args:
        *attr_dicts: The dicts to combine.
        allow_none: bool, if True, then None values are allowed. If False,
            then one of `attrs_dicts` must set a non-None value for keys
            with a None value.

    Returns:
        dict of attributes.
    """
    result = {}
    missing = {}
    for attr_dict in attr_dicts:
        for attr_name, value in attr_dict.items():
            if value == None and not allow_none:
                # Record as required-but-unset unless some earlier dict
                # already supplied a real value for it.
                if attr_name not in result:
                    missing[attr_name] = None
            else:
                # A later dict satisfied a previously-missing attribute.
                if attr_name in missing:
                    missing.pop(attr_name)

                if attr_name not in result or result[attr_name] == None:
                    result[attr_name] = value
                elif value != None and result[attr_name] != value:
                    fail("Duplicate attribute name: '{}': existing={}, new={}".format(
                        attr_name,
                        result[attr_name],
                        value,
                    ))

                # Else, they're equal, so do nothing. This allows merging dicts
                # that both define the same key from a common place.

    if missing and not allow_none:
        fail("Required attributes missing: " + csv(missing.keys()))
    return result

def csv(values):
    """Convert a list of strings to comma separated value string."""
    return ", ".join(sorted(values))

def filter_to_py_srcs(srcs):
    """Filters .py files from the given list of files."""

    # TODO(b/203567235): Get the set of recognized extensions from
    # elsewhere, as there may be others. e.g. Bazel recognizes .py3
    # as a valid extension.
    return [f for f in srcs if f.extension == "py"]

def collect_imports(ctx, semantics):
    """Collect the import path strings for a target and its dependencies.

    Args:
        ctx: rule ctx; `ctx.attr.deps` is consulted for PyInfo/BuiltinPyInfo
            providers.
        semantics: semantics struct whose `get_imports(ctx)` returns the
            target's own import path strings.

    Returns:
        depset of strings; the direct and transitive import paths.
    """
    return depset(direct = semantics.get_imports(ctx), transitive = [
        dep[PyInfo].imports
        for dep in ctx.attr.deps
        if PyInfo in dep
    ] + [
        dep[BuiltinPyInfo].imports
        for dep in ctx.attr.deps
        if BuiltinPyInfo in dep
    ])

def collect_runfiles(ctx, files):
    """Collects the necessary files from the rule's context.

    This presumes the ctx is for a py_binary, py_test, or py_library rule.

    Args:
        ctx: rule ctx
        files: depset of extra files to include in the runfiles.
    Returns:
        runfiles necessary for the ctx's target.
    """
    return ctx.runfiles(
        transitive_files = files,
        # This little arg carries a lot of weight, but because Starlark doesn't
        # have a way to identify if a target is just a File, the equivalent
        # logic can't be re-implemented in pure-Starlark.
        #
        # Under the hood, it calls the Java `Runfiles#addRunfiles(ctx,
        # DEFAULT_RUNFILES)` method, which is what the Java implementation
        # of the Python rules originally did, and the details of how that method
        # works have become relied on in various ways. Specifically, what it
        # does is visit the srcs, deps, and data attributes in the following
        # ways:
        #
        # For each target in the "data" attribute...
        #   If the target is a File, then add that file to the runfiles.
        #   Otherwise, add the target's **data runfiles** to the runfiles.
        #
        # Note that, contrary to best practice, the default outputs of the
        # targets in `data` are *not* added, nor are the default runfiles.
        #
        # This ends up being important for several reasons, some of which are
        # specific to Google-internal features of the rules.
        #   * For Python executables, we have to use `data_runfiles` to avoid
        #     conflicts for the build data files. Such files have
        #     target-specific content, but uses a fixed location, so if a
        #     binary has another binary in `data`, and both try to specify a
        #     file for that file path, then a warning is printed and an
        #     arbitrary one will be used.
        #   * For rules with _entirely_ different sets of files in data runfiles
        #     vs default runfiles vs default outputs. For example,
        #     proto_library: documented behavior of this rule is that putting it
        #     in the `data` attribute will cause the transitive closure of
        #     `.proto` source files to be included. This set of sources is only
        #     in the `data_runfiles` (`default_runfiles` is empty).
        #   * For rules with a _subset_ of files in data runfiles. For example,
        #     a certain Google rule used for packaging arbitrary binaries will
        #     generate multiple versions of a binary (e.g. different archs,
        #     stripped vs un-stripped, etc) in its default outputs, but only
        #     one of them in the runfiles; this helps avoid large, unused
        #     binaries contributing to remote executor input limits.
        #
        # Unfortunately, the above behavior also results in surprising behavior
        # in some cases. For example, simple custom rules that only return their
        # files in their default outputs won't have their files included. Such
        # cases must either return their files in runfiles, or use `filegroup()`
        # which will do so for them.
        #
        # For each target in "srcs" and "deps"...
        #   Add the default runfiles of the target to the runfiles. While this
        #   is desirable behavior, it also ends up letting a `py_library`
        #   be put in `srcs` and still mostly work.
        # TODO(b/224640180): Reject py_library et al rules in srcs.
        collect_default = True,
    )

def create_py_info(ctx, *, direct_sources, direct_pyc_files, imports):
    """Create PyInfo provider.

    Args:
        ctx: rule ctx.
        direct_sources: depset of Files; the direct, raw `.py` sources for the
            target. This should only be Python source files. It should not
            include pyc files.
        direct_pyc_files: depset of Files; the direct `.pyc` sources for the target.
        imports: depset of strings; the import path values to propagate.

    Returns:
        A 3-tuple of (the PyInfo instance, a depset of the transitive sources
        collected from dependencies, the BuiltinPyInfo instance). The depset
        of transitive sources is only necessary for deprecated extra actions
        support.
    """
    uses_shared_libraries = False
    has_py2_only_sources = ctx.attr.srcs_version in ("PY2", "PY2ONLY")
    has_py3_only_sources = ctx.attr.srcs_version in ("PY3", "PY3ONLY")
    transitive_sources_depsets = []  # list of depsets
    transitive_sources_files = []  # list of Files
    transitive_pyc_depsets = [direct_pyc_files]  # list of depsets
    for target in ctx.attr.deps:
        # PyInfo may not be present e.g. cc_library rules.
        if PyInfo in target or BuiltinPyInfo in target:
            info = _get_py_info(target)
            transitive_sources_depsets.append(info.transitive_sources)
            uses_shared_libraries = uses_shared_libraries or info.uses_shared_libraries
            has_py2_only_sources = has_py2_only_sources or info.has_py2_only_sources
            has_py3_only_sources = has_py3_only_sources or info.has_py3_only_sources

            # BuiltinPyInfo doesn't have this field.
            if hasattr(info, "transitive_pyc_files"):
                transitive_pyc_depsets.append(info.transitive_pyc_files)
        else:
            # TODO(b/228692666): Remove this once non-PyInfo targets are no
            # longer supported in `deps`.
            files = target.files.to_list()
            for f in files:
                if f.extension == "py":
                    transitive_sources_files.append(f)
                uses_shared_libraries = (
                    uses_shared_libraries or
                    cc_helper.is_valid_shared_library_artifact(f)
                )
    deps_transitive_sources = depset(
        direct = transitive_sources_files,
        transitive = transitive_sources_depsets,
    )

    # We only look at data to calculate uses_shared_libraries, if it's already
    # true, then we don't need to waste time looping over it.
    if not uses_shared_libraries:
        # Similar to the above, except we only calculate uses_shared_libraries
        for target in ctx.attr.data:
            # TODO(b/234730058): Remove checking for PyInfo in data once depot
            # cleaned up.
            if PyInfo in target or BuiltinPyInfo in target:
                info = _get_py_info(target)
                uses_shared_libraries = info.uses_shared_libraries
            else:
                files = target.files.to_list()
                for f in files:
                    uses_shared_libraries = cc_helper.is_valid_shared_library_artifact(f)
                    if uses_shared_libraries:
                        break
            if uses_shared_libraries:
                break

    py_info_kwargs = dict(
        transitive_sources = depset(
            transitive = [deps_transitive_sources, direct_sources],
        ),
        imports = imports,
        # NOTE: This isn't strictly correct, but with Python 2 gone,
        # the srcs_version logic is largely defunct, so shouldn't matter in
        # practice.
        has_py2_only_sources = has_py2_only_sources,
        has_py3_only_sources = has_py3_only_sources,
        uses_shared_libraries = uses_shared_libraries,
        direct_pyc_files = direct_pyc_files,
        transitive_pyc_files = depset(transitive = transitive_pyc_depsets),
    )

    # TODO(b/203567235): Set `uses_shared_libraries` field, though the Bazel
    # docs indicate it's unused in Bazel and may be removed.
    py_info = PyInfo(**py_info_kwargs)

    # Remove args that BuiltinPyInfo doesn't support
    py_info_kwargs.pop("direct_pyc_files")
    py_info_kwargs.pop("transitive_pyc_files")
    builtin_py_info = BuiltinPyInfo(**py_info_kwargs)

    return py_info, deps_transitive_sources, builtin_py_info

def _get_py_info(target):
    # Prefer the rules_python PyInfo; fall back to the Bazel builtin provider.
    return target[PyInfo] if PyInfo in target else target[BuiltinPyInfo]

def create_instrumented_files_info(ctx):
    """Creates the InstrumentedFilesInfo for coverage over srcs/deps/data."""
    return _coverage_common.instrumented_files_info(
        ctx,
        source_attributes = ["srcs"],
        dependency_attributes = ["deps", "data"],
        extensions = _PYTHON_SOURCE_EXTENSIONS,
    )

def create_output_group_info(transitive_sources, extra_groups):
    """Creates the OutputGroupInfo exposing transitive sources plus extras."""
    return OutputGroupInfo(
        compilation_prerequisites_INTERNAL_ = transitive_sources,
        compilation_outputs = transitive_sources,
        **extra_groups
    )

def maybe_add_test_execution_info(providers, ctx):
    """Adds ExecutionInfo, if necessary for proper test execution.

    Args:
        providers: Mutable list of providers; may have ExecutionInfo
            provider appended.
        ctx: Rule ctx.
    """

    # When built for Apple platforms, require the execution to be on a Mac.
    # TODO(b/176993122): Remove when bazel automatically knows to run on darwin.
    if target_platform_has_any_constraint(ctx, ctx.attr._apple_constraints):
        providers.append(_testing.ExecutionInfo({"requires-darwin": ""}))

_BOOL_TYPE = type(True)

def is_bool(v):
    """Returns True if `v` is a bool."""
    return type(v) == _BOOL_TYPE

def target_platform_has_any_constraint(ctx, constraints):
    """Check if target platform has any of a list of constraints.

    Args:
        ctx: rule context.
        constraints: label_list of constraints.

    Returns:
        True if target platform has at least one of the constraints.
    """
    for constraint in constraints:
        constraint_value = constraint[_platform_common.ConstraintValueInfo]
        if ctx.target_platform_has_constraint(constraint_value):
            return True
    return False

def check_native_allowed(ctx):
    """Check if the usage of the native rule is allowed.

    Args:
        ctx: rule context to check
    """
    if not ctx.fragments.py.disallow_native_rules:
        return

    if _MIGRATION_TAG in ctx.attr.tags:
        return

    # NOTE: The main repo name is empty in *labels*, but not in
    # ctx.workspace_name
    is_main_repo = not bool(ctx.label.workspace_name)
    if is_main_repo:
        check_label = ctx.label
    else:
        # package_group doesn't allow @repo syntax, so we work around that
        # by prefixing external repos with a fake package path. This also
        # makes it easy to enable or disable all external repos.
        check_label = Label("@//__EXTERNAL_REPOS__/{workspace}/{package}".format(
            workspace = ctx.label.workspace_name,
            package = ctx.label.package,
        ))
    allowlist = ctx.attr._native_rules_allowlist
    if allowlist:
        allowed = ctx.attr._native_rules_allowlist[PackageSpecificationInfo].contains(check_label)
        allowlist_help = str(allowlist.label).replace("@//", "//")
    else:
        allowed = False
        allowlist_help = ("no allowlist specified; all disallowed; specify one " +
                          "with --python_native_rules_allowlist")
    if not allowed:
        if ctx.attr.generator_function:
            generator = "{generator_function}(name={generator_name}) in {generator_location}".format(
                generator_function = ctx.attr.generator_function,
                generator_name = ctx.attr.generator_name,
                generator_location = ctx.attr.generator_location,
            )
        else:
            generator = "No generator (called directly in BUILD file)"

        msg = (
            "{target} not allowed to use native.{rule}\n" +
            "Generated by: {generator}\n" +
            "Allowlist: {allowlist}\n" +
            "Migrate to using @rules_python, see {help_url}\n" +
            "FIXCMD: {fix_cmd} --target={target} --rule={rule} " +
            "--generator_name={generator_name} --location={generator_location}"
        )
        fail(msg.format(
            target = str(ctx.label).replace("@//", "//"),
            rule = _py_builtins.get_rule_name(ctx),
            generator = generator,
            allowlist = allowlist_help,
            generator_name = ctx.attr.generator_name,
            generator_location = ctx.attr.generator_location,
            help_url = NATIVE_RULES_MIGRATION_HELP_URL,
            fix_cmd = NATIVE_RULES_MIGRATION_FIX_CMD,
        ))