xref: /aosp_15_r20/external/bazelbuild-rules_python/python/private/common/common.bzl (revision 60517a1edbc8ecf509223e9af94a7adec7d736b8)
1# Copyright 2022 The Bazel Authors. All rights reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#    http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Various things common to Bazel and Google rule implementations."""
15
16load("//python/private:reexports.bzl", "BuiltinPyInfo")
17load(":cc_helper.bzl", "cc_helper")
18load(":providers.bzl", "PyInfo")
19load(":py_internal.bzl", "py_internal")
20load(
21    ":semantics.bzl",
22    "NATIVE_RULES_MIGRATION_FIX_CMD",
23    "NATIVE_RULES_MIGRATION_HELP_URL",
24)
25
# Private aliases of Bazel-provided top-level symbols, so usages below are
# easy to grep for and clearly distinguished from locals.
_testing = testing
_platform_common = platform_common
_coverage_common = coverage_common
_py_builtins = py_internal

# NOTE(review): fetched via getattr with a None default — presumably because
# some py_internal implementations don't expose PackageSpecificationInfo;
# confirm before relying on it being non-None.
PackageSpecificationInfo = getattr(py_internal, "PackageSpecificationInfo", None)

# Extensions without the dot
_PYTHON_SOURCE_EXTENSIONS = ["py"]

# Tag that opts a target out of the native-rule disallow check below.
# NOTE: Must stay in sync with the value used in rules_python
_MIGRATION_TAG = "__PYTHON_RULES_MIGRATION_DO_NOT_USE_WILL_BREAK__"
37
def create_binary_semantics_struct(
        *,
        create_executable,
        get_cc_details_for_binary,
        get_central_uncachable_version_file,
        get_coverage_deps,
        get_debugger_deps,
        get_extra_common_runfiles_for_binary,
        get_extra_providers,
        get_extra_write_build_data_env,
        get_interpreter_path,
        get_imports,
        get_native_deps_dso_name,
        get_native_deps_user_link_flags,
        get_stamp_flag,
        maybe_precompile,
        should_build_native_deps_dso,
        should_create_init_files,
        should_include_build_data):
    """Builds a `BinarySemantics` struct with every required field.

    Prefer this over a raw `struct(...)` call: because each field is a
    mandatory keyword-only argument, forgetting to supply one of the
    semantics functions fails immediately instead of at first use.

    Args:
        create_executable: Callable; builds a binary's executable output. See
            py_executable.bzl#py_executable_base_impl for details.
        get_cc_details_for_binary: Callable returning a `CcDetails` struct;
            see `create_cc_details_struct`.
        get_central_uncachable_version_file: Callable returning an optional
            Artifact. The artifact is special: it is never cached and is a
            copy of `ctx.version_file`; see py_builtins.copy_without_caching.
        get_coverage_deps: Callable returning a list of Targets needed to make
            coverage work; invoked only when coverage is enabled.
        get_debugger_deps: Callable returning a list of Targets that provide
            custom debugger support; invoked only for target-configuration.
        get_extra_common_runfiles_for_binary: Callable returning a runfiles
            object with extra runfiles a binary should include.
        get_extra_providers: Callable returning extra providers; see
            py_executable.bzl#_create_providers for details.
        get_extra_write_build_data_env: Callable returning a dict[str, str] of
            extra environment variables to pass to build data generation.
        get_interpreter_path: Callable returning an optional string: the path
            to the Python interpreter used to run the binary.
        get_imports: Callable returning a list of the target's own import
            path strings (from the `imports` attribute only, not from
            dependencies).
        get_native_deps_dso_name: Callable returning a string: the basename
            (with extension) of the native deps DSO library.
        get_native_deps_user_link_flags: Callable returning a list of strings:
            extra linker flags to pass to the native deps DSO linking action.
        get_stamp_flag: Callable returning a bool of whether the --stamp flag
            was enabled.
        maybe_precompile: Callable that may optionally precompile the input
            `.py` sources and returns the full set of desired outputs derived
            from the source files (e.g., both py and pyc, only one, etc).
        should_build_native_deps_dso: Callable returning bool; True if
            building a native deps DSO is supported, False if not.
        should_create_init_files: Callable returning bool; True if
            `__init__.py` files should be generated, False if not.
        should_include_build_data: Callable returning bool; True if
            build data should be generated, False if not.
    Returns:
        A "BinarySemantics" struct.
    """
    fields = dict(
        # keep-sorted
        create_executable = create_executable,
        get_cc_details_for_binary = get_cc_details_for_binary,
        get_central_uncachable_version_file = get_central_uncachable_version_file,
        get_coverage_deps = get_coverage_deps,
        get_debugger_deps = get_debugger_deps,
        get_extra_common_runfiles_for_binary = get_extra_common_runfiles_for_binary,
        get_extra_providers = get_extra_providers,
        get_extra_write_build_data_env = get_extra_write_build_data_env,
        get_imports = get_imports,
        get_interpreter_path = get_interpreter_path,
        get_native_deps_dso_name = get_native_deps_dso_name,
        get_native_deps_user_link_flags = get_native_deps_user_link_flags,
        get_stamp_flag = get_stamp_flag,
        maybe_precompile = maybe_precompile,
        should_build_native_deps_dso = should_build_native_deps_dso,
        should_create_init_files = should_create_init_files,
        should_include_build_data = should_include_build_data,
    )
    return struct(**fields)
124
def create_library_semantics_struct(
        *,
        get_cc_info_for_library,
        get_imports,
        maybe_precompile):
    """Builds a `LibrarySemantics` struct.

    Prefer this over a raw `struct(...)` call: the mandatory keyword-only
    arguments ensure every necessary semantics function is provided.

    Args:
        get_cc_info_for_library: Callable returning a CcInfo for the library;
            see py_library_impl for arg details.
        get_imports: Callable; see create_binary_semantics_struct.
        maybe_precompile: Callable; see create_binary_semantics_struct.

    Returns:
        a `LibrarySemantics` struct.
    """
    fields = dict(
        # keep sorted
        get_cc_info_for_library = get_cc_info_for_library,
        get_imports = get_imports,
        maybe_precompile = maybe_precompile,
    )
    return struct(**fields)
149
def create_cc_details_struct(
        *,
        cc_info_for_propagating,
        cc_info_for_self_link,
        cc_info_with_extra_link_time_libraries,
        extra_runfiles,
        cc_toolchain,
        feature_config,
        **kwargs):
    """Builds a `CcDetails` struct.

    Args:
        cc_info_for_propagating: CcInfo propagated out of the target by
            returning it within a PyCcLinkParamsProvider object.
        cc_info_for_self_link: CcInfo used when linking the binary (or its
            native deps DSO) itself. May carry extra information that isn't
            propagated (e.g. a custom malloc).
        cc_info_with_extra_link_time_libraries: CcInfo of extra link time
            libraries that MUST come after `cc_info_for_self_link` (or
            possibly always last; not entirely clear) when passed to
            `link.linking_contexts`.
        extra_runfiles: runfiles of extra files needed at runtime, usually as
            part of `cc_info_with_extra_link_time_libraries`; should be added
            to runfiles.
        cc_toolchain: CcToolchain to use when building.
        feature_config: struct from cc_configure_features(); see
            //python/private/common:py_executable.bzl%cc_configure_features.
        **kwargs: Extra keys/values to set in the returned struct, to
            facilitate extensions with less patching. Added fields should use
            names unlikely to collide with future CcDetails API fields.

    Returns:
        A `CcDetails` struct.
    """

    # Splatting kwargs into dict() still raises on a key that collides with
    # one of the explicit fields, matching a direct struct(...) call.
    details = dict(
        cc_info_for_propagating = cc_info_for_propagating,
        cc_info_for_self_link = cc_info_for_self_link,
        cc_info_with_extra_link_time_libraries = cc_info_with_extra_link_time_libraries,
        extra_runfiles = extra_runfiles,
        cc_toolchain = cc_toolchain,
        feature_config = feature_config,
        **kwargs
    )
    return struct(**details)
194
def create_executable_result_struct(*, extra_files_to_build, output_groups, extra_runfiles = None):
    """Builds a `CreateExecutableResult` struct.

    This is the type the semantics `create_executable` callback returns.

    Args:
        extra_files_to_build: depset of File; extra files to include in the
            default outputs.
        output_groups: dict[str, depset[File]]; extra output groups to
            return.
        extra_runfiles: A runfiles object of additional runfiles to include.

    Returns:
        A `CreateExecutableResult` struct.
    """
    return struct(
        extra_files_to_build = extra_files_to_build,
        extra_runfiles = extra_runfiles,
        output_groups = output_groups,
    )
215
def union_attrs(*attr_dicts, allow_none = False):
    """Merges multiple rule attribute dicts into one.

    Similar to dict.update, except:
      * Duplicate keys raise an error if they aren't equal. This prevents
        unintentionally replacing an attribute with a potentially
        incompatible definition.
      * None values are special: they mean the attribute is required, but the
        value should be provided by another attribute dict (depending on the
        `allow_none` arg).
    Args:
        *attr_dicts: The dicts to combine.
        allow_none: bool, if True, then None values are allowed. If False,
            then one of `attrs_dicts` must set a non-None value for keys
            with a None value.

    Returns:
        dict of attributes.
    """
    merged = {}
    unfilled = {}  # Keys seen with None that still need a real value.
    for attrs in attr_dicts:
        for name, value in attrs.items():
            if value == None and not allow_none:
                # Required-but-unset marker; remember it unless an earlier
                # dict already supplied a real value.
                if name not in merged:
                    unfilled[name] = None
                continue

            # A concrete (or allowed-None) value satisfies the requirement.
            unfilled.pop(name, None)

            if merged.get(name) == None:
                # Covers both "key absent" and "placeholder None stored".
                merged[name] = value
            elif value != None and merged[name] != value:
                fail("Duplicate attribute name: '{}': existing={}, new={}".format(
                    name,
                    merged[name],
                    value,
                ))
                # Else they're equal, so do nothing. This allows merging
                # dicts that both define the same key from a common place.

    if unfilled and not allow_none:
        fail("Required attributes missing: " + csv(unfilled.keys()))
    return merged
261
def csv(values):
    """Formats a list of strings as a sorted, comma separated value string."""
    ordered = sorted(values)
    return ", ".join(ordered)
265
def filter_to_py_srcs(srcs):
    """Filters Python source files from the given list of files.

    Args:
        srcs: list of File objects.

    Returns:
        list of File whose extension is a recognized Python source extension.
    """

    # TODO(b/203567235): Get the set of recognized extensions from
    # elsewhere, as there may be others. e.g. Bazel recognizes .py3
    # as a valid extension.
    # Use the shared constant (instead of a hard-coded "py") so this stays
    # consistent with create_instrumented_files_info's coverage extensions.
    return [f for f in srcs if f.extension in _PYTHON_SOURCE_EXTENSIONS]
273
def collect_imports(ctx, semantics):
    """Collects the import path depset for the target.

    Args:
        ctx: rule ctx.
        semantics: semantics struct; its get_imports supplies the target's
            own import path strings.

    Returns:
        depset of strings: the target's own imports plus the transitive
        imports of `deps` that provide PyInfo or BuiltinPyInfo.
    """
    dep_imports = []
    for dep in ctx.attr.deps:
        if PyInfo in dep:
            dep_imports.append(dep[PyInfo].imports)
    for dep in ctx.attr.deps:
        if BuiltinPyInfo in dep:
            dep_imports.append(dep[BuiltinPyInfo].imports)
    return depset(direct = semantics.get_imports(ctx), transitive = dep_imports)
284
def collect_runfiles(ctx, files):
    """Collects the necessary files from the rule's context.

    This presumes the ctx is for a py_binary, py_test, or py_library rule.

    Args:
        ctx: rule ctx
        files: depset of extra files to include in the runfiles.
    Returns:
        runfiles necessary for the ctx's target.
    """
    return ctx.runfiles(
        transitive_files = files,
        # This little arg carries a lot of weight, but because Starlark doesn't
        # have a way to identify if a target is just a File, the equivalent
        # logic can't be re-implemented in pure-Starlark.
        #
        # Under the hood, it calls the Java `Runfiles#addRunfiles(ctx,
        # DEFAULT_RUNFILES)` method, which is what the Java implementation
        # of the Python rules originally did, and the details of how that method
        # works have become relied on in various ways. Specifically, what it
        # does is visit the srcs, deps, and data attributes in the following
        # ways:
        #
        # For each target in the "data" attribute...
        #   If the target is a File, then add that file to the runfiles.
        #   Otherwise, add the target's **data runfiles** to the runfiles.
        #
        # Note that, contrary to best practice, the default outputs of the
        # targets in `data` are *not* added, nor are the default runfiles.
        #
        # This ends up being important for several reasons, some of which are
        # specific to Google-internal features of the rules.
        #   * For Python executables, we have to use `data_runfiles` to avoid
        #     conflicts for the build data files. Such files have
        #     target-specific content, but uses a fixed location, so if a
        #     binary has another binary in `data`, and both try to specify a
        #     file for that file path, then a warning is printed and an
        #     arbitrary one will be used.
        #   * For rules with _entirely_ different sets of files in data runfiles
        #     vs default runfiles vs default outputs. For example,
        #     proto_library: documented behavior of this rule is that putting it
        #     in the `data` attribute will cause the transitive closure of
        #     `.proto` source files to be included. This set of sources is only
        #     in the `data_runfiles` (`default_runfiles` is empty).
        #   * For rules with a _subset_ of files in data runfiles. For example,
        #     a certain Google rule used for packaging arbitrary binaries will
        #     generate multiple versions of a binary (e.g. different archs,
        #     stripped vs un-stripped, etc) in its default outputs, but only
        #     one of them in the runfiles; this helps avoid large, unused
        #     binaries contributing to remote executor input limits.
        #
        # Unfortunately, the above behavior also results in surprising behavior
        # in some cases. For example, simple custom rules that only return their
        # files in their default outputs won't have their files included. Such
        # cases must either return their files in runfiles, or use `filegroup()`
        # which will do so for them.
        #
        # For each target in "srcs" and "deps"...
        #   Add the default runfiles of the target to the runfiles. While this
        #   is desirable behavior, it also ends up letting a `py_library`
        #   be put in `srcs` and still mostly work.
        # TODO(b/224640180): Reject py_library et al rules in srcs.
        collect_default = True,
    )
350
def create_py_info(ctx, *, direct_sources, direct_pyc_files, imports):
    """Create PyInfo provider.

    Args:
        ctx: rule ctx.
        direct_sources: depset of Files; the direct, raw `.py` sources for the
            target. This should only be Python source files. It should not
            include pyc files.
        direct_pyc_files: depset of Files; the direct `.pyc` sources for the target.
        imports: depset of strings; the import path values to propagate.

    Returns:
        A three-tuple of:
          * The created PyInfo instance.
          * A depset of the transitive sources collected from dependencies
            (only necessary for deprecated extra actions support).
          * The equivalent BuiltinPyInfo instance, built from the same fields
            minus the pyc-related ones, which BuiltinPyInfo doesn't support.
    """
    uses_shared_libraries = False
    has_py2_only_sources = ctx.attr.srcs_version in ("PY2", "PY2ONLY")
    has_py3_only_sources = ctx.attr.srcs_version in ("PY3", "PY3ONLY")
    transitive_sources_depsets = []  # list of depsets
    transitive_sources_files = []  # list of Files
    transitive_pyc_depsets = [direct_pyc_files]  # list of depsets
    for target in ctx.attr.deps:
        # PyInfo may not be present e.g. cc_library rules.
        if PyInfo in target or BuiltinPyInfo in target:
            info = _get_py_info(target)
            transitive_sources_depsets.append(info.transitive_sources)
            uses_shared_libraries = uses_shared_libraries or info.uses_shared_libraries
            has_py2_only_sources = has_py2_only_sources or info.has_py2_only_sources
            has_py3_only_sources = has_py3_only_sources or info.has_py3_only_sources

            # BuiltinPyInfo doesn't have this field.
            if hasattr(info, "transitive_pyc_files"):
                transitive_pyc_depsets.append(info.transitive_pyc_files)
        else:
            # TODO(b/228692666): Remove this once non-PyInfo targets are no
            # longer supported in `deps`.
            files = target.files.to_list()
            for f in files:
                if f.extension == "py":
                    transitive_sources_files.append(f)
                uses_shared_libraries = (
                    uses_shared_libraries or
                    cc_helper.is_valid_shared_library_artifact(f)
                )
    deps_transitive_sources = depset(
        direct = transitive_sources_files,
        transitive = transitive_sources_depsets,
    )

    # We only look at data to calculate uses_shared_libraries, if it's already
    # true, then we don't need to waste time looping over it.
    if not uses_shared_libraries:
        # Similar to the above, except we only calculate uses_shared_libraries
        for target in ctx.attr.data:
            # TODO(b/234730058): Remove checking for PyInfo in data once depot
            # cleaned up.
            if PyInfo in target or BuiltinPyInfo in target:
                info = _get_py_info(target)
                # Plain assignment (not or-ing) is safe here: we break out as
                # soon as the value becomes True, so a later False can never
                # clobber a True.
                uses_shared_libraries = info.uses_shared_libraries
            else:
                files = target.files.to_list()
                for f in files:
                    uses_shared_libraries = cc_helper.is_valid_shared_library_artifact(f)
                    if uses_shared_libraries:
                        break
            if uses_shared_libraries:
                break

    py_info_kwargs = dict(
        transitive_sources = depset(
            transitive = [deps_transitive_sources, direct_sources],
        ),
        imports = imports,
        # NOTE: This isn't strictly correct, but with Python 2 gone,
        # the srcs_version logic is largely defunct, so shouldn't matter in
        # practice.
        has_py2_only_sources = has_py2_only_sources,
        has_py3_only_sources = has_py3_only_sources,
        uses_shared_libraries = uses_shared_libraries,
        direct_pyc_files = direct_pyc_files,
        transitive_pyc_files = depset(transitive = transitive_pyc_depsets),
    )

    # TODO(b/203567235): Set `uses_shared_libraries` field, though the Bazel
    # docs indicate it's unused in Bazel and may be removed.
    py_info = PyInfo(**py_info_kwargs)

    # Remove args that BuiltinPyInfo doesn't support
    py_info_kwargs.pop("direct_pyc_files")
    py_info_kwargs.pop("transitive_pyc_files")
    builtin_py_info = BuiltinPyInfo(**py_info_kwargs)

    return py_info, deps_transitive_sources, builtin_py_info
445
def _get_py_info(target):
    """Returns the target's Python provider, preferring PyInfo over BuiltinPyInfo."""
    if PyInfo in target:
        return target[PyInfo]
    return target[BuiltinPyInfo]
448
def create_instrumented_files_info(ctx):
    """Creates the InstrumentedFilesInfo used for Python coverage collection.

    Args:
        ctx: rule ctx.

    Returns:
        InstrumentedFilesInfo covering `.py` files in `srcs` and the `deps`
        and `data` dependency attributes.
    """
    return _coverage_common.instrumented_files_info(
        ctx,
        source_attributes = ["srcs"],
        dependency_attributes = ["deps", "data"],
        extensions = _PYTHON_SOURCE_EXTENSIONS,
    )
456
def create_output_group_info(transitive_sources, extra_groups):
    """Creates the OutputGroupInfo for a Python target.

    Args:
        transitive_sources: depset of Files; exposed under both compilation
            output group names.
        extra_groups: dict[str, depset]; additional output groups to expose.

    Returns:
        OutputGroupInfo with the compilation groups plus `extra_groups`.
    """
    return OutputGroupInfo(
        compilation_prerequisites_INTERNAL_ = transitive_sources,
        compilation_outputs = transitive_sources,
        **extra_groups
    )
463
def maybe_add_test_execution_info(providers, ctx):
    """Appends an ExecutionInfo provider when proper test execution needs one.

    Args:
        providers: Mutable list of providers; an ExecutionInfo provider may
            be appended to it.
        ctx: Rule ctx.
    """

    # When built for Apple platforms, require the execution to be on a Mac.
    # TODO(b/176993122): Remove when bazel automatically knows to run on darwin.
    if not target_platform_has_any_constraint(ctx, ctx.attr._apple_constraints):
        return
    providers.append(_testing.ExecutionInfo({"requires-darwin": ""}))
477
# Cached result of type() on a bool, for cheap type comparisons below.
_BOOL_TYPE = type(True)

def is_bool(v):
    """Returns True if `v` is a bool value, False otherwise."""
    return type(v) == _BOOL_TYPE
482
def target_platform_has_any_constraint(ctx, constraints):
    """Reports whether the target platform satisfies any of the constraints.

    Args:
      ctx: rule context.
      constraints: label_list of constraints.

    Returns:
      True if target platform has at least one of the constraints.
    """
    for constraint_target in constraints:
        info = constraint_target[_platform_common.ConstraintValueInfo]
        if ctx.target_platform_has_constraint(info):
            return True
    return False
498
def check_native_allowed(ctx):
    """Check if the usage of the native rule is allowed.

    Fails the build when native Python rules are disallowed and the target
    is neither tagged for migration nor covered by the allowlist.

    Args:
        ctx: rule context to check
    """
    if not ctx.fragments.py.disallow_native_rules:
        return

    if _MIGRATION_TAG in ctx.attr.tags:
        return

    # NOTE: The main repo name is empty in *labels*, but not in
    # ctx.workspace_name
    if not ctx.label.workspace_name:
        check_label = ctx.label
    else:
        # package_group doesn't allow @repo syntax, so we work around that
        # by prefixing external repos with a fake package path. This also
        # makes it easy to enable or disable all external repos.
        check_label = Label("@//__EXTERNAL_REPOS__/{workspace}/{package}".format(
            workspace = ctx.label.workspace_name,
            package = ctx.label.package,
        ))

    allowlist = ctx.attr._native_rules_allowlist
    if allowlist:
        allowed = allowlist[PackageSpecificationInfo].contains(check_label)
        allowlist_help = str(allowlist.label).replace("@//", "//")
    else:
        allowed = False
        allowlist_help = ("no allowlist specified; all disallowed; specify one " +
                          "with --python_native_rules_allowlist")
    if allowed:
        return

    if ctx.attr.generator_function:
        generator = "{generator_function}(name={generator_name}) in {generator_location}".format(
            generator_function = ctx.attr.generator_function,
            generator_name = ctx.attr.generator_name,
            generator_location = ctx.attr.generator_location,
        )
    else:
        generator = "No generator (called directly in BUILD file)"

    msg = (
        "{target} not allowed to use native.{rule}\n" +
        "Generated by: {generator}\n" +
        "Allowlist: {allowlist}\n" +
        "Migrate to using @rules_python, see {help_url}\n" +
        "FIXCMD: {fix_cmd} --target={target} --rule={rule} " +
        "--generator_name={generator_name} --location={generator_location}"
    )
    fail(msg.format(
        target = str(ctx.label).replace("@//", "//"),
        rule = _py_builtins.get_rule_name(ctx),
        generator = generator,
        allowlist = allowlist_help,
        generator_name = ctx.attr.generator_name,
        generator_location = ctx.attr.generator_location,
        help_url = NATIVE_RULES_MIGRATION_HELP_URL,
        fix_cmd = NATIVE_RULES_MIGRATION_FIX_CMD,
    ))
560