#!/usr/bin/env python3
# Copyright 2020 The gRPC Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Script to extract build metadata from bazel BUILD files.
# To avoid having two sources of truth for the build metadata (build
# targets, source files, header files etc.), this script analyzes the contents
# of bazel BUILD files and generates a YAML file (currently called
# build_autogenerated.yaml). The format and semantics of the generated YAML file
# are chosen to match the format of a "build.yaml" file, which used
# to be the source of truth for the gRPC build before bazel became
# the primary build system.
# A good basic overview of the "build.yaml" format is available here:
# https://github.com/grpc/grpc/blob/master/templates/README.md. Note that
# while useful as an overview, the doc does not act as a formal spec
# (in fact, no formal spec exists) and the doc can be incomplete,
# inaccurate or slightly out of date.
# TODO(jtattermusch): In the future we want to get rid of the legacy build.yaml
# format entirely or simplify it to a point where it becomes self-explanatory
# and doesn't need any detailed documentation.

import collections
import os
import subprocess
from typing import Any, Dict, Iterable, List, Optional
import xml.etree.ElementTree as ET

import build_cleaner

BuildMetadata = Dict[str, Any]
BuildDict = Dict[str, BuildMetadata]
BuildYaml = Dict[str, Any]


class ExternalProtoLibrary:
    """ExternalProtoLibrary is the struct describing an external proto library.

    Fields:
    - destination(str): The relative path where this proto library should be
        placed. Preferably, it should match the submodule path.
    - proto_prefix(str): The prefix to remove in order to ensure that proto
        imports are correct. For more info, see description of
        https://github.com/grpc/grpc/pull/25272.
    - urls(List[str]): The URLs of the archive. This field, along with hash and
        strip_prefix below, is filled in from the Bazel build metadata.
    - hash(str): The hash of the downloaded archive.
    - strip_prefix(str): The path to be stripped from the extracted directory, see
        http_archive in Bazel.
    """

    def __init__(
        self, destination, proto_prefix, urls=None, hash="", strip_prefix=""
    ):
        self.destination = destination
        self.proto_prefix = proto_prefix
        if urls is None:
            self.urls = []
        else:
            self.urls = urls
        self.hash = hash
        self.strip_prefix = strip_prefix


EXTERNAL_PROTO_LIBRARIES = {
    "envoy_api": ExternalProtoLibrary(
        destination="third_party/envoy-api",
        proto_prefix="third_party/envoy-api/",
    ),
    "com_google_googleapis": ExternalProtoLibrary(
        destination="third_party/googleapis",
        proto_prefix="third_party/googleapis/",
    ),
    "com_github_cncf_xds": ExternalProtoLibrary(
        destination="third_party/xds", proto_prefix="third_party/xds/"
    ),
    "opencensus_proto": ExternalProtoLibrary(
        destination="third_party/opencensus-proto/src",
        proto_prefix="third_party/opencensus-proto/src/",
    ),
}

# We want to get a list of source files for some external libraries
# to be able to include them in a non-bazel (e.g. make/cmake) build.
# For that we need a mapping from an external repo name to the corresponding
# path to a git submodule.
EXTERNAL_SOURCE_PREFIXES = {
    # TODO(veblush): Remove @utf8_range// item once protobuf is upgraded to 26.x
    "@utf8_range//": "third_party/utf8_range",
    "@com_googlesource_code_re2//": "third_party/re2",
    "@com_google_googletest//": "third_party/googletest",
    "@com_google_protobuf//upb": "third_party/upb/upb",
    "@com_google_protobuf//third_party/utf8_range": "third_party/utf8_range",
    "@zlib//": "third_party/zlib",
}


def _bazel_query_xml_tree(query: str) -> ET.Element:
    """Get xml output of bazel query invocation, parsed as XML tree"""
    output = subprocess.check_output(
        ["tools/bazel", "query", "--noimplicit_deps", "--output", "xml", query]
    )
    return ET.fromstring(output)

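# For illustration: the helper above runs, e.g. for the first entry of
# _BAZEL_DEPS_QUERIES defined below, the equivalent of
#   tools/bazel query --noimplicit_deps --output xml 'deps("//test/...")'
# The returned XML (roughly) has a <query> root whose <rule> children carry
# the metadata that _rule_dict_from_xml_node below extracts.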

def _rule_dict_from_xml_node(rule_xml_node):
    """Converts XML node representing a rule (obtained from "bazel query --output xml") to a dictionary that contains all the metadata we will need."""
    result = {
        "class": rule_xml_node.attrib.get("class"),
        "name": rule_xml_node.attrib.get("name"),
        "srcs": [],
        "hdrs": [],
        "textual_hdrs": [],
        "deps": [],
        "data": [],
        "tags": [],
        "args": [],
        "generator_function": None,
        "size": None,
        "flaky": False,
        "actual": None,  # the real target name for aliases
    }
    for child in rule_xml_node:
        # all the metadata we want is stored under "list" tags
        if child.tag == "list":
            list_name = child.attrib["name"]
            if list_name in [
                "srcs",
                "hdrs",
                "textual_hdrs",
                "deps",
                "data",
                "tags",
                "args",
            ]:
                result[list_name] += [item.attrib["value"] for item in child]
        if child.tag == "string":
            string_name = child.attrib["name"]
            if string_name in ["generator_function", "size"]:
                result[string_name] = child.attrib["value"]
        if child.tag == "boolean":
            bool_name = child.attrib["name"]
            if bool_name in ["flaky"]:
                result[bool_name] = child.attrib["value"] == "true"
        if child.tag == "label":
            # extract actual name for alias and bind rules
            label_name = child.attrib["name"]
            if label_name in ["actual"]:
                actual_name = child.attrib.get("value", None)
                if actual_name:
                    result["actual"] = actual_name
                    # HACK: since we do a lot of transitive dependency scanning,
                    # make it seem that the actual name is a dependency of the alias or bind rule
                    # (aliases don't have dependencies themselves)
                    result["deps"].append(actual_name)
    return result

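# For illustration, _rule_dict_from_xml_node above would turn a hypothetical
# element like
#   <rule class="cc_library" name="//:gpr">
#     <list name="srcs"><label value="//:src/core/lib/gpr/alloc.cc"/></list>
#     <list name="deps"><label value="//:gpr_platform"/></list>
#   </rule>
# into (with the remaining fields keeping their defaults):
#   {"class": "cc_library", "name": "//:gpr",
#    "srcs": ["//:src/core/lib/gpr/alloc.cc"],
#    "deps": ["//:gpr_platform"], ...}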

def _extract_rules_from_bazel_xml(xml_tree):
    """Extract bazel rules from an XML tree node obtained from "bazel query --output xml" command."""
    result = {}
    for child in xml_tree:
        if child.tag == "rule":
            rule_dict = _rule_dict_from_xml_node(child)
            rule_clazz = rule_dict["class"]
            rule_name = rule_dict["name"]
            if rule_clazz in [
                "cc_library",
                "cc_binary",
                "cc_test",
                "cc_proto_library",
                "cc_proto_gen_validate",
                "proto_library",
                "upb_c_proto_library",
                "upb_proto_reflection_library",
                "alias",
                "bind",
            ]:
                if rule_name in result:
                    raise Exception("Rule %s already present" % rule_name)
                result[rule_name] = rule_dict
    return result


def _get_bazel_label(target_name: str) -> str:
    if target_name.startswith("@"):
        return target_name
    if ":" in target_name:
        return "//%s" % target_name
    else:
        return "//:%s" % target_name

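# Illustrative examples (following the logic above):
#   _get_bazel_label("grpc") -> "//:grpc"
#   _get_bazel_label("src/compiler:grpc_cpp_plugin") -> "//src/compiler:grpc_cpp_plugin"
#   _get_bazel_label("@zlib//:zlib") -> "@zlib//:zlib" (external labels stay unchanged)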

def _try_extract_source_file_path(label: str) -> Optional[str]:
    """Gets relative path to source file from bazel deps listing"""
    if label.startswith("@"):
        # This is an external source file. We are only interested in sources
        # for some of the external libraries.
        for lib_name, prefix in EXTERNAL_SOURCE_PREFIXES.items():
            if label.startswith(lib_name):
                return (
                    label.replace("%s" % lib_name, prefix)
                    .replace(":", "/")
                    .replace("//", "/")
                )

        # This source file is external, and we need to translate the
        # @REPO_NAME to a valid path prefix. At this stage, we need
        # to check the repo name, since the label/path mapping is not
        # available in BUILD files.
        for lib_name, external_proto_lib in EXTERNAL_PROTO_LIBRARIES.items():
            if label.startswith("@" + lib_name + "//"):
                return label.replace(
                    "@%s//" % lib_name,
                    external_proto_lib.proto_prefix,
                ).replace(":", "/")

        # No external library match found
        return None
    else:
        if label.startswith("//"):
            label = label[len("//") :]
        # labels in form //:src/core/lib/surface/call_test_only.h
        if label.startswith(":"):
            label = label[len(":") :]
        # labels in form //test/core/util:port.cc
        return label.replace(":", "/")

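# Illustrative examples of the mapping above (derived from the prefix tables;
# the file names are hypothetical):
#   "//:src/core/lib/surface/call_test_only.h" -> "src/core/lib/surface/call_test_only.h"
#   "//test/core/util:port.cc" -> "test/core/util/port.cc"
#   "@com_googlesource_code_re2//:re2.cc" -> "third_party/re2/re2.cc"
#   "@some_unlisted_repo//:foo.cc" -> None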

def _has_header_suffix(label: str) -> bool:
    """Returns True if the label has a suffix that looks like a C/C++ include file"""
    return (
        label.endswith(".h")
        or label.endswith(".hpp")
        or label.endswith(".inc")
    )


def _extract_public_headers(bazel_rule: BuildMetadata) -> List[str]:
    """Gets list of public headers from a bazel rule"""
    result = []
    for dep in bazel_rule["hdrs"]:
        if dep.startswith("//:include/") and _has_header_suffix(dep):
            source_file_maybe = _try_extract_source_file_path(dep)
            if source_file_maybe:
                result.append(source_file_maybe)
    return list(sorted(result))


def _extract_nonpublic_headers(bazel_rule: BuildMetadata) -> List[str]:
    """Gets list of non-public headers from a bazel rule"""
    result = []
    for dep in list(
        bazel_rule["hdrs"] + bazel_rule["textual_hdrs"] + bazel_rule["srcs"]
    ):
        if not dep.startswith("//:include/") and _has_header_suffix(dep):
            source_file_maybe = _try_extract_source_file_path(dep)
            if source_file_maybe:
                result.append(source_file_maybe)
    return list(sorted(result))


def _extract_sources(bazel_rule: BuildMetadata) -> List[str]:
    """Gets list of source files from a bazel rule"""
    result = []
    for src in bazel_rule["srcs"]:
        # Skip .proto files from the protobuf repo
        if src.startswith("@com_google_protobuf//") and src.endswith(".proto"):
            continue
        if src.endswith(".cc") or src.endswith(".c") or src.endswith(".proto"):
            source_file_maybe = _try_extract_source_file_path(src)
            if source_file_maybe:
                result.append(source_file_maybe)
    return list(sorted(result))


def _extract_deps(
    bazel_rule: BuildMetadata, bazel_rules: BuildDict
) -> List[str]:
    """Gets list of deps from a bazel rule"""
    deps = set(bazel_rule["deps"])
    for src in bazel_rule["srcs"]:
        if (
            not src.endswith(".cc")
            and not src.endswith(".c")
            and not src.endswith(".proto")
        ):
            if src in bazel_rules:
                # This label doesn't point to a source file, but another Bazel
                # target. This is required for :pkg_cc_proto_validate targets,
                # and it's generally allowed by Bazel.
                deps.add(src)
    return list(sorted(list(deps)))


def _create_target_from_bazel_rule(
    target_name: str, bazel_rules: BuildDict
) -> BuildMetadata:
    """Create build.yaml-like target definition from bazel metadata"""
    bazel_rule = bazel_rules[_get_bazel_label(target_name)]

    # Create a template for our target from the bazel rule. Initially we only
    # populate some "private" fields with the original info we got from bazel
    # and only later we will populate the public fields (once we do some extra
    # postprocessing).
    result = {
        "name": target_name,
        "_PUBLIC_HEADERS_BAZEL": _extract_public_headers(bazel_rule),
        "_HEADERS_BAZEL": _extract_nonpublic_headers(bazel_rule),
        "_SRC_BAZEL": _extract_sources(bazel_rule),
        "_DEPS_BAZEL": _extract_deps(bazel_rule, bazel_rules),
        "public_headers": bazel_rule["_COLLAPSED_PUBLIC_HEADERS"],
        "headers": bazel_rule["_COLLAPSED_HEADERS"],
        "src": bazel_rule["_COLLAPSED_SRCS"],
        "deps": bazel_rule["_COLLAPSED_DEPS"],
    }
    return result


def _external_dep_name_from_bazel_dependency(bazel_dep: str) -> Optional[str]:
    """Returns the name of the external dependency for a given bazel label, or None."""
    if bazel_dep.startswith("@com_google_absl//"):
        # special case for depending on one of the absl libraries (there is
        # more than one absl library)
        prefixlen = len("@com_google_absl//")
        return bazel_dep[prefixlen:]
    elif bazel_dep == "@com_github_google_benchmark//:benchmark":
        return "benchmark"
    elif bazel_dep == "@boringssl//:ssl":
        return "libssl"
    elif bazel_dep == "@com_github_cares_cares//:ares":
        return "cares"
    elif (
        bazel_dep == "@com_google_protobuf//:protobuf"
        or bazel_dep == "@com_google_protobuf//:protobuf_headers"
    ):
        return "protobuf"
    elif bazel_dep == "@com_google_protobuf//:protoc_lib":
        return "protoc"
    elif bazel_dep == "@io_opentelemetry_cpp//api:api":
        return "opentelemetry-cpp::api"
    elif bazel_dep == "@io_opentelemetry_cpp//sdk/src/metrics:metrics":
        return "opentelemetry-cpp::metrics"
    else:
        # Two options here:
        # * either this is not an external dependency at all (which is fine, we
        #   will treat it as an internal library)
        # * this is an external dependency, but we don't want to make the
        #   dependency explicit in the build metadata for other build systems.
        return None

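# Illustrative examples (following the mapping above):
#   "@com_google_absl//absl/strings:strings" -> "absl/strings:strings"
#   "@boringssl//:ssl" -> "libssl"
#   "//:grpc" -> None (an internal target, not an external dependency)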

def _compute_transitive_metadata(
    rule_name: str, bazel_rules: Any, bazel_label_to_dep_name: Dict[str, str]
) -> None:
    """Computes the final build metadata for the Bazel target named rule_name.

    The dependencies that will appear on the deps list are:

    * Public build targets including binaries and tests;
    * External targets, like absl, re2.

    All other intermediate dependencies will be merged, which means their
    source file, headers, etc. will be collected into one build target. This
    step of processing will greatly reduce the complexity of the generated
    build specifications for other build systems, like CMake, Make, setuptools.

    The final build metadata are:
    * _TRANSITIVE_DEPS: all the transitive dependencies including intermediate
                        targets;
    * _COLLAPSED_DEPS:  dependencies that fit the requirements above,
                        deduplicated and sorted alphabetically to produce the
                        shortest possible dependency list;
    * _COLLAPSED_SRCS:  the merged source files;
    * _COLLAPSED_PUBLIC_HEADERS: the merged public headers;
    * _COLLAPSED_HEADERS: the merged non-public headers;
    * _EXCLUDE_DEPS: intermediate targets to exclude when performing collapsing
      of sources and dependencies.

    For _COLLAPSED_DEPS, the algorithm improves on cases like the following:

    The result in the past:
        end2end_tests -> [grpc_test_util, grpc, gpr, address_sorting, upb]
        grpc_test_util -> [grpc, gpr, address_sorting, upb, ...]
        grpc -> [gpr, address_sorting, upb, ...]

    The result of the algorithm:
        end2end_tests -> [grpc_test_util]
        grpc_test_util -> [grpc]
        grpc -> [gpr, address_sorting, upb, ...]
    """
    bazel_rule = bazel_rules[rule_name]
    direct_deps = _extract_deps(bazel_rule, bazel_rules)
    transitive_deps = set()
    collapsed_deps = set()
    exclude_deps = set()
    collapsed_srcs = set(_extract_sources(bazel_rule))
    collapsed_public_headers = set(_extract_public_headers(bazel_rule))
    collapsed_headers = set(_extract_nonpublic_headers(bazel_rule))

    for dep in direct_deps:
        external_dep_name_maybe = _external_dep_name_from_bazel_dependency(dep)

        if dep in bazel_rules:
            # Descend recursively, but no need to do that for external deps
            if external_dep_name_maybe is None:
                if "_PROCESSING_DONE" not in bazel_rules[dep]:
                    # This item hasn't been processed before, compute now
                    _compute_transitive_metadata(
                        dep, bazel_rules, bazel_label_to_dep_name
                    )
                transitive_deps.update(
                    bazel_rules[dep].get("_TRANSITIVE_DEPS", [])
                )
                collapsed_deps.update(
                    bazel_rules[dep].get("_COLLAPSED_DEPS", [])
                )
                exclude_deps.update(bazel_rules[dep].get("_EXCLUDE_DEPS", []))

        # This dep is a public target, add it as a dependency
        if dep in bazel_label_to_dep_name:
            transitive_deps.update([bazel_label_to_dep_name[dep]])
            collapsed_deps.update([bazel_label_to_dep_name[dep]])
            # Add all the transitive deps of every public dep to the exclude
            # list since we want to avoid building sources that are already
            # built by our dependencies
            exclude_deps.update(bazel_rules[dep]["_TRANSITIVE_DEPS"])
            continue

        # This dep is an external target, add it as a dependency
        if external_dep_name_maybe is not None:
            transitive_deps.update([external_dep_name_maybe])
            collapsed_deps.update([external_dep_name_maybe])
            continue

    # Direct dependencies are part of transitive dependencies
    transitive_deps.update(direct_deps)

    # Calculate transitive public deps (needed for collapsing sources)
    transitive_public_deps = set(
        [x for x in transitive_deps if x in bazel_label_to_dep_name]
    )

    # Remove intermediate targets that our public dependencies already depend
    # on. This is the step that further shortens the deps list.
    collapsed_deps = set([x for x in collapsed_deps if x not in exclude_deps])

    # Compute the final source files and headers for this build target whose
    # name is `rule_name` (input argument of this function).
    #
    # Imagine a public target PX has transitive deps [IA, IB, PY, IC, PZ]. PX,
    # PY and PZ are public build targets. And IA, IB, IC are intermediate
    # targets. In addition, PY depends on IC.
    #
    # Translate the condition into dependency graph:
    #   PX -> [IA, IB, PY, IC, PZ]
    #   PY -> [IC]
    #   Public targets: [PX, PY, PZ]
    #
    # The collapsed dependencies of PX: [PY, PZ].
    # The excluded dependencies of PX: [PY, IC, PZ].
    # (IC is excluded as a dependency of PX. It is already included in PY, hence
    # it would be redundant to include it again.)
    #
    # Target PX should include source files and headers of [PX, IA, IB] as final
    # build metadata.
    for dep in transitive_deps:
        if dep not in exclude_deps and dep not in transitive_public_deps:
            if dep in bazel_rules:
                collapsed_srcs.update(_extract_sources(bazel_rules[dep]))
                collapsed_public_headers.update(
                    _extract_public_headers(bazel_rules[dep])
                )
                collapsed_headers.update(
                    _extract_nonpublic_headers(bazel_rules[dep])
                )
    # This item is a "visited" flag
    bazel_rule["_PROCESSING_DONE"] = True
    # Following items are described in the docstring.
    bazel_rule["_TRANSITIVE_DEPS"] = list(sorted(transitive_deps))
    bazel_rule["_COLLAPSED_DEPS"] = list(sorted(collapsed_deps))
    bazel_rule["_COLLAPSED_SRCS"] = list(sorted(collapsed_srcs))
    bazel_rule["_COLLAPSED_PUBLIC_HEADERS"] = list(
        sorted(collapsed_public_headers)
    )
    bazel_rule["_COLLAPSED_HEADERS"] = list(sorted(collapsed_headers))
    bazel_rule["_EXCLUDE_DEPS"] = list(sorted(exclude_deps))


# TODO(jtattermusch): deduplicate with transitive_dependencies.py (which has a
# slightly different logic)
# TODO(jtattermusch): This is done to avoid introducing too many intermediate
# libraries into the build.yaml-based builds (which might cause issues
# building language-specific artifacts) and also because the libraries in
# build.yaml-based builds are generally considered units of distribution (=
# public libraries that are visible to the user and are installable), while in
# bazel builds it is customary to define a larger number of smaller
# "sublibraries". The need for elision (and expansion) of intermediate libraries
# can be re-evaluated in the future.
def _populate_transitive_metadata(
    bazel_rules: Any, public_dep_names: Iterable[str]
) -> None:
    """Populates the transitive metadata (_TRANSITIVE_DEPS etc.) for each rule"""
    # Create the map between Bazel label and public dependency name
    bazel_label_to_dep_name = {}
    for dep_name in public_dep_names:
        bazel_label_to_dep_name[_get_bazel_label(dep_name)] = dep_name

    # Make sure we reached all the Bazel rules
    # TODO(lidiz) potentially we could only update a subset of rules
    for rule_name in bazel_rules:
        if "_PROCESSING_DONE" not in bazel_rules[rule_name]:
            _compute_transitive_metadata(
                rule_name, bazel_rules, bazel_label_to_dep_name
            )


def update_test_metadata_with_transitive_metadata(
    all_extra_metadata: BuildDict, bazel_rules: BuildDict
) -> None:
    """Patches test build metadata with transitive metadata."""
    for lib_name, lib_dict in list(all_extra_metadata.items()):
        # Skip if it isn't a test
        if (
            lib_dict.get("build") != "test"
            and lib_dict.get("build") != "plugin_test"
        ) or lib_dict.get("_TYPE") != "target":
            continue

        bazel_rule = bazel_rules[_get_bazel_label(lib_name)]

        if "//external:benchmark" in bazel_rule["_TRANSITIVE_DEPS"]:
            lib_dict["benchmark"] = True
            lib_dict["defaults"] = "benchmark"

        if "//external:gtest" in bazel_rule["_TRANSITIVE_DEPS"]:
            # run_tests.py checks the "gtest" property to see if test should be run via gtest.
            lib_dict["gtest"] = True
            # TODO: this might be incorrect categorization of the test...
            lib_dict["language"] = "c++"


def _get_transitive_protos(bazel_rules, t):
    """Returns the .proto files that target t depends on transitively (BFS over deps)."""
    que = [
        t,
    ]
    visited = set()
    ret = []
    while que:
        name = que.pop(0)
        rule = bazel_rules.get(name, None)
        if rule:
            for dep in rule["deps"]:
                if dep not in visited:
                    visited.add(dep)
                    que.append(dep)
            for src in rule["srcs"]:
                if src.endswith(".proto"):
                    ret.append(src)
    return list(set(ret))


def _expand_upb_proto_library_rules(bazel_rules):
    # Expand the .proto files from UPB proto library rules into the pre-generated
    # upb files.
    GEN_UPB_ROOT = "//:src/core/ext/upb-gen/"
    GEN_UPBDEFS_ROOT = "//:src/core/ext/upbdefs-gen/"
    EXTERNAL_LINKS = [
        ("@com_google_protobuf//", "src/"),
        ("@com_google_googleapis//", ""),
        ("@com_github_cncf_xds//", ""),
        ("@com_envoyproxy_protoc_gen_validate//", ""),
        ("@envoy_api//", ""),
        ("@opencensus_proto//", ""),
    ]
    for name, bazel_rule in bazel_rules.items():
        gen_func = bazel_rule.get("generator_function", None)
        if gen_func in (
            "grpc_upb_proto_library",
            "grpc_upb_proto_reflection_library",
        ):
            # get proto dependency
            deps = bazel_rule["deps"]
            if len(deps) != 1:
                raise Exception(
                    'upb rule "{0}" should have 1 proto dependency but has'
                    ' "{1}"'.format(name, deps)
                )
            # deps is not properly fetched from bazel query for upb_c_proto_library target
            # so add the upb dependency manually
            bazel_rule["deps"] = [
                "//external:upb_lib",
                "//external:upb_lib_descriptor",
                "//external:upb_generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
            ]
            # populate the upb_c_proto_library rule with pre-generated upb headers
            # and sources derived from the proto rule
            protos = _get_transitive_protos(bazel_rules, deps[0])
            if len(protos) == 0:
                raise Exception(
                    'upb rule "{0}" should have at least one proto file.'.format(
                        name
                    )
                )
            srcs = []
            hdrs = []
            for proto_src in protos:
                for external_link in EXTERNAL_LINKS:
                    if proto_src.startswith(external_link[0]):
                        prefix_to_strip = external_link[0] + external_link[1]
                        if not proto_src.startswith(prefix_to_strip):
                            raise Exception(
                                'Source file "{0}" in upb rule {1} does not'
                                ' have the expected prefix "{2}"'.format(
                                    proto_src, name, prefix_to_strip
                                )
                            )
                        proto_src = proto_src[len(prefix_to_strip) :]
                        break
                if proto_src.startswith("@"):
                    raise Exception(
                        'Proto source "{0}" in upb rule "{1}" comes from an'
                        " unknown workspace.".format(proto_src, name)
                    )
                proto_src_file = _try_extract_source_file_path(proto_src)
                if not proto_src_file:
                    raise Exception(
                        'Failed to get source file for "{0}" in upb rule "{1}".'.format(
                            proto_src, name
                        )
                    )

                extensions = (
                    # There is no longer a .upb.c extension.
                    [".upb.h", ".upb_minitable.h", ".upb_minitable.c"]
                    if gen_func == "grpc_upb_proto_library"
                    else [".upbdefs.h", ".upbdefs.c"]
                )
                root = (
                    GEN_UPB_ROOT
                    if gen_func == "grpc_upb_proto_library"
                    else GEN_UPBDEFS_ROOT
                )
                for ext in extensions:
                    srcs.append(root + proto_src_file.replace(".proto", ext))
                    hdrs.append(root + proto_src_file.replace(".proto", ext))
            bazel_rule["srcs"] = srcs
            bazel_rule["hdrs"] = hdrs

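# For illustration: a hypothetical "grpc_upb_proto_library" rule whose proto
# dependency transitively contains "foo/bar.proto" would end up with
#   srcs/hdrs = ["//:src/core/ext/upb-gen/foo/bar.upb.h",
#                "//:src/core/ext/upb-gen/foo/bar.upb_minitable.h",
#                "//:src/core/ext/upb-gen/foo/bar.upb_minitable.c"]
# while a "grpc_upb_proto_reflection_library" rule would get the
# .upbdefs.h/.upbdefs.c files under src/core/ext/upbdefs-gen instead.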

def _patch_grpc_proto_library_rules(bazel_rules):
    for name, bazel_rule in bazel_rules.items():
        contains_proto = any(
            src.endswith(".proto") for src in bazel_rule.get("srcs", [])
        )
        generator_func = bazel_rule.get("generator_function", None)

        if (
            name.startswith("//")
            and contains_proto
            and generator_func == "grpc_proto_library"
        ):
            # Add explicit protobuf dependency for internal c++ proto targets.
            bazel_rule["deps"].append("//external:protobuf")


def _patch_descriptor_upb_proto_library(bazel_rules):
    # upb's descriptor_upb_proto library doesn't reference the generated descriptor.proto
    # sources explicitly, so we add them manually.
    bazel_rule = bazel_rules.get(
        "@com_google_protobuf//upb:descriptor_upb_proto", None
    )
    if bazel_rule:
        bazel_rule["srcs"].append(
            ":src/core/ext/upb-gen/google/protobuf/descriptor.upb_minitable.c"
        )
        bazel_rule["hdrs"].append(
            ":src/core/ext/upb-gen/google/protobuf/descriptor.upb.h"
        )


def _generate_build_metadata(
    build_extra_metadata: BuildDict, bazel_rules: BuildDict
) -> BuildDict:
    """Generate build metadata in build.yaml-like format from bazel build metadata and the build.yaml-specific "extra metadata"."""
    lib_names = list(build_extra_metadata.keys())
    result = {}

    for lib_name in lib_names:
        lib_dict = _create_target_from_bazel_rule(lib_name, bazel_rules)

        # populate extra properties from the build.yaml-specific "extra metadata"
        lib_dict.update(build_extra_metadata.get(lib_name, {}))

        # store to results
        result[lib_name] = lib_dict

    # Rename targets marked with "_RENAME" extra metadata.
    # This is mostly a cosmetic change to ensure that we end up with build.yaml target
    # names we're used to from the past (and also to avoid too long target names).
    # The rename step needs to happen after we're done with most of the processing logic,
    # otherwise the already-renamed libraries will have different names than expected.
    for lib_name in lib_names:
        to_name = build_extra_metadata.get(lib_name, {}).get("_RENAME", None)
        if to_name:
            # store lib under the new name and also change its 'name' property
            if to_name in result:
                raise Exception(
                    "Cannot rename target "
                    + str(lib_name)
                    + ", "
                    + str(to_name)
                    + " already exists."
                )
            lib_dict = result.pop(lib_name)
            lib_dict["name"] = to_name
            result[to_name] = lib_dict

            # dep names need to be updated as well
            for lib_dict_to_update in list(result.values()):
                lib_dict_to_update["deps"] = list(
                    [
                        to_name if dep == lib_name else dep
                        for dep in lib_dict_to_update["deps"]
                    ]
                )

    return result


def _convert_to_build_yaml_like(lib_dict: BuildMetadata) -> BuildYaml:
    lib_names = [
        lib_name
        for lib_name in list(lib_dict.keys())
        if lib_dict[lib_name].get("_TYPE", "library") == "library"
    ]
    target_names = [
        lib_name
        for lib_name in list(lib_dict.keys())
        if lib_dict[lib_name].get("_TYPE", "library") == "target"
    ]
    test_names = [
        lib_name
        for lib_name in list(lib_dict.keys())
        if lib_dict[lib_name].get("_TYPE", "library") == "test"
        or lib_dict[lib_name].get("_TYPE", "library") == "plugin_test"
    ]

    # list libraries and targets in predefined order
    lib_list = [lib_dict[lib_name] for lib_name in lib_names]
    target_list = [lib_dict[lib_name] for lib_name in target_names]
    test_list = [lib_dict[lib_name] for lib_name in test_names]

    # get rid of temporary private fields prefixed with "_" and some other useless fields
    for lib in lib_list:
        for field_to_remove in [
            k for k in list(lib.keys()) if k.startswith("_")
        ]:
            lib.pop(field_to_remove, None)
    for target in target_list:
        for field_to_remove in [
            k for k in list(target.keys()) if k.startswith("_")
        ]:
            target.pop(field_to_remove, None)
        target.pop(
            "public_headers", None
        )  # public headers make no sense for targets
    for test in test_list:
        for field_to_remove in [
            k for k in list(test.keys()) if k.startswith("_")
        ]:
            test.pop(field_to_remove, None)
        test.pop(
            "public_headers", None
        )  # public headers make no sense for tests

    build_yaml_like = {
        "libs": lib_list,
        "filegroups": [],
        "targets": target_list,
        "tests": test_list,
    }
    return build_yaml_like

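# A minimal sketch of the resulting build.yaml-like structure (field values
# are illustrative only):
#   {
#       "libs": [{"name": "gpr", "build": "all", "language": "c", ...}],
#       "filegroups": [],
#       "targets": [{"name": "grpc_cpp_plugin", "build": "protoc", ...}],
#       "tests": [{"name": "otel_plugin_test", "build": "plugin_test", ...}],
#   }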

def _extract_cc_tests(bazel_rules: BuildDict) -> List[str]:
    """Gets list of cc_test tests from bazel rules"""
    result = []
    for bazel_rule in list(bazel_rules.values()):
        if bazel_rule["class"] == "cc_test":
            test_name = bazel_rule["name"]
            if test_name.startswith("//"):
                prefixlen = len("//")
                result.append(test_name[prefixlen:])
    return list(sorted(result))


def _exclude_unwanted_cc_tests(tests: List[str]) -> List[str]:
    """Filters out bazel tests that we don't want to run with other build systems or that we cannot reasonably build with them"""

    # most qps tests are autogenerated, we are fine without them
    tests = [test for test in tests if not test.startswith("test/cpp/qps:")]
    # microbenchmarks aren't needed for checking correctness
    tests = [
        test
        for test in tests
        if not test.startswith("test/cpp/microbenchmarks:")
    ]
    tests = [
        test
        for test in tests
        if not test.startswith("test/core/promise/benchmark:")
    ]

    # we have trouble with census dependency outside of bazel
    tests = [
        test
        for test in tests
        if not test.startswith("test/cpp/ext/filters/census:")
        and not test.startswith("test/core/xds:xds_channel_stack_modifier_test")
        and not test.startswith("test/cpp/ext/gcp:")
        and not test.startswith("test/cpp/ext/filters/logging:")
        and not test.startswith("test/cpp/interop:observability_interop")
    ]

    # we have not added otel dependency outside of bazel
    tests = [
        test
        for test in tests
        if not test.startswith("test/cpp/ext/csm:")
        and not test.startswith("test/cpp/interop:xds_interop")
    ]

    # missing opencensus/stats/stats.h
    tests = [
        test
        for test in tests
        if not test.startswith(
            "test/cpp/end2end:server_load_reporting_end2end_test"
        )
    ]
    tests = [
        test
        for test in tests
        if not test.startswith(
            "test/cpp/server/load_reporter:lb_load_reporter_test"
        )
    ]

    # The test uses the --running_under_bazel cmdline argument.
    # To avoid the trouble of needing to adjust it, we just skip the test.
    tests = [
        test
        for test in tests
        if not test.startswith(
            "test/cpp/naming:resolver_component_tests_runner_invoker"
        )
    ]

    # the test requires 'client_crash_test_server' to be built
    tests = [
        test
        for test in tests
        if not test.startswith("test/cpp/end2end:time_change_test")
    ]

    # the test requires 'client_crash_test_server' to be built
    tests = [
        test
        for test in tests
        if not test.startswith("test/cpp/end2end:client_crash_test")
    ]

    # the test requires 'server_crash_test_client' to be built
    tests = [
        test
        for test in tests
        if not test.startswith("test/cpp/end2end:server_crash_test")
    ]

    # test never existed under build.yaml and it fails -> skip it
    tests = [
        test
        for test in tests
        if not test.startswith("test/core/tsi:ssl_session_cache_test")
    ]

    # the binary of this test does not get built with cmake
    tests = [
        test
        for test in tests
        if not test.startswith("test/cpp/util:channelz_sampler_test")
    ]

    # chaotic good not supported outside bazel
    tests = [
        test
        for test in tests
        if not test.startswith("test/core/transport/chaotic_good")
    ]

    # we don't need to generate fuzzers outside of bazel
    tests = [test for test in tests if not test.endswith("_fuzzer")]
    tests = [test for test in tests if "_fuzzer_" not in test]

    return tests


def _generate_build_extra_metadata_for_tests(
    tests: List[str], bazel_rules: BuildDict
) -> BuildDict:
    """For given tests, generates the "extra metadata" that we need for our
    "build.yaml"-like output. The extra metadata is generated from the bazel
    rule metadata by using a bunch of heuristics."""
    test_metadata = {}
    for test in tests:
        test_dict = {"build": "test", "_TYPE": "target"}

        bazel_rule = bazel_rules[_get_bazel_label(test)]

        bazel_tags = bazel_rule["tags"]
        if "manual" in bazel_tags:
            # don't run the tests marked as "manual"
            test_dict["run"] = False

        if bazel_rule["flaky"]:
            # don't run tests that are marked as "flaky" under bazel
            # because that would only add noise for the run_tests.py tests
            # and seeing more failures for tests that we already know are flaky
            # doesn't really help anything
            test_dict["run"] = False

        if "no_uses_polling" in bazel_tags:
            test_dict["uses_polling"] = False

        if "grpc_fuzzer" == bazel_rule["generator_function"]:
            # currently we hand-list fuzzers instead of generating them automatically
            # because there's no way to obtain maxlen property from bazel BUILD file.
            print("skipping fuzzer " + test)
            continue

        if "bazel_only" in bazel_tags:
            continue

        # if any tags that restrict platform compatibility are present,
        # generate the "platforms" field accordingly
        # TODO(jtattermusch): there is also a "no_linux" tag, but we cannot take
        # it into account as it is applied by grpc_cc_test when poller expansion
        # is made (for tests where uses_polling=True). So for now, we just
        # assume all tests are compatible with linux and ignore the "no_linux" tag
        # completely.
        known_platform_tags = set(["no_windows", "no_mac"])
        if set(bazel_tags).intersection(known_platform_tags):
            platforms = []
            # assume all tests are compatible with linux and posix
            platforms.append("linux")
            platforms.append(
                "posix"
            )  # there is no posix-specific tag in bazel BUILD
            if "no_mac" not in bazel_tags:
                platforms.append("mac")
            if "no_windows" not in bazel_tags:
                platforms.append("windows")
            test_dict["platforms"] = platforms

        cmdline_args = bazel_rule["args"]
        if cmdline_args:
            test_dict["args"] = list(cmdline_args)

        if test.startswith("test/cpp"):
            test_dict["language"] = "c++"

        elif test.startswith("test/core"):
            test_dict["language"] = "c"
        else:
            raise Exception("wrong test: " + test)

        # short test name without the path.
        # There can be name collisions, but we will resolve them later
        simple_test_name = os.path.basename(_try_extract_source_file_path(test))
        test_dict["_RENAME"] = simple_test_name

        test_metadata[test] = test_dict

    # detect duplicate test names
    tests_by_simple_name = {}
    for test_name, test_dict in list(test_metadata.items()):
        simple_test_name = test_dict["_RENAME"]
        if simple_test_name not in tests_by_simple_name:
            tests_by_simple_name[simple_test_name] = []
        tests_by_simple_name[simple_test_name].append(test_name)

    # choose alternative names for tests with a name collision
    for collision_list in list(tests_by_simple_name.values()):
        if len(collision_list) > 1:
            for test_name in collision_list:
                long_name = test_name.replace("/", "_").replace(":", "_")
                print(
                    'short name of "%s" collides with another test, renaming'
                    " to %s" % (test_name, long_name)
                )
                test_metadata[test_name]["_RENAME"] = long_name
    return test_metadata


def _parse_http_archives(xml_tree: ET.Element) -> "List[ExternalProtoLibrary]":
    """Parses Bazel http_archive rules into ExternalProtoLibrary objects."""
    result = []
    for xml_http_archive in xml_tree:
        if (
            xml_http_archive.tag != "rule"
            or xml_http_archive.attrib["class"] != "http_archive"
        ):
            continue
        # A distilled Python representation of Bazel http_archive
        http_archive = dict()
        for xml_node in xml_http_archive:
            if xml_node.attrib["name"] == "name":
                http_archive["name"] = xml_node.attrib["value"]
            if xml_node.attrib["name"] == "urls":
                http_archive["urls"] = []
                for url_node in xml_node:
                    http_archive["urls"].append(url_node.attrib["value"])
            if xml_node.attrib["name"] == "url":
                http_archive["urls"] = [xml_node.attrib["value"]]
            if xml_node.attrib["name"] == "sha256":
                http_archive["hash"] = xml_node.attrib["value"]
            if xml_node.attrib["name"] == "strip_prefix":
                http_archive["strip_prefix"] = xml_node.attrib["value"]
        if http_archive["name"] not in EXTERNAL_PROTO_LIBRARIES:
            # If this http archive is not one of the external proto libraries,
            # we don't want to include it as a CMake target
            continue
        lib = EXTERNAL_PROTO_LIBRARIES[http_archive["name"]]
        lib.urls = http_archive["urls"]
        lib.hash = http_archive["hash"]
        lib.strip_prefix = http_archive["strip_prefix"]
        result.append(lib)
    return result


def _generate_external_proto_libraries() -> List[Dict[str, Any]]:
    """Generates the build metadata for external proto libraries"""
    xml_tree = _bazel_query_xml_tree("kind(http_archive, //external:*)")
    libraries = _parse_http_archives(xml_tree)
    libraries.sort(key=lambda x: x.destination)
    return list(map(lambda x: x.__dict__, libraries))


def _detect_and_print_issues(build_yaml_like: BuildYaml) -> None:
    """Try detecting some unusual situations and warn about them."""
    for tgt in build_yaml_like["targets"]:
        if tgt["build"] == "test":
            for src in tgt["src"]:
                if src.startswith("src/") and not src.endswith(".proto"):
                    print(
                        'source file from under "src/" tree used in test '
                        + tgt["name"]
                        + ": "
                        + src
                    )


# extra metadata that will be used to construct build.yaml
# these are mostly extra properties that we weren't able to obtain from the bazel build
# _TYPE: whether this is library, target or test
# _RENAME: whether this target should be renamed to a different name (to match expectations of make and cmake builds)
_BUILD_EXTRA_METADATA = {
    "third_party/address_sorting:address_sorting": {
        "language": "c",
        "build": "all",
        "_RENAME": "address_sorting",
    },
    "@com_google_protobuf//upb:base": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_base_lib",
    },
    "@com_google_protobuf//upb:mem": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_mem_lib",
    },
    "@com_google_protobuf//upb:message": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_message_lib",
    },
    "@com_google_protobuf//upb/json:json": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_json_lib",
    },
    "@com_google_protobuf//upb/text:text": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_textformat_lib",
    },
    "@com_google_protobuf//third_party/utf8_range:utf8_range": {
        "language": "c",
        "build": "all",
        # renaming to utf8_range_lib is necessary for now to avoid a clash with the utf8_range target in protobuf's cmake
        "_RENAME": "utf8_range_lib",
    },
    "@com_googlesource_code_re2//:re2": {
        "language": "c",
        "build": "all",
        "_RENAME": "re2",
    },
    "@com_google_googletest//:gtest": {
        "language": "c",
        "build": "private",
        "_RENAME": "gtest",
    },
    "@zlib//:zlib": {
        "language": "c",
        "zlib": True,
        "build": "private",
        "defaults": "zlib",
        "_RENAME": "z",
    },
    "gpr": {
        "language": "c",
        "build": "all",
    },
    "grpc": {
        "language": "c",
        "build": "all",
        "baselib": True,
        "generate_plugin_registry": True,
    },
    "grpc++": {
        "language": "c++",
        "build": "all",
        "baselib": True,
    },
    "grpc++_alts": {"language": "c++", "build": "all", "baselib": True},
    "grpc++_error_details": {"language": "c++", "build": "all"},
    "grpc++_reflection": {"language": "c++", "build": "all"},
    "grpc_authorization_provider": {"language": "c++", "build": "all"},
    "grpc++_unsecure": {
        "language": "c++",
        "build": "all",
        "baselib": True,
    },
    "grpc_unsecure": {
        "language": "c",
        "build": "all",
        "baselib": True,
        "generate_plugin_registry": True,
    },
    "grpcpp_channelz": {"language": "c++", "build": "all"},
    "grpcpp_otel_plugin": {
        "language": "c++",
        "build": "plugin",
    },
    "grpc++_test": {
        "language": "c++",
        "build": "private",
    },
    "src/compiler:grpc_plugin_support": {
        "language": "c++",
        "build": "protoc",
        "_RENAME": "grpc_plugin_support",
    },
    "src/compiler:grpc_cpp_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_cpp_plugin",
    },
    "src/compiler:grpc_csharp_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_csharp_plugin",
    },
    "src/compiler:grpc_node_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_node_plugin",
    },
    "src/compiler:grpc_objective_c_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_objective_c_plugin",
    },
    "src/compiler:grpc_php_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_php_plugin",
    },
    "src/compiler:grpc_python_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_python_plugin",
    },
    "src/compiler:grpc_ruby_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_ruby_plugin",
    },
    # TODO(jtattermusch): consider adding grpc++_core_stats
    # test support libraries
    "test/core/util:grpc_test_util": {
        "language": "c",
        "build": "private",
        "_RENAME": "grpc_test_util",
    },
    "test/core/util:grpc_test_util_unsecure": {
        "language": "c",
        "build": "private",
        "_RENAME": "grpc_test_util_unsecure",
    },
    # TODO(jtattermusch): consider adding grpc++_test_util_unsecure - it doesn't seem to be used by bazel build (don't forget to set secure: False)
    "test/cpp/util:test_config": {
        "language": "c++",
        "build": "private",
        "_RENAME": "grpc++_test_config",
    },
    "test/cpp/util:test_util": {
        "language": "c++",
        "build": "private",
        "_RENAME": "grpc++_test_util",
    },
    # benchmark support libraries
    "test/cpp/microbenchmarks:helpers": {
        "language": "c++",
        "build": "test",
        "defaults": "benchmark",
        "_RENAME": "benchmark_helpers",
    },
    "test/cpp/interop:interop_client": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "interop_client",
    },
    "test/cpp/interop:interop_server": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "interop_server",
    },
    # TODO(stanleycheung): re-enable this after cmake support for otel is added
    # "test/cpp/interop:xds_interop_client": {
    #     "language": "c++",
    #     "build": "test",
    #     "run": False,
    #     "_TYPE": "target",
    #     "_RENAME": "xds_interop_client",
    # },
    # "test/cpp/interop:xds_interop_server": {
    #     "language": "c++",
    #     "build": "test",
    #     "run": False,
    #     "_TYPE": "target",
    #     "_RENAME": "xds_interop_server",
    # },
    "test/cpp/interop:http2_client": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "http2_client",
    },
    "test/cpp/qps:qps_json_driver": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "qps_json_driver",
    },
    "test/cpp/qps:qps_worker": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "qps_worker",
    },
    "test/cpp/util:grpc_cli": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "grpc_cli",
    },
    "test/cpp/ext/otel:otel_plugin_test": {
        "language": "c++",
        "build": "plugin_test",
        "_TYPE": "target",
        "plugin_option": "gRPC_BUILD_GRPCPP_OTEL_PLUGIN",
        "_RENAME": "otel_plugin_test",
    },
    # TODO(jtattermusch): create_jwt and verify_jwt break distribtests because they depend on grpc_test_util and thus require tests to be built
    # For now it's ok to disable them as these binaries aren't very useful anyway.
    # 'test/core/security:create_jwt': { 'language': 'c', 'build': 'tool', '_TYPE': 'target', '_RENAME': 'grpc_create_jwt' },
    # 'test/core/security:verify_jwt': { 'language': 'c', 'build': 'tool', '_TYPE': 'target', '_RENAME': 'grpc_verify_jwt' },
    # TODO(jtattermusch): add remaining tools such as grpc_print_google_default_creds_token (they are not used by bazel build)
    # TODO(jtattermusch): these fuzzers had no build.yaml equivalent
    # test/core/compression:message_compress_fuzzer
    # test/core/compression:message_decompress_fuzzer
    # test/core/compression:stream_compression_fuzzer
    # test/core/compression:stream_decompression_fuzzer
    # test/core/slice:b64_decode_fuzzer
    # test/core/slice:b64_encode_fuzzer
}

# We need a complete picture of all the targets and dependencies we're interested in
# so we run multiple bazel queries and merge the results.
_BAZEL_DEPS_QUERIES = [
    'deps("//test/...")',
    'deps("//:all")',
    'deps("//src/compiler/...")',
    # allow resolving bind() workspace rules to the actual targets they point to
    'kind(bind, "//external:*")',
    # The ^ is needed to differentiate proto_library from go_proto_library
    'deps(kind("^proto_library", @envoy_api//envoy/...))',
    # Make sure we have source info for all the targets that _expand_upb_proto_library_rules artificially adds
    # as upb_c_proto_library dependencies.
    'deps("//external:upb_generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me")',
]

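# Note (illustrative): each query above is run through _bazel_query_xml_tree,
# so the first one is equivalent to
#   tools/bazel query --noimplicit_deps --output xml 'deps("//test/...")'
# The per-query results are merged into a single dictionary below, with later
# queries simply overwriting entries for labels that were already seen.
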
# Step 1: run a bunch of "bazel query --output xml" queries to collect
# the raw build metadata from the bazel build.
# At the end of this step we will have a dictionary of bazel rules
# that are interesting to us (libraries, binaries, etc.) along
# with their most important metadata (sources, headers, dependencies)
#
# Example of a single bazel rule after being populated:
# '//:grpc' : { 'class': 'cc_library',
#               'hdrs': ['//:include/grpc/byte_buffer.h', ... ],
#               'srcs': ['//:src/core/lib/surface/init.cc', ... ],
#               'deps': ['//:grpc_common', ...],
#               ... }
bazel_rules = {}
for query in _BAZEL_DEPS_QUERIES:
    bazel_rules.update(
        _extract_rules_from_bazel_xml(_bazel_query_xml_tree(query))
    )

# Step 1.5: The sources for UPB protos are pre-generated, so we want
# to expand the UPB proto library bazel rules into the generated
# .upb.h and .upb.c files.
_expand_upb_proto_library_rules(bazel_rules)
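# Illustrative sketch only (the file names are hypothetical; the real layout
# is whatever _expand_upb_proto_library_rules computes): an upb_c_proto_library
# rule for a hypothetical 'foo.proto' ends up listing the pre-generated
# 'foo.upb.h' and 'foo.upb.c' as its headers/sources, so that non-bazel
# builds can compile it without running the upb code generator.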

# Step 1.6: Add explicit protobuf dependency to grpc_proto_library rules
_patch_grpc_proto_library_rules(bazel_rules)

# Step 1.7: Make sure upb descriptor.proto library uses the pre-generated sources.
_patch_descriptor_upb_proto_library(bazel_rules)

# Step 2: Extract the known bazel cc_test tests. While most tests
# will be buildable with other build systems just fine, some of these tests
# would be too difficult to build and run with other build systems,
# so we simply exclude the ones we don't want.
# Note that while making tests buildable with build systems other than
# bazel is extra effort, we still need to do that for these reasons:
# - If our cmake build doesn't have any tests at all, it's hard to make
#   sure that what it built actually works (we need at least some "smoke tests").
#   This is quite important because the build flags of the bazel and non-bazel
#   builds might differ (sometimes for interesting reasons that are not easy
#   to overcome), which makes it even more important to have at least some
#   tests for cmake/make.
# - Our portability suite actually runs cmake tests, and fully migrating the
#   portability suite to bazel might be intricate (e.g. it's unclear whether
#   it's possible to get good enough coverage of different compilers / distros
#   etc. with bazel).
# - Some things that are considered "tests" in build.yaml-based builds are
#   actually binaries we'd want to be able to build anyway (qps_json_driver,
#   interop_client, interop_server, grpc_cli), so it's unclear how much
#   make/cmake simplification we would gain by removing just some (but not all) tests.
# TODO(jtattermusch): Investigate feasibility of running portability suite with bazel.
tests = _exclude_unwanted_cc_tests(_extract_cc_tests(bazel_rules))
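# At this point `tests` holds the names of the cc_test rules that survived
# the exclusion list (illustrative name only; the exact format is whatever
# _extract_cc_tests produces), e.g. 'test/cpp/end2end:end2end_test'.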

# Step 3: Generate the "extra metadata" for all our build targets.
# While the bazel rules give us most of the information we need,
# the legacy "build.yaml" format requires some additional fields that
# we cannot get just from bazel alone (we call that "extra metadata").
# In this step, we basically analyze the build metadata we have from bazel
# and use heuristics to determine (and sometimes guess) the right
# extra metadata to use for each target.
#
# - For some targets (such as the public libraries, helper libraries
#   and executables) determining the right extra metadata is hard to do
#   automatically. For these targets, the extra metadata is supplied "manually"
#   in the form of the _BUILD_EXTRA_METADATA dictionary. That allows us to match
#   the semantics of the legacy "build.yaml" as closely as possible.
#
# - For test binaries, it is possible to generate the "extra metadata" mostly
#   automatically using a rule-based heuristic approach because most tests
#   look and behave alike from the build's perspective.
#
# TODO(jtattermusch): Of course neither "_BUILD_EXTRA_METADATA" nor
# the heuristic approach used for tests is ideal, and they cannot be made
# to cover all possible situations (and are tailored to work with the way
# the grpc build currently works), but the idea was to start with something
# reasonably simple that matches the "build.yaml"-like semantics as closely
# as possible (to avoid changing too many things at once) and gradually get
# rid of the legacy "build.yaml"-specific fields one by one. Once that is done,
# only very little "extra metadata" would be needed and/or it would be trivial
# to generate it automatically.
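# For illustration only (a hand-written guess at the shape, not actual output
# of _generate_build_extra_metadata_for_tests): an auto-generated entry for a
# test target would look similar to the manual entries in
# _BUILD_EXTRA_METADATA above, e.g.
# { 'language': 'c++', 'build': 'test', '_TYPE': 'target' }.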
all_extra_metadata = {}
# TODO(veblush): Remove this workaround once protobuf is upgraded to 26.x
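# Depending on the protobuf version that bazel resolves, the utf8_range
# target is exposed either as a label vendored inside the protobuf repo or
# as a standalone @utf8_range repo, so re-key the manually supplied extra
# metadata to whichever label actually showed up in the query results.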
1424if "@com_google_protobuf//third_party/utf8_range:utf8_range" not in bazel_rules:
1425    md = _BUILD_EXTRA_METADATA[
1426        "@com_google_protobuf//third_party/utf8_range:utf8_range"
1427    ]
1428    del _BUILD_EXTRA_METADATA[
1429        "@com_google_protobuf//third_party/utf8_range:utf8_range"
1430    ]
1431    _BUILD_EXTRA_METADATA["@utf8_range//:utf8_range"] = md
1432all_extra_metadata.update(
1433    _generate_build_extra_metadata_for_tests(tests, bazel_rules)
1434)
1435all_extra_metadata.update(_BUILD_EXTRA_METADATA)
1436
# Step 4: Compute the build metadata that will be used in the final build.yaml.
# The final build metadata includes transitive dependencies, and sources/headers
# expanded without intermediate dependencies.
# Example:
# '//:grpc' : { ...,
#               '_TRANSITIVE_DEPS': ['//:gpr_base', ...],
#               '_COLLAPSED_DEPS': ['gpr', ...],
#               '_COLLAPSED_SRCS': [...],
#               '_COLLAPSED_PUBLIC_HEADERS': [...],
#               '_COLLAPSED_HEADERS': [...]
#             }
_populate_transitive_metadata(bazel_rules, list(all_extra_metadata.keys()))
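# Roughly speaking (see step 5 below), "collapsed" means that dependencies on
# intermediate libraries that have no extra metadata (and thus no corresponding
# build.yaml target) are replaced by those libraries' own sources and headers,
# so the resulting dependency lists only mention targets that will actually
# exist in the generated build.yaml-like output.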

# Step 4a: Update the existing test metadata with the updated build metadata.
# Some build metadata of test targets depends on transitive metadata
# that wasn't available earlier.
update_test_metadata_with_transitive_metadata(all_extra_metadata, bazel_rules)

# Step 5: Generate the final metadata for all the targets.
# This is done by combining the bazel build metadata and the "extra metadata"
# we obtained in the previous step.
# In this step, we also perform some interesting massaging of the target metadata
# to end up with a result that is as similar to the legacy build.yaml data
# as possible.
# - Some targets get renamed (to match the legacy build.yaml target names)
# - Some intermediate libraries get elided ("expanded") to better match the set
#   of targets provided by the legacy build.yaml build
#
# Originally the target renaming was introduced to address these concerns:
# - avoid changing too many things at the same time and avoid people getting
#   confused by some well-known targets suddenly being missing
# - Makefile/cmake and also language-specific generators rely on some build
#   targets being called exactly the way they are. Some of our testing
#   scripts also invoke executables (e.g. "qps_json_driver") by their name.
# - The autogenerated test name from bazel includes the package path
#   (e.g. "test_cpp_TEST_NAME"). Without renaming, the target names would
#   end up pretty ugly (e.g. test_cpp_qps_qps_json_driver).
# TODO(jtattermusch): reevaluate the need for target renaming in the future.
#
# Example of a single generated target:
# 'grpc' : { 'language': 'c',
#            'public_headers': ['include/grpc/byte_buffer.h', ... ],
#            'headers': ['src/core/ext/filters/client_channel/client_channel.h', ... ],
#            'src': ['src/core/lib/surface/init.cc', ... ],
#            'deps': ['gpr', 'address_sorting', ...],
#            ... }
all_targets_dict = _generate_build_metadata(all_extra_metadata, bazel_rules)
# Step 6: convert the dictionary with all the targets to a dict that has
# the desired "build.yaml"-like layout.
# TODO(jtattermusch): We use the custom "build.yaml"-like layout because
# currently all other build systems use that format as their source of truth.
# In the future, we can get rid of this custom & legacy format entirely,
# but we would need to update the generators for other build systems
# at the same time.
#
# Layout of the result:
# { 'libs': { TARGET_DICT_FOR_LIB_XYZ, ... },
#   'targets': { TARGET_DICT_FOR_BIN_XYZ, ... },
#   'tests': { TARGET_DICT_FOR_TEST_XYZ, ...} }
build_yaml_like = _convert_to_build_yaml_like(all_targets_dict)

# Step 7: Generate build metadata for external ProtoBuf libraries.
# We only want the ProtoBuf sources from these ProtoBuf dependencies, and they
# may not be present in our release source tarballs. These rules will be used
# by CMake to download the libraries if they are not present. Even if a
# download fails, it is only a soft error that doesn't block the existing
# targets from building successfully.
build_yaml_like[
    "external_proto_libraries"
] = _generate_external_proto_libraries()
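# For illustration only (a hand-written sketch; the field names follow the
# ExternalProtoLibrary class, and all values below are made up): each
# generated entry is expected to carry roughly
#   destination: 'third_party/some-api'
#   proto_prefix: 'third_party/some-api/'
#   urls: ['https://example.com/some-api/archive/abcdef.tar.gz']
#   hash: '<sha256 of the downloaded archive>'
#   strip_prefix: 'some-api-abcdef'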

# detect and report some suspicious situations we've seen before
_detect_and_print_issues(build_yaml_like)

# Step 8: Store the build_autogenerated.yaml in a deterministic (=sorted)
# and cleaned-up form.
# A basic overview of the resulting "build.yaml"-like format is here:
# https://github.com/grpc/grpc/blob/master/templates/README.md
# TODO(jtattermusch): The "cleanup" function is taken from the legacy
# build system (which used build.yaml) and can eventually be removed.
build_yaml_string = build_cleaner.cleaned_build_yaml_dict_as_string(
    build_yaml_like
)
with open("build_autogenerated.yaml", "w") as file:
    file.write(build_yaml_string)