# Copyright 2024 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
A file that houses private functions used in the `bzlmod` extension with the same name.
"""

load("@bazel_features//:features.bzl", "bazel_features")
load("//python/private:auth.bzl", "get_auth")
load("//python/private:envsubst.bzl", "envsubst")
load("//python/private:normalize_name.bzl", "normalize_name")
load(":parse_simpleapi_html.bzl", "parse_simpleapi_html")

def simpleapi_download(ctx, *, attr, cache, parallel_download = True):
    """Download Simple API HTML.

    For every package the configured indexes are tried in order and the
    first usable response wins; results from several indexes are never
    merged. If no index yields a usable response for a package, `fail` is
    called.

    Args:
        ctx: The module_ctx or repository_ctx.
        attr: Contains the parameters for the download. They are grouped into a
          struct for better clarity. It must have attributes:
           * index_url: str, the index.
           * index_url_overrides: dict[str, str], the index overrides for
             separate packages.
           * extra_index_urls: Extra index URLs that will be looked up after
             the main is looked up.
           * sources: list[str], the sources to download things for. Each value is
             the contents of requirements files.
           * envsubst: list[str], the envsubst vars for performing substitution in index url.
           * netrc: The netrc parameter for ctx.download, see http_file for docs.
           * auth_patterns: The auth_patterns parameter for ctx.download, see
               http_file for docs.
        cache: A dictionary that can be used as a cache between calls during a
            single evaluation of the extension. We use a dictionary as a cache
            so that we can reuse calls to the simple API when evaluating the
            extension. Using the canonical_id parameter of the module_ctx would
            deposit the simple API responses to the bazel cache and that is
            undesirable because additions to the PyPI index would not be
            reflected when re-evaluating the extension unless we do
            `bazel clean --expunge`.
        parallel_download: A boolean to enable usage of bazel 7.1 non-blocking downloads.

    Returns:
        dict of pkg name to the parsed HTML contents - a list of structs.
    """
    index_url_overrides = {
        normalize_name(p): i
        for p, i in (attr.index_url_overrides or {}).items()
    }

    # Only pass `block` when the running Bazel version understands it.
    download_kwargs = {}
    if bazel_features.external_deps.download_has_block_param:
        download_kwargs["block"] = not parallel_download

    # NOTE @aignas 2024-03-31: we are not merging results from multiple indexes
    # to replicate how `pip` would handle this case.
    async_downloads = {}
    contents = {}
    index_urls = [attr.index_url] + attr.extra_index_urls
    for pkg in attr.sources:
        pkg_normalized = normalize_name(pkg)

        # Build the ordered list of candidate URLs for this package. When an
        # override is configured every index maps to the same URL, so we
        # de-duplicate in order to avoid fetching the same URL (and writing
        # the same output file) more than once.
        urls = []
        for index_url in index_urls:
            url = "{}/{}/".format(
                index_url_overrides.get(pkg_normalized, index_url).rstrip("/"),
                pkg,
            )
            if url not in urls:
                urls.append(url)

        success = False
        for url in urls:
            result = _read_simpleapi(
                ctx = ctx,
                url = url,
                attr = attr,
                cache = cache,
                **download_kwargs
            )
            if hasattr(result, "wait"):
                # We will process it in a separate loop:
                async_downloads.setdefault(pkg_normalized, []).append(
                    struct(
                        pkg_normalized = pkg_normalized,
                        wait = result.wait,
                    ),
                )
                continue

            # A blocking download or a cache hit: the result is available now.
            if result.success:
                contents[pkg_normalized] = result.output
                success = True
                break

        if not async_downloads and not success:
            fail("Failed to download metadata from urls: {}".format(
                ", ".join(index_urls),
            ))

    if not async_downloads:
        return contents

    # If we use `block` == False, then we need to have a second loop that is
    # collecting all of the results as they were being downloaded in parallel.
    for pkg_normalized, downloads in async_downloads.items():
        # The package may already have been resolved in the first loop by a
        # cache hit on a later index while an earlier index was still pending.
        # That must count as success even if every pending download fails.
        success = pkg_normalized in contents
        for download in downloads:
            # Always wait so that no started download is left pending.
            result = download.wait()

            if result.success and not success:
                contents[pkg_normalized] = result.output
                success = True

        if not success:
            fail("Failed to download metadata from urls: {}".format(
                ", ".join(index_urls),
            ))

    return contents

def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
    """Read SimpleAPI.

    Args:
        ctx: The module_ctx or repository_ctx.
        url: str, the url parameter that can be passed to ctx.download.
        attr: The attribute that contains necessary info for downloading. The
          following attributes must be present:
           * envsubst: The envsubst values for performing substitutions in the URL.
           * netrc: The netrc parameter for ctx.download, see http_file for docs.
           * auth_patterns: The auth_patterns parameter for ctx.download, see
               http_file for docs.
        cache: A dict for storing the results.
        **download_kwargs: Any extra params to ctx.download.
            Note that output and auth will be passed for you.

    Returns:
        A similar object to what `download` would return except that in
        result.output will be the parsed simple api contents. When called with
        `block = False` and the URL is not in the cache, a struct with only a
        `wait` function is returned; calling it yields the result described
        above.
    """
    # NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for
    # the whl location and we cannot handle multiple URLs at once by passing
    # them to ctx.download if we want to correctly handle the relative URLs.
    # TODO: Add a test that env subbed index urls do not leak into the lock file.

    real_url = envsubst(
        url,
        attr.envsubst,
        ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get,
    )

    # The cache is keyed by the fully substituted URL.
    cache_key = real_url
    if cache_key in cache:
        return struct(success = True, output = cache[cache_key])

    output_str = envsubst(
        url,
        attr.envsubst,
        # Use env names in the subst values - this will be unique over
        # the lifetime of the execution of this function and we also use
        # `~` as the separator to ensure that we don't get clashes.
        {e: "~{}~".format(e) for e in attr.envsubst}.get,
    )

    # Transform the URL into a valid filename
    for char in [".", ":", "/", "\\", "-"]:
        output_str = output_str.replace(char, "_")

    output = ctx.path(output_str.strip("_").lower() + ".html")

    # NOTE: this may have block = True or block = False in the download_kwargs
    download = ctx.download(
        url = [real_url],
        output = output,
        auth = get_auth(ctx, [real_url], ctx_attr = attr),
        allow_fail = True,
        **download_kwargs
    )

    # Compare against `False` explicitly: a missing `block` key (None) means
    # that the download above was a regular blocking one.
    if download_kwargs.get("block") == False:
        # Simulate the same API as ctx.download has
        return struct(
            wait = lambda: _read_index_result(ctx, download.wait(), output, real_url, cache, cache_key),
        )

    return _read_index_result(ctx, download, output, real_url, cache, cache_key)

def _read_index_result(ctx, result, output, url, cache, cache_key):
    """Parse a finished index download and store the parsed value in the cache.

    Args:
        ctx: The module_ctx or repository_ctx, used to read the downloaded file.
        result: The finished download result; only `result.success` is read.
        output: The path that the index HTML was downloaded to.
        url: str, the URL the index was downloaded from; it is forwarded to
            `parse_simpleapi_html`.
        cache: A dict for storing the parsed results.
        cache_key: The key under which the parsed result is stored in `cache`.

    Returns:
        `struct(success = True, output = <parsed>, cache_key = cache_key)` when
        the download succeeded and parsing returned a truthy value, otherwise
        `struct(success = False)`.
    """
    if not result.success:
        return struct(success = False)

    content = ctx.read(output)

    output = parse_simpleapi_html(url = url, content = content)
    if not output:
        # A falsy parse result is reported as a failed download.
        return struct(success = False)

    # `setdefault` keeps an existing entry for the same key, if any.
    cache.setdefault(cache_key, output)
    return struct(success = True, output = output, cache_key = cache_key)