1# Copyright 2018 The Bazel Authors. All rights reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14 15from __future__ import annotations 16 17import argparse 18import base64 19import hashlib 20import os 21import re 22import stat 23import sys 24import zipfile 25from pathlib import Path 26 27_ZIP_EPOCH = (1980, 1, 1, 0, 0, 0) 28 29 30def commonpath(path1, path2): 31 ret = [] 32 for a, b in zip(path1.split(os.path.sep), path2.split(os.path.sep)): 33 if a != b: 34 break 35 ret.append(a) 36 return os.path.sep.join(ret) 37 38 39def escape_filename_segment(segment): 40 """Escapes a filename segment per https://www.python.org/dev/peps/pep-0427/#escaping-and-unicode 41 42 This is a legacy function, kept for backwards compatibility, 43 and may be removed in the future. See `escape_filename_distribution_name` 44 and `normalize_pep440` for the modern alternatives. 45 """ 46 return re.sub(r"[^\w\d.]+", "_", segment, re.UNICODE) 47 48 49def normalize_package_name(name): 50 """Normalize a package name according to the Python Packaging User Guide. 51 52 See https://packaging.python.org/en/latest/specifications/name-normalization/ 53 """ 54 return re.sub(r"[-_.]+", "-", name).lower() 55 56 57def escape_filename_distribution_name(name): 58 """Escape the distribution name component of a filename. 59 60 See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode 61 """ 62 return normalize_package_name(name).replace("-", "_") 63 64 65def normalize_pep440(version): 66 """Normalize version according to PEP 440, with fallback for placeholders. 67 68 If there's a placeholder in braces, such as {BUILD_TIMESTAMP}, 69 replace it with 0. Such placeholders can be used with stamping, in 70 which case they would have been resolved already by now; if they 71 haven't, we're doing an unstamped build, but we still need to 72 produce a valid version. If such replacements are made, the 73 original version string, sanitized to dot-separated alphanumerics, 74 is appended as a local version segment, so you understand what 75 placeholder was involved. 76 77 If that still doesn't produce a valid version, use version 0 and 78 append the original version string, sanitized to dot-separated 79 alphanumerics, as a local version segment. 80 81 """ 82 83 import packaging.version 84 85 try: 86 return str(packaging.version.Version(version)) 87 except packaging.version.InvalidVersion: 88 pass 89 90 sanitized = re.sub(r"[^a-z0-9]+", ".", version.lower()).strip(".") 91 substituted = re.sub(r"\{\w+\}", "0", version) 92 delimiter = "." if "+" in substituted else "+" 93 try: 94 return str(packaging.version.Version(f"{substituted}{delimiter}{sanitized}")) 95 except packaging.version.InvalidVersion: 96 return str(packaging.version.Version(f"0+{sanitized}")) 97 98 99class _WhlFile(zipfile.ZipFile): 100 def __init__( 101 self, 102 filename, 103 *, 104 mode, 105 distribution_prefix: str, 106 strip_path_prefixes=None, 107 compression=zipfile.ZIP_DEFLATED, 108 **kwargs, 109 ): 110 self._distribution_prefix = distribution_prefix 111 112 self._strip_path_prefixes = strip_path_prefixes or [] 113 # Entries for the RECORD file as (filename, hash, size) tuples. 114 self._record = [] 115 116 super().__init__(filename, mode=mode, compression=compression, **kwargs) 117 118 def distinfo_path(self, basename): 119 return f"{self._distribution_prefix}.dist-info/{basename}" 120 121 def data_path(self, basename): 122 return f"{self._distribution_prefix}.data/{basename}" 123 124 def add_file(self, package_filename, real_filename): 125 """Add given file to the distribution.""" 126 127 def arcname_from(name): 128 # Always use unix path separators. 129 normalized_arcname = name.replace(os.path.sep, "/") 130 # Don't manipulate names filenames in the .distinfo or .data directories. 131 if normalized_arcname.startswith(self._distribution_prefix): 132 return normalized_arcname 133 for prefix in self._strip_path_prefixes: 134 if normalized_arcname.startswith(prefix): 135 return normalized_arcname[len(prefix) :] 136 137 return normalized_arcname 138 139 if os.path.isdir(real_filename): 140 directory_contents = os.listdir(real_filename) 141 for file_ in directory_contents: 142 self.add_file( 143 "{}/{}".format(package_filename, file_), 144 "{}/{}".format(real_filename, file_), 145 ) 146 return 147 148 arcname = arcname_from(package_filename) 149 zinfo = self._zipinfo(arcname) 150 151 # Write file to the zip archive while computing the hash and length 152 hash = hashlib.sha256() 153 size = 0 154 with open(real_filename, "rb") as fsrc: 155 with self.open(zinfo, "w") as fdst: 156 while True: 157 block = fsrc.read(2**20) 158 if not block: 159 break 160 fdst.write(block) 161 hash.update(block) 162 size += len(block) 163 164 self._add_to_record(arcname, self._serialize_digest(hash), size) 165 166 def add_string(self, filename, contents): 167 """Add given 'contents' as filename to the distribution.""" 168 if isinstance(contents, str): 169 contents = contents.encode("utf-8", "surrogateescape") 170 zinfo = self._zipinfo(filename) 171 self.writestr(zinfo, contents) 172 hash = hashlib.sha256() 173 hash.update(contents) 174 self._add_to_record(filename, self._serialize_digest(hash), len(contents)) 175 176 def _serialize_digest(self, hash): 177 # https://www.python.org/dev/peps/pep-0376/#record 178 # "base64.urlsafe_b64encode(digest) with trailing = removed" 179 digest = base64.urlsafe_b64encode(hash.digest()) 180 digest = b"sha256=" + digest.rstrip(b"=") 181 return digest 182 183 def _add_to_record(self, filename, hash, size): 184 size = str(size).encode("ascii") 185 self._record.append((filename, hash, size)) 186 187 def _zipinfo(self, filename): 188 """Construct deterministic ZipInfo entry for a file named filename""" 189 # Strip leading path separators to mirror ZipInfo.from_file behavior 190 separators = os.path.sep 191 if os.path.altsep is not None: 192 separators += os.path.altsep 193 arcname = filename.lstrip(separators) 194 195 zinfo = zipfile.ZipInfo(filename=arcname, date_time=_ZIP_EPOCH) 196 zinfo.create_system = 3 # ZipInfo entry created on a unix-y system 197 # Both pip and installer expect the regular file bit to be set in order for the 198 # executable bit to be preserved after extraction 199 # https://github.com/pypa/pip/blob/23.3.2/src/pip/_internal/utils/unpacking.py#L96-L100 200 # https://github.com/pypa/installer/blob/0.7.0/src/installer/sources.py#L310-L313 201 zinfo.external_attr = ( 202 stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO | stat.S_IFREG 203 ) << 16 # permissions: -rwxrwxrwx 204 zinfo.compress_type = self.compression 205 return zinfo 206 207 def add_recordfile(self): 208 """Write RECORD file to the distribution.""" 209 record_path = self.distinfo_path("RECORD") 210 entries = self._record + [(record_path, b"", b"")] 211 contents = b"" 212 for filename, digest, size in entries: 213 if isinstance(filename, str): 214 filename = filename.lstrip("/").encode("utf-8", "surrogateescape") 215 contents += b"%s,%s,%s\n" % (filename, digest, size) 216 217 self.add_string(record_path, contents) 218 return contents 219 220 221class WheelMaker(object): 222 def __init__( 223 self, 224 name, 225 version, 226 build_tag, 227 python_tag, 228 abi, 229 platform, 230 outfile=None, 231 strip_path_prefixes=None, 232 ): 233 self._name = name 234 self._version = normalize_pep440(version) 235 self._build_tag = build_tag 236 self._python_tag = python_tag 237 self._abi = abi 238 self._platform = platform 239 self._outfile = outfile 240 self._strip_path_prefixes = strip_path_prefixes 241 self._wheelname_fragment_distribution_name = escape_filename_distribution_name( 242 self._name 243 ) 244 245 self._distribution_prefix = ( 246 self._wheelname_fragment_distribution_name + "-" + self._version 247 ) 248 249 self._whlfile = None 250 251 def __enter__(self): 252 self._whlfile = _WhlFile( 253 self.filename(), 254 mode="w", 255 distribution_prefix=self._distribution_prefix, 256 strip_path_prefixes=self._strip_path_prefixes, 257 ) 258 return self 259 260 def __exit__(self, type, value, traceback): 261 self._whlfile.close() 262 self._whlfile = None 263 264 def wheelname(self) -> str: 265 components = [ 266 self._wheelname_fragment_distribution_name, 267 self._version, 268 ] 269 if self._build_tag: 270 components.append(self._build_tag) 271 components += [self._python_tag, self._abi, self._platform] 272 return "-".join(components) + ".whl" 273 274 def filename(self) -> str: 275 if self._outfile: 276 return self._outfile 277 return self.wheelname() 278 279 def disttags(self): 280 return ["-".join([self._python_tag, self._abi, self._platform])] 281 282 def distinfo_path(self, basename): 283 return self._whlfile.distinfo_path(basename) 284 285 def data_path(self, basename): 286 return self._whlfile.data_path(basename) 287 288 def add_file(self, package_filename, real_filename): 289 """Add given file to the distribution.""" 290 self._whlfile.add_file(package_filename, real_filename) 291 292 def add_wheelfile(self): 293 """Write WHEEL file to the distribution""" 294 # TODO(pstradomski): Support non-purelib wheels. 295 wheel_contents = """\ 296Wheel-Version: 1.0 297Generator: bazel-wheelmaker 1.0 298Root-Is-Purelib: {} 299""".format( 300 "true" if self._platform == "any" else "false" 301 ) 302 for tag in self.disttags(): 303 wheel_contents += "Tag: %s\n" % tag 304 self._whlfile.add_string(self.distinfo_path("WHEEL"), wheel_contents) 305 306 def add_metadata(self, metadata, name, description): 307 """Write METADATA file to the distribution.""" 308 # https://www.python.org/dev/peps/pep-0566/ 309 # https://packaging.python.org/specifications/core-metadata/ 310 metadata = re.sub("^Name: .*$", "Name: %s" % name, metadata, flags=re.MULTILINE) 311 metadata += "Version: %s\n\n" % self._version 312 # setuptools seems to insert UNKNOWN as description when none is 313 # provided. 314 metadata += description if description else "UNKNOWN" 315 metadata += "\n" 316 self._whlfile.add_string(self.distinfo_path("METADATA"), metadata) 317 318 def add_recordfile(self): 319 """Write RECORD file to the distribution.""" 320 self._whlfile.add_recordfile() 321 322 323def get_files_to_package(input_files): 324 """Find files to be added to the distribution. 325 326 input_files: list of pairs (package_path, real_path) 327 """ 328 files = {} 329 for package_path, real_path in input_files: 330 files[package_path] = real_path 331 return files 332 333 334def resolve_argument_stamp( 335 argument: str, volatile_status_stamp: Path, stable_status_stamp: Path 336) -> str: 337 """Resolve workspace status stamps format strings found in the argument string 338 339 Args: 340 argument (str): The raw argument represenation for the wheel (may include stamp variables) 341 volatile_status_stamp (Path): The path to a volatile workspace status file 342 stable_status_stamp (Path): The path to a stable workspace status file 343 344 Returns: 345 str: A resolved argument string 346 """ 347 lines = ( 348 volatile_status_stamp.read_text().splitlines() 349 + stable_status_stamp.read_text().splitlines() 350 ) 351 for line in lines: 352 if not line: 353 continue 354 key, value = line.split(" ", maxsplit=1) 355 stamp = "{" + key + "}" 356 argument = argument.replace(stamp, value) 357 358 return argument 359 360 361def parse_args() -> argparse.Namespace: 362 parser = argparse.ArgumentParser(description="Builds a python wheel") 363 metadata_group = parser.add_argument_group("Wheel name, version and platform") 364 metadata_group.add_argument( 365 "--name", required=True, type=str, help="Name of the distribution" 366 ) 367 metadata_group.add_argument( 368 "--version", required=True, type=str, help="Version of the distribution" 369 ) 370 metadata_group.add_argument( 371 "--build_tag", 372 type=str, 373 default="", 374 help="Optional build tag for the distribution", 375 ) 376 metadata_group.add_argument( 377 "--python_tag", 378 type=str, 379 default="py3", 380 help="Python version, e.g. 'py2' or 'py3'", 381 ) 382 metadata_group.add_argument("--abi", type=str, default="none") 383 metadata_group.add_argument( 384 "--platform", type=str, default="any", help="Target platform. " 385 ) 386 387 output_group = parser.add_argument_group("Output file location") 388 output_group.add_argument( 389 "--out", type=str, default=None, help="Override name of ouptut file" 390 ) 391 output_group.add_argument( 392 "--name_file", 393 type=Path, 394 help="A file where the canonical name of the " "wheel will be written", 395 ) 396 397 output_group.add_argument( 398 "--strip_path_prefix", 399 type=str, 400 action="append", 401 default=[], 402 help="Path prefix to be stripped from input package files' path. " 403 "Can be supplied multiple times. Evaluated in order.", 404 ) 405 406 wheel_group = parser.add_argument_group("Wheel metadata") 407 wheel_group.add_argument( 408 "--metadata_file", 409 type=Path, 410 help="Contents of the METADATA file (before appending contents of " 411 "--description_file)", 412 ) 413 wheel_group.add_argument( 414 "--description_file", help="Path to the file with package description" 415 ) 416 wheel_group.add_argument( 417 "--description_content_type", help="Content type of the package description" 418 ) 419 wheel_group.add_argument( 420 "--entry_points_file", 421 help="Path to a correctly-formatted entry_points.txt file", 422 ) 423 424 contents_group = parser.add_argument_group("Wheel contents") 425 contents_group.add_argument( 426 "--input_file", 427 action="append", 428 help="'package_path;real_path' pairs listing " 429 "files to be included in the wheel. " 430 "Can be supplied multiple times.", 431 ) 432 contents_group.add_argument( 433 "--input_file_list", 434 action="append", 435 help="A file that has all the input files defined as a list to avoid " 436 "the long command", 437 ) 438 contents_group.add_argument( 439 "--extra_distinfo_file", 440 action="append", 441 help="'filename;real_path' pairs listing extra files to include in" 442 "dist-info directory. Can be supplied multiple times.", 443 ) 444 contents_group.add_argument( 445 "--data_files", 446 action="append", 447 help="'filename;real_path' pairs listing data files to include in" 448 "data directory. Can be supplied multiple times.", 449 ) 450 451 build_group = parser.add_argument_group("Building requirements") 452 build_group.add_argument( 453 "--volatile_status_file", 454 type=Path, 455 help="Pass in the stamp info file for stamping", 456 ) 457 build_group.add_argument( 458 "--stable_status_file", 459 type=Path, 460 help="Pass in the stamp info file for stamping", 461 ) 462 463 return parser.parse_args(sys.argv[1:]) 464 465 466def _parse_file_pairs(content: List[str]) -> List[List[str]]: 467 """ 468 Parse ; delimited lists of files into a 2D list. 469 """ 470 return [i.split(";", maxsplit=1) for i in content or []] 471 472 473def main() -> None: 474 arguments = parse_args() 475 476 input_files = _parse_file_pairs(arguments.input_file) 477 extra_distinfo_file = _parse_file_pairs(arguments.extra_distinfo_file) 478 data_files = _parse_file_pairs(arguments.data_files) 479 480 for input_file in arguments.input_file_list: 481 with open(input_file) as _file: 482 input_file_list = _file.read().splitlines() 483 for _input_file in input_file_list: 484 input_files.append(_input_file.split(";")) 485 486 all_files = get_files_to_package(input_files) 487 # Sort the files for reproducible order in the archive. 488 all_files = sorted(all_files.items()) 489 490 strip_prefixes = [p for p in arguments.strip_path_prefix] 491 492 if arguments.volatile_status_file and arguments.stable_status_file: 493 name = resolve_argument_stamp( 494 arguments.name, 495 arguments.volatile_status_file, 496 arguments.stable_status_file, 497 ) 498 else: 499 name = arguments.name 500 501 if arguments.volatile_status_file and arguments.stable_status_file: 502 version = resolve_argument_stamp( 503 arguments.version, 504 arguments.volatile_status_file, 505 arguments.stable_status_file, 506 ) 507 else: 508 version = arguments.version 509 510 with WheelMaker( 511 name=name, 512 version=version, 513 build_tag=arguments.build_tag, 514 python_tag=arguments.python_tag, 515 abi=arguments.abi, 516 platform=arguments.platform, 517 outfile=arguments.out, 518 strip_path_prefixes=strip_prefixes, 519 ) as maker: 520 for package_filename, real_filename in all_files: 521 maker.add_file(package_filename, real_filename) 522 maker.add_wheelfile() 523 524 description = None 525 if arguments.description_file: 526 with open( 527 arguments.description_file, "rt", encoding="utf-8" 528 ) as description_file: 529 description = description_file.read() 530 531 metadata = arguments.metadata_file.read_text(encoding="utf-8") 532 533 # This is not imported at the top of the file due to the reliance 534 # on this file in the `whl_library` repository rule which does not 535 # provide `packaging` but does import symbols defined here. 536 from packaging.requirements import Requirement 537 538 # Search for any `Requires-Dist` entries that refer to other files and 539 # expand them. 540 541 def get_new_requirement_line(reqs_text, extra): 542 req = Requirement(reqs_text.strip()) 543 if req.marker: 544 if extra: 545 return f"Requires-Dist: {req.name}{req.specifier}; ({req.marker}) and {extra}" 546 else: 547 return f"Requires-Dist: {req.name}{req.specifier}; {req.marker}" 548 else: 549 return f"Requires-Dist: {req.name}{req.specifier}; {extra}".strip(" ;") 550 551 for meta_line in metadata.splitlines(): 552 if not meta_line.startswith("Requires-Dist: "): 553 continue 554 555 if not meta_line[len("Requires-Dist: ") :].startswith("@"): 556 # This is a normal requirement. 557 package, _, extra = meta_line[len("Requires-Dist: ") :].rpartition(";") 558 if not package: 559 # This is when the package requirement does not have markers. 560 continue 561 extra = extra.strip() 562 metadata = metadata.replace( 563 meta_line, get_new_requirement_line(package, extra) 564 ) 565 continue 566 567 # This is a requirement that refers to a file. 568 file, _, extra = meta_line[len("Requires-Dist: @") :].partition(";") 569 extra = extra.strip() 570 571 reqs = [] 572 for reqs_line in Path(file).read_text(encoding="utf-8").splitlines(): 573 reqs_text = reqs_line.strip() 574 if not reqs_text or reqs_text.startswith(("#", "-")): 575 continue 576 577 # Strip any comments 578 reqs_text, _, _ = reqs_text.partition("#") 579 580 reqs.append(get_new_requirement_line(reqs_text, extra)) 581 582 metadata = metadata.replace(meta_line, "\n".join(reqs)) 583 584 maker.add_metadata( 585 metadata=metadata, 586 name=name, 587 description=description, 588 ) 589 590 if arguments.entry_points_file: 591 maker.add_file( 592 maker.distinfo_path("entry_points.txt"), arguments.entry_points_file 593 ) 594 595 # Sort the files for reproducible order in the archive. 596 for filename, real_path in sorted(data_files): 597 maker.add_file(maker.data_path(filename), real_path) 598 for filename, real_path in sorted(extra_distinfo_file): 599 maker.add_file(maker.distinfo_path(filename), real_path) 600 601 maker.add_recordfile() 602 603 # Since stamping may otherwise change the target name of the 604 # wheel, the canonical name (with stamps resolved) is written 605 # to a file so consumers of the wheel can easily determine 606 # the correct name. 607 arguments.name_file.write_text(maker.wheelname()) 608 609 610if __name__ == "__main__": 611 main() 612