xref: /aosp_15_r20/external/bazelbuild-rules_python/tools/wheelmaker.py (revision 60517a1edbc8ecf509223e9af94a7adec7d736b8)
1# Copyright 2018 The Bazel Authors. All rights reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#    http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from __future__ import annotations
16
17import argparse
18import base64
19import hashlib
20import os
21import re
22import stat
23import sys
24import zipfile
25from pathlib import Path
26
# Fixed (year, month, day, hour, minute, second) timestamp stamped on every
# archive entry so that the produced wheel does not depend on build time.
_ZIP_EPOCH = (1980, 1, 1, 0, 0, 0)
28
29
def commonpath(path1, path2):
    """Return the leading path components shared by ``path1`` and ``path2``.

    Both paths are split on ``os.path.sep`` and compared component by
    component; the matching prefix is re-joined with ``os.path.sep``. An
    empty string is returned when the first components already differ.
    """
    sep = os.path.sep
    left = path1.split(sep)
    right = path2.split(sep)

    # Count how many leading components agree, stopping at the shorter path.
    shared = 0
    limit = min(len(left), len(right))
    while shared < limit and left[shared] == right[shared]:
        shared += 1

    return sep.join(left[:shared])
37
38
def escape_filename_segment(segment):
    """Escapes a filename segment per https://www.python.org/dev/peps/pep-0427/#escaping-and-unicode

    Every run of characters that is not a word character, digit or ``.`` is
    replaced by a single ``_``.

    This is a legacy function, kept for backwards compatibility,
    and may be removed in the future. See `escape_filename_distribution_name`
    and `normalize_pep440` for the modern alternatives.
    """
    # The flag must be passed by keyword: the fourth positional parameter of
    # re.sub() is `count`, so a positional re.UNICODE would cap the number of
    # replacements instead of setting the flag.
    return re.sub(r"[^\w\d.]+", "_", segment, flags=re.UNICODE)
47
48
def normalize_package_name(name):
    """Return the normalized form of a package name.

    Runs of ``-``, ``_`` and ``.`` collapse into a single ``-`` and the
    result is lower-cased.

    See https://packaging.python.org/en/latest/specifications/name-normalization/
    """
    separator_runs = re.compile(r"[-_.]+")
    collapsed = separator_runs.sub("-", name)
    return collapsed.lower()
55
56
def escape_filename_distribution_name(name):
    """Return ``name`` in the form used for the distribution part of a filename.

    The name is first normalized with `normalize_package_name`, then every
    ``-`` is turned into ``_``.

    See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode
    """
    normalized = normalize_package_name(name)
    return "_".join(normalized.split("-"))
63
64
def normalize_pep440(version):
    """Return a PEP 440 normalized version string, tolerating placeholders.

    Three attempts are made, in order:

    1. Parse ``version`` as-is and return its normalized form.
    2. Replace each brace placeholder such as ``{BUILD_TIMESTAMP}`` with
       ``0`` (an unresolved placeholder means an unstamped build, which
       still needs a valid version) and append the original string,
       sanitized to dot-separated lowercase alphanumerics, as a local
       version segment so the placeholder involved stays visible.
    3. Fall back to version ``0`` with the same sanitized string as the
       local version segment.
    """

    # Imported lazily, as in the rest of this file, so that importing this
    # module does not require `packaging`.
    import packaging.version

    parse = packaging.version.Version
    invalid = packaging.version.InvalidVersion

    try:
        return str(parse(version))
    except invalid:
        pass

    local_segment = re.sub(r"[^a-z0-9]+", ".", version.lower()).strip(".")
    zeroed = re.sub(r"\{\w+\}", "0", version)
    # If a local segment ("+...") is already present, extend it with "."
    # rather than starting a second one.
    if "+" in zeroed:
        joiner = "."
    else:
        joiner = "+"
    candidate = f"{zeroed}{joiner}{local_segment}"

    try:
        return str(parse(candidate))
    except invalid:
        return str(parse(f"0+{local_segment}"))
97
98
class _WhlFile(zipfile.ZipFile):
    """A ``zipfile.ZipFile`` that writes wheel contents and tracks RECORD data.

    Every entry written through `add_file` or `add_string` is stored with a
    fixed timestamp and fixed permissions, and its archive name, sha256
    digest and size are remembered so `add_recordfile` can emit the RECORD
    file.
    """

    def __init__(
        self,
        filename,
        *,
        mode,
        distribution_prefix: str,
        strip_path_prefixes=None,
        compression=zipfile.ZIP_DEFLATED,
        **kwargs,
    ):
        """Open the archive.

        Args:
            filename: path of the wheel file, forwarded to ``ZipFile``.
            mode: ``ZipFile`` open mode.
            distribution_prefix: prefix used to build the ``.dist-info`` and
                ``.data`` directory names.
            strip_path_prefixes: prefixes removed from archive names in
                `add_file`; the first matching prefix wins.
            compression: ``ZipFile`` compression method.
            **kwargs: forwarded to ``ZipFile``.
        """
        self._distribution_prefix = distribution_prefix

        self._strip_path_prefixes = strip_path_prefixes or []
        # Entries for the RECORD file as (filename, hash, size) tuples.
        self._record = []

        super().__init__(filename, mode=mode, compression=compression, **kwargs)

    def distinfo_path(self, basename):
        """Return the archive path of ``basename`` inside the .dist-info directory."""
        return f"{self._distribution_prefix}.dist-info/{basename}"

    def data_path(self, basename):
        """Return the archive path of ``basename`` inside the .data directory."""
        return f"{self._distribution_prefix}.data/{basename}"

    def _arcname_from(self, name):
        """Map a package path to the name it gets inside the archive."""
        # Always use unix path separators.
        normalized_arcname = name.replace(os.path.sep, "/")
        # Don't manipulate filenames in the .dist-info or .data directories.
        if normalized_arcname.startswith(self._distribution_prefix):
            return normalized_arcname
        for prefix in self._strip_path_prefixes:
            if normalized_arcname.startswith(prefix):
                return normalized_arcname[len(prefix) :]

        return normalized_arcname

    def add_file(self, package_filename, real_filename):
        """Add given file to the distribution.

        If ``real_filename`` is a directory, its entries are added
        recursively under ``package_filename``.
        """
        if os.path.isdir(real_filename):
            # os.listdir() returns entries in arbitrary order; sort them so
            # the archive layout is reproducible across machines and runs.
            for entry in sorted(os.listdir(real_filename)):
                self.add_file(
                    "{}/{}".format(package_filename, entry),
                    "{}/{}".format(real_filename, entry),
                )
            return

        arcname = self._arcname_from(package_filename)
        zinfo = self._zipinfo(arcname)

        # Write file to the zip archive while computing the hash and length
        hasher = hashlib.sha256()
        size = 0
        with open(real_filename, "rb") as fsrc:
            with self.open(zinfo, "w") as fdst:
                while True:
                    block = fsrc.read(2**20)
                    if not block:
                        break
                    fdst.write(block)
                    hasher.update(block)
                    size += len(block)

        self._add_to_record(arcname, self._serialize_digest(hasher), size)

    def add_string(self, filename, contents):
        """Add given 'contents' as filename to the distribution.

        ``contents`` may be ``str`` (encoded as UTF-8) or ``bytes``.
        """
        if isinstance(contents, str):
            contents = contents.encode("utf-8", "surrogateescape")
        zinfo = self._zipinfo(filename)
        self.writestr(zinfo, contents)
        hasher = hashlib.sha256()
        hasher.update(contents)
        self._add_to_record(filename, self._serialize_digest(hasher), len(contents))

    def _serialize_digest(self, hash):
        """Return ``b"sha256=<digest>"`` for a hashlib object, as RECORD expects."""
        # https://www.python.org/dev/peps/pep-0376/#record
        # "base64.urlsafe_b64encode(digest) with trailing = removed"
        digest = base64.urlsafe_b64encode(hash.digest())
        digest = b"sha256=" + digest.rstrip(b"=")
        return digest

    def _add_to_record(self, filename, hash, size):
        """Remember one RECORD row; ``size`` is stored as ASCII bytes."""
        size = str(size).encode("ascii")
        self._record.append((filename, hash, size))

    def _zipinfo(self, filename):
        """Construct deterministic ZipInfo entry for a file named filename"""
        # Strip leading path separators to mirror ZipInfo.from_file behavior
        separators = os.path.sep
        if os.path.altsep is not None:
            separators += os.path.altsep
        arcname = filename.lstrip(separators)

        zinfo = zipfile.ZipInfo(filename=arcname, date_time=_ZIP_EPOCH)
        zinfo.create_system = 3  # ZipInfo entry created on a unix-y system
        # Both pip and installer expect the regular file bit to be set in order for the
        # executable bit to be preserved after extraction
        # https://github.com/pypa/pip/blob/23.3.2/src/pip/_internal/utils/unpacking.py#L96-L100
        # https://github.com/pypa/installer/blob/0.7.0/src/installer/sources.py#L310-L313
        zinfo.external_attr = (
            stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO | stat.S_IFREG
        ) << 16  # permissions: -rwxrwxrwx
        zinfo.compress_type = self.compression
        return zinfo

    @staticmethod
    def _quote_record_field(field):
        """CSV-quote a RECORD field (bytes) when it contains special characters.

        RECORD is a CSV file, so a path containing a comma, a double quote
        or a line break has to be wrapped in double quotes (with embedded
        quotes doubled) to remain a single column.
        """
        if any(special in field for special in (b",", b'"', b"\n", b"\r")):
            return b'"' + field.replace(b'"', b'""') + b'"'
        return field

    def add_recordfile(self):
        """Write RECORD file to the distribution.

        Returns:
            The RECORD contents as bytes. The RECORD file lists itself with
            an empty hash and size.
        """
        record_path = self.distinfo_path("RECORD")
        entries = self._record + [(record_path, b"", b"")]
        rows = []
        for filename, digest, size in entries:
            if isinstance(filename, str):
                filename = filename.lstrip("/").encode("utf-8", "surrogateescape")
            rows.append(
                b"%s,%s,%s\n" % (self._quote_record_field(filename), digest, size)
            )
        # Join once instead of growing a bytes object row by row.
        contents = b"".join(rows)

        self.add_string(record_path, contents)
        return contents
219
220
class WheelMaker:
    """Context manager that assembles a wheel archive.

    Entering the context opens the underlying `_WhlFile` at `filename()`;
    leaving it closes the archive. The ``add_*`` methods and the
    ``*_path`` helpers must be used while the context is active.
    """

    def __init__(
        self,
        name,
        version,
        build_tag,
        python_tag,
        abi,
        platform,
        outfile=None,
        strip_path_prefixes=None,
    ):
        self._name = name
        self._version = normalize_pep440(version)
        self._build_tag = build_tag
        self._python_tag = python_tag
        self._abi = abi
        self._platform = platform
        self._outfile = outfile
        self._strip_path_prefixes = strip_path_prefixes

        escaped_name = escape_filename_distribution_name(self._name)
        self._wheelname_fragment_distribution_name = escaped_name
        # Shared prefix of the "<prefix>.dist-info" and "<prefix>.data" dirs.
        self._distribution_prefix = f"{escaped_name}-{self._version}"

        # Only set while the context manager is active.
        self._whlfile = None

    def __enter__(self):
        archive = _WhlFile(
            self.filename(),
            mode="w",
            distribution_prefix=self._distribution_prefix,
            strip_path_prefixes=self._strip_path_prefixes,
        )
        self._whlfile = archive
        return self

    def __exit__(self, type, value, traceback):
        archive, self._whlfile = self._whlfile, None
        archive.close()

    def wheelname(self) -> str:
        """Return the canonical ``.whl`` filename built from the wheel's tags."""
        parts = [self._wheelname_fragment_distribution_name, self._version]
        # The build tag is optional and sits between version and python tag.
        if self._build_tag:
            parts.append(self._build_tag)
        parts.extend((self._python_tag, self._abi, self._platform))
        return "-".join(parts) + ".whl"

    def filename(self) -> str:
        """Return the output path: the explicit ``outfile`` if set, else `wheelname()`."""
        return self._outfile or self.wheelname()

    def disttags(self):
        """Return the list of ``python-abi-platform`` tags (a single entry)."""
        tag = "-".join((self._python_tag, self._abi, self._platform))
        return [tag]

    def distinfo_path(self, basename):
        """Return the archive path of ``basename`` inside the .dist-info directory."""
        return self._whlfile.distinfo_path(basename)

    def data_path(self, basename):
        """Return the archive path of ``basename`` inside the .data directory."""
        return self._whlfile.data_path(basename)

    def add_file(self, package_filename, real_filename):
        """Add the file at ``real_filename`` to the wheel as ``package_filename``."""
        self._whlfile.add_file(package_filename, real_filename)

    def add_wheelfile(self):
        """Write the WHEEL file into the .dist-info directory."""
        # TODO(pstradomski): Support non-purelib wheels.
        purelib = "true" if self._platform == "any" else "false"
        header = (
            "Wheel-Version: 1.0\n"
            "Generator: bazel-wheelmaker 1.0\n"
            "Root-Is-Purelib: {}\n"
        ).format(purelib)
        tag_lines = ["Tag: %s\n" % tag for tag in self.disttags()]
        self._whlfile.add_string(
            self.distinfo_path("WHEEL"), header + "".join(tag_lines)
        )

    def add_metadata(self, metadata, name, description):
        """Write the METADATA file into the .dist-info directory.

        Any ``Name:`` line in ``metadata`` is rewritten to ``name``, then a
        ``Version:`` header, a blank line and the description are appended.
        """
        # https://www.python.org/dev/peps/pep-0566/
        # https://packaging.python.org/specifications/core-metadata/
        renamed = re.sub(
            "^Name: .*$", "Name: %s" % name, metadata, flags=re.MULTILINE
        )
        version_header = "Version: %s\n\n" % self._version
        # setuptools seems to insert UNKNOWN as description when none is
        # provided.
        body = description if description else "UNKNOWN"
        contents = renamed + version_header + body + "\n"
        self._whlfile.add_string(self.distinfo_path("METADATA"), contents)

    def add_recordfile(self):
        """Write the RECORD file into the .dist-info directory."""
        self._whlfile.add_recordfile()
321
322
def get_files_to_package(input_files):
    """Build the mapping of files to be added to the distribution.

    input_files: list of pairs (package_path, real_path)

    Returns a dict keyed by package_path; when a package_path occurs more
    than once, the last real_path wins.
    """
    return {package_path: real_path for package_path, real_path in input_files}
332
333
def resolve_argument_stamp(
    argument: str, volatile_status_stamp: Path, stable_status_stamp: Path
) -> str:
    """Resolve workspace status stamps format strings found in the argument string

    Each non-empty line of the two status files is read as ``KEY VALUE``
    (split on the first space) and every ``{KEY}`` occurrence in
    ``argument`` is replaced by ``VALUE``. Lines from the volatile file are
    applied before lines from the stable file.

    Args:
        argument (str): The raw argument representation for the wheel (may include stamp variables)
        volatile_status_stamp (Path): The path to a volatile workspace status file
        stable_status_stamp (Path): The path to a stable workspace status file

    Returns:
        str: A resolved argument string
    """
    lines = (
        volatile_status_stamp.read_text().splitlines()
        + stable_status_stamp.read_text().splitlines()
    )
    for line in lines:
        if not line:
            continue
        # partition() rather than split(): a line holding only a key (no
        # space, hence no value) resolves to an empty value instead of
        # raising ValueError on unpacking.
        key, _, value = line.partition(" ")
        argument = argument.replace("{" + key + "}", value)

    return argument
359
360
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the wheel builder's command line.

    Args:
        argv: optional list of argument strings to parse. When omitted,
            ``sys.argv[1:]`` is used.

    Returns:
        The parsed arguments namespace.
    """
    parser = argparse.ArgumentParser(description="Builds a python wheel")
    metadata_group = parser.add_argument_group("Wheel name, version and platform")
    metadata_group.add_argument(
        "--name", required=True, type=str, help="Name of the distribution"
    )
    metadata_group.add_argument(
        "--version", required=True, type=str, help="Version of the distribution"
    )
    metadata_group.add_argument(
        "--build_tag",
        type=str,
        default="",
        help="Optional build tag for the distribution",
    )
    metadata_group.add_argument(
        "--python_tag",
        type=str,
        default="py3",
        help="Python version, e.g. 'py2' or 'py3'",
    )
    metadata_group.add_argument("--abi", type=str, default="none")
    metadata_group.add_argument(
        "--platform", type=str, default="any", help="Target platform."
    )

    output_group = parser.add_argument_group("Output file location")
    output_group.add_argument(
        "--out", type=str, default=None, help="Override name of output file"
    )
    output_group.add_argument(
        "--name_file",
        type=Path,
        help="A file where the canonical name of the wheel will be written",
    )

    output_group.add_argument(
        "--strip_path_prefix",
        type=str,
        action="append",
        default=[],
        help="Path prefix to be stripped from input package files' path. "
        "Can be supplied multiple times. Evaluated in order.",
    )

    wheel_group = parser.add_argument_group("Wheel metadata")
    wheel_group.add_argument(
        "--metadata_file",
        type=Path,
        help="Contents of the METADATA file (before appending contents of "
        "--description_file)",
    )
    wheel_group.add_argument(
        "--description_file", help="Path to the file with package description"
    )
    wheel_group.add_argument(
        "--description_content_type", help="Content type of the package description"
    )
    wheel_group.add_argument(
        "--entry_points_file",
        help="Path to a correctly-formatted entry_points.txt file",
    )

    contents_group = parser.add_argument_group("Wheel contents")
    contents_group.add_argument(
        "--input_file",
        action="append",
        help="'package_path;real_path' pairs listing "
        "files to be included in the wheel. "
        "Can be supplied multiple times.",
    )
    contents_group.add_argument(
        "--input_file_list",
        action="append",
        help="A file that has all the input files defined as a list to avoid "
        "the long command",
    )
    # NOTE: adjacent string literals are concatenated verbatim, so each
    # fragment below ends with an explicit space.
    contents_group.add_argument(
        "--extra_distinfo_file",
        action="append",
        help="'filename;real_path' pairs listing extra files to include in "
        "dist-info directory. Can be supplied multiple times.",
    )
    contents_group.add_argument(
        "--data_files",
        action="append",
        help="'filename;real_path' pairs listing data files to include in "
        "data directory. Can be supplied multiple times.",
    )

    build_group = parser.add_argument_group("Building requirements")
    build_group.add_argument(
        "--volatile_status_file",
        type=Path,
        help="Pass in the stamp info file for stamping",
    )
    build_group.add_argument(
        "--stable_status_file",
        type=Path,
        help="Pass in the stamp info file for stamping",
    )

    return parser.parse_args(sys.argv[1:] if argv is None else argv)
464
465
466def _parse_file_pairs(content: List[str]) -> List[List[str]]:
467    """
468    Parse ; delimited lists of files into a 2D list.
469    """
470    return [i.split(";", maxsplit=1) for i in content or []]
471
472
def main() -> None:
    """Build a wheel from the command-line arguments.

    Steps: collect the input files, resolve stamp placeholders in the name
    and version (when both status files are given), write the package
    files, WHEEL and METADATA (with ``Requires-Dist`` lines rewritten and
    ``@file`` references expanded), optional entry points, data and extra
    dist-info files, then RECORD. Finally the canonical wheel name is
    written to ``--name_file`` when that option is supplied.
    """
    arguments = parse_args()

    input_files = _parse_file_pairs(arguments.input_file)
    extra_distinfo_file = _parse_file_pairs(arguments.extra_distinfo_file)
    data_files = _parse_file_pairs(arguments.data_files)

    # --input_file_list is optional; argparse leaves it as None when absent.
    for input_file in arguments.input_file_list or []:
        with open(input_file) as _file:
            input_file_list = _file.read().splitlines()
        # Parse exactly like --input_file (split on the first ';' only) and
        # ignore blank lines, which cannot form a (package, real) pair.
        input_files.extend(
            _parse_file_pairs([line for line in input_file_list if line.strip()])
        )

    all_files = get_files_to_package(input_files)
    # Sort the files for reproducible order in the archive.
    all_files = sorted(all_files.items())

    strip_prefixes = list(arguments.strip_path_prefix)

    # Stamping needs both status files; otherwise use the raw values.
    if arguments.volatile_status_file and arguments.stable_status_file:
        name = resolve_argument_stamp(
            arguments.name,
            arguments.volatile_status_file,
            arguments.stable_status_file,
        )
        version = resolve_argument_stamp(
            arguments.version,
            arguments.volatile_status_file,
            arguments.stable_status_file,
        )
    else:
        name = arguments.name
        version = arguments.version

    with WheelMaker(
        name=name,
        version=version,
        build_tag=arguments.build_tag,
        python_tag=arguments.python_tag,
        abi=arguments.abi,
        platform=arguments.platform,
        outfile=arguments.out,
        strip_path_prefixes=strip_prefixes,
    ) as maker:
        for package_filename, real_filename in all_files:
            maker.add_file(package_filename, real_filename)
        maker.add_wheelfile()

        description = None
        if arguments.description_file:
            with open(
                arguments.description_file, "rt", encoding="utf-8"
            ) as description_file:
                description = description_file.read()

        metadata = arguments.metadata_file.read_text(encoding="utf-8")

        # This is not imported at the top of the file due to the reliance
        # on this file in the `whl_library` repository rule which does not
        # provide `packaging` but does import symbols defined here.
        from packaging.requirements import Requirement

        # Search for any `Requires-Dist` entries that refer to other files and
        # expand them.

        def get_new_requirement_line(reqs_text, extra):
            """Format one Requires-Dist line, combining its marker with ``extra``."""
            req = Requirement(reqs_text.strip())
            if req.marker:
                if extra:
                    return f"Requires-Dist: {req.name}{req.specifier}; ({req.marker}) and {extra}"
                else:
                    return f"Requires-Dist: {req.name}{req.specifier}; {req.marker}"
            else:
                return f"Requires-Dist: {req.name}{req.specifier}; {extra}".strip(" ;")

        for meta_line in metadata.splitlines():
            if not meta_line.startswith("Requires-Dist: "):
                continue

            if not meta_line[len("Requires-Dist: ") :].startswith("@"):
                # This is a normal requirement.
                package, _, extra = meta_line[len("Requires-Dist: ") :].rpartition(";")
                if not package:
                    # This is when the package requirement does not have markers.
                    continue
                extra = extra.strip()
                metadata = metadata.replace(
                    meta_line, get_new_requirement_line(package, extra)
                )
                continue

            # This is a requirement that refers to a file.
            file, _, extra = meta_line[len("Requires-Dist: @") :].partition(";")
            extra = extra.strip()

            reqs = []
            for reqs_line in Path(file).read_text(encoding="utf-8").splitlines():
                reqs_text = reqs_line.strip()
                # Skip blanks, comment lines and pip option lines ("-r", ...).
                if not reqs_text or reqs_text.startswith(("#", "-")):
                    continue

                # Strip any comments
                reqs_text, _, _ = reqs_text.partition("#")

                reqs.append(get_new_requirement_line(reqs_text, extra))

            metadata = metadata.replace(meta_line, "\n".join(reqs))

        maker.add_metadata(
            metadata=metadata,
            name=name,
            description=description,
        )

        if arguments.entry_points_file:
            maker.add_file(
                maker.distinfo_path("entry_points.txt"), arguments.entry_points_file
            )

        # Sort the files for reproducible order in the archive.
        for filename, real_path in sorted(data_files):
            maker.add_file(maker.data_path(filename), real_path)
        for filename, real_path in sorted(extra_distinfo_file):
            maker.add_file(maker.distinfo_path(filename), real_path)

        maker.add_recordfile()

        # Since stamping may otherwise change the target name of the
        # wheel, the canonical name (with stamps resolved) is written
        # to a file so consumers of the wheel can easily determine
        # the correct name.
        # --name_file is optional, so only write it when it was requested.
        if arguments.name_file:
            arguments.name_file.write_text(maker.wheelname())
608
609
# Build the wheel only when this file is executed as a script, so that
# importing it for its helpers has no side effects.
if __name__ == "__main__":
    main()
612