"""Helpers for reading license metadata out of README.chromium files."""

import os.path
import re
from pathlib import Path
from typing import Callable, Dict, List, Optional

import constants
from metadata import Metadata
from mapper import MapperException
from license_type import LicenseType

# The mandatory metadata fields for a single dependency.
KNOWN_FIELDS = {
    "Name",  # Short name (for header on about:credits).
    "URL",  # Project home page.
    "License",  # Software license.
    "License File",  # Relative paths to license texts.
    "Shipped",  # Whether the package is in the shipped product.
    "Version",  # The version for the package.
    "Revision",  # This is equivalent to Version but Chromium is lenient.
}

# The metadata fields that can have multiple values.
MULTIVALUE_FIELDS = {
    "License",
    "License File",
}

# Line used to separate dependencies within the same metadata file.
PATTERN_DEPENDENCY_DIVIDER = re.compile(r"^-{20} DEPENDENCY DIVIDER -{20}$")

# The delimiter used to separate multiple values for one metadata field.
VALUE_DELIMITER = ","

# Maps the license string as written in a README.chromium to a 3-tuple:
#   [0] the representation returned by get_license_file_format(),
#   [1] the LicenseType returned by get_license_type(),
#   [2] the name returned by get_license_bp_name().
_RAW_LICENSE_TO_FORMATTED_DETAILS = {
    "BSD": ("BSD", LicenseType.NOTICE, "SPDX-license-identifier-BSD"),
    "BSD 3-Clause": (
        "BSD_3_CLAUSE", LicenseType.NOTICE,
        "SPDX-license-identifier-BSD-3-Clause"),
    "Apache 2.0": (
        "APACHE_2_0", LicenseType.NOTICE,
        "SPDX-license-identifier-Apache-2.0"),
    "MIT": ("MIT", LicenseType.NOTICE, "SPDX-license-identifier-MIT"),
    "Unicode": (
        "UNICODE", LicenseType.NOTICE,
        "SPDX-license-identifier-Unicode-DFS-2016"),
    "MPL 1.1": (
        "MPL", LicenseType.RECIPROCAL, "SPDX-license-identifier-MPL-1.1"),
    "unencumbered": (
        "UNENCUMBERED", LicenseType.UNENCUMBERED,
        "SPDX-license-identifier-Unlicense"),
}


class InvalidMetadata(Exception):
    """This exception is raised when metadata is invalid."""


def get_license_type(license: str) -> LicenseType:
    """Return the equivalent license type for the provided string license.

    Args:
      license: the license string as written in a README.chromium.

    Raises:
      InvalidMetadata: if the license is not a known license.
    """
    if license in _RAW_LICENSE_TO_FORMATTED_DETAILS:
        return _RAW_LICENSE_TO_FORMATTED_DETAILS[license][1]
    # `raise None` is not a valid raise statement (it fails with an unrelated
    # TypeError), so report the unknown license explicitly instead.
    raise InvalidMetadata(f'Unidentified license "{license}"')


def get_license_bp_name(license: str) -> str:
    """Return the third entry of the details tuple for the given license.

    Raises:
      KeyError: if the license is not a known license.
    """
    return _RAW_LICENSE_TO_FORMATTED_DETAILS[license][2]


def is_ignored_readme_chromium(path: str) -> bool:
    """Return whether `path` is listed in constants.IGNORED_README."""
    return path in constants.IGNORED_README


def get_most_restrictive_type(licenses: List[str]) -> LicenseType:
    """Returns the most restrictive license according to the values of
    LicenseType.

    Args:
      licenses: license strings as written in a README.chromium.

    Returns: the LicenseType with the highest `value` among the provided
      licenses, or LicenseType.UNKNOWN if none compares greater than it
      (including when `licenses` is empty).

    Raises:
      KeyError: if any of the licenses is not a known license.
    """
    most_restrictive = LicenseType.UNKNOWN
    for raw_license in licenses:
        candidate = _RAW_LICENSE_TO_FORMATTED_DETAILS[raw_license][1]
        if candidate.value > most_restrictive.value:
            most_restrictive = candidate
    return most_restrictive


def get_license_file_format(license: str):
    """Return a different representation of the license that is better suited
    for file names.

    Args:
      license: the license string as written in a README.chromium.

    Raises:
      InvalidMetadata: if the license is not a known license.
    """
    if license in _RAW_LICENSE_TO_FORMATTED_DETAILS:
        return _RAW_LICENSE_TO_FORMATTED_DETAILS[license][0]
    # `raise None` is not a valid raise statement (it fails with an unrelated
    # TypeError), so report the unknown license explicitly instead.
    raise InvalidMetadata(f'Unidentified license "{license}"')


def _parse_raw_dependencies(readme_path: str) -> List[Dict[str, object]]:
    """Parse every dependency section of the file into a field dictionary.

    Only fields listed in KNOWN_FIELDS are kept (matched case-insensitively);
    values of MULTIVALUE_FIELDS are split on VALUE_DELIMITER into a list.

    Raises:
      InvalidMetadata: if a field appears twice for the same dependency.
    """
    field_lookup = {name.lower(): name for name in KNOWN_FIELDS}

    dependencies = []
    current = {}
    for line in Path(readme_path).read_text(encoding="utf-8").split("\n"):
        line = line.strip()
        # Skip empty lines.
        if not line:
            continue

        # Check if a new dependency will be described.
        if PATTERN_DEPENDENCY_DIVIDER.match(line):
            # Save the metadata for the previous dependency.
            if current:
                dependencies.append(current)
            current = {}
            continue

        # Otherwise, try to parse the field name and field value. Lines
        # without a ": " separator are not metadata and are ignored.
        raw_field, separator, value = line.partition(": ")
        if not separator:
            continue
        field = field_lookup.get(raw_field.lower())
        if not field:
            continue
        if field in current:
            # Duplicate field for this dependency.
            raise InvalidMetadata(f"duplicate '{field}' in {readme_path}")
        if field in MULTIVALUE_FIELDS:
            current[field] = [
                entry.strip() for entry in value.split(VALUE_DELIMITER)
            ]
        else:
            current[field] = value

    # The end of the file has been reached. Save the metadata for the
    # last dependency, if available.
    if current:
        dependencies.append(current)
    return dependencies


def parse_chromium_readme_file(
        readme_path: str,
        post_process_operation: Optional[Callable] = None) -> Metadata:
    """Parses the metadata from the file.

    Only the first dependency described in the file is used to build the
    returned Metadata.

    Args:
      readme_path: the path to a file from which to parse metadata.
      post_process_operation: Operation done on the dictionary after parsing
        metadata, this callable must return a dictionary. When omitted, the
        operation registered for `readme_path` in
        constants.POST_PROCESS_OPERATION is used, falling back to the
        identity function.

    Returns: the metadata for the first dependency described in the file.

    Raises:
      InvalidMetadata - Raised when the metadata can't be parsed correctly.
        This could happen due to plenty of reasons (eg: unidentifiable
        license, license file path not declared or duplicate fields).
      Exception - Raised when no metadata could be parsed at all, or when the
        post-process operation fails with a MapperException.
    """
    dependencies = _parse_raw_dependencies(readme_path)
    if not dependencies:
        raise Exception(
            f'Failed to parse any valid metadata from "{readme_path}"')

    if post_process_operation is None:
        post_process_operation = constants.POST_PROCESS_OPERATION.get(
            readme_path, lambda _metadata: _metadata)
    try:
        metadata = Metadata(post_process_operation(dependencies[0]))
    except MapperException as err:
        # Chain the original error so the mapper failure stays visible.
        raise Exception(f"Failed to post-process {readme_path}") from err

    for declared_license in metadata.get_licenses():
        if declared_license not in _RAW_LICENSE_TO_FORMATTED_DETAILS:
            raise InvalidMetadata(
                f'"{readme_path}" contains unidentified license '
                f'"{declared_license}"')
    if not metadata.get_license_file_path():
        raise InvalidMetadata(
            f"License file path not declared in {readme_path}")
    return metadata


def resolve_license_path(readme_chromium_path: str,
                         license_path: str) -> str:
    """Resolves the relative path from the repository root to the license file.

    Args:
      readme_chromium_path: Relative path to the README.chromium starting
        from the root of the repository.
      license_path: The field value of `License File` in the README.chromium.
        If the value of the license_path starts with `//` then that means
        that the license file path is already relative from the repo path.
        Otherwise, it is assumed that the provided path is relative from the
        README.chromium path.

    Returns: The relative path from the repository root to the declared
      license file.
    """
    if license_path.startswith("//"):
        # This is a relative path that starts from the root of the
        # external/cronet repository, we should not use the directory path
        # for resolution here.
        # See https://source.chromium.org/chromium/chromium/src/+/main:third_party/rust/bytes/v1/README.chromium
        # as an example of such case.
        return license_path[2:]
    # Relative path from the README.chromium, append the path from root of
    # repo until the README.chromium so it becomes a relative path from the
    # root of repo.
    # NOTE(review): the join only yields a valid path if callers pass the
    # directory containing README.chromium rather than the file itself --
    # confirm against callers.
    return os.path.join(readme_chromium_path, license_path)