# Copyright 2022 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Check various rules for .gitmodules files."""

import dataclasses
import logging
from pathlib import Path
from typing import Callable, Sequence
import urllib.parse

from pw_cli.plural import plural
from pw_presubmit.presubmit import filter_paths
from pw_presubmit.presubmit_context import (
    PresubmitContext,
    PresubmitFailure,
)
from pw_presubmit import git_repo, presubmit_context


_LOG: logging.Logger = logging.getLogger(__name__)


@dataclasses.dataclass
class Config:
    """Options controlling which submodule definitions are acceptable."""

    # Allow submodules to exist in any form.
    allow_submodules: bool = True

    # Allow direct references to non-Google hosts.
    allow_non_googlesource_hosts: bool = False

    # Allow a specific subset of googlesource.com hosts. If an empty list then
    # all googlesource hosts are permitted.
    allowed_googlesource_hosts: Sequence[str] = ()

    # Require relative URLs, like those that start with "/" or "../".
    require_relative_urls: bool = False

    # Allow "sso://" URLs.
    allow_sso: bool = True

    # Allow use of "git.corp.google.com" URLs.
    allow_git_corp_google_com: bool = True

    # Require a branch for each submodule.
    require_branch: bool = False

    # Arbitrary validator. Gets invoked with the presubmit context, the
    # .gitmodules path, the submodule name, and a dict of the submodule
    # properties. Should throw exceptions or call ctx.fail to register errors.
    validator: (
        Callable[[PresubmitContext, Path, str, dict[str, str]], None] | None
    ) = None


def _parse_gitmodules(path: Path) -> dict[str, dict[str, str]]:
    """Parse a .gitmodules file into a mapping of submodule properties.

    The file is read by running ``git config --file <path> --list`` and
    interpreting each output line as ``submodule.<name>.<param>=<value>``.

    Args:
        path: Location of the .gitmodules file to read.

    Returns:
        A dict keyed by submodule name; each value maps parameter names
        (e.g. ``url``, ``branch``) to their string values.

    Raises:
        PresubmitFailure: A line's key is not of the form
            ``submodule.<name>.<param>``.
    """
    raw_submodules: str = git_repo.git_stdout(
        'config', '--file', path, '--list'
    )
    submodules: dict[str, dict[str, str]] = {}
    for line in raw_submodules.splitlines():
        key: str
        value: str
        # partition() rather than split(): a variable declared without a
        # value is listed without any '=', which must not crash the parser.
        key, _, value = line.partition('=')
        if not key.startswith('submodule.'):
            raise PresubmitFailure(f'unexpected key {key!r}', path)

        # The submodule name may itself contain dots, so the parameter name
        # is whatever follows the *last* dot.
        submodule: str
        separator: str
        param: str
        submodule, separator, param = key.split('.', 1)[1].rpartition('.')
        if not separator:
            raise PresubmitFailure(f'unexpected key {key!r}', path)

        submodules.setdefault(submodule, {})[param] = value

    return submodules


_GERRIT_HOST_SUFFIXES = ('.googlesource.com', '.git.corp.google.com')


def _gerrit_host_name(netloc: str) -> str:
    """Return netloc with one trailing Gerrit host suffix removed, if any."""
    for suffix in _GERRIT_HOST_SUFFIXES:
        if netloc.endswith(suffix):
            # Strip only the matching *trailing* suffix; removing the text
            # anywhere in the host would let crafted hosts collapse onto an
            # allowed name.
            return netloc.removesuffix(suffix)
    return netloc


def process_gitmodules(ctx: PresubmitContext, config: Config, path: Path):
    """Check if a specific .gitmodules file passes the options in the config.

    Violations are reported through ``ctx.fail``; checking then moves on to
    the next submodule so that every offending entry is reported.

    Args:
        ctx: Presubmit context used to register failures.
        config: Rules to enforce.
        path: The .gitmodules file to evaluate.

    Raises:
        TypeError: ``config.allowed_googlesource_hosts`` is not a list or
            tuple.
        PresubmitFailure: An entry of ``config.allowed_googlesource_hosts``
            contains ``.`` or ``-review``, or the file could not be parsed.
    """
    _LOG.debug('Evaluating path %s', path)
    submodules: dict[str, dict[str, str]] = _parse_gitmodules(path)

    if submodules and not config.allow_submodules:
        ctx.fail(
            f'submodules are not permitted but '
            f'{plural(submodules, "submodule", exist=True)} {tuple(submodules)}'
        )

    # Explicit check instead of assert: asserts vanish under "python -O", and
    # a bare string here would silently turn the membership test below into a
    # substring match.
    if not isinstance(config.allowed_googlesource_hosts, (list, tuple)):
        raise TypeError(
            'allowed_googlesource_hosts must be a list or tuple, not '
            f'{type(config.allowed_googlesource_hosts).__name__}'
        )
    for allowed in config.allowed_googlesource_hosts:
        if '.' in allowed or '-review' in allowed:
            raise PresubmitFailure(
                f'invalid googlesource requirement: {allowed}'
            )

    for name, submodule in submodules.items():
        _LOG.debug('======================')
        _LOG.debug('evaluating submodule %s', name)
        _LOG.debug('%r', submodule)

        if config.require_branch:
            _LOG.debug('branch is required')
            if 'branch' not in submodule:
                ctx.fail(
                    f'submodule {name} does not have a branch set but '
                    'branches are required'
                )

        url = submodule.get('url')
        if url is None:
            # Report the malformed entry instead of crashing with KeyError.
            ctx.fail(f'submodule {name} does not have a url')
            continue

        if config.validator:
            config.validator(ctx, path, name, submodule)

        if url.startswith(('/', '../')):
            _LOG.debug('URL is relative, remaining checks are irrelevant')
            continue

        if config.require_relative_urls:
            _LOG.debug('relative URLs required')
            ctx.fail(
                f'submodule {name} has non-relative url {url!r} but '
                'relative urls are required'
            )
            continue

        parsed = urllib.parse.urlparse(url)
        is_sso = parsed.scheme in ('sso', 'rpc')

        if not config.allow_sso:
            _LOG.debug('sso not allowed')
            if is_sso:
                ctx.fail(
                    f'submodule {name} has sso/rpc url {url!r} but '
                    'sso/rpc urls are not allowed'
                )
                continue

        if not config.allow_git_corp_google_com:
            _LOG.debug('git.corp.google.com not allowed')
            if '.git.corp.google.com' in parsed.netloc:
                ctx.fail(
                    f'submodule {name} has git.corp.google.com url '
                    f'{url!r} but git.corp.google.com urls are not '
                    'allowed'
                )
                continue

        if not config.allow_non_googlesource_hosts:
            _LOG.debug('non-google hosted repos not allowed')
            if not is_sso and not parsed.netloc.endswith(
                _GERRIT_HOST_SUFFIXES
            ):
                ctx.fail(
                    f'submodule {name} has prohibited non-Google url {url}'
                )
                continue

        if config.allowed_googlesource_hosts:
            _LOG.debug(
                'allowed googlesource hosts: %r',
                config.allowed_googlesource_hosts,
            )
            _LOG.debug('raw url: %s', url)
            # Only Gerrit-hosted URLs (by host suffix or sso/rpc scheme) are
            # subject to the host allowlist.
            if is_sso or parsed.netloc.endswith(_GERRIT_HOST_SUFFIXES):
                host = _gerrit_host_name(parsed.netloc)
                _LOG.debug('host: %s', host)
                if host not in config.allowed_googlesource_hosts:
                    ctx.fail(
                        f'submodule {name} is from prohibited Google '
                        f'Gerrit host {parsed.netloc}'
                    )


def create(config: Config | None = None):
    """Create a gitmodules presubmit step with a given config.

    Args:
        config: Rules to enforce. When omitted, a fresh default ``Config`` is
            used for this step.

    Returns:
        A presubmit step function, filtered to paths ending in
        ``.gitmodules``, that runs ``process_gitmodules`` on each one.
    """
    # Build the default per call rather than sharing one mutable Config
    # instance created at function-definition time.
    if config is None:
        config = Config()

    @filter_paths(endswith='.gitmodules')
    def gitmodules(ctx: PresubmitContext):
        """Check various rules for .gitmodules files."""
        ctx.paths = presubmit_context.apply_exclusions(ctx)

        for path in ctx.paths:
            process_gitmodules(ctx, config, path)

    return gitmodules