xref: /aosp_15_r20/external/pigweed/pw_presubmit/py/pw_presubmit/gitmodules.py (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1# Copyright 2022 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""Check various rules for .gitmodules files."""
15
16import dataclasses
17import logging
18from pathlib import Path
19from typing import Callable, Sequence
20import urllib.parse
21
22from pw_cli.plural import plural
23from pw_presubmit.presubmit import filter_paths
24from pw_presubmit.presubmit_context import (
25    PresubmitContext,
26    PresubmitFailure,
27)
28from pw_presubmit import git_repo, presubmit_context
29
30
# Module-level logger, named after this module via __name__.
_LOG: logging.Logger = logging.getLogger(__name__)
32
33
@dataclasses.dataclass
class Config:
    """Rules that process_gitmodules() enforces on a .gitmodules file.

    Every field has a default, so ``Config()`` is a usable configuration.
    """

    # Allow submodules to exist in any form. When False, a .gitmodules file
    # that declares any submodule is reported as a failure.
    allow_submodules: bool = True

    # Allow direct references to non-Google hosts. When False, a URL whose
    # scheme is not "sso"/"rpc" must have a host ending in
    # ".googlesource.com" or ".git.corp.google.com".
    allow_non_googlesource_hosts: bool = False

    # Allow a specific subset of googlesource.com hosts. If an empty list then
    # all googlesource hosts are permitted. Entries are compared against the
    # host with the Gerrit suffix removed, so they must not contain "." or
    # "-review". The value must be a list or a tuple.
    allowed_googlesource_hosts: Sequence[str] = ()

    # Require relative URLs, like those that start with "/" or "../".
    require_relative_urls: bool = False

    # Allow "sso://" URLs. When False, "rpc://" URLs are rejected as well.
    allow_sso: bool = True

    # Allow use of "git.corp.google.com" URLs.
    allow_git_corp_google_com: bool = True

    # Require a branch for each submodule.
    require_branch: bool = False

    # Arbitrary validator. Gets invoked once per submodule with the presubmit
    # context, the path of the .gitmodules file, the submodule name, and a
    # dict of the submodule properties. Should throw exceptions or call
    # ctx.fail to register errors.
    validator: (
        Callable[[PresubmitContext, Path, str, dict[str, str]], None] | None
    ) = None
64
65
def _parse_gitmodules(path: Path) -> dict[str, dict[str, str]]:
    """Parse a .gitmodules file into per-submodule property dicts.

    The file is read by passing ``config --file <path> --list`` to
    ``git_repo.git_stdout`` and grouping the resulting
    ``submodule.<name>.<param>=<value>`` lines by submodule name.

    Args:
        path: The .gitmodules file to read.

    Returns:
        A dict mapping each submodule name to a dict of its parameters (for
        example ``url`` or ``branch``) and their values. A parameter that is
        listed without a value maps to the empty string.

    Raises:
        PresubmitFailure: If a listed key is not of the form
            ``submodule.<name>.<param>``.
    """
    raw_submodules: str = git_repo.git_stdout(
        'config', '--file', path, '--list'
    )
    submodules: dict[str, dict[str, str]] = {}
    for line in raw_submodules.splitlines():
        # "git config --list" prints a bare key with no "=" for a variable
        # that has no value, so partition() is used rather than unpacking the
        # result of split(), which would raise ValueError on such a line.
        key, _, value = line.partition('=')
        if not key.startswith('submodule.'):
            raise PresubmitFailure(f'unexpected key {key!r}', path)

        # Drop the leading "submodule." and split the remainder on its last
        # dot, since the submodule name itself may contain dots.
        submodule, separator, param = key.split('.', 1)[1].rpartition('.')
        if not separator:
            # A key such as "submodule.foo" has no "<name>.<param>" part.
            raise PresubmitFailure(f'unexpected key {key!r}', path)

        submodules.setdefault(submodule, {})[param] = value

    return submodules
87
88
# Host-name suffixes that process_gitmodules() accepts as Google Gerrit hosts.
# The same suffixes are removed from a host before it is compared against
# Config.allowed_googlesource_hosts.
_GERRIT_HOST_SUFFIXES = ('.googlesource.com', '.git.corp.google.com')
90
91
def process_gitmodules(ctx: PresubmitContext, config: Config, path: Path):
    """Check if a specific .gitmodules file passes the options in the config.

    Problems found in individual submodules are reported with ``ctx.fail`` and
    checking moves on to the next submodule, so one run can report problems
    in several submodules. At most one URL-related failure is reported per
    submodule.

    Args:
        ctx: Presubmit context used to report failures.
        config: The rules to enforce.
        path: The .gitmodules file to check.

    Raises:
        PresubmitFailure: If an entry of ``config.allowed_googlesource_hosts``
            contains "." or "-review", or if ``_parse_gitmodules`` rejects
            the file.
    """
    _LOG.debug('Evaluating path %s', path)
    submodules: dict[str, dict[str, str]] = _parse_gitmodules(path)

    if submodules and not config.allow_submodules:
        ctx.fail(
            f'submodules are not permitted but '
            f'{plural(submodules, "submodule", exist=True)} {tuple(submodules)}'
        )

    # A bare string would otherwise be iterated character by character below.
    assert isinstance(config.allowed_googlesource_hosts, (list, tuple))
    for allowed in config.allowed_googlesource_hosts:
        # Entries are short host names: no domain part, no "-review" alias.
        if '.' in allowed or '-review' in allowed:
            raise PresubmitFailure(
                f'invalid googlesource requirement: {allowed}'
            )

    for name, submodule in submodules.items():
        _LOG.debug('======================')
        _LOG.debug('evaluating submodule %s', name)
        _LOG.debug('%r', submodule)

        if config.require_branch:
            _LOG.debug('branch is required')
            if 'branch' not in submodule:
                ctx.fail(
                    f'submodule {name} does not have a branch set but '
                    'branches are required'
                )

        # A submodule section without a url cannot be checked any further;
        # report it instead of letting a KeyError abort the whole step.
        url = submodule.get('url')
        if url is None:
            ctx.fail(f'submodule {name} does not have a url set')
            continue

        if config.validator:
            config.validator(ctx, path, name, submodule)

        if url.startswith(('/', '../')):
            _LOG.debug('URL is relative, remaining checks are irrelevant')
            continue

        if config.require_relative_urls:
            _LOG.debug('relative URLs required')
            ctx.fail(
                f'submodule {name} has non-relative url {url!r} but '
                'relative urls are required'
            )
            continue

        parsed = urllib.parse.urlparse(url)

        if not config.allow_sso:
            _LOG.debug('sso not allowed')
            if parsed.scheme in ('sso', 'rpc'):
                ctx.fail(
                    f'submodule {name} has sso/rpc url {url!r} but '
                    'sso/rpc urls are not allowed'
                )
                continue

        if not config.allow_git_corp_google_com:
            _LOG.debug('git.corp.google.com not allowed')
            if '.git.corp.google.com' in parsed.netloc:
                ctx.fail(
                    f'submodule {name} has git.corp.google.com url '
                    f'{url!r} but git.corp.google.com urls are not '
                    'allowed'
                )
                continue

        if not config.allow_non_googlesource_hosts:
            _LOG.debug('non-google hosted repos not allowed')
            if parsed.scheme not in (
                'sso',
                'rpc',
            ) and not parsed.netloc.endswith(_GERRIT_HOST_SUFFIXES):
                ctx.fail(
                    f'submodule {name} has prohibited non-Google url {url}'
                )
                continue

        if config.allowed_googlesource_hosts:
            _LOG.debug(
                'allowed googlesource hosts: %r',
                config.allowed_googlesource_hosts,
            )
            _LOG.debug('raw url: %s', url)
            host = parsed.netloc
            if host.endswith(_GERRIT_HOST_SUFFIXES) or parsed.scheme in (
                'sso',
                'rpc',
            ):
                # Strip only a genuine trailing suffix. Removing the text
                # anywhere in the host would let a name such as
                # "pig.googlesource.comweed" collapse to the allowed "pigweed".
                for suffix in _GERRIT_HOST_SUFFIXES:
                    if host.endswith(suffix):
                        host = host.removesuffix(suffix)
                        break
                _LOG.debug('host: %s', host)
                if host not in config.allowed_googlesource_hosts:
                    ctx.fail(
                        f'submodule {name} is from prohibited Google '
                        f'Gerrit host {parsed.netloc}'
                    )
192
193
def create(config: Config | None = None):
    """Create a gitmodules presubmit step with a given config.

    Args:
        config: The rules the step enforces. When omitted, a new default
            ``Config()`` is built for this call.

    Returns:
        The ``gitmodules`` step function, wrapped by ``filter_paths`` with
        ``endswith='.gitmodules'``.
    """
    # Build the default here rather than in the signature, where a single
    # mutable Config instance would be created once and shared by every
    # step made without an explicit config.
    step_config = Config() if config is None else config

    @filter_paths(endswith='.gitmodules')
    def gitmodules(ctx: PresubmitContext):
        """Check various rules for .gitmodules files."""
        ctx.paths = presubmit_context.apply_exclusions(ctx)

        for path in ctx.paths:
            process_gitmodules(ctx, step_config, path)

    return gitmodules
206