xref: /aosp_15_r20/external/google-cloud-java/owl-bot-postprocessor/synthtool/sources/git.py (revision 55e87721aa1bc457b326496a7ca40f3ea1a63287)
1# Copyright 2018 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import os
16import pathlib
17import re
18import shutil
19import subprocess
20from typing import Dict, Optional, Tuple, Union
21
22import synthtool
23import synthtool.preconfig
24from synthtool.log import logger
25from synthtool import _tracked_paths, cache, metadata, shell
26
# Matches "owner/name", optionally preceded by an "https://" or "git@"
# GitHub prefix. The owner and repository name are exposed as the named
# groups "owner" and "name".
# NOTE: the "." in "github.com" is not escaped, so it matches any character.
_REPO_PREFIX = r"(((https:\/\/)|(git@))github.com(:|\/))?"
_REPO_OWNER_AND_NAME = r"(?P<owner>[^\/]+)\/(?P<name>[^\/]+)"
REPO_REGEX = _REPO_PREFIX + _REPO_OWNER_AND_NAME

# Raw value of the AUTOSYNTH_USE_SSH environment variable, or False when it
# is unset. Only its truthiness is used, so any non-empty value enables SSH.
USE_SSH = os.getenv("AUTOSYNTH_USE_SSH", False)
32
33
def make_repo_clone_url(repo: str) -> str:
    """Returns a fully-qualified repo URL on GitHub from a string containing
    "owner/repo".

    This returns an https URL by default, but will return an ssh URL if
    AUTOSYNTH_USE_SSH is set.

    Arguments:
        repo {str} -- Repository in "owner/repo" form.

    Returns:
        str -- The clone URL, always ending in ".git".
    """
    if USE_SSH:
        # SSH remotes on GitHub use the scp-like "git@host:path" syntax.
        return f"git@github.com:{repo}.git"
    return f"https://github.com/{repo}.git"
45
46
def _local_default_branch(path: pathlib.Path) -> Union[str, None]:
    """Helper method to infer the default branch of a local clone.

    Asks git for the local branches ordered by committer date, most recent
    first (``--sort=-committerdate``), and returns the first one that is
    named ``master`` or ``main``. The order of branches that are tied by
    committerdate is undefined.

    Arguments:
        path {pathlib.Path} -- Path to the local git clone

    Returns:
        string -- The inferred default branch, or None when the clone has
            neither a ``master`` nor a ``main`` branch.
    """
    raw_listing = subprocess.check_output(
        ["git", "branch", "--sort=-committerdate", "--format=%(refname:short)"],
        cwd=str(path),
    )
    candidates = raw_listing.decode("utf-8").splitlines()
    # The listing is newest-first, so the first hit is the most recently
    # committed of the two conventional default branch names.
    return next((name for name in candidates if name in ("master", "main")), None)
72
73
def clone(
    url: str,
    dest: Optional[pathlib.Path] = None,
    committish: Optional[str] = None,
    force: bool = False,
) -> pathlib.Path:
    """Clones a remote git repo.

    Will not actually clone the repo if it's already local via two ways:
      1. It's in the cache (the default destination).
      2. It was supplied via the preconfig file.

    A repo that is already present in the destination is updated with
    ``git pull`` (after checking out ``master``/``main`` when one of them
    exists locally). A precloned repo is used as-is and is never pulled.

    Arguments:
        url {str} -- Url pointing to remote git repo.

    Keyword Arguments:
        dest {pathlib.Path} -- Local folder where repo should be cloned. (default: {None})
        committish {str} -- The commit to ``git reset --hard`` to. (default: {None})
        force {bool} -- Wipe out and reclone if it already exists in the cache. (default: {False})

    Returns:
        pathlib.Path -- Local directory where the repo was cloned.
    """
    preclone = get_preclone(url)

    if preclone:
        logger.debug(f"Using precloned repo {preclone}")
        dest = pathlib.Path(preclone)
    else:
        if dest is None:
            dest = cache.get_cache_dir()

        # The repo lives in a sub-directory named after the last URL
        # component with its extension (e.g. ".git") removed.
        dest = dest / pathlib.Path(url).stem

        if force and dest.exists():
            shutil.rmtree(dest)

        default_branch = None
        if not dest.exists():
            cmd = [
                "git",
                "clone",
                "--recurse-submodules",
                "--single-branch",
                url,
                str(dest),
            ]
            shell.run(cmd, check=True)
        else:
            default_branch = _local_default_branch(dest)
            if default_branch is not None:
                shell.run(
                    ["git", "checkout", default_branch], cwd=str(dest), check=True
                )
            else:
                # Neither "master" nor "main" exists locally (the clone is
                # --single-branch, so the remote default may have another
                # name). Stay on the current branch instead of attempting
                # "git checkout None".
                logger.debug(
                    f"No master/main branch found in {dest}; "
                    "pulling the currently checked out branch"
                )
            shell.run(["git", "pull"], cwd=str(dest), check=True)
        # Without an explicit committish, an updated repo is reset to the
        # tip of its default branch; a fresh clone is left untouched.
        committish = committish or default_branch

    if committish:
        shell.run(["git", "reset", "--hard", committish], cwd=str(dest))

    # track all git repositories
    _tracked_paths.add(dest)

    # add repo to metadata
    sha, message = get_latest_commit(dest)
    commit_metadata = extract_commit_message_metadata(message)

    metadata.add_git_source(
        name=dest.name,
        remote=url,
        sha=sha,
        internal_ref=commit_metadata.get("PiperOrigin-RevId"),
        local_path=str(dest),
    )

    return dest
140
141
def parse_repo_url(url: str) -> Dict[str, str]:
    """
    Parses a GitHub url and returns a dict with:
        owner - Owner of the repository
        name  - Name of the repository (without a trailing ".git")

    The following are matchable:
        googleapis/nodejs-vision(.git)?
        git@github.com:GoogleCloudPlatform/google-cloud-python.git
        https://github.com/GoogleCloudPlatform/google-cloud-python.git

    Raises:
        RuntimeError -- If the url does not contain an "owner/name" pair.
    """
    found = re.compile(REPO_REGEX).search(url)
    if found is None:
        raise RuntimeError("repository url is not a properly formatted git string.")

    repo_name = found["name"]
    # Clone URLs carry a ".git" suffix that is not part of the repo name.
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]

    return {"owner": found["owner"], "name": repo_name}
165
166
def get_latest_commit(repo: pathlib.Path = None) -> Tuple[str, str]:
    """Return the sha and commit message of the latest commit.

    Runs ``git log -1`` in ``repo`` (the current working directory when
    ``repo`` is None). The first output line is the full commit hash and
    everything after it is the raw commit message body.
    """
    log_cmd = ["git", "log", "-1", "--pretty=%H%n%B"]
    raw_output = subprocess.check_output(log_cmd, cwd=repo)
    # "%H%n%B" prints the hash, a newline, then the body; split only once
    # so newlines inside the message are preserved.
    sha, body = raw_output.decode("utf-8").split("\n", 1)
    return sha, body
174
175
def extract_commit_message_metadata(message: str) -> Dict[str, str]:
    """Extract extended metadata stored in the Git commit message.

    Every line containing a colon is treated as a ``key: value`` pair, split
    at the first colon. The value is stripped of surrounding whitespace; the
    key is kept verbatim. When a key appears more than once, the last
    occurrence wins.

    For example, a commit that looks like this::

        Do the thing!

        Piper-Changelog: 1234567

    Will return::

        {"Piper-Changelog": "1234567"}

    """
    key_value_pairs = (
        line.split(":", 1) for line in message.splitlines() if ":" in line
    )
    return {key: value.strip() for key, value in key_value_pairs}
199
200
def get_preclone(url: str) -> Optional[str]:
    """Finds a pre-cloned git repo in the preclone map.

    Loads the synthtool preconfig and looks ``url`` up in its
    ``precloned_repos`` mapping; returns None when the url is not listed.
    """
    return synthtool.preconfig.load().precloned_repos.get(url)
205