xref: /aosp_15_r20/external/google-cloud-java/owl-bot-postprocessor/synthtool/sources/git.py (revision 55e87721aa1bc457b326496a7ca40f3ea1a63287)
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14*55e87721SMatt Gilbride
15*55e87721SMatt Gilbrideimport os
16*55e87721SMatt Gilbrideimport pathlib
17*55e87721SMatt Gilbrideimport re
18*55e87721SMatt Gilbrideimport shutil
19*55e87721SMatt Gilbrideimport subprocess
20*55e87721SMatt Gilbridefrom typing import Dict, Optional, Tuple, Union
21*55e87721SMatt Gilbride
22*55e87721SMatt Gilbrideimport synthtool
23*55e87721SMatt Gilbrideimport synthtool.preconfig
24*55e87721SMatt Gilbridefrom synthtool.log import logger
25*55e87721SMatt Gilbridefrom synthtool import _tracked_paths, cache, metadata, shell
26*55e87721SMatt Gilbride
# Matches the GitHub repository spellings accepted by parse_repo_url():
#   owner/name
#   git@github.com:owner/name(.git)
#   https://github.com/owner/name(.git)
# The host prefix is optional. "owner" and "name" are exposed as named groups;
# a trailing ".git" is captured as part of "name" and is left for the caller
# to strip. The dot in "github.com" is escaped so it only matches a literal dot.
REPO_REGEX = (
    r"(((https:\/\/)|(git@))github\.com(:|\/))?(?P<owner>[^\/]+)\/(?P<name>[^\/]+)"
)

# The raw environment string is kept, so ANY non-empty value of
# AUTOSYNTH_USE_SSH (even "0" or "false") is truthy and selects ssh clone URLs.
USE_SSH = os.environ.get("AUTOSYNTH_USE_SSH", False)
32*55e87721SMatt Gilbride
33*55e87721SMatt Gilbride
def make_repo_clone_url(repo: str) -> str:
    """Returns a fully-qualified repo URL on GitHub from a string containing
    "owner/repo".

    This returns an https URL by default, but will return an ssh URL if
    AUTOSYNTH_USE_SSH is set (see the module-level USE_SSH flag).

    Arguments:
        repo {str} -- Repository in "owner/repo" form.

    Returns:
        str -- The clone URL, ending in ".git".
    """
    if USE_SSH:
        return f"git@github.com:{repo}.git"
    return f"https://github.com/{repo}.git"
45*55e87721SMatt Gilbride
46*55e87721SMatt Gilbride
def _local_default_branch(path: pathlib.Path) -> Optional[str]:
    """Helper method to infer the default branch of a local clone.

    Lists the local branches sorted by committerdate, most recent first, and
    returns the first one named master or main -- i.e. whichever of the two
    carries the later commit. The order of branches that are tied by
    committerdate is undefined.

    Arguments:
        path {pathlib.Path} -- Path to the local git clone.

    Returns:
        Optional[str] -- "master" or "main", or None when the clone has
            neither branch.

    Raises:
        subprocess.CalledProcessError -- If the git command exits non-zero.
    """
    listing = subprocess.check_output(
        ["git", "branch", "--sort=-committerdate", "--format=%(refname:short)"],
        cwd=str(path),
    ).decode("utf-8")
    # The listing is newest-first, so the first hit is the most recent of the two.
    candidates = (
        branch for branch in listing.splitlines() if branch in ("master", "main")
    )
    return next(candidates, None)
72*55e87721SMatt Gilbride
73*55e87721SMatt Gilbride
def clone(
    url: str,
    dest: Optional[pathlib.Path] = None,
    committish: Optional[str] = None,
    force: bool = False,
) -> pathlib.Path:
    """Clones a remote git repo.

    Will not actually clone the repo if it's already local via two ways:
      1. It's in the cache (the default destination).
      2. It was supplied via the preconfig file.

    A repo that already exists at the destination is switched to its inferred
    default branch (master or main, when one exists) and pulled. The resulting
    checkout is added to the tracked paths and recorded as a git source in the
    metadata.

    Arguments:
        url {str} -- Url pointing to remote git repo.

    Keyword Arguments:
        dest {pathlib.Path} -- Local folder where repo should be cloned; the
            repo lands in a sub-folder named after the stem of the url.
            (default: {None})
        committish {str} -- The commit hash to check out. (default: {None})
        force {bool} -- Wipe out and reclone if it already exists in the cache.
            (default: {False})

    Returns:
        pathlib.Path -- Local directory where the repo was cloned.
    """
    preclone = get_preclone(url)

    if preclone:
        logger.debug(f"Using precloned repo {preclone}")
        dest = pathlib.Path(preclone)
    else:
        if dest is None:
            dest = cache.get_cache_dir()

        dest = dest / pathlib.Path(url).stem

        if force and dest.exists():
            shutil.rmtree(dest)

        default_branch = None
        if not dest.exists():
            cmd = [
                "git",
                "clone",
                "--recurse-submodules",
                "--single-branch",
                url,
                str(dest),
            ]
            shell.run(cmd, check=True)
        else:
            default_branch = _local_default_branch(dest)
            # A clone whose only branch is neither master nor main yields
            # None; stay on the current branch rather than passing None to git.
            if default_branch is not None:
                shell.run(
                    ["git", "checkout", default_branch], cwd=str(dest), check=True
                )
            shell.run(["git", "pull"], cwd=str(dest), check=True)
        committish = committish or default_branch

    if committish:
        shell.run(["git", "reset", "--hard", committish], cwd=str(dest))

    # track all git repositories
    _tracked_paths.add(dest)

    # add repo to metadata
    sha, message = get_latest_commit(dest)
    commit_metadata = extract_commit_message_metadata(message)

    metadata.add_git_source(
        name=dest.name,
        remote=url,
        sha=sha,
        internal_ref=commit_metadata.get("PiperOrigin-RevId"),
        local_path=str(dest),
    )

    return dest
140*55e87721SMatt Gilbride
141*55e87721SMatt Gilbride
def parse_repo_url(url: str) -> Dict[str, str]:
    """
    Parses a GitHub url and returns a dict with:
        owner - Owner of the repository
        name  - Name of the repository (any trailing ".git" removed)

    The following are matchable:
        googleapis/nodejs-vision(.git)?
        git@github.com:GoogleCloudPlatform/google-cloud-python.git
        https://github.com/GoogleCloudPlatform/google-cloud-python.git

    Raises:
        RuntimeError -- If url does not match REPO_REGEX.
    """
    match = re.search(REPO_REGEX, url)
    if match is None:
        raise RuntimeError("repository url is not a properly formatted git string.")

    name = match.group("name")
    if name.endswith(".git"):
        # Drop the clone-URL suffix so only the bare repository name remains.
        name = name[: -len(".git")]

    return {"owner": match.group("owner"), "name": name}
165*55e87721SMatt Gilbride
166*55e87721SMatt Gilbride
def get_latest_commit(repo: Optional[pathlib.Path] = None) -> Tuple[str, str]:
    """Return the sha and commit message of the latest commit.

    Arguments:
        repo {pathlib.Path} -- Directory to run git in; None runs it in the
            current working directory. (default: {None})

    Returns:
        Tuple[str, str] -- The full commit hash and the raw commit message
            (subject and body) exactly as printed by git.

    Raises:
        subprocess.CalledProcessError -- If the git command exits non-zero.
    """
    output = subprocess.check_output(
        ["git", "log", "-1", "--pretty=%H%n%B"], cwd=repo
    ).decode("utf-8")
    # "%H%n%B" prints the hash on the first line and the message after it.
    commit, _, message = output.partition("\n")
    return commit, message
174*55e87721SMatt Gilbride
175*55e87721SMatt Gilbride
def extract_commit_message_metadata(message: str) -> Dict[str, str]:
    """Extract extended metadata stored in the Git commit message.

    Every line that contains a colon is treated as a ``key: value`` pair,
    split at the first colon. Surrounding whitespace is stripped from both the
    key and the value, so indented trailers are still found by name. When a
    key appears more than once, the last occurrence wins.

    For example, a commit that looks like this::

        Do the thing!

        Piper-Changelog: 1234567

    Will return::

        {"Piper-Changelog": "1234567"}

    """
    # Named "fields" so the local does not shadow the imported metadata module.
    fields: Dict[str, str] = {}
    for line in message.splitlines():
        key, colon, value = line.partition(":")
        if not colon:
            continue
        fields[key.strip()] = value.strip()

    return fields
199*55e87721SMatt Gilbride
200*55e87721SMatt Gilbride
def get_preclone(url: str) -> Optional[str]:
    """Finds a pre-cloned git repo in the preclone map.

    Loads the preconfig with synthtool.preconfig.load() and looks url up in
    its precloned_repos mapping.

    Arguments:
        url {str} -- Url of the remote git repo.

    Returns:
        Optional[str] -- The mapping's entry for url (presumably the local
            path of the existing clone), or None when there is no entry.
    """
    precloned_repos = synthtool.preconfig.load().precloned_repos
    return precloned_repos.get(url)
205