# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import pathlib
import re
import shutil
import subprocess
from typing import Dict, Optional, Tuple, Union

import synthtool
import synthtool.preconfig
from synthtool.log import logger
from synthtool import _tracked_paths, cache, metadata, shell

# Matches "owner/name", optionally prefixed by an https or ssh GitHub host.
REPO_REGEX = (
    r"(((https:\/\/)|(git@))github.com(:|\/))?(?P<owner>[^\/]+)\/(?P<name>[^\/]+)"
)

# Any non-empty value of AUTOSYNTH_USE_SSH is truthy and selects ssh URLs.
USE_SSH = os.environ.get("AUTOSYNTH_USE_SSH", False)


def make_repo_clone_url(repo: str) -> str:
    """Returns a fully-qualified repo URL on GitHub from a string containing
    "owner/repo".

    This returns an https URL by default, but will return an ssh URL if
    AUTOSYNTH_USE_SSH is set.

    Arguments:
        repo {str} -- Repository in "owner/repo" form.

    Returns:
        str -- The clone URL, ending in ".git".
    """
    if USE_SSH:
        return f"git@github.com:{repo}.git"
    else:
        return f"https://github.com/{repo}.git"


def _local_default_branch(path: pathlib.Path) -> Optional[str]:
    """Helper method to infer the default branch.

    Lists the local branches sorted by committerdate, newest first, and
    returns whichever of master or main appears first (i.e. the one with the
    more recent commit). The order of branches that are tied by committerdate
    is undefined.

    Arguments:
        path {pathlib.Path} -- Path to the local git clone.

    Returns:
        Optional[str] -- The inferred default branch, or None when the clone
            has neither a master nor a main branch.
    """
    branches = (
        subprocess.check_output(
            ["git", "branch", "--sort=-committerdate", "--format=%(refname:short)"],
            cwd=str(path),
        )
        .decode("utf-8")
        .splitlines()
    )
    for branch in branches:
        if branch in ("master", "main"):
            return branch
    return None


def clone(
    url: str,
    dest: Optional[pathlib.Path] = None,
    committish: Optional[str] = None,
    force: bool = False,
) -> pathlib.Path:
    """Clones a remote git repo.

    Will not actually clone the repo if it's already local via two ways:
    1. It's in the cache (the default destination).
    2. It was supplied via the preconfig file.

    Arguments:
        url {str} -- Url pointing to remote git repo.

    Keyword Arguments:
        dest {pathlib.Path} -- Local folder where repo should be cloned. (default: {None})
        committish {str} -- The commit hash to check out. (default: {None})
        force {bool} -- Wipe out and reclone if it already exists in the cache. (default: {False})

    Returns:
        pathlib.Path -- Local directory where the repo was cloned.
    """
    preclone = get_preclone(url)

    if preclone:
        logger.debug(f"Using precloned repo {preclone}")
        dest = pathlib.Path(preclone)
    else:
        if dest is None:
            dest = cache.get_cache_dir()

        # The checkout lives in a sub-directory named after the repository.
        dest = dest / pathlib.Path(url).stem

        if force and dest.exists():
            shutil.rmtree(dest)

        default_branch = None
        if not dest.exists():
            cmd = ["git", "clone", "--recurse-submodules", "--single-branch", url, dest]
            shell.run(cmd, check=True)
        else:
            default_branch = _local_default_branch(dest)
            if default_branch:
                shell.run(
                    ["git", "checkout", default_branch], cwd=str(dest), check=True
                )
            else:
                # Neither master nor main exists locally; passing None to git
                # would fail with an opaque error, so stay on the current
                # branch and just update it.
                logger.warning(
                    f"Could not infer a default branch for {dest}; "
                    "pulling the currently checked out branch."
                )
            shell.run(["git", "pull"], cwd=str(dest), check=True)
        committish = committish or default_branch

        # NOTE(review): the reset is applied only to checkouts managed here,
        # not to precloned repos -- confirm this nesting against the original
        # layout of the file.
        if committish:
            shell.run(["git", "reset", "--hard", committish], cwd=str(dest))

    # track all git repositories
    _tracked_paths.add(dest)

    # add repo to metadata
    sha, message = get_latest_commit(dest)
    commit_metadata = extract_commit_message_metadata(message)

    metadata.add_git_source(
        name=dest.name,
        remote=url,
        sha=sha,
        internal_ref=commit_metadata.get("PiperOrigin-RevId"),
        local_path=str(dest),
    )

    return dest


def parse_repo_url(url: str) -> Dict[str, str]:
    """
    Parses a GitHub url and returns a dict with:
        owner - Owner of the repository
        name  - Name of the repository

    The following are matchable:
        googleapis/nodejs-vision(.git)?
        git@github.com:GoogleCloudPlatform/google-cloud-python.git
        https://github.com/GoogleCloudPlatform/google-cloud-python.git

    Raises:
        RuntimeError -- If the url does not contain an "owner/name" pair.
    """
    match = re.search(REPO_REGEX, url)

    if not match:
        raise RuntimeError("repository url is not a properly formatted git string.")

    owner = match.group("owner")
    name = match.group("name")

    # Drop a trailing ".git" so the bare repository name is returned.
    if name.endswith(".git"):
        name = name[:-4]

    return {"owner": owner, "name": name}


def get_latest_commit(repo: Optional[pathlib.Path] = None) -> Tuple[str, str]:
    """Return the sha and commit message of the latest commit.

    Arguments:
        repo {pathlib.Path} -- Directory in which to run git. When None the
            current working directory is used. (default: {None})

    Returns:
        Tuple[str, str] -- The full commit hash and the raw commit message
            (everything after the first line of git's output).
    """
    output = subprocess.check_output(
        ["git", "log", "-1", "--pretty=%H%n%B"], cwd=repo
    ).decode("utf-8")
    # First line is the hash (%H); the remainder is the message body (%B).
    commit, message = output.split("\n", 1)
    return commit, message


def extract_commit_message_metadata(message: str) -> Dict[str, str]:
    """Extract extended metadata stored in the Git commit message.

    For example, a commit that looks like this::

        Do the thing!

        Piper-Changelog: 1234567

    Will return::

        {"Piper-Changelog": "1234567"}

    Every line containing a colon is treated as a ``key: value`` pair, split
    at the first colon. The value is stripped of surrounding whitespace; the
    key is kept verbatim. If a key occurs more than once the last value wins.
    """
    # Named "fields" so the imported synthtool "metadata" module is not shadowed.
    fields: Dict[str, str] = {}
    for line in message.splitlines():
        if ":" not in line:
            continue

        key, value = line.split(":", 1)
        fields[key] = value.strip()

    return fields


def get_preclone(url: str) -> Optional[str]:
    """Finds a pre-cloned git repo in the preclone map.

    Arguments:
        url {str} -- Remote url used as the lookup key.

    Returns:
        Optional[str] -- The entry stored for url in the loaded preconfig's
            precloned_repos mapping, or None when there is none.
    """
    preconfig = synthtool.preconfig.load()
    return preconfig.precloned_repos.get(url)