# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import pathlib
import re
import shutil
import subprocess
from typing import Dict, Optional, Tuple, Union

import synthtool
import synthtool.preconfig
from synthtool.log import logger
from synthtool import _tracked_paths, cache, metadata, shell

# Matches "owner/name", optionally prefixed by an https or ssh GitHub host.
REPO_REGEX = (
    r"(((https:\/\/)|(git@))github.com(:|\/))?(?P<owner>[^\/]+)\/(?P<name>[^\/]+)"
)

# Any non-empty value of AUTOSYNTH_USE_SSH is truthy and enables ssh URLs.
USE_SSH = os.environ.get("AUTOSYNTH_USE_SSH", False)


def make_repo_clone_url(repo: str) -> str:
    """Returns a fully-qualified repo URL on GitHub from a string containing
    "owner/repo".

    This returns an https URL by default, but will return an ssh URL if
    AUTOSYNTH_USE_SSH is set.

    Arguments:
        repo {str} -- Repository in "owner/repo" form.

    Returns:
        str -- The clone URL, ending in ".git".
    """
    if USE_SSH:
        return f"git@github.com:{repo}.git"
    else:
        return f"https://github.com/{repo}.git"


def _local_default_branch(path: pathlib.Path) -> Union[str, None]:
    """Helper method to infer the default branch.

    Lists the local branches sorted by committerdate in descending order
    (latest is first) and returns the first of master or main that appears,
    i.e. whichever of the two was committed to most recently. The order of
    branches that are tied by committerdate is undefined.

    Arguments:
        path {pathlib.Path} -- Path to the local git clone.

    Returns:
        string -- The inferred default branch, or None when the clone has
            neither a local master nor a local main branch.
    """
    branches = (
        subprocess.check_output(
            ["git", "branch", "--sort=-committerdate", "--format=%(refname:short)"],
            cwd=str(path),
        )
        .decode("utf-8")
        .splitlines()
    )
    for branch in branches:
        if branch == "master" or branch == "main":
            return branch
    return None


def clone(
    url: str,
    dest: Optional[pathlib.Path] = None,
    committish: Optional[str] = None,
    force: bool = False,
) -> pathlib.Path:
    """Clones a remote git repo.

    Will not actually clone the repo if it's already local via two ways:
    1. It's in the cache (the default destination).
    2. It was supplied via the preconfig file.

    Arguments:
        url {str} -- Url pointing to remote git repo.

    Keyword Arguments:
        dest {pathlib.Path} -- Local folder where repo should be cloned. (default: {None})
        committish {str} -- The commit hash to check out. (default: {None})
        force {bool} -- Wipe out and reclone if it already exists in the cache. (default: {False})

    Returns:
        pathlib.Path -- Local directory where the repo was cloned.
    """
    preclone = get_preclone(url)

    if preclone:
        # A precloned repo is used exactly as supplied: it is never deleted,
        # pulled or reset, regardless of `force` and `committish`.
        logger.debug(f"Using precloned repo {preclone}")
        dest = pathlib.Path(preclone)
    else:
        if dest is None:
            dest = cache.get_cache_dir()

        # The clone lives in a sub-directory named after the repo
        # ("https://.../owner/name.git" -> "name").
        dest = dest / pathlib.Path(url).stem

        if force and dest.exists():
            shutil.rmtree(dest)

        default_branch = None
        if not dest.exists():
            cmd = [
                "git",
                "clone",
                "--recurse-submodules",
                "--single-branch",
                url,
                str(dest),
            ]
            shell.run(cmd, check=True)
        else:
            default_branch = _local_default_branch(dest)
            if default_branch is None:
                # Neither master nor main exists locally; passing None to
                # git would fail, so stay on the currently checked-out branch.
                logger.debug(
                    f"Could not infer a default branch for {dest}; "
                    "pulling the current branch"
                )
            else:
                shell.run(
                    ["git", "checkout", default_branch], cwd=str(dest), check=True
                )
            shell.run(["git", "pull"], cwd=str(dest), check=True)
        committish = committish or default_branch

        if committish:
            shell.run(["git", "reset", "--hard", committish], cwd=str(dest))

    # track all git repositories
    _tracked_paths.add(dest)

    # add repo to metadata
    sha, message = get_latest_commit(dest)
    commit_metadata = extract_commit_message_metadata(message)

    metadata.add_git_source(
        name=dest.name,
        remote=url,
        sha=sha,
        internal_ref=commit_metadata.get("PiperOrigin-RevId"),
        local_path=str(dest),
    )

    return dest


def parse_repo_url(url: str) -> Dict[str, str]:
    """
    Parses a GitHub url and returns a dict with:
        owner - Owner of the repository
        name  - Name of the repository

    The following are matchable:
        googleapis/nodejs-vision(.git)?
        git@github.com:GoogleCloudPlatform/google-cloud-python.git
        https://github.com/GoogleCloudPlatform/google-cloud-python.git

    Raises:
        RuntimeError -- If the url does not match REPO_REGEX.
    """
    match = re.search(REPO_REGEX, url)

    if not match:
        raise RuntimeError("repository url is not a properly formatted git string.")

    owner = match.group("owner")
    name = match.group("name")

    # The regex captures the trailing ".git" as part of the name; drop it.
    if name.endswith(".git"):
        name = name[:-4]

    return {"owner": owner, "name": name}


def get_latest_commit(repo: Optional[pathlib.Path] = None) -> Tuple[str, str]:
    """Return the sha and commit message of the latest commit.

    Arguments:
        repo {pathlib.Path} -- Directory to run git in; None means the
            current working directory. (default: {None})

    Returns:
        Tuple[str, str] -- The full commit hash and the raw commit message.
    """
    # "%H%n%B" prints the hash on the first line followed by the raw body.
    output = subprocess.check_output(
        ["git", "log", "-1", "--pretty=%H%n%B"], cwd=repo
    ).decode("utf-8")
    commit, message = output.split("\n", 1)
    return commit, message


def extract_commit_message_metadata(message: str) -> Dict[str, str]:
    """Extract extended metadata stored in the Git commit message.

    For example, a commit that looks like this::

        Do the thing!

        Piper-Changelog: 1234567

    Will return::

        {"Piper-Changelog": "1234567"}

    Every line containing a colon is treated as a "key: value" pair, split
    on the first colon. When a key repeats, the last occurrence wins.
    """
    # Named `commit_metadata` so it does not shadow the imported
    # `synthtool.metadata` module.
    commit_metadata: Dict[str, str] = {}
    for line in message.splitlines():
        if ":" not in line:
            continue

        key, value = line.split(":", 1)
        commit_metadata[key] = value.strip()

    return commit_metadata


def get_preclone(url: str) -> Optional[str]:
    """Finds a pre-cloned git repo in the preclone map.

    Returns:
        Optional[str] -- The entry of the preconfig's `precloned_repos`
            mapping for `url`, or None when there is none.
    """
    preconfig = synthtool.preconfig.load()
    return preconfig.precloned_repos.get(url)