1#!/usr/bin/env python3 2# -*- coding: utf-8 -*- 3# ===----------------------------------------------------------------------===## 4# 5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 6# See https://llvm.org/LICENSE.txt for license information. 7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 8# 9# ===----------------------------------------------------------------------===## 10# 11# !!!!!!!!!!!! NOTE !!!!!!!!!!!! 12# This is copied directly from upstream LLVM. Please make any changes upstream, 13# rather than to this file directly. Once changes are made there, you're free 14# to integrate them here. 15 16"""Checks for reverts of commits across a given git commit. 17 18To clarify the meaning of 'across' with an example, if we had the following 19commit history (where `a -> b` notes that `b` is a direct child of `a`): 20 21123abc -> 223abc -> 323abc -> 423abc -> 523abc 22 23And where 423abc is a revert of 223abc, this revert is considered to be 'across' 24323abc. More generally, a revert A of a parent commit B is considered to be 25'across' a commit C if C is a parent of A and B is a parent of C. 26 27Please note that revert detection in general is really difficult, since merge 28conflicts/etc always introduce _some_ amount of fuzziness. This script just 29uses a bundle of heuristics, and is bound to ignore / incorrectly flag some 30reverts. The hope is that it'll easily catch the vast majority (>90%) of them, 31though. 32 33This is designed to be used in one of two ways: an import in Python, or run 34directly from a shell. If you want to import this, the `find_reverts` 35function is the thing to look at. If you'd rather use this from a shell, have a 36usage example: 37 38``` 39./revert_checker.py c47f97169 origin/main origin/release/12.x 40``` 41 42This checks for all reverts from the tip of origin/main to c47f97169, which are 43across the latter. It then does the same for origin/release/12.x to c47f97169. 44Duplicate reverts discovered when walking both roots (origin/main and 45origin/release/12.x) are deduplicated in output. 46""" 47 48import argparse 49import collections 50import logging 51import re 52import subprocess 53import sys 54from typing import Generator, Iterable, List, NamedTuple 55 56 57assert sys.version_info >= (3, 6), "Only Python 3.6+ is supported." 58 59# People are creative with their reverts, and heuristics are a bit difficult. 60# Like 90% of of reverts have "This reverts commit ${full_sha}". 61# Some lack that entirely, while others have many of them specified in ad-hoc 62# ways, while others use short SHAs and whatever. 63# 64# The 90% case is trivial to handle (and 100% free + automatic). The extra 10% 65# starts involving human intervention, which is probably not worth it for now. 66 67 68def _try_parse_reverts_from_commit_message(commit_message: str) -> List[str]: 69 if not commit_message: 70 return [] 71 72 results = re.findall( 73 r"This reverts commit ([a-f0-9]{40})\b", commit_message 74 ) 75 76 first_line = commit_message.splitlines()[0] 77 initial_revert = re.match(r'Revert ([a-f0-9]{6,}) "', first_line) 78 if initial_revert: 79 results.append(initial_revert.group(1)) 80 return results 81 82 83def _stream_stdout(command: List[str]) -> Generator[str, None, None]: 84 with subprocess.Popen( 85 command, stdout=subprocess.PIPE, encoding="utf-8", errors="replace" 86 ) as p: 87 assert p.stdout is not None # for mypy's happiness. 88 yield from p.stdout 89 90 91def _resolve_sha(git_dir: str, sha: str) -> str: 92 if len(sha) == 40: 93 return sha 94 95 return subprocess.check_output( 96 ["git", "-C", git_dir, "rev-parse", sha], 97 encoding="utf-8", 98 stderr=subprocess.DEVNULL, 99 ).strip() 100 101 102_LogEntry = NamedTuple( 103 "_LogEntry", 104 [ 105 ("sha", str), 106 ("commit_message", str), 107 ], 108) 109 110 111def _log_stream( 112 git_dir: str, root_sha: str, end_at_sha: str 113) -> Iterable[_LogEntry]: 114 sep = 50 * "<>" 115 log_command = [ 116 "git", 117 "-C", 118 git_dir, 119 "log", 120 "^" + end_at_sha, 121 root_sha, 122 "--format=" + sep + "%n%H%n%B%n", 123 ] 124 125 stdout_stream = iter(_stream_stdout(log_command)) 126 127 # Find the next separator line. If there's nothing to log, it may not exist. 128 # It might not be the first line if git feels complainy. 129 found_commit_header = False 130 for line in stdout_stream: 131 if line.rstrip() == sep: 132 found_commit_header = True 133 break 134 135 while found_commit_header: 136 sha = next(stdout_stream, None) 137 assert sha is not None, "git died?" 138 sha = sha.rstrip() 139 140 commit_message = [] 141 142 found_commit_header = False 143 for line in stdout_stream: 144 line = line.rstrip() 145 if line.rstrip() == sep: 146 found_commit_header = True 147 break 148 commit_message.append(line) 149 150 yield _LogEntry(sha, "\n".join(commit_message).rstrip()) 151 152 153def _shas_between(git_dir: str, base_ref: str, head_ref: str) -> Iterable[str]: 154 rev_list = [ 155 "git", 156 "-C", 157 git_dir, 158 "rev-list", 159 "--first-parent", 160 f"{base_ref}..{head_ref}", 161 ] 162 return (x.strip() for x in _stream_stdout(rev_list)) 163 164 165def _rev_parse(git_dir: str, ref: str) -> str: 166 return subprocess.check_output( 167 ["git", "-C", git_dir, "rev-parse", ref], 168 encoding="utf-8", 169 ).strip() 170 171 172Revert = NamedTuple( 173 "Revert", 174 [ 175 ("sha", str), 176 ("reverted_sha", str), 177 ], 178) 179 180 181def _find_common_parent_commit(git_dir: str, ref_a: str, ref_b: str) -> str: 182 """Finds the closest common parent commit between `ref_a` and `ref_b`.""" 183 return subprocess.check_output( 184 ["git", "-C", git_dir, "merge-base", ref_a, ref_b], 185 encoding="utf-8", 186 ).strip() 187 188 189def find_reverts(git_dir: str, across_ref: str, root: str) -> List[Revert]: 190 """Finds reverts across `across_ref` in `git_dir`, starting from `root`. 191 192 These reverts are returned in order of oldest reverts first. 193 """ 194 across_sha = _rev_parse(git_dir, across_ref) 195 root_sha = _rev_parse(git_dir, root) 196 197 common_ancestor = _find_common_parent_commit(git_dir, across_sha, root_sha) 198 if common_ancestor != across_sha: 199 raise ValueError( 200 f"{across_sha} isn't an ancestor of {root_sha} " 201 "(common ancestor: {common_ancestor})" 202 ) 203 204 intermediate_commits = set(_shas_between(git_dir, across_sha, root_sha)) 205 assert across_sha not in intermediate_commits 206 207 logging.debug( 208 "%d commits appear between %s and %s", 209 len(intermediate_commits), 210 across_sha, 211 root_sha, 212 ) 213 214 all_reverts = [] 215 for sha, commit_message in _log_stream(git_dir, root_sha, across_sha): 216 reverts = _try_parse_reverts_from_commit_message(commit_message) 217 if not reverts: 218 continue 219 220 resolved_reverts = sorted( 221 set(_resolve_sha(git_dir, x) for x in reverts) 222 ) 223 for reverted_sha in resolved_reverts: 224 if reverted_sha in intermediate_commits: 225 logging.debug( 226 "Commit %s reverts %s, which happened after %s", 227 sha, 228 reverted_sha, 229 across_sha, 230 ) 231 continue 232 233 try: 234 object_type = subprocess.check_output( 235 ["git", "-C", git_dir, "cat-file", "-t", reverted_sha], 236 encoding="utf-8", 237 stderr=subprocess.DEVNULL, 238 ).strip() 239 except subprocess.CalledProcessError: 240 logging.warning( 241 "Failed to resolve reverted object %s (claimed to be reverted " 242 "by sha %s)", 243 reverted_sha, 244 sha, 245 ) 246 continue 247 248 if object_type == "commit": 249 all_reverts.append(Revert(sha, reverted_sha)) 250 continue 251 252 logging.error( 253 "%s claims to revert %s -- which isn't a commit -- %s", 254 sha, 255 object_type, 256 reverted_sha, 257 ) 258 259 # Since `all_reverts` contains reverts in log order (e.g., newer comes before 260 # older), we need to reverse this to keep with our guarantee of older = 261 # earlier in the result. 262 all_reverts.reverse() 263 return all_reverts 264 265 266def _main() -> None: 267 parser = argparse.ArgumentParser( 268 description=__doc__, 269 formatter_class=argparse.RawDescriptionHelpFormatter, 270 ) 271 parser.add_argument( 272 "base_ref", help="Git ref or sha to check for reverts around." 273 ) 274 parser.add_argument( 275 "-C", "--git_dir", default=".", help="Git directory to use." 276 ) 277 parser.add_argument( 278 "root", nargs="+", help="Root(s) to search for commits from." 279 ) 280 parser.add_argument("--debug", action="store_true") 281 parser.add_argument( 282 "-u", 283 "--review_url", 284 action="store_true", 285 help="Format SHAs as llvm review URLs", 286 ) 287 opts = parser.parse_args() 288 289 logging.basicConfig( 290 format="%(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: %(message)s", 291 level=logging.DEBUG if opts.debug else logging.INFO, 292 ) 293 294 # `root`s can have related history, so we want to filter duplicate commits 295 # out. The overwhelmingly common case is also to have one root, and it's way 296 # easier to reason about output that comes in an order that's meaningful to 297 # git. 298 seen_reverts = set() 299 all_reverts = [] 300 for root in opts.root: 301 for revert in find_reverts(opts.git_dir, opts.base_ref, root): 302 if revert not in seen_reverts: 303 seen_reverts.add(revert) 304 all_reverts.append(revert) 305 306 for revert in all_reverts: 307 sha_fmt = ( 308 f"https://reviews.llvm.org/rG{revert.sha}" 309 if opts.review_url 310 else revert.sha 311 ) 312 reverted_sha_fmt = ( 313 f"https://reviews.llvm.org/rG{revert.reverted_sha}" 314 if opts.review_url 315 else revert.reverted_sha 316 ) 317 print(f"{sha_fmt} claims to revert {reverted_sha_fmt}") 318 319 320if __name__ == "__main__": 321 _main() 322