xref: /aosp_15_r20/external/toolchain-utils/llvm_tools/get_llvm_hash.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1#!/usr/bin/env python3
2# Copyright 2019 The ChromiumOS Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Returns the latest LLVM version's hash."""
7
8import argparse
9import contextlib
10import functools
11import os
12from pathlib import Path
13import re
14import shutil
15import subprocess
16import sys
17import tempfile
18from typing import Iterator, Optional, Tuple, Union
19
20import chroot
21import git_llvm_rev
22import llvm_next
23import manifest_utils
24import subprocess_helpers
25
26
# Chromium-hosted mirror of the upstream LLVM monorepo. Used both for cloning
# and for `git ls-remote` queries.
_LLVM_GIT_URL = (
    "https://chromium.googlesource.com/external/github.com/llvm/llvm-project"
)

# Symbolic (non-numeric) names accepted in place of an SVN-style revision
# number when selecting an LLVM hash.
KNOWN_HASH_SOURCES = (
    "google3",
    "google3-unstable",
    "llvm",
    "llvm-next",
    "tot",
)
38
39
def GetVersionFrom(src_dir: Union[Path, str], git_hash: str) -> int:
    """Translates an LLVM git hash into its SVN-style revision number.

    Args:
        src_dir: LLVM's source directory; its "origin" remote is used for the
          lookup.
        git_hash: The git hash to translate.

    Returns:
        The SVN-style revision number that `git_llvm_rev` associates with
        `git_hash`.
    """
    llvm_config = git_llvm_rev.LLVMConfig(remote="origin", dir=src_dir)
    rev = git_llvm_rev.translate_sha_to_rev(llvm_config, git_hash)
    # Revisions that live on anything other than the main branch are not
    # supported by this helper.
    assert rev.branch == git_llvm_rev.MAIN_BRANCH, rev.branch
    return rev.number
57
58
def GetGitHashFrom(src_dir: Union[Path, str], version: int) -> str:
    """Translates an SVN-style LLVM revision number into a git hash.

    Args:
        src_dir: The LLVM source tree; its "origin" remote is used for the
          lookup.
        version: The revision number, interpreted on the main branch.

    Returns:
        The git hash string corresponding to `version`.

    Raises:
        subprocess.CalledProcessError: Failed to find a git hash.
    """
    llvm_config = git_llvm_rev.LLVMConfig(remote="origin", dir=src_dir)
    main_branch_rev = git_llvm_rev.Rev(
        branch=git_llvm_rev.MAIN_BRANCH, number=version
    )
    return git_llvm_rev.translate_rev_to_sha(llvm_config, main_branch_rev)
77
78
def CheckoutBranch(src_dir: Union[Path, str], branch: str) -> None:
    """Switches a git repo to `branch` and then runs `git pull` in it.

    Args:
        src_dir: The LLVM source tree.
        branch: The git branch to check out in `src_dir`.

    Raises:
        ValueError: Failed to check out or pull the branch (presumably raised
          by `subprocess_helpers.CheckCommand`; confirm against that module).
    """
    git_in_repo = ["git", "-C", src_dir]
    # Order matters: the pull must happen on the freshly checked-out branch.
    for git_args in (["checkout", branch], ["pull"]):
        subprocess_helpers.CheckCommand(git_in_repo + git_args)
91
92
def ParseLLVMMajorVersion(cmakelist: str) -> Optional[str]:
    """Extracts the LLVM major version from CMake file contents.

    Only a `set(LLVM_VERSION_MAJOR <digits>)` statement that is preceded by a
    newline and at least one further whitespace character is recognized; the
    first such statement wins.

    Args:
        cmakelist: Contents of a CMake file (e.g. CMakeLists.txt).

    Returns:
        The major version number as a string, or None if it couldn't be found.
    """
    found = re.search(
        r"\n\s+set\(LLVM_VERSION_MAJOR (?P<major>\d+)\)", cmakelist
    )
    return found.group("major") if found else None
108
109
@functools.lru_cache(maxsize=1)
def GetLLVMMajorVersion(git_hash: Optional[str] = None) -> str:
    """Reads the LLVM major version out of the local LLVM checkout.

    The result is memoized, but only for the most recently used `git_hash`
    (the cache holds a single entry).

    Args:
        git_hash: git hash of the LLVM version to inspect, or None to inspect
          the checkout as-is (top of trunk).

    Returns:
        The major version number as a string.

    Raises:
        ValueError: The major version could not be parsed from any of the
          candidate CMake files. Failures of the underlying git commands are
          also documented upstream as ValueError (not verifiable from here).
    """
    src_dir = GetAndUpdateLLVMProjectInLLVMTools()
    llvm_root = Path(src_dir)

    # b/325895866#comment36: the LLVM version number was moved from
    # `llvm/CMakeLists.txt` to `cmake/Modules/LLVMVersion.cmake` in upstream
    # commit 81e20472a0c5a4a8edc5ec38dc345d580681af81 (r530225). Until we no
    # longer care about looking before that, we need to support searching both
    # files.
    cmakelists_paths = (
        llvm_root / "llvm" / "CMakeLists.txt",
        llvm_root / "cmake" / "Modules" / "LLVMVersion.cmake",
    )

    if git_hash:
        subprocess_helpers.CheckCommand(
            ["git", "-C", src_dir, "checkout", git_hash]
        )

    try:
        for candidate in cmakelists_paths:
            try:
                contents = candidate.read_text(encoding="utf-8")
            except FileNotFoundError:
                # This file may not exist at the inspected revision; try the
                # next candidate.
                continue

            major = ParseLLVMMajorVersion(contents)
            if major:
                return major
    finally:
        # Put the shared checkout back on the main branch, but only if it was
        # actually moved away from it above.
        if git_hash:
            CheckoutBranch(src_dir, git_llvm_rev.MAIN_BRANCH)

    raise ValueError(
        f"Major version could not be parsed from any of {cmakelists_paths}"
    )
157
158
@contextlib.contextmanager
def CreateTempLLVMRepo(temp_dir: str) -> Iterator[str]:
    """Provides a detached LLVM worktree at 'temp_dir' for the `with` body.

    A worktree is used so that the LLVM source tree in
    '../toolchain-utils/llvm_tools/llvm-project-copy' itself is never
    modified, e.g. when patches need to be applied to an LLVM tree.

    The worktree is checked out at the main branch of "origin" and is removed
    again on exit if 'temp_dir' still exists.

    Args:
        temp_dir: An absolute path to the temporary directory to put the
          worktree in (obtained via 'tempfile.mkdtemp()').

    Yields:
        The absolute path to 'temp_dir'.

    Raises:
        subprocess.CalledProcessError: Failed to remove the worktree.
        ValueError: Failed to add a worktree.
    """
    llvm_project_dir = GetAndUpdateLLVMProjectInLLVMTools()
    git_worktree = ["git", "-C", llvm_project_dir, "worktree"]

    add_args = [
        "add",
        "--detach",
        temp_dir,
        "origin/%s" % git_llvm_rev.MAIN_BRANCH,
    ]
    subprocess_helpers.CheckCommand(git_worktree + add_args)

    try:
        yield temp_dir
    finally:
        # The caller may already have deleted the directory; only ask git to
        # remove the worktree when it is still there.
        if os.path.isdir(temp_dir):
            subprocess_helpers.check_output(
                git_worktree + ["remove", "-f", temp_dir]
            )
210
211
def GetAndUpdateLLVMProjectInLLVMTools() -> str:
    """Gets the absolute path to 'llvm-project-copy' directory in 'llvm_tools'.

    The intent of this function is to avoid cloning the LLVM repo and then
    discarding the contents of the repo. If 'llvm-project-copy' does not exist
    next to this file, it is created and the LLVM repo is cloned into it via
    the LLVMHash() class. If that clone fails or is interrupted, the directory
    is removed again so that a later call retries from scratch.

    If the directory already exists, it must have no local changes; it is then
    switched to the main branch and updated with `git pull`.

    Returns:
        Absolute path to 'llvm-project-copy' directory in 'llvm_tools'.

    Raises:
        ValueError: LLVM repo (in 'llvm-project-copy' dir.) has changes, or
          the clone failed. Checkout/pull failures are documented upstream as
          ValueError as well (not verifiable from this file).
    """

    abs_path_to_llvm_tools_dir = os.path.dirname(os.path.abspath(__file__))

    abs_path_to_llvm_project_dir = os.path.join(
        abs_path_to_llvm_tools_dir, "llvm-project-copy"
    )

    if not os.path.isdir(abs_path_to_llvm_project_dir):
        print(
            f"Checking out LLVM to {abs_path_to_llvm_project_dir}\n"
            "so that we can map between commit hashes and revision numbers.\n"
            "This may take a while, but only has to be done once.",
            file=sys.stderr,
        )
        os.mkdir(abs_path_to_llvm_project_dir)

        try:
            LLVMHash().CloneLLVMRepo(abs_path_to_llvm_project_dir)
        except BaseException:
            # A directory left over from a failed or interrupted (Ctrl-C)
            # clone would be mistaken for a finished checkout on the next
            # call, which would then run the git commands below against
            # something that is not a valid clone. Remove it before
            # propagating the error.
            shutil.rmtree(abs_path_to_llvm_project_dir, ignore_errors=True)
            raise

        return abs_path_to_llvm_project_dir

    # `git status` has a '-s'/'--short' option that shortens the output.
    # With the '-s' option, if no changes were made to the LLVM repo, then
    # the output (assigned to 'repo_status') would be empty.
    repo_status = subprocess_helpers.check_output(
        ["git", "-C", abs_path_to_llvm_project_dir, "status", "-s"]
    )

    if repo_status.rstrip():
        raise ValueError(
            "LLVM repo in %s has changes, please remove."
            % abs_path_to_llvm_project_dir
        )

    CheckoutBranch(abs_path_to_llvm_project_dir, git_llvm_rev.MAIN_BRANCH)

    return abs_path_to_llvm_project_dir
266
267
def GetGoogle3LLVMVersion(stable: bool) -> int:
    """Gets the latest google3 LLVM version.

    The git hash recorded for the google3 toolchain is read with `cat` and
    then translated to an SVN-style version using the local LLVM checkout.

    Args:
        stable: True to use the stable toolchain, False for the unstable one.

    Returns:
        The latest LLVM SVN version as an integer.

    Raises:
        subprocess.CalledProcessError: An invalid path has been provided to the
          `cat` command.
    """
    if stable:
        subdir = "stable"
    else:
        subdir = "llvm_unstable"

    rev_id_path = os.path.join(
        "/google/src/head/depot/google3/third_party/crosstool/v18",
        subdir,
        "installs/llvm/git_origin_rev_id",
    )

    # The file holds a git hash (plus trailing whitespace), not a number.
    google3_git_hash = subprocess_helpers.check_output(["cat", rev_id_path])

    llvm_project_dir = GetAndUpdateLLVMProjectInLLVMTools()
    return GetVersionFrom(llvm_project_dir, google3_git_hash.rstrip())
301
302
def IsSvnOption(svn_option: str) -> Union[int, str]:
    """Validates and normalizes a command-line LLVM version selector.

    The selector is either one of the names in KNOWN_HASH_SOURCES (matched
    case-insensitively) or something `int()` can parse.

    Args:
        svn_option: The option passed in as a command line argument.

    Returns:
        The lowercased option if it is a known hash source, otherwise the
        option converted to an int.

    Raises:
        ValueError: Invalid svn option provided.
    """
    normalized = svn_option.lower()
    if normalized in KNOWN_HASH_SOURCES:
        return normalized

    # Anything that is not a known name must be a number; e.g. 'one' is
    # neither, so it falls through to the error below.
    with contextlib.suppress(ValueError):
        return int(svn_option)

    raise ValueError("Invalid LLVM git hash option provided: %s" % svn_option)
334
335
def GetLLVMHashAndVersionFromSVNOption(
    svn_option: Union[int, str]
) -> Tuple[str, int]:
    """Gets the LLVM hash and LLVM version based off of the svn option.

    Args:
        svn_option: A valid svn option obtained from the command line.
          Supported values are 'tot', 'google3', 'google3-unstable', or an
          integer <svn_version> such as 365123.

    Returns:
        A tuple that is the LLVM git hash and LLVM version.

    Raises:
        ValueError: `svn_option` is not one of the supported values. Note that
          'llvm' and 'llvm-next' are accepted by IsSvnOption() but are not
          handled by this function.
    """
    if svn_option == "tot":
        git_hash = LLVMHash().GetTopOfTrunkGitHash()
        version = GetVersionFrom(GetAndUpdateLLVMProjectInLLVMTools(), git_hash)
        return git_hash, version

    if isinstance(svn_option, int):
        version = svn_option
    elif svn_option in ("google3", "google3-unstable"):
        version = GetGoogle3LLVMVersion(stable=svn_option == "google3")
    else:
        # Raise explicitly rather than `assert`: assertions are stripped under
        # `python -O`, which would silently treat any other option as
        # 'google3-unstable'.
        raise ValueError(
            f"Unsupported LLVM hash source {svn_option!r}; expected 'tot', "
            "'google3', 'google3-unstable', or an integer revision"
        )

    git_hash = GetGitHashFrom(GetAndUpdateLLVMProjectInLLVMTools(), version)
    return git_hash, version
365
366
def GetCrOSCurrentLLVMHash(chromeos_tree: Path) -> str:
    """Retrieves the current ChromeOS LLVM hash.

    Args:
        chromeos_tree: A ChromeOS source tree. This is allowed to be an
          arbitrary subdirectory of an actual ChromeOS tree, for convenience;
          the tree's root is located from it.

    Returns:
        The LLVM hash extracted by `manifest_utils` for the located root.

    Raises:
        ManifestValueError if the toolchain manifest doesn't match the
        expected structure.
    """
    tree_root = chroot.FindChromeOSRootAbove(chromeos_tree)
    current_hash = manifest_utils.extract_current_llvm_hash(tree_root)
    return current_hash
380
381
class LLVMHash:
    """Provides methods to retrieve a LLVM hash."""

    @staticmethod
    @contextlib.contextmanager
    def CreateTempDirectory() -> Iterator[str]:
        """Yields a fresh temporary directory and deletes it afterwards.

        Deletion is best-effort: errors while removing the tree are ignored.
        """
        scratch_dir = tempfile.mkdtemp()

        try:
            yield scratch_dir
        finally:
            # The body may have removed the directory already.
            if os.path.isdir(scratch_dir):
                shutil.rmtree(scratch_dir, ignore_errors=True)

    def CloneLLVMRepo(self, temp_dir: str) -> None:
        """Clones the LLVM repo.

        Args:
            temp_dir: The directory to clone the repo into.

        Raises:
            ValueError: Failed to clone the LLVM repo.
        """
        # stderr is captured so that it can be reported in the error below.
        result = subprocess.run(
            ["git", "clone", _LLVM_GIT_URL, temp_dir],
            check=False,
            stderr=subprocess.PIPE,
        )
        if result.returncode != 0:
            raise ValueError(
                "Failed to clone the LLVM repo; stderr: "
                f"{repr(result.stderr)}"
            )

    def GetLLVMHash(self, version: int) -> str:
        """Retrieves the LLVM hash corresponding to the LLVM version passed in.

        Args:
            version: The SVN-style LLVM version to look up.

        Returns:
            The hash as a string that corresponds to the LLVM version.
        """
        llvm_project_dir = GetAndUpdateLLVMProjectInLLVMTools()
        return GetGitHashFrom(llvm_project_dir, version)

    def GetCrOSCurrentLLVMHash(self, chromeos_tree: Path) -> str:
        """Retrieves the current ChromeOS LLVM hash."""
        # Delegates to the module-level function of the same name.
        return GetCrOSCurrentLLVMHash(chromeos_tree)

    def GetCrOSLLVMNextHash(self) -> str:
        """Retrieves the current ChromeOS llvm-next hash."""
        return llvm_next.LLVM_NEXT_HASH

    def GetGoogle3LLVMHash(self) -> str:
        """Retrieves the google3 LLVM hash."""
        stable_version = GetGoogle3LLVMVersion(stable=True)
        return self.GetLLVMHash(stable_version)

    def GetGoogle3UnstableLLVMHash(self) -> str:
        """Retrieves the LLVM hash of google3's unstable compiler."""
        unstable_version = GetGoogle3LLVMVersion(stable=False)
        return self.GetLLVMHash(unstable_version)

    def GetTopOfTrunkGitHash(self) -> str:
        """Gets the latest git hash from top of trunk of LLVM."""
        ls_remote_output = subprocess_helpers.check_output(
            ["git", "ls-remote", _LLVM_GIT_URL, "refs/heads/main"]
        )
        # The hash is the first whitespace-separated field of the output.
        fields = ls_remote_output.rstrip().split()
        return fields[0]
453
454
def main() -> None:
    """Prints the git hash of LLVM selected on the command line.

    `--llvm_version` (required) picks which hash to print: an SVN-style
    revision number or one of KNOWN_HASH_SOURCES. `--chromeos_tree` is
    optional; when it is omitted, the ChromeOS tree is inferred from this
    script's location.
    """
    my_dir = Path(__file__).parent.resolve()

    # Create parser and add optional command-line arguments.
    parser = argparse.ArgumentParser(description="Finds the LLVM hash.")
    parser.add_argument(
        "--llvm_version",
        type=IsSvnOption,
        required=True,
        help="which git hash of LLVM to find. Either a svn revision, or one "
        "of %s" % sorted(KNOWN_HASH_SOURCES),
    )
    # Deliberately not `required`: the help text promises inference, and the
    # fallback below can only run when the flag may be omitted.
    parser.add_argument(
        "--chromeos_tree",
        type=Path,
        help="""
        Path to a ChromeOS tree. If not passed, one will be inferred. If none
        can be inferred, this script will fail.
        """,
    )

    # Parse command-line arguments.
    args_output = parser.parse_args()

    cur_llvm_version = args_output.llvm_version
    chromeos_tree = args_output.chromeos_tree
    if not chromeos_tree:
        # Try to infer this unconditionally, so mishandling of this script can
        # be more easily detected (which allows more flexibility in the
        # implementation in the future for things outside of what directly
        # needs this value).
        chromeos_tree = chroot.FindChromeOSRootAbove(my_dir)

    new_llvm_hash = LLVMHash()
    if isinstance(cur_llvm_version, int):
        # Find the git hash of the specific LLVM version.
        print(new_llvm_hash.GetLLVMHash(cur_llvm_version))
    elif cur_llvm_version == "llvm":
        print(new_llvm_hash.GetCrOSCurrentLLVMHash(chromeos_tree))
    elif cur_llvm_version == "llvm-next":
        print(new_llvm_hash.GetCrOSLLVMNextHash())
    elif cur_llvm_version == "google3":
        print(new_llvm_hash.GetGoogle3LLVMHash())
    elif cur_llvm_version == "google3-unstable":
        print(new_llvm_hash.GetGoogle3UnstableLLVMHash())
    else:
        # IsSvnOption() only lets KNOWN_HASH_SOURCES or ints through, so "tot"
        # is the sole remaining possibility.
        assert cur_llvm_version == "tot"
        print(new_llvm_hash.GetTopOfTrunkGitHash())


if __name__ == "__main__":
    main()
513