aboutsummaryrefslogtreecommitdiff
path: root/llvm_tools/git_llvm_rev.py
diff options
context:
space:
mode:
Diffstat (limited to 'llvm_tools/git_llvm_rev.py')
-rwxr-xr-xllvm_tools/git_llvm_rev.py248
1 files changed, 161 insertions, 87 deletions
diff --git a/llvm_tools/git_llvm_rev.py b/llvm_tools/git_llvm_rev.py
index 1db94461..51fb6fec 100755
--- a/llvm_tools/git_llvm_rev.py
+++ b/llvm_tools/git_llvm_rev.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
# Copyright 2019 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -10,12 +9,12 @@ Revision numbers are all of the form '(branch_name, r1234)'. As a shorthand,
r1234 is parsed as '(main, 1234)'.
"""
-
import argparse
+from pathlib import Path
import re
import subprocess
import sys
-import typing as t
+from typing import IO, Iterable, List, NamedTuple, Optional, Tuple, Union
MAIN_BRANCH = "main"
@@ -35,13 +34,27 @@ MAIN_BRANCH = "main"
base_llvm_revision = 375505
base_llvm_sha = "186155b89c2d2a2f62337081e3ca15f676c9434b"
+# Known pairs of [revision, SHA] in ascending order.
+# The first element is the first non-`llvm-svn` commit that exists. Later ones
+# are functional nops, but speed this script up immensely, since `git` can take
+# quite a while to walk >100K commits.
+known_llvm_rev_sha_pairs = (
+ (base_llvm_revision, base_llvm_sha),
+ (425000, "af870e11aed7a5c475ae41a72e3015c4c88597d1"),
+ (450000, "906ebd5830e6053b50c52bf098e3586b567e8499"),
+ (475000, "530d14a99611a71f8f3eb811920fd7b5c4d4e1f8"),
+ (500000, "173855f9b0bdfe45d71272596b510650bfc1ca33"),
+)
+
# Represents an LLVM git checkout:
# - |dir| is the directory of the LLVM checkout
# - |remote| is the name of the LLVM remote. Generally it's "origin".
-LLVMConfig = t.NamedTuple("LLVMConfig", (("remote", str), ("dir", str)))
+LLVMConfig = NamedTuple(
+ "LLVMConfig", (("remote", str), ("dir", Union[Path, str]))
+)
-class Rev(t.NamedTuple("Rev", (("branch", str), ("number", int)))):
+class Rev(NamedTuple("Rev", (("branch", str), ("number", int)))):
"""Represents a LLVM 'revision', a shorthand identifies a LLVM commit."""
@staticmethod
@@ -83,7 +96,7 @@ def is_git_sha(xs: str) -> bool:
)
-def check_output(command: t.List[str], cwd: str) -> str:
+def check_output(command: List[str], cwd: Union[Path, str]) -> str:
"""Shorthand for subprocess.check_output. Auto-decodes any stdout."""
result = subprocess.run(
command,
@@ -130,26 +143,26 @@ def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev:
)
sha = sha.strip()
- merge_base = check_output(
- ["git", "merge-base", base_llvm_sha, sha, "--"],
- cwd=llvm_config.dir,
- )
- merge_base = merge_base.strip()
-
- if merge_base == base_llvm_sha:
- result = check_output(
- [
- "git",
- "rev-list",
- "--count",
- "--first-parent",
- f"{base_llvm_sha}..{sha}",
- "--",
- ],
+ for base_rev, base_sha in reversed(known_llvm_rev_sha_pairs):
+ merge_base = check_output(
+ ["git", "merge-base", base_sha, sha, "--"],
cwd=llvm_config.dir,
)
- count = int(result.strip())
- return Rev(branch=MAIN_BRANCH, number=count + base_llvm_revision)
+ merge_base = merge_base.strip()
+ if merge_base == base_sha:
+ result = check_output(
+ [
+ "git",
+ "rev-list",
+ "--count",
+ "--first-parent",
+ f"{base_sha}..{sha}",
+ "--",
+ ],
+ cwd=llvm_config.dir,
+ )
+ count = int(result.strip())
+ return Rev(branch=MAIN_BRANCH, number=count + base_rev)
# Otherwise, either:
# - |merge_base| is |sha| (we have a guaranteed llvm-svn number on |sha|)
@@ -193,22 +206,23 @@ def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev:
)
# It seems that some `origin/release/.*` branches have
- # `origin/upstream/release/.*` equivalents, which is... awkward to deal with.
- # Prefer the latter, since that seems to have newer commits than the former.
- # Technically n^2, but len(elements) should be like, tens in the worst case.
+ # `origin/upstream/release/.*` equivalents, which is... awkward to deal
+ # with. Prefer the latter, since that seems to have newer commits than the
+ # former. Technically n^2, but len(elements) should be like, tens in the
+ # worst case.
candidates = [x for x in candidates if f"upstream/{x}" not in candidates]
if len(candidates) != 1:
raise ValueError(
- f"Ambiguity: multiple branches from {llvm_config.remote} have {sha}: "
- f"{sorted(candidates)}"
+ f"Ambiguity: multiple branches from {llvm_config.remote} have "
+ f"{sha}: {sorted(candidates)}"
)
return Rev(branch=candidates[0], number=revision_number)
def parse_git_commit_messages(
- stream: t.Iterable[str], separator: str
-) -> t.Iterable[t.Tuple[str, str]]:
+ stream: Union[Iterable[str], IO[str]], separator: str
+) -> Iterable[Tuple[str, str]]:
"""Parses a stream of git log messages.
These are expected to be in the format:
@@ -272,6 +286,7 @@ def translate_prebase_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
stdout=subprocess.PIPE,
encoding="utf-8",
) as subp:
+ assert subp.stdout is not None
for sha, message in parse_git_commit_messages(subp.stdout, separator):
last_line = message.splitlines()[-1]
if last_line.strip() == looking_for:
@@ -283,78 +298,137 @@ def translate_prebase_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
raise ValueError(f"No commit with revision {rev} found")
-def translate_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
- """Translates a Rev to a SHA.
-
- Raises a ValueError if the given Rev doesn't exist in the given config.
+def translate_rev_to_sha_from_baseline(
+ llvm_config: LLVMConfig,
+ parent_sha: str,
+ parent_rev: int,
+ child_sha: str,
+ child_rev: Optional[int],
+ want_rev: int,
+ branch_name: str,
+) -> str:
+ """Translates a revision number between a parent & child to a SHA.
+
+ Args:
+ llvm_config: LLVM config to use.
+ parent_sha: SHA of the parent that the revision number is a child of.
+ parent_rev: Revision number of `parent_sha`.
+ child_sha: A child of `parent_sha` to find a rev on.
+ child_rev: Optional note of what the child's revision number is.
+ want_rev: The desired revision number between child and parent.
+ branch_name: Name of the branch to refer to if a ValueError is raised.
+
+ Raises:
+ ValueError if the given child isn't far enough away from the parent to
+ find `want_rev`.
"""
- branch, number = rev
-
- if branch == MAIN_BRANCH:
- if number < base_llvm_revision:
- return translate_prebase_rev_to_sha(llvm_config, rev)
- base_sha = base_llvm_sha
- base_revision_number = base_llvm_revision
+ # As a convenience, have a fast path for want_rev < parent_rev. In
+ # particular, branches can hit this case.
+ if want_rev < parent_rev:
+ baseline_git_sha = parent_sha
+ commits_behind_baseline = parent_rev - want_rev
else:
- base_sha = check_output(
- [
- "git",
- "merge-base",
- base_llvm_sha,
- f"{llvm_config.remote}/{branch}",
- ],
- cwd=llvm_config.dir,
- )
- base_sha = base_sha.strip()
- if base_sha == base_llvm_sha:
- base_revision_number = base_llvm_revision
- else:
- base_revision_number = translate_prebase_sha_to_rev_number(
- llvm_config, base_sha
+ if child_rev is None:
+ commits_between_parent_and_child = check_output(
+ [
+ "git",
+ "rev-list",
+ "--count",
+ "--first-parent",
+ f"{parent_sha}..{child_sha}",
+ "--",
+ ],
+ cwd=llvm_config.dir,
)
+ child_rev = parent_rev + int(
+ commits_between_parent_and_child.strip()
+ )
+ if child_rev < want_rev:
+ raise ValueError(
+ "Revision {want_rev} is past "
+ f"{llvm_config.remote}/{branch_name}. Try updating your tree?"
+ )
+ baseline_git_sha = child_sha
+ commits_behind_baseline = child_rev - want_rev
- # Alternatively, we could |git log --format=%H|, but git is *super* fast
- # about rev walking/counting locally compared to long |log|s, so we walk back
- # twice.
- head = check_output(
+ if not commits_behind_baseline:
+ return baseline_git_sha
+
+ result = check_output(
[
"git",
"rev-parse",
"--revs-only",
- f"{llvm_config.remote}/{branch}",
- "--",
+ f"{baseline_git_sha}~{commits_behind_baseline}",
],
cwd=llvm_config.dir,
)
- branch_head_sha = head.strip()
+ return result.strip()
- commit_number = number - base_revision_number
- revs_between_str = check_output(
- [
- "git",
- "rev-list",
- "--count",
- "--first-parent",
- f"{base_sha}..{branch_head_sha}",
- "--",
- ],
+
+def translate_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
+ """Translates a Rev to a SHA.
+
+ Raises a ValueError if the given Rev doesn't exist in the given config.
+ """
+ branch, number = rev
+
+ branch_tip = check_output(
+ ["git", "rev-parse", "--revs-only", f"{llvm_config.remote}/{branch}"],
cwd=llvm_config.dir,
- )
- revs_between = int(revs_between_str.strip())
+ ).strip()
- commits_behind_head = revs_between - commit_number
- if commits_behind_head < 0:
- raise ValueError(
- f"Revision {rev} is past {llvm_config.remote}/{branch}. Try updating "
- "your tree?"
+ if branch != MAIN_BRANCH:
+ main_merge_point = check_output(
+ [
+ "git",
+ "merge-base",
+ f"{llvm_config.remote}/{MAIN_BRANCH}",
+ branch_tip,
+ ],
+ cwd=llvm_config.dir,
+ )
+ main_merge_point = main_merge_point.strip()
+ main_rev = translate_sha_to_rev(llvm_config, main_merge_point)
+ return translate_rev_to_sha_from_baseline(
+ llvm_config,
+ parent_sha=main_merge_point,
+ parent_rev=main_rev.number,
+ child_sha=branch_tip,
+ child_rev=None,
+ want_rev=number,
+ branch_name=branch,
)
- result = check_output(
- ["git", "rev-parse", f"{branch_head_sha}~{commits_behind_head}"],
- cwd=llvm_config.dir,
- )
+ if number < base_llvm_revision:
+ return translate_prebase_rev_to_sha(llvm_config, rev)
+
+ # Technically this could be a binary search, but the list has fewer than 10
+ # elems, and won't grow fast. Linear is easier.
+ last_cached_rev = None
+ last_cached_sha = branch_tip
+ for cached_rev, cached_sha in reversed(known_llvm_rev_sha_pairs):
+ if cached_rev == number:
+ return cached_sha
+
+ if cached_rev < number:
+ return translate_rev_to_sha_from_baseline(
+ llvm_config,
+ parent_sha=cached_sha,
+ parent_rev=cached_rev,
+ child_sha=last_cached_sha,
+ child_rev=last_cached_rev,
+ want_rev=number,
+ branch_name=branch,
+ )
- return result.strip()
+ last_cached_rev = cached_rev
+ last_cached_sha = cached_sha
+
+ # This is only hit if `number >= base_llvm_revision` _and_ there's no
+ # coverage for `number` in `known_llvm_rev_sha_pairs`, which contains
+ # `base_llvm_revision`.
+ assert False, "Couldn't find a base SHA for a rev on main?"
def find_root_llvm_dir(root_dir: str = ".") -> str:
@@ -369,7 +443,7 @@ def find_root_llvm_dir(root_dir: str = ".") -> str:
return result.strip()
-def main(argv: t.List[str]) -> None:
+def main(argv: List[str]) -> None:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--llvm_dir",