diff options
Diffstat (limited to 'llvm_tools/git_llvm_rev.py')
-rwxr-xr-x | llvm_tools/git_llvm_rev.py | 248 |
1 files changed, 161 insertions, 87 deletions
diff --git a/llvm_tools/git_llvm_rev.py b/llvm_tools/git_llvm_rev.py index 1db94461..51fb6fec 100755 --- a/llvm_tools/git_llvm_rev.py +++ b/llvm_tools/git_llvm_rev.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # Copyright 2019 The ChromiumOS Authors # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -10,12 +9,12 @@ Revision numbers are all of the form '(branch_name, r1234)'. As a shorthand, r1234 is parsed as '(main, 1234)'. """ - import argparse +from pathlib import Path import re import subprocess import sys -import typing as t +from typing import IO, Iterable, List, NamedTuple, Optional, Tuple, Union MAIN_BRANCH = "main" @@ -35,13 +34,27 @@ MAIN_BRANCH = "main" base_llvm_revision = 375505 base_llvm_sha = "186155b89c2d2a2f62337081e3ca15f676c9434b" +# Known pairs of [revision, SHA] in ascending order. +# The first element is the first non-`llvm-svn` commit that exists. Later ones +# are functional nops, but speed this script up immensely, since `git` can take +# quite a while to walk >100K commits. +known_llvm_rev_sha_pairs = ( + (base_llvm_revision, base_llvm_sha), + (425000, "af870e11aed7a5c475ae41a72e3015c4c88597d1"), + (450000, "906ebd5830e6053b50c52bf098e3586b567e8499"), + (475000, "530d14a99611a71f8f3eb811920fd7b5c4d4e1f8"), + (500000, "173855f9b0bdfe45d71272596b510650bfc1ca33"), +) + # Represents an LLVM git checkout: # - |dir| is the directory of the LLVM checkout # - |remote| is the name of the LLVM remote. Generally it's "origin". 
-LLVMConfig = t.NamedTuple("LLVMConfig", (("remote", str), ("dir", str))) +LLVMConfig = NamedTuple( + "LLVMConfig", (("remote", str), ("dir", Union[Path, str])) +) -class Rev(t.NamedTuple("Rev", (("branch", str), ("number", int)))): +class Rev(NamedTuple("Rev", (("branch", str), ("number", int)))): """Represents a LLVM 'revision', a shorthand identifies a LLVM commit.""" @staticmethod @@ -83,7 +96,7 @@ def is_git_sha(xs: str) -> bool: ) -def check_output(command: t.List[str], cwd: str) -> str: +def check_output(command: List[str], cwd: Union[Path, str]) -> str: """Shorthand for subprocess.check_output. Auto-decodes any stdout.""" result = subprocess.run( command, @@ -130,26 +143,26 @@ def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev: ) sha = sha.strip() - merge_base = check_output( - ["git", "merge-base", base_llvm_sha, sha, "--"], - cwd=llvm_config.dir, - ) - merge_base = merge_base.strip() - - if merge_base == base_llvm_sha: - result = check_output( - [ - "git", - "rev-list", - "--count", - "--first-parent", - f"{base_llvm_sha}..{sha}", - "--", - ], + for base_rev, base_sha in reversed(known_llvm_rev_sha_pairs): + merge_base = check_output( + ["git", "merge-base", base_sha, sha, "--"], cwd=llvm_config.dir, ) - count = int(result.strip()) - return Rev(branch=MAIN_BRANCH, number=count + base_llvm_revision) + merge_base = merge_base.strip() + if merge_base == base_sha: + result = check_output( + [ + "git", + "rev-list", + "--count", + "--first-parent", + f"{base_sha}..{sha}", + "--", + ], + cwd=llvm_config.dir, + ) + count = int(result.strip()) + return Rev(branch=MAIN_BRANCH, number=count + base_rev) # Otherwise, either: # - |merge_base| is |sha| (we have a guaranteed llvm-svn number on |sha|) @@ -193,22 +206,23 @@ def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev: ) # It seems that some `origin/release/.*` branches have - # `origin/upstream/release/.*` equivalents, which is... awkward to deal with. 
- # Prefer the latter, since that seems to have newer commits than the former. - # Technically n^2, but len(elements) should be like, tens in the worst case. + # `origin/upstream/release/.*` equivalents, which is... awkward to deal + # with. Prefer the latter, since that seems to have newer commits than the + # former. Technically n^2, but len(elements) should be like, tens in the + # worst case. candidates = [x for x in candidates if f"upstream/{x}" not in candidates] if len(candidates) != 1: raise ValueError( - f"Ambiguity: multiple branches from {llvm_config.remote} have {sha}: " - f"{sorted(candidates)}" + f"Ambiguity: multiple branches from {llvm_config.remote} have " + f"{sha}: {sorted(candidates)}" ) return Rev(branch=candidates[0], number=revision_number) def parse_git_commit_messages( - stream: t.Iterable[str], separator: str -) -> t.Iterable[t.Tuple[str, str]]: + stream: Union[Iterable[str], IO[str]], separator: str +) -> Iterable[Tuple[str, str]]: """Parses a stream of git log messages. These are expected to be in the format: @@ -272,6 +286,7 @@ def translate_prebase_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str: stdout=subprocess.PIPE, encoding="utf-8", ) as subp: + assert subp.stdout is not None for sha, message in parse_git_commit_messages(subp.stdout, separator): last_line = message.splitlines()[-1] if last_line.strip() == looking_for: @@ -283,78 +298,137 @@ def translate_prebase_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str: raise ValueError(f"No commit with revision {rev} found") -def translate_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str: - """Translates a Rev to a SHA. - - Raises a ValueError if the given Rev doesn't exist in the given config. +def translate_rev_to_sha_from_baseline( + llvm_config: LLVMConfig, + parent_sha: str, + parent_rev: int, + child_sha: str, + child_rev: Optional[int], + want_rev: int, + branch_name: str, +) -> str: + """Translates a revision number between a parent & child to a SHA. 
+ + Args: + llvm_config: LLVM config to use. + parent_sha: SHA of the parent that the revision number is a child of. + parent_rev: Revision number of `parent_sha`. + child_sha: A child of `parent_sha` to find a rev on. + child_rev: Optional note of what the child's revision number is. + want_rev: The desired revision number between child and parent. + branch_name: Name of the branch to refer to if a ValueError is raised. + + Raises: + ValueError if the given child isn't far enough away from the parent to + find `want_rev`. """ - branch, number = rev - - if branch == MAIN_BRANCH: - if number < base_llvm_revision: - return translate_prebase_rev_to_sha(llvm_config, rev) - base_sha = base_llvm_sha - base_revision_number = base_llvm_revision + # As a convenience, have a fast path for want_rev < parent_rev. In + # particular, branches can hit this case. + if want_rev < parent_rev: + baseline_git_sha = parent_sha + commits_behind_baseline = parent_rev - want_rev else: - base_sha = check_output( - [ - "git", - "merge-base", - base_llvm_sha, - f"{llvm_config.remote}/{branch}", - ], - cwd=llvm_config.dir, - ) - base_sha = base_sha.strip() - if base_sha == base_llvm_sha: - base_revision_number = base_llvm_revision - else: - base_revision_number = translate_prebase_sha_to_rev_number( - llvm_config, base_sha + if child_rev is None: + commits_between_parent_and_child = check_output( + [ + "git", + "rev-list", + "--count", + "--first-parent", + f"{parent_sha}..{child_sha}", + "--", + ], + cwd=llvm_config.dir, ) + child_rev = parent_rev + int( + commits_between_parent_and_child.strip() + ) + if child_rev < want_rev: + raise ValueError( + f"Revision {want_rev} is past " + f"{llvm_config.remote}/{branch_name}. Try updating your tree?" + ) + baseline_git_sha = child_sha + commits_behind_baseline = child_rev - want_rev - # Alternatively, we could |git log --format=%H|, but git is *super* fast - # about rev walking/counting locally compared to long |log|s, so we walk back - # twice. 
- head = check_output( + if not commits_behind_baseline: + return baseline_git_sha + + result = check_output( [ "git", "rev-parse", "--revs-only", - f"{llvm_config.remote}/{branch}", - "--", + f"{baseline_git_sha}~{commits_behind_baseline}", ], cwd=llvm_config.dir, ) - branch_head_sha = head.strip() + return result.strip() - commit_number = number - base_revision_number - revs_between_str = check_output( - [ - "git", - "rev-list", - "--count", - "--first-parent", - f"{base_sha}..{branch_head_sha}", - "--", - ], + +def translate_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str: + """Translates a Rev to a SHA. + + Raises a ValueError if the given Rev doesn't exist in the given config. + """ + branch, number = rev + + branch_tip = check_output( + ["git", "rev-parse", "--revs-only", f"{llvm_config.remote}/{branch}"], cwd=llvm_config.dir, - ) - revs_between = int(revs_between_str.strip()) + ).strip() - commits_behind_head = revs_between - commit_number - if commits_behind_head < 0: - raise ValueError( - f"Revision {rev} is past {llvm_config.remote}/{branch}. Try updating " - "your tree?" + if branch != MAIN_BRANCH: + main_merge_point = check_output( + [ + "git", + "merge-base", + f"{llvm_config.remote}/{MAIN_BRANCH}", + branch_tip, + ], + cwd=llvm_config.dir, + ) + main_merge_point = main_merge_point.strip() + main_rev = translate_sha_to_rev(llvm_config, main_merge_point) + return translate_rev_to_sha_from_baseline( + llvm_config, + parent_sha=main_merge_point, + parent_rev=main_rev.number, + child_sha=branch_tip, + child_rev=None, + want_rev=number, + branch_name=branch, ) - result = check_output( - ["git", "rev-parse", f"{branch_head_sha}~{commits_behind_head}"], - cwd=llvm_config.dir, - ) + if number < base_llvm_revision: + return translate_prebase_rev_to_sha(llvm_config, rev) + + # Technically this could be a binary search, but the list has fewer than 10 + # elems, and won't grow fast. Linear is easier. 
+ last_cached_rev = None + last_cached_sha = branch_tip + for cached_rev, cached_sha in reversed(known_llvm_rev_sha_pairs): + if cached_rev == number: + return cached_sha + + if cached_rev < number: + return translate_rev_to_sha_from_baseline( + llvm_config, + parent_sha=cached_sha, + parent_rev=cached_rev, + child_sha=last_cached_sha, + child_rev=last_cached_rev, + want_rev=number, + branch_name=branch, + ) - return result.strip() + last_cached_rev = cached_rev + last_cached_sha = cached_sha + + # This is only hit if `number >= base_llvm_revision` _and_ there's no + # coverage for `number` in `known_llvm_rev_sha_pairs`, which contains + # `base_llvm_revision`. + assert False, "Couldn't find a base SHA for a rev on main?" def find_root_llvm_dir(root_dir: str = ".") -> str: @@ -369,7 +443,7 @@ def find_root_llvm_dir(root_dir: str = ".") -> str: return result.strip() -def main(argv: t.List[str]) -> None: +def main(argv: List[str]) -> None: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--llvm_dir", |