aboutsummaryrefslogtreecommitdiff
path: root/llvm_tools/werror_logs.py
diff options
context:
space:
mode:
Diffstat (limited to 'llvm_tools/werror_logs.py')
-rwxr-xr-xllvm_tools/werror_logs.py577
1 files changed, 577 insertions, 0 deletions
diff --git a/llvm_tools/werror_logs.py b/llvm_tools/werror_logs.py
new file mode 100755
index 00000000..f8a526f1
--- /dev/null
+++ b/llvm_tools/werror_logs.py
@@ -0,0 +1,577 @@
+#!/usr/bin/env python3
+# Copyright 2024 The ChromiumOS Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Helps reason about -Werror logs emitted by the compiler wrapper.
+
+Specifically, this works with the -Werror reports produced by the compiler
+wrapper in FORCE_DISABLE_WERROR mode. It's intended to be run on trees of these
+reports, so devs can run roughly the following commands:
+
+$ apply_force_disable_werror # (There's no actual script to do this today.)
+$ build_packages --board=foo --nousepkg
+$ ./werror_logs.py aggregate --directory=/build/foo/var/lib/chromeos
+
+And see a full aggregation of all warnings that were suppressed in that
+`build_packages` invocation.
+
+It can also be used to fetch warnings reports from CQ runs, for instance,
+$ ./werror_logs.py fetch-cq --cq-orchestrator-id=123456
+
+In this case, it downloads _all -Werror logs_ from children of the given
+cq-orchestrator, and prints the parent directory of all of these reports. If
+you run `aggregate` on this directory, it's highly recommended to use the
+`--canonicalize-board-roots` flag.
+"""
+
+import argparse
+import collections
+import dataclasses
+import json
+import logging
+import multiprocessing.pool
+import os
+from pathlib import Path
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+import threading
+from typing import Any, Counter, DefaultDict, Dict, IO, Iterable, List, Optional
+
+import cros_cls
+
+
+_DEFAULT_FETCH_DIRECTORY = Path("/tmp/werror_logs")
+
+
def canonicalize_file_path_board_root(file_path: str) -> str:
    """Replaces a leading board root (e.g., /build/atlas) with /build/{board}.

    The path is normalized first, so redundant slashes and `..` traversal are
    collapsed. Paths not under a board root are returned normalized but
    otherwise untouched.
    """
    # Get rid of double slashes, unnecessary directory traversal
    # (foo/../bar/..), etc. Easier to read this way.
    normalized = os.path.normpath(file_path)
    prefix = "/build/"
    if normalized.startswith(prefix):
        board_end = normalized.find("/", len(prefix))
        if board_end != -1:
            return f"/build/{{board}}/{normalized[board_end + 1:]}"
    return normalized
+
+
@dataclasses.dataclass(frozen=True, eq=True, order=True)
class ClangWarningLocation:
    """Represents a location at which a Clang warning was emitted."""

    file: str
    line: int
    column: int

    @classmethod
    def parse(
        cls, location: str, canonicalize_board_root: bool = False
    ) -> "ClangWarningLocation":
        """Parses a `file:line:column` string into a ClangWarningLocation.

        Raises:
            ValueError: If `location` is not in `file:line:column` form.
        """
        pieces = location.rsplit(":", 2)
        if len(pieces) != 3:
            raise ValueError(f"Invalid location: {location!r}")
        file_name, line_str, column_str = pieces
        if canonicalize_board_root:
            file_name = canonicalize_file_path_board_root(file_name)
        return cls(file=file_name, line=int(line_str), column=int(column_str))
+
+
@dataclasses.dataclass(frozen=True, eq=True)
class ClangWarning:
    """Represents a Clang warning at a specific location (if applicable)."""

    # The name of the warning, e.g., -Wunused-variable
    name: str
    # The message of the warning, e.g., "'allocate' is deprecated."
    message: str
    # The location of this warning. Not present for frontend diagnostics.
    location: Optional[ClangWarningLocation]

    # This parses two kinds of errors:
    # 1. `clang-17: error: foo [-W...]`
    # 2. `/file/path:123:45: error: foo [-W...]"
    _WARNING_RE = re.compile(
        # Capture the location on its own, since `clang-\d+` is unused below.
        r"^(?:([^:]*:\d+:\d+)|clang-\d+)"
        r": error: "
        # Capture the message
        r"(.*?)\s+"
        r"\[(-W[^\][]+)]\s*$"
    )

    @classmethod
    def try_parse_line(
        cls, line: str, canonicalize_board_root: bool = False
    ) -> Optional["ClangWarning"]:
        """Parses one line of compiler output; None if it's not a warning.

        Raises:
            ValueError: If a matched line carries anything other than exactly
                one warning flag (besides -Werror).
        """
        # Fast path: we can expect "error: " in interesting lines. Break early
        # if that's not present.
        if "error: " not in line:
            return None

        match = cls._WARNING_RE.fullmatch(line)
        if match is None:
            return None

        raw_location, message, flag_group = match.groups()
        flags = [flag for flag in flag_group.split(",") if flag != "-Werror"]
        # This isn't impossible to handle in theory, just unexpected. Complain
        # about it.
        if len(flags) != 1:
            raise ValueError(
                f"Weird: parsed warnings {flags} out "
                f"of {line}"
            )

        if raw_location is None:
            parsed_location = None
        else:
            parsed_location = ClangWarningLocation.parse(
                raw_location, canonicalize_board_root
            )
        return cls(name=flags[0], message=message, location=parsed_location)
+
+
@dataclasses.dataclass(frozen=True, eq=True)
class WarningInfo:
    """Carries information about a ClangWarning."""

    # Maps a package name to the number of times the associated warning was
    # observed during that package's build.
    packages: DefaultDict[str, int] = dataclasses.field(
        default_factory=lambda: collections.defaultdict(int)
    )
+
+
class UnknownPackageNameError(ValueError):
    """Raised when a package name can't be determined from a warning report."""
+
+
@dataclasses.dataclass
class AggregatedWarnings:
    """Aggregates warning reports incrementally."""

    # Total number of report files fed into `add_report`/`add_report_json`.
    num_reports: int = 0
    # Mapping of warning -> per-package counts of that warning. Warnings in
    # headers may be referred to by multiple packages.
    warnings: DefaultDict[ClangWarning, WarningInfo] = dataclasses.field(
        default_factory=lambda: collections.defaultdict(WarningInfo)
    )

    # Matches the portage build directory in a report's `cwd`, capturing the
    # category/package portion. The board root (/build/<board>) is optional,
    # so reports from host builds also match.
    _CWD_PACKAGE_RE = re.compile(
        r"^(?:/build/[^/]+)?/var/(?:cache|tmp)/portage/([^/]+/[^/]+)/"
    )

    @classmethod
    def _guess_package_name(cls, report: Dict[str, Any]) -> str:
        """Tries to guess what package `report` is from.

        Raises:
            UnknownPackageNameError if the package's name couldn't be
            determined.
        """
        m = cls._CWD_PACKAGE_RE.match(report.get("cwd", ""))
        if not m:
            raise UnknownPackageNameError()
        return m.group(1)

    def add_report_json(
        self, report_json: Dict[str, Any], canonicalize_board_root: bool = False
    ) -> int:
        """Adds the given report, returning the number of warnings parsed.

        Args:
            report_json: A decoded warnings-report JSON object.
            canonicalize_board_root: If True, board roots in warning file
                paths are rewritten to the generic `/build/{board}` form.

        Raises:
            UnknownPackageNameError if the package's name couldn't be
            determined.
        """
        self.num_reports += 1
        package_name = self._guess_package_name(report_json)

        num_warnings = 0
        # Warnings appear in the captured compiler stdout, one per line.
        for line in report_json.get("stdout", "").splitlines():
            if parsed := ClangWarning.try_parse_line(
                line, canonicalize_board_root
            ):
                self.warnings[parsed].packages[package_name] += 1
                num_warnings += 1

        return num_warnings

    def add_report(
        self, report_file: Path, canonicalize_board_root: bool = False
    ) -> None:
        """Reads the JSON report at `report_file` and adds it to `self`.

        Unlike `add_report_json`, a report whose package name can't be
        guessed is logged and skipped rather than raising.
        """
        with report_file.open(encoding="utf-8") as f:
            report = json.load(f)

        try:
            n = self.add_report_json(report, canonicalize_board_root)
        except UnknownPackageNameError:
            logging.warning(
                "Failed guessing package name for report at %r; ignoring file",
                report_file,
            )
            return

        if not n:
            logging.warning(
                "Report at %r had no parseable warnings", report_file
            )
+
+
def print_aligned_counts(
    name_count_map: Dict[str, int], file: Optional[IO[str]] = None
) -> None:
    """Prints tab-indented `name: count` rows, right-aligned in each column.

    Rows are ordered by count (highest first); ties are broken by name.
    `name_count_map` must be non-empty.
    """
    assert name_count_map
    # Sort on value, highest first. Name breaks ties.
    rows = sorted(name_count_map.items(), key=lambda kv: (-kv[1], kv[0]))
    # The first row has the largest count, so it sets the count column width.
    count_width = len(f"{rows[0][1]:,}")
    name_width = max(len(name) for name in name_count_map)
    for name, count in rows:
        padded_name = name.rjust(name_width)
        padded_count = f"{count:,}".rjust(count_width)
        print(f"\t{padded_name}: {padded_count}", file=file)
+
+
def summarize_per_package_warnings(
    warning_infos: Iterable[WarningInfo],
    file: Optional[IO[str]] = None,
) -> None:
    """Prints the total number of suppressed warnings seen in each package.

    Prints nothing at all if `warning_infos` contains no package counts.
    """
    per_package: DefaultDict[str, int] = collections.defaultdict(int)
    for warning_info in warning_infos:
        for package, count in warning_info.packages.items():
            per_package[package] += count

    if not per_package:
        return

    print("## Per-package warning counts:", file=file)
    print_aligned_counts(per_package, file=file)
+
+
def summarize_warnings_by_flag(
    warnings: Dict[ClangWarning, WarningInfo],
    file: Optional[IO[str]] = None,
) -> None:
    """Prints how many times each fatal warning flag (-W...) was emitted.

    Prints nothing at all if `warnings` is empty.
    """
    if not warnings:
        return

    per_flag: Counter[str] = collections.Counter()
    for warning, warning_info in warnings.items():
        per_flag[warning.name] += sum(warning_info.packages.values())

    print("## Instances of each fatal warning:", file=file)
    print_aligned_counts(per_flag, file=file)
+
+
def aggregate_reports(opts: argparse.Namespace) -> None:
    """Implements the `aggregate` subcommand.

    Walks `opts.directory` for warnings_report*.json files, aggregates them,
    and prints per-package and per-flag summaries.

    Raises:
        ValueError: If no report files were found.
    """
    root = opts.directory
    aggregated = AggregatedWarnings()
    for report_file in root.glob("**/warnings_report*.json"):
        logging.debug("Discovered report %s", report_file)
        aggregated.add_report(report_file, opts.canonicalize_board_roots)

    if not aggregated.num_reports:
        raise ValueError(f"Found no warnings report under {root}")

    logging.info("Discovered %d report files in total", aggregated.num_reports)
    summarize_per_package_warnings(aggregated.warnings.values())
    summarize_warnings_by_flag(aggregated.warnings)
+
+
def fetch_werror_tarball_links(
    child_builders: Dict[str, cros_cls.BuildID]
) -> List[str]:
    """Finds links to -Werror tarballs produced by the given builders.

    Args:
        child_builders: Mapping of builder name -> build ID to inspect.

    Returns:
        gs:// links to every .fatal_clang_warnings.tar.xz artifact produced
        by those builds. Builders with no output artifacts are skipped.
    """
    outputs = cros_cls.CQBoardBuilderOutput.fetch_many(child_builders.values())
    artifacts_links = []
    for builder_name, out in zip(child_builders, outputs):
        if out.artifacts_link:
            artifacts_links.append(out.artifacts_link)
        else:
            logging.info("%s had no output artifacts; ignoring", builder_name)

    # Bail before shelling out if there's nothing to list: `gsutil ls` with
    # no URL arguments falls back to listing all buckets in the configured
    # project, rather than listing nothing.
    if not artifacts_links:
        return []

    gsutil_stdout = subprocess.run(
        ["gsutil", "-m", "ls"] + artifacts_links,
        check=True,
        encoding="utf-8",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
    ).stdout

    return [
        x
        for x in gsutil_stdout.splitlines()
        if x.endswith(".fatal_clang_warnings.tar.xz")
    ]
+
+
def cq_builder_name_from_werror_logs_path(werror_logs: str) -> str:
    """Returns the CQ builder given a -Werror logs path.

    The builder name is the basename of the tarball's grandparent directory.
    For example, for
    "gs://chromeos-image-archive/staryu-cq/R123-.../staryu....tar.xz", this
    returns "staryu-cq".
    """
    grandparent_dir = os.path.dirname(os.path.dirname(werror_logs))
    return os.path.basename(grandparent_dir)
+
+
def download_and_unpack_werror_tarballs(
    unpack_dir: Path, download_dir: Path, gs_urls: List[str]
) -> None:
    """Downloads each tarball in `gs_urls` and unpacks it.

    Both `unpack_dir` and `download_dir` are created by this function (and so
    must not already exist). Each URL gets a subdirectory in both, named
    after its CQ builder: the raw tarball lands under `download_dir`, and its
    extracted contents under `unpack_dir`.

    Raises:
        ValueError: If any download or extraction failed.
    """
    # This is necessary below when we're untarring files. It should trivially
    # always be the case, and assuming it makes testing easier.
    assert download_dir.is_absolute(), download_dir

    unpack_dir.mkdir()
    download_dir.mkdir()

    logging.info(
        "Fetching and unpacking %d -Werror reports; this may take a bit",
        len(gs_urls),
    )
    # Run the download in a threadpool since we can have >100 logs, and all of
    # this is heavily I/O-bound.
    # Max 8 downloads at a time is arbitrary, but should minimize the chance of
    # rate-limiting. Don't limit `tar xaf`, since those should be short-lived.
    download_limiter = threading.BoundedSemaphore(8)

    def download_one_url(
        unpack_dir: Path, download_dir: Path, gs_url: str
    ) -> Optional[subprocess.CalledProcessError]:
        """Downloads and unpacks -Werror logs from the given gs_url.

        Leaves the tarball in `download_dir`, and the unpacked version in
        `unpack_dir`.

        Returns:
            None if all went well; otherwise, returns the command that failed.
            All commands have stderr data piped in.
        """
        file_targ = download_dir / os.path.basename(gs_url)
        try:
            # Hold the semaphore only for the download; extraction is local
            # and needn't be throttled.
            with download_limiter:
                subprocess.run(
                    ["gsutil", "cp", gs_url, file_targ],
                    check=True,
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.PIPE,
                    encoding="utf-8",
                    errors="replace",
                )

            # N.B., file_targ is absolute, so running with `file_targ` while
            # changing `cwd` is safe.
            subprocess.run(
                ["tar", "xaf", file_targ],
                check=True,
                cwd=unpack_dir,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                encoding="utf-8",
                errors="replace",
            )
        except subprocess.CalledProcessError as e:
            return e
        return None

    with multiprocessing.pool.ThreadPool() as thread_pool:
        # Fan all URLs out to the pool first; results are collected below.
        download_futures = []
        for gs_url in gs_urls:
            name = cq_builder_name_from_werror_logs_path(gs_url)
            unpack_to = unpack_dir / name
            unpack_to.mkdir()
            download_to = download_dir / name
            download_to.mkdir()
            download_futures.append(
                (
                    name,
                    thread_pool.apply_async(
                        download_one_url, (unpack_to, download_to, gs_url)
                    ),
                )
            )

        # Log every failure before raising, so one bad URL doesn't hide the
        # rest.
        num_failures = 0
        for name, future in download_futures:
            result = future.get()
            if not result:
                continue

            num_failures += 1
            logging.error(
                "Downloading %s failed: running %r. Stderr: %r",
                name,
                result.cmd,
                result.stderr,
            )
        if num_failures:
            raise ValueError(f"{num_failures} download(s) failed.")
+
+
def fetch_cq_reports(opts: argparse.Namespace) -> None:
    """Implements the `fetch-cq` subcommand.

    Resolves a cq-orchestrator run (either from `opts.cq_orchestrator_id`, or
    the newest run found on `opts.cl`), downloads the -Werror tarballs of all
    of its child builders, and unpacks them into the output directory.

    Raises:
        ValueError: If no cq-orchestrators, child builders, or -Werror logs
            could be found.
    """
    if opts.cl:
        logging.info(
            "Fetching most recent completed CQ orchestrator from %s", opts.cl
        )
        all_ids = cros_cls.fetch_cq_orchestrator_ids(opts.cl)
        if not all_ids:
            raise ValueError(
                f"No CQ orchestrators found under {opts.cl}. See --help for "
                "how to pass a build ID directly."
            )
        # Note that these cq-orchestrator runs are returned in oldest-to-newest
        # order. The user probably wants the newest run.
        cq_orchestrator_id = all_ids[-1]
        cq_orchestrator_url = cros_cls.builder_url(cq_orchestrator_id)
        logging.info("Checking CQ run %s", cq_orchestrator_url)
    else:
        cq_orchestrator_id = opts.cq_orchestrator_id
        cq_orchestrator_url = cros_cls.builder_url(cq_orchestrator_id)

    # This is the earliest point at which we can compute this directory with
    # certainty. Figure it out now and fail early if it exists.
    output_directory = opts.directory
    if not output_directory:
        output_directory = _DEFAULT_FETCH_DIRECTORY / str(cq_orchestrator_id)

    if output_directory.exists():
        if not opts.force:
            sys.exit(
                f"Directory at {output_directory} exists; not overwriting. "
                "Pass --force to overwrite."
            )
        # Actually _remove_ it when we have all logs unpacked and are able to
        # create the output directory with confidence.

    logging.info("Fetching info on child builders of %s", cq_orchestrator_url)
    child_builders = cros_cls.CQOrchestratorOutput.fetch(
        cq_orchestrator_id
    ).child_builders
    if not child_builders:
        raise ValueError(f"No child builders found for {cq_orchestrator_url}")

    logging.info(
        "%d child builders found; finding associated tarball links",
        len(child_builders),
    )
    werror_links = fetch_werror_tarball_links(child_builders)
    if not werror_links:
        raise ValueError(
            f"No -Werror logs found in children of {cq_orchestrator_url}"
        )

    logging.info("%d -Werror logs found", len(werror_links))
    # Download/unpack into a tempdir first, so a partial fetch never leaves a
    # half-populated output directory behind.
    with tempfile.TemporaryDirectory("werror_logs_fetch_cq") as t:
        tempdir = Path(t)
        unpack_dir = tempdir / "unpacked"
        download_and_unpack_werror_tarballs(
            unpack_dir=unpack_dir,
            download_dir=tempdir / "tarballs",
            gs_urls=werror_links,
        )

        if output_directory.exists():
            logging.info("Removing output directory at %s", output_directory)
            shutil.rmtree(output_directory)
        output_directory.parent.mkdir(parents=True, exist_ok=True)
        # (Convert these to strs to keep mypy happy.)
        shutil.move(str(unpack_dir), str(output_directory))
        logging.info(
            "CQ logs from %s stored in %s",
            cq_orchestrator_url,
            output_directory,
        )
+
+
def main(argv: List[str]) -> None:
    """Parses `argv` and dispatches to the selected subcommand."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--debug", action="store_true", help="Enable debug logging"
    )
    subparsers = parser.add_subparsers(required=True)
    # b/318833638: More subcommands may be added here over time.
    aggregate = subparsers.add_parser(
        "aggregate",
        help="""
        Aggregate all -Werror reports beneath a directory. Note that this will
        traverse all children of the directory, so can be used either on
        unpacked -Werror reports from CQ builders, or can be used on e.g.,
        /build/cherry/var/lib/chromeos.
        """,
    )
    aggregate.set_defaults(func=aggregate_reports)
    aggregate.add_argument(
        "--canonicalize-board-roots",
        action="store_true",
        help="""
        Converts warnings paths starting with a board root (e.g., /build/atlas)
        to a form consistent across many boards.
        """,
    )
    aggregate.add_argument(
        "--directory", type=Path, required=True, help="Directory to inspect."
    )

    fetch_cq = subparsers.add_parser(
        "fetch-cq",
        help="Fetch all -Werror reports for a CQ run.",
    )
    fetch_cq.set_defaults(func=fetch_cq_reports)
    # Exactly one of --cl / --cq-orchestrator-id selects the CQ run.
    cl_or_cq_orchestrator = fetch_cq.add_mutually_exclusive_group(required=True)
    cl_or_cq_orchestrator.add_argument(
        "--cl",
        type=cros_cls.ChangeListURL.parse_with_patch_set,
        help="Link to a CL to get the most recent cq-orchestrator from",
    )
    cl_or_cq_orchestrator.add_argument(
        "--cq-orchestrator-id",
        type=cros_cls.BuildID,
        help="""
        Build number for a cq-orchestrator run. Builders invoked by this are
        examined for -Werror logs.
        """,
    )
    fetch_cq.add_argument(
        "--directory",
        type=Path,
        help=f"""
        Directory to put downloaded -Werror logs in. Default is a subdirectory
        of {_DEFAULT_FETCH_DIRECTORY}.
        """,
    )
    fetch_cq.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="Remove the directory at `--directory` if it exists",
    )

    opts = parser.parse_args(argv)

    logging.basicConfig(
        format=">> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: "
        "%(message)s",
        level=logging.DEBUG if opts.debug else logging.INFO,
    )

    # Each subparser sets `func`; `required=True` above should guarantee one
    # was chosen.
    assert getattr(opts, "func", None), "Unknown subcommand?"
    opts.func(opts)
+
+
# Script entry point: forward the CLI arguments (sans program name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])