xref: /aosp_15_r20/external/toolchain-utils/llvm_tools/werror_logs.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1*760c253cSXin Li#!/usr/bin/env python3
2*760c253cSXin Li# Copyright 2024 The ChromiumOS Authors
3*760c253cSXin Li# Use of this source code is governed by a BSD-style license that can be
4*760c253cSXin Li# found in the LICENSE file.
5*760c253cSXin Li
6*760c253cSXin Li"""Helps reason about -Werror logs emitted by the compiler wrapper.
7*760c253cSXin Li
8*760c253cSXin LiSpecifically, this works with the -Werror reports produced by the compiler
9*760c253cSXin Liwrapper in FORCE_DISABLE_WERROR mode. It's intended to be run on trees of these
10*760c253cSXin Lireports, so devs can run roughly the following commands:
11*760c253cSXin Li
12*760c253cSXin Li$ apply_force_disable_werror  # (There's no actual script to do this today.)
13*760c253cSXin Li$ build_packages --board=foo --nousepkg
14*760c253cSXin Li$ ./werror_logs.py aggregate --directory=/build/foo/var/lib/chromeos
15*760c253cSXin Li
16*760c253cSXin LiAnd see a full aggregation of all warnings that were suppressed in that
17*760c253cSXin Li`build_packages` invocation.
18*760c253cSXin Li
19*760c253cSXin LiIt can also be used to fetch warnings reports from CQ runs, for instance,
20*760c253cSXin Li$ ./werror_logs.py fetch-cq --cq-orchestrator-id=123456
21*760c253cSXin Li
22*760c253cSXin LiIn this case, it downloads _all -Werror logs_ from children of the given
23*760c253cSXin Licq-orchestrator, and prints the parent directory of all of these reports. If
24*760c253cSXin Liyou run `aggregate` on this directory, it's highly recommended to use the
25*760c253cSXin Li`--canonicalize-board-roots` flag.
26*760c253cSXin Li"""
27*760c253cSXin Li
28*760c253cSXin Liimport argparse
29*760c253cSXin Liimport collections
30*760c253cSXin Liimport dataclasses
31*760c253cSXin Liimport json
32*760c253cSXin Liimport logging
33*760c253cSXin Liimport multiprocessing.pool
34*760c253cSXin Liimport os
35*760c253cSXin Lifrom pathlib import Path
36*760c253cSXin Liimport re
37*760c253cSXin Liimport shutil
38*760c253cSXin Liimport subprocess
39*760c253cSXin Liimport sys
40*760c253cSXin Liimport tempfile
41*760c253cSXin Liimport threading
42*760c253cSXin Lifrom typing import Any, Counter, DefaultDict, Dict, IO, Iterable, List, Optional
43*760c253cSXin Li
44*760c253cSXin Liimport cros_cls
45*760c253cSXin Li
46*760c253cSXin Li
# Parent directory used by `fetch-cq` when no `--directory` is given; the logs
# are placed in a subdirectory named after the cq-orchestrator build ID.
_DEFAULT_FETCH_DIRECTORY = Path("/tmp/werror_logs")
48*760c253cSXin Li
49*760c253cSXin Li
def canonicalize_file_path_board_root(file_path: str) -> str:
    """Replaces the board name in a `/build/<board>/...` path with `{board}`.

    The path is always normalized first. Paths that do not live under
    `/build/`, or that have nothing after the board component, are returned
    in their normalized form with no further changes.
    """
    build_prefix = "/build/"
    # Normalizing drops double slashes and needless traversal
    # (foo/../bar/..), which makes the result easier to read and compare.
    normalized = os.path.normpath(file_path)
    if not normalized.startswith(build_prefix):
        return normalized

    # Whatever sits between `/build/` and the next slash is the board name.
    _, slash, below_board = normalized[len(build_prefix) :].partition("/")
    if not slash:
        return normalized
    return f"/build/{{board}}/{below_board}"
59*760c253cSXin Li
60*760c253cSXin Li
@dataclasses.dataclass(frozen=True, eq=True, order=True)
class ClangWarningLocation:
    """A `file:line:column` position at which Clang emitted a warning."""

    file: str
    line: int
    column: int

    @classmethod
    def parse(
        cls, location: str, canonicalize_board_root: bool = False
    ) -> "ClangWarningLocation":
        """Builds a location out of a `file:line:column` string.

        Args:
            location: the text to parse.
            canonicalize_board_root: if True, the file part is passed through
                `canonicalize_file_path_board_root` before being stored.

        Raises:
            ValueError: if `location` does not have three colon-separated
                parts, or if the line/column parts are not integers.
        """
        # Split from the right so any colons inside the file name stay with
        # the file name.
        pieces = location.rsplit(":", 2)
        if len(pieces) != 3:
            raise ValueError(f"Invalid location: {location!r}")

        file_name, line_text, column_text = pieces
        if canonicalize_board_root:
            file_name = canonicalize_file_path_board_root(file_name)
        return cls(
            file=file_name,
            line=int(line_text),
            column=int(column_text),
        )
80*760c253cSXin Li
81*760c253cSXin Li
@dataclasses.dataclass(frozen=True, eq=True)
class ClangWarning:
    """A single Clang warning, plus where it was emitted (when known)."""

    # The flag controlling the warning, e.g., -Wunused-variable
    name: str
    # The diagnostic's text, e.g., "'allocate' is deprecated."
    message: str
    # Where the warning fired. None for frontend diagnostics, which carry no
    # file position.
    location: Optional[ClangWarningLocation]

    # Two shapes of diagnostic line are recognized:
    # 1. `clang-17: error: foo [-W...]`
    # 2. `/file/path:123:45: error: foo [-W...]`
    _WARNING_RE = re.compile(
        # Only the location form is captured; the `clang-\d+` form carries no
        # information that is used below.
        r"^(?:([^:]*:\d+:\d+)|clang-\d+)"
        r": error: "
        # Capture the message
        r"(.*?)\s+"
        r"\[(-W[^\][]+)]\s*$"
    )

    @classmethod
    def try_parse_line(
        cls, line: str, canonicalize_board_root: bool = False
    ) -> Optional["ClangWarning"]:
        """Parses one line of compiler output into a ClangWarning.

        Args:
            line: a single line of compiler output.
            canonicalize_board_root: forwarded to
                `ClangWarningLocation.parse` for lines that have a location.

        Returns:
            The parsed warning, or None if `line` isn't a recognized
            warning-as-error diagnostic.

        Raises:
            ValueError: if the bracketed flag list, once `-Werror` is removed,
                doesn't contain exactly one flag.
        """
        # Cheap substring test first; most lines aren't diagnostics at all.
        if "error: " not in line:
            return None

        match = cls._WARNING_RE.fullmatch(line)
        if match is None:
            return None

        raw_location, message, flag_list = match.groups()
        flags = [flag for flag in flag_list.split(",") if flag != "-Werror"]

        # Zero or several flags could be supported in principle, but neither
        # is expected; fail loudly rather than guess.
        if len(flags) != 1:
            raise ValueError(f"Weird: parsed warnings {flags} out of {line}")

        location = None
        if raw_location is not None:
            location = ClangWarningLocation.parse(
                raw_location, canonicalize_board_root
            )
        return cls(name=flags[0], message=message, location=location)
142*760c253cSXin Li
143*760c253cSXin Li
@dataclasses.dataclass(frozen=True, eq=True)
class WarningInfo:
    """Carries information about a ClangWarning."""

    # Maps a package name to the number of times that package emitted the
    # warning. Being a defaultdict(int), a package that hasn't been seen yet
    # reads as zero, so callers can `+= 1` without initializing the entry.
    packages: DefaultDict[str, int] = dataclasses.field(
        default_factory=lambda: collections.defaultdict(int)
    )
151*760c253cSXin Li
152*760c253cSXin Li
class UnknownPackageNameError(ValueError):
    """Raised when a package name can't be determined from a warning report.

    This is a ValueError subclass, so handlers for ValueError catch it too.
    """
155*760c253cSXin Li
156*760c253cSXin Li
@dataclasses.dataclass
class AggregatedWarnings:
    """Accumulates -Werror reports, one at a time, into per-warning totals."""

    # How many reports were handed to `add_report_json`. Reports that were
    # rejected because their package couldn't be guessed are counted too.
    num_reports: int = 0
    # Maps each distinct warning to the packages that emitted it, with
    # counts. A warning in a header may be attributed to several packages.
    warnings: DefaultDict[ClangWarning, WarningInfo] = dataclasses.field(
        default_factory=lambda: collections.defaultdict(WarningInfo)
    )

    # Captures the two path components that follow `portage/` in a build
    # directory, optionally located beneath a `/build/<board>` root.
    _CWD_PACKAGE_RE = re.compile(
        r"^(?:/build/[^/]+)?/var/(?:cache|tmp)/portage/([^/]+/[^/]+)/"
    )

    @classmethod
    def _guess_package_name(cls, report: Dict[str, Any]) -> str:
        """Guesses the package `report` came from, based on its "cwd".

        Raises:
            UnknownPackageNameError if the package's name couldn't be
            determined.
        """
        cwd = report.get("cwd", "")
        match = cls._CWD_PACKAGE_RE.match(cwd)
        if match is None:
            raise UnknownPackageNameError()
        return match.group(1)

    def add_report_json(
        self, report_json: Dict[str, Any], canonicalize_board_root: bool = False
    ) -> int:
        """Records every warning found in the given report's "stdout".

        Returns:
            The number of warnings parsed out of the report.

        Raises:
            UnknownPackageNameError if the package's name couldn't be
            determined.
        """
        # Counted up front, so reports rejected below are still tallied.
        self.num_reports += 1
        package = self._guess_package_name(report_json)

        parsed_count = 0
        stdout = report_json.get("stdout", "")
        for stdout_line in stdout.splitlines():
            warning = ClangWarning.try_parse_line(
                stdout_line, canonicalize_board_root
            )
            if warning is None:
                continue
            self.warnings[warning].packages[package] += 1
            parsed_count += 1

        return parsed_count

    def add_report(
        self, report_file: Path, canonicalize_board_root: bool = False
    ) -> None:
        """Loads the JSON report at `report_file` and aggregates it.

        Reports whose package can't be guessed are skipped with a logged
        warning. A report with no parseable warnings is also logged.
        """
        with report_file.open(encoding="utf-8") as f:
            report = json.load(f)

        try:
            parsed_count = self.add_report_json(report, canonicalize_board_root)
        except UnknownPackageNameError:
            logging.warning(
                "Failed guessing package name for report at %r; ignoring file",
                report_file,
            )
            return

        if parsed_count == 0:
            logging.warning(
                "Report at %r had no parseable warnings", report_file
            )
226*760c253cSXin Li
227*760c253cSXin Li
def print_aligned_counts(
    name_count_map: Dict[str, int], file: Optional[IO[str]] = None
) -> None:
    """Prints `name: count` rows, one per entry, with both columns aligned.

    Rows are ordered by descending count, with ties broken by name. Names
    and counts are right-justified, counts use thousands separators, and
    every row starts with a tab.

    Args:
        name_count_map: the counts to print. Must not be empty.
        file: where to print; passed straight to `print`.
    """
    assert name_count_map
    # Highest count first; the name decides between equal counts.
    ordered = sorted(
        name_count_map.items(), key=lambda item: (-item[1], item[0])
    )
    # The first row holds the largest count, so it sets the column width.
    _, largest_count = ordered[0]
    count_width = len(f"{largest_count:,}")
    name_width = max(map(len, name_count_map))
    for name, count in ordered:
        print(
            f"\t{name:>{name_width}}: {count:>{count_width},}",
            file=file,
        )
240*760c253cSXin Li
241*760c253cSXin Li
def summarize_per_package_warnings(
    warning_infos: Iterable[WarningInfo],
    file: Optional[IO[str]] = None,
) -> None:
    """Prints the total number of warnings emitted by each package.

    Nothing is printed if no package has any recorded warnings.

    Args:
        warning_infos: per-warning package counts to add up.
        file: where to print; passed straight to `print`.
    """
    package_totals: Counter[str] = collections.Counter()
    for info in warning_infos:
        # Adds each package's count onto its running total.
        package_totals.update(info.packages)

    if package_totals:
        print("## Per-package warning counts:", file=file)
        print_aligned_counts(package_totals, file=file)
256*760c253cSXin Li
257*760c253cSXin Li
def summarize_warnings_by_flag(
    warnings: Dict[ClangWarning, WarningInfo],
    file: Optional[IO[str]] = None,
) -> None:
    """Prints how many times each warning flag fired, across all packages.

    Nothing is printed if `warnings` is empty.

    Args:
        warnings: every distinct warning, with its per-package counts.
        file: where to print; passed straight to `print`.
    """
    if not warnings:
        return

    flag_totals: DefaultDict[str, int] = collections.defaultdict(int)
    for warning, info in warnings.items():
        # Distinct warnings (different message or location) can share a flag.
        flag_totals[warning.name] += sum(info.packages.values())

    print("## Instances of each fatal warning:", file=file)
    print_aligned_counts(flag_totals, file=file)
271*760c253cSXin Li
272*760c253cSXin Li
def aggregate_reports(opts: argparse.Namespace) -> None:
    """Implements the `aggregate` subcommand.

    Finds every `warnings_report*.json` beneath `opts.directory`, folds them
    together, and prints per-package and per-flag summaries.

    Raises:
        ValueError: if no report files were found.
    """
    search_root = opts.directory
    totals = AggregatedWarnings()
    for report_path in search_root.glob("**/warnings_report*.json"):
        logging.debug("Discovered report %s", report_path)
        totals.add_report(report_path, opts.canonicalize_board_roots)

    if totals.num_reports == 0:
        raise ValueError(f"Found no warnings report under {search_root}")

    logging.info("Discovered %d report files in total", totals.num_reports)
    summarize_per_package_warnings(totals.warnings.values())
    summarize_warnings_by_flag(totals.warnings)
286*760c253cSXin Li
287*760c253cSXin Li
def fetch_werror_tarball_links(
    child_builders: Dict[str, cros_cls.BuildID]
) -> List[str]:
    """Finds the -Werror log tarballs uploaded by the given builders.

    Args:
        child_builders: mapping of builder name to that builder's build ID.

    Returns:
        The lines of `gsutil ls` output, over every builder's artifacts
        link, that end in `.fatal_clang_warnings.tar.xz`. Empty if no builder
        has an artifacts link or no such tarball exists.

    Raises:
        subprocess.CalledProcessError: if `gsutil ls` exits unsuccessfully.
    """
    outputs = cros_cls.CQBoardBuilderOutput.fetch_many(child_builders.values())
    artifacts_links = []
    # NOTE(review): pairing names with outputs by position assumes
    # `fetch_many` returns results in the order the IDs were passed - confirm.
    for builder_name, out in zip(child_builders, outputs):
        if out.artifacts_link:
            artifacts_links.append(out.artifacts_link)
        else:
            logging.info("%s had no output artifacts; ignoring", builder_name)

    if not artifacts_links:
        # Without any URL arguments, `gsutil ls` lists the buckets of the
        # default project rather than nothing. Skip the call entirely.
        return []

    gsutil_stdout = subprocess.run(
        ["gsutil", "-m", "ls"] + artifacts_links,
        check=True,
        encoding="utf-8",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
    ).stdout

    return [
        x
        for x in gsutil_stdout.splitlines()
        if x.endswith(".fatal_clang_warnings.tar.xz")
    ]
312*760c253cSXin Li
313*760c253cSXin Li
def cq_builder_name_from_werror_logs_path(werror_logs: str) -> str:
    """Extracts the CQ builder's name from the location of its -Werror logs.

    The builder name is the path component two levels above the tarball.

    >>> cq_builder_name_from_werror_logs_path(
    ...     "gs://chromeos-image-archive/staryu-cq/"
    ...     "R123-15771.0.0-94466-8756713501925941617/"
    ...     "staryu.20240207.fatal_clang_warnings.tar.xz"
    ... )
    'staryu-cq'
    """
    # Drop the tarball's file name, then the per-build directory above it.
    build_dir = os.path.dirname(werror_logs)
    builder_dir = os.path.dirname(build_dir)
    return os.path.basename(builder_dir)
325*760c253cSXin Li
326*760c253cSXin Li
def download_and_unpack_werror_tarballs(
    unpack_dir: Path, download_dir: Path, gs_urls: List[str]
) -> None:
    """Downloads every tarball in `gs_urls` and unpacks it.

    Each URL gets its own subdirectory, named after its CQ builder, beneath
    both `download_dir` (for the tarball) and `unpack_dir` (for its contents).

    Args:
        unpack_dir: directory to create and unpack tarballs into.
        download_dir: directory to create and download tarballs into. Must
            be an absolute path.
        gs_urls: URLs of the tarballs to fetch with `gsutil cp`.

    Raises:
        ValueError: if any download or unpack command failed. Each failure is
            logged before this is raised.
        FileExistsError: if `unpack_dir`, `download_dir`, or a per-builder
            subdirectory already exists (every `mkdir()` here is called
            without `exist_ok`).
    """
    # This is necessary below when we're untarring files. It should trivially
    # always be the case, and assuming it makes testing easier.
    assert download_dir.is_absolute(), download_dir

    unpack_dir.mkdir()
    download_dir.mkdir()

    logging.info(
        "Fetching and unpacking %d -Werror reports; this may take a bit",
        len(gs_urls),
    )
    # Run the download in a threadpool since we can have >100 logs, and all of
    # this is heavily I/O-bound.
    # Max 8 downloads at a time is arbitrary, but should minimize the chance of
    # rate-limiting. Don't limit `tar xaf`, since those should be short-lived.
    download_limiter = threading.BoundedSemaphore(8)

    def download_one_url(
        unpack_dir: Path, download_dir: Path, gs_url: str
    ) -> Optional[subprocess.CalledProcessError]:
        """Downloads and unpacks -Werror logs from the given gs_url.

        Leaves the tarball in `download_dir`, and the unpacked version in
        `unpack_dir`.

        Returns:
            None if all went well; otherwise, returns the
            CalledProcessError raised by the command that failed. All
            commands have stderr data piped in, so it is available on the
            returned error.
        """
        file_targ = download_dir / os.path.basename(gs_url)
        try:
            # Only the download holds the semaphore; unpacking runs unlimited.
            with download_limiter:
                subprocess.run(
                    ["gsutil", "cp", gs_url, file_targ],
                    check=True,
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.PIPE,
                    encoding="utf-8",
                    errors="replace",
                )

            # N.B., file_targ is absolute, so running with `file_targ` while
            # changing `cwd` is safe.
            subprocess.run(
                ["tar", "xaf", file_targ],
                check=True,
                cwd=unpack_dir,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                encoding="utf-8",
                errors="replace",
            )
        except subprocess.CalledProcessError as e:
            # Hand the failure back as a value so the caller can log every
            # failed download instead of stopping at the first one.
            return e
        return None

    with multiprocessing.pool.ThreadPool() as thread_pool:
        download_futures = []
        for gs_url in gs_urls:
            # Per-builder subdirectories keep each builder's files separate.
            name = cq_builder_name_from_werror_logs_path(gs_url)
            unpack_to = unpack_dir / name
            unpack_to.mkdir()
            download_to = download_dir / name
            download_to.mkdir()
            download_futures.append(
                (
                    name,
                    thread_pool.apply_async(
                        download_one_url, (unpack_to, download_to, gs_url)
                    ),
                )
            )

        num_failures = 0
        for name, future in download_futures:
            # Blocks until this particular download/unpack has finished.
            result = future.get()
            if not result:
                continue

            num_failures += 1
            logging.error(
                "Downloading %s failed: running %r. Stderr: %r",
                name,
                result.cmd,
                result.stderr,
            )
    if num_failures:
        raise ValueError(f"{num_failures} download(s) failed.")
420*760c253cSXin Li
421*760c253cSXin Li
def fetch_cq_reports(opts: argparse.Namespace) -> None:
    """Implements the `fetch-cq` subcommand.

    Resolves which cq-orchestrator run to inspect (directly from
    `opts.cq_orchestrator_id`, or the newest run for `opts.cl`), downloads
    the -Werror logs of all of its child builders, and moves the unpacked
    logs into the output directory.

    Raises:
        ValueError: if the CL has no CQ orchestrators, the orchestrator has
            no child builders, or no -Werror logs exist for those builders.
        SystemExit: if the output directory exists and `opts.force` is unset.
    """
    change_list = opts.cl
    if change_list:
        logging.info(
            "Fetching most recent completed CQ orchestrator from %s",
            change_list,
        )
        orchestrator_ids = cros_cls.fetch_cq_orchestrator_ids(change_list)
        if not orchestrator_ids:
            raise ValueError(
                f"No CQ orchestrators found under {change_list}. See --help "
                "for how to pass a build ID directly."
            )
        # Runs come back oldest-to-newest; the newest is most likely the one
        # the user cares about.
        orchestrator_id = orchestrator_ids[-1]
    else:
        orchestrator_id = opts.cq_orchestrator_id

    orchestrator_url = cros_cls.builder_url(orchestrator_id)
    if change_list:
        logging.info("Checking CQ run %s", orchestrator_url)

    # The destination can't be known for sure before this point. Work it out
    # now, so an already-existing directory fails fast.
    destination = opts.directory
    if not destination:
        destination = _DEFAULT_FETCH_DIRECTORY / str(orchestrator_id)

    # With --force, an existing destination is only _removed_ further down,
    # once all logs are unpacked and the replacement is ready.
    if destination.exists() and not opts.force:
        sys.exit(
            f"Directory at {destination} exists; not overwriting. "
            "Pass --force to overwrite."
        )

    logging.info("Fetching info on child builders of %s", orchestrator_url)
    orchestrator_output = cros_cls.CQOrchestratorOutput.fetch(orchestrator_id)
    child_builders = orchestrator_output.child_builders
    if not child_builders:
        raise ValueError(f"No child builders found for {orchestrator_url}")

    logging.info(
        "%d child builders found; finding associated tarball links",
        len(child_builders),
    )
    tarball_urls = fetch_werror_tarball_links(child_builders)
    if not tarball_urls:
        raise ValueError(
            f"No -Werror logs found in children of {orchestrator_url}"
        )

    logging.info("%d -Werror logs found", len(tarball_urls))
    # NOTE(review): this string is the *suffix* of the temporary directory's
    # name; a prefix may have been intended.
    with tempfile.TemporaryDirectory(suffix="werror_logs_fetch_cq") as scratch:
        scratch_dir = Path(scratch)
        unpacked_logs = scratch_dir / "unpacked"
        download_and_unpack_werror_tarballs(
            unpack_dir=unpacked_logs,
            download_dir=scratch_dir / "tarballs",
            gs_urls=tarball_urls,
        )

        if destination.exists():
            logging.info("Removing output directory at %s", destination)
            shutil.rmtree(destination)
        destination.parent.mkdir(parents=True, exist_ok=True)
        # (shutil.move gets strs rather than Paths to keep mypy happy.)
        shutil.move(str(unpacked_logs), str(destination))
        logging.info(
            "CQ logs from %s stored in %s",
            orchestrator_url,
            destination,
        )
495*760c253cSXin Li
496*760c253cSXin Li
def main(argv: List[str]) -> None:
    """Parses `argv`, configures logging, and runs the chosen subcommand.

    Args:
        argv: command-line arguments, without the program name.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--debug", action="store_true", help="Enable debug logging"
    )
    subparsers = parser.add_subparsers(required=True)
    # Each subcommand stores its handler in `func` via `set_defaults`; the
    # handler is called once parsing is done. (Background on the subcommand
    # split: b/318833638.)
    aggregate = subparsers.add_parser(
        "aggregate",
        help="""
        Aggregate all -Werror reports beneath a directory. Note that this will
        traverse all children of the directory, so can be used either on
        unpacked -Werror reports from CQ builders, or can be used on e.g.,
        /build/cherry/var/lib/chromeos.
        """,
    )
    aggregate.set_defaults(func=aggregate_reports)
    aggregate.add_argument(
        "--canonicalize-board-roots",
        action="store_true",
        help="""
        Converts warnings paths starting with a board root (e.g., /build/atlas)
        to a form consistent across many boards.
        """,
    )
    aggregate.add_argument(
        "--directory", type=Path, required=True, help="Directory to inspect."
    )

    fetch_cq = subparsers.add_parser(
        "fetch-cq",
        help="Fetch all -Werror reports for a CQ run.",
    )
    fetch_cq.set_defaults(func=fetch_cq_reports)
    # Exactly one of --cl / --cq-orchestrator-id must be given.
    cl_or_cq_orchestrator = fetch_cq.add_mutually_exclusive_group(required=True)
    cl_or_cq_orchestrator.add_argument(
        "--cl",
        type=cros_cls.ChangeListURL.parse_with_patch_set,
        help="Link to a CL to get the most recent cq-orchestrator from",
    )
    cl_or_cq_orchestrator.add_argument(
        "--cq-orchestrator-id",
        type=cros_cls.BuildID,
        help="""
        Build number for a cq-orchestrator run. Builders invoked by this are
        examined for -Werror logs.
        """,
    )
    fetch_cq.add_argument(
        "--directory",
        type=Path,
        help=f"""
        Directory to put downloaded -Werror logs in. Default is a subdirectory
        of {_DEFAULT_FETCH_DIRECTORY}.
        """,
    )
    fetch_cq.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="Remove the directory at `--directory` if it exists",
    )

    opts = parser.parse_args(argv)

    logging.basicConfig(
        format=">> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: "
        "%(message)s",
        level=logging.DEBUG if opts.debug else logging.INFO,
    )

    # Every subparser above sets `func`, and a subcommand is required, so this
    # should always hold.
    assert getattr(opts, "func", None), "Unknown subcommand?"
    opts.func(opts)
574*760c253cSXin Li
575*760c253cSXin Li
# Script entry point: pass along everything except the program name.
if __name__ == "__main__":
    main(sys.argv[1:])
578