xref: /aosp_15_r20/external/toolchain-utils/llvm_tools/llvm_bisection.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1#!/usr/bin/env python3
2# Copyright 2019 The ChromiumOS Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Performs bisection on LLVM based off a .JSON file."""
7
8import argparse
9import enum
10import errno
11import json
12import os
13import subprocess
14import sys
15
16import chroot
17import get_llvm_hash
18import git_llvm_rev
19import modify_a_tryjob
20import update_chromeos_llvm_hash
21import update_tryjob_status
22
23
class BisectionExitStatus(enum.Enum):
    """Process exit codes that are specific to the bisection workflow."""

    # Reported once there are no revisions left to test between the last
    # good and the first bad revision.
    BISECTION_COMPLETE = 126
29
30
def GetCommandLineArgs():
    """Parses and validates the command line arguments for the bisection.

    Returns:
        The 'argparse.Namespace' produced by parsing 'sys.argv'.

    Raises:
        ValueError: The start revision is not strictly less than the end
        revision, or the status file path does not end in ".json".
    """

    # Default path to the chroot if a path is not specified.
    cros_root = os.path.join(os.path.expanduser("~"), "chromiumos")

    # Create parser and add optional command-line arguments.
    parser = argparse.ArgumentParser(
        description="Bisects LLVM via tracking a JSON file."
    )

    # Add argument for how many tryjobs to create per bisection step.
    parser.add_argument(
        "--parallel",
        type=int,
        default=3,
        help="How many tryjobs to create between the last good version and "
        "the first bad version (default: %(default)s)",
    )

    # Add argument for the good LLVM revision for bisection.
    parser.add_argument(
        "--start_rev",
        required=True,
        type=int,
        help="The good revision for the bisection.",
    )

    # Add argument for the bad LLVM revision for bisection.
    parser.add_argument(
        "--end_rev",
        required=True,
        type=int,
        help="The bad revision for the bisection.",
    )

    # Add argument for the absolute path to the file that contains information
    # on the previous tested svn version.
    parser.add_argument(
        "--last_tested",
        required=True,
        help="the absolute path to the file that contains the tryjobs",
    )

    # Add argument for the absolute path to the LLVM source tree.
    parser.add_argument(
        "--src_path",
        help="the path to the LLVM source tree to use (used for retrieving the "
        "git hash of each version between the last good version and first bad "
        "version)",
    )

    # Add argument for other change lists that want to run alongside the tryjob
    # which has a change list of updating a package's git hash.
    parser.add_argument(
        "--extra_change_lists",
        type=int,
        nargs="+",
        help="change lists that would like to be run alongside the change list "
        "of updating the packages",
    )

    # Add argument for custom options for the tryjob.
    parser.add_argument(
        "--options",
        required=False,
        nargs="+",
        help="options to use for the tryjob testing",
    )

    # Add argument for the builder to use for the tryjob.
    parser.add_argument(
        "--builder", required=True, help="builder to use for the tryjob testing"
    )

    # Add argument for the description of the tryjob.
    parser.add_argument(
        "--description",
        required=False,
        nargs="+",
        help="the description of the tryjob",
    )

    # Add argument for a specific chroot path.
    parser.add_argument(
        "--chromeos_path",
        default=cros_root,
        help="the path to the chroot (default: %(default)s)",
    )

    # Add argument for keeping the CLs that were created for bisection. The
    # flag is stored inverted: 'cleanup' stays True unless '--nocleanup' is
    # passed.
    parser.add_argument(
        "--nocleanup",
        action="store_false",
        dest="cleanup",
        help="Do not abandon the CLs created for bisection",
    )

    args_output = parser.parse_args()

    # Checked explicitly rather than with 'assert' so that the validation is
    # not stripped when Python runs with '-O'.
    if args_output.start_rev >= args_output.end_rev:
        raise ValueError(
            "Start revision %d is >= end revision %d"
            % (args_output.start_rev, args_output.end_rev)
        )

    if args_output.last_tested and not args_output.last_tested.endswith(
        ".json"
    ):
        raise ValueError(
            'File provided %s does not end in ".json"'
            % args_output.last_tested
        )

    return args_output
149
150
def GetRemainingRange(start, end, tryjobs):
    """Narrows the bisection range using the tryjob results recorded so far.

    Args:
        start: The start version of the bisection provided via the command line.
        end: The end version of the bisection provided via the command line.
        tryjobs: A list of tryjobs where each element is a dictionary that
        is read for its 'rev' and 'status' keys ('link' is only used when
        reporting a missing status):
        [
            {[TRYJOB_INFORMATION]},
            {[TRYJOB_INFORMATION]},
            ...,
            {[TRYJOB_INFORMATION]}
        ]

    Returns:
        A tuple of the new start version, the new end version, the set of
        revisions that are 'pending' and the set of revisions that are to be
        skipped. Both sets only contain revisions that lie strictly between
        the new start and the new end version.

    Raises:
        ValueError: The value for 'status' is missing or empty in a tryjob.
        AssertionError: The new start version is >= than the new end version.
    """

    if not tryjobs:
        # Return real (empty) sets so that both return paths have the same
        # types.
        return start, end, set(), set()

    # Verify that each tryjob has a value for the 'status' key.
    for cur_tryjob in tryjobs:
        if not cur_tryjob.get("status"):
            # Fall back to a placeholder so that a tryjob without a 'link'
            # still reports the missing status instead of a KeyError.
            raise ValueError(
                '"status" is missing or has no value, please '
                "go to %s and update it"
                % cur_tryjob.get("link", "<unknown link>")
            )

    statuses = update_tryjob_status.TryjobStatus

    def RevisionsWithStatus(status):
        """Returns the revisions of all tryjobs that are in 'status'."""
        return [
            tryjob["rev"]
            for tryjob in tryjobs
            if tryjob["status"] == status.value
        ]

    # The minimum revision among 'end' and the 'bad' tryjobs is the new end
    # version.
    bad_rev = min([end] + RevisionsWithStatus(statuses.BAD))

    # The maximum revision among 'start' and the 'good' tryjobs is the new
    # start version.
    good_rev = max([start] + RevisionsWithStatus(statuses.GOOD))

    # The good version should always be strictly less than the bad version;
    # otherwise, bisection is broken.
    assert (
        good_rev < bad_rev
    ), "Bisection is broken because %d (good) is >= %d (bad)" % (
        good_rev,
        bad_rev,
    )

    # NOTE: The intent of both sets is to not launch tryjobs between
    # 'good_rev' and 'bad_rev' for revisions that have already been launched
    # ('pending') or that have been marked as 'skip'.
    pending_revisions = {
        rev
        for rev in RevisionsWithStatus(statuses.PENDING)
        if good_rev < rev < bad_rev
    }
    skip_revisions = {
        rev
        for rev in RevisionsWithStatus(statuses.SKIP)
        if good_rev < rev < bad_rev
    }

    return good_rev, bad_rev, pending_revisions, skip_revisions
244
245
def GetCommitsBetween(
    start, end, parallel, src_path, pending_revisions, skip_revisions
):
    """Picks the revisions to test next and looks up their git hashes.

    Candidate revisions are taken from 'start + 1' up to (but excluding)
    'end' at a fixed stride derived from 'parallel'; revisions that are
    already pending or skipped are left out.

    Args:
        start: The last known good revision.
        end: The first known bad revision.
        parallel: Used to derive the stride between candidate revisions.
        src_path: The LLVM source tree used for the hash lookups; when it is
        not provided, a temporary LLVM repo is used instead.
        pending_revisions: Revisions that already have a tryjob launched.
        skip_revisions: Revisions that are marked to be skipped.

    Returns:
        A tuple of two parallel lists: the chosen revisions and the git hash
        of each of them.

    Raises:
        AssertionError: 'start' is older than
        'git_llvm_rev.base_llvm_revision'.
    """

    with get_llvm_hash.LLVMHash().CreateTempDirectory() as scratch_dir:
        # Revision numbers are guaranteed to be contiguous from the base
        # revision onwards. That guarantee simplifies things considerably,
        # so nothing before it is supported.
        assert (
            start >= git_llvm_rev.base_llvm_revision
        ), f"{start} was too long ago"

        with get_llvm_hash.CreateTempLLVMRepo(scratch_dir) as temp_repo:
            repo_path = src_path or temp_repo

            # A stride of zero would make 'range()' fail, so use 1 instead.
            stride = (end - (start + 1)) // (parallel + 1) or 1

            candidates = [
                candidate
                for candidate in range(start + 1, end, stride)
                if not (
                    candidate in pending_revisions
                    or candidate in skip_revisions
                )
            ]
            hashes = [
                get_llvm_hash.GetGitHashFrom(repo_path, candidate)
                for candidate in candidates
            ]
            return candidates, hashes
274
275
def Bisect(
    revisions,
    git_hashes,
    bisect_state,
    last_tested,
    update_packages,
    chromeos_path,
    extra_change_lists,
    options,
    builder,
):
    """Launches one tryjob per revision and saves the updated state.

    Every tryjob returned by 'modify_a_tryjob.AddTryjob()' is appended to
    'bisect_state["jobs"]'. The state is written to 'last_tested' afterwards,
    also when launching a tryjob raised.

    Args:
        revisions: The revisions to launch tryjobs for.
        git_hashes: The git hash of each entry of 'revisions', in order.
        bisect_state: The bisection state; modified in place.
        last_tested: Path of the status file; nothing is written when it is
        empty or None.
        update_packages: Forwarded to 'modify_a_tryjob.AddTryjob()'.
        chromeos_path: Forwarded to 'modify_a_tryjob.AddTryjob()'.
        extra_change_lists: Forwarded to 'modify_a_tryjob.AddTryjob()'.
        options: Forwarded to 'modify_a_tryjob.AddTryjob()'.
        builder: Forwarded to 'modify_a_tryjob.AddTryjob()'.
    """

    try:
        for rev, rev_hash in zip(revisions, git_hashes):
            new_job = modify_a_tryjob.AddTryjob(
                update_packages,
                rev_hash,
                rev,
                chromeos_path,
                extra_change_lists,
                options,
                builder,
                rev,
            )

            bisect_state["jobs"].append(new_job)
    finally:
        # Save whatever has been launched so far so that an exception does
        # not lose progress.
        if last_tested:
            # Write to a sibling file first and then move it over the real
            # status file.
            scratch_path = f"{last_tested}.new"
            with open(scratch_path, "w", encoding="utf-8") as status_file:
                json.dump(
                    bisect_state, status_file, indent=4, separators=(",", ": ")
                )

            os.rename(scratch_path, last_tested)
313
314
def LoadStatusFile(last_tested, start, end):
    """Reads the bisection state from disk, or creates an initial one.

    Args:
        last_tested: Path of the JSON status file.
        start: The start revision stored in a newly created state.
        end: The end revision stored in a newly created state.

    Returns:
        The decoded contents of 'last_tested' if the file exists; otherwise
        a new state with 'start', 'end' and an empty list of "jobs".

    Raises:
        IOError: Reading the file failed for a reason other than the file
        not existing.
    """

    try:
        with open(last_tested, encoding="utf-8") as status_file:
            saved_state = json.load(status_file)
    except IOError as read_error:
        # Only a missing file means that the bisection is just starting.
        if read_error.errno == errno.ENOENT:
            return {"start": start, "end": end, "jobs": []}
        raise

    return saved_state
326
327
def _AbandonBisectionCLs(chromeos_path, jobs):
    """Runs 'gerrit abandon' on the CL stored under "cl" of every job."""

    gerrit = os.path.join(chromeos_path, "chromite/bin/gerrit")
    for job in jobs:
        try:
            subprocess.check_output(
                [gerrit, "abandon", str(job["cl"])],
                stderr=subprocess.STDOUT,
                encoding="utf-8",
            )
        except subprocess.CalledProcessError as err:
            # A GOBError in the output is tolerated since the CL may have
            # been abandoned already; anything else is a real failure.
            if "chromite.lib.gob_util.GOBError" not in err.output:
                raise


def main(args_output):
    """Runs one bisection step: launches tryjobs or reports completion.

    Args:
        args_output: The parsed command line arguments.

    Returns:
        'BisectionExitStatus.BISECTION_COMPLETE.value' when no revisions are
        left to test; None after new tryjobs have been launched.

    Raises:
        ValueError: The requested start/end revisions differ from the ones in
        the status file, the only revisions left in the range are pending,
        or a revision picked for testing already exists in "jobs".
    """

    # NOTE(review): presumably these reject running inside the chroot and an
    # invalid ChromeOS checkout; confirm against the 'chroot' module.
    chroot.VerifyOutsideChroot()
    chroot.VerifyChromeOSRoot(args_output.chromeos_path)

    start = args_output.start_rev
    end = args_output.end_rev
    status_path = args_output.last_tested

    bisect_state = LoadStatusFile(status_path, start, end)
    if not (start == bisect_state["start"] and end == bisect_state["end"]):
        raise ValueError(
            f"The start {start} or the end {end} version provided is "
            f'different than "start" {bisect_state["start"]} or "end" '
            f'{bisect_state["end"]} in the .JSON file'
        )

    # Pending and skipped revisions are between 'start_rev' and 'end_rev'.
    start_rev, end_rev, pending_revs, skip_revs = GetRemainingRange(
        start, end, bisect_state["jobs"]
    )

    revisions, git_hashes = GetCommitsBetween(
        start_rev,
        end_rev,
        args_output.parallel,
        args_output.src_path,
        pending_revs,
        skip_revs,
    )

    # Without any revision that is neither 'pending' nor 'skipped' between
    # 'start_rev' and 'end_rev', there is nothing left to launch.
    if not revisions:
        if pending_revs:
            # Unfinished tryjobs may still change the actual bad
            # commit/revision, so the bisection is not complete yet.
            no_revisions_message = (
                f"No revisions between start {start_rev} "
                f"and end {end_rev} to create tryjobs\n"
                "The following tryjobs are pending:\n"
                + "\n".join(map(str, pending_revs))
                + "\n"
            )

            if skip_revs:
                no_revisions_message += (
                    "The following tryjobs were skipped:\n"
                    + "\n".join(map(str, skip_revs))
                    + "\n"
                )

            raise ValueError(no_revisions_message)

        print(f"Finished bisecting for {status_path}")

        # Prefer the provided source tree for looking up the bad hash.
        if args_output.src_path:
            bad_llvm_hash = get_llvm_hash.GetGitHashFrom(
                args_output.src_path, end_rev
            )
        else:
            bad_llvm_hash = get_llvm_hash.LLVMHash().GetLLVMHash(end_rev)
        print(
            f"The bad revision is {end_rev} and its commit hash is "
            f"{bad_llvm_hash}"
        )

        if skip_revs:
            print(
                "\nThe following revisions were skipped:\n"
                + "\n".join(map(str, skip_revs))
            )

        if args_output.cleanup:
            # Abandon all the CLs created for bisection.
            _AbandonBisectionCLs(
                args_output.chromeos_path, bisect_state["jobs"]
            )

        return BisectionExitStatus.BISECTION_COMPLETE.value

    # Refuse to launch a second tryjob for a revision that is already
    # tracked.
    known_jobs = bisect_state["jobs"]
    for rev in revisions:
        existing_index = update_tryjob_status.FindTryjobIndex(rev, known_jobs)
        if existing_index is not None:
            raise ValueError(f'Revision {rev} exists already in "jobs"')

    Bisect(
        revisions,
        git_hashes,
        bisect_state,
        status_path,
        update_chromeos_llvm_hash.DEFAULT_PACKAGES,
        args_output.chromeos_path,
        args_output.extra_change_lists,
        args_output.options,
        args_output.builder,
    )
447
448
if __name__ == "__main__":
    # 'main()' returns an exit code only when the bisection is complete;
    # otherwise it returns None.
    exit_status = main(GetCommandLineArgs())
    sys.exit(exit_status)
451