xref: /aosp_15_r20/external/toolchain-utils/cros_utils/buildbot_utils.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1*760c253cSXin Li# Copyright 2017 The ChromiumOS Authors
2*760c253cSXin Li# Use of this source code is governed by a BSD-style license that can be
3*760c253cSXin Li# found in the LICENSE file.
4*760c253cSXin Li
5*760c253cSXin Li"""Utilities for launching and accessing ChromeOS buildbots."""
6*760c253cSXin Li
7*760c253cSXin Li
8*760c253cSXin Liimport ast
9*760c253cSXin Liimport json
10*760c253cSXin Liimport os
11*760c253cSXin Liimport re
12*760c253cSXin Liimport time
13*760c253cSXin Li
14*760c253cSXin Lifrom cros_utils import command_executer
15*760c253cSXin Lifrom cros_utils import logger
16*760c253cSXin Li
17*760c253cSXin Li
# Seconds to wait after launching a tryjob before the first poll (2 hours).
INITIAL_SLEEP_TIME = 2 * 60 * 60
# Seconds between two consecutive polls of the buildbot (10 minutes).
SLEEP_TIME = 10 * 60

# Seconds after which a build is considered dead or never going to finish.
# Some of the slower builders (llvm-next) take more than 12 hours, so the
# limit is 15 hours.
TIME_OUT = 15 * 3600
24*760c253cSXin Li
25*760c253cSXin Li
class BuildbotTimeout(Exception):
    """Exception raised when a buildbot operation times out."""
28*760c253cSXin Li
29*760c253cSXin Li
def RunCommandInPath(path, cmd):
    """Run a command with `path` as the current working directory.

    The process-wide working directory is switched to `path` while the
    command runs and is restored afterwards, even if running the command
    raises an exception.

    Args:
        path: directory to change into before running the command.
        cmd: the command to hand to the command executer.

    Returns:
        (status, stdout, stderr) as produced by RunCommandWOutput.
    """
    ce = command_executer.GetCommandExecuter()
    cwd = os.getcwd()
    os.chdir(path)
    try:
        status, stdout, stderr = ce.RunCommandWOutput(
            cmd, print_to_console=False
        )
    finally:
        # Always go back, otherwise a failing command would leave the whole
        # process stranded in `path`.
        os.chdir(cwd)
    return status, stdout, stderr
37*760c253cSXin Li
38*760c253cSXin Li
def PeekTrybotImage(chromeos_root, buildbucket_id):
    """Query the status and artifact URL of a tryjob.

    Args:
        chromeos_root: root dir of the ChromeOS checkout; the query command is
            run from this directory.
        buildbucket_id: buildbucket id of the tryjob to look up.

    Returns:
        (status, url). When the query command exits non-zero the result is
        ('running', None). Otherwise status is taken from the report (the
        original documentation lists 'pass', 'fail' and 'running') and url is
        the artifact URL without trailing slashes, e.g.
        gs://chromeos-image-archive/trybot-elm-release-tryjob/R67-10468.0.0-b20789
        or '' when the report has no artifact URL.
    """
    exit_code, report, _ = RunCommandInPath(
        chromeos_root,
        "cros buildresult --report json --buildbucket-id %s" % buildbucket_id,
    )

    # `cros buildresult` currently exits with a failure while the job is
    # still running, so a non-zero exit code is reported as "running".
    if exit_code != 0:
        return ("running", None)

    build_info = json.loads(report)[buildbucket_id]
    artifacts_url = build_info["artifacts_url"]
    build_status = build_info["status"]

    # A missing URL means the tryjob failed to launch correctly.
    if artifacts_url is None:
        return (build_status, "")
    return (build_status, artifacts_url.rstrip("/"))
68*760c253cSXin Li
69*760c253cSXin Li
def ParseTryjobBuildbucketId(msg):
    """Find the buildbucket-id in the messages from `cros tryjob`.

    The output is expected to be a list whose first element is a dict with a
    "buildbucket_id" key. It is parsed as JSON first (so JSON-only literals
    such as null/true/false are accepted) and, failing that, as a Python
    literal, which is what the previous implementation accepted.

    Args:
        msg: messages from `cros tryjob`

    Returns:
        buildbucket-id, which will be passed to `cros buildresult`, or None
        when the output is empty, cannot be parsed, or carries no id.
    """
    try:
        output_list = json.loads(msg)
    except (TypeError, ValueError):
        try:
            output_list = ast.literal_eval(msg)
        except (SyntaxError, TypeError, ValueError):
            # Unparseable output: report "no id" and let the caller decide.
            return None

    if not isinstance(output_list, (list, tuple)) or not output_list:
        return None
    output_dict = output_list[0]
    if not isinstance(output_dict, dict):
        return None
    return output_dict.get("buildbucket_id")
84*760c253cSXin Li
85*760c253cSXin Li
def SubmitTryjob(
    chromeos_root,
    buildbot_name,
    patch_list,
    tryjob_flags=None,
    build_toolchain=False,
):
    """Calls `cros tryjob ...`

    Args:
        chromeos_root: the path to the ChromeOS root, needed for finding chromite
            and launching the buildbot.
        buildbot_name: the name of the buildbot queue, such as lumpy-release or
            daisy-paladin.
        patch_list: a python list of the patches, if any, for the buildbot to use.
        tryjob_flags: See cros tryjob --help for available options. The
            passed-in list is not modified.
        build_toolchain: builds and uses the latest toolchain, rather than the
            prebuilt one in SDK.

    Returns:
        buildbucket id
    """
    # Every patch becomes its own " -g <patch>" argument.
    patch_arg = "".join(" -g " + repr(p) for p in (patch_list or []))

    # Work on a copy so that appending below never leaks into the caller's
    # list (which may be reused for several submissions).
    flags = list(tryjob_flags) if tryjob_flags else []
    if build_toolchain:
        flags.append("--latest-toolchain")

    # Launch buildbot with appropriate flags.
    command = "cros_sdk -- cros tryjob --yes --json --nochromesdk  %s %s %s" % (
        " ".join(flags),
        patch_arg,
        buildbot_name,
    )
    print("CMD: %s" % command)
    _, out, _ = RunCommandInPath(chromeos_root, command)
    buildbucket_id = ParseTryjobBuildbucketId(out)
    print("buildbucket_id: %s" % repr(buildbucket_id))
    if not buildbucket_id:
        logger.GetLogger().LogFatal(
            "Error occurred while launching trybot job: %s" % command
        )
    return buildbucket_id
134*760c253cSXin Li
135*760c253cSXin Li
def GetTrybotImage(
    chromeos_root,
    buildbot_name,
    patch_list,
    tryjob_flags=None,
    build_toolchain=False,
    asynchronous=False,
):
    """Launch buildbot and get resulting trybot artifact name.

    This function launches a buildbot with the appropriate flags to
    build the test ChromeOS image, with the current ToT mobile compiler.
    After an initial wait of INITIAL_SLEEP_TIME seconds it polls the tryjob
    every SLEEP_TIME seconds until it is no longer reported as running. It
    then derives the trybot artifact name (if one was created) from the
    artifact URL and returns it.

    Args:
        chromeos_root: the path to the ChromeOS root, needed for finding chromite
            and launching the buildbot.
        buildbot_name: the name of the buildbot queue, such as lumpy-release or
            daisy-paladin.
        patch_list: a python list of the patches, if any, for the buildbot to use.
        tryjob_flags: See cros tryjob --help for available options.
        build_toolchain: builds and uses the latest toolchain, rather than the
                       prebuilt one in SDK.
        asynchronous: don't wait for artifacts; just return the buildbucket id

    Returns:
        (buildbucket id, partial image url) e.g.
        (8952271933586980528, trybot-elm-release-tryjob/R67-10480.0.0-b2373596)
        The image part is " " in asynchronous mode and "" when no usable image
        was produced.
    """
    buildbucket_id = SubmitTryjob(
        chromeos_root, buildbot_name, patch_list, tryjob_flags, build_toolchain
    )
    if asynchronous:
        return buildbucket_id, " "

    # The trybot generally takes more than 2 hours to finish.
    # Wait two hours before polling the status.
    time.sleep(INITIAL_SLEEP_TIME)
    elapsed = INITIAL_SLEEP_TIME
    while True:
        status, image = PeekTrybotImage(chromeos_root, buildbucket_id)
        if status != "running":
            break
        if elapsed > TIME_OUT:
            # NOTE(review): LogFatal is presumably expected to terminate the
            # program; if it returns, polling simply continues.
            logger.GetLogger().LogFatal(
                "Unable to get build result for target %s." % buildbot_name
            )
        else:
            wait_msg = "Unable to find build result; job may be running."
            logger.GetLogger().LogOutput(wait_msg)
        logger.GetLogger().LogOutput(f"{elapsed / 60} minutes elapsed.")
        logger.GetLogger().LogOutput(f"Sleeping {SLEEP_TIME} seconds.")
        time.sleep(SLEEP_TIME)
        elapsed += SLEEP_TIME

    if not buildbot_name.endswith("-toolchain") and status == "fail":
        # For rotating testers, we don't care about their status
        # result, because if any HWTest failed it will be non-zero.
        #
        # The nightly performance tests do not run HWTests, so if
        # their status is non-zero, we do care.  In this case
        # non-zero means the image itself probably did not build.
        image = ""

    if not image:
        # The two literals are concatenated, so the first one must end with
        # a space to keep "with" and "status" apart.
        logger.GetLogger().LogError(
            "Trybot job (buildbucket id: %s) failed with "
            "status %s; no trybot image generated. " % (buildbucket_id, status)
        )
    else:
        # Convert full gs path to what crosperf expects. For example, convert
        # gs://chromeos-image-archive/trybot-elm-release-tryjob/R67-10468.0.0-b20789
        # to
        # trybot-elm-release-tryjob/R67-10468.0.0-b20789
        image = "/".join(image.split("/")[-2:])

    logger.GetLogger().LogOutput("image is '%s'" % image)
    logger.GetLogger().LogOutput("status is %s" % status)
    return buildbucket_id, image
220*760c253cSXin Li
221*760c253cSXin Li
def DoesImageExist(chromeos_root, build):
    """Check if the test image for the given build exists.

    Runs `gsutil ls` inside the chroot on the build's
    chromiumos_test_image.tar.xz in the chromeos-image-archive bucket.

    Args:
        chromeos_root: the path to the ChromeOS root.
        build: build path below the bucket.

    Returns:
        True when the listing command returns a falsy (zero) status.
    """
    executer = command_executer.GetCommandExecuter()
    ls_command = (
        "gsutil ls gs://chromeos-image-archive/%s/chromiumos_test_image.tar.xz"
        % (build)
    )
    exit_status = executer.ChrootRunCommand(
        chromeos_root, ls_command, print_to_console=False
    )
    return not exit_status
232*760c253cSXin Li
233*760c253cSXin Li
def WaitForImage(chromeos_root, build):
    """Block until the image for `build` exists.

    The image is probed every SLEEP_TIME seconds until the accumulated
    sleep time reaches TIME_OUT.

    Args:
        chromeos_root: the path to the ChromeOS root.
        build: build whose image is awaited.

    Raises:
        BuildbotTimeout: the image did not show up within TIME_OUT seconds.
    """
    for _ in range(0, TIME_OUT, SLEEP_TIME):
        if DoesImageExist(chromeos_root, build):
            return
        not_ready_msg = "Image %s not ready, waiting for 10 minutes" % build
        logger.GetLogger().LogOutput(not_ready_msg)
        time.sleep(SLEEP_TIME)

    gave_up_msg = "Image %s not found, waited for %d hours" % (
        build,
        (TIME_OUT / 3600),
    )
    logger.GetLogger().LogOutput(gave_up_msg)
    raise BuildbotTimeout("Timeout while waiting for image %s" % build)
251*760c253cSXin Li
252*760c253cSXin Li
def GetLatestImage(chromeos_root, path):
    """Get the latest build under `path` that has a test image.

    Lists gs://chromeos-image-archive/<path>, extracts the RXX-A.B.C version
    from each entry, and probes the versions from newest to oldest.

    Args:
        chromeos_root: the path to the ChromeOS root.
        path: directory below the chromeos-image-archive bucket.

    Returns:
        "<path>/R<milestone>-<a>.<b>.<c>" of the newest build whose image
        exists, or None when there is no such build.

    Raises:
        RuntimeError: the listing command failed.
    """
    # The dots are escaped so that only literal version separators match.
    fmt = re.compile(r"R([0-9]+)-([0-9]+)\.([0-9]+)\.([0-9]+)")

    ce = command_executer.GetCommandExecuter()
    command = "gsutil ls gs://chromeos-image-archive/%s" % path
    ret, out, _ = ce.ChrootRunCommandWOutput(
        chromeos_root, command, print_to_console=False
    )
    if ret != 0:
        raise RuntimeError("Failed to list buckets with command: %s." % command)

    versions = []
    for entry in out.split():
        parts = entry.split("/")
        # The build name is the second to last component of the listed
        # entry; tokens without one cannot name a build.
        if len(parts) < 2:
            continue
        match = fmt.match(parts[-2])
        if match:
            versions.append(tuple(int(num) for num in match.groups()))

    # Newest first; the numeric tuples compare component by component.
    versions.sort(reverse=True)
    for version in versions:
        build = "%s/R%d-%d.%d.%d" % ((path,) + version)
        if DoesImageExist(chromeos_root, build):
            return build
    return None
275*760c253cSXin Li
276*760c253cSXin Li
def GetLatestRecipeImage(chromeos_root, path):
    """Get latest nightly test image from recipe bucket.

    Image location example:
    $ARCHIVE/lulu-llvm-next-nightly/R84-13037.0.0-31011-8883172717979984032

    Args:
        chromeos_root: the path to the ChromeOS root.
        path: directory below the chromeos-image-archive bucket.

    Returns:
        "<path>/<build name>" of the newest of the two most recent builds
        whose image exists, or None when neither has one.

    Raises:
        RuntimeError: the listing command failed.
    """
    # The dots are escaped so that only literal version separators match.
    fmt = re.compile(r"R([0-9]+)-([0-9]+)\.([0-9]+)\.([0-9]+)-([0-9]+)")

    ce = command_executer.GetCommandExecuter()
    command = "gsutil ls gs://chromeos-image-archive/%s" % path
    ret, out, _ = ce.ChrootRunCommandWOutput(
        chromeos_root, command, print_to_console=False
    )
    if ret != 0:
        raise RuntimeError("Failed to list buckets with command: %s." % command)

    candidates = []
    for entry in out.split():
        parts = entry.split("/")
        # The build name is the second to last component of the listed
        # entry; tokens without one cannot name a build.
        if len(parts) < 2:
            continue
        name = parts[-2]
        match = fmt.match(name)
        # Filter on the match itself: entries that do not look like a recipe
        # build are skipped instead of crashing on `None.group`.
        if match:
            candidates.append(([int(num) for num in match.groups()], name))

    # Newest first, ordered by the numeric version fields only.
    candidates.sort(key=lambda x: x[0], reverse=True)
    # Only probe the two newest builds, since nightly tests are run once
    # every other day.
    for _, name in candidates[:2]:
        build = "%s/%s" % (path, name)
        if DoesImageExist(chromeos_root, build):
            return build
    return None
307