xref: /aosp_15_r20/external/toolchain-utils/crosperf/crosperf_autolock.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1#!/usr/bin/env python3
2
3# Copyright 2021 The ChromiumOS Authors
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Wrapper script to automatically lock devices for crosperf."""
8
9import argparse
10import contextlib
11import dataclasses
12import json
13import os
14import subprocess
15import sys
16from typing import Any, Dict, List, Optional, Tuple
17
18
# crosperf relies on PYTHONPATH modifications, so the directory two levels
# above this file is appended to sys.path before crosperf gets imported.
PARENT_DIR = os.path.dirname(
    os.path.dirname(os.path.abspath(__file__))
)
sys.path.append(PARENT_DIR)
23
24
def main(sys_args: List[str]) -> Optional[str]:
    """Lease DUTs through crosfleet and run crosperf against them.

    Args:
      sys_args: Command-line arguments, without the program name.

    Returns:
      An error message when something failed, otherwise None.
    """
    args, crosperf_args = parse_args(sys_args)

    # Every lease shares the same board, pool and duration. CrosfleetParams
    # is a frozen dataclass, so one instance can fill every slot.
    lease_request = CrosfleetParams(
        board=args.board, pool=args.pool, lease_time=args.lease_time
    )
    fleet_params = [lease_request] * args.num_leases
    if not fleet_params:
        return (
            "No board names identified. If you want to use"
            " a known host, just use crosperf directly."
        )

    failure = "crosperf_autolock failed"
    try:
        _run_crosperf(fleet_params, args.dut_lock_timeout, crosperf_args)
    except BoardLockError as lock_err:
        _eprint("ERROR:", lock_err)
        _eprint('May need to login to crosfleet? Run "crosfleet login"')
        _eprint(
            "The leases may also be successful later on. "
            'Check with "crosfleet dut leases"'
        )
        return failure
    except BoardReleaseError as release_err:
        _eprint("ERROR:", release_err)
        _eprint('May need to re-run "crosfleet dut abandon"')
        return failure
    return None
54
55
def parse_args(args: List[str]) -> Tuple[Any, List]:
    """Parse the autolock flags and split off everything meant for crosperf.

    Args:
      args: Command-line arguments, without the program name.

    Returns:
      A (namespace, leftover) pair: the parsed autolock options, and the
      list of arguments this parser did not recognise.
    """
    # (flag, add_argument keyword arguments), in the order they should be
    # registered and therefore shown in --help.
    option_specs = (
        (
            "--board",
            {
                "type": str,
                "help": "Space or comma separated list of boards to lock",
                "required": True,
                "default": argparse.SUPPRESS,
            },
        ),
        (
            "--num-leases",
            {
                "type": int,
                "help": "Number of boards to lock.",
                "metavar": "NUM",
                "default": 1,
            },
        ),
        (
            "--pool",
            {
                "type": str,
                "help": "Pool to pull from.",
                "default": "DUT_POOL_QUOTA",
            },
        ),
        (
            "--dut-lock-timeout",
            {
                "type": float,
                "metavar": "SEC",
                "help": "Number of seconds we want to try to lease a board"
                " from crosfleet. This option does NOT change the"
                " lease length.",
                "default": 600,
            },
        ),
        (
            "--lease-time",
            {
                "type": int,
                "metavar": "MIN",
                "help": "Number of minutes to lock the board. Max is 1440.",
                "default": 1440,
            },
        ),
    )

    parser = argparse.ArgumentParser(
        prog="crosperf_autolock",
        description="Wrapper around crosperf"
        " to autolock DUTs from crosfleet.",
        epilog=(
            "For more detailed flags, you have to read the args taken by the"
            " crosperf executable. Args are passed transparently to crosperf."
        ),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

    # parse_known_args leaves unrecognised arguments in the second element
    # instead of erroring out on them.
    return parser.parse_known_args(args)
102
103
class BoardLockError(Exception):
    """Raised when a board could not be locked."""

    def __init__(self, msg: str):
        # The prefixed text is both the exception's message and its .msg
        # attribute.
        prefixed = "BoardLockError: " + msg
        super().__init__(prefixed)
        self.msg = prefixed
110
111
class BoardReleaseError(Exception):
    """Raised when a board could not be released."""

    def __init__(self, msg: str):
        # The prefixed text is both the exception's message and its .msg
        # attribute.
        prefixed = "BoardReleaseError: " + msg
        super().__init__(prefixed)
        self.msg = prefixed
118
119
@dataclasses.dataclass(frozen=True)
class CrosfleetParams:
    """Immutable bundle of the settings for one crosfleet lease request."""

    # Board type to lease.
    board: str
    # Pool to lease from; sent to crosfleet as the "label-pool" dimension.
    pool: str
    # Lease length, in minutes.
    lease_time: int
127
128
def _eprint(*msg, **kwargs):
    """Print to stderr; arguments are forwarded to print() unchanged."""
    print(*msg, **kwargs, file=sys.stderr)
131
132
def _run_crosperf(
    crosfleet_params: List[CrosfleetParams],
    lock_timeout: float,
    leftover_args: List[str],
):
    """Lease every requested DUT, then run crosperf once per leased host.

    All leases are held in one ExitStack, so they are released when this
    function finishes or raises.

    Args:
      crosfleet_params: One entry per DUT to lease.
      lock_timeout: Seconds to wait for each lease request.
      leftover_args: Extra arguments appended to each crosperf invocation.

    Raises:
      ValueError: When crosfleet_params is empty.
      BoardLockError: When board was unable to be locked.
      BoardReleaseError: When board was unable to be released.
    """
    if not crosfleet_params:
        raise ValueError("No crosfleet params given; cannot call crosfleet.")

    # Only the first request's board is handed to crosperf: all requests are
    # assumed to be for the same board type, which matches the experiments
    # that actually get used.
    passed_board_arg = crosfleet_params[0].board
    leased_hosts = []
    with contextlib.ExitStack() as leases:
        for request in crosfleet_params:
            print(
                f"Sent lock request for {request.board} for {request.lease_time} minutes"
                '\nIf this fails, you may need to run "crosfleet dut abandon <...>"'
            )
            # A BoardLockError here unwinds the stack, abandoning the DUTs
            # leased so far.
            hostname = leases.enter_context(
                crosfleet_machine_ctx(
                    board=request.board,
                    lease_minutes=request.lease_time,
                    lock_timeout=lock_timeout,
                    dims={"label-pool": request.pool},
                )
            )
            if not hostname:
                continue
            print(f"Locked {request.board} machine: {hostname}")
            leased_hosts.append(hostname)

        # crosperf is imported late because the import is extremely slow;
        # users should not wait several seconds just to see help output.
        import crosperf

        for hostname in leased_hosts:
            crosperf.Main(
                [
                    sys.argv[0],
                    "--no_lock",
                    "True",
                    "--remote",
                    hostname,
                    "--board",
                    passed_board_arg,
                    *leftover_args,
                ]
            )
188
189
@contextlib.contextmanager
def crosfleet_machine_ctx(
    board: str,
    lease_minutes: int,
    lock_timeout: float,
    dims: Dict[str, Any],
    abandon_timeout: float = 120.0,
) -> Any:
    """Lease a DUT from crosfleet for the duration of a with-block.

    Args:
      board: Board type to lease.
      lease_minutes: Length of lease, in minutes.
      lock_timeout: How long to wait for a lock until quitting.
      dims: Dictionary of dimension arguments to pass to crosfleet's '-dims'
      abandon_timeout: How long to wait for releasing until quitting.

    Yields:
      A string representing the crosfleet DUT hostname.

    Raises:
      BoardLockError: When board was unable to be locked.
      BoardReleaseError: When board was unable to be released.
    """
    # A failure here propagates without any release attempt: with no
    # hostname there is nothing that could be abandoned.
    leased_host = crosfleet_autolock(
        board=board,
        lease_minutes=lease_minutes,
        dims=dims,
        timeout_sec=lock_timeout,
    )
    try:
        yield leased_host
    finally:
        # Runs both on normal exit and when the with-body raised.
        if leased_host:
            crosfleet_release(
                dut_hostname=leased_host, timeout_sec=abandon_timeout
            )
222
223
def crosfleet_autolock(
    board: str, lease_minutes: int, dims: Dict[str, Any], timeout_sec: float
) -> str:
    """Lock a device using crosfleet, parameterized by the board type.

    Runs "crosfleet dut lease -json" and extracts the leased DUT's hostname
    from the JSON it prints.

    Args:
      board: Board of the DUT we want to lock.
      lease_minutes: Number of minutes we're trying to lease the DUT for.
      dims: Dictionary of dimension arguments to pass to crosfleet's '-dims'
      timeout_sec: Number of seconds to wait for the lease command.

    Returns:
      The hostname of the leased DUT, with ".cros" appended when the
      reported name does not already carry that suffix.

    Raises:
      BoardLockError: When the lease command exits non-zero or times out,
        or when its output has no non-empty string at DUT.Hostname.
    """
    crosfleet_cmd_args = [
        "crosfleet",
        "dut",
        "lease",
        "-json",
        # No shell parses this list, so embedded quotes would be passed to
        # crosfleet as literal characters of the reason text.
        "-reason=crosperf autolock",
        f"-board={board}",
        f"-minutes={lease_minutes}",
    ]
    if dims:
        dims_arg = ",".join(f"{k}={v}" for k, v in dims.items())
        crosfleet_cmd_args.extend(["-dims", dims_arg])

    try:
        output = subprocess.check_output(
            crosfleet_cmd_args, timeout=timeout_sec, encoding="utf-8"
        )
    except subprocess.CalledProcessError as e:
        raise BoardLockError(
            f"crosfleet dut lease failed with exit code: {e.returncode}"
        ) from e
    except subprocess.TimeoutExpired as e:
        raise BoardLockError(
            f"crosfleet dut lease timed out after {timeout_sec}s;"
            " please abandon the dut manually."
        ) from e

    try:
        json_obj = json.loads(output)
        dut_hostname = json_obj["DUT"]["Hostname"]
        if not isinstance(dut_hostname, str):
            raise TypeError("dut_hostname was not a string")
        if not dut_hostname:
            # An empty name would otherwise turn into the bogus host ".cros".
            raise ValueError("dut_hostname was empty")
    except (
        json.JSONDecodeError,
        IndexError,
        KeyError,
        TypeError,
        ValueError,
    ) as e:
        raise BoardLockError(
            f"crosfleet dut lease output was parsed incorrectly: {e!r};"
            f" observed output was {output}"
        ) from e
    return _maybe_append_suffix(dut_hostname)
279
280
def crosfleet_release(dut_hostname: str, timeout_sec: float = 120.0):
    """Release a crosfleet device by abandoning its lease.

    Consider using the context-managed crosfleet_machine_ctx instead of
    calling this directly.

    Args:
      dut_hostname: Name of the device we want to release.
      timeout_sec: Number of seconds to try to release the DUT. Default is 120s.

    Raises:
      BoardReleaseError: When "crosfleet dut abandon" exits non-zero or
        does not finish within timeout_sec; the lease may still be held.
    """
    crosfleet_cmd_args = [
        "crosfleet",
        "dut",
        "abandon",
        dut_hostname,
    ]
    try:
        exit_code = subprocess.call(crosfleet_cmd_args, timeout=timeout_sec)
    except subprocess.TimeoutExpired as e:
        # Report a timeout the same way as any other failed release, so
        # callers only need to handle BoardReleaseError.
        raise BoardReleaseError(
            f'"crosfleet dut abandon" timed out after {timeout_sec}s'
        ) from e
    if exit_code != 0:
        raise BoardReleaseError(
            f'"crosfleet dut abandon" had exit code {exit_code}'
        )
304
305
def _maybe_append_suffix(hostname: str) -> str:
    """Return hostname with ".cros" appended unless it is already present.

    A name counts as already suffixed when it ends with ".cros" or contains
    ".cros." anywhere.
    """
    suffix = ".cros"
    already_suffixed = hostname.endswith(suffix) or (suffix + ".") in hostname
    return hostname if already_suffixed else hostname + suffix
310
311
if __name__ == "__main__":
    # main() returns an error message or None; sys.exit() treats a string as
    # a failure message and None as a successful exit.
    error_message = main(sys.argv[1:])
    sys.exit(error_message)
314