xref: /aosp_15_r20/external/toolchain-utils/crosperf/benchmark_run.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1# -*- coding: utf-8 -*-
2# Copyright 2013 The ChromiumOS Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Module of benchmark runs."""
7
8import datetime
9import threading
10import time
11import traceback
12
13from cros_utils import command_executer
14from cros_utils import timeline
15from results_cache import MockResult
16from results_cache import MockResultsCache
17from results_cache import Result
18from results_cache import ResultsCache
19from suite_runner import SuiteRunner
20
21
# States recorded on a BenchmarkRun's timeline, listed in the order a run
# normally moves through them.
STATUS_PENDING = "PENDING"  # Recorded when the run object is created.
STATUS_WAITING = "WAITING"  # Cache miss; about to acquire a machine.
STATUS_IMAGING = "IMAGING"  # Recorded at the start of RunTest.
STATUS_RUNNING = "RUNNING"  # Recorded just before the suite is run.

# Terminal states.
STATUS_SUCCEEDED = "SUCCEEDED"
STATUS_FAILED = "FAILED"
28
29
class BenchmarkRun(threading.Thread):
    """Thread that obtains the result of one benchmark iteration for a label.

    The result is taken from the results cache when one is found there.
    Otherwise a machine is acquired and the benchmark is executed through
    the SuiteRunner. State changes are recorded on self.timeline using the
    STATUS_* constants.
    """

    def __init__(
        self,
        name,
        benchmark,
        label,
        iteration,
        cache_conditions,
        machine_manager,
        logger_to_use,
        log_level,
        share_cache,
        dut_config,
    ):
        """Store the run's configuration and mark the run as pending.

        Args:
          name: Name given to the thread; also used in log messages.
          benchmark: Benchmark description. test_args, test_name, suite,
            perf_args, show_all_results, run_local and cwp_dso are read
            from it.
          label: Label description. chromeos_image, chromeos_root, board,
            cache_only and chrome_version are read from it.
          iteration: Iteration value forwarded to the results cache.
          cache_conditions: Conditions forwarded to the results cache.
          machine_manager: Used to acquire, image and release machines.
          logger_to_use: Logger used for all output of this run.
          log_level: Log level forwarded to the helpers created here.
          share_cache: Value forwarded to the results cache.
          dut_config: Configuration forwarded to the SuiteRunner.

        Raises:
          SyntaxError: If benchmark.perf_args is set for a telemetry_Crosperf
            benchmark but does not start with "record" or "stat".
        """
        super().__init__()

        # Plain configuration values.
        self.name = name
        self._logger = logger_to_use
        self.log_level = log_level
        self.benchmark = benchmark
        self.label = label
        self.iteration = iteration
        self.machine_manager = machine_manager
        self.cache_conditions = cache_conditions
        self.share_cache = share_cache

        # Progress and outcome bookkeeping.
        self.result = None
        self.retval = None
        self.machine = None
        self.cache = None
        self.terminated = False
        self.run_completed = False
        self.runs_complete = 0
        self.cache_hit = False
        self.cache_has_been_read = False
        self.failure_reason = ""

        # This is used by schedv2.
        self.owner_thread = None

        # Helper objects and derived values.
        self.suite_runner = SuiteRunner(
            dut_config, self._logger, self.log_level
        )
        self.test_args = benchmark.test_args
        self.profiler_args = self.GetExtraAutotestArgs()
        self._ce = command_executer.GetCommandExecuter(
            self._logger, log_level=self.log_level
        )
        self.timeline = timeline.Timeline()
        self.timeline.Record(STATUS_PENDING)

    def ReadCache(self):
        """Set up the results cache and try to load a stored result.

        Afterwards self.result holds whatever the cache returned,
        self.cache_hit tells whether that was something other than None, and
        self.cache_has_been_read is True so that run() does not read again.
        """
        # self.machine is handed over as it currently is; no machine gets
        # locked just for reading the cache.
        self.cache = ResultsCache()
        label = self.label
        bench = self.benchmark
        self.cache.Init(
            label.chromeos_image,
            label.chromeos_root,
            bench.test_name,
            self.iteration,
            self.test_args,
            self.profiler_args,
            self.machine_manager,
            self.machine,
            label.board,
            self.cache_conditions,
            self._logger,
            self.log_level,
            label,
            self.share_cache,
            bench.suite,
            bench.show_all_results,
            bench.run_local,
            bench.cwp_dso,
        )

        cached = self.cache.ReadResult()
        self.result = cached
        self.cache_hit = cached is not None
        self.cache_has_been_read = True

    def run(self):
        """Thread body: obtain self.result and record the final status.

        Any exception is logged, printed and turned into a STATUS_FAILED
        timeline entry rather than propagated. A locally acquired machine
        is always released at the end.
        """
        try:
            if not self.cache_has_been_read:
                self.ReadCache()

            if self.result:
                self._LogCachedResult()
            elif self.label.cache_only:
                self.result = self._CreateCacheMissResult()
            else:
                self._RunOnAcquiredMachine()

            self._EnsureChromeVersion()

            # Terminate() records the failure status itself.
            if self.terminated:
                return

            self._RecordFinalStatus()
        except Exception as e:
            self._logger.LogError(
                "Benchmark run: '%s' failed: %s" % (self.name, e)
            )
            traceback.print_exc()
            if self.timeline.GetLastEvent() != STATUS_FAILED:
                self.timeline.Record(STATUS_FAILED)
                self.failure_reason = str(e)
        finally:
            self._ReleaseLocalMachine()

    def _LogCachedResult(self):
        """Log the stdout/stderr of a result that came from the cache."""
        log = self._logger
        log.LogOutput("%s: Cache hit." % self.name)
        log.LogOutput(self.result.out, print_to_console=False)
        log.LogError(self.result.err, print_to_console=False)

    def _CreateCacheMissResult(self):
        """Build the failing result used when the label is cache-only."""
        self._logger.LogOutput("%s: No cache hit." % self.name)
        return Result.CreateFromRun(
            self._logger,
            self.log_level,
            self.label,
            self.machine,
            "%s: No Cache hit." % self.name,
            "No cache hit.",
            1,
            self.benchmark.test_name,
            self.benchmark.suite,
            self.benchmark.cwp_dso,
        )

    def _RunOnAcquiredMachine(self):
        """Acquire a machine, run the test on it and store the result."""
        self._logger.LogOutput("%s: No cache hit." % self.name)
        self.timeline.Record(STATUS_WAITING)

        # Try to acquire a machine now.
        self.machine = self.AcquireMachine()
        self.cache.machine = self.machine
        self.result = self.RunTest(self.machine)

        self.cache.remote = self.machine.name
        chrome_version = self.machine_manager.GetChromeVersion(self.machine)
        self.label.chrome_version = chrome_version
        self.cache.StoreResult(self.result)

    def _EnsureChromeVersion(self):
        """Fill in label.chrome_version from the machine or the result."""
        if self.label.chrome_version:
            return
        if self.machine:
            self.label.chrome_version = self.machine_manager.GetChromeVersion(
                self.machine
            )
        elif self.result.chrome_version:
            self.label.chrome_version = self.result.chrome_version

    def _RecordFinalStatus(self):
        """Record success or failure according to the result's retval."""
        if not self.result.retval:
            self.timeline.Record(STATUS_SUCCEEDED)
        elif self.timeline.GetLastEvent() != STATUS_FAILED:
            self.failure_reason = "Return value of test suite was non-zero."
            self.timeline.Record(STATUS_FAILED)

    def _ReleaseLocalMachine(self):
        """Release the machine held by this run, if it was locked locally."""
        # In schedv2 mode, we do not lock machine locally. So noop here.
        if self.owner_thread is not None or not self.machine:
            return

        if not self.machine.IsReachable():
            self._logger.LogOutput(
                "Machine %s is not reachable, removing it." % self.machine.name
            )
            self.machine_manager.RemoveMachine(self.machine.name)

        self._logger.LogOutput("Releasing machine: %s" % self.machine.name)
        self.machine_manager.ReleaseMachine(self.machine)
        self._logger.LogOutput("Released machine: %s" % self.machine.name)

    def Terminate(self):
        """Flag the run as terminated and stop the suite runner.

        Records STATUS_FAILED with the reason "Thread terminated." unless
        the timeline's last event is already STATUS_FAILED.
        """
        self.terminated = True
        self.suite_runner.Terminate()
        if self.timeline.GetLastEvent() == STATUS_FAILED:
            return
        self.timeline.Record(STATUS_FAILED)
        self.failure_reason = "Thread terminated."

    def AcquireMachine(self):
        """Return a machine to run on, polling until one is available.

        Returns:
          The owner thread's dut() when owner_thread is set, otherwise the
          first truthy value returned by machine_manager.AcquireMachine.

        Raises:
          RuntimeError: If the run is terminated while still waiting.
        """
        if self.owner_thread is not None:
            # No need to lock machine locally, DutWorker, which is a thread, is
            # responsible for running br.
            return self.owner_thread.dut()

        while True:
            if self.terminated:
                raise RuntimeError(
                    "Thread terminated while trying to acquire machine."
                )

            acquired = self.machine_manager.AcquireMachine(self.label)
            if acquired:
                self._logger.LogOutput(
                    "%s: Machine %s acquired at %s"
                    % (self.name, acquired.name, datetime.datetime.now())
                )
                return acquired

            # Nothing available yet; wait before asking again.
            time.sleep(10)

    def GetExtraAutotestArgs(self):
        """Translate benchmark.perf_args into profiler arguments.

        perf_args is only honoured for the "telemetry_Crosperf" suite; for
        any other suite an error is logged and benchmark.perf_args is reset
        to "".

        Returns:
          "" when there are no perf_args, otherwise the custom_perf profiler
          flags with "-a" inserted right after the perf subcommand.

        Raises:
          SyntaxError: If perf_args does not start with "record" or "stat".
        """
        bench = self.benchmark
        if bench.perf_args and bench.suite != "telemetry_Crosperf":
            self._logger.LogError(
                "Non-telemetry benchmark does not support profiler."
            )
            bench.perf_args = ""

        if not bench.perf_args:
            return ""

        tokens = bench.perf_args.split(" ")
        if tokens[0] not in ("record", "stat"):
            raise SyntaxError("perf_args must start with either record or stat")

        tokens.insert(1, "-a")
        perf_args = " ".join(tokens)
        profiler_flag = "--profiler=custom_perf"
        profiler_options = "--profiler_args='perf_options=\"%s\"'" % perf_args
        return " ".join([profiler_flag, profiler_options])

    def RunTest(self, machine):
        """Image the machine if needed, run the suite and build a Result.

        Args:
          machine: Machine handed to ImageMachine and to the suite runner.

        Returns:
          The object produced by Result.CreateFromRun for this run.
        """
        self.timeline.Record(STATUS_IMAGING)
        # In schedv2 mode (owner_thread set), do not even call ImageMachine.
        # Machine image is guaranteed.
        if self.owner_thread is None:
            self.machine_manager.ImageMachine(machine, self.label)

        self.timeline.Record(STATUS_RUNNING)
        retval, out, err = self.suite_runner.Run(
            machine,
            self.label,
            self.benchmark,
            self.test_args,
            self.profiler_args,
        )
        self.run_completed = True

        # NOTE: the Result is built with self.machine, not the `machine`
        # argument.
        return Result.CreateFromRun(
            self._logger,
            self.log_level,
            self.label,
            self.machine,
            out,
            err,
            retval,
            self.benchmark.test_name,
            self.benchmark.suite,
            self.benchmark.cwp_dso,
        )

    def SetCacheConditions(self, cache_conditions):
        """Replace the cache conditions stored on this run."""
        self.cache_conditions = cache_conditions

    def logger(self):
        """Return the logger, only used by unittest.

        Returns:
          self._logger
        """
        return self._logger

    def __str__(self):
        """For better debugging."""
        template = 'BenchmarkRun[name="{}"]'
        return template.format(self.name)
300
301
class MockBenchmarkRun(BenchmarkRun):
    """BenchmarkRun that uses MockResultsCache and MockResult, for testing."""

    def ReadCache(self):
        """Set up a MockResultsCache and try to load a stored result.

        Afterwards self.result holds whatever the cache returned,
        self.cache_hit tells whether that was something other than None, and
        self.cache_has_been_read is True.
        """
        # Just use the first machine for running the cached version,
        # without locking it.
        self.cache = MockResultsCache()
        self.cache.Init(
            self.label.chromeos_image,
            self.label.chromeos_root,
            self.benchmark.test_name,
            self.iteration,
            self.test_args,
            self.profiler_args,
            self.machine_manager,
            self.machine,
            self.label.board,
            self.cache_conditions,
            self._logger,
            self.log_level,
            self.label,
            self.share_cache,
            self.benchmark.suite,
            self.benchmark.show_all_results,
            self.benchmark.run_local,
            self.benchmark.cwp_dso,
        )

        self.result = self.cache.ReadResult()
        self.cache_hit = self.result is not None
        # Keep in step with BenchmarkRun.ReadCache: without this flag the
        # inherited run() would read the cache a second time after an
        # explicit ReadCache() call.
        self.cache_has_been_read = True

    def RunTest(self, machine):
        """Run the suite and wrap its output in a MockResult.

        Unlike BenchmarkRun.RunTest, the machine is always imaged
        (owner_thread is not consulted) and Result.CreateFromRun is not
        used.

        Args:
          machine: Machine handed to ImageMachine, the suite runner and the
            MockResult.

        Returns:
          A MockResult carrying the suite runner's out, err and retval.
        """
        self.timeline.Record(STATUS_IMAGING)
        self.machine_manager.ImageMachine(machine, self.label)
        self.timeline.Record(STATUS_RUNNING)
        retval, out, err = self.suite_runner.Run(
            machine,
            self.label,
            self.benchmark,
            self.test_args,
            self.profiler_args,
        )
        self.run_completed = True
        # The first argument is the literal string "logger", not self._logger.
        rr = MockResult("logger", self.label, self.log_level, machine)
        rr.out = out
        rr.err = err
        rr.retval = retval
        return rr
351