xref: /aosp_15_r20/external/autotest/server/profilers.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li# Lint as: python2, python3
2*9c5db199SXin Liimport os, shutil, tempfile, logging
3*9c5db199SXin Li
4*9c5db199SXin Liimport common
5*9c5db199SXin Lifrom autotest_lib.client.common_lib import utils, error, profiler_manager
6*9c5db199SXin Lifrom autotest_lib.server import profiler, autotest, standalone_profiler
7*9c5db199SXin Li
8*9c5db199SXin Li
# Remote parent directory for the autotest installs created by this module:
# _install_clients() requests a temp dir under it for each host, and treats
# any host whose autodir already starts with this prefix as a profiler host.
9*9c5db199SXin LiPROFILER_TMPDIR = '/tmp/profilers'
10*9c5db199SXin Li
11*9c5db199SXin Li
def get_profiler_results_dir(autodir):
    """
    Build the remote path under which a profiler run stores its results,
    given the directory of the autotest client that ran the profiler.
    """
    subdirs = ('results', 'default', 'profiler_sync', 'profiling')
    return os.path.join(autodir, *subdirs)
19*9c5db199SXin Li
20*9c5db199SXin Li
def get_profiler_log_path(autodir):
    """
    Build the path of the client debug log (client.DEBUG) that belongs to
    the profiler client installed in autodir.
    """
    debug_dir = os.path.join(autodir, 'results', 'default', 'debug')
    return os.path.join(debug_dir, 'client.DEBUG')
26*9c5db199SXin Li
27*9c5db199SXin Li
class profilers(profiler_manager.profiler_manager):
    """
    Server-side profiler manager.

    For every job machine it installs a dedicated autotest client under
    PROFILER_TMPDIR, runs the registered profilers there through a control
    script generated by standalone_profiler, and pulls the results (or, on
    failure, the client logs) back into the test's profiling directory.

    self.job and self.list are used throughout but not assigned here; they
    are presumably provided by the profiler_manager base class.
    """
    def __init__(self, job):
        """
        @param job: the server job, forwarded to the base class constructor.
        """
        super(profilers, self).__init__(job)
        # maps the 'profiler' value given to add() to its (args, dargs)
        self.add_log = {}
        # only stored by set_start_delay(); not read anywhere in this class
        self.start_delay = 0
        # maps hostname to (host object, autotest.Autotest object, Autotest
        # install dir), where the host object is the one created specifically
        # for profiling
        self.installed_hosts = {}
        # test passed to the most recent successful start(), None before that
        self.current_test = None


    def set_start_delay(self, start_delay):
        """
        Record the start delay value.
        """
        self.start_delay = start_delay


    def load_profiler(self, profiler_name, args, dargs):
        """
        Create, initialize and set up a profiler proxy.

        @param profiler_name: name handed to profiler.profiler_proxy.
        @param args: positional arguments for initialize() and setup().
        @param dargs: keyword arguments for initialize() and setup().
        @return the new profiler proxy object.
        """
        newprofiler = profiler.profiler_proxy(profiler_name)
        newprofiler.initialize(*args, **dargs)
        newprofiler.setup(*args, **dargs) # lazy setup is done client-side
        return newprofiler


    def add(self, profiler, *args, **dargs):
        """
        Register a profiler with the base class and remember its arguments.
        """
        super(profilers, self).add(profiler, *args, **dargs)
        self.add_log[profiler] = (args, dargs)


    def delete(self, profiler):
        """
        Unregister a profiler and forget the arguments recorded by add().
        """
        super(profilers, self).delete(profiler)
        # pop() tolerates profilers that were never recorded in add_log
        self.add_log.pop(profiler, None)


    def _install_clients(self):
        """
        Install autotest on any current job hosts.

        Brings self.installed_hosts up to date: entries whose install
        directory no longer exists are dropped, hosts in use without a valid
        install get a fresh one, and installs for hosts that are no longer
        in use are forgotten.
        """
        in_use_hosts = {}
        # find hosts in use but not used by us
        for host in self.job.hosts:
            if host.hostname not in self.job.machines:
                # job.hosts include all host instances created on the fly.
                # We only care DUTs in job.machines which are
                # piped in from autoserv -m option.
                continue
            autodir = host.get_autodir()
            if not (autodir and autodir.startswith(PROFILER_TMPDIR)):
                in_use_hosts[host.hostname] = host
        logging.debug('Hosts currently in use: %s', set(in_use_hosts))

        # determine what valid host objects we already have installed
        profiler_hosts = set()
        # Iterate over a snapshot: stale entries are deleted inside the loop,
        # and deleting from a dict while iterating its live values() view
        # raises RuntimeError on Python 3.
        for host, _, profiler_dir in list(self.installed_hosts.values()):
            if host.path_exists(profiler_dir):
                profiler_hosts.add(host.hostname)
            else:
                # the profiler was wiped out somehow, drop this install
                logging.warning('The profiler client on %s at %s was deleted',
                                host.hostname, profiler_dir)
                del self.installed_hosts[host.hostname]
        logging.debug('Hosts with profiler clients already installed: %s',
                      profiler_hosts)

        # install autotest on any new hosts in use
        for hostname in set(in_use_hosts) - profiler_hosts:
            host = in_use_hosts[hostname]
            tmp_dir = host.get_tmp_dir(parent=PROFILER_TMPDIR)
            at = autotest.Autotest(host)
            at.install_no_autoserv(autodir=tmp_dir)
            self.installed_hosts[host.hostname] = (host, at, tmp_dir)

        # drop any installs from hosts no longer in job.hosts
        for hostname in profiler_hosts - set(in_use_hosts):
            del self.installed_hosts[hostname]


    def _get_hosts(self, host=None):
        """
        Returns a list of (Host, Autotest, install directory) tuples for hosts
        currently supported by this profiler. The returned Host object is always
        the one created by this profiler, regardless of what's passed in. If
        'host' is not None, all entries not matching that host object are
        filtered out of the list.
        """
        if host is None:
            return list(self.installed_hosts.values())
        if host.hostname in self.installed_hosts:
            return [self.installed_hosts[host.hostname]]
        return []


    def _get_local_profilers_dir(self, test, hostname):
        """
        Return the local directory that receives profiler output for a host.

        When the job has more than one machine and the job result directory
        is not itself named after one of them, a per-host subdirectory of
        test.profdir is used (and created if missing); otherwise
        test.profdir itself is returned.
        """
        in_machine_dir = (
                os.path.basename(test.job.resultdir) in test.job.machines)
        if len(test.job.machines) > 1 and not in_machine_dir:
            local_dir = os.path.join(test.profdir, hostname)
            if not os.path.exists(local_dir):
                os.makedirs(local_dir)
        else:
            local_dir = test.profdir

        return local_dir


    def _get_failure_logs(self, autodir, test, host):
        """
        Collect the client logs from a profiler run and put them in a
        file named failure-*.log.
        """
        try:
            fd, path = tempfile.mkstemp(suffix='.log', prefix='failure-',
                    dir=self._get_local_profilers_dir(test, host.hostname))
            # only the unique file name is needed; get_file() writes to it
            os.close(fd)
            host.get_file(get_profiler_log_path(autodir), path)
            # try to collect any partial profiler logs
            self._get_profiler_logs(autodir, test, host)
        except (error.AutotestError, error.AutoservError):
            logging.exception('Profiler failure log collection failed')
            # swallow the exception so that we don't override an existing
            # exception being thrown


    def _get_all_failure_logs(self, test, hosts):
        """
        Run _get_failure_logs() for every (host, autotest, autodir) entry.
        """
        for host, _, autodir in hosts:
            self._get_failure_logs(autodir, test, host)


    def _get_profiler_logs(self, autodir, test, host):
        """
        Copy the remote profiler results of one host into its local
        profilers directory and unregister the matching client log.

        The files are first fetched into a scratch directory under
        self.job.tmpdir and then merged into the local directory.
        """
        results_dir = get_profiler_results_dir(autodir)
        local_dir = self._get_local_profilers_dir(test, host.hostname)

        self.job.remove_client_log(host.hostname, results_dir, local_dir)

        tempdir = tempfile.mkdtemp(dir=self.job.tmpdir)
        try:
            try:
                host.get_file(results_dir + '/', tempdir)
            except error.AutoservRunError:
                pass # no files to pull back, nothing we can do
            utils.merge_trees(tempdir, local_dir)
        finally:
            # always remove the scratch directory, even if the fetch or the
            # merge fails with an unexpected error
            shutil.rmtree(tempdir, ignore_errors=True)


    def _run_clients(self, test, hosts):
        """
        We initialize the profilers just before start because only then we
        know all the hosts involved.

        Starts a generated control script in the background on every host,
        registers the remote results directory as a client log, then waits
        for the profilers to be added. Failure logs are collected before any
        exception is re-raised.
        """
        hostnames = [host_info[0].hostname for host_info in hosts]
        profilers_args = [(p.name, p.args, p.dargs)
                          for p in self.list]

        for host, at, autodir in hosts:
            # NOTE(review): 180 and None are presumably the start and stop
            # timeouts - confirm against standalone_profiler.generate_test
            control_script = standalone_profiler.generate_test(hostnames,
                                                               host.hostname,
                                                               profilers_args,
                                                               180, None)
            try:
                at.run(control_script, background=True)
            except Exception:
                self._get_failure_logs(autodir, test, host)
                raise

            remote_results_dir = get_profiler_results_dir(autodir)
            local_results_dir = self._get_local_profilers_dir(test,
                                                              host.hostname)
            self.job.add_client_log(host.hostname, remote_results_dir,
                                    local_results_dir)

        try:
            # wait for the profilers to be added
            standalone_profiler.wait_for_profilers(hostnames)
        except Exception:
            self._get_all_failure_logs(test, hosts)
            raise


    def before_start(self, test, host=None):
        """
        Install the profiler clients and launch them on the selected hosts.
        """
        # create host objects and install the needed clients
        # so later in start() we don't spend too much time
        self._install_clients()
        self._run_clients(test, self._get_hosts(host))


    def start(self, test, host=None):
        """
        Start the profilers on the selected hosts and remember the test.

        Failure logs are collected before any exception is re-raised.
        """
        hosts = self._get_hosts(host)

        # wait for the profilers to start
        hostnames = [host_info[0].hostname for host_info in hosts]
        try:
            standalone_profiler.start_profilers(hostnames)
        except Exception:
            self._get_all_failure_logs(test, hosts)
            raise

        self.current_test = test


    def stop(self, test):
        """
        Stop the profilers on all installed hosts.

        'test' must be the test given to the last start(). Failure logs are
        collected before any exception is re-raised.
        """
        assert self.current_test == test

        hosts = self._get_hosts()
        # wait for the profilers to stop
        hostnames = [host_info[0].hostname for host_info in hosts]
        try:
            standalone_profiler.stop_profilers(hostnames)
        except Exception:
            self._get_all_failure_logs(test, hosts)
            raise


    def report(self, test, host=None):
        """
        Finish the profilers and pull their results back.

        'test' must be the test given to the last start(). When a specific
        host is given, the finish step is skipped and only that host's
        results are fetched.
        """
        assert self.current_test == test

        hosts = self._get_hosts(host)
        # when running on specific hosts we cannot wait for the other
        # hosts to sync with us
        if not host:
            hostnames = [host_info[0].hostname for host_info in hosts]
            try:
                standalone_profiler.finish_profilers(hostnames)
            except Exception:
                self._get_all_failure_logs(test, hosts)
                raise

        # pull back all the results
        for profiled_host, _, autodir in hosts:
            self._get_profiler_logs(autodir, test, profiled_host)


    def handle_reboot(self, host):
        """
        Restart profiling on a host that rebooted while a test is current.

        Records a WARN for every profiler whose supports_reboot is false,
        then reports, re-installs/re-launches and restarts the profilers for
        that host. Does nothing when no test is current.
        """
        if not self.current_test:
            return

        test = self.current_test
        for prof in self.list:
            if not prof.supports_reboot:
                msg = 'profiler %s does not support rebooting during tests'
                msg %= prof.name
                self.job.record('WARN', os.path.basename(test.outputdir),
                                None, msg)

        self.report(test, host)
        self.before_start(test, host)
        self.start(test, host)
272