xref: /aosp_15_r20/external/autotest/server/_autoserv (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li#!/usr/bin/python3 -u
2*9c5db199SXin Li# Copyright 2007-2008 Martin J. Bligh <[email protected]>, Google Inc.
3*9c5db199SXin Li# Released under the GPL v2
4*9c5db199SXin Li
5*9c5db199SXin Li"""
6*9c5db199SXin LiRun a control file through the server side engine
7*9c5db199SXin Li"""
8*9c5db199SXin Li
9*9c5db199SXin Liimport datetime
10*9c5db199SXin Liimport contextlib
11*9c5db199SXin Liimport getpass
12*9c5db199SXin Liimport logging
13*9c5db199SXin Liimport os
14*9c5db199SXin Liimport re
15*9c5db199SXin Liimport shutil
16*9c5db199SXin Liimport signal
17*9c5db199SXin Liimport socket
18*9c5db199SXin Liimport sys
19*9c5db199SXin Liimport traceback
20*9c5db199SXin Liimport time
21*9c5db199SXin Liimport six
22*9c5db199SXin Lifrom six.moves import urllib
23*9c5db199SXin Li
24*9c5db199SXin Liimport common
25*9c5db199SXin Lifrom autotest_lib.client.bin.result_tools import utils as result_utils
26*9c5db199SXin Lifrom autotest_lib.client.bin.result_tools import view as result_view
27*9c5db199SXin Lifrom autotest_lib.client.common_lib import control_data
28*9c5db199SXin Lifrom autotest_lib.client.common_lib import autotest_enum
29*9c5db199SXin Lifrom autotest_lib.client.common_lib import error
30*9c5db199SXin Lifrom autotest_lib.client.common_lib import global_config
31*9c5db199SXin Lifrom autotest_lib.client.common_lib import host_queue_entry_states
32*9c5db199SXin Lifrom autotest_lib.client.common_lib import host_states
33*9c5db199SXin Lifrom autotest_lib.client.common_lib import seven
34*9c5db199SXin Lifrom autotest_lib.server.cros.dynamic_suite import suite
35*9c5db199SXin Li
36*9c5db199SXin Litry:
37*9c5db199SXin Li    from autotest_lib.utils.frozen_chromite.lib import metrics
38*9c5db199SXin Li    from autotest_lib.utils.frozen_chromite.lib import cloud_trace
39*9c5db199SXin Liexcept ImportError as e:
40*9c5db199SXin Li    from autotest_lib.client.common_lib import utils as common_utils
41*9c5db199SXin Li    metrics = common_utils.metrics_mock
42*9c5db199SXin Li    import mock
43*9c5db199SXin Li    cloud_trace = mock.MagicMock()
44*9c5db199SXin Li
45*9c5db199SXin Li# Number of seconds to wait before returning if testing mode is enabled
46*9c5db199SXin LiTESTING_MODE_SLEEP_SECS = 1
47*9c5db199SXin Li
48*9c5db199SXin Li
49*9c5db199SXin Lifrom autotest_lib.server import frontend
50*9c5db199SXin Lifrom autotest_lib.server import server_logging_config
51*9c5db199SXin Lifrom autotest_lib.server import server_job, utils, autoserv_parser, autotest
52*9c5db199SXin Lifrom autotest_lib.server import utils as server_utils
53*9c5db199SXin Lifrom autotest_lib.server import site_utils
54*9c5db199SXin Lifrom autotest_lib.server.cros.dynamic_suite import frontend_wrappers
55*9c5db199SXin Lifrom autotest_lib.site_utils import job_directories
56*9c5db199SXin Lifrom autotest_lib.site_utils import lxc
57*9c5db199SXin Lifrom autotest_lib.site_utils.lxc import utils as lxc_utils
58*9c5db199SXin Lifrom autotest_lib.client.common_lib import pidfile, logging_manager
59*9c5db199SXin Li
60*9c5db199SXin Li
61*9c5db199SXin Li# Control segment to stage server-side package.
62*9c5db199SXin LiSTAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
63*9c5db199SXin Li        'stage_server_side_package')
64*9c5db199SXin Li
65*9c5db199SXin Li# Command line to start servod in a moblab.
66*9c5db199SXin LiSTART_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
67*9c5db199SXin LiSTOP_SERVOD_CMD = 'sudo stop servod'
68*9c5db199SXin Li
69*9c5db199SXin Li_AUTOTEST_ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))
70*9c5db199SXin Li_CONTROL_FILE_FROM_CONTROL_NAME = 'control.from_control_name'
71*9c5db199SXin Li
72*9c5db199SXin Li_LXC_JOB_FOLDER = 'lxc_job_folder'
73*9c5db199SXin Li
def log_alarm(signum, frame):
    """Signal handler that logs the alarm loudly and terminates the process.

    @param signum: Number of the signal being handled (unused).
    @param frame: Stack frame that was interrupted by the signal (unused).

    @raises SystemExit: always, with exit status 1.
    """
    # The message used to claim the signal was ignored, which contradicted
    # the unconditional exit below and misled anyone reading the logs.
    logging.error("Received SIGALARM. Exiting with status 1.")
    sys.exit(1)
77*9c5db199SXin Li
78*9c5db199SXin Li
79*9c5db199SXin Lidef _get_companions(parser):
80*9c5db199SXin Li    """Get a list of companion devices from command line arg -ch.
81*9c5db199SXin Li
82*9c5db199SXin Li    @param parser: Parser for the command line arguments.
83*9c5db199SXin Li
84*9c5db199SXin Li    @return: A list of companion devices from command line arg -ch.
85*9c5db199SXin Li    """
86*9c5db199SXin Li    if parser.options.companion_hosts:
87*9c5db199SXin Li        companions = parser.options.companion_hosts.replace(',', ' ').strip().split()
88*9c5db199SXin Li    else:
89*9c5db199SXin Li        companions = []
90*9c5db199SXin Li
91*9c5db199SXin Li    if companions:
92*9c5db199SXin Li        for companion in companions:
93*9c5db199SXin Li            if not companion or re.search('\s', companion):
94*9c5db199SXin Li                parser.parser.error("Invalid companion: %s" % str(companion))
95*9c5db199SXin Li        companions = list(set(companions))
96*9c5db199SXin Li        companions.sort()
97*9c5db199SXin Li    return companions
98*9c5db199SXin Li
99*9c5db199SXin Li
100*9c5db199SXin Lidef _get_dutservers(parser):
101*9c5db199SXin Li    """Get a list of DUT server addresses from command line arg --dut_servers.
102*9c5db199SXin Li
103*9c5db199SXin Li    @param parser: Parser for the command line arguments.
104*9c5db199SXin Li
105*9c5db199SXin Li    @return: A list of DUT server addresses from command line arg
106*9c5db199SXin Li             --dut_servers.
107*9c5db199SXin Li    """
108*9c5db199SXin Li    if parser.options.dut_servers:
109*9c5db199SXin Li        dut_servers = parser.options.dut_servers.replace(
110*9c5db199SXin Li            ',', ' ').strip().split()
111*9c5db199SXin Li    else:
112*9c5db199SXin Li        dut_servers = []
113*9c5db199SXin Li
114*9c5db199SXin Li    if dut_servers:
115*9c5db199SXin Li        for dut_server in dut_servers:
116*9c5db199SXin Li            if not dut_server or re.search('\s', dut_server):
117*9c5db199SXin Li                parser.parser.error(
118*9c5db199SXin Li                    "Invalid DUT Server address: %s" % str(dut_server))
119*9c5db199SXin Li        dut_servers = list(set(dut_servers))
120*9c5db199SXin Li        dut_servers.sort()
121*9c5db199SXin Li    return dut_servers
122*9c5db199SXin Li
123*9c5db199SXin Li
124*9c5db199SXin Lidef _get_machines(parser):
125*9c5db199SXin Li    """Get a list of machine names from command line arg -m or a file.
126*9c5db199SXin Li
127*9c5db199SXin Li    @param parser: Parser for the command line arguments.
128*9c5db199SXin Li
129*9c5db199SXin Li    @return: A list of machine names from command line arg -m or the
130*9c5db199SXin Li             machines file specified in the command line arg -M.
131*9c5db199SXin Li    """
132*9c5db199SXin Li    if parser.options.machines:
133*9c5db199SXin Li        machines = parser.options.machines.replace(',', ' ').strip().split()
134*9c5db199SXin Li    else:
135*9c5db199SXin Li        machines = []
136*9c5db199SXin Li    machines_file = parser.options.machines_file
137*9c5db199SXin Li    if machines_file:
138*9c5db199SXin Li        machines = []
139*9c5db199SXin Li        for m in open(machines_file, 'r').readlines():
140*9c5db199SXin Li            # remove comments, spaces
141*9c5db199SXin Li            m = re.sub('#.*', '', m).strip()
142*9c5db199SXin Li            if m:
143*9c5db199SXin Li                machines.append(m)
144*9c5db199SXin Li        logging.debug('Read list of machines from file: %s', machines_file)
145*9c5db199SXin Li        logging.debug('Machines: %s', ','.join(machines))
146*9c5db199SXin Li
147*9c5db199SXin Li    if machines:
148*9c5db199SXin Li        for machine in machines:
149*9c5db199SXin Li            if not machine or re.search('\s', machine):
150*9c5db199SXin Li                parser.parser.error("Invalid machine: %s" % str(machine))
151*9c5db199SXin Li        machines = list(set(machines))
152*9c5db199SXin Li        machines.sort()
153*9c5db199SXin Li    return machines
154*9c5db199SXin Li
155*9c5db199SXin Li
def _stage_ssp(parser, resultsdir):
    """Stage the server-side package (SSP) for this autoserv run.

    The staging itself is delegated to the stage_server_side_package control
    segment, which is executed with the job's machines and the test source
    build in its global namespace. The segment hands its outcome back through
    the local names 'ssp_url' and 'error_msg'.

    @param parser: Command line arguments parser passed in the autoserv
            process.
    @param resultsdir: Folder to store results; the host info store lives in
            the parser.options.host_info_subdir folder underneath it.

    @return: The 'ssp_url' value produced by the control segment. A false
            value means staging failed; the failure is logged here.
    """
    options = parser.options
    machine_names = _get_machines(parser)
    host_info_dir = os.path.join(resultsdir, options.host_info_subdir)
    machine_dicts = server_job.get_machine_dicts(
            machine_names=machine_names,
            store_dir=host_info_dir,
            in_lab=options.lab,
            use_shadow_store=not options.local_only_host_info,
            host_attributes=options.host_attributes,
    )

    segment_globals = {
            'machines': machine_dicts,
            'image': options.test_source_build,
    }
    segment_locals = {}
    seven.exec_file(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE,
                    globals_=segment_globals,
                    locals_=segment_locals)

    ssp_url = segment_locals['ssp_url']
    if ssp_url:
        return ssp_url

    # Staging failed: report why, but let the caller decide what to do.
    logging.error('Failed to stage SSP package: %s',
                  segment_locals['error_msg'])
    logging.error('This job will fail later, when attempting to run with'
                  ' SSP')
    return ssp_url
200*9c5db199SXin Li
201*9c5db199SXin Li
def _run_with_ssp(job, container_id, job_id, results, parser, ssp_url,
                  machines):
    """Run the server job with server-side packaging.

    Sets up a test container from the staged package, rewrites the current
    autoserv command line so it is valid inside the container, and runs it
    there. The container is destroyed afterwards, whether or not the run
    succeeded.

    @param job: The server job object.
    @param container_id: ID of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                    stored in a temp folder.
                    results can be None if the autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param machines: A list of machines to run the test.

    @raises error.AutoservError: if ssp_url is empty.
    @raises error.AutoservSSPError: if the test container cannot be set up.
    """
    if not ssp_url:
        job.record('FAIL', None, None,
                   'Failed to stage server-side package')
        raise error.AutoservError('Failed to stage server-side package')

    bucket = lxc.ContainerBucket(
            base_name=_ssp_base_image_name_or_default(parser.options))
    # An empty first positional argument counts as "no control file".
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_id, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=_LXC_JOB_FOLDER,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('START', None, None, 'Starting SSP')
        job.record('END ABORT', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise error.AutoservSSPError

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        # Drop both the flag and the parent job id value that follows it.
        del args[index:index + 2]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % _LXC_JOB_FOLDER
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    # NOTE(review): only arguments containing a space are quoted; other shell
    # metacharacters are passed through unchanged.
    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        # Looking for logs must never replace the original failure, so a
        # missing results folder or debug directory counts as "no logs".
        debug_files = []
        if results:
            try:
                debug_files = os.listdir(os.path.join(results, 'debug'))
            except OSError:
                debug_files = []
        if not debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        metrics.Counter(
            'chromeos/autotest/experimental/execute_job_in_ssp').increment(
                fields={'success': success})
        test_container.destroy()
294*9c5db199SXin Li
295*9c5db199SXin Li
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the current user and group.

    Tests that run with server-side packaging leave the results folder owned
    by root. Ownership is changed recursively to the uid/gid of the autoserv
    process so that the parsing job can read the results.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when it is
                    empty or None.

    """
    if results:
        owner_cmd = 'sudo -n chown -R %s "%s"' % (os.getuid(), results)
        utils.run(owner_cmd)
        group_cmd = 'sudo -n chgrp -R %s "%s"' % (os.getgid(), results)
        utils.run(group_cmd)
313*9c5db199SXin Li
314*9c5db199SXin Li
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    This is best effort: every failure is logged and the function returns
    without raising, and it does nothing at all outside of moblab.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host record there are no servo attributes to read.
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted', machine)
            return
        attributes = hosts[0].attributes
        servo_host = attributes.get('servo_host', None)
        servo_port = attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                         'attribute is not set to localhost.')
            return
    except (urllib.error.HTTPError, urllib.error.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        # Use the module constant rather than repeating the command literal.
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
361*9c5db199SXin Li
362*9c5db199SXin Li
def _control_path_on_disk(control_name):
    """Find the control file corresponding to the given control name, on disk.

    @param control_name: NAME attribute of the control file to fetch. It is
            matched literally and in full.
    @return: Path to the control file.
    @raises error.AutoservError: if no control file, or more than one control
            file, has the given NAME.
    """
    cf_getter = suite.create_fs_getter(_AUTOTEST_ROOT)
    # Escape the name so regex metacharacters in it (e.g. the '.' in
    # 'foo_Bar.variant') are matched literally instead of as a pattern.
    control_name_predicate = suite.test_name_matches_pattern_predicate(
            '^%s$' % re.escape(control_name))
    tests = suite.find_and_parse_tests(cf_getter, control_name_predicate)
    if not tests:
        raise error.AutoservError(
                'Failed to find any control files with NAME %s' % control_name)
    if len(tests) > 1:
        logging.error('Found more than one control file with NAME %s: %s',
                      control_name, [t.path for t in tests])
        raise error.AutoservError(
                'Found more than one control file with NAME %s' % control_name)
    return tests[0].path
382*9c5db199SXin Li
383*9c5db199SXin Li
def _stage_control_file(control_name, results_dir):
    """Copy the named control file from the local checkout into the results.

    @param control_name: Name of the control file to stage.
    @param results_dir: Results directory to stage the control file into.
    @return: Path to the staged copy, i.e. results_dir joined with
            _CONTROL_FILE_FROM_CONTROL_NAME.
    """
    # Look the control file up first so a lookup failure is reported before
    # results_dir is touched.
    source = _control_path_on_disk(control_name)
    destination = os.path.join(results_dir, _CONTROL_FILE_FROM_CONTROL_NAME)
    shutil.copy2(source, destination)
    return destination
395*9c5db199SXin Li
396*9c5db199SXin Li
397*9c5db199SXin Lidef run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
398*9c5db199SXin Li    """Run server job with given options.
399*9c5db199SXin Li
400*9c5db199SXin Li    @param pid_file_manager: PidFileManager used to monitor the autoserv process
401*9c5db199SXin Li    @param results: Folder to store results.
402*9c5db199SXin Li    @param parser: Parser for the command line arguments.
403*9c5db199SXin Li    @param ssp_url: Url to server-side package.
404*9c5db199SXin Li    @param use_ssp: Set to True to run with server-side packaging.
405*9c5db199SXin Li    """
406*9c5db199SXin Li    # send stdin to /dev/null
407*9c5db199SXin Li    dev_null = os.open(os.devnull, os.O_RDONLY)
408*9c5db199SXin Li    os.dup2(dev_null, sys.stdin.fileno())
409*9c5db199SXin Li    os.close(dev_null)
410*9c5db199SXin Li
411*9c5db199SXin Li    # Create separate process group if the process is not a process group
412*9c5db199SXin Li    # leader. This allows autoserv process to keep running after the caller
413*9c5db199SXin Li    # process (drone manager call) exits.
414*9c5db199SXin Li    if os.getpid() != os.getpgid(0):
415*9c5db199SXin Li        os.setsid()
416*9c5db199SXin Li
417*9c5db199SXin Li    # Container name is predefined so the container can be destroyed in
418*9c5db199SXin Li    # handle_sigterm.
419*9c5db199SXin Li    job_or_task_id = job_directories.get_job_id_or_task_id(
420*9c5db199SXin Li            parser.options.results)
421*9c5db199SXin Li    container_id = lxc.ContainerId(job_or_task_id, time.time(), os.getpid())
422*9c5db199SXin Li
423*9c5db199SXin Li    # Implement SIGTERM handler
424*9c5db199SXin Li    def handle_sigterm(signum, frame):
425*9c5db199SXin Li        logging.debug('Received SIGTERM')
426*9c5db199SXin Li        if pid_file_manager:
427*9c5db199SXin Li            pid_file_manager.close_file(1, signal.SIGTERM)
428*9c5db199SXin Li        logging.debug('Finished writing to pid_file. Killing process.')
429*9c5db199SXin Li
430*9c5db199SXin Li        # Update results folder's file permission. This needs to be done ASAP
431*9c5db199SXin Li        # before the parsing process tries to access the log.
432*9c5db199SXin Li        if use_ssp and results:
433*9c5db199SXin Li            correct_results_folder_permission(results)
434*9c5db199SXin Li
435*9c5db199SXin Li        # This sleep allows the pending output to be logged before the kill
436*9c5db199SXin Li        # signal is sent.
437*9c5db199SXin Li        time.sleep(.1)
438*9c5db199SXin Li        if use_ssp:
439*9c5db199SXin Li            logging.debug('Destroy container %s before aborting the autoserv '
440*9c5db199SXin Li                          'process.', container_id)
441*9c5db199SXin Li            try:
442*9c5db199SXin Li                bucket = lxc.ContainerBucket(
443*9c5db199SXin Li                        base_name=_ssp_base_image_name_or_default(
444*9c5db199SXin Li                                parser.options))
445*9c5db199SXin Li                container = bucket.get_container(container_id)
446*9c5db199SXin Li                if container:
447*9c5db199SXin Li                    container.destroy()
448*9c5db199SXin Li                    logging.debug("Container %s destroyed.", container_id)
449*9c5db199SXin Li                else:
450*9c5db199SXin Li                    logging.debug('Container %s is not found.', container_id)
451*9c5db199SXin Li                    bucket.scrub_container_location(container_id)
452*9c5db199SXin Li            except:
453*9c5db199SXin Li                # Handle any exception so the autoserv process can be aborted.
454*9c5db199SXin Li                logging.exception('Failed to destroy container %s.',
455*9c5db199SXin Li                                  container_id)
456*9c5db199SXin Li            # Try to correct the result file permission again after the
457*9c5db199SXin Li            # container is destroyed, as the container might have created some
458*9c5db199SXin Li            # new files in the result folder.
459*9c5db199SXin Li            if results:
460*9c5db199SXin Li                correct_results_folder_permission(results)
461*9c5db199SXin Li
462*9c5db199SXin Li        os.killpg(os.getpgrp(), signal.SIGKILL)
463*9c5db199SXin Li
464*9c5db199SXin Li    # Set signal handler
465*9c5db199SXin Li    signal.signal(signal.SIGTERM, handle_sigterm)
466*9c5db199SXin Li
467*9c5db199SXin Li    # faulthandler is only needed to debug in the Lab and is not avaliable to
468*9c5db199SXin Li    # be imported in the chroot as part of VMTest, so Try-Except it.
469*9c5db199SXin Li    try:
470*9c5db199SXin Li        import faulthandler
471*9c5db199SXin Li        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
472*9c5db199SXin Li        logging.debug('faulthandler registered on SIGTERM.')
473*9c5db199SXin Li    except ImportError:
474*9c5db199SXin Li        # exc_clear() doesn't exist (nor is needed) in python3
475*9c5db199SXin Li        if six.PY2:
476*9c5db199SXin Li            sys.exc_clear()
477*9c5db199SXin Li
478*9c5db199SXin Li    # Ignore SIGTTOU's generated by output from forked children.
479*9c5db199SXin Li    signal.signal(signal.SIGTTOU, signal.SIG_IGN)
480*9c5db199SXin Li
481*9c5db199SXin Li    # If we received a SIGALARM, let's be loud about it.
482*9c5db199SXin Li    signal.signal(signal.SIGALRM, log_alarm)
483*9c5db199SXin Li
484*9c5db199SXin Li    # Server side tests that call shell scripts often depend on $USER being set
485*9c5db199SXin Li    # but depending on how you launch your autotest scheduler it may not be set.
486*9c5db199SXin Li    os.environ['USER'] = getpass.getuser()
487*9c5db199SXin Li
488*9c5db199SXin Li    label = parser.options.label
489*9c5db199SXin Li    group_name = parser.options.group_name
490*9c5db199SXin Li    user = parser.options.user
491*9c5db199SXin Li    client = parser.options.client
492*9c5db199SXin Li    server = parser.options.server
493*9c5db199SXin Li    verify = parser.options.verify
494*9c5db199SXin Li    repair = parser.options.repair
495*9c5db199SXin Li    cleanup = parser.options.cleanup
496*9c5db199SXin Li    provision = parser.options.provision
497*9c5db199SXin Li    reset = parser.options.reset
498*9c5db199SXin Li    job_labels = parser.options.job_labels
499*9c5db199SXin Li    no_tee = parser.options.no_tee
500*9c5db199SXin Li    execution_tag = parser.options.execution_tag
501*9c5db199SXin Li    ssh_user = parser.options.ssh_user
502*9c5db199SXin Li    ssh_port = parser.options.ssh_port
503*9c5db199SXin Li    ssh_pass = parser.options.ssh_pass
504*9c5db199SXin Li    collect_crashinfo = parser.options.collect_crashinfo
505*9c5db199SXin Li    control_filename = parser.options.control_filename
506*9c5db199SXin Li    verify_job_repo_url = parser.options.verify_job_repo_url
507*9c5db199SXin Li    skip_crash_collection = parser.options.skip_crash_collection
508*9c5db199SXin Li    ssh_verbosity = int(parser.options.ssh_verbosity)
509*9c5db199SXin Li    ssh_options = parser.options.ssh_options
510*9c5db199SXin Li    no_use_packaging = parser.options.no_use_packaging
511*9c5db199SXin Li    in_lab = bool(parser.options.lab)
512*9c5db199SXin Li    companion_hosts = _get_companions(parser)
513*9c5db199SXin Li    dut_servers = _get_dutservers(parser)
514*9c5db199SXin Li    is_cft = parser.options.cft
515*9c5db199SXin Li    force_full_log_collection = parser.options.force_full_log_collection
516*9c5db199SXin Li
517*9c5db199SXin Li    # can't be both a client and a server side test
518*9c5db199SXin Li    if client and server:
519*9c5db199SXin Li        parser.parser.error("Can not specify a test as both server and client!")
520*9c5db199SXin Li
521*9c5db199SXin Li    if provision and client:
522*9c5db199SXin Li        parser.parser.error("Cannot specify provisioning and client!")
523*9c5db199SXin Li
524*9c5db199SXin Li    is_special_task = (verify or repair or cleanup or collect_crashinfo or
525*9c5db199SXin Li                       provision or reset)
526*9c5db199SXin Li    use_client_trampoline = False
527*9c5db199SXin Li    if parser.options.control_name:
528*9c5db199SXin Li        if use_ssp:
529*9c5db199SXin Li            # When use_ssp is True, autoserv will be re-executed inside a
530*9c5db199SXin Li            # container preserving the --control-name argument. Control file
531*9c5db199SXin Li            # will be staged inside the rexecuted autoserv.
532*9c5db199SXin Li            control = None
533*9c5db199SXin Li        else:
534*9c5db199SXin Li            try:
535*9c5db199SXin Li                control = _stage_control_file(parser.options.control_name,
536*9c5db199SXin Li                                              results)
537*9c5db199SXin Li            except error.AutoservError as e:
538*9c5db199SXin Li                logging.info("Using client trampoline because of: %s", e)
539*9c5db199SXin Li                control = parser.options.control_name
540*9c5db199SXin Li                use_client_trampoline = True
541*9c5db199SXin Li
542*9c5db199SXin Li    elif parser.args:
543*9c5db199SXin Li        control = parser.args[0]
544*9c5db199SXin Li    else:
545*9c5db199SXin Li        if not is_special_task:
546*9c5db199SXin Li            parser.parser.error("Missing argument: control file")
547*9c5db199SXin Li        control = None
548*9c5db199SXin Li
549*9c5db199SXin Li    if ssh_verbosity > 0:
550*9c5db199SXin Li        # ssh_verbosity is an integer between 0 and 3, inclusive
551*9c5db199SXin Li        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
552*9c5db199SXin Li    else:
553*9c5db199SXin Li        ssh_verbosity_flag = ''
554*9c5db199SXin Li
555*9c5db199SXin Li    machines = _get_machines(parser)
556*9c5db199SXin Li    if group_name and len(machines) < 2:
557*9c5db199SXin Li        parser.parser.error('-G %r may only be supplied with more than one '
558*9c5db199SXin Li                            'machine.' % group_name)
559*9c5db199SXin Li
560*9c5db199SXin Li    logging.debug("Parser.args is %r", parser.args)
561*9c5db199SXin Li    try:
562*9c5db199SXin Li      logging.debug("Parser.options.args is %r", parser.options.args)
563*9c5db199SXin Li    except AttributeError:
564*9c5db199SXin Li      logging.debug("No Parser.options.args.")
565*9c5db199SXin Li
566*9c5db199SXin Li    try:
567*9c5db199SXin Li      logging.debug("Parser.options is %r", parser.options)
568*9c5db199SXin Li    except AttributeError:
569*9c5db199SXin Li      logging.debug("No Parser.options.")
570*9c5db199SXin Li    job_kwargs = {
571*9c5db199SXin Li            'control': control,
572*9c5db199SXin Li            'args': parser.args[1:],
573*9c5db199SXin Li            'resultdir': results,
574*9c5db199SXin Li            'label': label,
575*9c5db199SXin Li            'user': user,
576*9c5db199SXin Li            'machines': machines,
577*9c5db199SXin Li            'machine_dict_list': server_job.get_machine_dicts(
578*9c5db199SXin Li                    machine_names=machines,
579*9c5db199SXin Li                    store_dir=os.path.join(results,
580*9c5db199SXin Li                                           parser.options.host_info_subdir),
581*9c5db199SXin Li                    in_lab=in_lab,
582*9c5db199SXin Li                    use_shadow_store=not parser.options.local_only_host_info,
583*9c5db199SXin Li                    host_attributes=parser.options.host_attributes,
584*9c5db199SXin Li            ),
585*9c5db199SXin Li            'client': client,
586*9c5db199SXin Li            'ssh_user': ssh_user,
587*9c5db199SXin Li            'ssh_port': ssh_port,
588*9c5db199SXin Li            'ssh_pass': ssh_pass,
589*9c5db199SXin Li            'ssh_verbosity_flag': ssh_verbosity_flag,
590*9c5db199SXin Li            'ssh_options': ssh_options,
591*9c5db199SXin Li            'group_name': group_name,
592*9c5db199SXin Li            'tag': execution_tag,
593*9c5db199SXin Li            'disable_sysinfo': parser.options.disable_sysinfo,
594*9c5db199SXin Li            'in_lab': in_lab,
595*9c5db199SXin Li            'use_client_trampoline': use_client_trampoline,
596*9c5db199SXin Li            'sync_offload_dir': parser.options.sync_offload_dir,
597*9c5db199SXin Li            'companion_hosts': server_job.get_machine_dicts(
598*9c5db199SXin Li                    machine_names=companion_hosts,
599*9c5db199SXin Li                    store_dir=os.path.join(results,
600*9c5db199SXin Li                                           parser.options.host_info_subdir),
601*9c5db199SXin Li                    in_lab=in_lab,
602*9c5db199SXin Li                    use_shadow_store=not parser.options.local_only_host_info,
603*9c5db199SXin Li                    host_attributes=parser.options.host_attributes),
604*9c5db199SXin Li            'dut_servers': dut_servers,
605*9c5db199SXin Li            'is_cft': is_cft,
606*9c5db199SXin Li            'force_full_log_collection': force_full_log_collection
607*9c5db199SXin Li    }
608*9c5db199SXin Li    if parser.options.parent_job_id:
609*9c5db199SXin Li        job_kwargs['parent_job_id'] = int(parser.options.parent_job_id)
610*9c5db199SXin Li    if control_filename:
611*9c5db199SXin Li        job_kwargs['control_filename'] = control_filename
612*9c5db199SXin Li    if parser.options.image_storage_server:
613*9c5db199SXin Li        global_config.global_config.override_config_value(
614*9c5db199SXin Li            'CROS', 'image_storage_server',
615*9c5db199SXin Li            os.path.join(parser.options.image_storage_server, ''))
616*9c5db199SXin Li
617*9c5db199SXin Li    job = server_job.server_job(**job_kwargs)
618*9c5db199SXin Li
619*9c5db199SXin Li    job.logging.start_logging()
620*9c5db199SXin Li
621*9c5db199SXin Li    # perform checks
622*9c5db199SXin Li    job.precheck()
623*9c5db199SXin Li
624*9c5db199SXin Li    # run the job
625*9c5db199SXin Li    exit_code = 0
626*9c5db199SXin Li    auto_start_servod = global_config.global_config.get_config_value(
627*9c5db199SXin Li            'AUTOSERV', 'auto_start_servod', type=bool, default=False)
628*9c5db199SXin Li
629*9c5db199SXin Li    if not utils.is_in_container():
630*9c5db199SXin Li        # crbug.com/1054522 -- ts_mon setup is broken inside the SSP container
631*9c5db199SXin Li        # due to a problem in the installed python packages.
632*9c5db199SXin Li        # Trying to clean up an incorrectly initialized ts_mon state adds a 5
633*9c5db199SXin Li        # second overhead in process teardown, so avoid setting up ts_mon
634*9c5db199SXin Li        # entirely inside the SSP container.
635*9c5db199SXin Li        site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
636*9c5db199SXin Li                                         short_lived=True)
637*9c5db199SXin Li    try:
638*9c5db199SXin Li        try:
639*9c5db199SXin Li            if repair:
640*9c5db199SXin Li                if auto_start_servod and len(machines) == 1:
641*9c5db199SXin Li                    _start_servod(machines[0])
642*9c5db199SXin Li                job.repair(job_labels)
643*9c5db199SXin Li            elif verify:
644*9c5db199SXin Li                job.verify(job_labels)
645*9c5db199SXin Li            elif provision:
646*9c5db199SXin Li                job.provision(job_labels)
647*9c5db199SXin Li            elif reset:
648*9c5db199SXin Li                job.reset(job_labels)
649*9c5db199SXin Li            elif cleanup:
650*9c5db199SXin Li                job.cleanup(job_labels)
651*9c5db199SXin Li            else:
652*9c5db199SXin Li                if auto_start_servod and len(machines) == 1:
653*9c5db199SXin Li                    _start_servod(machines[0])
654*9c5db199SXin Li                if use_ssp:
655*9c5db199SXin Li                    try:
656*9c5db199SXin Li                        _run_with_ssp(job, container_id, job_or_task_id,
657*9c5db199SXin Li                                        results, parser, ssp_url, machines)
658*9c5db199SXin Li                    finally:
659*9c5db199SXin Li                        # Update the ownership of files in result folder.
660*9c5db199SXin Li                        correct_results_folder_permission(results)
661*9c5db199SXin Li                else:
662*9c5db199SXin Li                    if collect_crashinfo:
663*9c5db199SXin Li                        # Update the ownership of files in result folder. If the
664*9c5db199SXin Li                        # job to collect crashinfo was running inside container
665*9c5db199SXin Li                        # (SSP) and crashed before correcting folder permission,
666*9c5db199SXin Li                        # the result folder might have wrong permission setting.
667*9c5db199SXin Li                        try:
668*9c5db199SXin Li                            correct_results_folder_permission(results)
669*9c5db199SXin Li                        except:
670*9c5db199SXin Li                            # Ignore any error as the user may not have root
671*9c5db199SXin Li                            # permission to run sudo command.
672*9c5db199SXin Li                            pass
673*9c5db199SXin Li                    metric_name = ('chromeos/autotest/experimental/'
674*9c5db199SXin Li                                   'autoserv_job_run_duration')
675*9c5db199SXin Li                    f = {'in_container': utils.is_in_container(),
676*9c5db199SXin Li                         'success': False}
677*9c5db199SXin Li                    with metrics.SecondsTimer(metric_name, fields=f) as c:
678*9c5db199SXin Li                        job.run(verify_job_repo_url=verify_job_repo_url,
679*9c5db199SXin Li                                only_collect_crashinfo=collect_crashinfo,
680*9c5db199SXin Li                                skip_crash_collection=skip_crash_collection,
681*9c5db199SXin Li                                job_labels=job_labels,
682*9c5db199SXin Li                                use_packaging=(not no_use_packaging))
683*9c5db199SXin Li                        c['success'] = True
684*9c5db199SXin Li
685*9c5db199SXin Li        finally:
686*9c5db199SXin Li            job.close()
687*9c5db199SXin Li    except error.AutoservSSPError:
688*9c5db199SXin Li        # Due to the complexity of the TKO parsing/stainless connection, this
689*9c5db199SXin Li        # must be 0 so that the "abort" is actually reflected on stainless.
690*9c5db199SXin Li        exit_code = 0
691*9c5db199SXin Li        traceback.print_exc()
692*9c5db199SXin Li    except:
693*9c5db199SXin Li        exit_code = 1
694*9c5db199SXin Li        traceback.print_exc()
695*9c5db199SXin Li    finally:
696*9c5db199SXin Li        metrics.Flush()
697*9c5db199SXin Li
698*9c5db199SXin Li    sys.exit(exit_code)
699*9c5db199SXin Li
700*9c5db199SXin Li
# Job breakdown statuses.
# REPAIRING is taken from the host status enum; every other entry is a host
# queue entry (HQE) status.
_hs = host_states.Status
_qs = host_queue_entry_states.Status
_status_list = [
        _qs.QUEUED,
        _qs.RESETTING,
        _qs.VERIFYING,
        _qs.PROVISIONING,
        _hs.REPAIRING,
        _qs.CLEANING,
        _qs.RUNNING,
        _qs.GATHERING,
        _qs.PARSING,
]
_JOB_OVERHEAD_STATUS = autotest_enum.AutotestEnum(
        *_status_list, string_values=True)
710*9c5db199SXin Li
711*9c5db199SXin Li
def get_job_status(options):
    """Returns the HQE Status for this run.

    The special-task flags are checked in a fixed order and the status of the
    first one that is set on options wins. When none is set the run is
    reported as RUNNING.

    @param options: parser options.

    @returns: a member of _JOB_OVERHEAD_STATUS.
    """
    statuses = _JOB_OVERHEAD_STATUS
    # (option attribute, status) pairs, in lookup priority order.
    flag_to_status = (
            ('reset', statuses.RESETTING),
            ('verify', statuses.VERIFYING),
            ('provision', statuses.PROVISIONING),
            ('repair', statuses.REPAIRING),
            ('cleanup', statuses.CLEANING),
            ('collect_crashinfo', statuses.GATHERING),
    )
    for flag, status in flag_to_status:
        # A missing attribute is treated the same as an unset flag.
        if getattr(options, flag, False):
            return status
    return statuses.RUNNING
724*9c5db199SXin Li
725*9c5db199SXin Li
def _require_ssp_from_control(control_name):
    """Decide whether SSP is needed based on the test's control file.

    The control file is looked up in the prod checkout of autotest, and its
    REQUIRE_SSP directive decides whether the SSP package is staged on a
    devserver at all.

    Consequences:
    [1] A change to a test's REQUIRE_SSP directive only takes effect after a
    prod-push.
    [2] The control file may be missing locally even though the SSP package
    contains the test. In that case this function conservatively answers
    True.

    This runs very early in autoserv, before logging is set up, so problems
    are reported directly on stderr.

    @param control_name: name of the control file; may be empty or None.

    @returns: the control file's require_ssp value when it is explicitly set,
            True in every other case.
    """
    if control_name:
        try:
            control_path = _control_path_on_disk(control_name)
        except error.AutoservError as e:
            sys.stderr.write("autoserv: Could not determine control file path,"
                             " assuming we need SSP: %s\n" % e)
            sys.stderr.flush()
        else:
            if os.path.isfile(control_path):
                parsed = control_data.parse_control(control_path)
                # Only an explicit directive in the control file can disable
                # SSP.
                if parsed and parsed.require_ssp is not None:
                    return parsed.require_ssp
    # Default for every undecidable case: assume SSP is required.
    return True
757*9c5db199SXin Li
758*9c5db199SXin Li
def _ssp_base_image_name_or_default(options):
    """Return the SSP base image name, preferring the autoserv option.

    @param options: parsed autoserv options; ssp_base_image_name is read.

    @returns: options.ssp_base_image_name when it is set (truthy), otherwise
            the AUTOSERV/container_base_name value from the global config.
    """
    return (options.ssp_base_image_name or
            global_config.global_config.get_config_value(
                    'AUTOSERV', 'container_base_name'))
765*9c5db199SXin Li
766*9c5db199SXin Li
def main():
    """Parse the command line, prepare results/logging and run autoserv.

    In order, this:
      - parses arguments, printing help and exiting with 1 when none were
        given;
      - resolves and creates the results directory unless options.no_logging
        is set, refusing to reuse a directory that holds files from an
        earlier run unless options.use_existing_results is set;
      - decides whether SSP is used and configures logging accordingly;
      - optionally opens a pidfile in the results directory;
      - calls run_autoserv() inside a cloud trace span.

    The process always terminates through sys.exit(): with the code carried by
    a SystemExit raised from run_autoserv(), with 1 for any other Exception,
    or with 0 otherwise. When a pidfile is open, the exit code is recorded in
    it before exiting.
    """
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # Bound up front so the later use_existing_results check can never hit an
    # unbound local, whichever branch is taken below.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # Any one of these files marks the directory as already used by a
        # previous autoserv run.
        resultdir_exists = any(
                os.path.exists(os.path.join(results, marker))
                for marker in ('control.srv', 'status.log',
                               '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            # Logging is not configured yet, so report on stderr. The message
            # is not stored in a local named `error`, which would shadow the
            # imported `error` module for the whole function.
            sys.stderr.write(
                    "Error: results directory already exists: %s\n" % results)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    if parser.options.require_ssp:
        # This is currently only used for skylab (i.e., when --control-name is
        # used).
        use_ssp = _require_ssp_from_control(parser.options.control_name)
    else:
        use_ssp = False

    if use_ssp:
        # With SSP, this process logs into a dedicated subdirectory of the
        # results directory.
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv environment: %r', os.environ)
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))
    logging.debug('autoserv parsed options: %s', parser.options)
    logging.debug('autoserv python version: %s', sys.version)

    if use_ssp:
        ssp_url = _stage_ssp(parser, results)
    else:
        ssp_url = None

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it get properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.Autotest.set_install_in_tmpdir(
        parser.options.install_in_tmpdir)

    exit_code = 0

    trace_labels = {
            'job_id': job_directories.get_job_id_or_task_id(
                    parser.options.results)
    }
    trace = cloud_trace.SpanStack(
            labels=trace_labels,
            global_context=parser.options.cloud_trace_context)
    # The option is compared as a string: only the literal 'True' enables
    # tracing.
    trace.enabled = parser.options.cloud_trace_context_enabled == 'True'
    try:
        try:
            with trace.Span(get_job_status(parser.options)):
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            # Propagate the exit code of the sys.exit() call that raised this.
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s', exit_code)
        except Exception:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception('Uncaught Exception, exit_code = 1.')
            exit_code = 1
    finally:
        # Record the final exit code in the pidfile, even on unexpected errors.
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
    sys.exit(exit_code)
881*9c5db199SXin Li
882*9c5db199SXin Li
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
885