xref: /aosp_15_r20/external/autotest/site_utils/check_hung_proc.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li#!/usr/bin/python3
2*9c5db199SXin Li#
3*9c5db199SXin Li# Copyright (c) 2015 The Chromium OS Authors. All rights reserved.
4*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be
5*9c5db199SXin Li# found in the LICENSE file.
6*9c5db199SXin Li
7*9c5db199SXin Li
8*9c5db199SXin Li"""Script to check the number of long-running processes.
9*9c5db199SXin Li
10*9c5db199SXin LiThis script gets the number of processes for "gsutil" and "autoserv"
11*9c5db199SXin Lithat are running more than 24 hours, and throws the number to stats
12*9c5db199SXin Lidashboard.
13*9c5db199SXin Li
14*9c5db199SXin LiThis script depends on the "etimes" user-defined format of "ps".
15*9c5db199SXin LiGoobuntu 14.04 has the version of ps that supports etimes, but not
16*9c5db199SXin LiGoobuntu 12.04.
17*9c5db199SXin Li"""
18*9c5db199SXin Li
19*9c5db199SXin Li
20*9c5db199SXin Liimport subprocess
21*9c5db199SXin Li
22*9c5db199SXin Lifrom autotest_lib.server import site_utils
23*9c5db199SXin Li
24*9c5db199SXin Litry:
25*9c5db199SXin Li    from autotest_lib.utils.frozen_chromite.lib import metrics
26*9c5db199SXin Liexcept ImportError:
27*9c5db199SXin Li    metrics = site_utils.metrics_mock
28*9c5db199SXin Li
29*9c5db199SXin Li
# Names of the programs whose long-running processes are counted.
PROGRAM_TO_CHECK_SET = {'gsutil', 'autoserv'}
31*9c5db199SXin Li
def check_proc(prog, max_elapsed_sec):
    """Check the number of long-running processes for a given program.

    Lists every process with "ps -eo etimes,args", counts the ones whose
    command line contains prog and whose elapsed time is strictly greater
    than max_elapsed_sec, and sets that count on the
    'chromeos/autotest/hung_processes' gauge.

    @param prog: Program name. Matched as a plain substring of the process
                 command line. Names that are not in PROGRAM_TO_CHECK_SET
                 are reported under the 'unknown' program field.
    @param max_elapsed_sec: Max elapsed time in seconds. Processes that
                            have run more than this value will be caught.

    @raises subprocess.CalledProcessError: if ps exits with a non-zero
                                           status.
    """
    # Run ps directly with an argument list instead of building a shell
    # pipeline, so prog is never interpreted by the shell or as a grep
    # pattern.
    ps_output = subprocess.check_output(['ps', '-eo', 'etimes,args'])

    count = 0
    # Command lines may hold arbitrary bytes; replace anything undecodable
    # rather than failing the whole check.
    for line in ps_output.decode('utf-8', 'replace').splitlines():
        fields = line.split(None, 1)
        if len(fields) != 2:
            continue
        elapsed, args = fields
        try:
            elapsed_sec = int(elapsed)
        except ValueError:
            # The "ELAPSED COMMAND" header line, or an unparsable row.
            continue
        if elapsed_sec > max_elapsed_sec and prog in args:
            count += 1

    if prog not in PROGRAM_TO_CHECK_SET:
        prog = 'unknown'

    metrics.Gauge('chromeos/autotest/hung_processes').set(
            count, fields={'program': prog}
    )
53*9c5db199SXin Li
54*9c5db199SXin Li
def main():
    """Run check_proc on every program in PROGRAM_TO_CHECK_SET.

    Each program is checked with a threshold of 86400 seconds (24 hours),
    inside the site_utils.SetupTsMonGlobalState context.
    """
    one_day_sec = 86400
    ts_mon_state = site_utils.SetupTsMonGlobalState('check_hung_proc',
                                                    short_lived=True)
    with ts_mon_state:
        for program in PROGRAM_TO_CHECK_SET:
            check_proc(program, one_day_sec)
60*9c5db199SXin Li
61*9c5db199SXin Li
# Only run the check when executed as a script, not when imported.
if __name__ == '__main__':
    main()
64