#!/usr/bin/python3
#
# Copyright (c) 2015 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


"""Script to check the number of long-running processes.

This script counts the "gsutil" and "autoserv" processes that have been
running for more than 24 hours, and reports each count to the stats
dashboard.

This script depends on the "etimes" user-defined format of "ps".
Goobuntu 14.04 has the version of ps that supports etimes, but not
Goobuntu 12.04.
"""


import subprocess

from autotest_lib.server import site_utils

try:
    from autotest_lib.utils.frozen_chromite.lib import metrics
except ImportError:
    metrics = site_utils.metrics_mock


PROGRAM_TO_CHECK_SET = {'gsutil', 'autoserv'}

# Age threshold used by main(): 24 hours, expressed in seconds.
_MAX_ELAPSED_SEC = 24 * 60 * 60


def _count_long_running(ps_output, prog, max_elapsed_sec):
    """Count rows of `ps -eo etimes,args` output that match a program.

    A row is counted when it contains |prog| as a substring and its first
    column (elapsed seconds) is strictly greater than |max_elapsed_sec|.

    @param ps_output: Text output of `ps -eo etimes,args`.
    @param prog: Program name to look for in each row.
    @param max_elapsed_sec: Max elapsed time in seconds.

    @returns Number of matching rows.
    """
    count = 0
    for line in ps_output.splitlines():
        if prog not in line:
            continue
        columns = line.split(None, 1)
        if not columns:
            continue
        try:
            elapsed = int(columns[0])
        except ValueError:
            # Not a process row (e.g. the "ELAPSED COMMAND" header).
            continue
        if elapsed > max_elapsed_sec:
            count += 1
    return count


def check_proc(prog, max_elapsed_sec):
    """Check the number of long-running processes for a given program.

    Finds out the number of processes for a given program that have run
    more than a given elapsed time.
    Sends out the number to stats dashboard.

    @param prog: Program name.
    @param max_elapsed_sec: Max elapsed time in seconds. Processes that
                            have run more than this value will be caught.

    @raises subprocess.CalledProcessError: if `ps` exits with a non-zero
                                           status.
    """
    # Run ps with an argv list instead of a shell pipeline so that |prog|
    # is never interpreted by a shell or as a grep pattern.
    raw_output = subprocess.check_output(['ps', '-eo', 'etimes,args'])
    # Command lines are arbitrary bytes; never fail on undecodable ones.
    ps_output = raw_output.decode('utf-8', errors='replace')
    count = _count_long_running(ps_output, prog, max_elapsed_sec)

    # Keep the metric's field values limited to the known program names.
    if prog not in PROGRAM_TO_CHECK_SET:
        prog = 'unknown'

    metrics.Gauge('chromeos/autotest/hung_processes').set(
            count, fields={'program': prog}
    )


def main():
    """Report the hung-process count for every program being tracked."""
    with site_utils.SetupTsMonGlobalState('check_hung_proc', short_lived=True):
        for p in PROGRAM_TO_CHECK_SET:
            check_proc(p, _MAX_ELAPSED_SEC)


if __name__ == '__main__':
    main()