1*9c5db199SXin Li# Lint as: python2, python3 2*9c5db199SXin Li# Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 3*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be 4*9c5db199SXin Li# found in the LICENSE file. 5*9c5db199SXin Li 6*9c5db199SXin Lifrom __future__ import absolute_import 7*9c5db199SXin Lifrom __future__ import division 8*9c5db199SXin Lifrom __future__ import print_function 9*9c5db199SXin Li 10*9c5db199SXin Liimport logging 11*9c5db199SXin Liimport random 12*9c5db199SXin Liimport signal 13*9c5db199SXin Liimport six 14*9c5db199SXin Liimport sys 15*9c5db199SXin Liimport threading 16*9c5db199SXin Liimport time 17*9c5db199SXin Li 18*9c5db199SXin Lifrom autotest_lib.client.common_lib import env 19*9c5db199SXin Lifrom autotest_lib.client.common_lib import error 20*9c5db199SXin Li 21*9c5db199SXin Li 22*9c5db199SXin Lidef install_sigalarm_handler(new_handler): 23*9c5db199SXin Li """ 24*9c5db199SXin Li Try installing a sigalarm handler. 25*9c5db199SXin Li 26*9c5db199SXin Li In order to protect apache, wsgi intercepts any attempt to install a 27*9c5db199SXin Li sigalarm handler, so our function will feel the full force of a sigalarm 28*9c5db199SXin Li even if we try to install a pacifying signal handler. To avoid this we 29*9c5db199SXin Li need to confirm that the handler we tried to install really was installed. 30*9c5db199SXin Li 31*9c5db199SXin Li @param new_handler: The new handler to install. This must be a callable 32*9c5db199SXin Li object, or signal.SIG_IGN/SIG_DFL which correspond to 33*9c5db199SXin Li the numbers 1,0 respectively. 34*9c5db199SXin Li @return: True if the installation of new_handler succeeded, False otherwise. 35*9c5db199SXin Li """ 36*9c5db199SXin Li # Installing signal handlers does not and is never expected to work if we're 37*9c5db199SXin Li # running in a mod_wsgi process. 38*9c5db199SXin Li if env.IN_MOD_WSGI: 39*9c5db199SXin Li return False 40*9c5db199SXin Li 41*9c5db199SXin Li if (new_handler is None or 42*9c5db199SXin Li (not callable(new_handler) and 43*9c5db199SXin Li new_handler != signal.SIG_IGN and 44*9c5db199SXin Li new_handler != signal.SIG_DFL)): 45*9c5db199SXin Li logging.warning('Trying to install an invalid sigalarm handler.') 46*9c5db199SXin Li return False 47*9c5db199SXin Li 48*9c5db199SXin Li signal.signal(signal.SIGALRM, new_handler) 49*9c5db199SXin Li installed_handler = signal.getsignal(signal.SIGALRM) 50*9c5db199SXin Li return installed_handler == new_handler 51*9c5db199SXin Li 52*9c5db199SXin Li 53*9c5db199SXin Lidef set_sigalarm_timeout(timeout_secs, default_timeout=60): 54*9c5db199SXin Li """ 55*9c5db199SXin Li Set the sigalarm timeout. 56*9c5db199SXin Li 57*9c5db199SXin Li This methods treats any timeout <= 0 as a possible error and falls back to 58*9c5db199SXin Li using it's default timeout, since negative timeouts can have 'alarming' 59*9c5db199SXin Li effects. Though 0 is a valid timeout, it is often used to cancel signals; in 60*9c5db199SXin Li order to set a sigalarm of 0 please call signal.alarm directly as there are 61*9c5db199SXin Li many situations where a 0 timeout is considered invalid. 62*9c5db199SXin Li 63*9c5db199SXin Li @param timeout_secs: The new timeout, in seconds. 64*9c5db199SXin Li @param default_timeout: The default timeout to use, if timeout <= 0. 65*9c5db199SXin Li @return: The old sigalarm timeout 66*9c5db199SXin Li """ 67*9c5db199SXin Li timeout_sec_n = int(timeout_secs) 68*9c5db199SXin Li if timeout_sec_n <= 0: 69*9c5db199SXin Li timeout_sec_n = int(default_timeout) 70*9c5db199SXin Li return signal.alarm(timeout_sec_n) 71*9c5db199SXin Li 72*9c5db199SXin Li 73*9c5db199SXin Lidef sigalarm_wrapper(message): 74*9c5db199SXin Li """ 75*9c5db199SXin Li Raise a TimeoutException with the given message. Needed because the body 76*9c5db199SXin Li of a closure (lambda) can only be an expression, not a statement (such 77*9c5db199SXin Li as "raise") :P :P :P 78*9c5db199SXin Li 79*9c5db199SXin Li @param message: the exception message. 80*9c5db199SXin Li """ 81*9c5db199SXin Li raise error.TimeoutException(message) 82*9c5db199SXin Li 83*9c5db199SXin Li 84*9c5db199SXin Lidef custom_sigalarm_handler(func, timeout_sec): 85*9c5db199SXin Li """ 86*9c5db199SXin Li Returns a sigalarm handler which produces an exception with a custom 87*9c5db199SXin Li error message (function name and timeout length) instead of a generic 88*9c5db199SXin Li one. 89*9c5db199SXin Li 90*9c5db199SXin Li @param func: the function that may time out 91*9c5db199SXin Li @param timeout_sec: timeout length in seconds 92*9c5db199SXin Li """ 93*9c5db199SXin Li try: 94*9c5db199SXin Li name = str(func.__name__) 95*9c5db199SXin Li except Exception as e: 96*9c5db199SXin Li name = '(unavailable function name: exception: %s)' % e 97*9c5db199SXin Li message = "sigalarm timeout (%d seconds) in %s" % (timeout_sec, name) 98*9c5db199SXin Li return lambda signum, frame: sigalarm_wrapper(message) 99*9c5db199SXin Li 100*9c5db199SXin Li 101*9c5db199SXin Lidef timeout(func, args=(), kwargs={}, timeout_sec=60.0, default_result=None): 102*9c5db199SXin Li """ 103*9c5db199SXin Li This function run the given function using the args, kwargs and 104*9c5db199SXin Li return the given default value if the timeout_sec is exceeded. 105*9c5db199SXin Li 106*9c5db199SXin Li @param func: function to be called. 107*9c5db199SXin Li @param args: arguments for function to be called. 108*9c5db199SXin Li @param kwargs: keyword arguments for function to be called. 109*9c5db199SXin Li @param timeout_sec: timeout setting for call to exit, in seconds. 110*9c5db199SXin Li @param default_result: default return value for the function call. 111*9c5db199SXin Li 112*9c5db199SXin Li @return 1: is_timeout 2: result of the function call. If 113*9c5db199SXin Li is_timeout is True, the call is timed out. If the 114*9c5db199SXin Li value is False, the call is finished on time. 115*9c5db199SXin Li """ 116*9c5db199SXin Li old_alarm_sec = 0 117*9c5db199SXin Li old_handler = signal.getsignal(signal.SIGALRM) 118*9c5db199SXin Li handler = custom_sigalarm_handler(func, timeout_sec) 119*9c5db199SXin Li installed_handler = install_sigalarm_handler(handler) 120*9c5db199SXin Li if installed_handler: 121*9c5db199SXin Li old_alarm_sec = set_sigalarm_timeout(timeout_sec, default_timeout=60) 122*9c5db199SXin Li 123*9c5db199SXin Li # If old_timeout_time = 0 we either didn't install a handler, or sigalrm 124*9c5db199SXin Li # had a signal.SIG_DFL handler with 0 timeout. In the latter case we still 125*9c5db199SXin Li # need to restore the handler/timeout. 126*9c5db199SXin Li old_timeout_time = (time.time() + old_alarm_sec) if old_alarm_sec > 0 else 0 127*9c5db199SXin Li 128*9c5db199SXin Li try: 129*9c5db199SXin Li default_result = func(*args, **kwargs) 130*9c5db199SXin Li return False, default_result 131*9c5db199SXin Li except error.TimeoutException: 132*9c5db199SXin Li return True, default_result 133*9c5db199SXin Li finally: 134*9c5db199SXin Li # If we installed a sigalarm handler, cancel it since our function 135*9c5db199SXin Li # returned on time. If we can successfully restore the old handler, 136*9c5db199SXin Li # reset the old timeout, or, if the old timeout's deadline has passed, 137*9c5db199SXin Li # set the sigalarm to fire in one second. If the old_timeout_time is 0 138*9c5db199SXin Li # we don't need to set the sigalarm timeout since we have already set it 139*9c5db199SXin Li # as a byproduct of cancelling the current signal. 140*9c5db199SXin Li if installed_handler: 141*9c5db199SXin Li signal.alarm(0) 142*9c5db199SXin Li if install_sigalarm_handler(old_handler) and old_timeout_time: 143*9c5db199SXin Li set_sigalarm_timeout(int(old_timeout_time - time.time()), 144*9c5db199SXin Li default_timeout=1) 145*9c5db199SXin Li 146*9c5db199SXin Li 147*9c5db199SXin Li 148*9c5db199SXin Lidef retry(ExceptionToCheck, timeout_min=1.0, delay_sec=3, raiselist=None, 149*9c5db199SXin Li exception_to_raise=None, label=None, callback=None, backoff=1): 150*9c5db199SXin Li """Retry calling the decorated function using a delay with jitter. 151*9c5db199SXin Li 152*9c5db199SXin Li Will raise RPC ValidationError exceptions from the decorated 153*9c5db199SXin Li function without retrying; a malformed RPC isn't going to 154*9c5db199SXin Li magically become good. Will raise exceptions in raiselist as well. 155*9c5db199SXin Li 156*9c5db199SXin Li If the retry is done in a child thread, timeout may not be enforced as 157*9c5db199SXin Li signal only works in main thread. Therefore, the retry inside a child 158*9c5db199SXin Li thread may run longer than timeout or even hang. 159*9c5db199SXin Li 160*9c5db199SXin Li original from: 161*9c5db199SXin Li http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/ 162*9c5db199SXin Li 163*9c5db199SXin Li @param ExceptionToCheck: the exception to check. May be a tuple of 164*9c5db199SXin Li exceptions to check. 165*9c5db199SXin Li @param timeout_min: timeout in minutes until giving up. 166*9c5db199SXin Li @param delay_sec: pre-jittered base delay between retries in seconds. Actual 167*9c5db199SXin Li delays will be first calculated with exponential backoff, 168*9c5db199SXin Li then randomized around this new value, ranging up to 50% 169*9c5db199SXin Li off this midpoint. 170*9c5db199SXin Li @param raiselist: a list of exceptions that will be raised without retrying. 171*9c5db199SXin Li @param exception_to_raise: the exception to raise. Callers can specify the 172*9c5db199SXin Li exception they want to raise. 173*9c5db199SXin Li @param label: a label added to the exception message to help debug. 174*9c5db199SXin Li @param callback: a function to call before each retry. 175*9c5db199SXin Li @param backoff: exponent to calculate exponential backoff for the actual 176*9c5db199SXin Li delay. Set to 1 to disable exponential backoff. 177*9c5db199SXin Li """ 178*9c5db199SXin Li def deco_retry(func): 179*9c5db199SXin Li """ 180*9c5db199SXin Li Decorator wrapper. 181*9c5db199SXin Li 182*9c5db199SXin Li @param func: the function to be retried and timed-out. 183*9c5db199SXin Li """ 184*9c5db199SXin Li random.seed() 185*9c5db199SXin Li 186*9c5db199SXin Li 187*9c5db199SXin Li def delay(delay_with_backoff_sec): 188*9c5db199SXin Li """ 189*9c5db199SXin Li 'Jitter' the delay with backoff, up to 50% in either direction. 190*9c5db199SXin Li """ 191*9c5db199SXin Li random_delay = random.uniform(0.5 * delay_with_backoff_sec, 192*9c5db199SXin Li 1.5 * delay_with_backoff_sec) 193*9c5db199SXin Li logging.warning('Retrying in %f seconds...', random_delay) 194*9c5db199SXin Li time.sleep(random_delay) 195*9c5db199SXin Li 196*9c5db199SXin Li 197*9c5db199SXin Li def func_retry(*args, **kwargs): 198*9c5db199SXin Li """ 199*9c5db199SXin Li Used to cache exception to be raised later. 200*9c5db199SXin Li """ 201*9c5db199SXin Li exc_info = None 202*9c5db199SXin Li delayed_enabled = False 203*9c5db199SXin Li exception_tuple = () if raiselist is None else tuple(raiselist) 204*9c5db199SXin Li start_time = time.time() 205*9c5db199SXin Li remaining_time = timeout_min * 60 206*9c5db199SXin Li delay_with_backoff_sec = delay_sec 207*9c5db199SXin Li is_main_thread = isinstance(threading.current_thread(), 208*9c5db199SXin Li threading._MainThread) 209*9c5db199SXin Li if label: 210*9c5db199SXin Li details = 'label="%s"' % label 211*9c5db199SXin Li elif hasattr(func, '__name__'): 212*9c5db199SXin Li details = 'function="%s()"' % func.__name__ 213*9c5db199SXin Li else: 214*9c5db199SXin Li details = 'unknown function' 215*9c5db199SXin Li 216*9c5db199SXin Li exception_message = ('retry exception (%s), timeout = %ds' % 217*9c5db199SXin Li (details, timeout_min * 60)) 218*9c5db199SXin Li 219*9c5db199SXin Li while remaining_time > 0: 220*9c5db199SXin Li if delayed_enabled: 221*9c5db199SXin Li delay(delay_with_backoff_sec) 222*9c5db199SXin Li delay_with_backoff_sec *= backoff 223*9c5db199SXin Li else: 224*9c5db199SXin Li delayed_enabled = True 225*9c5db199SXin Li try: 226*9c5db199SXin Li # Clear the cache 227*9c5db199SXin Li exc_info = None 228*9c5db199SXin Li if is_main_thread: 229*9c5db199SXin Li is_timeout, result = timeout(func, args, kwargs, 230*9c5db199SXin Li remaining_time) 231*9c5db199SXin Li if not is_timeout: 232*9c5db199SXin Li return result 233*9c5db199SXin Li else: 234*9c5db199SXin Li return func(*args, **kwargs) 235*9c5db199SXin Li except exception_tuple: 236*9c5db199SXin Li raise 237*9c5db199SXin Li except error.CrosDynamicSuiteException: 238*9c5db199SXin Li raise 239*9c5db199SXin Li except ExceptionToCheck as e: 240*9c5db199SXin Li logging.warning('%s(%s)', e.__class__, e) 241*9c5db199SXin Li # Cache the exception to be raised later. 242*9c5db199SXin Li exc_info = sys.exc_info() 243*9c5db199SXin Li 244*9c5db199SXin Li remaining_time = int(timeout_min * 60 - 245*9c5db199SXin Li (time.time() - start_time)) 246*9c5db199SXin Li 247*9c5db199SXin Li if remaining_time > 0 and callback: 248*9c5db199SXin Li callback() 249*9c5db199SXin Li remaining_time = int(timeout_min * 60 - 250*9c5db199SXin Li (time.time() - start_time)) 251*9c5db199SXin Li 252*9c5db199SXin Li 253*9c5db199SXin Li # The call must have timed out or raised ExceptionToCheck. 254*9c5db199SXin Li if not exc_info: 255*9c5db199SXin Li if exception_to_raise: 256*9c5db199SXin Li raise exception_to_raise(exception_message) 257*9c5db199SXin Li else: 258*9c5db199SXin Li raise error.TimeoutException(exception_message) 259*9c5db199SXin Li # Raise the cached exception with original backtrace. 260*9c5db199SXin Li if exception_to_raise: 261*9c5db199SXin Li raise exception_to_raise('%s: %s' % (exc_info[0], exc_info[1])) 262*9c5db199SXin Li six.reraise(exc_info[0], exc_info[1], exc_info[2]) 263*9c5db199SXin Li 264*9c5db199SXin Li 265*9c5db199SXin Li return func_retry # true decorator 266*9c5db199SXin Li return deco_retry 267