# -*- coding: utf-8 -*-
# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Infrastructure for collecting statistics about retries."""

from __future__ import print_function

import collections
import datetime

from autotest_lib.utils.frozen_chromite.lib import parallel
from autotest_lib.utils.frozen_chromite.lib import retry_util


# Well known categories we gather stats for.
CIDB = 'CIDB'
GSUTIL = 'Google Storage'


class UnconfiguredStatsCategory(Exception):
  """We tried to use a Stats Category without configuring it."""


# Create one of these for each retry call.
#   category: The string naming the 'namespace' the call is counted under.
#   attempts: a list of all attempts (Attempt values) to perform the action.
StatEntry = collections.namedtuple(
    'StatEntry',
    ('category', 'attempts'))

# Create one of these for each attempt to call the function.
#   time: How long this attempt took, as a datetime.timedelta (RetryWithStats
#         stores the difference of two datetime.datetime.now() readings).
#   exception: None for a successful attempt, or a string exception description.
Attempt = collections.namedtuple(
    'Attempt',
    ('time', 'exception'))


# After Setup, contains a multiprocess proxy array.
# The array holds StatEntry values for each event seen.
# Stays None until SetupStats() has been called.
_STATS_COLLECTION = None


def SetupStats():
  """Prepare the shared collection that stats are recorded into.

  This must be called BEFORE any new processes that might read or write to
  these stat values are created. It is safe to call this more than once,
  but most efficient to only make a single call: every call creates a new
  manager and replaces the collection with a new, empty one.
  """
  m = parallel.Manager()

  # pylint: disable=global-statement
  # Create a new stats collection structure that is multiprocess usable.
  global _STATS_COLLECTION
  # Pylint thinks our manager has no members.
  _STATS_COLLECTION = m.list()


def _SnapshotStats():
  """Returns a plain local list of every StatEntry recorded so far.

  If SetupStats() was never called there is no collection yet; that is
  reported as "nothing recorded" (an empty list) so the reporting functions
  tolerate the unconfigured state the same way RetryWithStats does.
  """
  if _STATS_COLLECTION is None:
    return []

  # Convert the multiprocess proxy list into a local simple list.
  return list(_STATS_COLLECTION)


def _SuccessFilter(entry):
  """Returns True if the StatEntry succeeded (perhaps after retries)."""
  # An entry succeeded if any attempt finished without an exception. An entry
  # with no attempts at all has no such attempt, so it counts as a failure.
  return any(not a.exception for a in entry.attempts)


def _RetryCount(entry):
  """Returns the number of retries in this StatEntry."""
  # The first attempt is not a retry; every attempt after it is. Clamp at zero
  # for entries that recorded no attempts at all.
  return max(len(entry.attempts) - 1, 0)


def CategoryStats(category):
  """Return stats numbers for a given category.

  success is the number of times a given command succeeded, even if it had to be
  retried.

  failure is the number of times we exhausted all retries without success.

  retry is the total number of times we retried a command, unrelated to eventual
  success or failure.

  All three values are zero if SetupStats() was never called.

  Args:
    category: A string that defines the 'namespace' for these stats.

  Returns:
    success, failure, retry values as integers.
  """
  # Extract the values for the category we care about.
  stats = [e for e in _SnapshotStats() if e.category == category]

  success = sum(1 for e in stats if _SuccessFilter(e))
  failure = len(stats) - success
  retry = sum(_RetryCount(e) for e in stats)

  return success, failure, retry


def ReportCategoryStats(out, category):
  """Dump stats reports for a given category.

  Args:
    out: Output stream to write to (e.g. sys.stdout).
    category: A string that defines the 'namespace' for these stats.
  """
  success, failure, retry = CategoryStats(category)

  line = '*' * 60 + '\n'
  edge = '*' * 2

  report = (
      line,
      edge + ' Performance Statistics for %s' % category + '\n',
      edge + '\n',
      edge + ' Success: %d' % success + '\n',
      edge + ' Failure: %d' % failure + '\n',
      edge + ' Retries: %d' % retry + '\n',
      edge + ' Total: %d' % (success + failure) + '\n',
      line,
  )

  # One write per report line, in order.
  for text in report:
    out.write(text)


def ReportStats(out):
  """Dump stats reports for every category that has recorded entries.

  Categories are reported in sorted order. Nothing is written if no entries
  have been recorded (including when SetupStats() was never called).

  Args:
    out: Output stream to write to (e.g. sys.stdout).
  """
  categories = sorted(set(e.category for e in _SnapshotStats()))

  for category in categories:
    ReportCategoryStats(out, category)


def RetryWithStats(category, handler, max_retry, functor, *args, **kwargs):
  """Wrapper around retry_util.GenericRetry that collects stats.

  This wrapper collects statistics about each failure or retry. Each
  category is defined by a unique string. SetupStats() should be called
  before use (actually, before processes are forked); if it was not, the
  call is still made through retry_util.GenericRetry but nothing is recorded.

  All other arguments are blindly passed to retry_util.GenericRetry.

  Args:
    category: A string that defines the 'namespace' for these stats.
    handler: See retry_util.GenericRetry.
    max_retry: See retry_util.GenericRetry.
    functor: See retry_util.GenericRetry.
    args: See retry_util.GenericRetry.
    kwargs: See retry_util.GenericRetry.

  Returns:
    See retry_util.GenericRetry returns.

  Raises:
    See retry_util.GenericRetry raises.
  """
  stat_entry = StatEntry(category, attempts=[])

  # Wrap the work method, so we can gather info.
  def wrapper(*wrapper_args, **wrapper_kwargs):
    start = datetime.datetime.now()

    try:
      result = functor(*wrapper_args, **wrapper_kwargs)
    except Exception as e:
      elapsed = datetime.datetime.now() - start
      e_description = '%s: %s' % (type(e).__name__, e)
      stat_entry.attempts.append(Attempt(elapsed, e_description))
      # Re-raise unchanged so GenericRetry sees the original exception.
      raise

    elapsed = datetime.datetime.now() - start
    stat_entry.attempts.append(Attempt(elapsed, None))
    return result

  try:
    return retry_util.GenericRetry(handler, max_retry, wrapper,
                                   *args, **kwargs)
  finally:
    # Record the entry whether the call ultimately succeeded or failed, but
    # only if a collection has been set up to receive it.
    if _STATS_COLLECTION is not None:
      _STATS_COLLECTION.append(stat_entry)