xref: /aosp_15_r20/external/autotest/utils/frozen_chromite/lib/retry_stats.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1# -*- coding: utf-8 -*-
2# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Infrastructure for collecting statistics about retries."""
7
8from __future__ import print_function
9
10import collections
11import datetime
12
13from autotest_lib.utils.frozen_chromite.lib import parallel
14from autotest_lib.utils.frozen_chromite.lib import retry_util
15
16
# Well-known category names. Each is a plain string used as the `category`
# value (the stats 'namespace') by the functions in this module.
CIDB = 'CIDB'
GSUTIL = 'Google Storage'
20
21
class UnconfiguredStatsCategory(Exception):
  """Error for using a stats category that was never configured."""
24
25
# One StatEntry is recorded per retry call (see RetryWithStats).
#   category: The string naming the stats 'namespace' of the call.
#   attempts: A list with one Attempt per invocation of the wrapped function,
#             in the order the invocations happened.
StatEntry = collections.namedtuple('StatEntry', ['category', 'attempts'])
31
# One Attempt is recorded per invocation of the wrapped function.
#   time: How long the attempt took. RetryWithStats stores the difference of
#         two datetime.datetime.now() readings, i.e. a datetime.timedelta.
#   exception: None for a successful attempt, otherwise a string describing
#              the exception that was raised.
Attempt = collections.namedtuple('Attempt', ['time', 'exception'])
38
39
# None until SetupStats() has run; afterwards the list proxy obtained from
# parallel.Manager().list(). RetryWithStats appends one StatEntry to it per
# call.
_STATS_COLLECTION = None
43
44
def SetupStats():
  """Create the shared collection that retry stats are recorded into.

  This must be called BEFORE any new processes that might read or write
  these stats are created. Calling it again is allowed, but every call
  rebinds the module-level collection to a brand new, empty list, so a
  single call is both sufficient and most efficient.
  """
  # pylint: disable=global-statement
  global _STATS_COLLECTION

  # The manager provides a list that is usable across processes. (Pylint may
  # complain that the manager object has no members.)
  manager = parallel.Manager()
  _STATS_COLLECTION = manager.list()
59
60
def _SuccessFilter(entry):
  """Tell whether a StatEntry ended in success (possibly after retries).

  Args:
    entry: A StatEntry whose attempts each carry an `exception` field.

  Returns:
    True if at least one attempt has a falsy `exception` (i.e. it did not
    fail); False if every attempt failed or there are no attempts at all.
  """
  # An attempt without an exception description is a successful one.
  return any(not attempt.exception for attempt in entry.attempts)
65
66
def _RetryCount(entry):
  """Count how many retries a StatEntry needed.

  Args:
    entry: A StatEntry; only the length of its `attempts` list is used.

  Returns:
    The number of attempts beyond the first one, or 0 when there were no
    attempts at all.
  """
  # The first attempt is not a retry; every further attempt is.
  attempt_count = len(entry.attempts)
  return attempt_count - 1 if attempt_count else 0
71
72
def CategoryStats(category):
  """Return stats numbers for a given category.

  success is the number of times a given command succeeded, even if it had
  to be retried.

  failure is the number of times we exhausted all retries without success.

  retry is the total number of times we retried a command, unrelated to
  eventual success or failure.

  If SetupStats() was never called there is no collection to read, so all
  three values are reported as zero. This mirrors RetryWithStats, which
  records nothing in that situation.

  Args:
    category: A string that defines the 'namespace' for these stats.

  Returns:
    success, failure, retry values as integers.
  """
  # Without SetupStats() nothing could have been recorded.
  if _STATS_COLLECTION is None:
    return 0, 0, 0

  # Convert the multiprocess proxy list into a local simple list, then keep
  # only the entries for the category we care about.
  stats = [e for e in list(_STATS_COLLECTION) if e.category == category]

  success = sum(1 for e in stats if _SuccessFilter(e))
  # Every entry that did not succeed counts as a failure.
  failure = len(stats) - success
  retry = sum(_RetryCount(e) for e in stats)

  return success, failure, retry
101
def ReportCategoryStats(out, category):
  """Write a framed summary of one category's stats to a stream.

  The summary shows the success, failure and retry counts returned by
  CategoryStats(), plus the total (success + failure).

  Args:
    out: Output stream to write to (e.g. sys.stdout).
    category: A string that defines the 'namespace' for these stats.
  """
  success, failure, retry = CategoryStats(category)

  border = '*' * 60 + '\n'
  margin = '*' * 2

  def _WriteRow(text=''):
    # Each row starts with the margin and ends with a newline.
    out.write(margin + text + '\n')

  out.write(border)
  _WriteRow(' Performance Statistics for %s' % category)
  _WriteRow()
  _WriteRow(' Success: %d' % success)
  _WriteRow(' Failure: %d' % failure)
  _WriteRow(' Retries: %d' % retry)
  _WriteRow(' Total: %d' % (success + failure))
  out.write(border)
122
123
def ReportStats(out):
  """Dump stats reports for every category that has recorded entries.

  Categories are reported in sorted order, one ReportCategoryStats() block
  each. If SetupStats() was never called there is no collection to read and
  nothing is written.

  Args:
    out: Output stream to write to (e.g. sys.stdout).
  """
  # Without SetupStats() nothing could have been recorded.
  if _STATS_COLLECTION is None:
    return

  categories = sorted({e.category for e in _STATS_COLLECTION})

  for category in categories:
    ReportCategoryStats(out, category)
135
136
def RetryWithStats(category, handler, max_retry, functor, *args, **kwargs):
  """Run retry_util.GenericRetry while recording stats about each attempt.

  Every invocation of `functor` is timed and noted as an Attempt, including
  the exception description when it raised an Exception. When the whole
  retry call finishes, whether by returning or by raising, one StatEntry
  holding those attempts is appended to the module-level collection. If
  SetupStats() has not been called (the collection is still None), the call
  is retried as usual but nothing is recorded.

  All other arguments are blindly passed to retry_util.GenericRetry.

  Args:
    category: A string that defines the 'namespace' for these stats.
    handler: See retry_util.GenericRetry.
    max_retry: See retry_util.GenericRetry.
    functor: See retry_util.GenericRetry.
    args: See retry_util.GenericRetry.
    kwargs: See retry_util.GenericRetry.

  Returns:
    See retry_util.GenericRetry returns.

  Raises:
    See retry_util.GenericRetry raises.
  """
  entry = StatEntry(category, attempts=[])

  # GenericRetry calls this in place of functor so each attempt gets noted.
  def wrapper(*call_args, **call_kwargs):
    began = datetime.datetime.now()

    try:
      outcome = functor(*call_args, **call_kwargs)
    except Exception as err:
      elapsed = datetime.datetime.now() - began
      description = '%s: %s' % (type(err).__name__, err)
      entry.attempts.append(Attempt(elapsed, description))
      # Let GenericRetry see the failure and decide whether to retry.
      raise
    else:
      elapsed = datetime.datetime.now() - began
      entry.attempts.append(Attempt(elapsed, None))
      return outcome

  try:
    return retry_util.GenericRetry(handler, max_retry, wrapper,
                                   *args, **kwargs)
  finally:
    # Record the entry no matter how the retry call ended, unless stats
    # collection was never set up.
    if _STATS_COLLECTION is not None:
      _STATS_COLLECTION.append(entry)
184