#!/usr/bin/env vpython
#
# [VPYTHON:BEGIN]
# wheel: <
#   name: "infra/python/wheels/google-auth-py2_py3"
#   version: "version:1.2.1"
# >
#
# wheel: <
#   name: "infra/python/wheels/pyasn1-py2_py3"
#   version: "version:0.4.5"
# >
#
# wheel: <
#   name: "infra/python/wheels/pyasn1_modules-py2_py3"
#   version: "version:0.2.4"
# >
#
# wheel: <
#   name: "infra/python/wheels/six"
#   version: "version:1.10.0"
# >
#
# wheel: <
#   name: "infra/python/wheels/cachetools-py2_py3"
#   version: "version:2.0.1"
# >
#
# wheel: <
#   name: "infra/python/wheels/rsa-py2_py3"
#   version: "version:4.0"
# >
#
# wheel: <
#   name: "infra/python/wheels/requests"
#   version: "version:2.13.0"
# >
#
# wheel: <
#   name: "infra/python/wheels/google-api-python-client-py2_py3"
#   version: "version:1.6.2"
# >
#
# wheel: <
#   name: "infra/python/wheels/httplib2-py2_py3"
#   version: "version:0.12.1"
# >
#
# wheel: <
#   name: "infra/python/wheels/oauth2client-py2_py3"
#   version: "version:3.0.0"
# >
#
# wheel: <
#   name: "infra/python/wheels/uritemplate-py2_py3"
#   version: "version:3.0.0"
# >
#
# wheel: <
#   name: "infra/python/wheels/google-auth-oauthlib-py2_py3"
#   version: "version:0.3.0"
# >
#
# wheel: <
#   name: "infra/python/wheels/requests-oauthlib-py2_py3"
#   version: "version:1.2.0"
# >
#
# wheel: <
#   name: "infra/python/wheels/oauthlib-py2_py3"
#   version: "version:3.0.1"
# >
#
# wheel: <
#   name: "infra/python/wheels/google-auth-httplib2-py2_py3"
#   version: "version:0.0.3"
# >
# [VPYTHON:END]
#
# Copyright 2019 The ANGLE Project Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# generate_stats.py:
#   Checks output of dEQP testers and generates stats using the Google Sheets API
#
# prerequisites:
#   https://devsite.googleplex.com/sheets/api/quickstart/python
#   Follow the quickstart guide.
#
# usage: generate_stats.py [-h] [--auth_path [AUTH_PATH]] [--spreadsheet [SPREADSHEET]]
#                          [--verbosity [VERBOSITY]]
#
# optional arguments:
#   -h, --help            show this help message and exit
#   --auth_path [AUTH_PATH]
#                         path to directory containing authorization data (credentials.json and
#                         token.pickle). [default=<home>/.auth]
#   --spreadsheet [SPREADSHEET]
#                         ID of the spreadsheet to write stats to. [default
#                         ='1uttk1z8lJ4ZsUY7wMdFauMzUxb048nh5l52zdrAznek']
#   --verbosity [VERBOSITY]
#                         Verbosity of output. Valid options are [DEBUG, INFO, WARNING, ERROR].
#                         [default=INFO]

import argparse
import datetime
import logging
import os
import pickle
import re
import subprocess
import sys
import urllib
from google.auth.transport.requests import Request
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow

####################
# Global Constants #
####################

HOME_DIR = os.path.expanduser('~')
SCRIPT_DIR = sys.path[0]
ROOT_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, '..'))

LOGGER = logging.getLogger('generate_stats')

SCOPES = ['https://www.googleapis.com/auth/spreadsheets']

BOT_NAMES = [
    'mac-angle-amd',
    'mac-angle-intel',
    'win10-angle-x64-nvidia',
    'win10-angle-x64-intel',
    'win7-angle-x64-nvidia',
    'win7-angle-x86-amd',
    'Linux FYI dEQP Release (Intel HD 630)',
    'Linux FYI dEQP Release (NVIDIA)',
    'Android FYI dEQP Release (Nexus 5X)',
    'Android FYI 32 dEQP Vk Release (Pixel 2)',
    'Android FYI 64 dEQP Vk Release (Pixel 2)',
]
BOT_NAME_PREFIX = 'chromium/ci/'
BUILD_LINK_PREFIX = 'https://ci.chromium.org/p/chromium/builders/ci/'

REQUIRED_COLUMNS = ['build_link', 'time', 'date', 'revision', 'angle_revision', 'duplicate']
MAIN_RESULT_COLUMNS = ['Passed', 'Failed', 'Skipped', 'Not Supported', 'Exception', 'Crashed']

INFO_TAG = '*RESULT'

WORKAROUND_FORMATTING_ERROR_STRING = "Still waiting for the following processes to finish:"

######################
# Build Info Parsing #
######################


# Returns a struct with info about the latest successful build given a bot name. Info contains the
# build_name, time, date, angle_revision, and chrome revision.
# Uses: bb ls '<botname>' -n 1 -status success -p
def get_latest_success_build_info(bot_name):
    bb = subprocess.Popen(['bb', 'ls', bot_name, '-n', '1', '-status', 'success', '-p'],
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    LOGGER.debug("Ran [bb ls '" + bot_name + "' -n 1 -status success -p]")
    out, err = bb.communicate()
    if err:
        raise ValueError("Unexpected error from bb ls: '" + err + "'")
    if not out:
        raise ValueError("Unexpected empty result from bb ls of bot '" + bot_name + "'")
    # Example output (line 1):
    # ci.chromium.org/b/8915280275579996928 SUCCESS   'chromium/ci/Win10 FYI dEQP Release (NVIDIA)/26877'
    # ...
    if 'SUCCESS' not in out:
        raise ValueError("Unexpected result from bb ls: '" + out + "'")
    info = {}
    for line in out.splitlines():
        # The first line holds the build name
        if 'build_name' not in info:
            info['build_name'] = line.strip().split("'")[1]
            # Remove the bot name and prepend the build link
            info['build_link'] = BUILD_LINK_PREFIX + urllib.quote(
                info['build_name'].split(BOT_NAME_PREFIX)[1])
        if 'Created' in line:
            # Example output of line with 'Created':
            # ...
            # Created today at 12:26:39, waited 2.056319s, started at 12:26:41, ran for 1h16m48.14963s, ended at 13:43:30
            # ...
            info['time'] = re.findall(r'[0-9]{1,2}:[0-9]{2}:[0-9]{2}', line.split(',', 1)[0])[0]
            # Format today's date in US format so Sheets can read it properly
            info['date'] = datetime.datetime.now().strftime('%m/%d/%y')
        if 'got_angle_revision' in line:
            # Example output of line with angle revision:
            # ...
            #   "parent_got_angle_revision": "8cbd321cafa92ffbf0495e6d0aeb9e1a97940fee",
            # ...
            info['angle_revision'] = filter(str.isalnum, line.split(':')[1])
        if '"revision"' in line:
            # Example output of line with chromium revision:
            # ...
            #   "revision": "3b68405a27f1f9590f83ae07757589dba862f141",
            # ...
            info['revision'] = filter(str.isalnum, line.split(':')[1])
    if 'build_name' not in info:
        raise ValueError("Could not find build_name from bot '" + bot_name + "'")
    return info

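# As an illustration, the struct returned above might look like this (values are
# hypothetical, shaped after the example `bb ls` output in the comments):
#
#   {
#       'build_name': "chromium/ci/Win10 FYI dEQP Release (NVIDIA)/26877",
#       'build_link': 'https://ci.chromium.org/p/chromium/builders/ci/Win10%20FYI%20dEQP%20Release%20%28NVIDIA%29/26877',
#       'time': '12:26:39',
#       'date': '03/15/19',
#       'angle_revision': '8cbd321cafa92ffbf0495e6d0aeb9e1a97940fee',
#       'revision': '3b68405a27f1f9590f83ae07757589dba862f141',
#   }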

# Returns a list of step names that we're interested in given a build name. We are interested in
# step names starting with 'angle_'. May raise an exception.
# Uses: bb get '<build_name>' -steps
def get_step_names(build_name):
    bb = subprocess.Popen(['bb', 'get', build_name, '-steps'],
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    LOGGER.debug("Ran [bb get '" + build_name + "' -steps]")
    out, err = bb.communicate()
    if err:
        raise ValueError("Unexpected error from bb get: '" + err + "'")
    step_names = []
    # Example output (relevant lines for a single step):
    # ...
    # Step "angle_deqp_egl_vulkan_tests on (nvidia-quadro-p400-win10-stable) GPU on Windows on Windows-10"                                      SUCCESS   4m12s     Logs: "stdout", "chromium_swarming.summary", "Merge script log", "Flaky failure: dEQP.EGL&#x2f;info_version (status CRASH,SUCCESS)", "step_metadata"
    # Run on OS: 'Windows-10'<br>Max shard duration: 0:04:07.309848 (shard \#1)<br>Min shard duration: 0:02:26.402128 (shard \#0)<br/>flaky failures [ignored]:<br/>dEQP.EGL/info\_version<br/>
    #  * [shard #0 isolated out](https://isolateserver.appspot.com/browse?namespace=default-gzip&hash=9a5999a59d332e55f54f495948d0c9f959e60ed2)
    #  * [shard #0 (128.3 sec)](https://chromium-swarm.appspot.com/user/task/446903ae365b8110)
    #  * [shard #1 isolated out](https://isolateserver.appspot.com/browse?namespace=default-gzip&hash=d71e1bdd91dee61b536b4057a9222e642bd3809f)
    #  * [shard #1 (229.3 sec)](https://chromium-swarm.appspot.com/user/task/446903b7b0d90210)
    #  * [shard #2 isolated out](https://isolateserver.appspot.com/browse?namespace=default-gzip&hash=ac9ba85b1cca77774061b87335c077980e1eef85)
    #  * [shard #2 (144.5 sec)](https://chromium-swarm.appspot.com/user/task/446903c18e15a010)
    #  * [shard #3 isolated out](https://isolateserver.appspot.com/browse?namespace=default-gzip&hash=976d586386864abecf53915fbac3e085f672e30f)
    #  * [shard #3 (138.4 sec)](https://chromium-swarm.appspot.com/user/task/446903cc8da0ad10)
    # ...
    for line in out.splitlines():
        if 'Step "angle_' not in line:
            continue
        step_names.append(line.split('"')[1])
    return step_names

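# As a sketch of the parsing above: given the example 'Step "angle_..."' line, taking
# line.split('"')[1] yields the full step name, e.g. (hypothetical):
#
#   'angle_deqp_egl_vulkan_tests on (nvidia-quadro-p400-win10-stable) GPU on Windows on Windows-10'
#
# which is the name later passed verbatim to `bb log`.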

# Performs some heuristic validation of the step_info struct returned from a single step log.
# Returns True if valid, False if invalid. May write to stderr
def validate_step_info(step_info, build_name, step_name):
    print_name = "'" + build_name + "': '" + step_name + "'"
    if not step_info:
        LOGGER.warning('Step info empty for ' + print_name + '\n')
        return False

    if 'Total' in step_info:
        partial_sum_keys = MAIN_RESULT_COLUMNS
        partial_sum_values = [int(step_info[key]) for key in partial_sum_keys if key in step_info]
        computed_total = sum(partial_sum_values)
        if step_info['Total'] != computed_total:
            LOGGER.warning('Step info does not sum to total for ' + print_name + ' | Total: ' +
                           str(step_info['Total']) + ' - Computed total: ' + str(computed_total) +
                           '\n')
    return True


# Returns a struct containing parsed info from a given step log. The info is parsed by looking for
# lines with the following format in stdout:
# '*RESULT: <key>: <value>'
# May write to stderr
# Uses: bb log '<build_name>' '<step_name>'
def get_step_info(build_name, step_name):
    bb = subprocess.Popen(['bb', 'log', build_name, step_name],
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    LOGGER.debug("Ran [bb log '" + build_name + "' '" + step_name + "']")
    out, err = bb.communicate()
    if err:
        LOGGER.warning("Unexpected error from bb log '" + build_name + "' '" + step_name + "': '" +
                       err + "'")
        return None
    step_info = {}
    # Example output (relevant lines of stdout):
    # ...
    # *RESULT: Total: 155
    # *RESULT: Passed: 11
    # *RESULT: Failed: 0
    # *RESULT: Skipped: 12
    # *RESULT: Not Supported: 132
    # *RESULT: Exception: 0
    # *RESULT: Crashed: 0
    # *RESULT: Unexpected Passed: 12
    # ...
    append_errors = []
    # Hacky workaround to fix an issue where messages are dropped into the middle of lines by
    # another process:
    # eg.
    # *RESULT: <start_of_result>Still waiting for the following processes to finish:
    # "c:\b\s\w\ir\out\Release\angle_deqp_gles3_tests.exe" --deqp-egl-display-type=angle-vulkan --gtest_flagfile="c:\b\s\w\itlcgdrz\scoped_dir7104_364984996\8ad93729-f679-406d-973b-06b9d1bf32de.tmp" --single-process-tests --test-launcher-batch-limit=400 --test-launcher-output="c:\b\s\w\itlcgdrz\7104_437216092\test_results.xml" --test-launcher-summary-output="c:\b\s\w\iosuk8ai\output.json"
    # <end_of_result>
    #
    # Removes the message, skips the line following it, and then appends <start_of_result>
    # and <end_of_result> back together
    workaround_prev_line = ""
    workaround_prev_line_count = 0
    for line in out.splitlines():
        # Skip lines if the workaround still has lines to skip
        if workaround_prev_line_count > 0:
            workaround_prev_line_count -= 1
            continue
        # If there are no more lines to skip and there is a previous <start_of_result> to append,
        # append it and finish the workaround
        elif workaround_prev_line != "":
            line = workaround_prev_line + line
            workaround_prev_line = ""
            workaround_prev_line_count = 0
            LOGGER.debug("Formatting error workaround rebuilt line as: '" + line + "'\n")

        if INFO_TAG not in line:
            continue

        # When the workaround string is detected, start the workaround with 1 line to skip and save
        # the <start_of_result>, but continue the loop until the workaround is finished
        if WORKAROUND_FORMATTING_ERROR_STRING in line:
            workaround_prev_line = line.split(WORKAROUND_FORMATTING_ERROR_STRING)[0]
            workaround_prev_line_count = 1
            continue

        line_columns = line.split(INFO_TAG, 1)[1].split(':')
        if len(line_columns) != 3:
            LOGGER.warning("Line improperly formatted: '" + line + "'\n")
            continue
        key = line_columns[1].strip()
        # If the value is clearly an int, sum it. Otherwise, concatenate it as a string
        is_int = False
        int_val = 0
        try:
            int_val = int(line_columns[2])
            is_int = True
        except ValueError:
            is_int = False

        if is_int:
            if key not in step_info:
                step_info[key] = 0
            step_info[key] += int_val
        else:
            if key not in step_info:
                step_info[key] = line_columns[2].strip()
            else:
                append_string = '\n' + line_columns[2].strip()
                # Sheets has a limit of 50000 characters per cell, so make sure to stop appending
                # below this limit
                if len(step_info[key]) + len(append_string) < 50000:
                    step_info[key] += append_string
                else:
                    if key not in append_errors:
                        append_errors.append(key)
                        LOGGER.warning("Too many characters in column '" + key +
                                       "'. Output capped.")
    return step_info

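# As a sketch, the example '*RESULT' lines above would be parsed into the struct below.
# Integer values are summed across repeated lines (e.g. one per shard), so counts
# accumulate; string values are concatenated line by line:
#
#   {
#       'Total': 155,
#       'Passed': 11,
#       'Failed': 0,
#       'Skipped': 12,
#       'Not Supported': 132,
#       'Exception': 0,
#       'Crashed': 0,
#       'Unexpected Passed': 12,
#   }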

# Returns the info for each step run on a given bot_name.
def get_bot_info(bot_name):
    info = get_latest_success_build_info(bot_name)
    info['step_names'] = get_step_names(info['build_name'])
    broken_step_names = []
    for step_name in info['step_names']:
        LOGGER.info("Parsing step '" + step_name + "'...")
        step_info = get_step_info(info['build_name'], step_name)
        if validate_step_info(step_info, info['build_name'], step_name):
            info[step_name] = step_info
        else:
            broken_step_names.append(step_name)
    for step_name in broken_step_names:
        info['step_names'].remove(step_name)
    return info

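# The resulting struct nests each step's parsed stats under its step name, roughly
# (keys and values here are hypothetical):
#
#   {
#       'build_name': 'chromium/ci/...',
#       'build_link': 'https://ci.chromium.org/...',
#       'time': '12:26:39',
#       'date': '03/15/19',
#       'revision': '...',
#       'angle_revision': '...',
#       'step_names': ['angle_deqp_egl_vulkan_tests on ...'],
#       'angle_deqp_egl_vulkan_tests on ...': {'Total': 155, 'Passed': 11, ...},
#   }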

#####################
# Sheets Formatting #
#####################


# Get an individual spreadsheet based on the spreadsheet id. Returns the result of
# spreadsheets.get(), or throws an exception if the sheet could not be opened.
def get_spreadsheet(service, spreadsheet_id):
    LOGGER.debug("Called [spreadsheets.get(spreadsheetId='" + spreadsheet_id + "')]")
    request = service.get(spreadsheetId=spreadsheet_id)
    spreadsheet = request.execute()
    if not spreadsheet:
        raise Exception("Did not open spreadsheet '" + spreadsheet_id + "'")
    return spreadsheet


# Returns a nicely formatted string based on the bot_name and step_name
def format_sheet_name(bot_name, step_name):
    # Some tokens should be ignored for readability in the name
    unnecessary_tokens = ['FYI', 'Release', 'Vk', 'dEQP', '(', ')']
    for token in unnecessary_tokens:
        bot_name = bot_name.replace(token, '')
    bot_name = ' '.join(bot_name.strip().split())  # Remove extra spaces
    step_name = re.findall(r'angle\w*', step_name)[0]  # Separate test name
    # Test names are formatted as 'angle_deqp_<frontend>_<backend>_tests'
    new_step_name = ''
    # Put the frontend first
    if '_egl_' in step_name:
        step_name = step_name.replace('_egl_', '_')
        new_step_name += ' EGL'
    if '_gles2_' in step_name:
        step_name = step_name.replace('_gles2_', '_')
        new_step_name += ' GLES 2.0 '
    if '_gles3_' in step_name:
        step_name = step_name.replace('_gles3_', '_')
        new_step_name += ' GLES 3.0 '
    if '_gles31_' in step_name:
        step_name = step_name.replace('_gles31_', '_')
        new_step_name += ' GLES 3.1 '
    # Put the backend second
    if '_d3d9_' in step_name:
        step_name = step_name.replace('_d3d9_', '_')
        new_step_name += ' D3D9 '
    if '_d3d11' in step_name:
        step_name = step_name.replace('_d3d11_', '_')
        new_step_name += ' D3D11 '
    if '_gl_' in step_name:
        step_name = step_name.replace('_gl_', '_')
        new_step_name += ' Desktop OpenGL '
    if '_gles_' in step_name:
        step_name = step_name.replace('_gles_', '_')
        new_step_name += ' OpenGLES '
    if '_vulkan_' in step_name:
        step_name = step_name.replace('_vulkan_', '_')
        new_step_name += ' Vulkan '
    # Add any remaining keywords from the step name into the formatted name (formatted nicely)
    step_name = step_name.replace('angle_', '_')
    step_name = step_name.replace('_deqp_', '_')
    step_name = step_name.replace('_tests', '_')
    step_name = step_name.replace('_', ' ').strip()
    new_step_name += ' ' + step_name
    new_step_name = ' '.join(new_step_name.strip().split())  # Remove extra spaces
    return new_step_name + ' ' + bot_name

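# Worked example (bot name taken from BOT_NAMES; step name hypothetical but following
# the 'angle_deqp_<frontend>_<backend>_tests' pattern):
#
#   >>> format_sheet_name('Linux FYI dEQP Release (Intel HD 630)', 'angle_deqp_gles2_gl_tests')
#   'GLES 2.0 Desktop OpenGL Linux Intel HD 630'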

# Returns the full list of sheet names that should be populated based on the info struct
def get_sheet_names(info):
    sheet_names = []
    for bot_name in info:
        for step_name in info[bot_name]['step_names']:
            sheet_name = format_sheet_name(bot_name, step_name)
            sheet_names.append(sheet_name)
    return sheet_names


# Returns True if the sheet is found in the spreadsheets object
def sheet_exists(spreadsheet, step_name):
    for sheet in spreadsheet['sheets']:
        if sheet['properties']['title'] == step_name:
            return True
    return False


# Validates the spreadsheets object against the list of sheet names which should appear. Returns a
# list of sheets that need creation.
def validate_sheets(spreadsheet, sheet_names):
    create_sheets = []
    for sheet_name in sheet_names:
        if not sheet_exists(spreadsheet, sheet_name):
            create_sheets.append(sheet_name)
    return create_sheets


# Performs a batch update with a given service, spreadsheet id, and list <object(Request)> of
# updates to do.
def batch_update(service, spreadsheet_id, updates):
    batch_update_request_body = {
        'requests': updates,
    }
    LOGGER.debug("Called [spreadsheets.batchUpdate(spreadsheetId='" + spreadsheet_id + "', body=" +
                 str(batch_update_request_body) + ')]')
    request = service.batchUpdate(spreadsheetId=spreadsheet_id, body=batch_update_request_body)
    request.execute()


# Creates sheets on the given service and spreadsheet id, based on a list of sheet names
def create_sheets(service, spreadsheet_id, sheet_names):
    updates = [{'addSheet': {'properties': {'title': sheet_name,}}} for sheet_name in sheet_names]
    batch_update(service, spreadsheet_id, updates)


# Calls a values().batchGet() on the service to find the list of column names from each sheet in
# sheet_names. Returns a dictionary with one list per sheet_name.
def get_headers(service, spreadsheet_id, sheet_names):
    header_ranges = [sheet_name + '!A1:Z' for sheet_name in sheet_names]
    LOGGER.debug("Called [spreadsheets.values().batchGet(spreadsheetId='" + spreadsheet_id +
                 "', ranges=" + str(header_ranges) + ')]')
    request = service.values().batchGet(spreadsheetId=spreadsheet_id, ranges=header_ranges)
    response = request.execute()
    headers = {}
    for k, sheet_name in enumerate(sheet_names):
        if 'values' in response['valueRanges'][k]:
            # Headers are in the first row of values
            headers[sheet_name] = response['valueRanges'][k]['values'][0]
        else:
            headers[sheet_name] = []
    return headers

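# For reference, a values().batchGet() response is shaped roughly as below (sheet and
# header names are hypothetical). Note that 'values' is omitted entirely for an empty
# sheet, which is why the else branch above falls back to []:
#
#   {
#       'spreadsheetId': '...',
#       'valueRanges': [
#           {'range': "'GLES 2.0 Desktop OpenGL Linux Intel HD 630'!A1:Z1000",
#            'majorDimension': 'ROWS',
#            'values': [['build_link', 'time', 'date', 'revision', 'angle_revision', 'duplicate']]},
#       ],
#   }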

# Calls values().batchUpdate() with supplied list of data <object(ValueRange)> to update on the
# service.
def batch_update_values(service, spreadsheet_id, data):
    batch_update_values_request_body = {
        'valueInputOption': 'USER_ENTERED',  # Helps with formatting of dates
        'data': data,
    }
    LOGGER.debug("Called [spreadsheets.values().batchUpdate(spreadsheetId='" + spreadsheet_id +
                 "', body=" + str(batch_update_values_request_body) + ')]')
    request = service.values().batchUpdate(
        spreadsheetId=spreadsheet_id, body=batch_update_values_request_body)
    request.execute()


# Get the sheetId of a sheet based on its name
def get_sheet_id(spreadsheet, sheet_name):
    for sheet in spreadsheet['sheets']:
        if sheet['properties']['title'] == sheet_name:
            return sheet['properties']['sheetId']
    return -1


# Update the filters on sheets with a 'duplicate' column. Filter out any duplicate rows
def update_filters(service, spreadsheet_id, headers, info, spreadsheet):
    updates = []
    for bot_name in info:
        for step_name in info[bot_name]['step_names']:
            sheet_name = format_sheet_name(bot_name, step_name)
            duplicate_found = 'duplicate' in headers[sheet_name]
            if duplicate_found:
                sheet_id = get_sheet_id(spreadsheet, sheet_name)
                if sheet_id > -1:
                    updates.append({
                        "setBasicFilter": {
                            "filter": {
                                "range": {
                                    "sheetId": sheet_id,
                                    "startColumnIndex": 0,
                                    "endColumnIndex": len(headers[sheet_name])
                                },
                                "sortSpecs": [{
                                    "dimensionIndex": headers[sheet_name].index('date'),
                                    "sortOrder": "ASCENDING"
                                }],
                                "criteria": {
                                    str(headers[sheet_name].index('duplicate')): {
                                        "hiddenValues":
                                            ["1"]  # Hide rows when duplicate is 1 (true)
                                    }
                                }
                            }
                        }
                    })
    if updates:
        LOGGER.info('Updating sheet filters...')
        batch_update(service, spreadsheet_id, updates)


# Populates the headers with any missing/desired columns based on the info struct, and calls
# batch update to update the corresponding sheets if necessary.
def update_headers(service, spreadsheet_id, headers, info):
    data = []
    sheet_names = []
    for bot_name in info:
        for step_name in info[bot_name]['step_names']:
            if step_name not in info[bot_name]:
                LOGGER.error("Missing info for step name: '" + step_name + "'")
            sheet_name = format_sheet_name(bot_name, step_name)
            headers_stale = False
            # Headers should always contain the following columns
            for req in REQUIRED_COLUMNS:
                if req not in headers[sheet_name]:
                    headers_stale = True
                    headers[sheet_name].append(req)
            # Headers also must contain all the keys seen in this step
            for key in info[bot_name][step_name].keys():
                if key not in headers[sheet_name]:
                    headers_stale = True
                    headers[sheet_name].append(key)
            # Update the sheet headers if necessary
            if headers_stale:
                sheet_names.append(sheet_name)
                header_range = sheet_name + '!A1:Z'
                data.append({
                    'range': header_range,
                    'majorDimension': 'ROWS',
                    'values': [headers[sheet_name]]
                })
    if data:
        LOGGER.info('Updating sheet headers...')
        batch_update_values(service, spreadsheet_id, data)


# Calls values().append() to append a list of values to a given sheet.
def append_values(service, spreadsheet_id, sheet_name, values):
    header_range = sheet_name + '!A1:Z'
    insert_data_option = 'INSERT_ROWS'
    value_input_option = 'USER_ENTERED'  # Helps with formatting of dates
    append_values_request_body = {
        'range': header_range,
        'majorDimension': 'ROWS',
        'values': [values],
    }
    LOGGER.debug("Called [spreadsheets.values().append(spreadsheetId='" + spreadsheet_id +
                 "', body=" + str(append_values_request_body) + ", range='" + header_range +
                 "', insertDataOption='" + insert_data_option + "', valueInputOption='" +
                 value_input_option + "')]")
    request = service.values().append(
        spreadsheetId=spreadsheet_id,
        body=append_values_request_body,
        range=header_range,
        insertDataOption=insert_data_option,
        valueInputOption=value_input_option)
    request.execute()


# Formula to determine whether a row is a duplicate of the previous row based on checking the
# columns listed in filter_columns.
# Eg.
# date | pass | fail
# Jan 1  100    50
# Jan 2  100    50
# Jan 3  99     51
#
# If we want to filter based on only the "pass" and "fail" columns, we generate a formula
# like '=IF(B2=B1, IF(C2=C1, 1, 0), 0)' in the 'duplicate' column.
# This formula is recursively generated for each column in filter_columns, using the column
# position as determined by headers. The formula uses a more generalized form with
# 'INDIRECT(ADDRESS(<row>, <col>))' instead of 'B2', where <row> is ROW() and ROW()-1, and <col>
# is determined by the column's position in headers
def generate_duplicate_formula(headers, filter_columns):
    # No more columns, put a 1 in the IF statement true branch
    if len(filter_columns) == 0:
        return '1'
    # Next column is found, generate the formula for duplicate checking, and remove from the list
    # for recursion
    for i in range(len(headers)):
        if headers[i] == filter_columns[0]:
            col = str(i + 1)
            formula = "IF(INDIRECT(ADDRESS(ROW(), " + col + "))=INDIRECT(ADDRESS(ROW() - 1, " + \
                col + "))," + generate_duplicate_formula(headers, filter_columns[1:]) + ",0)"
            return formula
    # Next column not found, remove it from the recursion and continue with the rest of the list
    return generate_duplicate_formula(headers, filter_columns[1:])


# Helper function to start the recursive call to generate_duplicate_formula
def generate_duplicate_formula_helper(headers):
    filter_columns = MAIN_RESULT_COLUMNS
    formula = generate_duplicate_formula(headers, filter_columns)
    if formula == "1":
        return ""
    else:
        # Final result needs to be prepended with =
        return "=" + formula
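
# Worked example: with headers = ['date', 'Passed', 'Failed'], only 'Passed' (column 2)
# and 'Failed' (column 3) appear in MAIN_RESULT_COLUMNS, so the helper returns
# (wrapped here for readability; the actual string has no newlines):
#
#   =IF(INDIRECT(ADDRESS(ROW(), 2))=INDIRECT(ADDRESS(ROW() - 1, 2)),
#       IF(INDIRECT(ADDRESS(ROW(), 3))=INDIRECT(ADDRESS(ROW() - 1, 3)),1,0),0)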


# Uses the list of headers and the info struct to come up with a list of values for each step
# from the latest builds.
def update_values(service, spreadsheet_id, headers, info):
    data = []
    for bot_name in info:
        for step_name in info[bot_name]['step_names']:
            sheet_name = format_sheet_name(bot_name, step_name)
            values = []
            # For each key in the list of headers, either add the corresponding value or add a
            # blank value. The values must match the order of the headers
            for key in headers[sheet_name]:
                if key in info[bot_name] and key in REQUIRED_COLUMNS:
                    values.append(info[bot_name][key])
                elif key in info[bot_name][step_name]:
                    values.append(info[bot_name][step_name][key])
                elif key == "duplicate" and key in REQUIRED_COLUMNS:
                    values.append(generate_duplicate_formula_helper(headers[sheet_name]))
                else:
                    values.append('')
            LOGGER.info("Appending new rows to sheet '" + sheet_name + "'...")
            try:
                append_values(service, spreadsheet_id, sheet_name, values)
            except Exception as error:
                LOGGER.warning('%s\n' % str(error))


# Updates the given spreadsheet_id with the info struct passed in.
def update_spreadsheet(service, spreadsheet_id, info):
    LOGGER.info('Opening spreadsheet...')
    spreadsheet = get_spreadsheet(service, spreadsheet_id)
    LOGGER.info('Parsing sheet names...')
    sheet_names = get_sheet_names(info)
    new_sheets = validate_sheets(spreadsheet, sheet_names)
    if new_sheets:
        LOGGER.info('Creating new sheets...')
        create_sheets(service, spreadsheet_id, new_sheets)
    LOGGER.info('Parsing sheet headers...')
    headers = get_headers(service, spreadsheet_id, sheet_names)
    update_headers(service, spreadsheet_id, headers, info)
    update_filters(service, spreadsheet_id, headers, info, spreadsheet)
    update_values(service, spreadsheet_id, headers, info)


#####################
# Main/helpers      #
#####################


# Loads or creates credentials and connects to the Sheets API. Returns a Spreadsheets object with
# an open connection.
def get_sheets_service(auth_path):
    credentials_path = auth_path + '/credentials.json'
    token_path = auth_path + '/token.pickle'
    creds = None
    if not os.path.exists(auth_path):
        LOGGER.info("Creating auth dir '" + auth_path + "'")
        os.makedirs(auth_path)
    if not os.path.exists(credentials_path):
        raise Exception('Missing credentials.json.\n'
                        'Go to: https://developers.google.com/sheets/api/quickstart/python\n'
                        "Under Step 1, click 'ENABLE THE GOOGLE SHEETS API'\n"
                        "Click 'DOWNLOAD CLIENT CONFIGURATION'\n"
                        'Save to your auth_path (' + auth_path + ') as credentials.json')
    if os.path.exists(token_path):
        with open(token_path, 'rb') as token:
            creds = pickle.load(token)
            LOGGER.info('Loaded credentials from ' + token_path)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            LOGGER.info('Refreshing credentials...')
            creds.refresh(Request())
        else:
            LOGGER.info('Could not find credentials. Requesting new credentials.')
            flow = InstalledAppFlow.from_client_secrets_file(credentials_path, SCOPES)
            creds = flow.run_local_server()
        with open(token_path, 'wb') as token:
            pickle.dump(creds, token)
    service = build('sheets', 'v4', credentials=creds)
    sheets = service.spreadsheets()
    return sheets


# Parse the input to the script
def parse_args():
    parser = argparse.ArgumentParser(os.path.basename(sys.argv[0]))
    parser.add_argument(
        '--auth_path',
        default=HOME_DIR + '/.auth',
        nargs='?',
        help='path to directory containing authorization data '
        '(credentials.json and token.pickle). '
        '[default=<home>/.auth]')
    parser.add_argument(
        '--spreadsheet',
        default='1uttk1z8lJ4ZsUY7wMdFauMzUxb048nh5l52zdrAznek',
        nargs='?',
        help='ID of the spreadsheet to write stats to. '
        "[default='1uttk1z8lJ4ZsUY7wMdFauMzUxb048nh5l52zdrAznek']")
    parser.add_argument(
        '--verbosity',
        default='INFO',
        nargs='?',
        help='Verbosity of output. Valid options are '
        '[DEBUG, INFO, WARNING, ERROR]. '
        '[default=INFO]')
    return parser.parse_args()


# Set up the logging with the right verbosity and output.
def initialize_logging(verbosity):
    handler = logging.StreamHandler()
    formatter = logging.Formatter(fmt='%(levelname)s: %(message)s')
    handler.setFormatter(formatter)
    LOGGER.addHandler(handler)
    if 'DEBUG' in verbosity:
        LOGGER.setLevel(level=logging.DEBUG)
    elif 'INFO' in verbosity:
        LOGGER.setLevel(level=logging.INFO)
    elif 'WARNING' in verbosity:
        LOGGER.setLevel(level=logging.WARNING)
    elif 'ERROR' in verbosity:
        LOGGER.setLevel(level=logging.ERROR)
    else:
        LOGGER.setLevel(level=logging.INFO)


def main():
    os.chdir(ROOT_DIR)
    args = parse_args()
    verbosity = args.verbosity.strip().upper()
    initialize_logging(verbosity)
    auth_path = args.auth_path.replace('\\', '/')
    try:
        service = get_sheets_service(auth_path)
    except Exception as error:
        LOGGER.error('%s\n' % str(error))
        sys.exit(1)

    info = {}
    LOGGER.info('Building info struct...')
    for bot_name in BOT_NAMES:
        LOGGER.info("Parsing bot '" + bot_name + "'...")
        try:
            info[bot_name] = get_bot_info(BOT_NAME_PREFIX + bot_name)
        except Exception as error:
            LOGGER.error('%s\n' % str(error))

    LOGGER.info('Updating sheets...')
    try:
        update_spreadsheet(service, args.spreadsheet, info)
    except Exception as error:
        LOGGER.error('%s\n' % str(error))
        sys.exit(1)

    LOGGER.info('Info was successfully written to sheet: https://docs.google.com/spreadsheets/d/' +
                args.spreadsheet)


if __name__ == '__main__':
    sys.exit(main())
819