xref: /aosp_15_r20/external/cronet/testing/flake_suppressor_common/expectations.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1# Copyright 2021 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Module for interacting with expectation files."""
5
6import base64
7import collections
8from datetime import timedelta, date
9import itertools
10import os
11import posixpath
12import re
13from typing import Dict, List, Set, Tuple, Union
14import urllib.request
15
16from flake_suppressor_common import common_typing as ct
17
18from typ import expectations_parser
19
# Absolute path to the Chromium src checkout root, two directories above the
# directory containing this file.
CHROMIUM_SRC_DIR = os.path.realpath(
    os.path.join(os.path.dirname(__file__), '..', '..'))
# Base gitiles URL for fetching file contents from origin/main.
GITILES_URL = 'https://chromium.googlesource.com/chromium/src/+/refs/heads/main'
# Query argument that makes gitiles return base64-encoded plain text.
TEXT_FORMAT_ARG = '?format=TEXT'

# Matches a "# tags: [ ... ]" tag group declaration in an expectation file.
TAG_GROUP_REGEX = re.compile(r'# tags: \[([^\]]*)\]', re.MULTILINE | re.DOTALL)

# Test name -> list of build URLs the test failed in.
TestToUrlsType = Dict[str, List[str]]
# Suite name -> tests within that suite.
SuiteToTestsType = Dict[str, TestToUrlsType]
# Typ tag tuple -> suites -> tests -> failed build URLs, as produced by
# ExpectationProcessor._ReorderMapByTypTags.
TagOrderedAggregateResultType = Dict[ct.TagTupleType, SuiteToTestsType]
30
31
def OverFailedBuildThreshold(failed_result_tuple_list: List[ct.ResultTupleType],
                             build_fail_total_number_threshold: int) -> bool:
  """Determines whether enough distinct builds failed to hit the threshold.

  Args:
    failed_result_tuple_list: A list of ct.ResultTupleType failed test results.
    build_fail_total_number_threshold: Threshold based on the number of failed
      builds caused by a test.

  Returns:
    True iff the number of unique failed builds in |failed_result_tuple_list|
    is equal to or more than |build_fail_total_number_threshold|.
  """
  distinct_build_ids = set()
  for failed_result in failed_result_tuple_list:
    build_url = failed_result.build_url
    # URLs without a path separator carry no extractable build ID.
    if '/' not in build_url:
      continue
    # The trailing URL component identifies the build.
    distinct_build_ids.add(build_url.rsplit('/', 1)[-1])
    if len(distinct_build_ids) >= build_fail_total_number_threshold:
      return True
  return False
53
54
def OverFailedBuildByConsecutiveDayThreshold(
    failed_result_tuple_list: List[ct.ResultTupleType],
    build_fail_consecutive_day_threshold: int) -> bool:
  """Determines whether build failures span enough consecutive days.

  Args:
    failed_result_tuple_list: A list of ct.ResultTupleType failed test results.
    build_fail_consecutive_day_threshold: Threshold based on the number of
      consecutive days that a test caused build fails.

  Returns:
    True iff the longest run of consecutive failure dates in
    |failed_result_tuple_list| is equal to or longer than
    |build_fail_consecutive_day_threshold|.
  """
  # Deduplicate and order the failure dates so consecutive runs are adjacent.
  ordered_dates = sorted({result.date for result in failed_result_tuple_list})
  if not ordered_dates:
    return False

  one_day = timedelta(days=1)
  run_length = 1
  if run_length >= build_fail_consecutive_day_threshold:
    return True
  for previous_date, current_date in zip(ordered_dates, ordered_dates[1:]):
    # Extend the current run on adjacent dates, otherwise start a new run.
    if current_date - previous_date == one_day:
      run_length += 1
    else:
      run_length = 1
    if run_length >= build_fail_consecutive_day_threshold:
      return True
  return False
90
91
def FailedBuildWithinRecentDayThreshold(
    failed_result_tuple_list: List[ct.ResultTupleType],
    build_fail_recent_day_threshold: int) -> bool:
  """Determines whether any build failure happened recently enough.

  Args:
    failed_result_tuple_list: A list of ct.ResultTupleType failed test results.
    build_fail_recent_day_threshold: Threshold based on the recent day range
      that the test caused build fails in.

  Returns:
    True iff at least one failure in |failed_result_tuple_list| occurred
    within the most recent |build_fail_recent_day_threshold| days.
  """
  cutoff_date = date.today() - timedelta(days=build_fail_recent_day_threshold)
  return any(result.date >= cutoff_date for result in failed_result_tuple_list)
112
113
class ExpectationProcessor():
  """Processes aggregated flake results into expectation file modifications.

  Provides interactive and threshold-driven workflows for turning aggregated
  query results into new expectation lines, plus helpers for fetching and
  comparing expectation file contents from gitiles and the local checkout.
  Suite-specific behavior (supported suites, file locations, expected-result
  selection) is supplied by subclasses via the NotImplementedError hooks
  declared at the bottom of this class.
  """

  # pylint: disable=too-many-locals
  def IterateThroughResultsForUser(self, result_map: ct.AggregatedResultsType,
                                   group_by_tags: bool,
                                   include_all_tags: bool) -> None:
    """Iterates over |result_map| for the user to provide input.

    For each unique result, user will be able to decide whether to ignore it (do
    nothing), mark as flaky (add RetryOnFailure expectation), or mark as failing
    (add Failure expectation). If the latter two are chosen, they can also
    associate a bug with the new expectation.

    Args:
      result_map: Aggregated query results from results.AggregateResults to
          iterate over.
      group_by_tags: A boolean denoting whether to attempt to group expectations
          by tags or not. If True, expectations will be added after an existing
          expectation whose tags are the largest subset of the produced tags. If
          False, new expectations will be appended to the end of the file.
      include_all_tags: A boolean denoting whether all tags should be used for
          expectations or only the most specific ones.
    """
    typ_tag_ordered_result_map = self._ReorderMapByTypTags(result_map)
    for suite, test_map in result_map.items():
      if self.IsSuiteUnsupported(suite):
        continue
      for test, tag_map in test_map.items():
        for typ_tags, build_url_list in tag_map.items():

          print('')
          print('Suite: %s' % suite)
          print('Test: %s' % test)
          print('Configuration:\n    %s' % '\n    '.join(typ_tags))
          print('Failed builds:\n    %s' % '\n    '.join(build_url_list))

          # Surface related failures so the user has context before deciding.
          other_failures_for_test = self.FindFailuresInSameTest(
              result_map, suite, test, typ_tags)
          if other_failures_for_test:
            print('Other failures in same test found on other configurations')
            for (tags, failure_count) in other_failures_for_test:
              print('    %d failures on %s' % (failure_count, ' '.join(tags)))

          other_failures_for_config = self.FindFailuresInSameConfig(
              typ_tag_ordered_result_map, suite, test, typ_tags)
          if other_failures_for_config:
            print('Other failures on same configuration found in other tests')
            for (name, failure_count) in other_failures_for_config:
              print('    %d failures in %s' % (failure_count, name))

          expected_result, bug = self.PromptUserForExpectationAction()
          if not expected_result:
            continue

          self.ModifyFileForResult(suite, test, typ_tags, bug, expected_result,
                                   group_by_tags, include_all_tags)

  # pylint: enable=too-many-locals

  # pylint: disable=too-many-locals,too-many-arguments
  def IterateThroughResultsWithThresholds(
      self, result_map: ct.AggregatedResultsType, group_by_tags: bool,
      result_counts: ct.ResultCountType, ignore_threshold: float,
      flaky_threshold: float, include_all_tags: bool) -> None:
    """Iterates over |result_map| and generates expectations based off
       thresholds.

    Args:
      result_map: Aggregated query results from results.AggregateResults to
          iterate over.
      group_by_tags: A boolean denoting whether to attempt to group expectations
          by tags or not. If True, expectations will be added after an existing
          expectation whose tags are the largest subset of the produced tags. If
          False, new expectations will be appended to the end of the file.
      result_counts: A dict in the format output by queries.GetResultCounts.
      ignore_threshold: A float containing the fraction of failed tests under
          which failures will be ignored.
      flaky_threshold: A float containing the fraction of failed tests under
          which failures will be suppressed with RetryOnFailure and above which
          will be suppressed with Failure.
      include_all_tags: A boolean denoting whether all tags should be used for
          expectations or only the most specific ones.
    """
    assert isinstance(ignore_threshold, float)
    assert isinstance(flaky_threshold, float)
    for suite, test_map in result_map.items():
      if self.IsSuiteUnsupported(suite):
        continue
      for test, tag_map in test_map.items():
        for typ_tags, build_url_list in tag_map.items():
          failure_count = len(build_url_list)
          total_count = result_counts[typ_tags][test]
          # NOTE(review): raises ZeroDivisionError if |result_counts| reports
          # zero runs for this test/config — presumably upstream queries
          # guarantee a non-zero count; confirm.
          fraction = failure_count / total_count
          if fraction < ignore_threshold:
            continue
          expected_result = self.GetExpectedResult(fraction, flaky_threshold)
          if expected_result:
            self.ModifyFileForResult(suite, test, typ_tags, '', expected_result,
                                     group_by_tags, include_all_tags)

  def CreateExpectationsForAllResults(
      self, result_map: ct.AggregatedStatusResultsType, group_by_tags: bool,
      include_all_tags: bool, build_fail_total_number_threshold: int,
      build_fail_consecutive_day_threshold: int,
      build_fail_recent_day_threshold: int) -> None:
    """Iterates over |result_map|, selects tests that hit all
       build-fail*-thresholds and adds expectations for their results. Same
       test in all builders that caused build fail must be over all threshold
       requirement.

    Args:
      result_map: Aggregated query results from results.AggregateResults to
          iterate over.
      group_by_tags: A boolean denoting whether to attempt to group expectations
          by tags or not. If True, expectations will be added after an existing
          expectation whose tags are the largest subset of the produced tags. If
          False, new expectations will be appended to the end of the file.
      include_all_tags: A boolean denoting whether all tags should be used for
          expectations or only the most specific ones.
      build_fail_total_number_threshold: Threshold based on the number of
          failed builds caused by a test. Add to the expectations, if actual
          is equal to or more than this threshold. All build-fail*-thresholds
          must be hit in order for a test to actually be suppressed.
      build_fail_consecutive_day_threshold: Threshold based on the number of
          consecutive days that a test caused build fail. Add to the
          expectations, if the consecutive days that it caused build fail
          are equal to or more than this. All build-fail*-thresholds
          must be hit in order for a test to actually be suppressed.
      build_fail_recent_day_threshold: How many days worth of recent builds
          to check for non-hidden failures. A test will be suppressed if
          it has non-hidden failures within this time span. All
          build-fail*-thresholds must be hit in order for a test to actually
          be suppressed.
    """
    for suite, test_map in result_map.items():
      if self.IsSuiteUnsupported(suite):
        continue
      for test, tag_map in test_map.items():
        # Same test in all builders that caused build fail must be over all
        # threshold requirement.
        all_results = list(itertools.chain(*tag_map.values()))
        if (not OverFailedBuildThreshold(all_results,
                                         build_fail_total_number_threshold)
            or not OverFailedBuildByConsecutiveDayThreshold(
                all_results, build_fail_consecutive_day_threshold)):
          continue
        # The recency threshold is applied per-configuration, unlike the two
        # thresholds above which are applied across all configurations.
        for typ_tags, result_tuple_list in tag_map.items():
          if not FailedBuildWithinRecentDayThreshold(
              result_tuple_list, build_fail_recent_day_threshold):
            continue
          status = set()
          for test_result in result_tuple_list:
            # Should always add a pass to all flaky web tests in
            # TestsExpectation that have passed runs.
            status.add('Pass')
            if test_result.status == ct.ResultStatus.CRASH:
              status.add('Crash')
            elif test_result.status == ct.ResultStatus.FAIL:
              status.add('Failure')
            elif test_result.status == ct.ResultStatus.ABORT:
              status.add('Timeout')
          if status:
            # Sort for deterministic output in the expectation line.
            status_list = list(status)
            status_list.sort()
            self.ModifyFileForResult(suite, test, typ_tags, '',
                                     ' '.join(status_list), group_by_tags,
                                     include_all_tags)

  # pylint: enable=too-many-locals,too-many-arguments

  def FindFailuresInSameTest(self, result_map: ct.AggregatedResultsType,
                             target_suite: str, target_test: str,
                             target_typ_tags: ct.TagTupleType
                             ) -> List[Tuple[ct.TagTupleType, int]]:
    """Finds all other failures that occurred in the given test.

    Ignores the failures for the test on the same configuration.

    Args:
      result_map: Aggregated query results from results.AggregateResults.
      target_suite: A string containing the test suite being checked.
      target_test: A string containing the target test case being checked.
      target_typ_tags: A tuple of strings containing the typ tags that the
          failure took place on.

    Returns:
      A list of tuples (typ_tags, count). |typ_tags| is a list of strings
      defining a configuration the specified test failed on. |count| is how many
      times the test failed on that configuration.
    """
    assert isinstance(target_typ_tags, tuple)
    other_failures = []
    tag_map = result_map.get(target_suite, {}).get(target_test, {})
    for typ_tags, build_url_list in tag_map.items():
      if typ_tags == target_typ_tags:
        continue
      other_failures.append((typ_tags, len(build_url_list)))
    return other_failures

  def FindFailuresInSameConfig(
      self, typ_tag_ordered_result_map: TagOrderedAggregateResultType,
      target_suite: str, target_test: str,
      target_typ_tags: ct.TagTupleType) -> List[Tuple[str, int]]:
    """Finds all other failures that occurred on the given configuration.

    Ignores the failures for the given test on the given configuration.

    Args:
      typ_tag_ordered_result_map: Aggregated query results from
          results.AggregateResults that have been reordered using
          _ReorderMapByTypTags.
      target_suite: A string containing the test suite the original failure was
          found in.
      target_test: A string containing the test case the original failure was
          found in.
      target_typ_tags: A tuple of strings containing the typ tags defining the
          configuration to find failures for.

    Returns:
      A list of tuples (full_name, count). |full_name| is a string containing a
      test suite and test case concatenated together. |count| is how many times
      |full_name| failed on the configuration specified by |target_typ_tags|.
    """
    assert isinstance(target_typ_tags, tuple)
    other_failures = []
    suite_map = typ_tag_ordered_result_map.get(target_typ_tags, {})
    for suite, test_map in suite_map.items():
      for test, build_url_list in test_map.items():
        if suite == target_suite and test == target_test:
          continue
        full_name = '%s.%s' % (suite, test)
        other_failures.append((full_name, len(build_url_list)))
    return other_failures

  def _ReorderMapByTypTags(self, result_map: ct.AggregatedResultsType
                           ) -> TagOrderedAggregateResultType:
    """Rearranges |result_map| to use typ tags as the top level keys.

    Args:
      result_map: Aggregated query results from results.AggregateResults

    Returns:
      A dict containing the same contents as |result_map|, but in the following
      format:
      {
        typ_tags (tuple of str): {
          suite (str): {
            test (str): build_url_list (list of str),
          },
        },
      }
    """
    reordered_map = {}
    for suite, test_map in result_map.items():
      for test, tag_map in test_map.items():
        for typ_tags, build_url_list in tag_map.items():
          reordered_map.setdefault(typ_tags,
                                   {}).setdefault(suite,
                                                  {})[test] = build_url_list
    return reordered_map

  def PromptUserForExpectationAction(
      self) -> Union[Tuple[str, str], Tuple[None, None]]:
    """Prompts the user on what to do to handle a failure.

    Returns:
      A tuple (expected_result, bug). |expected_result| is a string containing
      the expected result to use for the expectation, e.g. RetryOnFailure. |bug|
      is a string containing the bug to use for the expectation. If the user
      chooses to ignore the failure, both will be None. Otherwise, both are
      filled, although |bug| may be an empty string if no bug is provided.
    """
    prompt = ('How should this failure be handled? (i)gnore/(r)etry on '
              'failure/(f)ailure: ')
    valid_inputs = ['f', 'i', 'r']
    response = input(prompt).lower()
    while response not in valid_inputs:
      print('Invalid input, valid inputs are %s' % (', '.join(valid_inputs)))
      response = input(prompt).lower()

    if response == 'i':
      return (None, None)
    expected_result = 'RetryOnFailure' if response == 'r' else 'Failure'

    prompt = ('What is the bug URL that should be associated with this '
              'expectation? E.g. crbug.com/1234. ')
    response = input(prompt)
    return (expected_result, response)

  # pylint: disable=too-many-locals,too-many-arguments
  def ModifyFileForResult(self, suite: str, test: str,
                          typ_tags: ct.TagTupleType, bug: str,
                          expected_result: str, group_by_tags: bool,
                          include_all_tags: bool) -> None:
    """Adds an expectation to the appropriate expectation file.

    Args:
      suite: A string containing the suite the failure occurred in.
      test: A string containing the test case the failure occurred in.
      typ_tags: A tuple of strings containing the typ tags the test produced.
      bug: A string containing the bug to associate with the new expectation.
      expected_result: A string containing the expected result to use for the
          new expectation, e.g. RetryOnFailure.
      group_by_tags: A boolean denoting whether to attempt to group expectations
          by tags or not. If True, expectations will be added after an existing
          expectation whose tags are the largest subset of the produced tags. If
          False, new expectations will be appended to the end of the file.
      include_all_tags: A boolean denoting whether all tags should be used for
          expectations or only the most specific ones.
    """
    expectation_file = self.GetExpectationFileForSuite(suite, typ_tags)
    if not include_all_tags:
      typ_tags = self.FilterToMostSpecificTypTags(typ_tags, expectation_file)
    # Add a trailing space so the bug and tag list are separated in the
    # generated line; an empty bug stays empty.
    bug = '%s ' % bug if bug else bug

    def AppendExpectationToEnd():
      expectation_line = '%s[ %s ] %s [ %s ]\n' % (bug, ' '.join(
          self.ProcessTypTagsBeforeWriting(typ_tags)), test, expected_result)
      with open(expectation_file, 'a') as outfile:
        outfile.write(expectation_line)

    if group_by_tags:
      insertion_line, best_matching_tags = (
          self.FindBestInsertionLineForExpectation(typ_tags, expectation_file))
      if insertion_line == -1:
        AppendExpectationToEnd()
      else:
        # If we've already filtered tags, then use those instead of the "best
        # matching" ones.
        tags_to_use = best_matching_tags
        if not include_all_tags:
          tags_to_use = typ_tags
        # enumerate starts at 0 but line numbers start at 1.
        insertion_line -= 1
        tags_to_use = list(self.ProcessTypTagsBeforeWriting(tags_to_use))
        tags_to_use.sort()
        expectation_line = '%s[ %s ] %s [ %s ]\n' % (bug, ' '.join(tags_to_use),
                                                     test, expected_result)
        # Rewrite the whole file with the new line spliced in after the
        # insertion point.
        with open(expectation_file) as infile:
          input_contents = infile.read()
        output_contents = ''
        for lineno, line in enumerate(input_contents.splitlines(True)):
          output_contents += line
          if lineno == insertion_line:
            output_contents += expectation_line
        with open(expectation_file, 'w') as outfile:
          outfile.write(output_contents)
    else:
      AppendExpectationToEnd()

  # pylint: enable=too-many-locals,too-many-arguments

  # pylint: disable=too-many-locals
  def FilterToMostSpecificTypTags(self, typ_tags: ct.TagTupleType,
                                  expectation_file: str) -> ct.TagTupleType:
    """Filters |typ_tags| to the most specific set.

    Assumes that the tags in |expectation_file| are ordered from least specific
    to most specific within each tag group.

    Args:
      typ_tags: A tuple of strings containing the typ tags the test produced.
      expectation_file: A string containing a filepath pointing to the
          expectation file to filter tags with.

    Returns:
      A tuple containing the contents of |typ_tags| with only the most specific
      tag from each tag group remaining.

    Raises:
      RuntimeError: If any tag in |typ_tags| is not found in the expectation
          file's tag groups.
    """
    with open(expectation_file) as infile:
      contents = infile.read()

    tag_groups = self.GetTagGroups(contents)
    num_matches = 0
    tags_in_same_group = collections.defaultdict(list)
    for tag in typ_tags:
      for index, tag_group in enumerate(tag_groups):
        if tag in tag_group:
          tags_in_same_group[index].append(tag)
          num_matches += 1
          break
    if num_matches != len(typ_tags):
      all_tags = set()
      for group in tag_groups:
        all_tags |= set(group)
      raise RuntimeError('Found tags not in expectation file: %s' %
                         ' '.join(set(typ_tags) - all_tags))

    filtered_tags = []
    for index, tags in tags_in_same_group.items():
      if len(tags) == 1:
        filtered_tags.append(tags[0])
      else:
        # Later tags within a group are more specific (see docstring), so keep
        # the tag with the highest index in its group.
        tag_group = tag_groups[index]
        best_index = -1
        for t in tags:
          i = tag_group.index(t)
          if i > best_index:
            best_index = i
        filtered_tags.append(tag_group[best_index])

    # Sort to keep order consistent with what we were given.
    filtered_tags.sort()
    return tuple(filtered_tags)

  # pylint: enable=too-many-locals

  def FindBestInsertionLineForExpectation(self, typ_tags: ct.TagTupleType,
                                          expectation_file: str
                                          ) -> Tuple[int, Set[str]]:
    """Finds the best place to insert an expectation when grouping by tags.

    Args:
      typ_tags: A tuple of strings containing typ tags that were produced by the
          failing test.
      expectation_file: A string containing a filepath to the expectation file
      to use.

    Returns:
      A tuple (insertion_line, best_matching_tags). |insertion_line| is an int
      specifying the line number to insert the expectation into.
      |best_matching_tags| is a set containing the tags of an existing
      expectation that was found to be the closest match. If no appropriate
      line is found, |insertion_line| is -1 and |best_matching_tags| is empty.
    """
    best_matching_tags = set()
    best_insertion_line = -1
    with open(expectation_file) as f:
      content = f.read()
    list_parser = expectations_parser.TaggedTestListParser(content)
    for e in list_parser.expectations:
      expectation_tags = e.tags
      if not expectation_tags.issubset(typ_tags):
        continue
      if len(expectation_tags) > len(best_matching_tags):
        best_matching_tags = expectation_tags
        best_insertion_line = e.lineno
      elif len(expectation_tags) == len(best_matching_tags):
        # NOTE(review): on equally-sized tag sets only the insertion line
        # advances; |best_matching_tags| keeps the earlier expectation's tags.
        # Presumably intentional (insert after the last equally-good match),
        # but confirm.
        if best_insertion_line < e.lineno:
          best_insertion_line = e.lineno
    return best_insertion_line, best_matching_tags

  def GetOriginExpectationFileContents(self) -> Dict[str, str]:
    """Gets expectation file contents from origin/main.

    Returns:
      A dict of expectation file name (str) -> expectation file contents (str)
      that are available on origin/main. File paths are relative to the
      Chromium src dir and are OS paths.
    """
    # Get the path to the expectation file directory in gitiles, i.e. the POSIX
    # path relative to the Chromium src directory.
    origin_file_contents = {}
    expectation_files = self.ListOriginExpectationFiles()
    for f in expectation_files:
      filepath_posix = f.replace(os.sep, '/')
      origin_filepath_url = posixpath.join(GITILES_URL,
                                           filepath_posix) + TEXT_FORMAT_ARG
      # Gitiles serves file contents base64-encoded when ?format=TEXT is used.
      response = urllib.request.urlopen(origin_filepath_url).read()
      decoded_text = base64.b64decode(response).decode('utf-8')
      # After the URL access maintain all the paths as os paths.
      origin_file_contents[f] = decoded_text

    return origin_file_contents

  def GetLocalCheckoutExpectationFileContents(self) -> Dict[str, str]:
    """Gets expectation file contents from the local checkout.

    Returns:
      A dict of expectation file name (str) -> expectation file contents (str)
      that are available from the local checkout. File paths are relative to
      the Chromium src dir and are OS paths.
    """
    local_file_contents = {}
    expectation_files = self.ListLocalCheckoutExpectationFiles()
    for f in expectation_files:
      absolute_filepath = os.path.join(CHROMIUM_SRC_DIR, f)
      with open(absolute_filepath) as infile:
        local_file_contents[f] = infile.read()
    return local_file_contents

  def AssertCheckoutIsUpToDate(self) -> None:
    """Confirms that the local checkout's expectations are up to date.

    Raises:
      RuntimeError: If the local expectation file contents differ from
          origin/main.
    """
    origin_file_contents = self.GetOriginExpectationFileContents()
    local_file_contents = self.GetLocalCheckoutExpectationFileContents()
    if origin_file_contents != local_file_contents:
      raise RuntimeError(
          'Local Chromium checkout expectations are out of date. Please '
          'perform a `git pull`.')

  def GetExpectationFileForSuite(self, suite: str,
                                 typ_tags: ct.TagTupleType) -> str:
    """Finds the correct expectation file for the given suite.

    Must be implemented by subclasses.

    Args:
      suite: A string containing the test suite to look for.
      typ_tags: A tuple of strings containing typ tags that were produced by
          the failing test.

    Returns:
      A string containing a filepath to the correct expectation file for
      |suite| and |typ_tags|.
    """
    raise NotImplementedError

  def ListGitilesDirectory(self, origin_dir: str) -> List[str]:
    """Gets the list of all files from origin/main under origin_dir.

    Args:
      origin_dir: A string containing the path to the directory containing
      expectation files. Path is relative to the Chromium src dir.

    Returns:
      A list of filename strings under origin_dir.
    """
    origin_dir_url = posixpath.join(GITILES_URL, origin_dir) + TEXT_FORMAT_ARG
    response = urllib.request.urlopen(origin_dir_url).read()
    # Response is a base64 encoded, newline-separated list of files in the
    # directory in the format: `mode file_type hash name`
    files = []
    decoded_text = base64.b64decode(response).decode('utf-8')
    for line in decoded_text.splitlines():
      files.append(line.split()[-1])
    return files

  def IsSuiteUnsupported(self, suite: str) -> bool:
    """Returns whether |suite| is unsupported by this processor.

    Must be implemented by subclasses.
    """
    raise NotImplementedError

  def ListLocalCheckoutExpectationFiles(self) -> List[str]:
    """Finds the list of all expectation files from the local checkout.

    Must be implemented by subclasses.

    Returns:
      A list of strings containing relative file paths to expectation files.
      OS paths relative to Chromium src dir are returned.
    """
    raise NotImplementedError

  def ListOriginExpectationFiles(self) -> List[str]:
    """Finds the list of all expectation files from origin/main.

    Must be implemented by subclasses.

    Returns:
      A list of strings containing relative file paths to expectation files.
      OS paths are relative to Chromium src directory.
    """
    raise NotImplementedError

  def GetTagGroups(self, contents: str) -> List[List[str]]:
    """Extracts tag groups from expectation file contents.

    Args:
      contents: A string containing the contents of an expectation file.

    Returns:
      A list of tag groups, each a list of tag strings, in the order they
      appear in |contents|.
    """
    tag_groups = []
    for match in TAG_GROUP_REGEX.findall(contents):
      # Strip the leading comment markers from multi-line tag groups.
      tag_groups.append(match.strip().replace('#', '').split())
    return tag_groups

  def GetExpectedResult(self, fraction: float, flaky_threshold: float) -> str:
    """Returns the expected result string for the given failure fraction.

    Must be implemented by subclasses.
    """
    raise NotImplementedError

  def ProcessTypTagsBeforeWriting(self,
                                  typ_tags: ct.TagTupleType) -> ct.TagTupleType:
    """Hook for subclasses to transform typ tags before writing.

    The default implementation returns |typ_tags| unchanged.
    """
    return typ_tags
671