# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Python module to find feature names in source code.

These functions are declared in a separate module to allow multiprocessing to
correctly unpickle the called functions again.
"""

import glob
import itertools
import multiprocessing
import pathlib
import re

# Matches a three-argument BASE_FEATURE(...) declaration terminated by ");".
# The pattern is a bytes pattern because files are scanned as raw bytes.
# DOTALL lets "." cross newlines so declarations wrapped over several lines
# are still matched.
BASE_FEATURE_PATTERN = br"BASE_FEATURE\((.*?),(.*?),(.*?)\);"
BASE_FEATURE_RE = re.compile(BASE_FEATURE_PATTERN,
                             flags=re.MULTILINE | re.DOTALL)

# Only search these directories for flags. If your flag is outside these root
# directories, then add the directory here.
DIRECTORIES_TO_SEARCH = [
    "android_webview",
    "apps",
    "ash",
    "base",
    "cc",
    "chrome",
    "chromecast",
    "chromeos",
    "clank",
    "components",
    "content",
    "courgette",
    "crypto",
    "dbus",
    "device",
    "extensions",
    "fuchsia_web",
    "gin",
    "google_apis",
    "google_update",
    "gpu",
    "headless",
    "infra",
    "internal",
    "ios",
    "ipc",
    "media",
    "mojo",
    "native_client",
    "native_client_sdk",
    "net",
    "pdf",
    "ppapi",
    "printing",
    "remoting",
    "rlz",
    "sandbox",
    "services",
    "skia",
    "sql",
    "storage",
    # third_party/blink handled separately in FindDeclaredFeatures
    "ui",
    "url",
    "v8",
    "webkit",
    "weblayer",
]


def _FindFeaturesInFile(filepath):
  """Returns the feature names declared via BASE_FEATURE in one file.

  Args:
    filepath: Path of the source file to scan.
  Returns:
    List of feature names (str), one per BASE_FEATURE match, in file order.
  """
  # Work on bytes to avoid utf-8 decode errors outside feature declarations
  file_contents = pathlib.Path(filepath).read_bytes()
  matches = BASE_FEATURE_RE.finditer(file_contents)
  # Remove whitespace and surrounding " from the second argument
  # which is the feature name.
  return [m.group(2).strip().strip(b'"').decode("utf-8") for m in matches]


def FindDeclaredFeatures(input_api):
  """Finds all declared feature names in the source code.

  This function will scan *.cc files in the directories listed in
  DIRECTORIES_TO_SEARCH and in third_party/blink, plus *.mm files in the ios
  directory, and look for features defined with the BASE_FEATURE macro. It
  will extract the feature names.

  Args:
    input_api: InputApi instance; only input_api.change.RepositoryRoot() is
      used, to locate the root of the source tree.
  Returns:
    Set of defined feature names in the source tree.
  """
  # Features are supposed to be defined in .cc files.
  # Iterate over the search folders in the root.
  root = pathlib.Path(input_api.change.RepositoryRoot())
  glob_patterns = [
      str(p / pathlib.Path("**/*.cc")) for p in root.iterdir()
      if p.is_dir() and p.name in DIRECTORIES_TO_SEARCH
  ]

  # blink is the only directory in third_party that should be searched.
  glob_patterns.append(str(root / pathlib.Path("third_party/blink/**/*.cc")))

  # Additional features for iOS can be found in mm files in the ios directory.
  glob_patterns.append(str(root / pathlib.Path("ios/**/*.mm")))

  # Create glob iterators that lazily go over the files to search
  glob_iterators = [
      glob.iglob(pattern, recursive=True) for pattern in glob_patterns
  ]

  feature_names = set()
  # Limit to 4 processes - the disk accesses becomes a bottleneck with just a
  # few processes, but splitting the searching across multiple CPUs does yield
  # a benefit of a few seconds.
  # The exact batch size does not seem to matter much, as long as it is >> 1.
  # The pool is used as a context manager so the worker processes are always
  # released, even if globbing or a worker raises. Results must be consumed
  # inside the block because leaving it terminates the pool.
  with multiprocessing.Pool(4) as pool:
    found_features = pool.imap_unordered(
        _FindFeaturesInFile, itertools.chain.from_iterable(glob_iterators),
        1000)
    for feature_list in found_features:
      feature_names.update(feature_list)
  return feature_names