# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json
import pathlib
import re
import sys
import zipfile


class Generator:
    """Generator is the modules_mapping.json file generator.

    It inspects wheel archives and records, for every importable module
    found in them, the name of the wheel that provides it.

    Attributes:
        stderr: File-like object that error messages are printed to.
        output_file: Path the JSON mapping is written to by ``run``.
        excluded_patterns: Compiled regular expressions; a module whose
            dotted name matches any of them (``re.search``) is not indexed.
        mapping: Dict of dotted module name -> wheel name.
    """

    def __init__(self, stderr, output_file, excluded_patterns):
        self.stderr = stderr
        self.output_file = output_file
        self.excluded_patterns = [re.compile(pattern) for pattern in excluded_patterns]
        # Created per instance: a class-level dict would be shared (and
        # mutated) by every Generator in the process.
        self.mapping = {}

    def dig_wheel(self, whl):
        """Index the modules provided by the wheel file ``whl``.

        The modules are determined by looking at the directory structure of
        the archive. Entries in metadata directories are skipped, except for
        those under ``<name>.data/purelib`` or ``<name>.data/platlib``.

        Raises:
            RuntimeError: if ``whl`` does not have a valid wheel file name.
        """
        with zipfile.ZipFile(whl, "r") as zip_file:
            for path in zip_file.namelist():
                if is_metadata(path) and not data_has_purelib_or_platlib(path):
                    continue
                self.module_for_path(path, whl)

    def simplify(self):
        """Drop mapping entries that are implied by one of their ancestors.

        A module is redundant when its nearest ancestor package that is kept
        in the mapping is provided by the same wheel. Entries with an empty
        wheel name are dropped as well.
        """
        simplified = {}
        # Sorted order guarantees that a package is visited before any of
        # its submodules ("a" sorts before "a.b").
        for module, wheel_name in sorted(self.mapping.items()):
            if not wheel_name:
                continue
            ancestor = module
            redundant = False
            while True:
                if ancestor in simplified:
                    # Only the nearest kept ancestor decides.
                    redundant = simplified[ancestor] == wheel_name
                    break
                if "." not in ancestor:
                    break
                ancestor = ancestor.rsplit(".", 1)[0]
            if not redundant:
                simplified[module] = wheel_name
        self.mapping = simplified

    def module_for_path(self, path, whl):
        """Record the module(s) that the archive entry ``path`` provides.

        Only ``.py`` and ``.so`` entries are indexed. Each indexed entry is
        mapped to the name of the wheel ``whl``.

        Raises:
            RuntimeError: if ``whl`` does not have a valid wheel file name.
        """
        ext = pathlib.Path(path).suffix
        if ext not in (".py", ".so"):
            return

        # Strip the "<name>.data/purelib/" (or platlib) prefix. This is a
        # structural check on the path components: a plain substring test
        # would also mangle ordinary packages whose name merely contains
        # "purelib" or "platlib".
        if data_has_purelib_or_platlib(path):
            root = "/".join(path.split("/")[2:])
        else:
            root = path

        wheel_name = get_wheel_name(whl)

        if root.endswith("/__init__.py"):
            # Note the '/' here means that the __init__.py is not in the
            # root of the wheel, therefore we can index the directory
            # where this file is as an importable package.
            module = root[: -len("/__init__.py")].replace("/", ".")
            if not self.is_excluded(module):
                self.mapping[module] = wheel_name

        # Always index the module file.
        if ext == ".so":
            # Also remove extra metadata that is embedded as part of
            # the file name as an extra extension.
            ext = "".join(pathlib.Path(root).suffixes)
        module = root[: -len(ext)].replace("/", ".")
        if not self.is_excluded(module):
            self.mapping[module] = wheel_name

    def is_excluded(self, module):
        """Return True if ``module`` matches any of the excluded patterns."""
        return any(pattern.search(module) for pattern in self.excluded_patterns)

    def run(self, wheels):
        """Entrypoint for the generator.

        Indexes every wheel in ``wheels``, simplifies the mapping and writes
        it as JSON to ``self.output_file``.

        Returns:
            0 on success; 1 if a wheel was rejected, in which case the error
            is printed to ``self.stderr`` and nothing is written.
        """
        for whl in wheels:
            try:
                self.dig_wheel(whl)
            except (AssertionError, RuntimeError) as error:
                # get_wheel_name reports invalid wheels with RuntimeError.
                print(error, file=self.stderr)
                return 1
        self.simplify()
        mapping_json = json.dumps(self.mapping)
        with open(self.output_file, "w", encoding="utf-8") as f:
            f.write(mapping_json)
        return 0


def get_wheel_name(path):
    """Return the distribution-name part of the wheel file name in ``path``.

    That is everything in the file name before the first ``-``.

    Raises:
        RuntimeError: if the file name does not end in ``.whl`` or has no
            non-empty name part followed by ``-``.
    """
    pp = pathlib.PurePath(path)
    dash = pp.name.find("-")
    # dash == -1 would otherwise silently chop the last character off the
    # file name; dash == 0 would yield an empty wheel name.
    if pp.suffix != ".whl" or dash <= 0:
        raise RuntimeError(
            "{} is not a valid wheel file name: the wheel doesn't follow ".format(
                pp.name
            )
            + "https://www.python.org/dev/peps/pep-0427/#file-name-convention"
        )
    return pp.name[:dash]


# Ref: https://www.python.org/dev/peps/pep-0427/#file-contents.
def is_metadata(path):
    """Check whether ``path`` is in a ``.dist-info`` or ``.data`` directory."""
    top_level = path.split("/")[0].lower()
    return top_level.endswith((".dist-info", ".data"))


# The .data is allowed to contain a full purelib or platlib directory
# These get unpacked into site-packages, so require indexing too.
# This is the same if "Root-Is-Purelib: true" is set and the files are at the root.
# Ref: https://peps.python.org/pep-0427/#what-s-the-deal-with-purelib-vs-platlib
def data_has_purelib_or_platlib(path):
    """Check whether ``path`` is under a metadata dir's purelib/platlib.

    Returns False (instead of failing) for a path with a single component.
    """
    parts = path.split("/")
    if len(parts) < 2 or not is_metadata(path):
        return False
    return parts[1].lower() in ("purelib", "platlib")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="generator",
        description="Generates the modules mapping used by the Gazelle manifest.",
    )
    parser.add_argument("--output_file", type=str)
    parser.add_argument("--exclude_patterns", nargs="+", default=[])
    parser.add_argument("--wheels", nargs="+", default=[])
    args = parser.parse_args()
    generator = Generator(sys.stderr, args.output_file, args.exclude_patterns)
    # sys.exit rather than the site-provided exit(), which is not guaranteed
    # to exist (e.g. under "python -S").
    sys.exit(generator.run(args.wheels))