# Copyright 2018 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Runfiles lookup library for Bazel-built Python binaries and tests.

See @rules_python//python/runfiles/README.md for usage instructions.
"""
import inspect
import os
import posixpath
import sys
from typing import Dict, Optional, Tuple, Union


class _ManifestBased:
    """`Runfiles` strategy that parses a runfiles-manifest to look up runfiles."""

    def __init__(self, path: str) -> None:
        """Loads and caches the manifest located at `path`.

        Args:
          path: string; path of the runfiles manifest file
        Raises:
          ValueError: if `path` is None or empty
          TypeError: if `path` is not a string
          IOError: if the manifest cannot be read
        """
        if not path:
            raise ValueError("manifest path must not be empty")
        if not isinstance(path, str):
            raise TypeError("manifest path must be a string")
        self._path = path
        self._runfiles = _ManifestBased._LoadRunfiles(path)

    def RlocationChecked(self, path: str) -> Optional[str]:
        """Returns the runtime path of a runfile.

        Args:
          path: string; runfiles-root-relative path of the runfile
        Returns:
          the manifest entry for `path`, or the entry of the longest listed
          directory prefix joined with the remaining relative path, or None if
          neither `path` nor any of its prefixes is listed
        """
        exact_match = self._runfiles.get(path)
        if exact_match:
            return exact_match
        # If path references a runfile that lies under a directory that
        # itself is a runfile, then only the directory is listed in the
        # manifest. Look up all prefixes of path in the manifest and append
        # the relative path from the prefix to the looked up path.
        prefix_end = len(path)
        while True:
            prefix_end = path.rfind("/", 0, prefix_end - 1)
            # -1: no further "/" to split on. 0: the only remaining prefix is
            # the empty string, which is never a manifest key (manifest lines
            # are stripped before they are split). Stopping at 0 also matters
            # for termination: the next search would use an end index of -1,
            # which counts from the end of the string and restarts the scan.
            if prefix_end <= 0:
                return None
            prefix_match = self._runfiles.get(path[0:prefix_end])
            if prefix_match:
                return prefix_match + "/" + path[prefix_end + 1 :]

    @staticmethod
    def _LoadRunfiles(path: str) -> Dict[str, str]:
        """Loads the runfiles manifest.

        Each non-blank line is either `<runfiles path> <real path>` (split at
        the first space) or a single token, which is mapped to itself.

        Args:
          path: string; path of the runfiles manifest file
        Returns:
          a dict mapping runfiles-root-relative paths to runtime paths
        """
        result = {}
        # Decode explicitly as UTF-8 so that non-ASCII paths do not depend on
        # the platform's default encoding.
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                key, separator, value = line.partition(" ")
                result[key] = value if separator else line
        return result

    def _GetRunfilesDir(self) -> str:
        """Derives the runfiles directory from the manifest path, or returns ""."""
        # Both suffixes have the same length, so one slice handles either
        # path separator.
        if self._path.endswith(("/MANIFEST", "\\MANIFEST")):
            return self._path[: -len("/MANIFEST")]
        if self._path.endswith(".runfiles_manifest"):
            return self._path[: -len("_manifest")]
        return ""

    def EnvVars(self) -> Dict[str, str]:
        """Returns the environment variables describing this manifest."""
        directory = self._GetRunfilesDir()
        return {
            "RUNFILES_MANIFEST_FILE": self._path,
            "RUNFILES_DIR": directory,
            # TODO(laszlocsomor): remove JAVA_RUNFILES once the Java launcher can
            # pick up RUNFILES_DIR.
            "JAVA_RUNFILES": directory,
        }


class _DirectoryBased:
    """`Runfiles` strategy that appends runfiles paths to the runfiles root."""

    def __init__(self, path: str) -> None:
        """Stores `path` as the runfiles root.

        Args:
          path: string; path of the runfiles directory
        Raises:
          ValueError: if `path` is None or empty
          TypeError: if `path` is not a string
        """
        if not path:
            raise ValueError("runfiles directory path must not be empty")
        if not isinstance(path, str):
            raise TypeError("runfiles directory path must be a string")
        self._runfiles_root = path

    def RlocationChecked(self, path: str) -> str:
        """Returns `path` joined onto the runfiles root; existence is not checked."""
        # Use posixpath instead of os.path, because Bazel only creates a runfiles
        # tree on Unix platforms, so `Create()` will only create a directory-based
        # runfiles strategy on those platforms.
        return posixpath.join(self._runfiles_root, path)

    def EnvVars(self) -> Dict[str, str]:
        """Returns the environment variables describing this runfiles root."""
        return {
            "RUNFILES_DIR": self._runfiles_root,
            # TODO(laszlocsomor): remove JAVA_RUNFILES once the Java launcher can
            # pick up RUNFILES_DIR.
            "JAVA_RUNFILES": self._runfiles_root,
        }


class Runfiles:
    """Returns the runtime location of runfiles.

    Runfiles are data-dependencies of Bazel-built binaries and tests.
    """

    def __init__(self, strategy: Union[_ManifestBased, _DirectoryBased]) -> None:
        """Wraps `strategy` and loads the repository mapping, if one is found."""
        self._strategy = strategy
        self._python_runfiles_root = _FindPythonRunfilesRoot()
        self._repo_mapping = _ParseRepoMapping(
            strategy.RlocationChecked("_repo_mapping")
        )

    def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[str]:
        """Returns the runtime path of a runfile.

        Runfiles are data-dependencies of Bazel-built binaries and tests.

        The returned path may not be valid. The caller should check the path's
        validity and that the path exists.

        The function may return None. In that case the caller can be sure that the
        rule does not know about this data-dependency.

        Args:
          path: string; runfiles-root-relative path of the runfile
          source_repo: string; optional; the canonical name of the repository
            whose repository mapping should be used to resolve apparent to
            canonical repository names in `path`. If `None` (default), the
            repository mapping of the repository containing the caller of this
            method is used. Explicitly setting this parameter should only be
            necessary for libraries that want to wrap the runfiles library. Use
            `CurrentRepository` to obtain canonical repository names.
        Returns:
          the path to the runfile, which the caller should check for existence, or
          None if the method doesn't know about this runfile
        Raises:
          TypeError: if `path` is not a string
          ValueError: if `path` is None or empty, or it's absolute or not normalized
        """
        if not path:
            raise ValueError("path must not be empty")
        if not isinstance(path, str):
            raise TypeError("path must be a string")
        if (
            path.startswith("../")
            or "/.." in path
            or path.startswith("./")
            or "/./" in path
            or path.endswith("/.")
            or "//" in path
        ):
            raise ValueError('path is not normalized: "%s"' % path)
        if path[0] == "\\":
            raise ValueError('path is absolute without a drive letter: "%s"' % path)
        if os.path.isabs(path):
            return path

        if source_repo is None and self._repo_mapping:
            # Look up runfiles using the repository mapping of the caller of the
            # current method. If the repo mapping is empty, determining this
            # name is not necessary.
            # NOTE: frame=2 is counted from inside CurrentRepository, so this
            # call must stay directly in the body of Rlocation.
            source_repo = self.CurrentRepository(frame=2)

        # Split off the first path component, which contains the repository
        # name (apparent or canonical).
        target_repo, _, remainder = path.partition("/")
        if not remainder or (source_repo, target_repo) not in self._repo_mapping:
            # One of the following is the case:
            # - not using Bzlmod, so the repository mapping is empty and
            #   apparent and canonical repository names are the same
            # - target_repo is already a canonical repository name and does not
            #   have to be mapped.
            # - path did not contain a slash and referred to a root symlink,
            #   which also should not be mapped.
            return self._strategy.RlocationChecked(path)

        assert (
            source_repo is not None
        ), "BUG: if the `source_repo` is None, we should never go past the `if` statement above"

        # target_repo is an apparent repository name. Look up the corresponding
        # canonical repository name with respect to the current repository,
        # identified by its canonical name.
        target_canonical = self._repo_mapping[(source_repo, target_repo)]
        return self._strategy.RlocationChecked(target_canonical + "/" + remainder)

    def EnvVars(self) -> Dict[str, str]:
        """Returns environment variables for subprocesses.

        The caller should set the returned key-value pairs in the environment of
        subprocesses in case those subprocesses are also Bazel-built binaries that
        need to use runfiles.

        Returns:
          {string: string}; a dict; keys are environment variable names, values are
          the values for these environment variables
        """
        return self._strategy.EnvVars()

    def CurrentRepository(self, frame: int = 1) -> str:
        """Returns the canonical name of the caller's Bazel repository.

        For example, this function returns '' (the empty string) when called
        from the main repository and a string of the form
        'rules_python~0.13.0' when called from code in the repository
        corresponding to the rules_python Bazel module.

        More information about the difference between canonical repository
        names and the `@repo` part of labels is available at:
        https://bazel.build/build/bzlmod#repository-names

        NOTE: This function inspects the callstack to determine where in the
        runfiles the caller is located to determine which repository it came
        from. This may fail or produce incorrect results depending on who the
        caller is, for example if it is not represented by a Python source
        file. Use the `frame` argument to control the stack lookup.

        Args:
            frame: int; the stack frame to return the repository name for.
            Defaults to 1, the caller of the CurrentRepository function.

        Returns:
            The canonical name of the Bazel repository containing the file
            containing the frame-th caller of this function

        Raises:
            ValueError: if the caller cannot be determined or the caller's file
            path is not contained in the Python runfiles tree
        """
        try:
            # pylint: disable-next=protected-access
            caller_path = inspect.getfile(sys._getframe(frame))
        except (TypeError, ValueError) as exc:
            raise ValueError("failed to determine caller's file path") from exc
        caller_runfiles_path = os.path.relpath(caller_path, self._python_runfiles_root)
        if caller_runfiles_path.startswith(".." + os.path.sep):
            # With Python 3.10 and earlier, sys.path contains the directory
            # of the script, which can result in a module being loaded from
            # outside the runfiles tree. In this case, assume that the module is
            # located in the main repository.
            # With Python 3.11 and higher, the Python launcher sets
            # PYTHONSAFEPATH, which prevents this behavior.
            # TODO: This doesn't cover the case of a script being run from an
            # external repository, which could be heuristically detected
            # by parsing the script's path.
            # Compare the full (major, minor) version rather than only the
            # minor component.
            if (
                sys.version_info < (3, 11)
                and sys.path[0] != self._python_runfiles_root
            ):
                return ""
            raise ValueError(
                "{} does not lie under the runfiles root {}".format(
                    caller_path, self._python_runfiles_root
                )
            )

        # The first path component is the repository's runfiles directory.
        # `partition` returns the whole string when there is no separator,
        # instead of dropping its last character as slicing with the -1
        # result of `find` would.
        caller_runfiles_directory, _, _ = caller_runfiles_path.partition(os.path.sep)
        # With Bzlmod, the runfiles directory of the main repository is always
        # named "_main". Without Bzlmod, the value returned by this function is
        # never used, so we just assume Bzlmod is enabled.
        if caller_runfiles_directory == "_main":
            # The canonical name of the main repository (also known as the
            # workspace) is the empty string.
            return ""
        # For all other repositories, the name of the runfiles directory is the
        # canonical name.
        return caller_runfiles_directory

    # TODO: Update return type to Self when 3.11 is the min version
    # https://peps.python.org/pep-0673/
    @staticmethod
    def CreateManifestBased(manifest_path: str) -> "Runfiles":
        """Returns a `Runfiles` instance backed by the manifest at `manifest_path`."""
        return Runfiles(_ManifestBased(manifest_path))

    # TODO: Update return type to Self when 3.11 is the min version
    # https://peps.python.org/pep-0673/
    @staticmethod
    def CreateDirectoryBased(runfiles_dir_path: str) -> "Runfiles":
        """Returns a `Runfiles` instance rooted at `runfiles_dir_path`."""
        return Runfiles(_DirectoryBased(runfiles_dir_path))

    # TODO: Update return type to Self when 3.11 is the min version
    # https://peps.python.org/pep-0673/
    @staticmethod
    def Create(env: Optional[Dict[str, str]] = None) -> Optional["Runfiles"]:
        """Returns a new `Runfiles` instance.

        The returned object is either:
        - manifest-based, meaning it looks up runfile paths from a manifest file, or
        - directory-based, meaning it looks up runfile paths under a given directory
          path

        If `env` contains "RUNFILES_MANIFEST_FILE" with non-empty value, this method
        returns a manifest-based implementation. The object eagerly reads and caches
        the whole manifest file upon instantiation; this may be relevant for
        performance consideration.

        Otherwise, if `env` contains "RUNFILES_DIR" with non-empty value (checked in
        this priority order), this method returns a directory-based implementation.

        If neither case applies, this method returns None.

        Args:
          env: {string: string}; optional; the map of environment variables. If None,
            this function uses the environment variable map of this process.
        Raises:
          IOError: if some IO error occurs.
        """
        env_map = os.environ if env is None else env
        manifest = env_map.get("RUNFILES_MANIFEST_FILE")
        if manifest:
            return Runfiles.CreateManifestBased(manifest)

        directory = env_map.get("RUNFILES_DIR")
        if directory:
            return Runfiles.CreateDirectoryBased(directory)

        return None


# Support legacy imports by defining a private symbol.
_Runfiles = Runfiles


def _FindPythonRunfilesRoot() -> str:
    """Finds the root of the Python runfiles tree."""
    root = __file__
    # Walk up our own runfiles path to the root of the runfiles tree from which
    # the current file is being run. This path coincides with what the Bazel
    # Python stub sets up as sys.path[0]. Since that entry can be changed at
    # runtime, we rederive it here.
    for _ in range("rules_python/python/runfiles/runfiles.py".count("/") + 1):
        root = os.path.dirname(root)
    return root


def _ParseRepoMapping(repo_mapping_path: Optional[str]) -> Dict[Tuple[str, str], str]:
    """Parses the repository mapping manifest.

    Each line has the form
    `<current canonical name>,<target apparent name>,<target canonical name>`.

    Args:
      repo_mapping_path: string; optional; path of the repository mapping file
    Returns:
      a dict mapping (current canonical name, target apparent name) to the
      target's canonical name; empty if the path is None or empty, or if the
      file does not exist
    """
    # If the repository mapping file can't be found, that is not an error: We
    # might be running without Bzlmod enabled or there may not be any runfiles.
    # In this case, just apply an empty repo mapping.
    if not repo_mapping_path:
        return {}
    try:
        # Decode explicitly as UTF-8 so that parsing does not depend on the
        # platform's default encoding.
        with open(repo_mapping_path, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        return {}

    repo_mapping = {}
    for line in content.split("\n"):
        if not line:
            # Empty line following the last line break
            break
        current_canonical, target_local, target_canonical = line.split(",")
        repo_mapping[(current_canonical, target_local)] = target_canonical

    return repo_mapping


def CreateManifestBased(manifest_path: str) -> Runfiles:
    """Module-level alias of `Runfiles.CreateManifestBased`."""
    return Runfiles.CreateManifestBased(manifest_path)


def CreateDirectoryBased(runfiles_dir_path: str) -> Runfiles:
    """Module-level alias of `Runfiles.CreateDirectoryBased`."""
    return Runfiles.CreateDirectoryBased(runfiles_dir_path)


def Create(env: Optional[Dict[str, str]] = None) -> Optional[Runfiles]:
    """Module-level alias of `Runfiles.Create`."""
    return Runfiles.Create(env)