# Copyright 2024 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Requirements parsing for whl_library creation.

Use cases that the code needs to cover:
* A single requirements_lock file that is used for the host platform.
* Per-OS requirements_lock files that are used for the host platform.
* A target platform specific requirements_lock that is used with extra
  pip arguments such as --platform, etc. and download_only = True.

In the last case only a single `requirements_lock` file is allowed. In all
other cases we assume that there may be a desire to resolve the requirements
file for the host platform, to be backwards compatible with the legacy
behavior.
"""

load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:repo_utils.bzl", "repo_utils")
load(":index_sources.bzl", "index_sources")
load(":parse_requirements_txt.bzl", "parse_requirements_txt")
load(":whl_target_platforms.bzl", "select_whls")

def parse_requirements(
        ctx,
        *,
        requirements_by_platform = {},
        extra_pip_args = [],
        get_index_urls = None,
        evaluate_markers = lambda *_: {},
        logger = None):
    """Get the requirements with the platforms that the requirements apply to.

    Args:
        ctx: A context that has a .read function that reads contents from a label.
        requirements_by_platform (label_keyed_string_dict): a way to have
            different package versions (or different packages) for different
            os/arch combinations.
        extra_pip_args (string list): Extra pip arguments used to perform extra
            validations and joined with the args found in the requirements files.
        get_index_urls: Callable[[ctx, list[str]], dict], a callable to get all
            of the distribution URLs from a PyPI index. Accepts ctx and
            distribution names to query.
        evaluate_markers: A function used to evaluate requirement markers.
            Accepts the ctx and a dict where the keys are requirement lines to
            evaluate against the platforms stored as values in the input dict.
            Returns the same dict, but with the values reduced to the platforms
            that are compatible with the requirement line.
        logger: repo_utils.logger or None, a simple struct to log diagnostic messages.

    Returns:
        A dict where the keys are the normalized distribution names (with
        underscores) and the values are lists of structs with the following
        attributes:
         * distribution: The non-normalized distribution name.
         * srcs: The Simple API downloadable source list.
         * requirement_line: The original requirement line.
         * target_platforms: The list of target platforms that this package is for.
         * extra_pip_args: The extra pip arguments that should be passed to
           `whl_library`.
         * whls: The whl entries selected from the PyPI index, if any.
         * sdist: The sdist entry selected from the PyPI index, if any.
         * is_exposed: A boolean indicating whether the package should be
           exposed via the hub repository.
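
        For illustration only, the returned value has roughly the shape shown
        below; the package name, version and platform names are hypothetical
        and the srcs, whls and sdist attributes are omitted for brevity:

            {
                "requests": [
                    struct(
                        distribution = "requests",
                        requirement_line = "requests==2.31.0",
                        target_platforms = ["linux_x86_64", "osx_aarch64"],
                        extra_pip_args = [],
                        is_exposed = True,
                    ),
                ],
            }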
75 """ 76 options = {} 77 requirements = {} 78 for file, plats in requirements_by_platform.items(): 79 if logger: 80 logger.debug(lambda: "Using {} for {}".format(file, plats)) 81 contents = ctx.read(file) 82 83 # Parse the requirements file directly in starlark to get the information 84 # needed for the whl_library declarations later. 85 parse_result = parse_requirements_txt(contents) 86 87 # Replicate a surprising behavior that WORKSPACE builds allowed: 88 # Defining a repo with the same name multiple times, but only the last 89 # definition is respected. 90 # The requirement lines might have duplicate names because lines for extras 91 # are returned as just the base package name. e.g., `foo[bar]` results 92 # in an entry like `("foo", "foo[bar] == 1.0 ...")`. 93 requirements_dict = { 94 normalize_name(entry[0]): entry 95 for entry in sorted( 96 parse_result.requirements, 97 # Get the longest match and fallback to original WORKSPACE sorting, 98 # which should get us the entry with most extras. 99 # 100 # FIXME @aignas 2024-05-13: The correct behaviour might be to get an 101 # entry with all aggregated extras, but it is unclear if we 102 # should do this now. 103 key = lambda x: (len(x[1].partition("==")[0]), x), 104 ) 105 }.values() 106 107 tokenized_options = [] 108 for opt in parse_result.options: 109 for p in opt.split(" "): 110 tokenized_options.append(p) 111 112 pip_args = tokenized_options + extra_pip_args 113 for plat in plats: 114 requirements[plat] = requirements_dict 115 options[plat] = pip_args 116 117 requirements_by_platform = {} 118 reqs_with_env_markers = {} 119 for target_platform, reqs_ in requirements.items(): 120 extra_pip_args = options[target_platform] 121 122 for distribution, requirement_line in reqs_: 123 for_whl = requirements_by_platform.setdefault( 124 normalize_name(distribution), 125 {}, 126 ) 127 128 if ";" in requirement_line: 129 reqs_with_env_markers.setdefault(requirement_line, []).append(target_platform) 130 131 for_req = for_whl.setdefault( 132 (requirement_line, ",".join(extra_pip_args)), 133 struct( 134 distribution = distribution, 135 srcs = index_sources(requirement_line), 136 requirement_line = requirement_line, 137 target_platforms = [], 138 extra_pip_args = extra_pip_args, 139 ), 140 ) 141 for_req.target_platforms.append(target_platform) 142 143 # This may call to Python, so execute it early (before calling to the 144 # internet below) and ensure that we call it only once. 145 # 146 # NOTE @aignas 2024-07-13: in the future, if this is something that we want 147 # to do, we could use Python to parse the requirement lines and infer the 148 # URL of the files to download things from. This should be important for 149 # VCS package references. 
    env_marker_target_platforms = evaluate_markers(ctx, reqs_with_env_markers)
    if logger:
        logger.debug(lambda: "Evaluated env markers from:\n{}\n\nTo:\n{}".format(
            reqs_with_env_markers,
            env_marker_target_platforms,
        ))

    index_urls = {}
    if get_index_urls:
        index_urls = get_index_urls(
            ctx,
            # Use list({}) as a way to have a set
            list({
                req.distribution: None
                for reqs in requirements_by_platform.values()
                for req in reqs.values()
            }),
        )

    ret = {}
    for whl_name, reqs in requirements_by_platform.items():
        requirement_target_platforms = {}
        for r in reqs.values():
            target_platforms = env_marker_target_platforms.get(r.requirement_line, r.target_platforms)
            for p in target_platforms:
                requirement_target_platforms[p] = None

        is_exposed = len(requirement_target_platforms) == len(requirements)
        if not is_exposed and logger:
            logger.debug(lambda: "Package '{}' will not be exposed because it is only present on a subset of platforms: {} out of {}".format(
                whl_name,
                sorted(requirement_target_platforms),
                sorted(requirements),
            ))

        for r in sorted(reqs.values(), key = lambda r: r.requirement_line):
            whls, sdist = _add_dists(
                requirement = r,
                index_urls = index_urls.get(whl_name),
                logger = logger,
            )

            target_platforms = env_marker_target_platforms.get(r.requirement_line, r.target_platforms)
            ret.setdefault(whl_name, []).append(
                struct(
                    distribution = r.distribution,
                    srcs = r.srcs,
                    requirement_line = r.requirement_line,
                    target_platforms = sorted(target_platforms),
                    extra_pip_args = r.extra_pip_args,
                    whls = whls,
                    sdist = sdist,
                    is_exposed = is_exposed,
                ),
            )

    if logger:
        logger.debug(lambda: "Will configure whl repos: {}".format(ret.keys()))

    return ret

def select_requirement(requirements, *, platform):
    """A simple function to get a requirement for a particular platform.

    Args:
        requirements (list[struct]): One of the lists of requirement structs
            returned by the `parse_requirements` function above.
        platform (str or None): The host platform. Usually an output of the
            `host_platform` function. If None, then this function will return
            the first requirement it finds.

    Returns:
        The struct (one of the values returned by the `parse_requirements`
        function) for the requirement that should be downloaded for the host
        platform, or None if not found.
    """
    maybe_requirement = [
        req
        for req in requirements
        if not platform or [p for p in req.target_platforms if p.endswith(platform)]
    ]
    if not maybe_requirement:
        # Sometimes the package is not present for the host platform if there
        # are whls specified only in particular requirements files; in that
        # case just continue. However, if the download_only flag is set, the
        # user can also specify the target platform of the wheel packages they
        # want to download, in which case there will always be a requirement
        # here, so we will not reach this code branch.
        return None

    return maybe_requirement[0]

def host_platform(ctx):
    """Return a string representation of the repository OS and architecture.

    Args:
        ctx (struct): The `module_ctx` or `repository_ctx` object.

    Returns:
        The string representation of the platform that can later be used in the
        `pip` machinery.
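
        For example, on a Linux x86_64 host this would typically evaluate to
        something like `"linux_x86_64"`; the exact value depends on what
        `repo_utils` reports for the OS and CPU names.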
251 """ 252 return "{}_{}".format( 253 repo_utils.get_platforms_os_name(ctx), 254 repo_utils.get_platforms_cpu_name(ctx), 255 ) 256 257def _add_dists(*, requirement, index_urls, logger = None): 258 """Populate dists based on the information from the PyPI index. 259 260 This function will modify the given requirements_by_platform data structure. 261 262 Args: 263 requirement: The result of parse_requirements function. 264 index_urls: The result of simpleapi_download. 265 logger: A logger for printing diagnostic info. 266 """ 267 if not index_urls: 268 return [], None 269 270 whls = [] 271 sdist = None 272 273 # TODO @aignas 2024-05-22: it is in theory possible to add all 274 # requirements by version instead of by sha256. This may be useful 275 # for some projects. 276 for sha256 in requirement.srcs.shas: 277 # For now if the artifact is marked as yanked we just ignore it. 278 # 279 # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api 280 281 maybe_whl = index_urls.whls.get(sha256) 282 if maybe_whl and not maybe_whl.yanked: 283 whls.append(maybe_whl) 284 continue 285 286 maybe_sdist = index_urls.sdists.get(sha256) 287 if maybe_sdist and not maybe_sdist.yanked: 288 sdist = maybe_sdist 289 continue 290 291 if logger: 292 logger.warn(lambda: "Could not find a whl or an sdist with sha256={}".format(sha256)) 293 294 yanked = {} 295 for dist in whls + [sdist]: 296 if dist and dist.yanked: 297 yanked.setdefault(dist.yanked, []).append(dist.filename) 298 if yanked: 299 logger.warn(lambda: "\n".join([ 300 "the following distributions got yanked:", 301 ] + [ 302 "reason: {}\n {}".format(reason, "\n".join(sorted(dists))) 303 for reason, dists in yanked.items() 304 ])) 305 306 # Filter out the wheels that are incompatible with the target_platforms. 307 whls = select_whls(whls = whls, want_platforms = requirement.target_platforms, logger = logger) 308 309 return whls, sdist 310