# Copyright 2024 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Requirements parsing for whl_library creation.

Use cases that the code needs to cover:
* A single requirements_lock file that is used for the host platform.
* Per-OS requirements_lock files that are used for the host platform.
* A target-platform-specific requirements_lock that is used with extra
  pip arguments such as --platform, etc., and download_only = True.

In the last case only a single `requirements_lock` file is allowed; in all
other cases we assume that there may be a desire to resolve the requirements
file for the host platform, to be backwards compatible with the legacy
behavior.
"""

load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:repo_utils.bzl", "repo_utils")
load(":index_sources.bzl", "index_sources")
load(":parse_requirements_txt.bzl", "parse_requirements_txt")
load(":whl_target_platforms.bzl", "select_whls")

def parse_requirements(
        ctx,
        *,
        requirements_by_platform = {},
        extra_pip_args = [],
        get_index_urls = None,
        evaluate_markers = lambda *_: {},
        logger = None):
    """Get the requirements with platforms that the requirements apply to.

    Args:
        ctx: A context that has a `.read` function that reads contents from a label.
        requirements_by_platform (label_keyed_string_dict): a way to have
            different package versions (or different packages) for different
            os, arch combinations.
        extra_pip_args (string list): Extra pip arguments to perform extra
            validations and to be joined with the args found in the files.
        get_index_urls: Callable[[ctx, list[str]], dict], a callable to get all
            of the distribution URLs from a PyPI index. Accepts ctx and
            distribution names to query.
        evaluate_markers: A function used to evaluate the requirements.
            Accepts the ctx and a dict where keys are requirement lines to
            evaluate against the platforms stored as values in the input dict.
            Returns the same dict, but with values being platforms that are
            compatible with the requirements line.
        logger: repo_utils.logger or None, a simple struct to log diagnostic messages.

    Returns:
        A dict where the keys are the normalized distribution names (with
        underscores) and the values are lists of structs, one per requirement
        line and set of extra pip args, with the following attributes:
         * distribution: The non-normalized distribution name.
         * srcs: The Simple API downloadable source list.
         * requirement_line: The original requirement line.
         * target_platforms: The list of target platforms that this package is for.
         * extra_pip_args: The pip arguments that should be passed to
           `whl_library` for this requirement.
         * whls: The whl entries that can be downloaded for this requirement.
         * sdist: The sdist entry that can be downloaded, or None.
         * is_exposed: A boolean indicating whether the package should be
           exposed via the hub repository.
    """
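    # Implementation overview (a summary of the code below):
    #   1. read and parse every requirements file, de-duplicating entries per
    #      normalized distribution name,
    #   2. group the parsed lines by distribution name and by
    #      (requirement_line, extra pip args), tracking which target platforms
    #      each line applies to,
    #   3. evaluate environment markers, optionally query the PyPI index for
    #      distribution URLs, and assemble the structs that are returned.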
    options = {}
    requirements = {}
    for file, plats in requirements_by_platform.items():
        if logger:
            logger.debug(lambda: "Using {} for {}".format(file, plats))
        contents = ctx.read(file)

        # Parse the requirements file directly in starlark to get the information
        # needed for the whl_library declarations later.
        parse_result = parse_requirements_txt(contents)

        # Replicate a surprising behavior that WORKSPACE builds allowed:
        # Defining a repo with the same name multiple times, but only the last
        # definition is respected.
        # The requirement lines might have duplicate names because lines for extras
        # are returned as just the base package name. e.g., `foo[bar]` results
        # in an entry like `("foo", "foo[bar] == 1.0 ...")`.
        requirements_dict = {
            normalize_name(entry[0]): entry
            for entry in sorted(
                parse_result.requirements,
                # Get the longest match and fall back to original WORKSPACE sorting,
                # which should get us the entry with the most extras.
                #
                # FIXME @aignas 2024-05-13: The correct behaviour might be to get an
                # entry with all aggregated extras, but it is unclear if we
                # should do this now.
                key = lambda x: (len(x[1].partition("==")[0]), x),
            )
        }.values()
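        # Illustrative example: if the parsed entries contain both
        # ("foo", "foo==1.0") and ("foo", "foo[bar]==1.0"), the sort key above
        # orders the extras variant last (its text before "==" is longer), so
        # the dict comprehension keeps "foo[bar]==1.0" ("last one wins").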

        tokenized_options = []
        for opt in parse_result.options:
            for p in opt.split(" "):
                tokenized_options.append(p)
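        # Note (illustrative): a parsed option line such as
        # "--extra-index-url https://example.org/simple" is split into two
        # tokens above so that it lines up with the already tokenized
        # `extra_pip_args` joined below.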

        pip_args = tokenized_options + extra_pip_args
        for plat in plats:
            requirements[plat] = requirements_dict
            options[plat] = pip_args

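    # NOTE: from here on `requirements_by_platform` is reused as a local
    # grouping dict of the shape
    # {normalized_name: {(requirement_line, extra_pip_args): struct}}, which is
    # different from the label-keyed attribute of the same name above.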
    requirements_by_platform = {}
    reqs_with_env_markers = {}
    for target_platform, reqs_ in requirements.items():
        extra_pip_args = options[target_platform]

        for distribution, requirement_line in reqs_:
            for_whl = requirements_by_platform.setdefault(
                normalize_name(distribution),
                {},
            )

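            # A ";" in the line means the requirement carries an environment
            # marker, e.g. (illustrative): foo==1.0 ; python_version < "3.11".
            # Such lines are collected here and resolved to concrete platforms
            # via `evaluate_markers` below.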
            if ";" in requirement_line:
                reqs_with_env_markers.setdefault(requirement_line, []).append(target_platform)

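            # Using `setdefault` means that identical (requirement line, extra
            # pip args) pairs coming from different requirements files share a
            # single struct and only accumulate additional target platforms.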
            for_req = for_whl.setdefault(
                (requirement_line, ",".join(extra_pip_args)),
                struct(
                    distribution = distribution,
                    srcs = index_sources(requirement_line),
                    requirement_line = requirement_line,
                    target_platforms = [],
                    extra_pip_args = extra_pip_args,
                ),
            )
            for_req.target_platforms.append(target_platform)

    # This may call out to Python, so execute it early (before reaching out to
    # the internet below) and ensure that we call it only once.
    #
    # NOTE @aignas 2024-07-13: in the future, if this is something that we want
    # to do, we could use Python to parse the requirement lines and infer the
    # URL of the files to download things from. This would be important for
    # VCS package references.
    env_marker_target_platforms = evaluate_markers(ctx, reqs_with_env_markers)
    if logger:
        logger.debug(lambda: "Evaluated env markers from:\n{}\n\nTo:\n{}".format(
            reqs_with_env_markers,
            env_marker_target_platforms,
        ))

    index_urls = {}
    if get_index_urls:
        index_urls = get_index_urls(
            ctx,
            # Use list({}) as a way to have a set
            list({
                req.distribution: None
                for reqs in requirements_by_platform.values()
                for req in reqs.values()
            }),
        )
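    # When index querying is enabled, `index_urls` maps each distribution name
    # to the artifacts known to the index; `_add_dists` below looks them up by
    # sha256 via `.whls` and `.sdists`.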

    ret = {}
    for whl_name, reqs in requirements_by_platform.items():
        requirement_target_platforms = {}
        for r in reqs.values():
            target_platforms = env_marker_target_platforms.get(r.requirement_line, r.target_platforms)
            for p in target_platforms:
                requirement_target_platforms[p] = None

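        # A package is considered exposed via the hub repository only if it is
        # present on every target platform that requirements were parsed for
        # (see the debug message below).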
        is_exposed = len(requirement_target_platforms) == len(requirements)
        if not is_exposed and logger:
            logger.debug(lambda: "Package '{}' will not be exposed because it is only present on a subset of platforms: {} out of {}".format(
                whl_name,
                sorted(requirement_target_platforms),
                sorted(requirements),
            ))

        for r in sorted(reqs.values(), key = lambda r: r.requirement_line):
            whls, sdist = _add_dists(
                requirement = r,
                index_urls = index_urls.get(whl_name),
                logger = logger,
            )

            target_platforms = env_marker_target_platforms.get(r.requirement_line, r.target_platforms)
            ret.setdefault(whl_name, []).append(
                struct(
                    distribution = r.distribution,
                    srcs = r.srcs,
                    requirement_line = r.requirement_line,
                    target_platforms = sorted(target_platforms),
                    extra_pip_args = r.extra_pip_args,
                    whls = whls,
                    sdist = sdist,
                    is_exposed = is_exposed,
                ),
            )

    if logger:
        logger.debug(lambda: "Will configure whl repos: {}".format(ret.keys()))

    return ret

def select_requirement(requirements, *, platform):
    """A simple function to get a requirement for a particular platform.

    Args:
        requirements (list[struct]): The list of requirements as returned by
            the `parse_requirements` function above.
        platform (str or None): The host platform. Usually an output of the
            `host_platform` function. If None, then this function will return
            the first requirement it finds.

    Returns:
        None if not found, or one of the requirement structs returned as values
        by the `parse_requirements` function above, i.e. the requirement that
        should be downloaded for the host platform.
    """
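    # The `endswith` check below allows a target platform that carries an ABI
    # prefix (illustrative: "cp39_linux_x86_64") to match a plain host
    # platform string such as "linux_x86_64".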
    maybe_requirement = [
        req
        for req in requirements
        if not platform or [p for p in req.target_platforms if p.endswith(platform)]
    ]
    if not maybe_requirement:
        # Sometimes the package is not present for the host platform if there
        # are whls specified only in particular requirements files; in that
        # case just continue. However, if the download_only flag is set, the
        # user can also specify the target platform of the wheel packages they
        # want to download; in that case there will always be a requirement
        # here, so we will not reach this branch.
        return None

    return maybe_requirement[0]

def host_platform(ctx):
    """Return a string representation of the host platform of the repository.

    Args:
        ctx (struct): The `module_ctx` or `repository_ctx` object.

    Returns:
        The string representation of the platform that we can later use in the
        `pip` machinery.
    """
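    # Illustrative example: on a Linux x86_64 machine this returns
    # "linux_x86_64".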
    return "{}_{}".format(
        repo_utils.get_platforms_os_name(ctx),
        repo_utils.get_platforms_cpu_name(ctx),
    )

def _add_dists(*, requirement, index_urls, logger = None):
    """Get the downloadable dists based on the information from the PyPI index.

    Args:
        requirement: A single requirement struct built by the parse_requirements function.
        index_urls: The simpleapi_download result for this distribution.
        logger: A logger for printing diagnostic info.

    Returns:
        A (whls, sdist) tuple, where whls is a list of compatible wheel entries
        and sdist is the matching sdist entry or None.
    """
    if not index_urls:
        return [], None

    whls = []
    sdist = None

    # TODO @aignas 2024-05-22: it is in theory possible to add all
    # requirements by version instead of by sha256. This may be useful
    # for some projects.
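    # Each sha256 recorded for the requirement line identifies exactly one
    # artifact on the index: either a wheel or an sdist. Wheels are collected
    # into a list, while at most a single sdist is kept as a fallback.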
    for sha256 in requirement.srcs.shas:
        # For now if the artifact is marked as yanked we just ignore it.
        #
        # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api

        maybe_whl = index_urls.whls.get(sha256)
        if maybe_whl and not maybe_whl.yanked:
            whls.append(maybe_whl)
            continue

        maybe_sdist = index_urls.sdists.get(sha256)
        if maybe_sdist and not maybe_sdist.yanked:
            sdist = maybe_sdist
            continue

        if logger:
            logger.warn(lambda: "Could not find a whl or an sdist with sha256={}".format(sha256))

    yanked = {}
    for dist in whls + [sdist]:
        if dist and dist.yanked:
            yanked.setdefault(dist.yanked, []).append(dist.filename)
    if yanked and logger:
        logger.warn(lambda: "\n".join([
            "the following distributions got yanked:",
        ] + [
            "reason: {}\n  {}".format(reason, "\n".join(sorted(dists)))
            for reason, dists in yanked.items()
        ]))

    # Filter out the wheels that are incompatible with the target_platforms.
    whls = select_whls(whls = whls, want_platforms = requirement.target_platforms, logger = logger)

    return whls, sdist