xref: /aosp_15_r20/external/bazelbuild-rules_python/sphinxdocs/private/proto_to_markdown.py (revision 60517a1edbc8ecf509223e9af94a7adec7d736b8)
1# Copyright 2023 The Bazel Authors. All rights reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
import argparse
import io
import itertools
import pathlib
import sys
import textwrap
from typing import Callable, Iterator, TextIO, TypeVar

from stardoc.proto import stardoc_output_pb2
24
# Short alias for the Stardoc attribute type enum.
_AttributeType = stardoc_output_pb2.AttributeType

# Generic element type used by the type annotations of the helpers below.
_T = TypeVar("_T")
28
29
30def _anchor_id(text: str) -> str:
31    # MyST/Sphinx's markdown processing doesn't like dots in anchor ids.
32    return "#" + text.replace(".", "_").lower()
33
34
35# Create block attribute line.
36# See https://myst-parser.readthedocs.io/en/latest/syntax/optional.html#block-attributes
37def _block_attrs(*attrs: str) -> str:
38    return "{" + " ".join(attrs) + "}\n"
39
40
41def _link(display: str, link: str = "", *, ref: str = "", classes: str = "") -> str:
42    if ref:
43        ref = f"[{ref}]"
44    if link:
45        link = f"({link})"
46    if classes:
47        classes = "{" + classes + "}"
48    return f"[{display}]{ref}{link}{classes}"
49
50
51def _span(display: str, classes: str = ".span") -> str:
52    return f"[{display}]{{" + classes + "}"
53
54
def _link_here_icon(anchor: str) -> str:
    """Return a "¶" link pointing at ``anchor`` with the ``.headerlink`` class.

    The headerlink class activates some special logic to show/hide text upon
    mouse-over; it's how headings show a clickable link.
    """
    icon = "¶"
    return _link(icon, anchor, classes=".headerlink")
59
60
def _inline_anchor(anchor: str) -> str:
    """Return a span with empty display text whose attribute block is ``anchor``."""
    return _span(display="", classes=anchor)
63
64
65def _indent_block_text(text: str) -> str:
66    return text.strip().replace("\n", "\n  ")
67
68
69def _join_csv_and(values: list[str]) -> str:
70    if len(values) == 1:
71        return values[0]
72
73    values = list(values)
74    values[-1] = "and " + values[-1]
75    return ", ".join(values)
76
77
def _position_iter(values: list[_T]) -> Iterator[tuple[bool, bool, _T]]:
    """Yield each value together with flags marking the first and last items.

    Args:
        values: A sized sequence to iterate over.

    Yields:
        ``(is_first, is_last, value)`` tuples, in iteration order.
    """
    # Compute the final index once instead of on every iteration.
    last_index = len(values) - 1
    for i, value in enumerate(values):
        yield i == 0, i == last_index, value
81
82
83def _sort_attributes_inplace(attributes):
84    # Sort attributes so the iteration order results in a Python-syntax
85    # valid signature. Keep name first because that's convention.
86    attributes.sort(key=lambda a: (a.name != "name", bool(a.default_value), a.name))
87
88
class _MySTRenderer:
    """Renders a Stardoc ``ModuleInfo`` proto as MyST-flavored markdown.

    Each documented object (rule, provider, function, aspect, module
    extension, repository rule) is emitted as a ``bzl:*`` directive and
    written directly to the output stream.
    """

    def __init__(
        self,
        module: stardoc_output_pb2.ModuleInfo,
        out_stream: TextIO,
        public_load_path: str,
    ):
        """Store the inputs; nothing is written until ``render()`` is called.

        Args:
            module: The Stardoc module info to render.
            out_stream: Text stream that receives the markdown.
            public_load_path: Path shown as the file's title. When empty, the
                path is derived from ``module.file`` instead.
        """
        self._module = module
        self._out_stream = out_stream
        self._public_load_path = public_load_path

    def render(self):
        """Write the markdown for the whole module to the output stream."""
        self._render_module(self._module)

    def _render_module(self, module: stardoc_output_pb2.ModuleInfo):
        """Write the file header, the module docstring and every object.

        Objects of all kinds are merged into one list and emitted sorted by
        name, ignoring case.
        """
        if self._public_load_path:
            bzl_path = self._public_load_path
        else:
            # Keep only the text that follows the first "//" of the file label.
            bzl_path = "//" + module.file.split("//")[1]

        self._write(":::{default-domain} bzl\n:::\n")
        self._write(":::{bzl:currentfile} ", bzl_path, "\n:::\n\n")
        self._write(
            f"# {bzl_path}\n",
            "\n",
            module.module_docstring.strip(),
            "\n\n",
        )

        # Pair each object with its name and its render method so that
        # objects of different kinds can be sorted together by name.
        objects = itertools.chain(
            ((r.rule_name, r, self._render_rule) for r in module.rule_info),
            ((p.provider_name, p, self._render_provider) for p in module.provider_info),
            ((f.function_name, f, self._render_func) for f in module.func_info),
            ((a.aspect_name, a, self._render_aspect) for a in module.aspect_info),
            (
                (m.extension_name, m, self._render_module_extension)
                for m in module.module_extension_info
            ),
            (
                (r.rule_name, r, self._render_repository_rule)
                for r in module.repository_rule_info
            ),
        )

        objects = sorted(objects, key=lambda v: v[0].lower())

        for _, obj, func in objects:
            func(obj)
            self._write("\n")

    def _render_aspect(self, aspect: stardoc_output_pb2.AspectInfo):
        """Write a ``bzl:aspect`` directive for ``aspect``.

        The aspect's attributes are sorted in place as a side effect.
        """
        _sort_attributes_inplace(aspect.attribute)
        self._write("::::::{bzl:aspect} ", aspect.aspect_name, "\n\n")
        edges = ", ".join(sorted(f"`{attr}`" for attr in aspect.aspect_attribute))
        self._write(":aspect-attributes: ", edges, "\n\n")
        self._write(aspect.doc_string.strip(), "\n\n")

        if aspect.attribute:
            self._render_attributes(aspect.attribute)
            self._write("\n")
        self._write("::::::\n")

    def _render_module_extension(self, mod_ext: stardoc_output_pb2.ModuleExtensionInfo):
        """Write a ``bzl:module-extension`` directive with nested tag classes.

        Each tag class's attributes are sorted in place as a side effect.
        """
        self._write("::::::{bzl:module-extension} ", mod_ext.extension_name, "\n\n")
        self._write(mod_ext.doc_string.strip(), "\n\n")

        for tag in mod_ext.tag_class:
            # Only the bare tag name is used; it is not qualified with the
            # extension name.
            tag_name = f"{tag.tag_name}"
            # NOTE(review): unlike the other directives in this class, the
            # name goes on the directive line and the signature is written
            # afterwards as body text -- confirm this is intended.
            self._write(":::::{bzl:tag-class} ", tag_name, "\n\n")

            _sort_attributes_inplace(tag.attribute)
            self._render_signature(
                tag_name,
                tag.attribute,
                get_name=lambda a: a.name,
                get_default=lambda a: a.default_value,
            )

            self._write(tag.doc_string.strip(), "\n\n")
            self._render_attributes(tag.attribute)
            self._write(":::::\n")
        self._write("::::::\n")

    def _render_repository_rule(self, repo_rule: stardoc_output_pb2.RepositoryRuleInfo):
        """Write a ``bzl:repo-rule`` directive for ``repo_rule``.

        The rule's attributes are sorted in place as a side effect.
        """
        self._write("::::::{bzl:repo-rule} ")
        _sort_attributes_inplace(repo_rule.attribute)
        self._render_signature(
            repo_rule.rule_name,
            repo_rule.attribute,
            get_name=lambda a: a.name,
            get_default=lambda a: a.default_value,
        )
        self._write(repo_rule.doc_string.strip(), "\n\n")
        if repo_rule.attribute:
            self._render_attributes(repo_rule.attribute)
        if repo_rule.environ:
            self._write(":envvars: ", ", ".join(sorted(repo_rule.environ)))
        self._write("\n")
        # Close the directive opened above, as every other _render_* method
        # does; otherwise whatever is rendered next lands inside this fence.
        self._write("::::::\n")

    def _render_rule(self, rule: stardoc_output_pb2.RuleInfo):
        """Write a ``bzl:rule`` directive for ``rule``.

        The rule's attributes are sorted in place as a side effect.
        """
        rule_name = rule.rule_name
        _sort_attributes_inplace(rule.attribute)
        self._write("::::{bzl:rule} ")
        self._render_signature(
            rule_name,
            rule.attribute,
            get_name=lambda r: r.name,
            get_default=lambda r: r.default_value,
        )
        self._write(rule.doc_string.strip(), "\n\n")

        if rule.advertised_providers.provider_name:
            self._write(":provides: ")
            self._write(" | ".join(rule.advertised_providers.provider_name))
            self._write("\n")
        self._write("\n")

        if rule.attribute:
            self._render_attributes(rule.attribute)
            self._write("\n")
        self._write("::::\n")

    def _rule_attr_type_string(self, attr: stardoc_output_pb2.AttributeInfo) -> str:
        """Return the type text shown in the docs for ``attr``.

        Types without an entry in the table fall back to the enum value's
        own name.
        """
        type_names = {
            _AttributeType.NAME: "Name",
            _AttributeType.INT: "int",
            _AttributeType.LABEL: "label",
            _AttributeType.STRING: "str",
            _AttributeType.STRING_LIST: "list[str]",
            _AttributeType.INT_LIST: "list[int]",
            _AttributeType.LABEL_LIST: "list[label]",
            _AttributeType.BOOLEAN: "bool",
            _AttributeType.LABEL_STRING_DICT: "dict[label, str]",
            _AttributeType.STRING_DICT: "dict[str, str]",
            _AttributeType.STRING_LIST_DICT: "dict[str, list[str]]",
            _AttributeType.OUTPUT: "label",
            _AttributeType.OUTPUT_LIST: "list[label]",
        }
        type_name = type_names.get(attr.type)
        if type_name is None:
            # If we get here, it means the value was unknown for some reason.
            # Rather than error, give some somewhat understandable value.
            return _AttributeType.Name(attr.type)
        return type_name

    def _render_func(self, func: stardoc_output_pb2.StarlarkFunctionInfo):
        """Write a ``bzl:function`` directive for ``func``.

        Emits the signature, the docstring, one ``:arg`` field per parameter,
        and the return and deprecation notes when present.
        """
        self._write("::::::{bzl:function} ")

        parameters = self._render_func_signature(func)

        self._write(func.doc_string.strip(), "\n\n")

        for param in parameters:
            self._write(f":arg {param.name}:\n")
            if param.default_value:
                default_value = self._format_default_value(param.default_value)
                self._write("  {default-value}`", default_value, "`\n")
            if param.doc_string:
                self._write("  ", _indent_block_text(param.doc_string), "\n")
            else:
                self._write("  _undocumented_\n")
            self._write("\n")

        # "return" is a Python keyword, so the field can't be read with
        # plain attribute syntax.
        if return_doc := getattr(func, "return").doc_string:
            self._write(":returns:\n")
            self._write("  ", _indent_block_text(return_doc), "\n")
        if func.deprecated.doc_string:
            # NOTE(review): the ":" after "{deprecated}" becomes part of the
            # directive argument -- confirm that is intended.
            self._write(":::::{deprecated}: unknown\n")
            self._write("  ", _indent_block_text(func.deprecated.doc_string), "\n")
            self._write(":::::\n")
        self._write("::::::\n")

    def _render_func_signature(self, func):
        """Write ``name(params...)`` for ``func`` and return the parameters.

        Returns:
            The parameters in the order they were written, with any ``self``
            parameter removed.
        """
        self._write(f"{func.function_name}(")
        # TODO: Have an "is method" directive in the docstring to decide if
        # the self parameter should be removed.
        parameters = [param for param in func.parameter if param.name != "self"]

        # Unfortunately, the stardoc info is incomplete and inaccurate:
        # * The position of the `*args` param is wrong; it'll always
        #   be last (or second to last, if kwargs is present).
        # * Stardoc doesn't explicitly tell us if an arg is `*args` or
        #   `**kwargs`. Hence f(*args) or f(**kwargs) is ambiguous.
        # See these issues:
        # https://github.com/bazelbuild/stardoc/issues/226
        # https://github.com/bazelbuild/stardoc/issues/225
        #
        # Below, we try to take what info we have and infer what the original
        # signature was. In short:
        # * A default=empty, mandatory=false arg is either *args or **kwargs
        # * If two of those are seen, the first is *args and the second is
        #   **kwargs. Recall, however, the position of *args is mis-represented.
        # * If a single default=empty, mandatory=false arg is found, then
        #   it's ambiguous as to whether it's *args or **kwargs. To figure
        #   that out, we:
        #   * If it's not the last arg, then it must be *args. In practice,
        #     this never occurs due to #226 above.
        #   * If we saw a mandatory arg after an optional arg, it is treated
        #     as **kwargs, and a bare `*` marker is written at the start of
        #     the signature (see below).
        #   * Otherwise, it's ambiguous. We just guess by looking at the
        #     parameter name.
        var_args = None
        var_kwargs = None
        saw_mandatory_after_optional = False
        first_mandatory_after_optional_index = None
        optionals_started = False
        for i, p in enumerate(parameters):
            optionals_started = optionals_started or not p.mandatory
            if p.mandatory and optionals_started:
                saw_mandatory_after_optional = True
                if first_mandatory_after_optional_index is None:
                    first_mandatory_after_optional_index = i

            if not p.default_value and not p.mandatory:
                if var_args is None:
                    var_args = (i, p)
                else:
                    var_kwargs = p

        if var_args and not var_kwargs:
            if var_args[0] != len(parameters) - 1:
                pass
            elif saw_mandatory_after_optional:
                var_kwargs = var_args[1]
                var_args = None
            elif var_args[1].name in ("kwargs", "attrs"):
                var_kwargs = var_args[1]
                var_args = None

        # Partial workaround for
        # https://github.com/bazelbuild/stardoc/issues/226: `*args` renders last
        #
        # Only move `*args` when it sits after the first mandatory-after-
        # optional parameter. If it already precedes that parameter, removing
        # it would shift the insertion index and push it past the parameter.
        if (
            var_args
            and var_kwargs
            and first_mandatory_after_optional_index is not None
            and var_args[0] > first_mandatory_after_optional_index
        ):
            parameters.pop(var_args[0])
            parameters.insert(first_mandatory_after_optional_index, var_args[1])

        # The only way a mandatory-after-optional can occur is
        # if there was `*args` before it. But if we didn't see it,
        # it must have been the unbound `*` symbol, which stardoc doesn't
        # tell us exists.
        if saw_mandatory_after_optional and not var_args:
            self._write("*, ")
        for _, is_last, p in _position_iter(parameters):
            if var_args and p.name == var_args[1].name:
                self._write("*")
            elif var_kwargs and p.name == var_kwargs.name:
                self._write("**")
            self._write(p.name)
            if p.default_value:
                self._write("=", self._format_default_value(p.default_value))
            if not is_last:
                self._write(", ")
        self._write(")\n")
        return parameters

    def _render_provider(self, provider: stardoc_output_pb2.ProviderInfo):
        """Write a ``bzl:provider`` directive for ``provider``.

        Nested inside are an ``<init>`` function signature listing the fields
        and one ``bzl:provider-field`` directive per field. The provider's
        fields are sorted by name in place as a side effect.
        """
        self._write("::::::{bzl:provider} ", provider.provider_name, "\n")
        if provider.origin_key:
            self._render_origin_key_option(provider.origin_key)
        self._write("\n")

        self._write(provider.doc_string.strip(), "\n\n")

        self._write(":::::{bzl:function} ")
        provider.field_info.sort(key=lambda f: f.name)
        self._render_signature(
            "<init>",
            provider.field_info,
            get_name=lambda f: f.name,
        )
        # TODO: Add support for provider.init once our Bazel version supports
        # that field
        self._write(":::::\n")

        for field in provider.field_info:
            self._write(":::::{bzl:provider-field} ", field.name, "\n")
            self._write(field.doc_string.strip())
            self._write("\n")
            self._write(":::::\n")
        self._write("::::::\n")

    def _render_attributes(self, attributes: list[stardoc_output_pb2.AttributeInfo]):
        """Write one ``:attr <name>:`` field per attribute.

        Each field holds the default value (when set), the type, the doc
        text, a ``bzl:attr-info`` block and, when present, the required
        providers.

        Raises:
            ValueError: If a provider group's ``origin_key`` and
                ``provider_name`` lists differ in length (``zip(strict=True)``).
        """
        for attr in attributes:
            attr_type = self._rule_attr_type_string(attr)
            self._write(f":attr {attr.name}:\n")
            if attr.default_value:
                self._write(f"  {{bzl:default-value}}`{attr.default_value}`\n")
            self._write(f"  {{type}}`{attr_type}`\n")
            self._write("  ", _indent_block_text(attr.doc_string), "\n")
            self._write("  :::{bzl:attr-info} Info\n")
            if attr.mandatory:
                self._write("  :mandatory:\n")
            self._write("  :::\n")
            self._write("\n")

            if attr.provider_name_group:
                self._write("  {required-providers}`")
                for _, outer_is_last, provider_group in _position_iter(
                    attr.provider_name_group
                ):
                    pairs = list(
                        zip(
                            provider_group.origin_key,
                            provider_group.provider_name,
                            strict=True,
                        )
                    )
                    # Groups with several providers are wrapped in brackets.
                    if len(pairs) > 1:
                        self._write("[")
                    for _, inner_is_last, (origin_key, name) in _position_iter(pairs):
                        if origin_key.file == "<native>":
                            origin = origin_key.name
                        else:
                            origin = f"{origin_key.file}%{origin_key.name}"
                        # We have to use "title <ref>" syntax because the same
                        # name might map to different origins. Stardoc gives us
                        # the provider's actual name, not the name of the symbol
                        # used in the source.
                        self._write(f"'{name} <{origin}>'")
                        if not inner_is_last:
                            self._write(", ")

                    if len(pairs) > 1:
                        self._write("]")

                    if not outer_is_last:
                        self._write(" | ")
                self._write("`\n")

            self._write("\n")

    def _render_signature(
        self,
        name: str,
        parameters: list[_T],
        *,
        get_name: Callable[[_T], str],
        get_default: Callable[[_T], str] = lambda v: None,
    ):
        """Write ``name(p1, p2=default, ...)`` followed by a blank line.

        Args:
            name: The name written before the parenthesis.
            parameters: The objects to list as parameters, in order.
            get_name: Returns the parameter name for one of ``parameters``.
            get_default: Returns the parameter's default value; a falsy
                result means no ``=default`` suffix is written.
        """
        self._write(name, "(")
        for _, is_last, param in _position_iter(parameters):
            param_name = get_name(param)
            self._write(f"{param_name}")
            default_value = get_default(param)
            if default_value:
                default_value = self._format_default_value(default_value)
                self._write(f"={default_value}")
            if not is_last:
                self._write(", ")
        self._write(")\n\n")

    def _render_origin_key_option(self, origin_key, indent=""):
        """Write an ``:origin-key: <file>%<name>`` option line.

        Args:
            origin_key: Object with ``file`` and ``name`` attributes.
            indent: Text written before the option.
        """
        self._write(
            indent,
            ":origin-key: ",
            self._format_option_value(f"{origin_key.file}%{origin_key.name}"),
            "\n",
        )

    def _format_default_value(self, default_value):
        """Return ``default_value`` in the form used in signatures and docs.

        Values starting with ``<`` are wrapped in single quotes, ``Label(...)``
        values are reduced to their quoted string, and anything else is
        returned unchanged.
        """
        # Handle <function foo from //baz:bar.bzl>
        # For now, just use quotes for lack of a better option
        if default_value.startswith("<"):
            return f"'{default_value}'"
        if default_value.startswith("Label("):
            # Handle Label(*, "@some//label:target")
            start_quote = default_value.find('"')
            end_quote = default_value.rfind('"')
            if 0 <= start_quote < end_quote:
                return default_value[start_quote : end_quote + 1]
            # No quoted string inside; keep the text as-is rather than
            # slicing it down to an empty string or a lone quote.
            return default_value
        return default_value

    def _format_option_value(self, value):
        """Return ``value`` with a leading ``@`` escaped by a backslash."""
        # Leading @ symbols are special markup; escape them.
        if value.startswith("@"):
            return "\\" + value
        else:
            return value

    def _write(self, *lines: str):
        """Write the given strings to the output stream, in order, as-is."""
        self._out_stream.writelines(lines)
482
483
def _convert(
    *,
    proto: pathlib.Path,
    output: pathlib.Path,
    public_load_path: str,
):
    """Render a serialized ``ModuleInfo`` proto file as markdown.

    Args:
        proto: File holding the serialized ``ModuleInfo`` bytes.
        output: File the rendered text is written to, encoded as UTF-8.
        public_load_path: Passed to the renderer as its public load path.
    """
    serialized = proto.read_bytes()
    module = stardoc_output_pb2.ModuleInfo.FromString(serialized)
    with output.open("wt", encoding="utf8") as out_stream:
        renderer = _MySTRenderer(module, out_stream, public_load_path)
        renderer.render()
493
494
495def _create_parser():
496    parser = argparse.ArgumentParser(fromfile_prefix_chars="@")
497    parser.add_argument("--proto", dest="proto", type=pathlib.Path)
498    parser.add_argument("--output", dest="output", type=pathlib.Path)
499    parser.add_argument("--public-load-path", dest="public_load_path")
500    return parser
501
502
def main(args):
    """Parse ``args`` and run the conversion.

    Args:
        args: Command line arguments, without the program name.

    Returns:
        0, for use as the process exit code.
    """
    parsed = _create_parser().parse_args(args)
    conversion_kwargs = {
        "proto": parsed.proto,
        "output": parsed.output,
        "public_load_path": parsed.public_load_path,
    }
    _convert(**conversion_kwargs)
    return 0
511
512
# Script entry point: main()'s return value becomes the process exit code.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
515