xref: /aosp_15_r20/external/google-cloud-java/owl-bot-postprocessor/synthtool/gcp/snippets.py (revision 55e87721aa1bc457b326496a7ca40f3ea1a63287)
1# Copyright 2020 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import glob
16import os
17import re
18from typing import Dict, List
19
# Line patterns for the region tags that delimit snippets in sample files.
# Each pattern begins with ".*", so the tag may appear anywhere on the line
# (typically inside a comment).
#
# Opening/closing snippet tags look like "[START name]" / "[END name]", where
# the name consists of lowercase letters, digits and underscores. Group 1
# captures the snippet name.
OPEN_SNIPPET_REGEX = r".*\[START ([a-z0-9_]+)\].*$"
CLOSE_SNIPPET_REGEX = r".*\[END ([a-z0-9_]+)\].*$"
# "[START_EXCLUDE]" / "[END_EXCLUDE]" tags bracket lines that are left out of
# the snippets they appear in.
OPEN_EXCLUDE_REGEX = r".*\[START_EXCLUDE\].*$"
CLOSE_EXCLUDE_REGEX = r".*\[END_EXCLUDE\].*$"
24
25
26def _trim_leading_whitespace(lines: List[str]) -> List[str]:
27    """Trims leading, plain spaces from the snippet content. Finds the minimum
28    number of leading spaces, ignoring empty lines, and removes that number of
29    spaces from each line.
30
31    Args:
32        lines (List[str]): Lines of content. These lines are newline terminated.
33
34    Returns:
35        List of trimmed lines.
36    """
37
38    def number_of_leading_spaces(input: str) -> int:
39        return len(input) - len(input.lstrip(" "))
40
41    def is_empty_line(input: str) -> bool:
42        if re.match(r"^\s*$", input):
43            return True
44        return False
45
46    leading_spaces = [
47        number_of_leading_spaces(line) for line in lines if not is_empty_line(line)
48    ]
49    max_leading_spaces = min(leading_spaces)
50    return [
51        "\n" if is_empty_line(line) else line[max_leading_spaces:] for line in lines
52    ]
53
54
def all_snippets_from_file(sample_file: str) -> Dict[str, str]:
    """Extracts every tagged snippet found in a single sample file.

    Snippets are delimited by "[START name]" / "[END name]" lines and may be
    nested or interleaved. Lines between "[START_EXCLUDE]" and
    "[END_EXCLUDE]" are left out of all snippets. Tag lines themselves are
    never part of a snippet's content.

    Args:
        sample_file (str): Sample file to parse.

    Returns:
        Dictionary of snippet name to snippet code. Empty when the file does
        not exist.
    """
    collected = {}  # type: Dict[str, List[str]]

    if os.path.exists(sample_file):
        active_tags = set()
        skipping = False
        # NOTE(review): no encoding is passed to open(), so the platform
        # default text encoding is used when reading the sample.
        with open(sample_file) as handle:
            for raw_line in handle:
                # Snippet tags are only honoured outside an excluded region;
                # an opening tag takes priority over a closing tag.
                if not skipping:
                    started = re.match(pattern=OPEN_SNIPPET_REGEX, string=raw_line)
                    if started:
                        tag = started[1]
                        active_tags.add(tag)
                        # A tag that is reopened keeps appending to the
                        # lines gathered earlier under the same name.
                        collected.setdefault(tag, [])
                        continue
                    ended = re.match(pattern=CLOSE_SNIPPET_REGEX, string=raw_line)
                    if ended:
                        active_tags.discard(ended[1])
                        continue

                # Exclude tags toggle capture on and off; any other line is
                # content for every snippet that is currently open.
                if re.match(pattern=OPEN_EXCLUDE_REGEX, string=raw_line):
                    skipping = True
                elif re.match(pattern=CLOSE_EXCLUDE_REGEX, string=raw_line):
                    skipping = False
                elif not skipping:
                    for tag in active_tags:
                        collected[tag].append(raw_line)

    result = {}
    for name, body_lines in collected.items():
        result[name] = "".join(_trim_leading_whitespace(body_lines))
    return result
108
109
def all_snippets(snippet_globs: List[str]) -> Dict[str, str]:
    """Collects the snippets from every file matched by the given globs.

    Each glob is expanded recursively and every matching file is parsed for
    snippets. When the same snippet name is found more than once, the
    occurrence processed last replaces the earlier one.

    Args:
        snippet_globs (List[str]): List of path globs to expand.

    Returns:
        Dictionary of snippet name to snippet code.
    """
    merged = {}
    for pattern in snippet_globs:
        for path in glob.glob(pattern, recursive=True):
            merged.update(all_snippets_from_file(path))
    return merged
125