xref: /aosp_15_r20/external/google-cloud-java/owl-bot-postprocessor/synthtool/gcp/snippets.py (revision 55e87721aa1bc457b326496a7ca40f3ea1a63287)
1*55e87721SMatt Gilbride# Copyright 2020 Google LLC
2*55e87721SMatt Gilbride#
3*55e87721SMatt Gilbride# Licensed under the Apache License, Version 2.0 (the "License");
4*55e87721SMatt Gilbride# you may not use this file except in compliance with the License.
5*55e87721SMatt Gilbride# You may obtain a copy of the License at
6*55e87721SMatt Gilbride#
7*55e87721SMatt Gilbride#     https://www.apache.org/licenses/LICENSE-2.0
8*55e87721SMatt Gilbride#
9*55e87721SMatt Gilbride# Unless required by applicable law or agreed to in writing, software
10*55e87721SMatt Gilbride# distributed under the License is distributed on an "AS IS" BASIS,
11*55e87721SMatt Gilbride# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*55e87721SMatt Gilbride# See the License for the specific language governing permissions and
13*55e87721SMatt Gilbride# limitations under the License.
14*55e87721SMatt Gilbride
15*55e87721SMatt Gilbrideimport glob
16*55e87721SMatt Gilbrideimport os
17*55e87721SMatt Gilbrideimport re
18*55e87721SMatt Gilbridefrom typing import Dict, List
19*55e87721SMatt Gilbride
# Line-level patterns for the region tags recognised in sample files. They are
# applied with re.match against a single line, so the leading/trailing ".*"
# lets a tag sit anywhere on the line (e.g. inside a comment).
#
# Snippet tags capture the snippet name in group 1; a name may contain only
# lowercase letters, digits and underscores, and must be separated from
# START/END by exactly one space.
OPEN_SNIPPET_REGEX = r".*\[START ([a-z0-9_]+)\].*$"
CLOSE_SNIPPET_REGEX = r".*\[END ([a-z0-9_]+)\].*$"
# Exclude tags carry no name; they bracket lines that are left out of snippets.
OPEN_EXCLUDE_REGEX = r".*\[START_EXCLUDE\].*$"
CLOSE_EXCLUDE_REGEX = r".*\[END_EXCLUDE\].*$"
24*55e87721SMatt Gilbride
25*55e87721SMatt Gilbride
26*55e87721SMatt Gilbridedef _trim_leading_whitespace(lines: List[str]) -> List[str]:
27*55e87721SMatt Gilbride    """Trims leading, plain spaces from the snippet content. Finds the minimum
28*55e87721SMatt Gilbride    number of leading spaces, ignoring empty lines, and removes that number of
29*55e87721SMatt Gilbride    spaces from each line.
30*55e87721SMatt Gilbride
31*55e87721SMatt Gilbride    Args:
32*55e87721SMatt Gilbride        lines (List[str]): Lines of content. These lines are newline terminated.
33*55e87721SMatt Gilbride
34*55e87721SMatt Gilbride    Returns:
35*55e87721SMatt Gilbride        List of trimmed lines.
36*55e87721SMatt Gilbride    """
37*55e87721SMatt Gilbride
38*55e87721SMatt Gilbride    def number_of_leading_spaces(input: str) -> int:
39*55e87721SMatt Gilbride        return len(input) - len(input.lstrip(" "))
40*55e87721SMatt Gilbride
41*55e87721SMatt Gilbride    def is_empty_line(input: str) -> bool:
42*55e87721SMatt Gilbride        if re.match(r"^\s*$", input):
43*55e87721SMatt Gilbride            return True
44*55e87721SMatt Gilbride        return False
45*55e87721SMatt Gilbride
46*55e87721SMatt Gilbride    leading_spaces = [
47*55e87721SMatt Gilbride        number_of_leading_spaces(line) for line in lines if not is_empty_line(line)
48*55e87721SMatt Gilbride    ]
49*55e87721SMatt Gilbride    max_leading_spaces = min(leading_spaces)
50*55e87721SMatt Gilbride    return [
51*55e87721SMatt Gilbride        "\n" if is_empty_line(line) else line[max_leading_spaces:] for line in lines
52*55e87721SMatt Gilbride    ]
53*55e87721SMatt Gilbride
54*55e87721SMatt Gilbride
def all_snippets_from_file(sample_file: str) -> Dict[str, str]:
    """Reads in a sample file and parses out all contained snippets.

    Snippet regions are delimited by lines matching OPEN_SNIPPET_REGEX and
    CLOSE_SNIPPET_REGEX; lines between OPEN_EXCLUDE_REGEX and
    CLOSE_EXCLUDE_REGEX are left out. Tag lines themselves are never part of
    a snippet. A snippet name that is opened more than once accumulates the
    lines from every one of its regions.

    Args:
        sample_file (str): Sample file to parse.

    Returns:
        Dictionary of snippet name to snippet code, with the code dedented by
        _trim_leading_whitespace. Empty if sample_file is not an existing
        regular file.
    """
    # Use isfile rather than exists: a recursive glob can also yield
    # directories, and open() on a directory raises instead of reading.
    if not os.path.isfile(sample_file):
        return {}

    snippet_lines = {}  # type: Dict[str, List[str]]
    open_snippets = set()
    with open(sample_file) as f:
        excluding = False
        # Iterate over each line:
        # - If the line matches an opening snippet tag, add that snippet tag to
        #   the set of open tags.
        # - If the line matches a closing snippet tag, remove that snippet tag
        #   from the set of open tags.
        # - If the line matches an opening exclude tag, record that we are
        #   excluding content.
        # - If the line matches a closing exclude tag, record that we are
        #   capturing content again.
        # - Otherwise, if we are not excluding content, add the line to each of
        #   the open snippets.
        #
        # Snippet tags found inside an excluded region are ignored.
        #
        # This allows us to handle parsing nested or interleaved snippets and
        # ignore blocks of code in the snippets.
        for line in f:
            open_match = re.match(pattern=OPEN_SNIPPET_REGEX, string=line)
            close_match = re.match(pattern=CLOSE_SNIPPET_REGEX, string=line)
            open_exclude_match = re.match(pattern=OPEN_EXCLUDE_REGEX, string=line)
            close_exclude_match = re.match(pattern=CLOSE_EXCLUDE_REGEX, string=line)
            if open_match and not excluding:
                name = open_match[1]
                open_snippets.add(name)
                if name not in snippet_lines:
                    snippet_lines[name] = []
            elif close_match and not excluding:
                # discard(): a stray END tag with no matching START is harmless.
                open_snippets.discard(close_match[1])
            elif open_exclude_match:
                excluding = True
            elif close_exclude_match:
                excluding = False
            elif not excluding:
                for snippet in open_snippets:
                    snippet_lines[snippet].append(line)

    return {
        snippet: "".join(_trim_leading_whitespace(lines))
        for snippet, lines in snippet_lines.items()
    }
108*55e87721SMatt Gilbride
109*55e87721SMatt Gilbride
def all_snippets(snippet_globs: List[str]) -> Dict[str, str]:
    """Expands each glob and collects the snippets of every matching file.

    Globs are expanded recursively (``**`` is honoured). When the same
    snippet name is found more than once, the occurrence processed last
    replaces the earlier ones.

    Args:
        snippet_globs (List[str]): List of path globs to expand.

    Returns:
        Dictionary of snippet name to snippet code.
    """
    collected = {}
    for pattern in snippet_globs:
        for path in glob.glob(pattern, recursive=True):
            # Merge this file's snippets; same-named entries are overwritten.
            collected.update(all_snippets_from_file(path))
    return collected
124*55e87721SMatt Gilbride    return snippets
125