# synthtool/gcp/snippets.py

# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import os
import re
from typing import Dict, List

# Matches a line containing an opening snippet tag such as ``[START my_snippet]``.
# Group 1 captures the snippet name (lowercase letters, digits and underscores).
OPEN_SNIPPET_REGEX = r".*\[START ([a-z0-9_]+)\].*$"
# Matches a line containing a closing snippet tag such as ``[END my_snippet]``.
# Group 1 captures the snippet name.
CLOSE_SNIPPET_REGEX = r".*\[END ([a-z0-9_]+)\].*$"
# Matches a line containing ``[START_EXCLUDE]``, which begins a region whose
# lines are left out of the captured snippets.
OPEN_EXCLUDE_REGEX = r".*\[START_EXCLUDE\].*$"
# Matches a line containing ``[END_EXCLUDE]``, which ends an excluded region.
CLOSE_EXCLUDE_REGEX = r".*\[END_EXCLUDE\].*$"


def _trim_leading_whitespace(lines: List[str]) -> List[str]:
    """Trims leading, plain spaces from the snippet content. Finds the minimum
    number of leading spaces, ignoring empty lines, and removes that number of
    spaces from each line.

    Args:
        lines (List[str]): Lines of content. These lines are newline terminated.

    Returns:
        List of trimmed lines.
    """

    def number_of_leading_spaces(input: str) -> int:
        return len(input) - len(input.lstrip(" "))

    def is_empty_line(input: str) -> bool:
        if re.match(r"^\s*$", input):
            return True
        return False

    leading_spaces = [
        number_of_leading_spaces(line) for line in lines if not is_empty_line(line)
    ]
    max_leading_spaces = min(leading_spaces)
    return [
        "\n" if is_empty_line(line) else line[max_leading_spaces:] for line in lines
    ]


def all_snippets_from_file(sample_file: str) -> Dict[str, str]:
    """Reads in a sample file and parses out all contained snippets.

    A snippet is the text between a ``[START name]`` line and the matching
    ``[END name]`` line. Snippets may be nested or interleaved, and regions
    between ``[START_EXCLUDE]`` and ``[END_EXCLUDE]`` are left out. If the same
    snippet name is opened more than once in the file, the content of all of
    its regions is concatenated in file order.

    Args:
        sample_file (str): Sample file to parse.

    Returns:
        Dictionary of snippet name to snippet code, with the common leading
        indentation removed. Empty if ``sample_file`` is not an existing
        regular file.
    """
    # Recursive globs can yield directories as well as files; anything that is
    # not a regular file cannot be opened for reading and has no snippets.
    if not os.path.isfile(sample_file):
        return {}

    # Compile once per file instead of looking the patterns up for every line.
    open_snippet = re.compile(OPEN_SNIPPET_REGEX)
    close_snippet = re.compile(CLOSE_SNIPPET_REGEX)
    open_exclude = re.compile(OPEN_EXCLUDE_REGEX)
    close_exclude = re.compile(CLOSE_EXCLUDE_REGEX)

    snippet_lines = {}  # type: Dict[str, List[str]]
    open_snippets = set()
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default locale encoding.
    with open(sample_file, encoding="utf-8") as f:
        excluding = False
        # Iterate over each line:
        # - If the line matches an opening snippet tag, add that snippet tag to
        #   the set of open tags.
        # - If the line matches a closing snippet tag, remove that snippet tag
        #   from the set of open tags.
        # - If the line matches an opening exclude tag, record that we are
        #   excluding content.
        # - If the line matches a closing exclude tag, record that we are
        #   capturing content again.
        # - Otherwise, if we are not excluding content, add the line to each of
        #   the open snippets.
        #
        # Snippet tags that appear inside an excluded region are ignored.
        #
        # This allows us to handle parsing nested or interleaved snippets and
        # ignore blocks of code in the snippets.
        for line in f:
            open_match = open_snippet.match(line)
            close_match = close_snippet.match(line)
            if open_match and not excluding:
                name = open_match[1]
                open_snippets.add(name)
                # Keep lines already collected if this name was opened before.
                snippet_lines.setdefault(name, [])
            elif close_match and not excluding:
                open_snippets.discard(close_match[1])
            elif open_exclude.match(line):
                excluding = True
            elif close_exclude.match(line):
                excluding = False
            elif not excluding:
                for snippet in open_snippets:
                    snippet_lines[snippet].append(line)

    return {
        snippet: "".join(_trim_leading_whitespace(lines))
        for snippet, lines in snippet_lines.items()
    }


def all_snippets(snippet_globs: List[str]) -> Dict[str, str]:
    """Expands each glob and gathers the snippets found in every matching path.

    Globs are expanded recursively, in the order given. When the same snippet
    name is found in more than one file, the file processed last wins.

    Args:
        snippet_globs (List[str]): List of path globs to expand.

    Returns:
        Dictionary of snippet name to snippet code.
    """
    collected = {}
    for pattern in snippet_globs:
        for path in glob.glob(pattern, recursive=True):
            # Later files overwrite earlier entries that share a snippet name.
            collected.update(all_snippets_from_file(path))
    return collected