xref: /aosp_15_r20/external/pigweed/pw_presubmit/py/pw_presubmit/owners_checks.py (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1# Copyright 2020 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14#
15"""OWNERS file checks."""
16
17import argparse
18import collections
19import dataclasses
20import difflib
21import enum
22import functools
23import logging
24import pathlib
25import re
26import sys
27from typing import (
28    Callable,
29    Collection,
30    DefaultDict,
31    Iterable,
32    OrderedDict,
33    Set,
34)
35
36from pw_presubmit import git_repo
37from pw_presubmit.presubmit_context import PresubmitFailure
38
39_LOG = logging.getLogger(__name__)
40
41
class LineType(enum.Enum):
    """Kinds of lines (and formatter sections) found in an OWNERS file."""

    # Lines beginning with "#".
    COMMENT = "comment"
    # A line consisting solely of "*".
    WILDCARD = "wildcard"
    # Lines beginning with "set ".
    FILE_LEVEL = "file_level"
    # Lines beginning with "file:".
    FILE_RULE = "file_rule"
    # Lines beginning with "include ".
    INCLUDE = "include"
    # Lines beginning with "per-file ".
    PER_FILE = "per-file"
    # Lines that look like an email address.
    USER = "user"
    # Special type to hold lines that don't get attached to another type
    TRAILING_COMMENTS = "trailing-comments"
52
53
# Ordered mapping from line type to the predicate that recognizes it. Lookups
# walk the entries in insertion order and stop at the first predicate that
# returns True, so more specific prefixes must come before the USER matcher.
_LINE_TYPERS: OrderedDict[
    LineType, Callable[[str], bool]
] = collections.OrderedDict(
    (
        (LineType.COMMENT, lambda x: x.startswith("#")),
        (LineType.WILDCARD, lambda x: x == "*"),
        (LineType.FILE_LEVEL, lambda x: x.startswith("set ")),
        (LineType.FILE_RULE, lambda x: x.startswith("file:")),
        (LineType.INCLUDE, lambda x: x.startswith("include ")),
        (LineType.PER_FILE, lambda x: x.startswith("per-file ")),
        (
            LineType.USER,
            # The local part must accept every digit, including "0";
            # otherwise addresses such as "user0@example.com" are rejected as
            # unrecognized lines. Only the start of the line is anchored.
            lambda x: bool(re.match(r"^[a-zA-Z0-9.+-]+@[a-zA-Z0-9.-]+", x)),
        ),
    )
)
70
71
class OwnersError(Exception):
    """Base class for errors raised while checking OWNERS files."""
74
75
class FormatterError(OwnersError):
    """The formatter was handed content it does not know how to style."""
78
79
class OwnersDuplicateError(OwnersError):
    """The same line appears more than once in an OWNERS file."""
82
83
class OwnersUserGrantError(OwnersError):
    """Invalid user grant: "*" is combined with any other grant."""
86
87
class OwnersProhibitedError(OwnersError):
    """A line (or combination of lines) the owners syntax does not allow.

    https://android-review.googlesource.com/plugins/code-owners/Documentation/backend-find-owners.html
    """
93
94
class OwnersDependencyError(OwnersError):
    """An OWNERS file tried to import a file that does not exist."""
97
98
class OwnersInvalidLineError(OwnersError):
    """A line in an OWNERS file is not recognized by any line typer."""
101
102
class OwnersStyleError(OwnersError):
    """The OWNERS file content does not follow the style guide."""
105
106
@dataclasses.dataclass
class Line:
    """One OWNERS line together with the comment lines attached to it."""

    # Text of the line itself.
    content: str
    # Comment lines associated with this line; each instance gets its own
    # fresh list by default.
    comments: list[str] = dataclasses.field(default_factory=list)
111
112
class OwnersFile:
    """Holds an OWNERS file in an easy to use parsed structure.

    Constructing an instance reads the file, normalizes whitespace, groups
    every non-comment line by its LineType (attaching the comment lines that
    precede it) and pre-computes the ideally styled version of the file.

    Attributes:
      path: Location of the OWNERS file, as given to the constructor.
      original_lines: Raw file content split on newline characters.
      sections: Parsed lines grouped by line type. Each section is sorted in
        place when the formatted output is generated.
      formatted_lines: Lines of the ideally styled version of the file.
    """

    path: pathlib.Path
    original_lines: list[str]
    sections: dict[LineType, list[Line]]
    formatted_lines: list[str]

    def __init__(self, path: pathlib.Path) -> None:
        """Loads, parses and formats the OWNERS file at `path`.

        Raises:
          OwnersDependencyError: `path` does not exist.
          OwnersInvalidLineError: A line matches none of the line typers.
          OwnersDuplicateError: A line is repeated within one section.
          FormatterError: The formatter did not handle every parsed section.
        """
        if not path.exists():
            raise OwnersDependencyError(
                f"Tried to import {path} but it does not exist"
            )
        self.path = path

        self.original_lines = self.load_owners_file(self.path)
        cleaned_lines = self.clean_lines(self.original_lines)
        self.sections = self.parse_owners(cleaned_lines)
        self.formatted_lines = self.format_sections(self.sections)

    @staticmethod
    def load_owners_file(owners_file: pathlib.Path) -> list[str]:
        """Returns the text of `owners_file` split on newline characters.

        A file that ends with a newline therefore yields a final empty
        string.
        """
        return owners_file.read_text().split("\n")

    @staticmethod
    def clean_lines(dirty_lines: list[str]) -> list[str]:
        """Removes extra whitespace from list of strings.

        Leading and trailing whitespace is stripped from every line, runs of
        whitespace inside non-comment lines collapse to a single space, and
        lines that end up empty are dropped.
        """
        cleaned_lines = []
        for line in dirty_lines:
            line = line.strip()  # Remove initial and trailing whitespace

            # Compress duplicated whitespace and remove tabs.
            # Allow comment lines to break this rule as they may have initial
            # whitespace for lining up text with neighboring lines.
            if not line.startswith("#"):
                line = re.sub(r"\s+", " ", line)
            if line:
                cleaned_lines.append(line)
        return cleaned_lines

    @staticmethod
    def __find_line_type(line: str) -> LineType:
        """Returns the type of the first line typer that accepts `line`.

        Raises:
          OwnersInvalidLineError: No line typer recognizes the line.
        """
        for line_type, type_matcher in _LINE_TYPERS.items():
            if type_matcher(line):
                return line_type

        raise OwnersInvalidLineError(
            f"Unrecognized OWNERS file line, '{line}'."
        )

    @staticmethod
    def parse_owners(
        cleaned_lines: list[str],
    ) -> DefaultDict[LineType, list[Line]]:
        """Converts text lines of OWNERS into structured object.

        Comment lines are buffered and attached to the next non-comment line.
        Comments left over at the end of the file are attached to a single
        empty-content entry in the TRAILING_COMMENTS section; that entry is
        always created, even when there are no trailing comments.

        Raises:
          OwnersInvalidLineError: A line matches none of the line typers.
          OwnersDuplicateError: A line is repeated within one section.
        """
        sections: DefaultDict[LineType, list[Line]] = collections.defaultdict(
            list
        )
        comment_buffer: list[str] = []

        def add_line_to_sections(
            sections, section: LineType, line: str, comment_buffer: list[str]
        ):
            if any(
                seen_line.content == line for seen_line in sections[section]
            ):
                raise OwnersDuplicateError(f"Duplicate line '{line}'.")
            line_obj = Line(content=line, comments=comment_buffer)
            sections[section].append(line_obj)

        for line in cleaned_lines:
            line_type: LineType = OwnersFile.__find_line_type(line)
            if line_type == LineType.COMMENT:
                comment_buffer.append(line)
            else:
                add_line_to_sections(sections, line_type, line, comment_buffer)
                # Start a fresh buffer; the previous list now belongs to the
                # Line object that was just stored.
                comment_buffer = []

        add_line_to_sections(
            sections, LineType.TRAILING_COMMENTS, "", comment_buffer
        )

        return sections

    @staticmethod
    def format_sections(
        sections: DefaultDict[LineType, list[Line]]
    ) -> list[str]:
        """Returns ideally styled OWNERS file.

        The styling rules are
        * Content will be sorted in the following orders with a blank line
        separating
            * "set noparent"
            * "include" lines
            * "file:" lines
            * user grants (example, "*", user@example.com)
            * "per-file:" lines
        * Do not combine user grants and "*"
        * User grants should be sorted alphabetically (this assumes English
        ordering)

        Note that every section of `sections` is sorted in place.

        Returns:
          List of strings that make up a styled version of a OWNERS file.

        Raises:
          FormatterError: When formatter does not handle all lines of input.
                          This is a coding error in owners_checks.
        """
        all_sections = [
            LineType.FILE_LEVEL,
            LineType.INCLUDE,
            LineType.FILE_RULE,
            LineType.WILDCARD,
            LineType.USER,
            LineType.PER_FILE,
            LineType.TRAILING_COMMENTS,
        ]
        formatted_lines: list[str] = []

        def append_section(line_type):
            # Add a line of separation if there was a previous section and our
            # current section has any content. I.e. do not lead with padding and
            # do not have multiple successive lines of padding.
            if (
                formatted_lines
                and line_type != LineType.TRAILING_COMMENTS
                and sections[line_type]
            ):
                formatted_lines.append("")

            sections[line_type].sort(key=lambda line: line.content)
            for line in sections[line_type]:
                # Strip keep-sorted comments out since sorting is done by this
                # script
                formatted_lines.extend(
                    [
                        comment
                        for comment in line.comments
                        if not comment.startswith("# keep-sorted: ")
                    ]
                )
                formatted_lines.append(line.content)

        for section in all_sections:
            append_section(section)

        if any(section_name not in all_sections for section_name in sections):
            raise FormatterError("Formatter did not process all sections.")
        return formatted_lines

    def check_style(self) -> None:
        """Checks styling of OWNERS file.

        Enforce consistent style on OWNERS file. This also incidentally detects
        a few classes of errors. When the file is not styled, a unified diff
        between the original and the styled lines is printed before raising.

        Raises:
          OwnersStyleError: Indicates styled lines do not match original input.
        """

        if self.original_lines != self.formatted_lines:
            print(
                "\n".join(
                    difflib.unified_diff(
                        self.original_lines,
                        self.formatted_lines,
                        fromfile=str(self.path),
                        tofile="styled",
                        lineterm="",
                    )
                )
            )

            raise OwnersStyleError(
                "OWNERS file format does not follow styling."
            )

    def look_for_owners_errors(self) -> None:
        """Scans owners files for invalid or useless content.

        Raises:
          OwnersUserGrantError: "*" is used together with user grants.
          OwnersProhibitedError: "include" is used together with "per-file".
        """

        # Confirm when using the wildcard("*") we don't also try to use
        # individual user grants.
        if self.sections[LineType.WILDCARD] and self.sections[LineType.USER]:
            raise OwnersUserGrantError(
                "Do not use '*' with individual user "
                "grants, * already applies to all users."
            )

        # NOTE: Using the include keyword in combination with a per-file rule is
        # not possible.
        # https://android-review.googlesource.com/plugins/code-owners/Documentation/backend-find-owners.html#syntax:~:text=NOTE%3A%20Using%20the%20include%20keyword%20in%20combination%20with%20a%20per%2Dfile%20rule%20is%20not%20possible.
        if self.sections[LineType.INCLUDE] and self.sections[LineType.PER_FILE]:
            raise OwnersProhibitedError(
                "'include' cannot be used with 'per-file'."
            )

    def __complete_path(self, sub_owners_file_path) -> pathlib.Path:
        """Always return absolute path."""
        # Absolute paths start with the git/project root
        if sub_owners_file_path.startswith("/"):
            root = git_repo.root(self.path)
            full_path = root / sub_owners_file_path[1:]
        else:
            # Relative paths start with owners file dir
            full_path = self.path.parent / sub_owners_file_path
        return full_path.resolve()

    def get_dependencies(self) -> list[pathlib.Path]:
        """Finds owners files this file includes.

        Returns:
          Resolved paths referenced by "include" lines, "file:" rules and
          "per-file" rules whose grant is a "file:" reference.

        Raises:
          OwnersInvalidLineError: A "per-file" line has no "=" separating the
            path expression from the access grant.
        """
        dependencies = []
        # All the includes
        for include in self.sections.get(LineType.INCLUDE, []):
            file_str = include.content[len("include ") :]
            dependencies.append(self.__complete_path(file_str))

        # all file: rules:
        for file_rule in self.sections.get(LineType.FILE_RULE, []):
            file_str = file_rule.content[len("file:") :]
            path = self.__complete_path(file_str)
            if ":" in file_str and not path.is_file():
                _LOG.warning(
                    "TODO: b/254322931 - This check does not yet support "
                    "<project> or <branch> in a file: rule"
                )
                _LOG.warning(
                    "It will not check line '%s' found in %s",
                    file_rule.content,
                    self.path,
                )

            else:
                dependencies.append(path)

        # all the per-file rule includes
        for per_file in self.sections.get(LineType.PER_FILE, []):
            rule = per_file.content[len("per-file ") :]
            # Split on the first "=". A rule without one is malformed; report
            # it as an OWNERS error instead of leaking a bare ValueError.
            _, separator, access_grant = rule.partition("=")
            if not separator:
                raise OwnersInvalidLineError(
                    f"per-file line is missing '=', '{per_file.content}'."
                )
            if access_grant.startswith("file:"):
                dependencies.append(
                    self.__complete_path(access_grant[len("file:") :])
                )

        return dependencies

    def write_formatted(self) -> None:
        """Overwrites the file at `self.path` with the formatted lines."""
        self.path.write_text("\n".join(self.formatted_lines))
361
362
def resolve_owners_tree(root_owners: pathlib.Path) -> list[OwnersFile]:
    """Given a starting OWNERS file return it and all of its dependencies.

    The dependency graph is walked breadth-first and every file is parsed at
    most once, even when files include each other in a cycle.

    Args:
      root_owners: Path of the OWNERS file to start from.

    Returns:
      The parsed root file followed by its direct and transitive
      dependencies, in discovery order.

    Raises:
      OwnersError: Propagated from parsing any file in the tree.
    """
    found: list[OwnersFile] = []
    todo = collections.deque((root_owners,))
    # Every path that has ever been queued. Dependencies come back as resolved
    # paths, so the root is tracked by its resolved form as well; otherwise a
    # cycle back to a relative or symlinked root would parse it a second time.
    queued: Set[pathlib.Path] = {root_owners.resolve()}
    while todo:
        cur_file = todo.popleft()
        owners_obj = OwnersFile(cur_file)
        found.append(owners_obj)
        for new_dep in owners_obj.get_dependencies():
            if new_dep not in queued:
                queued.add(new_dep)
                todo.append(new_dep)
    return found
378
379
def _run_owners_checks(owners_obj: OwnersFile) -> None:
    """Runs the content checks and then the style check on one OWNERS file.

    Any OwnersError raised by either check propagates to the caller.
    """
    owners_obj.look_for_owners_errors()
    owners_obj.check_style()
383
384
def _format_owners_file(owners_obj: OwnersFile) -> None:
    """Rewrites one OWNERS file in its styled form when it differs.

    The content checks run first, so a file with content errors raises
    before anything is written.
    """
    owners_obj.look_for_owners_errors()

    already_styled = owners_obj.original_lines == owners_obj.formatted_lines
    if already_styled:
        return
    owners_obj.write_formatted()
390
391
def _list_unwrapper(
    func: Callable[[OwnersFile], None],
    list_or_path: Iterable[pathlib.Path] | pathlib.Path,
) -> dict[pathlib.Path, str]:
    """Applies `func` to every OWNERS file reachable from the given path(s).

    Args:
      func: Check or fixer to run on each parsed OWNERS file.
      list_or_path: A single path or an iterable of paths to start from.

    Returns:
      Mapping from the path of each file for which `func` raised an
      OwnersError to the text of that error. Empty when nothing failed.
    """
    if isinstance(list_or_path, Iterable):
        start_paths = list_or_path
    else:
        start_paths = (list_or_path,)

    # Parse every dependency tree up front, before `func` runs on any file.
    owners_objects: list[OwnersFile] = []
    for start_path in start_paths:
        owners_objects.extend(resolve_owners_tree(start_path))

    errors: dict[pathlib.Path, str] = {}
    visited: Set[pathlib.Path] = set()
    for owners_obj in owners_objects:
        # Ensure we don't check the same file twice
        if owners_obj.path in visited:
            continue
        visited.add(owners_obj.path)
        try:
            func(owners_obj)
        except OwnersError as err:
            errors[owners_obj.path] = str(err)
            _LOG.error("%s: %s", owners_obj.path.absolute(), err)
    return errors
419
420
# Public entry points. Binding each per-file worker to _list_unwrapper gives
# callables that can be used with both the formatter (which supplies files one
# at a time) and presubmits (which supply a list of files).
run_owners_checks = functools.partial(_list_unwrapper, _run_owners_checks)
format_owners_file = functools.partial(_list_unwrapper, _format_owners_file)
426
427
def presubmit_check(files: pathlib.Path | Collection[pathlib.Path]) -> None:
    """Runs the OWNERS checks as a presubmit step.

    Args:
      files: A single OWNERS path or a collection of them.

    Raises:
      PresubmitFailure: At least one checked file reported an error. A
        suggested fix command is logged for every failing file first.
    """
    errors = run_owners_checks(files)
    if not errors:
        return

    for failing_path in errors:
        _LOG.warning("  pw format --fix %s", failing_path)
    _LOG.warning("will automatically fix this.")
    raise PresubmitFailure
435
436
def main() -> int:
    """Standalone test of styling.

    Parses the command line, then runs the content and style checks on the
    file named by --owners_file.

    Returns:
      0 when the file passes, 1 when any OwnersError is raised.
    """
    parser = argparse.ArgumentParser()
    # NOTE: --style is accepted but its value is not read by this function.
    parser.add_argument("--style", action="store_true")
    parser.add_argument("--owners_file", required=True, type=str)
    args = parser.parse_args()

    owners_path = pathlib.Path(args.owners_file)
    try:
        _run_owners_checks(OwnersFile(owners_path))
    except OwnersError as err:
        _LOG.error("%s", err)
        return 1
    return 0
452
453
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
456