# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
#
"""OWNERS file checks."""

import argparse
import collections
import dataclasses
import difflib
import enum
import functools
import logging
import pathlib
import re
import sys
from typing import (
    Callable,
    Collection,
    DefaultDict,
    Iterable,
    OrderedDict,
    Set,
)

from pw_presubmit import git_repo
from pw_presubmit.presubmit_context import PresubmitFailure

_LOG = logging.getLogger(__name__)


class LineType(enum.Enum):
    """Kinds of lines that can appear in an OWNERS file."""

    COMMENT = "comment"
    WILDCARD = "wildcard"
    FILE_LEVEL = "file_level"
    FILE_RULE = "file_rule"
    INCLUDE = "include"
    PER_FILE = "per-file"
    USER = "user"
    # Special type to hold lines that don't get attached to another type
    TRAILING_COMMENTS = "trailing-comments"


# Maps each line type to a predicate that recognizes it. The mapping is
# ordered because OwnersFile.__find_line_type() walks it in insertion order and
# returns the first type whose predicate accepts the (already cleaned) line.
_LINE_TYPERS: OrderedDict[
    LineType, Callable[[str], bool]
] = collections.OrderedDict(
    (
        (LineType.COMMENT, lambda x: x.startswith("#")),
        (LineType.WILDCARD, lambda x: x == "*"),
        (LineType.FILE_LEVEL, lambda x: x.startswith("set ")),
        (LineType.FILE_RULE, lambda x: x.startswith("file:")),
        (LineType.INCLUDE, lambda x: x.startswith("include ")),
        (LineType.PER_FILE, lambda x: x.startswith("per-file ")),
        (
            LineType.USER,
            # The local part must accept every digit, including 0; the
            # previous character class "1-9" rejected addresses such as
            # "user10@example.com" as unrecognized lines.
            lambda x: bool(re.match(r"^[a-zA-Z0-9.+-]+@[a-zA-Z0-9.-]+", x)),
        ),
    )
)


class OwnersError(Exception):
    """Generic level OWNERS file error."""
class FormatterError(OwnersError):
    """Errors where formatter doesn't know how to act."""


class OwnersDuplicateError(OwnersError):
    """Errors where duplicate lines are found in OWNERS files."""


class OwnersUserGrantError(OwnersError):
    """Invalid user grant, * is used with any other grant."""


class OwnersProhibitedError(OwnersError):
    """Any line that is prohibited by the owners syntax.

    https://android-review.googlesource.com/plugins/code-owners/Documentation/backend-find-owners.html
    """


class OwnersDependencyError(OwnersError):
    """OWNERS file tried to import file that does not exist."""


class OwnersInvalidLineError(OwnersError):
    """Line in OWNERS file does not match any 'line_typer'."""


class OwnersStyleError(OwnersError):
    """OWNERS file does not match style guide."""


@dataclasses.dataclass
class Line:
    """A non-comment OWNERS line and the comment lines that preceded it."""

    content: str
    comments: list[str] = dataclasses.field(default_factory=list)


class OwnersFile:
    """Holds OWNERS file in easy to use parsed structure."""

    # Location of the OWNERS file as it was handed to the constructor.
    path: pathlib.Path
    # Raw file text split on "\n" (no whitespace cleanup applied).
    original_lines: list[str]
    # Parsed lines grouped by their LineType.
    sections: dict[LineType, list[Line]]
    # The file content as the formatter would write it.
    formatted_lines: list[str]

    def __init__(self, path: pathlib.Path) -> None:
        """Loads, parses and formats the OWNERS file at `path`.

        Raises:
          OwnersDependencyError: `path` does not exist.
          OwnersInvalidLineError: A line matches no known line type.
          OwnersDuplicateError: The same line appears twice in a section.
        """
        if not path.exists():
            raise OwnersDependencyError(
                f"Tried to import {path} but it does not exist"
            )
        self.path = path

        self.original_lines = self.load_owners_file(self.path)
        cleaned_lines = self.clean_lines(self.original_lines)
        self.sections = self.parse_owners(cleaned_lines)
        self.formatted_lines = self.format_sections(self.sections)

    @staticmethod
    def load_owners_file(owners_file: pathlib.Path) -> list[str]:
        """Returns the file's text split on newlines.

        A newline-terminated file therefore yields a trailing empty string.
        """
        return owners_file.read_text().split("\n")

    @staticmethod
    def clean_lines(dirty_lines: list[str]) -> list[str]:
        """Removes extra whitespace from list of strings.

        Blank lines are dropped entirely.
        """

        cleaned_lines = []
        for line in dirty_lines:
            line = line.strip()  # Remove initial and trailing whitespace

            # Compress duplicated whitespace and remove tabs.
            # Allow comment lines to break this rule as they may have initial
            # whitespace for lining up text with neighboring lines.
            if not line.startswith("#"):
                line = re.sub(r"\s+", " ", line)
            if line:
                cleaned_lines.append(line)
        return cleaned_lines

    @staticmethod
    def __find_line_type(line: str) -> LineType:
        """Returns the first LineType whose matcher accepts `line`.

        Raises:
          OwnersInvalidLineError: No matcher in _LINE_TYPERS accepts the line.
        """
        for line_type, type_matcher in _LINE_TYPERS.items():
            if type_matcher(line):
                return line_type

        raise OwnersInvalidLineError(
            f"Unrecognized OWNERS file line, '{line}'."
        )

    @staticmethod
    def parse_owners(
        cleaned_lines: list[str],
    ) -> DefaultDict[LineType, list[Line]]:
        """Converts text lines of OWNERS into structured object.

        Comment lines are buffered and attached to the next non-comment line.
        Comments left over at the end of the file are attached to a single
        empty-content Line stored under LineType.TRAILING_COMMENTS.

        Raises:
          OwnersInvalidLineError: A line matches no known line type.
          OwnersDuplicateError: The same line appears twice in one section.
        """
        sections: DefaultDict[LineType, list[Line]] = collections.defaultdict(
            list
        )
        comment_buffer: list[str] = []

        def add_line_to_sections(
            sections, section: LineType, line: str, comment_buffer: list[str]
        ):
            if any(
                seen_line.content == line for seen_line in sections[section]
            ):
                raise OwnersDuplicateError(f"Duplicate line '{line}'.")
            line_obj = Line(content=line, comments=comment_buffer)
            sections[section].append(line_obj)

        for line in cleaned_lines:
            line_type: LineType = OwnersFile.__find_line_type(line)
            if line_type == LineType.COMMENT:
                comment_buffer.append(line)
            else:
                add_line_to_sections(sections, line_type, line, comment_buffer)
                # Rebind (rather than clear) so the Line keeps its own list.
                comment_buffer = []

        # Always added, even with no leftover comments: its empty content
        # becomes the final "" entry of the formatted output, which lines up
        # with the trailing "" that load_owners_file() produces for a
        # newline-terminated file.
        add_line_to_sections(
            sections, LineType.TRAILING_COMMENTS, "", comment_buffer
        )

        return sections

    @staticmethod
    def format_sections(
        sections: DefaultDict[LineType, list[Line]]
    ) -> list[str]:
        """Returns ideally styled OWNERS file.

        The styling rules are
        * Content will be sorted in the following orders with a blank line
        separating
            * "set noparent"
            * "include" lines
            * "file:" lines
            * user grants (example, "*", foo@example.com)
            * "per-file" lines
        * Do not combine user grants and "*"
        * User grants should be sorted alphabetically (this assumes English
        ordering)

        Note that each section list in `sections` is sorted in place.

        Returns:
          List of strings that make up a styled version of a OWNERS file.

        Raises:
          FormatterError: When formatter does not handle all lines of input.
                          This is a coding error in owners_checks.
        """
        all_sections = [
            LineType.FILE_LEVEL,
            LineType.INCLUDE,
            LineType.FILE_RULE,
            LineType.WILDCARD,
            LineType.USER,
            LineType.PER_FILE,
            LineType.TRAILING_COMMENTS,
        ]
        formatted_lines: list[str] = []

        def append_section(line_type):
            # Add a line of separation if there was a previous section and our
            # current section has any content. I.e. do not lead with padding and
            # do not have multiple successive lines of padding.
            if (
                formatted_lines
                and line_type != LineType.TRAILING_COMMENTS
                and sections[line_type]
            ):
                formatted_lines.append("")

            sections[line_type].sort(key=lambda line: line.content)
            for line in sections[line_type]:
                # Strip keep-sorted comments out since sorting is done by this
                # script
                formatted_lines.extend(
                    [
                        comment
                        for comment in line.comments
                        if not comment.startswith("# keep-sorted: ")
                    ]
                )
                formatted_lines.append(line.content)

        for section in all_sections:
            append_section(section)

        if any(section_name not in all_sections for section_name in sections):
            raise FormatterError("Formatter did not process all sections.")
        return formatted_lines

    def check_style(self) -> None:
        """Checks styling of OWNERS file.

        Enforce consistent style on OWNERS file. This also incidentally detects
        a few classes of errors. On mismatch a unified diff between the
        original and the styled content is printed to stdout.

        Raises:
          OwnersStyleError: Indicates styled lines do not match original input.
        """

        if self.original_lines != self.formatted_lines:
            print(
                "\n".join(
                    difflib.unified_diff(
                        self.original_lines,
                        self.formatted_lines,
                        fromfile=str(self.path),
                        tofile="styled",
                        lineterm="",
                    )
                )
            )

            raise OwnersStyleError(
                "OWNERS file format does not follow styling."
            )

    def look_for_owners_errors(self) -> None:
        """Scans owners files for invalid or useless content.

        Raises:
          OwnersUserGrantError: "*" is combined with individual user grants.
          OwnersProhibitedError: "include" is combined with "per-file".
        """

        # Confirm when using the wildcard("*") we don't also try to use
        # individual user grants.
        if self.sections[LineType.WILDCARD] and self.sections[LineType.USER]:
            raise OwnersUserGrantError(
                "Do not use '*' with individual user "
                "grants, * already applies to all users."
            )

        # NOTE: Using the include keyword in combination with a per-file rule is
        # not possible.
        # https://android-review.googlesource.com/plugins/code-owners/Documentation/backend-find-owners.html#syntax:~:text=NOTE%3A%20Using%20the%20include%20keyword%20in%20combination%20with%20a%20per%2Dfile%20rule%20is%20not%20possible.
        if self.sections[LineType.INCLUDE] and self.sections[LineType.PER_FILE]:
            raise OwnersProhibitedError(
                "'include' cannot be used with 'per-file'."
            )

    def __complete_path(self, sub_owners_file_path: str) -> pathlib.Path:
        """Always return absolute path."""
        # Absolute paths start with the git/project root
        if sub_owners_file_path.startswith("/"):
            root = git_repo.root(self.path)
            full_path = root / sub_owners_file_path[1:]
        else:
            # Relative paths start with owners file dir
            full_path = self.path.parent / sub_owners_file_path
        return full_path.resolve()

    def get_dependencies(self) -> list[pathlib.Path]:
        """Finds owners files this file includes.

        Returns:
          Resolved paths referenced by "include" lines, "file:" lines and
          "per-file" lines whose grant is a "file:" reference.

        Raises:
          OwnersInvalidLineError: A "per-file" line has no "=" separator.
        """
        dependencies = []
        # All the includes
        for include in self.sections.get(LineType.INCLUDE, []):
            file_str = include.content[len("include ") :]
            dependencies.append(self.__complete_path(file_str))

        # all file: rules:
        for file_rule in self.sections.get(LineType.FILE_RULE, []):
            file_str = file_rule.content[len("file:") :]
            path = self.__complete_path(file_str)
            if ":" in file_str and not path.is_file():
                _LOG.warning(
                    "TODO: b/254322931 - This check does not yet support "
                    "<project> or <branch> in a file: rule"
                )
                _LOG.warning(
                    "It will not check line '%s' found in %s",
                    file_rule.content,
                    self.path,
                )

            else:
                dependencies.append(path)

        # all the per-file rule includes
        for per_file in self.sections.get(LineType.PER_FILE, []):
            file_str = per_file.content[len("per-file ") :]
            # Split on the first "=". A rule without one is malformed; report
            # it as an OWNERS error instead of leaking a bare ValueError.
            _, separator, access_grant = file_str.partition("=")
            if not separator:
                raise OwnersInvalidLineError(
                    f"per-file rule is missing '=', '{per_file.content}'."
                )
            # Tolerate whitespace after "=" so "x= file:/OWNERS" is still
            # recognized as a file reference.
            access_grant = access_grant.strip()
            if access_grant.startswith("file:"):
                dependencies.append(
                    self.__complete_path(access_grant[len("file:") :])
                )

        return dependencies

    def write_formatted(self) -> None:
        """Overwrites the OWNERS file with its formatted content."""
        self.path.write_text("\n".join(self.formatted_lines))


def resolve_owners_tree(root_owners: pathlib.Path) -> list[OwnersFile]:
    """Given a starting OWNERS file return it and all of its dependencies.

    Dependencies are followed breadth first and each file is parsed once.

    Raises:
      OwnersError: Any error raised while loading or parsing one of the files.
    """
    found = []
    todo = collections.deque((root_owners,))
    checked: Set[pathlib.Path] = set()
    while todo:
        cur_file = todo.popleft()
        # Dependencies come back resolved, so track the resolved form here too;
        # otherwise a relative root path never matches a dependency that
        # points back at it and the file is parsed a second time.
        checked.add(cur_file.resolve())
        owners_obj = OwnersFile(cur_file)
        found.append(owners_obj)
        new_dependents = owners_obj.get_dependencies()
        for new_dep in new_dependents:
            if new_dep not in checked and new_dep not in todo:
                todo.append(new_dep)
    return found


def _run_owners_checks(owners_obj: OwnersFile) -> None:
    """Runs the content checks and then the style check on one file."""
    owners_obj.look_for_owners_errors()
    owners_obj.check_style()


def _format_owners_file(owners_obj: OwnersFile) -> None:
    """Rewrites one file in the canonical style if it differs from it."""
    owners_obj.look_for_owners_errors()

    if owners_obj.original_lines != owners_obj.formatted_lines:
        owners_obj.write_formatted()


def _list_unwrapper(
    func: Callable[[OwnersFile], None],
    list_or_path: Iterable[pathlib.Path] | pathlib.Path,
) -> dict[pathlib.Path, str]:
    """Decorator that accepts Paths or list of Paths and iterates as needed.

    Args:
      func: Action to run on every OWNERS file that is found.
      list_or_path: One OWNERS path or an iterable of them. Each one is
        expanded to itself plus all of the OWNERS files it depends on.

    Returns:
      Mapping of OWNERS file path to the message of the OwnersError that
      `func` raised for it. Empty when every file passed.
    """
    errors: dict[pathlib.Path, str] = {}
    if isinstance(list_or_path, Iterable):
        files = list_or_path
    else:
        files = (list_or_path,)

    # NOTE(review): OwnersError raised while resolving the tree (missing
    # dependency, duplicate or unrecognized line) is not captured in `errors`;
    # it propagates to the caller.
    all_owners_obj: list[OwnersFile] = []
    for file in files:
        all_owners_obj.extend(resolve_owners_tree(file))

    checked: Set[pathlib.Path] = set()
    for current_owners in all_owners_obj:
        # Ensure we don't check the same file twice. Compare resolved paths
        # because roots keep the caller's spelling while dependencies are
        # already resolved.
        resolved_path = current_owners.path.resolve()
        if resolved_path in checked:
            continue
        checked.add(resolved_path)
        try:
            func(current_owners)
        except OwnersError as err:
            errors[current_owners.path] = str(err)
            _LOG.error("%s: %s", current_owners.path.absolute(), err)
    return errors


# This generates decorated versions of the functions that can be used with both
# formatter (which supplies files one at a time) and presubmits (which supplies
# list of files).
run_owners_checks = functools.partial(_list_unwrapper, _run_owners_checks)
format_owners_file = functools.partial(_list_unwrapper, _format_owners_file)


def presubmit_check(files: pathlib.Path | Collection[pathlib.Path]) -> None:
    """Runs the OWNERS checks and fails the presubmit if any file has errors.

    Raises:
      PresubmitFailure: At least one OWNERS file reported an error.
    """
    failures = run_owners_checks(files)
    if not failures:
        return

    # Point the user at the formatter for every file that failed.
    for failed_path in failures:
        _LOG.warning(" pw format --fix %s", failed_path)
    _LOG.warning("will automatically fix this.")
    raise PresubmitFailure


def main() -> int:
    """Standalone test of styling.

    Returns:
      0 when the OWNERS file passes every check, 1 otherwise.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--style", action="store_true")
    arg_parser.add_argument("--owners_file", required=True, type=str)
    cli_args = arg_parser.parse_args()

    target = pathlib.Path(cli_args.owners_file)
    try:
        parsed = OwnersFile(target)
        parsed.look_for_owners_errors()
        parsed.check_style()
    except OwnersError as problem:
        _LOG.error("%s", problem)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())