# Copyright 2024 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Formatting library core."""

import abc
from dataclasses import dataclass
import difflib
import logging
from pathlib import Path
from typing import Callable, Iterable, Iterator

from pw_cli.tool_runner import ToolRunner, BasicSubprocessRunner


_LOG: logging.Logger = logging.getLogger(__name__)


def _ensure_newline(orig: str) -> str:
    """Adds a warning and newline to any text without a trailing newline.

    Empty text is returned unchanged: an empty file is not "missing" a
    trailing newline, and appending the warning to it would make diffs against
    empty files show lines that do not exist.

    Args:
        orig: The text to check.

    Returns:
        ``orig`` unchanged if it is empty or already ends with a newline;
        otherwise ``orig`` followed by a newline and a
        ``No newline at end of file`` marker line.
    """

    if not orig or orig.endswith('\n'):
        return orig
    return orig + '\nNo newline at end of file\n'


def simple_diff(path: Path, original: str, formatted: str) -> str:
    """Produces a unified diff of the contents of two files.

    Args:
        path: File path of the file being diffed. Only used to label the
            two sides of the diff.
        original: The contents of the original file, as a string.
        formatted: The contents of the formatted file, as a string.

    Returns:
        The unified diff as a single string. The string is empty when the two
        inputs are identical.
    """

    # Normalize both sides so that a missing trailing newline shows up as an
    # explicit line in the diff rather than as two lines run together.
    original = _ensure_newline(original)
    formatted = _ensure_newline(formatted)
    return ''.join(
        difflib.unified_diff(
            original.splitlines(keepends=True),
            formatted.splitlines(keepends=True),
            f'{path} (original)',
            f'{path} (reformatted)',
        )
    )


DiffCallback = Callable[[Path, str, str], str]
"""The callback type for producing diffs.

Arguments:
    path: File path of the file being diffed.
    original: The contents of the original file, as a string.
    formatted: The contents of the formatted file, as a string.

Returns:
    A human readable diff as a string.
"""


@dataclass(frozen=True)
class FormattedFileContents:
    """The result of running a code formatter on the contents of a file.

    This type is returned by in-memory formatting check operations.

    Attributes:
        ok: A boolean indicating whether or not formatting was successful.
        formatted_file_contents: The contents of the resulting formatted file
            as bytes.
        error_message: A string containing any errors or warnings produced by
            the formatting process.
    """

    ok: bool
    formatted_file_contents: bytes
    error_message: str | None


@dataclass(frozen=True)
class FormattedDiff:
    """The resulting diff of applying a code formatter to a file.

    Attributes:
        ok: A boolean indicating whether or not formatting was successful.
        diff: The resulting diff of applying code formatting, as a
            human-readable string.
        error_message: A string containing any errors or warnings produced by
            the formatting process.
        file_path: The path of the corresponding file that produced this diff.
    """

    ok: bool
    diff: str
    error_message: str | None
    file_path: Path


@dataclass(frozen=True)
class FormatFixStatus:
    """The status of running a code formatter in-place on a file.

    This type is returned by in-place formatting fix operations.

    Attributes:
        ok: A boolean indicating whether or not formatting was successful.
        error_message: A string containing any errors or warnings produced by
            the formatting process.
    """

    ok: bool
    error_message: str | None


class FileChecker(abc.ABC):
    """Abstract class for a code format check tool.

    This class does not have the ability to apply formatting to files, and
    instead only allows in-memory checks to produce expected resulting diffs.

    Attributes:
        run_tool: The :py:class:`pw_cli.tool_runner.ToolRunner` to use
            when calling out to subprocesses.
        diff_tool: The :py:attr:`pw_presubmit.format.core.DiffCallback` to use
            when producing formatting diffs.
    """

    def __init__(
        self,
        tool_runner: ToolRunner | None = None,
        diff_tool: DiffCallback = simple_diff,
    ):
        """Initializes the checker.

        Args:
            tool_runner: The runner used to launch subprocesses. When omitted
                (or ``None``), a new ``BasicSubprocessRunner`` is created for
                this instance.
            diff_tool: The callback used to render diffs.
        """
        # Always call `self.run_tool` rather than `subprocess.run`, as it allows
        # injection of tools and other environment-specific handlers.
        #
        # The default runner is built here rather than in the signature so that
        # it is not constructed at import time and is not shared between every
        # checker instance.
        self.run_tool: ToolRunner = (
            BasicSubprocessRunner() if tool_runner is None else tool_runner
        )
        self.diff_tool = diff_tool

    @abc.abstractmethod
    def format_file_in_memory(
        self, file_path: Path, file_contents: bytes
    ) -> FormattedFileContents:
        """Returns the formatted version of a file as in-memory bytes.

        ``file_path`` and ``file_contents`` represent the same file. Both are
        provided for convenience. Use ``file_path`` if you can do so without
        modifying the file, or use ``file_contents`` if the formatting tool
        provides a mechanism for formatting the file by piping it to stdin.

        Any subprocess calls should be initiated with ``self.run_tool()`` to
        enable testing and injection of tools and tool wrappers.

        **WARNING**: A :py:class:`pw_presubmit.format.core.FileChecker` must
        **never** modify the file at ``file_path``.

        Returns:
            A populated
            :py:class:`pw_presubmit.format.core.FormattedFileContents` that
            contains either the result of formatting the file, or an error
            message.
        """

    def get_formatting_diff(
        self, file_path: Path, dry_run: bool = False
    ) -> FormattedDiff | None:
        """Returns a diff comparing a file to its formatted version.

        The file is always read and passed to ``format_file_in_memory()``,
        even when ``dry_run`` is ``True``. A dry run only suppresses the diff
        of a *successful* format operation; formatter failures are still
        reported.

        Args:
            file_path: The file to check. It is read as bytes.
            dry_run: If ``True``, no diff is produced for files that were
                formatted successfully.

        Returns:
            ``None`` if there is no difference after formatting **OR** if
            ``dry_run`` is enabled and formatting succeeded. Otherwise, a
            :py:class:`pw_presubmit.format.core.FormattedDiff` is returned
            containing either a diff or an error.
        """
        original = file_path.read_bytes()

        formatted = self.format_file_in_memory(file_path, original)

        # Errors are surfaced before the dry-run check so that a dry run still
        # reports formatter failures.
        if not formatted.ok:
            return FormattedDiff(
                diff='',  # Don't try to diff.
                ok=False,
                file_path=file_path,
                error_message=formatted.error_message,
            )

        if dry_run:
            return None

        # No difference found.
        if formatted.formatted_file_contents == original:
            return None

        # Undecodable bytes are replaced rather than raising, so a diff can
        # still be rendered for files that are not valid text.
        return FormattedDiff(
            diff=self.diff_tool(
                file_path,
                original.decode(errors='replace'),
                formatted.formatted_file_contents.decode(errors='replace'),
            ),
            file_path=file_path,
            error_message=formatted.error_message,
            ok=True,
        )

    def get_formatting_diffs(
        self, paths: Iterable[Path], dry_run: bool = False
    ) -> Iterator[FormattedDiff]:
        """Checks the formatting of many files without modifying them.

        This method may be overridden to optimize for formatters that allow
        checking multiple files in a single invocation, though you may need
        to do additional parsing to produce diffs or error messages associated
        with each file path.

        Args:
            paths: The files to check.
            dry_run: Forwarded to ``get_formatting_diff()`` for every path.

        Returns:
            An iterator of :py:class:`pw_presubmit.format.core.FormattedDiff`
            objects for each file with identified formatting issues.
        """

        for path in paths:
            diff = self.get_formatting_diff(path, dry_run)
            if diff is not None:
                yield diff


class FileFormatter(FileChecker):
    """Abstract class for a code format fix tool."""

    def __init__(self, **kwargs):
        """Forwards all keyword arguments to ``FileChecker.__init__()``."""
        super().__init__(**kwargs)

    @abc.abstractmethod
    def format_file(self, file_path: Path) -> FormatFixStatus:
        """Formats the provided file in-place.

        Any subprocess calls should be initiated with ``self.run_tool()`` to
        enable testing and injection of tools and tool wrappers.

        Returns:
            A FormatFixStatus that contains relevant errors/warnings.
        """

    def format_files(
        self, paths: Iterable[Path], keep_warnings: bool = True
    ) -> Iterator[tuple[Path, FormatFixStatus]]:
        """Formats the provided files and fixes them in-place.

        All files must be updated to contain the formatted version. If errors
        are encountered along the way, they should be collected and yielded as
        pairs of the path of the file and a
        :py:class:`pw_presubmit.format.core.FormatFixStatus` that describes the
        errors encountered while processing that file.

        Any subprocess calls should be initiated with ``self.run_tool()`` to
        enable testing and injection of tools and tool wrappers.

        This method may be overridden to optimize for formatters that allow
        formatting multiple files in a single invocation, though you may need
        to do additional parsing to associate error messages with the paths of
        the files that produced them.

        Args:
            paths: The files to format in-place.
            keep_warnings: If ``True``, also yield results for files that were
                formatted successfully but produced a non-empty
                ``error_message``.

        Returns:
            An iterator of ``Path`` and
            :py:class:`pw_presubmit.format.core.FormatFixStatus` pairs for each
            file that was not successfully formatted. If ``keep_warnings`` is
            ``True``, any successful format operations with warnings will also
            be returned.
        """

        for file_path in paths:
            status = self.format_file(file_path)
            if not status.ok or (status.error_message and keep_warnings):
                yield (file_path, status)