xref: /aosp_15_r20/external/pigweed/pw_presubmit/py/pw_presubmit/format/core.py (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1# Copyright 2024 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""Formatting library core."""
15
16import abc
17from dataclasses import dataclass
18import difflib
19import logging
20from pathlib import Path
21from typing import Callable, Iterable, Iterator
22
23from pw_cli.tool_runner import ToolRunner, BasicSubprocessRunner
24
25
26_LOG: logging.Logger = logging.getLogger(__name__)
27
28
29def _ensure_newline(orig: str) -> str:
30    """Adds a warning and newline to any file without a trailing newline."""
31
32    if orig.endswith('\n'):
33        return orig
34    return orig + '\nNo newline at end of file\n'
35
36
def simple_diff(path: Path, original: str, formatted: str) -> str:
    """Builds a unified diff between a file's original and formatted text.

    Args:
        path: Path of the file being diffed. It is only used to label the two
            sides of the diff.
        original: Text of the file before formatting.
        formatted: Text of the file after formatting.

    Returns:
        The unified diff joined into a single string.
    """
    # Mark a missing trailing newline on either side before splitting, so the
    # marker line takes part in the comparison.
    before_lines = _ensure_newline(original).splitlines(keepends=True)
    after_lines = _ensure_newline(formatted).splitlines(keepends=True)

    diff_lines = difflib.unified_diff(
        before_lines,
        after_lines,
        fromfile=f'{path}  (original)',
        tofile=f'{path}  (reformatted)',
    )
    return ''.join(diff_lines)
50
51
52DiffCallback = Callable[[Path, str, str], str]
53"""The callback type for producing diffs.
54
55Arguments:
56    path: File path of the file being diffed.
57    original: The contents of the original file, as a string.
58    formatted: The contents of the formatted file, as a string.
59
60Returns:
61    A human readable diff as a string.
62"""
63
64
65@dataclass(frozen=True)
66class FormattedFileContents:
67    """The result of running a code formatter on the contents of a file.
68
69    This type is returned by in-memory formatting check operations.
70
71    Attributes:
72        ok: A boolean indicating whether or not formatting was successful.
73        formatted_file_contents: The contents of the resulting formatted file
74            as bytes.
75        error_message: A string containing any errors or warnings produced by
76            the formatting process.
77    """
78
79    ok: bool
80    formatted_file_contents: bytes
81    error_message: str | None
82
83
84@dataclass(frozen=True)
85class FormattedDiff:
86    """The resulting diff of applying a code formatter to a file.
87
88    Attributes:
89        ok: A boolean indicating whether or not formatting was successful.
90        diff: The resulting diff of applying code formatting, as a
91            human-readable string.
92        error_message: A string containing any errors or warnings produced by
93            the formatting process.
94        file_path: The path of the corresponding file that produced this diff.
95    """
96
97    ok: bool
98    diff: str
99    error_message: str | None
100    file_path: Path
101
102
103@dataclass(frozen=True)
104class FormatFixStatus:
105    """The status of running a code formatter in-place on a file.
106
107    This type is returned by in-place formatting fix operations.
108
109    Attributes:
110        ok: A boolean indicating whether or not formatting was successful.
111        error_message: A string containing any errors or warnings produced by
112            the formatting process.
113    """
114
115    ok: bool
116    error_message: str | None
117
118
class FileChecker(abc.ABC):
    """Abstract class for a code format check tool.

    This class does not have the ability to apply formatting to files, and
    instead only allows in-memory checks to produce expected resulting diffs.

    Attributes:
        run_tool: The :py:class:`pw_presubmit.format.core.ToolRunner` to use
            when calling out to subprocesses.
        diff_tool: The :py:attr:`pw_presubmit.format.core.DiffCallback` to use
            when producing formatting diffs.
    """

    def __init__(
        self,
        tool_runner: ToolRunner | None = None,
        diff_tool: DiffCallback = simple_diff,
    ):
        """Initializes the checker with its tool runner and diff callback.

        Args:
            tool_runner: Runner used when calling out to subprocesses. When
                ``None`` (the default), a new ``BasicSubprocessRunner`` is
                created for this instance.
            diff_tool: Callback used to render formatting diffs.
        """
        # Build the default runner here rather than in the signature: a call
        # in a default argument runs once at definition time and would share
        # one runner object between every checker.
        if tool_runner is None:
            tool_runner = BasicSubprocessRunner()

        # Always call `self.run_tool` rather than `subprocess.run`, as it allows
        # injection of tools and other environment-specific handlers.
        self.run_tool: ToolRunner = tool_runner
        self.diff_tool = diff_tool

    @abc.abstractmethod
    def format_file_in_memory(
        self, file_path: Path, file_contents: bytes
    ) -> FormattedFileContents:
        """Returns the formatted version of a file as in-memory bytes.

        ``file_path`` and ``file_contents`` represent the same file. Both are
        provided for convenience. Use ``file_path`` if you can do so without
        modifying the file, or use ``file_contents`` if the formatting tool
        provides a mechanism for formatting the file by piping it to stdin.

        Any subprocess calls should be initiated with ``self.run_tool()`` to
        enable testing and injection of tools and tool wrappers.

        **WARNING**: A :py:class:`pw_presubmit.format.core.FileChecker` must
        **never** modify the file at ``file_path``.

        Args:
            file_path: Path of the file to format.
            file_contents: The current contents of that file, as bytes.

        Returns:
            A populated
            :py:class:`pw_presubmit.format.core.FormattedFileContents` that
            contains either the result of formatting the file, or an error
            message.
        """

    def get_formatting_diff(
        self, file_path: Path, dry_run: bool = False
    ) -> FormattedDiff | None:
        """Returns a diff comparing a file to its formatted version.

        The file is always read and passed through
        ``format_file_in_memory()``, even when ``dry_run`` is ``True``.
        ``dry_run`` only suppresses diff generation; formatter failures are
        still reported.

        Args:
            file_path: Path of the file to check.
            dry_run: If ``True``, never produce a diff.

        Returns:
            A :py:class:`pw_presubmit.format.core.FormattedDiff` with
            ``ok=False`` and an empty diff if the formatter reported a
            failure. Otherwise ``None`` if ``dry_run`` is enabled **OR** the
            formatted contents are identical to the original. Otherwise a
            :py:class:`pw_presubmit.format.core.FormattedDiff` containing the
            diff.

        Raises:
            OSError: If ``file_path`` cannot be read.
        """
        original = file_path.read_bytes()

        formatted = self.format_file_in_memory(file_path, original)

        if not formatted.ok:
            return FormattedDiff(
                diff='',  # Don't try to diff.
                ok=False,
                file_path=file_path,
                error_message=formatted.error_message,
            )

        if dry_run:
            return None

        # No difference found.
        if formatted.formatted_file_contents == original:
            return None

        # Decode leniently so undecodable bytes cannot abort diff generation;
        # the diff is only meant for human consumption.
        return FormattedDiff(
            diff=self.diff_tool(
                file_path,
                original.decode(errors='replace'),
                formatted.formatted_file_contents.decode(errors='replace'),
            ),
            file_path=file_path,
            error_message=formatted.error_message,
            ok=True,
        )

    def get_formatting_diffs(
        self, paths: Iterable[Path], dry_run: bool = False
    ) -> Iterator[FormattedDiff]:
        """Checks the formatting of many files without modifying them.

        This method may be overridden to optimize for formatters that allow
        checking multiple files in a single invocation, though you may need
        to do additional parsing to produce diffs or error messages associated
        with each file path.

        Args:
            paths: Paths of the files to check.
            dry_run: Forwarded to ``get_formatting_diff()`` for every path.

        Returns:
            An iterator of :py:class:`pw_presubmit.format.core.FormattedDiff`
            objects, one for each file for which ``get_formatting_diff()``
            returned a result (a diff or an error).
        """

        for path in paths:
            diff = self.get_formatting_diff(path, dry_run)
            if diff is not None:
                yield diff
228
229
class FileFormatter(FileChecker):
    """Abstract class for a code format fix tool."""

    def __init__(self, **kwargs):
        """Forwards all keyword arguments to ``FileChecker.__init__()``."""
        super().__init__(**kwargs)

    @abc.abstractmethod
    def format_file(self, file_path: Path) -> FormatFixStatus:
        """Formats the provided file in-place.

        Implementations should launch any subprocess through
        ``self.run_tool()`` so that tools and tool wrappers can be injected
        and tested.

        Args:
            file_path: Path of the file to format.

        Returns:
            A FormatFixStatus that contains relevant errors/warnings.
        """

    def format_files(
        self, paths: Iterable[Path], keep_warnings: bool = True
    ) -> Iterator[tuple[Path, FormatFixStatus]]:
        """Formats the provided files and fixes them in-place.

        Each path is handed to ``format_file()`` in turn. Results are reported
        lazily as ``(path, status)`` pairs rather than collected up front.

        Implementations should launch any subprocess through
        ``self.run_tool()`` so that tools and tool wrappers can be injected
        and tested.

        This method may be overridden to optimize for formatters that allow
        formatting multiple files in a single invocation, though you may need
        to do additional parsing to associate error messages with the paths of
        the files that produced them.

        Args:
            paths: Paths of the files to format.
            keep_warnings: If ``True``, also report successful operations
                that carry a non-empty error message.

        Returns:
            An iterator of ``Path`` and
            :py:class:`pw_presubmit.format.core.FormatFixStatus` pairs for each
            file that was not successfully formatted. If ``keep_warnings`` is
            ``True``, any successful format operations with warnings will also
            be returned.
        """

        for target in paths:
            result = self.format_file(target)
            # Nothing to report: success, and no warning the caller asked
            # to keep.
            if result.ok and not (result.error_message and keep_warnings):
                continue
            yield (target, result)
277