1#!/usr/bin/env python3
2"""Check proposed changes for common issues."""
3import re
4import sys
5import shutil
6import os.path
7import subprocess
8import sysconfig
9
10import reindent
11import untabify
12
13
# Directories holding bundled copies of external libraries.  Their
# coding style is not checked.  All of them live under 'Modules'.
EXCLUDE_DIRS = [
    os.path.join('Modules', *subdir)
    for subdir in (
        ('_ctypes', 'libffi_osx'),
        ('_ctypes', 'libffi_msvc'),
        ('_decimal', 'libmpdec'),
        ('expat',),
        ('zlib',),
    )
]
# Source directory as reported by the interpreter's build configuration.
SRCDIR = sysconfig.get_config_var('srcdir')
22
23
def n_files_str(count):
    """Format *count* as '<count> file' or '<count> files'.

    The singular form is used only when *count* equals 1.
    """
    suffix = "" if count == 1 else "s"
    return f"{count} file{suffix}"
27
28
def status(message, modal=False, info=None):
    """Decorator factory that reports a function's progress on stdout.

    Before the wrapped function runs, ``message + ' ... '`` is written to
    stdout and flushed.  After it returns, one more line is printed:

    * if *info* is given, ``info(result)`` (this takes precedence over
      *modal*);
    * otherwise, if *modal* is true, "yes" for a truthy result, "NO" for
      a falsy one;
    * otherwise "done".

    The wrapped function's return value is passed through unchanged.
    """
    # Imported locally so this block does not rely on a file-level import.
    import functools

    def decorated_fxn(fxn):
        # Keep the wrapped function's __name__/__doc__ for introspection.
        @functools.wraps(fxn)
        def call_fxn(*args, **kwargs):
            sys.stdout.write(message + ' ... ')
            sys.stdout.flush()
            result = fxn(*args, **kwargs)
            if info:
                print(info(result))
            elif modal:
                print("yes" if result else "NO")
            else:
                print("done")
            return result
        return call_fxn
    return decorated_fxn
45
46
def get_git_branch():
    """Get the symbolic name for the current git branch.

    Runs ``git rev-parse --abbrev-ref HEAD`` in SRCDIR and returns its
    output with surrounding whitespace removed, or None if git exits
    with a non-zero status.
    """
    cmd = "git rev-parse --abbrev-ref HEAD".split()
    try:
        output = subprocess.check_output(cmd,
                                         stderr=subprocess.DEVNULL,
                                         cwd=SRCDIR,
                                         encoding='UTF-8')
    except subprocess.CalledProcessError:
        return None
    # The command output ends with a newline; strip it so the result can
    # be compared directly against a plain branch name.
    return output.strip()
57
58
def get_git_upstream_remote():
    """Pick the remote that upstream branches should be resolved against.

    Returns "upstream" when ``git remote get-url upstream`` succeeds in
    SRCDIR, and "origin" when that command exits with an error.
    """
    probe = ["git", "remote", "get-url", "upstream"]
    try:
        subprocess.check_output(probe,
                                cwd=SRCDIR,
                                stderr=subprocess.DEVNULL,
                                encoding='UTF-8')
    except subprocess.CalledProcessError:
        remote = "origin"
    else:
        remote = "upstream"
    return remote
73
74
def get_git_remote_default_branch(remote_name):
    """Get the name of the default branch for the given remote.

    It is typically called 'main', but may differ.

    Runs ``git remote show <remote_name>`` in SRCDIR and returns the
    value of the first line containing "HEAD branch:".  Returns None if
    git exits with an error or no such line is present.
    """
    cmd = "git remote show {}".format(remote_name).split()
    env = os.environ.copy()
    # The parsing below looks for the untranslated "HEAD branch:" label,
    # so force the C locale.  LANG alone is not enough: an inherited
    # LC_ALL (or LANGUAGE) takes precedence over it.
    env['LANG'] = 'C'
    env['LC_ALL'] = 'C'
    env.pop('LANGUAGE', None)
    try:
        remote_info = subprocess.check_output(cmd,
                                              stderr=subprocess.DEVNULL,
                                              cwd=SRCDIR,
                                              encoding='UTF-8',
                                              env=env)
    except subprocess.CalledProcessError:
        return None
    for line in remote_info.splitlines():
        if "HEAD branch:" in line:
            base_branch = line.split(":")[1].strip()
            return base_branch
    return None
96
97
@status("Getting base branch for PR",
        info=lambda x: x if x is not None else "not a PR branch")
def get_base_branch():
    """Return '<remote>/<branch>' to diff against, or None.

    None is returned when SRCDIR is not a git checkout, when the remote's
    default branch cannot be determined, when the current branch cannot
    be determined, or when the current branch is the base branch itself.

    For an interpreter whose release level is 'alpha' the base branch is
    the upstream remote's default branch; otherwise it is the
    '<major>.<minor>' maintenance branch of the running interpreter.
    """
    if not os.path.exists(os.path.join(SRCDIR, '.git')):
        # Not a git checkout, so there's no base branch
        return None
    upstream_remote = get_git_upstream_remote()
    version = sys.version_info
    if version.releaselevel == 'alpha':
        base_branch = get_git_remote_default_branch(upstream_remote)
        if base_branch is None:
            # The remote could not be queried; without a branch name
            # there is nothing to build a '<remote>/<branch>' ref from.
            return None
    else:
        base_branch = "{0.major}.{0.minor}".format(version)
    this_branch = get_git_branch()
    if this_branch is None or this_branch == base_branch:
        # Not on a git PR branch, so there's no base branch
        return None
    return upstream_remote + "/" + base_branch
115
116
@status("Getting the list of files that have been added/changed",
        info=lambda x: n_files_str(len(x)))
def changed_files(base_branch=None):
    """Get the list of changed or added files from git.

    With *base_branch*, ``git diff --name-status <base_branch>`` is used;
    otherwise ``git status --porcelain``.  Only entries whose status
    contains 'M', 'A' or 'U' are kept.  Paths are normalized and anything
    inside one of EXCLUDE_DIRS is dropped.

    Exits the process (via sys.exit) if SRCDIR is not a git checkout or
    if the git command fails.
    """
    if not os.path.exists(os.path.join(SRCDIR, '.git')):
        sys.exit('need a git checkout to get modified files')

    # We just use an existence check above as:
    #  directory = normal git checkout/clone
    #  file = git worktree directory
    if base_branch:
        cmd = 'git diff --name-status ' + base_branch
    else:
        cmd = 'git status --porcelain'

    # subprocess.run() drains stdout while waiting for the child.
    # Calling wait() before reading a PIPE can deadlock once the output
    # exceeds the OS pipe buffer.
    proc = subprocess.run(cmd.split(),
                          stdout=subprocess.PIPE,
                          cwd=SRCDIR)
    if proc.returncode != 0:
        sys.exit(f'error running {cmd}')

    filenames = []
    for line in proc.stdout.splitlines():
        line = line.decode().rstrip()
        if not line:
            # Nothing to unpack on a blank line.
            continue
        status_text, filename = line.split(maxsplit=1)
        status = set(status_text)
        # modified, added or unmerged files
        if not status.intersection('MAU'):
            continue
        if ' -> ' in filename:
            # file is renamed
            filename = filename.split(' -> ', 2)[1].strip()
        filenames.append(filename)

    # Match on whole path components: a bare prefix test would also
    # exclude siblings such as 'Modules/zlibmodule.c' for 'Modules/zlib'.
    excluded_prefixes = tuple(path + os.sep for path in EXCLUDE_DIRS)
    filenames2 = []
    for filename in filenames:
        # Normalize the path to be able to match using .startswith()
        filename = os.path.normpath(filename)
        if filename in EXCLUDE_DIRS or filename.startswith(excluded_prefixes):
            # Exclude the file
            continue
        filenames2.append(filename)

    return filenames2
159
160
def report_modified_files(file_paths):
    """Build a human-readable summary of *file_paths*.

    With no paths the result is just the 'N files' count.  Otherwise the
    count is followed by a colon and each path on its own line, indented
    by two spaces.
    """
    total = len(file_paths)
    heading = n_files_str(total)
    if total == 0:
        return heading
    return "\n".join([f"{heading}:", *(f"  {entry}" for entry in file_paths)])
170
171
@status("Fixing Python file whitespace", info=report_modified_files)
def normalize_whitespace(file_paths):
    """Run reindent over the given .py files and return those it flagged.

    Paths not ending in '.py' are ignored.  A path is included in the
    result when ``reindent.check`` returns a true value for it
    (presumably meaning the file was rewritten -- confirm against
    reindent).
    """
    # No need to create backups.
    reindent.makebackup = False
    fixed = []
    for path in file_paths:
        if not path.endswith('.py'):
            continue
        if reindent.check(os.path.join(SRCDIR, path)):
            fixed.append(path)
    return fixed
179
180
@status("Fixing C file whitespace", info=report_modified_files)
def normalize_c_whitespace(file_paths):
    """Untabify the given C files and return the paths that contained tabs.

    Each path is resolved relative to SRCDIR.  Files without any tab
    character are left untouched; the others are passed to
    ``untabify.process`` (called with 8 as its second argument,
    presumably the tab size -- confirm against untabify).
    """
    fixed = []
    for path in file_paths:
        abspath = os.path.join(SRCDIR, path)
        # Look for tabs in the raw bytes.  Text mode would decode with
        # the locale's default encoding and can raise UnicodeDecodeError
        # on sources that are not valid in that encoding.
        with open(abspath, 'rb') as f:
            if b'\t' not in f.read():
                continue
        untabify.process(abspath, 8, verbose=False)
        fixed.append(path)
    return fixed
193
194
# Whitespace run immediately before the final newline of a line.
# Note: for bytes, \s also matches \r, so the greedy run can absorb the
# \r of a CRLF ending and the captured group is then just \n.
ws_re = re.compile(br'\s+(\r?\n)$')

@status("Fixing docs whitespace", info=report_modified_files)
def normalize_docs_whitespace(file_paths):
    """Strip trailing whitespace from the given doc files.

    Each path is resolved relative to SRCDIR.  When a file changes, the
    original is first copied to '<file>.bak' and then overwritten.
    Returns the paths that were rewritten.  Any exception raised while
    handling a file is reported on stdout and that file is skipped.
    """
    fixed = []
    for path in file_paths:
        abspath = os.path.join(SRCDIR, path)
        try:
            with open(abspath, 'rb') as src:
                original = src.readlines()
            cleaned = []
            for raw in original:
                cleaned.append(ws_re.sub(br'\1', raw))
            if cleaned == original:
                # Nothing to fix in this file.
                continue
            shutil.copyfile(abspath, abspath + '.bak')
            with open(abspath, 'wb') as dst:
                dst.writelines(cleaned)
            fixed.append(path)
        except Exception as err:
            print('Cannot fix %s: %s' % (path, err))
    return fixed
214
215
@status("Docs modified", modal=True)
def docs_modified(file_paths):
    """Return True when *file_paths* is non-empty, False otherwise.

    The caller is expected to pass only documentation files.
    """
    return True if file_paths else False
220
221
@status("Misc/ACKS updated", modal=True)
def credit_given(file_paths):
    """Tell whether the Misc/ACKS path is among *file_paths*."""
    acks_path = os.path.join('Misc', 'ACKS')
    return acks_path in file_paths
226
227
@status("Misc/NEWS.d updated with `blurb`", modal=True)
def reported_news(file_paths):
    """Tell whether any path starts with the Misc/NEWS.d/next prefix."""
    news_prefix = os.path.join('Misc', 'NEWS.d', 'next')
    for candidate in file_paths:
        if candidate.startswith(news_prefix):
            return True
    return False
233
@status("configure regenerated", modal=True, info=str)
def regenerated_configure(file_paths):
    """Report whether 'configure' changed alongside 'configure.ac'.

    Returns "not needed" when 'configure.ac' is not in *file_paths*;
    otherwise "yes" or "no" depending on whether 'configure' is.
    """
    if 'configure.ac' not in file_paths:
        return "not needed"
    if 'configure' in file_paths:
        return "yes"
    return "no"
241
@status("pyconfig.h.in regenerated", modal=True, info=str)
def regenerated_pyconfig_h_in(file_paths):
    """Report whether 'pyconfig.h.in' changed alongside 'configure.ac'.

    Returns "not needed" when 'configure.ac' is not in *file_paths*;
    otherwise "yes" or "no" depending on whether 'pyconfig.h.in' is.
    """
    if 'configure.ac' not in file_paths:
        return "not needed"
    if 'pyconfig.h.in' in file_paths:
        return "yes"
    return "no"
249
def ci(pull_request):
    """Run the whitespace checks in pass/fail mode.

    Does nothing beyond printing a notice when *pull_request* is the
    string 'false'.  Otherwise the changed Python, C and doc files are
    normalized, and the process exits with status 1 if any file needed
    fixing.
    """
    if pull_request == 'false':
        print('Not a pull request; skipping')
        return

    file_paths = changed_files(get_base_branch())

    # Sort the changed files into the three categories that are checked.
    python_files, c_files, doc_files = [], [], []
    for fn in file_paths:
        if fn.endswith('.py'):
            python_files.append(fn)
        if fn.endswith(('.c', '.h')):
            c_files.append(fn)
        if fn.startswith('Doc') and fn.endswith(('.rst', '.inc')):
            doc_files.append(fn)

    # Elements are evaluated left to right, so the checks run in order.
    fixed = [
        *normalize_whitespace(python_files),
        *normalize_c_whitespace(c_files),
        *normalize_docs_whitespace(doc_files),
    ]
    if fixed:
        print(f'Please fix the {len(fixed)} file(s) with whitespace issues')
        print('(on UNIX you can run `make patchcheck` to make the fixes)')
        sys.exit(1)
    print('No whitespace issues found')
270
def main():
    """Run every check on the changed files and print the results.

    Whitespace is normalized in changed Python, C and doc files, then the
    informational checks (docs, Misc/ACKS, Misc/NEWS.d, configure,
    pyconfig.h.in) are reported.  A test-suite reminder is printed last
    when any Python or C file changed.
    """
    file_paths = changed_files(get_base_branch())

    # Sort the changed files into the categories the checks work on.
    python_files, c_files, doc_files = [], [], []
    misc_files = set()
    for fn in file_paths:
        if fn.endswith('.py'):
            python_files.append(fn)
        if fn.endswith(('.c', '.h')):
            c_files.append(fn)
        if fn.startswith('Doc') and fn.endswith(('.rst', '.inc')):
            doc_files.append(fn)
        if fn.startswith('Misc'):
            misc_files.add(fn)

    checks = (
        (normalize_whitespace, python_files),       # PEP 8 whitespace rules
        (normalize_c_whitespace, c_files),          # C rules
        (normalize_docs_whitespace, doc_files),     # Doc whitespace
        (docs_modified, doc_files),                 # Docs updated
        (credit_given, misc_files),                 # Misc/ACKS changed
        (reported_news, misc_files),                # Misc/NEWS changed
        (regenerated_configure, file_paths),        # configure, if necessary
        (regenerated_pyconfig_h_in, file_paths),    # pyconfig.h.in, if necessary
    )
    for check, paths in checks:
        check(paths)

    # Test suite run and passed.
    if not (python_files or c_files):
        return
    if c_files:
        end = " and check for refleaks?"
    else:
        end = "?"
    print()
    print("Did you run the test suite" + end)
301
302
if __name__ == '__main__':
    # Script entry point: a non-empty --ci value selects pass/fail mode,
    # otherwise the interactive report is produced.
    import argparse
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--ci', help='Perform pass/fail checks')
    args = parser.parse_args()
    if not args.ci:
        main()
    else:
        ci(args.ci)
313