#!/usr/bin/env python3

# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later

"""
This script checks the current state of the source code for minor issues,
including incorrect file permissions, presence of tabs, non-Unix line endings,
trailing whitespace, and presence of UTF-8 BOM.
Note: requires python 3, must be run from Mbed TLS root.
"""

import os
import argparse
import logging
import codecs
import re
import subprocess
import sys
try:
    from typing import FrozenSet, Optional, Pattern # pylint: disable=unused-import
except ImportError:
    pass

import scripts_path # pylint: disable=unused-import
from mbedtls_dev import build_tree


class FileIssueTracker:
    """Base class for file-wide issue tracking.

    To implement a checker that processes a file as a whole, inherit from
    this class and implement `check_file_for_issue` and define ``heading``.

    ``suffix_exemptions``: files whose name ends with a string in this set
     will not be checked.

    ``path_exemptions``: files whose path (relative to the root of the source
    tree) matches this regular expression will not be checked. This can be
    ``None`` to match no path. Paths are normalized and converted to ``/``
    separators before matching.

    ``heading``: human-readable description of the issue
    """

    suffix_exemptions = frozenset() #type: FrozenSet[str]
    path_exemptions = None #type: Optional[Pattern[str]]
    # heading must be defined in derived classes.
    # pylint: disable=no-member

    def __init__(self):
        # Maps a file path to the list of line numbers where the issue was
        # found, or to None when the issue concerns the file as a whole.
        self.files_with_issues = {}

    @staticmethod
    def normalize_path(filepath):
        """Normalize ``filepath`` with / as the directory separator."""
        filepath = os.path.normpath(filepath)
        # On Windows, we may have backslashes to separate directories.
        # We need slashes to match exemption lists.
        # Replace each separator individually: str.split() with a
        # multi-character argument would look for that exact sequence
        # instead of treating it as a set of separator characters.
        for sep in (os.path.sep, os.path.altsep):
            if sep and sep != '/':
                filepath = filepath.replace(sep, '/')
        return filepath

    def should_check_file(self, filepath):
        """Whether the given file name should be checked.

        Files whose name ends with a string listed in ``self.suffix_exemptions``
        or whose path matches ``self.path_exemptions`` will not be checked.
        """
        # str.endswith() accepts a tuple of candidates; an empty tuple
        # never matches.
        if filepath.endswith(tuple(self.suffix_exemptions)):
            return False
        if self.path_exemptions and \
           re.match(self.path_exemptions, self.normalize_path(filepath)):
            return False
        return True

    def check_file_for_issue(self, filepath):
        """Check the specified file for the issue that this class is for.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def record_issue(self, filepath, line_number):
        """Record that an issue was found at the specified location."""
        self.files_with_issues.setdefault(filepath, []).append(line_number)

    def output_file_issues(self, logger):
        """Log all the locations where the issue was found.

        Nothing is logged if no issue was recorded. Otherwise log the
        heading, then one entry per file (with the line numbers if any),
        then an empty line.
        """
        if not self.files_with_issues:
            return
        logger.info(self.heading)
        for filename, lines in sorted(self.files_with_issues.items()):
            if lines:
                logger.info("{}: {}".format(
                    filename, ", ".join(str(x) for x in lines)
                ))
            else:
                # File-wide issue: there are no line numbers to report.
                logger.info(filename)
        logger.info("")


# Paths (relative to the root of the source tree, with / separators) of
# files that are treated as binary and skipped by the text-oriented checks.
BINARY_FILE_PATH_RE_LIST = [
    r'docs/.*\.pdf\Z',
    r'programs/fuzz/corpuses/[^.]+\Z',
    r'tests/data_files/[^.]+\Z',
    r'tests/data_files/.*\.(crt|csr|db|der|key|pubkey)\Z',
    r'tests/data_files/.*\.req\.[^/]+\Z',
    r'tests/data_files/.*malformed[^/]+\Z',
    r'tests/data_files/format_pkcs12\.fmt\Z',
    r'tests/data_files/.*\.bin\Z',
]
BINARY_FILE_PATH_RE = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST))


class LineIssueTracker(FileIssueTracker):
    """Base class for line-by-line issue tracking.

    To implement a checker that processes files line by line, inherit from
    this class and implement `issue_with_line`.
    """

    # Exclude binary files.
    path_exemptions = BINARY_FILE_PATH_RE

    def issue_with_line(self, line, filepath, line_number):
        """Check the specified line for the issue that this class is for.

        ``line`` is the raw content of the line as a bytes object, including
        its line terminator if any. ``line_number`` starts at 1.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def check_file_line(self, filepath, line, line_number):
        """Record an issue at the given location if the line has one."""
        if self.issue_with_line(line, filepath, line_number):
            self.record_issue(filepath, line_number)

    def check_file_for_issue(self, filepath):
        """Check the lines of the specified file.

        Subclasses must implement the ``issue_with_line`` method.
        """
        # Read in binary mode so that checkers see the exact bytes,
        # including line endings and invalid encodings.
        with open(filepath, "rb") as f:
            for line_number, line in enumerate(f, 1):
                self.check_file_line(filepath, line, line_number)


def is_windows_file(filepath):
    """Whether the file name has an extension of a Windows-specific file."""
    _root, ext = os.path.splitext(filepath)
    return ext in ('.bat', '.dsp', '.dsw', '.sln', '.vcxproj')


class PermissionIssueTracker(FileIssueTracker):
    """Track files with bad permissions.

    Files that are not executable scripts must not be executable."""

    heading = "Incorrect permissions:"

    # .py files can be either full scripts or modules, so they may or may
    # not be executable.
    suffix_exemptions = frozenset({".py"})

    def check_file_for_issue(self, filepath):
        """Flag the file if its executable bit disagrees with its extension."""
        is_executable = os.access(filepath, os.X_OK)
        should_be_executable = filepath.endswith((".sh", ".pl"))
        if is_executable != should_be_executable:
            self.files_with_issues[filepath] = None


class ShebangIssueTracker(FileIssueTracker):
    """Track files with a bad, missing or extraneous shebang line.

    Executable scripts must start with a valid shebang (#!) line.
    """

    heading = "Invalid shebang line:"

    # Allow either /bin/sh, /bin/bash, or /usr/bin/env.
    # Allow at most one argument (this is a Linux limitation).
    # For sh and bash, the argument if present must be options.
    # For env, the argument must be the base name of the interpreter.
    _shebang_re = re.compile(rb'^#! ?(?:/bin/(bash|sh)(?: -[^\n ]*)?'
                             rb'|/usr/bin/env ([^\n /]+))$')
    # Expected file name extension for each supported interpreter.
    _extensions = {
        b'bash': 'sh',
        b'perl': 'pl',
        b'python3': 'py',
        b'sh': 'sh',
    }

    def is_valid_shebang(self, first_line, filepath):
        """Whether ``first_line`` is an accepted shebang line for ``filepath``.

        The line must match ``_shebang_re``, name an interpreter listed in
        ``_extensions``, and the file name must have the extension
        associated with that interpreter.
        """
        m = self._shebang_re.match(first_line)
        if not m:
            return False
        # Group 1 is set for /bin/sh and /bin/bash, group 2 for /usr/bin/env.
        interpreter = m.group(1) or m.group(2)
        if interpreter not in self._extensions:
            return False
        return filepath.endswith('.' + self._extensions[interpreter])

    def check_file_for_issue(self, filepath):
        """Check consistency between the shebang line and the executable bit."""
        is_executable = os.access(filepath, os.X_OK)
        with open(filepath, "rb") as f:
            first_line = f.readline()
        if first_line.startswith(b'#!'):
            if not is_executable:
                # Shebang on a non-executable file
                self.files_with_issues[filepath] = None
            elif not self.is_valid_shebang(first_line, filepath):
                self.files_with_issues[filepath] = [1]
        elif is_executable:
            # Executable without a shebang
            self.files_with_issues[filepath] = None


class EndOfFileNewlineIssueTracker(FileIssueTracker):
    """Track files that end with an incomplete line
    (no newline character at the end of the last line)."""

    heading = "Missing newline at end of file:"

    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Flag the file if it is non-empty and its last byte is not LF."""
        with open(filepath, "rb") as f:
            try:
                f.seek(-1, os.SEEK_END)
            except OSError:
                # This script only works on regular files. If we can't seek
                # 1 before the end, it means that this position is before
                # the beginning of the file, i.e. that the file is empty.
                return
            if f.read(1) != b"\n":
                self.files_with_issues[filepath] = None


class Utf8BomIssueTracker(FileIssueTracker):
    """Track files that start with a UTF-8 BOM.
    Files should be ASCII or UTF-8. Valid UTF-8 does not start with a BOM."""

    heading = "UTF-8 BOM present:"

    suffix_exemptions = frozenset([".vcxproj", ".sln"])
    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Flag the file if it starts with the UTF-8 encoding of U+FEFF."""
        with open(filepath, "rb") as f:
            # Only the first few bytes matter: don't read the whole file.
            if f.read(len(codecs.BOM_UTF8)) == codecs.BOM_UTF8:
                self.files_with_issues[filepath] = None


class UnicodeIssueTracker(LineIssueTracker):
    """Track lines with invalid characters or invalid text encoding."""

    heading = "Invalid UTF-8 or forbidden character:"

    # Only allow valid UTF-8, and only other explicitly allowed characters.
    # We deliberately exclude all characters that aren't a simple non-blank,
    # non-zero-width glyph, apart from a very small set (tab, ordinary space,
    # line breaks, "basic" no-break space and soft hyphen). In particular,
    # non-ASCII control characters, combining characters, and Unicode state
    # changes (e.g. right-to-left text) are forbidden.
    # Note that we do allow some characters with a risk of visual confusion,
    # for example '-' (U+002D HYPHEN-MINUS) vs U+00AD SOFT HYPHEN vs
    # '‐' (U+2010 HYPHEN), or 'A' (U+0041 LATIN CAPITAL LETTER A) vs
    # 'Α' (U+0391 GREEK CAPITAL LETTER ALPHA).
    GOOD_CHARACTERS = ''.join([
        '\t\n\r -~', # ASCII (tabs and line endings are checked separately)
        '\u00A0-\u00FF', # Latin-1 Supplement (for NO-BREAK SPACE and punctuation)
        '\u2010-\u2027\u2030-\u205E', # General Punctuation (printable)
        '\u2070\u2071\u2074-\u208E\u2090-\u209C', # Superscripts and Subscripts
        '\u2190-\u21FF', # Arrows
        '\u2200-\u22FF', # Mathematical Symbols
        '\u2500-\u257F' # Box Drawings characters used in markdown trees
    ])
    # Allow any of the characters and ranges above, and anything classified
    # as a word constituent.
    GOOD_CHARACTERS_RE = re.compile(r'[\w{}]+\Z'.format(GOOD_CHARACTERS))

    def issue_with_line(self, line, _filepath, line_number):
        """Whether the line is not valid UTF-8 or has a forbidden character."""
        try:
            text = line.decode('utf-8')
        except UnicodeDecodeError:
            return True
        if line_number == 1 and text.startswith('\uFEFF'):
            # Strip BOM (U+FEFF ZERO WIDTH NO-BREAK SPACE) at the beginning.
            # Which files are allowed to have a BOM is handled in
            # Utf8BomIssueTracker.
            text = text[1:]
        return not self.GOOD_CHARACTERS_RE.match(text)


class UnixLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Unix line endings (i.e. files with CR)."""

    heading = "Non-Unix line endings:"

    def should_check_file(self, filepath):
        """Check all non-exempt files except Windows-specific ones."""
        if not super().should_check_file(filepath):
            return False
        return not is_windows_file(filepath)

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether the line contains a CR character."""
        return b"\r" in line


class WindowsLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Windows line endings (i.e. CR or LF not in CRLF)."""

    heading = "Non-Windows line endings:"

    def should_check_file(self, filepath):
        """Check only non-exempt Windows-specific files."""
        if not super().should_check_file(filepath):
            return False
        return is_windows_file(filepath)

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether the line lacks a final CRLF or has a CR elsewhere."""
        return not line.endswith(b"\r\n") or b"\r" in line[:-2]


class TrailingWhitespaceIssueTracker(LineIssueTracker):
    """Track lines with trailing whitespace."""

    heading = "Trailing whitespace:"
    suffix_exemptions = frozenset([".dsp", ".md"])

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether there is whitespace before the line terminator."""
        # Stripping only the line terminator and stripping all trailing
        # whitespace give different results iff there is extra whitespace.
        return line.rstrip(b"\r\n") != line.rstrip()
class TabIssueTracker(LineIssueTracker):
    """Track lines with tabs."""

    heading = "Tabs present:"
    suffix_exemptions = frozenset([
        ".pem", # some openssl dumps have tabs
        ".sln",
        "/Makefile",
        "/Makefile.inc",
        "/generate_visualc_files.pl",
    ])

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether the line contains a tab character."""
        return b"\t" in line


class MergeArtifactIssueTracker(LineIssueTracker):
    """Track lines with merge artifacts.
    These are leftovers from a ``git merge`` that wasn't fully edited."""

    heading = "Merge artifact:"

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether the line looks like a leftover git conflict marker."""
        # Detect leftover git conflict markers.
        # '||||||| ' is produced with merge.conflictStyle=diff3.
        if line.startswith((b'<<<<<<< ', b'>>>>>>> ', b'||||||| ')):
            return True
        # A line consisting of exactly '=======' is a conflict separator,
        # except in .md files, which are exempt from this particular check.
        if line.rstrip(b'\r\n') == b'=======' and \
           not _filepath.endswith('.md'):
            return True
        return False


class IntegrityChecker:
    """Sanity-check files under the current directory."""

    def __init__(self, log_file):
        """Instantiate the sanity checker.
        Check files under the current directory.
        Write a report of issues to log_file."""
        build_tree.check_repo_path()
        self.logger = None
        self.setup_logger(log_file)
        self.issues_to_check = [
            PermissionIssueTracker(),
            ShebangIssueTracker(),
            EndOfFileNewlineIssueTracker(),
            Utf8BomIssueTracker(),
            UnicodeIssueTracker(),
            UnixLineEndingIssueTracker(),
            WindowsLineEndingIssueTracker(),
            TrailingWhitespaceIssueTracker(),
            TabIssueTracker(),
            MergeArtifactIssueTracker(),
        ]

    def setup_logger(self, log_file, level=logging.INFO):
        """Configure the root logger to write to ``log_file``.

        If ``log_file`` is empty or ``None``, log to a ``StreamHandler``
        instead.
        """
        self.logger = logging.getLogger()
        self.logger.setLevel(level)
        if log_file:
            handler = logging.FileHandler(log_file)
            self.logger.addHandler(handler)
        else:
            console = logging.StreamHandler()
            self.logger.addHandler(console)

    @staticmethod
    def collect_files():
        """Return the list of files tracked by git, as reported by
        ``git ls-files``.

        File names are decoded as ASCII; a name with non-ASCII bytes raises
        ``UnicodeDecodeError``.
        """
        bytes_output = subprocess.check_output(['git', 'ls-files', '-z'])
        # The output is NUL-terminated, so the last element of the split
        # is an empty string that must be discarded.
        bytes_filepaths = bytes_output.split(b'\0')[:-1]
        ascii_filepaths = [fp.decode('ascii') for fp in bytes_filepaths]
        # Prepend './' to files in the top-level directory so that
        # something like `'/Makefile' in fp` matches in the top-level
        # directory as well as in subdirectories.
        return [fp if os.path.dirname(fp) else os.path.join(os.curdir, fp)
                for fp in ascii_filepaths]

    def check_files(self):
        """Run every issue tracker on every file that it applies to."""
        # List the files once rather than spawning git for each tracker.
        filepaths = self.collect_files()
        for issue_to_check in self.issues_to_check:
            for filepath in filepaths:
                if issue_to_check.should_check_file(filepath):
                    issue_to_check.check_file_for_issue(filepath)

    def output_issues(self):
        """Log the issues found by each tracker.

        Return 1 if any tracker recorded an issue and 0 otherwise.
        """
        integrity_return_code = 0
        for issue_to_check in self.issues_to_check:
            if issue_to_check.files_with_issues:
                integrity_return_code = 1
            issue_to_check.output_file_issues(self.logger)
        return integrity_return_code
def run_main():
    """Command line entry point.

    Parse the command line, run all the checks and exit with the status
    returned by ``IntegrityChecker.output_issues``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-l", "--log_file", type=str, help="path to optional output log",
    )
    check_args = parser.parse_args()
    integrity_check = IntegrityChecker(check_args.log_file)
    integrity_check.check_files()
    return_code = integrity_check.output_issues()
    sys.exit(return_code)


if __name__ == "__main__":
    run_main()