1#! /usr/bin/env python3 2 3"""The Tab Nanny despises ambiguous indentation. She knows no mercy. 4 5tabnanny -- Detection of ambiguous indentation 6 7For the time being this module is intended to be called as a script. 8However it is possible to import it into an IDE and use the function 9check() described below. 10 11Warning: The API provided by this module is likely to change in future 12releases; such changes may not be backward compatible. 13""" 14 15# Released to the public domain, by Tim Peters, 15 April 1998. 16 17# XXX Note: this is now a standard library module. 18# XXX The API needs to undergo changes however; the current code is too 19# XXX script-like. This will be addressed later. 20 21__version__ = "6" 22 23import os 24import sys 25import tokenize 26 27__all__ = ["check", "NannyNag", "process_tokens"] 28 29verbose = 0 30filename_only = 0 31 32def errprint(*args): 33 sep = "" 34 for arg in args: 35 sys.stderr.write(sep + str(arg)) 36 sep = " " 37 sys.stderr.write("\n") 38 39def main(): 40 import getopt 41 42 global verbose, filename_only 43 try: 44 opts, args = getopt.getopt(sys.argv[1:], "qv") 45 except getopt.error as msg: 46 errprint(msg) 47 return 48 for o, a in opts: 49 if o == '-q': 50 filename_only = filename_only + 1 51 if o == '-v': 52 verbose = verbose + 1 53 if not args: 54 errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...") 55 return 56 for arg in args: 57 check(arg) 58 59class NannyNag(Exception): 60 """ 61 Raised by process_tokens() if detecting an ambiguous indent. 62 Captured and handled in check(). 63 """ 64 def __init__(self, lineno, msg, line): 65 self.lineno, self.msg, self.line = lineno, msg, line 66 def get_lineno(self): 67 return self.lineno 68 def get_msg(self): 69 return self.msg 70 def get_line(self): 71 return self.line 72 73def check(file): 74 """check(file_or_dir) 75 76 If file_or_dir is a directory and not a symbolic link, then recursively 77 descend the directory tree named by file_or_dir, checking all .py files 78 along the way. If file_or_dir is an ordinary Python source file, it is 79 checked for whitespace related problems. The diagnostic messages are 80 written to standard output using the print statement. 81 """ 82 83 if os.path.isdir(file) and not os.path.islink(file): 84 if verbose: 85 print("%r: listing directory" % (file,)) 86 names = os.listdir(file) 87 for name in names: 88 fullname = os.path.join(file, name) 89 if (os.path.isdir(fullname) and 90 not os.path.islink(fullname) or 91 os.path.normcase(name[-3:]) == ".py"): 92 check(fullname) 93 return 94 95 try: 96 f = tokenize.open(file) 97 except OSError as msg: 98 errprint("%r: I/O Error: %s" % (file, msg)) 99 return 100 101 if verbose > 1: 102 print("checking %r ..." % file) 103 104 try: 105 process_tokens(tokenize.generate_tokens(f.readline)) 106 107 except tokenize.TokenError as msg: 108 errprint("%r: Token Error: %s" % (file, msg)) 109 return 110 111 except IndentationError as msg: 112 errprint("%r: Indentation Error: %s" % (file, msg)) 113 return 114 115 except NannyNag as nag: 116 badline = nag.get_lineno() 117 line = nag.get_line() 118 if verbose: 119 print("%r: *** Line %d: trouble in tab city! ***" % (file, badline)) 120 print("offending line: %r" % (line,)) 121 print(nag.get_msg()) 122 else: 123 if ' ' in file: file = '"' + file + '"' 124 if filename_only: print(file) 125 else: print(file, badline, repr(line)) 126 return 127 128 finally: 129 f.close() 130 131 if verbose: 132 print("%r: Clean bill of health." % (file,)) 133 134class Whitespace: 135 # the characters used for space and tab 136 S, T = ' \t' 137 138 # members: 139 # raw 140 # the original string 141 # n 142 # the number of leading whitespace characters in raw 143 # nt 144 # the number of tabs in raw[:n] 145 # norm 146 # the normal form as a pair (count, trailing), where: 147 # count 148 # a tuple such that raw[:n] contains count[i] 149 # instances of S * i + T 150 # trailing 151 # the number of trailing spaces in raw[:n] 152 # It's A Theorem that m.indent_level(t) == 153 # n.indent_level(t) for all t >= 1 iff m.norm == n.norm. 154 # is_simple 155 # true iff raw[:n] is of the form (T*)(S*) 156 157 def __init__(self, ws): 158 self.raw = ws 159 S, T = Whitespace.S, Whitespace.T 160 count = [] 161 b = n = nt = 0 162 for ch in self.raw: 163 if ch == S: 164 n = n + 1 165 b = b + 1 166 elif ch == T: 167 n = n + 1 168 nt = nt + 1 169 if b >= len(count): 170 count = count + [0] * (b - len(count) + 1) 171 count[b] = count[b] + 1 172 b = 0 173 else: 174 break 175 self.n = n 176 self.nt = nt 177 self.norm = tuple(count), b 178 self.is_simple = len(count) <= 1 179 180 # return length of longest contiguous run of spaces (whether or not 181 # preceding a tab) 182 def longest_run_of_spaces(self): 183 count, trailing = self.norm 184 return max(len(count)-1, trailing) 185 186 def indent_level(self, tabsize): 187 # count, il = self.norm 188 # for i in range(len(count)): 189 # if count[i]: 190 # il = il + (i//tabsize + 1)*tabsize * count[i] 191 # return il 192 193 # quicker: 194 # il = trailing + sum (i//ts + 1)*ts*count[i] = 195 # trailing + ts * sum (i//ts + 1)*count[i] = 196 # trailing + ts * sum i//ts*count[i] + count[i] = 197 # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] = 198 # trailing + ts * [(sum i//ts*count[i]) + num_tabs] 199 # and note that i//ts*count[i] is 0 when i < ts 200 201 count, trailing = self.norm 202 il = 0 203 for i in range(tabsize, len(count)): 204 il = il + i//tabsize * count[i] 205 return trailing + tabsize * (il + self.nt) 206 207 # return true iff self.indent_level(t) == other.indent_level(t) 208 # for all t >= 1 209 def equal(self, other): 210 return self.norm == other.norm 211 212 # return a list of tuples (ts, i1, i2) such that 213 # i1 == self.indent_level(ts) != other.indent_level(ts) == i2. 214 # Intended to be used after not self.equal(other) is known, in which 215 # case it will return at least one witnessing tab size. 216 def not_equal_witness(self, other): 217 n = max(self.longest_run_of_spaces(), 218 other.longest_run_of_spaces()) + 1 219 a = [] 220 for ts in range(1, n+1): 221 if self.indent_level(ts) != other.indent_level(ts): 222 a.append( (ts, 223 self.indent_level(ts), 224 other.indent_level(ts)) ) 225 return a 226 227 # Return True iff self.indent_level(t) < other.indent_level(t) 228 # for all t >= 1. 229 # The algorithm is due to Vincent Broman. 230 # Easy to prove it's correct. 231 # XXXpost that. 232 # Trivial to prove n is sharp (consider T vs ST). 233 # Unknown whether there's a faster general way. I suspected so at 234 # first, but no longer. 235 # For the special (but common!) case where M and N are both of the 236 # form (T*)(S*), M.less(N) iff M.len() < N.len() and 237 # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded. 238 # XXXwrite that up. 239 # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1. 240 def less(self, other): 241 if self.n >= other.n: 242 return False 243 if self.is_simple and other.is_simple: 244 return self.nt <= other.nt 245 n = max(self.longest_run_of_spaces(), 246 other.longest_run_of_spaces()) + 1 247 # the self.n >= other.n test already did it for ts=1 248 for ts in range(2, n+1): 249 if self.indent_level(ts) >= other.indent_level(ts): 250 return False 251 return True 252 253 # return a list of tuples (ts, i1, i2) such that 254 # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2. 255 # Intended to be used after not self.less(other) is known, in which 256 # case it will return at least one witnessing tab size. 257 def not_less_witness(self, other): 258 n = max(self.longest_run_of_spaces(), 259 other.longest_run_of_spaces()) + 1 260 a = [] 261 for ts in range(1, n+1): 262 if self.indent_level(ts) >= other.indent_level(ts): 263 a.append( (ts, 264 self.indent_level(ts), 265 other.indent_level(ts)) ) 266 return a 267 268def format_witnesses(w): 269 firsts = (str(tup[0]) for tup in w) 270 prefix = "at tab size" 271 if len(w) > 1: 272 prefix = prefix + "s" 273 return prefix + " " + ', '.join(firsts) 274 275def process_tokens(tokens): 276 INDENT = tokenize.INDENT 277 DEDENT = tokenize.DEDENT 278 NEWLINE = tokenize.NEWLINE 279 JUNK = tokenize.COMMENT, tokenize.NL 280 indents = [Whitespace("")] 281 check_equal = 0 282 283 for (type, token, start, end, line) in tokens: 284 if type == NEWLINE: 285 # a program statement, or ENDMARKER, will eventually follow, 286 # after some (possibly empty) run of tokens of the form 287 # (NL | COMMENT)* (INDENT | DEDENT+)? 288 # If an INDENT appears, setting check_equal is wrong, and will 289 # be undone when we see the INDENT. 290 check_equal = 1 291 292 elif type == INDENT: 293 check_equal = 0 294 thisguy = Whitespace(token) 295 if not indents[-1].less(thisguy): 296 witness = indents[-1].not_less_witness(thisguy) 297 msg = "indent not greater e.g. " + format_witnesses(witness) 298 raise NannyNag(start[0], msg, line) 299 indents.append(thisguy) 300 301 elif type == DEDENT: 302 # there's nothing we need to check here! what's important is 303 # that when the run of DEDENTs ends, the indentation of the 304 # program statement (or ENDMARKER) that triggered the run is 305 # equal to what's left at the top of the indents stack 306 307 # Ouch! This assert triggers if the last line of the source 308 # is indented *and* lacks a newline -- then DEDENTs pop out 309 # of thin air. 310 # assert check_equal # else no earlier NEWLINE, or an earlier INDENT 311 check_equal = 1 312 313 del indents[-1] 314 315 elif check_equal and type not in JUNK: 316 # this is the first "real token" following a NEWLINE, so it 317 # must be the first token of the next program statement, or an 318 # ENDMARKER; the "line" argument exposes the leading whitespace 319 # for this statement; in the case of ENDMARKER, line is an empty 320 # string, so will properly match the empty string with which the 321 # "indents" stack was seeded 322 check_equal = 0 323 thisguy = Whitespace(line) 324 if not indents[-1].equal(thisguy): 325 witness = indents[-1].not_equal_witness(thisguy) 326 msg = "indent not equal e.g. " + format_witnesses(witness) 327 raise NannyNag(start[0], msg, line) 328 329 330if __name__ == '__main__': 331 main() 332