#! /usr/bin/env python

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-n][-v][-h] [ path ... ]

-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose)  Verbose.   Print informative msgs; else no output.
-h (--help)     Help.      Print this usage information and exit.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files.  Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output.  In this case, the -d, -r and -v flags are
ignored.

You can pass one or more file and/or directory paths.  When a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension.  If it finds nothing to
change, the file is left alone.  If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines.  So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.

The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copy(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

__version__ = "1"

import tokenize
import os
import shutil
import sys

try:
    # Lazy ``range`` on Python 2; simply the builtin on Python 3.
    from six.moves import range
except ImportError:
    # six is optional: the builtin ``range`` is correct on both major
    # versions, it merely materialises a list on Python 2.
    pass

# Command-line state, set by main() and read by check().
verbose = 0
recurse = 0
dryrun = 0
makebackup = True


def usage(msg=None):
    """Print *msg* (when given) followed by the module docstring to stderr."""
    if msg is not None:
        print(msg, file=sys.stderr)
    print(__doc__, file=sys.stderr)


def errprint(*args):
    """Write the str() of each argument to stderr, space separated, plus a newline."""
    sys.stderr.write(" ".join(str(arg) for arg in args) + "\n")


def main():
    """Parse sys.argv and reindent the named paths, or filter stdin to stdout.

    Sets the module-level ``verbose``, ``recurse``, ``dryrun`` and
    ``makebackup`` flags from the command-line options.  With no path
    arguments the program reads one source file from standard input and
    writes the transformed source to standard output.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "help"])
    except getopt.error as msg:
        usage(msg)
        return
    for opt, _ in opts:
        if opt in ('-d', '--dryrun'):
            dryrun += 1
        elif opt in ('-r', '--recurse'):
            recurse += 1
        elif opt in ('-n', '--nobackup'):
            makebackup = False
        elif opt in ('-v', '--verbose'):
            verbose += 1
        elif opt in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin -> stdout.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)


def check(file):
    """Reindent *file* in place, or every .py file inside it if it is a directory.

    Directories that are symbolic links are not listed.  Subdirectories are
    only descended into when the module-level ``recurse`` flag is set.

    Returns True if the file needed changes, False if it did not, and None
    for a directory or when the file could not be read (the I/O error is
    reported on stderr).  When ``dryrun`` is set nothing is written.  When
    ``makebackup`` is set the original is first copied to ``file + ".bak"``.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            is_subdir = (recurse and os.path.isdir(fullname) and
                         not os.path.islink(fullname))
            if is_subdir or name.lower().endswith(".py"):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    try:
        # The context manager closes the handle even if reading fails.
        with open(file) as f:
            r = Reindenter(f)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            with open(file, "w") as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False


def _rstrip(line, JUNK='\n \t'):
    """Return line stripped of trailing spaces, tabs, newlines.

    Note that line.rstrip() instead also strips sundry control characters,
    but at least one known Emacs user expects to keep junk like that, not
    mentioning Barry by name or anything <wink>.
    """
    # Passing the character set explicitly strips only those characters.
    return line.rstrip(JUNK)


class Reindenter(object):
    """Re-indent one Python source file to 4-space indentation levels.

    Construct it from an object with a ``readlines()`` method, call run()
    to compute the transformed lines, then write() to emit them.
    """

    def __init__(self, f):
        """Read every line from *f* and prepare the tokenizer state."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Stub at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Compute the reindented program into ``self.after``.

        Returns True if the result differs from the raw input lines.
        Errors raised by the tokenizer on malformed source propagate to
        the caller.
        """
        # generate_tokens() has the same signature on Python 2 and 3; the
        # two-argument callback form of tokenize.tokenize() only exists on
        # Python 2.
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        first = stats[0][0]
        after.extend(lines[1:first])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                want = self._comment_indent(i, have, have2want)
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            elif diff > 0:
                # Indent further; all-blank lines stay exactly "\n".
                for line in lines[thisstmt:nextstmt]:
                    after.append(line if line == "\n" else " " * diff + line)
            else:
                # Dedent, but never eat anything except leading blanks.
                for line in lines[thisstmt:nextstmt]:
                    remove = min(getlspace(line), -diff)
                    after.append(line[remove:])
        return self.raw != self.after

    def _comment_indent(self, i, have, have2want):
        """Pick the new indentation for the comment line at ``self.stats[i]``.

        *have* is the comment's current count of leading spaces and
        *have2want* maps previously seen indentations to what they were
        changed to.
        """
        if not have:
            return 0
        stats, lines, after = self.stats, self.lines, self.after
        # An indented comment line.  If we saw the same indentation
        # before, reuse what it most recently mapped to.
        want = have2want.get(have, -1)
        if want < 0:
            # Then it probably belongs to the next real stmt.
            for j in range(i + 1, len(stats) - 1):
                jline, jlevel = stats[j]
                if jlevel >= 0:
                    if have == getlspace(lines[jline]):
                        want = jlevel * 4
                    break
        if want < 0:
            # Maybe it's a hanging comment, in which case we should
            # shift it like its base line got shifted.
            for j in range(i - 1, -1, -1):
                jline, jlevel = stats[j]
                if jlevel >= 0:
                    want = (have + getlspace(after[jline - 1]) -
                            getlspace(lines[jline]))
                    break
        if want < 0:
            # Still no luck -- leave it alone.
            want = have
        return want

    def write(self, f):
        """Write the transformed lines computed by run() to *f*."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned-up line, or "" once the input is exhausted."""
        if self.index >= len(self.lines):
            return ""
        line = self.lines[self.index]
        self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, sline_scol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts.

        Appends ``(lineno, level)`` to ``self.stats`` for the first token
        of each statement, and ``(lineno, -1)`` for each comment seen
        while a new statement is still being looked for.
        """
        sline = sline_scol[0]
        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))


# Count number of leading blanks.
def getlspace(line):
    """Return the number of leading space characters in *line*."""
    return len(line) - len(line.lstrip(" "))


if __name__ == '__main__':
    main()