1*9c5db199SXin Li#! /usr/bin/env python 2*9c5db199SXin Li 3*9c5db199SXin Li# Released to the public domain, by Tim Peters, 03 October 2000. 4*9c5db199SXin Li 5*9c5db199SXin Li"""reindent [-d][-r][-v] [ path ... ] 6*9c5db199SXin Li 7*9c5db199SXin Li-d (--dryrun) Dry run. Analyze, but don't make any changes to, files. 8*9c5db199SXin Li-r (--recurse) Recurse. Search for all .py files in subdirectories too. 9*9c5db199SXin Li-n (--nobackup) No backup. Does not make a ".bak" file before reindenting. 10*9c5db199SXin Li-v (--verbose) Verbose. Print informative msgs; else no output. 11*9c5db199SXin Li-h (--help) Help. Print this usage information and exit. 12*9c5db199SXin Li 13*9c5db199SXin LiChange Python (.py) files to use 4-space indents and no hard tab characters. 14*9c5db199SXin LiAlso trim excess spaces and tabs from ends of lines, and remove empty lines 15*9c5db199SXin Liat the end of files. Also ensure the last line ends with a newline. 16*9c5db199SXin Li 17*9c5db199SXin LiIf no paths are given on the command line, reindent operates as a filter, 18*9c5db199SXin Lireading a single source file from standard input and writing the transformed 19*9c5db199SXin Lisource to standard output. In this case, the -d, -r and -v flags are 20*9c5db199SXin Liignored. 21*9c5db199SXin Li 22*9c5db199SXin LiYou can pass one or more file and/or directory paths. When a directory 23*9c5db199SXin Lipath, all .py files within the directory will be examined, and, if the -r 24*9c5db199SXin Lioption is given, likewise recursively for subdirectories. 25*9c5db199SXin Li 26*9c5db199SXin LiIf output is not to standard output, reindent overwrites files in place, 27*9c5db199SXin Lirenaming the originals with a .bak extension. If it finds nothing to 28*9c5db199SXin Lichange, the file is left alone. If reindent does change a file, the changed 29*9c5db199SXin Lifile is a fixed-point for future runs (i.e., running reindent on the 30*9c5db199SXin Liresulting .py file won't change it again). 31*9c5db199SXin Li 32*9c5db199SXin LiThe hard part of reindenting is figuring out what to do with comment 33*9c5db199SXin Lilines. So long as the input files get a clean bill of health from 34*9c5db199SXin Litabnanny.py, reindent should do a good job. 35*9c5db199SXin Li 36*9c5db199SXin LiThe backup file is a copy of the one that is being reindented. The ".bak" 37*9c5db199SXin Lifile is generated with shutil.copy(), but some corner cases regarding 38*9c5db199SXin Liuser/group and permissions could leave the backup file more readable that 39*9c5db199SXin Liyou'd prefer. You can always use the --nobackup option to prevent this. 40*9c5db199SXin Li""" 41*9c5db199SXin Li 42*9c5db199SXin Lifrom __future__ import absolute_import 43*9c5db199SXin Lifrom __future__ import division 44*9c5db199SXin Lifrom __future__ import print_function 45*9c5db199SXin Li 46*9c5db199SXin Li__version__ = "1" 47*9c5db199SXin Li 48*9c5db199SXin Liimport tokenize 49*9c5db199SXin Liimport os, shutil 50*9c5db199SXin Liimport sys 51*9c5db199SXin Li 52*9c5db199SXin Lifrom six.moves import range 53*9c5db199SXin Li 54*9c5db199SXin Liverbose = 0 55*9c5db199SXin Lirecurse = 0 56*9c5db199SXin Lidryrun = 0 57*9c5db199SXin Limakebackup = True 58*9c5db199SXin Li 59*9c5db199SXin Lidef usage(msg=None): 60*9c5db199SXin Li if msg is not None: 61*9c5db199SXin Li print(msg, file=sys.stderr) 62*9c5db199SXin Li print(__doc__, file=sys.stderr) 63*9c5db199SXin Li 64*9c5db199SXin Lidef errprint(*args): 65*9c5db199SXin Li sep = "" 66*9c5db199SXin Li for arg in args: 67*9c5db199SXin Li sys.stderr.write(sep + str(arg)) 68*9c5db199SXin Li sep = " " 69*9c5db199SXin Li sys.stderr.write("\n") 70*9c5db199SXin Li 71*9c5db199SXin Lidef main(): 72*9c5db199SXin Li import getopt 73*9c5db199SXin Li global verbose, recurse, dryrun, makebackup 74*9c5db199SXin Li try: 75*9c5db199SXin Li opts, args = getopt.getopt(sys.argv[1:], "drnvh", 76*9c5db199SXin Li ["dryrun", "recurse", "nobackup", "verbose", "help"]) 77*9c5db199SXin Li except getopt.error as msg: 78*9c5db199SXin Li usage(msg) 79*9c5db199SXin Li return 80*9c5db199SXin Li for o, a in opts: 81*9c5db199SXin Li if o in ('-d', '--dryrun'): 82*9c5db199SXin Li dryrun += 1 83*9c5db199SXin Li elif o in ('-r', '--recurse'): 84*9c5db199SXin Li recurse += 1 85*9c5db199SXin Li elif o in ('-n', '--nobackup'): 86*9c5db199SXin Li makebackup = False 87*9c5db199SXin Li elif o in ('-v', '--verbose'): 88*9c5db199SXin Li verbose += 1 89*9c5db199SXin Li elif o in ('-h', '--help'): 90*9c5db199SXin Li usage() 91*9c5db199SXin Li return 92*9c5db199SXin Li if not args: 93*9c5db199SXin Li r = Reindenter(sys.stdin) 94*9c5db199SXin Li r.run() 95*9c5db199SXin Li r.write(sys.stdout) 96*9c5db199SXin Li return 97*9c5db199SXin Li for arg in args: 98*9c5db199SXin Li check(arg) 99*9c5db199SXin Li 100*9c5db199SXin Lidef check(file): 101*9c5db199SXin Li if os.path.isdir(file) and not os.path.islink(file): 102*9c5db199SXin Li if verbose: 103*9c5db199SXin Li print("listing directory", file) 104*9c5db199SXin Li names = os.listdir(file) 105*9c5db199SXin Li for name in names: 106*9c5db199SXin Li fullname = os.path.join(file, name) 107*9c5db199SXin Li if ((recurse and os.path.isdir(fullname) and 108*9c5db199SXin Li not os.path.islink(fullname)) 109*9c5db199SXin Li or name.lower().endswith(".py")): 110*9c5db199SXin Li check(fullname) 111*9c5db199SXin Li return 112*9c5db199SXin Li 113*9c5db199SXin Li if verbose: 114*9c5db199SXin Li print("checking", file, "...", end=' ') 115*9c5db199SXin Li try: 116*9c5db199SXin Li f = open(file) 117*9c5db199SXin Li except IOError as msg: 118*9c5db199SXin Li errprint("%s: I/O Error: %s" % (file, str(msg))) 119*9c5db199SXin Li return 120*9c5db199SXin Li 121*9c5db199SXin Li r = Reindenter(f) 122*9c5db199SXin Li f.close() 123*9c5db199SXin Li if r.run(): 124*9c5db199SXin Li if verbose: 125*9c5db199SXin Li print("changed.") 126*9c5db199SXin Li if dryrun: 127*9c5db199SXin Li print("But this is a dry run, so leaving it alone.") 128*9c5db199SXin Li if not dryrun: 129*9c5db199SXin Li bak = file + ".bak" 130*9c5db199SXin Li if makebackup: 131*9c5db199SXin Li shutil.copyfile(file, bak) 132*9c5db199SXin Li if verbose: 133*9c5db199SXin Li print("backed up", file, "to", bak) 134*9c5db199SXin Li f = open(file, "w") 135*9c5db199SXin Li r.write(f) 136*9c5db199SXin Li f.close() 137*9c5db199SXin Li if verbose: 138*9c5db199SXin Li print("wrote new", file) 139*9c5db199SXin Li return True 140*9c5db199SXin Li else: 141*9c5db199SXin Li if verbose: 142*9c5db199SXin Li print("unchanged.") 143*9c5db199SXin Li return False 144*9c5db199SXin Li 145*9c5db199SXin Lidef _rstrip(line, JUNK='\n \t'): 146*9c5db199SXin Li """Return line stripped of trailing spaces, tabs, newlines. 147*9c5db199SXin Li 148*9c5db199SXin Li Note that line.rstrip() instead also strips sundry control characters, 149*9c5db199SXin Li but at least one known Emacs user expects to keep junk like that, not 150*9c5db199SXin Li mentioning Barry by name or anything <wink>. 151*9c5db199SXin Li """ 152*9c5db199SXin Li 153*9c5db199SXin Li i = len(line) 154*9c5db199SXin Li while i > 0 and line[i-1] in JUNK: 155*9c5db199SXin Li i -= 1 156*9c5db199SXin Li return line[:i] 157*9c5db199SXin Li 158*9c5db199SXin Liclass Reindenter: 159*9c5db199SXin Li 160*9c5db199SXin Li def __init__(self, f): 161*9c5db199SXin Li self.find_stmt = 1 # next token begins a fresh stmt? 162*9c5db199SXin Li self.level = 0 # current indent level 163*9c5db199SXin Li 164*9c5db199SXin Li # Raw file lines. 165*9c5db199SXin Li self.raw = f.readlines() 166*9c5db199SXin Li 167*9c5db199SXin Li # File lines, rstripped & tab-expanded. Stub at start is so 168*9c5db199SXin Li # that we can use tokenize's 1-based line numbering easily. 169*9c5db199SXin Li # Note that a line is all-blank iff it's "\n". 170*9c5db199SXin Li self.lines = [_rstrip(line).expandtabs() + "\n" 171*9c5db199SXin Li for line in self.raw] 172*9c5db199SXin Li self.lines.insert(0, None) 173*9c5db199SXin Li self.index = 1 # index into self.lines of next line 174*9c5db199SXin Li 175*9c5db199SXin Li # List of (lineno, indentlevel) pairs, one for each stmt and 176*9c5db199SXin Li # comment line. indentlevel is -1 for comment lines, as a 177*9c5db199SXin Li # signal that tokenize doesn't know what to do about them; 178*9c5db199SXin Li # indeed, they're our headache! 179*9c5db199SXin Li self.stats = [] 180*9c5db199SXin Li 181*9c5db199SXin Li def run(self): 182*9c5db199SXin Li tokenize.tokenize(self.getline, self.tokeneater) 183*9c5db199SXin Li # Remove trailing empty lines. 184*9c5db199SXin Li lines = self.lines 185*9c5db199SXin Li while lines and lines[-1] == "\n": 186*9c5db199SXin Li lines.pop() 187*9c5db199SXin Li # Sentinel. 188*9c5db199SXin Li stats = self.stats 189*9c5db199SXin Li stats.append((len(lines), 0)) 190*9c5db199SXin Li # Map count of leading spaces to # we want. 191*9c5db199SXin Li have2want = {} 192*9c5db199SXin Li # Program after transformation. 193*9c5db199SXin Li after = self.after = [] 194*9c5db199SXin Li # Copy over initial empty lines -- there's nothing to do until 195*9c5db199SXin Li # we see a line with *something* on it. 196*9c5db199SXin Li i = stats[0][0] 197*9c5db199SXin Li after.extend(lines[1:i]) 198*9c5db199SXin Li for i in range(len(stats)-1): 199*9c5db199SXin Li thisstmt, thislevel = stats[i] 200*9c5db199SXin Li nextstmt = stats[i+1][0] 201*9c5db199SXin Li have = getlspace(lines[thisstmt]) 202*9c5db199SXin Li want = thislevel * 4 203*9c5db199SXin Li if want < 0: 204*9c5db199SXin Li # A comment line. 205*9c5db199SXin Li if have: 206*9c5db199SXin Li # An indented comment line. If we saw the same 207*9c5db199SXin Li # indentation before, reuse what it most recently 208*9c5db199SXin Li # mapped to. 209*9c5db199SXin Li want = have2want.get(have, -1) 210*9c5db199SXin Li if want < 0: 211*9c5db199SXin Li # Then it probably belongs to the next real stmt. 212*9c5db199SXin Li for j in range(i+1, len(stats)-1): 213*9c5db199SXin Li jline, jlevel = stats[j] 214*9c5db199SXin Li if jlevel >= 0: 215*9c5db199SXin Li if have == getlspace(lines[jline]): 216*9c5db199SXin Li want = jlevel * 4 217*9c5db199SXin Li break 218*9c5db199SXin Li if want < 0: # Maybe it's a hanging 219*9c5db199SXin Li # comment like this one, 220*9c5db199SXin Li # in which case we should shift it like its base 221*9c5db199SXin Li # line got shifted. 222*9c5db199SXin Li for j in range(i-1, -1, -1): 223*9c5db199SXin Li jline, jlevel = stats[j] 224*9c5db199SXin Li if jlevel >= 0: 225*9c5db199SXin Li want = have + getlspace(after[jline-1]) - \ 226*9c5db199SXin Li getlspace(lines[jline]) 227*9c5db199SXin Li break 228*9c5db199SXin Li if want < 0: 229*9c5db199SXin Li # Still no luck -- leave it alone. 230*9c5db199SXin Li want = have 231*9c5db199SXin Li else: 232*9c5db199SXin Li want = 0 233*9c5db199SXin Li assert want >= 0 234*9c5db199SXin Li have2want[have] = want 235*9c5db199SXin Li diff = want - have 236*9c5db199SXin Li if diff == 0 or have == 0: 237*9c5db199SXin Li after.extend(lines[thisstmt:nextstmt]) 238*9c5db199SXin Li else: 239*9c5db199SXin Li for line in lines[thisstmt:nextstmt]: 240*9c5db199SXin Li if diff > 0: 241*9c5db199SXin Li if line == "\n": 242*9c5db199SXin Li after.append(line) 243*9c5db199SXin Li else: 244*9c5db199SXin Li after.append(" " * diff + line) 245*9c5db199SXin Li else: 246*9c5db199SXin Li remove = min(getlspace(line), -diff) 247*9c5db199SXin Li after.append(line[remove:]) 248*9c5db199SXin Li return self.raw != self.after 249*9c5db199SXin Li 250*9c5db199SXin Li def write(self, f): 251*9c5db199SXin Li f.writelines(self.after) 252*9c5db199SXin Li 253*9c5db199SXin Li # Line-getter for tokenize. 254*9c5db199SXin Li def getline(self): 255*9c5db199SXin Li if self.index >= len(self.lines): 256*9c5db199SXin Li line = "" 257*9c5db199SXin Li else: 258*9c5db199SXin Li line = self.lines[self.index] 259*9c5db199SXin Li self.index += 1 260*9c5db199SXin Li return line 261*9c5db199SXin Li 262*9c5db199SXin Li # Line-eater for tokenize. 263*9c5db199SXin Li def tokeneater(self, type, token, sline_scol, end, line, 264*9c5db199SXin Li INDENT=tokenize.INDENT, 265*9c5db199SXin Li DEDENT=tokenize.DEDENT, 266*9c5db199SXin Li NEWLINE=tokenize.NEWLINE, 267*9c5db199SXin Li COMMENT=tokenize.COMMENT, 268*9c5db199SXin Li NL=tokenize.NL): 269*9c5db199SXin Li 270*9c5db199SXin Li (sline, scol) = sline_scol 271*9c5db199SXin Li if type == NEWLINE: 272*9c5db199SXin Li # A program statement, or ENDMARKER, will eventually follow, 273*9c5db199SXin Li # after some (possibly empty) run of tokens of the form 274*9c5db199SXin Li # (NL | COMMENT)* (INDENT | DEDENT+)? 275*9c5db199SXin Li self.find_stmt = 1 276*9c5db199SXin Li 277*9c5db199SXin Li elif type == INDENT: 278*9c5db199SXin Li self.find_stmt = 1 279*9c5db199SXin Li self.level += 1 280*9c5db199SXin Li 281*9c5db199SXin Li elif type == DEDENT: 282*9c5db199SXin Li self.find_stmt = 1 283*9c5db199SXin Li self.level -= 1 284*9c5db199SXin Li 285*9c5db199SXin Li elif type == COMMENT: 286*9c5db199SXin Li if self.find_stmt: 287*9c5db199SXin Li self.stats.append((sline, -1)) 288*9c5db199SXin Li # but we're still looking for a new stmt, so leave 289*9c5db199SXin Li # find_stmt alone 290*9c5db199SXin Li 291*9c5db199SXin Li elif type == NL: 292*9c5db199SXin Li pass 293*9c5db199SXin Li 294*9c5db199SXin Li elif self.find_stmt: 295*9c5db199SXin Li # This is the first "real token" following a NEWLINE, so it 296*9c5db199SXin Li # must be the first token of the next program statement, or an 297*9c5db199SXin Li # ENDMARKER. 298*9c5db199SXin Li self.find_stmt = 0 299*9c5db199SXin Li if line: # not endmarker 300*9c5db199SXin Li self.stats.append((sline, self.level)) 301*9c5db199SXin Li 302*9c5db199SXin Li# Count number of leading blanks. 303*9c5db199SXin Lidef getlspace(line): 304*9c5db199SXin Li i, n = 0, len(line) 305*9c5db199SXin Li while i < n and line[i] == " ": 306*9c5db199SXin Li i += 1 307*9c5db199SXin Li return i 308*9c5db199SXin Li 309*9c5db199SXin Liif __name__ == '__main__': 310*9c5db199SXin Li main() 311