xref: /aosp_15_r20/external/autotest/utils/reindent.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1#! /usr/bin/env python
2
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
5"""reindent [-d][-r][-v] [ path ... ]
6
7-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
8-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
9-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10-v (--verbose)  Verbose.   Print informative msgs; else no output.
11-h (--help)     Help.      Print this usage information and exit.
12
13Change Python (.py) files to use 4-space indents and no hard tab characters.
14Also trim excess spaces and tabs from ends of lines, and remove empty lines
15at the end of files.  Also ensure the last line ends with a newline.
16
17If no paths are given on the command line, reindent operates as a filter,
18reading a single source file from standard input and writing the transformed
19source to standard output.  In this case, the -d, -r and -v flags are
20ignored.
21
22You can pass one or more file and/or directory paths.  When a directory
23path, all .py files within the directory will be examined, and, if the -r
24option is given, likewise recursively for subdirectories.
25
26If output is not to standard output, reindent overwrites files in place,
27renaming the originals with a .bak extension.  If it finds nothing to
28change, the file is left alone.  If reindent does change a file, the changed
29file is a fixed-point for future runs (i.e., running reindent on the
30resulting .py file won't change it again).
31
32The hard part of reindenting is figuring out what to do with comment
33lines.  So long as the input files get a clean bill of health from
34tabnanny.py, reindent should do a good job.
35
36The backup file is a copy of the one that is being reindented. The ".bak"
37file is generated with shutil.copy(), but some corner cases regarding
38user/group and permissions could leave the backup file more readable than
39you'd prefer. You can always use the --nobackup option to prevent this.
40"""
41
42from __future__ import absolute_import
43from __future__ import division
44from __future__ import print_function
45
46__version__ = "1"
47
48import tokenize
49import os, shutil
50import sys
51
52from six.moves import range
53
# Command-line option state: written by main() while parsing flags and
# read by check().  The three counters start at zero; backups are on
# unless -n/--nobackup is given.
verbose = recurse = dryrun = 0
makebackup = True
58
def usage(msg=None):
    """Write the usage text (this module's docstring) to stderr.

    If msg is not None it is printed on its own line first, e.g. the
    getopt error that triggered the usage message.
    """
    parts = [__doc__] if msg is None else [msg, __doc__]
    for part in parts:
        print(part, file=sys.stderr)
63
def errprint(*args):
    """Write the str() of each argument to stderr, space-separated.

    A single newline is written after the last argument (or on its own
    when called with no arguments).
    """
    write = sys.stderr.write
    for position, arg in enumerate(args):
        # Every argument but the first is preceded by one space.
        prefix = " " if position else ""
        write(prefix + str(arg))
    write("\n")
70
def main():
    """Parse sys.argv and run reindent as a filter or over the given paths.

    Flags update the module-level option globals in the order they appear.
    A getopt error prints the error plus usage and returns; -h/--help
    prints usage and returns.  With no path arguments, stdin is
    reindented to stdout; otherwise each path is handed to check().
    """
    global verbose, recurse, dryrun, makebackup
    import getopt

    long_options = ["dryrun", "recurse", "nobackup", "verbose", "help"]
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh", long_options)
    except getopt.error as msg:
        usage(msg)
        return

    for flag, _unused_value in opts:
        if flag in ('-h', '--help'):
            usage()
            return
        if flag in ('-d', '--dryrun'):
            dryrun += 1
        elif flag in ('-r', '--recurse'):
            recurse += 1
        elif flag in ('-n', '--nobackup'):
            makebackup = False
        elif flag in ('-v', '--verbose'):
            verbose += 1

    if args:
        for path in args:
            check(path)
        return

    # No paths: act as a stdin -> stdout filter.
    reindenter = Reindenter(sys.stdin)
    reindenter.run()
    reindenter.write(sys.stdout)
99
def check(file):
    """Reindent one file, or every eligible entry of a directory.

    For a directory that is not a symlink, each entry whose name ends in
    ".py" (case-insensitively) is checked; when the global `recurse` is
    set, non-symlink subdirectories are descended into as well.

    For anything else, the path is opened and reindented.  If the result
    differs from the original and this is not a dry run, the file is
    rewritten in place, after copying it to "<file>.bak" when
    `makebackup` is set.

    Returns True if the file needed changes, False if it did not, and
    None for directories or when the file could not be opened.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Reindenter reads every line in its constructor, so the handle can be
    # released right away; `with` also closes it if that read raises.
    with f:
        r = Reindenter(f)

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            # Close the output even if writing fails part-way.
            with open(file, "w") as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
144
145def _rstrip(line, JUNK='\n \t'):
146    """Return line stripped of trailing spaces, tabs, newlines.
147
148    Note that line.rstrip() instead also strips sundry control characters,
149    but at least one known Emacs user expects to keep junk like that, not
150    mentioning Barry by name or anything <wink>.
151    """
152
153    i = len(line)
154    while i > 0 and line[i-1] in JUNK:
155        i -= 1
156    return line[:i]
157
class Reindenter:
    """Re-indent Python source to 4 spaces per indentation level.

    The constructor reads all lines from a file-like object.  run()
    tokenizes them, computes the re-indented text into self.after and
    reports whether it differs from the input.  write() emits
    self.after to a file-like object.
    """

    def __init__(self, f):
        """Read every line from f and prepare the tokenizer state."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Stub at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Compute the re-indented program into self.after.

        Returns True if the result differs from the raw input lines,
        False otherwise.
        """
        # generate_tokens() takes a readline callable that returns str
        # and yields 5-tuples on both Python 2 and Python 3.  The
        # two-argument tokenize.tokenize(readline, tokeneater) callback
        # form only exists on Python 2.
        for token_info in tokenize.generate_tokens(self.getline):
            self.tokeneater(*token_info)
        # Remove trailing empty lines.  (The None stub at index 0 stops
        # the loop for an all-blank file.)
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats)-1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i+1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i+1, len(stats)-1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i-1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline-1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            # Keep blank lines blank.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never remove more than the line's own
                        # leading blanks.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the re-indented lines computed by run() to f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next normalized line, or "" once input is exhausted."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, sline_scol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token and record statement/comment start lines.

        The five leading arguments are the fields of a tokenize token
        tuple; sline_scol is its (row, col) start position.  Appends
        (row, level) to self.stats for the first token of each
        statement and (row, -1) for comment lines seen while a new
        statement is still being sought.
        """

        (sline, scol) = sline_scol
        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
301
def getlspace(line):
    """Return the number of leading space characters in line.

    Only ' ' counts; a tab or any other character ends the run.
    """
    return len(line) - len(line.lstrip(" "))
308
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
311