xref: /aosp_15_r20/external/autotest/utils/reindent.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li#! /usr/bin/env python
2*9c5db199SXin Li
3*9c5db199SXin Li# Released to the public domain, by Tim Peters, 03 October 2000.
4*9c5db199SXin Li
"""reindent [-d][-r][-n][-v][-h] [ path ... ]
6*9c5db199SXin Li
7*9c5db199SXin Li-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
8*9c5db199SXin Li-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
9*9c5db199SXin Li-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10*9c5db199SXin Li-v (--verbose)  Verbose.   Print informative msgs; else no output.
11*9c5db199SXin Li-h (--help)     Help.      Print this usage information and exit.
12*9c5db199SXin Li
13*9c5db199SXin LiChange Python (.py) files to use 4-space indents and no hard tab characters.
14*9c5db199SXin LiAlso trim excess spaces and tabs from ends of lines, and remove empty lines
15*9c5db199SXin Liat the end of files.  Also ensure the last line ends with a newline.
16*9c5db199SXin Li
17*9c5db199SXin LiIf no paths are given on the command line, reindent operates as a filter,
18*9c5db199SXin Lireading a single source file from standard input and writing the transformed
19*9c5db199SXin Lisource to standard output.  In this case, the -d, -r and -v flags are
20*9c5db199SXin Liignored.
21*9c5db199SXin Li
You can pass one or more file and/or directory paths.  For a directory
path, all .py files within the directory will be examined, and, if the -r
24*9c5db199SXin Lioption is given, likewise recursively for subdirectories.
25*9c5db199SXin Li
26*9c5db199SXin LiIf output is not to standard output, reindent overwrites files in place,
27*9c5db199SXin Lirenaming the originals with a .bak extension.  If it finds nothing to
28*9c5db199SXin Lichange, the file is left alone.  If reindent does change a file, the changed
29*9c5db199SXin Lifile is a fixed-point for future runs (i.e., running reindent on the
30*9c5db199SXin Liresulting .py file won't change it again).
31*9c5db199SXin Li
32*9c5db199SXin LiThe hard part of reindenting is figuring out what to do with comment
33*9c5db199SXin Lilines.  So long as the input files get a clean bill of health from
34*9c5db199SXin Litabnanny.py, reindent should do a good job.
35*9c5db199SXin Li
36*9c5db199SXin LiThe backup file is a copy of the one that is being reindented. The ".bak"
37*9c5db199SXin Lifile is generated with shutil.copy(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
39*9c5db199SXin Liyou'd prefer. You can always use the --nobackup option to prevent this.
40*9c5db199SXin Li"""
41*9c5db199SXin Li
42*9c5db199SXin Lifrom __future__ import absolute_import
43*9c5db199SXin Lifrom __future__ import division
44*9c5db199SXin Lifrom __future__ import print_function
45*9c5db199SXin Li
# Version string of this script.
__version__ = '1'
47*9c5db199SXin Li
48*9c5db199SXin Liimport tokenize
49*9c5db199SXin Liimport os, shutil
50*9c5db199SXin Liimport sys
51*9c5db199SXin Li
52*9c5db199SXin Lifrom six.moves import range
53*9c5db199SXin Li
# Module-wide option state; main() updates these from the command line.
verbose = 0        # non-zero: print progress messages
recurse = 0        # non-zero: descend into subdirectories
dryrun = 0         # non-zero: analyze only, never rewrite files
makebackup = True  # False: do not copy the original to "<file>.bak"
58*9c5db199SXin Li
def usage(msg=None):
    """Print the module docstring to stderr, preceded by *msg* when given."""
    err = sys.stderr
    if msg is not None:
        print(msg, file=err)
    print(__doc__, file=err)
63*9c5db199SXin Li
def errprint(*args):
    """Write the str() of each argument to stderr, space-separated, plus a newline."""
    text = " ".join(str(item) for item in args)
    sys.stderr.write(text + "\n")
70*9c5db199SXin Li
def main():
    """Command-line entry point.

    Parses sys.argv, updates the module-level option flags, and then either
    filters stdin to stdout (no path arguments) or calls check() on every
    path given.  Prints the usage text and returns early on a bad option
    or on -h/--help.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup

    short_flags = "drnvh"
    long_flags = ["dryrun", "recurse", "nobackup", "verbose", "help"]
    try:
        options, paths = getopt.getopt(sys.argv[1:], short_flags, long_flags)
    except getopt.error as problem:
        usage(problem)
        return

    for flag, _unused in options:
        if flag in ('-h', '--help'):
            usage()
            return
        if flag in ('-d', '--dryrun'):
            dryrun += 1
        elif flag in ('-r', '--recurse'):
            recurse += 1
        elif flag in ('-n', '--nobackup'):
            makebackup = False
        elif flag in ('-v', '--verbose'):
            verbose += 1

    if paths:
        for path in paths:
            check(path)
        return

    # No paths: act as a stdin -> stdout filter.
    reindenter = Reindenter(sys.stdin)
    reindenter.run()
    reindenter.write(sys.stdout)
99*9c5db199SXin Li
def check(file):
    """Reindent one file, or every candidate file inside a directory.

    If *file* is a real (non-symlink) directory, each entry whose name ends
    in ".py" (case-insensitive) is checked, and, when the global ``recurse``
    flag is set, so is each real subdirectory.  Otherwise *file* is read,
    reindented in memory and, unless ``dryrun`` is set, rewritten in place
    (after copying the original to ``file + ".bak"`` when ``makebackup``
    is true).

    Returns True if the file needed changes, False if it did not, and None
    for directories or when the file could not be opened.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Reindenter reads the whole file in its constructor; the context
    # manager guarantees the handle is closed even if that read raises.
    with f:
        r = Reindenter(f)

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            # Close the output file even if writing fails part-way.
            with open(file, "w") as out:
                r.write(out)
            if verbose:
                print("wrote new", file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
144*9c5db199SXin Li
145*9c5db199SXin Lidef _rstrip(line, JUNK='\n \t'):
146*9c5db199SXin Li    """Return line stripped of trailing spaces, tabs, newlines.
147*9c5db199SXin Li
148*9c5db199SXin Li    Note that line.rstrip() instead also strips sundry control characters,
149*9c5db199SXin Li    but at least one known Emacs user expects to keep junk like that, not
150*9c5db199SXin Li    mentioning Barry by name or anything <wink>.
151*9c5db199SXin Li    """
152*9c5db199SXin Li
153*9c5db199SXin Li    i = len(line)
154*9c5db199SXin Li    while i > 0 and line[i-1] in JUNK:
155*9c5db199SXin Li        i -= 1
156*9c5db199SXin Li    return line[:i]
157*9c5db199SXin Li
class Reindenter:
    """Compute a 4-space-indented version of a Python source file.

    Usage: construct with a readable text file object, call run() to build
    the transformed program, then call write() to emit it.
    """

    def __init__(self, f):
        """Read every line from file object *f* and set up tokenizer state."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Stub at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Build the reindented program in self.after.

        Returns True if the result differs from the raw input lines,
        False otherwise.
        """
        # Drive the tokenizer explicitly.  generate_tokens() takes just a
        # readline callable on both Python 2 and Python 3, whereas the
        # two-argument tokenize.tokenize(readline, tokeneater) form only
        # exists on Python 2 and raises TypeError on Python 3.
        for token_info in tokenize.generate_tokens(self.getline):
            self.tokeneater(*token_info)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats)-1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i+1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i+1, len(stats)-1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i-1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline-1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                # Nothing to shift: copy the statement's lines unchanged.
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        # Indent further, but keep blank lines blank.
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Dedent, never removing more than the line has.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed lines to file object *f* (call run() first)."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned-up line, or "" once the input is exhausted."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, sline_scol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking the indent level and statement starts.

        Appends (line number, level) to self.stats for the first token of
        each statement, and (line number, -1) for each comment seen while
        a new statement is still being looked for.
        """

        (sline, scol) = sline_scol
        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
# Count number of leading blanks.
def getlspace(line):
    """Return how many space characters *line* starts with (tabs do not count)."""
    return len(line) - len(line.lstrip(" "))
308*9c5db199SXin Li
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
311