xref: /aosp_15_r20/prebuilts/build-tools/common/py3-stdlib/tabnanny.py (revision cda5da8d549138a6648c5ee6d7a49cf8f4a657be)
1#! /usr/bin/env python3
2
3"""The Tab Nanny despises ambiguous indentation.  She knows no mercy.
4
5tabnanny -- Detection of ambiguous indentation
6
7For the time being this module is intended to be called as a script.
8However it is possible to import it into an IDE and use the function
9check() described below.
10
11Warning: The API provided by this module is likely to change in future
12releases; such changes may not be backward compatible.
13"""
14
15# Released to the public domain, by Tim Peters, 15 April 1998.
16
17# XXX Note: this is now a standard library module.
18# XXX The API needs to undergo changes however; the current code is too
19# XXX script-like.  This will be addressed later.
20
21__version__ = "6"
22
23import os
24import sys
25import tokenize
26
27__all__ = ["check", "NannyNag", "process_tokens"]
28
29verbose = 0
30filename_only = 0
31
32def errprint(*args):
33    sep = ""
34    for arg in args:
35        sys.stderr.write(sep + str(arg))
36        sep = " "
37    sys.stderr.write("\n")
38
39def main():
40    import getopt
41
42    global verbose, filename_only
43    try:
44        opts, args = getopt.getopt(sys.argv[1:], "qv")
45    except getopt.error as msg:
46        errprint(msg)
47        return
48    for o, a in opts:
49        if o == '-q':
50            filename_only = filename_only + 1
51        if o == '-v':
52            verbose = verbose + 1
53    if not args:
54        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
55        return
56    for arg in args:
57        check(arg)
58
59class NannyNag(Exception):
60    """
61    Raised by process_tokens() if detecting an ambiguous indent.
62    Captured and handled in check().
63    """
64    def __init__(self, lineno, msg, line):
65        self.lineno, self.msg, self.line = lineno, msg, line
66    def get_lineno(self):
67        return self.lineno
68    def get_msg(self):
69        return self.msg
70    def get_line(self):
71        return self.line
72
73def check(file):
74    """check(file_or_dir)
75
76    If file_or_dir is a directory and not a symbolic link, then recursively
77    descend the directory tree named by file_or_dir, checking all .py files
78    along the way. If file_or_dir is an ordinary Python source file, it is
79    checked for whitespace related problems. The diagnostic messages are
80    written to standard output using the print statement.
81    """
82
83    if os.path.isdir(file) and not os.path.islink(file):
84        if verbose:
85            print("%r: listing directory" % (file,))
86        names = os.listdir(file)
87        for name in names:
88            fullname = os.path.join(file, name)
89            if (os.path.isdir(fullname) and
90                not os.path.islink(fullname) or
91                os.path.normcase(name[-3:]) == ".py"):
92                check(fullname)
93        return
94
95    try:
96        f = tokenize.open(file)
97    except OSError as msg:
98        errprint("%r: I/O Error: %s" % (file, msg))
99        return
100
101    if verbose > 1:
102        print("checking %r ..." % file)
103
104    try:
105        process_tokens(tokenize.generate_tokens(f.readline))
106
107    except tokenize.TokenError as msg:
108        errprint("%r: Token Error: %s" % (file, msg))
109        return
110
111    except IndentationError as msg:
112        errprint("%r: Indentation Error: %s" % (file, msg))
113        return
114
115    except NannyNag as nag:
116        badline = nag.get_lineno()
117        line = nag.get_line()
118        if verbose:
119            print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
120            print("offending line: %r" % (line,))
121            print(nag.get_msg())
122        else:
123            if ' ' in file: file = '"' + file + '"'
124            if filename_only: print(file)
125            else: print(file, badline, repr(line))
126        return
127
128    finally:
129        f.close()
130
131    if verbose:
132        print("%r: Clean bill of health." % (file,))
133
134class Whitespace:
135    # the characters used for space and tab
136    S, T = ' \t'
137
138    # members:
139    #   raw
140    #       the original string
141    #   n
142    #       the number of leading whitespace characters in raw
143    #   nt
144    #       the number of tabs in raw[:n]
145    #   norm
146    #       the normal form as a pair (count, trailing), where:
147    #       count
148    #           a tuple such that raw[:n] contains count[i]
149    #           instances of S * i + T
150    #       trailing
151    #           the number of trailing spaces in raw[:n]
152    #       It's A Theorem that m.indent_level(t) ==
153    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
154    #   is_simple
155    #       true iff raw[:n] is of the form (T*)(S*)
156
157    def __init__(self, ws):
158        self.raw  = ws
159        S, T = Whitespace.S, Whitespace.T
160        count = []
161        b = n = nt = 0
162        for ch in self.raw:
163            if ch == S:
164                n = n + 1
165                b = b + 1
166            elif ch == T:
167                n = n + 1
168                nt = nt + 1
169                if b >= len(count):
170                    count = count + [0] * (b - len(count) + 1)
171                count[b] = count[b] + 1
172                b = 0
173            else:
174                break
175        self.n    = n
176        self.nt   = nt
177        self.norm = tuple(count), b
178        self.is_simple = len(count) <= 1
179
180    # return length of longest contiguous run of spaces (whether or not
181    # preceding a tab)
182    def longest_run_of_spaces(self):
183        count, trailing = self.norm
184        return max(len(count)-1, trailing)
185
186    def indent_level(self, tabsize):
187        # count, il = self.norm
188        # for i in range(len(count)):
189        #    if count[i]:
190        #        il = il + (i//tabsize + 1)*tabsize * count[i]
191        # return il
192
193        # quicker:
194        # il = trailing + sum (i//ts + 1)*ts*count[i] =
195        # trailing + ts * sum (i//ts + 1)*count[i] =
196        # trailing + ts * sum i//ts*count[i] + count[i] =
197        # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
198        # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
199        # and note that i//ts*count[i] is 0 when i < ts
200
201        count, trailing = self.norm
202        il = 0
203        for i in range(tabsize, len(count)):
204            il = il + i//tabsize * count[i]
205        return trailing + tabsize * (il + self.nt)
206
207    # return true iff self.indent_level(t) == other.indent_level(t)
208    # for all t >= 1
209    def equal(self, other):
210        return self.norm == other.norm
211
212    # return a list of tuples (ts, i1, i2) such that
213    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
214    # Intended to be used after not self.equal(other) is known, in which
215    # case it will return at least one witnessing tab size.
216    def not_equal_witness(self, other):
217        n = max(self.longest_run_of_spaces(),
218                other.longest_run_of_spaces()) + 1
219        a = []
220        for ts in range(1, n+1):
221            if self.indent_level(ts) != other.indent_level(ts):
222                a.append( (ts,
223                           self.indent_level(ts),
224                           other.indent_level(ts)) )
225        return a
226
227    # Return True iff self.indent_level(t) < other.indent_level(t)
228    # for all t >= 1.
229    # The algorithm is due to Vincent Broman.
230    # Easy to prove it's correct.
231    # XXXpost that.
232    # Trivial to prove n is sharp (consider T vs ST).
233    # Unknown whether there's a faster general way.  I suspected so at
234    # first, but no longer.
235    # For the special (but common!) case where M and N are both of the
236    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
237    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
238    # XXXwrite that up.
239    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
240    def less(self, other):
241        if self.n >= other.n:
242            return False
243        if self.is_simple and other.is_simple:
244            return self.nt <= other.nt
245        n = max(self.longest_run_of_spaces(),
246                other.longest_run_of_spaces()) + 1
247        # the self.n >= other.n test already did it for ts=1
248        for ts in range(2, n+1):
249            if self.indent_level(ts) >= other.indent_level(ts):
250                return False
251        return True
252
253    # return a list of tuples (ts, i1, i2) such that
254    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
255    # Intended to be used after not self.less(other) is known, in which
256    # case it will return at least one witnessing tab size.
257    def not_less_witness(self, other):
258        n = max(self.longest_run_of_spaces(),
259                other.longest_run_of_spaces()) + 1
260        a = []
261        for ts in range(1, n+1):
262            if self.indent_level(ts) >= other.indent_level(ts):
263                a.append( (ts,
264                           self.indent_level(ts),
265                           other.indent_level(ts)) )
266        return a
267
268def format_witnesses(w):
269    firsts = (str(tup[0]) for tup in w)
270    prefix = "at tab size"
271    if len(w) > 1:
272        prefix = prefix + "s"
273    return prefix + " " + ', '.join(firsts)
274
275def process_tokens(tokens):
276    INDENT = tokenize.INDENT
277    DEDENT = tokenize.DEDENT
278    NEWLINE = tokenize.NEWLINE
279    JUNK = tokenize.COMMENT, tokenize.NL
280    indents = [Whitespace("")]
281    check_equal = 0
282
283    for (type, token, start, end, line) in tokens:
284        if type == NEWLINE:
285            # a program statement, or ENDMARKER, will eventually follow,
286            # after some (possibly empty) run of tokens of the form
287            #     (NL | COMMENT)* (INDENT | DEDENT+)?
288            # If an INDENT appears, setting check_equal is wrong, and will
289            # be undone when we see the INDENT.
290            check_equal = 1
291
292        elif type == INDENT:
293            check_equal = 0
294            thisguy = Whitespace(token)
295            if not indents[-1].less(thisguy):
296                witness = indents[-1].not_less_witness(thisguy)
297                msg = "indent not greater e.g. " + format_witnesses(witness)
298                raise NannyNag(start[0], msg, line)
299            indents.append(thisguy)
300
301        elif type == DEDENT:
302            # there's nothing we need to check here!  what's important is
303            # that when the run of DEDENTs ends, the indentation of the
304            # program statement (or ENDMARKER) that triggered the run is
305            # equal to what's left at the top of the indents stack
306
307            # Ouch!  This assert triggers if the last line of the source
308            # is indented *and* lacks a newline -- then DEDENTs pop out
309            # of thin air.
310            # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
311            check_equal = 1
312
313            del indents[-1]
314
315        elif check_equal and type not in JUNK:
316            # this is the first "real token" following a NEWLINE, so it
317            # must be the first token of the next program statement, or an
318            # ENDMARKER; the "line" argument exposes the leading whitespace
319            # for this statement; in the case of ENDMARKER, line is an empty
320            # string, so will properly match the empty string with which the
321            # "indents" stack was seeded
322            check_equal = 0
323            thisguy = Whitespace(line)
324            if not indents[-1].equal(thisguy):
325                witness = indents[-1].not_equal_witness(thisguy)
326                msg = "indent not equal e.g. " + format_witnesses(witness)
327                raise NannyNag(start[0], msg, line)
328
329
330if __name__ == '__main__':
331    main()
332