xref: /aosp_15_r20/prebuilts/build-tools/common/py3-stdlib/tokenize.py (revision cda5da8d549138a6648c5ee6d7a49cf8f4a657be)
1"""Tokenization help for Python programs.
2
3tokenize(readline) is a generator that breaks a stream of bytes into
4Python tokens.  It decodes the bytes according to PEP-0263 for
5determining source file encoding.
6
7It accepts a readline-like method which is called repeatedly to get the
8next line of input (or b"" for EOF).  It generates 5-tuples with these
9members:
10
11    the token type (see token.py)
12    the token (a string)
13    the starting (row, column) indices of the token (a 2-tuple of ints)
14    the ending (row, column) indices of the token (a 2-tuple of ints)
15    the original line (string)
16
17It is designed to match the working of the Python tokenizer exactly, except
18that it produces COMMENT tokens for comments and gives type OP for all
19operators.  Additionally, all token lists start with an ENCODING token
20which tells you which encoding was used to decode the bytes stream.
21"""

__author__ = 'Ka-Ping Yee <[email protected]>'
__credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
               'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
               'Michael Foord')
from builtins import open as _builtin_open
from codecs import lookup, BOM_UTF8
import collections
import functools
from io import TextIOWrapper
import itertools as _itertools
import re
import sys
from token import *
from token import EXACT_TOKEN_TYPES

cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)

import token
__all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding",
                           "untokenize", "TokenInfo"]
del token

class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
    def __repr__(self):
        annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
        return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' %
                self._replace(type=annotated_type))

    @property
    def exact_type(self):
        if self.type == OP and self.string in EXACT_TOKEN_TYPES:
            return EXACT_TOKEN_TYPES[self.string]
        else:
            return self.type
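
# Example (illustrative sketch): an operator token reports the generic OP type,
# while exact_type resolves the specific operator through EXACT_TOKEN_TYPES.
#
#     tok = TokenInfo(OP, '+', (1, 2), (1, 3), 'a + b\n')
#     tok.type        # OP
#     tok.exact_type  # PLUS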

def group(*choices): return '(' + '|'.join(choices) + ')'
def any(*choices): return group(*choices) + '*'
def maybe(*choices): return group(*choices) + '?'
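
# Example (illustrative sketch): these helpers build alternation patterns from
# their arguments, e.g.
#
#     group('a', 'b')  ->  '(a|b)'
#     any('a', 'b')    ->  '(a|b)*'
#     maybe('a', 'b')  ->  '(a|b)?'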

# Note: we use unicode matching for names ("\w") but ascii matching for
# number literals.
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'\w+'

Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
Binnumber = r'0[bB](?:_?[01])+'
Octnumber = r'0[oO](?:_?[0-7])+'
Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
                   r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)

# Return the empty string, plus all of the valid string prefixes.
def _all_string_prefixes():
    # The valid string prefixes. Only contain the lower case versions,
    #  and don't contain any permutations (include 'fr', but not
    #  'rf'). The various permutations will be generated.
    _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
    # if we add binary f-strings, add: ['fb', 'fbr']
    result = {''}
    for prefix in _valid_string_prefixes:
        for t in _itertools.permutations(prefix):
            # create a list with upper and lower versions of each
            #  character
            for u in _itertools.product(*[(c, c.upper()) for c in t]):
                result.add(''.join(u))
    return result
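
# Example (illustrative sketch): the set returned above contains the empty
# prefix plus every case/order permutation of the valid prefixes.
#
#     prefixes = _all_string_prefixes()
#     '' in prefixes      # True
#     'Rb' in prefixes    # True  (case/order permutation of 'br')
#     'bu' in prefixes    # False ('bu' is not a valid prefix)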

@functools.lru_cache
def _compile(expr):
    return re.compile(expr, re.UNICODE)

# Note that since _all_string_prefixes includes the empty string,
#  StringPrefix can be the empty string (making it optional).
StringPrefix = group(*_all_string_prefixes())

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
# Single-line ' or " string.
String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
               StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

# Sorting in reverse order puts the long operators before their prefixes.
# Otherwise if = came before ==, == would get recognized as two instances
# of =.
Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True)))
Funny = group(r'\r?\n', Special)

PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken

# First (or only) line of ' or " string.
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)

# For a given string prefix plus quotes, endpats maps it to a regex
#  to match the remainder of that string. _prefix can be empty, for
#  a normal single or triple quoted string (with no prefix).
endpats = {}
for _prefix in _all_string_prefixes():
    endpats[_prefix + "'"] = Single
    endpats[_prefix + '"'] = Double
    endpats[_prefix + "'''"] = Single3
    endpats[_prefix + '"""'] = Double3
del _prefix

# A set of all of the single and triple quoted string prefixes,
#  including the opening quotes.
single_quoted = set()
triple_quoted = set()
for t in _all_string_prefixes():
    for u in (t + '"', t + "'"):
        single_quoted.add(u)
    for u in (t + '"""', t + "'''"):
        triple_quoted.add(u)
del t, u

tabsize = 8

class TokenError(Exception): pass

class StopTokenizing(Exception): pass


class Untokenizer:

    def __init__(self):
        self.tokens = []
        self.prev_row = 1
        self.prev_col = 0
        self.encoding = None

    def add_whitespace(self, start):
        row, col = start
        if row < self.prev_row or row == self.prev_row and col < self.prev_col:
            raise ValueError("start ({},{}) precedes previous end ({},{})"
                             .format(row, col, self.prev_row, self.prev_col))
        row_offset = row - self.prev_row
        if row_offset:
            self.tokens.append("\\\n" * row_offset)
            self.prev_col = 0
        col_offset = col - self.prev_col
        if col_offset:
            self.tokens.append(" " * col_offset)

    def untokenize(self, iterable):
        it = iter(iterable)
        indents = []
        startline = False
        for t in it:
            if len(t) == 2:
                self.compat(t, it)
                break
            tok_type, token, start, end, line = t
            if tok_type == ENCODING:
                self.encoding = token
                continue
            if tok_type == ENDMARKER:
                break
            if tok_type == INDENT:
                indents.append(token)
                continue
            elif tok_type == DEDENT:
                indents.pop()
                self.prev_row, self.prev_col = end
                continue
            elif tok_type in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                indent = indents[-1]
                if start[1] >= len(indent):
                    self.tokens.append(indent)
                    self.prev_col = len(indent)
                startline = False
            self.add_whitespace(start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                self.prev_row += 1
                self.prev_col = 0
        return "".join(self.tokens)

    def compat(self, token, iterable):
        indents = []
        toks_append = self.tokens.append
        startline = token[0] in (NEWLINE, NL)
        prevstring = False

        for tok in _itertools.chain([token], iterable):
            toknum, tokval = tok[:2]
            if toknum == ENCODING:
                self.encoding = tokval
                continue

            if toknum in (NAME, NUMBER):
                tokval += ' '

            # Insert a space between two consecutive strings
            if toknum == STRING:
                if prevstring:
                    tokval = ' ' + tokval
                prevstring = True
            else:
                prevstring = False

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)


def untokenize(iterable):
    """Transform tokens back into Python source code.
    It returns a bytes object, encoded using the ENCODING
    token, which is the first token sequence output by tokenize.

    Each element returned by the iterable must be a token sequence
    with at least two elements, a token number and token value.  If
    only two tokens are passed, the resulting output is poor.

    Round-trip invariant for full input:
        Untokenized source will match input source exactly

    Round-trip invariant for limited input:
        # Output bytes will tokenize back to the input
        t1 = [tok[:2] for tok in tokenize(f.readline)]
        newcode = untokenize(t1)
        readline = BytesIO(newcode).readline
        t2 = [tok[:2] for tok in tokenize(readline)]
        assert t1 == t2
    """
    ut = Untokenizer()
    out = ut.untokenize(iterable)
    if ut.encoding is not None:
        out = out.encode(ut.encoding)
    return out
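
# Example (illustrative sketch): round-tripping full 5-tuples reproduces the
# source bytes, while 2-tuples only guarantee an equivalent token stream
# (spacing may differ).
#
#     from io import BytesIO
#     source = b"x = 1\n"
#     toks = list(tokenize(BytesIO(source).readline))
#     untokenize(toks) == source               # True, full-input round trip
#     untokenize(tok[:2] for tok in toks)      # b'x =1 \n', same token stream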


def _get_normal_name(orig_enc):
    """Imitates get_normal_name in tokenizer.c."""
    # Only care about the first 12 characters.
    enc = orig_enc[:12].lower().replace("_", "-")
    if enc == "utf-8" or enc.startswith("utf-8-"):
        return "utf-8"
    if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
       enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
        return "iso-8859-1"
    return orig_enc
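
# Example (illustrative sketch): codec aliases are normalized much as
# tokenizer.c does it.
#
#     _get_normal_name('UTF_8')    ->  'utf-8'
#     _get_normal_name('Latin-1')  ->  'iso-8859-1'
#     _get_normal_name('cp1252')   ->  'cp1252'   (returned unchanged)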

def detect_encoding(readline):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file.  It requires one argument, readline,
    in the same way as the tokenize() generator.

    It will call readline a maximum of twice, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read in.

    It detects the encoding from the presence of a utf-8 bom or an encoding
    cookie as specified in pep-0263.  If both a bom and a cookie are present,
    but disagree, a SyntaxError will be raised.  If the encoding cookie is an
    invalid charset, raise a SyntaxError.  Note that if a utf-8 bom is found,
    'utf-8-sig' is returned.

    If no encoding is specified, then the default of 'utf-8' will be returned.
    """
    try:
        filename = readline.__self__.name
    except AttributeError:
        filename = None
    bom_found = False
    encoding = None
    default = 'utf-8'
    def read_or_stop():
        try:
            return readline()
        except StopIteration:
            return b''

    def find_cookie(line):
        try:
            # Decode as UTF-8. Either the line is an encoding declaration,
            # in which case it should be pure ASCII, or it must be UTF-8
            # per default encoding.
            line_string = line.decode('utf-8')
        except UnicodeDecodeError:
            msg = "invalid or missing encoding declaration"
            if filename is not None:
                msg = '{} for {!r}'.format(msg, filename)
            raise SyntaxError(msg)

        match = cookie_re.match(line_string)
        if not match:
            return None
        encoding = _get_normal_name(match.group(1))
        try:
            codec = lookup(encoding)
        except LookupError:
            # This behaviour mimics the Python interpreter
            if filename is None:
                msg = "unknown encoding: " + encoding
            else:
                msg = "unknown encoding for {!r}: {}".format(filename,
                        encoding)
            raise SyntaxError(msg)

        if bom_found:
            if encoding != 'utf-8':
                # This behaviour mimics the Python interpreter
                if filename is None:
                    msg = 'encoding problem: utf-8'
                else:
                    msg = 'encoding problem for {!r}: utf-8'.format(filename)
                raise SyntaxError(msg)
            encoding += '-sig'
        return encoding

    first = read_or_stop()
    if first.startswith(BOM_UTF8):
        bom_found = True
        first = first[3:]
        default = 'utf-8-sig'
    if not first:
        return default, []

    encoding = find_cookie(first)
    if encoding:
        return encoding, [first]
    if not blank_re.match(first):
        return default, [first]

    second = read_or_stop()
    if not second:
        return default, [first]

    encoding = find_cookie(second)
    if encoding:
        return encoding, [first, second]

    return default, [first, second]
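
# Example (illustrative sketch): detecting a PEP 263 encoding cookie from a
# bytes readline.
#
#     from io import BytesIO
#     src = b"# -*- coding: latin-1 -*-\nx = 1\n"
#     encoding, lines = detect_encoding(BytesIO(src).readline)
#     # encoding == 'iso-8859-1'; lines holds the single cookie line read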


def open(filename):
    """Open a file in read only mode using the encoding detected by
    detect_encoding().
    """
    buffer = _builtin_open(filename, 'rb')
    try:
        encoding, lines = detect_encoding(buffer.readline)
        buffer.seek(0)
        text = TextIOWrapper(buffer, encoding, line_buffering=True)
        text.mode = 'r'
        return text
    except:
        buffer.close()
        raise
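
# Example (illustrative sketch; 'example.py' is a hypothetical path): reading a
# source file as text with its detected encoding.
#
#     with open('example.py') as f:    # tokenize.open, not the builtin open
#         source = f.read()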


def tokenize(readline):
    """
    The tokenize() generator requires one argument, readline, which
    must be a callable object which provides the same interface as the
    readline() method of built-in file objects.  Each call to the function
    should return one line of input as bytes.  Alternatively, readline
    can be a callable function terminating with StopIteration:
        readline = open(myfile, 'rb').__next__  # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found.  The line passed is the
    physical line.

    The first token sequence will always be an ENCODING token
    which tells you which encoding was used to decode the bytes stream.
    """
    encoding, consumed = detect_encoding(readline)
    empty = _itertools.repeat(b"")
    rl_gen = _itertools.chain(consumed, iter(readline, b""), empty)
    return _tokenize(rl_gen.__next__, encoding)
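
# Example (illustrative sketch; 'example.py' is a hypothetical path): the file
# must be opened in binary mode so that detect_encoding() sees raw bytes.
#
#     with open('example.py', 'rb') as f:   # the builtin open, not tokenize.open
#         for tok in tokenize(f.readline):
#             print(tok)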


def _tokenize(readline, encoding):
    lnum = parenlev = continued = 0
    numchars = '0123456789'
    contstr, needcont = '', 0
    contline = None
    indents = [0]

    if encoding is not None:
        if encoding == "utf-8-sig":
            # BOM will already have been stripped.
            encoding = "utf-8"
        yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
    last_line = b''
    line = b''
    while True:                                # loop over lines in stream
        try:
            # We capture the value of the line variable here because
            # readline uses the empty string '' to signal end of input,
            # hence `line` itself will always be overwritten at the end
            # of this loop.
            last_line = line
            line = readline()
        except StopIteration:
            line = b''

        if encoding is not None:
            line = line.decode(encoding)
        lnum += 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            if not line:
                raise TokenError("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield TokenInfo(STRING, contstr + line[:end],
                       strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                yield TokenInfo(ERRORTOKEN, contstr + line,
                           strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ':
                    column += 1
                elif line[pos] == '\t':
                    column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f':
                    column = 0
                else:
                    break
                pos += 1
            if pos == max:
                break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    yield TokenInfo(COMMENT, comment_token,
                           (lnum, pos), (lnum, pos + len(comment_token)), line)
                    pos += len(comment_token)

                yield TokenInfo(NL, line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]

                yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                raise TokenError("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = _compile(PseudoToken).match(line, pos)
            if pseudomatch:                                # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                if start == end:
                    continue
                token, initial = line[start:end], line[start]

                if (initial in numchars or                 # ordinary number
                    (initial == '.' and token != '.' and token != '...')):
                    yield TokenInfo(NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    if parenlev > 0:
                        yield TokenInfo(NL, token, spos, epos, line)
                    else:
                        yield TokenInfo(NEWLINE, token, spos, epos, line)

                elif initial == '#':
                    assert not token.endswith("\n")
                    yield TokenInfo(COMMENT, token, spos, epos, line)

                elif token in triple_quoted:
                    endprog = _compile(endpats[token])
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield TokenInfo(STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break

                # Check up to the first 3 chars of the token to see if
                #  they're in the single_quoted set. If so, they start
                #  a string.
                # We're using the first 3, because we're looking for
                #  "rb'" (for example) at the start of the token. If
                #  we switch to longer prefixes, this needs to be
                #  adjusted.
                # Note that initial == token[:1].
                # Also note that single quote checking must come after
                #  triple quote checking (above).
                elif (initial in single_quoted or
                      token[:2] in single_quoted or
                      token[:3] in single_quoted):
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        # Again, using the first 3 chars of the
                        #  token. This is looking for the matching end
                        #  regex for the correct type of quote
                        #  character. So it's really looking for
                        #  endpats["'"] or endpats['"'], by trying to
                        #  skip string prefix characters, if any.
                        endprog = _compile(endpats.get(initial) or
                                           endpats.get(token[1]) or
                                           endpats.get(token[2]))
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        yield TokenInfo(STRING, token, spos, epos, line)

                elif initial.isidentifier():               # ordinary name
                    yield TokenInfo(NAME, token, spos, epos, line)
                elif initial == '\\':                      # continued stmt
                    continued = 1
                else:
                    if initial in '([{':
                        parenlev += 1
                    elif initial in ')]}':
                        parenlev -= 1
                    yield TokenInfo(OP, token, spos, epos, line)
            else:
                yield TokenInfo(ERRORTOKEN, line[pos],
                           (lnum, pos), (lnum, pos+1), line)
                pos += 1

    # Add an implicit NEWLINE if the input doesn't end in one
    if last_line and last_line[-1] not in '\r\n' and not last_line.strip().startswith("#"):
        yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
    for indent in indents[1:]:                 # pop remaining indent levels
        yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')


def generate_tokens(readline):
    """Tokenize a source reading Python code as unicode strings.

    This has the same API as tokenize(), except that it expects the *readline*
    callable to return str objects instead of bytes.
    """
    return _tokenize(readline, None)
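
# Example (illustrative sketch): generate_tokens() consumes str lines rather
# than bytes, so no ENCODING token is emitted.
#
#     from io import StringIO
#     for tok in generate_tokens(StringIO("x = 1\n").readline):
#         print(tok)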

def main():
    import argparse

    # Helper error handling routines
    def perror(message):
        sys.stderr.write(message)
        sys.stderr.write('\n')

    def error(message, filename=None, location=None):
        if location:
            args = (filename,) + location + (message,)
            perror("%s:%d:%d: error: %s" % args)
        elif filename:
            perror("%s: error: %s" % (filename, message))
        else:
            perror("error: %s" % message)
        sys.exit(1)

    # Parse the arguments and options
    parser = argparse.ArgumentParser(prog='python -m tokenize')
    parser.add_argument(dest='filename', nargs='?',
                        metavar='filename.py',
                        help='the file to tokenize; defaults to stdin')
    parser.add_argument('-e', '--exact', dest='exact', action='store_true',
                        help='display token names using the exact type')
    args = parser.parse_args()

    try:
        # Tokenize the input
        if args.filename:
            filename = args.filename
            with _builtin_open(filename, 'rb') as f:
                tokens = list(tokenize(f.readline))
        else:
            filename = "<stdin>"
            tokens = _tokenize(sys.stdin.readline, None)

        # Output the tokenization
        for token in tokens:
            token_type = token.type
            if args.exact:
                token_type = token.exact_type
            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
            print("%-20s%-15s%-15r" %
                  (token_range, tok_name[token_type], token.string))
    except IndentationError as err:
        line, column = err.args[1][1:3]
        error(err.args[0], filename, (line, column))
    except TokenError as err:
        line, column = err.args[1]
        error(err.args[0], filename, (line, column))
    except SyntaxError as err:
        error(err, filename)
    except OSError as err:
        error(err)
    except KeyboardInterrupt:
        print("interrupted\n")
    except Exception as err:
        perror("unexpected error: %s" % err)
        raise
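
# Example (illustrative sketch; 'example.py' is a hypothetical path): the
# command-line interface defined by main() above.
#
#     python -m tokenize example.py        # tokenize a file
#     python -m tokenize -e example.py     # report exact operator types
#     python -m tokenize                   # read source from stdin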

def _generate_tokens_from_c_tokenizer(source):
    """Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
    import _tokenize as c_tokenizer
    for info in c_tokenizer.TokenizerIter(source):
        tok, type, lineno, end_lineno, col_off, end_col_off, line = info
        yield TokenInfo(type, tok, (lineno, col_off), (end_lineno, end_col_off), line)


if __name__ == "__main__":
    main()