from test import test_support
from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP, NEWLINE,
                      STRING, ENDMARKER, tok_name, Untokenizer, tokenize)
from StringIO import StringIO
import os
from unittest import TestCase


# Converts a source string into a list of textual representations of its
# tokens, such as:
# `    NAME       'if'          (1, 0) (1, 2)`
# to make writing tests easier.
def stringify_tokens_from_source(token_generator, source_string):
    result = []
    num_lines = len(source_string.splitlines())
    missing_trailing_nl = source_string[-1] not in '\r\n'

    for type, token, start, end, line in token_generator:
        if type == ENDMARKER:
            break
        # Ignore the NEWLINE on the last line if the input lacks a trailing one
        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
            continue
        type = tok_name[type]
        result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
                      locals())

    return result
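
# A minimal usage sketch of stringify_tokens_from_source (illustrative only,
# not part of the test suite). Tokenizing "1 + 1" yields exactly the rows
# asserted in TokenizeTest.test_basic below:
#
#     src = "1 + 1"
#     rows = stringify_tokens_from_source(
#         generate_tokens(StringIO(src).readline), src)
#     # rows[0] == "    NUMBER     '1'           (1, 0) (1, 1)"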

class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with the tokens. The ENDMARKER and final
    # NEWLINE are omitted for brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens in s as a table and compare it with expected.
        f = StringIO(s)
        result = stringify_tokens_from_source(generate_tokens(f.readline), s)

        self.assertEqual(result,
                         expected.rstrip().splitlines())

    def test_implicit_newline(self):
        # Make sure that the tokenizer puts in an implicit NEWLINE
        # when the input lacks a trailing new line.
        f = StringIO("x")
        tokens = list(generate_tokens(f.readline))
        self.assertEqual(tokens[-2][0], NEWLINE)
        self.assertEqual(tokens[-1][0], ENDMARKER)

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)
    """)

        indent_error_file = """\
def k(x):
    x += 2
  x += 5
"""
        with self.assertRaisesRegexp(IndentationError,
                                     "unindent does not match any "
                                     "outer indentation level"):
            for tok in generate_tokens(StringIO(indent_error_file).readline):
                pass

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0123'        (1, 9) (1, 13)
    """)
        self.check_tokenize("01234567 > ~0x15", """\
    NUMBER     '01234567'    (1, 0) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    OP         '~'           (1, 11) (1, 12)
    NUMBER     '0x15'        (1, 12) (1, 16)
    """)
        self.check_tokenize("2134568 != 01231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '01231515'    (1, 11) (1, 19)
    """)
        self.check_tokenize("(-124561-1) & 0200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '0200000000'  (1, 14) (1, 24)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 012345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '012345'      (1, 13) (1, 19)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0L'          (1, 4) (1, 6)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110l", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 26)
    """)
        self.check_tokenize("x = -15921590215012591L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 23)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = u'abc' + U'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "u'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "U'ABC'"      (1, 13) (1, 19)
    """)
        self.check_tokenize('y = u"ABC" + U"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'u"ABC"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'U"ABC"'      (1, 13) (1, 19)
    """)
        self.check_tokenize("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "ur'abc'"     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     "Ur'ABC'"     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     "uR'ABC'"     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     "UR'ABC'"     (1, 34) (1, 41)
    """)
        self.check_tokenize('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'ur"abc"'     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     'Ur"ABC"'     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     'uR"ABC"'     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     'UR"ABC"'     (1, 34) (1, 41)
    """)
        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)

    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 01 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '01'          (1, 17) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    NUMBER     '0x124'       (1, 22) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    NAME       'z'           (1, 30) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    NAME       'a'           (1, 34) (1, 35)
    OP         '['           (1, 35) (1, 36)
    NUMBER     '5'           (1, 36) (1, 37)
    OP         ']'           (1, 37) (1, 38)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\n"
                            "x = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\n"
                            "def foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_pathological_trailing_whitespace(self):
        # Pathological whitespace (http://bugs.python.org/issue16152)
        self.check_tokenize("@          ", """\
    OP         '@'           (1, 0) (1, 1)
    """)


def decistmt(s):
    """Substitute Decimals for floats in a string of statements."""
    result = []
    g = generate_tokens(StringIO(s).readline)   # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result)
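
# For illustration (hedged): decistmt mirrors the example in the tokenize
# module docs, and TestMisc.test_decistmt below pins down its output:
#
#     decistmt('+21.3e-5*-.1234/81.7')
#     # -> "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"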

class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
        # we're only showing 12 digits, and the 13th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegexpMatches(str(eval(s)), '-3.21716034272e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)), Decimal('-3.217160342717258261933904529E-7'))


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if start is on an earlier row than the previous end
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                'start (1,3) precedes previous end (2,2)')
        # raise if start is on an earlier column in the same row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])

    def test_iter_compat(self):
        u = Untokenizer()
        token = (NAME, 'Hello')
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
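
    # A minimal sketch using the public untokenize() entry point instead of
    # the Untokenizer internals (illustrative addition, not part of the
    # original suite). With (type, string) 2-tuples, untokenize() falls back
    # to the same compatibility mode exercised above.
    def test_iter_compat_public_api(self):
        self.assertEqual(untokenize(iter([(NAME, 'Hello')])), 'Hello ')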


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized, converted back to source code
        via tokenize.untokenize(), and tokenized again from the latter.
        The test fails if the second tokenization doesn't match the first.
        """
        if isinstance(f, str): f = StringIO(f)
        token_list = list(generate_tokens(f.readline))
        f.close()
        tokens1 = [tok[:2] for tok in token_list]
        new_text = untokenize(tokens1)
        readline = iter(new_text.splitlines(1)).next
        tokens2 = [tok[:2] for tok in generate_tokens(readline)]
        self.assertEqual(tokens2, tokens1)

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("if x == 1:\n"
                             "    print x\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also\n")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier. Note that this test involves trailing
        # whitespace after the colon.

        self.check_roundtrip("if x == 1 : \n"
                             "  print x\n")
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        with open(fn) as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print x # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print 'x==1'\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code

        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print 'Can not import' # comment2\n"
                             "else:   print 'Loaded'\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random Python modules.
        # Pass the '-ucpu' option to process the full directory.

        import glob, random
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        if not test_support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            try:
                with open(testfile, 'rb') as f:
                    self.check_roundtrip(f)
            except:
                print "Roundtrip failed for file %s" % testfile
                raise


    def roundtrip(self, code):
        # Tokenize code, regenerate it with untokenize(), and return the
        # result as unicode.
        if isinstance(code, str):
            code = code.encode('utf-8')
        tokens = generate_tokens(StringIO(code).readline)
        return untokenize(tokens).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])


def test_main():
    test_support.run_unittest(TokenizeTest)
    test_support.run_unittest(UntokenizeTest)
    test_support.run_unittest(TestRoundtrip)
    test_support.run_unittest(TestMisc)

if __name__ == "__main__":
    test_main()
