from test import test_support
from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP, NEWLINE,
                      STRING, ENDMARKER, tok_name, Untokenizer, tokenize)
from StringIO import StringIO
import os
from unittest import TestCase


# Converts a source string into a list of textual representations
# of the tokens such as:
# `    NAME       'if'          (1, 0) (1, 2)`
# to make writing tests easier.
def stringify_tokens_from_source(token_generator, source_string):
    result = []
    num_lines = len(source_string.splitlines())
    missing_trailing_nl = source_string[-1] not in '\r\n'

    for type, token, start, end, line in token_generator:
        if type == ENDMARKER:
            break
        # Ignore the new line on the last line if the input lacks one
        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
            continue
        type = tok_name[type]
        result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
                      locals())

    return result


class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER and the final
    # NEWLINE are omitted for brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
        f = StringIO(s)
        result = stringify_tokens_from_source(generate_tokens(f.readline), s)

        self.assertEqual(result,
                         expected.rstrip().splitlines())

    def test_implicit_newline(self):
        # Make sure that the tokenizer puts in an implicit NEWLINE
        # when the input lacks a trailing new line.
        f = StringIO("x")
        tokens = list(generate_tokens(f.readline))
        self.assertEqual(tokens[-2][0], NEWLINE)
        self.assertEqual(tokens[-1][0], ENDMARKER)

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)
    """)

        indent_error_file = """\
def k(x):
    x += 2
  x += 5
"""
        with self.assertRaisesRegexp(IndentationError,
                                     "unindent does not match any "
                                     "outer indentation level"):
            for tok in generate_tokens(StringIO(indent_error_file).readline):
                pass

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0123'        (1, 9) (1, 13)
    """)
        self.check_tokenize("01234567 > ~0x15", """\
    NUMBER     '01234567'    (1, 0) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    OP         '~'           (1, 11) (1, 12)
    NUMBER     '0x15'        (1, 12) (1, 16)
    """)
        self.check_tokenize("2134568 != 01231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '01231515'    (1, 11) (1, 19)
    """)
        self.check_tokenize("(-124561-1) & 0200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '0200000000'  (1, 14) (1, 24)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 012345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '012345'      (1, 13) (1, 19)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0L'          (1, 4) (1, 6)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110l", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 26)
    """)
        self.check_tokenize("x = -15921590215012591L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 23)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = u'abc' + U'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "u'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "U'ABC'"      (1, 13) (1, 19)
    """)
        self.check_tokenize('y = u"ABC" + U"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'u"ABC"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'U"ABC"'      (1, 13) (1, 19)
    """)
        self.check_tokenize("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "ur'abc'"     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     "Ur'ABC'"     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     "uR'ABC'"     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     "UR'ABC'"     (1, 34) (1, 41)
    """)
        self.check_tokenize('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'ur"abc"'     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     'Ur"ABC"'     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     'uR"ABC"'     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     'UR"ABC"'     (1, 34) (1, 41)
    """)
        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)

    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 01 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '01'          (1, 17) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    NUMBER     '0x124'       (1, 22) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    NAME       'z'           (1, 30) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    NAME       'a'           (1, 34) (1, 35)
    OP         '['           (1, 35) (1, 36)
    NUMBER     '5'           (1, 36) (1, 37)
    OP         ']'           (1, 37) (1, 38)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\n"
                            "x = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\n"
                            "def foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_pathological_trailing_whitespace(self):
        # Pathological whitespace (http://bugs.python.org/issue16152)
        self.check_tokenize("@ ", """\
    OP         '@'           (1, 0) (1, 1)
    """)


def decistmt(s):
    result = []
    g = generate_tokens(StringIO(s).readline)   # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result)


class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
        # we're only showing 12 digits, and the 13th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegexpMatches(str(eval(s)), '-3.21716034272e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)),
                         Decimal('-3.217160342717258261933904529E-7'))


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if previous row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                         'start (1,3) precedes previous end (2,2)')
        # raise if previous column in row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])

    def test_iter_compat(self):
        u = Untokenizer()
        token = (NAME, 'Hello')
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized, converted back to source code
        via tokenize.untokenize(), and tokenized again from the latter.
        The test fails if the second tokenization does not match the first.
        """
        if isinstance(f, str): f = StringIO(f)
        token_list = list(generate_tokens(f.readline))
        f.close()
        tokens1 = [tok[:2] for tok in token_list]
        new_text = untokenize(tokens1)
        readline = iter(new_text.splitlines(1)).next
        tokens2 = [tok[:2] for tok in generate_tokens(readline)]
        self.assertEqual(tokens2, tokens1)

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("if x == 1:\n"
                             "    print x\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also\n")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier. Note that this test involves trailing
        # whitespace after the colon. Note that we use hex escapes to make the
        # two trailing blanks apparent in the expected output.

        self.check_roundtrip("if x == 1 : \n"
                             "  print x\n")
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        with open(fn) as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print x # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print 'x==1'\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code

        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print 'Can not import' # comment2\n"
                             "else: print 'Loaded'\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random python modules.
        # pass the '-ucpu' option to process the full directory.

        import glob, random
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        if not test_support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            try:
                with open(testfile, 'rb') as f:
                    self.check_roundtrip(f)
            except:
                print "Roundtrip failed for file %s" % testfile
                raise

    def roundtrip(self, code):
        if isinstance(code, str):
            code = code.encode('utf-8')
        tokens = generate_tokens(StringIO(code).readline)
        return untokenize(tokens).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])


def test_main():
    test_support.run_unittest(TokenizeTest)
    test_support.run_unittest(UntokenizeTest)
    test_support.run_unittest(TestRoundtrip)
    test_support.run_unittest(TestMisc)


if __name__ == "__main__":
    test_main()