1r"""Test correct treatment of various string literals by the parser. 2 3There are four types of string literals: 4 5 'abc' -- normal str 6 r'abc' -- raw str 7 b'xyz' -- normal bytes 8 br'xyz' | rb'xyz' -- raw bytes 9 10The difference between normal and raw strings is of course that in a 11raw string, \ escapes (while still used to determine the end of the 12literal) are not interpreted, so that r'\x00' contains four 13characters: a backslash, an x, and two zeros; while '\x00' contains a 14single character (code point zero). 15 16The tricky thing is what should happen when non-ASCII bytes are used 17inside literals. For bytes literals, this is considered illegal. But 18for str literals, those bytes are supposed to be decoded using the 19encoding declared for the file (UTF-8 by default). 20 21We have to test this with various file encodings. We also test it with 22exec()/eval(), which uses a different code path. 23 24This file is really about correct treatment of encodings and 25backslashes. It doesn't concern itself with issues like single 26vs. double quotes or singly- vs. triply-quoted strings: that's dealt 27with elsewhere (I assume). 28""" 29 30import os 31import sys 32import shutil 33import tempfile 34import unittest 35import warnings 36 37 38TEMPLATE = r"""# coding: %s 39a = 'x' 40assert ord(a) == 120 41b = '\x01' 42assert ord(b) == 1 43c = r'\x01' 44assert list(map(ord, c)) == [92, 120, 48, 49] 45d = '\x81' 46assert ord(d) == 0x81 47e = r'\x81' 48assert list(map(ord, e)) == [92, 120, 56, 49] 49f = '\u1881' 50assert ord(f) == 0x1881 51g = r'\u1881' 52assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49] 53h = '\U0001d120' 54assert ord(h) == 0x1d120 55i = r'\U0001d120' 56assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48] 57""" 58 59 60def byte(i): 61 return bytes([i]) 62 63 64class TestLiterals(unittest.TestCase): 65 66 def setUp(self): 67 self.save_path = sys.path[:] 68 self.tmpdir = tempfile.mkdtemp() 69 sys.path.insert(0, self.tmpdir) 70 71 def tearDown(self): 72 sys.path[:] = self.save_path 73 shutil.rmtree(self.tmpdir, ignore_errors=True) 74 75 def test_template(self): 76 # Check that the template doesn't contain any non-printables 77 # except for \n. 78 for c in TEMPLATE: 79 assert c == '\n' or ' ' <= c <= '~', repr(c) 80 81 def test_eval_str_normal(self): 82 self.assertEqual(eval(""" 'x' """), 'x') 83 self.assertEqual(eval(r""" '\x01' """), chr(1)) 84 self.assertEqual(eval(""" '\x01' """), chr(1)) 85 self.assertEqual(eval(r""" '\x81' """), chr(0x81)) 86 self.assertEqual(eval(""" '\x81' """), chr(0x81)) 87 self.assertEqual(eval(r""" '\u1881' """), chr(0x1881)) 88 self.assertEqual(eval(""" '\u1881' """), chr(0x1881)) 89 self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120)) 90 self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120)) 91 92 def test_eval_str_incomplete(self): 93 self.assertRaises(SyntaxError, eval, r""" '\x' """) 94 self.assertRaises(SyntaxError, eval, r""" '\x0' """) 95 self.assertRaises(SyntaxError, eval, r""" '\u' """) 96 self.assertRaises(SyntaxError, eval, r""" '\u0' """) 97 self.assertRaises(SyntaxError, eval, r""" '\u00' """) 98 self.assertRaises(SyntaxError, eval, r""" '\u000' """) 99 self.assertRaises(SyntaxError, eval, r""" '\U' """) 100 self.assertRaises(SyntaxError, eval, r""" '\U0' """) 101 self.assertRaises(SyntaxError, eval, r""" '\U00' """) 102 self.assertRaises(SyntaxError, eval, r""" '\U000' """) 103 self.assertRaises(SyntaxError, eval, r""" '\U0000' """) 104 self.assertRaises(SyntaxError, eval, r""" '\U00000' """) 105 self.assertRaises(SyntaxError, eval, r""" '\U000000' """) 106 self.assertRaises(SyntaxError, eval, r""" '\U0000000' """) 107 108 def test_eval_str_invalid_escape(self): 109 for b in range(1, 128): 110 if b in b"""\n\r"'01234567NU\\abfnrtuvx""": 111 continue 112 with self.assertWarns(DeprecationWarning): 113 self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b)) 114 115 with warnings.catch_warnings(record=True) as w: 116 warnings.simplefilter('always', category=DeprecationWarning) 117 eval("'''\n\\z'''") 118 self.assertEqual(len(w), 1) 119 self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") 120 self.assertEqual(w[0].filename, '<string>') 121 self.assertEqual(w[0].lineno, 1) 122 123 with warnings.catch_warnings(record=True) as w: 124 warnings.simplefilter('error', category=DeprecationWarning) 125 with self.assertRaises(SyntaxError) as cm: 126 eval("'''\n\\z'''") 127 exc = cm.exception 128 self.assertEqual(w, []) 129 self.assertEqual(exc.msg, r"invalid escape sequence '\z'") 130 self.assertEqual(exc.filename, '<string>') 131 self.assertEqual(exc.lineno, 1) 132 self.assertEqual(exc.offset, 1) 133 134 def test_eval_str_invalid_octal_escape(self): 135 for i in range(0o400, 0o1000): 136 with self.assertWarns(DeprecationWarning): 137 self.assertEqual(eval(r"'\%o'" % i), chr(i)) 138 139 with warnings.catch_warnings(record=True) as w: 140 warnings.simplefilter('always', category=DeprecationWarning) 141 eval("'''\n\\407'''") 142 self.assertEqual(len(w), 1) 143 self.assertEqual(str(w[0].message), 144 r"invalid octal escape sequence '\407'") 145 self.assertEqual(w[0].filename, '<string>') 146 self.assertEqual(w[0].lineno, 1) 147 148 with warnings.catch_warnings(record=True) as w: 149 warnings.simplefilter('error', category=DeprecationWarning) 150 with self.assertRaises(SyntaxError) as cm: 151 eval("'''\n\\407'''") 152 exc = cm.exception 153 self.assertEqual(w, []) 154 self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") 155 self.assertEqual(exc.filename, '<string>') 156 self.assertEqual(exc.lineno, 1) 157 self.assertEqual(exc.offset, 1) 158 159 def test_eval_str_raw(self): 160 self.assertEqual(eval(""" r'x' """), 'x') 161 self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01') 162 self.assertEqual(eval(""" r'\x01' """), chr(1)) 163 self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81') 164 self.assertEqual(eval(""" r'\x81' """), chr(0x81)) 165 self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881') 166 self.assertEqual(eval(""" r'\u1881' """), chr(0x1881)) 167 self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120') 168 self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120)) 169 170 def test_eval_bytes_normal(self): 171 self.assertEqual(eval(""" b'x' """), b'x') 172 self.assertEqual(eval(r""" b'\x01' """), byte(1)) 173 self.assertEqual(eval(""" b'\x01' """), byte(1)) 174 self.assertEqual(eval(r""" b'\x81' """), byte(0x81)) 175 self.assertRaises(SyntaxError, eval, """ b'\x81' """) 176 self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881') 177 self.assertRaises(SyntaxError, eval, """ b'\u1881' """) 178 self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120') 179 self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """) 180 181 def test_eval_bytes_incomplete(self): 182 self.assertRaises(SyntaxError, eval, r""" b'\x' """) 183 self.assertRaises(SyntaxError, eval, r""" b'\x0' """) 184 185 def test_eval_bytes_invalid_escape(self): 186 for b in range(1, 128): 187 if b in b"""\n\r"'01234567\\abfnrtvx""": 188 continue 189 with self.assertWarns(DeprecationWarning): 190 self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b])) 191 192 with warnings.catch_warnings(record=True) as w: 193 warnings.simplefilter('always', category=DeprecationWarning) 194 eval("b'''\n\\z'''") 195 self.assertEqual(len(w), 1) 196 self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") 197 self.assertEqual(w[0].filename, '<string>') 198 self.assertEqual(w[0].lineno, 1) 199 200 with warnings.catch_warnings(record=True) as w: 201 warnings.simplefilter('error', category=DeprecationWarning) 202 with self.assertRaises(SyntaxError) as cm: 203 eval("b'''\n\\z'''") 204 exc = cm.exception 205 self.assertEqual(w, []) 206 self.assertEqual(exc.msg, r"invalid escape sequence '\z'") 207 self.assertEqual(exc.filename, '<string>') 208 self.assertEqual(exc.lineno, 1) 209 210 def test_eval_bytes_invalid_octal_escape(self): 211 for i in range(0o400, 0o1000): 212 with self.assertWarns(DeprecationWarning): 213 self.assertEqual(eval(r"b'\%o'" % i), bytes([i & 0o377])) 214 215 with warnings.catch_warnings(record=True) as w: 216 warnings.simplefilter('always', category=DeprecationWarning) 217 eval("b'''\n\\407'''") 218 self.assertEqual(len(w), 1) 219 self.assertEqual(str(w[0].message), 220 r"invalid octal escape sequence '\407'") 221 self.assertEqual(w[0].filename, '<string>') 222 self.assertEqual(w[0].lineno, 1) 223 224 with warnings.catch_warnings(record=True) as w: 225 warnings.simplefilter('error', category=DeprecationWarning) 226 with self.assertRaises(SyntaxError) as cm: 227 eval("b'''\n\\407'''") 228 exc = cm.exception 229 self.assertEqual(w, []) 230 self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") 231 self.assertEqual(exc.filename, '<string>') 232 self.assertEqual(exc.lineno, 1) 233 234 def test_eval_bytes_raw(self): 235 self.assertEqual(eval(""" br'x' """), b'x') 236 self.assertEqual(eval(""" rb'x' """), b'x') 237 self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01') 238 self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01') 239 self.assertEqual(eval(""" br'\x01' """), byte(1)) 240 self.assertEqual(eval(""" rb'\x01' """), byte(1)) 241 self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81") 242 self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81") 243 self.assertRaises(SyntaxError, eval, """ br'\x81' """) 244 self.assertRaises(SyntaxError, eval, """ rb'\x81' """) 245 self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881") 246 self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881") 247 self.assertRaises(SyntaxError, eval, """ br'\u1881' """) 248 self.assertRaises(SyntaxError, eval, """ rb'\u1881' """) 249 self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120") 250 self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120") 251 self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """) 252 self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """) 253 self.assertRaises(SyntaxError, eval, """ bb'' """) 254 self.assertRaises(SyntaxError, eval, """ rr'' """) 255 self.assertRaises(SyntaxError, eval, """ brr'' """) 256 self.assertRaises(SyntaxError, eval, """ bbr'' """) 257 self.assertRaises(SyntaxError, eval, """ rrb'' """) 258 self.assertRaises(SyntaxError, eval, """ rbb'' """) 259 260 def test_eval_str_u(self): 261 self.assertEqual(eval(""" u'x' """), 'x') 262 self.assertEqual(eval(""" U'\u00e4' """), 'ä') 263 self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä') 264 self.assertRaises(SyntaxError, eval, """ ur'' """) 265 self.assertRaises(SyntaxError, eval, """ ru'' """) 266 self.assertRaises(SyntaxError, eval, """ bu'' """) 267 self.assertRaises(SyntaxError, eval, """ ub'' """) 268 269 def test_uppercase_prefixes(self): 270 self.assertEqual(eval(""" B'x' """), b'x') 271 self.assertEqual(eval(r""" R'\x01' """), r'\x01') 272 self.assertEqual(eval(r""" BR'\x01' """), br'\x01') 273 self.assertEqual(eval(""" F'{1+1}' """), f'{1+1}') 274 self.assertEqual(eval(r""" U'\U0001d120' """), u'\U0001d120') 275 276 def check_encoding(self, encoding, extra=""): 277 modname = "xx_" + encoding.replace("-", "_") 278 fn = os.path.join(self.tmpdir, modname + ".py") 279 f = open(fn, "w", encoding=encoding) 280 try: 281 f.write(TEMPLATE % encoding) 282 f.write(extra) 283 finally: 284 f.close() 285 __import__(modname) 286 del sys.modules[modname] 287 288 def test_file_utf_8(self): 289 extra = "z = '\u1234'; assert ord(z) == 0x1234\n" 290 self.check_encoding("utf-8", extra) 291 292 def test_file_utf_8_error(self): 293 extra = "b'\x80'\n" 294 self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra) 295 296 def test_file_utf8(self): 297 self.check_encoding("utf-8") 298 299 def test_file_iso_8859_1(self): 300 self.check_encoding("iso-8859-1") 301 302 def test_file_latin_1(self): 303 self.check_encoding("latin-1") 304 305 def test_file_latin9(self): 306 self.check_encoding("latin9") 307 308 309if __name__ == "__main__": 310 unittest.main() 311