1r"""Test correct treatment of various string literals by the parser.
2
3There are four types of string literals:
4
5    'abc'             -- normal str
6    r'abc'            -- raw str
7    b'xyz'            -- normal bytes
8    br'xyz' | rb'xyz' -- raw bytes
9
10The difference between normal and raw strings is of course that in a
11raw string, \ escapes (while still used to determine the end of the
12literal) are not interpreted, so that r'\x00' contains four
13characters: a backslash, an x, and two zeros; while '\x00' contains a
14single character (code point zero).
15
16The tricky thing is what should happen when non-ASCII bytes are used
17inside literals.  For bytes literals, this is considered illegal.  But
18for str literals, those bytes are supposed to be decoded using the
19encoding declared for the file (UTF-8 by default).
20
21We have to test this with various file encodings.  We also test it with
22exec()/eval(), which uses a different code path.
23
24This file is really about correct treatment of encodings and
25backslashes.  It doesn't concern itself with issues like single
26vs. double quotes or singly- vs. triply-quoted strings: that's dealt
27with elsewhere (I assume).
28"""
29
30import os
31import sys
32import shutil
33import tempfile
34import unittest
35import warnings
36
37
# Source text of the module that TestLiterals.check_encoding() writes to disk
# and imports.  The single %s is replaced with the encoding name, so the first
# line of the generated file is its coding declaration.  The outer literal is
# raw: every backslash below reaches the generated file unchanged and is only
# interpreted (or, for the r'' literals, left alone) when that file is parsed.
# TestLiterals.test_template() checks that this text is printable ASCII plus
# newlines.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
58
59
def byte(i):
    """Return a bytes object of length one whose only item is the integer *i*."""
    single_item = (i,)
    return bytes(single_item)
62
63
class TestLiterals(unittest.TestCase):
    """Check how str and bytes literals are decoded by eval() and by import.

    The ``test_eval_*`` methods pass literal source text to eval().  The
    ``test_file_*`` methods write TEMPLATE to a temporary module in a given
    encoding (see check_encoding) and import it.
    """

    def setUp(self):
        # Give each test its own scratch directory at the front of sys.path
        # so that modules written by check_encoding() are importable.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Restore sys.path in place and discard the scratch directory.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.
        for c in TEMPLATE:
            # A TestCase assertion is used instead of a bare ``assert`` so
            # that the check still runs under ``python -O``.
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        # Each escape is checked twice: with a raw outer literal eval() sees
        # the backslash sequence itself; with a non-raw outer literal eval()
        # sees the already-decoded character.
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # \x, \u and \U escapes with too few hex digits are syntax errors.
        self.assertRaises(SyntaxError, eval, r""" '\x' """)
        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u' """)
        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

    def test_eval_str_invalid_escape(self):
        # Every ASCII character that does not start a recognised str escape
        # must warn and leave the backslash in the result.
        for b in range(1, 128):
            if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))

        # With the warning recorded: exactly one warning, with the expected
        # message, filename and line number.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # With the warning turned into an error: a SyntaxError carrying the
        # same message is raised and nothing is recorded as a warning.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)
        self.assertEqual(exc.offset, 1)

    def test_eval_str_invalid_octal_escape(self):
        # Octal escapes above 0o377 warn but still produce chr(value) in str.
        for i in range(0o400, 0o1000):
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"'\%o'" % i), chr(i))

        # Recorded as a warning: check message and location.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("'''\n\\407'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(str(w[0].message),
                         r"invalid octal escape sequence '\407'")
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # Escalated to an error: check the resulting SyntaxError.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("'''\n\\407'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)
        self.assertEqual(exc.offset, 1)

    def test_eval_str_raw(self):
        # In a raw str literal the backslash sequence is kept verbatim; when
        # the outer literal is non-raw, eval() only ever sees the decoded
        # character, which the raw literal passes through unchanged.
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        # Non-ASCII characters placed directly inside a bytes literal (the
        # non-raw outer literals below) must be rejected with SyntaxError.
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        # \x escapes with too few hex digits are syntax errors in bytes too.
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_invalid_escape(self):
        # Same as the str variant, except that N, U and u are not in the
        # skipped set here, so they are expected to warn as well.
        for b in range(1, 128):
            if b in b"""\n\r"'01234567\\abfnrtvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))

        # Recorded as a warning: check message and location.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("b'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # Escalated to an error: check the resulting SyntaxError.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("b'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)

    def test_eval_bytes_invalid_octal_escape(self):
        # Octal escapes above 0o377 warn; in bytes only the low eight bits
        # of the value are kept.
        for i in range(0o400, 0o1000):
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"b'\%o'" % i), bytes([i & 0o377]))

        # Recorded as a warning: check message and location.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("b'''\n\\407'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(str(w[0].message),
                         r"invalid octal escape sequence '\407'")
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # Escalated to an error: check the resulting SyntaxError.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("b'''\n\\407'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)

    def test_eval_bytes_raw(self):
        # Both prefix orders (br / rb) must behave the same: backslash
        # sequences are kept verbatim and non-ASCII characters are rejected.
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Repeated prefix letters are not valid prefixes.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        # The u/U prefix is accepted on its own but cannot be combined with
        # r or b.
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def test_uppercase_prefixes(self):
        # Upper-case prefixes give the same values as their lower-case forms.
        self.assertEqual(eval(""" B'x' """), b'x')
        self.assertEqual(eval(r""" R'\x01' """), r'\x01')
        self.assertEqual(eval(r""" BR'\x01' """), br'\x01')
        self.assertEqual(eval(""" F'{1+1}' """), f'{1+1}')
        self.assertEqual(eval(r""" U'\U0001d120' """), u'\U0001d120')

    def check_encoding(self, encoding, extra=""):
        """Write TEMPLATE plus *extra* as a module in *encoding* and import it.

        The module is named ``xx_<encoding>`` (dashes replaced by
        underscores), is created in ``self.tmpdir`` and declares *encoding*
        on its coding line.  Importing it runs the template's module-level
        asserts; any exception raised by the import propagates to the caller.
        """
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        # The context manager closes the file before the import below.
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        __import__(modname)
        # Drop the module again so that another test using the same module
        # name imports its own freshly written file.
        del sys.modules[modname]

    def test_file_utf_8(self):
        # The extra line puts a non-ASCII character directly in a str literal.
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # The extra line puts the non-ASCII character U+0080 directly in a
        # bytes literal, which must make the import fail with SyntaxError.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        self.check_encoding("utf-8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")
307
308
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
311