1""" 2Test the implementation of the PEP 540: the UTF-8 Mode. 3""" 4 5import locale 6import subprocess 7import sys 8import textwrap 9import unittest 10from test import support 11from test.support.script_helper import assert_python_ok, assert_python_failure 12from test.support import os_helper 13 14 15MS_WINDOWS = (sys.platform == 'win32') 16POSIX_LOCALES = ('C', 'POSIX') 17VXWORKS = (sys.platform == "vxworks") 18 19class UTF8ModeTests(unittest.TestCase): 20 DEFAULT_ENV = { 21 'PYTHONUTF8': '', 22 'PYTHONLEGACYWINDOWSFSENCODING': '', 23 'PYTHONCOERCECLOCALE': '0', 24 } 25 26 def posix_locale(self): 27 loc = locale.setlocale(locale.LC_CTYPE, None) 28 return (loc in POSIX_LOCALES) 29 30 def get_output(self, *args, failure=False, **kw): 31 kw = dict(self.DEFAULT_ENV, **kw) 32 if failure: 33 out = assert_python_failure(*args, **kw) 34 out = out[2] 35 else: 36 out = assert_python_ok(*args, **kw) 37 out = out[1] 38 return out.decode().rstrip("\n\r") 39 40 @unittest.skipIf(MS_WINDOWS, 'Windows has no POSIX locale') 41 def test_posix_locale(self): 42 code = 'import sys; print(sys.flags.utf8_mode)' 43 44 for loc in POSIX_LOCALES: 45 with self.subTest(LC_ALL=loc): 46 out = self.get_output('-c', code, LC_ALL=loc) 47 self.assertEqual(out, '1') 48 49 def test_xoption(self): 50 code = 'import sys; print(sys.flags.utf8_mode)' 51 52 out = self.get_output('-X', 'utf8', '-c', code) 53 self.assertEqual(out, '1') 54 55 # undocumented but accepted syntax: -X utf8=1 56 out = self.get_output('-X', 'utf8=1', '-c', code) 57 self.assertEqual(out, '1') 58 59 out = self.get_output('-X', 'utf8=0', '-c', code) 60 self.assertEqual(out, '0') 61 62 if MS_WINDOWS: 63 # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode 64 # and has the priority over -X utf8 65 out = self.get_output('-X', 'utf8', '-c', code, 66 PYTHONLEGACYWINDOWSFSENCODING='1') 67 self.assertEqual(out, '0') 68 69 def test_env_var(self): 70 code = 'import sys; print(sys.flags.utf8_mode)' 71 72 out = self.get_output('-c', code, PYTHONUTF8='1') 73 self.assertEqual(out, '1') 74 75 out = self.get_output('-c', code, PYTHONUTF8='0') 76 self.assertEqual(out, '0') 77 78 # -X utf8 has the priority over PYTHONUTF8 79 out = self.get_output('-X', 'utf8=0', '-c', code, PYTHONUTF8='1') 80 self.assertEqual(out, '0') 81 82 if MS_WINDOWS: 83 # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode 84 # and has the priority over PYTHONUTF8 85 out = self.get_output('-X', 'utf8', '-c', code, PYTHONUTF8='1', 86 PYTHONLEGACYWINDOWSFSENCODING='1') 87 self.assertEqual(out, '0') 88 89 # Cannot test with the POSIX locale, since the POSIX locale enables 90 # the UTF-8 mode 91 if not self.posix_locale(): 92 # PYTHONUTF8 should be ignored if -E is used 93 out = self.get_output('-E', '-c', code, PYTHONUTF8='1') 94 self.assertEqual(out, '0') 95 96 # invalid mode 97 out = self.get_output('-c', code, PYTHONUTF8='xxx', failure=True) 98 self.assertIn('invalid PYTHONUTF8 environment variable value', 99 out.rstrip()) 100 101 def test_filesystemencoding(self): 102 code = textwrap.dedent(''' 103 import sys 104 print("{}/{}".format(sys.getfilesystemencoding(), 105 sys.getfilesystemencodeerrors())) 106 ''') 107 108 if MS_WINDOWS: 109 expected = 'utf-8/surrogatepass' 110 else: 111 expected = 'utf-8/surrogateescape' 112 113 out = self.get_output('-X', 'utf8', '-c', code) 114 self.assertEqual(out, expected) 115 116 if MS_WINDOWS: 117 # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode 118 # and has the priority over -X utf8 and PYTHONUTF8 119 out = self.get_output('-X', 'utf8', '-c', code, 120 PYTHONUTF8='strict', 121 PYTHONLEGACYWINDOWSFSENCODING='1') 122 self.assertEqual(out, 'mbcs/replace') 123 124 def test_stdio(self): 125 code = textwrap.dedent(''' 126 import sys 127 print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}") 128 print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}") 129 print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}") 130 ''') 131 132 out = self.get_output('-X', 'utf8', '-c', code, 133 PYTHONIOENCODING='') 134 self.assertEqual(out.splitlines(), 135 ['stdin: utf-8/surrogateescape', 136 'stdout: utf-8/surrogateescape', 137 'stderr: utf-8/backslashreplace']) 138 139 # PYTHONIOENCODING has the priority over PYTHONUTF8 140 out = self.get_output('-X', 'utf8', '-c', code, 141 PYTHONIOENCODING="latin1") 142 self.assertEqual(out.splitlines(), 143 ['stdin: iso8859-1/strict', 144 'stdout: iso8859-1/strict', 145 'stderr: iso8859-1/backslashreplace']) 146 147 out = self.get_output('-X', 'utf8', '-c', code, 148 PYTHONIOENCODING=":namereplace") 149 self.assertEqual(out.splitlines(), 150 ['stdin: utf-8/namereplace', 151 'stdout: utf-8/namereplace', 152 'stderr: utf-8/backslashreplace']) 153 154 def test_io(self): 155 code = textwrap.dedent(''' 156 import sys 157 filename = sys.argv[1] 158 with open(filename) as fp: 159 print(f"{fp.encoding}/{fp.errors}") 160 ''') 161 filename = __file__ 162 163 out = self.get_output('-c', code, filename, PYTHONUTF8='1') 164 self.assertEqual(out.lower(), 'utf-8/strict') 165 166 def _check_io_encoding(self, module, encoding=None, errors=None): 167 filename = __file__ 168 169 # Encoding explicitly set 170 args = [] 171 if encoding: 172 args.append(f'encoding={encoding!r}') 173 if errors: 174 args.append(f'errors={errors!r}') 175 code = textwrap.dedent(''' 176 import sys 177 from %s import open 178 filename = sys.argv[1] 179 with open(filename, %s) as fp: 180 print(f"{fp.encoding}/{fp.errors}") 181 ''') % (module, ', '.join(args)) 182 out = self.get_output('-c', code, filename, 183 PYTHONUTF8='1') 184 185 if not encoding: 186 encoding = 'utf-8' 187 if not errors: 188 errors = 'strict' 189 self.assertEqual(out.lower(), f'{encoding}/{errors}') 190 191 def check_io_encoding(self, module): 192 self._check_io_encoding(module, encoding="latin1") 193 self._check_io_encoding(module, errors="namereplace") 194 self._check_io_encoding(module, 195 encoding="latin1", errors="namereplace") 196 197 def test_io_encoding(self): 198 self.check_io_encoding('io') 199 200 def test_pyio_encoding(self): 201 self.check_io_encoding('_pyio') 202 203 def test_locale_getpreferredencoding(self): 204 code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))' 205 out = self.get_output('-X', 'utf8', '-c', code) 206 self.assertEqual(out, 'utf-8 utf-8') 207 208 for loc in POSIX_LOCALES: 209 with self.subTest(LC_ALL=loc): 210 out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc) 211 self.assertEqual(out, 'utf-8 utf-8') 212 213 @unittest.skipIf(MS_WINDOWS, 'test specific to Unix') 214 def test_cmd_line(self): 215 arg = 'h\xe9\u20ac'.encode('utf-8') 216 arg_utf8 = arg.decode('utf-8') 217 arg_ascii = arg.decode('ascii', 'surrogateescape') 218 code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))' 219 220 def check(utf8_opt, expected, **kw): 221 out = self.get_output('-X', utf8_opt, '-c', code, arg, **kw) 222 args = out.partition(':')[2].rstrip() 223 self.assertEqual(args, ascii(expected), out) 224 225 check('utf8', [arg_utf8]) 226 for loc in POSIX_LOCALES: 227 with self.subTest(LC_ALL=loc): 228 check('utf8', [arg_utf8], LC_ALL=loc) 229 230 if sys.platform == 'darwin' or support.is_android or VXWORKS: 231 c_arg = arg_utf8 232 elif sys.platform.startswith("aix"): 233 c_arg = arg.decode('iso-8859-1') 234 else: 235 c_arg = arg_ascii 236 for loc in POSIX_LOCALES: 237 with self.subTest(LC_ALL=loc): 238 check('utf8=0', [c_arg], LC_ALL=loc) 239 240 def test_optim_level(self): 241 # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag 242 # twice when -X utf8 requires to parse the configuration twice (when 243 # the encoding changes after reading the configuration, the 244 # configuration is read again with the new encoding). 245 code = 'import sys; print(sys.flags.optimize)' 246 out = self.get_output('-X', 'utf8', '-O', '-c', code) 247 self.assertEqual(out, '1') 248 out = self.get_output('-X', 'utf8', '-OO', '-c', code) 249 self.assertEqual(out, '2') 250 251 code = 'import sys; print(sys.flags.ignore_environment)' 252 out = self.get_output('-X', 'utf8', '-E', '-c', code) 253 self.assertEqual(out, '1') 254 255 @unittest.skipIf(MS_WINDOWS, 256 "os.device_encoding() doesn't implement " 257 "the UTF-8 Mode on Windows") 258 @support.requires_subprocess() 259 def test_device_encoding(self): 260 # Use stdout as TTY 261 if not sys.stdout.isatty(): 262 self.skipTest("sys.stdout is not a TTY") 263 264 filename = 'out.txt' 265 self.addCleanup(os_helper.unlink, filename) 266 267 code = (f'import os, sys; fd = sys.stdout.fileno(); ' 268 f'out = open({filename!r}, "w", encoding="utf-8"); ' 269 f'print(os.isatty(fd), os.device_encoding(fd), file=out); ' 270 f'out.close()') 271 cmd = [sys.executable, '-X', 'utf8', '-c', code] 272 # The stdout TTY is inherited to the child process 273 proc = subprocess.run(cmd, text=True) 274 self.assertEqual(proc.returncode, 0, proc) 275 276 # In UTF-8 Mode, device_encoding(fd) returns "UTF-8" if fd is a TTY 277 with open(filename, encoding="utf8") as fp: 278 out = fp.read().rstrip() 279 self.assertEqual(out, 'True utf-8') 280 281 282if __name__ == "__main__": 283 unittest.main() 284