1 #include "Python.h"
2 #include "pycore_fileutils.h"     // DECODE_LOCALE_ERR
3 #include "pycore_getopt.h"        // _PyOS_GetOpt()
4 #include "pycore_initconfig.h"    // _PyArgv
5 #include "pycore_pymem.h"         // _PyMem_GetAllocatorName()
6 #include "pycore_runtime.h"       // _PyRuntime_Initialize()
7 
8 #include <locale.h>               // setlocale()
9 #include <stdlib.h>               // getenv()
10 
11 
12 /* Forward declarations */
13 static void
14 preconfig_copy(PyPreConfig *config, const PyPreConfig *config2);
15 
16 
17 /* --- File system encoding/errors -------------------------------- */
18 
/* Name of the filesystem encoding. NULL until set;
   _Py_SetFileSystemEncoding() stores a _PyMem_RawStrdup() copy here. */
const char *Py_FileSystemDefaultEncoding = NULL;
/* When zero, _Py_ClearFileSystemEncoding() releases a non-NULL
   Py_FileSystemDefaultEncoding with PyMem_RawFree(); when non-zero the
   string is left untouched. */
int Py_HasFileSystemDefaultEncoding = 0;
/* Name of the filesystem error handler. NULL until set;
   _Py_SetFileSystemEncoding() stores a _PyMem_RawStrdup() copy here. */
const char *Py_FileSystemDefaultEncodeErrors = NULL;
/* Same role as Py_HasFileSystemDefaultEncoding, but for
   Py_FileSystemDefaultEncodeErrors. */
int _Py_HasFileSystemDefaultEncodeErrors = 0;
23 
24 void
_Py_ClearFileSystemEncoding(void)25 _Py_ClearFileSystemEncoding(void)
26 {
27     if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) {
28         PyMem_RawFree((char*)Py_FileSystemDefaultEncoding);
29         Py_FileSystemDefaultEncoding = NULL;
30     }
31     if (!_Py_HasFileSystemDefaultEncodeErrors && Py_FileSystemDefaultEncodeErrors) {
32         PyMem_RawFree((char*)Py_FileSystemDefaultEncodeErrors);
33         Py_FileSystemDefaultEncodeErrors = NULL;
34     }
35 }
36 
37 
38 /* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
39    global configuration variables to PyConfig.filesystem_encoding and
40    PyConfig.filesystem_errors (encoded to UTF-8).
41 
42    Function called by _PyUnicode_InitEncodings(). */
43 int
_Py_SetFileSystemEncoding(const char * encoding,const char * errors)44 _Py_SetFileSystemEncoding(const char *encoding, const char *errors)
45 {
46     char *encoding2 = _PyMem_RawStrdup(encoding);
47     if (encoding2 == NULL) {
48         return -1;
49     }
50 
51     char *errors2 = _PyMem_RawStrdup(errors);
52     if (errors2 == NULL) {
53         PyMem_RawFree(encoding2);
54         return -1;
55     }
56 
57     _Py_ClearFileSystemEncoding();
58 
59     Py_FileSystemDefaultEncoding = encoding2;
60     Py_HasFileSystemDefaultEncoding = 0;
61 
62     Py_FileSystemDefaultEncodeErrors = errors2;
63     _Py_HasFileSystemDefaultEncodeErrors = 0;
64     return 0;
65 }
66 
67 
68 /* --- _PyArgv ---------------------------------------------------- */
69 
70 /* Decode bytes_argv using Py_DecodeLocale() */
71 PyStatus
_PyArgv_AsWstrList(const _PyArgv * args,PyWideStringList * list)72 _PyArgv_AsWstrList(const _PyArgv *args, PyWideStringList *list)
73 {
74     PyWideStringList wargv = _PyWideStringList_INIT;
75     if (args->use_bytes_argv) {
76         size_t size = sizeof(wchar_t*) * args->argc;
77         wargv.items = (wchar_t **)PyMem_RawMalloc(size);
78         if (wargv.items == NULL) {
79             return _PyStatus_NO_MEMORY();
80         }
81 
82         for (Py_ssize_t i = 0; i < args->argc; i++) {
83             size_t len;
84             wchar_t *arg = Py_DecodeLocale(args->bytes_argv[i], &len);
85             if (arg == NULL) {
86                 _PyWideStringList_Clear(&wargv);
87                 return DECODE_LOCALE_ERR("command line arguments", len);
88             }
89             wargv.items[i] = arg;
90             wargv.length++;
91         }
92 
93         _PyWideStringList_Clear(list);
94         *list = wargv;
95     }
96     else {
97         wargv.length = args->argc;
98         wargv.items = (wchar_t **)args->wchar_argv;
99         if (_PyWideStringList_Copy(list, &wargv) < 0) {
100             return _PyStatus_NO_MEMORY();
101         }
102     }
103     return _PyStatus_OK();
104 }
105 
106 
107 /* --- _PyPreCmdline ------------------------------------------------- */
108 
109 void
_PyPreCmdline_Clear(_PyPreCmdline * cmdline)110 _PyPreCmdline_Clear(_PyPreCmdline *cmdline)
111 {
112     _PyWideStringList_Clear(&cmdline->argv);
113     _PyWideStringList_Clear(&cmdline->xoptions);
114 }
115 
116 
117 PyStatus
_PyPreCmdline_SetArgv(_PyPreCmdline * cmdline,const _PyArgv * args)118 _PyPreCmdline_SetArgv(_PyPreCmdline *cmdline, const _PyArgv *args)
119 {
120     return _PyArgv_AsWstrList(args, &cmdline->argv);
121 }
122 
123 
124 static void
precmdline_get_preconfig(_PyPreCmdline * cmdline,const PyPreConfig * config)125 precmdline_get_preconfig(_PyPreCmdline *cmdline, const PyPreConfig *config)
126 {
127 #define COPY_ATTR(ATTR) \
128     if (config->ATTR != -1) { \
129         cmdline->ATTR = config->ATTR; \
130     }
131 
132     COPY_ATTR(isolated);
133     COPY_ATTR(use_environment);
134     COPY_ATTR(dev_mode);
135 
136 #undef COPY_ATTR
137 }
138 
139 
140 static void
precmdline_set_preconfig(const _PyPreCmdline * cmdline,PyPreConfig * config)141 precmdline_set_preconfig(const _PyPreCmdline *cmdline, PyPreConfig *config)
142 {
143 #define COPY_ATTR(ATTR) \
144     config->ATTR = cmdline->ATTR
145 
146     COPY_ATTR(isolated);
147     COPY_ATTR(use_environment);
148     COPY_ATTR(dev_mode);
149 
150 #undef COPY_ATTR
151 }
152 
153 
154 PyStatus
_PyPreCmdline_SetConfig(const _PyPreCmdline * cmdline,PyConfig * config)155 _PyPreCmdline_SetConfig(const _PyPreCmdline *cmdline, PyConfig *config)
156 {
157 #define COPY_ATTR(ATTR) \
158     config->ATTR = cmdline->ATTR
159 
160     PyStatus status = _PyWideStringList_Extend(&config->xoptions, &cmdline->xoptions);
161     if (_PyStatus_EXCEPTION(status)) {
162         return status;
163     }
164 
165     COPY_ATTR(isolated);
166     COPY_ATTR(use_environment);
167     COPY_ATTR(dev_mode);
168     COPY_ATTR(warn_default_encoding);
169     return _PyStatus_OK();
170 
171 #undef COPY_ATTR
172 }
173 
174 
175 /* Parse the command line arguments */
176 static PyStatus
precmdline_parse_cmdline(_PyPreCmdline * cmdline)177 precmdline_parse_cmdline(_PyPreCmdline *cmdline)
178 {
179     const PyWideStringList *argv = &cmdline->argv;
180 
181     _PyOS_ResetGetOpt();
182     /* Don't log parsing errors into stderr here: PyConfig_Read()
183        is responsible for that */
184     _PyOS_opterr = 0;
185     do {
186         int longindex = -1;
187         int c = _PyOS_GetOpt(argv->length, argv->items, &longindex);
188 
189         if (c == EOF || c == 'c' || c == 'm') {
190             break;
191         }
192 
193         switch (c) {
194         case 'E':
195             cmdline->use_environment = 0;
196             break;
197 
198         case 'I':
199             cmdline->isolated = 1;
200             break;
201 
202         case 'X':
203         {
204             PyStatus status = PyWideStringList_Append(&cmdline->xoptions,
205                                                       _PyOS_optarg);
206             if (_PyStatus_EXCEPTION(status)) {
207                 return status;
208             }
209             break;
210         }
211 
212         default:
213             /* ignore other argument:
214                handled by PyConfig_Read() */
215             break;
216         }
217     } while (1);
218 
219     return _PyStatus_OK();
220 }
221 
222 
223 PyStatus
_PyPreCmdline_Read(_PyPreCmdline * cmdline,const PyPreConfig * preconfig)224 _PyPreCmdline_Read(_PyPreCmdline *cmdline, const PyPreConfig *preconfig)
225 {
226     precmdline_get_preconfig(cmdline, preconfig);
227 
228     if (preconfig->parse_argv) {
229         PyStatus status = precmdline_parse_cmdline(cmdline);
230         if (_PyStatus_EXCEPTION(status)) {
231             return status;
232         }
233     }
234 
235     /* isolated, use_environment */
236     if (cmdline->isolated < 0) {
237         cmdline->isolated = 0;
238     }
239     if (cmdline->isolated > 0) {
240         cmdline->use_environment = 0;
241     }
242     if (cmdline->use_environment < 0) {
243         cmdline->use_environment = 0;
244     }
245 
246     /* dev_mode */
247     if ((cmdline->dev_mode < 0)
248         && (_Py_get_xoption(&cmdline->xoptions, L"dev")
249             || _Py_GetEnv(cmdline->use_environment, "PYTHONDEVMODE")))
250     {
251         cmdline->dev_mode = 1;
252     }
253     if (cmdline->dev_mode < 0) {
254         cmdline->dev_mode = 0;
255     }
256 
257     // warn_default_encoding
258     if (_Py_get_xoption(&cmdline->xoptions, L"warn_default_encoding")
259             || _Py_GetEnv(cmdline->use_environment, "PYTHONWARNDEFAULTENCODING"))
260     {
261         cmdline->warn_default_encoding = 1;
262     }
263 
264     assert(cmdline->use_environment >= 0);
265     assert(cmdline->isolated >= 0);
266     assert(cmdline->dev_mode >= 0);
267     assert(cmdline->warn_default_encoding >= 0);
268 
269     return _PyStatus_OK();
270 }
271 
272 
273 /* --- PyPreConfig ----------------------------------------------- */
274 
275 
276 void
_PyPreConfig_InitCompatConfig(PyPreConfig * config)277 _PyPreConfig_InitCompatConfig(PyPreConfig *config)
278 {
279     memset(config, 0, sizeof(*config));
280 
281     config->_config_init = (int)_PyConfig_INIT_COMPAT;
282     config->parse_argv = 0;
283     config->isolated = -1;
284     config->use_environment = -1;
285     config->configure_locale = 1;
286 
287     /* bpo-36443: C locale coercion (PEP 538) and UTF-8 Mode (PEP 540)
288        are disabled by default using the Compat configuration.
289 
290        Py_UTF8Mode=1 enables the UTF-8 mode. PYTHONUTF8 environment variable
291        is ignored (even if use_environment=1). */
292     config->utf8_mode = 0;
293     config->coerce_c_locale = 0;
294     config->coerce_c_locale_warn = 0;
295 
296     config->dev_mode = -1;
297     config->allocator = PYMEM_ALLOCATOR_NOT_SET;
298 #ifdef MS_WINDOWS
299     config->legacy_windows_fs_encoding = -1;
300 #endif
301 }
302 
303 
304 void
PyPreConfig_InitPythonConfig(PyPreConfig * config)305 PyPreConfig_InitPythonConfig(PyPreConfig *config)
306 {
307     _PyPreConfig_InitCompatConfig(config);
308 
309     config->_config_init = (int)_PyConfig_INIT_PYTHON;
310     config->isolated = 0;
311     config->parse_argv = 1;
312     config->use_environment = 1;
313     /* Set to -1 to enable C locale coercion (PEP 538) and UTF-8 Mode (PEP 540)
314        depending on the LC_CTYPE locale, PYTHONUTF8 and PYTHONCOERCECLOCALE
315        environment variables. */
316     config->coerce_c_locale = -1;
317     config->coerce_c_locale_warn = -1;
318     config->utf8_mode = -1;
319 #ifdef MS_WINDOWS
320     config->legacy_windows_fs_encoding = 0;
321 #endif
322 }
323 
324 
325 void
PyPreConfig_InitIsolatedConfig(PyPreConfig * config)326 PyPreConfig_InitIsolatedConfig(PyPreConfig *config)
327 {
328     _PyPreConfig_InitCompatConfig(config);
329 
330     config->_config_init = (int)_PyConfig_INIT_ISOLATED;
331     config->configure_locale = 0;
332     config->isolated = 1;
333     config->use_environment = 0;
334     config->utf8_mode = 0;
335     config->dev_mode = 0;
336 #ifdef MS_WINDOWS
337     config->legacy_windows_fs_encoding = 0;
338 #endif
339 }
340 
341 
342 PyStatus
_PyPreConfig_InitFromPreConfig(PyPreConfig * config,const PyPreConfig * config2)343 _PyPreConfig_InitFromPreConfig(PyPreConfig *config,
344                                const PyPreConfig *config2)
345 {
346     PyPreConfig_InitPythonConfig(config);
347     preconfig_copy(config, config2);
348     return _PyStatus_OK();
349 }
350 
351 
352 void
_PyPreConfig_InitFromConfig(PyPreConfig * preconfig,const PyConfig * config)353 _PyPreConfig_InitFromConfig(PyPreConfig *preconfig, const PyConfig *config)
354 {
355     _PyConfigInitEnum config_init = (_PyConfigInitEnum)config->_config_init;
356     switch (config_init) {
357     case _PyConfig_INIT_PYTHON:
358         PyPreConfig_InitPythonConfig(preconfig);
359         break;
360     case _PyConfig_INIT_ISOLATED:
361         PyPreConfig_InitIsolatedConfig(preconfig);
362         break;
363     case _PyConfig_INIT_COMPAT:
364     default:
365         _PyPreConfig_InitCompatConfig(preconfig);
366     }
367 
368     _PyPreConfig_GetConfig(preconfig, config);
369 }
370 
371 
372 static void
preconfig_copy(PyPreConfig * config,const PyPreConfig * config2)373 preconfig_copy(PyPreConfig *config, const PyPreConfig *config2)
374 {
375 #define COPY_ATTR(ATTR) config->ATTR = config2->ATTR
376 
377     COPY_ATTR(_config_init);
378     COPY_ATTR(parse_argv);
379     COPY_ATTR(isolated);
380     COPY_ATTR(use_environment);
381     COPY_ATTR(configure_locale);
382     COPY_ATTR(dev_mode);
383     COPY_ATTR(coerce_c_locale);
384     COPY_ATTR(coerce_c_locale_warn);
385     COPY_ATTR(utf8_mode);
386     COPY_ATTR(allocator);
387 #ifdef MS_WINDOWS
388     COPY_ATTR(legacy_windows_fs_encoding);
389 #endif
390 
391 #undef COPY_ATTR
392 }
393 
394 
395 PyObject*
_PyPreConfig_AsDict(const PyPreConfig * config)396 _PyPreConfig_AsDict(const PyPreConfig *config)
397 {
398     PyObject *dict;
399 
400     dict = PyDict_New();
401     if (dict == NULL) {
402         return NULL;
403     }
404 
405 #define SET_ITEM_INT(ATTR) \
406         do { \
407             PyObject *obj = PyLong_FromLong(config->ATTR); \
408             if (obj == NULL) { \
409                 goto fail; \
410             } \
411             int res = PyDict_SetItemString(dict, #ATTR, obj); \
412             Py_DECREF(obj); \
413             if (res < 0) { \
414                 goto fail; \
415             } \
416         } while (0)
417 
418     SET_ITEM_INT(_config_init);
419     SET_ITEM_INT(parse_argv);
420     SET_ITEM_INT(isolated);
421     SET_ITEM_INT(use_environment);
422     SET_ITEM_INT(configure_locale);
423     SET_ITEM_INT(coerce_c_locale);
424     SET_ITEM_INT(coerce_c_locale_warn);
425     SET_ITEM_INT(utf8_mode);
426 #ifdef MS_WINDOWS
427     SET_ITEM_INT(legacy_windows_fs_encoding);
428 #endif
429     SET_ITEM_INT(dev_mode);
430     SET_ITEM_INT(allocator);
431     return dict;
432 
433 fail:
434     Py_DECREF(dict);
435     return NULL;
436 
437 #undef SET_ITEM_INT
438 }
439 
440 
441 void
_PyPreConfig_GetConfig(PyPreConfig * preconfig,const PyConfig * config)442 _PyPreConfig_GetConfig(PyPreConfig *preconfig, const PyConfig *config)
443 {
444 #define COPY_ATTR(ATTR) \
445     if (config->ATTR != -1) { \
446         preconfig->ATTR = config->ATTR; \
447     }
448 
449     COPY_ATTR(parse_argv);
450     COPY_ATTR(isolated);
451     COPY_ATTR(use_environment);
452     COPY_ATTR(dev_mode);
453 
454 #undef COPY_ATTR
455 }
456 
457 
458 static void
preconfig_get_global_vars(PyPreConfig * config)459 preconfig_get_global_vars(PyPreConfig *config)
460 {
461     if (config->_config_init != _PyConfig_INIT_COMPAT) {
462         /* Python and Isolated configuration ignore global variables */
463         return;
464     }
465 
466 #define COPY_FLAG(ATTR, VALUE) \
467     if (config->ATTR < 0) { \
468         config->ATTR = VALUE; \
469     }
470 #define COPY_NOT_FLAG(ATTR, VALUE) \
471     if (config->ATTR < 0) { \
472         config->ATTR = !(VALUE); \
473     }
474 
475     COPY_FLAG(isolated, Py_IsolatedFlag);
476     COPY_NOT_FLAG(use_environment, Py_IgnoreEnvironmentFlag);
477     if (Py_UTF8Mode > 0) {
478         config->utf8_mode = Py_UTF8Mode;
479     }
480 #ifdef MS_WINDOWS
481     COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
482 #endif
483 
484 #undef COPY_FLAG
485 #undef COPY_NOT_FLAG
486 }
487 
488 
489 static void
preconfig_set_global_vars(const PyPreConfig * config)490 preconfig_set_global_vars(const PyPreConfig *config)
491 {
492 #define COPY_FLAG(ATTR, VAR) \
493     if (config->ATTR >= 0) { \
494         VAR = config->ATTR; \
495     }
496 #define COPY_NOT_FLAG(ATTR, VAR) \
497     if (config->ATTR >= 0) { \
498         VAR = !config->ATTR; \
499     }
500 
501     COPY_FLAG(isolated, Py_IsolatedFlag);
502     COPY_NOT_FLAG(use_environment, Py_IgnoreEnvironmentFlag);
503 #ifdef MS_WINDOWS
504     COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
505 #endif
506     COPY_FLAG(utf8_mode, Py_UTF8Mode);
507 
508 #undef COPY_FLAG
509 #undef COPY_NOT_FLAG
510 }
511 
512 
/* Get the value of the environment variable 'name'.

   Return NULL if 'use_environment' is zero, if the variable is not set,
   or if it is set to an empty string. Otherwise return the pointer
   returned by getenv(). 'use_environment' must not be negative. */
const char*
_Py_GetEnv(int use_environment, const char *name)
{
    assert(use_environment >= 0);

    if (use_environment) {
        const char *value = getenv(name);
        if (value != NULL && *value != '\0') {
            return value;
        }
    }
    return NULL;
}
530 
531 
/* Parse 'str' as a base-10 integer and store it into '*result'.

   Return 0 on success. Return -1, leaving '*result' unchanged, if:

   - no digits could be converted (for example the empty string);
   - characters remain after the number;
   - the value does not fit in a long (ERANGE) or in an int.

   Leading whitespace and an optional sign are accepted, as by strtol(). */
int
_Py_str_to_int(const char *str, int *result)
{
    char *end = NULL;
    errno = 0;
    long value = strtol(str, &end, 10);
    if (end == str) {
        /* strtol() performed no conversion at all: without this check
           an empty string would be accepted as 0 */
        return -1;
    }
    if (*end != '\0' || errno == ERANGE) {
        return -1;
    }
    if (value < INT_MIN || value > INT_MAX) {
        return -1;
    }

    *result = (int)value;
    return 0;
}
548 
549 
/* Raise '*flag' according to the environment variable 'name'.

   Nothing is done if _Py_GetEnv() returns NULL for the variable. Otherwise
   the value is parsed as an integer; a value which cannot be parsed or is
   negative counts as 1. '*flag' is only updated if the parsed value is
   greater than its current value. */
void
_Py_get_env_flag(int use_environment, int *flag, const char *name)
{
    const char *text = _Py_GetEnv(use_environment, name);
    if (text == NULL) {
        return;
    }

    int level;
    if (_Py_str_to_int(text, &level) < 0 || level < 0) {
        /* PYTHONDEBUG=text and PYTHONDEBUG=-2 behave as PYTHONDEBUG=1 */
        level = 1;
    }
    if (level > *flag) {
        *flag = level;
    }
}
566 
567 
568 const wchar_t*
_Py_get_xoption(const PyWideStringList * xoptions,const wchar_t * name)569 _Py_get_xoption(const PyWideStringList *xoptions, const wchar_t *name)
570 {
571     for (Py_ssize_t i=0; i < xoptions->length; i++) {
572         const wchar_t *option = xoptions->items[i];
573         size_t len;
574         wchar_t *sep = wcschr(option, L'=');
575         if (sep != NULL) {
576             len = (sep - option);
577         }
578         else {
579             len = wcslen(option);
580         }
581         if (wcsncmp(option, name, len) == 0 && name[len] == L'\0') {
582             return option;
583         }
584     }
585     return NULL;
586 }
587 
588 
589 static PyStatus
preconfig_init_utf8_mode(PyPreConfig * config,const _PyPreCmdline * cmdline)590 preconfig_init_utf8_mode(PyPreConfig *config, const _PyPreCmdline *cmdline)
591 {
592 #ifdef MS_WINDOWS
593     if (config->legacy_windows_fs_encoding) {
594         config->utf8_mode = 0;
595     }
596 #endif
597 
598     if (config->utf8_mode >= 0) {
599         return _PyStatus_OK();
600     }
601 
602     const wchar_t *xopt;
603     xopt = _Py_get_xoption(&cmdline->xoptions, L"utf8");
604     if (xopt) {
605         wchar_t *sep = wcschr(xopt, L'=');
606         if (sep) {
607             xopt = sep + 1;
608             if (wcscmp(xopt, L"1") == 0) {
609                 config->utf8_mode = 1;
610             }
611             else if (wcscmp(xopt, L"0") == 0) {
612                 config->utf8_mode = 0;
613             }
614             else {
615                 return _PyStatus_ERR("invalid -X utf8 option value");
616             }
617         }
618         else {
619             config->utf8_mode = 1;
620         }
621         return _PyStatus_OK();
622     }
623 
624     const char *opt = _Py_GetEnv(config->use_environment, "PYTHONUTF8");
625     if (opt) {
626         if (strcmp(opt, "1") == 0) {
627             config->utf8_mode = 1;
628         }
629         else if (strcmp(opt, "0") == 0) {
630             config->utf8_mode = 0;
631         }
632         else {
633             return _PyStatus_ERR("invalid PYTHONUTF8 environment "
634                                 "variable value");
635         }
636         return _PyStatus_OK();
637     }
638 
639 
640 #ifndef MS_WINDOWS
641     if (config->utf8_mode < 0) {
642         /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
643         const char *ctype_loc = setlocale(LC_CTYPE, NULL);
644         if (ctype_loc != NULL
645            && (strcmp(ctype_loc, "C") == 0
646                || strcmp(ctype_loc, "POSIX") == 0))
647         {
648             config->utf8_mode = 1;
649         }
650     }
651 #endif
652 
653     if (config->utf8_mode < 0) {
654         config->utf8_mode = 0;
655     }
656     return _PyStatus_OK();
657 }
658 
659 
660 static void
preconfig_init_coerce_c_locale(PyPreConfig * config)661 preconfig_init_coerce_c_locale(PyPreConfig *config)
662 {
663     if (!config->configure_locale) {
664         config->coerce_c_locale = 0;
665         config->coerce_c_locale_warn = 0;
666         return;
667     }
668 
669     const char *env = _Py_GetEnv(config->use_environment, "PYTHONCOERCECLOCALE");
670     if (env) {
671         if (strcmp(env, "0") == 0) {
672             if (config->coerce_c_locale < 0) {
673                 config->coerce_c_locale = 0;
674             }
675         }
676         else if (strcmp(env, "warn") == 0) {
677             if (config->coerce_c_locale_warn < 0) {
678                 config->coerce_c_locale_warn = 1;
679             }
680         }
681         else {
682             if (config->coerce_c_locale < 0) {
683                 config->coerce_c_locale = 1;
684             }
685         }
686     }
687 
688     /* Test if coerce_c_locale equals to -1 or equals to 1:
689        PYTHONCOERCECLOCALE=1 doesn't imply that the C locale is always coerced.
690        It is only coerced if if the LC_CTYPE locale is "C". */
691     if (config->coerce_c_locale < 0 || config->coerce_c_locale == 1) {
692         /* The C locale enables the C locale coercion (PEP 538) */
693         if (_Py_LegacyLocaleDetected(0)) {
694             config->coerce_c_locale = 2;
695         }
696         else {
697             config->coerce_c_locale = 0;
698         }
699     }
700 
701     if (config->coerce_c_locale_warn < 0) {
702         config->coerce_c_locale_warn = 0;
703     }
704 }
705 
706 
707 static PyStatus
preconfig_init_allocator(PyPreConfig * config)708 preconfig_init_allocator(PyPreConfig *config)
709 {
710     if (config->allocator == PYMEM_ALLOCATOR_NOT_SET) {
711         /* bpo-34247. The PYTHONMALLOC environment variable has the priority
712            over PYTHONDEV env var and "-X dev" command line option.
713            For example, PYTHONMALLOC=malloc PYTHONDEVMODE=1 sets the memory
714            allocators to "malloc" (and not to "debug"). */
715         const char *envvar = _Py_GetEnv(config->use_environment, "PYTHONMALLOC");
716         if (envvar) {
717             PyMemAllocatorName name;
718             if (_PyMem_GetAllocatorName(envvar, &name) < 0) {
719                 return _PyStatus_ERR("PYTHONMALLOC: unknown allocator");
720             }
721             config->allocator = (int)name;
722         }
723     }
724 
725     if (config->dev_mode && config->allocator == PYMEM_ALLOCATOR_NOT_SET) {
726         config->allocator = PYMEM_ALLOCATOR_DEBUG;
727     }
728     return _PyStatus_OK();
729 }
730 
731 
732 static PyStatus
preconfig_read(PyPreConfig * config,_PyPreCmdline * cmdline)733 preconfig_read(PyPreConfig *config, _PyPreCmdline *cmdline)
734 {
735     PyStatus status;
736 
737     status = _PyPreCmdline_Read(cmdline, config);
738     if (_PyStatus_EXCEPTION(status)) {
739         return status;
740     }
741 
742     precmdline_set_preconfig(cmdline, config);
743 
744     /* legacy_windows_fs_encoding, coerce_c_locale, utf8_mode */
745 #ifdef MS_WINDOWS
746     _Py_get_env_flag(config->use_environment,
747                      &config->legacy_windows_fs_encoding,
748                      "PYTHONLEGACYWINDOWSFSENCODING");
749 #endif
750 
751     preconfig_init_coerce_c_locale(config);
752 
753     status = preconfig_init_utf8_mode(config, cmdline);
754     if (_PyStatus_EXCEPTION(status)) {
755         return status;
756     }
757 
758     /* allocator */
759     status = preconfig_init_allocator(config);
760     if (_PyStatus_EXCEPTION(status)) {
761         return status;
762     }
763 
764     assert(config->coerce_c_locale >= 0);
765     assert(config->coerce_c_locale_warn >= 0);
766 #ifdef MS_WINDOWS
767     assert(config->legacy_windows_fs_encoding >= 0);
768 #endif
769     assert(config->utf8_mode >= 0);
770     assert(config->isolated >= 0);
771     assert(config->use_environment >= 0);
772     assert(config->dev_mode >= 0);
773 
774     return _PyStatus_OK();
775 }
776 
777 
778 /* Read the configuration from:
779 
780    - command line arguments
781    - environment variables
782    - Py_xxx global configuration variables
783    - the LC_CTYPE locale */
784 PyStatus
_PyPreConfig_Read(PyPreConfig * config,const _PyArgv * args)785 _PyPreConfig_Read(PyPreConfig *config, const _PyArgv *args)
786 {
787     PyStatus status;
788 
789     status = _PyRuntime_Initialize();
790     if (_PyStatus_EXCEPTION(status)) {
791         return status;
792     }
793 
794     preconfig_get_global_vars(config);
795 
796     /* Copy LC_CTYPE locale, since it's modified later */
797     const char *loc = setlocale(LC_CTYPE, NULL);
798     if (loc == NULL) {
799         return _PyStatus_ERR("failed to LC_CTYPE locale");
800     }
801     char *init_ctype_locale = _PyMem_RawStrdup(loc);
802     if (init_ctype_locale == NULL) {
803         return _PyStatus_NO_MEMORY();
804     }
805 
806     /* Save the config to be able to restore it if encodings change */
807     PyPreConfig save_config;
808 
809     status = _PyPreConfig_InitFromPreConfig(&save_config, config);
810     if (_PyStatus_EXCEPTION(status)) {
811         return status;
812     }
813 
814     /* Set LC_CTYPE to the user preferred locale */
815     if (config->configure_locale) {
816         _Py_SetLocaleFromEnv(LC_CTYPE);
817     }
818 
819     _PyPreCmdline cmdline = _PyPreCmdline_INIT;
820     int init_utf8_mode = Py_UTF8Mode;
821 #ifdef MS_WINDOWS
822     int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag;
823 #endif
824 
825     int locale_coerced = 0;
826     int loops = 0;
827 
828     while (1) {
829         int utf8_mode = config->utf8_mode;
830 
831         /* Watchdog to prevent an infinite loop */
832         loops++;
833         if (loops == 3) {
834             status = _PyStatus_ERR("Encoding changed twice while "
835                                    "reading the configuration");
836             goto done;
837         }
838 
839         /* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend
840            on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */
841         Py_UTF8Mode = config->utf8_mode;
842 #ifdef MS_WINDOWS
843         Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding;
844 #endif
845 
846         if (args) {
847             // Set command line arguments at each iteration. If they are bytes
848             // strings, they are decoded from the new encoding.
849             status = _PyPreCmdline_SetArgv(&cmdline, args);
850             if (_PyStatus_EXCEPTION(status)) {
851                 goto done;
852             }
853         }
854 
855         status = preconfig_read(config, &cmdline);
856         if (_PyStatus_EXCEPTION(status)) {
857             goto done;
858         }
859 
860         /* The legacy C locale assumes ASCII as the default text encoding, which
861          * causes problems not only for the CPython runtime, but also other
862          * components like GNU readline.
863          *
864          * Accordingly, when the CLI detects it, it attempts to coerce it to a
865          * more capable UTF-8 based alternative.
866          *
867          * See the documentation of the PYTHONCOERCECLOCALE setting for more
868          * details.
869          */
870         int encoding_changed = 0;
871         if (config->coerce_c_locale && !locale_coerced) {
872             locale_coerced = 1;
873             _Py_CoerceLegacyLocale(0);
874             encoding_changed = 1;
875         }
876 
877         if (utf8_mode == -1) {
878             if (config->utf8_mode == 1) {
879                 /* UTF-8 Mode enabled */
880                 encoding_changed = 1;
881             }
882         }
883         else {
884             if (config->utf8_mode != utf8_mode) {
885                 encoding_changed = 1;
886             }
887         }
888 
889         if (!encoding_changed) {
890             break;
891         }
892 
893         /* Reset the configuration before reading again the configuration,
894            just keep UTF-8 Mode and coerce C locale value. */
895         int new_utf8_mode = config->utf8_mode;
896         int new_coerce_c_locale = config->coerce_c_locale;
897         preconfig_copy(config, &save_config);
898         config->utf8_mode = new_utf8_mode;
899         config->coerce_c_locale = new_coerce_c_locale;
900 
901         /* The encoding changed: read again the configuration
902            with the new encoding */
903     }
904     status = _PyStatus_OK();
905 
906 done:
907     if (init_ctype_locale != NULL) {
908         setlocale(LC_CTYPE, init_ctype_locale);
909         PyMem_RawFree(init_ctype_locale);
910     }
911     Py_UTF8Mode = init_utf8_mode ;
912 #ifdef MS_WINDOWS
913     Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding;
914 #endif
915     _PyPreCmdline_Clear(&cmdline);
916     return status;
917 }
918 
919 
920 /* Write the pre-configuration:
921 
922    - set the memory allocators
923    - set Py_xxx global configuration variables
924    - set the LC_CTYPE locale (coerce C locale, PEP 538) and set the UTF-8 mode
925      (PEP 540)
926 
927    The applied configuration is written into _PyRuntime.preconfig.
928    If the C locale cannot be coerced, set coerce_c_locale to 0.
929 
930    Do nothing if called after Py_Initialize(): ignore the new
931    pre-configuration. */
932 PyStatus
_PyPreConfig_Write(const PyPreConfig * src_config)933 _PyPreConfig_Write(const PyPreConfig *src_config)
934 {
935     PyPreConfig config;
936 
937     PyStatus status = _PyPreConfig_InitFromPreConfig(&config, src_config);
938     if (_PyStatus_EXCEPTION(status)) {
939         return status;
940     }
941 
942     if (_PyRuntime.core_initialized) {
943         /* bpo-34008: Calling this functions after Py_Initialize() ignores
944            the new configuration. */
945         return _PyStatus_OK();
946     }
947 
948     PyMemAllocatorName name = (PyMemAllocatorName)config.allocator;
949     if (name != PYMEM_ALLOCATOR_NOT_SET) {
950         if (_PyMem_SetupAllocators(name) < 0) {
951             return _PyStatus_ERR("Unknown PYTHONMALLOC allocator");
952         }
953     }
954 
955     preconfig_set_global_vars(&config);
956 
957     if (config.configure_locale) {
958         if (config.coerce_c_locale) {
959             if (!_Py_CoerceLegacyLocale(config.coerce_c_locale_warn)) {
960                 /* C locale not coerced */
961                 config.coerce_c_locale = 0;
962             }
963         }
964 
965         /* Set LC_CTYPE to the user preferred locale */
966         _Py_SetLocaleFromEnv(LC_CTYPE);
967     }
968 
969     /* Write the new pre-configuration into _PyRuntime */
970     preconfig_copy(&_PyRuntime.preconfig, &config);
971 
972     return _PyStatus_OK();
973 }
974