1"""Locale support module.
2
3The module provides low-level access to the C lib's locale APIs and adds high
4level number formatting APIs as well as a locale aliasing engine to complement
5these.
6
7The aliasing engine includes support for many commonly used locale names and
8maps them to values suitable for passing to the C lib's setlocale() function. It
9also includes default encodings for all supported locale names.
10
11"""
12
13import sys
14import encodings
15import encodings.aliases
16import re
17import _collections_abc
18from builtins import str as _builtin_str
19import functools
20
21# Try importing the _locale module.
22#
23# If this fails, fall back on a basic 'C' locale emulation.
24
# LC_MESSAGES is non-standard: whether it exists cannot be known before the
# _locale import has been attempted, so it is not listed here.  As the
# original note says, __all__ is also adjusted at the end of the file.
__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
           "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
           "str", "atof", "atoi", "format", "format_string", "currency",
           "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
           "LC_NUMERIC", "LC_ALL", "CHAR_MAX", "getencoding"]
32
33def _strcoll(a,b):
34    """ strcoll(string,string) -> int.
35        Compares two strings according to the locale.
36    """
37    return (a > b) - (a < b)
38
39def _strxfrm(s):
40    """ strxfrm(string) -> string.
41        Returns a string that behaves for cmp locale-aware.
42    """
43    return s
44
# Prefer the C implementation.  If _locale cannot be imported, define a
# minimal pure-Python stand-in that only knows the 'C' locale.
try:

    from _locale import *

except ImportError:

    # Locale emulation

    # Stand-in values for the constants the star-import would have supplied.
    CHAR_MAX = 127
    LC_ALL = 6
    LC_COLLATE = 3
    LC_CTYPE = 0
    LC_MESSAGES = 5
    LC_MONETARY = 4
    LC_NUMERIC = 1
    LC_TIME = 2
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.

            This emulation returns the same fixed values on every call;
            a new dict is built each time.
        """
        # 'C' locale default values
        # (the 127 entries equal the CHAR_MAX value defined just above)
        return {'grouping': [127],
                'currency_symbol': '',
                'n_sign_posn': 127,
                'p_cs_precedes': 127,
                'n_cs_precedes': 127,
                'mon_grouping': [],
                'n_sep_by_space': 127,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': 127,
                'int_curr_symbol': '',
                'p_sign_posn': 127,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': 127,
                'mon_decimal_point': '',
                'int_frac_digits': 127}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            This emulation accepts only None, '' or 'C' as value and
            always returns 'C'; any other value raises Error.
        """
        # category is accepted for interface compatibility but is not
        # inspected by the emulation.
        if value not in (None, '', 'C'):
            raise Error('_locale emulation only supports "C" locale')
        return 'C'
94
# _locale is not guaranteed to provide these two functions; install the
# pure-Python fallbacks only for the names that are still missing.
globals().setdefault('strxfrm', _strxfrm)
globals().setdefault('strcoll', _strcoll)
100
101
# Keep a reference to the low-level implementation; the public name is
# rebound to the wrapper defined below.
_localeconv = localeconv

# Items placed in this dict replace the matching items of localeconv()'s
# return value.  This is useful for testing purposes.
_override_localeconv = {}

@functools.wraps(_localeconv)
def localeconv():
    # Start from the low-level result and layer any overrides on top.
    conv = _localeconv()
    if _override_localeconv:
        conv.update(_override_localeconv)
    return conv
114
115
116### Number formatting APIs
117
118# Author: Martin von Loewis
119# improved by Georg Brandl
120
121# Iterate over grouping intervals
122def _grouping_intervals(grouping):
123    last_interval = None
124    for interval in grouping:
125        # if grouping is -1, we are done
126        if interval == CHAR_MAX:
127            return
128        # 0: re-use last group ad infinitum
129        if interval == 0:
130            if last_interval is None:
131                raise ValueError("invalid grouping")
132            while True:
133                yield last_interval
134        yield interval
135        last_interval = interval
136
137#perform the grouping from right to left
138def _group(s, monetary=False):
139    conv = localeconv()
140    thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep']
141    grouping = conv[monetary and 'mon_grouping' or 'grouping']
142    if not grouping:
143        return (s, 0)
144    if s[-1] == ' ':
145        stripped = s.rstrip()
146        right_spaces = s[len(stripped):]
147        s = stripped
148    else:
149        right_spaces = ''
150    left_spaces = ''
151    groups = []
152    for interval in _grouping_intervals(grouping):
153        if not s or s[-1] not in "0123456789":
154            # only non-digit characters remain (sign, spaces)
155            left_spaces = s
156            s = ''
157            break
158        groups.append(s[-interval:])
159        s = s[:-interval]
160    if s:
161        groups.append(s)
162    groups.reverse()
163    return (
164        left_spaces + thousands_sep.join(groups) + right_spaces,
165        len(thousands_sep) * (len(groups) - 1)
166    )
167
168# Strip a given amount of excess padding from the given string
169def _strip_padding(s, amount):
170    lpos = 0
171    while amount and s[lpos] == ' ':
172        lpos += 1
173        amount -= 1
174    rpos = len(s) - 1
175    while amount and s[rpos] == ' ':
176        rpos -= 1
177        amount -= 1
178    return s[lpos:rpos+1]
179
# Matches a single %-style conversion specifier: '%', an optional
# parenthesised mapping key (group 'key'), optional flag/width/precision/
# length characters (group 'modifiers'), and a final conversion character,
# which may also be '%' for a literal percent sign.
_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
                         r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
182
183def _format(percent, value, grouping=False, monetary=False, *additional):
184    if additional:
185        formatted = percent % ((value,) + additional)
186    else:
187        formatted = percent % value
188    if percent[-1] in 'eEfFgGdiu':
189        formatted = _localize(formatted, grouping, monetary)
190    return formatted
191
192# Transform formatted as locale number according to the locale settings
193def _localize(formatted, grouping=False, monetary=False):
194    # floats and decimal ints need special action!
195    if '.' in formatted:
196        seps = 0
197        parts = formatted.split('.')
198        if grouping:
199            parts[0], seps = _group(parts[0], monetary=monetary)
200        decimal_point = localeconv()[monetary and 'mon_decimal_point'
201                                              or 'decimal_point']
202        formatted = decimal_point.join(parts)
203        if seps:
204            formatted = _strip_padding(formatted, seps)
205    else:
206        seps = 0
207        if grouping:
208            formatted, seps = _group(formatted, monetary=monetary)
209        if seps:
210            formatted = _strip_padding(formatted, seps)
211    return formatted
212
def format_string(f, val, grouping=False, monetary=False):
    """Format val with the %-style format string f, honouring the locale.

    val may be a mapping, a tuple, or a single value; a single value is
    treated as a one-element tuple.

    Grouping is applied if the third parameter is true.
    Conversion uses the monetary thousands separator and grouping strings
    if the fourth parameter, monetary, is true."""
    specifiers = list(_percent_re.finditer(f))
    # Every specifier is formatted on its own and then substituted as text.
    template = _percent_re.sub('%s', f)

    pieces = []
    if isinstance(val, _collections_abc.Mapping):
        for found in specifiers:
            spec = found.group()
            if spec[-1] == '%':
                pieces.append('%')
            else:
                pieces.append(_format(spec, val, grouping, monetary))
    else:
        if not isinstance(val, tuple):
            val = (val,)
        position = 0
        for found in specifiers:
            spec = found.group()
            if spec[-1] == '%':
                pieces.append('%')
                continue
            # each '*' in the modifiers takes one extra argument from val
            stars = found.group('modifiers').count('*')
            following = position + 1 + stars
            pieces.append(_format(spec,
                                  val[position],
                                  grouping,
                                  monetary,
                                  *val[position + 1:following]))
            position = following

    return template % tuple(pieces)
249
def format(percent, value, grouping=False, monetary=False, *additional):
    """Deprecated, use format_string instead.

    percent must consist of exactly one %-specifier; anything else raises
    ValueError.  A DeprecationWarning is issued on every call.
    """
    import warnings
    warnings.warn(
        "This method will be removed in a future version of Python. "
        "Use 'locale.format_string()' instead.",
        DeprecationWarning, stacklevel=2
    )

    specifier = _percent_re.match(percent)
    # The specifier has to span the whole of percent.
    if specifier is None or specifier.end() != len(percent):
        raise ValueError(("format() must be given exactly one %%char "
                         "format specifier, %s not valid") % repr(percent))
    return _format(percent, value, grouping, monetary, *additional)
264
def currency(val, symbol=True, grouping=False, international=False):
    """Format val according to the currency settings of the current locale.

    The absolute value of val is formatted with the locale's number of
    fractional digits and then decorated with currency symbol and sign.

    symbol: if true, include the currency symbol.
    grouping: if true, group the digits using the monetary grouping.
    international: if true, use 'int_curr_symbol' and 'int_frac_digits'
        instead of 'currency_symbol' and 'frac_digits'.

    Raises ValueError if the locale does not define the number of
    fractional digits, as is the case for the 'C' locale.
    """
    conv = localeconv()

    # check for illegal values
    digits = conv['int_frac_digits' if international else 'frac_digits']
    # An undefined value is reported as CHAR_MAX, which is not 127 on every
    # platform.  The literal 127 is still accepted for compatibility.
    if digits == 127 or digits == CHAR_MAX:
        raise ValueError("Currency formatting is not possible using "
                         "the 'C' locale.")

    s = _localize(f'{abs(val):.{digits}f}', grouping, monetary=True)
    # '<' and '>' are markers if the sign must be inserted between symbol and value
    s = '<' + s + '>'

    negative = val < 0
    if symbol:
        smb = conv['int_curr_symbol' if international else 'currency_symbol']
        precedes = conv['n_cs_precedes' if negative else 'p_cs_precedes']
        separated = conv['n_sep_by_space' if negative else 'p_sep_by_space']
        gap = ' ' if separated else ''

        if precedes:
            s = smb + gap + s
        else:
            # Drop the trailing space of an international symbol when the
            # symbol goes last.  endswith() is also safe if it is empty.
            if international and smb.endswith(' '):
                smb = smb[:-1]
            s = s + gap + smb

    sign_pos = conv['n_sign_posn' if negative else 'p_sign_posn']
    sign = conv['negative_sign' if negative else 'positive_sign']

    if sign_pos == 0:
        s = '(' + s + ')'
    elif sign_pos == 1:
        s = sign + s
    elif sign_pos == 2:
        s = s + sign
    elif sign_pos == 3:
        s = s.replace('<', sign)
    elif sign_pos == 4:
        s = s.replace('>', sign)
    else:
        # the default if nothing specified;
        # this should be the most fitting sign position
        s = sign + s

    # remove whichever markers were not replaced by the sign
    return s.replace('<', '').replace('>', '')
311
def str(val):
    """Convert float to string, taking the locale into account.

    The value is formatted with the '%.12g' specifier.
    """
    text = _format("%.12g", val)
    return text
315
def delocalize(string):
    """Turn a locale-formatted number string into a normalized one.

    The locale's thousands separator is removed and its decimal point is
    replaced with '.'; a setting that is empty is skipped.
    """
    conv = localeconv()

    # First get rid of the grouping, then turn the decimal point into a dot.
    for setting, replacement in (('thousands_sep', ''), ('decimal_point', '.')):
        separator = conv[setting]
        if separator:
            string = string.replace(separator, replacement)
    return string
331
def localize(string, grouping=False, monetary=False):
    """Format a normalized number string according to the locale settings.

    Public entry point for _localize(); grouping and monetary are passed
    through unchanged.
    """
    localized = _localize(string, grouping, monetary)
    return localized
335
def atof(string, func=float):
    """Parse a string as a float according to the locale settings.

    The string is normalized with delocalize() and then converted by
    func, which defaults to float.
    """
    normalized = delocalize(string)
    return func(normalized)
339
def atoi(string):
    """Convert a string to an integer according to the locale settings.

    The string is normalized with delocalize() and then passed to int().
    """
    normalized = delocalize(string)
    return int(normalized)
343
def _test():
    """Print a small demonstration using the user's default locale."""
    setlocale(LC_ALL, "")
    # formatting with grouping, then parsing the result back
    grouped = format_string("%d", 123456789,1)
    print(grouped, "is", atoi(grouped))
    # standard formatting (str here is this module's locale-aware str)
    plain = str(3.14)
    print(plain, "is", atof(plain))
352
353### Locale name aliasing engine
354
# Author: Marc-Andre Lemburg, mal@lemburg.com
# Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
357
# Store away the low-level version of setlocale under a private name:
# the public name is overridden further below by a wrapper that also
# accepts non-string locale arguments.
_setlocale = setlocale
361
def _replace_encoding(code, encoding):
    """Return code with its encoding part replaced by encoding.

    Anything from the first '.' of code onwards is dropped.  The new
    encoding is normalized and resolved through the Python codec aliases,
    then mapped to a C lib encoding name via locale_encoding_alias when
    an entry exists there.
    """
    langname = code.partition('.')[0]
    # Convert the encoding to a C lib compatible encoding string
    canonical = encodings.normalize_encoding(encoding)
    canonical = encodings.aliases.aliases.get(canonical.lower(), canonical)
    alias_key = canonical.lower()
    if alias_key not in locale_encoding_alias:
        # second chance: retry without '_' and '-'
        alias_key = alias_key.replace('_', '').replace('-', '')
    # without a table entry the codec-level name is used as is
    encoding = locale_encoding_alias.get(alias_key, canonical)
    return langname + '.' + encoding
384
385def _append_modifier(code, modifier):
386    if modifier == 'euro':
387        if '.' not in code:
388            return code + '.ISO8859-15'
389        _, _, encoding = code.partition('.')
390        if encoding in ('ISO8859-15', 'UTF-8'):
391            return code
392        if encoding == 'ISO8859-1':
393            return _replace_encoding(code, 'ISO8859-15')
394    return code + '@' + modifier
395
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        The name is looked up in the locale_alias table in up to
        four steps, each one less specific than the one before
        (dropping the modifier, the encoding, or both).  Whatever
        was dropped is applied to the locale code that was found.

        If normalization fails, the original name is returned
        unchanged.

        If the given encoding is not known, the function defaults to
        the default encoding for the locale code just like setlocale()
        does.

    """
    # Normalize the locale name and extract the encoding and modifier.
    # ':' is sometimes used as encoding delimiter, so treat it like '.'.
    code = localename.lower().replace(':', '.')
    code, _, modifier = code.partition('@')
    langname, _, remainder = code.partition('.')
    # only the first dotted component after the language is the encoding
    encoding = remainder.partition('.')[0]

    # First lookup: fullname (possibly with encoding and modifier)
    lang_enc = langname
    if encoding:
        lang_enc += '.' + encoding.replace('-', '').replace('_', '')
    full_name = (lang_enc + '@' + modifier) if modifier else lang_enc
    found = locale_alias.get(full_name)
    if found is not None:
        return found

    if modifier:
        # Second try: fullname without modifier (possibly with encoding)
        found = locale_alias.get(lang_enc)
        if found is not None:
            if '@' not in found:
                return _append_modifier(found, modifier)
            if found.split('@', 1)[1].lower() == modifier:
                return found

    # The remaining lookups only apply when an encoding was given.
    if not encoding:
        return localename

    # Third try: langname (without encoding, possibly with modifier)
    short_name = (langname + '@' + modifier) if modifier else langname
    found = locale_alias.get(short_name)
    if found is not None:
        base, at, found_modifier = found.partition('@')
        replaced = _replace_encoding(base, encoding)
        if not at:
            return replaced
        return replaced + '@' + found_modifier

    if modifier:
        # Fourth try: langname (without encoding and modifier)
        found = locale_alias.get(langname)
        if found is not None:
            base, at, defmod = found.partition('@')
            if not at:
                return _append_modifier(_replace_encoding(base, encoding),
                                        modifier)
            if defmod.lower() == modifier:
                return _replace_encoding(base, encoding) + '@' + defmod

    return localename
478
def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is first passed through normalize().  A
        ValueError is raised in case the resulting code cannot be
        parsed.

        The language code corresponds to RFC 1766.  code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

    """
    code = normalize(localename)
    base, at, modifier = code.partition('@')
    if at:
        # Deal with locale modifiers
        code = base
        if modifier == 'euro' and '.' not in code:
            # Assume Latin-9 for @euro locales. This is bogus,
            # since some systems may use other encodings for these
            # locales. Also, we ignore other modifiers.
            return code, 'iso-8859-15'

    if '.' in code:
        return tuple(code.split('.')[:2])
    if code == 'C':
        return None, None
    if code == 'UTF-8':
        # On macOS "LC_CTYPE=UTF-8" is a valid locale setting
        # for getting UTF-8 handling for text.
        return None, 'UTF-8'
    raise ValueError('unknown locale: %s' % localename)
512
513def _build_localename(localetuple):
514
515    """ Builds a locale code from the given tuple (language code,
516        encoding).
517
518        No aliasing or normalizing takes place.
519
520    """
521    try:
522        language, encoding = localetuple
523
524        if language is None:
525            language = 'C'
526        if encoding is None:
527            return language
528        else:
529            return language + '.' + encoding
530    except (TypeError, ValueError):
531        raise TypeError('Locale must be None, a string, or an iterable of '
532                        'two strings -- language code, encoding.') from None
533
def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        Deprecated: a DeprecationWarning is issued on every call.

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable. Since we don't want to interfere
        with the current locale setting we thus emulate the behavior
        in the way described above.

        To maintain compatibility with other platforms, not only the
        LANG variable is tested, but a list of variables given as
        envvars parameter. The first one found to be set to a
        non-empty value is used. envvars defaults to the search path
        used in GNU gettext; it must always contain the variable name
        'LANG'.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """

    import warnings
    warnings.warn(
        "Use setlocale(), getencoding() and getlocale() instead",
        DeprecationWarning, stacklevel=2
    )

    try:
        # check if it's supported by the _locale module
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        pass
    else:
        # make sure the code/encoding values are valid
        is_windows_id = (sys.platform == "win32" and code
                         and code[:2] == "0x")
        if is_windows_id:
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour
    import os
    localename = 'C'
    for variable in envvars:
        candidate = os.environ.get(variable)
        if not candidate:
            continue
        if variable == 'LANGUAGE':
            # LANGUAGE may hold a ':'-separated list; use the first entry
            candidate = candidate.split(':')[0]
        localename = candidate
        break
    return _parse_localename(localename)
591
592
def getlocale(category=LC_CTYPE):

    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        category may be one of the LC_* values except LC_ALL. It
        defaults to LC_CTYPE.  For LC_ALL a TypeError is raised when
        the current setting contains a ';'.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """
    # Calling the low-level setlocale without a value only queries.
    current = _setlocale(category)
    is_composite = category == LC_ALL and ';' in current
    if is_composite:
        raise TypeError('category LC_ALL is not supported')
    return _parse_localename(current)
610
def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, an iterable of two strings (language code and encoding),
        or None.

        Iterables are converted to strings using the locale aliasing
        engine.  Locale strings are passed directly to the C lib.

        category may be given as one of the LC_* values.

        Returns the result of the low-level setlocale call.

    """
    # Strings and false values (None, '') go to the C lib untouched.
    if not locale or isinstance(locale, _builtin_str):
        return _setlocale(category, locale)
    # anything else is treated as a (language code, encoding) pair
    return _setlocale(category, normalize(_build_localename(locale)))
627
def resetlocale(category=LC_ALL):

    """ Sets the locale for category to the default setting.

        Deprecated: a DeprecationWarning is issued on every call.

        The default setting is determined by calling
        getdefaultlocale(). category defaults to LC_ALL.

    """
    import warnings
    warnings.warn(
        'Use locale.setlocale(locale.LC_ALL, "") instead',
        DeprecationWarning, stacklevel=2
    )

    # getdefaultlocale() is deprecated as well; silence its own warning
    # so that callers see only the one issued above.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', category=DeprecationWarning)
        default_locale = getdefaultlocale()

    default_name = _build_localename(default_locale)
    _setlocale(category, default_name)
647
648
# Use the C implementation when _locale provides one; otherwise derive the
# encoding from the default locale.
try:
    from _locale import getencoding
except ImportError:
    def getencoding():
        """Return the encoding of the default locale, or 'utf-8'."""
        if hasattr(sys, 'getandroidapilevel'):
            # On Android langinfo.h and CODESET are missing, and UTF-8 is
            # always used in mbstowcs() and wcstombs().
            return 'utf-8'
        encoding = getdefaultlocale()[1]
        # LANG not set, default to UTF-8
        return 'utf-8' if encoding is None else encoding
662
# Evaluating the bare name probes whether CODESET is defined in this
# module's namespace; the matching variant of getpreferredencoding() is
# then defined.
try:
    CODESET
except NameError:
    def getpreferredencoding(do_setlocale=True):
        """Return the charset that the user is likely using.

        do_setlocale is accepted for interface compatibility; this
        variant does not use it.
        """
        if sys.flags.warn_default_encoding:
            import warnings
            warnings.warn(
                "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.",
                EncodingWarning, 2)
        # UTF-8 mode takes precedence over the locale's encoding
        if sys.flags.utf8_mode:
            return 'utf-8'
        return getencoding()
else:
    # On Unix, if CODESET is available, use that.
    def getpreferredencoding(do_setlocale=True):
        """Return the charset that the user is likely using,
        according to the system configuration.

        If do_setlocale is true, LC_CTYPE is temporarily switched to the
        user's default locale while the encoding is queried, and is
        restored afterwards.
        """

        if sys.flags.warn_default_encoding:
            import warnings
            warnings.warn(
                "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.",
                EncodingWarning, 2)
        # UTF-8 mode takes precedence over the locale's encoding
        if sys.flags.utf8_mode:
            return 'utf-8'

        if not do_setlocale:
            return getencoding()

        # remember the current LC_CTYPE setting so it can be restored
        old_loc = setlocale(LC_CTYPE)
        try:
            try:
                setlocale(LC_CTYPE, "")
            except Error:
                # best effort: keep the current setting if switching fails
                pass
            return getencoding()
        finally:
            setlocale(LC_CTYPE, old_loc)
702
703
704### Database
705#
706# The following data was extracted from the locale.alias file which
707# comes with X11 and then hand edited removing the explicit encoding
708# definitions and adding some more aliases. The file is usually
709# available as /usr/lib/X11/locale/locale.alias.
710#
711
#
# The locale_encoding_alias table maps lowercase encoding alias names
# to C locale encoding names (case-sensitive). Note that
# _replace_encoding() first looks up the encoding in the
# encodings.aliases dictionary and then applies this mapping to find the
# correct C lib name for the encoding.
#
locale_encoding_alias = {

    # Mappings for non-standard encoding names used in locale names
    '437':                          'C',
    'c':                            'C',
    'en':                           'ISO8859-1',
    'jis':                          'JIS7',
    'jis7':                         'JIS7',
    'ajec':                         'eucJP',
    'koi8c':                        'KOI8-C',
    'microsoftcp1251':              'CP1251',
    'microsoftcp1255':              'CP1255',
    'microsoftcp1256':              'CP1256',
    '88591':                        'ISO8859-1',
    '88592':                        'ISO8859-2',
    '88595':                        'ISO8859-5',
    '885915':                       'ISO8859-15',

    # Mappings from Python codec names to C lib encoding names
    'ascii':                        'ISO8859-1',
    'latin_1':                      'ISO8859-1',
    'iso8859_1':                    'ISO8859-1',
    'iso8859_10':                   'ISO8859-10',
    'iso8859_11':                   'ISO8859-11',
    'iso8859_13':                   'ISO8859-13',
    'iso8859_14':                   'ISO8859-14',
    'iso8859_15':                   'ISO8859-15',
    'iso8859_16':                   'ISO8859-16',
    'iso8859_2':                    'ISO8859-2',
    'iso8859_3':                    'ISO8859-3',
    'iso8859_4':                    'ISO8859-4',
    'iso8859_5':                    'ISO8859-5',
    'iso8859_6':                    'ISO8859-6',
    'iso8859_7':                    'ISO8859-7',
    'iso8859_8':                    'ISO8859-8',
    'iso8859_9':                    'ISO8859-9',
    'iso2022_jp':                   'JIS7',
    'shift_jis':                    'SJIS',
    'tactis':                       'TACTIS',
    'euc_jp':                       'eucJP',
    'euc_kr':                       'eucKR',
    'utf_8':                        'UTF-8',
    'koi8_r':                       'KOI8-R',
    'koi8_t':                       'KOI8-T',
    'koi8_u':                       'KOI8-U',
    'kz1048':                       'RK1048',
    'cp1251':                       'CP1251',
    'cp1255':                       'CP1255',
    'cp1256':                       'CP1256',

    # XXX This list is still incomplete. If you know more
    # mappings, please file a bug report. Thanks.
}
772
# Also register every alias under its underscore-free spelling.  The loop
# runs over a sorted snapshot of the items, so the table can be extended
# while iterating; setdefault() never overwrites an existing entry.
for k, v in sorted(locale_encoding_alias.items()):
    locale_encoding_alias.setdefault(k.replace('_', ''), v)
del k, v
777
778#
779# The locale_alias table maps lowercase alias names to C locale names
780# (case-sensitive). Encodings are always separated from the locale
781# name using a dot ('.'); they should only be given in case the
782# language name is needed to interpret the given encoding alias
783# correctly (CJK codes often have this need).
784#
# Note that the normalize() function, which uses this table,
# removes '_' and '-' characters from the encoding part of the
787# locale name before doing the lookup. This saves a lot of
788# space in the table.
789#
790# MAL 2004-12-10:
791# Updated alias mapping to most recent locale.alias file
792# from X.org distribution using makelocalealias.py.
793#
794# These are the differences compared to the old mapping (Python 2.4
795# and older):
796#
797#    updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
798#    updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
799#    updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
800#    updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
801#    updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
802#    updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
803#    updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
804#    updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
805#    updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
806#    updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
807#    updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
808#    updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
809#    updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
810#    updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
811#    updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
812#    updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
813#    updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
814#    updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
815#    updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
816#    updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
817#    updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
818#    updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
819#
820# MAL 2008-05-30:
821# Updated alias mapping to most recent locale.alias file
822# from X.org distribution using makelocalealias.py.
823#
824# These are the differences compared to the old mapping (Python 2.5
825# and older):
826#
827#    updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2'
828#    updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
829#    updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
830#    updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2'
831#    updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
832#    updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
833#    updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
834#    updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
835#    updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
836#    updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
837#    updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2'
838#    updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
839#    updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
840#    updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
841#    updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
842#    updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
843#    updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
844#    updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8'
845#    updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
846#
847# AP 2010-04-12:
848# Updated alias mapping to most recent locale.alias file
849# from X.org distribution using makelocalealias.py.
850#
851# These are the differences compared to the old mapping (Python 2.6.5
852# and older):
853#
854#    updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
855#    updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
856#    updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
857#    updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
858#    updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
859#    updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
860#    updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
861#    updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
862#    updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin'
863#    updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
864#    updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin'
865#    updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8'
866#    updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
867#
868# SS 2013-12-20:
869# Updated alias mapping to most recent locale.alias file
870# from X.org distribution using makelocalealias.py.
871#
872# These are the differences compared to the old mapping (Python 3.3.3
873# and older):
874#
875#    updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
876#    updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
877#    updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
878#    updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
879#    updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
880#    updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
#    updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8'
882#    updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
883#    updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8'
884#    updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
885#    updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
886#
887# SS 2014-10-01:
888# Updated alias mapping with glibc 2.19 supported locales.
889#
890# SS 2018-05-05:
891# Updated alias mapping with glibc 2.27 supported locales.
892#
893# These are the differences compared to the old mapping (Python 3.6.5
894# and older):
895#
896#    updated 'ca_es@valencia' -> 'ca_ES.ISO8859-15@valencia' to 'ca_ES.UTF-8@valencia'
897#    updated 'kk_kz' -> 'kk_KZ.RK1048' to 'kk_KZ.ptcp154'
898#    updated 'russian' -> 'ru_RU.ISO8859-5' to 'ru_RU.KOI8-R'
899
900locale_alias = {
901    'a3':                                   'az_AZ.KOI8-C',
902    'a3_az':                                'az_AZ.KOI8-C',
903    'a3_az.koic':                           'az_AZ.KOI8-C',
904    'aa_dj':                                'aa_DJ.ISO8859-1',
905    'aa_er':                                'aa_ER.UTF-8',
906    'aa_et':                                'aa_ET.UTF-8',
907    'af':                                   'af_ZA.ISO8859-1',
908    'af_za':                                'af_ZA.ISO8859-1',
909    'agr_pe':                               'agr_PE.UTF-8',
910    'ak_gh':                                'ak_GH.UTF-8',
911    'am':                                   'am_ET.UTF-8',
912    'am_et':                                'am_ET.UTF-8',
913    'american':                             'en_US.ISO8859-1',
914    'an_es':                                'an_ES.ISO8859-15',
915    'anp_in':                               'anp_IN.UTF-8',
916    'ar':                                   'ar_AA.ISO8859-6',
917    'ar_aa':                                'ar_AA.ISO8859-6',
918    'ar_ae':                                'ar_AE.ISO8859-6',
919    'ar_bh':                                'ar_BH.ISO8859-6',
920    'ar_dz':                                'ar_DZ.ISO8859-6',
921    'ar_eg':                                'ar_EG.ISO8859-6',
922    'ar_in':                                'ar_IN.UTF-8',
923    'ar_iq':                                'ar_IQ.ISO8859-6',
924    'ar_jo':                                'ar_JO.ISO8859-6',
925    'ar_kw':                                'ar_KW.ISO8859-6',
926    'ar_lb':                                'ar_LB.ISO8859-6',
927    'ar_ly':                                'ar_LY.ISO8859-6',
928    'ar_ma':                                'ar_MA.ISO8859-6',
929    'ar_om':                                'ar_OM.ISO8859-6',
930    'ar_qa':                                'ar_QA.ISO8859-6',
931    'ar_sa':                                'ar_SA.ISO8859-6',
932    'ar_sd':                                'ar_SD.ISO8859-6',
933    'ar_ss':                                'ar_SS.UTF-8',
934    'ar_sy':                                'ar_SY.ISO8859-6',
935    'ar_tn':                                'ar_TN.ISO8859-6',
936    'ar_ye':                                'ar_YE.ISO8859-6',
937    'arabic':                               'ar_AA.ISO8859-6',
938    'as':                                   'as_IN.UTF-8',
939    'as_in':                                'as_IN.UTF-8',
940    'ast_es':                               'ast_ES.ISO8859-15',
941    'ayc_pe':                               'ayc_PE.UTF-8',
942    'az':                                   'az_AZ.ISO8859-9E',
943    'az_az':                                'az_AZ.ISO8859-9E',
944    'az_az.iso88599e':                      'az_AZ.ISO8859-9E',
945    'az_ir':                                'az_IR.UTF-8',
946    'be':                                   'be_BY.CP1251',
947    'be@latin':                             'be_BY.UTF-8@latin',
948    'be_bg.utf8':                           'bg_BG.UTF-8',
949    'be_by':                                'be_BY.CP1251',
950    'be_by@latin':                          'be_BY.UTF-8@latin',
951    'bem_zm':                               'bem_ZM.UTF-8',
952    'ber_dz':                               'ber_DZ.UTF-8',
953    'ber_ma':                               'ber_MA.UTF-8',
954    'bg':                                   'bg_BG.CP1251',
955    'bg_bg':                                'bg_BG.CP1251',
956    'bhb_in.utf8':                          'bhb_IN.UTF-8',
957    'bho_in':                               'bho_IN.UTF-8',
958    'bho_np':                               'bho_NP.UTF-8',
959    'bi_vu':                                'bi_VU.UTF-8',
960    'bn_bd':                                'bn_BD.UTF-8',
961    'bn_in':                                'bn_IN.UTF-8',
962    'bo_cn':                                'bo_CN.UTF-8',
963    'bo_in':                                'bo_IN.UTF-8',
964    'bokmal':                               'nb_NO.ISO8859-1',
965    'bokm\xe5l':                            'nb_NO.ISO8859-1',
966    'br':                                   'br_FR.ISO8859-1',
967    'br_fr':                                'br_FR.ISO8859-1',
968    'brx_in':                               'brx_IN.UTF-8',
969    'bs':                                   'bs_BA.ISO8859-2',
970    'bs_ba':                                'bs_BA.ISO8859-2',
971    'bulgarian':                            'bg_BG.CP1251',
972    'byn_er':                               'byn_ER.UTF-8',
973    'c':                                    'C',
974    'c-french':                             'fr_CA.ISO8859-1',
975    'c.ascii':                              'C',
976    'c.en':                                 'C',
977    'c.iso88591':                           'en_US.ISO8859-1',
978    'c.utf8':                               'en_US.UTF-8',
979    'c_c':                                  'C',
980    'c_c.c':                                'C',
981    'ca':                                   'ca_ES.ISO8859-1',
982    'ca_ad':                                'ca_AD.ISO8859-1',
983    'ca_es':                                'ca_ES.ISO8859-1',
984    'ca_es@valencia':                       'ca_ES.UTF-8@valencia',
985    'ca_fr':                                'ca_FR.ISO8859-1',
986    'ca_it':                                'ca_IT.ISO8859-1',
987    'catalan':                              'ca_ES.ISO8859-1',
988    'ce_ru':                                'ce_RU.UTF-8',
989    'cextend':                              'en_US.ISO8859-1',
990    'chinese-s':                            'zh_CN.eucCN',
991    'chinese-t':                            'zh_TW.eucTW',
992    'chr_us':                               'chr_US.UTF-8',
993    'ckb_iq':                               'ckb_IQ.UTF-8',
994    'cmn_tw':                               'cmn_TW.UTF-8',
995    'crh_ua':                               'crh_UA.UTF-8',
996    'croatian':                             'hr_HR.ISO8859-2',
997    'cs':                                   'cs_CZ.ISO8859-2',
998    'cs_cs':                                'cs_CZ.ISO8859-2',
999    'cs_cz':                                'cs_CZ.ISO8859-2',
1000    'csb_pl':                               'csb_PL.UTF-8',
1001    'cv_ru':                                'cv_RU.UTF-8',
1002    'cy':                                   'cy_GB.ISO8859-1',
1003    'cy_gb':                                'cy_GB.ISO8859-1',
1004    'cz':                                   'cs_CZ.ISO8859-2',
1005    'cz_cz':                                'cs_CZ.ISO8859-2',
1006    'czech':                                'cs_CZ.ISO8859-2',
1007    'da':                                   'da_DK.ISO8859-1',
1008    'da_dk':                                'da_DK.ISO8859-1',
1009    'danish':                               'da_DK.ISO8859-1',
1010    'dansk':                                'da_DK.ISO8859-1',
1011    'de':                                   'de_DE.ISO8859-1',
1012    'de_at':                                'de_AT.ISO8859-1',
1013    'de_be':                                'de_BE.ISO8859-1',
1014    'de_ch':                                'de_CH.ISO8859-1',
1015    'de_de':                                'de_DE.ISO8859-1',
1016    'de_it':                                'de_IT.ISO8859-1',
1017    'de_li.utf8':                           'de_LI.UTF-8',
1018    'de_lu':                                'de_LU.ISO8859-1',
1019    'deutsch':                              'de_DE.ISO8859-1',
1020    'doi_in':                               'doi_IN.UTF-8',
1021    'dutch':                                'nl_NL.ISO8859-1',
1022    'dutch.iso88591':                       'nl_BE.ISO8859-1',
1023    'dv_mv':                                'dv_MV.UTF-8',
1024    'dz_bt':                                'dz_BT.UTF-8',
1025    'ee':                                   'ee_EE.ISO8859-4',
1026    'ee_ee':                                'ee_EE.ISO8859-4',
1027    'eesti':                                'et_EE.ISO8859-1',
1028    'el':                                   'el_GR.ISO8859-7',
1029    'el_cy':                                'el_CY.ISO8859-7',
1030    'el_gr':                                'el_GR.ISO8859-7',
1031    'el_gr@euro':                           'el_GR.ISO8859-15',
1032    'en':                                   'en_US.ISO8859-1',
1033    'en_ag':                                'en_AG.UTF-8',
1034    'en_au':                                'en_AU.ISO8859-1',
1035    'en_be':                                'en_BE.ISO8859-1',
1036    'en_bw':                                'en_BW.ISO8859-1',
1037    'en_ca':                                'en_CA.ISO8859-1',
1038    'en_dk':                                'en_DK.ISO8859-1',
1039    'en_dl.utf8':                           'en_DL.UTF-8',
1040    'en_gb':                                'en_GB.ISO8859-1',
1041    'en_hk':                                'en_HK.ISO8859-1',
1042    'en_ie':                                'en_IE.ISO8859-1',
1043    'en_il':                                'en_IL.UTF-8',
1044    'en_in':                                'en_IN.ISO8859-1',
1045    'en_ng':                                'en_NG.UTF-8',
1046    'en_nz':                                'en_NZ.ISO8859-1',
1047    'en_ph':                                'en_PH.ISO8859-1',
1048    'en_sc.utf8':                           'en_SC.UTF-8',
1049    'en_sg':                                'en_SG.ISO8859-1',
1050    'en_uk':                                'en_GB.ISO8859-1',
1051    'en_us':                                'en_US.ISO8859-1',
1052    'en_us@euro@euro':                      'en_US.ISO8859-15',
1053    'en_za':                                'en_ZA.ISO8859-1',
1054    'en_zm':                                'en_ZM.UTF-8',
1055    'en_zw':                                'en_ZW.ISO8859-1',
1056    'en_zw.utf8':                           'en_ZS.UTF-8',
1057    'eng_gb':                               'en_GB.ISO8859-1',
1058    'english':                              'en_EN.ISO8859-1',
1059    'english.iso88591':                     'en_US.ISO8859-1',
1060    'english_uk':                           'en_GB.ISO8859-1',
1061    'english_united-states':                'en_US.ISO8859-1',
1062    'english_united-states.437':            'C',
1063    'english_us':                           'en_US.ISO8859-1',
1064    'eo':                                   'eo_XX.ISO8859-3',
1065    'eo.utf8':                              'eo.UTF-8',
1066    'eo_eo':                                'eo_EO.ISO8859-3',
1067    'eo_us.utf8':                           'eo_US.UTF-8',
1068    'eo_xx':                                'eo_XX.ISO8859-3',
1069    'es':                                   'es_ES.ISO8859-1',
1070    'es_ar':                                'es_AR.ISO8859-1',
1071    'es_bo':                                'es_BO.ISO8859-1',
1072    'es_cl':                                'es_CL.ISO8859-1',
1073    'es_co':                                'es_CO.ISO8859-1',
1074    'es_cr':                                'es_CR.ISO8859-1',
1075    'es_cu':                                'es_CU.UTF-8',
1076    'es_do':                                'es_DO.ISO8859-1',
1077    'es_ec':                                'es_EC.ISO8859-1',
1078    'es_es':                                'es_ES.ISO8859-1',
1079    'es_gt':                                'es_GT.ISO8859-1',
1080    'es_hn':                                'es_HN.ISO8859-1',
1081    'es_mx':                                'es_MX.ISO8859-1',
1082    'es_ni':                                'es_NI.ISO8859-1',
1083    'es_pa':                                'es_PA.ISO8859-1',
1084    'es_pe':                                'es_PE.ISO8859-1',
1085    'es_pr':                                'es_PR.ISO8859-1',
1086    'es_py':                                'es_PY.ISO8859-1',
1087    'es_sv':                                'es_SV.ISO8859-1',
1088    'es_us':                                'es_US.ISO8859-1',
1089    'es_uy':                                'es_UY.ISO8859-1',
1090    'es_ve':                                'es_VE.ISO8859-1',
1091    'estonian':                             'et_EE.ISO8859-1',
1092    'et':                                   'et_EE.ISO8859-15',
1093    'et_ee':                                'et_EE.ISO8859-15',
1094    'eu':                                   'eu_ES.ISO8859-1',
1095    'eu_es':                                'eu_ES.ISO8859-1',
1096    'eu_fr':                                'eu_FR.ISO8859-1',
1097    'fa':                                   'fa_IR.UTF-8',
1098    'fa_ir':                                'fa_IR.UTF-8',
1099    'fa_ir.isiri3342':                      'fa_IR.ISIRI-3342',
1100    'ff_sn':                                'ff_SN.UTF-8',
1101    'fi':                                   'fi_FI.ISO8859-15',
1102    'fi_fi':                                'fi_FI.ISO8859-15',
1103    'fil_ph':                               'fil_PH.UTF-8',
1104    'finnish':                              'fi_FI.ISO8859-1',
1105    'fo':                                   'fo_FO.ISO8859-1',
1106    'fo_fo':                                'fo_FO.ISO8859-1',
1107    'fr':                                   'fr_FR.ISO8859-1',
1108    'fr_be':                                'fr_BE.ISO8859-1',
1109    'fr_ca':                                'fr_CA.ISO8859-1',
1110    'fr_ch':                                'fr_CH.ISO8859-1',
1111    'fr_fr':                                'fr_FR.ISO8859-1',
1112    'fr_lu':                                'fr_LU.ISO8859-1',
1113    'fran\xe7ais':                          'fr_FR.ISO8859-1',
1114    'fre_fr':                               'fr_FR.ISO8859-1',
1115    'french':                               'fr_FR.ISO8859-1',
1116    'french.iso88591':                      'fr_CH.ISO8859-1',
1117    'french_france':                        'fr_FR.ISO8859-1',
1118    'fur_it':                               'fur_IT.UTF-8',
1119    'fy_de':                                'fy_DE.UTF-8',
1120    'fy_nl':                                'fy_NL.UTF-8',
1121    'ga':                                   'ga_IE.ISO8859-1',
1122    'ga_ie':                                'ga_IE.ISO8859-1',
1123    'galego':                               'gl_ES.ISO8859-1',
1124    'galician':                             'gl_ES.ISO8859-1',
1125    'gd':                                   'gd_GB.ISO8859-1',
1126    'gd_gb':                                'gd_GB.ISO8859-1',
1127    'ger_de':                               'de_DE.ISO8859-1',
1128    'german':                               'de_DE.ISO8859-1',
1129    'german.iso88591':                      'de_CH.ISO8859-1',
1130    'german_germany':                       'de_DE.ISO8859-1',
1131    'gez_er':                               'gez_ER.UTF-8',
1132    'gez_et':                               'gez_ET.UTF-8',
1133    'gl':                                   'gl_ES.ISO8859-1',
1134    'gl_es':                                'gl_ES.ISO8859-1',
1135    'greek':                                'el_GR.ISO8859-7',
1136    'gu_in':                                'gu_IN.UTF-8',
1137    'gv':                                   'gv_GB.ISO8859-1',
1138    'gv_gb':                                'gv_GB.ISO8859-1',
1139    'ha_ng':                                'ha_NG.UTF-8',
1140    'hak_tw':                               'hak_TW.UTF-8',
1141    'he':                                   'he_IL.ISO8859-8',
1142    'he_il':                                'he_IL.ISO8859-8',
1143    'hebrew':                               'he_IL.ISO8859-8',
1144    'hi':                                   'hi_IN.ISCII-DEV',
1145    'hi_in':                                'hi_IN.ISCII-DEV',
1146    'hi_in.isciidev':                       'hi_IN.ISCII-DEV',
1147    'hif_fj':                               'hif_FJ.UTF-8',
1148    'hne':                                  'hne_IN.UTF-8',
1149    'hne_in':                               'hne_IN.UTF-8',
1150    'hr':                                   'hr_HR.ISO8859-2',
1151    'hr_hr':                                'hr_HR.ISO8859-2',
1152    'hrvatski':                             'hr_HR.ISO8859-2',
1153    'hsb_de':                               'hsb_DE.ISO8859-2',
1154    'ht_ht':                                'ht_HT.UTF-8',
1155    'hu':                                   'hu_HU.ISO8859-2',
1156    'hu_hu':                                'hu_HU.ISO8859-2',
1157    'hungarian':                            'hu_HU.ISO8859-2',
1158    'hy_am':                                'hy_AM.UTF-8',
1159    'hy_am.armscii8':                       'hy_AM.ARMSCII_8',
1160    'ia':                                   'ia.UTF-8',
1161    'ia_fr':                                'ia_FR.UTF-8',
1162    'icelandic':                            'is_IS.ISO8859-1',
1163    'id':                                   'id_ID.ISO8859-1',
1164    'id_id':                                'id_ID.ISO8859-1',
1165    'ig_ng':                                'ig_NG.UTF-8',
1166    'ik_ca':                                'ik_CA.UTF-8',
1167    'in':                                   'id_ID.ISO8859-1',
1168    'in_id':                                'id_ID.ISO8859-1',
1169    'is':                                   'is_IS.ISO8859-1',
1170    'is_is':                                'is_IS.ISO8859-1',
1171    'iso-8859-1':                           'en_US.ISO8859-1',
1172    'iso-8859-15':                          'en_US.ISO8859-15',
1173    'iso8859-1':                            'en_US.ISO8859-1',
1174    'iso8859-15':                           'en_US.ISO8859-15',
1175    'iso_8859_1':                           'en_US.ISO8859-1',
1176    'iso_8859_15':                          'en_US.ISO8859-15',
1177    'it':                                   'it_IT.ISO8859-1',
1178    'it_ch':                                'it_CH.ISO8859-1',
1179    'it_it':                                'it_IT.ISO8859-1',
1180    'italian':                              'it_IT.ISO8859-1',
1181    'iu':                                   'iu_CA.NUNACOM-8',
1182    'iu_ca':                                'iu_CA.NUNACOM-8',
1183    'iu_ca.nunacom8':                       'iu_CA.NUNACOM-8',
1184    'iw':                                   'he_IL.ISO8859-8',
1185    'iw_il':                                'he_IL.ISO8859-8',
1186    'iw_il.utf8':                           'iw_IL.UTF-8',
1187    'ja':                                   'ja_JP.eucJP',
1188    'ja_jp':                                'ja_JP.eucJP',
1189    'ja_jp.euc':                            'ja_JP.eucJP',
1190    'ja_jp.mscode':                         'ja_JP.SJIS',
1191    'ja_jp.pck':                            'ja_JP.SJIS',
1192    'japan':                                'ja_JP.eucJP',
1193    'japanese':                             'ja_JP.eucJP',
1194    'japanese-euc':                         'ja_JP.eucJP',
1195    'japanese.euc':                         'ja_JP.eucJP',
1196    'jp_jp':                                'ja_JP.eucJP',
1197    'ka':                                   'ka_GE.GEORGIAN-ACADEMY',
1198    'ka_ge':                                'ka_GE.GEORGIAN-ACADEMY',
1199    'ka_ge.georgianacademy':                'ka_GE.GEORGIAN-ACADEMY',
1200    'ka_ge.georgianps':                     'ka_GE.GEORGIAN-PS',
1201    'ka_ge.georgianrs':                     'ka_GE.GEORGIAN-ACADEMY',
1202    'kab_dz':                               'kab_DZ.UTF-8',
1203    'kk_kz':                                'kk_KZ.ptcp154',
1204    'kl':                                   'kl_GL.ISO8859-1',
1205    'kl_gl':                                'kl_GL.ISO8859-1',
1206    'km_kh':                                'km_KH.UTF-8',
1207    'kn':                                   'kn_IN.UTF-8',
1208    'kn_in':                                'kn_IN.UTF-8',
1209    'ko':                                   'ko_KR.eucKR',
1210    'ko_kr':                                'ko_KR.eucKR',
1211    'ko_kr.euc':                            'ko_KR.eucKR',
1212    'kok_in':                               'kok_IN.UTF-8',
1213    'korean':                               'ko_KR.eucKR',
1214    'korean.euc':                           'ko_KR.eucKR',
1215    'ks':                                   'ks_IN.UTF-8',
1216    'ks_in':                                'ks_IN.UTF-8',
    'ks_in@devanagari.utf8':                'ks_IN.UTF-8@devanagari',
1218    'ku_tr':                                'ku_TR.ISO8859-9',
1219    'kw':                                   'kw_GB.ISO8859-1',
1220    'kw_gb':                                'kw_GB.ISO8859-1',
1221    'ky':                                   'ky_KG.UTF-8',
1222    'ky_kg':                                'ky_KG.UTF-8',
1223    'lb_lu':                                'lb_LU.UTF-8',
1224    'lg_ug':                                'lg_UG.ISO8859-10',
1225    'li_be':                                'li_BE.UTF-8',
1226    'li_nl':                                'li_NL.UTF-8',
1227    'lij_it':                               'lij_IT.UTF-8',
1228    'lithuanian':                           'lt_LT.ISO8859-13',
1229    'ln_cd':                                'ln_CD.UTF-8',
1230    'lo':                                   'lo_LA.MULELAO-1',
1231    'lo_la':                                'lo_LA.MULELAO-1',
1232    'lo_la.cp1133':                         'lo_LA.IBM-CP1133',
1233    'lo_la.ibmcp1133':                      'lo_LA.IBM-CP1133',
1234    'lo_la.mulelao1':                       'lo_LA.MULELAO-1',
1235    'lt':                                   'lt_LT.ISO8859-13',
1236    'lt_lt':                                'lt_LT.ISO8859-13',
1237    'lv':                                   'lv_LV.ISO8859-13',
1238    'lv_lv':                                'lv_LV.ISO8859-13',
1239    'lzh_tw':                               'lzh_TW.UTF-8',
1240    'mag_in':                               'mag_IN.UTF-8',
1241    'mai':                                  'mai_IN.UTF-8',
1242    'mai_in':                               'mai_IN.UTF-8',
1243    'mai_np':                               'mai_NP.UTF-8',
1244    'mfe_mu':                               'mfe_MU.UTF-8',
1245    'mg_mg':                                'mg_MG.ISO8859-15',
1246    'mhr_ru':                               'mhr_RU.UTF-8',
1247    'mi':                                   'mi_NZ.ISO8859-1',
1248    'mi_nz':                                'mi_NZ.ISO8859-1',
1249    'miq_ni':                               'miq_NI.UTF-8',
1250    'mjw_in':                               'mjw_IN.UTF-8',
1251    'mk':                                   'mk_MK.ISO8859-5',
1252    'mk_mk':                                'mk_MK.ISO8859-5',
1253    'ml':                                   'ml_IN.UTF-8',
1254    'ml_in':                                'ml_IN.UTF-8',
1255    'mn_mn':                                'mn_MN.UTF-8',
1256    'mni_in':                               'mni_IN.UTF-8',
1257    'mr':                                   'mr_IN.UTF-8',
1258    'mr_in':                                'mr_IN.UTF-8',
1259    'ms':                                   'ms_MY.ISO8859-1',
1260    'ms_my':                                'ms_MY.ISO8859-1',
1261    'mt':                                   'mt_MT.ISO8859-3',
1262    'mt_mt':                                'mt_MT.ISO8859-3',
1263    'my_mm':                                'my_MM.UTF-8',
1264    'nan_tw':                               'nan_TW.UTF-8',
1265    'nb':                                   'nb_NO.ISO8859-1',
1266    'nb_no':                                'nb_NO.ISO8859-1',
1267    'nds_de':                               'nds_DE.UTF-8',
1268    'nds_nl':                               'nds_NL.UTF-8',
1269    'ne_np':                                'ne_NP.UTF-8',
1270    'nhn_mx':                               'nhn_MX.UTF-8',
1271    'niu_nu':                               'niu_NU.UTF-8',
1272    'niu_nz':                               'niu_NZ.UTF-8',
1273    'nl':                                   'nl_NL.ISO8859-1',
1274    'nl_aw':                                'nl_AW.UTF-8',
1275    'nl_be':                                'nl_BE.ISO8859-1',
1276    'nl_nl':                                'nl_NL.ISO8859-1',
1277    'nn':                                   'nn_NO.ISO8859-1',
1278    'nn_no':                                'nn_NO.ISO8859-1',
1279    'no':                                   'no_NO.ISO8859-1',
1280    'no@nynorsk':                           'ny_NO.ISO8859-1',
1281    'no_no':                                'no_NO.ISO8859-1',
1282    'no_no.iso88591@bokmal':                'no_NO.ISO8859-1',
1283    'no_no.iso88591@nynorsk':               'no_NO.ISO8859-1',
1284    'norwegian':                            'no_NO.ISO8859-1',
1285    'nr':                                   'nr_ZA.ISO8859-1',
1286    'nr_za':                                'nr_ZA.ISO8859-1',
1287    'nso':                                  'nso_ZA.ISO8859-15',
1288    'nso_za':                               'nso_ZA.ISO8859-15',
1289    'ny':                                   'ny_NO.ISO8859-1',
1290    'ny_no':                                'ny_NO.ISO8859-1',
1291    'nynorsk':                              'nn_NO.ISO8859-1',
1292    'oc':                                   'oc_FR.ISO8859-1',
1293    'oc_fr':                                'oc_FR.ISO8859-1',
1294    'om_et':                                'om_ET.UTF-8',
1295    'om_ke':                                'om_KE.ISO8859-1',
1296    'or':                                   'or_IN.UTF-8',
1297    'or_in':                                'or_IN.UTF-8',
1298    'os_ru':                                'os_RU.UTF-8',
1299    'pa':                                   'pa_IN.UTF-8',
1300    'pa_in':                                'pa_IN.UTF-8',
1301    'pa_pk':                                'pa_PK.UTF-8',
1302    'pap_an':                               'pap_AN.UTF-8',
1303    'pap_aw':                               'pap_AW.UTF-8',
1304    'pap_cw':                               'pap_CW.UTF-8',
1305    'pd':                                   'pd_US.ISO8859-1',
1306    'pd_de':                                'pd_DE.ISO8859-1',
1307    'pd_us':                                'pd_US.ISO8859-1',
1308    'ph':                                   'ph_PH.ISO8859-1',
1309    'ph_ph':                                'ph_PH.ISO8859-1',
1310    'pl':                                   'pl_PL.ISO8859-2',
1311    'pl_pl':                                'pl_PL.ISO8859-2',
1312    'polish':                               'pl_PL.ISO8859-2',
1313    'portuguese':                           'pt_PT.ISO8859-1',
1314    'portuguese_brazil':                    'pt_BR.ISO8859-1',
1315    'posix':                                'C',
1316    'posix-utf2':                           'C',
1317    'pp':                                   'pp_AN.ISO8859-1',
1318    'pp_an':                                'pp_AN.ISO8859-1',
1319    'ps_af':                                'ps_AF.UTF-8',
1320    'pt':                                   'pt_PT.ISO8859-1',
1321    'pt_br':                                'pt_BR.ISO8859-1',
1322    'pt_pt':                                'pt_PT.ISO8859-1',
1323    'quz_pe':                               'quz_PE.UTF-8',
1324    'raj_in':                               'raj_IN.UTF-8',
1325    'ro':                                   'ro_RO.ISO8859-2',
1326    'ro_ro':                                'ro_RO.ISO8859-2',
1327    'romanian':                             'ro_RO.ISO8859-2',
1328    'ru':                                   'ru_RU.UTF-8',
1329    'ru_ru':                                'ru_RU.UTF-8',
1330    'ru_ua':                                'ru_UA.KOI8-U',
1331    'rumanian':                             'ro_RO.ISO8859-2',
1332    'russian':                              'ru_RU.KOI8-R',
1333    'rw':                                   'rw_RW.ISO8859-1',
1334    'rw_rw':                                'rw_RW.ISO8859-1',
1335    'sa_in':                                'sa_IN.UTF-8',
1336    'sat_in':                               'sat_IN.UTF-8',
1337    'sc_it':                                'sc_IT.UTF-8',
1338    'sd':                                   'sd_IN.UTF-8',
1339    'sd_in':                                'sd_IN.UTF-8',
    'sd_in@devanagari.utf8':                'sd_IN.UTF-8@devanagari',
1341    'sd_pk':                                'sd_PK.UTF-8',
1342    'se_no':                                'se_NO.UTF-8',
1343    'serbocroatian':                        'sr_RS.UTF-8@latin',
1344    'sgs_lt':                               'sgs_LT.UTF-8',
1345    'sh':                                   'sr_RS.UTF-8@latin',
1346    'sh_ba.iso88592@bosnia':                'sr_CS.ISO8859-2',
1347    'sh_hr':                                'sh_HR.ISO8859-2',
1348    'sh_hr.iso88592':                       'hr_HR.ISO8859-2',
1349    'sh_sp':                                'sr_CS.ISO8859-2',
1350    'sh_yu':                                'sr_RS.UTF-8@latin',
1351    'shn_mm':                               'shn_MM.UTF-8',
1352    'shs_ca':                               'shs_CA.UTF-8',
1353    'si':                                   'si_LK.UTF-8',
1354    'si_lk':                                'si_LK.UTF-8',
1355    'sid_et':                               'sid_ET.UTF-8',
1356    'sinhala':                              'si_LK.UTF-8',
1357    'sk':                                   'sk_SK.ISO8859-2',
1358    'sk_sk':                                'sk_SK.ISO8859-2',
1359    'sl':                                   'sl_SI.ISO8859-2',
1360    'sl_cs':                                'sl_CS.ISO8859-2',
1361    'sl_si':                                'sl_SI.ISO8859-2',
1362    'slovak':                               'sk_SK.ISO8859-2',
1363    'slovene':                              'sl_SI.ISO8859-2',
1364    'slovenian':                            'sl_SI.ISO8859-2',
1365    'sm_ws':                                'sm_WS.UTF-8',
1366    'so_dj':                                'so_DJ.ISO8859-1',
1367    'so_et':                                'so_ET.UTF-8',
1368    'so_ke':                                'so_KE.ISO8859-1',
1369    'so_so':                                'so_SO.ISO8859-1',
1370    'sp':                                   'sr_CS.ISO8859-5',
1371    'sp_yu':                                'sr_CS.ISO8859-5',
1372    'spanish':                              'es_ES.ISO8859-1',
1373    'spanish_spain':                        'es_ES.ISO8859-1',
1374    'sq':                                   'sq_AL.ISO8859-2',
1375    'sq_al':                                'sq_AL.ISO8859-2',
1376    'sq_mk':                                'sq_MK.UTF-8',
1377    'sr':                                   'sr_RS.UTF-8',
1378    'sr@cyrillic':                          'sr_RS.UTF-8',
1379    'sr@latn':                              'sr_CS.UTF-8@latin',
1380    'sr_cs':                                'sr_CS.UTF-8',
1381    'sr_cs.iso88592@latn':                  'sr_CS.ISO8859-2',
1382    'sr_cs@latn':                           'sr_CS.UTF-8@latin',
1383    'sr_me':                                'sr_ME.UTF-8',
1384    'sr_rs':                                'sr_RS.UTF-8',
1385    'sr_rs@latn':                           'sr_RS.UTF-8@latin',
1386    'sr_sp':                                'sr_CS.ISO8859-2',
1387    'sr_yu':                                'sr_RS.UTF-8@latin',
1388    'sr_yu.cp1251@cyrillic':                'sr_CS.CP1251',
1389    'sr_yu.iso88592':                       'sr_CS.ISO8859-2',
1390    'sr_yu.iso88595':                       'sr_CS.ISO8859-5',
1391    'sr_yu.iso88595@cyrillic':              'sr_CS.ISO8859-5',
1392    'sr_yu.microsoftcp1251@cyrillic':       'sr_CS.CP1251',
1393    'sr_yu.utf8':                           'sr_RS.UTF-8',
1394    'sr_yu.utf8@cyrillic':                  'sr_RS.UTF-8',
1395    'sr_yu@cyrillic':                       'sr_RS.UTF-8',
1396    'ss':                                   'ss_ZA.ISO8859-1',
1397    'ss_za':                                'ss_ZA.ISO8859-1',
1398    'st':                                   'st_ZA.ISO8859-1',
1399    'st_za':                                'st_ZA.ISO8859-1',
1400    'sv':                                   'sv_SE.ISO8859-1',
1401    'sv_fi':                                'sv_FI.ISO8859-1',
1402    'sv_se':                                'sv_SE.ISO8859-1',
1403    'sw_ke':                                'sw_KE.UTF-8',
1404    'sw_tz':                                'sw_TZ.UTF-8',
1405    'swedish':                              'sv_SE.ISO8859-1',
1406    'szl_pl':                               'szl_PL.UTF-8',
1407    'ta':                                   'ta_IN.TSCII-0',
1408    'ta_in':                                'ta_IN.TSCII-0',
1409    'ta_in.tscii':                          'ta_IN.TSCII-0',
1410    'ta_in.tscii0':                         'ta_IN.TSCII-0',
1411    'ta_lk':                                'ta_LK.UTF-8',
1412    'tcy_in.utf8':                          'tcy_IN.UTF-8',
1413    'te':                                   'te_IN.UTF-8',
1414    'te_in':                                'te_IN.UTF-8',
1415    'tg':                                   'tg_TJ.KOI8-C',
1416    'tg_tj':                                'tg_TJ.KOI8-C',
1417    'th':                                   'th_TH.ISO8859-11',
1418    'th_th':                                'th_TH.ISO8859-11',
1419    'th_th.tactis':                         'th_TH.TIS620',
1420    'th_th.tis620':                         'th_TH.TIS620',
1421    'thai':                                 'th_TH.ISO8859-11',
1422    'the_np':                               'the_NP.UTF-8',
1423    'ti_er':                                'ti_ER.UTF-8',
1424    'ti_et':                                'ti_ET.UTF-8',
1425    'tig_er':                               'tig_ER.UTF-8',
1426    'tk_tm':                                'tk_TM.UTF-8',
1427    'tl':                                   'tl_PH.ISO8859-1',
1428    'tl_ph':                                'tl_PH.ISO8859-1',
1429    'tn':                                   'tn_ZA.ISO8859-15',
1430    'tn_za':                                'tn_ZA.ISO8859-15',
1431    'to_to':                                'to_TO.UTF-8',
1432    'tpi_pg':                               'tpi_PG.UTF-8',
1433    'tr':                                   'tr_TR.ISO8859-9',
1434    'tr_cy':                                'tr_CY.ISO8859-9',
1435    'tr_tr':                                'tr_TR.ISO8859-9',
1436    'ts':                                   'ts_ZA.ISO8859-1',
1437    'ts_za':                                'ts_ZA.ISO8859-1',
1438    'tt':                                   'tt_RU.TATAR-CYR',
1439    'tt_ru':                                'tt_RU.TATAR-CYR',
1440    'tt_ru.tatarcyr':                       'tt_RU.TATAR-CYR',
1441    'tt_ru@iqtelif':                        'tt_RU.UTF-8@iqtelif',
1442    'turkish':                              'tr_TR.ISO8859-9',
1443    'ug_cn':                                'ug_CN.UTF-8',
1444    'uk':                                   'uk_UA.KOI8-U',
1445    'uk_ua':                                'uk_UA.KOI8-U',
1446    'univ':                                 'en_US.utf',
1447    'universal':                            'en_US.utf',
1448    'universal.utf8@ucs4':                  'en_US.UTF-8',
1449    'unm_us':                               'unm_US.UTF-8',
1450    'ur':                                   'ur_PK.CP1256',
1451    'ur_in':                                'ur_IN.UTF-8',
1452    'ur_pk':                                'ur_PK.CP1256',
1453    'uz':                                   'uz_UZ.UTF-8',
1454    'uz_uz':                                'uz_UZ.UTF-8',
1455    'uz_uz@cyrillic':                       'uz_UZ.UTF-8',
1456    've':                                   've_ZA.UTF-8',
1457    've_za':                                've_ZA.UTF-8',
1458    'vi':                                   'vi_VN.TCVN',
1459    'vi_vn':                                'vi_VN.TCVN',
1460    'vi_vn.tcvn':                           'vi_VN.TCVN',
1461    'vi_vn.tcvn5712':                       'vi_VN.TCVN',
1462    'vi_vn.viscii':                         'vi_VN.VISCII',
1463    'vi_vn.viscii111':                      'vi_VN.VISCII',
1464    'wa':                                   'wa_BE.ISO8859-1',
1465    'wa_be':                                'wa_BE.ISO8859-1',
1466    'wae_ch':                               'wae_CH.UTF-8',
1467    'wal_et':                               'wal_ET.UTF-8',
1468    'wo_sn':                                'wo_SN.UTF-8',
1469    'xh':                                   'xh_ZA.ISO8859-1',
1470    'xh_za':                                'xh_ZA.ISO8859-1',
1471    'yi':                                   'yi_US.CP1255',
1472    'yi_us':                                'yi_US.CP1255',
1473    'yo_ng':                                'yo_NG.UTF-8',
1474    'yue_hk':                               'yue_HK.UTF-8',
1475    'yuw_pg':                               'yuw_PG.UTF-8',
1476    'zh':                                   'zh_CN.eucCN',
1477    'zh_cn':                                'zh_CN.gb2312',
1478    'zh_cn.big5':                           'zh_TW.big5',
1479    'zh_cn.euc':                            'zh_CN.eucCN',
1480    'zh_hk':                                'zh_HK.big5hkscs',
1481    'zh_hk.big5hk':                         'zh_HK.big5hkscs',
1482    'zh_sg':                                'zh_SG.GB2312',
1483    'zh_sg.gbk':                            'zh_SG.GBK',
1484    'zh_tw':                                'zh_TW.big5',
1485    'zh_tw.euc':                            'zh_TW.eucTW',
1486    'zh_tw.euctw':                          'zh_TW.eucTW',
1487    'zu':                                   'zu_ZA.ISO8859-1',
1488    'zu_za':                                'zu_ZA.ISO8859-1',
1489}
1490
1491#
1492# This maps Windows language identifiers to locale strings.
1493#
1494# This list has been updated from
1495# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
1496# to include every locale up to Windows Vista.
1497#
1498# NOTE: this mapping is incomplete.  If your language is missing, please
1499# submit a bug report to the Python bug tracker at http://bugs.python.org/
1500# Make sure you include the missing language identifier and the suggested
1501# locale code.
1502#
1503
# Keys are Windows language identifiers (integers, written in hex); values
# are "language_TERRITORY" locale codes without an encoding part.  Several
# identifiers may share one locale code when they differ only by script.
windows_locale = {
    0x0436: "af_ZA", # Afrikaans
    0x041c: "sq_AL", # Albanian
    0x0484: "gsw_FR",# Alsatian - France
    0x045e: "am_ET", # Amharic - Ethiopia
    0x0401: "ar_SA", # Arabic - Saudi Arabia
    0x0801: "ar_IQ", # Arabic - Iraq
    0x0c01: "ar_EG", # Arabic - Egypt
    0x1001: "ar_LY", # Arabic - Libya
    0x1401: "ar_DZ", # Arabic - Algeria
    0x1801: "ar_MA", # Arabic - Morocco
    0x1c01: "ar_TN", # Arabic - Tunisia
    0x2001: "ar_OM", # Arabic - Oman
    0x2401: "ar_YE", # Arabic - Yemen
    0x2801: "ar_SY", # Arabic - Syria
    0x2c01: "ar_JO", # Arabic - Jordan
    0x3001: "ar_LB", # Arabic - Lebanon
    0x3401: "ar_KW", # Arabic - Kuwait
    0x3801: "ar_AE", # Arabic - United Arab Emirates
    0x3c01: "ar_BH", # Arabic - Bahrain
    0x4001: "ar_QA", # Arabic - Qatar
    0x042b: "hy_AM", # Armenian
    0x044d: "as_IN", # Assamese - India
    0x042c: "az_AZ", # Azeri - Latin
    0x082c: "az_AZ", # Azeri - Cyrillic
    0x046d: "ba_RU", # Bashkir
    0x042d: "eu_ES", # Basque - Spain
    0x0423: "be_BY", # Belarusian
    0x0445: "bn_IN", # Bengali
    0x201a: "bs_BA", # Bosnian - Cyrillic
    0x141a: "bs_BA", # Bosnian - Latin
    0x047e: "br_FR", # Breton - France
    0x0402: "bg_BG", # Bulgarian
#    0x0455: "my_MM", # Burmese - Not supported
    0x0403: "ca_ES", # Catalan
    0x0004: "zh_CHS",# Chinese - Simplified
    0x0404: "zh_TW", # Chinese - Taiwan
    0x0804: "zh_CN", # Chinese - PRC
    0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
    0x1004: "zh_SG", # Chinese - Singapore
    0x1404: "zh_MO", # Chinese - Macao S.A.R.
    0x7c04: "zh_CHT",# Chinese - Traditional
    0x0483: "co_FR", # Corsican - France
    0x041a: "hr_HR", # Croatian
    0x101a: "hr_BA", # Croatian - Bosnia
    0x0405: "cs_CZ", # Czech
    0x0406: "da_DK", # Danish
    0x048c: "gbz_AF",# Dari - Afghanistan
    0x0465: "div_MV",# Divehi - Maldives
    0x0413: "nl_NL", # Dutch - The Netherlands
    0x0813: "nl_BE", # Dutch - Belgium
    0x0409: "en_US", # English - United States
    0x0809: "en_GB", # English - United Kingdom
    0x0c09: "en_AU", # English - Australia
    0x1009: "en_CA", # English - Canada
    0x1409: "en_NZ", # English - New Zealand
    0x1809: "en_IE", # English - Ireland
    0x1c09: "en_ZA", # English - South Africa
    0x2009: "en_JA", # English - Jamaica
    0x2409: "en_CB", # English - Caribbean
    0x2809: "en_BZ", # English - Belize
    0x2c09: "en_TT", # English - Trinidad
    0x3009: "en_ZW", # English - Zimbabwe
    0x3409: "en_PH", # English - Philippines
    0x4009: "en_IN", # English - India
    0x4409: "en_MY", # English - Malaysia
    0x4809: "en_SG", # English - Singapore
    0x0425: "et_EE", # Estonian
    0x0438: "fo_FO", # Faroese
    0x0464: "fil_PH",# Filipino
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French - France
    0x080c: "fr_BE", # French - Belgium
    0x0c0c: "fr_CA", # French - Canada
    0x100c: "fr_CH", # French - Switzerland
    0x140c: "fr_LU", # French - Luxembourg
    0x180c: "fr_MC", # French - Monaco
    0x0462: "fy_NL", # Frisian - Netherlands
    0x0456: "gl_ES", # Galician
    0x0437: "ka_GE", # Georgian
    0x0407: "de_DE", # German - Germany
    0x0807: "de_CH", # German - Switzerland
    0x0c07: "de_AT", # German - Austria
    0x1007: "de_LU", # German - Luxembourg
    0x1407: "de_LI", # German - Liechtenstein
    0x0408: "el_GR", # Greek
    0x046f: "kl_GL", # Greenlandic - Greenland
    0x0447: "gu_IN", # Gujarati
    0x0468: "ha_NG", # Hausa - Latin
    0x040d: "he_IL", # Hebrew
    0x0439: "hi_IN", # Hindi
    0x040e: "hu_HU", # Hungarian
    0x040f: "is_IS", # Icelandic
    0x0421: "id_ID", # Indonesian
    0x045d: "iu_CA", # Inuktitut - Syllabics
    0x085d: "iu_CA", # Inuktitut - Latin
    0x083c: "ga_IE", # Irish - Ireland
    0x0410: "it_IT", # Italian - Italy
    0x0810: "it_CH", # Italian - Switzerland
    0x0411: "ja_JP", # Japanese
    0x044b: "kn_IN", # Kannada - India
    0x043f: "kk_KZ", # Kazakh
    0x0453: "kh_KH", # Khmer - Cambodia
    0x0486: "qut_GT",# K'iche - Guatemala
    0x0487: "rw_RW", # Kinyarwanda - Rwanda
    0x0457: "kok_IN",# Konkani
    0x0412: "ko_KR", # Korean
    0x0440: "ky_KG", # Kyrgyz
    0x0454: "lo_LA", # Lao - Lao PDR
    0x0426: "lv_LV", # Latvian
    0x0427: "lt_LT", # Lithuanian
    0x082e: "dsb_DE",# Lower Sorbian - Germany
    0x046e: "lb_LU", # Luxembourgish
    0x042f: "mk_MK", # FYROM Macedonian
    0x043e: "ms_MY", # Malay - Malaysia
    0x083e: "ms_BN", # Malay - Brunei Darussalam
    0x044c: "ml_IN", # Malayalam - India
    0x043a: "mt_MT", # Maltese
    0x0481: "mi_NZ", # Maori
    0x047a: "arn_CL",# Mapudungun
    0x044e: "mr_IN", # Marathi
    0x047c: "moh_CA",# Mohawk - Canada
    0x0450: "mn_MN", # Mongolian - Cyrillic
    0x0850: "mn_CN", # Mongolian - PRC
    0x0461: "ne_NP", # Nepali
    0x0414: "nb_NO", # Norwegian - Bokmal
    0x0814: "nn_NO", # Norwegian - Nynorsk
    0x0482: "oc_FR", # Occitan - France
    0x0448: "or_IN", # Oriya - India
    0x0463: "ps_AF", # Pashto - Afghanistan
    0x0429: "fa_IR", # Persian
    0x0415: "pl_PL", # Polish
    0x0416: "pt_BR", # Portuguese - Brazil
    0x0816: "pt_PT", # Portuguese - Portugal
    0x0446: "pa_IN", # Punjabi
    0x046b: "quz_BO",# Quechua (Bolivia)
    0x086b: "quz_EC",# Quechua (Ecuador)
    0x0c6b: "quz_PE",# Quechua (Peru)
    0x0418: "ro_RO", # Romanian - Romania
    0x0417: "rm_CH", # Romansh
    0x0419: "ru_RU", # Russian
    0x243b: "smn_FI",# Sami Finland
    0x103b: "smj_NO",# Sami Norway
    0x143b: "smj_SE",# Sami Sweden
    0x043b: "se_NO", # Sami Northern Norway
    0x083b: "se_SE", # Sami Northern Sweden
    0x0c3b: "se_FI", # Sami Northern Finland
    0x203b: "sms_FI",# Sami Skolt
    0x183b: "sma_NO",# Sami Southern Norway
    0x1c3b: "sma_SE",# Sami Southern Sweden
    0x044f: "sa_IN", # Sanskrit
    0x0c1a: "sr_SP", # Serbian - Cyrillic
    0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
    0x081a: "sr_SP", # Serbian - Latin
    0x181a: "sr_BA", # Serbian - Bosnia Latin
    0x045b: "si_LK", # Sinhala - Sri Lanka
    0x046c: "ns_ZA", # Northern Sotho
    0x0432: "tn_ZA", # Setswana - Southern Africa
    0x041b: "sk_SK", # Slovak
    0x0424: "sl_SI", # Slovenian
    0x040a: "es_ES", # Spanish - Spain
    0x080a: "es_MX", # Spanish - Mexico
    0x0c0a: "es_ES", # Spanish - Spain (Modern)
    0x100a: "es_GT", # Spanish - Guatemala
    0x140a: "es_CR", # Spanish - Costa Rica
    0x180a: "es_PA", # Spanish - Panama
    0x1c0a: "es_DO", # Spanish - Dominican Republic
    0x200a: "es_VE", # Spanish - Venezuela
    0x240a: "es_CO", # Spanish - Colombia
    0x280a: "es_PE", # Spanish - Peru
    0x2c0a: "es_AR", # Spanish - Argentina
    0x300a: "es_EC", # Spanish - Ecuador
    0x340a: "es_CL", # Spanish - Chile
    0x380a: "es_UR", # Spanish - Uruguay
    0x3c0a: "es_PY", # Spanish - Paraguay
    0x400a: "es_BO", # Spanish - Bolivia
    0x440a: "es_SV", # Spanish - El Salvador
    0x480a: "es_HN", # Spanish - Honduras
    0x4c0a: "es_NI", # Spanish - Nicaragua
    0x500a: "es_PR", # Spanish - Puerto Rico
    0x540a: "es_US", # Spanish - United States
#    0x0430: "", # Sutu - Not supported
    0x0441: "sw_KE", # Swahili
    0x041d: "sv_SE", # Swedish - Sweden
    0x081d: "sv_FI", # Swedish - Finland
    0x045a: "syr_SY",# Syriac
    0x0428: "tg_TJ", # Tajik - Cyrillic
    0x085f: "tmz_DZ",# Tamazight - Latin
    0x0449: "ta_IN", # Tamil
    0x0444: "tt_RU", # Tatar
    0x044a: "te_IN", # Telugu
    0x041e: "th_TH", # Thai
    0x0851: "bo_BT", # Tibetan - Bhutan
    0x0451: "bo_CN", # Tibetan - PRC
    0x041f: "tr_TR", # Turkish
    0x0442: "tk_TM", # Turkmen - Cyrillic
    0x0480: "ug_CN", # Uighur - Arabic
    0x0422: "uk_UA", # Ukrainian
    0x042e: "wen_DE",# Upper Sorbian - Germany
    0x0420: "ur_PK", # Urdu
    0x0820: "ur_IN", # Urdu - India
    0x0443: "uz_UZ", # Uzbek - Latin
    0x0843: "uz_UZ", # Uzbek - Cyrillic
    0x042a: "vi_VN", # Vietnamese
    0x0452: "cy_GB", # Welsh
    0x0488: "wo_SN", # Wolof - Senegal
    0x0434: "xh_ZA", # Xhosa - South Africa
    0x0485: "sah_RU",# Yakut - Cyrillic
    0x0478: "ii_CN", # Yi - PRC
    0x046a: "yo_NG", # Yoruba - Nigeria
    0x0435: "zu_ZA", # Zulu
}
1716
def _print_locale():
    """ Test function.

        Prints the locale defaults reported by getdefaultlocale(), then the
        language and encoding that getlocale() reports for every LC_*
        category (LC_ALL excluded) at three points: on startup, after
        resetlocale(), and after setlocale(LC_ALL, "").  If that last call
        raises Error, a note is printed instead of the third report.
    """
    # Every module-level name starting with 'LC_' is treated as a locale
    # category; LC_ALL is left out of the per-category reports.
    categories = {name: value for name, value in globals().items()
                  if name.startswith('LC_') and name != 'LC_ALL'}

    def _report_categories():
        # Print the language/encoding pair currently set for each category.
        for name, category in categories.items():
            print(name, '...')
            lang, enc = getlocale(category)
            print('   Language: ', lang or '(undefined)')
            print('   Encoding: ', enc or '(undefined)')
            print()

    print('Locale defaults as determined by getdefaultlocale():')
    print('-'*72)
    lang, enc = getdefaultlocale()
    print('Language: ', lang or '(undefined)')
    print('Encoding: ', enc or '(undefined)')
    print()

    print('Locale settings on startup:')
    print('-'*72)
    _report_categories()

    print()
    print('Locale settings after calling resetlocale():')
    print('-'*72)
    resetlocale()
    _report_categories()

    try:
        setlocale(LC_ALL, "")
    except Error:
        # Only a locale failure is reported as a note; anything else
        # (including KeyboardInterrupt/SystemExit) propagates to the caller.
        print('NOTE:')
        print('setlocale(LC_ALL, "") does not support the default locale')
        print('given in the OS environment variables.')
    else:
        print()
        print('Locale settings after calling setlocale(LC_ALL, ""):')
        print('-'*72)
        _report_categories()
1772
1773###
1774
# LC_MESSAGES is not guaranteed to be defined in this module, so it is
# added to the public API only when the name actually exists here.
if 'LC_MESSAGES' in globals():
    __all__.append("LC_MESSAGES")
1781
if __name__ == '__main__':
    # Script entry point: run both self-tests, each introduced by a title
    # line followed by one blank line, with a blank line between sections.
    print('Locale aliasing:', end='\n\n')
    _print_locale()
    print()
    print('Number formatting:', end='\n\n')
    _test()
1790