"""Locale support module.

The module provides low-level access to the C lib's locale APIs and adds high
level number formatting APIs as well as a locale aliasing engine to complement
these.

The aliasing engine includes support for many commonly used locale names and
maps them to values suitable for passing to the C lib's setlocale() function. It
also includes default encodings for all supported locale names.

"""

import sys
import encodings
import encodings.aliases
import re
import _collections_abc
from builtins import str as _builtin_str
import functools

# Try importing the _locale module.
#
# If this fails, fall back on a basic 'C' locale emulation.

# Yuck:  LC_MESSAGES is non-standard:  can't tell whether it exists before
# trying the import.  So __all__ is also fiddled at the end of the file.
__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
           "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
           "str", "atof", "atoi", "format", "format_string", "currency",
           "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
           "LC_NUMERIC", "LC_ALL", "CHAR_MAX", "getencoding"]

def _strcoll(a,b):
    """ strcoll(string,string) -> int.
        Compares two strings according to the locale.

        Fallback used when _locale provides no strcoll(): a plain
        three-way comparison returning -1, 0 or 1.
    """
    return (a > b) - (a < b)

def _strxfrm(s):
    """ strxfrm(string) -> string.
        Returns a string that behaves for cmp locale-aware.

        Fallback used when _locale provides no strxfrm(): the string is
        returned unchanged.
    """
    return s

try:

    # The star import is deliberate: the set of names exported by the C
    # module (LC_MESSAGES, CODESET, ...) differs between platforms.
    from _locale import *

except ImportError:

    # Locale emulation

    CHAR_MAX = 127
    LC_ALL = 6
    LC_COLLATE = 3
    LC_CTYPE = 0
    LC_MESSAGES = 5
    LC_MONETARY = 4
    LC_NUMERIC = 1
    LC_TIME = 2
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.
        """
        # 'C' locale default values
        return {'grouping': [127],
                'currency_symbol': '',
                'n_sign_posn': 127,
                'p_cs_precedes': 127,
                'n_cs_precedes': 127,
                'mon_grouping': [],
                'n_sep_by_space': 127,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': 127,
                'int_curr_symbol': '',
                'p_sign_posn': 127,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': 127,
                'mon_decimal_point': '',
                'int_frac_digits': 127}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            The emulation only knows the 'C' locale; any other value
            raises Error.
        """
        if value not in (None, '', 'C'):
            raise Error('_locale emulation only supports "C" locale')
        return 'C'

# These may or may not exist in _locale, so be sure to set them.
if 'strxfrm' not in globals():
    strxfrm = _strxfrm
if 'strcoll' not in globals():
    strcoll = _strcoll


# Keep a reference to the low-level localeconv(); the public name is
# rebound to an overridable wrapper right below.
_localeconv = localeconv

# With this dict, you can override some items of localeconv's return value.
# This is useful for testing purposes.
_override_localeconv = {}

# functools.wraps() copies the low-level function's docstring onto this
# wrapper, so its behaviour is described here instead: return the low-level
# localeconv() mapping with the entries of _override_localeconv applied on top.
@functools.wraps(_localeconv)
def localeconv():
    d = _localeconv()
    if _override_localeconv:
        d.update(_override_localeconv)
    return d


### Number formatting APIs

# Author: Martin von Loewis
# improved by Georg Brandl

def _grouping_intervals(grouping):
    """Iterate over the grouping intervals described by *grouping*.

    Yields each interval in turn.  A CHAR_MAX entry ends the iteration; a 0
    entry repeats the previous interval forever.  Raises ValueError if 0 is
    the first entry (there is no previous interval to repeat).
    """
    last_interval = None
    for interval in grouping:
        # CHAR_MAX terminates the list: no further grouping is performed
        if interval == CHAR_MAX:
            return
        # 0: re-use last group ad infinitum
        if interval == 0:
            if last_interval is None:
                raise ValueError("invalid grouping")
            while True:
                yield last_interval
        yield interval
        last_interval = interval

def _group(s, monetary=False):
    """Insert thousands separators into the digit string *s*, right to left.

    Uses the monetary grouping settings of localeconv() if *monetary* is
    true, the numeric ones otherwise.  Leading non-digit characters (sign,
    padding) and trailing spaces are preserved.

    Returns a tuple (grouped string, number of characters added by the
    inserted separators).
    """
    conv = localeconv()
    thousands_sep = conv['mon_thousands_sep' if monetary else 'thousands_sep']
    grouping = conv['mon_grouping' if monetary else 'grouping']
    # Nothing to do without grouping rules; an empty string (e.g. the integer
    # part of '.5') must not be indexed below.
    if not grouping or not s:
        return (s, 0)
    if s[-1] == ' ':
        stripped = s.rstrip()
        right_spaces = s[len(stripped):]
        s = stripped
    else:
        right_spaces = ''
    left_spaces = ''
    groups = []
    for interval in _grouping_intervals(grouping):
        if not s or s[-1] not in "0123456789":
            # only non-digit characters remain (sign, spaces)
            left_spaces = s
            s = ''
            break
        groups.append(s[-interval:])
        s = s[:-interval]
    if s:
        groups.append(s)
    groups.reverse()
    # With no digit groups at all (e.g. 'nan') no separator was inserted;
    # never report a negative count, callers use it to strip padding.
    return (
        left_spaces + thousands_sep.join(groups) + right_spaces,
        len(thousands_sep) * max(len(groups) - 1, 0)
    )

def _strip_padding(s, amount):
    """Strip up to *amount* padding spaces from *s*.

    Spaces are removed from the left first, then from the right, until
    *amount* spaces have been removed or no padding is left.
    """
    lpos = 0
    while amount and lpos < len(s) and s[lpos] == ' ':
        lpos += 1
        amount -= 1
    rpos = len(s) - 1
    while amount and rpos >= lpos and s[rpos] == ' ':
        rpos -= 1
        amount -= 1
    return s[lpos:rpos+1]

# Matches one %-format specifier: optional "(key)", modifiers, conversion char.
_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
                         r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')

def _format(percent, value, grouping=False, monetary=False, *additional):
    """Apply the single specifier *percent* to *value*, then localize it.

    *additional* holds the extra arguments consumed by '*' modifiers.  Only
    numeric conversions (eEfFgGdiu) are passed through _localize().
    """
    if additional:
        formatted = percent % ((value,) + additional)
    else:
        formatted = percent % value
    if percent[-1] in 'eEfFgGdiu':
        formatted = _localize(formatted, grouping, monetary)
    return formatted

def _localize(formatted, grouping=False, monetary=False):
    """Transform the formatted number according to the locale settings.

    Replaces the decimal point, optionally groups the integer part, and
    strips as much padding as the inserted separators added.
    """
    # floats and decimal ints need special action!
    if '.' in formatted:
        seps = 0
        parts = formatted.split('.')
        if grouping:
            parts[0], seps = _group(parts[0], monetary=monetary)
        decimal_point = localeconv()['mon_decimal_point' if monetary
                                     else 'decimal_point']
        formatted = decimal_point.join(parts)
        if seps:
            formatted = _strip_padding(formatted, seps)
    else:
        seps = 0
        if grouping:
            formatted, seps = _group(formatted, monetary=monetary)
        if seps:
            formatted = _strip_padding(formatted, seps)
    return formatted

def format_string(f, val, grouping=False, monetary=False):
    """Formats a string in the same way that the % formatting would use,
    but takes the current locale into account.

    Grouping is applied if the third parameter is true.
    Conversion uses monetary thousands separator and grouping strings if
    fourth parameter monetary is true."""
    percents = list(_percent_re.finditer(f))
    # Every specifier is formatted individually below and then substituted
    # back as a plain string.
    new_f = _percent_re.sub('%s', f)

    if isinstance(val, _collections_abc.Mapping):
        new_val = []
        for perc in percents:
            if perc.group()[-1]=='%':
                new_val.append('%')
            else:
                new_val.append(_format(perc.group(), val, grouping, monetary))
    else:
        if not isinstance(val, tuple):
            val = (val,)
        new_val = []
        i = 0
        for perc in percents:
            if perc.group()[-1]=='%':
                new_val.append('%')
            else:
                # each '*' modifier consumes one extra positional value
                starcount = perc.group('modifiers').count('*')
                new_val.append(_format(perc.group(),
                                      val[i],
                                      grouping,
                                      monetary,
                                      *val[i+1:i+1+starcount]))
                i += (1 + starcount)
    val = tuple(new_val)

    return new_f % val

def format(percent, value, grouping=False, monetary=False, *additional):
    """Deprecated, use format_string instead."""
    import warnings
    warnings.warn(
        "This method will be removed in a future version of Python. "
        "Use 'locale.format_string()' instead.",
        DeprecationWarning, stacklevel=2
    )

    match = _percent_re.match(percent)
    if not match or len(match.group())!= len(percent):
        raise ValueError(("format() must be given exactly one %%char "
                         "format specifier, %s not valid") % repr(percent))
    return _format(percent, value, grouping, monetary, *additional)

def currency(val, symbol=True, grouping=False, international=False):
    """Formats val according to the currency settings
    in the current locale.

    Raises ValueError if the locale does not define the number of
    fractional digits (as in the 'C' locale)."""
    conv = localeconv()

    # check for illegal values
    digits = conv['int_frac_digits' if international else 'frac_digits']
    if digits == 127:
        raise ValueError("Currency formatting is not possible using "
                         "the 'C' locale.")

    s = _localize(f'{abs(val):.{digits}f}', grouping, monetary=True)
    # '<' and '>' are markers if the sign must be inserted between symbol and value
    s = '<' + s + '>'

    negative = val < 0

    if symbol:
        smb = conv['int_curr_symbol' if international else 'currency_symbol']
        precedes = conv['n_cs_precedes' if negative else 'p_cs_precedes']
        separated = conv['n_sep_by_space' if negative else 'p_sep_by_space']
        space = ' ' if separated else ''

        if precedes:
            s = smb + space + s
        else:
            # endswith() instead of smb[-1]: the symbol may be empty
            if international and smb.endswith(' '):
                smb = smb[:-1]
            s = s + space + smb

    sign_pos = conv['n_sign_posn' if negative else 'p_sign_posn']
    sign = conv['negative_sign' if negative else 'positive_sign']

    if sign_pos == 0:
        s = '(' + s + ')'
    elif sign_pos == 1:
        s = sign + s
    elif sign_pos == 2:
        s = s + sign
    elif sign_pos == 3:
        s = s.replace('<', sign)
    elif sign_pos == 4:
        s = s.replace('>', sign)
    else:
        # the default if nothing specified;
        # this should be the most fitting sign position
        s = sign + s

    return s.replace('<', '').replace('>', '')

def str(val):
    """Convert float to string, taking the locale into account."""
    return _format("%.12g", val)

def delocalize(string):
    "Parses a string as a normalized number according to the locale settings."

    conv = localeconv()

    #First, get rid of the grouping
    ts = conv['thousands_sep']
    if ts:
        string = string.replace(ts, '')

    #next, replace the decimal point with a dot
    dd = conv['decimal_point']
    if dd:
        string = string.replace(dd, '.')
    return string

def localize(string, grouping=False, monetary=False):
    """Parses a string as locale number according to the locale settings."""
    return _localize(string, grouping, monetary)

def atof(string, func=float):
    "Parses a string as a float according to the locale settings."
    return func(delocalize(string))

def atoi(string):
    "Converts a string to an integer according to the locale settings."
    return int(delocalize(string))

def _test():
    setlocale(LC_ALL, "")
    #do grouping
    s1 = format_string("%d", 123456789,1)
    print(s1, "is", atoi(s1))
    #standard formatting
    s1 = str(3.14)
    print(s1, "is", atof(s1))

### Locale name aliasing engine

# Author: Marc-Andre Lemburg, mal@lemburg.com
# Various tweaks by Fredrik Lundh <fredrik@pythonware.com>

# store away the low-level version of setlocale (it's
# overridden below)
_setlocale = setlocale

def _replace_encoding(code, encoding):
    """Return locale *code* with its encoding part replaced by *encoding*.

    The encoding is normalized through encodings.aliases and then mapped
    to a C lib name through locale_encoding_alias where possible.
    """
    if '.' in code:
        langname = code[:code.index('.')]
    else:
        langname = code
    # Convert the encoding to a C lib compatible encoding string
    norm_encoding = encodings.normalize_encoding(encoding)
    #print('norm encoding: %r' % norm_encoding)
    norm_encoding = encodings.aliases.aliases.get(norm_encoding.lower(),
                                                  norm_encoding)
    #print('aliased encoding: %r' % norm_encoding)
    encoding = norm_encoding
    norm_encoding = norm_encoding.lower()
    if norm_encoding in locale_encoding_alias:
        encoding = locale_encoding_alias[norm_encoding]
    else:
        norm_encoding = norm_encoding.replace('_', '')
        norm_encoding = norm_encoding.replace('-', '')
        if norm_encoding in locale_encoding_alias:
            encoding = locale_encoding_alias[norm_encoding]
    #print('found encoding %r' % encoding)
    return langname + '.' + encoding

def _append_modifier(code, modifier):
    """Return locale *code* with *modifier* applied.

    The 'euro' modifier is expressed through the encoding (ISO8859-15)
    where possible; any other modifier is appended as '@modifier'.
    """
    if modifier == 'euro':
        if '.' not in code:
            return code + '.ISO8859-15'
        _, _, encoding = code.partition('.')
        if encoding in ('ISO8859-15', 'UTF-8'):
            return code
        if encoding == 'ISO8859-1':
            return _replace_encoding(code, 'ISO8859-15')
    return code + '@' + modifier

def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        If normalization fails, the original name is returned
        unchanged.

        If the given encoding is not known, the function defaults to
        the default encoding for the locale code just like setlocale()
        does.

    """
    # Normalize the locale name and extract the encoding and modifier
    code = localename.lower()
    if ':' in code:
        # ':' is sometimes used as encoding delimiter.
        code = code.replace(':', '.')
    if '@' in code:
        code, modifier = code.split('@', 1)
    else:
        modifier = ''
    if '.' in code:
        langname, encoding = code.split('.')[:2]
    else:
        langname = code
        encoding = ''

    # First lookup: fullname (possibly with encoding and modifier)
    lang_enc = langname
    if encoding:
        norm_encoding = encoding.replace('-', '')
        norm_encoding = norm_encoding.replace('_', '')
        lang_enc += '.' + norm_encoding
    lookup_name = lang_enc
    if modifier:
        lookup_name += '@' + modifier
    code = locale_alias.get(lookup_name, None)
    if code is not None:
        return code
    #print('first lookup failed')

    if modifier:
        # Second try: fullname without modifier (possibly with encoding)
        code = locale_alias.get(lang_enc, None)
        if code is not None:
            #print('lookup without modifier succeeded')
            if '@' not in code:
                return _append_modifier(code, modifier)
            if code.split('@', 1)[1].lower() == modifier:
                return code
        #print('second lookup failed')

    if encoding:
        # Third try: langname (without encoding, possibly with modifier)
        lookup_name = langname
        if modifier:
            lookup_name += '@' + modifier
        code = locale_alias.get(lookup_name, None)
        if code is not None:
            #print('lookup without encoding succeeded')
            if '@' not in code:
                return _replace_encoding(code, encoding)
            code, modifier = code.split('@', 1)
            return _replace_encoding(code, encoding) + '@' + modifier

        if modifier:
            # Fourth try: langname (without encoding and modifier)
            code = locale_alias.get(langname, None)
            if code is not None:
                #print('lookup without modifier and encoding succeeded')
                if '@' not in code:
                    code = _replace_encoding(code, encoding)
                    return _append_modifier(code, modifier)
                code, defmod = code.split('@', 1)
                if defmod.lower() == modifier:
                    return _replace_encoding(code, encoding) + '@' + defmod

    return localename

def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is normalized and passed through the locale
        alias engine. A ValueError is raised in case the locale name
        cannot be parsed.

        The language code corresponds to RFC 1766.  code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

    """
    code = normalize(localename)
    if '@' in code:
        # Deal with locale modifiers
        code, modifier = code.split('@', 1)
        if modifier == 'euro' and '.' not in code:
            # Assume Latin-9 for @euro locales. This is bogus,
            # since some systems may use other encodings for these
            # locales. Also, we ignore other modifiers.
            return code, 'iso-8859-15'

    if '.' in code:
        return tuple(code.split('.')[:2])
    elif code == 'C':
        return None, None
    elif code == 'UTF-8':
        # On macOS "LC_CTYPE=UTF-8" is a valid locale setting
        # for getting UTF-8 handling for text.
        return None, 'UTF-8'
    raise ValueError('unknown locale: %s' % localename)

def _build_localename(localetuple):

    """ Builds a locale code from the given tuple (language code,
        encoding).

        No aliasing or normalizing takes place.

        Raises TypeError if localetuple is not an iterable of two
        strings (None is accepted for either item).

    """
    try:
        language, encoding = localetuple

        if language is None:
            language = 'C'
        if encoding is None:
            return language
        else:
            return language + '.' + encoding
    except (TypeError, ValueError):
        raise TypeError('Locale must be None, a string, or an iterable of '
                        'two strings -- language code, encoding.') from None

def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable. Since we don't want to interfere
        with the current locale setting we thus emulate the behavior
        in the way described above.

        To maintain compatibility with other platforms, not only the
        LANG variable is tested, but a list of variables given as
        envvars parameter. The first found to be defined will be
        used. envvars defaults to the search path used in GNU gettext;
        it must always contain the variable name 'LANG'.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """

    import warnings
    warnings.warn(
        "Use setlocale(), getencoding() and getlocale() instead",
        DeprecationWarning, stacklevel=2
    )
    return _getdefaultlocale(envvars)


def _getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
    """Implementation of getdefaultlocale() without the deprecation warning.

    Used internally so that non-deprecated callers (getencoding() fallback,
    resetlocale()) do not emit a spurious DeprecationWarning.
    """
    try:
        # check if it's supported by the _locale module
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        pass
    else:
        # make sure the code/encoding values are valid
        if sys.platform == "win32" and code and code[:2] == "0x":
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour
    import os
    lookup = os.environ.get
    for variable in envvars:
        localename = lookup(variable,None)
        if localename:
            if variable == 'LANGUAGE':
                # LANGUAGE holds a ':'-separated priority list; use the first
                localename = localename.split(':')[0]
            break
    else:
        localename = 'C'
    return _parse_localename(localename)


def getlocale(category=LC_CTYPE):

    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        category may be one of the LC_* value except LC_ALL. It
        defaults to LC_CTYPE.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """
    localename = _setlocale(category)
    if category == LC_ALL and ';' in localename:
        # a ';' means the categories have different settings
        raise TypeError('category LC_ALL is not supported')
    return _parse_localename(localename)

def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, an iterable of two strings (language code and encoding),
        or None.

        Iterables are converted to strings using the locale aliasing
        engine.  Locale strings are passed directly to the C lib.

        category may be given as one of the LC_* values.

    """
    if locale and not isinstance(locale, _builtin_str):
        # convert to string
        locale = normalize(_build_localename(locale))
    return _setlocale(category, locale)

def resetlocale(category=LC_ALL):

    """ Sets the locale for category to the default setting.

        The default setting is determined by calling
        getdefaultlocale(). category defaults to LC_ALL.

    """
    import warnings
    warnings.warn(
        'Use locale.setlocale(locale.LC_ALL, "") instead',
        DeprecationWarning, stacklevel=2
    )

    # Use the private helper: no nested deprecation warning to silence.
    loc = _getdefaultlocale()

    _setlocale(category, _build_localename(loc))


try:
    from _locale import getencoding
except ImportError:
    def getencoding():
        """Return the current locale encoding (fallback implementation)."""
        if hasattr(sys, 'getandroidapilevel'):
            # On Android langinfo.h and CODESET are missing, and UTF-8 is
            # always used in mbstowcs() and wcstombs().
            return 'utf-8'
        encoding = _getdefaultlocale()[1]
        if encoding is None:
            # LANG not set, default to UTF-8
            encoding = 'utf-8'
        return encoding

try:
    CODESET
except NameError:
    def getpreferredencoding(do_setlocale=True):
        """Return the charset that the user is likely using."""
        if sys.flags.warn_default_encoding:
            import warnings
            warnings.warn(
                "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.",
                EncodingWarning, 2)
        if sys.flags.utf8_mode:
            return 'utf-8'
        return getencoding()
else:
    # On Unix, if CODESET is available, use that.
    def getpreferredencoding(do_setlocale=True):
        """Return the charset that the user is likely using,
        according to the system configuration."""

        if sys.flags.warn_default_encoding:
            import warnings
            warnings.warn(
                "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.",
                EncodingWarning, 2)
        if sys.flags.utf8_mode:
            return 'utf-8'

        if not do_setlocale:
            return getencoding()

        # Temporarily switch LC_CTYPE to the user's default locale and
        # restore the previous setting afterwards.
        old_loc = setlocale(LC_CTYPE)
        try:
            try:
                setlocale(LC_CTYPE, "")
            except Error:
                pass
            return getencoding()
        finally:
            setlocale(LC_CTYPE, old_loc)


### Database
#
# The following data was extracted from the locale.alias file which
# comes with X11 and then hand edited removing the explicit encoding
# definitions and adding some more aliases. The file is usually
# available as /usr/lib/X11/locale/locale.alias.
#

#
# The locale_encoding_alias table maps lowercase encoding alias names
# to C locale encoding names (case-sensitive). Note that normalize()
# first looks up the encoding in the encodings.aliases dictionary and
# then applies this mapping to find the correct C lib name for the
# encoding.
718# 719locale_encoding_alias = { 720 721 # Mappings for non-standard encoding names used in locale names 722 '437': 'C', 723 'c': 'C', 724 'en': 'ISO8859-1', 725 'jis': 'JIS7', 726 'jis7': 'JIS7', 727 'ajec': 'eucJP', 728 'koi8c': 'KOI8-C', 729 'microsoftcp1251': 'CP1251', 730 'microsoftcp1255': 'CP1255', 731 'microsoftcp1256': 'CP1256', 732 '88591': 'ISO8859-1', 733 '88592': 'ISO8859-2', 734 '88595': 'ISO8859-5', 735 '885915': 'ISO8859-15', 736 737 # Mappings from Python codec names to C lib encoding names 738 'ascii': 'ISO8859-1', 739 'latin_1': 'ISO8859-1', 740 'iso8859_1': 'ISO8859-1', 741 'iso8859_10': 'ISO8859-10', 742 'iso8859_11': 'ISO8859-11', 743 'iso8859_13': 'ISO8859-13', 744 'iso8859_14': 'ISO8859-14', 745 'iso8859_15': 'ISO8859-15', 746 'iso8859_16': 'ISO8859-16', 747 'iso8859_2': 'ISO8859-2', 748 'iso8859_3': 'ISO8859-3', 749 'iso8859_4': 'ISO8859-4', 750 'iso8859_5': 'ISO8859-5', 751 'iso8859_6': 'ISO8859-6', 752 'iso8859_7': 'ISO8859-7', 753 'iso8859_8': 'ISO8859-8', 754 'iso8859_9': 'ISO8859-9', 755 'iso2022_jp': 'JIS7', 756 'shift_jis': 'SJIS', 757 'tactis': 'TACTIS', 758 'euc_jp': 'eucJP', 759 'euc_kr': 'eucKR', 760 'utf_8': 'UTF-8', 761 'koi8_r': 'KOI8-R', 762 'koi8_t': 'KOI8-T', 763 'koi8_u': 'KOI8-U', 764 'kz1048': 'RK1048', 765 'cp1251': 'CP1251', 766 'cp1255': 'CP1255', 767 'cp1256': 'CP1256', 768 769 # XXX This list is still incomplete. If you know more 770 # mappings, please file a bug report. Thanks. 771} 772 773for k, v in sorted(locale_encoding_alias.items()): 774 k = k.replace('_', '') 775 locale_encoding_alias.setdefault(k, v) 776del k, v 777 778# 779# The locale_alias table maps lowercase alias names to C locale names 780# (case-sensitive). Encodings are always separated from the locale 781# name using a dot ('.'); they should only be given in case the 782# language name is needed to interpret the given encoding alias 783# correctly (CJK codes often have this need). 
#
# Note that the normalize() function which uses these tables
# removes '_' and '-' characters from the encoding part of the
# locale name before doing the lookup. This saves a lot of
# space in the table.
#
# MAL 2004-12-10:
# Updated alias mapping to most recent locale.alias file
# from X.org distribution using makelocalealias.py.
#
# These are the differences compared to the old mapping (Python 2.4
# and older):
#
#    updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
#    updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
#    updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
#    updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
#    updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
#    updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
#    updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
#    updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
#    updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
#    updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
#    updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
#    updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
#    updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
#    updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
#    updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
#    updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
#    updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
#    updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
#    updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
#    updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
#    updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
#    updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
#
# MAL 2008-05-30:
# Updated alias mapping to most recent locale.alias file
# from X.org distribution using makelocalealias.py.
#
# These are the differences compared to the old mapping (Python 2.5
# and older):
#
#    updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2'
#    updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
#    updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
#    updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2'
#    updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
#    updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
#    updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
#    updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
#    updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
#    updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
#    updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2'
#    updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
#    updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
#    updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
#    updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
#    updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
#    updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
#    updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8'
#    updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
#
# AP 2010-04-12:
# Updated alias mapping to most recent locale.alias file
# from X.org distribution using makelocalealias.py.
#
# These are the differences compared to the old mapping (Python 2.6.5
# and older):
#
#    updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
#    updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
#    updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
#    updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
#    updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
#    updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
#    updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
#    updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
#    updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin'
#    updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
#    updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin'
#    updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8'
#    updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
#
# SS 2013-12-20:
# Updated alias mapping to most recent locale.alias file
# from X.org distribution using makelocalealias.py.
#
# These are the differences compared to the old mapping (Python 3.3.3
# and older):
#
#    updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
#    updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
#    updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
#    updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
#    updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
#    updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
#    updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8'
#    updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
#    updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8'
#    updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
#    updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
#
# SS 2014-10-01:
# Updated alias mapping with glibc 2.19 supported locales.
889# 890# SS 2018-05-05: 891# Updated alias mapping with glibc 2.27 supported locales. 892# 893# These are the differences compared to the old mapping (Python 3.6.5 894# and older): 895# 896# updated 'ca_es@valencia' -> 'ca_ES.ISO8859-15@valencia' to 'ca_ES.UTF-8@valencia' 897# updated 'kk_kz' -> 'kk_KZ.RK1048' to 'kk_KZ.ptcp154' 898# updated 'russian' -> 'ru_RU.ISO8859-5' to 'ru_RU.KOI8-R' 899 900locale_alias = { 901 'a3': 'az_AZ.KOI8-C', 902 'a3_az': 'az_AZ.KOI8-C', 903 'a3_az.koic': 'az_AZ.KOI8-C', 904 'aa_dj': 'aa_DJ.ISO8859-1', 905 'aa_er': 'aa_ER.UTF-8', 906 'aa_et': 'aa_ET.UTF-8', 907 'af': 'af_ZA.ISO8859-1', 908 'af_za': 'af_ZA.ISO8859-1', 909 'agr_pe': 'agr_PE.UTF-8', 910 'ak_gh': 'ak_GH.UTF-8', 911 'am': 'am_ET.UTF-8', 912 'am_et': 'am_ET.UTF-8', 913 'american': 'en_US.ISO8859-1', 914 'an_es': 'an_ES.ISO8859-15', 915 'anp_in': 'anp_IN.UTF-8', 916 'ar': 'ar_AA.ISO8859-6', 917 'ar_aa': 'ar_AA.ISO8859-6', 918 'ar_ae': 'ar_AE.ISO8859-6', 919 'ar_bh': 'ar_BH.ISO8859-6', 920 'ar_dz': 'ar_DZ.ISO8859-6', 921 'ar_eg': 'ar_EG.ISO8859-6', 922 'ar_in': 'ar_IN.UTF-8', 923 'ar_iq': 'ar_IQ.ISO8859-6', 924 'ar_jo': 'ar_JO.ISO8859-6', 925 'ar_kw': 'ar_KW.ISO8859-6', 926 'ar_lb': 'ar_LB.ISO8859-6', 927 'ar_ly': 'ar_LY.ISO8859-6', 928 'ar_ma': 'ar_MA.ISO8859-6', 929 'ar_om': 'ar_OM.ISO8859-6', 930 'ar_qa': 'ar_QA.ISO8859-6', 931 'ar_sa': 'ar_SA.ISO8859-6', 932 'ar_sd': 'ar_SD.ISO8859-6', 933 'ar_ss': 'ar_SS.UTF-8', 934 'ar_sy': 'ar_SY.ISO8859-6', 935 'ar_tn': 'ar_TN.ISO8859-6', 936 'ar_ye': 'ar_YE.ISO8859-6', 937 'arabic': 'ar_AA.ISO8859-6', 938 'as': 'as_IN.UTF-8', 939 'as_in': 'as_IN.UTF-8', 940 'ast_es': 'ast_ES.ISO8859-15', 941 'ayc_pe': 'ayc_PE.UTF-8', 942 'az': 'az_AZ.ISO8859-9E', 943 'az_az': 'az_AZ.ISO8859-9E', 944 'az_az.iso88599e': 'az_AZ.ISO8859-9E', 945 'az_ir': 'az_IR.UTF-8', 946 'be': 'be_BY.CP1251', 947 'be@latin': 'be_BY.UTF-8@latin', 948 'be_bg.utf8': 'bg_BG.UTF-8', 949 'be_by': 'be_BY.CP1251', 950 'be_by@latin': 'be_BY.UTF-8@latin', 951 'bem_zm': 
'bem_ZM.UTF-8', 952 'ber_dz': 'ber_DZ.UTF-8', 953 'ber_ma': 'ber_MA.UTF-8', 954 'bg': 'bg_BG.CP1251', 955 'bg_bg': 'bg_BG.CP1251', 956 'bhb_in.utf8': 'bhb_IN.UTF-8', 957 'bho_in': 'bho_IN.UTF-8', 958 'bho_np': 'bho_NP.UTF-8', 959 'bi_vu': 'bi_VU.UTF-8', 960 'bn_bd': 'bn_BD.UTF-8', 961 'bn_in': 'bn_IN.UTF-8', 962 'bo_cn': 'bo_CN.UTF-8', 963 'bo_in': 'bo_IN.UTF-8', 964 'bokmal': 'nb_NO.ISO8859-1', 965 'bokm\xe5l': 'nb_NO.ISO8859-1', 966 'br': 'br_FR.ISO8859-1', 967 'br_fr': 'br_FR.ISO8859-1', 968 'brx_in': 'brx_IN.UTF-8', 969 'bs': 'bs_BA.ISO8859-2', 970 'bs_ba': 'bs_BA.ISO8859-2', 971 'bulgarian': 'bg_BG.CP1251', 972 'byn_er': 'byn_ER.UTF-8', 973 'c': 'C', 974 'c-french': 'fr_CA.ISO8859-1', 975 'c.ascii': 'C', 976 'c.en': 'C', 977 'c.iso88591': 'en_US.ISO8859-1', 978 'c.utf8': 'en_US.UTF-8', 979 'c_c': 'C', 980 'c_c.c': 'C', 981 'ca': 'ca_ES.ISO8859-1', 982 'ca_ad': 'ca_AD.ISO8859-1', 983 'ca_es': 'ca_ES.ISO8859-1', 984 'ca_es@valencia': 'ca_ES.UTF-8@valencia', 985 'ca_fr': 'ca_FR.ISO8859-1', 986 'ca_it': 'ca_IT.ISO8859-1', 987 'catalan': 'ca_ES.ISO8859-1', 988 'ce_ru': 'ce_RU.UTF-8', 989 'cextend': 'en_US.ISO8859-1', 990 'chinese-s': 'zh_CN.eucCN', 991 'chinese-t': 'zh_TW.eucTW', 992 'chr_us': 'chr_US.UTF-8', 993 'ckb_iq': 'ckb_IQ.UTF-8', 994 'cmn_tw': 'cmn_TW.UTF-8', 995 'crh_ua': 'crh_UA.UTF-8', 996 'croatian': 'hr_HR.ISO8859-2', 997 'cs': 'cs_CZ.ISO8859-2', 998 'cs_cs': 'cs_CZ.ISO8859-2', 999 'cs_cz': 'cs_CZ.ISO8859-2', 1000 'csb_pl': 'csb_PL.UTF-8', 1001 'cv_ru': 'cv_RU.UTF-8', 1002 'cy': 'cy_GB.ISO8859-1', 1003 'cy_gb': 'cy_GB.ISO8859-1', 1004 'cz': 'cs_CZ.ISO8859-2', 1005 'cz_cz': 'cs_CZ.ISO8859-2', 1006 'czech': 'cs_CZ.ISO8859-2', 1007 'da': 'da_DK.ISO8859-1', 1008 'da_dk': 'da_DK.ISO8859-1', 1009 'danish': 'da_DK.ISO8859-1', 1010 'dansk': 'da_DK.ISO8859-1', 1011 'de': 'de_DE.ISO8859-1', 1012 'de_at': 'de_AT.ISO8859-1', 1013 'de_be': 'de_BE.ISO8859-1', 1014 'de_ch': 'de_CH.ISO8859-1', 1015 'de_de': 'de_DE.ISO8859-1', 1016 'de_it': 'de_IT.ISO8859-1', 1017 
'de_li.utf8': 'de_LI.UTF-8', 1018 'de_lu': 'de_LU.ISO8859-1', 1019 'deutsch': 'de_DE.ISO8859-1', 1020 'doi_in': 'doi_IN.UTF-8', 1021 'dutch': 'nl_NL.ISO8859-1', 1022 'dutch.iso88591': 'nl_BE.ISO8859-1', 1023 'dv_mv': 'dv_MV.UTF-8', 1024 'dz_bt': 'dz_BT.UTF-8', 1025 'ee': 'ee_EE.ISO8859-4', 1026 'ee_ee': 'ee_EE.ISO8859-4', 1027 'eesti': 'et_EE.ISO8859-1', 1028 'el': 'el_GR.ISO8859-7', 1029 'el_cy': 'el_CY.ISO8859-7', 1030 'el_gr': 'el_GR.ISO8859-7', 1031 'el_gr@euro': 'el_GR.ISO8859-15', 1032 'en': 'en_US.ISO8859-1', 1033 'en_ag': 'en_AG.UTF-8', 1034 'en_au': 'en_AU.ISO8859-1', 1035 'en_be': 'en_BE.ISO8859-1', 1036 'en_bw': 'en_BW.ISO8859-1', 1037 'en_ca': 'en_CA.ISO8859-1', 1038 'en_dk': 'en_DK.ISO8859-1', 1039 'en_dl.utf8': 'en_DL.UTF-8', 1040 'en_gb': 'en_GB.ISO8859-1', 1041 'en_hk': 'en_HK.ISO8859-1', 1042 'en_ie': 'en_IE.ISO8859-1', 1043 'en_il': 'en_IL.UTF-8', 1044 'en_in': 'en_IN.ISO8859-1', 1045 'en_ng': 'en_NG.UTF-8', 1046 'en_nz': 'en_NZ.ISO8859-1', 1047 'en_ph': 'en_PH.ISO8859-1', 1048 'en_sc.utf8': 'en_SC.UTF-8', 1049 'en_sg': 'en_SG.ISO8859-1', 1050 'en_uk': 'en_GB.ISO8859-1', 1051 'en_us': 'en_US.ISO8859-1', 1052 'en_us@euro@euro': 'en_US.ISO8859-15', 1053 'en_za': 'en_ZA.ISO8859-1', 1054 'en_zm': 'en_ZM.UTF-8', 1055 'en_zw': 'en_ZW.ISO8859-1', 1056 'en_zw.utf8': 'en_ZS.UTF-8', 1057 'eng_gb': 'en_GB.ISO8859-1', 1058 'english': 'en_EN.ISO8859-1', 1059 'english.iso88591': 'en_US.ISO8859-1', 1060 'english_uk': 'en_GB.ISO8859-1', 1061 'english_united-states': 'en_US.ISO8859-1', 1062 'english_united-states.437': 'C', 1063 'english_us': 'en_US.ISO8859-1', 1064 'eo': 'eo_XX.ISO8859-3', 1065 'eo.utf8': 'eo.UTF-8', 1066 'eo_eo': 'eo_EO.ISO8859-3', 1067 'eo_us.utf8': 'eo_US.UTF-8', 1068 'eo_xx': 'eo_XX.ISO8859-3', 1069 'es': 'es_ES.ISO8859-1', 1070 'es_ar': 'es_AR.ISO8859-1', 1071 'es_bo': 'es_BO.ISO8859-1', 1072 'es_cl': 'es_CL.ISO8859-1', 1073 'es_co': 'es_CO.ISO8859-1', 1074 'es_cr': 'es_CR.ISO8859-1', 1075 'es_cu': 'es_CU.UTF-8', 1076 'es_do': 
'es_DO.ISO8859-1', 1077 'es_ec': 'es_EC.ISO8859-1', 1078 'es_es': 'es_ES.ISO8859-1', 1079 'es_gt': 'es_GT.ISO8859-1', 1080 'es_hn': 'es_HN.ISO8859-1', 1081 'es_mx': 'es_MX.ISO8859-1', 1082 'es_ni': 'es_NI.ISO8859-1', 1083 'es_pa': 'es_PA.ISO8859-1', 1084 'es_pe': 'es_PE.ISO8859-1', 1085 'es_pr': 'es_PR.ISO8859-1', 1086 'es_py': 'es_PY.ISO8859-1', 1087 'es_sv': 'es_SV.ISO8859-1', 1088 'es_us': 'es_US.ISO8859-1', 1089 'es_uy': 'es_UY.ISO8859-1', 1090 'es_ve': 'es_VE.ISO8859-1', 1091 'estonian': 'et_EE.ISO8859-1', 1092 'et': 'et_EE.ISO8859-15', 1093 'et_ee': 'et_EE.ISO8859-15', 1094 'eu': 'eu_ES.ISO8859-1', 1095 'eu_es': 'eu_ES.ISO8859-1', 1096 'eu_fr': 'eu_FR.ISO8859-1', 1097 'fa': 'fa_IR.UTF-8', 1098 'fa_ir': 'fa_IR.UTF-8', 1099 'fa_ir.isiri3342': 'fa_IR.ISIRI-3342', 1100 'ff_sn': 'ff_SN.UTF-8', 1101 'fi': 'fi_FI.ISO8859-15', 1102 'fi_fi': 'fi_FI.ISO8859-15', 1103 'fil_ph': 'fil_PH.UTF-8', 1104 'finnish': 'fi_FI.ISO8859-1', 1105 'fo': 'fo_FO.ISO8859-1', 1106 'fo_fo': 'fo_FO.ISO8859-1', 1107 'fr': 'fr_FR.ISO8859-1', 1108 'fr_be': 'fr_BE.ISO8859-1', 1109 'fr_ca': 'fr_CA.ISO8859-1', 1110 'fr_ch': 'fr_CH.ISO8859-1', 1111 'fr_fr': 'fr_FR.ISO8859-1', 1112 'fr_lu': 'fr_LU.ISO8859-1', 1113 'fran\xe7ais': 'fr_FR.ISO8859-1', 1114 'fre_fr': 'fr_FR.ISO8859-1', 1115 'french': 'fr_FR.ISO8859-1', 1116 'french.iso88591': 'fr_CH.ISO8859-1', 1117 'french_france': 'fr_FR.ISO8859-1', 1118 'fur_it': 'fur_IT.UTF-8', 1119 'fy_de': 'fy_DE.UTF-8', 1120 'fy_nl': 'fy_NL.UTF-8', 1121 'ga': 'ga_IE.ISO8859-1', 1122 'ga_ie': 'ga_IE.ISO8859-1', 1123 'galego': 'gl_ES.ISO8859-1', 1124 'galician': 'gl_ES.ISO8859-1', 1125 'gd': 'gd_GB.ISO8859-1', 1126 'gd_gb': 'gd_GB.ISO8859-1', 1127 'ger_de': 'de_DE.ISO8859-1', 1128 'german': 'de_DE.ISO8859-1', 1129 'german.iso88591': 'de_CH.ISO8859-1', 1130 'german_germany': 'de_DE.ISO8859-1', 1131 'gez_er': 'gez_ER.UTF-8', 1132 'gez_et': 'gez_ET.UTF-8', 1133 'gl': 'gl_ES.ISO8859-1', 1134 'gl_es': 'gl_ES.ISO8859-1', 1135 'greek': 'el_GR.ISO8859-7', 1136 'gu_in': 
'gu_IN.UTF-8', 1137 'gv': 'gv_GB.ISO8859-1', 1138 'gv_gb': 'gv_GB.ISO8859-1', 1139 'ha_ng': 'ha_NG.UTF-8', 1140 'hak_tw': 'hak_TW.UTF-8', 1141 'he': 'he_IL.ISO8859-8', 1142 'he_il': 'he_IL.ISO8859-8', 1143 'hebrew': 'he_IL.ISO8859-8', 1144 'hi': 'hi_IN.ISCII-DEV', 1145 'hi_in': 'hi_IN.ISCII-DEV', 1146 'hi_in.isciidev': 'hi_IN.ISCII-DEV', 1147 'hif_fj': 'hif_FJ.UTF-8', 1148 'hne': 'hne_IN.UTF-8', 1149 'hne_in': 'hne_IN.UTF-8', 1150 'hr': 'hr_HR.ISO8859-2', 1151 'hr_hr': 'hr_HR.ISO8859-2', 1152 'hrvatski': 'hr_HR.ISO8859-2', 1153 'hsb_de': 'hsb_DE.ISO8859-2', 1154 'ht_ht': 'ht_HT.UTF-8', 1155 'hu': 'hu_HU.ISO8859-2', 1156 'hu_hu': 'hu_HU.ISO8859-2', 1157 'hungarian': 'hu_HU.ISO8859-2', 1158 'hy_am': 'hy_AM.UTF-8', 1159 'hy_am.armscii8': 'hy_AM.ARMSCII_8', 1160 'ia': 'ia.UTF-8', 1161 'ia_fr': 'ia_FR.UTF-8', 1162 'icelandic': 'is_IS.ISO8859-1', 1163 'id': 'id_ID.ISO8859-1', 1164 'id_id': 'id_ID.ISO8859-1', 1165 'ig_ng': 'ig_NG.UTF-8', 1166 'ik_ca': 'ik_CA.UTF-8', 1167 'in': 'id_ID.ISO8859-1', 1168 'in_id': 'id_ID.ISO8859-1', 1169 'is': 'is_IS.ISO8859-1', 1170 'is_is': 'is_IS.ISO8859-1', 1171 'iso-8859-1': 'en_US.ISO8859-1', 1172 'iso-8859-15': 'en_US.ISO8859-15', 1173 'iso8859-1': 'en_US.ISO8859-1', 1174 'iso8859-15': 'en_US.ISO8859-15', 1175 'iso_8859_1': 'en_US.ISO8859-1', 1176 'iso_8859_15': 'en_US.ISO8859-15', 1177 'it': 'it_IT.ISO8859-1', 1178 'it_ch': 'it_CH.ISO8859-1', 1179 'it_it': 'it_IT.ISO8859-1', 1180 'italian': 'it_IT.ISO8859-1', 1181 'iu': 'iu_CA.NUNACOM-8', 1182 'iu_ca': 'iu_CA.NUNACOM-8', 1183 'iu_ca.nunacom8': 'iu_CA.NUNACOM-8', 1184 'iw': 'he_IL.ISO8859-8', 1185 'iw_il': 'he_IL.ISO8859-8', 1186 'iw_il.utf8': 'iw_IL.UTF-8', 1187 'ja': 'ja_JP.eucJP', 1188 'ja_jp': 'ja_JP.eucJP', 1189 'ja_jp.euc': 'ja_JP.eucJP', 1190 'ja_jp.mscode': 'ja_JP.SJIS', 1191 'ja_jp.pck': 'ja_JP.SJIS', 1192 'japan': 'ja_JP.eucJP', 1193 'japanese': 'ja_JP.eucJP', 1194 'japanese-euc': 'ja_JP.eucJP', 1195 'japanese.euc': 'ja_JP.eucJP', 1196 'jp_jp': 'ja_JP.eucJP', 1197 'ka': 
'ka_GE.GEORGIAN-ACADEMY', 1198 'ka_ge': 'ka_GE.GEORGIAN-ACADEMY', 1199 'ka_ge.georgianacademy': 'ka_GE.GEORGIAN-ACADEMY', 1200 'ka_ge.georgianps': 'ka_GE.GEORGIAN-PS', 1201 'ka_ge.georgianrs': 'ka_GE.GEORGIAN-ACADEMY', 1202 'kab_dz': 'kab_DZ.UTF-8', 1203 'kk_kz': 'kk_KZ.ptcp154', 1204 'kl': 'kl_GL.ISO8859-1', 1205 'kl_gl': 'kl_GL.ISO8859-1', 1206 'km_kh': 'km_KH.UTF-8', 1207 'kn': 'kn_IN.UTF-8', 1208 'kn_in': 'kn_IN.UTF-8', 1209 'ko': 'ko_KR.eucKR', 1210 'ko_kr': 'ko_KR.eucKR', 1211 'ko_kr.euc': 'ko_KR.eucKR', 1212 'kok_in': 'kok_IN.UTF-8', 1213 'korean': 'ko_KR.eucKR', 1214 'korean.euc': 'ko_KR.eucKR', 1215 'ks': 'ks_IN.UTF-8', 1216 'ks_in': 'ks_IN.UTF-8', 1217 '[email protected]': 'ks_IN.UTF-8@devanagari', 1218 'ku_tr': 'ku_TR.ISO8859-9', 1219 'kw': 'kw_GB.ISO8859-1', 1220 'kw_gb': 'kw_GB.ISO8859-1', 1221 'ky': 'ky_KG.UTF-8', 1222 'ky_kg': 'ky_KG.UTF-8', 1223 'lb_lu': 'lb_LU.UTF-8', 1224 'lg_ug': 'lg_UG.ISO8859-10', 1225 'li_be': 'li_BE.UTF-8', 1226 'li_nl': 'li_NL.UTF-8', 1227 'lij_it': 'lij_IT.UTF-8', 1228 'lithuanian': 'lt_LT.ISO8859-13', 1229 'ln_cd': 'ln_CD.UTF-8', 1230 'lo': 'lo_LA.MULELAO-1', 1231 'lo_la': 'lo_LA.MULELAO-1', 1232 'lo_la.cp1133': 'lo_LA.IBM-CP1133', 1233 'lo_la.ibmcp1133': 'lo_LA.IBM-CP1133', 1234 'lo_la.mulelao1': 'lo_LA.MULELAO-1', 1235 'lt': 'lt_LT.ISO8859-13', 1236 'lt_lt': 'lt_LT.ISO8859-13', 1237 'lv': 'lv_LV.ISO8859-13', 1238 'lv_lv': 'lv_LV.ISO8859-13', 1239 'lzh_tw': 'lzh_TW.UTF-8', 1240 'mag_in': 'mag_IN.UTF-8', 1241 'mai': 'mai_IN.UTF-8', 1242 'mai_in': 'mai_IN.UTF-8', 1243 'mai_np': 'mai_NP.UTF-8', 1244 'mfe_mu': 'mfe_MU.UTF-8', 1245 'mg_mg': 'mg_MG.ISO8859-15', 1246 'mhr_ru': 'mhr_RU.UTF-8', 1247 'mi': 'mi_NZ.ISO8859-1', 1248 'mi_nz': 'mi_NZ.ISO8859-1', 1249 'miq_ni': 'miq_NI.UTF-8', 1250 'mjw_in': 'mjw_IN.UTF-8', 1251 'mk': 'mk_MK.ISO8859-5', 1252 'mk_mk': 'mk_MK.ISO8859-5', 1253 'ml': 'ml_IN.UTF-8', 1254 'ml_in': 'ml_IN.UTF-8', 1255 'mn_mn': 'mn_MN.UTF-8', 1256 'mni_in': 'mni_IN.UTF-8', 1257 'mr': 'mr_IN.UTF-8', 1258 'mr_in': 
'mr_IN.UTF-8', 1259 'ms': 'ms_MY.ISO8859-1', 1260 'ms_my': 'ms_MY.ISO8859-1', 1261 'mt': 'mt_MT.ISO8859-3', 1262 'mt_mt': 'mt_MT.ISO8859-3', 1263 'my_mm': 'my_MM.UTF-8', 1264 'nan_tw': 'nan_TW.UTF-8', 1265 'nb': 'nb_NO.ISO8859-1', 1266 'nb_no': 'nb_NO.ISO8859-1', 1267 'nds_de': 'nds_DE.UTF-8', 1268 'nds_nl': 'nds_NL.UTF-8', 1269 'ne_np': 'ne_NP.UTF-8', 1270 'nhn_mx': 'nhn_MX.UTF-8', 1271 'niu_nu': 'niu_NU.UTF-8', 1272 'niu_nz': 'niu_NZ.UTF-8', 1273 'nl': 'nl_NL.ISO8859-1', 1274 'nl_aw': 'nl_AW.UTF-8', 1275 'nl_be': 'nl_BE.ISO8859-1', 1276 'nl_nl': 'nl_NL.ISO8859-1', 1277 'nn': 'nn_NO.ISO8859-1', 1278 'nn_no': 'nn_NO.ISO8859-1', 1279 'no': 'no_NO.ISO8859-1', 1280 'no@nynorsk': 'ny_NO.ISO8859-1', 1281 'no_no': 'no_NO.ISO8859-1', 1282 'no_no.iso88591@bokmal': 'no_NO.ISO8859-1', 1283 'no_no.iso88591@nynorsk': 'no_NO.ISO8859-1', 1284 'norwegian': 'no_NO.ISO8859-1', 1285 'nr': 'nr_ZA.ISO8859-1', 1286 'nr_za': 'nr_ZA.ISO8859-1', 1287 'nso': 'nso_ZA.ISO8859-15', 1288 'nso_za': 'nso_ZA.ISO8859-15', 1289 'ny': 'ny_NO.ISO8859-1', 1290 'ny_no': 'ny_NO.ISO8859-1', 1291 'nynorsk': 'nn_NO.ISO8859-1', 1292 'oc': 'oc_FR.ISO8859-1', 1293 'oc_fr': 'oc_FR.ISO8859-1', 1294 'om_et': 'om_ET.UTF-8', 1295 'om_ke': 'om_KE.ISO8859-1', 1296 'or': 'or_IN.UTF-8', 1297 'or_in': 'or_IN.UTF-8', 1298 'os_ru': 'os_RU.UTF-8', 1299 'pa': 'pa_IN.UTF-8', 1300 'pa_in': 'pa_IN.UTF-8', 1301 'pa_pk': 'pa_PK.UTF-8', 1302 'pap_an': 'pap_AN.UTF-8', 1303 'pap_aw': 'pap_AW.UTF-8', 1304 'pap_cw': 'pap_CW.UTF-8', 1305 'pd': 'pd_US.ISO8859-1', 1306 'pd_de': 'pd_DE.ISO8859-1', 1307 'pd_us': 'pd_US.ISO8859-1', 1308 'ph': 'ph_PH.ISO8859-1', 1309 'ph_ph': 'ph_PH.ISO8859-1', 1310 'pl': 'pl_PL.ISO8859-2', 1311 'pl_pl': 'pl_PL.ISO8859-2', 1312 'polish': 'pl_PL.ISO8859-2', 1313 'portuguese': 'pt_PT.ISO8859-1', 1314 'portuguese_brazil': 'pt_BR.ISO8859-1', 1315 'posix': 'C', 1316 'posix-utf2': 'C', 1317 'pp': 'pp_AN.ISO8859-1', 1318 'pp_an': 'pp_AN.ISO8859-1', 1319 'ps_af': 'ps_AF.UTF-8', 1320 'pt': 'pt_PT.ISO8859-1', 1321 
'pt_br': 'pt_BR.ISO8859-1', 1322 'pt_pt': 'pt_PT.ISO8859-1', 1323 'quz_pe': 'quz_PE.UTF-8', 1324 'raj_in': 'raj_IN.UTF-8', 1325 'ro': 'ro_RO.ISO8859-2', 1326 'ro_ro': 'ro_RO.ISO8859-2', 1327 'romanian': 'ro_RO.ISO8859-2', 1328 'ru': 'ru_RU.UTF-8', 1329 'ru_ru': 'ru_RU.UTF-8', 1330 'ru_ua': 'ru_UA.KOI8-U', 1331 'rumanian': 'ro_RO.ISO8859-2', 1332 'russian': 'ru_RU.KOI8-R', 1333 'rw': 'rw_RW.ISO8859-1', 1334 'rw_rw': 'rw_RW.ISO8859-1', 1335 'sa_in': 'sa_IN.UTF-8', 1336 'sat_in': 'sat_IN.UTF-8', 1337 'sc_it': 'sc_IT.UTF-8', 1338 'sd': 'sd_IN.UTF-8', 1339 'sd_in': 'sd_IN.UTF-8', 1340 '[email protected]': 'sd_IN.UTF-8@devanagari', 1341 'sd_pk': 'sd_PK.UTF-8', 1342 'se_no': 'se_NO.UTF-8', 1343 'serbocroatian': 'sr_RS.UTF-8@latin', 1344 'sgs_lt': 'sgs_LT.UTF-8', 1345 'sh': 'sr_RS.UTF-8@latin', 1346 'sh_ba.iso88592@bosnia': 'sr_CS.ISO8859-2', 1347 'sh_hr': 'sh_HR.ISO8859-2', 1348 'sh_hr.iso88592': 'hr_HR.ISO8859-2', 1349 'sh_sp': 'sr_CS.ISO8859-2', 1350 'sh_yu': 'sr_RS.UTF-8@latin', 1351 'shn_mm': 'shn_MM.UTF-8', 1352 'shs_ca': 'shs_CA.UTF-8', 1353 'si': 'si_LK.UTF-8', 1354 'si_lk': 'si_LK.UTF-8', 1355 'sid_et': 'sid_ET.UTF-8', 1356 'sinhala': 'si_LK.UTF-8', 1357 'sk': 'sk_SK.ISO8859-2', 1358 'sk_sk': 'sk_SK.ISO8859-2', 1359 'sl': 'sl_SI.ISO8859-2', 1360 'sl_cs': 'sl_CS.ISO8859-2', 1361 'sl_si': 'sl_SI.ISO8859-2', 1362 'slovak': 'sk_SK.ISO8859-2', 1363 'slovene': 'sl_SI.ISO8859-2', 1364 'slovenian': 'sl_SI.ISO8859-2', 1365 'sm_ws': 'sm_WS.UTF-8', 1366 'so_dj': 'so_DJ.ISO8859-1', 1367 'so_et': 'so_ET.UTF-8', 1368 'so_ke': 'so_KE.ISO8859-1', 1369 'so_so': 'so_SO.ISO8859-1', 1370 'sp': 'sr_CS.ISO8859-5', 1371 'sp_yu': 'sr_CS.ISO8859-5', 1372 'spanish': 'es_ES.ISO8859-1', 1373 'spanish_spain': 'es_ES.ISO8859-1', 1374 'sq': 'sq_AL.ISO8859-2', 1375 'sq_al': 'sq_AL.ISO8859-2', 1376 'sq_mk': 'sq_MK.UTF-8', 1377 'sr': 'sr_RS.UTF-8', 1378 'sr@cyrillic': 'sr_RS.UTF-8', 1379 'sr@latn': 'sr_CS.UTF-8@latin', 1380 'sr_cs': 'sr_CS.UTF-8', 1381 'sr_cs.iso88592@latn': 'sr_CS.ISO8859-2', 
1382 'sr_cs@latn': 'sr_CS.UTF-8@latin', 1383 'sr_me': 'sr_ME.UTF-8', 1384 'sr_rs': 'sr_RS.UTF-8', 1385 'sr_rs@latn': 'sr_RS.UTF-8@latin', 1386 'sr_sp': 'sr_CS.ISO8859-2', 1387 'sr_yu': 'sr_RS.UTF-8@latin', 1388 'sr_yu.cp1251@cyrillic': 'sr_CS.CP1251', 1389 'sr_yu.iso88592': 'sr_CS.ISO8859-2', 1390 'sr_yu.iso88595': 'sr_CS.ISO8859-5', 1391 'sr_yu.iso88595@cyrillic': 'sr_CS.ISO8859-5', 1392 'sr_yu.microsoftcp1251@cyrillic': 'sr_CS.CP1251', 1393 'sr_yu.utf8': 'sr_RS.UTF-8', 1394 'sr_yu.utf8@cyrillic': 'sr_RS.UTF-8', 1395 'sr_yu@cyrillic': 'sr_RS.UTF-8', 1396 'ss': 'ss_ZA.ISO8859-1', 1397 'ss_za': 'ss_ZA.ISO8859-1', 1398 'st': 'st_ZA.ISO8859-1', 1399 'st_za': 'st_ZA.ISO8859-1', 1400 'sv': 'sv_SE.ISO8859-1', 1401 'sv_fi': 'sv_FI.ISO8859-1', 1402 'sv_se': 'sv_SE.ISO8859-1', 1403 'sw_ke': 'sw_KE.UTF-8', 1404 'sw_tz': 'sw_TZ.UTF-8', 1405 'swedish': 'sv_SE.ISO8859-1', 1406 'szl_pl': 'szl_PL.UTF-8', 1407 'ta': 'ta_IN.TSCII-0', 1408 'ta_in': 'ta_IN.TSCII-0', 1409 'ta_in.tscii': 'ta_IN.TSCII-0', 1410 'ta_in.tscii0': 'ta_IN.TSCII-0', 1411 'ta_lk': 'ta_LK.UTF-8', 1412 'tcy_in.utf8': 'tcy_IN.UTF-8', 1413 'te': 'te_IN.UTF-8', 1414 'te_in': 'te_IN.UTF-8', 1415 'tg': 'tg_TJ.KOI8-C', 1416 'tg_tj': 'tg_TJ.KOI8-C', 1417 'th': 'th_TH.ISO8859-11', 1418 'th_th': 'th_TH.ISO8859-11', 1419 'th_th.tactis': 'th_TH.TIS620', 1420 'th_th.tis620': 'th_TH.TIS620', 1421 'thai': 'th_TH.ISO8859-11', 1422 'the_np': 'the_NP.UTF-8', 1423 'ti_er': 'ti_ER.UTF-8', 1424 'ti_et': 'ti_ET.UTF-8', 1425 'tig_er': 'tig_ER.UTF-8', 1426 'tk_tm': 'tk_TM.UTF-8', 1427 'tl': 'tl_PH.ISO8859-1', 1428 'tl_ph': 'tl_PH.ISO8859-1', 1429 'tn': 'tn_ZA.ISO8859-15', 1430 'tn_za': 'tn_ZA.ISO8859-15', 1431 'to_to': 'to_TO.UTF-8', 1432 'tpi_pg': 'tpi_PG.UTF-8', 1433 'tr': 'tr_TR.ISO8859-9', 1434 'tr_cy': 'tr_CY.ISO8859-9', 1435 'tr_tr': 'tr_TR.ISO8859-9', 1436 'ts': 'ts_ZA.ISO8859-1', 1437 'ts_za': 'ts_ZA.ISO8859-1', 1438 'tt': 'tt_RU.TATAR-CYR', 1439 'tt_ru': 'tt_RU.TATAR-CYR', 1440 'tt_ru.tatarcyr': 'tt_RU.TATAR-CYR', 1441 
'tt_ru@iqtelif': 'tt_RU.UTF-8@iqtelif', 1442 'turkish': 'tr_TR.ISO8859-9', 1443 'ug_cn': 'ug_CN.UTF-8', 1444 'uk': 'uk_UA.KOI8-U', 1445 'uk_ua': 'uk_UA.KOI8-U', 1446 'univ': 'en_US.utf', 1447 'universal': 'en_US.utf', 1448 'universal.utf8@ucs4': 'en_US.UTF-8', 1449 'unm_us': 'unm_US.UTF-8', 1450 'ur': 'ur_PK.CP1256', 1451 'ur_in': 'ur_IN.UTF-8', 1452 'ur_pk': 'ur_PK.CP1256', 1453 'uz': 'uz_UZ.UTF-8', 1454 'uz_uz': 'uz_UZ.UTF-8', 1455 'uz_uz@cyrillic': 'uz_UZ.UTF-8', 1456 've': 've_ZA.UTF-8', 1457 've_za': 've_ZA.UTF-8', 1458 'vi': 'vi_VN.TCVN', 1459 'vi_vn': 'vi_VN.TCVN', 1460 'vi_vn.tcvn': 'vi_VN.TCVN', 1461 'vi_vn.tcvn5712': 'vi_VN.TCVN', 1462 'vi_vn.viscii': 'vi_VN.VISCII', 1463 'vi_vn.viscii111': 'vi_VN.VISCII', 1464 'wa': 'wa_BE.ISO8859-1', 1465 'wa_be': 'wa_BE.ISO8859-1', 1466 'wae_ch': 'wae_CH.UTF-8', 1467 'wal_et': 'wal_ET.UTF-8', 1468 'wo_sn': 'wo_SN.UTF-8', 1469 'xh': 'xh_ZA.ISO8859-1', 1470 'xh_za': 'xh_ZA.ISO8859-1', 1471 'yi': 'yi_US.CP1255', 1472 'yi_us': 'yi_US.CP1255', 1473 'yo_ng': 'yo_NG.UTF-8', 1474 'yue_hk': 'yue_HK.UTF-8', 1475 'yuw_pg': 'yuw_PG.UTF-8', 1476 'zh': 'zh_CN.eucCN', 1477 'zh_cn': 'zh_CN.gb2312', 1478 'zh_cn.big5': 'zh_TW.big5', 1479 'zh_cn.euc': 'zh_CN.eucCN', 1480 'zh_hk': 'zh_HK.big5hkscs', 1481 'zh_hk.big5hk': 'zh_HK.big5hkscs', 1482 'zh_sg': 'zh_SG.GB2312', 1483 'zh_sg.gbk': 'zh_SG.GBK', 1484 'zh_tw': 'zh_TW.big5', 1485 'zh_tw.euc': 'zh_TW.eucTW', 1486 'zh_tw.euctw': 'zh_TW.eucTW', 1487 'zu': 'zu_ZA.ISO8859-1', 1488 'zu_za': 'zu_ZA.ISO8859-1', 1489} 1490 1491# 1492# This maps Windows language identifiers to locale strings. 1493# 1494# This list has been updated from 1495# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp 1496# to include every locale up to Windows Vista. 1497# 1498# NOTE: this mapping is incomplete. 
# If your language is missing, please
# submit a bug report to the Python bug tracker at http://bugs.python.org/
# Make sure you include the missing language identifier and the suggested
# locale code.
#

windows_locale = {
    0x0436: "af_ZA", # Afrikaans
    0x041c: "sq_AL", # Albanian
    0x0484: "gsw_FR",# Alsatian - France
    0x045e: "am_ET", # Amharic - Ethiopia
    0x0401: "ar_SA", # Arabic - Saudi Arabia
    0x0801: "ar_IQ", # Arabic - Iraq
    0x0c01: "ar_EG", # Arabic - Egypt
    0x1001: "ar_LY", # Arabic - Libya
    0x1401: "ar_DZ", # Arabic - Algeria
    0x1801: "ar_MA", # Arabic - Morocco
    0x1c01: "ar_TN", # Arabic - Tunisia
    0x2001: "ar_OM", # Arabic - Oman
    0x2401: "ar_YE", # Arabic - Yemen
    0x2801: "ar_SY", # Arabic - Syria
    0x2c01: "ar_JO", # Arabic - Jordan
    0x3001: "ar_LB", # Arabic - Lebanon
    0x3401: "ar_KW", # Arabic - Kuwait
    0x3801: "ar_AE", # Arabic - United Arab Emirates
    0x3c01: "ar_BH", # Arabic - Bahrain
    0x4001: "ar_QA", # Arabic - Qatar
    0x042b: "hy_AM", # Armenian
    0x044d: "as_IN", # Assamese - India
    0x042c: "az_AZ", # Azeri - Latin
    0x082c: "az_AZ", # Azeri - Cyrillic
    0x046d: "ba_RU", # Bashkir
    0x042d: "eu_ES", # Basque - Spain
    0x0423: "be_BY", # Belarusian
    0x0445: "bn_IN", # Bengali
    0x201a: "bs_BA", # Bosnian - Cyrillic
    0x141a: "bs_BA", # Bosnian - Latin
    0x047e: "br_FR", # Breton - France
    0x0402: "bg_BG", # Bulgarian
#    0x0455: "my_MM", # Burmese - Not supported
    0x0403: "ca_ES", # Catalan
    0x0004: "zh_CHS",# Chinese - Simplified
    0x0404: "zh_TW", # Chinese - Taiwan
    0x0804: "zh_CN", # Chinese - PRC
    0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
    0x1004: "zh_SG", # Chinese - Singapore
    0x1404: "zh_MO", # Chinese - Macao S.A.R.
    0x7c04: "zh_CHT",# Chinese - Traditional
    0x0483: "co_FR", # Corsican - France
    0x041a: "hr_HR", # Croatian
    0x101a: "hr_BA", # Croatian - Bosnia
    0x0405: "cs_CZ", # Czech
    0x0406: "da_DK", # Danish
    0x048c: "gbz_AF",# Dari - Afghanistan
    0x0465: "div_MV",# Divehi - Maldives
    0x0413: "nl_NL", # Dutch - The Netherlands
    0x0813: "nl_BE", # Dutch - Belgium
    0x0409: "en_US", # English - United States
    0x0809: "en_GB", # English - United Kingdom
    0x0c09: "en_AU", # English - Australia
    0x1009: "en_CA", # English - Canada
    0x1409: "en_NZ", # English - New Zealand
    0x1809: "en_IE", # English - Ireland
    0x1c09: "en_ZA", # English - South Africa
    0x2009: "en_JA", # English - Jamaica
    0x2409: "en_CB", # English - Caribbean
    0x2809: "en_BZ", # English - Belize
    0x2c09: "en_TT", # English - Trinidad
    0x3009: "en_ZW", # English - Zimbabwe
    0x3409: "en_PH", # English - Philippines
    0x4009: "en_IN", # English - India
    0x4409: "en_MY", # English - Malaysia
    0x4809: "en_SG", # English - Singapore
    0x0425: "et_EE", # Estonian
    0x0438: "fo_FO", # Faroese
    0x0464: "fil_PH",# Filipino
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French - France
    0x080c: "fr_BE", # French - Belgium
    0x0c0c: "fr_CA", # French - Canada
    0x100c: "fr_CH", # French - Switzerland
    0x140c: "fr_LU", # French - Luxembourg
    0x180c: "fr_MC", # French - Monaco
    0x0462: "fy_NL", # Frisian - Netherlands
    0x0456: "gl_ES", # Galician
    0x0437: "ka_GE", # Georgian
    0x0407: "de_DE", # German - Germany
    0x0807: "de_CH", # German - Switzerland
    0x0c07: "de_AT", # German - Austria
    0x1007: "de_LU", # German - Luxembourg
    0x1407: "de_LI", # German - Liechtenstein
    0x0408: "el_GR", # Greek
    0x046f: "kl_GL", # Greenlandic - Greenland
    0x0447: "gu_IN", # Gujarati
    0x0468: "ha_NG", # Hausa - Latin
    0x040d: "he_IL", # Hebrew
    0x0439: "hi_IN", # Hindi
    0x040e: "hu_HU", # Hungarian
    0x040f: "is_IS", # Icelandic
    0x0421: "id_ID", # Indonesian
    0x045d: "iu_CA", # Inuktitut - Syllabics
    0x085d: "iu_CA", # Inuktitut - Latin
    0x083c: "ga_IE", # Irish - Ireland
    0x0410: "it_IT", # Italian - Italy
    0x0810: "it_CH", # Italian - Switzerland
    0x0411: "ja_JP", # Japanese
    0x044b: "kn_IN", # Kannada - India
    0x043f: "kk_KZ", # Kazakh
    0x0453: "kh_KH", # Khmer - Cambodia
    0x0486: "qut_GT",# K'iche - Guatemala
    0x0487: "rw_RW", # Kinyarwanda - Rwanda
    0x0457: "kok_IN",# Konkani
    0x0412: "ko_KR", # Korean
    0x0440: "ky_KG", # Kyrgyz
    0x0454: "lo_LA", # Lao - Lao PDR
    0x0426: "lv_LV", # Latvian
    0x0427: "lt_LT", # Lithuanian
    0x082e: "dsb_DE",# Lower Sorbian - Germany
    0x046e: "lb_LU", # Luxembourgish
    0x042f: "mk_MK", # FYROM Macedonian
    0x043e: "ms_MY", # Malay - Malaysia
    0x083e: "ms_BN", # Malay - Brunei Darussalam
    0x044c: "ml_IN", # Malayalam - India
    0x043a: "mt_MT", # Maltese
    0x0481: "mi_NZ", # Maori
    0x047a: "arn_CL",# Mapudungun
    0x044e: "mr_IN", # Marathi
    0x047c: "moh_CA",# Mohawk - Canada
    0x0450: "mn_MN", # Mongolian - Cyrillic
    0x0850: "mn_CN", # Mongolian - PRC
    0x0461: "ne_NP", # Nepali
    0x0414: "nb_NO", # Norwegian - Bokmal
    0x0814: "nn_NO", # Norwegian - Nynorsk
    0x0482: "oc_FR", # Occitan - France
    0x0448: "or_IN", # Oriya - India
    0x0463: "ps_AF", # Pashto - Afghanistan
    0x0429: "fa_IR", # Persian
    0x0415: "pl_PL", # Polish
    0x0416: "pt_BR", # Portuguese - Brazil
    0x0816: "pt_PT", # Portuguese - Portugal
    0x0446: "pa_IN", # Punjabi
    0x046b: "quz_BO",# Quechua (Bolivia)
    0x086b: "quz_EC",# Quechua (Ecuador)
    0x0c6b: "quz_PE",# Quechua (Peru)
    0x0418: "ro_RO", # Romanian - Romania
    0x0417: "rm_CH", # Romansh
    0x0419: "ru_RU", # Russian
    0x243b: "smn_FI",# Sami Finland
    0x103b: "smj_NO",# Sami Norway
    0x143b: "smj_SE",# Sami Sweden
    0x043b: "se_NO", # Sami Northern Norway
    0x083b: "se_SE", # Sami Northern Sweden
    0x0c3b: "se_FI", # Sami Northern Finland
    0x203b: "sms_FI",# Sami Skolt
    0x183b: "sma_NO",# Sami Southern Norway
    0x1c3b: "sma_SE",# Sami Southern Sweden
    0x044f: "sa_IN", # Sanskrit
    0x0c1a: "sr_SP", # Serbian - Cyrillic
    0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
    0x081a: "sr_SP", # Serbian - Latin
    0x181a: "sr_BA", # Serbian - Bosnia Latin
    0x045b: "si_LK", # Sinhala - Sri Lanka
    0x046c: "ns_ZA", # Northern Sotho
    0x0432: "tn_ZA", # Setswana - Southern Africa
    0x041b: "sk_SK", # Slovak
    0x0424: "sl_SI", # Slovenian
    0x040a: "es_ES", # Spanish - Spain
    0x080a: "es_MX", # Spanish - Mexico
    0x0c0a: "es_ES", # Spanish - Spain (Modern)
    0x100a: "es_GT", # Spanish - Guatemala
    0x140a: "es_CR", # Spanish - Costa Rica
    0x180a: "es_PA", # Spanish - Panama
    0x1c0a: "es_DO", # Spanish - Dominican Republic
    0x200a: "es_VE", # Spanish - Venezuela
    0x240a: "es_CO", # Spanish - Colombia
    0x280a: "es_PE", # Spanish - Peru
    0x2c0a: "es_AR", # Spanish - Argentina
    0x300a: "es_EC", # Spanish - Ecuador
    0x340a: "es_CL", # Spanish - Chile
    0x380a: "es_UR", # Spanish - Uruguay
    0x3c0a: "es_PY", # Spanish - Paraguay
    0x400a: "es_BO", # Spanish - Bolivia
    0x440a: "es_SV", # Spanish - El Salvador
    0x480a: "es_HN", # Spanish - Honduras
    0x4c0a: "es_NI", # Spanish - Nicaragua
    0x500a: "es_PR", # Spanish - Puerto Rico
    0x540a: "es_US", # Spanish - United States
#    0x0430: "", # Sutu - Not supported
    0x0441: "sw_KE", # Swahili
    0x041d: "sv_SE", # Swedish - Sweden
    0x081d: "sv_FI", # Swedish - Finland
    0x045a: "syr_SY",# Syriac
    0x0428: "tg_TJ", # Tajik - Cyrillic
    0x085f: "tmz_DZ",# Tamazight - Latin
    0x0449: "ta_IN", # Tamil
    0x0444: "tt_RU", # Tatar
    0x044a: "te_IN", # Telugu
    0x041e: "th_TH", # Thai
    0x0851: "bo_BT", # Tibetan - Bhutan
    0x0451: "bo_CN", # Tibetan - PRC
    0x041f: "tr_TR", # Turkish
    0x0442: "tk_TM", # Turkmen - Cyrillic
    0x0480: "ug_CN", # Uighur - Arabic
    0x0422: "uk_UA", # Ukrainian
    0x042e: "wen_DE",# Upper Sorbian - Germany
    0x0420: "ur_PK", # Urdu
    0x0820: "ur_IN", # Urdu - India
    0x0443: "uz_UZ", # Uzbek - Latin
    0x0843: "uz_UZ", # Uzbek - Cyrillic
    0x042a: "vi_VN", # Vietnamese
    0x0452: "cy_GB", # Welsh
    0x0488: "wo_SN", # Wolof - Senegal
    0x0434: "xh_ZA", # Xhosa - South Africa
    0x0485: "sah_RU",# Yakut - Cyrillic
    0x0478: "ii_CN", # Yi - PRC
    0x046a: "yo_NG", # Yoruba - Nigeria
    0x0435: "zu_ZA", # Zulu
}

def _print_locale():

    """ Test function.

        Prints the default locale reported by getdefaultlocale(), then the
        (language, encoding) pair of every LC_* category at three points:
        on entry, after resetlocale(), and after setlocale(LC_ALL, "").
    """
    # Every LC_* constant defined in this module; LC_ALL is left out of
    # the per-category report.
    categories = {name: value for name, value in globals().items()
                  if name[:3] == 'LC_'}
    del categories['LC_ALL']

    def _report_categories():
        # One "name ... / Language / Encoding" paragraph per category.
        for name, category in categories.items():
            print(name, '...')
            lang, enc = getlocale(category)
            print(' Language: ', lang or '(undefined)')
            print(' Encoding: ', enc or '(undefined)')
            print()

    print('Locale defaults as determined by getdefaultlocale():')
    print('-'*72)
    lang, enc = getdefaultlocale()
    print('Language: ', lang or '(undefined)')
    print('Encoding: ', enc or '(undefined)')
    print()

    print('Locale settings on startup:')
    print('-'*72)
    _report_categories()

    print()
    print('Locale settings after calling resetlocale():')
    print('-'*72)
    resetlocale()
    _report_categories()

    try:
        setlocale(LC_ALL, "")
    except Error:
        # Only a locale failure is reported here; KeyboardInterrupt and
        # other unrelated exceptions are allowed to propagate.
        print('NOTE:')
        print('setlocale(LC_ALL, "") does not support the default locale')
        print('given in the OS environment variables.')
    else:
        print()
        print('Locale settings after calling setlocale(LC_ALL, ""):')
        print('-'*72)
        _report_categories()

###

# LC_MESSAGES is exported only when the name is actually defined.
try:
    LC_MESSAGES
except NameError:
    pass
else:
    __all__.append("LC_MESSAGES")

if __name__=='__main__':
    print('Locale aliasing:')
    print()
    _print_locale()
    print()
    print('Number formatting:')
    print()
    _test()