3 The module provides low-level access to the C lib's locale APIs
4 and adds high-level number formatting APIs as well as a locale
5 aliasing engine to complement these.
7 The aliasing engine includes support for many commonly used locale
8 names and maps them to values suitable for passing to the C lib's
9 setlocale() function. It also includes default encodings for all
10 supported locale names.
# Public names exported by a star-import of this module.  This is only the
# base list: "LC_MESSAGES" is appended separately further down in the file.
22 __all__ = [
"setlocale",
"Error",
"localeconv",
"strcoll",
"strxfrm",
23 "format",
"str",
"atof",
"atoi",
"LC_CTYPE",
"LC_COLLATE",
24 "LC_TIME",
"LC_MONETARY",
"LC_NUMERIC",
"LC_ALL",
"CHAR_MAX"]
45 """ localeconv() -> dict.
46 Returns numeric and monetary locale-specific parameters.
49 return {
'grouping': [127],
50 'currency_symbol':
'',
55 'n_sep_by_space': 127,
59 'p_sep_by_space': 127,
60 'int_curr_symbol':
'',
63 'mon_thousands_sep':
'',
65 'mon_decimal_point':
'',
66 'int_frac_digits': 127}
69 """ setlocale(integer,string=None) -> string.
70 Activates/queries locale processing.
72 if value
is not None and value !=
'C':
73 raise Error,
'_locale emulation only supports "C" locale'
77 """ strcoll(string,string) -> int.
78 Compares two strings according to the locale.
83 """ strxfrm(string) -> string.
84 Returns a transformed string such that comparing results with cmp is locale-aware.
95 grouping=conv[
'grouping']
96 if not grouping:
return (s, 0)
104 while s
and grouping:
106 if grouping[0]==CHAR_MAX:
112 grouping=grouping[1:]
114 result=s[-group:]+conv[
'thousands_sep']+result
119 if s
and s[-1]
not in "0123456789":
121 return s+result+spaces,seps
125 result=s+conv[
'thousands_sep']+result
127 return result+spaces,seps
130 """Formats a value in the same way that % formatting would,
131 but takes the current locale into account.
132 Grouping is applied if the third parameter is true."""
134 fields = result.split(
".")
137 fields[0],seps=_group(fields[0])
139 result = fields[0]+
localeconv()[
'decimal_point']+fields[1]
143 raise Error,
"Too many decimal points in result string"
151 sp = result.find(
" ")
153 result = result[:sp]+result[sp+1:]
159 """Convert float to string, taking the locale into account."""
160 return format(
"%.12g",val)
163 "Parses a string as a float according to the locale settings."
178 "Converts a string to an integer according to the locale settings."
179 return atof(str, int)
184 s1=
format(
"%d", 123456789,1)
185 print s1,
"is",
atoi(s1)
188 print s1,
"is",
atof(s1)
197 _setlocale = setlocale
201 """ Returns a normalized locale code for the given locale
204 The returned locale code is formatted for use with
207 If normalization fails, the original name is returned
210 If the given encoding is not known, the function defaults to
211 the default encoding for the locale code just like setlocale()
216 fullname = localename.lower()
219 fullname = fullname.replace(
':',
'.')
221 langname, encoding = fullname.split(
'.')[:2]
222 fullname = langname +
'.' + encoding
228 code = locale_alias.get(fullname,
None)
233 code = locale_alias.get(langname,
None)
236 langname, defenc = code.split(
'.')
241 encoding = encoding_alias.get(encoding, encoding)
245 return langname +
'.' + encoding
252 def _parse_localename(localename):
254 """ Parses the locale code for localename and returns the
255 result as tuple (language code, encoding).
257 The localename is normalized and passed through the locale
258 alias engine. A ValueError is raised in case the locale name
261 The language code corresponds to RFC 1766. code and encoding
262 can be None in case the values cannot be determined or are
263 unknown to this implementation.
268 return code.split(
'.')[:2]
271 raise ValueError,
'unknown locale: %s' % localename
273 def _build_localename(localetuple):
275 """ Builds a locale code from the given tuple (language code,
278 No aliasing or normalizing takes place.
281 language, encoding = localetuple
287 return language +
'.' + encoding
291 """ Tries to determine the default locale settings and returns
292 them as tuple (language code, encoding).
294 According to POSIX, a program which has not called
295 setlocale(LC_ALL, "") runs using the portable 'C' locale.
296 Calling setlocale(LC_ALL, "") lets it use the default locale as
297 defined by the LANG variable. Since we don't want to interfere
298 with the current locale setting we thus emulate the behavior
299 in the way described above.
301 To maintain compatibility with other platforms, not only the
302 LANG variable is tested, but a list of variables given as
303 envvars parameter. The first found to be defined will be
304 used. envvars defaults to the search path used in GNU gettext;
305 it must always contain the variable name 'LANG'.
307 Except for the code 'C', the language code corresponds to RFC
308 1766. code and encoding can be None in case the values cannot
316 code, encoding = _locale._getdefaultlocale()
317 except (ImportError, AttributeError):
321 if sys.platform ==
"win32" and code
and code[:2] ==
"0x":
323 code = windows_locale.get(int(code, 0))
326 return code, encoding
330 lookup = os.environ.get
331 for variable
in envvars:
332 localename =
lookup(variable,
None)
333 if localename
is not None:
337 return _parse_localename(localename)
342 """ Returns the current setting for the given locale category as
343 tuple (language code, encoding).
345 category may be one of the LC_* values except LC_ALL. It
346 defaults to LC_CTYPE.
348 Except for the code 'C', the language code corresponds to RFC
349 1766. code and encoding can be None in case the values cannot
354 if category == LC_ALL
and ';' in localename:
355 raise TypeError,
'category LC_ALL is not supported'
356 return _parse_localename(localename)
360 """ Set the locale for the given category. The locale can be
361 a string, a locale tuple (language code, encoding), or None.
363 Locale tuples are converted to strings using the locale aliasing
364 engine. Locale strings are passed directly to the C lib.
366 category may be given as one of the LC_* values.
369 if locale
and type(locale)
is not type(
""):
371 locale =
normalize(_build_localename(locale))
376 """ Sets the locale for category to the default setting.
378 The default setting is determined by calling
379 getdefaultlocale(). category defaults to LC_ALL.
399 'iso8859':
'ISO8859-1',
401 '88591':
'ISO8859-1',
402 'ascii':
'ISO8859-1',
404 'iso88591':
'ISO8859-1',
405 'iso_8859-1':
'ISO8859-1',
406 '885915':
'ISO8859-15',
407 'iso885915':
'ISO8859-15',
408 'iso_8859-15':
'ISO8859-15',
409 'iso8859-2':
'ISO8859-2',
410 'iso88592':
'ISO8859-2',
411 'iso_8859-2':
'ISO8859-2',
412 'iso88595':
'ISO8859-5',
413 'iso88596':
'ISO8859-6',
414 'iso88597':
'ISO8859-7',
415 'iso88598':
'ISO8859-8',
416 'iso88599':
'ISO8859-9',
417 'iso-2022-jp':
'JIS7',
438 'american':
'en_US.ISO8859-1',
439 'ar':
'ar_AA.ISO8859-6',
440 'ar_aa':
'ar_AA.ISO8859-6',
441 'ar_sa':
'ar_SA.ISO8859-6',
442 'arabic':
'ar_AA.ISO8859-6',
443 'bg':
'bg_BG.ISO8859-5',
444 'bg_bg':
'bg_BG.ISO8859-5',
445 'bulgarian':
'bg_BG.ISO8859-5',
446 'c-french':
'fr_CA.ISO8859-1',
449 'cextend':
'en_US.ISO8859-1',
450 'chinese-s':
'zh_CN.eucCN',
451 'chinese-t':
'zh_TW.eucTW',
452 'croatian':
'hr_HR.ISO8859-2',
453 'cs':
'cs_CZ.ISO8859-2',
454 'cs_cs':
'cs_CZ.ISO8859-2',
455 'cs_cz':
'cs_CZ.ISO8859-2',
456 'cz':
'cz_CZ.ISO8859-2',
457 'cz_cz':
'cz_CZ.ISO8859-2',
458 'czech':
'cs_CS.ISO8859-2',
459 'da':
'da_DK.ISO8859-1',
460 'da_dk':
'da_DK.ISO8859-1',
461 'danish':
'da_DK.ISO8859-1',
462 'de':
'de_DE.ISO8859-1',
463 'de_at':
'de_AT.ISO8859-1',
464 'de_ch':
'de_CH.ISO8859-1',
465 'de_de':
'de_DE.ISO8859-1',
466 'dutch':
'nl_BE.ISO8859-1',
467 'ee':
'ee_EE.ISO8859-4',
468 'el':
'el_GR.ISO8859-7',
469 'el_gr':
'el_GR.ISO8859-7',
470 'en':
'en_US.ISO8859-1',
471 'en_au':
'en_AU.ISO8859-1',
472 'en_ca':
'en_CA.ISO8859-1',
473 'en_gb':
'en_GB.ISO8859-1',
474 'en_ie':
'en_IE.ISO8859-1',
475 'en_nz':
'en_NZ.ISO8859-1',
476 'en_uk':
'en_GB.ISO8859-1',
477 'en_us':
'en_US.ISO8859-1',
478 'eng_gb':
'en_GB.ISO8859-1',
479 'english':
'en_EN.ISO8859-1',
480 'english_uk':
'en_GB.ISO8859-1',
481 'english_united-states':
'en_US.ISO8859-1',
482 'english_us':
'en_US.ISO8859-1',
483 'es':
'es_ES.ISO8859-1',
484 'es_ar':
'es_AR.ISO8859-1',
485 'es_bo':
'es_BO.ISO8859-1',
486 'es_cl':
'es_CL.ISO8859-1',
487 'es_co':
'es_CO.ISO8859-1',
488 'es_cr':
'es_CR.ISO8859-1',
489 'es_ec':
'es_EC.ISO8859-1',
490 'es_es':
'es_ES.ISO8859-1',
491 'es_gt':
'es_GT.ISO8859-1',
492 'es_mx':
'es_MX.ISO8859-1',
493 'es_ni':
'es_NI.ISO8859-1',
494 'es_pa':
'es_PA.ISO8859-1',
495 'es_pe':
'es_PE.ISO8859-1',
496 'es_py':
'es_PY.ISO8859-1',
497 'es_sv':
'es_SV.ISO8859-1',
498 'es_uy':
'es_UY.ISO8859-1',
499 'es_ve':
'es_VE.ISO8859-1',
500 'et':
'et_EE.ISO8859-4',
501 'et_ee':
'et_EE.ISO8859-4',
502 'fi':
'fi_FI.ISO8859-1',
503 'fi_fi':
'fi_FI.ISO8859-1',
504 'finnish':
'fi_FI.ISO8859-1',
505 'fr':
'fr_FR.ISO8859-1',
506 'fr_be':
'fr_BE.ISO8859-1',
507 'fr_ca':
'fr_CA.ISO8859-1',
508 'fr_ch':
'fr_CH.ISO8859-1',
509 'fr_fr':
'fr_FR.ISO8859-1',
510 'fre_fr':
'fr_FR.ISO8859-1',
511 'french':
'fr_FR.ISO8859-1',
512 'french_france':
'fr_FR.ISO8859-1',
513 'ger_de':
'de_DE.ISO8859-1',
514 'german':
'de_DE.ISO8859-1',
515 'german_germany':
'de_DE.ISO8859-1',
516 'greek':
'el_GR.ISO8859-7',
517 'hebrew':
'iw_IL.ISO8859-8',
518 'hr':
'hr_HR.ISO8859-2',
519 'hr_hr':
'hr_HR.ISO8859-2',
520 'hu':
'hu_HU.ISO8859-2',
521 'hu_hu':
'hu_HU.ISO8859-2',
522 'hungarian':
'hu_HU.ISO8859-2',
523 'icelandic':
'is_IS.ISO8859-1',
524 'id':
'id_ID.ISO8859-1',
525 'id_id':
'id_ID.ISO8859-1',
526 'is':
'is_IS.ISO8859-1',
527 'is_is':
'is_IS.ISO8859-1',
528 'iso-8859-1':
'en_US.ISO8859-1',
529 'iso-8859-15':
'en_US.ISO8859-15',
530 'iso8859-1':
'en_US.ISO8859-1',
531 'iso8859-15':
'en_US.ISO8859-15',
532 'iso_8859_1':
'en_US.ISO8859-1',
533 'iso_8859_15':
'en_US.ISO8859-15',
534 'it':
'it_IT.ISO8859-1',
535 'it_ch':
'it_CH.ISO8859-1',
536 'it_it':
'it_IT.ISO8859-1',
537 'italian':
'it_IT.ISO8859-1',
538 'iw':
'iw_IL.ISO8859-8',
539 'iw_il':
'iw_IL.ISO8859-8',
541 'ja.jis':
'ja_JP.JIS7',
542 'ja.sjis':
'ja_JP.SJIS',
543 'ja_jp':
'ja_JP.eucJP',
544 'ja_jp.ajec':
'ja_JP.eucJP',
545 'ja_jp.euc':
'ja_JP.eucJP',
546 'ja_jp.eucjp':
'ja_JP.eucJP',
547 'ja_jp.iso-2022-jp':
'ja_JP.JIS7',
548 'ja_jp.jis':
'ja_JP.JIS7',
549 'ja_jp.jis7':
'ja_JP.JIS7',
550 'ja_jp.mscode':
'ja_JP.SJIS',
551 'ja_jp.sjis':
'ja_JP.SJIS',
552 'ja_jp.ujis':
'ja_JP.eucJP',
553 'japan':
'ja_JP.eucJP',
554 'japanese':
'ja_JP.SJIS',
555 'japanese-euc':
'ja_JP.eucJP',
556 'japanese.euc':
'ja_JP.eucJP',
557 'jp_jp':
'ja_JP.eucJP',
559 'ko_kr':
'ko_KR.eucKR',
560 'ko_kr.euc':
'ko_KR.eucKR',
561 'korean':
'ko_KR.eucKR',
562 'lt':
'lt_LT.ISO8859-4',
563 'lv':
'lv_LV.ISO8859-4',
564 'mk':
'mk_MK.ISO8859-5',
565 'mk_mk':
'mk_MK.ISO8859-5',
566 'nl':
'nl_NL.ISO8859-1',
567 'nl_be':
'nl_BE.ISO8859-1',
568 'nl_nl':
'nl_NL.ISO8859-1',
569 'no':
'no_NO.ISO8859-1',
570 'no_no':
'no_NO.ISO8859-1',
571 'norwegian':
'no_NO.ISO8859-1',
572 'pl':
'pl_PL.ISO8859-2',
573 'pl_pl':
'pl_PL.ISO8859-2',
574 'polish':
'pl_PL.ISO8859-2',
575 'portuguese':
'pt_PT.ISO8859-1',
576 'portuguese_brazil':
'pt_BR.ISO8859-1',
579 'pt':
'pt_PT.ISO8859-1',
580 'pt_br':
'pt_BR.ISO8859-1',
581 'pt_pt':
'pt_PT.ISO8859-1',
582 'ro':
'ro_RO.ISO8859-2',
583 'ro_ro':
'ro_RO.ISO8859-2',
584 'ru':
'ru_RU.ISO8859-5',
585 'ru_ru':
'ru_RU.ISO8859-5',
586 'rumanian':
'ro_RO.ISO8859-2',
587 'russian':
'ru_RU.ISO8859-5',
588 'serbocroatian':
'sh_YU.ISO8859-2',
589 'sh':
'sh_YU.ISO8859-2',
590 'sh_hr':
'sh_HR.ISO8859-2',
591 'sh_sp':
'sh_YU.ISO8859-2',
592 'sh_yu':
'sh_YU.ISO8859-2',
593 'sk':
'sk_SK.ISO8859-2',
594 'sk_sk':
'sk_SK.ISO8859-2',
595 'sl':
'sl_CS.ISO8859-2',
596 'sl_cs':
'sl_CS.ISO8859-2',
597 'sl_si':
'sl_SI.ISO8859-2',
598 'slovak':
'sk_SK.ISO8859-2',
599 'slovene':
'sl_CS.ISO8859-2',
600 'sp':
'sp_YU.ISO8859-5',
601 'sp_yu':
'sp_YU.ISO8859-5',
602 'spanish':
'es_ES.ISO8859-1',
603 'spanish_spain':
'es_ES.ISO8859-1',
604 'sr_sp':
'sr_SP.ISO8859-2',
605 'sv':
'sv_SE.ISO8859-1',
606 'sv_se':
'sv_SE.ISO8859-1',
607 'swedish':
'sv_SE.ISO8859-1',
608 'th_th':
'th_TH.TACTIS',
609 'tr':
'tr_TR.ISO8859-9',
610 'tr_tr':
'tr_TR.ISO8859-9',
611 'turkish':
'tr_TR.ISO8859-9',
613 'universal':
'en_US.utf',
615 'zh_cn':
'zh_CN.eucCN',
616 'zh_cn.big5':
'zh_TW.eucTW',
617 'zh_cn.euc':
'zh_CN.eucCN',
618 'zh_tw':
'zh_TW.eucTW',
619 'zh_tw.euc':
'zh_TW.eucTW',
667 def _init_categories(categories=categories):
668 for k,v
in globals().items():
672 del categories[
'LC_ALL']
674 print 'Locale defaults as determined by getdefaultlocale():'
677 print 'Language: ', lang
or '(undefined)'
678 print 'Encoding: ', enc
or '(undefined)'
681 print 'Locale settings on startup:'
683 for name,category
in categories.items():
686 print ' Language: ', lang
or '(undefined)'
687 print ' Encoding: ', enc
or '(undefined)'
691 print 'Locale settings after calling resetlocale():'
694 for name,category
in categories.items():
697 print ' Language: ', lang
or '(undefined)'
698 print ' Encoding: ', enc
or '(undefined)'
705 print 'setlocale(LC_ALL, "") does not support the default locale'
706 print 'given in the OS environment variables.'
709 print 'Locale settings after calling setlocale(LC_ALL, ""):'
711 for name,category
in categories.items():
714 print ' Language: ', lang
or '(undefined)'
715 print ' Encoding: ', enc
or '(undefined)'
725 __all__.append(
"LC_MESSAGES")
727 if __name__==
'__main__':
728 print 'Locale aliasing:'
732 print 'Number formatting:'