File 1 - . \__init__.py:
1: (0) """
2: (4) babel
3: (4) ~~~~~
4: (4) Integrated collection of utilities that assist in internationalizing and
5: (4) localizing applications.
6: (4) This package is basically composed of two major parts:
7: (5) * tools to build and work with ``gettext`` message catalogs
8: (5) * a Python interface to the CLDR (Common Locale Data Repository), providing
9: (7) access to various locale display names, localized number and date
10: (7) formatting, etc.
11: (4) :copyright: (c) 2013-2024 by the Babel Team.
12: (4) :license: BSD, see LICENSE for more details.
13: (0) """
14: (0) from babel.core import (
15: (4) Locale,
16: (4) UnknownLocaleError,
17: (4) default_locale,
18: (4) get_locale_identifier,
19: (4) negotiate_locale,
20: (4) parse_locale,
21: (0) )
22: (0) __version__ = '2.16.0'
23: (0) __all__ = [
24: (4) 'Locale',
25: (4) 'UnknownLocaleError',
26: (4) 'default_locale',
27: (4) 'get_locale_identifier',
28: (4) 'negotiate_locale',
29: (4) 'parse_locale',
30: (0) ]
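A minimal sketch of the public surface re-exported above; printed values depend on the CLDR data bundled with the installed Babel version.

    from babel import Locale, negotiate_locale, parse_locale

    locale = Locale.parse('de-DE', sep='-')
    print(locale.display_name)                  # localized, e.g. 'Deutsch (Deutschland)'
    print(parse_locale('de_DE'))                # ('de', 'DE', None, None)
    print(negotiate_locale(['de_DE', 'en_US'], ['de', 'en']))   # 'de'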
----------------------------------------
File 2 - . \core.py:
1: (0) """
2: (4) babel.core
3: (4) ~~~~~~~~~~
4: (4) Core locale representation and locale data access.
5: (4) :copyright: (c) 2013-2024 by the Babel Team.
6: (4) :license: BSD, see LICENSE for more details.
7: (0) """
8: (0) from __future__ import annotations
9: (0) import os
10: (0) import pickle
11: (0) from collections.abc import Iterable, Mapping
12: (0) from typing import TYPE_CHECKING, Any
13: (0) from babel import localedata
14: (0) from babel.plural import PluralRule
15: (0) __all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale',
16: (11) 'parse_locale']
17: (0) if TYPE_CHECKING:
18: (4) from typing_extensions import Literal, TypeAlias
19: (4) _GLOBAL_KEY: TypeAlias = Literal[
20: (8) "all_currencies",
21: (8) "currency_fractions",
22: (8) "language_aliases",
23: (8) "likely_subtags",
24: (8) "meta_zones",
25: (8) "parent_exceptions",
26: (8) "script_aliases",
27: (8) "territory_aliases",
28: (8) "territory_currencies",
29: (8) "territory_languages",
30: (8) "territory_zones",
31: (8) "variant_aliases",
32: (8) "windows_zone_mapping",
33: (8) "zone_aliases",
34: (8) "zone_territories",
35: (4) ]
36: (4) _global_data: Mapping[_GLOBAL_KEY, Mapping[str, Any]] | None
37: (0) _global_data = None
38: (0) _default_plural_rule = PluralRule({})
39: (0) def _raise_no_data_error():
40: (4) raise RuntimeError('The babel data files are not available. '
41: (23) 'This usually happens because you are using '
42: (23) 'a source checkout from Babel and you did '
43: (23) 'not build the data files. Just make sure '
44: (23) 'to run "python setup.py import_cldr" before '
45: (23) 'installing the library.')
46: (0) def get_global(key: _GLOBAL_KEY) -> Mapping[str, Any]:
47: (4) """Return the dictionary for the given key in the global data.
48: (4) The global data is stored in the ``babel/global.dat`` file and contains
49: (4) information independent of individual locales.
50: (4) >>> get_global('zone_aliases')['UTC']
51: (4) u'Etc/UTC'
52: (4) >>> get_global('zone_territories')['Europe/Berlin']
53: (4) u'DE'
54: (4) The keys available are:
55: (4) - ``all_currencies``
56: (4) - ``currency_fractions``
57: (4) - ``language_aliases``
58: (4) - ``likely_subtags``
59: (4) - ``parent_exceptions``
60: (4) - ``script_aliases``
61: (4) - ``territory_aliases``
62: (4) - ``territory_currencies``
63: (4) - ``territory_languages``
64: (4) - ``territory_zones``
65: (4) - ``variant_aliases``
66: (4) - ``windows_zone_mapping``
67: (4) - ``zone_aliases``
68: (4) - ``zone_territories``
69: (4) .. note:: The internal structure of the data may change between versions.
70: (4) .. versionadded:: 0.9
71: (4) :param key: the data key
72: (4) """
73: (4) global _global_data
74: (4) if _global_data is None:
75: (8) dirname = os.path.dirname(__file__)
76: (8) filename = os.path.join(dirname, 'global.dat')
77: (8) if not os.path.isfile(filename):
78: (12) _raise_no_data_error()
79: (8) with open(filename, 'rb') as fileobj:
80: (12) _global_data = pickle.load(fileobj)
81: (12) assert _global_data is not None
82: (4) return _global_data.get(key, {})
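A quick sketch of the accessor above: the data is unpickled lazily from ``global.dat`` on first use and cached for the process; exact values depend on the CLDR release Babel was built from.

    from babel.core import get_global

    print(get_global('zone_territories')['Europe/Berlin'])   # 'DE'
    print(get_global('likely_subtags').get('und_AT'))        # e.g. 'de_Latn_AT', CLDR-dependent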
83: (0) LOCALE_ALIASES = {
84: (4) 'ar': 'ar_SY', 'bg': 'bg_BG', 'bs': 'bs_BA', 'ca': 'ca_ES', 'cs': 'cs_CZ',
85: (4) 'da': 'da_DK', 'de': 'de_DE', 'el': 'el_GR', 'en': 'en_US', 'es': 'es_ES',
86: (4) 'et': 'et_EE', 'fa': 'fa_IR', 'fi': 'fi_FI', 'fr': 'fr_FR', 'gl': 'gl_ES',
87: (4) 'he': 'he_IL', 'hu': 'hu_HU', 'id': 'id_ID', 'is': 'is_IS', 'it': 'it_IT',
88: (4) 'ja': 'ja_JP', 'km': 'km_KH', 'ko': 'ko_KR', 'lt': 'lt_LT', 'lv': 'lv_LV',
89: (4) 'mk': 'mk_MK', 'nl': 'nl_NL', 'nn': 'nn_NO', 'no': 'nb_NO', 'pl': 'pl_PL',
90: (4) 'pt': 'pt_PT', 'ro': 'ro_RO', 'ru': 'ru_RU', 'sk': 'sk_SK', 'sl': 'sl_SI',
91: (4) 'sv': 'sv_SE', 'th': 'th_TH', 'tr': 'tr_TR', 'uk': 'uk_UA',
92: (0) }
93: (0) class UnknownLocaleError(Exception):
94: (4) """Exception thrown when a locale is requested for which no locale data
95: (4) is available.
96: (4) """
97: (4) def __init__(self, identifier: str) -> None:
98: (8) """Create the exception.
99: (8) :param identifier: the identifier string of the unsupported locale
100: (8) """
101: (8) Exception.__init__(self, f"unknown locale {identifier!r}")
102: (8) #: The identifier of the locale that could not be found.
103: (8) self.identifier = identifier
104: (0) class Locale:
105: (4) """Representation of a specific locale.
106: (4) >>> locale = Locale('en', 'US')
107: (4) >>> repr(locale)
108: (4) "Locale('en', territory='US')"
109: (4) >>> locale.display_name
110: (4) u'English (United States)'
111: (4) A `Locale` object can also be instantiated from a raw locale string:
112: (4) >>> locale = Locale.parse('en-US', sep='-')
113: (4) >>> repr(locale)
114: (4) "Locale('en', territory='US')"
115: (4) `Locale` objects provide access to a collection of locale data, such as
116: (4) territory and language names, number and date format patterns, and more:
117: (4) >>> locale.number_symbols['latn']['decimal']
118: (4) u'.'
119: (4) If a locale is requested for which no locale data is available, an
120: (4) `UnknownLocaleError` is raised:
121: (4) >>> Locale.parse('en_XX')
122: (4) Traceback (most recent call last):
123: (8) ...
124: (4) UnknownLocaleError: unknown locale 'en_XX'
125: (4) For more information see :rfc:`3066`.
126: (4) """
127: (4) def __init__(
128: (8) self,
129: (8) language: str,
130: (8) territory: str | None = None,
131: (8) script: str | None = None,
132: (8) variant: str | None = None,
133: (8) modifier: str | None = None,
134: (4) ) -> None:
135: (8) """Initialize the locale object from the given identifier components.
136: (8) >>> locale = Locale('en', 'US')
137: (8) >>> locale.language
138: (8) 'en'
139: (8) >>> locale.territory
140: (8) 'US'
141: (8) :param language: the language code
142: (8) :param territory: the territory (country or region) code
143: (8) :param script: the script code
144: (8) :param variant: the variant code
145: (8) :param modifier: a modifier (following the '@' symbol, sometimes called '@variant')
146: (8) :raise `UnknownLocaleError`: if no locale data is available for the
147: (37) requested locale
148: (8) """
149: (8) #: the language code
150: (8) self.language = language
151: (8) #: the territory (country or region) code
152: (8) self.territory = territory
153: (8) #: the script code
154: (8) self.script = script
155: (8) #: the variant code
156: (8) self.variant = variant
157: (8) #: the modifier
158: (8) self.modifier = modifier
159: (8) self.__data: localedata.LocaleDataDict | None = None
160: (8) identifier = str(self)
161: (8) identifier_without_modifier = identifier.partition('@')[0]
162: (8) if localedata.exists(identifier):
163: (12) self.__data_identifier = identifier
164: (8) elif localedata.exists(identifier_without_modifier):
165: (12) self.__data_identifier = identifier_without_modifier
166: (8) else:
167: (12) raise UnknownLocaleError(identifier)
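A brief sketch of constructing a ``Locale`` from components; as the constructor above shows, ``UnknownLocaleError`` is raised when no locale data is bundled for the resulting identifier.

    from babel import Locale, UnknownLocaleError

    loc = Locale('en', 'US')
    print(loc.language, loc.territory)   # en US

    try:
        Locale('en', 'XX')               # no CLDR data for 'en_XX'
    except UnknownLocaleError as exc:
        print(exc.identifier)            # en_XX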
168: (4) @classmethod
169: (4) def default(cls, category: str | None = None, aliases: Mapping[str, str] = LOCALE_ALIASES) -> Locale:
170: (8) """Return the system default locale for the specified category.
171: (8) >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LC_MESSAGES']:
172: (8) ... os.environ[name] = ''
173: (8) >>> os.environ['LANG'] = 'fr_FR.UTF-8'
174: (8) >>> Locale.default('LC_MESSAGES')
175: (8) Locale('fr', territory='FR')
176: (8) The following fallback environment variables are always considered:
177: (8) - ``LANGUAGE``
178: (8) - ``LC_ALL``
179: (8) - ``LC_CTYPE``
180: (8) - ``LANG``
181: (8) :param category: one of the ``LC_XXX`` environment variable names
182: (8) :param aliases: a dictionary of aliases for locale identifiers
183: (8) """
184: (8) # XXX: use likely subtag expansion here instead of the
185: (8) # aliases dictionary.
186: (8) locale_string = default_locale(category, aliases=aliases)
187: (8) return cls.parse(locale_string)
188: (4) @classmethod
189: (4) def negotiate(
190: (8) cls,
191: (8) preferred: Iterable[str],
192: (8) available: Iterable[str],
193: (8) sep: str = '_',
194: (8) aliases: Mapping[str, str] = LOCALE_ALIASES,
195: (4) ) -> Locale | None:
196: (8) """Find the best match between available and requested locale strings.
197: (8) >>> Locale.negotiate(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
198: (8) Locale('de', territory='DE')
199: (8) >>> Locale.negotiate(['de_DE', 'en_US'], ['en', 'de'])
200: (8) Locale('de')
201: (8) >>> Locale.negotiate(['de_DE', 'de'], ['en_US'])
202: (8) You can specify the character used in the locale identifiers to separate
203: (8) the different components. This separator is applied to both lists. Also,
204: (8) case is ignored in the comparison:
205: (8) >>> Locale.negotiate(['de-DE', 'de'], ['en-us', 'de-de'], sep='-')
206: (8) Locale('de', territory='DE')
207: (8) :param preferred: the list of locale identifiers preferred by the user
208: (8) :param available: the list of locale identifiers available
209: (8) :param aliases: a dictionary of aliases for locale identifiers
210: (8) """
211: (8) identifier = negotiate_locale(preferred, available, sep=sep,
212: (38) aliases=aliases)
213: (8) if identifier:
214: (12) return Locale.parse(identifier, sep=sep)
215: (8) return None
216: (4) @classmethod
217: (4) def parse(
218: (8) cls,
219: (8) identifier: str | Locale | None,
220: (8) sep: str = '_',
221: (8) resolve_likely_subtags: bool = True,
222: (4) ) -> Locale:
223: (8) """Create a `Locale` instance for the given locale identifier.
224: (8) >>> l = Locale.parse('de-DE', sep='-')
225: (8) >>> l.display_name
226: (8) u'Deutsch (Deutschland)'
227: (8) If the `identifier` parameter is not a string, but actually a `Locale`
228: (8) object, that object is returned:
229: (8) >>> Locale.parse(l)
230: (8) Locale('de', territory='DE')
231: (8) If the `identifier` parameter is neither of these (for example `None`,
232: (8) because a default locale identifier could not be determined),
233: (8) a `TypeError` is raised:
234: (8) >>> Locale.parse(None)
235: (8) Traceback (most recent call last):
236: (12) ...
237: (8) TypeError: ...
238: (8) This function can also resolve likely subtags, which it does by
239: (8) default. This is useful, for instance, to figure out the most
240: (8) likely locale for a territory; you can use ``'und'`` as the
241: (8) language tag:
242: (8) >>> Locale.parse('und_AT')
243: (8) Locale('de', territory='AT')
244: (8) Modifiers are optional, and always at the end, separated by "@":
245: (8) >>> Locale.parse('de_AT@euro')
246: (8) Locale('de', territory='AT', modifier='euro')
247: (8) :param identifier: the locale identifier string
248: (8) :param sep: optional component separator
249: (8) :param resolve_likely_subtags: if this is enabled, a locale's likely
250: (39) subtags are resolved when the locale
251: (39) does not otherwise exist. For instance
252: (39) ``zh_TW`` by itself is not an existing
253: (39) locale, but Babel can automatically
254: (39) expand it to the full form
255: (39) ``zh_hant_TW``. Note that this
256: (39) expansion only takes place when no
257: (39) locale exists otherwise; for instance
258: (39) the locale ``en`` exists by itself
259: (39) and is not expanded.
260: (8) :raise `ValueError`: if the string does not appear to be a valid locale
261: (29) identifier
262: (8) :raise `UnknownLocaleError`: if no locale data is available for the
263: (37) requested locale
264: (8) :raise `TypeError`: if the identifier is not a string or a `Locale`
265: (8) """
266: (8) if isinstance(identifier, Locale):
267: (12) return identifier
268: (8) elif not isinstance(identifier, str):
269: (12) raise TypeError(f"Unexpected value for identifier: {identifier!r}")
270: (8) parts = parse_locale(identifier, sep=sep)
271: (8) input_id = get_locale_identifier(parts)
272: (8) def _try_load(parts):
273: (12) try:
274: (16) return cls(*parts)
275: (12) except UnknownLocaleError:
276: (16) return None
277: (8) def _try_load_reducing(parts):
278: (12) # Success on first hit, return it.
279: (12) locale = _try_load(parts)
280: (12) if locale is not None:
281: (16) return locale
282: (12) # Now try without script and variant
283: (12) locale = _try_load(parts[:2])
284: (12) if locale is not None:
285: (16) return locale
286: (8) locale = _try_load(parts)
287: (8) if locale is not None:
288: (12) return locale
289: (8) if not resolve_likely_subtags:
290: (12) raise UnknownLocaleError(input_id)
291: (8) # From here onwards is some very bad likely subtag resolving. This
292: (8) # whole logic is not entirely correct but good enough (tm) for the
293: (8) # time being. This has been added so that zh_TW does not cause
294: (8) # errors for people when they upgrade. Later we should properly
295: (8) # implement ICU like fuzzy locale objects and provide a way to
296: (8) # maximize and minimize locale tags.
297: (8) if len(parts) == 5:
298: (12) language, territory, script, variant, modifier = parts
299: (8) else:
300: (12) language, territory, script, variant = parts
301: (12) modifier = None
302: (8) language = get_global('language_aliases').get(language, language)
303: (8) territory = get_global('territory_aliases').get(territory or '', (territory,))[0]
304: (8) script = get_global('script_aliases').get(script or '', script)
305: (8) variant = get_global('variant_aliases').get(variant or '', variant)
306: (8) if territory == 'ZZ':
307: (12) territory = None
308: (8) if script == 'Zzzz':
309: (12) script = None
310: (8) parts = language, territory, script, variant, modifier
311: (8) # First match: try the whole identifier
312: (8) new_id = get_locale_identifier(parts)
313: (8) likely_subtag = get_global('likely_subtags').get(new_id)
314: (8) if likely_subtag is not None:
315: (12) locale = _try_load_reducing(parse_locale(likely_subtag))
316: (12) if locale is not None:
317: (16) return locale
318: (8) # If we did not find anything so far, try again with a
319: (8) # simplified identifier that is just the language
320: (8) likely_subtag = get_global('likely_subtags').get(language)
321: (8) if likely_subtag is not None:
322: (12) parts2 = parse_locale(likely_subtag)
323: (12) if len(parts2) == 5:
324: (16) language2, _, script2, variant2, modifier2 = parts2
325: (12) else:
326: (16) language2, _, script2, variant2 = parts2
327: (16) modifier2 = None
328: (12) locale = _try_load_reducing((language2, territory, script2, variant2, modifier2))
329: (12) if locale is not None:
330: (16) return locale
331: (8) raise UnknownLocaleError(input_id)
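A short recap of the parsing behaviours documented above; the exact ``Locale`` objects returned depend on the bundled CLDR data.

    from babel import Locale

    Locale.parse('de-DE', sep='-')   # custom separator -> Locale('de', territory='DE')
    Locale.parse('und_AT')           # likely-subtag resolution -> Locale('de', territory='AT')
    Locale.parse('de_AT@euro')       # modifier preserved -> Locale('de', territory='AT', modifier='euro')
    Locale.parse('zh_TW')            # expanded via likely subtags to the zh_Hant_TW data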
332: (4) def __eq__(self, other: object) -> bool:
333: (8) for key in ('language', 'territory', 'script', 'variant', 'modifier'):
334: (12) if not hasattr(other, key):
335: (16) return False
336: (8) return (
337: (12) self.language == getattr(other, 'language') and # noqa: B009
338: (12) self.territory == getattr(other, 'territory') and # noqa: B009
339: (12) self.script == getattr(other, 'script') and # noqa: B009
340: (12) self.variant == getattr(other, 'variant') and # noqa: B009
341: (12) self.modifier == getattr(other, 'modifier') # noqa: B009
342: (8) )
343: (4) def __ne__(self, other: object) -> bool:
344: (8) return not self.__eq__(other)
345: (4) def __hash__(self) -> int:
346: (8) return hash((self.language, self.territory, self.script,
347: (21) self.variant, self.modifier))
348: (4) def __repr__(self) -> str:
349: (8) parameters = ['']
350: (8) for key in ('territory', 'script', 'variant', 'modifier'):
351: (12) value = getattr(self, key)
352: (12) if value is not None:
353: (16) parameters.append(f"{key}={value!r}")
354: (8) return f"Locale({self.language!r}{', '.join(parameters)})"
355: (4) def __str__(self) -> str:
356: (8) return get_locale_identifier((self.language, self.territory,
357: (38) self.script, self.variant,
358: (38) self.modifier))
359: (4) @property
360: (4) def _data(self) -> localedata.LocaleDataDict:
361: (8) if self.__data is None:
362: (12) self.__data = localedata.LocaleDataDict(localedata.load(self.__data_identifier))
363: (8) return self.__data
364: (4) def get_display_name(self, locale: Locale | str | None = None) -> str | None:
365: (8) """Return the display name of the locale using the given locale.
366: (8) The display name will include the language, territory, script, and
367: (8) variant, if those are specified.
368: (8) >>> Locale('zh', 'CN', script='Hans').get_display_name('en')
369: (8) u'Chinese (Simplified, China)'
370: (8) Modifiers are currently passed through verbatim:
371: (8) >>> Locale('it', 'IT', modifier='euro').get_display_name('en')
372: (8) u'Italian (Italy, euro)'
373: (8) :param locale: the locale to use
374: (8) """
375: (8) if locale is None:
376: (12) locale = self
377: (8) locale = Locale.parse(locale)
378: (8) retval = locale.languages.get(self.language)
379: (8) if retval and (self.territory or self.script or self.variant):
380: (12) details = []
381: (12) if self.script:
382: (16) details.append(locale.scripts.get(self.script))
383: (12) if self.territory:
384: (16) details.append(locale.territories.get(self.territory))
385: (12) if self.variant:
386: (16) details.append(locale.variants.get(self.variant))
387: (12) if self.modifier:
388: (16) details.append(self.modifier)
389: (12) detail_string = ', '.join(atom for atom in details if atom)
390: (12) if detail_string:
391: (16) retval += f" ({detail_string})"
392: (8) return retval
393: (4) display_name = property(get_display_name, doc="""\
394: (8) The localized display name of the locale.
395: (8) >>> Locale('en').display_name
396: (8) u'English'
397: (8) >>> Locale('en', 'US').display_name
398: (8) u'English (United States)'
399: (8) >>> Locale('sv').display_name
400: (8) u'svenska'
401: (8) :type: `unicode`
402: (8) """)
403: (4) def get_language_name(self, locale: Locale | str | None = None) -> str | None:
404: (8) """Return the language of this locale in the given locale.
405: (8) >>> Locale('zh', 'CN', script='Hans').get_language_name('de')
406: (8) u'Chinesisch'
407: (8) .. versionadded:: 1.0
408: (8) :param locale: the locale to use
409: (8) """
410: (8) if locale is None:
411: (12) locale = self
412: (8) locale = Locale.parse(locale)
413: (8) return locale.languages.get(self.language)
414: (4) language_name = property(get_language_name, doc="""\
415: (8) The localized language name of the locale.
416: (8) >>> Locale('en', 'US').language_name
417: (8) u'English'
418: (4) """)
419: (4) def get_territory_name(self, locale: Locale | str | None = None) -> str | None:
420: (8) """Return the territory name in the given locale."""
421: (8) if locale is None:
422: (12) locale = self
423: (8) locale = Locale.parse(locale)
424: (8) return locale.territories.get(self.territory or '')
425: (4) territory_name = property(get_territory_name, doc="""\
426: (8) The localized territory name of the locale if available.
427: (8) >>> Locale('de', 'DE').territory_name
428: (8) u'Deutschland'
429: (4) """)
430: (4) def get_script_name(self, locale: Locale | str | None = None) -> str | None:
431: (8) """Return the script name in the given locale."""
432: (8) if locale is None:
433: (12) locale = self
434: (8) locale = Locale.parse(locale)
435: (8) return locale.scripts.get(self.script or '')
436: (4) script_name = property(get_script_name, doc="""\
437: (8) The localized script name of the locale if available.
438: (8) >>> Locale('sr', 'ME', script='Latn').script_name
439: (8) u'latinica'
440: (4) """)
441: (4) @property
442: (4) def english_name(self) -> str | None:
443: (8) """The English display name of the locale.
444: (8) >>> Locale('de').english_name
445: (8) u'German'
446: (8) >>> Locale('de', 'DE').english_name
447: (8) u'German (Germany)'
448: (8) :type: `unicode`"""
449: (8) return self.get_display_name(Locale('en'))
450: (4) # { General Locale Display Names
451: (4) @property
452: (4) def languages(self) -> localedata.LocaleDataDict:
453: (8) """Mapping of language codes to translated language names.
454: (8) >>> Locale('de', 'DE').languages['ja']
455: (8) u'Japanisch'
456: (8) See `ISO 639 <http://www.loc.gov/standards/iso639-2/>`_ for
457: (8) more information.
458: (8) """
459: (8) return self._data['languages']
460: (4) @property
461: (4) def scripts(self) -> localedata.LocaleDataDict:
462: (8) """Mapping of script codes to translated script names.
463: (8) >>> Locale('en', 'US').scripts['Hira']
464: (8) u'Hiragana'
465: (8) See `ISO 15924 <http://www.evertype.com/standards/iso15924/>`_
466: (8) for more information.
467: (8) """
468: (8) return self._data['scripts']
469: (4) @property
470: (4) def territories(self) -> localedata.LocaleDataDict:
471: (8) """Mapping of territory codes to translated territory names.
472: (8) >>> Locale('es', 'CO').territories['DE']
473: (8) u'Alemania'
474: (8) See `ISO 3166 <http://www.iso.org/iso/en/prods-services/iso3166ma/>`_
475: (8) for more information.
476: (8) """
477: (8) return self._data['territories']
478: (4) @property
479: (4) def variants(self) -> localedata.LocaleDataDict:
480: (8) """Mapping of variant codes to translated variant names.
481: (8) >>> Locale('de', 'DE').variants['1901']
482: (8) u'Alte deutsche Rechtschreibung'
483: (8) """
484: (8) return self._data['variants']
485: (4) # { Number Formatting
486: (4) @property
487: (4) def currencies(self) -> localedata.LocaleDataDict:
488: (8) """Mapping of currency codes to translated currency names. This
489: (8) only returns the generic form of the currency name, not the count
490: (8) specific one. If an actual number is requested use the
491: (8) :func:`babel.numbers.get_currency_name` function.
492: (8) >>> Locale('en').currencies['COP']
493: (8) u'Colombian Peso'
494: (8) >>> Locale('de', 'DE').currencies['COP']
495: (8) u'Kolumbianischer Peso'
496: (8) """
497: (8) return self._data['currency_names']
498: (4) @property
499: (4) def currency_symbols(self) -> localedata.LocaleDataDict:
500: (8) """Mapping of currency codes to symbols.
501: (8) >>> Locale('en', 'US').currency_symbols['USD']
502: (8) u'$'
503: (8) >>> Locale('es', 'CO').currency_symbols['USD']
504: (8) u'US$'
505: (8) """
506: (8) return self._data['currency_symbols']
507: (4) @property
508: (4) def number_symbols(self) -> localedata.LocaleDataDict:
509: (8) """Symbols used in number formatting by number system.
510: (8) .. note:: The format of the value returned may change between
511: (18) Babel versions.
512: (8) >>> Locale('fr', 'FR').number_symbols["latn"]['decimal']
513: (8) u','
514: (8) >>> Locale('fa', 'IR').number_symbols["arabext"]['decimal']
515: (8) u'٫'
516: (8) >>> Locale('fa', 'IR').number_symbols["latn"]['decimal']
517: (8) u'.'
518: (8) """
519: (8) return self._data['number_symbols']
520: (4) @property
521: (4) def other_numbering_systems(self) -> localedata.LocaleDataDict:
522: (8) """
523: (8) Mapping of other numbering systems available for the locale.
524: (8) See: https://www.unicode.org/reports/tr35/tr35-numbers.html#otherNumberingSystems
525: (8) >>> Locale('el', 'GR').other_numbering_systems['traditional']
526: (8) u'grek'
527: (8) .. note:: The format of the value returned may change between
528: (18) Babel versions.
529: (8) """
530: (8) return self._data['numbering_systems']
531: (4) @property
532: (4) def default_numbering_system(self) -> str:
533: (8) """The default numbering system used by the locale.
534: (8) >>> Locale('el', 'GR').default_numbering_system
535: (8) u'latn'
536: (8) """
537: (8) return self._data['default_numbering_system']
538: (4) @property
539: (4) def decimal_formats(self) -> localedata.LocaleDataDict:
540: (8) """Locale patterns for decimal number formatting.
541: (8) .. note:: The format of the value returned may change between
542: (18) Babel versions.
543: (8) >>> Locale('en', 'US').decimal_formats[None]
544: (8) <NumberPattern u'#,##0.###'>
545: (8) """
546: (8) return self._data['decimal_formats']
547: (4) @property
548: (4) def compact_decimal_formats(self) -> localedata.LocaleDataDict:
549: (8) """Locale patterns for compact decimal number formatting.
550: (8) .. note:: The format of the value returned may change between
551: (18) Babel versions.
552: (8) >>> Locale('en', 'US').compact_decimal_formats["short"]["one"]["1000"]
553: (8) <NumberPattern u'0K'>
554: (8) """
555: (8) return self._data['compact_decimal_formats']
556: (4) @property
557: (4) def currency_formats(self) -> localedata.LocaleDataDict:
558: (8) """Locale patterns for currency number formatting.
559: (8) .. note:: The format of the value returned may change between
560: (18) Babel versions.
561: (8) >>> Locale('en', 'US').currency_formats['standard']
562: (8) <NumberPattern u'\\xa4#,##0.00'>
563: (8) >>> Locale('en', 'US').currency_formats['accounting']
564: (8) <NumberPattern u'\\xa4#,##0.00;(\\xa4#,##0.00)'>
565: (8) """
566: (8) return self._data['currency_formats']
567: (4) @property
568: (4) def compact_currency_formats(self) -> localedata.LocaleDataDict:
569: (8) """Locale patterns for compact currency number formatting.
570: (8) .. note:: The format of the value returned may change between
571: (18) Babel versions.
572: (8) >>> Locale('en', 'US').compact_currency_formats["short"]["one"]["1000"]
573: (8) <NumberPattern u'¤0K'>
574: (8) """
575: (8) return self._data['compact_currency_formats']
576: (4) @property
577: (4) def percent_formats(self) -> localedata.LocaleDataDict:
578: (8) """Locale patterns for percent number formatting.
579: (8) .. note:: The format of the value returned may change between
580: (18) Babel versions.
581: (8) >>> Locale('en', 'US').percent_formats[None]
582: (8) <NumberPattern u'#,##0%'>
583: (8) """
584: (8) return self._data['percent_formats']
585: (4) @property
586: (4) def scientific_formats(self) -> localedata.LocaleDataDict:
587: (8) """Locale patterns for scientific number formatting.
588: (8) .. note:: The format of the value returned may change between
589: (18) Babel versions.
590: (8) >>> Locale('en', 'US').scientific_formats[None]
591: (8) <NumberPattern u'#E0'>
592: (8) """
593: (8) return self._data['scientific_formats']
594: (4) # { Calendar Information and Date Formatting
595: (4) @property
596: (4) def periods(self) -> localedata.LocaleDataDict:
597: (8) """Locale display names for day periods (AM/PM).
598: (8) >>> Locale('en', 'US').periods['am']
599: (8) u'AM'
600: (8) """
601: (8) try:
602: (12) return self._data['day_periods']['stand-alone']['wide']
603: (8) except KeyError:
604: (12) return localedata.LocaleDataDict({}) # pragma: no cover
605: (4) @property
606: (4) def day_periods(self) -> localedata.LocaleDataDict:
607: (8) """Locale display names for various day periods (not necessarily only AM/PM).
608: (8) These are not meant to be used without the relevant `day_period_rules`.
609: (8) """
610: (8) return self._data['day_periods']
611: (4) @property
612: (4) def day_period_rules(self) -> localedata.LocaleDataDict:
613: (8) """Day period rules for the locale. Used by `get_period_id`.
614: (8) """
615: (8) return self._data.get('day_period_rules', localedata.LocaleDataDict({}))
616: (4) @property
617: (4) def days(self) -> localedata.LocaleDataDict:
618: (8) """Locale display names for weekdays.
619: (8) >>> Locale('de', 'DE').days['format']['wide'][3]
620: (8) u'Donnerstag'
621: (8) """
622: (8) return self._data['days']
623: (4) @property
624: (4) def months(self) -> localedata.LocaleDataDict:
625: (8) """Locale display names for months.
626: (8) >>> Locale('de', 'DE').months['format']['wide'][10]
627: (8) u'Oktober'
628: (8) """
629: (8) return self._data['months']
630: (4) @property
631: (4) def quarters(self) -> localedata.LocaleDataDict:
632: (8) """Locale display names for quarters.
633: (8) >>> Locale('de', 'DE').quarters['format']['wide'][1]
634: (8) u'1. Quartal'
635: (8) """
636: (8) return self._data['quarters']
637: (4) @property
638: (4) def eras(self) -> localedata.LocaleDataDict:
639: (8) """Locale display names for eras.
640: (8) .. note:: The format of the value returned may change between
641: (18) Babel versions.
642: (8) >>> Locale('en', 'US').eras['wide'][1]
643: (8) u'Anno Domini'
644: (8) >>> Locale('en', 'US').eras['abbreviated'][0]
645: (8) u'BC'
646: (8) """
647: (8) return self._data['eras']
648: (4) @property
649: (4) def time_zones(self) -> localedata.LocaleDataDict:
650: (8) """Locale display names for time zones.
651: (8) .. note:: The format of the value returned may change between
652: (18) Babel versions.
653: (8) >>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight']
654: (8) u'British Summer Time'
655: (8) >>> Locale('en', 'US').time_zones['America/St_Johns']['city']
656: (8) u'St. John\u2019s'
657: (8) """
658: (8) return self._data['time_zones']
659: (4) @property
660: (4) def meta_zones(self) -> localedata.LocaleDataDict:
661: (8) """Locale display names for meta time zones.
662: (8) Meta time zones are basically groups of different Olson time zones that
663: (8) share the same GMT offset and daylight saving time rules.
664: (8) .. note:: The format of the value returned may change between
665: (18) Babel versions.
666: (8) >>> Locale('en', 'US').meta_zones['Europe_Central']['long']['daylight']
667: (8) u'Central European Summer Time'
668: (8) .. versionadded:: 0.9
669: (8) """
670: (8) return self._data['meta_zones']
671: (4) @property
672: (4) def zone_formats(self) -> localedata.LocaleDataDict:
673: (8) """Patterns related to the formatting of time zones.
674: (8) .. note:: The format of the value returned may change between
675: (18) Babel versions.
676: (8) >>> Locale('en', 'US').zone_formats['fallback']
677: (8) u'%(1)s (%(0)s)'
678: (8) >>> Locale('pt', 'BR').zone_formats['region']
679: (8) u'Hor\\xe1rio %s'
680: (8) .. versionadded:: 0.9
681: (8) """
682: (8) return self._data['zone_formats']
683: (4) @property
684: (4) def first_week_day(self) -> int:
685: (8) """The first day of a week, with 0 being Monday.
686: (8) >>> Locale('de', 'DE').first_week_day
687: (8) 0
688: (8) >>> Locale('en', 'US').first_week_day
689: (8) 6
690: (8) """
691: (8) return self._data['week_data']['first_day']
692: (4) @property
693: (4) def weekend_start(self) -> int:
694: (8) """The day the weekend starts, with 0 being Monday.
695: (8) >>> Locale('de', 'DE').weekend_start
696: (8) 5
697: (8) """
698: (8) return self._data['week_data']['weekend_start']
699: (4) @property
700: (4) def weekend_end(self) -> int:
701: (8) """The day the weekend ends, with 0 being Monday.
702: (8) >>> Locale('de', 'DE').weekend_end
703: (8) 6
704: (8) """
705: (8) return self._data['week_data']['weekend_end']
706: (4) @property
707: (4) def min_week_days(self) -> int:
708: (8) """The minimum number of days in a week so that the week is counted as
709: (8) the first week of a year or month.
710: (8) >>> Locale('de', 'DE').min_week_days
711: (8) 4
712: (8) """
713: (8) return self._data['week_data']['min_days']
714: (4) @property
715: (4) def date_formats(self) -> localedata.LocaleDataDict:
716: (8) """Locale patterns for date formatting.
717: (8) .. note:: The format of the value returned may change between
718: (18) Babel versions.
719: (8) >>> Locale('en', 'US').date_formats['short']
720: (8) <DateTimePattern u'M/d/yy'>
721: (8) >>> Locale('fr', 'FR').date_formats['long']
722: (8) <DateTimePattern u'd MMMM y'>
723: (8) """
724: (8) return self._data['date_formats']
725: (4) @property
726: (4) def time_formats(self) -> localedata.LocaleDataDict:
727: (8) """Locale patterns for time formatting.
728: (8) .. note:: The format of the value returned may change between
729: (18) Babel versions.
730: (8) >>> Locale('en', 'US').time_formats['short']
731: (8) <DateTimePattern u'h:mm\u202fa'>
732: (8) >>> Locale('fr', 'FR').time_formats['long']
733: (8) <DateTimePattern u'HH:mm:ss z'>
734: (8) """
735: (8) return self._data['time_formats']
736: (4) @property
737: (4) def datetime_formats(self) -> localedata.LocaleDataDict:
738: (8) """Locale patterns for datetime formatting.
739: (8) .. note:: The format of the value returned may change between
740: (18) Babel versions.
741: (8) >>> Locale('en').datetime_formats['full']
742: (8) u'{1}, {0}'
743: (8) >>> Locale('th').datetime_formats['medium']
744: (8) u'{1} {0}'
745: (8) """
746: (8) return self._data['datetime_formats']
747: (4) @property
748: (4) def datetime_skeletons(self) -> localedata.LocaleDataDict:
749: (8) """Locale patterns for formatting parts of a datetime.
750: (8) >>> Locale('en').datetime_skeletons['MEd']
751: (8) <DateTimePattern u'E, M/d'>
752: (8) >>> Locale('fr').datetime_skeletons['MEd']
753: (8) <DateTimePattern u'E dd/MM'>
754: (8) >>> Locale('fr').datetime_skeletons['H']
755: (8) <DateTimePattern u"HH 'h'">
756: (8) """
757: (8) return self._data['datetime_skeletons']
758: (4) @property
759: (4) def interval_formats(self) -> localedata.LocaleDataDict:
760: (8) """Locale patterns for interval formatting.
761: (8) .. note:: The format of the value returned may change between
762: (18) Babel versions.
763: (8) How to format date intervals in Finnish when the day is the
764: (8) smallest changing component:
765: (8) >>> Locale('fi_FI').interval_formats['MEd']['d']
766: (8) [u'E d.\u2009\u2013\u2009', u'E d.M.']
767: (8) .. seealso::
768: (11) The primary API to use this data is :py:func:`babel.dates.format_interval`.
769: (8) :rtype: dict[str, dict[str, list[str]]]
770: (8) """
771: (8) return self._data['interval_formats']
772: (4) @property
773: (4) def plural_form(self) -> PluralRule:
774: (8) """Plural rules for the locale.
775: (8) >>> Locale('en').plural_form(1)
776: (8) 'one'
777: (8) >>> Locale('en').plural_form(0)
778: (8) 'other'
779: (8) >>> Locale('fr').plural_form(0)
780: (8) 'one'
781: (8) >>> Locale('ru').plural_form(100)
782: (8) 'many'
783: (8) """
784: (8) return self._data.get('plural_form', _default_plural_rule)
785: (4) @property
786: (4) def list_patterns(self) -> localedata.LocaleDataDict:
787: (8) """Patterns for generating lists
788: (8) .. note:: The format of the value returned may change between
789: (18) Babel versions.
790: (8) >>> Locale('en').list_patterns['standard']['start']
791: (8) u'{0}, {1}'
792: (8) >>> Locale('en').list_patterns['standard']['end']
793: (8) u'{0}, and {1}'
794: (8) >>> Locale('en_GB').list_patterns['standard']['end']
795: (8) u'{0} and {1}'
796: (8) """
797: (8) return self._data['list_patterns']
798: (4) @property
799: (4) def ordinal_form(self) -> PluralRule:
800: (8) """Ordinal plural rules for the locale.
801: (8) >>> Locale('en').ordinal_form(1)
802: (8) 'one'
803: (8) >>> Locale('en').ordinal_form(2)
804: (8) 'two'
805: (8) >>> Locale('en').ordinal_form(3)
806: (8) 'few'
807: (8) >>> Locale('fr').ordinal_form(2)
808: (8) 'other'
809: (8) >>> Locale('ru').ordinal_form(100)
810: (8) 'other'
811: (8) """
812: (8) return self._data.get('ordinal_form', _default_plural_rule)
813: (4) @property
814: (4) def measurement_systems(self) -> localedata.LocaleDataDict:
815: (8) """Localized names for various measurement systems.
816: (8) >>> Locale('fr', 'FR').measurement_systems['US']
817: (8) u'am\\xe9ricain'
818: (8) >>> Locale('en', 'US').measurement_systems['US']
819: (8) u'US'
820: (8) """
821: (8) return self._data['measurement_systems']
822: (4) @property
823: (4) def character_order(self) -> str:
824: (8) """The text direction for the language.
825: (8) >>> Locale('de', 'DE').character_order
826: (8) 'left-to-right'
827: (8) >>> Locale('ar', 'SA').character_order
828: (8) 'right-to-left'
829: (8) """
830: (8) return self._data['character_order']
831: (4) @property
832: (4) def text_direction(self) -> str:
833: (8) """The text direction for the language in CSS short-hand form.
834: (8) >>> Locale('de', 'DE').text_direction
835: (8) 'ltr'
836: (8) >>> Locale('ar', 'SA').text_direction
837: (8) 'rtl'
838: (8) """
839: (8) return ''.join(word[0] for word in self.character_order.split('-'))
840: (4) @property
841: (4) def unit_display_names(self) -> localedata.LocaleDataDict:
842: (8) """Display names for units of measurement.
843: (8) .. seealso::
844: (11) You may want to use :py:func:`babel.units.get_unit_name` instead.
845: (8) .. note:: The format of the value returned may change between
846: (18) Babel versions.
847: (8) """
848: (8) return self._data['unit_display_names']
849: (0) def default_locale(category: str | None = None, aliases: Mapping[str, str] = LOCALE_ALIASES) -> str | None:
850: (4) """Returns the system default locale for a given category, based on
851: (4) environment variables.
852: (4) >>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']:
853: (4) ... os.environ[name] = ''
854: (4) >>> os.environ['LANG'] = 'fr_FR.UTF-8'
855: (4) >>> default_locale('LC_MESSAGES')
856: (4) 'fr_FR'
857: (4) The "C" or "POSIX" pseudo-locales are treated as aliases for the
858: (4) "en_US_POSIX" locale:
859: (4) >>> os.environ['LC_MESSAGES'] = 'POSIX'
860: (4) >>> default_locale('LC_MESSAGES')
861: (4) 'en_US_POSIX'
862: (4) The following fallback environment variables are always considered:
863: (4) - ``LANGUAGE``
864: (4) - ``LC_ALL``
865: (4) - ``LC_CTYPE``
866: (4) - ``LANG``
867: (4) :param category: one of the ``LC_XXX`` environment variable names
868: (4) :param aliases: a dictionary of aliases for locale identifiers
869: (4) """
870: (4) varnames = (category, 'LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')
871: (4) for name in filter(None, varnames):
872: (8) locale = os.getenv(name)
873: (8) if locale:
874: (12) if name == 'LANGUAGE' and ':' in locale:
875: (16) # the LANGUAGE variable may contain a colon-separated list of
876: (16) # language codes; we just pick the first language on the list
877: (16) locale = locale.split(':')[0]
878: (12) if locale.split('.')[0] in ('C', 'POSIX'):
879: (16) locale = 'en_US_POSIX'
880: (12) elif aliases and locale in aliases:
881: (16) locale = aliases[locale]
882: (12) try:
883: (16) return get_locale_identifier(parse_locale(locale))
884: (12) except ValueError:
885: (16) pass
886: (4) return None
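A sketch of the environment-driven lookup, mirroring the doctest above; it mutates ``os.environ`` and is meant for experimentation only.

    import os
    from babel.core import default_locale

    for name in ('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LC_MESSAGES'):
        os.environ[name] = ''
    os.environ['LANG'] = 'fr_FR.UTF-8'
    print(default_locale('LC_MESSAGES'))   # 'fr_FR'

    os.environ['LC_MESSAGES'] = 'POSIX'
    print(default_locale('LC_MESSAGES'))   # 'en_US_POSIX'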
887: (0) def negotiate_locale(preferred: Iterable[str], available: Iterable[str], sep: str = '_', aliases: Mapping[str, str] = LOCALE_ALIASES) -> str | None:
888: (4) """Find the best match between available and requested locale strings.
889: (4) >>> negotiate_locale(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
890: (4) 'de_DE'
891: (4) >>> negotiate_locale(['de_DE', 'en_US'], ['en', 'de'])
892: (4) 'de'
893: (4) Case is ignored by the algorithm; the result uses the case of the preferred
894: (4) locale identifier:
895: (4) >>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at'])
896: (4) 'de_DE'
897: (4) >>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at'])
898: (4) 'de_DE'
899: (4) By default, some web browsers unfortunately do not include the territory
900: (4) in the locale identifier for many locales, and some don't even allow the
901: (4) user to easily add the territory. So while you may prefer using qualified
902: (4) locale identifiers in your web application, they would not normally match
903: (4) the language-only locale sent by such browsers. To work around that, this
904: (4) function uses a default mapping of commonly used language-only locale
905: (4) identifiers to identifiers including the territory:
906: (4) >>> negotiate_locale(['ja', 'en_US'], ['ja_JP', 'en_US'])
907: (4) 'ja_JP'
908: (4) Some browsers even use an incorrect or outdated language code, such as "no"
909: (4) for Norwegian, where the correct locale identifier would actually be "nb_NO"
910: (4) (Bokmål) or "nn_NO" (Nynorsk). The aliases are intended to take care of
911: (4) such cases, too:
912: (4) >>> negotiate_locale(['no', 'sv'], ['nb_NO', 'sv_SE'])
913: (4) 'nb_NO'
914: (4) You can override this default mapping by passing a different `aliases`
915: (4) dictionary to this function, or you can bypass the behavior altogether by
916: (4) setting the `aliases` parameter to `None`.
917: (4) :param preferred: the list of locale strings preferred by the user
918: (4) :param available: the list of locale strings available
919: (4) :param sep: character that separates the different parts of the locale
920: (16) strings
921: (4) :param aliases: a dictionary of aliases for locale identifiers
922: (4) """
923: (4) available = [a.lower() for a in available if a]
924: (4) for locale in preferred:
925: (8) ll = locale.lower()
926: (8) if ll in available:
927: (12) return locale
928: (8) if aliases:
929: (12) alias = aliases.get(ll)
930: (12) if alias:
931: (16) alias = alias.replace('_', sep)
932: (16) if alias.lower() in available:
933: (20) return alias
934: (8) parts = locale.split(sep)
935: (8) if len(parts) > 1 and parts[0].lower() in available:
936: (12) return parts[0]
937: (4) return None
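A small negotiation sketch; the preference list stands in for a parsed ``Accept-Language`` header (hypothetical values), and the built-in aliases map the bare ``no`` to ``nb_NO`` as described above. Note that ``sep`` applies to both lists.

    from babel.core import negotiate_locale

    preferred = ['no', 'en-us']               # hypothetical user preferences
    available = ['nb-NO', 'en-US', 'de-DE']   # hypothetical application locales
    print(negotiate_locale(preferred, available, sep='-'))   # 'nb-NO'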
938: (0) def parse_locale(
939: (4) identifier: str,
940: (4) sep: str = '_',
941: (0) ) -> tuple[str, str | None, str | None, str | None] | tuple[str, str | None, str | None, str | None, str | None]:
942: (4) """Parse a locale identifier into a tuple of the form ``(language,
943: (4) territory, script, variant, modifier)``.
944: (4) >>> parse_locale('zh_CN')
945: (4) ('zh', 'CN', None, None)
946: (4) >>> parse_locale('zh_Hans_CN')
947: (4) ('zh', 'CN', 'Hans', None)
948: (4) >>> parse_locale('ca_es_valencia')
949: (4) ('ca', 'ES', None, 'VALENCIA')
950: (4) >>> parse_locale('en_150')
951: (4) ('en', '150', None, None)
952: (4) >>> parse_locale('en_us_posix')
953: (4) ('en', 'US', None, 'POSIX')
954: (4) >>> parse_locale('it_IT@euro')
955: (4) ('it', 'IT', None, None, 'euro')
956: (4) >>> parse_locale('it_IT@custom')
957: (4) ('it', 'IT', None, None, 'custom')
958: (4) >>> parse_locale('it_IT@')
959: (4) ('it', 'IT', None, None)
960: (4) The default component separator is "_", but a different separator can be
961: (4) specified using the `sep` parameter.
962: (4) The optional modifier always comes at the end, separated by "@":
963: (4) >>> parse_locale('zh-CN', sep='-')
964: (4) ('zh', 'CN', None, None)
965: (4) >>> parse_locale('zh-CN@custom', sep='-')
966: (4) ('zh', 'CN', None, None, 'custom')
967: (4) If the identifier cannot be parsed into a locale, a `ValueError` exception
968: (4) is raised:
969: (4) >>> parse_locale('not_a_LOCALE_String')
970: (4) Traceback (most recent call last):
971: (6) ...
972: (4) ValueError: 'not_a_LOCALE_String' is not a valid locale identifier
973: (4) Encoding information is removed from the identifier, while modifiers are
974: (4) kept:
975: (4) >>> parse_locale('en_US.UTF-8')
976: (4) ('en', 'US', None, None)
977: (4) >>> parse_locale('de_DE.iso885915@euro')
978: (4) ('de', 'DE', None, None, 'euro')
979: (4) See :rfc:`4646` for more information.
980: (4) :param identifier: the locale identifier string
981: (4) :param sep: character that separates the different components of the locale
982: (16) identifier
983: (4) :raise `ValueError`: if the string does not appear to be a valid locale
984: (25) identifier
985: (4) """
986: (4) identifier, _, modifier = identifier.partition('@')
987: (4) if '.' in identifier:
988: (8) # this is probably the charset/encoding, which we don't care about
989: (8) identifier = identifier.split('.', 1)[0]
990: (4) parts = identifier.split(sep)
991: (4) lang = parts.pop(0).lower()
992: (4) if not lang.isalpha():
993: (8) raise ValueError(f"expected only letters, got {lang!r}")
994: (4) script = territory = variant = None
995: (4) if parts and len(parts[0]) == 4 and parts[0].isalpha():
996: (8) script = parts.pop(0).title()
997: (4) if parts:
998: (8) if len(parts[0]) == 2 and parts[0].isalpha():
999: (12) territory = parts.pop(0).upper()
1000: (8) elif len(parts[0]) == 3 and parts[0].isdigit():
1001: (12) territory = parts.pop(0)
1002: (4) if parts and (
1003: (8) len(parts[0]) == 4 and parts[0][0].isdigit() or
1004: (8) len(parts[0]) >= 5 and parts[0][0].isalpha()
1005: (4) ):
1006: (8) variant = parts.pop().upper()
1007: (4) if parts:
1008: (8) raise ValueError(f"{identifier!r} is not a valid locale identifier")
1009: (4) # TODO(3.0): always return a 5-tuple
1010: (4) if modifier:
1011: (8) return lang, territory, script, variant, modifier
1012: (4) else:
1013: (8) return lang, territory, script, variant
1014: (0) def get_locale_identifier(
1015: (4) tup: tuple[str]
1016: (4) | tuple[str, str | None]
1017: (4) | tuple[str, str | None, str | None]
1018: (4) | tuple[str, str | None, str | None, str | None]
1019: (4) | tuple[str, str | None, str | None, str | None, str | None],
1020: (4) sep: str = "_",
1021: (0) ) -> str:
1022: (4) """The reverse of :func:`parse_locale`. It creates a locale identifier out
1023: (4) of a ``(language, territory, script, variant, modifier)`` tuple. Items can be set to
1024: (4) ``None`` and trailing ``None``\\s can also be left out of the tuple.
1025: (4) >>> get_locale_identifier(('de', 'DE', None, '1999', 'custom'))
1026: (4) 'de_DE_1999@custom'
1027: (4) >>> get_locale_identifier(('fi', None, None, None, 'custom'))
1028: (4) 'fi@custom'
1029: (4) .. versionadded:: 1.0
1030: (4) :param tup: the tuple as returned by :func:`parse_locale`.
1031: (4) :param sep: the separator for the identifier.
1032: (4) """
1033: (4) tup = tuple(tup[:5]) # type: ignore # length should be no more than 5
1034: (4) lang, territory, script, variant, modifier = tup + (None,) * (5 - len(tup))
1035: (4) ret = sep.join(filter(None, (lang, script, territory, variant)))
1036: (4) return f'{ret}@{modifier}' if modifier else ret
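A round-trip sketch: for well-formed identifiers, ``get_locale_identifier`` reverses ``parse_locale``, with encoding suffixes dropped and modifiers kept.

    from babel.core import get_locale_identifier, parse_locale

    parts = parse_locale('de_DE.iso885915@euro')
    print(parts)                          # ('de', 'DE', None, None, 'euro')
    print(get_locale_identifier(parts))   # 'de_DE@euro'
    print(get_locale_identifier(('fi', None, None, None, 'custom')))   # 'fi@custom'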
----------------------------------------
File 3 - . \util.py:
1: (0) """
2: (4) babel.util
3: (4) ~~~~~~~~~~
4: (4) Various utility classes and functions.
5: (4) :copyright: (c) 2013-2024 by the Babel Team.
6: (4) :license: BSD, see LICENSE for more details.
7: (0) """
8: (0) from __future__ import annotations
9: (0) import codecs
10: (0) import collections
11: (0) import datetime
12: (0) import os
13: (0) import re
14: (0) import textwrap
15: (0) from collections.abc import Generator, Iterable
16: (0) from typing import IO, Any, TypeVar
17: (0) from babel import dates, localtime
18: (0) missing = object()
19: (0) _T = TypeVar("_T")
20: (0) def distinct(iterable: Iterable[_T]) -> Generator[_T, None, None]:
21: (4) """Yield all items in an iterable collection that are distinct.
22: (4) Unlike when using sets for a similar effect, the original ordering of the
23: (4) items in the collection is preserved by this function.
24: (4) >>> print(list(distinct([1, 2, 1, 3, 4, 4])))
25: (4) [1, 2, 3, 4]
26: (4) >>> print(list(distinct('foobar')))
27: (4) ['f', 'o', 'b', 'a', 'r']
28: (4) :param iterable: the iterable collection providing the data
29: (4) """
30: (4) seen = set()
31: (4) for item in iter(iterable):
32: (8) if item not in seen:
33: (12) yield item
34: (12) seen.add(item)
35: (0) # Regexp to match python magic encoding line
36: (0) PYTHON_MAGIC_COMMENT_re = re.compile(
37: (4) br'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)', re.VERBOSE)
38: (0) def parse_encoding(fp: IO[bytes]) -> str | None:
39: (4) """Deduce the encoding of a source file from its magic comment.
40: (4) It does this in the same way as the `Python interpreter`__
41: (4) .. __: https://docs.python.org/3.4/reference/lexical_analysis.html#encoding-declarations
42: (4) The ``fp`` argument should be a seekable file object.
43: (4) (From Jeff Dairiki)
44: (4) """
45: (4) pos = fp.tell()
46: (4) fp.seek(0)
47: (4) try:
48: (8) line1 = fp.readline()
49: (8) has_bom = line1.startswith(codecs.BOM_UTF8)
50: (8) if has_bom:
51: (12) line1 = line1[len(codecs.BOM_UTF8):]
52: (8) m = PYTHON_MAGIC_COMMENT_re.match(line1)
53: (8) if not m:
54: (12) try:
55: (16) import ast
56: (16) ast.parse(line1.decode('latin-1'))
57: (12) except (ImportError, SyntaxError, UnicodeEncodeError):
58: (16) # Either it's a real syntax error, in which case the source is
59: (16) # not valid python source, or line2 is a continuation of line1,
60: (16) # in which case we don't want to scan line2 for a magic
61: (16) # comment.
62: (16) pass
63: (12) else:
64: (16) line2 = fp.readline()
65: (16) m = PYTHON_MAGIC_COMMENT_re.match(line2)
66: (8) if has_bom:
67: (12) if m:
68: (16) magic_comment_encoding = m.group(1).decode('latin-1')
69: (16) if magic_comment_encoding != 'utf-8':
70: (20) raise SyntaxError(f"encoding problem: {magic_comment_encoding} with BOM")
71: (12) return 'utf-8'
72: (8) elif m:
73: (12) return m.group(1).decode('latin-1')
74: (8) else:
75: (12) return None
76: (4) finally:
77: (8) fp.seek(pos)
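A quick sketch: ``parse_encoding`` only needs a seekable binary file object, so an in-memory buffer is enough to try it.

    import io
    from babel.util import parse_encoding

    src = io.BytesIO(b'# -*- coding: latin-1 -*-\nprint("hello")\n')
    print(parse_encoding(src))                                    # 'latin-1'
    print(parse_encoding(io.BytesIO(b'print("no comment")\n')))   # None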
78: (0) PYTHON_FUTURE_IMPORT_re = re.compile(
79: (4) r'from\s+__future__\s+import\s+\(*(.+)\)*')
80: (0) def parse_future_flags(fp: IO[bytes], encoding: str = 'latin-1') -> int:
81: (4) """Parse the compiler flags by :mod:`__future__` from the given Python
82: (4) code.
83: (4) """
84: (4) import __future__
85: (4) pos = fp.tell()
86: (4) fp.seek(0)
87: (4) flags = 0
88: (4) try:
89: (8) body = fp.read().decode(encoding)
90: (8) # Fix up the source to be (hopefully) parsable by regexpen.
91: (8) # This will likely do untoward things if the source code itself is broken.
92: (8) # (1) Fix `import (\n...` to be `import (...`.
93: (8) body = re.sub(r'import\s*\([\r\n]+', 'import (', body)
94: (8) # (2) Join line-ending commas with the next line.
95: (8) body = re.sub(r',\s*[\r\n]+', ', ', body)
96: (8) # (3) Remove backslash line continuations.
97: (8) body = re.sub(r'\\\s*[\r\n]+', ' ', body)
98: (8) for m in PYTHON_FUTURE_IMPORT_re.finditer(body):
99: (12) names = [x.strip().strip('()') for x in m.group(1).split(',')]
100: (12) for name in names:
101: (16) feature = getattr(__future__, name, None)
102: (16) if feature:
103: (20) flags |= feature.compiler_flag
104: (4) finally:
105: (8) fp.seek(pos)
106: (4) return flags
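A sketch of feeding the returned bitmask to ``compile()`` so a snippet is parsed with the same ``__future__`` features as the module it came from.

    import io
    from babel.util import parse_future_flags

    source = b'from __future__ import annotations\nx = 1\n'
    flags = parse_future_flags(io.BytesIO(source))
    code = compile('x', '<string>', 'eval', flags=flags, dont_inherit=True)
    print(flags != 0)   # True: the 'annotations' feature contributes a compiler flag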
107: (0) def pathmatch(pattern: str, filename: str) -> bool:
108: (4) """Extended pathname pattern matching.
109: (4) This function is similar to what is provided by the ``fnmatch`` module in
110: (4) the Python standard library, but:
111: (5) * can match complete (relative or absolute) path names, and not just file
112: (7) names, and
113: (5) * also supports a convenience pattern ("**") to match files at any
114: (7) directory level.
115: (4) Examples:
116: (4) >>> pathmatch('**.py', 'bar.py')
117: (4) True
118: (4) >>> pathmatch('**.py', 'foo/bar/baz.py')
119: (4) True
120: (4) >>> pathmatch('**.py', 'templates/index.html')
121: (4) False
122: (4) >>> pathmatch('./foo/**.py', 'foo/bar/baz.py')
123: (4) True
124: (4) >>> pathmatch('./foo/**.py', 'bar/baz.py')
125: (4) False
126: (4) >>> pathmatch('^foo/**.py', 'foo/bar/baz.py')
127: (4) True
128: (4) >>> pathmatch('^foo/**.py', 'bar/baz.py')
129: (4) False
130: (4) >>> pathmatch('**/templates/*.html', 'templates/index.html')
131: (4) True
132: (4) >>> pathmatch('**/templates/*.html', 'templates/foo/bar.html')
133: (4) False
134: (4) :param pattern: the glob pattern
135: (4) :param filename: the path name of the file to match against
136: (4) """
137: (4) symbols = {
138: (8) '?': '[^/]',
139: (8) '?/': '[^/]/',
140: (8) '*': '[^/]+',
141: (8) '*/': '[^/]+/',
142: (8) '**/': '(?:.+/)*?',
143: (8) '**': '(?:.+/)*?[^/]+',
144: (4) }
145: (4) if pattern.startswith('^'):
146: (8) buf = ['^']
147: (8) pattern = pattern[1:]
148: (4) elif pattern.startswith('./'):
149: (8) buf = ['^']
150: (8) pattern = pattern[2:]
151: (4) else:
152: (8) buf = []
153: (4) for idx, part in enumerate(re.split('([?*]+/?)', pattern)):
154: (8) if idx % 2:
155: (12) buf.append(symbols[part])
156: (8) elif part:
157: (12) buf.append(re.escape(part))
158: (4) match = re.match(f"{''.join(buf)}$", filename.replace(os.sep, "/"))
159: (4) return match is not None
160: (0) class TextWrapper(textwrap.TextWrapper):
161: (4) wordsep_re = re.compile(
162: (8) r'(\s+|' # any whitespace
163: (8) r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))', # em-dash
164: (4) )
165: (0) def wraptext(text: str, width: int = 70, initial_indent: str = '', subsequent_indent: str = '') -> list[str]:
166: (4) """Simple wrapper around the ``textwrap.wrap`` function in the standard
167: (4) library. This version does not wrap lines on hyphens in words.
168: (4) :param text: the text to wrap
169: (4) :param width: the maximum line width
170: (4) :param initial_indent: string that will be prepended to the first line of
171: (27) wrapped output
172: (4) :param subsequent_indent: string that will be prepended to all lines save
173: (30) the first of wrapped output
174: (4) """
175: (4) wrapper = TextWrapper(width=width, initial_indent=initial_indent,
176: (26) subsequent_indent=subsequent_indent,
177: (26) break_long_words=False)
178: (4) return wrapper.wrap(text)
179: (0) # TODO (Babel 3.x): Remove this re-export
180: (0) odict = collections.OrderedDict
181: (0) class FixedOffsetTimezone(datetime.tzinfo):
182: (4) """Fixed offset in minutes east from UTC."""
183: (4) def __init__(self, offset: float, name: str | None = None) -> None:
184: (8) self._offset = datetime.timedelta(minutes=offset)
185: (8) if name is None:
186: (12) name = 'Etc/GMT%+d' % offset
187: (8) self.zone = name
188: (4) def __str__(self) -> str:
189: (8) return self.zone
190: (4) def __repr__(self) -> str:
191: (8) return f'<FixedOffset "{self.zone}" {self._offset}>'
192: (4) def utcoffset(self, dt: datetime.datetime) -> datetime.timedelta:
193: (8) return self._offset
194: (4) def tzname(self, dt: datetime.datetime) -> str:
195: (8) return self.zone
196: (4) def dst(self, dt: datetime.datetime) -> datetime.timedelta:
197: (8) return ZERO
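A usage sketch for the class above: the offset is given in minutes east of UTC, and ``dst()`` is always zero for a fixed-offset zone.

    import datetime
    from babel.util import FixedOffsetTimezone

    tz = FixedOffsetTimezone(330, name='UTC+05:30 (fixed)')
    dt = datetime.datetime(2024, 1, 1, 12, 0, tzinfo=tz)
    print(dt.utcoffset())   # 5:30:00
    print(dt.dst())         # 0:00:00
    print(tz)               # UTC+05:30 (fixed)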
198: (0) # Export the localtime functionality here because that's
199: (0) # where it was in the past.
200: (0) # TODO(3.0): remove these aliases
201: (0) UTC = dates.UTC
202: (0) LOCALTZ = dates.LOCALTZ
203: (0) get_localzone = localtime.get_localzone
204: (0) STDOFFSET = localtime.STDOFFSET
205: (0) DSTOFFSET = localtime.DSTOFFSET
206: (0) DSTDIFF = localtime.DSTDIFF
207: (0) ZERO = localtime.ZERO
208: (0) def _cmp(a: Any, b: Any):
209: (4) return (a > b) - (a < b)
----------------------------------------
File 4 - . \dates.py:
1: (0) """
2: (4) babel.dates
3: (4) ~~~~~~~~~~~
4: (4) Locale dependent formatting and parsing of dates and times.
5: (4) The default locale for the functions in this module is determined by the
6: (4) following environment variables, in that order:
7: (5) * ``LC_TIME``,
8: (5) * ``LC_ALL``, and
9: (5) * ``LANG``
10: (4) :copyright: (c) 2013-2024 by the Babel Team.
11: (4) :license: BSD, see LICENSE for more details.
12: (0) """
13: (0) from __future__ import annotations
14: (0) import re
15: (0) import warnings
16: (0) from functools import lru_cache
17: (0) from typing import TYPE_CHECKING, SupportsInt
18: (0) try:
19: (4) import pytz
20: (0) except ModuleNotFoundError:
21: (4) pytz = None
22: (4) import zoneinfo
23: (0) import datetime
24: (0) from collections.abc import Iterable
25: (0) from babel import localtime
26: (0) from babel.core import Locale, default_locale, get_global
27: (0) from babel.localedata import LocaleDataDict
28: (0) if TYPE_CHECKING:
29: (4) from typing_extensions import Literal, TypeAlias
30: (4) _Instant: TypeAlias = datetime.date | datetime.time | float | None
31: (4) _PredefinedTimeFormat: TypeAlias = Literal['full', 'long', 'medium', 'short']
32: (4) _Context: TypeAlias = Literal['format', 'stand-alone']
33: (4) _DtOrTzinfo: TypeAlias = datetime.datetime | datetime.tzinfo | str | int | datetime.time | None
34: (0) # "If a given short metazone form is known NOT to be understood in a given
35: (0) # locale and the parent locale has this value such that it would normally
36: (0) # be inherited, the inheritance of this value can be explicitly disabled by
37: (0) # use of the 'no inheritance marker' as the value, which is 3 simultaneous [sic]
38: (0) # empty set characters ( U+2205 )."
39: (0) # - https://www.unicode.org/reports/tr35/tr35-dates.html#Metazone_Names
40: (0) NO_INHERITANCE_MARKER = '\u2205\u2205\u2205'
41: (0) UTC = datetime.timezone.utc
42: (0) LOCALTZ = localtime.LOCALTZ
43: (0) LC_TIME = default_locale('LC_TIME')
44: (0) def _localize(tz: datetime.tzinfo, dt: datetime.datetime) -> datetime.datetime:
45: (4) # Support localizing with both pytz and zoneinfo tzinfos
46: (4) # nothing to do
47: (4) if dt.tzinfo is tz:
48: (8) return dt
49: (4) if hasattr(tz, 'localize'): # pytz
50: (8) return tz.localize(dt)
51: (4) if dt.tzinfo is None:
52: (8) # convert naive to localized
53: (8) return dt.replace(tzinfo=tz)
54: (4) # convert timezones
55: (4) return dt.astimezone(tz)
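# --- Editor's sketch (not part of the source): what the _localize() helper above
# smooths over. zoneinfo zones are simply attached with replace(), pytz zones need
# .localize(), and aware datetimes are converted. Assumes tz data for "Europe/Paris".
import datetime
import zoneinfo

_tz = zoneinfo.ZoneInfo('Europe/Paris')
_naive = datetime.datetime(2007, 4, 1, 15, 30)
print(_localize(_tz, _naive))    # 2007-04-01 15:30:00+02:00 (naive: tz attached)
_aware = datetime.datetime(2007, 4, 1, 15, 30, tzinfo=datetime.timezone.utc)
print(_localize(_tz, _aware))    # 2007-04-01 17:30:00+02:00 (aware: converted)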
56: (0) def _get_dt_and_tzinfo(dt_or_tzinfo: _DtOrTzinfo) -> tuple[datetime.datetime | None, datetime.tzinfo]:
57: (4) """
58: (4) Parse a `dt_or_tzinfo` value into a datetime and a tzinfo.
59: (4) See the docs for this function's callers for semantics.
60: (4) :rtype: tuple[datetime, tzinfo]
61: (4) """
62: (4) if dt_or_tzinfo is None:
63: (8) dt = datetime.datetime.now()
64: (8) tzinfo = LOCALTZ
65: (4) elif isinstance(dt_or_tzinfo, str):
66: (8) dt = None
67: (8) tzinfo = get_timezone(dt_or_tzinfo)
68: (4) elif isinstance(dt_or_tzinfo, int):
69: (8) dt = None
70: (8) tzinfo = UTC
71: (4) elif isinstance(dt_or_tzinfo, (datetime.datetime, datetime.time)):
72: (8) dt = _get_datetime(dt_or_tzinfo)
73: (8) tzinfo = dt.tzinfo if dt.tzinfo is not None else UTC
74: (4) else:
75: (8) dt = None
76: (8) tzinfo = dt_or_tzinfo
77: (4) return dt, tzinfo
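# --- Editor's sketch (not part of the source): the accepted shapes of `dt_or_tzinfo`,
# exercising the helper defined above together with the module-level UTC/LOCALTZ names.
import datetime

print(_get_dt_and_tzinfo(None)[1] is LOCALTZ)                        # True  (now + local zone)
print(_get_dt_and_tzinfo('Europe/Berlin')[0])                        # None  (only a tzinfo)
print(_get_dt_and_tzinfo(datetime.datetime(2020, 1, 1))[1] is UTC)   # True  (naive -> UTC)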
78: (0) def _get_tz_name(dt_or_tzinfo: _DtOrTzinfo) -> str:
79: (4) """
80: (4) Get the timezone name out of a time, datetime, or tzinfo object.
81: (4) :rtype: str
82: (4) """
83: (4) dt, tzinfo = _get_dt_and_tzinfo(dt_or_tzinfo)
84: (4) if hasattr(tzinfo, 'zone'): # pytz object
85: (8) return tzinfo.zone
86: (4) elif hasattr(tzinfo, 'key') and tzinfo.key is not None: # ZoneInfo object
87: (8) return tzinfo.key
88: (4) else:
89: (8) return tzinfo.tzname(dt or datetime.datetime.now(UTC))
90: (0) def _get_datetime(instant: _Instant) -> datetime.datetime:
91: (4) """
92: (4) Get a datetime out of an "instant" (date, time, datetime, number).
93: (4) .. warning:: The return values of this function may depend on the system clock.
94: (4) If the instant is None, the current moment is used.
95: (4) If the instant is a time, it's augmented with today's date.
96: (4) Dates are converted to naive datetimes with midnight as the time component.
97: (4) >>> from datetime import date, datetime
98: (4) >>> _get_datetime(date(2015, 1, 1))
99: (4) datetime.datetime(2015, 1, 1, 0, 0)
100: (4) UNIX timestamps are converted to datetimes.
101: (4) >>> _get_datetime(1400000000)
102: (4) datetime.datetime(2014, 5, 13, 16, 53, 20)
103: (4) Other values are passed through as-is.
104: (4) >>> x = datetime(2015, 1, 1)
105: (4) >>> _get_datetime(x) is x
106: (4) True
107: (4) :param instant: date, time, datetime, integer, float or None
108: (4) :type instant: date|time|datetime|int|float|None
109: (4) :return: a datetime
110: (4) :rtype: datetime
111: (4) """
112: (4) if instant is None:
113: (8) return datetime.datetime.now(UTC).replace(tzinfo=None)
114: (4) elif isinstance(instant, (int, float)):
115: (8) return datetime.datetime.fromtimestamp(instant, UTC).replace(tzinfo=None)
116: (4) elif isinstance(instant, datetime.time):
117: (8) return datetime.datetime.combine(datetime.date.today(), instant)
118: (4) elif isinstance(instant, datetime.date) and not isinstance(instant, datetime.datetime):
119: (8) return datetime.datetime.combine(instant, datetime.time())
120: (4) # TODO (3.x): Add an assertion/type check for this fallthrough branch:
121: (4) return instant
122: (0) def _ensure_datetime_tzinfo(dt: datetime.datetime, tzinfo: datetime.tzinfo | None = None) -> datetime.datetime:
123: (4) """
124: (4) Ensure the datetime passed has an attached tzinfo.
125: (4) If the datetime is tz-naive to begin with, UTC is attached.
126: (4) If a tzinfo is passed in, the datetime is normalized to that timezone.
127: (4) >>> from datetime import datetime
128: (4) >>> _get_tz_name(_ensure_datetime_tzinfo(datetime(2015, 1, 1)))
129: (4) 'UTC'
130: (4) >>> tz = get_timezone("Europe/Stockholm")
131: (4) >>> _ensure_datetime_tzinfo(datetime(2015, 1, 1, 13, 15, tzinfo=UTC), tzinfo=tz).hour
132: (4) 14
133: (4) :param dt: Datetime to augment.
134: (4) :param tzinfo: optional tzinfo
135: (4) :return: datetime with tzinfo
136: (4) :rtype: datetime
137: (4) """
138: (4) if dt.tzinfo is None:
139: (8) dt = dt.replace(tzinfo=UTC)
140: (4) if tzinfo is not None:
141: (8) dt = dt.astimezone(get_timezone(tzinfo))
142: (8) if hasattr(tzinfo, 'normalize'): # pytz
143: (12) dt = tzinfo.normalize(dt)
144: (4) return dt
145: (0) def _get_time(
146: (4) time: datetime.time | datetime.datetime | None,
147: (4) tzinfo: datetime.tzinfo | None = None,
148: (0) ) -> datetime.time:
149: (4) """
150: (4) Get a timezoned time from a given instant.
151: (4) .. warning:: The return values of this function may depend on the system clock.
152: (4) :param time: time, datetime or None
153: (4) :rtype: time
154: (4) """
155: (4) if time is None:
156: (8) time = datetime.datetime.now(UTC)
157: (4) elif isinstance(time, (int, float)):
158: (8) time = datetime.datetime.fromtimestamp(time, UTC)
159: (4) if time.tzinfo is None:
160: (8) time = time.replace(tzinfo=UTC)
161: (4) if isinstance(time, datetime.datetime):
162: (8) if tzinfo is not None:
163: (12) time = time.astimezone(tzinfo)
164: (12) if hasattr(tzinfo, 'normalize'): # pytz
165: (16) time = tzinfo.normalize(time)
166: (8) time = time.timetz()
167: (4) elif tzinfo is not None:
168: (8) time = time.replace(tzinfo=tzinfo)
169: (4) return time
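# --- Editor's sketch (not part of the source): _get_time() behaviour, using the
# helper defined above.
import datetime

print(_get_time(datetime.time(15, 30)))                    # 15:30:00+00:00 (naive time: UTC attached)
print(_get_time(datetime.datetime(2007, 4, 1, 15, 30)))    # 15:30:00+00:00 (datetime reduced via timetz())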
170: (0) def get_timezone(zone: str | datetime.tzinfo | None = None) -> datetime.tzinfo:
171: (4) """Looks up a timezone by name and returns it. The timezone object
172: (4) returned comes from ``pytz`` or ``zoneinfo``, whichever is available.
173: (4) It corresponds to the `tzinfo` interface and can be used with all of
174: (4) the functions of Babel that operate with dates.
175: (4) If a timezone is not known a :exc:`LookupError` is raised. If `zone`
176: (4) is ``None`` a local zone object is returned.
177: (4) :param zone: the name of the timezone to look up. If a timezone object
178: (17) itself is passed in, it's returned unchanged.
179: (4) """
180: (4) if zone is None:
181: (8) return LOCALTZ
182: (4) if not isinstance(zone, str):
183: (8) return zone
184: (4) if pytz:
185: (8) try:
186: (12) return pytz.timezone(zone)
187: (8) except pytz.UnknownTimeZoneError as e:
188: (12) exc = e
189: (4) else:
190: (8) assert zoneinfo
191: (8) try:
192: (12) return zoneinfo.ZoneInfo(zone)
193: (8) except zoneinfo.ZoneInfoNotFoundError as e:
194: (12) exc = e
195: (4) raise LookupError(f"Unknown timezone {zone}") from exc
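# --- Editor's sketch (not part of the source): get_timezone() lookups. The backend
# (pytz or zoneinfo) depends on what is installed; 'Mars/Olympus_Mons' is a
# deliberately unknown name used only to trigger the LookupError branch.
from babel.dates import get_timezone

berlin = get_timezone('Europe/Berlin')
print(get_timezone(berlin) is berlin)       # True: tzinfo objects pass through unchanged
try:
    get_timezone('Mars/Olympus_Mons')
except LookupError as exc:
    print(exc)                              # Unknown timezone Mars/Olympus_Mons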
196: (0) def get_period_names(width: Literal['abbreviated', 'narrow', 'wide'] = 'wide',
197: (21) context: _Context = 'stand-alone', locale: Locale | str | None = LC_TIME) -> LocaleDataDict:
198: (4) """Return the names for day periods (AM/PM) used by the locale.
199: (4) >>> get_period_names(locale='en_US')['am']
200: (4) u'AM'
201: (4) :param width: the width to use, one of "abbreviated", "narrow", or "wide"
202: (4) :param context: the context, either "format" or "stand-alone"
203: (4) :param locale: the `Locale` object, or a locale string
204: (4) """
205: (4) return Locale.parse(locale).day_periods[context][width]
206: (0) def get_day_names(width: Literal['abbreviated', 'narrow', 'short', 'wide'] = 'wide',
207: (18) context: _Context = 'format', locale: Locale | str | None = LC_TIME) -> LocaleDataDict:
208: (4) """Return the day names used by the locale for the specified format.
209: (4) >>> get_day_names('wide', locale='en_US')[1]
210: (4) u'Tuesday'
211: (4) >>> get_day_names('short', locale='en_US')[1]
212: (4) u'Tu'
213: (4) >>> get_day_names('abbreviated', locale='es')[1]
214: (4) u'mar'
215: (4) >>> get_day_names('narrow', context='stand-alone', locale='de_DE')[1]
216: (4) u'D'
217: (4) :param width: the width to use, one of "wide", "abbreviated", "short" or "narrow"
218: (4) :param context: the context, either "format" or "stand-alone"
219: (4) :param locale: the `Locale` object, or a locale string
220: (4) """
221: (4) return Locale.parse(locale).days[context][width]
222: (0) def get_month_names(width: Literal['abbreviated', 'narrow', 'wide'] = 'wide',
223: (20) context: _Context = 'format', locale: Locale | str | None = LC_TIME) -> LocaleDataDict:
224: (4) """Return the month names used by the locale for the specified format.
225: (4) >>> get_month_names('wide', locale='en_US')[1]
226: (4) u'January'
227: (4) >>> get_month_names('abbreviated', locale='es')[1]
228: (4) u'ene'
229: (4) >>> get_month_names('narrow', context='stand-alone', locale='de_DE')[1]
230: (4) u'J'
231: (4) :param width: the width to use, one of "wide", "abbreviated", or "narrow"
232: (4) :param context: the context, either "format" or "stand-alone"
233: (4) :param locale: the `Locale` object, or a locale string
234: (4) """
235: (4) return Locale.parse(locale).months[context][width]
236: (0) def get_quarter_names(width: Literal['abbreviated', 'narrow', 'wide'] = 'wide',
237: (22) context: _Context = 'format', locale: Locale | str | None = LC_TIME) -> LocaleDataDict:
238: (4) """Return the quarter names used by the locale for the specified format.
239: (4) >>> get_quarter_names('wide', locale='en_US')[1]
240: (4) u'1st quarter'
241: (4) >>> get_quarter_names('abbreviated', locale='de_DE')[1]
242: (4) u'Q1'
243: (4) >>> get_quarter_names('narrow', locale='de_DE')[1]
244: (4) u'1'
245: (4) :param width: the width to use, one of "wide", "abbreviated", or "narrow"
246: (4) :param context: the context, either "format" or "stand-alone"
247: (4) :param locale: the `Locale` object, or a locale string
248: (4) """
249: (4) return Locale.parse(locale).quarters[context][width]
250: (0) def get_era_names(width: Literal['abbreviated', 'narrow', 'wide'] = 'wide',
251: (18) locale: Locale | str | None = LC_TIME) -> LocaleDataDict:
252: (4) """Return the era names used by the locale for the specified format.
253: (4) >>> get_era_names('wide', locale='en_US')[1]
254: (4) u'Anno Domini'
255: (4) >>> get_era_names('abbreviated', locale='de_DE')[1]
256: (4) u'n. Chr.'
257: (4) :param width: the width to use, one of "wide", "abbreviated", or "narrow"
258: (4) :param locale: the `Locale` object, or a locale string
259: (4) """
260: (4) return Locale.parse(locale).eras[width]
261: (0) def get_date_format(format: _PredefinedTimeFormat = 'medium', locale: Locale | str | None = LC_TIME) -> DateTimePattern:
262: (4) """Return the date formatting patterns used by the locale for the specified
263: (4) format.
264: (4) >>> get_date_format(locale='en_US')
265: (4) <DateTimePattern u'MMM d, y'>
266: (4) >>> get_date_format('full', locale='de_DE')
267: (4) <DateTimePattern u'EEEE, d. MMMM y'>
268: (4) :param format: the format to use, one of "full", "long", "medium", or
269: (19) "short"
270: (4) :param locale: the `Locale` object, or a locale string
271: (4) """
272: (4) return Locale.parse(locale).date_formats[format]
273: (0) def get_datetime_format(format: _PredefinedTimeFormat = 'medium', locale: Locale | str | None = LC_TIME) -> DateTimePattern:
274: (4) """Return the datetime formatting patterns used by the locale for the
275: (4) specified format.
276: (4) >>> get_datetime_format(locale='en_US')
277: (4) u'{1}, {0}'
278: (4) :param format: the format to use, one of "full", "long", "medium", or
279: (19) "short"
280: (4) :param locale: the `Locale` object, or a locale string
281: (4) """
282: (4) patterns = Locale.parse(locale).datetime_formats
283: (4) if format not in patterns:
284: (8) format = None
285: (4) return patterns[format]
286: (0) def get_time_format(format: _PredefinedTimeFormat = 'medium', locale: Locale | str | None = LC_TIME) -> DateTimePattern:
287: (4) """Return the time formatting patterns used by the locale for the specified
288: (4) format.
289: (4) >>> get_time_format(locale='en_US')
290: (4) <DateTimePattern u'h:mm:ss\u202fa'>
291: (4) >>> get_time_format('full', locale='de_DE')
292: (4) <DateTimePattern u'HH:mm:ss zzzz'>
293: (4) :param format: the format to use, one of "full", "long", "medium", or
294: (19) "short"
295: (4) :param locale: the `Locale` object, or a locale string
296: (4) """
297: (4) return Locale.parse(locale).time_formats[format]
298: (0) def get_timezone_gmt(
299: (4) datetime: _Instant = None,
300: (4) width: Literal['long', 'short', 'iso8601', 'iso8601_short'] = 'long',
301: (4) locale: Locale | str | None = LC_TIME,
302: (4) return_z: bool = False,
303: (0) ) -> str:
304: (4) """Return the timezone associated with the given `datetime` object formatted
305: (4) as string indicating the offset from GMT.
306: (4) >>> from datetime import datetime
307: (4) >>> dt = datetime(2007, 4, 1, 15, 30)
308: (4) >>> get_timezone_gmt(dt, locale='en')
309: (4) u'GMT+00:00'
310: (4) >>> get_timezone_gmt(dt, locale='en', return_z=True)
311: (4) 'Z'
312: (4) >>> get_timezone_gmt(dt, locale='en', width='iso8601_short')
313: (4) u'+00'
314: (4) >>> tz = get_timezone('America/Los_Angeles')
315: (4) >>> dt = _localize(tz, datetime(2007, 4, 1, 15, 30))
316: (4) >>> get_timezone_gmt(dt, locale='en')
317: (4) u'GMT-07:00'
318: (4) >>> get_timezone_gmt(dt, 'short', locale='en')
319: (4) u'-0700'
320: (4) >>> get_timezone_gmt(dt, locale='en', width='iso8601_short')
321: (4) u'-07'
322: (4) The long format depends on the locale; in France, for example, the
323: (4) acronym UTC is used instead of GMT:
324: (4) >>> get_timezone_gmt(dt, 'long', locale='fr_FR')
325: (4) u'UTC-07:00'
326: (4) .. versionadded:: 0.9
327: (4) :param datetime: the ``datetime`` object; if `None`, the current date and
328: (21) time in UTC is used
329: (4) :param width: one of "long", "short", "iso8601" or "iso8601_short"
330: (4) :param locale: the `Locale` object, or a locale string
331: (4) :param return_z: if `True`, return the indicator "Z" when the local time
332: (21) offset is 0
333: (4) """
334: (4) datetime = _ensure_datetime_tzinfo(_get_datetime(datetime))
335: (4) locale = Locale.parse(locale)
336: (4) offset = datetime.tzinfo.utcoffset(datetime)
337: (4) seconds = offset.days * 24 * 60 * 60 + offset.seconds
338: (4) hours, seconds = divmod(seconds, 3600)
339: (4) if return_z and hours == 0 and seconds == 0:
340: (8) return 'Z'
341: (4) elif seconds == 0 and width == 'iso8601_short':
342: (8) return '%+03d' % hours
343: (4) elif width == 'short' or width == 'iso8601_short':
344: (8) pattern = '%+03d%02d'
345: (4) elif width == 'iso8601':
346: (8) pattern = '%+03d:%02d'
347: (4) else:
348: (8) pattern = locale.zone_formats['gmt'] % '%+03d:%02d'
349: (4) return pattern % (hours, seconds // 60)
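# --- Editor's sketch (not part of the source): the offset arithmetic above for a
# UTC-7 zone. divmod() floors, so whole negative hours come out cleanly, and the
# 'iso8601' pattern renders the familiar "-07:00".
offset_seconds = -7 * 3600
hours, seconds = divmod(offset_seconds, 3600)
print('%+03d:%02d' % (hours, seconds // 60))   # -07:00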
350: (0) def get_timezone_location(
351: (4) dt_or_tzinfo: _DtOrTzinfo = None,
352: (4) locale: Locale | str | None = LC_TIME,
353: (4) return_city: bool = False,
354: (0) ) -> str:
355: (4) """Return a representation of the given timezone using "location format".
356: (4) The result depends on both the local display name of the country and the
357: (4) city associated with the time zone:
358: (4) >>> tz = get_timezone('America/St_Johns')
359: (4) >>> print(get_timezone_location(tz, locale='de_DE'))
360: (4) Kanada (St. John’s) (Ortszeit)
361: (4) >>> print(get_timezone_location(tz, locale='en'))
362: (4) Canada (St. John’s) Time
363: (4) >>> print(get_timezone_location(tz, locale='en', return_city=True))
364: (4) St. John’s
365: (4) >>> tz = get_timezone('America/Mexico_City')
366: (4) >>> get_timezone_location(tz, locale='de_DE')
367: (4) u'Mexiko (Mexiko-Stadt) (Ortszeit)'
368: (4) If the timezone is associated with a country that uses only a single
369: (4) timezone, just the localized country name is returned:
370: (4) >>> tz = get_timezone('Europe/Berlin')
371: (4) >>> get_timezone_name(tz, locale='de_DE')
372: (4) u'Mitteleurop\\xe4ische Zeit'
373: (4) .. versionadded:: 0.9
374: (4) :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
375: (25) the timezone; if `None`, the current date and time in
376: (25) UTC is assumed
377: (4) :param locale: the `Locale` object, or a locale string
378: (4) :param return_city: if `True`, return only the exemplar city (location)
379: (24) for the time zone
380: (4) :return: the localized timezone name using location format
381: (4) """
382: (4) locale = Locale.parse(locale)
383: (4) zone = _get_tz_name(dt_or_tzinfo)
384: (4) # Get the canonical time-zone code
385: (4) zone = get_global('zone_aliases').get(zone, zone)
386: (4) info = locale.time_zones.get(zone, {})
387: (4) # Otherwise, if there is only one timezone for the country, return the
388: (4) # localized country name
389: (4) region_format = locale.zone_formats['region']
390: (4) territory = get_global('zone_territories').get(zone)
391: (4) if territory not in locale.territories:
392: (8) territory = 'ZZ' # invalid/unknown
393: (4) territory_name = locale.territories[territory]
394: (4) if not return_city and territory and len(get_global('territory_zones').get(territory, [])) == 1:
395: (8) return region_format % territory_name
396: (4) # Otherwise, include the city in the output
397: (4) fallback_format = locale.zone_formats['fallback']
398: (4) if 'city' in info:
399: (8) city_name = info['city']
400: (4) else:
401: (8) metazone = get_global('meta_zones').get(zone)
402: (8) metazone_info = locale.meta_zones.get(metazone, {})
403: (8) if 'city' in metazone_info:
404: (12) city_name = metazone_info['city']
405: (8) elif '/' in zone:
406: (12) city_name = zone.split('/', 1)[1].replace('_', ' ')
407: (8) else:
408: (12) city_name = zone.replace('_', ' ')
409: (4) if return_city:
410: (8) return city_name
411: (4) return region_format % (fallback_format % {
412: (8) '0': city_name,
413: (8) '1': territory_name,
414: (4) })
415: (0) def get_timezone_name(
416: (4) dt_or_tzinfo: _DtOrTzinfo = None,
417: (4) width: Literal['long', 'short'] = 'long',
418: (4) uncommon: bool = False,
419: (4) locale: Locale | str | None = LC_TIME,
420: (4) zone_variant: Literal['generic', 'daylight', 'standard'] | None = None,
421: (4) return_zone: bool = False,
422: (0) ) -> str:
423: (4) r"""Return the localized display name for the given timezone. The timezone
424: (4) may be specified using a ``datetime`` or `tzinfo` object.
425: (4) >>> from datetime import time
426: (4) >>> dt = time(15, 30, tzinfo=get_timezone('America/Los_Angeles'))
427: (4) >>> get_timezone_name(dt, locale='en_US') # doctest: +SKIP
428: (4) u'Pacific Standard Time'
429: (4) >>> get_timezone_name(dt, locale='en_US', return_zone=True)
430: (4) 'America/Los_Angeles'
431: (4) >>> get_timezone_name(dt, width='short', locale='en_US') # doctest: +SKIP
432: (4) u'PST'
433: (4) If this function gets passed only a `tzinfo` object and no concrete
434: (4) `datetime`, the returned display name is independent of daylight savings
435: (4) time. This can be used for example for selecting timezones, or to set the
436: (4) time of events that recur across DST changes:
437: (4) >>> tz = get_timezone('America/Los_Angeles')
438: (4) >>> get_timezone_name(tz, locale='en_US')
439: (4) u'Pacific Time'
440: (4) >>> get_timezone_name(tz, 'short', locale='en_US')
441: (4) u'PT'
442: (4) If no localized display name for the timezone is available, and the timezone
443: (4) is associated with a country that uses only a single timezone, the name of
444: (4) that country is returned, formatted according to the locale:
445: (4) >>> tz = get_timezone('Europe/Berlin')
446: (4) >>> get_timezone_name(tz, locale='de_DE')
447: (4) u'Mitteleurop\xe4ische Zeit'
448: (4) >>> get_timezone_name(tz, locale='pt_BR')
449: (4) u'Hor\xe1rio da Europa Central'
450: (4) On the other hand, if the country uses multiple timezones, the city is also
451: (4) included in the representation:
452: (4) >>> tz = get_timezone('America/St_Johns')
453: (4) >>> get_timezone_name(tz, locale='de_DE')
454: (4) u'Neufundland-Zeit'
455: (4) Note that short format is currently not supported for all timezones and
456: (4) all locales. This is partially because not every timezone has a short
457: (4) code in every locale. In that case it currently falls back to the long
458: (4) format.
459: (4) For more information see `LDML Appendix J: Time Zone Display Names
460: (4) <https://www.unicode.org/reports/tr35/#Time_Zone_Fallback>`_
461: (4) .. versionadded:: 0.9
462: (4) .. versionchanged:: 1.0
463: (7) Added `zone_variant` support.
464: (4) :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
465: (25) the timezone; if a ``tzinfo`` object is used, the
466: (25) resulting display name will be generic, i.e.
467: (25) independent of daylight savings time; if `None`, the
468: (25) current date in UTC is assumed
469: (4) :param width: either "long" or "short"
470: (4) :param uncommon: deprecated and ignored
471: (4) :param zone_variant: defines the zone variation to return. By default the
472: (27) variation is defined from the datetime object
473: (27) passed in. If no datetime object is passed in, the
474: (27) ``'generic'`` variation is assumed. The following
475: (27) values are valid: ``'generic'``, ``'daylight'`` and
476: (27) ``'standard'``.
477: (4) :param locale: the `Locale` object, or a locale string
478: (4) :param return_zone: if `True`, return the long time zone ID instead of
479: (24) the localized display name
480: (4) """
481: (4) dt, tzinfo = _get_dt_and_tzinfo(dt_or_tzinfo)
482: (4) locale = Locale.parse(locale)
483: (4) zone = _get_tz_name(dt_or_tzinfo)
484: (4) if zone_variant is None:
485: (8) if dt is None:
486: (12) zone_variant = 'generic'
487: (8) else:
488: (12) dst = tzinfo.dst(dt)
489: (12) zone_variant = "daylight" if dst else "standard"
490: (4) else:
491: (8) if zone_variant not in ('generic', 'standard', 'daylight'):
492: (12) raise ValueError('Invalid zone variation')
493: (4) # Get the canonical time-zone code
494: (4) zone = get_global('zone_aliases').get(zone, zone)
495: (4) if return_zone:
496: (8) return zone
497: (4) info = locale.time_zones.get(zone, {})
498: (4) # Try explicitly translated zone names first
499: (4) if width in info and zone_variant in info[width]:
500: (8) return info[width][zone_variant]
501: (4) metazone = get_global('meta_zones').get(zone)
502: (4) if metazone:
503: (8) metazone_info = locale.meta_zones.get(metazone, {})
504: (8) if width in metazone_info:
505: (12) name = metazone_info[width].get(zone_variant)
506: (12) if width == 'short' and name == NO_INHERITANCE_MARKER:
507: (16) # If the short form is marked no-inheritance,
508: (16) # try to fall back to the long name instead.
509: (16) name = metazone_info.get('long', {}).get(zone_variant)
510: (12) if name:
511: (16) return name
512: (4) # If we have a concrete datetime, we assume that the result can't be
513: (4) # independent of daylight savings time, so we return the GMT offset
514: (4) if dt is not None:
515: (8) return get_timezone_gmt(dt, width=width, locale=locale)
516: (4) return get_timezone_location(dt_or_tzinfo, locale=locale)
517: (0) def format_date(
518: (4) date: datetime.date | None = None,
519: (4) format: _PredefinedTimeFormat | str = 'medium',
520: (4) locale: Locale | str | None = LC_TIME,
521: (0) ) -> str:
522: (4) """Return a date formatted according to the given pattern.
523: (4) >>> from datetime import date
524: (4) >>> d = date(2007, 4, 1)
525: (4) >>> format_date(d, locale='en_US')
526: (4) u'Apr 1, 2007'
527: (4) >>> format_date(d, format='full', locale='de_DE')
528: (4) u'Sonntag, 1. April 2007'
529: (4) If you don't want to use the locale default formats, you can specify a
530: (4) custom date pattern:
531: (4) >>> format_date(d, "EEE, MMM d, ''yy", locale='en')
532: (4) u"Sun, Apr 1, '07"
533: (4) :param date: the ``date`` or ``datetime`` object; if `None`, the current
534: (17) date is used
535: (4) :param format: one of "full", "long", "medium", or "short", or a custom
536: (19) date/time pattern
537: (4) :param locale: a `Locale` object or a locale identifier
538: (4) """
539: (4) if date is None:
540: (8) date = datetime.date.today()
541: (4) elif isinstance(date, datetime.datetime):
542: (8) date = date.date()
543: (4) locale = Locale.parse(locale)
544: (4) if format in ('full', 'long', 'medium', 'short'):
545: (8) format = get_date_format(format, locale=locale)
546: (4) pattern = parse_pattern(format)
547: (4) return pattern.apply(date, locale)
548: (0) def format_datetime(
549: (4) datetime: _Instant = None,
550: (4) format: _PredefinedTimeFormat | str = 'medium',
551: (4) tzinfo: datetime.tzinfo | None = None,
552: (4) locale: Locale | str | None = LC_TIME,
553: (0) ) -> str:
554: (4) r"""Return a date formatted according to the given pattern.
555: (4) >>> from datetime import datetime
556: (4) >>> dt = datetime(2007, 4, 1, 15, 30)
557: (4) >>> format_datetime(dt, locale='en_US')
558: (4) u'Apr 1, 2007, 3:30:00\u202fPM'
559: (4) For any pattern requiring the display of the timezone:
560: (4) >>> format_datetime(dt, 'full', tzinfo=get_timezone('Europe/Paris'),
561: (4) ... locale='fr_FR')
562: (4) 'dimanche 1 avril 2007, 17:30:00 heure d’été d’Europe centrale'
563: (4) >>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz",
564: (4) ... tzinfo=get_timezone('US/Eastern'), locale='en')
565: (4) u'2007.04.01 AD at 11:30:00 EDT'
566: (4) :param datetime: the `datetime` object; if `None`, the current date and
567: (21) time is used
568: (4) :param format: one of "full", "long", "medium", or "short", or a custom
569: (19) date/time pattern
570: (4) :param tzinfo: the timezone to apply to the time for display
571: (4) :param locale: a `Locale` object or a locale identifier
572: (4) """
573: (4) datetime = _ensure_datetime_tzinfo(_get_datetime(datetime), tzinfo)
574: (4) locale = Locale.parse(locale)
575: (4) if format in ('full', 'long', 'medium', 'short'):
576: (8) return get_datetime_format(format, locale=locale) \
577: (12) .replace("'", "") \
578: (12) .replace('{0}', format_time(datetime, format, tzinfo=None,
579: (40) locale=locale)) \
580: (12) .replace('{1}', format_date(datetime, format, locale=locale))
581: (4) else:
582: (8) return parse_pattern(format).apply(datetime, locale)
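# --- Editor's sketch (not part of the source): how the predefined-width branch above
# composes its result. The CLDR datetime pattern is a template such as '{1}, {0}'
# (en_US, per the doctest above), where {1} is the formatted date and {0} the time.
combined = '{1}, {0}'.replace('{0}', '3:30:00 PM').replace('{1}', 'Apr 1, 2007')
print(combined)   # Apr 1, 2007, 3:30:00 PM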
583: (0) def format_time(
584: (4) time: datetime.time | datetime.datetime | float | None = None,
585: (4) format: _PredefinedTimeFormat | str = 'medium',
586: (4) tzinfo: datetime.tzinfo | None = None, locale: Locale | str | None = LC_TIME,
587: (0) ) -> str:
588: (4) r"""Return a time formatted according to the given pattern.
589: (4) >>> from datetime import datetime, time
590: (4) >>> t = time(15, 30)
591: (4) >>> format_time(t, locale='en_US')
592: (4) u'3:30:00\u202fPM'
593: (4) >>> format_time(t, format='short', locale='de_DE')
594: (4) u'15:30'
595: (4) If you don't want to use the locale default formats, you can specify a
596: (4) custom time pattern:
597: (4) >>> format_time(t, "hh 'o''clock' a", locale='en')
598: (4) u"03 o'clock PM"
599: (4) For any pattern requiring the display of the time-zone a
600: (4) timezone has to be specified explicitly:
601: (4) >>> t = datetime(2007, 4, 1, 15, 30)
602: (4) >>> tzinfo = get_timezone('Europe/Paris')
603: (4) >>> t = _localize(tzinfo, t)
604: (4) >>> format_time(t, format='full', tzinfo=tzinfo, locale='fr_FR')
605: (4) '15:30:00 heure d’été d’Europe centrale'
606: (4) >>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=get_timezone('US/Eastern'),
607: (4) ... locale='en')
608: (4) u"09 o'clock AM, Eastern Daylight Time"
609: (4) As that example shows, when this function gets passed a
610: (4) ``datetime.datetime`` value, the actual time in the formatted string is
611: (4) adjusted to the timezone specified by the `tzinfo` parameter. If the
612: (4) ``datetime`` is "naive" (i.e. it has no associated timezone information),
613: (4) it is assumed to be in UTC.
614: (4) These timezone calculations are **not** performed if the value is of type
615: (4) ``datetime.time``, as without date information there's no way to determine
616: (4) what a given time would translate to in a different timezone without
617: (4) information about whether daylight savings time is in effect or not. This
618: (4) means that time values are left as-is, and the value of the `tzinfo`
619: (4) parameter is only used to display the timezone name if needed:
620: (4) >>> t = time(15, 30)
621: (4) >>> format_time(t, format='full', tzinfo=get_timezone('Europe/Paris'),
622: (4) ... locale='fr_FR') # doctest: +SKIP
623: (4) u'15:30:00 heure normale d\u2019Europe centrale'
624: (4) >>> format_time(t, format='full', tzinfo=get_timezone('US/Eastern'),
625: (4) ... locale='en_US') # doctest: +SKIP
626: (4) u'3:30:00\u202fPM Eastern Standard Time'
627: (4) :param time: the ``time`` or ``datetime`` object; if `None`, the current
628: (17) time in UTC is used
629: (4) :param format: one of "full", "long", "medium", or "short", or a custom
630: (19) date/time pattern
631: (4) :param tzinfo: the time-zone to apply to the time for display
632: (4) :param locale: a `Locale` object or a locale identifier
633: (4) """
634: (4) # get a reference date in case we need to find the right timezone variant
635: (4) # in the pattern
636: (4) ref_date = time.date() if isinstance(time, datetime.datetime) else None
637: (4) time = _get_time(time, tzinfo)
638: (4) locale = Locale.parse(locale)
639: (4) if format in ('full', 'long', 'medium', 'short'):
640: (8) format = get_time_format(format, locale=locale)
641: (4) return parse_pattern(format).apply(time, locale, reference_date=ref_date)
642: (0) def format_skeleton(
643: (4) skeleton: str,
644: (4) datetime: _Instant = None,
645: (4) tzinfo: datetime.tzinfo | None = None,
646: (4) fuzzy: bool = True,
647: (4) locale: Locale | str | None = LC_TIME,
648: (0) ) -> str:
649: (4) r"""Return a time and/or date formatted according to the given pattern.
650: (4) The skeletons are defined in the CLDR data and provide more flexibility
651: (4) than the simple short/long/medium formats, but are a bit harder to use.
652: (4) They are defined using the date/time symbols without order or punctuation
653: (4) and map to a suitable format for the given locale.
654: (4) >>> from datetime import datetime
655: (4) >>> t = datetime(2007, 4, 1, 15, 30)
656: (4) >>> format_skeleton('MMMEd', t, locale='fr')
657: (4) u'dim. 1 avr.'
658: (4) >>> format_skeleton('MMMEd', t, locale='en')
659: (4) u'Sun, Apr 1'
660: (4) >>> format_skeleton('yMMd', t, locale='fi') # yMMd is not in the Finnish locale; yMd gets used
661: (4) u'1.4.2007'
662: (4) >>> format_skeleton('yMMd', t, fuzzy=False, locale='fi') # yMMd is not in the Finnish locale, an error is thrown
663: (4) Traceback (most recent call last):
664: (8) ...
665: (4) KeyError: yMMd
666: (4) >>> format_skeleton('GH', t, fuzzy=True, locale='fi_FI') # GH is not in the Finnish locale and there is no close match, an error is thrown
667: (4) Traceback (most recent call last):
668: (8) ...
669: (4) KeyError: None
670: (4) After the skeleton is resolved to a pattern, `format_datetime` is called, so
671: (4) all timezone processing, etc., is the same as for that function.
672: (4) :param skeleton: A date/time skeleton as defined in the CLDR data.
673: (4) :param datetime: the ``time`` or ``datetime`` object; if `None`, the current
674: (17) time in UTC is used
675: (4) :param tzinfo: the time-zone to apply to the time for display
676: (4) :param fuzzy: If the skeleton is not found, allow choosing a skeleton that's
677: (18) close enough to it. If there is no close match, a `KeyError`
678: (18) is thrown.
679: (4) :param locale: a `Locale` object or a locale identifier
680: (4) """
681: (4) locale = Locale.parse(locale)
682: (4) if fuzzy and skeleton not in locale.datetime_skeletons:
683: (8) skeleton = match_skeleton(skeleton, locale.datetime_skeletons)
684: (4) format = locale.datetime_skeletons[skeleton]
685: (4) return format_datetime(datetime, format, tzinfo, locale)
686: (0) TIMEDELTA_UNITS: tuple[tuple[str, int], ...] = (
687: (4) ('year', 3600 * 24 * 365),
688: (4) ('month', 3600 * 24 * 30),
689: (4) ('week', 3600 * 24 * 7),
690: (4) ('day', 3600 * 24),
691: (4) ('hour', 3600),
692: (4) ('minute', 60),
693: (4) ('second', 1),
694: (0) )
695: (0) def format_timedelta(
696: (4) delta: datetime.timedelta | int,
697: (4) granularity: Literal['year', 'month', 'week', 'day', 'hour', 'minute', 'second'] = 'second',
698: (4) threshold: float = .85,
699: (4) add_direction: bool = False,
700: (4) format: Literal['narrow', 'short', 'medium', 'long'] = 'long',
701: (4) locale: Locale | str | None = LC_TIME,
702: (0) ) -> str:
703: (4) """Return a time delta according to the rules of the given locale.
704: (4) >>> from datetime import timedelta
705: (4) >>> format_timedelta(timedelta(weeks=12), locale='en_US')
706: (4) u'3 months'
707: (4) >>> format_timedelta(timedelta(seconds=1), locale='es')
708: (4) u'1 segundo'
709: (4) The granularity parameter can be provided to alter the lowest unit
710: (4) presented, which defaults to a second.
711: (4) >>> format_timedelta(timedelta(hours=3), granularity='day', locale='en_US')
712: (4) u'1 day'
713: (4) The threshold parameter can be used to determine at which value the
714: (4) presentation switches to the next higher unit. A higher threshold factor
715: (4) means the presentation will switch later. For example:
716: (4) >>> format_timedelta(timedelta(hours=23), threshold=0.9, locale='en_US')
717: (4) u'1 day'
718: (4) >>> format_timedelta(timedelta(hours=23), threshold=1.1, locale='en_US')
719: (4) u'23 hours'
720: (4) In addition directional information can be provided that informs
721: (4) the user if the date is in the past or in the future:
722: (4) >>> format_timedelta(timedelta(hours=1), add_direction=True, locale='en')
723: (4) u'in 1 hour'
724: (4) >>> format_timedelta(timedelta(hours=-1), add_direction=True, locale='en')
725: (4) u'1 hour ago'
726: (4) The format parameter controls how compact or wide the presentation is:
727: (4) >>> format_timedelta(timedelta(hours=3), format='short', locale='en')
728: (4) u'3 hr'
729: (4) >>> format_timedelta(timedelta(hours=3), format='narrow', locale='en')
730: (4) u'3h'
731: (4) :param delta: a ``timedelta`` object representing the time difference to
732: (18) format, or the delta in seconds as an `int` value
733: (4) :param granularity: determines the smallest unit that should be displayed,
734: (24) the value can be one of "year", "month", "week", "day",
735: (24) "hour", "minute" or "second"
736: (4) :param threshold: factor that determines at which point the presentation
737: (22) switches to the next higher unit
738: (4) :param add_direction: if this flag is set to `True`, the return value will
739: (26) include directional information. For instance, a
740: (26) positive timedelta is described as lying in the
741: (26) future, a negative one as lying in the past.
743: (4) :param format: the format, one of "narrow", "short" or "long"
744: (19) ("medium" is deprecated and is currently converted to
745: (19) "long" for compatibility)
746: (4) :param locale: a `Locale` object or a locale identifier
747: (4) """
748: (4) if format not in ('narrow', 'short', 'medium', 'long'):
749: (8) raise TypeError('Format must be one of "narrow", "short" or "long"')
750: (4) if format == 'medium':
751: (8) warnings.warn(
752: (12) '"medium" value for format param of format_timedelta'
753: (12) ' is deprecated. Use "long" instead',
754: (12) category=DeprecationWarning,
755: (12) stacklevel=2,
756: (8) )
757: (8) format = 'long'
758: (4) if isinstance(delta, datetime.timedelta):
759: (8) seconds = int((delta.days * 86400) + delta.seconds)
760: (4) else:
761: (8) seconds = delta
762: (4) locale = Locale.parse(locale)
763: (4) def _iter_patterns(a_unit):
764: (8) if add_direction:
765: (12) unit_rel_patterns = locale._data['date_fields'][a_unit]
766: (12) if seconds >= 0:
767: (16) yield unit_rel_patterns['future']
768: (12) else:
769: (16) yield unit_rel_patterns['past']
770: (8) a_unit = f"duration-{a_unit}"
771: (8) unit_pats = locale._data['unit_patterns'].get(a_unit, {})
772: (8) yield unit_pats.get(format)
773: (8) # We do not support `<alias>` tags at all while ingesting CLDR data,
774: (8) # so these aliases specified in `root.xml` are hard-coded here:
775: (8) # <unitLength type="long"><alias source="locale" path="../unitLength[@type='short']"/></unitLength>
776: (8) # <unitLength type="narrow"><alias source="locale" path="../unitLength[@type='short']"/></unitLength>
777: (8) if format in ("long", "narrow"):
778: (12) yield unit_pats.get("short")
779: (4) for unit, secs_per_unit in TIMEDELTA_UNITS:
780: (8) value = abs(seconds) / secs_per_unit
781: (8) if value >= threshold or unit == granularity:
782: (12) if unit == granularity and value > 0:
783: (16) value = max(1, value)
784: (12) value = int(round(value))
785: (12) plural_form = locale.plural_form(value)
786: (12) pattern = None
787: (12) for patterns in _iter_patterns(unit):
788: (16) if patterns is not None:
789: (20) pattern = patterns.get(plural_form) or patterns.get('other')
790: (20) if pattern:
791: (24) break
792: (12) # This really should not happen
793: (12) if pattern is None:
794: (16) return ''
795: (12) return pattern.replace('{0}', str(value))
796: (4) return ''
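# --- Editor's sketch (not part of the source): how threshold picks a unit in the
# loop above. 36 hours is 1.5 days; 1.5 >= 0.85, so "day" wins and the value rounds
# to 2. Exact wording comes from CLDR data, but '2 days' is stable for en_US.
from datetime import timedelta
from babel.dates import format_timedelta

print(format_timedelta(timedelta(hours=36), locale='en_US'))   # 2 days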
797: (0) def _format_fallback_interval(
798: (4) start: _Instant,
799: (4) end: _Instant,
800: (4) skeleton: str | None,
801: (4) tzinfo: datetime.tzinfo | None,
802: (4) locale: Locale | str | None = LC_TIME,
803: (0) ) -> str:
804: (4) if skeleton in locale.datetime_skeletons: # Use the given skeleton
805: (8) format = lambda dt: format_skeleton(skeleton, dt, tzinfo, locale=locale)
806: (4) elif all((isinstance(d, datetime.date) and not isinstance(d, datetime.datetime)) for d in (start, end)): # Both are just dates
807: (8) format = lambda dt: format_date(dt, locale=locale)
808: (4) elif all((isinstance(d, datetime.time) and not isinstance(d, datetime.date)) for d in (start, end)): # Both are times
809: (8) format = lambda dt: format_time(dt, tzinfo=tzinfo, locale=locale)
810: (4) else:
811: (8) format = lambda dt: format_datetime(dt, tzinfo=tzinfo, locale=locale)
812: (4) formatted_start = format(start)
813: (4) formatted_end = format(end)
814: (4) if formatted_start == formatted_end:
815: (8) return format(start)
816: (4) return (
817: (8) locale.interval_formats.get(None, "{0}-{1}").
818: (8) replace("{0}", formatted_start).
819: (8) replace("{1}", formatted_end)
820: (4) )
821: (0) def format_interval(
822: (4) start: _Instant,
823: (4) end: _Instant,
824: (4) skeleton: str | None = None,
825: (4) tzinfo: datetime.tzinfo | None = None,
826: (4) fuzzy: bool = True,
827: (4) locale: Locale | str | None = LC_TIME,
828: (0) ) -> str:
829: (4) """
830: (4) Format an interval between two instants according to the locale's rules.
831: (4) >>> from datetime import date, time
832: (4) >>> format_interval(date(2016, 1, 15), date(2016, 1, 17), "yMd", locale="fi")
833: (4) u'15.\u201317.1.2016'
834: (4) >>> format_interval(time(12, 12), time(16, 16), "Hm", locale="en_GB")
835: (4) '12:12\u201316:16'
836: (4) >>> format_interval(time(5, 12), time(16, 16), "hm", locale="en_US")
837: (4) '5:12\u202fAM\u2009–\u20094:16\u202fPM'
838: (4) >>> format_interval(time(16, 18), time(16, 24), "Hm", locale="it")
839: (4) '16:18\u201316:24'
840: (4) If the start instant equals the end instant, the interval is formatted like the instant.
841: (4) >>> format_interval(time(16, 18), time(16, 18), "Hm", locale="it")
842: (4) '16:18'
843: (4) Unknown skeletons fall back to "default" formatting.
844: (4) >>> format_interval(date(2015, 1, 1), date(2017, 1, 1), "wzq", locale="ja")
845: (4) '2015/01/01\uff5e2017/01/01'
846: (4) >>> format_interval(time(16, 18), time(16, 24), "xxx", locale="ja")
847: (4) '16:18:00\uff5e16:24:00'
848: (4) >>> format_interval(date(2016, 1, 15), date(2016, 1, 17), "xxx", locale="de")
849: (4) '15.01.2016\u2009–\u200917.01.2016'
850: (4) :param start: First instant (datetime/date/time)
851: (4) :param end: Second instant (datetime/date/time)
852: (4) :param skeleton: The "skeleton format" to use for formatting.
853: (4) :param tzinfo: tzinfo to use (if none is already attached)
854: (4) :param fuzzy: If the skeleton is not found, allow choosing a skeleton that's
855: (18) close enough to it.
856: (4) :param locale: A locale object or identifier.
857: (4) :return: Formatted interval
858: (4) """
859: (4) locale = Locale.parse(locale)
860: (4) # NB: The quote comments below are from the algorithm description in
861: (4) # https://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats
862: (4) # > Look for the intervalFormatItem element that matches the "skeleton",
863: (4) # > starting in the current locale and then following the locale fallback
864: (4) # > chain up to, but not including root.
865: (4) interval_formats = locale.interval_formats
866: (4) if skeleton not in interval_formats or not skeleton:
867: (8) # > If no match was found from the previous step, check what the closest
868: (8) # > match is in the fallback locale chain, as in availableFormats. That
869: (8) # > is, this allows for adjusting the string value field's width,
870: (8) # > including adjusting between "MMM" and "MMMM", and using different
871: (8) # > variants of the same field, such as 'v' and 'z'.
872: (8) if skeleton and fuzzy:
873: (12) skeleton = match_skeleton(skeleton, interval_formats)
874: (8) else:
875: (12) skeleton = None
876: (8) if not skeleton: # Still no match whatsoever?
877: (12) # > Otherwise, format the start and end datetime using the fallback pattern.
878: (12) return _format_fallback_interval(start, end, skeleton, tzinfo, locale)
879: (4) skel_formats = interval_formats[skeleton]
880: (4) if start == end:
881: (8) return format_skeleton(skeleton, start, tzinfo, fuzzy=fuzzy, locale=locale)
882: (4) start = _ensure_datetime_tzinfo(_get_datetime(start), tzinfo=tzinfo)
883: (4) end = _ensure_datetime_tzinfo(_get_datetime(end), tzinfo=tzinfo)
884: (4) start_fmt = DateTimeFormat(start, locale=locale)
885: (4) end_fmt = DateTimeFormat(end, locale=locale)
886: (4) # > If a match is found from previous steps, compute the calendar field
887: (4) # > with the greatest difference between start and end datetime. If there
888: (4) # > is no difference among any of the fields in the pattern, format as a
889: (4) # > single date using availableFormats, and return.
890: (4) for field in PATTERN_CHAR_ORDER: # These are in largest-to-smallest order
891: (8) if field in skel_formats and start_fmt.extract(field) != end_fmt.extract(field):
892: (12) # > If there is a match, use the pieces of the corresponding pattern to
893: (12) # > format the start and end datetime, as above.
894: (12) return "".join(
895: (16) parse_pattern(pattern).apply(instant, locale)
896: (16) for pattern, instant
897: (16) in zip(skel_formats[field], (start, end))
898: (12) )
899: (4) # > Otherwise, format the start and end datetime using the fallback pattern.
900: (4) return _format_fallback_interval(start, end, skeleton, tzinfo, locale)
901: (0) def get_period_id(
902: (4) time: _Instant,
903: (4) tzinfo: datetime.tzinfo | None = None,
904: (4) type: Literal['selection'] | None = None,
905: (4) locale: Locale | str | None = LC_TIME,
906: (0) ) -> str:
907: (4) """
908: (4) Get the day period ID for a given time.
909: (4) This ID can be used as a key for the period name dictionary.
910: (4) >>> from datetime import time
911: (4) >>> get_period_names(locale="de")[get_period_id(time(7, 42), locale="de")]
912: (4) u'Morgen'
913: (4) >>> get_period_id(time(0), locale="en_US")
914: (4) u'midnight'
915: (4) >>> get_period_id(time(0), type="selection", locale="en_US")
916: (4) u'night1'
917: (4) :param time: The time to inspect.
918: (4) :param tzinfo: The timezone for the time. See ``format_time``.
919: (4) :param type: The period type to use. Either "selection" or None.
920: (17) The selection type is used for selecting among phrases such as
921: (17) “Your email arrived yesterday evening” or “Your email arrived last night”.
922: (4) :param locale: the `Locale` object, or a locale string
923: (4) :return: period ID. Something is always returned -- even if it's just "am" or "pm".
924: (4) """
925: (4) time = _get_time(time, tzinfo)
926: (4) seconds_past_midnight = int(time.hour * 60 * 60 + time.minute * 60 + time.second)
927: (4) locale = Locale.parse(locale)
928: (4) # The LDML rules state that the rules may not overlap, so iterating in arbitrary
929: (4) # order should be alright, though `at` periods should be preferred.
930: (4) rulesets = locale.day_period_rules.get(type, {}).items()
931: (4) for rule_id, rules in rulesets:
932: (8) for rule in rules:
933: (12) if "at" in rule and rule["at"] == seconds_past_midnight:
934: (16) return rule_id
935: (4) for rule_id, rules in rulesets:
936: (8) for rule in rules:
937: (12) if "from" in rule and "before" in rule:
938: (16) if rule["from"] < rule["before"]:
939: (20) if rule["from"] <= seconds_past_midnight < rule["before"]:
940: (24) return rule_id
941: (16) else:
942: (20) # e.g. from="21:00" before="06:00"
943: (20) if rule["from"] <= seconds_past_midnight < 86400 or \
944: (28) 0 <= seconds_past_midnight < rule["before"]:
945: (24) return rule_id
946: (12) start_ok = end_ok = False
947: (12) if "from" in rule and seconds_past_midnight >= rule["from"]:
948: (16) start_ok = True
949: (12) if "to" in rule and seconds_past_midnight <= rule["to"]:
950: (16) # This rule type does not exist in the present CLDR data;
951: (16) # excuse the lack of test coverage.
952: (16) end_ok = True
953: (12) if "before" in rule and seconds_past_midnight < rule["before"]:
954: (16) end_ok = True
955: (12) if "after" in rule:
956: (16) raise NotImplementedError("'after' is deprecated as of CLDR 29.")
957: (12) if start_ok and end_ok:
958: (16) return rule_id
959: (4) if seconds_past_midnight < 43200:
960: (8) return "am"
961: (4) else:
962: (8) return "pm"
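# --- Editor's sketch (not part of the source): the wrap-around branch above. With
# from="21:00" and before="06:00", both 23:00 and 05:00 match, while 12:00 does not.
# `_matches` is a hypothetical helper mirroring the condition in the code above.
_rule = {"from": 21 * 3600, "before": 6 * 3600}

def _matches(seconds_past_midnight: int) -> bool:
    return (_rule["from"] <= seconds_past_midnight < 86400
            or 0 <= seconds_past_midnight < _rule["before"])

print(_matches(23 * 3600), _matches(5 * 3600), _matches(12 * 3600))   # True True False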
963: (0) class ParseError(ValueError):
964: (4) pass
965: (0) def parse_date(
966: (4) string: str,
967: (4) locale: Locale | str | None = LC_TIME,
968: (4) format: _PredefinedTimeFormat = 'medium',
969: (0) ) -> datetime.date:
970: (4) """Parse a date from a string.
971: (4) This function first tries to interpret the string as ISO-8601
972: (4) date format, then uses the date format for the locale as a hint to
973: (4) determine the order in which the date fields appear in the string.
974: (4) >>> parse_date('4/1/04', locale='en_US')
975: (4) datetime.date(2004, 4, 1)
976: (4) >>> parse_date('01.04.2004', locale='de_DE')
977: (4) datetime.date(2004, 4, 1)
978: (4) >>> parse_date('2004-04-01', locale='en_US')
979: (4) datetime.date(2004, 4, 1)
980: (4) >>> parse_date('2004-04-01', locale='de_DE')
981: (4) datetime.date(2004, 4, 1)
982: (4) :param string: the string containing the date
983: (4) :param locale: a `Locale` object or a locale identifier
984: (4) :param format: the format to use (see ``get_date_format``)
985: (4) """
986: (4) numbers = re.findall(r'(\d+)', string)
987: (4) if not numbers:
988: (8) raise ParseError("No numbers were found in input")
989: (4) # we try ISO-8601 format first, meaning similar to formats
990: (4) # extended YYYY-MM-DD or basic YYYYMMDD
991: (4) iso_alike = re.match(r'^(\d{4})-?([01]\d)-?([0-3]\d)$',
992: (25) string, flags=re.ASCII) # allow only ASCII digits
993: (4) if iso_alike:
994: (8) try:
995: (12) return datetime.date(*map(int, iso_alike.groups()))
996: (8) except ValueError:
997: (12) pass # a locale format might fit better, so let's continue
998: (4) format_str = get_date_format(format=format, locale=locale).pattern.lower()
999: (4) year_idx = format_str.index('y')
1000: (4) month_idx = format_str.index('m')
1001: (4) if month_idx < 0:
1002: (8) month_idx = format_str.index('l')
1003: (4) day_idx = format_str.index('d')
1004: (4) indexes = sorted([(year_idx, 'Y'), (month_idx, 'M'), (day_idx, 'D')])
1005: (4) indexes = {item[1]: idx for idx, item in enumerate(indexes)}
1006: (4) # FIXME: this currently only supports numbers, but should also support month
1007: (4) # names, both in the requested locale and in English
1008: (4) year = numbers[indexes['Y']]
1009: (4) year = 2000 + int(year) if len(year) == 2 else int(year)
1010: (4) month = int(numbers[indexes['M']])
1011: (4) day = int(numbers[indexes['D']])
1012: (4) if month > 12:
1013: (8) month, day = day, month
1014: (4) return datetime.date(year, month, day)
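# --- Editor's sketch (not part of the source): the field-order trick above. Sorting
# the positions of 'y', 'm' and 'd' in the (lowercased) locale pattern tells us which
# parsed number is the year, month and day. 'dd.mm.y' is a de_DE-like short pattern,
# assumed here purely for illustration.
format_str = 'dd.mm.y'
indexes = sorted([(format_str.index('y'), 'Y'),
                  (format_str.index('m'), 'M'),
                  (format_str.index('d'), 'D')])
indexes = {item[1]: idx for idx, item in enumerate(indexes)}
print(indexes)   # {'D': 0, 'M': 1, 'Y': 2}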
1015: (0) def parse_time(
1016: (4) string: str,
1017: (4) locale: Locale | str | None = LC_TIME,
1018: (4) format: _PredefinedTimeFormat = 'medium',
1019: (0) ) -> datetime.time:
1020: (4) """Parse a time from a string.
1021: (4) This function uses the time format for the locale as a hint to determine
1022: (4) the order in which the time fields appear in the string.
1023: (4) >>> parse_time('15:30:00', locale='en_US')
1024: (4) datetime.time(15, 30)
1025: (4) :param string: the string containing the time
1026: (4) :param locale: a `Locale` object or a locale identifier
1027: (4) :param format: the format to use (see ``get_time_format``)
1028: (4) :return: the parsed time
1029: (4) :rtype: `time`
1030: (4) """
1031: (4) numbers = re.findall(r'(\d+)', string)
1032: (4) if not numbers:
1033: (8) raise ParseError("No numbers were found in input")
1034: (4) # TODO: try ISO format first?
1035: (4) format_str = get_time_format(format=format, locale=locale).pattern.lower()
1036: (4) hour_idx = format_str.index('h')
1037: (4) if hour_idx < 0:
1038: (8) hour_idx = format_str.index('k')
1039: (4) min_idx = format_str.index('m')
1040: (4) sec_idx = format_str.index('s')
1041: (4) indexes = sorted([(hour_idx, 'H'), (min_idx, 'M'), (sec_idx, 'S')])
1042: (4) indexes = {item[1]: idx for idx, item in enumerate(indexes)}
1043: (4) # TODO: support time zones
1044: (4) # Check if the format specifies a period to be used;
1045: (4) # if it does, look for 'pm' to figure out an offset.
1046: (4) hour_offset = 0
1047: (4) if 'a' in format_str and 'pm' in string.lower():
1048: (8) hour_offset = 12
1049: (4) # Parse up to three numbers from the string.
1050: (4) minute = second = 0
1051: (4) hour = int(numbers[indexes['H']]) + hour_offset
1052: (4) if len(numbers) > 1:
1053: (8) minute = int(numbers[indexes['M']])
1054: (8) if len(numbers) > 2:
1055: (12) second = int(numbers[indexes['S']])
1056: (4) return datetime.time(hour, minute, second)
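# --- Editor's sketch (not part of the source): the 12-hour handling above. If the
# locale pattern contains an 'a' (period) field and the input mentions "pm", 12 is
# added to the parsed hour. The pattern/input strings are assumed en_US-like examples.
format_str, string = 'h:mm:ss a', '3:30:00 PM'
hour_offset = 12 if ('a' in format_str and 'pm' in string.lower()) else 0
print(3 + hour_offset)   # 15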
1057: (0) class DateTimePattern:
1058: (4) def __init__(self, pattern: str, format: DateTimeFormat):
1059: (8) self.pattern = pattern
1060: (8) self.format = format
1061: (4) def __repr__(self) -> str:
1062: (8) return f"<{type(self).__name__} {self.pattern!r}>"
1063: (4) def __str__(self) -> str:
1064: (8) pat = self.pattern
1065: (8) return pat
1066: (4) def __mod__(self, other: DateTimeFormat) -> str:
1067: (8) if not isinstance(other, DateTimeFormat):
1068: (12) return NotImplemented
1069: (8) return self.format % other
1070: (4) def apply(
1071: (8) self,
1072: (8) datetime: datetime.date | datetime.time,
1073: (8) locale: Locale | str | None,
1074: (8) reference_date: datetime.date | None = None,
1075: (4) ) -> str:
1076: (8) return self % DateTimeFormat(datetime, locale, reference_date)
1077: (0) class DateTimeFormat:
1078: (4) def __init__(
1079: (8) self,
1080: (8) value: datetime.date | datetime.time,
1081: (8) locale: Locale | str,
1082: (8) reference_date: datetime.date | None = None,
1083: (4) ) -> None:
1084: (8) assert isinstance(value, (datetime.date, datetime.datetime, datetime.time))
1085: (8) if isinstance(value, (datetime.datetime, datetime.time)) and value.tzinfo is None:
1086: (12) value = value.replace(tzinfo=UTC)
1087: (8) self.value = value
1088: (8) self.locale = Locale.parse(locale)
1089: (8) self.reference_date = reference_date
1090: (4) def __getitem__(self, name: str) -> str:
1091: (8) char = name[0]
1092: (8) num = len(name)
1093: (8) if char == 'G':
1094: (12) return self.format_era(char, num)
1095: (8) elif char in ('y', 'Y', 'u'):
1096: (12) return self.format_year(char, num)
1097: (8) elif char in ('Q', 'q'):
1098: (12) return self.format_quarter(char, num)
1099: (8) elif char in ('M', 'L'):
1100: (12) return self.format_month(char, num)
1101: (8) elif char in ('w', 'W'):
1102: (12) return self.format_week(char, num)
1103: (8) elif char == 'd':
1104: (12) return self.format(self.value.day, num)
1105: (8) elif char == 'D':
1106: (12) return self.format_day_of_year(num)
1107: (8) elif char == 'F':
1108: (12) return self.format_day_of_week_in_month()
1109: (8) elif char in ('E', 'e', 'c'):
1110: (12) return self.format_weekday(char, num)
1111: (8) elif char in ('a', 'b', 'B'):
1112: (12) return self.format_period(char, num)
1113: (8) elif char == 'h':
1114: (12) if self.value.hour % 12 == 0:
1115: (16) return self.format(12, num)
1116: (12) else:
1117: (16) return self.format(self.value.hour % 12, num)
1118: (8) elif char == 'H':
1119: (12) return self.format(self.value.hour, num)
1120: (8) elif char == 'K':
1121: (12) return self.format(self.value.hour % 12, num)
1122: (8) elif char == 'k':
1123: (12) if self.value.hour == 0:
1124: (16) return self.format(24, num)
1125: (12) else:
1126: (16) return self.format(self.value.hour, num)
1127: (8) elif char == 'm':
1128: (12) return self.format(self.value.minute, num)
1129: (8) elif char == 's':
1130: (12) return self.format(self.value.second, num)
1131: (8) elif char == 'S':
1132: (12) return self.format_frac_seconds(num)
1133: (8) elif char == 'A':
1134: (12) return self.format_milliseconds_in_day(num)
1135: (8) elif char in ('z', 'Z', 'v', 'V', 'x', 'X', 'O'):
1136: (12) return self.format_timezone(char, num)
1137: (8) else:
1138: (12) raise KeyError(f"Unsupported date/time field {char!r}")
1139: (4) def extract(self, char: str) -> int:
1140: (8) char = str(char)[0]
1141: (8) if char == 'y':
1142: (12) return self.value.year
1143: (8) elif char == 'M':
1144: (12) return self.value.month
1145: (8) elif char == 'd':
1146: (12) return self.value.day
1147: (8) elif char == 'H':
1148: (12) return self.value.hour
1149: (8) elif char == 'h':
1150: (12) return self.value.hour % 12 or 12
1151: (8) elif char == 'm':
1152: (12) return self.value.minute
1153: (8) elif char == 'a':
1154: (12) return int(self.value.hour >= 12) # 0 for am, 1 for pm
1155: (8) else:
1156: (12) raise NotImplementedError(f"Not implemented: extracting {char!r} from {self.value!r}")
1157: (4) def format_era(self, char: str, num: int) -> str:
1158: (8) width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[max(3, num)]
1159: (8) era = int(self.value.year >= 0)
1160: (8) return get_era_names(width, self.locale)[era]
1161: (4) def format_year(self, char: str, num: int) -> str:
1162: (8) value = self.value.year
1163: (8) if char.isupper():
1164: (12) value = self.value.isocalendar()[0]
1165: (8) year = self.format(value, num)
1166: (8) if num == 2:
1167: (12) year = year[-2:]
1168: (8) return year
1169: (4) def format_quarter(self, char: str, num: int) -> str:
1170: (8) quarter = (self.value.month - 1) // 3 + 1
1171: (8) if num <= 2:
1172: (12) return '%0*d' % (num, quarter)
1173: (8) width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num]
1174: (8) context = {'Q': 'format', 'q': 'stand-alone'}[char]
1175: (8) return get_quarter_names(width, context, self.locale)[quarter]
1176: (4) def format_month(self, char: str, num: int) -> str:
1177: (8) if num <= 2:
1178: (12) return '%0*d' % (num, self.value.month)
1179: (8) width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num]
1180: (8) context = {'M': 'format', 'L': 'stand-alone'}[char]
1181: (8) return get_month_names(width, context, self.locale)[self.value.month]
1182: (4) def format_week(self, char: str, num: int) -> str:
1183: (8) if char.islower(): # week of year
1184: (12) day_of_year = self.get_day_of_year()
1185: (12) week = self.get_week_number(day_of_year)
1186: (12) if week == 0:
1187: (16) date = self.value - datetime.timedelta(days=day_of_year)
1188: (16) week = self.get_week_number(self.get_day_of_year(date),
1189: (44) date.weekday())
1190: (12) return self.format(week, num)
1191: (8) else: # week of month
1192: (12) week = self.get_week_number(self.value.day)
1193: (12) if week == 0:
1194: (16) date = self.value - datetime.timedelta(days=self.value.day)
1195: (16) week = self.get_week_number(date.day, date.weekday())
1196: (12) return str(week)
1197: (4) def format_weekday(self, char: str = 'E', num: int = 4) -> str:
1198: (8) """
1199: (8) Return weekday from parsed datetime according to format pattern.
1200: (8) >>> from datetime import date
1201: (8) >>> format = DateTimeFormat(date(2016, 2, 28), Locale.parse('en_US'))
1202: (8) >>> format.format_weekday()
1203: (8) u'Sunday'
1204: (8) 'E': Day of week - Use one through three letters for the abbreviated day name, four for the full (wide) name,
1205: (13) five for the narrow name, or six for the short name.
1206: (8) >>> format.format_weekday('E',2)
1207: (8) u'Sun'
1208: (8) 'e': Local day of week. Same as E except adds a numeric value that will depend on the local starting day of the
1209: (13) week, using one or two letters. For this example, Monday is the first day of the week.
1210: (8) >>> format.format_weekday('e',2)
1211: (8) '01'
1212: (8) 'c': Stand-Alone local day of week - Use one letter for the local numeric value (same as 'e'), three for the
1213: (13) abbreviated day name, four for the full (wide) name, five for the narrow name, or six for the short name.
1214: (8) >>> format.format_weekday('c',1)
1215: (8) '1'
1216: (8) :param char: pattern format character ('e','E','c')
1217: (8) :param num: count of format character
1218: (8) """
1219: (8) if num < 3:
1220: (12) if char.islower():
1221: (16) value = 7 - self.locale.first_week_day + self.value.weekday()
1222: (16) return self.format(value % 7 + 1, num)
1223: (12) num = 3
1224: (8) weekday = self.value.weekday()
1225: (8) width = {3: 'abbreviated', 4: 'wide', 5: 'narrow', 6: 'short'}[num]
1226: (8) context = "stand-alone" if char == "c" else "format"
1227: (8) return get_day_names(width, context, self.locale)[weekday]
1228: (4) def format_day_of_year(self, num: int) -> str:
1229: (8) return self.format(self.get_day_of_year(), num)
1230: (4) def format_day_of_week_in_month(self) -> str:
1231: (8) return str((self.value.day - 1) // 7 + 1)
1232: (4) def format_period(self, char: str, num: int) -> str:
1233: (8) """
1234: (8) Return period from parsed datetime according to format pattern.
1235: (8) >>> from datetime import datetime, time
1236: (8) >>> format = DateTimeFormat(time(13, 42), 'fi_FI')
1237: (8) >>> format.format_period('a', 1)
1238: (8) u'ip.'
1239: (8) >>> format.format_period('b', 1)
1240: (8) u'iltap.'
1241: (8) >>> format.format_period('b', 4)
1242: (8) u'iltapäivä'
1243: (8) >>> format.format_period('B', 4)
1244: (8) u'iltapäivällä'
1245: (8) >>> format.format_period('B', 5)
1246: (8) u'ip.'
1247: (8) >>> format = DateTimeFormat(datetime(2022, 4, 28, 6, 27), 'zh_Hant')
1248: (8) >>> format.format_period('a', 1)
1249: (8) u'上午'
1250: (8) >>> format.format_period('B', 1)
1251: (8) u'清晨'
1252: (8) :param char: pattern format character ('a', 'b', 'B')
1253: (8) :param num: count of format character
1254: (8) """
1255: (8) widths = [{3: 'abbreviated', 4: 'wide', 5: 'narrow'}[max(3, num)],
1256: (18) 'wide', 'narrow', 'abbreviated']
1257: (8) if char == 'a':
1258: (12) period = 'pm' if self.value.hour >= 12 else 'am'
1259: (12) context = 'format'
1260: (8) else:
1261: (12) period = get_period_id(self.value, locale=self.locale)
1262: (12) context = 'format' if char == 'B' else 'stand-alone'
1263: (8) for width in widths:
1264: (12) period_names = get_period_names(context=context, width=width, locale=self.locale)
1265: (12) if period in period_names:
1266: (16) return period_names[period]
1267: (8) raise ValueError(f"Could not format period {period} in {self.locale}")
1268: (4) def format_frac_seconds(self, num: int) -> str:
1269: (8) """ Return fractional seconds.
1270: (8) Rounds the time's microseconds to the precision given by the number \
1271: (8) of digits passed in.
1272: (8) """
1273: (8) value = self.value.microsecond / 1000000
1274: (8) return self.format(round(value, num) * 10**num, num)
1275: (4) def format_milliseconds_in_day(self, num):
1276: (8) msecs = self.value.microsecond // 1000 + self.value.second * 1000 + \
1277: (12) self.value.minute * 60000 + self.value.hour * 3600000
1278: (8) return self.format(msecs, num)
1279: (4) def format_timezone(self, char: str, num: int) -> str:
1280: (8) width = {3: 'short', 4: 'long', 5: 'iso8601'}[max(3, num)]
1281: (8) # It could be that we only receive a time to format, but also have a
1282: (8) # reference date which is important to distinguish between timezone
1283: (8) # variants (summer/standard time)
1284: (8) value = self.value
1285: (8) if self.reference_date:
1286: (12) value = datetime.datetime.combine(self.reference_date, self.value)
1287: (8) if char == 'z':
1288: (12) return get_timezone_name(value, width, locale=self.locale)
1289: (8) elif char == 'Z':
1290: (12) if num == 5:
1291: (16) return get_timezone_gmt(value, width, locale=self.locale, return_z=True)
1292: (12) return get_timezone_gmt(value, width, locale=self.locale)
1293: (8) elif char == 'O':
1294: (12) if num == 4:
1295: (16) return get_timezone_gmt(value, width, locale=self.locale)
1296: (8) # TODO: Add support for the single-letter 'O' form (num == 1)
1297: (8) elif char == 'v':
1298: (12) return get_timezone_name(value.tzinfo, width,
1299: (37) locale=self.locale)
1300: (8) elif char == 'V':
1301: (12) if num == 1:
1302: (16) return get_timezone_name(value.tzinfo, width,
1303: (41) uncommon=True, locale=self.locale)
1304: (12) elif num == 2:
1305: (16) return get_timezone_name(value.tzinfo, locale=self.locale, return_zone=True)
1306: (12) elif num == 3:
1307: (16) return get_timezone_location(value.tzinfo, locale=self.locale, return_city=True)
1308: (12) return get_timezone_location(value.tzinfo, locale=self.locale)
1309: (8) # Additional branches to support the 'X' and 'x' timezone format fields
1310: (8) elif char == 'X':
1311: (12) if num == 1:
1312: (16) return get_timezone_gmt(value, width='iso8601_short', locale=self.locale,
1313: (40) return_z=True)
1314: (12) elif num in (2, 4):
1315: (16) return get_timezone_gmt(value, width='short', locale=self.locale,
1316: (40) return_z=True)
1317: (12) elif num in (3, 5):
1318: (16) return get_timezone_gmt(value, width='iso8601', locale=self.locale,
1319: (40) return_z=True)
1320: (8) elif char == 'x':
1321: (12) if num == 1:
1322: (16) return get_timezone_gmt(value, width='iso8601_short', locale=self.locale)
1323: (12) elif num in (2, 4):
1324: (16) return get_timezone_gmt(value, width='short', locale=self.locale)
1325: (12) elif num in (3, 5):
1326: (16) return get_timezone_gmt(value, width='iso8601', locale=self.locale)
1327: (4) def format(self, value: SupportsInt, length: int) -> str:
1328: (8) return '%0*d' % (length, value)
1329: (4) def get_day_of_year(self, date: datetime.date | None = None) -> int:
1330: (8) if date is None:
1331: (12) date = self.value
1332: (8) return (date - date.replace(month=1, day=1)).days + 1
1333: (4) def get_week_number(self, day_of_period: int, day_of_week: int | None = None) -> int:
1334: (8) """Return the number of the week of a day within a period. This may be
1335: (8) the week number in a year or the week number in a month.
1336: (8) Usually this will return a value equal to or greater than 1, but if the
1337: (8) first week of the period is so short that it actually counts as the last
1338: (8) week of the previous period, this function will return 0.
1339: (8) >>> date = datetime.date(2006, 1, 8)
1340: (8) >>> DateTimeFormat(date, 'de_DE').get_week_number(6)
1341: (8) 1
1342: (8) >>> DateTimeFormat(date, 'en_US').get_week_number(6)
1343: (8) 2
1344: (8) :param day_of_period: the number of the day in the period (usually
1345: (30) either the day of month or the day of year)
1346: (8) :param day_of_week: the week day; if omitted, the week day of the
1347: (28) current date is assumed
1348: (8) """
1349: (8) if day_of_week is None:
1350: (12) day_of_week = self.value.weekday()
1351: (8) first_day = (day_of_week - self.locale.first_week_day -
1352: (21) day_of_period + 1) % 7
1353: (8) if first_day < 0:
1354: (12) first_day += 7
1355: (8) week_number = (day_of_period + first_day - 1) // 7
1356: (8) if 7 - first_day >= self.locale.min_week_days:
1357: (12) week_number += 1
1358: (8) if self.locale.first_week_day == 0:
1359: (12) # Correct the week number in case of ISO calendar usage (first_week_day=0).
1360: (12) # If the week number exceeds the maximum number of weeks for the given year,
1361: (12) # we must count from zero. For example, the above calculation gives week 53
1362: (12) # for 2018-12-31, but by the ISO calendar definition 2018 has a maximum of 52
1363: (12) # weeks, so the week number must be 53 - 52 = 1.
1364: (12) max_weeks = datetime.date(year=self.value.year, day=28, month=12).isocalendar()[1]
1365: (12) if week_number > max_weeks:
1366: (16) week_number -= max_weeks
1367: (8) return week_number
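# --- Illustrative sketch (not part of babel.dates itself) ---
# A standalone example of the week-number arithmetic above, observed through
# the public 'w' pattern field. The expected values assume CLDR week data in
# which de_DE uses ISO weeks (first_week_day=0, min_week_days=4) while en_US
# starts weeks on Sunday with min_week_days=1.
import datetime
from babel.dates import format_date

sunday = datetime.date(2006, 1, 8)
assert format_date(sunday, "w", locale="de_DE") == "1"  # still ISO week 1
assert format_date(sunday, "w", locale="en_US") == "2"  # second US week of 2006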
1368: (0) PATTERN_CHARS: dict[str, list[int] | None] = {
1369: (4) 'G': [1, 2, 3, 4, 5], # era
1370: (4) 'y': None, 'Y': None, 'u': None, # year
1371: (4) 'Q': [1, 2, 3, 4, 5], 'q': [1, 2, 3, 4, 5], # quarter
1372: (4) 'M': [1, 2, 3, 4, 5], 'L': [1, 2, 3, 4, 5], # month
1373: (4) 'w': [1, 2], 'W': [1], # week
1374: (4) 'd': [1, 2], 'D': [1, 2, 3], 'F': [1], 'g': None, # day
1375: (4) 'E': [1, 2, 3, 4, 5, 6], 'e': [1, 2, 3, 4, 5, 6], 'c': [1, 3, 4, 5, 6], # week day
1376: (4) 'a': [1, 2, 3, 4, 5], 'b': [1, 2, 3, 4, 5], 'B': [1, 2, 3, 4, 5], # period
1377: (4) 'h': [1, 2], 'H': [1, 2], 'K': [1, 2], 'k': [1, 2], # hour
1378: (4) 'm': [1, 2], # minute
1379: (4) 's': [1, 2], 'S': None, 'A': None, # second
1380: (4) 'z': [1, 2, 3, 4], 'Z': [1, 2, 3, 4, 5], 'O': [1, 4], 'v': [1, 4], # zone
1381: (4) 'V': [1, 2, 3, 4], 'x': [1, 2, 3, 4, 5], 'X': [1, 2, 3, 4, 5], # zone
1382: (0) }
1383: (0) #: The pattern characters declared in the Date Field Symbol Table
1384: (0) #: (https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table)
1385: (0) #: in order of decreasing magnitude.
1386: (0) PATTERN_CHAR_ORDER = "GyYuUQqMLlwWdDFgEecabBChHKkjJmsSAzZOvVXx"
1387: (0) def parse_pattern(pattern: str | DateTimePattern) -> DateTimePattern:
1388: (4) """Parse date, time, and datetime format patterns.
1389: (4) >>> parse_pattern("MMMMd").format
1390: (4) u'%(MMMM)s%(d)s'
1391: (4) >>> parse_pattern("MMM d, yyyy").format
1392: (4) u'%(MMM)s %(d)s, %(yyyy)s'
1393: (4) Pattern can contain literal strings in single quotes:
1394: (4) >>> parse_pattern("H:mm' Uhr 'z").format
1395: (4) u'%(H)s:%(mm)s Uhr %(z)s'
1396: (4) An actual single quote can be used by using two adjacent single quote
1397: (4) characters:
1398: (4) >>> parse_pattern("hh' o''clock'").format
1399: (4) u"%(hh)s o'clock"
1400: (4) :param pattern: the formatting pattern to parse
1401: (4) """
1402: (4) if isinstance(pattern, DateTimePattern):
1403: (8) return pattern
1404: (4) return _cached_parse_pattern(pattern)
1405: (0) @lru_cache(maxsize=1024)
1406: (0) def _cached_parse_pattern(pattern: str) -> DateTimePattern:
1407: (4) result = []
1408: (4) for tok_type, tok_value in tokenize_pattern(pattern):
1409: (8) if tok_type == "chars":
1410: (12) result.append(tok_value.replace('%', '%%'))
1411: (8) elif tok_type == "field":
1412: (12) fieldchar, fieldnum = tok_value
1413: (12) limit = PATTERN_CHARS[fieldchar]
1414: (12) if limit and fieldnum not in limit:
1415: (16) raise ValueError(f"Invalid length for field: {fieldchar * fieldnum!r}")
1416: (12) result.append('%%(%s)s' % (fieldchar * fieldnum))
1417: (8) else:
1418: (12) raise NotImplementedError(f"Unknown token type: {tok_type}")
1419: (4) return DateTimePattern(pattern, ''.join(result))
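# --- Illustrative sketch (not part of babel.dates itself) ---
# parse_pattern() turns a CLDR pattern into the %-style template shown in its
# doctests; the usual entry point that applies such a pattern is
# format_datetime(). Only numeric fields are used here, so the result does not
# depend on locale display data.
import datetime
from babel.dates import format_datetime

assert format_datetime(
    datetime.datetime(2024, 3, 1, 13, 5),
    "yyyy-MM-dd HH:mm",
    locale="en_US",
) == "2024-03-01 13:05"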
1420: (0) def tokenize_pattern(pattern: str) -> list[tuple[str, str | tuple[str, int]]]:
1421: (4) """
1422: (4) Tokenize date format patterns.
1423: (4) Returns a list of (token_type, token_value) tuples.
1424: (4) ``token_type`` may be either "chars" or "field".
1425: (4) For "chars" tokens, the value is the literal value.
1426: (4) For "field" tokens, the value is a tuple of (field character, repetition count).
1427: (4) :param pattern: Pattern string
1428: (4) :type pattern: str
1429: (4) :rtype: list[tuple]
1430: (4) """
1431: (4) result = []
1432: (4) quotebuf = None
1433: (4) charbuf = []
1434: (4) fieldchar = ['']
1435: (4) fieldnum = [0]
1436: (4) def append_chars():
1437: (8) result.append(('chars', ''.join(charbuf).replace('\0', "'")))
1438: (8) del charbuf[:]
1439: (4) def append_field():
1440: (8) result.append(('field', (fieldchar[0], fieldnum[0])))
1441: (8) fieldchar[0] = ''
1442: (8) fieldnum[0] = 0
1443: (4) for char in pattern.replace("''", '\0'):
1444: (8) if quotebuf is None:
1445: (12) if char == "'": # quote started
1446: (16) if fieldchar[0]:
1447: (20) append_field()
1448: (16) elif charbuf:
1449: (20) append_chars()
1450: (16) quotebuf = []
1451: (12) elif char in PATTERN_CHARS:
1452: (16) if charbuf:
1453: (20) append_chars()
1454: (16) if char == fieldchar[0]:
1455: (20) fieldnum[0] += 1
1456: (16) else:
1457: (20) if fieldchar[0]:
1458: (24) append_field()
1459: (20) fieldchar[0] = char
1460: (20) fieldnum[0] = 1
1461: (12) else:
1462: (16) if fieldchar[0]:
1463: (20) append_field()
1464: (16) charbuf.append(char)
1465: (8) elif quotebuf is not None:
1466: (12) if char == "'": # end of quote
1467: (16) charbuf.extend(quotebuf)
1468: (16) quotebuf = None
1469: (12) else: # inside quote
1470: (16) quotebuf.append(char)
1471: (4) if fieldchar[0]:
1472: (8) append_field()
1473: (4) elif charbuf:
1474: (8) append_chars()
1475: (4) return result
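# --- Illustrative sketch (not part of babel.dates itself) ---
# What the tokenizer above produces: a run of one field letter collapses into
# a single ("field", (char, count)) token, and everything else becomes a
# ("chars", literal) token.
from babel.dates import tokenize_pattern

assert tokenize_pattern("MMM d") == [
    ("field", ("M", 3)),
    ("chars", " "),
    ("field", ("d", 1)),
]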
1476: (0) def untokenize_pattern(tokens: Iterable[tuple[str, str | tuple[str, int]]]) -> str:
1477: (4) """
1478: (4) Turn a date format pattern token stream back into a string.
1479: (4) This is the reverse operation of ``tokenize_pattern``.
1480: (4) :type tokens: Iterable[tuple]
1481: (4) :rtype: str
1482: (4) """
1483: (4) output = []
1484: (4) for tok_type, tok_value in tokens:
1485: (8) if tok_type == "field":
1486: (12) output.append(tok_value[0] * tok_value[1])
1487: (8) elif tok_type == "chars":
1488: (12) if not any(ch in PATTERN_CHARS for ch in tok_value): # No need to quote
1489: (16) output.append(tok_value)
1490: (12) else:
1491: (16) output.append("'%s'" % tok_value.replace("'", "''"))
1492: (4) return "".join(output)
1493: (0) def split_interval_pattern(pattern: str) -> list[str]:
1494: (4) """
1495: (4) Split an interval-describing datetime pattern into multiple pieces.
1496: (4) > The pattern is then designed to be broken up into two pieces by determining the first repeating field.
1497: (4) - https://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats
1498: (4) >>> split_interval_pattern(u'E d.M. \u2013 E d.M.')
1499: (4) [u'E d.M. \u2013 ', 'E d.M.']
1500: (4) >>> split_interval_pattern("Y 'text' Y 'more text'")
1501: (4) ["Y 'text '", "Y 'more text'"]
1502: (4) >>> split_interval_pattern(u"E, MMM d \u2013 E")
1503: (4) [u'E, MMM d \u2013 ', u'E']
1504: (4) >>> split_interval_pattern("MMM d")
1505: (4) ['MMM d']
1506: (4) >>> split_interval_pattern("y G")
1507: (4) ['y G']
1508: (4) >>> split_interval_pattern(u"MMM d \u2013 d")
1509: (4) [u'MMM d \u2013 ', u'd']
1510: (4) :param pattern: Interval pattern string
1511: (4) :return: list of "subpatterns"
1512: (4) """
1513: (4) seen_fields = set()
1514: (4) parts = [[]]
1515: (4) for tok_type, tok_value in tokenize_pattern(pattern):
1516: (8) if tok_type == "field":
1517: (12) if tok_value[0] in seen_fields: # Repeated field
1518: (16) parts.append([])
1519: (16) seen_fields.clear()
1520: (12) seen_fields.add(tok_value[0])
1521: (8) parts[-1].append((tok_type, tok_value))
1522: (4) return [untokenize_pattern(tokens) for tokens in parts]
1523: (0) def match_skeleton(skeleton: str, options: Iterable[str], allow_different_fields: bool = False) -> str | None:
1524: (4) """
1525: (4) Find the closest match for the given datetime skeleton among the options given.
1526: (4) This uses the rules outlined in the TR35 document.
1527: (4) >>> match_skeleton('yMMd', ('yMd', 'yMMMd'))
1528: (4) 'yMd'
1529: (4) >>> match_skeleton('yMMd', ('jyMMd',), allow_different_fields=True)
1530: (4) 'jyMMd'
1531: (4) >>> match_skeleton('yMMd', ('qyMMd',), allow_different_fields=False)
1532: (4) >>> match_skeleton('hmz', ('hmv',))
1533: (4) 'hmv'
1534: (4) :param skeleton: The skeleton to match
1535: (4) :type skeleton: str
1536: (4) :param options: An iterable of other skeletons to match against
1537: (4) :type options: Iterable[str]
1538: (4) :return: The closest skeleton match, or if no match was found, None.
1539: (4) :rtype: str|None
1540: (4) """
1541: (4) # TODO: maybe implement pattern expansion?
1542: (4) # Based on the implementation in
1543: (4) # http://source.icu-project.org/repos/icu/icu4j/trunk/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java
1544: (4) # Filter out falsy values and sort for stability; when `interval_formats` is passed in, there may be a None key.
1545: (4) options = sorted(option for option in options if option)
1546: (4) if 'z' in skeleton and not any('z' in option for option in options):
1547: (8) skeleton = skeleton.replace('z', 'v')
1548: (4) get_input_field_width = dict(t[1] for t in tokenize_pattern(skeleton) if t[0] == "field").get
1549: (4) best_skeleton = None
1550: (4) best_distance = None
1551: (4) for option in options:
1552: (8) get_opt_field_width = dict(t[1] for t in tokenize_pattern(option) if t[0] == "field").get
1553: (8) distance = 0
1554: (8) for field in PATTERN_CHARS:
1555: (12) input_width = get_input_field_width(field, 0)
1556: (12) opt_width = get_opt_field_width(field, 0)
1557: (12) if input_width == opt_width:
1558: (16) continue
1559: (12) if opt_width == 0 or input_width == 0:
1560: (16) if not allow_different_fields: # This one is not okay
1561: (20) option = None
1562: (20) break
1563: (16) distance += 0x1000 # Magic weight constant for "entirely different fields"
1564: (12) elif field == 'M' and ((input_width > 2 and opt_width <= 2) or (input_width <= 2 and opt_width > 2)):
1565: (16) distance += 0x100 # Magic weight for "text turns into a number"
1566: (12) else:
1567: (16) distance += abs(input_width - opt_width)
1568: (8) if not option: # We lost the option along the way (probably due to "allow_different_fields")
1569: (12) continue
1570: (8) if not best_skeleton or distance < best_distance:
1571: (12) best_skeleton = option
1572: (12) best_distance = distance
1573: (8) if distance == 0: # Found a perfect match!
1574: (12) break
1575: (4) return best_skeleton
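# --- Illustrative sketch (not part of babel.dates itself) ---
# match_skeleton() is typically fed the skeletons a locale actually defines,
# e.g. by format_skeleton(..., fuzzy=True). `Locale.datetime_skeletons` is
# assumed here to be the mapping of available skeletons to patterns; which
# skeleton wins depends on the installed CLDR data.
from babel import Locale
from babel.dates import match_skeleton

en = Locale.parse("en")
best = match_skeleton("yMMd", en.datetime_skeletons)
if best is not None:
    print(best, en.datetime_skeletons[best])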
----------------------------------------
File 5 - . \lists.py:
1: (0) """
2: (4) babel.lists
3: (4) ~~~~~~~~~~~
4: (4) Locale dependent formatting of lists.
5: (4) The default locale for the functions in this module is determined by the
6: (4) following environment variables, in that order:
7: (5) * ``LC_ALL``, and
8: (5) * ``LANG``
9: (4) :copyright: (c) 2015-2024 by the Babel Team.
10: (4) :license: BSD, see LICENSE for more details.
11: (0) """
12: (0) from __future__ import annotations
13: (0) from collections.abc import Sequence
14: (0) from typing import TYPE_CHECKING
15: (0) from babel.core import Locale, default_locale
16: (0) if TYPE_CHECKING:
17: (4) from typing_extensions import Literal
18: (0) DEFAULT_LOCALE = default_locale()
19: (0) def format_list(
20: (4) lst: Sequence[str],
21: (4) style: Literal['standard', 'standard-short', 'or', 'or-short', 'unit', 'unit-short', 'unit-narrow'] = 'standard',
22: (4) locale: Locale | str | None = DEFAULT_LOCALE,
23: (0) ) -> str:
24: (4) """
25: (4) Format the items in `lst` as a list.
26: (4) >>> format_list(['apples', 'oranges', 'pears'], locale='en')
27: (4) u'apples, oranges, and pears'
28: (4) >>> format_list(['apples', 'oranges', 'pears'], locale='zh')
29: (4) u'apples\u3001oranges\u548cpears'
30: (4) >>> format_list(['omena', 'peruna', 'aplari'], style='or', locale='fi')
31: (4) u'omena, peruna tai aplari'
32: (4) Not all styles are necessarily available in all locales.
33: (4) The function will attempt to fall back to replacement styles according to the rules
34: (4) set forth in the CLDR root XML file, and raise a ValueError if no suitable replacement
35: (4) can be found.
36: (4) The following text is verbatim from the Unicode TR35-49 spec [1].
37: (4) * standard:
38: (6) A typical 'and' list for arbitrary placeholders.
39: (6) eg. "January, February, and March"
40: (4) * standard-short:
41: (6) A short version of an 'and' list, suitable for use with short or abbreviated placeholder values.
42: (6) eg. "Jan., Feb., and Mar."
43: (4) * or:
44: (6) A typical 'or' list for arbitrary placeholders.
45: (6) eg. "January, February, or March"
46: (4) * or-short:
47: (6) A short version of an 'or' list.
48: (6) eg. "Jan., Feb., or Mar."
49: (4) * unit:
50: (6) A list suitable for wide units.
51: (6) eg. "3 feet, 7 inches"
52: (4) * unit-short:
53: (6) A list suitable for short units.
54: (6) eg. "3 ft, 7 in"
55: (4) * unit-narrow:
56: (6) A list suitable for narrow units, where space on the screen is very limited.
57: (6) eg. "3′ 7″"
58: (4) [1]: https://www.unicode.org/reports/tr35/tr35-49/tr35-general.html#ListPatterns
59: (4) :param lst: a sequence of items to format into a list
60: (4) :param style: the style to format the list with. See above for description.
61: (4) :param locale: the locale
62: (4) """
63: (4) locale = Locale.parse(locale)
64: (4) if not lst:
65: (8) return ''
66: (4) if len(lst) == 1:
67: (8) return lst[0]
68: (4) patterns = _resolve_list_style(locale, style)
69: (4) if len(lst) == 2 and '2' in patterns:
70: (8) return patterns['2'].format(*lst)
71: (4) result = patterns['start'].format(lst[0], lst[1])
72: (4) for elem in lst[2:-1]:
73: (8) result = patterns['middle'].format(result, elem)
74: (4) result = patterns['end'].format(result, lst[-1])
75: (4) return result
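# --- Illustrative sketch (not part of babel.lists itself) ---
# With more than two items the 'start', 'middle' and 'end' patterns are folded
# over the list from left to right. The expected string assumes the usual CLDR
# English patterns ('{0}, {1}' for start/middle, '{0}, and {1}' for end).
from babel.lists import format_list

assert format_list(["a", "b", "c", "d"], locale="en") == "a, b, c, and d"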
76: (0) # Based on CLDR 45's root.xml file's `<alias>`es.
77: (0) # The root file defines both `standard` and `or`,
78: (0) # so they're always available.
79: (0) # TODO: It would likely be better to use the
80: (0) # babel.localedata.Alias mechanism for this,
81: (0) # but I'm not quite sure how it's supposed to
82: (0) # work with inheritance and data in the root.
83: (0) _style_fallbacks = {
84: (4) "or-narrow": ["or-short", "or"],
85: (4) "or-short": ["or"],
86: (4) "standard-narrow": ["standard-short", "standard"],
87: (4) "standard-short": ["standard"],
88: (4) "unit": ["unit-short", "standard"],
89: (4) "unit-narrow": ["unit-short", "unit", "standard"],
90: (4) "unit-short": ["standard"],
91: (0) }
92: (0) def _resolve_list_style(locale: Locale, style: str):
93: (4) for style in (style, *(_style_fallbacks.get(style, []))): # noqa: B020
94: (8) if style in locale.list_patterns:
95: (12) return locale.list_patterns[style]
96: (4) raise ValueError(
97: (8) f"Locale {locale} does not support list formatting style {style!r} "
98: (8) f"(supported are {sorted(locale.list_patterns)})",
99: (4) )
----------------------------------------
File 6 - . \units.py:
1: (0) from __future__ import annotations
2: (0) import decimal
3: (0) from typing import TYPE_CHECKING
4: (0) from babel.core import Locale
5: (0) from babel.numbers import LC_NUMERIC, format_decimal
6: (0) if TYPE_CHECKING:
7: (4) from typing_extensions import Literal
8: (0) class UnknownUnitError(ValueError):
9: (4) def __init__(self, unit: str, locale: Locale) -> None:
10: (8) ValueError.__init__(self, f"{unit} is not a known unit in {locale}")
11: (0) def get_unit_name(
12: (4) measurement_unit: str,
13: (4) length: Literal['short', 'long', 'narrow'] = 'long',
14: (4) locale: Locale | str | None = LC_NUMERIC,
15: (0) ) -> str | None:
16: (4) """
17: (4) Get the display name for a measurement unit in the given locale.
18: (4) >>> get_unit_name("radian", locale="en")
19: (4) 'radians'
20: (4) Unknown units will raise exceptions:
21: (4) >>> get_unit_name("battery", locale="fi")
22: (4) Traceback (most recent call last):
23: (8) ...
24: (4) UnknownUnitError: battery is not a known unit in fi
25: (4) :param measurement_unit: the code of a measurement unit.
26: (29) Known units can be found in the CLDR Unit Validity XML file:
27: (29) https://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml
28: (4) :param length: "short", "long" or "narrow"
29: (4) :param locale: the `Locale` object or locale identifier
30: (4) :return: The unit display name, or None.
31: (4) """
32: (4) locale = Locale.parse(locale)
33: (4) unit = _find_unit_pattern(measurement_unit, locale=locale)
34: (4) if not unit:
35: (8) raise UnknownUnitError(unit=measurement_unit, locale=locale)
36: (4) return locale.unit_display_names.get(unit, {}).get(length)
37: (0) def _find_unit_pattern(unit_id: str, locale: Locale | str | None = LC_NUMERIC) -> str | None:
38: (4) """
39: (4) Expand a unit into a qualified form.
40: (4) Known units can be found in the CLDR Unit Validity XML file:
41: (4) https://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml
42: (4) >>> _find_unit_pattern("radian", locale="en")
43: (4) 'angle-radian'
44: (4) Unknown values will return None.
45: (4) >>> _find_unit_pattern("horse", locale="en")
46: (4) :param unit_id: the code of a measurement unit.
47: (4) :return: A key to the `unit_patterns` mapping, or None.
48: (4) """
49: (4) locale = Locale.parse(locale)
50: (4) unit_patterns = locale._data["unit_patterns"]
51: (4) if unit_id in unit_patterns:
52: (8) return unit_id
53: (4) for unit_pattern in sorted(unit_patterns, key=len):
54: (8) if unit_pattern.endswith(unit_id):
55: (12) return unit_pattern
56: (4) return None
57: (0) def format_unit(
58: (4) value: str | float | decimal.Decimal,
59: (4) measurement_unit: str,
60: (4) length: Literal['short', 'long', 'narrow'] = 'long',
61: (4) format: str | None = None,
62: (4) locale: Locale | str | None = LC_NUMERIC,
63: (4) *,
64: (4) numbering_system: Literal["default"] | str = "latn",
65: (0) ) -> str:
66: (4) """Format a value of a given unit.
67: (4) Values are formatted according to the locale's usual pluralization rules
68: (4) and number formats.
69: (4) >>> format_unit(12, 'length-meter', locale='ro_RO')
70: (4) u'12 metri'
71: (4) >>> format_unit(15.5, 'length-mile', locale='fi_FI')
72: (4) u'15,5 mailia'
73: (4) >>> format_unit(1200, 'pressure-millimeter-ofhg', locale='nb')
74: (4) u'1\\xa0200 millimeter kvikks\\xf8lv'
75: (4) >>> format_unit(270, 'ton', locale='en')
76: (4) u'270 tons'
77: (4) >>> format_unit(1234.5, 'kilogram', locale='ar_EG', numbering_system='default')
78: (4) u'1٬234٫5 كيلوغرام'
79: (4) Number formats may be overridden with the ``format`` parameter.
80: (4) >>> import decimal
81: (4) >>> format_unit(decimal.Decimal("-42.774"), 'temperature-celsius', 'short', format='#.0', locale='fr')
82: (4) u'-42,8\\u202f\\xb0C'
83: (4) The locale's usual pluralization rules are respected.
84: (4) >>> format_unit(1, 'length-meter', locale='ro_RO')
85: (4) u'1 metru'
86: (4) >>> format_unit(0, 'length-mile', locale='cy')
87: (4) u'0 mi'
88: (4) >>> format_unit(1, 'length-mile', locale='cy')
89: (4) u'1 filltir'
90: (4) >>> format_unit(3, 'length-mile', locale='cy')
91: (4) u'3 milltir'
92: (4) >>> format_unit(15, 'length-horse', locale='fi')
93: (4) Traceback (most recent call last):
94: (8) ...
95: (4) UnknownUnitError: length-horse is not a known unit in fi
96: (4) .. versionadded:: 2.2.0
97: (4) :param value: the value to format. If this is a string, no number formatting will be attempted.
98: (4) :param measurement_unit: the code of a measurement unit.
99: (29) Known units can be found in the CLDR Unit Validity XML file:
100: (29) https://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml
101: (4) :param length: "short", "long" or "narrow"
102: (4) :param format: An optional format, as accepted by `format_decimal`.
103: (4) :param locale: the `Locale` object or locale identifier
104: (4) :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn".
105: (29) The special value "default" will use the default numbering system of the locale.
106: (4) :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale.
107: (4) """
108: (4) locale = Locale.parse(locale)
109: (4) q_unit = _find_unit_pattern(measurement_unit, locale=locale)
110: (4) if not q_unit:
111: (8) raise UnknownUnitError(unit=measurement_unit, locale=locale)
112: (4) unit_patterns = locale._data["unit_patterns"][q_unit].get(length, {})
113: (4) if isinstance(value, str): # Assume the value is a preformatted singular.
114: (8) formatted_value = value
115: (8) plural_form = "one"
116: (4) else:
117: (8) formatted_value = format_decimal(value, format, locale, numbering_system=numbering_system)
118: (8) plural_form = locale.plural_form(value)
119: (4) if plural_form in unit_patterns:
120: (8) return unit_patterns[plural_form].format(formatted_value)
121: (4) # Fall back to a somewhat bad representation.
122: (4) # nb: This is marked as no-cover, as the current CLDR seemingly has no way for this to happen.
123: (4) fallback_name = get_unit_name(measurement_unit, length=length, locale=locale) # pragma: no cover
124: (4) return f"{formatted_value} {fallback_name or measurement_unit}" # pragma: no cover
125: (0) def _find_compound_unit(
126: (4) numerator_unit: str,
127: (4) denominator_unit: str,
128: (4) locale: Locale | str | None = LC_NUMERIC,
129: (0) ) -> str | None:
130: (4) """
131: (4) Find a predefined compound unit pattern.
132: (4) Used internally by format_compound_unit.
133: (4) >>> _find_compound_unit("kilometer", "hour", locale="en")
134: (4) 'speed-kilometer-per-hour'
135: (4) >>> _find_compound_unit("mile", "gallon", locale="en")
136: (4) 'consumption-mile-per-gallon'
137: (4) If no predefined compound pattern can be found, `None` is returned.
138: (4) >>> _find_compound_unit("gallon", "mile", locale="en")
139: (4) >>> _find_compound_unit("horse", "purple", locale="en")
140: (4) :param numerator_unit: The numerator unit's identifier
141: (4) :param denominator_unit: The denominator unit's identifier
142: (4) :param locale: the `Locale` object or locale identifier
143: (4) :return: A key to the `unit_patterns` mapping, or None.
144: (4) :rtype: str|None
145: (4) """
146: (4) locale = Locale.parse(locale)
147: (4) # Qualify the numerator and denominator units. This will turn possibly partial
148: (4) # units like "kilometer" or "hour" into actual units like "length-kilometer" and
149: (4) # "duration-hour".
150: (4) resolved_numerator_unit = _find_unit_pattern(numerator_unit, locale=locale)
151: (4) resolved_denominator_unit = _find_unit_pattern(denominator_unit, locale=locale)
152: (4) # If either was not found, we can't possibly build a suitable compound unit either.
153: (4) if not (resolved_numerator_unit and resolved_denominator_unit):
154: (8) return None
155: (4) # Since compound units are named "speed-kilometer-per-hour", we'll have to slice off
156: (4) # the quantities (i.e. "length", "duration") from both qualified units.
157: (4) bare_numerator_unit = resolved_numerator_unit.split("-", 1)[-1]
158: (4) bare_denominator_unit = resolved_denominator_unit.split("-", 1)[-1]
159: (4) # Now we can try and rebuild a compound unit specifier, then qualify it:
160: (4) return _find_unit_pattern(f"{bare_numerator_unit}-per-{bare_denominator_unit}", locale=locale)
161: (0) def format_compound_unit(
162: (4) numerator_value: str | float | decimal.Decimal,
163: (4) numerator_unit: str | None = None,
164: (4) denominator_value: str | float | decimal.Decimal = 1,
165: (4) denominator_unit: str | None = None,
166: (4) length: Literal["short", "long", "narrow"] = "long",
167: (4) format: str | None = None,
168: (4) locale: Locale | str | None = LC_NUMERIC,
169: (4) *,
170: (4) numbering_system: Literal["default"] | str = "latn",
171: (0) ) -> str | None:
172: (4) """
173: (4) Format a compound number value, i.e. "kilometers per hour" or similar.
174: (4) Both unit specifiers are optional, so arbitrary values can still be formatted
175: (4) according to the locale's general "per" formatting pattern.
176: (4) >>> format_compound_unit(7, denominator_value=11, length="short", locale="pt")
177: (4) '7/11'
178: (4) >>> format_compound_unit(150, "kilometer", denominator_unit="hour", locale="sv")
179: (4) '150 kilometer per timme'
180: (4) >>> format_compound_unit(150, "kilowatt", denominator_unit="year", locale="fi")
181: (4) '150 kilowattia / vuosi'
182: (4) >>> format_compound_unit(32.5, "ton", 15, denominator_unit="hour", locale="en")
183: (4) '32.5 tons per 15 hours'
184: (4) >>> format_compound_unit(1234.5, "ton", 15, denominator_unit="hour", locale="ar_EG", numbering_system="arab")
185: (4) '1٬234٫5 طن لكل 15 ساعة'
186: (4) >>> format_compound_unit(160, denominator_unit="square-meter", locale="fr")
187: (4) '160 par m\\xe8tre carr\\xe9'
188: (4) >>> format_compound_unit(4, "meter", "ratakisko", length="short", locale="fi")
189: (4) '4 m/ratakisko'
190: (4) >>> format_compound_unit(35, "minute", denominator_unit="nautical-mile", locale="sv")
191: (4) '35 minuter per nautisk mil'
192: (4) >>> from babel.numbers import format_currency
193: (4) >>> format_compound_unit(format_currency(35, "JPY", locale="de"), denominator_unit="liter", locale="de")
194: (4) '35\\xa0\\xa5 pro Liter'
195: (4) See https://www.unicode.org/reports/tr35/tr35-general.html#perUnitPatterns
196: (4) :param numerator_value: The numerator value. This may be a string,
197: (28) in which case it is considered preformatted and the unit is ignored.
198: (4) :param numerator_unit: The numerator unit. See `format_unit`.
199: (4) :param denominator_value: The denominator value. This may be a string,
200: (30) in which case it is considered preformatted and the unit is ignored.
201: (4) :param denominator_unit: The denominator unit. See `format_unit`.
202: (4) :param length: The formatting length. "short", "long" or "narrow"
203: (4) :param format: An optional format, as accepted by `format_decimal`.
204: (4) :param locale: the `Locale` object or locale identifier
205: (4) :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn".
206: (29) The special value "default" will use the default numbering system of the locale.
207: (4) :return: A formatted compound value.
208: (4) :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale.
209: (4) """
210: (4) locale = Locale.parse(locale)
211: (4) # Look for a specific compound unit first...
212: (4) if numerator_unit and denominator_unit and denominator_value == 1:
213: (8) compound_unit = _find_compound_unit(numerator_unit, denominator_unit, locale=locale)
214: (8) if compound_unit:
215: (12) return format_unit(
216: (16) numerator_value,
217: (16) compound_unit,
218: (16) length=length,
219: (16) format=format,
220: (16) locale=locale,
221: (16) numbering_system=numbering_system,
222: (12) )
223: (4) # ... failing that, construct one "by hand".
224: (4) if isinstance(numerator_value, str): # Numerator is preformatted
225: (8) formatted_numerator = numerator_value
226: (4) elif numerator_unit: # Numerator has unit
227: (8) formatted_numerator = format_unit(
228: (12) numerator_value,
229: (12) numerator_unit,
230: (12) length=length,
231: (12) format=format,
232: (12) locale=locale,
233: (12) numbering_system=numbering_system,
234: (8) )
235: (4) else: # Unitless numerator
236: (8) formatted_numerator = format_decimal(
237: (12) numerator_value,
238: (12) format=format,
239: (12) locale=locale,
240: (12) numbering_system=numbering_system,
241: (8) )
242: (4) if isinstance(denominator_value, str): # Denominator is preformatted
243: (8) formatted_denominator = denominator_value
244: (4) elif denominator_unit: # Denominator has unit
245: (8) if denominator_value == 1: # support perUnitPatterns when the denominator is 1
246: (12) denominator_unit = _find_unit_pattern(denominator_unit, locale=locale)
247: (12) per_pattern = locale._data["unit_patterns"].get(denominator_unit, {}).get(length, {}).get("per")
248: (12) if per_pattern:
249: (16) return per_pattern.format(formatted_numerator)
250: (12) # See TR-35's per-unit pattern algorithm, point 3.2.
251: (12) # For denominator 1, we replace the value to be formatted with the empty string;
252: (12) # this will make `format_unit` return " second" instead of "1 second".
253: (12) denominator_value = ""
254: (8) formatted_denominator = format_unit(
255: (12) denominator_value,
256: (12) measurement_unit=(denominator_unit or ""),
257: (12) length=length,
258: (12) format=format,
259: (12) locale=locale,
260: (12) numbering_system=numbering_system,
261: (8) ).strip()
262: (4) else: # Bare denominator
263: (8) formatted_denominator = format_decimal(
264: (12) denominator_value,
265: (12) format=format,
266: (12) locale=locale,
267: (12) numbering_system=numbering_system,
268: (8) )
269: (4) # TODO: this doesn't support "compound_variations" (or "prefix"), and will fall back to the "x/y" representation
270: (4) per_pattern = locale._data["compound_unit_patterns"].get("per", {}).get(length, {}).get("compound", "{0}/{1}")
271: (4) return per_pattern.format(formatted_numerator, formatted_denominator)
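# --- Illustrative sketch (not part of babel.units itself) ---
# When a predefined compound unit exists (here 'speed-kilometer-per-hour'),
# the fast path above delegates to format_unit(); otherwise numerator and
# denominator are formatted separately and joined with the locale's "per"
# pattern. The printed result depends on the installed CLDR data.
from babel.units import format_compound_unit

print(format_compound_unit(100, "kilometer", denominator_unit="hour", locale="en"))
# expected with current CLDR English data: '100 kilometers per hour'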
----------------------------------------
File 7 - . \plural.py:
1: (0) """
2: (4) babel.plural
3: (4) ~~~~~~~~~~~~
4: (4) CLDR Plural support. See UTS #35.
5: (4) :copyright: (c) 2013-2024 by the Babel Team.
6: (4) :license: BSD, see LICENSE for more details.
7: (0) """
8: (0) from __future__ import annotations
9: (0) import decimal
10: (0) import re
11: (0) from collections.abc import Iterable, Mapping
12: (0) from typing import TYPE_CHECKING, Any, Callable
13: (0) if TYPE_CHECKING:
14: (4) from typing_extensions import Literal
15: (0) _plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other')
16: (0) _fallback_tag = 'other'
17: (0) def extract_operands(source: float | decimal.Decimal) -> tuple[decimal.Decimal | int, int, int, int, int, int, Literal[0], Literal[0]]:
18: (4) """Extract operands from a decimal, a float or an int, according to `CLDR rules`_.
19: (4) The result is an 8-tuple (n, i, v, w, f, t, c, e), where those symbols are as follows:
20: (4) ====== ===============================================================
21: (4) Symbol Value
22: (4) ------ ---------------------------------------------------------------
23: (4) n      absolute value of the source number (integer and decimals).
24: (4) i      integer digits of n.
25: (4) v      number of visible fraction digits in n, with trailing zeros.
26: (4) w      number of visible fraction digits in n, without trailing zeros.
27: (4) f      visible fractional digits in n, with trailing zeros.
28: (4) t      visible fractional digits in n, without trailing zeros.
29: (4) c      compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
30: (4) e      currently a synonym for ‘c’; however, it may be redefined in the future.
31: (4) ====== ===============================================================
32: (4) .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-61/tr35-numbers.html#Operands
33: (4) :param source: A real number
34: (4) :type source: int|float|decimal.Decimal
35: (4) :return: An n-i-v-w-f-t-c-e tuple
36: (4) :rtype: tuple[decimal.Decimal, int, int, int, int, int, int, int]
37: (4) """
38: (4) n = abs(source)
39: (4) i = int(n)
40: (4) if isinstance(n, float):
41: (8) if i == n:
42: (12) n = i
43: (8) else:
44: (12) # Cast the `float` to a number via the string representation.
45: (12) # This is required for Python 2.6 anyway (it will straight out fail to
46: (12) # do the conversion otherwise), and it's highly unlikely that the user
47: (12) # actually wants the lossless conversion behavior (quoting the Python
48: (12) # documentation):
49: (12) # > If value is a float, the binary floating point value is losslessly
50: (12) # > converted to its exact decimal equivalent.
51: (12) # > This conversion can often require 53 or more digits of precision.
52: (12) # Should the user want that behavior, they can simply pass in a pre-
53: (12) # converted `Decimal` instance of desired accuracy.
54: (12) n = decimal.Decimal(str(n))
55: (4) if isinstance(n, decimal.Decimal):
56: (8) dec_tuple = n.as_tuple()
57: (8) exp = dec_tuple.exponent
58: (8) fraction_digits = dec_tuple.digits[exp:] if exp < 0 else ()
59: (8) trailing = ''.join(str(d) for d in fraction_digits)
60: (8) no_trailing = trailing.rstrip('0')
61: (8) v = len(trailing)
62: (8) w = len(no_trailing)
63: (8) f = int(trailing or 0)
64: (8) t = int(no_trailing or 0)
65: (4) else:
66: (8) v = w = f = t = 0
67: (4) c = e = 0 # TODO: c and e are not supported
68: (4) return n, i, v, w, f, t, c, e
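# --- Illustrative sketch (not part of babel.plural itself) ---
# Worked example of the operand extraction above for Decimal('1.30'):
# n=1.30, i=1, v=2 (fraction digits incl. trailing zeros), w=1 (without),
# f=30, t=3, and c=e=0 (compact-decimal exponents are not extracted here).
import decimal
from babel.plural import extract_operands

assert extract_operands(decimal.Decimal("1.30")) == (
    decimal.Decimal("1.30"), 1, 2, 1, 30, 3, 0, 0,
)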
69: (0) class PluralRule:
70: (4) """Represents a set of language pluralization rules. The constructor
71: (4) accepts a list of (tag, expr) tuples or a dict of `CLDR rules`_. The
72: (4) resulting object is callable: given a number (positive or negative,
73: (4) integer or float), it returns the tag of the plural form to use for
74: (4) that number:
75: (4) >>> rule = PluralRule({'one': 'n is 1'})
76: (4) >>> rule(1)
77: (4) 'one'
78: (4) >>> rule(2)
79: (4) 'other'
80: (4) Currently the CLDR defines these tags: zero, one, two, few, many and
81: (4) other, where ``other`` is an implicit default. Rules should be mutually
82: (4) exclusive; for a given numeric value, only one rule should apply (i.e.
83: (4) the condition should only be true for one of the plural rule elements).
84: (4) .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
85: (4) """
86: (4) __slots__ = ('abstract', '_func')
87: (4) def __init__(self, rules: Mapping[str, str] | Iterable[tuple[str, str]]) -> None:
88: (8) """Initialize the rule instance.
89: (8) :param rules: a list of ``(tag, expr)`` tuples with the rules
90: (22) conforming to UTS #35 or a dict with the tags as keys
91: (22) and expressions as values.
92: (8) :raise RuleError: if the expression is malformed
93: (8) """
94: (8) if isinstance(rules, Mapping):
95: (12) rules = rules.items()
96: (8) found = set()
97: (8) self.abstract: list[tuple[str, Any]] = []
98: (8) for key, expr in sorted(rules):
99: (12) if key not in _plural_tags:
100: (16) raise ValueError(f"unknown tag {key!r}")
101: (12) elif key in found:
102: (16) raise ValueError(f"tag {key!r} defined twice")
103: (12) found.add(key)
104: (12) ast = _Parser(expr).ast
105: (12) if ast:
106: (16) self.abstract.append((key, ast))
107: (4) def __repr__(self) -> str:
108: (8) rules = self.rules
109: (8) args = ", ".join([f"{tag}: {rules[tag]}" for tag in _plural_tags if tag in rules])
110: (8) return f"<{type(self).__name__} {args!r}>"
111: (4) @classmethod
112: (4) def parse(cls, rules: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> PluralRule:
113: (8) """Create a `PluralRule` instance for the given rules. If the rules
114: (8) are a `PluralRule` object, that object is returned.
115: (8) :param rules: the rules as list or dict, or a `PluralRule` object
116: (8) :raise RuleError: if the expression is malformed
117: (8) """
118: (8) if isinstance(rules, PluralRule):
119: (12) return rules
120: (8) return cls(rules)
121: (4) @property
122: (4) def rules(self) -> Mapping[str, str]:
123: (8) """The `PluralRule` as a dict of unicode plural rules.
124: (8) >>> rule = PluralRule({'one': 'n is 1'})
125: (8) >>> rule.rules
126: (8) {'one': 'n is 1'}
127: (8) """
128: (8) _compile = _UnicodeCompiler().compile
129: (8) return {tag: _compile(ast) for tag, ast in self.abstract}
130: (4) @property
131: (4) def tags(self) -> frozenset[str]:
132: (8) """A set of explicitly defined tags in this rule. The implicit default
133: (8) ``'other'`` rule is not part of this set unless there is an explicit
134: (8) rule for it.
135: (8) """
136: (8) return frozenset(i[0] for i in self.abstract)
137: (4) def __getstate__(self) -> list[tuple[str, Any]]:
138: (8) return self.abstract
139: (4) def __setstate__(self, abstract: list[tuple[str, Any]]) -> None:
140: (8) self.abstract = abstract
141: (4) def __call__(self, n: float | decimal.Decimal) -> str:
142: (8) if not hasattr(self, '_func'):
143: (12) self._func = to_python(self)
144: (8) return self._func(n)
145: (0) def to_javascript(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> str:
146: (4) """Convert a list/dict of rules or a `PluralRule` object into a JavaScript
147: (4) function. This function depends on no external library:
148: (4) >>> to_javascript({'one': 'n is 1'})
149: (4) "(function(n) { return (n == 1) ? 'one' : 'other'; })"
150: (4) Implementation detail: The function generated will probably evaluate
151: (4) expressions involved in range operations multiple times. This has the
152: (4) advantage that external helper functions are not required and is not a
153: (4) big performance hit for these simple calculations.
154: (4) :param rule: the rules as list or dict, or a `PluralRule` object
155: (4) :raise RuleError: if the expression is malformed
156: (4) """
157: (4) to_js = _JavaScriptCompiler().compile
158: (4) result = ['(function(n) { return ']
159: (4) for tag, ast in PluralRule.parse(rule).abstract:
160: (8) result.append(f"{to_js(ast)} ? {tag!r} : ")
161: (4) result.append('%r; })' % _fallback_tag)
162: (4) return ''.join(result)
163: (0) def to_python(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> Callable[[float | decimal.Decimal], str]:
164: (4) """Convert a list/dict of rules or a `PluralRule` object into a regular
165: (4) Python function. This is useful in situations where you need a real
166: (4) function and don't care about the actual rule object:
167: (4) >>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
168: (4) >>> func(1)
169: (4) 'one'
170: (4) >>> func(3)
171: (4) 'few'
172: (4) >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
173: (4) >>> func(11)
174: (4) 'one'
175: (4) >>> func(15)
176: (4) 'few'
177: (4) :param rule: the rules as list or dict, or a `PluralRule` object
178: (4) :raise RuleError: if the expression is malformed
179: (4) """
180: (4) namespace = {
181: (8) 'IN': in_range_list,
182: (8) 'WITHIN': within_range_list,
183: (8) 'MOD': cldr_modulo,
184: (8) 'extract_operands': extract_operands,
185: (4) }
186: (4) to_python_func = _PythonCompiler().compile
187: (4) result = [
188: (8) 'def evaluate(n):',
189: (8) ' n, i, v, w, f, t, c, e = extract_operands(n)',
190: (4) ]
191: (4) for tag, ast in PluralRule.parse(rule).abstract:
192: (8) # The str() call coerces the tag to a native string; the tags form
193: (8) # a small, ASCII-only set anyway, so that is fine.
194: (8) result.append(f" if ({to_python_func(ast)}): return {str(tag)!r}")
195: (4) result.append(f" return {_fallback_tag!r}")
196: (4) code = compile('\n'.join(result), '<rule>', 'exec')
197: (4) eval(code, namespace)
198: (4) return namespace['evaluate']
199: (0) def to_gettext(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> str:
200: (4) """The plural rule as gettext expression. The gettext expression is
201: (4) technically limited to integers and returns indices rather than tags.
202: (4) >>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
203: (4) 'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2);'
204: (4) :param rule: the rules as list or dict, or a `PluralRule` object
205: (4) :raise RuleError: if the expression is malformed
206: (4) """
207: (4) rule = PluralRule.parse(rule)
208: (4) used_tags = rule.tags | {_fallback_tag}
209: (4) _compile = _GettextCompiler().compile
210: (4) _get_index = [tag for tag in _plural_tags if tag in used_tags].index
211: (4) result = [f"nplurals={len(used_tags)}; plural=("]
212: (4) for tag, ast in rule.abstract:
213: (8) result.append(f"{_compile(ast)} ? {_get_index(tag)} : ")
214: (4) result.append(f"{_get_index(_fallback_tag)});")
215: (4) return ''.join(result)
216: (0) def in_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[float | decimal.Decimal]]) -> bool:
217: (4) """Integer range list test. This is the callback for the "in" operator
218: (4) of the UTS #35 pluralization rule language:
219: (4) >>> in_range_list(1, [(1, 3)])
220: (4) True
221: (4) >>> in_range_list(3, [(1, 3)])
222: (4) True
223: (4) >>> in_range_list(3, [(1, 3), (5, 8)])
224: (4) True
225: (4) >>> in_range_list(1.2, [(1, 4)])
226: (4) False
227: (4) >>> in_range_list(10, [(1, 4)])
228: (4) False
229: (4) >>> in_range_list(10, [(1, 4), (6, 8)])
230: (4) False
231: (4) """
232: (4) return num == int(num) and within_range_list(num, range_list)
233: (0) def within_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[float | decimal.Decimal]]) -> bool:
234: (4) """Float range test. This is the callback for the "within" operator
235: (4) of the UTS #35 pluralization rule language:
236: (4) >>> within_range_list(1, [(1, 3)])
237: (4) True
238: (4) >>> within_range_list(1.0, [(1, 3)])
239: (4) True
240: (4) >>> within_range_list(1.2, [(1, 4)])
241: (4) True
242: (4) >>> within_range_list(8.8, [(1, 4), (7, 15)])
243: (4) True
244: (4) >>> within_range_list(10, [(1, 4)])
245: (4) False
246: (4) >>> within_range_list(10.5, [(1, 4), (20, 30)])
247: (4) False
248: (4) """
249: (4) return any(num >= min_ and num <= max_ for min_, max_ in range_list)
250: (0) def cldr_modulo(a: float, b: float) -> float:
251: (4) """Javaish modulo. This modulo operator returns the value with the sign
252: (4) of the dividend rather than the divisor like Python does:
253: (4) >>> cldr_modulo(-3, 5)
254: (4) -3
255: (4) >>> cldr_modulo(-3, -5)
256: (4) -3
257: (4) >>> cldr_modulo(3, 5)
258: (4) 3
259: (4) """
260: (4) reverse = 0
261: (4) if a < 0:
262: (8) a *= -1
263: (8) reverse = 1
264: (4) if b < 0:
265: (8) b *= -1
266: (4) rv = a % b
267: (4) if reverse:
268: (8) rv *= -1
269: (4) return rv
270: (0) class RuleError(Exception):
271: (4) """Raised if a rule is malformed."""
272: (0) _VARS = {
273: (4) 'n', # absolute value of the source number.
274: (4) 'i', # integer digits of n.
275: (4) 'v', # number of visible fraction digits in n, with trailing zeros.*
276: (4) 'w', # number of visible fraction digits in n, without trailing zeros.*
277: (4) 'f', # visible fraction digits in n, with trailing zeros.*
278: (4) 't', # visible fraction digits in n, without trailing zeros.*
279: (4) 'c', # compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
280: (4) 'e', # currently a synonym for `c`; however, it may be redefined in the future.
281: (0) }
282: (0) _RULES: list[tuple[str | None, re.Pattern[str]]] = [
283: (4) (None, re.compile(r'\s+', re.UNICODE)),
284: (4) ('word', re.compile(fr'\b(and|or|is|(?:with)?in|not|mod|[{"".join(_VARS)}])\b')),
285: (4) ('value', re.compile(r'\d+')),
286: (4) ('symbol', re.compile(r'%|,|!=|=')),
287: (4) ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE)), # U+2026: ELLIPSIS
288: (0) ]
289: (0) def tokenize_rule(s: str) -> list[tuple[str, str]]:
290: (4) s = s.split('@')[0]
291: (4) result: list[tuple[str, str]] = []
292: (4) pos = 0
293: (4) end = len(s)
294: (4) while pos < end:
295: (8) for tok, rule in _RULES:
296: (12) match = rule.match(s, pos)
297: (12) if match is not None:
298: (16) pos = match.end()
299: (16) if tok:
300: (20) result.append((tok, match.group()))
301: (16) break
302: (8) else:
303: (12) raise RuleError(f"malformed CLDR pluralization rule. Got unexpected {s[pos]!r}")
304: (4) return result[::-1]
305: (0) def test_next_token(
306: (4) tokens: list[tuple[str, str]],
307: (4) type_: str,
308: (4) value: str | None = None,
309: (0) ) -> list[tuple[str, str]] | bool:
310: (4) return tokens and tokens[-1][0] == type_ and \
311: (8) (value is None or tokens[-1][1] == value)
312: (0) def skip_token(tokens: list[tuple[str, str]], type_: str, value: str | None = None):
313: (4) if test_next_token(tokens, type_, value):
314: (8) return tokens.pop()
315: (0) def value_node(value: int) -> tuple[Literal['value'], tuple[int]]:
316: (4) return 'value', (value, )
317: (0) def ident_node(name: str) -> tuple[str, tuple[()]]:
318: (4) return name, ()
319: (0) def range_list_node(
320: (4) range_list: Iterable[Iterable[float | decimal.Decimal]],
321: (0) ) -> tuple[Literal['range_list'], Iterable[Iterable[float | decimal.Decimal]]]:
322: (4) return 'range_list', range_list
323: (0) def negate(rv: tuple[Any, ...]) -> tuple[Literal['not'], tuple[tuple[Any, ...]]]:
324: (4) return 'not', (rv,)
325: (0) class _Parser:
326: (4) """Internal parser. This class can translate a single rule into an abstract
327: (4) tree of tuples. It implements the following grammar::
328: (8) condition = and_condition ('or' and_condition)*
329: (24) ('@integer' samples)?
330: (24) ('@decimal' samples)?
331: (8) and_condition = relation ('and' relation)*
332: (8) relation = is_relation | in_relation | within_relation
333: (8) is_relation = expr 'is' ('not')? value
334: (8) in_relation = expr (('not')? 'in' | '=' | '!=') range_list
335: (8) within_relation = expr ('not')? 'within' range_list
336: (8) expr = operand (('mod' | '%') value)?
337: (8) operand = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
338: (8) range_list = (range | value) (',' range_list)*
339: (8) value = digit+
340: (8) digit = 0|1|2|3|4|5|6|7|8|9
341: (8) range = value'..'value
342: (8) samples = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
343: (8) sampleRange = decimalValue '~' decimalValue
344: (8) decimalValue = value ('.' value)?
345: (4) - Whitespace can occur between or around any of the above tokens.
346: (4) - Rules should be mutually exclusive; for a given numeric value, only one
347: (6) rule should apply (i.e. the condition should only be true for one of
348: (6) the plural rule elements).
349: (4) - The in and within relations can take comma-separated lists, such as:
350: (6) 'n in 3,5,7..15'.
351: (4) - Samples are ignored.
352: (4) The translator parses the expression on instantiation into an attribute
353: (4) called `ast`.
354: (4) """
355: (4) def __init__(self, string):
356: (8) self.tokens = tokenize_rule(string)
357: (8) if not self.tokens:
358: (12) # If the pattern is only samples, it's entirely possible
359: (12) # no stream of tokens whatsoever is generated.
360: (12) self.ast = None
361: (12) return
362: (8) self.ast = self.condition()
363: (8) if self.tokens:
364: (12) raise RuleError(f"Expected end of rule, got {self.tokens[-1][1]!r}")
365: (4) def expect(self, type_, value=None, term=None):
366: (8) token = skip_token(self.tokens, type_, value)
367: (8) if token is not None:
368: (12) return token
369: (8) if term is None:
370: (12) term = repr(value is None and type_ or value)
371: (8) if not self.tokens:
372: (12) raise RuleError(f"expected {term} but end of rule reached")
373: (8) raise RuleError(f"expected {term} but got {self.tokens[-1][1]!r}")
374: (4) def condition(self):
375: (8) op = self.and_condition()
376: (8) while skip_token(self.tokens, 'word', 'or'):
377: (12) op = 'or', (op, self.and_condition())
378: (8) return op
379: (4) def and_condition(self):
380: (8) op = self.relation()
381: (8) while skip_token(self.tokens, 'word', 'and'):
382: (12) op = 'and', (op, self.relation())
383: (8) return op
384: (4) def relation(self):
385: (8) left = self.expr()
386: (8) if skip_token(self.tokens, 'word', 'is'):
387: (12) return skip_token(self.tokens, 'word', 'not') and 'isnot' or 'is', \
388: (16) (left, self.value())
389: (8) negated = skip_token(self.tokens, 'word', 'not')
390: (8) method = 'in'
391: (8) if skip_token(self.tokens, 'word', 'within'):
392: (12) method = 'within'
393: (8) else:
394: (12) if not skip_token(self.tokens, 'word', 'in'):
395: (16) if negated:
396: (20) raise RuleError('Cannot negate operator based rules.')
397: (16) return self.newfangled_relation(left)
398: (8) rv = 'relation', (method, left, self.range_list())
399: (8) return negate(rv) if negated else rv
400: (4) def newfangled_relation(self, left):
401: (8) if skip_token(self.tokens, 'symbol', '='):
402: (12) negated = False
403: (8) elif skip_token(self.tokens, 'symbol', '!='):
404: (12) negated = True
405: (8) else:
406: (12) raise RuleError('Expected "=" or "!=" or legacy relation')
407: (8) rv = 'relation', ('in', left, self.range_list())
408: (8) return negate(rv) if negated else rv
409: (4) def range_or_value(self):
410: (8) left = self.value()
411: (8) if skip_token(self.tokens, 'ellipsis'):
412: (12) return left, self.value()
413: (8) else:
414: (12) return left, left
415: (4) def range_list(self):
416: (8) range_list = [self.range_or_value()]
417: (8) while skip_token(self.tokens, 'symbol', ','):
418: (12) range_list.append(self.range_or_value())
419: (8) return range_list_node(range_list)
420: (4) def expr(self):
421: (8) word = skip_token(self.tokens, 'word')
422: (8) if word is None or word[1] not in _VARS:
423: (12) raise RuleError('Expected identifier variable')
424: (8) name = word[1]
425: (8) if skip_token(self.tokens, 'word', 'mod'):
426: (12) return 'mod', ((name, ()), self.value())
427: (8) elif skip_token(self.tokens, 'symbol', '%'):
428: (12) return 'mod', ((name, ()), self.value())
429: (8) return ident_node(name)
430: (4) def value(self):
431: (8) return value_node(int(self.expect('value')[1]))
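# --- Illustrative sketch (not part of babel.plural itself) ---
# The internal _Parser builds the tuple-based AST consumed by the compilers
# below; a simple 'is' relation parses to ('is', (operand, value)). _Parser is
# a private helper, so its output format may change between releases.
from babel.plural import _Parser

assert _Parser("n is 1").ast == ("is", (("n", ()), ("value", (1,))))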
432: (0) def _binary_compiler(tmpl):
433: (4) """Compiler factory for the `_Compiler`."""
434: (4) return lambda self, left, right: tmpl % (self.compile(left), self.compile(right))
435: (0) def _unary_compiler(tmpl):
436: (4) """Compiler factory for the `_Compiler`."""
437: (4) return lambda self, x: tmpl % self.compile(x)
438: (0) compile_zero = lambda x: '0'
439: (0) class _Compiler:
440: (4) """The compilers are able to transform the expressions into multiple
441: (4) output formats.
442: (4) """
443: (4) def compile(self, arg):
444: (8) op, args = arg
445: (8) return getattr(self, f"compile_{op}")(*args)
446: (4) compile_n = lambda x: 'n'
447: (4) compile_i = lambda x: 'i'
448: (4) compile_v = lambda x: 'v'
449: (4) compile_w = lambda x: 'w'
450: (4) compile_f = lambda x: 'f'
451: (4) compile_t = lambda x: 't'
452: (4) compile_c = lambda x: 'c'
453: (4) compile_e = lambda x: 'e'
454: (4) compile_value = lambda x, v: str(v)
455: (4) compile_and = _binary_compiler('(%s && %s)')
456: (4) compile_or = _binary_compiler('(%s || %s)')
457: (4) compile_not = _unary_compiler('(!%s)')
458: (4) compile_mod = _binary_compiler('(%s %% %s)')
459: (4) compile_is = _binary_compiler('(%s == %s)')
460: (4) compile_isnot = _binary_compiler('(%s != %s)')
461: (4) def compile_relation(self, method, expr, range_list):
462: (8) raise NotImplementedError()
463: (0) class _PythonCompiler(_Compiler):
464: (4) """Compiles an expression to Python."""
465: (4) compile_and = _binary_compiler('(%s and %s)')
466: (4) compile_or = _binary_compiler('(%s or %s)')
467: (4) compile_not = _unary_compiler('(not %s)')
468: (4) compile_mod = _binary_compiler('MOD(%s, %s)')
469: (4) def compile_relation(self, method, expr, range_list):
470: (8) ranges = ",".join([f"({self.compile(a)}, {self.compile(b)})" for (a, b) in range_list[1]])
471: (8) return f"{method.upper()}({self.compile(expr)}, [{ranges}])"
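# Rough sketch of the Python source this compiler emits (illustrative; the
# MOD() and IN() names are placeholders that the generated expression expects
# its evaluation namespace to provide):
#
#     _PythonCompiler().compile(_Parser('n mod 10 is 1').ast)
#     # -> '(MOD(n, 10) == 1)'
#     _PythonCompiler().compile(_Parser('n in 2..4').ast)
#     # -> 'IN(n, [(2, 4)])'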
472: (0) class _GettextCompiler(_Compiler):
473: (4) """Compile into a gettext plural expression."""
474: (4) compile_i = _Compiler.compile_n
475: (4) compile_v = compile_zero
476: (4) compile_w = compile_zero
477: (4) compile_f = compile_zero
478: (4) compile_t = compile_zero
479: (4) def compile_relation(self, method, expr, range_list):
480: (8) rv = []
481: (8) expr = self.compile(expr)
482: (8) for item in range_list[1]:
483: (12) if item[0] == item[1]:
484: (16) rv.append(f"({expr} == {self.compile(item[0])})")
485: (12) else:
486: (16) min, max = map(self.compile, item)
487: (16) rv.append(f"({expr} >= {min} && {expr} <= {max})")
488: (8) return f"({' || '.join(rv)})"
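# Rough sketch of the gettext plural expressions this produces (illustrative,
# derived from the compile_* templates above):
#
#     _GettextCompiler().compile(_Parser('n is 1').ast)
#     # -> '(n == 1)'
#     _GettextCompiler().compile(_Parser('n in 2..4').ast)
#     # -> '((n >= 2 && n <= 4))'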
489: (0) class _JavaScriptCompiler(_GettextCompiler):
490: (4) """Compiles the expression to plain of JavaScript."""
491: (4) # XXX: presently javascript does not support any of the
492: (4) # fraction support and basically only deals with integers.
493: (4) compile_i = lambda x: 'parseInt(n, 10)'
494: (4) compile_v = compile_zero
495: (4) compile_w = compile_zero
496: (4) compile_f = compile_zero
497: (4) compile_t = compile_zero
498: (4) def compile_relation(self, method, expr, range_list):
499: (8) code = _GettextCompiler.compile_relation(
500: (12) self, method, expr, range_list)
501: (8) if method == 'in':
502: (12) expr = self.compile(expr)
503: (12) code = f"(parseInt({expr}, 10) == {expr} && {code})"
504: (8) return code
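# For 'in' relations the JavaScript output additionally guards against
# fractional input, roughly like this (illustrative):
#
#     _JavaScriptCompiler().compile(_Parser('n in 2..4').ast)
#     # -> '(parseInt(n, 10) == n && ((n >= 2 && n <= 4)))'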
505: (0) class _UnicodeCompiler(_Compiler):
506: (4) """Returns a unicode pluralization rule again."""
507: (4) # XXX: this currently spits out the old syntax instead of the new
508: (4) # one. We can change that, but it will break a whole bunch of stuff
509: (4) # for users I suppose.
510: (4) compile_is = _binary_compiler('%s is %s')
511: (4) compile_isnot = _binary_compiler('%s is not %s')
512: (4) compile_and = _binary_compiler('%s and %s')
513: (4) compile_or = _binary_compiler('%s or %s')
514: (4) compile_mod = _binary_compiler('%s mod %s')
515: (4) def compile_not(self, relation):
516: (8) return self.compile_relation(*relation[1], negated=True)
517: (4) def compile_relation(self, method, expr, range_list, negated=False):
518: (8) ranges = []
519: (8) for item in range_list[1]:
520: (12) if item[0] == item[1]:
521: (16) ranges.append(self.compile(item[0]))
522: (12) else:
523: (16) ranges.append(f"{self.compile(item[0])}..{self.compile(item[1])}")
524: (8) return f"{self.compile(expr)}{' not' if negated else ''} {method} {','.join(ranges)}"
----------------------------------------
File 8 - . \numbers.py:
1: (0) """
2: (4) babel.numbers
3: (4) ~~~~~~~~~~~~~
4: (4) Locale dependent formatting and parsing of numeric data.
5: (4) The default locale for the functions in this module is determined by the
6: (4) following environment variables, in that order:
7: (5) * ``LC_NUMERIC``,
8: (5) * ``LC_ALL``, and
9: (5) * ``LANG``
10: (4) :copyright: (c) 2013-2024 by the Babel Team.
11: (4) :license: BSD, see LICENSE for more details.
12: (0) """
13: (0) # TODO:
14: (0) # Padding and rounding increments in pattern:
15: (0) # - https://www.unicode.org/reports/tr35/ (Appendix G.6)
16: (0) from __future__ import annotations
17: (0) import datetime
18: (0) import decimal
19: (0) import re
20: (0) import warnings
21: (0) from typing import TYPE_CHECKING, Any, cast, overload
22: (0) from babel.core import Locale, default_locale, get_global
23: (0) from babel.localedata import LocaleDataDict
24: (0) if TYPE_CHECKING:
25: (4) from typing_extensions import Literal
26: (0) LC_NUMERIC = default_locale('LC_NUMERIC')
27: (0) class UnknownCurrencyError(Exception):
28: (4) """Exception thrown when a currency is requested for which no data is available.
29: (4) """
30: (4) def __init__(self, identifier: str) -> None:
31: (8) """Create the exception.
32: (8) :param identifier: the identifier string of the unsupported currency
33: (8) """
34: (8) Exception.__init__(self, f"Unknown currency {identifier!r}.")
35: (8) #: The identifier of the currency that could not be found.
36: (8) self.identifier = identifier
37: (0) def list_currencies(locale: Locale | str | None = None) -> set[str]:
38: (4) """ Return a `set` of normalized currency codes.
39: (4) .. versionadded:: 2.5.0
40: (4) :param locale: filters returned currency codes by the provided locale.
41: (19) Expected to be a locale instance or code. If no locale is
42: (19) provided, returns the set of all currencies from all
43: (19) locales.
44: (4) """
45: (4) # Get locale-scoped currencies.
46: (4) if locale:
47: (8) return set(Locale.parse(locale).currencies)
48: (4) return set(get_global('all_currencies'))
49: (0) def validate_currency(currency: str, locale: Locale | str | None = None) -> None:
50: (4) """ Check the currency code is recognized by Babel.
51: (4) Accepts a ``locale`` parameter for fined-grained validation, working as
52: (4) the one defined above in ``list_currencies()`` method.
53: (4) Raises a `UnknownCurrencyError` exception if the currency is unknown to Babel.
54: (4) """
55: (4) if currency not in list_currencies(locale):
56: (8) raise UnknownCurrencyError(currency)
57: (0) def is_currency(currency: str, locale: Locale | str | None = None) -> bool:
58: (4) """ Returns `True` only if a currency is recognized by Babel.
59: (4) This method always returns a Boolean and never raises.
60: (4) """
61: (4) if not currency or not isinstance(currency, str):
62: (8) return False
63: (4) try:
64: (8) validate_currency(currency, locale)
65: (4) except UnknownCurrencyError:
66: (8) return False
67: (4) return True
68: (0) def normalize_currency(currency: str, locale: Locale | str | None = None) -> str | None:
69: (4) """Returns the normalized identifier of any currency code.
70: (4) Accepts a ``locale`` parameter for fine-grained validation, working as
71: (4) described above for the ``list_currencies()`` method.
72: (4) Returns None if the currency is unknown to Babel.
73: (4) """
74: (4) if isinstance(currency, str):
75: (8) currency = currency.upper()
76: (4) if not is_currency(currency, locale):
77: (8) return None
78: (4) return currency
79: (0) def get_currency_name(
80: (4) currency: str,
81: (4) count: float | decimal.Decimal | None = None,
82: (4) locale: Locale | str | None = LC_NUMERIC,
83: (0) ) -> str:
84: (4) """Return the name used by the locale for the specified currency.
85: (4) >>> get_currency_name('USD', locale='en_US')
86: (4) u'US Dollar'
87: (4) .. versionadded:: 0.9.4
88: (4) :param currency: the currency code.
89: (4) :param count: the optional count. If provided the currency name
90: (18) will be pluralized to that number if possible.
91: (4) :param locale: the `Locale` object or locale identifier.
92: (4) """
93: (4) loc = Locale.parse(locale)
94: (4) if count is not None:
95: (8) try:
96: (12) plural_form = loc.plural_form(count)
97: (8) except (OverflowError, ValueError):
98: (12) plural_form = 'other'
99: (8) plural_names = loc._data['currency_names_plural']
100: (8) if currency in plural_names:
101: (12) currency_plural_names = plural_names[currency]
102: (12) if plural_form in currency_plural_names:
103: (16) return currency_plural_names[plural_form]
104: (12) if 'other' in currency_plural_names:
105: (16) return currency_plural_names['other']
106: (4) return loc.currencies.get(currency, currency)
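# With a count the plural form of the name is used; for example (the exact
# string comes from CLDR data, so treat this as approximate):
#
#     get_currency_name('USD', count=2, locale='en_US')
#     # -> 'US dollars'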
107: (0) def get_currency_symbol(currency: str, locale: Locale | str | None = LC_NUMERIC) -> str:
108: (4) """Return the symbol used by the locale for the specified currency.
109: (4) >>> get_currency_symbol('USD', locale='en_US')
110: (4) u'$'
111: (4) :param currency: the currency code.
112: (4) :param locale: the `Locale` object or locale identifier.
113: (4) """
114: (4) return Locale.parse(locale).currency_symbols.get(currency, currency)
115: (0) def get_currency_precision(currency: str) -> int:
116: (4) """Return currency's precision.
117: (4) Precision is the number of decimals found after the decimal point in the
118: (4) currency's format pattern.
119: (4) .. versionadded:: 2.5.0
120: (4) :param currency: the currency code.
121: (4) """
122: (4) precisions = get_global('currency_fractions')
123: (4) return precisions.get(currency, precisions['DEFAULT'])[0]
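# For example (the values come from CLDR's supplemental currency-fraction
# data and are therefore data-dependent):
#
#     get_currency_precision('USD')   # -> 2
#     get_currency_precision('JPY')   # -> 0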
124: (0) def get_currency_unit_pattern(
125: (4) currency: str,
126: (4) count: float | decimal.Decimal | None = None,
127: (4) locale: Locale | str | None = LC_NUMERIC,
128: (0) ) -> str:
129: (4) """
130: (4) Return the unit pattern used for long display of a currency value
131: (4) for a given locale.
132: (4) This is a string containing ``{0}`` where the numeric part
133: (4) should be substituted and ``{1}`` where the currency long display
134: (4) name should be substituted.
135: (4) >>> get_currency_unit_pattern('USD', locale='en_US', count=10)
136: (4) u'{0} {1}'
137: (4) .. versionadded:: 2.7.0
138: (4) :param currency: the currency code.
139: (4) :param count: the optional count. If provided the unit
140: (18) pattern for that number will be returned.
141: (4) :param locale: the `Locale` object or locale identifier.
142: (4) """
143: (4) loc = Locale.parse(locale)
144: (4) if count is not None:
145: (8) plural_form = loc.plural_form(count)
146: (8) try:
147: (12) return loc._data['currency_unit_patterns'][plural_form]
148: (8) except LookupError:
149: (12) # Fall back to 'other'
150: (12) pass
151: (4) return loc._data['currency_unit_patterns']['other']
152: (0) @overload
153: (0) def get_territory_currencies(
154: (4) territory: str,
155: (4) start_date: datetime.date | None = ...,
156: (4) end_date: datetime.date | None = ...,
157: (4) tender: bool = ...,
158: (4) non_tender: bool = ...,
159: (4) include_details: Literal[False] = ...,
160: (0) ) -> list[str]:
161: (4) ... # pragma: no cover
162: (0) @overload
163: (0) def get_territory_currencies(
164: (4) territory: str,
165: (4) start_date: datetime.date | None = ...,
166: (4) end_date: datetime.date | None = ...,
167: (4) tender: bool = ...,
168: (4) non_tender: bool = ...,
169: (4) include_details: Literal[True] = ...,
170: (0) ) -> list[dict[str, Any]]:
171: (4) ... # pragma: no cover
172: (0) def get_territory_currencies(
173: (4) territory: str,
174: (4) start_date: datetime.date | None = None,
175: (4) end_date: datetime.date | None = None,
176: (4) tender: bool = True,
177: (4) non_tender: bool = False,
178: (4) include_details: bool = False,
179: (0) ) -> list[str] | list[dict[str, Any]]:
180: (4) """Returns the list of currencies for the given territory that are valid for
181: (4) the given date range. In addition, the currency database distinguishes
182: (4) between tender and non-tender currencies. By default only tender
183: (4) currencies are returned.
184: (4) The return value is a list of all currencies roughly ordered by the time
185: (4) when the currency became active. The longer a currency has been in use,
186: (4) the further to the left of the list it appears.
187: (4) The start date defaults to today. If no end date is given it will be the
188: (4) same as the start date. Otherwise a range can be defined. For instance
189: (4) this can be used to find the currencies in use in Austria between 1995 and
190: (4) 2011:
191: (4) >>> from datetime import date
192: (4) >>> get_territory_currencies('AT', date(1995, 1, 1), date(2011, 1, 1))
193: (4) ['ATS', 'EUR']
194: (4) Likewise it's also possible to find all the currencies in use on a
195: (4) single date:
196: (4) >>> get_territory_currencies('AT', date(1995, 1, 1))
197: (4) ['ATS']
198: (4) >>> get_territory_currencies('AT', date(2011, 1, 1))
199: (4) ['EUR']
200: (4) By default the return value only includes tender currencies. This
201: (4) however can be changed:
202: (4) >>> get_territory_currencies('US')
203: (4) ['USD']
204: (4) >>> get_territory_currencies('US', tender=False, non_tender=True,
205: (4) ... start_date=date(2014, 1, 1))
206: (4) ['USN', 'USS']
207: (4) .. versionadded:: 2.0
208: (4) :param territory: the name of the territory to find the currency for.
209: (4) :param start_date: the start date. If not given today is assumed.
210: (4) :param end_date: the end date. If not given the start date is assumed.
211: (4) :param tender: controls whether tender currencies should be included.
212: (4) :param non_tender: controls whether non-tender currencies should be
213: (23) included.
214: (4) :param include_details: if set to `True`, instead of returning currency
215: (28) codes the return value will be dictionaries
216: (28) with detail information. In that case each
217: (28) dictionary will have the keys ``'currency'``,
218: (28) ``'from'``, ``'to'``, and ``'tender'``.
219: (4) """
220: (4) currencies = get_global('territory_currencies')
221: (4) if start_date is None:
222: (8) start_date = datetime.date.today()
223: (4) elif isinstance(start_date, datetime.datetime):
224: (8) start_date = start_date.date()
225: (4) if end_date is None:
226: (8) end_date = start_date
227: (4) elif isinstance(end_date, datetime.datetime):
228: (8) end_date = end_date.date()
229: (4) curs = currencies.get(territory.upper(), ())
230: (4) # TODO: validate that the territory exists
231: (4) def _is_active(start, end):
232: (8) return (start is None or start <= end_date) and \
233: (15) (end is None or end >= start_date)
234: (4) result = []
235: (4) for currency_code, start, end, is_tender in curs:
236: (8) if start:
237: (12) start = datetime.date(*start)
238: (8) if end:
239: (12) end = datetime.date(*end)
240: (8) if ((is_tender and tender) or
241: (16) (not is_tender and non_tender)) and _is_active(start, end):
242: (12) if include_details:
243: (16) result.append({
244: (20) 'currency': currency_code,
245: (20) 'from': start,
246: (20) 'to': end,
247: (20) 'tender': is_tender,
248: (16) })
249: (12) else:
250: (16) result.append(currency_code)
251: (4) return result
252: (0) def _get_numbering_system(locale: Locale, numbering_system: Literal["default"] | str = "latn") -> str:
253: (4) if numbering_system == "default":
254: (8) return locale.default_numbering_system
255: (4) else:
256: (8) return numbering_system
257: (0) def _get_number_symbols(
258: (4) locale: Locale | str | None,
259: (4) *,
260: (4) numbering_system: Literal["default"] | str = "latn",
261: (0) ) -> LocaleDataDict:
262: (4) parsed_locale = Locale.parse(locale)
263: (4) numbering_system = _get_numbering_system(parsed_locale, numbering_system)
264: (4) try:
265: (8) return parsed_locale.number_symbols[numbering_system]
266: (4) except KeyError as error:
267: (8) raise UnsupportedNumberingSystemError(f"Unknown numbering system {numbering_system} for Locale {parsed_locale}.") from error
268: (0) class UnsupportedNumberingSystemError(Exception):
269: (4) """Exception thrown when an unsupported numbering system is requested for the given Locale."""
270: (4) pass
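# Sketch of how the two helpers above combine (illustrative; assumes the
# ar_EG locale data declares 'arab' as its default numbering system):
#
#     loc = Locale.parse('ar_EG')
#     _get_numbering_system(loc, 'default')                                 # -> 'arab'
#     _get_number_symbols(loc, numbering_system='default').get('decimal')   # -> '٫'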
271: (0) def get_decimal_symbol(
272: (4) locale: Locale | str | None = LC_NUMERIC,
273: (4) *,
274: (4) numbering_system: Literal["default"] | str = "latn",
275: (0) ) -> str:
276: (4) """Return the symbol used by the locale to separate decimal fractions.
277: (4) >>> get_decimal_symbol('en_US')
278: (4) u'.'
279: (4) >>> get_decimal_symbol('ar_EG', numbering_system='default')
280: (4) u'٫'
281: (4) >>> get_decimal_symbol('ar_EG', numbering_system='latn')
282: (4) u'.'
283: (4) :param locale: the `Locale` object or locale identifier
284: (4) :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn".
285: (29) The special value "default" will use the default numbering system of the locale.
286: (4) :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale.
287: (4) """
288: (4) return _get_number_symbols(locale, numbering_system=numbering_system).get('decimal', '.')
289: (0) def get_plus_sign_symbol(
290: (4) locale: Locale | str | None = LC_NUMERIC,
291: (4) *,
292: (4) numbering_system: Literal["default"] | str = "latn",
293: (0) ) -> str:
294: (4) """Return the plus sign symbol used by the current locale.
295: (4) >>> get_plus_sign_symbol('en_US')
296: (4) u'+'
297: (4) >>> get_plus_sign_symbol('ar_EG', numbering_system='default')
298: (4) u'\u061c+'
299: (4) >>> get_plus_sign_symbol('ar_EG', numbering_system='latn')
300: (4) u'\u200e+'
301: (4) :param locale: the `Locale` object or locale identifier
302: (4) :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn".
303: (29) The special value "default" will use the default numbering system of the locale.
304: (4) :raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale.
305: (4) """
306: (4) return _get_number_symbols(locale, numbering_system=numbering_system).get('plusSign', '+')
307: (0) def get_minus_sign_symbol(
308: (4) locale: Locale | str | None = LC_NUMERIC,
309: (4) *,
310: (4) numbering_system: Literal["default"] | str = "latn",
311: (0) ) -> str:
312: (4) """Return the plus sign symbol used by the current locale.
313: (4) >>> get_minus_sign_symbol('en_US')
314: (4) u'-'
315: (4) >>> get_minus_sign_symbol('ar_EG', numbering_system='default')
316: (4) u'\u061c-'
317: (4) >>> get_minus_sign_symbol('ar_EG', numbering_system='latn')
318: (4) u'\u200e-'
319: (4) :param locale: the `Locale` object or locale identifier
320: (4) :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn".
321: (29) The special value "default" will use the default numbering system of the locale.
322: (4) :raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale.
323: (4) """
324: (4) return _get_number_symbols(locale, numbering_system=numbering_system).get('minusSign', '-')
325: (0) def get_exponential_symbol(
326: (4) locale: Locale | str | None = LC_NUMERIC,
327: (4) *,
328: (4) numbering_system: Literal["default"] | str = "latn",
329: (0) ) -> str:
330: (4) """Return the symbol used by the locale to separate mantissa and exponent.
331: (4) >>> get_exponential_symbol('en_US')
332: (4) u'E'
333: (4) >>> get_exponential_symbol('ar_EG', numbering_system='default')
334: (4) u'أس'
335: (4) >>> get_exponential_symbol('ar_EG', numbering_system='latn')
336: (4) u'E'
337: (4) :param locale: the `Locale` object or locale identifier
338: (4) :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn".
339: (29) The special value "default" will use the default numbering system of the locale.
340: (4) :raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale.
341: (4) """
342: (4) return _get_number_symbols(locale, numbering_system=numbering_system).get('exponential', 'E')
343: (0) def get_group_symbol(
344: (4) locale: Locale | str | None = LC_NUMERIC,
345: (4) *,
346: (4) numbering_system: Literal["default"] | str = "latn",
347: (0) ) -> str:
348: (4) """Return the symbol used by the locale to separate groups of thousands.
349: (4) >>> get_group_symbol('en_US')
350: (4) u','
351: (4) >>> get_group_symbol('ar_EG', numbering_system='default')
352: (4) u'٬'
353: (4) >>> get_group_symbol('ar_EG', numbering_system='latn')
354: (4) u','
355: (4) :param locale: the `Locale` object or locale identifier
356: (4) :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn".
357: (29) The special value "default" will use the default numbering system of the locale.
358: (4) :raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale.
359: (4) """
360: (4) return _get_number_symbols(locale, numbering_system=numbering_system).get('group', ',')
361: (0) def get_infinity_symbol(
362: (4) locale: Locale | str | None = LC_NUMERIC,
363: (4) *,
364: (4) numbering_system: Literal["default"] | str = "latn",
365: (0) ) -> str:
366: (4) """Return the symbol used by the locale to represent infinity.
367: (4) >>> get_infinity_symbol('en_US')
368: (4) u'∞'
369: (4) >>> get_infinity_symbol('ar_EG', numbering_system='default')
370: (4) u'∞'
371: (4) >>> get_infinity_symbol('ar_EG', numbering_system='latn')
372: (4) u'∞'
373: (4) :param locale: the `Locale` object or locale identifier
374: (4) :param numbering_system: The numbering system used for fetching the symbol. Defaults to "latn".
375: (29) The special value "default" will use the default numbering system of the locale.
376: (4) :raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale.
377: (4) """
378: (4) return _get_number_symbols(locale, numbering_system=numbering_system).get('infinity', '∞')
379: (0) def format_number(number: float | decimal.Decimal | str, locale: Locale | str | None = LC_NUMERIC) -> str:
380: (4) """Return the given number formatted for a specific locale.
381: (4) >>> format_number(1099, locale='en_US') # doctest: +SKIP
382: (4) u'1,099'
383: (4) >>> format_number(1099, locale='de_DE') # doctest: +SKIP
384: (4) u'1.099'
385: (4) .. deprecated:: 2.6.0
386: (7) Use babel.numbers.format_decimal() instead.
387: (4) :param number: the number to format
388: (4) :param locale: the `Locale` object or locale identifier
389: (4) """
390: (4) warnings.warn('Use babel.numbers.format_decimal() instead.', DeprecationWarning, stacklevel=2)
391: (4) return format_decimal(number, locale=locale)
392: (0) def get_decimal_precision(number: decimal.Decimal) -> int:
393: (4) """Return maximum precision of a decimal instance's fractional part.
394: (4) Precision is extracted from the fractional part only.
395: (4) """
396: (4) # Copied from: https://github.com/mahmoud/boltons/pull/59
397: (4) assert isinstance(number, decimal.Decimal)
398: (4) decimal_tuple = number.normalize().as_tuple()
399: (4) # Note: DecimalTuple.exponent can be 'n' (qNaN), 'N' (sNaN), or 'F' (Infinity)
400: (4) if not isinstance(decimal_tuple.exponent, int) or decimal_tuple.exponent >= 0:
401: (8) return 0
402: (4) return abs(decimal_tuple.exponent)
403: (0) def get_decimal_quantum(precision: int | decimal.Decimal) -> decimal.Decimal:
404: (4) """Return minimal quantum of a number, as defined by precision."""
405: (4) assert isinstance(precision, (int, decimal.Decimal))
406: (4) return decimal.Decimal(10) ** (-precision)
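# These two helpers are typically used together to quantize a value to its
# own precision, e.g. (illustrative):
#
#     d = decimal.Decimal('1.2300')
#     get_decimal_precision(d)                       # -> 2 (trailing zeros are normalized away)
#     get_decimal_quantum(get_decimal_precision(d))  # -> Decimal('0.01')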
407: (0) def format_decimal(
408: (4) number: float | decimal.Decimal | str,
409: (4) format: str | NumberPattern | None = None,
410: (4) locale: Locale | str | None = LC_NUMERIC,
411: (4) decimal_quantization: bool = True,
412: (4) group_separator: bool = True,
413: (4) *,
414: (4) numbering_system: Literal["default"] | str = "latn",
415: (0) ) -> str:
416: (4) """Return the given decimal number formatted for a specific locale.
417: (4) >>> format_decimal(1.2345, locale='en_US')
418: (4) u'1.234'
419: (4) >>> format_decimal(1.2346, locale='en_US')
420: (4) u'1.235'
421: (4) >>> format_decimal(-1.2346, locale='en_US')
422: (4) u'-1.235'
423: (4) >>> format_decimal(1.2345, locale='sv_SE')
424: (4) u'1,234'
425: (4) >>> format_decimal(1.2345, locale='de')
426: (4) u'1,234'
427: (4) >>> format_decimal(1.2345, locale='ar_EG', numbering_system='default')
428: (4) u'1٫234'
429: (4) >>> format_decimal(1.2345, locale='ar_EG', numbering_system='latn')
430: (4) u'1.234'
431: (4) The appropriate thousands grouping and the decimal separator are used for
432: (4) each locale:
433: (4) >>> format_decimal(12345.5, locale='en_US')
434: (4) u'12,345.5'
435: (4) By default the locale is allowed to truncate and round a high-precision
436: (4) number by forcing its format pattern onto the decimal part. You can bypass
437: (4) this behavior with the `decimal_quantization` parameter:
438: (4) >>> format_decimal(1.2346, locale='en_US')
439: (4) u'1.235'
440: (4) >>> format_decimal(1.2346, locale='en_US', decimal_quantization=False)
441: (4) u'1.2346'
442: (4) >>> format_decimal(12345.67, locale='fr_CA', group_separator=False)
443: (4) u'12345,67'
444: (4) >>> format_decimal(12345.67, locale='en_US', group_separator=True)
445: (4) u'12,345.67'
446: (4) :param number: the number to format
447: (4) :param format:
448: (4) :param locale: the `Locale` object or locale identifier
449: (4) :param decimal_quantization: Truncate and round high-precision numbers to
450: (33) the format pattern. Defaults to `True`.
451: (4) :param group_separator: Boolean to switch group separator on/off in a locale's
452: (28) number format.
453: (4) :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn".
454: (29) The special value "default" will use the default numbering system of the locale.
455: (4) :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale.
456: (4) """
457: (4) locale = Locale.parse(locale)
458: (4) if format is None:
459: (8) format = locale.decimal_formats[None]
460: (4) pattern = parse_pattern(format)
461: (4) return pattern.apply(
462: (8) number, locale, decimal_quantization=decimal_quantization, group_separator=group_separator, numbering_system=numbering_system)
463: (0) def format_compact_decimal(
464: (4) number: float | decimal.Decimal | str,
465: (4) *,
466: (4) format_type: Literal["short", "long"] = "short",
467: (4) locale: Locale | str | None = LC_NUMERIC,
468: (4) fraction_digits: int = 0,
469: (4) numbering_system: Literal["default"] | str = "latn",
470: (0) ) -> str:
471: (4) """Return the given decimal number formatted for a specific locale in compact form.
472: (4) >>> format_compact_decimal(12345, format_type="short", locale='en_US')
473: (4) u'12K'
474: (4) >>> format_compact_decimal(12345, format_type="long", locale='en_US')
475: (4) u'12 thousand'
476: (4) >>> format_compact_decimal(12345, format_type="short", locale='en_US', fraction_digits=2)
477: (4) u'12.34K'
478: (4) >>> format_compact_decimal(1234567, format_type="short", locale="ja_JP")
479: (4) u'123万'
480: (4) >>> format_compact_decimal(2345678, format_type="long", locale="mk")
481: (4) u'2 милиони'
482: (4) >>> format_compact_decimal(21000000, format_type="long", locale="mk")
483: (4) u'21 милион'
484: (4) >>> format_compact_decimal(12345, format_type="short", locale='ar_EG', fraction_digits=2, numbering_system='default')
485: (4) u'12٫34\xa0ألف'
486: (4) :param number: the number to format
487: (4) :param format_type: Compact format to use ("short" or "long")
488: (4) :param locale: the `Locale` object or locale identifier
489: (4) :param fraction_digits: Number of digits after the decimal point to use. Defaults to `0`.
490: (4) :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn".
491: (29) The special value "default" will use the default numbering system of the locale.
492: (4) :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale.
493: (4) """
494: (4) locale = Locale.parse(locale)
495: (4) compact_format = locale.compact_decimal_formats[format_type]
496: (4) number, format = _get_compact_format(number, compact_format, locale, fraction_digits)
497: (4) # Did not find a format, fall back.
498: (4) if format is None:
499: (8) format = locale.decimal_formats[None]
500: (4) pattern = parse_pattern(format)
501: (4) return pattern.apply(number, locale, decimal_quantization=False, numbering_system=numbering_system)
502: (0) def _get_compact_format(
503: (4) number: float | decimal.Decimal | str,
504: (4) compact_format: LocaleDataDict,
505: (4) locale: Locale,
506: (4) fraction_digits: int,
507: (0) ) -> tuple[decimal.Decimal, NumberPattern | None]:
508: (4) """Returns the number after dividing by the unit and the format pattern to use.
509: (4) The algorithm is described here:
510: (4) https://www.unicode.org/reports/tr35/tr35-45/tr35-numbers.html#Compact_Number_Formats.
511: (4) """
512: (4) if not isinstance(number, decimal.Decimal):
513: (8) number = decimal.Decimal(str(number))
514: (4) if number.is_nan() or number.is_infinite():
515: (8) return number, None
516: (4) format = None
517: (4) for magnitude in sorted([int(m) for m in compact_format["other"]], reverse=True):
518: (8) if abs(number) >= magnitude:
519: (12) # check the pattern using "other" as the amount
520: (12) format = compact_format["other"][str(magnitude)]
521: (12) pattern = parse_pattern(format).pattern
522: (12) # if the pattern is "0", we do not divide the number
523: (12) if pattern == "0":
524: (16) break
525: (12) # otherwise, divide the number by the magnitude, with the divisor reduced
526: (12) # by a factor of 10 ** (number of 0's in the pattern - 1)
527: (12) number = cast(decimal.Decimal, number / (magnitude // (10 ** (pattern.count("0") - 1))))
528: (12) # round to the number of fraction digits requested
529: (12) rounded = round(number, fraction_digits)
530: (12) # if the remaining number is singular, use the singular format
531: (12) plural_form = locale.plural_form(abs(number))
532: (12) if plural_form not in compact_format:
533: (16) plural_form = "other"
534: (12) if number == 1 and "1" in compact_format:
535: (16) plural_form = "1"
536: (12) format = compact_format[plural_form][str(magnitude)]
537: (12) number = rounded
538: (12) break
539: (4) return number, format
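# Worked example of the algorithm above (illustrative; concrete compact
# patterns such as '00K' come from CLDR data and may differ between releases):
# for 12345 with the en_US "short" formats, the largest magnitude not
# exceeding the value is 10000 with a pattern of roughly '00K'; the value is
# divided by 10000 // 10 ** (2 - 1) == 1000, giving 12.345, which is rounded
# to `fraction_digits` and later rendered as '12K'.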
540: (0) class UnknownCurrencyFormatError(KeyError):
541: (4) """Exception raised when an unknown currency format is requested."""
542: (0) def format_currency(
543: (4) number: float | decimal.Decimal | str,
544: (4) currency: str,
545: (4) format: str | NumberPattern | None = None,
546: (4) locale: Locale | str | None = LC_NUMERIC,
547: (4) currency_digits: bool = True,
548: (4) format_type: Literal["name", "standard", "accounting"] = "standard",
549: (4) decimal_quantization: bool = True,
550: (4) group_separator: bool = True,
551: (4) *,
552: (4) numbering_system: Literal["default"] | str = "latn",
553: (0) ) -> str:
554: (4) """Return formatted currency value.
555: (4) >>> format_currency(1099.98, 'USD', locale='en_US')
556: (4) '$1,099.98'
557: (4) >>> format_currency(1099.98, 'USD', locale='es_CO')
558: (4) u'US$1.099,98'
559: (4) >>> format_currency(1099.98, 'EUR', locale='de_DE')
560: (4) u'1.099,98\\xa0\\u20ac'
561: (4) >>> format_currency(1099.98, 'EGP', locale='ar_EG', numbering_system='default')
562: (4) u'\u200f1٬099٫98\xa0ج.م.\u200f'
563: (4) The format can also be specified explicitly. The currency is
564: (4) placed at the position of the '¤' sign. Repeating the sign changes
565: (4) what is substituted (¤ being the symbol, ¤¤ the currency abbreviation
566: (4) and ¤¤¤ the full name of the currency):
567: (4) >>> format_currency(1099.98, 'EUR', u'\xa4\xa4 #,##0.00', locale='en_US')
568: (4) u'EUR 1,099.98'
569: (4) >>> format_currency(1099.98, 'EUR', u'#,##0.00 \xa4\xa4\xa4', locale='en_US')
570: (4) u'1,099.98 euros'
571: (4) Currencies usually have a specific number of decimal digits. This function
572: (4) favours that information over the given format:
573: (4) >>> format_currency(1099.98, 'JPY', locale='en_US')
574: (4) u'\\xa51,100'
575: (4) >>> format_currency(1099.98, 'COP', u'#,##0.00', locale='es_ES')
576: (4) u'1.099,98'
577: (4) However, the number of decimal digits can be overridden from the currency
578: (4) information, by setting the last parameter to ``False``:
579: (4) >>> format_currency(1099.98, 'JPY', locale='en_US', currency_digits=False)
580: (4) u'\\xa51,099.98'
581: (4) >>> format_currency(1099.98, 'COP', u'#,##0.00', locale='es_ES', currency_digits=False)
582: (4) u'1.099,98'
583: (4) If a format is not specified the type of currency format to use
584: (4) from the locale can be specified:
585: (4) >>> format_currency(1099.98, 'EUR', locale='en_US', format_type='standard')
586: (4) u'\\u20ac1,099.98'
587: (4) When the given currency format type is not available, an exception is
588: (4) raised:
589: (4) >>> format_currency('1099.98', 'EUR', locale='root', format_type='unknown')
590: (4) Traceback (most recent call last):
591: (8) ...
592: (4) UnknownCurrencyFormatError: "'unknown' is not a known currency format type"
593: (4) >>> format_currency(101299.98, 'USD', locale='en_US', group_separator=False)
594: (4) u'$101299.98'
595: (4) >>> format_currency(101299.98, 'USD', locale='en_US', group_separator=True)
596: (4) u'$101,299.98'
597: (4) You can also pass format_type='name' to use long display names. The order of
598: (4) the number and currency name, along with the correct localized plural form
599: (4) of the currency name, is chosen according to locale:
600: (4) >>> format_currency(1, 'USD', locale='en_US', format_type='name')
601: (4) u'1.00 US dollar'
602: (4) >>> format_currency(1099.98, 'USD', locale='en_US', format_type='name')
603: (4) u'1,099.98 US dollars'
604: (4) >>> format_currency(1099.98, 'USD', locale='ee', format_type='name')
605: (4) u'us ga dollar 1,099.98'
606: (4) By default the locale is allowed to truncate and round a high-precision
607: (4) number by forcing its format pattern onto the decimal part. You can bypass
608: (4) this behavior with the `decimal_quantization` parameter:
609: (4) >>> format_currency(1099.9876, 'USD', locale='en_US')
610: (4) u'$1,099.99'
611: (4) >>> format_currency(1099.9876, 'USD', locale='en_US', decimal_quantization=False)
612: (4) u'$1,099.9876'
613: (4) :param number: the number to format
614: (4) :param currency: the currency code
615: (4) :param format: the format string to use
616: (4) :param locale: the `Locale` object or locale identifier
617: (4) :param currency_digits: use the currency's natural number of decimal digits
618: (4) :param format_type: the currency format type to use
619: (4) :param decimal_quantization: Truncate and round high-precision numbers to
620: (33) the format pattern. Defaults to `True`.
621: (4) :param group_separator: Boolean to switch group separator on/off in a locale's
622: (28) number format.
623: (4) :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn".
624: (29) The special value "default" will use the default numbering system of the locale.
625: (4) :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale.
626: (4) """
627: (4) if format_type == 'name':
628: (8) return _format_currency_long_name(number, currency, format=format,
629: (42) locale=locale, currency_digits=currency_digits,
630: (42) decimal_quantization=decimal_quantization, group_separator=group_separator,
631: (42) numbering_system=numbering_system)
632: (4) locale = Locale.parse(locale)
633: (4) if format:
634: (8) pattern = parse_pattern(format)
635: (4) else:
636: (8) try:
637: (12) pattern = locale.currency_formats[format_type]
638: (8) except KeyError:
639: (12) raise UnknownCurrencyFormatError(f"{format_type!r} is not a known currency format type") from None
640: (4) return pattern.apply(
641: (8) number, locale, currency=currency, currency_digits=currency_digits,
642: (8) decimal_quantization=decimal_quantization, group_separator=group_separator, numbering_system=numbering_system)
643: (0) def _format_currency_long_name(
644: (4) number: float | decimal.Decimal | str,
645: (4) currency: str,
646: (4) format: str | NumberPattern | None = None,
647: (4) locale: Locale | str | None = LC_NUMERIC,
648: (4) currency_digits: bool = True,
649: (4) format_type: Literal["name", "standard", "accounting"] = "standard",
650: (4) decimal_quantization: bool = True,
651: (4) group_separator: bool = True,
652: (4) *,
653: (4) numbering_system: Literal["default"] | str = "latn",
654: (0) ) -> str:
655: (4) # Algorithm described here:
656: (4) # https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
657: (4) locale = Locale.parse(locale)
658: (4) # Step 1.
659: (4) # There are no examples of items with explicit count (0 or 1) in current
660: (4) # locale data. So there is no point implementing that.
661: (4) # Step 2.
662: (4) # Correct number to numeric type, important for looking up plural rules:
663: (4) number_n = float(number) if isinstance(number, str) else number
664: (4) # Step 3.
665: (4) unit_pattern = get_currency_unit_pattern(currency, count=number_n, locale=locale)
666: (4) # Step 4.
667: (4) display_name = get_currency_name(currency, count=number_n, locale=locale)
668: (4) # Step 5.
669: (4) if not format:
670: (8) format = locale.decimal_formats[None]
671: (4) pattern = parse_pattern(format)
672: (4) number_part = pattern.apply(
673: (8) number, locale, currency=currency, currency_digits=currency_digits,
674: (8) decimal_quantization=decimal_quantization, group_separator=group_separator, numbering_system=numbering_system)
675: (4) return unit_pattern.format(number_part, display_name)
676: (0) def format_compact_currency(
677: (4) number: float | decimal.Decimal | str,
678: (4) currency: str,
679: (4) *,
680: (4) format_type: Literal["short"] = "short",
681: (4) locale: Locale | str | None = LC_NUMERIC,
682: (4) fraction_digits: int = 0,
683: (4) numbering_system: Literal["default"] | str = "latn",
684: (0) ) -> str:
685: (4) """Format a number as a currency value in compact form.
686: (4) >>> format_compact_currency(12345, 'USD', locale='en_US')
687: (4) u'$12K'
688: (4) >>> format_compact_currency(123456789, 'USD', locale='en_US', fraction_digits=2)
689: (4) u'$123.46M'
690: (4) >>> format_compact_currency(123456789, 'EUR', locale='de_DE', fraction_digits=1)
691: (4) '123,5\xa0Mio.\xa0€'
692: (4) :param number: the number to format
693: (4) :param currency: the currency code
694: (4) :param format_type: the compact format type to use. Defaults to "short".
695: (4) :param locale: the `Locale` object or locale identifier
696: (4) :param fraction_digits: Number of digits after the decimal point to use. Defaults to `0`.
697: (4) :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn".
698: (29) The special value "default" will use the default numbering system of the locale.
699: (4) :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale.
700: (4) """
701: (4) locale = Locale.parse(locale)
702: (4) try:
703: (8) compact_format = locale.compact_currency_formats[format_type]
704: (4) except KeyError as error:
705: (8) raise UnknownCurrencyFormatError(f"{format_type!r} is not a known compact currency format type") from error
706: (4) number, format = _get_compact_format(number, compact_format, locale, fraction_digits)
707: (4) # Did not find a format, fall back.
708: (4) if format is None or "¤" not in str(format):
709: (8) # find first format that has a currency symbol
710: (8) for magnitude in compact_format['other']:
711: (12) format = compact_format['other'][magnitude].pattern
712: (12) if '¤' not in format:
713: (16) continue
714: (12) # remove characters that are not the currency symbol, 0's or spaces
715: (12) format = re.sub(r'[^0\s¤]', '', format)
716: (12) # compress adjacent spaces into one
717: (12) format = re.sub(r'(\s)\s+', r'\1', format).strip()
718: (12) break
719: (4) if format is None:
720: (8) raise ValueError('No compact currency format found for the given number and locale.')
721: (4) pattern = parse_pattern(format)
722: (4) return pattern.apply(number, locale, currency=currency, currency_digits=False, decimal_quantization=False,
723: (25) numbering_system=numbering_system)
724: (0) def format_percent(
725: (4) number: float | decimal.Decimal | str,
726: (4) format: str | NumberPattern | None = None,
727: (4) locale: Locale | str | None = LC_NUMERIC,
728: (4) decimal_quantization: bool = True,
729: (4) group_separator: bool = True,
730: (4) *,
731: (4) numbering_system: Literal["default"] | str = "latn",
732: (0) ) -> str:
733: (4) """Return formatted percent value for a specific locale.
734: (4) >>> format_percent(0.34, locale='en_US')
735: (4) u'34%'
736: (4) >>> format_percent(25.1234, locale='en_US')
737: (4) u'2,512%'
738: (4) >>> format_percent(25.1234, locale='sv_SE')
739: (4) u'2\\xa0512\\xa0%'
740: (4) >>> format_percent(25.1234, locale='ar_EG', numbering_system='default')
741: (4) u'2٬512%'
742: (4) The format pattern can also be specified explicitly:
743: (4) >>> format_percent(25.1234, u'#,##0\u2030', locale='en_US')
744: (4) u'25,123\u2030'
745: (4) By default the locale is allowed to truncate and round a high-precision
746: (4) number by forcing its format pattern onto the decimal part. You can bypass
747: (4) this behavior with the `decimal_quantization` parameter:
748: (4) >>> format_percent(23.9876, locale='en_US')
749: (4) u'2,399%'
750: (4) >>> format_percent(23.9876, locale='en_US', decimal_quantization=False)
751: (4) u'2,398.76%'
752: (4) >>> format_percent(229291.1234, locale='pt_BR', group_separator=False)
753: (4) u'22929112%'
754: (4) >>> format_percent(229291.1234, locale='pt_BR', group_separator=True)
755: (4) u'22.929.112%'
756: (4) :param number: the percent number to format
757: (4) :param format:
758: (4) :param locale: the `Locale` object or locale identifier
759: (4) :param decimal_quantization: Truncate and round high-precision numbers to
760: (33) the format pattern. Defaults to `True`.
761: (4) :param group_separator: Boolean to switch group separator on/off in a locale's
762: (28) number format.
763: (4) :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn".
764: (29) The special value "default" will use the default numbering system of the locale.
765: (4) :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale.
766: (4) """
767: (4) locale = Locale.parse(locale)
768: (4) if not format:
769: (8) format = locale.percent_formats[None]
770: (4) pattern = parse_pattern(format)
771: (4) return pattern.apply(
772: (8) number, locale, decimal_quantization=decimal_quantization, group_separator=group_separator,
773: (8) numbering_system=numbering_system,
774: (4) )
775: (0) def format_scientific(
776: (8) number: float | decimal.Decimal | str,
777: (8) format: str | NumberPattern | None = None,
778: (8) locale: Locale | str | None = LC_NUMERIC,
779: (8) decimal_quantization: bool = True,
780: (8) *,
781: (8) numbering_system: Literal["default"] | str = "latn",
782: (0) ) -> str:
783: (4) """Return value formatted in scientific notation for a specific locale.
784: (4) >>> format_scientific(10000, locale='en_US')
785: (4) u'1E4'
786: (4) >>> format_scientific(10000, locale='ar_EG', numbering_system='default')
787: (4) u'1أس4'
788: (4) The format pattern can also be specified explicitly:
789: (4) >>> format_scientific(1234567, u'##0.##E00', locale='en_US')
790: (4) u'1.23E06'
791: (4) By default the locale is allowed to truncate and round a high-precision
792: (4) number by forcing its format pattern onto the decimal part. You can bypass
793: (4) this behavior with the `decimal_quantization` parameter:
794: (4) >>> format_scientific(1234.9876, u'#.##E0', locale='en_US')
795: (4) u'1.23E3'
796: (4) >>> format_scientific(1234.9876, u'#.##E0', locale='en_US', decimal_quantization=False)
797: (4) u'1.2349876E3'
798: (4) :param number: the number to format
799: (4) :param format:
800: (4) :param locale: the `Locale` object or locale identifier
801: (4) :param decimal_quantization: Truncate and round high-precision numbers to
802: (33) the format pattern. Defaults to `True`.
803: (4) :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn".
804: (29) The special value "default" will use the default numbering system of the locale.
805: (4) :raise `UnsupportedNumberingSystemError`: If the numbering system is not supported by the locale.
806: (4) """
807: (4) locale = Locale.parse(locale)
808: (4) if not format:
809: (8) format = locale.scientific_formats[None]
810: (4) pattern = parse_pattern(format)
811: (4) return pattern.apply(
812: (8) number, locale, decimal_quantization=decimal_quantization, numbering_system=numbering_system)
813: (0) class NumberFormatError(ValueError):
814: (4) """Exception raised when a string cannot be parsed into a number."""
815: (4) def __init__(self, message: str, suggestions: list[str] | None = None) -> None:
816: (8) super().__init__(message)
817: (8) #: a list of properly formatted numbers derived from the invalid input
818: (8) self.suggestions = suggestions
819: (0) SPACE_CHARS = {
820: (4) ' ', # space
821: (4) '\xa0', # no-break space
822: (4) '\u202f', # narrow no-break space
823: (0) }
824: (0) SPACE_CHARS_RE = re.compile('|'.join(SPACE_CHARS))
825: (0) def parse_number(
826: (4) string: str,
827: (4) locale: Locale | str | None = LC_NUMERIC,
828: (4) *,
829: (4) numbering_system: Literal["default"] | str = "latn",
830: (0) ) -> int:
831: (4) """Parse localized number string into an integer.
832: (4) >>> parse_number('1,099', locale='en_US')
833: (4) 1099
834: (4) >>> parse_number('1.099', locale='de_DE')
835: (4) 1099
836: (4) When the given string cannot be parsed, an exception is raised:
837: (4) >>> parse_number('1.099,98', locale='de')
838: (4) Traceback (most recent call last):
839: (8) ...
840: (4) NumberFormatError: '1.099,98' is not a valid number
841: (4) :param string: the string to parse
842: (4) :param locale: the `Locale` object or locale identifier
843: (4) :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn".
844: (29) The special value "default" will use the default numbering system of the locale.
845: (4) :return: the parsed number
846: (4) :raise `NumberFormatError`: if the string can not be converted to a number
847: (4) :raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale.
848: (4) """
849: (4) group_symbol = get_group_symbol(locale, numbering_system=numbering_system)
850: (4) if (
851: (8) group_symbol in SPACE_CHARS and # if the grouping symbol is a kind of space,
852: (8) group_symbol not in string and # and the string to be parsed does not contain it,
853: (8) SPACE_CHARS_RE.search(string) # but it does contain any other kind of space instead,
854: (4) ):
855: (8) # ... it's reasonable to assume it is taking the place of the grouping symbol.
856: (8) string = SPACE_CHARS_RE.sub(group_symbol, string)
857: (4) try:
858: (8) return int(string.replace(group_symbol, ''))
859: (4) except ValueError as ve:
860: (8) raise NumberFormatError(f"{string!r} is not a valid number") from ve
861: (0) def parse_decimal(
862: (4) string: str,
863: (4) locale: Locale | str | None = LC_NUMERIC,
864: (4) strict: bool = False,
865: (4) *,
866: (4) numbering_system: Literal["default"] | str = "latn",
867: (0) ) -> decimal.Decimal:
868: (4) """Parse localized decimal string into a decimal.
869: (4) >>> parse_decimal('1,099.98', locale='en_US')
870: (4) Decimal('1099.98')
871: (4) >>> parse_decimal('1.099,98', locale='de')
872: (4) Decimal('1099.98')
873: (4) >>> parse_decimal('12 345,123', locale='ru')
874: (4) Decimal('12345.123')
875: (4) >>> parse_decimal('1٬099٫98', locale='ar_EG', numbering_system='default')
876: (4) Decimal('1099.98')
877: (4) When the given string cannot be parsed, an exception is raised:
878: (4) >>> parse_decimal('2,109,998', locale='de')
879: (4) Traceback (most recent call last):
880: (8) ...
881: (4) NumberFormatError: '2,109,998' is not a valid decimal number
882: (4) If `strict` is set to `True` and the given string contains a number
883: (4) formatted in an irregular way, an exception is raised:
884: (4) >>> parse_decimal('30.00', locale='de', strict=True)
885: (4) Traceback (most recent call last):
886: (8) ...
887: (4) NumberFormatError: '30.00' is not a properly formatted decimal number. Did you mean '3.000'? Or maybe '30,00'?
888: (4) >>> parse_decimal('0.00', locale='de', strict=True)
889: (4) Traceback (most recent call last):
890: (8) ...
891: (4) NumberFormatError: '0.00' is not a properly formatted decimal number. Did you mean '0'?
892: (4) :param string: the string to parse
893: (4) :param locale: the `Locale` object or locale identifier
894: (4) :param strict: controls whether numbers formatted in an irregular way are
895: (19) accepted or rejected
896: (4) :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn".
897: (29) The special value "default" will use the default numbering system of the locale.
898: (4) :raise NumberFormatError: if the string can not be converted to a
899: (30) decimal number
900: (4) :raise UnsupportedNumberingSystemError: if the numbering system is not supported by the locale.
901: (4) """
902: (4) locale = Locale.parse(locale)
903: (4) group_symbol = get_group_symbol(locale, numbering_system=numbering_system)
904: (4) decimal_symbol = get_decimal_symbol(locale, numbering_system=numbering_system)
905: (4) if not strict and (
906: (8) group_symbol in SPACE_CHARS and # if the grouping symbol is a kind of space,
907: (8) group_symbol not in string and # and the string to be parsed does not contain it,
908: (8) SPACE_CHARS_RE.search(string) # but it does contain any other kind of space instead,
909: (4) ):
910: (8) # ... it's reasonable to assume it is taking the place of the grouping symbol.
911: (8) string = SPACE_CHARS_RE.sub(group_symbol, string)
912: (4) try:
913: (8) parsed = decimal.Decimal(string.replace(group_symbol, '')
914: (39) .replace(decimal_symbol, '.'))
915: (4) except decimal.InvalidOperation as exc:
916: (8) raise NumberFormatError(f"{string!r} is not a valid decimal number") from exc
917: (4) if strict and group_symbol in string:
918: (8) proper = format_decimal(parsed, locale=locale, decimal_quantization=False, numbering_system=numbering_system)
919: (8) if string != proper and proper != _remove_trailing_zeros_after_decimal(string, decimal_symbol):
920: (12) try:
921: (16) parsed_alt = decimal.Decimal(string.replace(decimal_symbol, '')
922: (51) .replace(group_symbol, '.'))
923: (12) except decimal.InvalidOperation as exc:
924: (16) raise NumberFormatError(
925: (20) f"{string!r} is not a properly formatted decimal number. "
926: (20) f"Did you mean {proper!r}?",
927: (20) suggestions=[proper],
928: (16) ) from exc
929: (12) else:
930: (16) proper_alt = format_decimal(
931: (20) parsed_alt,
932: (20) locale=locale,
933: (20) decimal_quantization=False,
934: (20) numbering_system=numbering_system,
935: (16) )
936: (16) if proper_alt == proper:
937: (20) raise NumberFormatError(
938: (24) f"{string!r} is not a properly formatted decimal number. "
939: (24) f"Did you mean {proper!r}?",
940: (24) suggestions=[proper],
941: (20) )
942: (16) else:
943: (20) raise NumberFormatError(
944: (24) f"{string!r} is not a properly formatted decimal number. "
945: (24) f"Did you mean {proper!r}? Or maybe {proper_alt!r}?",
946: (24) suggestions=[proper, proper_alt],
947: (20) )
948: (4) return parsed
949: (0) def _remove_trailing_zeros_after_decimal(string: str, decimal_symbol: str) -> str:
950: (4) """
951: (4) Remove trailing zeros from the decimal part of a numeric string.
952: (4) This function takes a string representing a numeric value and a decimal symbol.
953: (4) It removes any trailing zeros that appear after the decimal symbol in the number.
954: (4) If the decimal part becomes empty after removing trailing zeros, the decimal symbol
955: (4) is also removed. If the string does not contain the decimal symbol, it is returned unchanged.
956: (4) :param string: The numeric string from which to remove trailing zeros.
957: (4) :type string: str
958: (4) :param decimal_symbol: The symbol used to denote the decimal point.
959: (4) :type decimal_symbol: str
960: (4) :return: The numeric string with trailing zeros removed from its decimal part.
961: (4) :rtype: str
962: (4) Example:
963: (4) >>> _remove_trailing_zeros_after_decimal("123.4500", ".")
964: (4) '123.45'
965: (4) >>> _remove_trailing_zeros_after_decimal("100.000", ".")
966: (4) '100'
967: (4) >>> _remove_trailing_zeros_after_decimal("100", ".")
968: (4) '100'
969: (4) """
970: (4) integer_part, _, decimal_part = string.partition(decimal_symbol)
971: (4) if decimal_part:
972: (8) decimal_part = decimal_part.rstrip("0")
973: (8) if decimal_part:
974: (12) return integer_part + decimal_symbol + decimal_part
975: (8) return integer_part
976: (4) return string
977: (0) PREFIX_END = r'[^0-9@#.,]'
978: (0) NUMBER_TOKEN = r'[0-9@#.,E+]'
979: (0) PREFIX_PATTERN = r"(?P<prefix>(?:'[^']*'|%s)*)" % PREFIX_END
980: (0) NUMBER_PATTERN = r"(?P<number>%s*)" % NUMBER_TOKEN
981: (0) SUFFIX_PATTERN = r"(?P<suffix>.*)"
982: (0) number_re = re.compile(f"{PREFIX_PATTERN}{NUMBER_PATTERN}{SUFFIX_PATTERN}")
983: (0) def parse_grouping(p: str) -> tuple[int, int]:
984: (4) """Parse primary and secondary digit grouping
985: (4) >>> parse_grouping('##')
986: (4) (1000, 1000)
987: (4) >>> parse_grouping('#,###')
988: (4) (3, 3)
989: (4) >>> parse_grouping('#,####,###')
990: (4) (3, 4)
991: (4) """
992: (4) width = len(p)
993: (4) g1 = p.rfind(',')
994: (4) if g1 == -1:
995: (8) return 1000, 1000
996: (4) g1 = width - g1 - 1
997: (4) g2 = p[:-g1 - 1].rfind(',')
998: (4) if g2 == -1:
999: (8) return g1, g1
1000: (4) g2 = width - g1 - g2 - 2
1001: (4) return g1, g2
1002: (0) def parse_pattern(pattern: NumberPattern | str) -> NumberPattern:
1003: (4) """Parse number format patterns"""
1004: (4) if isinstance(pattern, NumberPattern):
1005: (8) return pattern
1006: (4) def _match_number(pattern):
1007: (8) rv = number_re.search(pattern)
1008: (8) if rv is None:
1009: (12) raise ValueError(f"Invalid number pattern {pattern!r}")
1010: (8) return rv.groups()
1011: (4) pos_pattern = pattern
1012: (4) # Do we have a negative subpattern?
1013: (4) if ';' in pattern:
1014: (8) pos_pattern, neg_pattern = pattern.split(';', 1)
1015: (8) pos_prefix, number, pos_suffix = _match_number(pos_pattern)
1016: (8) neg_prefix, _, neg_suffix = _match_number(neg_pattern)
1017: (4) else:
1018: (8) pos_prefix, number, pos_suffix = _match_number(pos_pattern)
1019: (8) neg_prefix = f"-{pos_prefix}"
1020: (8) neg_suffix = pos_suffix
1021: (4) if 'E' in number:
1022: (8) number, exp = number.split('E', 1)
1023: (4) else:
1024: (8) exp = None
1025: (4) if '@' in number and '.' in number and '0' in number:
1026: (8) raise ValueError('Significant digit patterns can not contain "@" or "0"')
1027: (4) if '.' in number:
1028: (8) integer, fraction = number.rsplit('.', 1)
1029: (4) else:
1030: (8) integer = number
1031: (8) fraction = ''
1032: (4) def parse_precision(p):
1033: (8) """Calculate the min and max allowed digits"""
1034: (8) min = max = 0
1035: (8) for c in p:
1036: (12) if c in '@0':
1037: (16) min += 1
1038: (16) max += 1
1039: (12) elif c == '#':
1040: (16) max += 1
1041: (12) elif c == ',':
1042: (16) continue
1043: (12) else:
1044: (16) break
1045: (8) return min, max
1046: (4) int_prec = parse_precision(integer)
1047: (4) frac_prec = parse_precision(fraction)
1048: (4) if exp:
1049: (8) exp_plus = exp.startswith('+')
1050: (8) exp = exp.lstrip('+')
1051: (8) exp_prec = parse_precision(exp)
1052: (4) else:
1053: (8) exp_plus = None
1054: (8) exp_prec = None
1055: (4) grouping = parse_grouping(integer)
1056: (4) return NumberPattern(pattern, (pos_prefix, neg_prefix),
1057: (25) (pos_suffix, neg_suffix), grouping,
1058: (25) int_prec, frac_prec,
1059: (25) exp_prec, exp_plus, number)
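# Illustrative sketch of the fields produced for a common pattern (derived
# by tracing the parsing code above):
#
#     p = parse_pattern('#,##0.00')
#     p.prefix      # -> ('', '-')
#     p.suffix      # -> ('', '')
#     p.grouping    # -> (3, 3)
#     p.int_prec    # -> (1, 4)
#     p.frac_prec   # -> (2, 2)
#     p.exp_prec    # -> None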
1060: (0) class NumberPattern:
1061: (4) def __init__(
1062: (8) self,
1063: (8) pattern: str,
1064: (8) prefix: tuple[str, str],
1065: (8) suffix: tuple[str, str],
1066: (8) grouping: tuple[int, int],
1067: (8) int_prec: tuple[int, int],
1068: (8) frac_prec: tuple[int, int],
1069: (8) exp_prec: tuple[int, int] | None,
1070: (8) exp_plus: bool | None,
1071: (8) number_pattern: str | None = None,
1072: (4) ) -> None:
1073: (8) # Metadata of the decomposed parsed pattern.
1074: (8) self.pattern = pattern
1075: (8) self.prefix = prefix
1076: (8) self.suffix = suffix
1077: (8) self.number_pattern = number_pattern
1078: (8) self.grouping = grouping
1079: (8) self.int_prec = int_prec
1080: (8) self.frac_prec = frac_prec
1081: (8) self.exp_prec = exp_prec
1082: (8) self.exp_plus = exp_plus
1083: (8) self.scale = self.compute_scale()
1084: (4) def __repr__(self) -> str:
1085: (8) return f"<{type(self).__name__} {self.pattern!r}>"
1086: (4) def compute_scale(self) -> Literal[0, 2, 3]:
1087: (8) """Return the scaling factor to apply to the number before rendering.
1088: (8) Auto-set to a factor of 2 or 3 if a ``%`` or ``‰`` sign is detected
1089: (8) in the prefix or suffix of the pattern. Default is to not mess with
1090: (8) the scale at all and keep it at 0.
1091: (8) """
1092: (8) scale = 0
1093: (8) if '%' in ''.join(self.prefix + self.suffix):
1094: (12) scale = 2
1095: (8) elif '‰' in ''.join(self.prefix + self.suffix):
1096: (12) scale = 3
1097: (8) return scale
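# Sketch (separate from the class above): the scale computed here is what lets
# percent and per-mille patterns accept plain fractions as input.
from babel.numbers import parse_pattern

percent = parse_pattern('#,##0%')
assert percent.scale == 2                       # '%' found in the suffix
assert percent.apply(0.25, 'en_US') == '25%'    # 0.25 is multiplied by 10**2

permille = parse_pattern('#,##0‰')
assert permille.scale == 3                      # '‰' scales by 10**3
assert permille.apply(0.025, 'en_US') == '25‰'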
1098: (4) def scientific_notation_elements(
1099: (8) self,
1100: (8) value: decimal.Decimal,
1101: (8) locale: Locale | str | None,
1102: (8) *,
1103: (8) numbering_system: Literal["default"] | str = "latn",
1104: (4) ) -> tuple[decimal.Decimal, int, str]:
1105: (8) """ Returns normalized scientific notation components of a value.
1106: (8) """
1107: (8) # Normalize value to only have one lead digit.
1108: (8) exp = value.adjusted()
1109: (8) value = value * get_decimal_quantum(exp)
1110: (8) assert value.adjusted() == 0
1111: (8) # Shift exponent and value by the minimum number of leading digits
1112: (8) # imposed by the rendering pattern. And always make that number
1113: (8) # greater or equal to 1.
1114: (8) lead_shift = max([1, min(self.int_prec)]) - 1
1115: (8) exp = exp - lead_shift
1116: (8) value = value * get_decimal_quantum(-lead_shift)
1117: (8) # Get exponent sign symbol.
1118: (8) exp_sign = ''
1119: (8) if exp < 0:
1120: (12) exp_sign = get_minus_sign_symbol(locale, numbering_system=numbering_system)
1121: (8) elif self.exp_plus:
1122: (12) exp_sign = get_plus_sign_symbol(locale, numbering_system=numbering_system)
1123: (8) # Normalize exponent value now that we have the sign.
1124: (8) exp = abs(exp)
1125: (8) return value, exp, exp_sign
1126: (4) def apply(
1127: (8) self,
1128: (8) value: float | decimal.Decimal | str,
1129: (8) locale: Locale | str | None,
1130: (8) currency: str | None = None,
1131: (8) currency_digits: bool = True,
1132: (8) decimal_quantization: bool = True,
1133: (8) force_frac: tuple[int, int] | None = None,
1134: (8) group_separator: bool = True,
1135: (8) *,
1136: (8) numbering_system: Literal["default"] | str = "latn",
1137: (4) ):
1138: (8) """Renders into a string a number following the defined pattern.
1139: (8) Forced decimal quantization is active by default so we'll produce a
1140: (8) number string that is strictly following CLDR pattern definitions.
1141: (8) :param value: The value to format. If this is not a Decimal object,
1142: (22) it will be cast to one.
1143: (8) :type value: decimal.Decimal|float|int
1144: (8) :param locale: The locale to use for formatting.
1145: (8) :type locale: str|babel.core.Locale
1146: (8) :param currency: Which currency, if any, to format as.
1147: (8) :type currency: str|None
1148: (8) :param currency_digits: Whether or not to use the currency's precision.
1149: (32) If false, the pattern's precision is used.
1150: (8) :type currency_digits: bool
1151: (8) :param decimal_quantization: Whether decimal numbers should be forcibly
1152: (37) quantized to produce a formatted output
1153: (37) strictly matching the CLDR definition for
1154: (37) the locale.
1155: (8) :type decimal_quantization: bool
1156: (8) :param force_frac: DEPRECATED - a forced override for `self.frac_prec`
1157: (27) for a single formatting invocation.
1158: (8) :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn".
1159: (33) The special value "default" will use the default numbering system of the locale.
1160: (8) :return: Formatted decimal string.
1161: (8) :rtype: str
1162: (8) :raise UnsupportedNumberingSystemError: If the numbering system is not supported by the locale.
1163: (8) """
1164: (8) if not isinstance(value, decimal.Decimal):
1165: (12) value = decimal.Decimal(str(value))
1166: (8) value = value.scaleb(self.scale)
1167: (8) # Separate the absolute value from its sign.
1168: (8) is_negative = int(value.is_signed())
1169: (8) value = abs(value).normalize()
1170: (8) # Prepare scientific notation metadata.
1171: (8) if self.exp_prec:
1172: (12) value, exp, exp_sign = self.scientific_notation_elements(value, locale, numbering_system=numbering_system)
1173: (8) # Adjust the precision of the fractional part and force it to the
1174: (8) # currency's if necessary.
1175: (8) if force_frac:
1176: (12) # TODO (3.x?): Remove this parameter
1177: (12) warnings.warn(
1178: (16) 'The force_frac parameter to NumberPattern.apply() is deprecated.',
1179: (16) DeprecationWarning,
1180: (16) stacklevel=2,
1181: (12) )
1182: (12) frac_prec = force_frac
1183: (8) elif currency and currency_digits:
1184: (12) frac_prec = (get_currency_precision(currency), ) * 2
1185: (8) else:
1186: (12) frac_prec = self.frac_prec
1187: (8) # Bump decimal precision to the natural precision of the number if it
1188: (8) # exceeds the one we're about to use. This adaptive precision is only
1189: (8) # triggered if the decimal quantization is disabled or if a scientific
1190: (8) # notation pattern has a missing mandatory fractional part (as in the
1191: (8) # default '#E0' pattern). This special case has been extensively
1192: (8) # discussed at https://github.com/python-babel/babel/pull/494#issuecomment-307649969 .
1193: (8) if not decimal_quantization or (self.exp_prec and frac_prec == (0, 0)):
1194: (12) frac_prec = (frac_prec[0], max([frac_prec[1], get_decimal_precision(value)]))
1195: (8) # Render scientific notation.
1196: (8) if self.exp_prec:
1197: (12) number = ''.join([
1198: (16) self._quantize_value(value, locale, frac_prec, group_separator, numbering_system=numbering_system),
1199: (16) get_exponential_symbol(locale, numbering_system=numbering_system),
1200: (16) exp_sign, # type: ignore # exp_sign is always defined here
1201: (16) self._format_int(str(exp), self.exp_prec[0], self.exp_prec[1], locale, numbering_system=numbering_system), # type: ignore # exp is always defined here
1202: (12) ])
1203: (8) # Is it a significant digits pattern?
1204: (8) elif '@' in self.pattern:
1205: (12) text = self._format_significant(value,
1206: (44) self.int_prec[0],
1207: (44) self.int_prec[1])
1208: (12) a, sep, b = text.partition(".")
1209: (12) number = self._format_int(a, 0, 1000, locale, numbering_system=numbering_system)
1210: (12) if sep:
1211: (16) number += get_decimal_symbol(locale, numbering_system=numbering_system) + b
1212: (8) # A normal number pattern.
1213: (8) else:
1214: (12) number = self._quantize_value(value, locale, frac_prec, group_separator, numbering_system=numbering_system)
1215: (8) retval = ''.join([
1216: (12) self.prefix[is_negative],
1217: (12) number if self.number_pattern != '' else '',
1218: (12) self.suffix[is_negative]])
1219: (8) if '¤' in retval and currency is not None:
1220: (12) retval = retval.replace('¤¤¤', get_currency_name(currency, value, locale))
1221: (12) retval = retval.replace('¤¤', currency.upper())
1222: (12) retval = retval.replace('¤', get_currency_symbol(currency, locale))
1223: (8) # remove single quotes around text, except for doubled single quotes
1224: (8) # which are replaced with a single quote
1225: (8) retval = re.sub(r"'([^']*)'", lambda m: m.group(1) or "'", retval)
1226: (8) return retval
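# Sketch (separate from the class above): apply() is the low-level renderer
# behind format_decimal()/format_currency()/format_scientific(). The patterns
# are example inputs; the expected strings assume the en_US locale data.
from babel.numbers import parse_pattern

assert parse_pattern('#,##0.###').apply(1234567.891, 'en_US') == '1,234,567.891'
# '¤' placeholders are swapped for the currency symbol, code or name:
assert parse_pattern('¤#,##0.00').apply(3, 'en_US', currency='USD') == '$3.00'
# A pattern with an exponent takes the scientific-notation branch above:
assert parse_pattern('0.###E0').apply(0.0012345, 'en_US') == '1.234E-3'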
1227: (4) #
1228: (4) # This is one tricky piece of code. The idea is to rely as much as possible
1229: (4) # on the decimal module to minimize the amount of code.
1230: (4) #
1231: (4) # Conceptually, the implementation of this method can be summarized in the
1232: (4) # following steps:
1233: (4) #
1234: (4) # - Move or shift the decimal point (i.e. the exponent) so the maximum
1235: (4) # amount of significant digits fall into the integer part (i.e. to the
1236: (4) # left of the decimal point)
1237: (4) #
1238: (4) # - Round the number to the nearest integer, discarding all the fractional
1239: (4) # part which contained extra digits to be eliminated
1240: (4) #
1241: (4) # - Convert the rounded integer to a string, that will contain the final
1242: (4) # sequence of significant digits already trimmed to the maximum
1243: (4) #
1244: (4) # - Restore the original position of the decimal point, potentially
1245: (4) # padding with zeroes on either side
1246: (4) #
1247: (4) def _format_significant(self, value: decimal.Decimal, minimum: int, maximum: int) -> str:
1248: (8) exp = value.adjusted()
1249: (8) scale = maximum - 1 - exp
1250: (8) digits = str(value.scaleb(scale).quantize(decimal.Decimal(1)))
1251: (8) if scale <= 0:
1252: (12) result = digits + '0' * -scale
1253: (8) else:
1254: (12) intpart = digits[:-scale]
1255: (12) i = len(intpart)
1256: (12) j = i + max(minimum - i, 0)
1257: (12) result = "{intpart}.{pad:0<{fill}}{fracpart}{fracextra}".format(
1258: (16) intpart=intpart or '0',
1259: (16) pad='',
1260: (16) fill=-min(exp + 1, 0),
1261: (16) fracpart=digits[i:j],
1262: (16) fracextra=digits[j:].rstrip('0'),
1263: (12) ).rstrip('.')
1264: (8) return result
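# Worked examples (separate from the class above) of the significant-digit
# path described in the comment block, driven through a '@@@' pattern, i.e.
# exactly three significant digits.
from babel.numbers import parse_pattern

sig = parse_pattern('@@@')
assert sig.apply(1234.5, 'en_US') == '1230'        # rounded to 3 significant digits
assert sig.apply(0.0012345, 'en_US') == '0.00123'  # leading zeros restored after rounding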
1265: (4) def _format_int(
1266: (8) self,
1267: (8) value: str,
1268: (8) min: int,
1269: (8) max: int,
1270: (8) locale: Locale | str | None,
1271: (8) *,
1272: (8) numbering_system: Literal["default"] | str,
1273: (4) ) -> str:
1274: (8) width = len(value)
1275: (8) if width < min:
1276: (12) value = '0' * (min - width) + value
1277: (8) gsize = self.grouping[0]
1278: (8) ret = ''
1279: (8) symbol = get_group_symbol(locale, numbering_system=numbering_system)
1280: (8) while len(value) > gsize:
1281: (12) ret = symbol + value[-gsize:] + ret
1282: (12) value = value[:-gsize]
1283: (12) gsize = self.grouping[1]
1284: (8) return value + ret
1285: (4) def _quantize_value(
1286: (8) self,
1287: (8) value: decimal.Decimal,
1288: (8) locale: Locale | str | None,
1289: (8) frac_prec: tuple[int, int],
1290: (8) group_separator: bool,
1291: (8) *,
1292: (8) numbering_system: Literal["default"] | str,
1293: (4) ) -> str:
1294: (8) # If the number is +/-Infinity, we can't quantize it
1295: (8) if value.is_infinite():
1296: (12) return get_infinity_symbol(locale, numbering_system=numbering_system)
1297: (8) quantum = get_decimal_quantum(frac_prec[1])
1298: (8) rounded = value.quantize(quantum)
1299: (8) a, sep, b = f"{rounded:f}".partition(".")
1300: (8) integer_part = a
1301: (8) if group_separator:
1302: (12) integer_part = self._format_int(a, self.int_prec[0], self.int_prec[1], locale, numbering_system=numbering_system)
1303: (8) number = integer_part + self._format_frac(b or '0', locale=locale, force_frac=frac_prec, numbering_system=numbering_system)
1304: (8) return number
1305: (4) def _format_frac(
1306: (8) self,
1307: (8) value: str,
1308: (8) locale: Locale | str | None,
1309: (8) force_frac: tuple[int, int] | None = None,
1310: (8) *,
1311: (8) numbering_system: Literal["default"] | str,
1312: (4) ) -> str:
1313: (8) min, max = force_frac or self.frac_prec
1314: (8) if len(value) < min:
1315: (12) value += ('0' * (min - len(value)))
1316: (8) if max == 0 or (min == 0 and int(value) == 0):
1317: (12) return ''
1318: (8) while len(value) > min and value[-1] == '0':
1319: (12) value = value[:-1]
1320: (8) return get_decimal_symbol(locale, numbering_system=numbering_system) + value
----------------------------------------
File 9 - . \support.py:
1: (0) """
2: (4) babel.support
3: (4) ~~~~~~~~~~~~~
4: (4) Several classes and functions that help with integrating and using Babel
5: (4) in applications.
6: (4) .. note:: The code in this module is not used by Babel itself.
7: (4) :copyright: (c) 2013-2024 by the Babel Team.
8: (4) :license: BSD, see LICENSE for more details.
9: (0) """
10: (0) from __future__ import annotations
11: (0) import decimal
12: (0) import gettext
13: (0) import locale
14: (0) import os
15: (0) from collections.abc import Iterator
16: (0) from typing import TYPE_CHECKING, Any, Callable, Iterable
17: (0) from babel.core import Locale
18: (0) from babel.dates import format_date, format_datetime, format_time, format_timedelta
19: (0) from babel.numbers import (
20: (4) format_compact_currency,
21: (4) format_compact_decimal,
22: (4) format_currency,
23: (4) format_decimal,
24: (4) format_percent,
25: (4) format_scientific,
26: (0) )
27: (0) if TYPE_CHECKING:
28: (4) from typing_extensions import Literal
29: (4) from babel.dates import _PredefinedTimeFormat
30: (0) class Format:
31: (4) """Wrapper class providing the various date and number formatting functions
32: (4) bound to a specific locale and time-zone.
33: (4) >>> from babel.util import UTC
34: (4) >>> from datetime import date
35: (4) >>> fmt = Format('en_US', UTC)
36: (4) >>> fmt.date(date(2007, 4, 1))
37: (4) u'Apr 1, 2007'
38: (4) >>> fmt.decimal(1.2345)
39: (4) u'1.234'
40: (4) """
41: (4) def __init__(
42: (8) self,
43: (8) locale: Locale | str,
44: (8) tzinfo: datetime.tzinfo | None = None,
45: (8) *,
46: (8) numbering_system: Literal["default"] | str = "latn",
47: (4) ) -> None:
48: (8) """Initialize the formatter.
49: (8) :param locale: the locale identifier or `Locale` instance
50: (8) :param tzinfo: the time-zone info (a `tzinfo` instance or `None`)
51: (8) :param numbering_system: The numbering system used for formatting number symbols. Defaults to "latn".
52: (33) The special value "default" will use the default numbering system of the locale.
53: (8) """
54: (8) self.locale = Locale.parse(locale)
55: (8) self.tzinfo = tzinfo
56: (8) self.numbering_system = numbering_system
57: (4) def date(
58: (8) self,
59: (8) date: datetime.date | None = None,
60: (8) format: _PredefinedTimeFormat | str = 'medium',
61: (4) ) -> str:
62: (8) """Return a date formatted according to the given pattern.
63: (8) >>> from datetime import date
64: (8) >>> fmt = Format('en_US')
65: (8) >>> fmt.date(date(2007, 4, 1))
66: (8) u'Apr 1, 2007'
67: (8) """
68: (8) return format_date(date, format, locale=self.locale)
69: (4) def datetime(
70: (8) self,
71: (8) datetime: datetime.date | None = None,
72: (8) format: _PredefinedTimeFormat | str = 'medium',
73: (4) ) -> str:
74: (8) """Return a date and time formatted according to the given pattern.
75: (8) >>> from datetime import datetime
76: (8) >>> from babel.dates import get_timezone
77: (8) >>> fmt = Format('en_US', tzinfo=get_timezone('US/Eastern'))
78: (8) >>> fmt.datetime(datetime(2007, 4, 1, 15, 30))
79: (8) u'Apr 1, 2007, 11:30:00\u202fAM'
80: (8) """
81: (8) return format_datetime(datetime, format, tzinfo=self.tzinfo, locale=self.locale)
82: (4) def time(
83: (8) self,
84: (8) time: datetime.time | datetime.datetime | None = None,
85: (8) format: _PredefinedTimeFormat | str = 'medium',
86: (4) ) -> str:
87: (8) """Return a time formatted according to the given pattern.
88: (8) >>> from datetime import datetime
89: (8) >>> from babel.dates import get_timezone
90: (8) >>> fmt = Format('en_US', tzinfo=get_timezone('US/Eastern'))
91: (8) >>> fmt.time(datetime(2007, 4, 1, 15, 30))
92: (8) u'11:30:00\u202fAM'
93: (8) """
94: (8) return format_time(time, format, tzinfo=self.tzinfo, locale=self.locale)
95: (4) def timedelta(
96: (8) self,
97: (8) delta: datetime.timedelta | int,
98: (8) granularity: Literal["year", "month", "week", "day", "hour", "minute", "second"] = "second",
99: (8) threshold: float = 0.85,
100: (8) format: Literal["narrow", "short", "medium", "long"] = "long",
101: (8) add_direction: bool = False,
102: (4) ) -> str:
103: (8) """Return a time delta according to the rules of the given locale.
104: (8) >>> from datetime import timedelta
105: (8) >>> fmt = Format('en_US')
106: (8) >>> fmt.timedelta(timedelta(weeks=11))
107: (8) u'3 months'
108: (8) """
109: (8) return format_timedelta(delta, granularity=granularity,
110: (32) threshold=threshold,
111: (32) format=format, add_direction=add_direction,
112: (32) locale=self.locale)
113: (4) def number(self, number: float | decimal.Decimal | str) -> str:
114: (8) """Return an integer number formatted for the locale.
115: (8) >>> fmt = Format('en_US')
116: (8) >>> fmt.number(1099)
117: (8) u'1,099'
118: (8) """
119: (8) return format_decimal(number, locale=self.locale, numbering_system=self.numbering_system)
120: (4) def decimal(self, number: float | decimal.Decimal | str, format: str | None = None) -> str:
121: (8) """Return a decimal number formatted for the locale.
122: (8) >>> fmt = Format('en_US')
123: (8) >>> fmt.decimal(1.2345)
124: (8) u'1.234'
125: (8) """
126: (8) return format_decimal(number, format, locale=self.locale, numbering_system=self.numbering_system)
127: (4) def compact_decimal(
128: (8) self,
129: (8) number: float | decimal.Decimal | str,
130: (8) format_type: Literal['short', 'long'] = 'short',
131: (8) fraction_digits: int = 0,
132: (4) ) -> str:
133: (8) """Return a number formatted in compact form for the locale.
134: (8) >>> fmt = Format('en_US')
135: (8) >>> fmt.compact_decimal(123456789)
136: (8) u'123M'
137: (8) >>> fmt.compact_decimal(1234567, format_type='long', fraction_digits=2)
138: (8) '1.23 million'
139: (8) """
140: (8) return format_compact_decimal(
141: (12) number,
142: (12) format_type=format_type,
143: (12) fraction_digits=fraction_digits,
144: (12) locale=self.locale,
145: (12) numbering_system=self.numbering_system,
146: (8) )
147: (4) def currency(self, number: float | decimal.Decimal | str, currency: str) -> str:
148: (8) """Return a number in the given currency formatted for the locale.
149: (8) """
150: (8) return format_currency(number, currency, locale=self.locale, numbering_system=self.numbering_system)
151: (4) def compact_currency(
152: (8) self,
153: (8) number: float | decimal.Decimal | str,
154: (8) currency: str,
155: (8) format_type: Literal['short'] = 'short',
156: (8) fraction_digits: int = 0,
157: (4) ) -> str:
158: (8) """Return a number in the given currency formatted for the locale
159: (8) using the compact number format.
160: (8) >>> Format('en_US').compact_currency(1234567, "USD", format_type='short', fraction_digits=2)
161: (8) '$1.23M'
162: (8) """
163: (8) return format_compact_currency(number, currency, format_type=format_type, fraction_digits=fraction_digits,
164: (39) locale=self.locale, numbering_system=self.numbering_system)
165: (4) def percent(self, number: float | decimal.Decimal | str, format: str | None = None) -> str:
166: (8) """Return a number formatted as percentage for the locale.
167: (8) >>> fmt = Format('en_US')
168: (8) >>> fmt.percent(0.34)
169: (8) u'34%'
170: (8) """
171: (8) return format_percent(number, format, locale=self.locale, numbering_system=self.numbering_system)
172: (4) def scientific(self, number: float | decimal.Decimal | str) -> str:
173: (8) """Return a number formatted using scientific notation for the locale.
174: (8) """
175: (8) return format_scientific(number, locale=self.locale, numbering_system=self.numbering_system)
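# A usage sketch (not part of the module above): a Format object just forwards
# to the babel.numbers / babel.dates functions with its stored locale, tzinfo
# and numbering system, so one instance can render everything for a user.
from babel.support import Format

fmt = Format('en_US')
assert fmt.number(1099) == '1,099'
assert fmt.currency(1234.5, 'USD') == '$1,234.50'
assert fmt.percent(0.34) == '34%'
# Separators come from the locale data, e.g. for German:
assert Format('de_DE').decimal(1234.5) == '1.234,5'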
176: (0) class LazyProxy:
177: (4) """Class for proxy objects that delegate to a specified function to evaluate
178: (4) the actual object.
179: (4) >>> def greeting(name='world'):
180: (4) ... return 'Hello, %s!' % name
181: (4) >>> lazy_greeting = LazyProxy(greeting, name='Joe')
182: (4) >>> print(lazy_greeting)
183: (4) Hello, Joe!
184: (4) >>> u' ' + lazy_greeting
185: (4) u' Hello, Joe!'
186: (4) >>> u'(%s)' % lazy_greeting
187: (4) u'(Hello, Joe!)'
188: (4) This can be used, for example, to implement lazy translation functions that
189: (4) delay the actual translation until the string is actually used. The
190: (4) rationale for such behavior is that the locale of the user may not always
191: (4) be available. In web applications, you only know the locale when processing
192: (4) a request.
193: (4) The proxy implementation attempts to be as complete as possible, so that
194: (4) the lazy objects should mostly work as expected, for example for sorting:
195: (4) >>> greetings = [
196: (4) ... LazyProxy(greeting, 'world'),
197: (4) ... LazyProxy(greeting, 'Joe'),
198: (4) ... LazyProxy(greeting, 'universe'),
199: (4) ... ]
200: (4) >>> greetings.sort()
201: (4) >>> for greeting in greetings:
202: (4) ... print(greeting)
203: (4) Hello, Joe!
204: (4) Hello, universe!
205: (4) Hello, world!
206: (4) """
207: (4) __slots__ = ['_func', '_args', '_kwargs', '_value', '_is_cache_enabled', '_attribute_error']
208: (4) if TYPE_CHECKING:
209: (8) _func: Callable[..., Any]
210: (8) _args: tuple[Any, ...]
211: (8) _kwargs: dict[str, Any]
212: (8) _is_cache_enabled: bool
213: (8) _value: Any
214: (8) _attribute_error: AttributeError | None
215: (4) def __init__(self, func: Callable[..., Any], *args: Any, enable_cache: bool = True, **kwargs: Any) -> None:
216: (8) # Avoid triggering our own __setattr__ implementation
217: (8) object.__setattr__(self, '_func', func)
218: (8) object.__setattr__(self, '_args', args)
219: (8) object.__setattr__(self, '_kwargs', kwargs)
220: (8) object.__setattr__(self, '_is_cache_enabled', enable_cache)
221: (8) object.__setattr__(self, '_value', None)
222: (8) object.__setattr__(self, '_attribute_error', None)
223: (4) @property
224: (4) def value(self) -> Any:
225: (8) if self._value is None:
226: (12) try:
227: (16) value = self._func(*self._args, **self._kwargs)
228: (12) except AttributeError as error:
229: (16) object.__setattr__(self, '_attribute_error', error)
230: (16) raise
231: (12) if not self._is_cache_enabled:
232: (16) return value
233: (12) object.__setattr__(self, '_value', value)
234: (8) return self._value
235: (4) def __contains__(self, key: object) -> bool:
236: (8) return key in self.value
237: (4) def __bool__(self) -> bool:
238: (8) return bool(self.value)
239: (4) def __dir__(self) -> list[str]:
240: (8) return dir(self.value)
241: (4) def __iter__(self) -> Iterator[Any]:
242: (8) return iter(self.value)
243: (4) def __len__(self) -> int:
244: (8) return len(self.value)
245: (4) def __str__(self) -> str:
246: (8) return str(self.value)
247: (4) def __add__(self, other: object) -> Any:
248: (8) return self.value + other
249: (4) def __radd__(self, other: object) -> Any:
250: (8) return other + self.value
251: (4) def __mod__(self, other: object) -> Any:
252: (8) return self.value % other
253: (4) def __rmod__(self, other: object) -> Any:
254: (8) return other % self.value
255: (4) def __mul__(self, other: object) -> Any:
256: (8) return self.value * other
257: (4) def __rmul__(self, other: object) -> Any:
258: (8) return other * self.value
259: (4) def __call__(self, *args: Any, **kwargs: Any) -> Any:
260: (8) return self.value(*args, **kwargs)
261: (4) def __lt__(self, other: object) -> bool:
262: (8) return self.value < other
263: (4) def __le__(self, other: object) -> bool:
264: (8) return self.value <= other
265: (4) def __eq__(self, other: object) -> bool:
266: (8) return self.value == other
267: (4) def __ne__(self, other: object) -> bool:
268: (8) return self.value != other
269: (4) def __gt__(self, other: object) -> bool:
270: (8) return self.value > other
271: (4) def __ge__(self, other: object) -> bool:
272: (8) return self.value >= other
273: (4) def __delattr__(self, name: str) -> None:
274: (8) delattr(self.value, name)
275: (4) def __getattr__(self, name: str) -> Any:
276: (8) if self._attribute_error is not None:
277: (12) raise self._attribute_error
278: (8) return getattr(self.value, name)
279: (4) def __setattr__(self, name: str, value: Any) -> None:
280: (8) setattr(self.value, name, value)
281: (4) def __delitem__(self, key: Any) -> None:
282: (8) del self.value[key]
283: (4) def __getitem__(self, key: Any) -> Any:
284: (8) return self.value[key]
285: (4) def __setitem__(self, key: Any, value: Any) -> None:
286: (8) self.value[key] = value
287: (4) def __copy__(self) -> LazyProxy:
288: (8) return LazyProxy(
289: (12) self._func,
290: (12) enable_cache=self._is_cache_enabled,
291: (12) *self._args, # noqa: B026
292: (12) **self._kwargs,
293: (8) )
294: (4) def __deepcopy__(self, memo: Any) -> LazyProxy:
295: (8) from copy import deepcopy
296: (8) return LazyProxy(
297: (12) deepcopy(self._func, memo),
298: (12) enable_cache=deepcopy(self._is_cache_enabled, memo),
299: (12) *deepcopy(self._args, memo), # noqa: B026
300: (12) **deepcopy(self._kwargs, memo),
301: (8) )
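# A sketch of lazy evaluation (not part of the module above). The helper
# `expensive_greeting` is made up for the example; it is not Babel API.
from babel.support import LazyProxy

def expensive_greeting(name):
    # Imagine this needed the active locale of a web request.
    return f'Hello, {name}!'

greeting = LazyProxy(expensive_greeting, 'Joe', enable_cache=False)
assert str(greeting) == 'Hello, Joe!'     # evaluated only when first used
assert greeting.upper() == 'HELLO, JOE!'  # attribute access is proxied too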
302: (0) class NullTranslations(gettext.NullTranslations):
303: (4) if TYPE_CHECKING:
304: (8) _info: dict[str, str]
305: (8) _fallback: NullTranslations | None
306: (4) DEFAULT_DOMAIN = None
307: (4) def __init__(self, fp: gettext._TranslationsReader | None = None) -> None:
308: (8) """Initialize a simple translations class which is not backed by a
309: (8) real catalog. Behaves similarly to gettext.NullTranslations but also
310: (8) offers Babel's own ``*gettext`` methods (e.g. 'dgettext()').
311: (8) :param fp: a file-like object (ignored in this class)
312: (8) """
313: (8) # These attributes are set by gettext.NullTranslations when a catalog
314: (8) # is parsed (fp != None). Ensure that they are always present because
315: (8) # some *gettext methods (including '.gettext()') rely on the attributes.
316: (8) self._catalog: dict[tuple[str, Any] | str, str] = {}
317: (8) self.plural: Callable[[float | decimal.Decimal], int] = lambda n: int(n != 1)
318: (8) super().__init__(fp=fp)
319: (8) self.files = list(filter(None, [getattr(fp, 'name', None)]))
320: (8) self.domain = self.DEFAULT_DOMAIN
321: (8) self._domains: dict[str, NullTranslations] = {}
322: (4) def dgettext(self, domain: str, message: str) -> str:
323: (8) """Like ``gettext()``, but look the message up in the specified
324: (8) domain.
325: (8) """
326: (8) return self._domains.get(domain, self).gettext(message)
327: (4) def ldgettext(self, domain: str, message: str) -> str:
328: (8) """Like ``lgettext()``, but look the message up in the specified
329: (8) domain.
330: (8) """
331: (8) import warnings
332: (8) warnings.warn(
333: (12) 'ldgettext() is deprecated, use dgettext() instead',
334: (12) DeprecationWarning,
335: (12) stacklevel=2,
336: (8) )
337: (8) return self._domains.get(domain, self).lgettext(message)
338: (4) def udgettext(self, domain: str, message: str) -> str:
339: (8) """Like ``ugettext()``, but look the message up in the specified
340: (8) domain.
341: (8) """
342: (8) return self._domains.get(domain, self).ugettext(message)
343: (4) # backward compatibility with 0.9
344: (4) dugettext = udgettext
345: (4) def dngettext(self, domain: str, singular: str, plural: str, num: int) -> str:
346: (8) """Like ``ngettext()``, but look the message up in the specified
347: (8) domain.
348: (8) """
349: (8) return self._domains.get(domain, self).ngettext(singular, plural, num)
350: (4) def ldngettext(self, domain: str, singular: str, plural: str, num: int) -> str:
351: (8) """Like ``lngettext()``, but look the message up in the specified
352: (8) domain.
353: (8) """
354: (8) import warnings
355: (8) warnings.warn(
356: (12) 'ldngettext() is deprecated, use dngettext() instead',
357: (12) DeprecationWarning,
358: (12) stacklevel=2,
359: (8) )
360: (8) return self._domains.get(domain, self).lngettext(singular, plural, num)
361: (4) def udngettext(self, domain: str, singular: str, plural: str, num: int) -> str:
362: (8) """Like ``ungettext()`` but look the message up in the specified
363: (8) domain.
364: (8) """
365: (8) return self._domains.get(domain, self).ungettext(singular, plural, num)
366: (4) # backward compatibility with 0.9
367: (4) dungettext = udngettext
368: (4) # Most of the code below, until it gets included in the stdlib, is from:
369: (4) # https://bugs.python.org/file10036/gettext-pgettext.patch
370: (4) #
371: (4) # The encoding of a msgctxt and a msgid in a .mo file is
372: (4) # msgctxt + "\x04" + msgid (gettext version >= 0.15)
373: (4) CONTEXT_ENCODING = '%s\x04%s'
374: (4) def pgettext(self, context: str, message: str) -> str | object:
375: (8) """Look up the `context` and `message` id in the catalog and return the
376: (8) corresponding message string, as an 8-bit string encoded with the
377: (8) catalog's charset encoding, if known. If there is no entry in the
378: (8) catalog for the `message` id and `context`, and a fallback has been
379: (8) set, the look up is forwarded to the fallback's ``pgettext()``
380: (8) method. Otherwise, the `message` id is returned.
381: (8) """
382: (8) ctxt_msg_id = self.CONTEXT_ENCODING % (context, message)
383: (8) missing = object()
384: (8) tmsg = self._catalog.get(ctxt_msg_id, missing)
385: (8) if tmsg is missing:
386: (12) tmsg = self._catalog.get((ctxt_msg_id, self.plural(1)), missing)
387: (8) if tmsg is not missing:
388: (12) return tmsg
389: (8) if self._fallback:
390: (12) return self._fallback.pgettext(context, message)
391: (8) return message
392: (4) def lpgettext(self, context: str, message: str) -> str | bytes | object:
393: (8) """Equivalent to ``pgettext()``, but the translation is returned in the
394: (8) preferred system encoding, if no other encoding was explicitly set with
395: (8) ``bind_textdomain_codeset()``.
396: (8) """
397: (8) import warnings
398: (8) warnings.warn(
399: (12) 'lpgettext() is deprecated, use pgettext() instead',
400: (12) DeprecationWarning,
401: (12) stacklevel=2,
402: (8) )
403: (8) tmsg = self.pgettext(context, message)
404: (8) encoding = getattr(self, "_output_charset", None) or locale.getpreferredencoding()
405: (8) return tmsg.encode(encoding) if isinstance(tmsg, str) else tmsg
406: (4) def npgettext(self, context: str, singular: str, plural: str, num: int) -> str:
407: (8) """Do a plural-forms lookup of a message id. `singular` is used as the
408: (8) message id for purposes of lookup in the catalog, while `num` is used to
409: (8) determine which plural form to use. The returned message string is an
410: (8) 8-bit string encoded with the catalog's charset encoding, if known.
411: (8) If the message id for `context` is not found in the catalog, and a
412: (8) fallback is specified, the request is forwarded to the fallback's
413: (8) ``npgettext()`` method. Otherwise, when ``num`` is 1 ``singular`` is
414: (8) returned, and ``plural`` is returned in all other cases.
415: (8) """
416: (8) ctxt_msg_id = self.CONTEXT_ENCODING % (context, singular)
417: (8) try:
418: (12) tmsg = self._catalog[(ctxt_msg_id, self.plural(num))]
419: (12) return tmsg
420: (8) except KeyError:
421: (12) if self._fallback:
422: (16) return self._fallback.npgettext(context, singular, plural, num)
423: (12) if num == 1:
424: (16) return singular
425: (12) else:
426: (16) return plural
427: (4) def lnpgettext(self, context: str, singular: str, plural: str, num: int) -> str | bytes:
428: (8) """Equivalent to ``npgettext()``, but the translation is returned in the
429: (8) preferred system encoding, if no other encoding was explicitly set with
430: (8) ``bind_textdomain_codeset()``.
431: (8) """
432: (8) import warnings
433: (8) warnings.warn(
434: (12) 'lnpgettext() is deprecated, use npgettext() instead',
435: (12) DeprecationWarning,
436: (12) stacklevel=2,
437: (8) )
438: (8) ctxt_msg_id = self.CONTEXT_ENCODING % (context, singular)
439: (8) try:
440: (12) tmsg = self._catalog[(ctxt_msg_id, self.plural(num))]
441: (12) encoding = getattr(self, "_output_charset", None) or locale.getpreferredencoding()
442: (12) return tmsg.encode(encoding)
443: (8) except KeyError:
444: (12) if self._fallback:
445: (16) return self._fallback.lnpgettext(context, singular, plural, num)
446: (12) if num == 1:
447: (16) return singular
448: (12) else:
449: (16) return plural
450: (4) def upgettext(self, context: str, message: str) -> str:
451: (8) """Look up the `context` and `message` id in the catalog and return the
452: (8) corresponding message string, as a Unicode string. If there is no entry
453: (8) in the catalog for the `message` id and `context`, and a fallback has
454: (8) been set, the look up is forwarded to the fallback's ``upgettext()``
455: (8) method. Otherwise, the `message` id is returned.
456: (8) """
457: (8) ctxt_message_id = self.CONTEXT_ENCODING % (context, message)
458: (8) missing = object()
459: (8) tmsg = self._catalog.get(ctxt_message_id, missing)
460: (8) if tmsg is missing:
461: (12) if self._fallback:
462: (16) return self._fallback.upgettext(context, message)
463: (12) return str(message)
464: (8) assert isinstance(tmsg, str)
465: (8) return tmsg
466: (4) def unpgettext(self, context: str, singular: str, plural: str, num: int) -> str:
467: (8) """Do a plural-forms lookup of a message id. `singular` is used as the
468: (8) message id for purposes of lookup in the catalog, while `num` is used to
469: (8) determine which plural form to use. The returned message string is a
470: (8) Unicode string.
471: (8) If the message id for `context` is not found in the catalog, and a
472: (8) fallback is specified, the request is forwarded to the fallback's
473: (8) ``unpgettext()`` method. Otherwise, when `num` is 1 `singular` is
474: (8) returned, and `plural` is returned in all other cases.
475: (8) """
476: (8) ctxt_message_id = self.CONTEXT_ENCODING % (context, singular)
477: (8) try:
478: (12) tmsg = self._catalog[(ctxt_message_id, self.plural(num))]
479: (8) except KeyError:
480: (12) if self._fallback:
481: (16) return self._fallback.unpgettext(context, singular, plural, num)
482: (12) tmsg = str(singular) if num == 1 else str(plural)
483: (8) return tmsg
484: (4) def dpgettext(self, domain: str, context: str, message: str) -> str | object:
485: (8) """Like `pgettext()`, but look the message up in the specified
486: (8) `domain`.
487: (8) """
488: (8) return self._domains.get(domain, self).pgettext(context, message)
489: (4) def udpgettext(self, domain: str, context: str, message: str) -> str:
490: (8) """Like `upgettext()`, but look the message up in the specified
491: (8) `domain`.
492: (8) """
493: (8) return self._domains.get(domain, self).upgettext(context, message)
494: (4) # backward compatibility with 0.9
495: (4) dupgettext = udpgettext
496: (4) def ldpgettext(self, domain: str, context: str, message: str) -> str | bytes | object:
497: (8) """Equivalent to ``dpgettext()``, but the translation is returned in the
498: (8) preferred system encoding, if no other encoding was explicitly set with
499: (8) ``bind_textdomain_codeset()``.
500: (8) """
501: (8) return self._domains.get(domain, self).lpgettext(context, message)
502: (4) def dnpgettext(self, domain: str, context: str, singular: str, plural: str, num: int) -> str:
503: (8) """Like ``npgettext``, but look the message up in the specified
504: (8) `domain`.
505: (8) """
506: (8) return self._domains.get(domain, self).npgettext(context, singular,
507: (57) plural, num)
508: (4) def udnpgettext(self, domain: str, context: str, singular: str, plural: str, num: int) -> str:
509: (8) """Like ``unpgettext``, but look the message up in the specified
510: (8) `domain`.
511: (8) """
512: (8) return self._domains.get(domain, self).unpgettext(context, singular,
513: (58) plural, num)
514: (4) # backward compatibility with 0.9
515: (4) dunpgettext = udnpgettext
516: (4) def ldnpgettext(self, domain: str, context: str, singular: str, plural: str, num: int) -> str | bytes:
517: (8) """Equivalent to ``dnpgettext()``, but the translation is returned in
518: (8) the preferred system encoding, if no other encoding was explicitly set
519: (8) with ``bind_textdomain_codeset()``.
520: (8) """
521: (8) return self._domains.get(domain, self).lnpgettext(context, singular,
522: (58) plural, num)
523: (4) ugettext = gettext.NullTranslations.gettext
524: (4) ungettext = gettext.NullTranslations.ngettext
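# Sketch (not part of the module above): with no catalog loaded every lookup
# falls back to the message id itself, which is what makes NullTranslations a
# safe return value for Translations.load() when no MO file is found.
from babel.support import NullTranslations

nt = NullTranslations()
assert nt.gettext('Save') == 'Save'
assert nt.dgettext('forms', 'Save') == 'Save'       # unknown domain -> falls back to self
assert nt.upgettext('month name', 'May') == 'May'   # context-aware lookup
assert nt.ungettext('%d file', '%d files', 3) == '%d files'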
525: (0) class Translations(NullTranslations, gettext.GNUTranslations):
526: (4) """An extended translation catalog class."""
527: (4) DEFAULT_DOMAIN = 'messages'
528: (4) def __init__(self, fp: gettext._TranslationsReader | None = None, domain: str | None = None):
529: (8) """Initialize the translations catalog.
530: (8) :param fp: the file-like object the translation should be read from
531: (8) :param domain: the message domain (default: 'messages')
532: (8) """
533: (8) super().__init__(fp=fp)
534: (8) self.domain = domain or self.DEFAULT_DOMAIN
535: (4) ugettext = gettext.GNUTranslations.gettext
536: (4) ungettext = gettext.GNUTranslations.ngettext
537: (4) @classmethod
538: (4) def load(
539: (8) cls,
540: (8) dirname: str | os.PathLike[str] | None = None,
541: (8) locales: Iterable[str | Locale] | str | Locale | None = None,
542: (8) domain: str | None = None,
543: (4) ) -> NullTranslations:
544: (8) """Load translations from the given directory.
545: (8) :param dirname: the directory containing the ``MO`` files
546: (8) :param locales: the list of locales in order of preference (items in
547: (24) this list can be either `Locale` objects or locale
548: (24) strings)
549: (8) :param domain: the message domain (default: 'messages')
550: (8) """
551: (8) if not domain:
552: (12) domain = cls.DEFAULT_DOMAIN
553: (8) filename = gettext.find(domain, dirname, _locales_to_names(locales))
554: (8) if not filename:
555: (12) return NullTranslations()
556: (8) with open(filename, 'rb') as fp:
557: (12) return cls(fp=fp, domain=domain)
558: (4) def __repr__(self) -> str:
559: (8) version = self._info.get('project-id-version')
560: (8) return f'<{type(self).__name__}: "{version}">'
561: (4) def add(self, translations: Translations, merge: bool = True):
562: (8) """Add the given translations to the catalog.
563: (8) If the domain of the translations is different than that of the
564: (8) current catalog, they are added as a catalog that is only accessible
565: (8) by the various ``d*gettext`` functions.
566: (8) :param translations: the `Translations` instance with the messages to
567: (29) add
568: (8) :param merge: whether translations for message domains that have
569: (22) already been added should be merged with the existing
570: (22) translations
571: (8) """
572: (8) domain = getattr(translations, 'domain', self.DEFAULT_DOMAIN)
573: (8) if merge and domain == self.domain:
574: (12) return self.merge(translations)
575: (8) existing = self._domains.get(domain)
576: (8) if merge and isinstance(existing, Translations):
577: (12) existing.merge(translations)
578: (8) else:
579: (12) translations.add_fallback(self)
580: (12) self._domains[domain] = translations
581: (8) return self
582: (4) def merge(self, translations: Translations):
583: (8) """Merge the given translations into the catalog.
584: (8) Message translations in the specified catalog override any messages
585: (8) with the same identifier in the existing catalog.
586: (8) :param translations: the `Translations` instance with the messages to
587: (29) merge
588: (8) """
589: (8) if isinstance(translations, gettext.GNUTranslations):
590: (12) self._catalog.update(translations._catalog)
591: (12) if isinstance(translations, Translations):
592: (16) self.files.extend(translations.files)
593: (8) return self
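# A loading sketch (not part of the module above). The './translations'
# directory and the locale list are example values; if no matching MO file is
# found, load() returns a NullTranslations and lookups degrade to the ids.
from babel.support import Translations

translations = Translations.load('./translations', locales=['de_DE', 'en'])
print(translations.gettext('Save'))
print(translations.dngettext('messages', '%d file', '%d files', 3))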
594: (0) def _locales_to_names(
595: (4) locales: Iterable[str | Locale] | str | Locale | None,
596: (0) ) -> list[str] | None:
597: (4) """Normalize a `locales` argument to a list of locale names.
598: (4) :param locales: the list of locales in order of preference (items in
599: (20) this list can be either `Locale` objects or locale
600: (20) strings)
601: (4) """
602: (4) if locales is None:
603: (8) return None
604: (4) if isinstance(locales, Locale):
605: (8) return [str(locales)]
606: (4) if isinstance(locales, str):
607: (8) return [locales]
608: (4) return [str(locale) for locale in locales]
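# Tiny sketch of the normalization load() relies on before calling
# gettext.find() (not part of the module above).
from babel.core import Locale
from babel.support import _locales_to_names

assert _locales_to_names(None) is None
assert _locales_to_names('de_DE') == ['de_DE']
assert _locales_to_names([Locale('en', 'US'), 'de_DE']) == ['en_US', 'de_DE']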
----------------------------------------
File 10 - . \languages.py:
1: (0) from __future__ import annotations
2: (0) from babel.core import get_global
3: (0) def get_official_languages(territory: str, regional: bool = False, de_facto: bool = False) -> tuple[str, ...]:
4: (4) """
5: (4) Get the official language(s) for the given territory.
6: (4) The language codes, if any are known, are returned in order of descending popularity.
7: (4) If the `regional` flag is set, then languages which are regionally official are also returned.
8: (4) If the `de_facto` flag is set, then languages which are "de facto" official are also returned.
9: (4) .. warning:: Note that the data is as up to date as the current version of the CLDR used
10: (17) by Babel. If you need scientifically accurate information, use another source!
11: (4) :param territory: Territory code
12: (4) :type territory: str
13: (4) :param regional: Whether to return regionally official languages too
14: (4) :type regional: bool
15: (4) :param de_facto: Whether to return de-facto official languages too
16: (4) :type de_facto: bool
17: (4) :return: Tuple of language codes
18: (4) :rtype: tuple[str]
19: (4) """
20: (4) territory = str(territory).upper()
21: (4) allowed_stati = {"official"}
22: (4) if regional:
23: (8) allowed_stati.add("official_regional")
24: (4) if de_facto:
25: (8) allowed_stati.add("de_facto_official")
26: (4) languages = get_global("territory_languages").get(territory, {})
27: (4) pairs = [
28: (8) (info['population_percent'], language)
29: (8) for language, info in languages.items()
30: (8) if info.get('official_status') in allowed_stati
31: (4) ]
32: (4) pairs.sort(reverse=True)
33: (4) return tuple(lang for _, lang in pairs)
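# Usage sketch (not part of the module above): the flags only widen the set of
# accepted statuses. Exact results depend on the CLDR release bundled with the
# installed Babel, so they are printed rather than asserted here.
from babel.languages import get_official_languages

print(get_official_languages('CH'))                  # e.g. ('de', 'fr', 'it')
print(get_official_languages('CH', regional=True))   # adds regionally official languages, if any
print(get_official_languages('US', de_facto=True))   # languages that are official only de facto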
34: (0) def get_territory_language_info(territory: str) -> dict[str, dict[str, float | str | None]]:
35: (4) """
36: (4) Get a dictionary of language information for a territory.
37: (4) The dictionary is keyed by language code; the values are dicts with more information.
38: (4) The following keys are currently known for the values:
39: (4) * `population_percent`: The percentage of the territory's population speaking the
40: (28) language.
41: (4) * `official_status`: An optional string describing the officiality status of the language.
42: (25) Known values are "official", "official_regional" and "de_facto_official".
43: (4) .. warning:: Note that the data is as up to date as the current version of the CLDR used
44: (17) by Babel. If you need scientifically accurate information, use another source!
45: (4) .. note:: Note that the format of the dict returned may change between Babel versions.
46: (4) See https://www.unicode.org/cldr/charts/latest/supplemental/territory_language_information.html
47: (4) :param territory: Territory code
48: (4) :type territory: str
49: (4) :return: Language information dictionary
50: (4) :rtype: dict[str, dict]
51: (4) """
52: (4) territory = str(territory).upper()
53: (4) return get_global("territory_languages").get(territory, {}).copy()
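# Sketch (not part of the module above): each per-language dict carries
# 'population_percent' and, optionally, 'official_status'. Values are printed,
# not asserted, because they track the bundled CLDR release.
from babel.languages import get_territory_language_info

for language, info in get_territory_language_info('FI').items():
    print(language, info.get('population_percent'), info.get('official_status'))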
----------------------------------------
File 11 - . \localedata.py:
1: (0) """
2: (4) babel.localedata
3: (4) ~~~~~~~~~~~~~~~~
4: (4) Low-level locale data access.
5: (4) :note: The `Locale` class, which uses this module under the hood, provides a
6: (11) more convenient interface for accessing the locale data.
7: (4) :copyright: (c) 2013-2024 by the Babel Team.
8: (4) :license: BSD, see LICENSE for more details.
9: (0) """
10: (0) from __future__ import annotations
11: (0) import os
12: (0) import pickle
13: (0) import re
14: (0) import sys
15: (0) import threading
16: (0) from collections import abc
17: (0) from collections.abc import Iterator, Mapping, MutableMapping
18: (0) from functools import lru_cache
19: (0) from itertools import chain
20: (0) from typing import Any
21: (0) _cache: dict[str, Any] = {}
22: (0) _cache_lock = threading.RLock()
23: (0) _dirname = os.path.join(os.path.dirname(__file__), 'locale-data')
24: (0) _windows_reserved_name_re = re.compile("^(con|prn|aux|nul|com[0-9]|lpt[0-9])$", re.I)
25: (0) def normalize_locale(name: str) -> str | None:
26: (4) Normalize a locale ID by stripping spaces and applying proper casing.
27: (4) Returns the normalized locale ID string or `None` if the ID is not
28: (4) recognized.
29: (4) """
30: (4) if not name or not isinstance(name, str):
31: (8) return None
32: (4) name = name.strip().lower()
33: (4) for locale_id in chain.from_iterable([_cache, locale_identifiers()]):
34: (8) if name == locale_id.lower():
35: (12) return locale_id
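# Sketch (not part of the module above): normalization is a case-insensitive
# match against the known locale identifiers.
from babel.localedata import normalize_locale

assert normalize_locale('  en_us ') == 'en_US'
assert normalize_locale('DE_at') == 'de_AT'
assert normalize_locale('not_a_locale') is None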
36: (0) def resolve_locale_filename(name: os.PathLike[str] | str) -> str:
37: (4) """
38: (4) Resolve a locale identifier to a `.dat` path on disk.
39: (4) """
40: (4) # Clean up any possible relative paths.
41: (4) name = os.path.basename(name)
42: (4) # Ensure we're not left with one of the Windows reserved names.
43: (4) if sys.platform == "win32" and _windows_reserved_name_re.match(os.path.splitext(name)[0]):
44: (8) raise ValueError(f"Name {name} is invalid on Windows")
45: (4) # Build the path.
46: (4) return os.path.join(_dirname, f"{name}.dat")
47: (0) def exists(name: str) -> bool:
48: (4) """Check whether locale data is available for the given locale.
49: (4) Returns `True` if it exists, `False` otherwise.
50: (4) :param name: the locale identifier string
51: (4) """
52: (4) if not name or not isinstance(name, str):
53: (8) return False
54: (4) if name in _cache:
55: (8) return True
56: (4) file_found = os.path.exists(resolve_locale_filename(name))
57: (4) return True if file_found else bool(normalize_locale(name))
58: (0) @lru_cache(maxsize=None)
59: (0) def locale_identifiers() -> list[str]:
60: (4) """Return a list of all locale identifiers for which locale data is
61: (4) available.
62: (4) This data is cached after the first invocation.
63: (4) You can clear the cache by calling `locale_identifiers.cache_clear()`.
64: (4) .. versionadded:: 0.8.1
65: (4) :return: a list of locale identifiers (strings)
66: (4) """
67: (4) return [
68: (8) stem
69: (8) for stem, extension in
70: (8) (os.path.splitext(filename) for filename in os.listdir(_dirname))
71: (8) if extension == '.dat' and stem != 'root'
72: (4) ]
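# Sketch (not part of the module above): exists() checks the in-memory cache
# first, then the *.dat files that locale_identifiers() lists.
from babel.localedata import exists, locale_identifiers

assert 'en_US' in locale_identifiers()
assert exists('en_US')
assert not exists('xx_XX')    # no such locale data file
assert not exists(None)       # non-string input is rejected early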
73: (0) def _is_non_likely_script(name: str) -> bool:
74: (4) """Return whether the locale is of the form ``lang_Script``,
75: (4) and the script is not the likely script for the language.
76: (4) This implements the behavior of the ``nonlikelyScript`` value of the
77: (4) ``localRules`` attribute for parent locales added in CLDR 45.
78: (4) """
79: (4) from babel.core import get_global, parse_locale
80: (4) try:
81: (8) lang, territory, script, variant, *rest = parse_locale(name)
82: (4) except ValueError:
83: (8) return False
84: (4) if lang and script and not territory and not variant and not rest:
85: (8) likely_subtag = get_global('likely_subtags').get(lang)
86: (8) _, _, likely_script, *_ = parse_locale(likely_subtag)
87: (8) return script != likely_script
88: (4) return False
89: (0) def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str, Any]:
90: (4) """Load the locale data for the given locale.
91: (4) The locale data is a dictionary that contains much of the data defined by
92: (4) the Common Locale Data Repository (CLDR). This data is stored as a
93: (4) collection of pickle files inside the ``babel`` package.
94: (4) >>> d = load('en_US')
95: (4) >>> d['languages']['sv']
96: (4) u'Swedish'
97: (4) Note that the results are cached, and subsequent requests for the same
98: (4) locale return the same dictionary:
99: (4) >>> d1 = load('en_US')
100: (4) >>> d2 = load('en_US')
101: (4) >>> d1 is d2
102: (4) True
103: (4) :param name: the locale identifier string (or "root")
104: (4) :param merge_inherited: whether the inherited data should be merged into
105: (28) the data of the requested locale
106: (4) :raise `IOError`: if no locale data file is found for the given locale
107: (22) identifier, or one of the locales it inherits from
108: (4) """
109: (4) name = os.path.basename(name)
110: (4) _cache_lock.acquire()
111: (4) try:
112: (8) data = _cache.get(name)
113: (8) if not data:
114: (12) # Load inherited data
115: (12) if name == 'root' or not merge_inherited:
116: (16) data = {}
117: (12) else:
118: (16) from babel.core import get_global
119: (16) parent = get_global('parent_exceptions').get(name)
120: (16) if not parent:
121: (20) if _is_non_likely_script(name):
122: (24) parent = 'root'
123: (20) else:
124: (24) parts = name.split('_')
125: (24) parent = "root" if len(parts) == 1 else "_".join(parts[:-1])
126: (16) data = load(parent).copy()
127: (12) filename = resolve_locale_filename(name)
128: (12) with open(filename, 'rb') as fileobj:
129: (16) if name != 'root' and merge_inherited:
130: (20) merge(data, pickle.load(fileobj))
131: (16) else:
132: (20) data = pickle.load(fileobj)
133: (12) _cache[name] = data
134: (8) return data
135: (4) finally:
136: (8) _cache_lock.release()
137: (0) def merge(dict1: MutableMapping[Any, Any], dict2: Mapping[Any, Any]) -> None:
138: (4) """Merge the data from `dict2` into the `dict1` dictionary, making copies
139: (4) of nested dictionaries.
140: (4) >>> d = {1: 'foo', 3: 'baz'}
141: (4) >>> merge(d, {1: 'Foo', 2: 'Bar'})
142: (4) >>> sorted(d.items())
143: (4) [(1, 'Foo'), (2, 'Bar'), (3, 'baz')]
144: (4) :param dict1: the dictionary to merge into
145: (4) :param dict2: the dictionary containing the data that should be merged
146: (4) """
147: (4) for key, val2 in dict2.items():
148: (8) if val2 is not None:
149: (12) val1 = dict1.get(key)
150: (12) if isinstance(val2, dict):
151: (16) if val1 is None:
152: (20) val1 = {}
153: (16) if isinstance(val1, Alias):
154: (20) val1 = (val1, val2)
155: (16) elif isinstance(val1, tuple):
156: (20) alias, others = val1
157: (20) others = others.copy()
158: (20) merge(others, val2)
159: (20) val1 = (alias, others)
160: (16) else:
161: (20) val1 = val1.copy()
162: (20) merge(val1, val2)
163: (12) else:
164: (16) val1 = val2
165: (12) dict1[key] = val1
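# Sketch (not part of the module above): nested values already present in the
# target are copied before being updated, so dictionaries shared with a parent
# locale's cached data are never modified in place.
from babel.localedata import merge

parent_numbers = {'group': ','}
d = {'numbers': parent_numbers}
merge(d, {'numbers': {'decimal': '.'}})
assert d['numbers'] == {'group': ',', 'decimal': '.'}
assert parent_numbers == {'group': ','}   # the shared nested dict is untouched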
166: (0) class Alias:
167: (4) """Representation of an alias in the locale data.
168: (4) An alias is a value that refers to some other part of the locale data,
169: (4) as specified by the `keys`.
170: (4) """
171: (4) def __init__(self, keys: tuple[str, ...]) -> None:
172: (8) self.keys = tuple(keys)
173: (4) def __repr__(self) -> str:
174: (8) return f"<{type(self).__name__} {self.keys!r}>"
175: (4) def resolve(self, data: Mapping[str | int | None, Any]) -> Mapping[str | int | None, Any]:
176: (8) """Resolve the alias based on the given data.
177: (8) This is done recursively, so if one alias resolves to a second alias,
178: (8) that second alias will also be resolved.
179: (8) :param data: the locale data
180: (8) :type data: `dict`
181: (8) """
182: (8) base = data
183: (8) for key in self.keys:
184: (12) data = data[key]
185: (8) if isinstance(data, Alias):
186: (12) data = data.resolve(base)
187: (8) elif isinstance(data, tuple):
188: (12) alias, others = data
189: (12) data = alias.resolve(base)
190: (8) return data
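# Sketch (not part of the module above): aliases resolve against the full data
# mapping, and chained aliases are followed recursively. The era data is a
# made-up stand-in for real locale data.
from babel.localedata import Alias

data = {
    'eras': {'abbreviated': {0: 'BC', 1: 'AD'}},
    'narrow': Alias(('eras', 'abbreviated')),
    'short': Alias(('narrow',)),
}
assert Alias(('short',)).resolve(data) == {0: 'BC', 1: 'AD'}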
191: (0) class LocaleDataDict(abc.MutableMapping):
192: (4) """Dictionary wrapper that automatically resolves aliases to the actual
193: (4) values.
194: (4) """
195: (4) def __init__(self, data: MutableMapping[str | int | None, Any], base: Mapping[str | int | None, Any] | None = None):
196: (8) self._data = data
197: (8) if base is None:
198: (12) base = data
199: (8) self.base = base
200: (4) def __len__(self) -> int:
201: (8) return len(self._data)
202: (4) def __iter__(self) -> Iterator[str | int | None]:
203: (8) return iter(self._data)
204: (4) def __getitem__(self, key: str | int | None) -> Any:
205: (8) orig = val = self._data[key]
206: (8) if isinstance(val, Alias): # resolve an alias
207: (12) val = val.resolve(self.base)
208: (8) if isinstance(val, tuple): # Merge a partial dict with an alias
209: (12) alias, others = val
210: (12) val = alias.resolve(self.base).copy()
211: (12) merge(val, others)
212: (8) if isinstance(val, dict): # Return a nested alias-resolving dict
213: (12) val = LocaleDataDict(val, base=self.base)
214: (8) if val is not orig:
215: (12) self._data[key] = val
216: (8) return val
217: (4) def __setitem__(self, key: str | int | None, value: Any) -> None:
218: (8) self._data[key] = value
219: (4) def __delitem__(self, key: str | int | None) -> None:
220: (8) del self._data[key]
221: (4) def copy(self) -> LocaleDataDict:
222: (8) return LocaleDataDict(self._data.copy(), base=self.base)
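# Sketch (not part of the module above): LocaleDataDict resolves Alias values
# and (alias, overrides) tuples transparently on item access.
from babel.localedata import Alias, LocaleDataDict

raw = {
    'eras': {'abbreviated': {0: 'BC', 1: 'AD'}},
    'narrow': Alias(('eras', 'abbreviated')),
    'mixed': (Alias(('eras', 'abbreviated')), {1: 'CE'}),
}
d = LocaleDataDict(raw)
assert d['narrow'][1] == 'AD'    # plain alias, wrapped as a nested LocaleDataDict
assert d['mixed'][0] == 'BC'     # tuple: alias resolved first ...
assert d['mixed'][1] == 'CE'     # ... then the partial dict is merged on top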
----------------------------------------
File 12 - .\localtime \__init__.py:
1: (0) """
2: (4) babel.localtime
3: (4) ~~~~~~~~~~~~~~~
4: (4) Babel specific fork of tzlocal to determine the local timezone
5: (4) of the system.
6: (4) :copyright: (c) 2013-2024 by the Babel Team.
7: (4) :license: BSD, see LICENSE for more details.
8: (0) """
9: (0) import datetime
10: (0) import sys
11: (0) if sys.platform == 'win32':
12: (4) from babel.localtime._win32 import _get_localzone
13: (0) else:
14: (4) from babel.localtime._unix import _get_localzone
15: (0) # TODO(3.0): the offset constants are not part of the public API
16: (0) # and should be removed
17: (0) from babel.localtime._fallback import (
18: (4) DSTDIFF, # noqa: F401
19: (4) DSTOFFSET, # noqa: F401
20: (4) STDOFFSET, # noqa: F401
21: (4) ZERO, # noqa: F401
22: (4) _FallbackLocalTimezone,
23: (0) )
24: (0) def get_localzone() -> datetime.tzinfo:
25: (4) """Returns the current underlying local timezone object.
26: (4) Generally this function does not need to be used; it's a
27: (4) better idea to use the :data:`LOCALTZ` singleton instead.
28: (4) """
29: (4) return _get_localzone()
30: (0) try:
31: (4) LOCALTZ = get_localzone()
32: (0) except LookupError:
33: (4) LOCALTZ = _FallbackLocalTimezone()
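# Usage sketch (not part of the package above): LOCALTZ is the singleton most
# callers want; it silently falls back to the emulated timezone when the real
# one cannot be determined.
import datetime
from babel.localtime import LOCALTZ, get_localzone

now = datetime.datetime.now(LOCALTZ)
print(now.tzname(), now.utcoffset())
print(get_localzone())   # same zone, determined afresh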
----------------------------------------
File 13 - .\localtime \_fallback.py:
1: (0) """
2: (4) babel.localtime._fallback
3: (4) ~~~~~~~~~~~~~~~~~~~~~~~~~
4: (4) Emulated fallback local timezone when all else fails.
5: (4) :copyright: (c) 2013-2024 by the Babel Team.
6: (4) :license: BSD, see LICENSE for more details.
7: (0) """
8: (0) import datetime
9: (0) import time
10: (0) STDOFFSET = datetime.timedelta(seconds=-time.timezone)
11: (0) DSTOFFSET = datetime.timedelta(seconds=-time.altzone) if time.daylight else STDOFFSET
12: (0) DSTDIFF = DSTOFFSET - STDOFFSET
13: (0) ZERO = datetime.timedelta(0)
14: (0) class _FallbackLocalTimezone(datetime.tzinfo):
15: (4) def utcoffset(self, dt: datetime.datetime) -> datetime.timedelta:
16: (8) if self._isdst(dt):
17: (12) return DSTOFFSET
18: (8) else:
19: (12) return STDOFFSET
20: (4) def dst(self, dt: datetime.datetime) -> datetime.timedelta:
21: (8) if self._isdst(dt):
22: (12) return DSTDIFF
23: (8) else:
24: (12) return ZERO
25: (4) def tzname(self, dt: datetime.datetime) -> str:
26: (8) return time.tzname[self._isdst(dt)]
27: (4) def _isdst(self, dt: datetime.datetime) -> bool:
28: (8) tt = (dt.year, dt.month, dt.day,
29: (14) dt.hour, dt.minute, dt.second,
30: (14) dt.weekday(), 0, -1)
31: (8) stamp = time.mktime(tt)
32: (8) tt = time.localtime(stamp)
33: (8) return tt.tm_isdst > 0
----------------------------------------
File 14 - .\localtime \_helpers.py:
1: (0) try:
2: (4) import pytz
3: (0) except ModuleNotFoundError:
4: (4) pytz = None
5: (0) try:
6: (4) import zoneinfo
7: (0) except ModuleNotFoundError:
8: (4) zoneinfo = None
9: (0) def _get_tzinfo(tzenv: str):
10: (4) """Get the tzinfo from `zoneinfo` or `pytz`
11: (4) :param tzenv: timezone in the form of Continent/City
12: (4) :return: tzinfo object or None if not found
13: (4) """
14: (4) if pytz:
15: (8) try:
16: (12) return pytz.timezone(tzenv)
17: (8) except pytz.UnknownTimeZoneError:
18: (12) pass
19: (4) else:
20: (8) try:
21: (12) return zoneinfo.ZoneInfo(tzenv)
22: (8) except ValueError as ve:
23: (12) # This is somewhat hacky, but since _validate_tzfile_path() doesn't
24: (12) # raise a specific error type, we need to check that the message is
25: (12) # one we know comes from that function.
26: (12) # If so, we pretend it meant that the TZ didn't exist, for the benefit
27: (12) # of `babel.localtime` catching the `LookupError` raised by
28: (12) # `_get_tzinfo_or_raise()`.
29: (12) # See https://github.com/python-babel/babel/issues/1092
30: (12) if str(ve).startswith("ZoneInfo keys "):
31: (16) return None
32: (8) except zoneinfo.ZoneInfoNotFoundError:
33: (12) pass
34: (4) return None
35: (0) def _get_tzinfo_or_raise(tzenv: str):
36: (4) tzinfo = _get_tzinfo(tzenv)
37: (4) if tzinfo is None:
38: (8) raise LookupError(
39: (12) f"Can not find timezone {tzenv}. \n"
40: (12) "Timezone names are generally in the form `Continent/City`.",
41: (8) )
42: (4) return tzinfo
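# Sketch (not part of the module above): the helpers prefer pytz when it is
# importable and fall back to the stdlib zoneinfo module otherwise.
from babel.localtime._helpers import _get_tzinfo, _get_tzinfo_or_raise

assert _get_tzinfo('Europe/Berlin') is not None
assert _get_tzinfo('Not/AZone') is None
try:
    _get_tzinfo_or_raise('Not/AZone')
except LookupError as exc:
    print(exc)   # points at the expected Continent/City form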
43: (0) def _get_tzinfo_from_file(tzfilename: str):
44: (4) with open(tzfilename, 'rb') as tzfile:
45: (8) if pytz:
46: (12) return pytz.tzfile.build_tzinfo('local', tzfile)
47: (8) else:
48: (12) return zoneinfo.ZoneInfo.from_file(tzfile)
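A sketch of the helper contract (these are private helpers; "Not/AZone" is a deliberately invalid name used to show the None vs. LookupError split):

    # Sketch: _get_tzinfo returns None for unknown zones, _get_tzinfo_or_raise raises.
    from babel.localtime._helpers import _get_tzinfo, _get_tzinfo_or_raise

    print(_get_tzinfo("Europe/Berlin"))   # tzinfo from pytz or zoneinfo, whichever is installed
    print(_get_tzinfo("Not/AZone"))       # None
    try:
        _get_tzinfo_or_raise("Not/AZone")
    except LookupError as exc:
        print(exc)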
----------------------------------------
File 15 - .\localtime \_unix.py:
1: (0) import datetime
2: (0) import os
3: (0) import re
4: (0) from babel.localtime._helpers import (
5: (4) _get_tzinfo,
6: (4) _get_tzinfo_from_file,
7: (4) _get_tzinfo_or_raise,
8: (0) )
9: (0) def _tz_from_env(tzenv: str) -> datetime.tzinfo:
10: (4) if tzenv[0] == ':':
11: (8) tzenv = tzenv[1:]
12: (4) # TZ specifies a file
13: (4) if os.path.exists(tzenv):
14: (8) return _get_tzinfo_from_file(tzenv)
15: (4) # TZ specifies a zoneinfo zone.
16: (4) return _get_tzinfo_or_raise(tzenv)
17: (0) def _get_localzone(_root: str = '/') -> datetime.tzinfo:
18: (4) """Tries to find the local timezone configuration.
19: (4) This method prefers finding the timezone name and passing that to
20: (4) zoneinfo or pytz, over passing in the localtime file, as in the latter
21: (4) case the zoneinfo name is unknown.
22: (4) The parameter _root makes the function look for files like /etc/localtime
23: (4) beneath the _root directory. This is primarily used by the tests.
24: (4) In normal usage you call the function without parameters.
25: (4) """
26: (4) tzenv = os.environ.get('TZ')
27: (4) if tzenv:
28: (8) return _tz_from_env(tzenv)
29: (4) # This is actually a pretty reliable way to test for the local time
30: (4) # zone on operating systems like OS X. On OS X especially this is the
31: (4) # only one that actually works.
32: (4) try:
33: (8) link_dst = os.readlink('/etc/localtime')
34: (4) except OSError:
35: (8) pass
36: (4) else:
37: (8) pos = link_dst.find('/zoneinfo/')
38: (8) if pos >= 0:
39: (12) zone_name = link_dst[pos + 10:]
40: (12) tzinfo = _get_tzinfo(zone_name)
41: (12) if tzinfo is not None:
42: (16) return tzinfo
43: (4) # Now look for distribution specific configuration files
44: (4) # that contain the timezone name.
45: (4) tzpath = os.path.join(_root, 'etc/timezone')
46: (4) if os.path.exists(tzpath):
47: (8) with open(tzpath, 'rb') as tzfile:
48: (12) data = tzfile.read()
49: (12) # Issue #3 in tzlocal was that /etc/timezone was a zoneinfo file.
50: (12) # That's a misconfiguration, but we need to handle it gracefully:
51: (12) if data[:5] != b'TZif2':
52: (16) etctz = data.strip().decode()
53: (16) # Get rid of host definitions and comments:
54: (16) if ' ' in etctz:
55: (20) etctz, dummy = etctz.split(' ', 1)
56: (16) if '#' in etctz:
57: (20) etctz, dummy = etctz.split('#', 1)
58: (16) return _get_tzinfo_or_raise(etctz.replace(' ', '_'))
59: (4) # CentOS has a ZONE setting in /etc/sysconfig/clock,
60: (4) # OpenSUSE has a TIMEZONE setting in /etc/sysconfig/clock and
61: (4) # Gentoo has a TIMEZONE setting in /etc/conf.d/clock
62: (4) # We look through these files for a timezone:
63: (4) timezone_re = re.compile(r'\s*(TIME)?ZONE\s*=\s*"(?P<etctz>.+)"')
64: (4) for filename in ('etc/sysconfig/clock', 'etc/conf.d/clock'):
65: (8) tzpath = os.path.join(_root, filename)
66: (8) if not os.path.exists(tzpath):
67: (12) continue
68: (8) with open(tzpath) as tzfile:
69: (12) for line in tzfile:
70: (16) match = timezone_re.match(line)
71: (16) if match is not None:
72: (20) # We found a timezone
73: (20) etctz = match.group("etctz")
74: (20) return _get_tzinfo_or_raise(etctz.replace(' ', '_'))
75: (4) # No explicit setting existed. Use localtime
76: (4) for filename in ('etc/localtime', 'usr/local/etc/localtime'):
77: (8) tzpath = os.path.join(_root, filename)
78: (8) if not os.path.exists(tzpath):
79: (12) continue
80: (8) return _get_tzinfo_from_file(tzpath)
81: (4) raise LookupError('Can not find any timezone configuration')
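To illustrate the TZ handling at the top of this file (a sketch; the second call assumes /etc/localtime exists, as it does on most Unix systems):

    # Sketch: the two TZ forms handled by _tz_from_env.
    from babel.localtime._unix import _tz_from_env

    print(_tz_from_env(":Europe/Paris"))   # leading ":" is stripped, then resolved as a zone name
    print(_tz_from_env("/etc/localtime"))  # an existing path is loaded as a tzfile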
----------------------------------------
File 16 - .\localtime \_win32.py:
1: (0) from __future__ import annotations
2: (0) try:
3: (4) import winreg
4: (0) except ImportError:
5: (4) winreg = None
6: (0) import datetime
7: (0) from typing import Any, Dict, cast
8: (0) from babel.core import get_global
9: (0) from babel.localtime._helpers import _get_tzinfo_or_raise
10: (0) # When building the CLDR data on Windows this module gets imported.
11: (0) # Because at that point there is no global.dat yet, this call will
12: (0) # fail. In that case we catch the error and just assume the mapping
13: (0) # is empty.
14: (0) try:
15: (4) tz_names: dict[str, str] = cast(Dict[str, str], get_global('windows_zone_mapping'))
16: (0) except RuntimeError:
17: (4) tz_names = {}
18: (0) def valuestodict(key) -> dict[str, Any]:
19: (4) """Convert a registry key's values to a dictionary."""
20: (4) dict = {}
21: (4) size = winreg.QueryInfoKey(key)[1]
22: (4) for i in range(size):
23: (8) data = winreg.EnumValue(key, i)
24: (8) dict[data[0]] = data[1]
25: (4) return dict
26: (0) def get_localzone_name() -> str:
27: (4) # Windows is special. It has unique time zone names (in several
28: (4) # meanings of the word) available, but unfortunately, they can be
29: (4) # translated to the language of the operating system, so we need to
30: (4) # do a backwards lookup by going through all time zones and seeing
31: (4) # which one matches.
32: (4) handle = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
33: (4) TZLOCALKEYNAME = r'SYSTEM\CurrentControlSet\Control\TimeZoneInformation'
34: (4) localtz = winreg.OpenKey(handle, TZLOCALKEYNAME)
35: (4) keyvalues = valuestodict(localtz)
36: (4) localtz.Close()
37: (4) if 'TimeZoneKeyName' in keyvalues:
38: (8) # Windows 7 (and Vista?)
39: (8) # For some reason this returns a string with loads of NUL bytes at
40: (8) # least on some systems. I don't know if this is a bug somewhere; I
41: (8) # just work around it.
42: (8) tzkeyname = keyvalues['TimeZoneKeyName'].split('\x00', 1)[0]
43: (4) else:
44: (8) # Windows 2000 or XP
45: (8) # This is the localized name:
46: (8) tzwin = keyvalues['StandardName']
47: (8) # Open the list of timezones to look up the real name:
48: (8) TZKEYNAME = r'SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones'
49: (8) tzkey = winreg.OpenKey(handle, TZKEYNAME)
50: (8) # Now, match this value to Time Zone information
51: (8) tzkeyname = None
52: (8) for i in range(winreg.QueryInfoKey(tzkey)[0]):
53: (12) subkey = winreg.EnumKey(tzkey, i)
54: (12) sub = winreg.OpenKey(tzkey, subkey)
55: (12) data = valuestodict(sub)
56: (12) sub.Close()
57: (12) if data.get('Std', None) == tzwin:
58: (16) tzkeyname = subkey
59: (16) break
60: (8) tzkey.Close()
61: (8) handle.Close()
62: (4) if tzkeyname is None:
63: (8) raise LookupError('Can not find Windows timezone configuration')
64: (4) timezone = tz_names.get(tzkeyname)
65: (4) if timezone is None:
66: (8) # Nope, that didn't work. Try adding 'Standard Time',
67: (8) # which often works:
68: (8) timezone = tz_names.get(f"{tzkeyname} Standard Time")
69: (4) # Return what we have.
70: (4) if timezone is None:
71: (8) raise LookupError(f"Can not find timezone {tzkeyname}")
72: (4) return timezone
73: (0) def _get_localzone() -> datetime.tzinfo:
74: (4) if winreg is None:
75: (8) raise LookupError(
76: (12) 'Runtime support not available')
77: (4) return _get_tzinfo_or_raise(get_localzone_name())
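The Windows-to-IANA mapping used by get_localzone_name comes from Babel's global data; a small cross-platform sketch (the key is one I expect to exist in the CLDR windowsZones data, and the call requires the data files to be built):

    # Sketch: inspect the CLDR-derived mapping behind get_localzone_name.
    from babel.core import get_global

    mapping = get_global("windows_zone_mapping")
    print(mapping.get("W. Europe Standard Time"))   # e.g. 'Europe/Berlin'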
----------------------------------------
File 17 - .\messages \mofile.py:
1: (0) """
2: (4) babel.messages.mofile
3: (4) ~~~~~~~~~~~~~~~~~~~~~
4: (4) Writing of files in the ``gettext`` MO (machine object) format.
5: (4) :copyright: (c) 2013-2024 by the Babel Team.
6: (4) :license: BSD, see LICENSE for more details.
7: (0) """
8: (0) from __future__ import annotations
9: (0) import array
10: (0) import struct
11: (0) from typing import TYPE_CHECKING
12: (0) from babel.messages.catalog import Catalog, Message
13: (0) if TYPE_CHECKING:
14: (4) from _typeshed import SupportsRead, SupportsWrite
15: (0) LE_MAGIC: int = 0x950412de
16: (0) BE_MAGIC: int = 0xde120495
17: (0) def read_mo(fileobj: SupportsRead[bytes]) -> Catalog:
18: (4) """Read a binary MO file from the given file-like object and return a
19: (4) corresponding `Catalog` object.
20: (4) :param fileobj: the file-like object to read the MO file from
21: (4) :note: The implementation of this function is heavily based on the
22: (11) ``GNUTranslations._parse`` method of the ``gettext`` module in the
23: (11) standard library.
24: (4) """
25: (4) catalog = Catalog()
26: (4) headers = {}
27: (4) filename = getattr(fileobj, 'name', '')
28: (4) buf = fileobj.read()
29: (4) buflen = len(buf)
30: (4) unpack = struct.unpack
31: (4) # Parse the .mo file header, which consists of 5 little endian 32
32: (4) # bit words.
33: (4) magic = unpack('<I', buf[:4])[0] # Are we big endian or little endian?
34: (4) if magic == LE_MAGIC:
35: (8) version, msgcount, origidx, transidx = unpack('<4I', buf[4:20])
36: (8) ii = '<II'
37: (4) elif magic == BE_MAGIC:
38: (8) version, msgcount, origidx, transidx = unpack('>4I', buf[4:20])
39: (8) ii = '>II'
40: (4) else:
41: (8) raise OSError(0, 'Bad magic number', filename)
42: (4) # Now put all messages from the .mo file buffer into the catalog
43: (4) # dictionary
44: (4) for _i in range(msgcount):
45: (8) mlen, moff = unpack(ii, buf[origidx:origidx + 8])
46: (8) mend = moff + mlen
47: (8) tlen, toff = unpack(ii, buf[transidx:transidx + 8])
48: (8) tend = toff + tlen
49: (8) if mend < buflen and tend < buflen:
50: (12) msg = buf[moff:mend]
51: (12) tmsg = buf[toff:tend]
52: (8) else:
53: (12) raise OSError(0, 'File is corrupt', filename)
54: (8) # See if we're looking at GNU .mo conventions for metadata
55: (8) if mlen == 0:
56: (12) # Catalog description
57: (12) lastkey = key = None
58: (12) for item in tmsg.splitlines():
59: (16) item = item.strip()
60: (16) if not item:
61: (20) continue
62: (16) if b':' in item:
63: (20) key, value = item.split(b':', 1)
64: (20) lastkey = key = key.strip().lower()
65: (20) headers[key] = value.strip()
66: (16) elif lastkey:
67: (20) headers[lastkey] += b'\n' + item
68: (8) if b'\x04' in msg: # context
69: (12) ctxt, msg = msg.split(b'\x04')
70: (8) else:
71: (12) ctxt = None
72: (8) if b'\x00' in msg: # plural forms
73: (12) msg = msg.split(b'\x00')
74: (12) tmsg = tmsg.split(b'\x00')
75: (12) if catalog.charset:
76: (16) msg = [x.decode(catalog.charset) for x in msg]
77: (16) tmsg = [x.decode(catalog.charset) for x in tmsg]
78: (8) else:
79: (12) if catalog.charset:
80: (16) msg = msg.decode(catalog.charset)
81: (16) tmsg = tmsg.decode(catalog.charset)
82: (8) catalog[msg] = Message(msg, tmsg, context=ctxt)
83: (8) # advance to next entry in the seek tables
84: (8) origidx += 8
85: (8) transidx += 8
86: (4) catalog.mime_headers = headers.items()
87: (4) return catalog
88: (0) def write_mo(fileobj: SupportsWrite[bytes], catalog: Catalog, use_fuzzy: bool = False) -> None:
89: (4) """Write a catalog to the specified file-like object using the GNU MO file
90: (4) format.
91: (4) >>> import sys
92: (4) >>> from babel.messages import Catalog
93: (4) >>> from gettext import GNUTranslations
94: (4) >>> from io import BytesIO
95: (4) >>> catalog = Catalog(locale='en_US')
96: (4) >>> catalog.add('foo', 'Voh')
97: (4) <Message ...>
98: (4) >>> catalog.add((u'bar', u'baz'), (u'Bahr', u'Batz'))
99: (4) <Message ...>
100: (4) >>> catalog.add('fuz', 'Futz', flags=['fuzzy'])
101: (4) <Message ...>
102: (4) >>> catalog.add('Fizz', '')
103: (4) <Message ...>
104: (4) >>> catalog.add(('Fuzz', 'Fuzzes'), ('', ''))
105: (4) <Message ...>
106: (4) >>> buf = BytesIO()
107: (4) >>> write_mo(buf, catalog)
108: (4) >>> x = buf.seek(0)
109: (4) >>> translations = GNUTranslations(fp=buf)
110: (4) >>> if sys.version_info[0] >= 3:
111: (4) ... translations.ugettext = translations.gettext
112: (4) ... translations.ungettext = translations.ngettext
113: (4) >>> translations.ugettext('foo')
114: (4) u'Voh'
115: (4) >>> translations.ungettext('bar', 'baz', 1)
116: (4) u'Bahr'
117: (4) >>> translations.ungettext('bar', 'baz', 2)
118: (4) u'Batz'
119: (4) >>> translations.ugettext('fuz')
120: (4) u'fuz'
121: (4) >>> translations.ugettext('Fizz')
122: (4) u'Fizz'
123: (4) >>> translations.ugettext('Fuzz')
124: (4) u'Fuzz'
125: (4) >>> translations.ugettext('Fuzzes')
126: (4) u'Fuzzes'
127: (4) :param fileobj: the file-like object to write to
128: (4) :param catalog: the `Catalog` instance
129: (4) :param use_fuzzy: whether translations marked as "fuzzy" should be included
130: (22) in the output
131: (4) """
132: (4) messages = list(catalog)
133: (4) messages[1:] = [m for m in messages[1:]
134: (20) if m.string and (use_fuzzy or not m.fuzzy)]
135: (4) messages.sort()
136: (4) ids = strs = b''
137: (4) offsets = []
138: (4) for message in messages:
139: (8) # For each string, we need size and file offset. Each string is NUL
140: (8) # terminated; the NUL is not counted in the size.
141: (8) if message.pluralizable:
142: (12) msgid = b'\x00'.join([
143: (16) msgid.encode(catalog.charset) for msgid in message.id
144: (12) ])
145: (12) msgstrs = []
146: (12) for idx, string in enumerate(message.string):
147: (16) if not string:
148: (20) msgstrs.append(message.id[min(int(idx), 1)])
149: (16) else:
150: (20) msgstrs.append(string)
151: (12) msgstr = b'\x00'.join([
152: (16) msgstr.encode(catalog.charset) for msgstr in msgstrs
153: (12) ])
154: (8) else:
155: (12) msgid = message.id.encode(catalog.charset)
156: (12) msgstr = message.string.encode(catalog.charset)
157: (8) if message.context:
158: (12) msgid = b'\x04'.join([message.context.encode(catalog.charset),
159: (34) msgid])
160: (8) offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
161: (8) ids += msgid + b'\x00'
162: (8) strs += msgstr + b'\x00'
163: (4) # The header is 7 32-bit unsigned integers. We don't use hash tables, so
164: (4) # the keys start right after the index tables.
165: (4) keystart = 7 * 4 + 16 * len(messages)
166: (4) valuestart = keystart + len(ids)
167: (4) # The string table first has the list of keys, then the list of values.
168: (4) # Each entry has first the size of the string, then the file offset.
169: (4) koffsets = []
170: (4) voffsets = []
171: (4) for o1, l1, o2, l2 in offsets:
172: (8) koffsets += [l1, o1 + keystart]
173: (8) voffsets += [l2, o2 + valuestart]
174: (4) offsets = koffsets + voffsets
175: (4) fileobj.write(struct.pack('Iiiiiii',
176: (30) LE_MAGIC, # magic
177: (30) 0, # version
178: (30) len(messages), # number of entries
179: (30) 7 * 4, # start of key index
180: (30) 7 * 4 + len(messages) * 8, # start of value index
181: (30) 0, 0, # size and offset of hash table
182: (30) ) + array.array.tobytes(array.array("i", offsets)) + ids + strs)
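A small round-trip sketch tying write_mo and read_mo together (editorial addition; the catalog contents are invented):

    # Sketch: serialize a catalog to MO bytes and parse them back.
    from io import BytesIO

    from babel.messages import Catalog
    from babel.messages.mofile import read_mo, write_mo

    catalog = Catalog(locale='de_DE')
    catalog.add('hello', 'hallo')
    buf = BytesIO()
    write_mo(buf, catalog)
    buf.seek(0)
    roundtripped = read_mo(buf)
    print(roundtripped['hello'].string)   # 'hallo'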
----------------------------------------
File 18 - .\messages \pofile.py:
1: (0) """
2: (4) babel.messages.pofile
3: (4) ~~~~~~~~~~~~~~~~~~~~~
4: (4) Reading and writing of files in the ``gettext`` PO (portable object)
5: (4) format.
6: (4) :copyright: (c) 2013-2024 by the Babel Team.
7: (4) :license: BSD, see LICENSE for more details.
8: (0) """
9: (0) from __future__ import annotations
10: (0) import os
11: (0) import re
12: (0) from collections.abc import Iterable
13: (0) from typing import TYPE_CHECKING
14: (0) from babel.core import Locale
15: (0) from babel.messages.catalog import Catalog, Message
16: (0) from babel.util import _cmp, wraptext
17: (0) if TYPE_CHECKING:
18: (4) from typing import IO, AnyStr
19: (4) from _typeshed import SupportsWrite
20: (4) from typing_extensions import Literal
21: (0) def unescape(string: str) -> str:
22: (4) r"""Reverse `escape` the given string.
23: (4) >>> print(unescape('"Say:\\n \\"hello, world!\\"\\n"'))
24: (4) Say:
25: (6) "hello, world!"
26: (4) <BLANKLINE>
27: (4) :param string: the string to unescape
28: (4) """
29: (4) def replace_escapes(match):
30: (8) m = match.group(1)
31: (8) if m == 'n':
32: (12) return '\n'
33: (8) elif m == 't':
34: (12) return '\t'
35: (8) elif m == 'r':
36: (12) return '\r'
37: (8) # m is \ or "
38: (8) return m
39: (4) return re.compile(r'\\([\\trn"])').sub(replace_escapes, string[1:-1])
40: (0) def denormalize(string: str) -> str:
41: (4) r"""Reverse the normalization done by the `normalize` function.
42: (4) >>> print(denormalize(r'''""
43: (4) ... "Say:\n"
44: (4) ... " \"hello, world!\"\n"'''))
45: (4) Say:
46: (6) "hello, world!"
47: (4) <BLANKLINE>
48: (4) >>> print(denormalize(r'''""
49: (4) ... "Say:\n"
50: (4) ... " \"Lorem ipsum dolor sit "
51: (4) ... "amet, consectetur adipisicing"
52: (4) ... " elit, \"\n"'''))
53: (4) Say:
54: (6) "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
55: (4) <BLANKLINE>
56: (4) :param string: the string to denormalize
57: (4) """
58: (4) if '\n' in string:
59: (8) escaped_lines = string.splitlines()
60: (8) if string.startswith('""'):
61: (12) escaped_lines = escaped_lines[1:]
62: (8) lines = map(unescape, escaped_lines)
63: (8) return ''.join(lines)
64: (4) else:
65: (8) return unescape(string)
66: (0) class PoFileError(Exception):
67: (4) """Exception thrown by PoFileParser when an invalid PO file is encountered."""
68: (4) def __init__(self, message: str, catalog: Catalog, line: str, lineno: int) -> None:
69: (8) super().__init__(f'{message} on {lineno}')
70: (8) self.catalog = catalog
71: (8) self.line = line
72: (8) self.lineno = lineno
73: (0) class _NormalizedString:
74: (4) def __init__(self, *args: str) -> None:
75: (8) self._strs: list[str] = []
76: (8) for arg in args:
77: (12) self.append(arg)
78: (4) def append(self, s: str) -> None:
79: (8) self._strs.append(s.strip())
80: (4) def denormalize(self) -> str:
81: (8) return ''.join(map(unescape, self._strs))
82: (4) def __bool__(self) -> bool:
83: (8) return bool(self._strs)
84: (4) def __repr__(self) -> str:
85: (8) return os.linesep.join(self._strs)
86: (4) def __cmp__(self, other: object) -> int:
87: (8) if not other:
88: (12) return 1
89: (8) return _cmp(str(self), str(other))
90: (4) def __gt__(self, other: object) -> bool:
91: (8) return self.__cmp__(other) > 0
92: (4) def __lt__(self, other: object) -> bool:
93: (8) return self.__cmp__(other) < 0
94: (4) def __ge__(self, other: object) -> bool:
95: (8) return self.__cmp__(other) >= 0
96: (4) def __le__(self, other: object) -> bool:
97: (8) return self.__cmp__(other) <= 0
98: (4) def __eq__(self, other: object) -> bool:
99: (8) return self.__cmp__(other) == 0
100: (4) def __ne__(self, other: object) -> bool:
101: (8) return self.__cmp__(other) != 0
102: (0) class PoFileParser:
103: (4) """Support class to read messages from a ``gettext`` PO (portable object) file
104: (4) and add them to a `Catalog`.
105: (4) See `read_po` for simple cases.
106: (4) """
107: (4) _keywords = [
108: (8) 'msgid',
109: (8) 'msgstr',
110: (8) 'msgctxt',
111: (8) 'msgid_plural',
112: (4) ]
113: (4) def __init__(self, catalog: Catalog, ignore_obsolete: bool = False, abort_invalid: bool = False) -> None:
114: (8) self.catalog = catalog
115: (8) self.ignore_obsolete = ignore_obsolete
116: (8) self.counter = 0
117: (8) self.offset = 0
118: (8) self.abort_invalid = abort_invalid
119: (8) self._reset_message_state()
120: (4) def _reset_message_state(self) -> None:
121: (8) self.messages = []
122: (8) self.translations = []
123: (8) self.locations = []
124: (8) self.flags = []
125: (8) self.user_comments = []
126: (8) self.auto_comments = []
127: (8) self.context = None
128: (8) self.obsolete = False
129: (8) self.in_msgid = False
130: (8) self.in_msgstr = False
131: (8) self.in_msgctxt = False
132: (4) def _add_message(self) -> None:
133: (8) """
134: (8) Add a message to the catalog based on the current parser state and
135: (8) clear the state ready to process the next message.
136: (8) """
137: (8) self.translations.sort()
138: (8) if len(self.messages) > 1:
139: (12) msgid = tuple(m.denormalize() for m in self.messages)
140: (8) else:
141: (12) msgid = self.messages[0].denormalize()
142: (8) if isinstance(msgid, (list, tuple)):
143: (12) string = ['' for _ in range(self.catalog.num_plurals)]
144: (12) for idx, translation in self.translations:
145: (16) if idx >= self.catalog.num_plurals:
146: (20) self._invalid_pofile("", self.offset, "msg has more translations than num_plurals of catalog")
147: (20) continue
148: (16) string[idx] = translation.denormalize()
149: (12) string = tuple(string)
150: (8) else:
151: (12) string = self.translations[0][1].denormalize()
152: (8) msgctxt = self.context.denormalize() if self.context else None
153: (8) message = Message(msgid, string, list(self.locations), set(self.flags),
154: (26) self.auto_comments, self.user_comments, lineno=self.offset + 1,
155: (26) context=msgctxt)
156: (8) if self.obsolete:
157: (12) if not self.ignore_obsolete:
158: (16) self.catalog.obsolete[msgid] = message
159: (8) else:
160: (12) self.catalog[msgid] = message
161: (8) self.counter += 1
162: (8) self._reset_message_state()
163: (4) def _finish_current_message(self) -> None:
164: (8) if self.messages:
165: (12) self._add_message()
166: (4) def _process_message_line(self, lineno, line, obsolete=False) -> None:
167: (8) if line.startswith('"'):
168: (12) self._process_string_continuation_line(line, lineno)
169: (8) else:
170: (12) self._process_keyword_line(lineno, line, obsolete)
171: (4) def _process_keyword_line(self, lineno, line, obsolete=False) -> None:
172: (8) for keyword in self._keywords:
173: (12) try:
174: (16) if line.startswith(keyword) and line[len(keyword)] in [' ', '[']:
175: (20) arg = line[len(keyword):]
176: (20) break
177: (12) except IndexError:
178: (16) self._invalid_pofile(line, lineno, "Keyword must be followed by a string")
179: (8) else:
180: (12) self._invalid_pofile(line, lineno, "Start of line didn't match any expected keyword.")
181: (12) return
182: (8) if keyword in ['msgid', 'msgctxt']:
183: (12) self._finish_current_message()
184: (8) self.obsolete = obsolete
185: (8) # The line that has the msgid is stored as the offset of the msg
186: (8) # should this be the msgctxt if it has one?
187: (8) if keyword == 'msgid':
188: (12) self.offset = lineno
189: (8) if keyword in ['msgid', 'msgid_plural']:
190: (12) self.in_msgctxt = False
191: (12) self.in_msgid = True
192: (12) self.messages.append(_NormalizedString(arg))
193: (8) elif keyword == 'msgstr':
194: (12) self.in_msgid = False
195: (12) self.in_msgstr = True
196: (12) if arg.startswith('['):
197: (16) idx, msg = arg[1:].split(']', 1)
198: (16) self.translations.append([int(idx), _NormalizedString(msg)])
199: (12) else:
200: (16) self.translations.append([0, _NormalizedString(arg)])
201: (8) elif keyword == 'msgctxt':
202: (12) self.in_msgctxt = True
203: (12) self.context = _NormalizedString(arg)
204: (4) def _process_string_continuation_line(self, line, lineno) -> None:
205: (8) if self.in_msgid:
206: (12) s = self.messages[-1]
207: (8) elif self.in_msgstr:
208: (12) s = self.translations[-1][1]
209: (8) elif self.in_msgctxt:
210: (12) s = self.context
211: (8) else:
212: (12) self._invalid_pofile(line, lineno, "Got line starting with \" but not in msgid, msgstr or msgctxt")
213: (12) return
214: (8) s.append(line)
215: (4) def _process_comment(self, line) -> None:
216: (8) self._finish_current_message()
217: (8) if line[1:].startswith(':'):
218: (12) for location in line[2:].lstrip().split():
219: (16) pos = location.rfind(':')
220: (16) if pos >= 0:
221: (20) try:
222: (24) lineno = int(location[pos + 1:])
223: (20) except ValueError:
224: (24) continue
225: (20) self.locations.append((location[:pos], lineno))
226: (16) else:
227: (20) self.locations.append((location, None))
228: (8) elif line[1:].startswith(','):
229: (12) for flag in line[2:].lstrip().split(','):
230: (16) self.flags.append(flag.strip())
231: (8) elif line[1:].startswith('.'):
232: (12) # These are called auto-comments
233: (12) comment = line[2:].strip()
234: (12) if comment: # Just check that we're not adding empty comments
235: (16) self.auto_comments.append(comment)
236: (8) else:
237: (12) # These are called user comments
238: (12) self.user_comments.append(line[1:].strip())
239: (4) def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None:
240: (8) """
241: (8) Reads from the file-like object `fileobj` and adds any po file
242: (8) units found in it to the `Catalog` supplied to the constructor.
243: (8) """
244: (8) for lineno, line in enumerate(fileobj):
245: (12) line = line.strip()
246: (12) if not isinstance(line, str):
247: (16) line = line.decode(self.catalog.charset)
248: (12) if not line:
249: (16) continue
250: (12) if line.startswith('#'):
251: (16) if line[1:].startswith('~'):
252: (20) self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
253: (16) else:
254: (20) self._process_comment(line)
255: (12) else:
256: (16) self._process_message_line(lineno, line)
257: (8) self._finish_current_message()
258: (8) # No actual messages found, but there was some info in comments, from which
259: (8) # we'll construct an empty header message
260: (8) if not self.counter and (self.flags or self.user_comments or self.auto_comments):
261: (12) self.messages.append(_NormalizedString('""'))
262: (12) self.translations.append([0, _NormalizedString('""')])
263: (12) self._add_message()
264: (4) def _invalid_pofile(self, line, lineno, msg) -> None:
265: (8) assert isinstance(line, str)
266: (8) if self.abort_invalid:
267: (12) raise PoFileError(msg, self.catalog, line, lineno)
268: (8) print("WARNING:", msg)
269: (8) print(f"WARNING: Problem on line {lineno + 1}: {line!r}")
270: (0) def read_po(
271: (4) fileobj: IO[AnyStr] | Iterable[AnyStr],
272: (4) locale: str | Locale | None = None,
273: (4) domain: str | None = None,
274: (4) ignore_obsolete: bool = False,
275: (4) charset: str | None = None,
276: (4) abort_invalid: bool = False,
277: (0) ) -> Catalog:
278: (4) """Read messages from a ``gettext`` PO (portable object) file from the given
279: (4) file-like object (or an iterable of lines) and return a `Catalog`.
280: (4) >>> from datetime import datetime
281: (4) >>> from io import StringIO
282: (4) >>> buf = StringIO('''
283: (4) ... #: main.py:1
284: (4) ... #, fuzzy, python-format
285: (4) ... msgid "foo %(name)s"
286: (4) ... msgstr "quux %(name)s"
287: (4) ...
288: (4) ... # A user comment
289: (4) ... #. An auto comment
290: (4) ... #: main.py:3
291: (4) ... msgid "bar"
292: (4) ... msgid_plural "baz"
293: (4) ... msgstr[0] "bar"
294: (4) ... msgstr[1] "baaz"
295: (4) ... ''')
296: (4) >>> catalog = read_po(buf)
297: (4) >>> catalog.revision_date = datetime(2007, 4, 1)
298: (4) >>> for message in catalog:
299: (4) ... if message.id:
300: (4) ... print((message.id, message.string))
301: (4) ... print(' ', (message.locations, sorted(list(message.flags))))
302: (4) ... print(' ', (message.user_comments, message.auto_comments))
303: (4) (u'foo %(name)s', u'quux %(name)s')
304: (6) ([(u'main.py', 1)], [u'fuzzy', u'python-format'])
305: (6) ([], [])
306: (4) ((u'bar', u'baz'), (u'bar', u'baaz'))
307: (6) ([(u'main.py', 3)], [])
308: (6) ([u'A user comment'], [u'An auto comment'])
309: (4) .. versionadded:: 1.0
310: (7) Added support for explicit charset argument.
311: (4) :param fileobj: the file-like object (or iterable of lines) to read the PO file from
312: (4) :param locale: the locale identifier or `Locale` object, or `None`
313: (19) if the catalog is not bound to a locale (which basically
314: (19) means it's a template)
315: (4) :param domain: the message domain
316: (4) :param ignore_obsolete: whether to ignore obsolete messages in the input
317: (4) :param charset: the character set of the catalog.
318: (4) :param abort_invalid: abort read if po file is invalid
319: (4) """
320: (4) catalog = Catalog(locale=locale, domain=domain, charset=charset)
321: (4) parser = PoFileParser(catalog, ignore_obsolete, abort_invalid=abort_invalid)
322: (4) parser.parse(fileobj)
323: (4) return catalog
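Beyond the in-memory doctest above, a typical call site reads from a file on disk (a sketch; the path is hypothetical):

    # Sketch: reading a catalog from a .po file (hypothetical path).
    from babel.messages.pofile import read_po

    with open("locale/de/LC_MESSAGES/messages.po", encoding="utf-8") as fh:
        catalog = read_po(fh, locale="de", domain="messages")
    print(catalog.locale, len(list(catalog)))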
324: (0) WORD_SEP = re.compile('('
325: (22) r'\s+|' # any whitespace
326: (22) r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
327: (22) r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)' # em-dash
328: (22) ')')
329: (0) def escape(string: str) -> str:
330: (4) r"""Escape the given string so that it can be included in double-quoted
331: (4) strings in ``PO`` files.
332: (4) >>> escape('''Say:
333: (4) ... "hello, world!"
334: (4) ... ''')
335: (4) '"Say:\\n \\"hello, world!\\"\\n"'
336: (4) :param string: the string to escape
337: (4) """
338: (4) return '"%s"' % string.replace('\\', '\\\\') \
339: (26) .replace('\t', '\\t') \
340: (26) .replace('\r', '\\r') \
341: (26) .replace('\n', '\\n') \
342: (26) .replace('\"', '\\"')
343: (0) def normalize(string: str, prefix: str = '', width: int = 76) -> str:
344: (4) r"""Convert a string into a format that is appropriate for .po files.
345: (4) >>> print(normalize('''Say:
346: (4) ... "hello, world!"
347: (4) ... ''', width=None))
348: (4) ""
349: (4) "Say:\n"
350: (4) " \"hello, world!\"\n"
351: (4) >>> print(normalize('''Say:
352: (4) ... "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
353: (4) ... ''', width=32))
354: (4) ""
355: (4) "Say:\n"
356: (4) " \"Lorem ipsum dolor sit "
357: (4) "amet, consectetur adipisicing"
358: (4) " elit, \"\n"
359: (4) :param string: the string to normalize
360: (4) :param prefix: a string that should be prepended to every line
361: (4) :param width: the maximum line width; use `None`, 0, or a negative number
362: (18) to completely disable line wrapping
363: (4) """
364: (4) if width and width > 0:
365: (8) prefixlen = len(prefix)
366: (8) lines = []
367: (8) for line in string.splitlines(True):
368: (12) if len(escape(line)) + prefixlen > width:
369: (16) chunks = WORD_SEP.split(line)
370: (16) chunks.reverse()
371: (16) while chunks:
372: (20) buf = []
373: (20) size = 2
374: (20) while chunks:
375: (24) length = len(escape(chunks[-1])) - 2 + prefixlen
376: (24) if size + length < width:
377: (28) buf.append(chunks.pop())
378: (28) size += length
379: (24) else:
380: (28) if not buf:
381: (32) # handle long chunks by putting them on a
382: (32) # separate line
383: (32) buf.append(chunks.pop())
384: (28) break
385: (20) lines.append(''.join(buf))
386: (12) else:
387: (16) lines.append(line)
388: (4) else:
389: (8) lines = string.splitlines(True)
390: (4) if len(lines) <= 1:
391: (8) return escape(string)
392: (4) # Remove empty trailing line
393: (4) if lines and not lines[-1]:
394: (8) del lines[-1]
395: (8) lines[-1] += '\n'
396: (4) return '""\n' + '\n'.join([(prefix + escape(line)) for line in lines])
397: (0) def write_po(
398: (4) fileobj: SupportsWrite[bytes],
399: (4) catalog: Catalog,
400: (4) width: int = 76,
401: (4) no_location: bool = False,
402: (4) omit_header: bool = False,
403: (4) sort_output: bool = False,
404: (4) sort_by_file: bool = False,
405: (4) ignore_obsolete: bool = False,
406: (4) include_previous: bool = False,
407: (4) include_lineno: bool = True,
408: (0) ) -> None:
409: (4) r"""Write a ``gettext`` PO (portable object) template file for a given
410: (4) message catalog to the provided file-like object.
411: (4) >>> catalog = Catalog()
412: (4) >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
413: (4) ... flags=('fuzzy',))
414: (4) <Message...>
415: (4) >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
416: (4) <Message...>
417: (4) >>> from io import BytesIO
418: (4) >>> buf = BytesIO()
419: (4) >>> write_po(buf, catalog, omit_header=True)
420: (4) >>> print(buf.getvalue().decode("utf8"))
421: (4) #: main.py:1
422: (4) #, fuzzy, python-format
423: (4) msgid "foo %(name)s"
424: (4) msgstr ""
425: (4) <BLANKLINE>
426: (4) #: main.py:3
427: (4) msgid "bar"
428: (4) msgid_plural "baz"
429: (4) msgstr[0] ""
430: (4) msgstr[1] ""
431: (4) <BLANKLINE>
432: (4) <BLANKLINE>
433: (4) :param fileobj: the file-like object to write to
434: (4) :param catalog: the `Catalog` instance
435: (4) :param width: the maximum line width for the generated output; use `None`,
436: (18) 0, or a negative number to completely disable line wrapping
437: (4) :param no_location: do not emit a location comment for every message
438: (4) :param omit_header: do not include the ``msgid ""`` entry at the top of the
439: (24) output
440: (4) :param sort_output: whether to sort the messages in the output by msgid
441: (4) :param sort_by_file: whether to sort the messages in the output by their
442: (25) locations
443: (4) :param ignore_obsolete: whether to ignore obsolete messages and not include
444: (28) them in the output; by default they are included as
445: (28) comments
446: (4) :param include_previous: include the old msgid as a comment when
447: (29) updating the catalog
448: (4) :param include_lineno: include line number in the location comment
449: (4) """
450: (4) sort_by = None
451: (4) if sort_output:
452: (8) sort_by = "message"
453: (4) elif sort_by_file:
454: (8) sort_by = "location"
455: (4) for line in generate_po(
456: (8) catalog,
457: (8) ignore_obsolete=ignore_obsolete,
458: (8) include_lineno=include_lineno,
459: (8) include_previous=include_previous,
460: (8) no_location=no_location,
461: (8) omit_header=omit_header,
462: (8) sort_by=sort_by,
463: (8) width=width,
464: (4) ):
465: (8) if isinstance(line, str):
466: (12) line = line.encode(catalog.charset, 'backslashreplace')
467: (8) fileobj.write(line)
468: (0) def generate_po(
469: (4) catalog: Catalog,
470: (4) *,
471: (4) ignore_obsolete: bool = False,
472: (4) include_lineno: bool = True,
473: (4) include_previous: bool = False,
474: (4) no_location: bool = False,
475: (4) omit_header: bool = False,
476: (4) sort_by: Literal["message", "location"] | None = None,
477: (4) width: int = 76,
478: (0) ) -> Iterable[str]:
479: (4) r"""Yield text strings representing a ``gettext`` PO (portable object) file.
480: (4) See `write_po()` for a more detailed description.
481: (4) """
482: (4) # xgettext always wraps comments even if --no-wrap is passed;
483: (4) # provide the same behaviour
484: (4) comment_width = width if width and width > 0 else 76
485: (4) def _format_comment(comment, prefix=''):
486: (8) for line in wraptext(comment, comment_width):
487: (12) yield f"#{prefix} {line.strip()}\n"
488: (4) def _format_message(message, prefix=''):
489: (8) if isinstance(message.id, (list, tuple)):
490: (12) if message.context:
491: (16) yield f"{prefix}msgctxt {normalize(message.context, prefix=prefix, width=width)}\n"
492: (12) yield f"{prefix}msgid {normalize(message.id[0], prefix=prefix, width=width)}\n"
493: (12) yield f"{prefix}msgid_plural {normalize(message.id[1], prefix=prefix, width=width)}\n"
494: (12) for idx in range(catalog.num_plurals):
495: (16) try:
496: (20) string = message.string[idx]
497: (16) except IndexError:
498: (20) string = ''
499: (16) yield f"{prefix}msgstr[{idx:d}] {normalize(string, prefix=prefix, width=width)}\n"
500: (8) else:
501: (12) if message.context:
502: (16) yield f"{prefix}msgctxt {normalize(message.context, prefix=prefix, width=width)}\n"
503: (12) yield f"{prefix}msgid {normalize(message.id, prefix=prefix, width=width)}\n"
504: (12) yield f"{prefix}msgstr {normalize(message.string or '', prefix=prefix, width=width)}\n"
505: (4) for message in _sort_messages(catalog, sort_by=sort_by):
506: (8) if not message.id: # This is the header "message"
507: (12) if omit_header:
508: (16) continue
509: (12) comment_header = catalog.header_comment
510: (12) if width and width > 0:
511: (16) lines = []
512: (16) for line in comment_header.splitlines():
513: (20) lines += wraptext(line, width=width,
514: (38) subsequent_indent='# ')
515: (16) comment_header = '\n'.join(lines)
516: (12) yield f"{comment_header}\n"
517: (8) for comment in message.user_comments:
518: (12) yield from _format_comment(comment)
519: (8) for comment in message.auto_comments:
520: (12) yield from _format_comment(comment, prefix='.')
521: (8) if not no_location:
522: (12) locs = []
523: (12) # sort locations by filename and lineno.
524: (12) # if there's no <int> as lineno, use `-1`.
525: (12) # if no sorting possible, leave unsorted.
526: (12) # (see issue #606)
527: (12) try:
528: (16) locations = sorted(message.locations,
529: (35) key=lambda x: (x[0], isinstance(x[1], int) and x[1] or -1))
530: (12) except TypeError: # e.g. "TypeError: unorderable types: NoneType() < int()"
531: (16) locations = message.locations
532: (12) for filename, lineno in locations:
533: (16) location = filename.replace(os.sep, '/')
534: (16) if lineno and include_lineno:
535: (20) location = f"{location}:{lineno:d}"
536: (16) if location not in locs:
537: (20) locs.append(location)
538: (12) yield from _format_comment(' '.join(locs), prefix=':')
539: (8) if message.flags:
540: (12) yield f"#{', '.join(['', *sorted(message.flags)])}\n"
541: (8) if message.previous_id and include_previous:
542: (12) yield from _format_comment(
543: (16) f'msgid {normalize(message.previous_id[0], width=width)}',
544: (16) prefix='|',
545: (12) )
546: (12) if len(message.previous_id) > 1:
547: (16) norm_previous_id = normalize(message.previous_id[1], width=width)
548: (16) yield from _format_comment(f'msgid_plural {norm_previous_id}', prefix='|')
549: (8) yield from _format_message(message)
550: (8) yield '\n'
551: (4) if not ignore_obsolete:
552: (8) for message in _sort_messages(
553: (12) catalog.obsolete.values(),
554: (12) sort_by=sort_by,
555: (8) ):
556: (12) for comment in message.user_comments:
557: (16) yield from _format_comment(comment)
558: (12) yield from _format_message(message, prefix='#~ ')
559: (12) yield '\n'
560: (0) def _sort_messages(messages: Iterable[Message], sort_by: Literal["message", "location"] | None) -> list[Message]:
561: (4) """
562: (4) Sort the given message iterable by the given criteria.
563: (4) Always returns a list.
564: (4) :param messages: An iterable of Messages.
565: (4) :param sort_by: Sort by which criteria? Options are `message` and `location`.
566: (4) :return: list[Message]
567: (4) """
568: (4) messages = list(messages)
569: (4) if sort_by == "message":
570: (8) messages.sort()
571: (4) elif sort_by == "location":
572: (8) messages.sort(key=lambda m: m.locations)
573: (4) return messages
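A short sketch of generate_po as a plain text generator, which write_po then encodes to bytes (the message contents are invented):

    # Sketch: generate_po yields str chunks; write_po encodes and writes them.
    from babel.messages import Catalog
    from babel.messages.pofile import generate_po

    catalog = Catalog(locale='fr')
    catalog.add('apple', 'pomme', locations=[('menu.py', 42)])
    for chunk in generate_po(catalog, omit_header=True):
        print(chunk, end='')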
----------------------------------------
File 19 - .\messages \catalog.py:
1: (0) """
2: (4) babel.messages.catalog
3: (4) ~~~~~~~~~~~~~~~~~~~~~~
4: (4) Data structures for message catalogs.
5: (4) :copyright: (c) 2013-2024 by the Babel Team.
6: (4) :license: BSD, see LICENSE for more details.
7: (0) """
8: (0) from __future__ import annotations
9: (0) import datetime
10: (0) import re
11: (0) from collections import OrderedDict
12: (0) from collections.abc import Iterable, Iterator
13: (0) from copy import copy
14: (0) from difflib import SequenceMatcher
15: (0) from email import message_from_string
16: (0) from heapq import nlargest
17: (0) from typing import TYPE_CHECKING
18: (0) from babel import __version__ as VERSION
19: (0) from babel.core import Locale, UnknownLocaleError
20: (0) from babel.dates import format_datetime
21: (0) from babel.messages.plurals import get_plural
22: (0) from babel.util import LOCALTZ, FixedOffsetTimezone, _cmp, distinct
23: (0) if TYPE_CHECKING:
24: (4) from typing_extensions import TypeAlias
25: (4) _MessageID: TypeAlias = str | tuple[str, ...] | list[str]
26: (0) __all__ = ['Message', 'Catalog', 'TranslationError']
27: (0) def get_close_matches(word, possibilities, n=3, cutoff=0.6):
28: (4) """A modified version of ``difflib.get_close_matches``.
29: (4) It just passes ``autojunk=False`` to the ``SequenceMatcher``, to work
30: (4) around https://github.com/python/cpython/issues/90825.
31: (4) """
32: (4) if not n > 0: # pragma: no cover
33: (8) raise ValueError(f"n must be > 0: {n!r}")
34: (4) if not 0.0 <= cutoff <= 1.0: # pragma: no cover
35: (8) raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}")
36: (4) result = []
37: (4) s = SequenceMatcher(autojunk=False) # only line changed from difflib.py
38: (4) s.set_seq2(word)
39: (4) for x in possibilities:
40: (8) s.set_seq1(x)
41: (8) if s.real_quick_ratio() >= cutoff and \
42: (11) s.quick_ratio() >= cutoff and \
43: (11) s.ratio() >= cutoff:
44: (12) result.append((s.ratio(), x))
45: (4) # Move the best scorers to head of list
46: (4) result = nlargest(n, result)
47: (4) # Strip scores for the best n matches
48: (4) return [x for score, x in result]
49: (0) PYTHON_FORMAT = re.compile(r'''
50: (4) \%
51: (8) (?:\(([\w]*)\))?
52: (8) (
53: (12) [-#0\ +]?(?:\*|[\d]+)?
54: (12) (?:\.(?:\*|[\d]+))?
55: (12) [hlL]?
56: (8) )
57: (8) ([diouxXeEfFgGcrs%])
58: (0) ''', re.VERBOSE)
59: (0) def _parse_datetime_header(value: str) -> datetime.datetime:
60: (4) match = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value)
61: (4) dt = datetime.datetime.strptime(match.group('datetime'), '%Y-%m-%d %H:%M')
62: (4) # Separate the offset into a sign component, hours, and minutes
63: (4) tzoffset = match.group('tzoffset')
64: (4) if tzoffset is not None:
65: (8) plus_minus_s, rest = tzoffset[0], tzoffset[1:]
66: (8) hours_offset_s, mins_offset_s = rest[:2], rest[2:]
67: (8) # Make them all integers
68: (8) plus_minus = int(f"{plus_minus_s}1")
69: (8) hours_offset = int(hours_offset_s)
70: (8) mins_offset = int(mins_offset_s)
71: (8) # Calculate net offset
72: (8) net_mins_offset = hours_offset * 60
73: (8) net_mins_offset += mins_offset
74: (8) net_mins_offset *= plus_minus
75: (8) # Create an offset object
76: (8) tzoffset = FixedOffsetTimezone(net_mins_offset)
77: (8) # Store the offset in a datetime object
78: (8) dt = dt.replace(tzinfo=tzoffset)
79: (4) return dt
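For illustration (a sketch; _parse_datetime_header is a private helper), the header date format it accepts looks like this:

    # Sketch: parsing a POT-Creation-Date / PO-Revision-Date style value.
    from babel.messages.catalog import _parse_datetime_header

    dt = _parse_datetime_header('2024-07-01 12:30+0200')
    print(dt.isoformat())   # 2024-07-01T12:30:00+02:00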
80: (0) class Message:
81: (4) """Representation of a single message in a catalog."""
82: (4) def __init__(
83: (8) self,
84: (8) id: _MessageID,
85: (8) string: _MessageID | None = '',
86: (8) locations: Iterable[tuple[str, int]] = (),
87: (8) flags: Iterable[str] = (),
88: (8) auto_comments: Iterable[str] = (),
89: (8) user_comments: Iterable[str] = (),
90: (8) previous_id: _MessageID = (),
91: (8) lineno: int | None = None,
92: (8) context: str | None = None,
93: (4) ) -> None:
94: (8) """Create the message object.
95: (8) :param id: the message ID, or a ``(singular, plural)`` tuple for
96: (19) pluralizable messages
97: (8) :param string: the translated message string, or a
98: (23) ``(singular, plural)`` tuple for pluralizable messages
99: (8) :param locations: a sequence of ``(filename, lineno)`` tuples
100: (8) :param flags: a set or sequence of flags
101: (8) :param auto_comments: a sequence of automatic comments for the message
102: (8) :param user_comments: a sequence of user comments for the message
103: (8) :param previous_id: the previous message ID, or a ``(singular, plural)``
104: (28) tuple for pluralizable messages
105: (8) :param lineno: the line number on which the msgid line was found in the
106: (23) PO file, if any
107: (8) :param context: the message context
108: (8) """
109: (8) self.id = id
110: (8) if not string and self.pluralizable:
111: (12) string = ('', '')
112: (8) self.string = string
113: (8) self.locations = list(distinct(locations))
114: (8) self.flags = set(flags)
115: (8) if id and self.python_format:
116: (12) self.flags.add('python-format')
117: (8) else:
118: (12) self.flags.discard('python-format')
119: (8) self.auto_comments = list(distinct(auto_comments))
120: (8) self.user_comments = list(distinct(user_comments))
121: (8) if isinstance(previous_id, str):
122: (12) self.previous_id = [previous_id]
123: (8) else:
124: (12) self.previous_id = list(previous_id)
125: (8) self.lineno = lineno
126: (8) self.context = context
127: (4) def __repr__(self) -> str:
128: (8) return f"<{type(self).__name__} {self.id!r} (flags: {list(self.flags)!r})>"
129: (4) def __cmp__(self, other: object) -> int:
130: (8) """Compare Messages, taking into account plural ids"""
131: (8) def values_to_compare(obj):
132: (12) if isinstance(obj, Message) and obj.pluralizable:
133: (16) return obj.id[0], obj.context or ''
134: (12) return obj.id, obj.context or ''
135: (8) return _cmp(values_to_compare(self), values_to_compare(other))
136: (4) def __gt__(self, other: object) -> bool:
137: (8) return self.__cmp__(other) > 0
138: (4) def __lt__(self, other: object) -> bool:
139: (8) return self.__cmp__(other) < 0
140: (4) def __ge__(self, other: object) -> bool:
141: (8) return self.__cmp__(other) >= 0
142: (4) def __le__(self, other: object) -> bool:
143: (8) return self.__cmp__(other) <= 0
144: (4) def __eq__(self, other: object) -> bool:
145: (8) return self.__cmp__(other) == 0
146: (4) def __ne__(self, other: object) -> bool:
147: (8) return self.__cmp__(other) != 0
148: (4) def is_identical(self, other: Message) -> bool:
149: (8) """Checks whether messages are identical, taking into account all
150: (8) properties.
151: (8) """
152: (8) assert isinstance(other, Message)
153: (8) return self.__dict__ == other.__dict__
154: (4) def clone(self) -> Message:
155: (8) return Message(*map(copy, (self.id, self.string, self.locations,
156: (35) self.flags, self.auto_comments,
157: (35) self.user_comments, self.previous_id,
158: (35) self.lineno, self.context)))
159: (4) def check(self, catalog: Catalog | None = None) -> list[TranslationError]:
160: (8) """Run various validation checks on the message. Some validations
161: (8) are only performed if the catalog is provided. This method returns
162: (8) a sequence of `TranslationError` objects.
163: (8) :rtype: ``list``
164: (8) :param catalog: A catalog instance that is passed to the checkers
165: (8) :see: `Catalog.check` for a way to perform checks for all messages
166: (14) in a catalog.
167: (8) """
168: (8) from babel.messages.checkers import checkers
169: (8) errors: list[TranslationError] = []
170: (8) for checker in checkers:
171: (12) try:
172: (16) checker(catalog, self)
173: (12) except TranslationError as e:
174: (16) errors.append(e)
175: (8) return errors
176: (4) @property
177: (4) def fuzzy(self) -> bool:
178: (8) """Whether the translation is fuzzy.
179: (8) >>> Message('foo').fuzzy
180: (8) False
181: (8) >>> msg = Message('foo', 'foo', flags=['fuzzy'])
182: (8) >>> msg.fuzzy
183: (8) True
184: (8) >>> msg
185: (8) <Message 'foo' (flags: ['fuzzy'])>
186: (8) :type: `bool`"""
187: (8) return 'fuzzy' in self.flags
188: (4) @property
189: (4) def pluralizable(self) -> bool:
190: (8) """Whether the message is pluralizable.
191: (8) >>> Message('foo').pluralizable
192: (8) False
193: (8) >>> Message(('foo', 'bar')).pluralizable
194: (8) True
195: (8) :type: `bool`"""
196: (8) return isinstance(self.id, (list, tuple))
197: (4) @property
198: (4) def python_format(self) -> bool:
199: (8) """Whether the message contains Python-style parameters.
200: (8) >>> Message('foo %(name)s bar').python_format
201: (8) True
202: (8) >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
203: (8) True
204: (8) :type: `bool`"""
205: (8) ids = self.id
206: (8) if not isinstance(ids, (list, tuple)):
207: (12) ids = [ids]
208: (8) return any(PYTHON_FORMAT.search(id) for id in ids)
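A small sketch of constructing Message objects directly (applications usually go through Catalog.add instead; the strings are invented):

    # Sketch: flags and properties derived from the constructor arguments.
    from babel.messages.catalog import Message

    msg = Message('greeting %(name)s', 'Hallo %(name)s', locations=[('app.py', 7)])
    print(msg.python_format, msg.pluralizable, sorted(msg.flags))   # True False ['python-format']
    plural = Message(('apple', 'apples'), ('Apfel', 'Äpfel'))
    print(plural.pluralizable)                                      # True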
209: (0) class TranslationError(Exception):
210: (4) """Exception thrown by translation checkers when invalid message
211: (4) translations are encountered."""
212: (0) DEFAULT_HEADER = """\
213: (0) # Translations template for PROJECT.
214: (0) # Copyright (C) YEAR ORGANIZATION
215: (0) # This file is distributed under the same license as the PROJECT project.
216: (0) # FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
217: (0) #"""
218: (0) def parse_separated_header(value: str) -> dict[str, str]:
219: (4) # Adapted from https://peps.python.org/pep-0594/#cgi
220: (4) from email.message import Message
221: (4) m = Message()
222: (4) m['content-type'] = value
223: (4) return dict(m.get_params())
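An illustration of the header-splitting helper above (a sketch using a Content-Type style value):

    # Sketch: splitting a header value into its parameters.
    from babel.messages.catalog import parse_separated_header

    params = parse_separated_header('text/plain; charset=utf-8')
    print(params)   # {'text/plain': '', 'charset': 'utf-8'}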
224: (0) class Catalog:
225: (4) """Representation of a message catalog."""
226: (4) def __init__(
227: (8) self,
228: (8) locale: str | Locale | None = None,
229: (8) domain: str | None = None,
230: (8) header_comment: str | None = DEFAULT_HEADER,
231: (8) project: str | None = None,
232: (8) version: str | None = None,
233: (8) copyright_holder: str | None = None,
234: (8) msgid_bugs_address: str | None = None,
235: (8) creation_date: datetime.datetime | str | None = None,
236: (8) revision_date: datetime.datetime | datetime.time | float | str | None = None,
237: (8) last_translator: str | None = None,
238: (8) language_team: str | None = None,
239: (8) charset: str | None = None,
240: (8) fuzzy: bool = True,
241: (4) ) -> None:
242: (8) """Initialize the catalog object.
243: (8) :param locale: the locale identifier or `Locale` object, or `None`
244: (23) if the catalog is not bound to a locale (which basically
245: (23) means it's a template)
246: (8) :param domain: the message domain
247: (8) :param header_comment: the header comment as string, or `None` for the
248: (31) default header
249: (8) :param project: the project's name
250: (8) :param version: the project's version
251: (8) :param copyright_holder: the copyright holder of the catalog
252: (8) :param msgid_bugs_address: the email address or URL to submit bug
253: (35) reports to
254: (8) :param creation_date: the date the catalog was created
255: (8) :param revision_date: the date the catalog was revised
256: (8) :param last_translator: the name and email of the last translator
257: (8) :param language_team: the name and email of the language team
258: (8) :param charset: the encoding to use in the output (defaults to utf-8)
259: (8) :param fuzzy: the fuzzy bit on the catalog header
260: (8) """
261: (8) self.domain = domain
262: (8) self.locale = locale
263: (8) self._header_comment = header_comment
264: (8) self._messages: OrderedDict[str | tuple[str, str], Message] = OrderedDict()
265: (8) self.project = project or 'PROJECT'
266: (8) self.version = version or 'VERSION'
267: (8) self.copyright_holder = copyright_holder or 'ORGANIZATION'
268: (8) self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'
269: (8) self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
270: (8) """Name and email address of the last translator."""
271: (8) self.language_team = language_team or 'LANGUAGE <LL@li.org>'
272: (8) """Name and email address of the language team."""
273: (8) self.charset = charset or 'utf-8'
274: (8) if creation_date is None:
275: (12) creation_date = datetime.datetime.now(LOCALTZ)
276: (8) elif isinstance(creation_date, datetime.datetime) and not creation_date.tzinfo:
277: (12) creation_date = creation_date.replace(tzinfo=LOCALTZ)
278: (8) self.creation_date = creation_date
279: (8) if revision_date is None:
280: (12) revision_date = 'YEAR-MO-DA HO:MI+ZONE'
281: (8) elif isinstance(revision_date, datetime.datetime) and not revision_date.tzinfo:
282: (12) revision_date = revision_date.replace(tzinfo=LOCALTZ)
283: (8) self.revision_date = revision_date
284: (8) self.fuzzy = fuzzy
285: (8) # Dictionary of obsolete messages
286: (8) self.obsolete: OrderedDict[str | tuple[str, str], Message] = OrderedDict()
287: (8) self._num_plurals = None
288: (8) self._plural_expr = None
289: (4) def _set_locale(self, locale: Locale | str | None) -> None:
290: (8) if locale is None:
291: (12) self._locale_identifier = None
292: (12) self._locale = None
293: (12) return
294: (8) if isinstance(locale, Locale):
295: (12) self._locale_identifier = str(locale)
296: (12) self._locale = locale
297: (12) return
298: (8) if isinstance(locale, str):
299: (12) self._locale_identifier = str(locale)
300: (12) try:
301: (16) self._locale = Locale.parse(locale)
302: (12) except UnknownLocaleError:
303: (16) self._locale = None
304: (12) return
305: (8) raise TypeError(f"`locale` must be a Locale, a locale identifier string, or None; got {locale!r}")
306: (4) def _get_locale(self) -> Locale | None:
307: (8) return self._locale
308: (4) def _get_locale_identifier(self) -> str | None:
309: (8) return self._locale_identifier
310: (4) locale = property(_get_locale, _set_locale)
311: (4) locale_identifier = property(_get_locale_identifier)
312: (4) def _get_header_comment(self) -> str:
313: (8) comment = self._header_comment
314: (8) year = datetime.datetime.now(LOCALTZ).strftime('%Y')
315: (8) if hasattr(self.revision_date, 'strftime'):
316: (12) year = self.revision_date.strftime('%Y')
317: (8) comment = comment.replace('PROJECT', self.project) \
318: (25) .replace('VERSION', self.version) \
319: (25) .replace('YEAR', year) \
320: (25) .replace('ORGANIZATION', self.copyright_holder)
321: (8) locale_name = (self.locale.english_name if self.locale else self.locale_identifier)
322: (8) if locale_name:
323: (12) comment = comment.replace("Translations template", f"{locale_name} translations")
324: (8) return comment
325: (4) def _set_header_comment(self, string: str | None) -> None:
326: (8) self._header_comment = string
327: (4) header_comment = property(_get_header_comment, _set_header_comment, doc="""\
328: (4) The header comment for the catalog.
329: (4) >>> catalog = Catalog(project='Foobar', version='1.0',
330: (4) ... copyright_holder='Foo Company')
331: (4) >>> print(catalog.header_comment) #doctest: +ELLIPSIS
332: (4) # Translations template for Foobar.
333: (4) # Copyright (C) ... Foo Company
334: (4) # This file is distributed under the same license as the Foobar project.
335: (4) # FIRST AUTHOR <EMAIL@ADDRESS>, ....
336: (4) #
337: (4) The header can also be set from a string. Any known upper-case variables
338: (4) will be replaced when the header is retrieved again:
339: (4) >>> catalog = Catalog(project='Foobar', version='1.0',
340: (4) ... copyright_holder='Foo Company')
341: (4) >>> catalog.header_comment = '''\\
342: (4) ... # The POT for my really cool PROJECT project.
343: (4) ... # Copyright (C) 1990-2003 ORGANIZATION
344: (4) ... # This file is distributed under the same license as the PROJECT
345: (4) ... # project.
346: (4) ... #'''
347: (4) >>> print(catalog.header_comment)
348: (4) # The POT for my really cool Foobar project.
349: (4) # Copyright (C) 1990-2003 Foo Company
350: (4) # This file is distributed under the same license as the Foobar
351: (4) # project.
352: (4) #
353: (4) :type: `unicode`
354: (4) """)
355: (4) def _get_mime_headers(self) -> list[tuple[str, str]]:
356: (8) headers: list[tuple[str, str]] = []
357: (8) headers.append(("Project-Id-Version", f"{self.project} {self.version}"))
358: (8) headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address))
359: (8) headers.append(('POT-Creation-Date',
360: (24) format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ',
361: (40) locale='en')))
362: (8) if isinstance(self.revision_date, (datetime.datetime, datetime.time, int, float)):
363: (12) headers.append(('PO-Revision-Date',
364: (28) format_datetime(self.revision_date,
365: (44) 'yyyy-MM-dd HH:mmZ', locale='en')))
366: (8) else:
367: (12) headers.append(('PO-Revision-Date', self.revision_date))
368: (8) headers.append(('Last-Translator', self.last_translator))
369: (8) if self.locale_identifier:
370: (12) headers.append(('Language', str(self.locale_identifier)))
371: (8) if self.locale_identifier and ('LANGUAGE' in self.language_team):
372: (12) headers.append(('Language-Team',
373: (28) self.language_team.replace('LANGUAGE',
374: (55) str(self.locale_identifier))))
375: (8) else:
376: (12) headers.append(('Language-Team', self.language_team))
377: (8) if self.locale is not None:
378: (12) headers.append(('Plural-Forms', self.plural_forms))
379: (8) headers.append(('MIME-Version', '1.0'))
380: (8) headers.append(("Content-Type", f"text/plain; charset={self.charset}"))
381: (8) headers.append(('Content-Transfer-Encoding', '8bit'))
382: (8) headers.append(("Generated-By", f"Babel {VERSION}\n"))
383: (8) return headers
384: (4) def _force_text(self, s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') -> str:
385: (8) if isinstance(s, str):
386: (12) return s
387: (8) if isinstance(s, bytes):
388: (12) return s.decode(encoding, errors)
389: (8) return str(s)
390: (4) def _set_mime_headers(self, headers: Iterable[tuple[str, str]]) -> None:
391: (8) for name, value in headers:
392: (12) name = self._force_text(name.lower(), encoding=self.charset)
393: (12) value = self._force_text(value, encoding=self.charset)
394: (12) if name == 'project-id-version':
395: (16) parts = value.split(' ')
396: (16) self.project = ' '.join(parts[:-1])
397: (16) self.version = parts[-1]
398: (12) elif name == 'report-msgid-bugs-to':
399: (16) self.msgid_bugs_address = value
400: (12) elif name == 'last-translator':
401: (16) self.last_translator = value
402: (12) elif name == 'language':
403: (16) value = value.replace('-', '_')
404: (16) # The `or None` makes sure that the locale is set to None
405: (16) # if the header's value is an empty string, which is what
406: (16) # some tools generate (instead of eliding the empty Language
407: (16) # header altogether).
408: (16) self._set_locale(value or None)
409: (12) elif name == 'language-team':
410: (16) self.language_team = value
411: (12) elif name == 'content-type':
412: (16) params = parse_separated_header(value)
413: (16) if 'charset' in params:
414: (20) self.charset = params['charset'].lower()
415: (12) elif name == 'plural-forms':
416: (16) params = parse_separated_header(f" ;{value}")
417: (16) self._num_plurals = int(params.get('nplurals', 2))
418: (16) self._plural_expr = params.get('plural', '(n != 1)')
419: (12) elif name == 'pot-creation-date':
420: (16) self.creation_date = _parse_datetime_header(value)
421: (12) elif name == 'po-revision-date':
422: (16) # Keep the value if it's not the default one
423: (16) if 'YEAR' not in value:
424: (20) self.revision_date = _parse_datetime_header(value)
425: (4) mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
426: (4) The MIME headers of the catalog, used for the special ``msgid ""`` entry.
427: (4) The behavior of this property changes slightly depending on whether a locale
428: (4) is set or not, the latter indicating that the catalog is a template for
429: (4) actual translations.
430: (4) Here's an example of the output for such a catalog template:
431: (4) >>> from babel.dates import UTC
432: (4) >>> from datetime import datetime
433: (4) >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
434: (4) >>> catalog = Catalog(project='Foobar', version='1.0',
435: (4) ... creation_date=created)
436: (4) >>> for name, value in catalog.mime_headers:
437: (4) ... print('%s: %s' % (name, value))
438: (4) Project-Id-Version: Foobar 1.0
439: (4) Report-Msgid-Bugs-To: EMAIL@ADDRESS
440: (4) POT-Creation-Date: 1990-04-01 15:30+0000
441: (4) PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
442: (4) Last-Translator: FULL NAME <EMAIL@ADDRESS>
443: (4) Language-Team: LANGUAGE <LL@li.org>
444: (4) MIME-Version: 1.0
445: (4) Content-Type: text/plain; charset=utf-8
446: (4) Content-Transfer-Encoding: 8bit
447: (4) Generated-By: Babel ...
448: (4) And here's an example of the output when the locale is set:
449: (4) >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
450: (4) >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
451: (4) ... creation_date=created, revision_date=revised,
452: (4) ... last_translator='John Doe <jd@example.com>',
453: (4) ... language_team='de_DE <de@example.com>')
454: (4) >>> for name, value in catalog.mime_headers:
455: (4) ... print('%s: %s' % (name, value))
456: (4) Project-Id-Version: Foobar 1.0
457: (4) Report-Msgid-Bugs-To: EMAIL@ADDRESS
458: (4) POT-Creation-Date: 1990-04-01 15:30+0000
459: (4) PO-Revision-Date: 1990-08-03 12:00+0000
460: (4) Last-Translator: John Doe <jd@example.com>
461: (4) Language: de_DE
462: (4) Language-Team: de_DE <de@example.com>
463: (4) Plural-Forms: nplurals=2; plural=(n != 1);
464: (4) MIME-Version: 1.0
465: (4) Content-Type: text/plain; charset=utf-8
466: (4) Content-Transfer-Encoding: 8bit
467: (4) Generated-By: Babel ...
468: (4) :type: `list`
469: (4) """)
470: (4) @property
471: (4) def num_plurals(self) -> int:
472: (8) """The number of plurals used by the catalog or locale.
473: (8) >>> Catalog(locale='en').num_plurals
474: (8) 2
475: (8) >>> Catalog(locale='ga').num_plurals
476: (8) 5
477: (8) :type: `int`"""
478: (8) if self._num_plurals is None:
479: (12) num = 2
480: (12) if self.locale:
481: (16) num = get_plural(self.locale)[0]
482: (12) self._num_plurals = num
483: (8) return self._num_plurals
484: (4) @property
485: (4) def plural_expr(self) -> str:
486: (8) """The plural expression used by the catalog or locale.
487: (8) >>> Catalog(locale='en').plural_expr
488: (8) '(n != 1)'
489: (8) >>> Catalog(locale='ga').plural_expr
490: (8) '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)'
491: (8) >>> Catalog(locale='ding').plural_expr # unknown locale
492: (8) '(n != 1)'
493: (8) :type: `str`"""
494: (8) if self._plural_expr is None:
495: (12) expr = '(n != 1)'
496: (12) if self.locale:
497: (16) expr = get_plural(self.locale)[1]
498: (12) self._plural_expr = expr
499: (8) return self._plural_expr
500: (4) @property
501: (4) def plural_forms(self) -> str:
502: (8) """Return the plural forms declaration for the locale.
503: (8) >>> Catalog(locale='en').plural_forms
504: (8) 'nplurals=2; plural=(n != 1);'
505: (8) >>> Catalog(locale='pt_BR').plural_forms
506: (8) 'nplurals=2; plural=(n > 1);'
507: (8) :type: `str`"""
508: (8) return f"nplurals={self.num_plurals}; plural={self.plural_expr};"
509: (4) def __contains__(self, id: _MessageID) -> bool:
510: (8) """Return whether the catalog has a message with the specified ID."""
511: (8) return self._key_for(id) in self._messages
512: (4) def __len__(self) -> int:
513: (8) """The number of messages in the catalog.
514: (8) This does not include the special ``msgid ""`` entry."""
515: (8) return len(self._messages)
516: (4) def __iter__(self) -> Iterator[Message]:
517: (8) """Iterates through all the entries in the catalog, in the order they
518: (8) were added, yielding a `Message` object for every entry.
519: (8) :rtype: ``iterator``"""
520: (8) buf = []
521: (8) for name, value in self.mime_headers:
522: (12) buf.append(f"{name}: {value}")
523: (8) flags = set()
524: (8) if self.fuzzy:
525: (12) flags |= {'fuzzy'}
526: (8) yield Message('', '\n'.join(buf), flags=flags)
527: (8) for key in self._messages:
528: (12) yield self._messages[key]
529: (4) def __repr__(self) -> str:
530: (8) locale = ''
531: (8) if self.locale:
532: (12) locale = f" {self.locale}"
533: (8) return f"<{type(self).__name__} {self.domain!r}{locale}>"
534: (4) def __delitem__(self, id: _MessageID) -> None:
535: (8) """Delete the message with the specified ID."""
536: (8) self.delete(id)
537: (4) def __getitem__(self, id: _MessageID) -> Message:
538: (8) """Return the message with the specified ID.
539: (8) :param id: the message ID
540: (8) """
541: (8) return self.get(id)
542: (4) def __setitem__(self, id: _MessageID, message: Message) -> None:
543: (8) """Add or update the message with the specified ID.
544: (8) >>> catalog = Catalog()
545: (8) >>> catalog[u'foo'] = Message(u'foo')
546: (8) >>> catalog[u'foo']
547: (8) <Message u'foo' (flags: [])>
548: (8) If a message with that ID is already in the catalog, it is updated
549: (8) to include the locations and flags of the new message.
550: (8) >>> catalog = Catalog()
551: (8) >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)])
552: (8) >>> catalog[u'foo'].locations
553: (8) [('main.py', 1)]
554: (8) >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)])
555: (8) >>> catalog[u'foo'].locations
556: (8) [('main.py', 1), ('utils.py', 5)]
557: (8) :param id: the message ID
558: (8) :param message: the `Message` object
559: (8) """
560: (8) assert isinstance(message, Message), 'expected a Message object'
561: (8) key = self._key_for(id, message.context)
562: (8) current = self._messages.get(key)
563: (8) if current:
564: (12) if message.pluralizable and not current.pluralizable:
565: (16) # The new message adds pluralization
566: (16) current.id = message.id
567: (16) current.string = message.string
568: (12) current.locations = list(distinct(current.locations +
569: (46) message.locations))
570: (12) current.auto_comments = list(distinct(current.auto_comments +
571: (50) message.auto_comments))
572: (12) current.user_comments = list(distinct(current.user_comments +
573: (50) message.user_comments))
574: (12) current.flags |= message.flags
575: (12) message = current
576: (8) elif id == '':
577: (12) # special treatment for the header message
578: (12) self.mime_headers = message_from_string(message.string).items()
579: (12) self.header_comment = "\n".join([f"# {c}".rstrip() for c in message.user_comments])
580: (12) self.fuzzy = message.fuzzy
581: (8) else:
582: (12) if isinstance(id, (list, tuple)):
583: (16) assert isinstance(message.string, (list, tuple)), \
584: (20) f"Expected sequence but got {type(message.string)}"
585: (12) self._messages[key] = message
586: (4) def add(
587: (8) self,
588: (8) id: _MessageID,
589: (8) string: _MessageID | None = None,
590: (8) locations: Iterable[tuple[str, int]] = (),
591: (8) flags: Iterable[str] = (),
592: (8) auto_comments: Iterable[str] = (),
593: (8) user_comments: Iterable[str] = (),
594: (8) previous_id: _MessageID = (),
595: (8) lineno: int | None = None,
596: (8) context: str | None = None,
597: (4) ) -> Message:
598: (8) """Add or update the message with the specified ID.
599: (8) >>> catalog = Catalog()
600: (8) >>> catalog.add(u'foo')
601: (8) <Message ...>
602: (8) >>> catalog[u'foo']
603: (8) <Message u'foo' (flags: [])>
604: (8) This method simply constructs a `Message` object with the given
605: (8) arguments and invokes `__setitem__` with that object.
606: (8) :param id: the message ID, or a ``(singular, plural)`` tuple for
607: (19) pluralizable messages
608: (8) :param string: the translated message string, or a
609: (23) ``(singular, plural)`` tuple for pluralizable messages
610: (8) :param locations: a sequence of ``(filename, lineno)`` tuples
611: (8) :param flags: a set or sequence of flags
612: (8) :param auto_comments: a sequence of automatic comments
613: (8) :param user_comments: a sequence of user comments
614: (8) :param previous_id: the previous message ID, or a ``(singular, plural)``
615: (28) tuple for pluralizable messages
616: (8) :param lineno: the line number on which the msgid line was found in the
617: (23) PO file, if any
618: (8) :param context: the message context
619: (8) """
620: (8) message = Message(id, string, list(locations), flags, auto_comments,
621: (26) user_comments, previous_id, lineno=lineno,
622: (26) context=context)
623: (8) self[id] = message
624: (8) return message
625: (4) def check(self) -> Iterable[tuple[Message, list[TranslationError]]]:
626: (8) """Run various validation checks on the translations in the catalog.
627: (8) For every message which fails validation, this method yields a
628: (8) ``(message, errors)`` tuple, where ``message`` is the `Message` object
629: (8) and ``errors`` is a sequence of `TranslationError` objects.
630: (8) :rtype: ``generator`` of ``(message, errors)``
631: (8) """
632: (8) for message in self._messages.values():
633: (12) errors = message.check(catalog=self)
634: (12) if errors:
635: (16) yield message, errors
636: (4) def get(self, id: _MessageID, context: str | None = None) -> Message | None:
637: (8) """Return the message with the specified ID and context.
638: (8) :param id: the message ID
639: (8) :param context: the message context, or ``None`` for no context
640: (8) """
641: (8) return self._messages.get(self._key_for(id, context))
642: (4) def delete(self, id: _MessageID, context: str | None = None) -> None:
643: (8) """Delete the message with the specified ID and context.
644: (8) :param id: the message ID
645: (8) :param context: the message context, or ``None`` for no context
646: (8) """
647: (8) key = self._key_for(id, context)
648: (8) if key in self._messages:
649: (12) del self._messages[key]
650: (4) def update(
651: (8) self,
652: (8) template: Catalog,
653: (8) no_fuzzy_matching: bool = False,
654: (8) update_header_comment: bool = False,
655: (8) keep_user_comments: bool = True,
656: (8) update_creation_date: bool = True,
657: (4) ) -> None:
658: (8) """Update the catalog based on the given template catalog.
659: (8) >>> from babel.messages import Catalog
660: (8) >>> template = Catalog()
661: (8) >>> template.add('green', locations=[('main.py', 99)])
662: (8) <Message ...>
663: (8) >>> template.add('blue', locations=[('main.py', 100)])
664: (8) <Message ...>
665: (8) >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
666: (8) <Message ...>
667: (8) >>> catalog = Catalog(locale='de_DE')
668: (8) >>> catalog.add('blue', u'blau', locations=[('main.py', 98)])
669: (8) <Message ...>
670: (8) >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
671: (8) <Message ...>
672: (8) >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
673: (8) ... locations=[('util.py', 38)])
674: (8) <Message ...>
675: (8) >>> catalog.update(template)
676: (8) >>> len(catalog)
677: (8) 3
678: (8) >>> msg1 = catalog['green']
679: (8) >>> msg1.string
680: (8) >>> msg1.locations
681: (8) [('main.py', 99)]
682: (8) >>> msg2 = catalog['blue']
683: (8) >>> msg2.string
684: (8) u'blau'
685: (8) >>> msg2.locations
686: (8) [('main.py', 100)]
687: (8) >>> msg3 = catalog['salad']
688: (8) >>> msg3.string
689: (8) (u'Salat', u'Salate')
690: (8) >>> msg3.locations
691: (8) [('util.py', 42)]
692: (8) Messages that are in the catalog but not in the template are removed
693: (8) from the main collection, but can still be accessed via the `obsolete`
694: (8) member:
695: (8) >>> 'head' in catalog
696: (8) False
697: (8) >>> list(catalog.obsolete.values())
698: (8) [<Message 'head' (flags: [])>]
699: (8) :param template: the reference catalog, usually read from a POT file
700: (8) :param no_fuzzy_matching: whether to use fuzzy matching of message IDs
701: (8) """
702: (8) messages = self._messages
703: (8) remaining = messages.copy()
704: (8) self._messages = OrderedDict()
705: (8) # Prepare for fuzzy matching
706: (8) fuzzy_candidates = {}
707: (8) if not no_fuzzy_matching:
708: (12) for msgid in messages:
709: (16) if msgid and messages[msgid].string:
710: (20) key = self._key_for(msgid)
711: (20) ctxt = messages[msgid].context
712: (20) fuzzy_candidates[self._to_fuzzy_match_key(key)] = (key, ctxt)
713: (8) fuzzy_matches = set()
714: (8) def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None:
715: (12) message = message.clone()
716: (12) fuzzy = False
717: (12) if oldkey != newkey:
718: (16) fuzzy = True
719: (16) fuzzy_matches.add(oldkey)
720: (16) oldmsg = messages.get(oldkey)
721: (16) assert oldmsg is not None
722: (16) if isinstance(oldmsg.id, str):
723: (20) message.previous_id = [oldmsg.id]
724: (16) else:
725: (20) message.previous_id = list(oldmsg.id)
726: (12) else:
727: (16) oldmsg = remaining.pop(oldkey, None)
728: (16) assert oldmsg is not None
729: (12) message.string = oldmsg.string
730: (12) if keep_user_comments:
731: (16) message.user_comments = list(distinct(oldmsg.user_comments))
732: (12) if isinstance(message.id, (list, tuple)):
733: (16) if not isinstance(message.string, (list, tuple)):
734: (20) fuzzy = True
735: (20) message.string = tuple(
736: (24) [message.string] + ([''] * (len(message.id) - 1)),
737: (20) )
738: (16) elif len(message.string) != self.num_plurals:
739: (20) fuzzy = True
740: (20) message.string = tuple(message.string[:len(oldmsg.string)])
741: (12) elif isinstance(message.string, (list, tuple)):
742: (16) fuzzy = True
743: (16) message.string = message.string[0]
744: (12) message.flags |= oldmsg.flags
745: (12) if fuzzy:
746: (16) message.flags |= {'fuzzy'}
747: (12) self[message.id] = message
748: (8) for message in template:
749: (12) if message.id:
750: (16) key = self._key_for(message.id, message.context)
751: (16) if key in messages:
752: (20) _merge(message, key, key)
753: (16) else:
754: (20) if not no_fuzzy_matching:
755: (24) # do some fuzzy matching with difflib
756: (24) matches = get_close_matches(
757: (28) self._to_fuzzy_match_key(key),
758: (28) fuzzy_candidates.keys(),
759: (28) 1,
760: (24) )
761: (24) if matches:
762: (28) modified_key = matches[0]
763: (28) newkey, newctxt = fuzzy_candidates[modified_key]
764: (28) if newctxt is not None:
765: (32) newkey = newkey, newctxt
766: (28) _merge(message, newkey, key)
767: (28) continue
768: (20) self[message.id] = message
769: (8) for msgid in remaining:
770: (12) if no_fuzzy_matching or msgid not in fuzzy_matches:
771: (16) self.obsolete[msgid] = remaining[msgid]
772: (8) if update_header_comment:
773: (12) # Allow the updated catalog's header to be rewritten based on the
774: (12) # template's header
775: (12) self.header_comment = template.header_comment
776: (8) # Make updated catalog's POT-Creation-Date equal to the template
777: (8) # used to update the catalog
778: (8) if update_creation_date:
779: (12) self.creation_date = template.creation_date
780: (4) def _to_fuzzy_match_key(self, key: tuple[str, str] | str) -> str:
781: (8) """Converts a message key to a string suitable for fuzzy matching."""
782: (8) if isinstance(key, tuple):
783: (12) matchkey = key[0] # just the msgid, no context
784: (8) else:
785: (12) matchkey = key
786: (8) return matchkey.lower().strip()
787: (4) def _key_for(self, id: _MessageID, context: str | None = None) -> tuple[str, str] | str:
788: (8) """The key for a message is just the singular ID even for pluralizable
789: (8) messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
790: (8) messages.
791: (8) """
792: (8) key = id
793: (8) if isinstance(key, (list, tuple)):
794: (12) key = id[0]
795: (8) if context is not None:
796: (12) key = (key, context)
797: (8) return key
798: (4) def is_identical(self, other: Catalog) -> bool:
799: (8) """Checks if catalogs are identical, taking into account messages and
800: (8) headers.
801: (8) """
802: (8) assert isinstance(other, Catalog)
803: (8) for key in self._messages.keys() | other._messages.keys():
804: (12) message_1 = self.get(key)
805: (12) message_2 = other.get(key)
806: (12) if (
807: (16) message_1 is None
808: (16) or message_2 is None
809: (16) or not message_1.is_identical(message_2)
810: (12) ):
811: (16) return False
812: (8) return dict(self.mime_headers) == dict(other.mime_headers)
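# Illustrative sketch (not part of the Babel API): a typical update-and-validate
# round trip using the Catalog methods defined above.  The message IDs,
# translations and file locations are hypothetical.
def _example_update_and_check() -> None:
    template = Catalog(project='Example', version='1.0')
    template.add('greeting', locations=[('app.py', 10)])
    catalog = Catalog(locale='de_DE')
    catalog.add('greeting', 'Hallo', locations=[('app.py', 8)])
    catalog.update(template)  # merge the new template into the translation
    for message, errors in catalog.check():
        print(f"{message.id!r}: {errors}")
    assert catalog.is_identical(catalog)  # a catalog is identical to itself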
----------------------------------------
File 20 - .\messages \extract.py:
1: (0) """
2: (4) babel.messages.extract
3: (4) ~~~~~~~~~~~~~~~~~~~~~~
4: (4) Basic infrastructure for extracting localizable messages from source files.
5: (4) This module defines an extensible system for collecting localizable message
6: (4) strings from a variety of sources. A native extractor for Python source
7: (4) files is built in; extractors for other sources can be added using very
8: (4) simple plugins.
9: (4) The main entry points into the extraction functionality are the functions
10: (4) `extract_from_dir` and `extract_from_file`.
11: (4) :copyright: (c) 2013-2024 by the Babel Team.
12: (4) :license: BSD, see LICENSE for more details.
13: (0) """
14: (0) from __future__ import annotations
15: (0) import ast
16: (0) import io
17: (0) import os
18: (0) import sys
19: (0) import tokenize
20: (0) from collections.abc import (
21: (4) Callable,
22: (4) Collection,
23: (4) Generator,
24: (4) Iterable,
25: (4) Mapping,
26: (4) MutableSequence,
27: (0) )
28: (0) from functools import lru_cache
29: (0) from os.path import relpath
30: (0) from textwrap import dedent
31: (0) from tokenize import COMMENT, NAME, OP, STRING, generate_tokens
32: (0) from typing import TYPE_CHECKING, Any
33: (0) from babel.messages._compat import find_entrypoints
34: (0) from babel.util import parse_encoding, parse_future_flags, pathmatch
35: (0) if TYPE_CHECKING:
36: (4) from typing import IO, Final, Protocol
37: (4) from _typeshed import SupportsItems, SupportsRead, SupportsReadline
38: (4) from typing_extensions import TypeAlias, TypedDict
39: (4) class _PyOptions(TypedDict, total=False):
40: (8) encoding: str
41: (4) class _JSOptions(TypedDict, total=False):
42: (8) encoding: str
43: (8) jsx: bool
44: (8) template_string: bool
45: (8) parse_template_string: bool
46: (4) class _FileObj(SupportsRead[bytes], SupportsReadline[bytes], Protocol):
47: (8) def seek(self, __offset: int, __whence: int = ...) -> int: ...
48: (8) def tell(self) -> int: ...
49: (4) _SimpleKeyword: TypeAlias = tuple[int | tuple[int, int] | tuple[int, str], ...] | None
50: (4) _Keyword: TypeAlias = dict[int | None, _SimpleKeyword] | _SimpleKeyword
51: (4) # 5-tuple of (filename, lineno, messages, comments, context)
52: (4) _FileExtractionResult: TypeAlias = tuple[str, int, str | tuple[str, ...], list[str], str | None]
53: (4) # 4-tuple of (lineno, message, comments, context)
54: (4) _ExtractionResult: TypeAlias = tuple[int, str | tuple[str, ...], list[str], str | None]
55: (4) # Required arguments: fileobj, keywords, comment_tags, options
56: (4) # Return value: Iterable of (lineno, message, comments, context)
57: (4) _CallableExtractionMethod: TypeAlias = Callable[
58: (8) [_FileObj | IO[bytes], Mapping[str, _Keyword], Collection[str], Mapping[str, Any]],
59: (8) Iterable[_ExtractionResult],
60: (4) ]
61: (4) _ExtractionMethod: TypeAlias = _CallableExtractionMethod | str
62: (0) GROUP_NAME: Final[str] = 'babel.extractors'
63: (0) DEFAULT_KEYWORDS: dict[str, _Keyword] = {
64: (4) '_': None,
65: (4) 'gettext': None,
66: (4) 'ngettext': (1, 2),
67: (4) 'ugettext': None,
68: (4) 'ungettext': (1, 2),
69: (4) 'dgettext': (2,),
70: (4) 'dngettext': (2, 3),
71: (4) 'N_': None,
72: (4) 'pgettext': ((1, 'c'), 2),
73: (4) 'npgettext': ((1, 'c'), 2, 3),
74: (0) }
75: (0) DEFAULT_MAPPING: list[tuple[str, str]] = [('**.py', 'python')]
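# Illustrative sketch (not part of the Babel API): a project-specific keyword
# mapping in the same format as DEFAULT_KEYWORDS above.  The function names are
# hypothetical; ``None`` means the sole argument is the message, integers pick
# argument positions, and ``(n, 'c')`` marks the context argument.
EXAMPLE_KEYWORDS: dict[str, _Keyword] = {
    'translate': None,               # translate('message')
    'translate_plural': (1, 2),      # translate_plural('singular', 'plural', n)
    'translate_ctx': ((1, 'c'), 2),  # translate_ctx('context', 'message')
}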
76: (0) # New tokens in Python 3.12, or None on older versions
77: (0) FSTRING_START = getattr(tokenize, "FSTRING_START", None)
78: (0) FSTRING_MIDDLE = getattr(tokenize, "FSTRING_MIDDLE", None)
79: (0) FSTRING_END = getattr(tokenize, "FSTRING_END", None)
80: (0) def _strip_comment_tags(comments: MutableSequence[str], tags: Iterable[str]):
81: (4) """Helper function for `extract` that strips comment tags from strings
82: (4) in a list of comment lines. This function operates in-place.
83: (4) """
84: (4) def _strip(line: str):
85: (8) for tag in tags:
86: (12) if line.startswith(tag):
87: (16) return line[len(tag):].strip()
88: (8) return line
89: (4) comments[:] = map(_strip, comments)
90: (0) def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool:
91: (4) subdir = os.path.basename(dirpath)
92: (4) # Legacy default behavior: ignore dot and underscore directories
93: (4) return not (subdir.startswith('.') or subdir.startswith('_'))
94: (0) def extract_from_dir(
95: (4) dirname: str | os.PathLike[str] | None = None,
96: (4) method_map: Iterable[tuple[str, str]] = DEFAULT_MAPPING,
97: (4) options_map: SupportsItems[str, dict[str, Any]] | None = None,
98: (4) keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS,
99: (4) comment_tags: Collection[str] = (),
100: (4) callback: Callable[[str, str, dict[str, Any]], object] | None = None,
101: (4) strip_comment_tags: bool = False,
102: (4) directory_filter: Callable[[str], bool] | None = None,
103: (0) ) -> Generator[_FileExtractionResult, None, None]:
104: (4) """Extract messages from any source files found in the given directory.
105: (4) This function generates tuples of the form ``(filename, lineno, message,
106: (4) comments, context)``.
107: (4) Which extraction method is used per file is determined by the `method_map`
108: (4) parameter, which maps extended glob patterns to extraction method names.
109: (4) For example, the following is the default mapping:
110: (4) >>> method_map = [
111: (4) ... ('**.py', 'python')
112: (4) ... ]
113: (4) This basically says that files with the filename extension ".py" at any
114: (4) level inside the directory should be processed by the "python" extraction
115: (4) method. Files that don't match any of the mapping patterns are ignored. See
116: (4) the documentation of the `pathmatch` function for details on the pattern
117: (4) syntax.
118: (4) The following extended mapping would also use the "genshi" extraction
119: (4) method on any file in a "templates" subdirectory:
120: (4) >>> method_map = [
121: (4) ... ('**/templates/**.*', 'genshi'),
122: (4) ... ('**.py', 'python')
123: (4) ... ]
124: (4) The dictionary provided by the optional `options_map` parameter augments
125: (4) these mappings. It uses extended glob patterns as keys, and the values are
126: (4) dictionaries mapping options names to option values (both strings).
127: (4) The glob patterns of the `options_map` do not necessarily need to be the
128: (4) same as those used in the method mapping. For example, while all files in
129: (4) the ``templates`` folders in an application may be Genshi templates, the
130: (4) options for those files may differ based on extension:
131: (4) >>> options_map = {
132: (4) ... '**/templates/**.txt': {
133: (4) ... 'template_class': 'genshi.template:TextTemplate',
134: (4) ... 'encoding': 'latin-1'
135: (4) ... },
136: (4) ... '**/templates/**.html': {
137: (4) ... 'include_attrs': ''
138: (4) ... }
139: (4) ... }
140: (4) :param dirname: the path to the directory to extract messages from. If
141: (20) not given, the current working directory is used.
142: (4) :param method_map: a list of ``(pattern, method)`` tuples mapping extended
143: (23) glob patterns to extraction method names
144: (4) :param options_map: a dictionary of additional options (optional)
145: (4) :param keywords: a dictionary mapping keywords (i.e. names of functions
146: (21) that should be recognized as translation functions) to
147: (21) tuples that specify which of their arguments contain
148: (21) localizable strings
149: (4) :param comment_tags: a list of tags of translator comments to search for
150: (25) and include in the results
151: (4) :param callback: a function that is called for every file that messages are
152: (21) extracted from, just before the extraction itself is
153: (21) performed; the function is passed the filename, the name
154: (21) of the extraction method and the options dictionary as
155: (21) positional arguments, in that order
156: (4) :param strip_comment_tags: a flag that if set to `True` causes all comment
157: (31) tags to be removed from the collected comments.
158: (4) :param directory_filter: a callback to determine whether a directory should
159: (29) be recursed into. Receives the full directory path;
160: (29) should return True if the directory is valid.
161: (4) :see: `pathmatch`
162: (4) """
163: (4) if dirname is None:
164: (8) dirname = os.getcwd()
165: (4) if options_map is None:
166: (8) options_map = {}
167: (4) if directory_filter is None:
168: (8) directory_filter = default_directory_filter
169: (4) absname = os.path.abspath(dirname)
170: (4) for root, dirnames, filenames in os.walk(absname):
171: (8) dirnames[:] = [
172: (12) subdir for subdir in dirnames
173: (12) if directory_filter(os.path.join(root, subdir))
174: (8) ]
175: (8) dirnames.sort()
176: (8) filenames.sort()
177: (8) for filename in filenames:
178: (12) filepath = os.path.join(root, filename).replace(os.sep, '/')
179: (12) yield from check_and_call_extract_file(
180: (16) filepath,
181: (16) method_map,
182: (16) options_map,
183: (16) callback,
184: (16) keywords,
185: (16) comment_tags,
186: (16) strip_comment_tags,
187: (16) dirpath=absname,
188: (12) )
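# Illustrative sketch (not part of the Babel API): calling extract_from_dir()
# with a custom mapping, as described in the docstring above.  The directory
# name and patterns are hypothetical.
def _example_extract_from_dir() -> None:
    method_map = [('**.py', 'python'), ('**.js', 'javascript')]
    for filename, lineno, message, comments, context in extract_from_dir(
        'myproject', method_map=method_map, comment_tags=('NOTE:',),
    ):
        print(f"{filename}:{lineno}: {message!r} {comments}")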
189: (0) def check_and_call_extract_file(
190: (4) filepath: str | os.PathLike[str],
191: (4) method_map: Iterable[tuple[str, str]],
192: (4) options_map: SupportsItems[str, dict[str, Any]],
193: (4) callback: Callable[[str, str, dict[str, Any]], object] | None,
194: (4) keywords: Mapping[str, _Keyword],
195: (4) comment_tags: Collection[str],
196: (4) strip_comment_tags: bool,
197: (4) dirpath: str | os.PathLike[str] | None = None,
198: (0) ) -> Generator[_FileExtractionResult, None, None]:
199: (4) """Checks if the given file matches an extraction method mapping, and if so, calls extract_from_file.
200: (4) Note that the extraction method mappings are based relative to dirpath.
201: (4) So, given an absolute path to a file `filepath`, we want to check using
202: (4) just the relative path from `dirpath` to `filepath`.
203: (4) Yields 5-tuples (filename, lineno, messages, comments, context).
204: (4) :param filepath: An absolute path to a file that exists.
205: (4) :param method_map: a list of ``(pattern, method)`` tuples mapping extended
206: (23) glob patterns to extraction method names
207: (4) :param options_map: a dictionary of additional options (optional)
208: (4) :param callback: a function that is called for every file that messages are
209: (21) extracted from, just before the extraction itself is
210: (21) performed; the function is passed the filename, the name
211: (21) of the extraction method and the options dictionary as
212: (21) positional arguments, in that order
213: (4) :param keywords: a dictionary mapping keywords (i.e. names of functions
214: (21) that should be recognized as translation functions) to
215: (21) tuples that specify which of their arguments contain
216: (21) localizable strings
217: (4) :param comment_tags: a list of tags of translator comments to search for
218: (25) and include in the results
219: (4) :param strip_comment_tags: a flag that if set to `True` causes all comment
220: (31) tags to be removed from the collected comments.
221: (4) :param dirpath: the path to the directory to extract messages from.
222: (4) :return: iterable of 5-tuples (filename, lineno, messages, comments, context)
223: (4) :rtype: Iterable[tuple[str, int, str|tuple[str], list[str], str|None]]
224: (4) """
225: (4) # filename is the relative path from dirpath to the actual file
226: (4) filename = relpath(filepath, dirpath)
227: (4) for pattern, method in method_map:
228: (8) if not pathmatch(pattern, filename):
229: (12) continue
230: (8) options = {}
231: (8) for opattern, odict in options_map.items():
232: (12) if pathmatch(opattern, filename):
233: (16) options = odict
234: (8) if callback:
235: (12) callback(filename, method, options)
236: (8) for message_tuple in extract_from_file(
237: (12) method, filepath,
238: (12) keywords=keywords,
239: (12) comment_tags=comment_tags,
240: (12) options=options,
241: (12) strip_comment_tags=strip_comment_tags,
242: (8) ):
243: (12) yield (filename, *message_tuple)
244: (8) break
245: (0) def extract_from_file(
246: (4) method: _ExtractionMethod,
247: (4) filename: str | os.PathLike[str],
248: (4) keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS,
249: (4) comment_tags: Collection[str] = (),
250: (4) options: Mapping[str, Any] | None = None,
251: (4) strip_comment_tags: bool = False,
252: (0) ) -> list[_ExtractionResult]:
253: (4) """Extract messages from a specific file.
254: (4) This function returns a list of tuples of the form ``(lineno, message, comments, context)``.
255: (4) :param filename: the path to the file to extract messages from
256: (4) :param method: a string specifying the extraction method (e.g. "python")
257: (4) :param keywords: a dictionary mapping keywords (i.e. names of functions
258: (21) that should be recognized as translation functions) to
259: (21) tuples that specify which of their arguments contain
260: (21) localizable strings
261: (4) :param comment_tags: a list of translator tags to search for and include
262: (25) in the results
263: (4) :param strip_comment_tags: a flag that if set to `True` causes all comment
264: (31) tags to be removed from the collected comments.
265: (4) :param options: a dictionary of additional options (optional)
266: (4) :returns: list of tuples of the form ``(lineno, message, comments, context)``
267: (4) :rtype: list[tuple[int, str|tuple[str], list[str], str|None]]
268: (4) """
269: (4) if method == 'ignore':
270: (8) return []
271: (4) with open(filename, 'rb') as fileobj:
272: (8) return list(extract(method, fileobj, keywords, comment_tags,
273: (28) options, strip_comment_tags))
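# Illustrative sketch (not part of the Babel API): extracting from a single
# file while also recognizing a hypothetical ``translate()`` helper in addition
# to the default keywords.  The file path is hypothetical.
def _example_extract_single_file() -> None:
    keywords = dict(DEFAULT_KEYWORDS, translate=None)
    for lineno, message, comments, context in extract_from_file(
        'python', 'myproject/app.py', keywords=keywords,
    ):
        print(lineno, message, context)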
274: (0) def _match_messages_against_spec(lineno: int, messages: list[str|None], comments: list[str],
275: (33) fileobj: _FileObj, spec: tuple[int|tuple[int, str], ...]):
276: (4) translatable = []
277: (4) context = None
278: (4) # last_index is 1 based like the keyword spec
279: (4) last_index = len(messages)
280: (4) for index in spec:
281: (8) if isinstance(index, tuple): # (n, 'c')
282: (12) context = messages[index[0] - 1]
283: (12) continue
284: (8) if last_index < index:
285: (12) # Not enough arguments
286: (12) return
287: (8) message = messages[index - 1]
288: (8) if message is None:
289: (12) return
290: (8) translatable.append(message)
291: (4) # keyword spec indexes are 1 based, therefore '-1'
292: (4) if isinstance(spec[0], tuple):
293: (8) # context-aware *gettext method
294: (8) first_msg_index = spec[1] - 1
295: (4) else:
296: (8) first_msg_index = spec[0] - 1
297: (4) # An empty string msgid isn't valid, emit a warning
298: (4) if not messages[first_msg_index]:
299: (8) filename = (getattr(fileobj, "name", None) or "(unknown)")
300: (8) sys.stderr.write(
301: (12) f"{filename}:{lineno}: warning: Empty msgid. It is reserved by GNU gettext: gettext(\"\") "
302: (12) f"returns the header entry with meta information, not the empty string.\n",
303: (8) )
304: (8) return
305: (4) translatable = tuple(translatable)
306: (4) if len(translatable) == 1:
307: (8) translatable = translatable[0]
308: (4) return lineno, translatable, comments, context
309: (0) @lru_cache(maxsize=None)
310: (0) def _find_extractor(name: str):
311: (4) for ep_name, load in find_entrypoints(GROUP_NAME):
312: (8) if ep_name == name:
313: (12) return load()
314: (4) return None
315: (0) def extract(
316: (4) method: _ExtractionMethod,
317: (4) fileobj: _FileObj,
318: (4) keywords: Mapping[str, _Keyword] = DEFAULT_KEYWORDS,
319: (4) comment_tags: Collection[str] = (),
320: (4) options: Mapping[str, Any] | None = None,
321: (4) strip_comment_tags: bool = False,
322: (0) ) -> Generator[_ExtractionResult, None, None]:
323: (4) """Extract messages from the given file-like object using the specified
324: (4) extraction method.
325: (4) This function returns tuples of the form ``(lineno, message, comments, context)``.
326: (4) The implementation dispatches the actual extraction to plugins, based on the
327: (4) value of the ``method`` parameter.
328: (4) >>> source = b'''# foo module
329: (4) ... def run(argv):
330: (4) ... print(_('Hello, world!'))
331: (4) ... '''
332: (4) >>> from io import BytesIO
333: (4) >>> for message in extract('python', BytesIO(source)):
334: (4) ... print(message)
335: (4) (3, u'Hello, world!', [], None)
336: (4) :param method: an extraction method (a callable), or
337: (19) a string specifying the extraction method (e.g. "python");
338: (19) if this is a simple name, the extraction function will be
339: (19) looked up by entry point; if it is an explicit reference
340: (19) to a function (of the form ``package.module:funcname`` or
341: (19) ``package.module.funcname``), the corresponding function
342: (19) will be imported and used
343: (4) :param fileobj: the file-like object the messages should be extracted from
344: (4) :param keywords: a dictionary mapping keywords (i.e. names of functions
345: (21) that should be recognized as translation functions) to
346: (21) tuples that specify which of their arguments contain
347: (21) localizable strings
348: (4) :param comment_tags: a list of translator tags to search for and include
349: (25) in the results
350: (4) :param options: a dictionary of additional options (optional)
351: (4) :param strip_comment_tags: a flag that if set to `True` causes all comment
352: (31) tags to be removed from the collected comments.
353: (4) :raise ValueError: if the extraction method is not registered
354: (4) :returns: iterable of tuples of the form ``(lineno, message, comments, context)``
355: (4) :rtype: Iterable[tuple[int, str|tuple[str], list[str], str|None]]
356: (4) """
357: (4) func = None
358: (4) if callable(method):
359: (8) func = method
360: (4) elif ':' in method or '.' in method:
361: (8) if ':' not in method:
362: (12) lastdot = method.rfind('.')
363: (12) module, attrname = method[:lastdot], method[lastdot + 1:]
364: (8) else:
365: (12) module, attrname = method.split(':', 1)
366: (8) func = getattr(__import__(module, {}, {}, [attrname]), attrname)
367: (4) else:
368: (8) func = _find_extractor(method)
369: (8) if func is None:
370: (12) # if no named entry point was found,
371: (12) # we resort to looking up a builtin extractor
372: (12) func = _BUILTIN_EXTRACTORS.get(method)
373: (4) if func is None:
374: (8) raise ValueError(f"Unknown extraction method {method!r}")
375: (4) results = func(fileobj, keywords.keys(), comment_tags,
376: (19) options=options or {})
377: (4) for lineno, funcname, messages, comments in results:
378: (8) if not isinstance(messages, (list, tuple)):
379: (12) messages = [messages]
380: (8) if not messages:
381: (12) continue
382: (8) specs = keywords[funcname] or None if funcname else None
383: (8) # {None: x} may be collapsed into x for backwards compatibility.
384: (8) if not isinstance(specs, dict):
385: (12) specs = {None: specs}
386: (8) if strip_comment_tags:
387: (12) _strip_comment_tags(comments, comment_tags)
388: (8) # None matches all arities.
389: (8) for arity in (None, len(messages)):
390: (12) try:
391: (16) spec = specs[arity]
392: (12) except KeyError:
393: (16) continue
394: (12) if spec is None:
395: (16) spec = (1,)
396: (12) result = _match_messages_against_spec(lineno, messages, comments, fileobj, spec)
397: (12) if result is not None:
398: (16) yield result
399: (0) def extract_nothing(
400: (4) fileobj: _FileObj,
401: (4) keywords: Mapping[str, _Keyword],
402: (4) comment_tags: Collection[str],
403: (4) options: Mapping[str, Any],
404: (0) ) -> list[_ExtractionResult]:
405: (4) """Pseudo extractor that does not actually extract anything, but simply
406: (4) returns an empty list.
407: (4) """
408: (4) return []
409: (0) def extract_python(
410: (4) fileobj: IO[bytes],
411: (4) keywords: Mapping[str, _Keyword],
412: (4) comment_tags: Collection[str],
413: (4) options: _PyOptions,
414: (0) ) -> Generator[_ExtractionResult, None, None]:
415: (4) """Extract messages from Python source code.
416: (4) It returns an iterator yielding tuples in the following form ``(lineno,
417: (4) funcname, message, comments)``.
418: (4) :param fileobj: the seekable, file-like object the messages should be
419: (20) extracted from
420: (4) :param keywords: a list of keywords (i.e. function names) that should be
421: (21) recognized as translation functions
422: (4) :param comment_tags: a list of translator tags to search for and include
423: (25) in the results
424: (4) :param options: a dictionary of additional options (optional)
425: (4) :rtype: ``iterator``
426: (4) """
427: (4) funcname = lineno = message_lineno = None
428: (4) call_stack = -1
429: (4) buf = []
430: (4) messages = []
431: (4) translator_comments = []
432: (4) in_def = in_translator_comments = False
433: (4) comment_tag = None
434: (4) encoding = parse_encoding(fileobj) or options.get('encoding', 'UTF-8')
435: (4) future_flags = parse_future_flags(fileobj, encoding)
436: (4) next_line = lambda: fileobj.readline().decode(encoding)
437: (4) tokens = generate_tokens(next_line)
438: (4) # Current prefix of a Python 3.12 (PEP 701) f-string, or None if we're not
439: (4) # currently parsing one.
440: (4) current_fstring_start = None
441: (4) for tok, value, (lineno, _), _, _ in tokens:
442: (8) if call_stack == -1 and tok == NAME and value in ('def', 'class'):
443: (12) in_def = True
444: (8) elif tok == OP and value == '(':
445: (12) if in_def:
446: (16) # Avoid false positives for declarations such as:
447: (16) # def gettext(arg='message'):
448: (16) in_def = False
449: (16) continue
450: (12) if funcname:
451: (16) message_lineno = lineno
452: (16) call_stack += 1
453: (8) elif in_def and tok == OP and value == ':':
454: (12) # End of a class definition without parens
455: (12) in_def = False
456: (12) continue
457: (8) elif call_stack == -1 and tok == COMMENT:
458: (12) # Strip the comment token from the line
459: (12) value = value[1:].strip()
460: (12) if in_translator_comments and \
461: (20) translator_comments[-1][0] == lineno - 1:
462: (16) # We're already inside a translator comment, continue appending
463: (16) translator_comments.append((lineno, value))
464: (16) continue
465: (12) # If execution reaches this point, let's see if comment line
466: (12) # starts with one of the comment tags
467: (12) for comment_tag in comment_tags:
468: (16) if value.startswith(comment_tag):
469: (20) in_translator_comments = True
470: (20) translator_comments.append((lineno, value))
471: (20) break
472: (8) elif funcname and call_stack == 0:
473: (12) nested = (tok == NAME and value in keywords)
474: (12) if (tok == OP and value == ')') or nested:
475: (16) if buf:
476: (20) messages.append(''.join(buf))
477: (20) del buf[:]
478: (16) else:
479: (20) messages.append(None)
480: (16) messages = tuple(messages) if len(messages) > 1 else messages[0]
481: (16) # Comments don't apply unless they immediately
482: (16) # precede the message
483: (16) if translator_comments and \
484: (24) translator_comments[-1][0] < message_lineno - 1:
485: (20) translator_comments = []
486: (16) yield (message_lineno, funcname, messages,
487: (23) [comment[1] for comment in translator_comments])
488: (16) funcname = lineno = message_lineno = None
489: (16) call_stack = -1
490: (16) messages = []
491: (16) translator_comments = []
492: (16) in_translator_comments = False
493: (16) if nested:
494: (20) funcname = value
495: (12) elif tok == STRING:
496: (16) val = _parse_python_string(value, encoding, future_flags)
497: (16) if val is not None:
498: (20) buf.append(val)
499: (12) # Python 3.12+, see https://peps.python.org/pep-0701/#new-tokens
500: (12) elif tok == FSTRING_START:
501: (16) current_fstring_start = value
502: (12) elif tok == FSTRING_MIDDLE:
503: (16) if current_fstring_start is not None:
504: (20) current_fstring_start += value
505: (12) elif tok == FSTRING_END:
506: (16) if current_fstring_start is not None:
507: (20) fstring = current_fstring_start + value
508: (20) val = _parse_python_string(fstring, encoding, future_flags)
509: (20) if val is not None:
510: (24) buf.append(val)
511: (12) elif tok == OP and value == ',':
512: (16) if buf:
513: (20) messages.append(''.join(buf))
514: (20) del buf[:]
515: (16) else:
516: (20) messages.append(None)
517: (16) if translator_comments:
518: (20) # We have translator comments, and since we're on a comma
519: (20) # the user is allowed to break the call onto a new line.
520: (20) # Bump the last comment's lineno so it still counts as
521: (20) # immediately preceding the message.
522: (20) old_lineno, old_comment = translator_comments.pop()
523: (20) translator_comments.append((old_lineno + 1, old_comment))
524: (8) elif call_stack > 0 and tok == OP and value == ')':
525: (12) call_stack -= 1
526: (8) elif funcname and call_stack == -1:
527: (12) funcname = None
528: (8) elif tok == NAME and value in keywords:
529: (12) funcname = value
530: (8) if (current_fstring_start is not None
531: (12) and tok not in {FSTRING_START, FSTRING_MIDDLE}
532: (8) ):
533: (12) # In Python 3.12, tokens other than FSTRING_* mean the
534: (12) # f-string is dynamic, so we don't want to extract it.
535: (12) # And if it's FSTRING_END, we've already handled it above.
536: (12) # Let's forget that we're in an f-string.
537: (12) current_fstring_start = None
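# Illustrative sketch (not part of the Babel API): what extract_python() yields
# for a small snippet, including a translator comment collected via
# ``comment_tags``.  The expected tuples below follow the
# (lineno, funcname, messages, comments) form described in the docstring.
def _example_extract_python() -> None:
    source = (
        b"# NOTE: shown on the landing page\n"
        b"title = _('Welcome')\n"
        b"count = ngettext('%d item', '%d items', n)\n"
    )
    for result in extract_python(io.BytesIO(source), DEFAULT_KEYWORDS, ('NOTE:',), {}):
        print(result)
    # (2, '_', 'Welcome', ['NOTE: shown on the landing page'])
    # (3, 'ngettext', ('%d item', '%d items', None), [])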
538: (0) def _parse_python_string(value: str, encoding: str, future_flags: int) -> str | None:
539: (4) # Unwrap quotes in a safe manner, maintaining the string's encoding
540: (4) # https://sourceforge.net/tracker/?func=detail&atid=355470&aid=617979&group_id=5470
541: (4) code = compile(
542: (8) f'# coding={str(encoding)}\n{value}',
543: (8) '<string>',
544: (8) 'eval',
545: (8) ast.PyCF_ONLY_AST | future_flags,
546: (4) )
547: (4) if isinstance(code, ast.Expression):
548: (8) body = code.body
549: (8) if isinstance(body, ast.Constant):
550: (12) return body.value
551: (8) if isinstance(body, ast.JoinedStr): # f-string
552: (12) if all(isinstance(node, ast.Constant) for node in body.values):
553: (16) return ''.join(node.value for node in body.values)
554: (12) # TODO: we could raise an error or warning when not all nodes are constants
555: (4) return None
556: (0) def extract_javascript(
557: (4) fileobj: _FileObj,
558: (4) keywords: Mapping[str, _Keyword],
559: (4) comment_tags: Collection[str],
560: (4) options: _JSOptions,
561: (4) lineno: int = 1,
562: (0) ) -> Generator[_ExtractionResult, None, None]:
563: (4) """Extract messages from JavaScript source code.
564: (4) :param fileobj: the seekable, file-like object the messages should be
565: (20) extracted from
566: (4) :param keywords: a list of keywords (i.e. function names) that should be
567: (21) recognized as translation functions
568: (4) :param comment_tags: a list of translator tags to search for and include
569: (25) in the results
570: (4) :param options: a dictionary of additional options (optional)
571: (20) Supported options are:
572: (20) * `jsx` -- set to false to disable JSX/E4X support.
573: (20) * `template_string` -- if `True`, supports gettext(`key`)
574: (20) * `parse_template_string` -- if `True`, the contents of
575: (49) JavaScript template strings
576: (49) are parsed as well.
577: (4) :param lineno: line number offset (for parsing embedded fragments)
578: (4) """
579: (4) from babel.messages.jslexer import Token, tokenize, unquote_string
580: (4) funcname = message_lineno = None
581: (4) messages = []
582: (4) last_argument = None
583: (4) translator_comments = []
584: (4) concatenate_next = False
585: (4) encoding = options.get('encoding', 'utf-8')
586: (4) last_token = None
587: (4) call_stack = -1
588: (4) dotted = any('.' in kw for kw in keywords)
589: (4) for token in tokenize(
590: (8) fileobj.read().decode(encoding),
591: (8) jsx=options.get("jsx", True),
592: (8) template_string=options.get("template_string", True),
593: (8) dotted=dotted,
594: (8) lineno=lineno,
595: (4) ):
596: (8) if ( # Turn keyword`foo` expressions into keyword("foo") calls:
597: (12) funcname and # have a keyword...
598: (12) (last_token and last_token.type == 'name') and # we've seen nothing after the keyword...
599: (12) token.type == 'template_string' # this is a template string
600: (8) ):
601: (12) message_lineno = token.lineno
602: (12) messages = [unquote_string(token.value)]
603: (12) call_stack = 0
604: (12) token = Token('operator', ')', token.lineno)
605: (8) if options.get('parse_template_string') and not funcname and token.type == 'template_string':
606: (12) yield from parse_template_string(token.value, keywords, comment_tags, options, token.lineno)
607: (8) elif token.type == 'operator' and token.value == '(':
608: (12) if funcname:
609: (16) message_lineno = token.lineno
610: (16) call_stack += 1
611: (8) elif call_stack == -1 and token.type == 'linecomment':
612: (12) value = token.value[2:].strip()
613: (12) if translator_comments and \
614: (15) translator_comments[-1][0] == token.lineno - 1:
615: (16) translator_comments.append((token.lineno, value))
616: (16) continue
617: (12) for comment_tag in comment_tags:
618: (16) if value.startswith(comment_tag):
619: (20) translator_comments.append((token.lineno, value.strip()))
620: (20) break
621: (8) elif token.type == 'multilinecomment':
622: (12) # only one multi-line comment may precede a translation
623: (12) translator_comments = []
624: (12) value = token.value[2:-2].strip()
625: (12) for comment_tag in comment_tags:
626: (16) if value.startswith(comment_tag):
627: (20) lines = value.splitlines()
628: (20) if lines:
629: (24) lines[0] = lines[0].strip()
630: (24) lines[1:] = dedent('\n'.join(lines[1:])).splitlines()
631: (24) for offset, line in enumerate(lines):
632: (28) translator_comments.append((token.lineno + offset,
633: (56) line))
634: (20) break
635: (8) elif funcname and call_stack == 0:
636: (12) if token.type == 'operator' and token.value == ')':
637: (16) if last_argument is not None:
638: (20) messages.append(last_argument)
639: (16) if len(messages) > 1:
640: (20) messages = tuple(messages)
641: (16) elif messages:
642: (20) messages = messages[0]
643: (16) else:
644: (20) messages = None
645: (16) # Comments don't apply unless they immediately precede the
646: (16) # message
647: (16) if translator_comments and \
648: (19) translator_comments[-1][0] < message_lineno - 1:
649: (20) translator_comments = []
650: (16) if messages is not None:
651: (20) yield (message_lineno, funcname, messages,
652: (27) [comment[1] for comment in translator_comments])
653: (16) funcname = message_lineno = last_argument = None
654: (16) concatenate_next = False
655: (16) translator_comments = []
656: (16) messages = []
657: (16) call_stack = -1
658: (12) elif token.type in ('string', 'template_string'):
659: (16) new_value = unquote_string(token.value)
660: (16) if concatenate_next:
661: (20) last_argument = (last_argument or '') + new_value
662: (20) concatenate_next = False
663: (16) else:
664: (20) last_argument = new_value
665: (12) elif token.type == 'operator':
666: (16) if token.value == ',':
667: (20) if last_argument is not None:
668: (24) messages.append(last_argument)
669: (24) last_argument = None
670: (20) else:
671: (24) messages.append(None)
672: (20) concatenate_next = False
673: (16) elif token.value == '+':
674: (20) concatenate_next = True
675: (8) elif call_stack > 0 and token.type == 'operator' \
676: (16) and token.value == ')':
677: (12) call_stack -= 1
678: (8) elif funcname and call_stack == -1:
679: (12) funcname = None
680: (8) elif call_stack == -1 and token.type == 'name' and \
681: (12) token.value in keywords and \
682: (12) (last_token is None or last_token.type != 'name' or
683: (13) last_token.value != 'function'):
684: (12) funcname = token.value
685: (8) last_token = token
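# Illustrative sketch (not part of the Babel API): running the JavaScript
# extractor over a small, hypothetical snippet.  Each yielded tuple has the
# form (lineno, funcname, messages, translator_comments).
def _example_extract_javascript() -> None:
    source = (
        b"// NOTE: greeting shown in the header\n"
        b"var msg = gettext('Hello, world!');\n"
        b"var label = gettext(`Bye`);\n"
    )
    for result in extract_javascript(io.BytesIO(source), DEFAULT_KEYWORDS, ('NOTE:',), {}):
        print(result)
    # (2, 'gettext', 'Hello, world!', ['NOTE: greeting shown in the header'])
    # (3, 'gettext', 'Bye', [])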
686: (0) def parse_template_string(
687: (4) template_string: str,
688: (4) keywords: Mapping[str, _Keyword],
689: (4) comment_tags: Collection[str],
690: (4) options: _JSOptions,
691: (4) lineno: int = 1,
692: (0) ) -> Generator[_ExtractionResult, None, None]:
693: (4) """Parse JavaScript template string.
694: (4) :param template_string: the template string to be parsed
695: (4) :param keywords: a list of keywords (i.e. function names) that should be
696: (21) recognized as translation functions
697: (4) :param comment_tags: a list of translator tags to search for and include
698: (25) in the results
699: (4) :param options: a dictionary of additional options (optional)
700: (4) :param lineno: starting line number (optional)
701: (4) """
702: (4) from babel.messages.jslexer import line_re
703: (4) prev_character = None
704: (4) level = 0
705: (4) inside_str = False
706: (4) expression_contents = ''
707: (4) for character in template_string[1:-1]:
708: (8) if not inside_str and character in ('"', "'", '`'):
709: (12) inside_str = character
710: (8) elif inside_str == character and prev_character != r'\\':
711: (12) inside_str = False
712: (8) if level:
713: (12) expression_contents += character
714: (8) if not inside_str:
715: (12) if character == '{' and prev_character == '$':
716: (16) level += 1
717: (12) elif level and character == '}':
718: (16) level -= 1
719: (16) if level == 0 and expression_contents:
720: (20) expression_contents = expression_contents[0:-1]
721: (20) fake_file_obj = io.BytesIO(expression_contents.encode())
722: (20) yield from extract_javascript(fake_file_obj, keywords, comment_tags, options, lineno)
723: (20) lineno += len(line_re.findall(expression_contents))
724: (20) expression_contents = ''
725: (8) prev_character = character
726: (0) _BUILTIN_EXTRACTORS = {
727: (4) 'ignore': extract_nothing,
728: (4) 'python': extract_python,
729: (4) 'javascript': extract_javascript,
730: (0) }
----------------------------------------
File 21 - .\messages \jslexer.py:
1: (0) """
2: (4) babel.messages.jslexer
3: (4) ~~~~~~~~~~~~~~~~~~~~~~
4: (4) A simple JavaScript 1.5 lexer which is used for the JavaScript
5: (4) extractor.
6: (4) :copyright: (c) 2013-2024 by the Babel Team.
7: (4) :license: BSD, see LICENSE for more details.
8: (0) """
9: (0) from __future__ import annotations
10: (0) import re
11: (0) from collections.abc import Generator
12: (0) from typing import NamedTuple
13: (0) operators: list[str] = sorted([
14: (4) '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
15: (4) '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
16: (4) '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
17: (4) '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':',
18: (0) ], key=len, reverse=True)
19: (0) escapes: dict[str, str] = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}
20: (0) name_re = re.compile(r'[\w$_][\w\d$_]*', re.UNICODE)
21: (0) dotted_name_re = re.compile(r'[\w$_][\w\d$_.]*[\w\d$_.]', re.UNICODE)
22: (0) division_re = re.compile(r'/=?')
23: (0) regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*', re.DOTALL)
24: (0) line_re = re.compile(r'(\r\n|\n|\r)')
25: (0) line_join_re = re.compile(r'\\' + line_re.pattern)
26: (0) uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')
27: (0) hex_escape_re = re.compile(r'[a-fA-F0-9]{1,2}')
28: (0) class Token(NamedTuple):
29: (4) type: str
30: (4) value: str
31: (4) lineno: int
32: (0) _rules: list[tuple[str | None, re.Pattern[str]]] = [
33: (4) (None, re.compile(r'\s+', re.UNICODE)),
34: (4) (None, re.compile(r'<!--.*')),
35: (4) ('linecomment', re.compile(r'//.*')),
36: (4) ('multilinecomment', re.compile(r'/\*.*?\*/', re.UNICODE | re.DOTALL)),
37: (4) ('dotted_name', dotted_name_re),
38: (4) ('name', name_re),
39: (4) ('number', re.compile(r'''(
40: (8) (?:0|[1-9]\d*)
41: (8) (\.\d+)?
42: (8) ([eE][-+]?\d+)? |
43: (8) (0x[a-fA-F0-9]+)
44: (4) )''', re.VERBOSE)),
45: (4) ('jsx_tag', re.compile(r'(?:</?[^>\s]+|/>)', re.I)), # May be mangled in `get_rules`
46: (4) ('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))),
47: (4) ('template_string', re.compile(r'''`(?:[^`\\]*(?:\\.[^`\\]*)*)`''', re.UNICODE)),
48: (4) ('string', re.compile(r'''(
49: (8) '(?:[^'\\]*(?:\\.[^'\\]*)*)' |
50: (8) "(?:[^"\\]*(?:\\.[^"\\]*)*)"
51: (4) )''', re.VERBOSE | re.DOTALL)),
52: (0) ]
53: (0) def get_rules(jsx: bool, dotted: bool, template_string: bool) -> list[tuple[str | None, re.Pattern[str]]]:
54: (4) """
55: (4) Get a tokenization rule list given the passed syntax options.
56: (4) Internal to this module.
57: (4) """
58: (4) rules = []
59: (4) for token_type, rule in _rules:
60: (8) if not jsx and token_type and 'jsx' in token_type:
61: (12) continue
62: (8) if not template_string and token_type == 'template_string':
63: (12) continue
64: (8) if token_type == 'dotted_name':
65: (12) if not dotted:
66: (16) continue
67: (12) token_type = 'name'
68: (8) rules.append((token_type, rule))
69: (4) return rules
70: (0) def indicates_division(token: Token) -> bool:
71: (4) """A helper function that helps the tokenizer to decide if the current
72: (4) token may be followed by a division operator.
73: (4) """
74: (4) if token.type == 'operator':
75: (8) return token.value in (')', ']', '}', '++', '--')
76: (4) return token.type in ('name', 'number', 'string', 'regexp')
77: (0) def unquote_string(string: str) -> str:
78: (4) """Unquote a string with JavaScript rules. The string has to start with
79: (4) string delimiters (``'``, ``"``, or the back-tick/grave accent for template strings).
80: (4) """
81: (4) assert string and string[0] == string[-1] and string[0] in '"\'`', \
82: (8) 'string provided is not properly delimited'
83: (4) string = line_join_re.sub('\\1', string[1:-1])
84: (4) result: list[str] = []
85: (4) add = result.append
86: (4) pos = 0
87: (4) while True:
88: (8) # scan for the next escape
89: (8) escape_pos = string.find('\\', pos)
90: (8) if escape_pos < 0:
91: (12) break
92: (8) add(string[pos:escape_pos])
93: (8) # check which character is escaped
94: (8) next_char = string[escape_pos + 1]
95: (8) if next_char in escapes:
96: (12) add(escapes[next_char])
97: (8) # Unicode escapes: try to consume up to four hexadecimal
98: (8) # characters and interpret them as a Unicode code point. If
99: (8) # there is no such code point, put all of the consumed
100: (8) # characters back into the string as-is.
101: (8) elif next_char in 'uU':
102: (12) escaped = uni_escape_re.match(string, escape_pos + 2)
103: (12) if escaped is not None:
104: (16) escaped_value = escaped.group()
105: (16) if len(escaped_value) == 4:
106: (20) try:
107: (24) add(chr(int(escaped_value, 16)))
108: (20) except ValueError:
109: (24) pass
110: (20) else:
111: (24) pos = escape_pos + 6
112: (24) continue
113: (16) add(next_char + escaped_value)
114: (16) pos = escaped.end()
115: (16) continue
116: (12) else:
117: (16) add(next_char)
118: (8) # Hex escapes: conversion from a 2-digit hex value to a char is infallible.
119: (8) elif next_char in 'xX':
120: (12) escaped = hex_escape_re.match(string, escape_pos + 2)
121: (12) if escaped is not None:
122: (16) escaped_value = escaped.group()
123: (16) add(chr(int(escaped_value, 16)))
124: (16) pos = escape_pos + 2 + len(escaped_value)
125: (16) continue
126: (12) else:
127: (16) add(next_char)
128: (8) # bogus escape. Just remove the backslash.
129: (8) else:
130: (12) add(next_char)
131: (8) pos = escape_pos + 2
132: (4) if pos < len(string):
133: (8) add(string[pos:])
134: (4) return ''.join(result)
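# Illustrative sketch (not part of the Babel API): unquote_string() resolves
# JavaScript escape sequences; the input below mixes a \uXXXX escape, a \xXX
# escape and a named escape.
def _example_unquote() -> None:
    assert unquote_string(r"'caf\u00e9 \x41\n'") == 'caf\u00e9 A\n'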
135: (0) def tokenize(source: str, jsx: bool = True, dotted: bool = True, template_string: bool = True, lineno: int = 1) -> Generator[Token, None, None]:
136: (4) """
137: (4) Tokenize JavaScript/JSX source. Returns a generator of tokens.
138: (4) :param jsx: Enable (limited) JSX parsing.
139: (4) :param dotted: Read dotted names as single name token.
140: (4) :param template_string: Support ES6 template strings
141: (4) :param lineno: starting line number (optional)
142: (4) """
143: (4) may_divide = False
144: (4) pos = 0
145: (4) end = len(source)
146: (4) rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string)
147: (4) while pos < end:
148: (8) # handle regular rules first
149: (8) for token_type, rule in rules: # noqa: B007
150: (12) match = rule.match(source, pos)
151: (12) if match is not None:
152: (16) break
153: (8) # if we don't have a match we don't give up yet, but check for
154: (8) # division operators or regular expression literals, based on
155: (8) # the status of `may_divide` which is determined by the last
156: (8) # processed non-whitespace token using `indicates_division`.
157: (8) else:
158: (12) if may_divide:
159: (16) match = division_re.match(source, pos)
160: (16) token_type = 'operator'
161: (12) else:
162: (16) match = regex_re.match(source, pos)
163: (16) token_type = 'regexp'
164: (12) if match is None:
165: (16) # whoops, invalid syntax. jump one char ahead and try again.
166: (16) pos += 1
167: (16) continue
168: (8) token_value = match.group()
169: (8) if token_type is not None:
170: (12) token = Token(token_type, token_value, lineno)
171: (12) may_divide = indicates_division(token)
172: (12) yield token
173: (8) lineno += len(line_re.findall(token_value))
174: (8) pos = match.end()
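A minimal usage sketch of the tokenizer above (assuming this module is importable as babel.messages.jslexer): tokenize() yields Token(type, value, lineno) tuples, and unquote_string() strips the delimiters and resolves the escape sequences of a string token's value.
    from babel.messages.jslexer import tokenize, unquote_string
    source = "gettext('Hello\\u0020world');"
    for tok in tokenize(source):
        # string tokens keep their quotes; unquote_string() removes them
        # and applies the escape handling implemented above
        if tok.type == 'string':
            print(tok.lineno, unquote_string(tok.value))  # -> 1 Hello world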
----------------------------------------
File 22 - .\messages \plurals.py:
1: (0) """
2: (4) babel.messages.plurals
3: (4) ~~~~~~~~~~~~~~~~~~~~~~
4: (4) Plural form definitions.
5: (4) :copyright: (c) 2013-2024 by the Babel Team.
6: (4) :license: BSD, see LICENSE for more details.
7: (0) """
8: (0) from __future__ import annotations
9: (0) from operator import itemgetter
10: (0) from babel.core import Locale, default_locale
11: (0) # XXX: remove this file, duplication with babel.plural
12: (0) LC_CTYPE: str | None = default_locale('LC_CTYPE')
13: (0) PLURALS: dict[str, tuple[int, str]] = {
14: (4) # Afar
15: (4) # 'aa': (),
16: (4) # Abkhazian
17: (4) # 'ab': (),
18: (4) # Avestan
19: (4) # 'ae': (),
20: (4) # Afrikaans - From Pootle's PO's
21: (4) 'af': (2, '(n != 1)'),
22: (4) # Akan
23: (4) # 'ak': (),
24: (4) # Amharic
25: (4) # 'am': (),
26: (4) # Aragonese
27: (4) # 'an': (),
28: (4) # Arabic - From Pootle's PO's
29: (4) 'ar': (6, '(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=0 && n%100<=2 ? 4 : 5)'),
30: (4) # Assamese
31: (4) # 'as': (),
32: (4) # Avaric
33: (4) # 'av': (),
34: (4) # Aymara
35: (4) # 'ay': (),
36: (4) # Azerbaijani
37: (4) # 'az': (),
38: (4) # Bashkir
39: (4) # 'ba': (),
40: (4) # Belarusian
41: (4) 'be': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
42: (4) # Bulgarian - From Pootle's PO's
43: (4) 'bg': (2, '(n != 1)'),
44: (4) # Bihari
45: (4) # 'bh': (),
46: (4) # Bislama
47: (4) # 'bi': (),
48: (4) # Bambara
49: (4) # 'bm': (),
50: (4) # Bengali - From Pootle's PO's
51: (4) 'bn': (2, '(n != 1)'),
52: (4) # Tibetan - as discussed in private with Andrew West
53: (4) 'bo': (1, '0'),
54: (4) # Breton
55: (4) 'br': (
56: (8) 6,
57: (8) '(n==1 ? 0 : n%10==1 && n%100!=11 && n%100!=71 && n%100!=91 ? 1 : n%10==2 && n%100!=12 && n%100!=72 && '
58: (8) 'n%100!=92 ? 2 : (n%10==3 || n%10==4 || n%10==9) && n%100!=13 && n%100!=14 && n%100!=19 && n%100!=73 && '
59: (8) 'n%100!=74 && n%100!=79 && n%100!=93 && n%100!=94 && n%100!=99 ? 3 : n%1000000==0 ? 4 : 5)',
60: (4) ),
61: (4) # Bosnian
62: (4) 'bs': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
63: (4) # Catalan - From Pootle's PO's
64: (4) 'ca': (2, '(n != 1)'),
65: (4) # Chechen
66: (4) # 'ce': (),
67: (4) # Chamorro
68: (4) # 'ch': (),
69: (4) # Corsican
70: (4) # 'co': (),
71: (4) # Cree
72: (4) # 'cr': (),
73: (4) # Czech
74: (4) 'cs': (3, '((n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2)'),
75: (4) # Church Slavic
76: (4) # 'cu': (),
77: (4) # Chuvash
78: (4) 'cv': (1, '0'),
79: (4) # Welsh
80: (4) 'cy': (5, '(n==1 ? 1 : n==2 ? 2 : n==3 ? 3 : n==6 ? 4 : 0)'),
81: (4) # Danish
82: (4) 'da': (2, '(n != 1)'),
83: (4) # German
84: (4) 'de': (2, '(n != 1)'),
85: (4) # Divehi
86: (4) # 'dv': (),
87: (4) # Dzongkha
88: (4) 'dz': (1, '0'),
89: (4) # Greek
90: (4) 'el': (2, '(n != 1)'),
91: (4) # English
92: (4) 'en': (2, '(n != 1)'),
93: (4) # Esperanto
94: (4) 'eo': (2, '(n != 1)'),
95: (4) # Spanish
96: (4) 'es': (2, '(n != 1)'),
97: (4) # Estonian
98: (4) 'et': (2, '(n != 1)'),
99: (4) # Basque - From Pootle's PO's
100: (4) 'eu': (2, '(n != 1)'),
101: (4) # Persian - From Pootle's PO's
102: (4) 'fa': (1, '0'),
103: (4) # Finnish
104: (4) 'fi': (2, '(n != 1)'),
105: (4) # French
106: (4) 'fr': (2, '(n > 1)'),
107: (4) # Friulian - From Pootle's PO's
108: (4) 'fur': (2, '(n > 1)'),
109: (4) # Irish
110: (4) 'ga': (5, '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)'),
111: (4) # Galician - From Pootle's PO's
112: (4) 'gl': (2, '(n != 1)'),
113: (4) # Hausa - From Pootle's PO's
114: (4) 'ha': (2, '(n != 1)'),
115: (4) # Hebrew
116: (4) 'he': (2, '(n != 1)'),
117: (4) # Hindi - From Pootle's PO's
118: (4) 'hi': (2, '(n != 1)'),
119: (4) # Croatian
120: (4) 'hr': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
121: (4) # Hungarian
122: (4) 'hu': (1, '0'),
123: (4) # Armenian - From Pootle's PO's
124: (4) 'hy': (1, '0'),
125: (4) # Icelandic - From Pootle's PO's
126: (4) 'is': (2, '(n%10==1 && n%100!=11 ? 0 : 1)'),
127: (4) # Italian
128: (4) 'it': (2, '(n != 1)'),
129: (4) # Japanese
130: (4) 'ja': (1, '0'),
131: (4) # Georgian - From Pootle's PO's
132: (4) 'ka': (1, '0'),
133: (4) # Kongo - From Pootle's PO's
134: (4) 'kg': (2, '(n != 1)'),
135: (4) # Khmer - From Pootle's PO's
136: (4) 'km': (1, '0'),
137: (4) # Korean
138: (4) 'ko': (1, '0'),
139: (4) # Kurdish - From Pootle's PO's
140: (4) 'ku': (2, '(n != 1)'),
141: (4) # Lao - Another member of the Tai language family, like Thai.
142: (4) 'lo': (1, '0'),
143: (4) # Lithuanian
144: (4) 'lt': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
145: (4) # Latvian
146: (4) 'lv': (3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
147: (4) # Maltese - From Pootle's PO's
148: (4) 'mt': (4, '(n==1 ? 0 : n==0 || ( n%100>=1 && n%100<=10) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
149: (4) # Norwegian Bokmål
150: (4) 'nb': (2, '(n != 1)'),
151: (4) # Dutch
152: (4) 'nl': (2, '(n != 1)'),
153: (4) # Norwegian Nynorsk
154: (4) 'nn': (2, '(n != 1)'),
155: (4) # Norwegian
156: (4) 'no': (2, '(n != 1)'),
157: (4) # Punjabi - From Pootle's PO's
158: (4) 'pa': (2, '(n != 1)'),
159: (4) # Polish
160: (4) 'pl': (3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
161: (4) # Portuguese
162: (4) 'pt': (2, '(n != 1)'),
163: (4) # Brazilian
164: (4) 'pt_BR': (2, '(n > 1)'),
165: (4) # Romanian - From Pootle's PO's
166: (4) 'ro': (3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)'),
167: (4) # Russian
168: (4) 'ru': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
169: (4) # Slovak
170: (4) 'sk': (3, '((n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2)'),
171: (4) # Slovenian
172: (4) 'sl': (4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
173: (4) # Serbian - From Pootle's PO's
174: (4) 'sr': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
175: (4) # Southern Sotho - From Pootle's PO's
176: (4) 'st': (2, '(n != 1)'),
177: (4) # Swedish
178: (4) 'sv': (2, '(n != 1)'),
179: (4) # Thai
180: (4) 'th': (1, '0'),
181: (4) # Turkish
182: (4) 'tr': (1, '0'),
183: (4) # Ukrainian
184: (4) 'uk': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
185: (4) # Venda - From Pootle's PO's
186: (4) 've': (2, '(n != 1)'),
187: (4) # Vietnamese - From Pootle's PO's
188: (4) 'vi': (1, '0'),
189: (4) # Xhosa - From Pootle's PO's
190: (4) 'xh': (2, '(n != 1)'),
191: (4) # Chinese - From Pootle's PO's (modified)
192: (4) 'zh': (1, '0'),
193: (0) }
194: (0) DEFAULT_PLURAL: tuple[int, str] = (2, '(n != 1)')
195: (0) class _PluralTuple(tuple):
196: (4) """A tuple with plural information."""
197: (4) __slots__ = ()
198: (4) num_plurals = property(itemgetter(0), doc="""
199: (4) The number of plurals used by the locale.""")
200: (4) plural_expr = property(itemgetter(1), doc="""
201: (4) The plural expression used by the locale.""")
202: (4) plural_forms = property(lambda x: 'nplurals={}; plural={};'.format(*x), doc="""
203: (4) The plural forms string used by the catalog or locale.""")
204: (4) def __str__(self) -> str:
205: (8) return self.plural_forms
206: (0) def get_plural(locale: str | None = LC_CTYPE) -> _PluralTuple:
207: (4) """A tuple with the information catalogs need to perform proper
208: (4) pluralization. The first item of the tuple is the number of plural
209: (4) forms, the second the plural expression.
210: (4) >>> get_plural(locale='en')
211: (4) (2, '(n != 1)')
212: (4) >>> get_plural(locale='ga')
213: (4) (5, '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)')
214: (4) The object returned is a special tuple with additional members:
215: (4) >>> tup = get_plural("ja")
216: (4) >>> tup.num_plurals
217: (4) 1
218: (4) >>> tup.plural_expr
219: (4) '0'
220: (4) >>> tup.plural_forms
221: (4) 'nplurals=1; plural=0;'
222: (4) Converting the tuple into a string prints the plural forms for a
223: (4) gettext catalog:
224: (4) >>> str(tup)
225: (4) 'nplurals=1; plural=0;'
226: (4) """
227: (4) locale = Locale.parse(locale)
228: (4) try:
229: (8) tup = PLURALS[str(locale)]
230: (4) except KeyError:
231: (8) try:
232: (12) tup = PLURALS[locale.language]
233: (8) except KeyError:
234: (12) tup = DEFAULT_PLURAL
235: (4) return _PluralTuple(tup)
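A short sketch of the lookup order implemented in get_plural(): the exact locale identifier is tried first, then the bare language code, and finally DEFAULT_PLURAL ('sw', Swahili, is used here as a CLDR locale with no entry in PLURALS).
    from babel.messages.plurals import get_plural
    print(tuple(get_plural('pt_BR')))  # (2, '(n > 1)')   - exact 'pt_BR' entry
    print(tuple(get_plural('pt')))     # (2, '(n != 1)')  - language-level entry
    # printing the _PluralTuple itself uses __str__, i.e. the gettext header value
    print(get_plural('sw'))            # nplurals=2; plural=(n != 1);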
----------------------------------------
File 23 - .\messages \_compat.py:
1: (0) import sys
2: (0) from functools import partial
3: (0) def find_entrypoints(group_name: str):
4: (4) """
5: (4) Find entrypoints of a given group using either `importlib.metadata` or the
6: (4) older `pkg_resources` mechanism.
7: (4) Yields tuples of the entrypoint name and a callable function that will
8: (4) load the actual entrypoint.
9: (4) """
10: (4) if sys.version_info >= (3, 10):
11: (8) # "Changed in version 3.10: importlib.metadata is no longer provisional."
12: (8) try:
13: (12) from importlib.metadata import entry_points
14: (8) except ImportError:
15: (12) pass
16: (8) else:
17: (12) eps = entry_points(group=group_name)
18: (12) # Only do this if this implementation of `importlib.metadata` is
19: (12) # modern enough to not return a dict.
20: (12) if not isinstance(eps, dict):
21: (16) for entry_point in eps:
22: (20) yield (entry_point.name, entry_point.load)
23: (16) return
24: (4) try:
25: (8) from pkg_resources import working_set
26: (4) except ImportError:
27: (8) pass
28: (4) else:
29: (8) for entry_point in working_set.iter_entry_points(group_name):
30: (12) yield (entry_point.name, partial(entry_point.load, require=True))
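A minimal sketch of how this helper is consumed elsewhere in the package (for example by babel.messages.checkers): each yielded pair is an entry point name plus a zero-argument loader that resolves the registered object.
    from babel.messages._compat import find_entrypoints
    # 'babel.extractors' is the entry point group Babel itself uses for
    # message extractors; any group name can be passed here.
    for name, load in find_entrypoints('babel.extractors'):
        print(name, load())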
----------------------------------------
File 24 - .\messages \checkers.py:
1: (0) """
2: (4) babel.messages.checkers
3: (4) ~~~~~~~~~~~~~~~~~~~~~~~
4: (4) Various routines that help with validation of translations.
5: (4) :since: version 0.9
6: (4) :copyright: (c) 2013-2024 by the Babel Team.
7: (4) :license: BSD, see LICENSE for more details.
8: (0) """
9: (0) from __future__ import annotations
10: (0) from collections.abc import Callable
11: (0) from babel.messages.catalog import PYTHON_FORMAT, Catalog, Message, TranslationError
12: (0) #: list of format chars that are compatible to each other
13: (0) _string_format_compatibilities = [
14: (4) {'i', 'd', 'u'},
15: (4) {'x', 'X'},
16: (4) {'f', 'F', 'g', 'G'},
17: (0) ]
18: (0) def num_plurals(catalog: Catalog | None, message: Message) -> None:
19: (4) """Verify the number of plurals in the translation."""
20: (4) if not message.pluralizable:
21: (8) if not isinstance(message.string, str):
22: (12) raise TranslationError("Found plural forms for non-pluralizable "
23: (35) "message")
24: (8) return
25: (4) # skip further tests if no catalog is provided.
26: (4) elif catalog is None:
27: (8) return
28: (4) msgstrs = message.string
29: (4) if not isinstance(msgstrs, (list, tuple)):
30: (8) msgstrs = (msgstrs,)
31: (4) if len(msgstrs) != catalog.num_plurals:
32: (8) raise TranslationError("Wrong number of plural forms (expected %d)" %
33: (31) catalog.num_plurals)
34: (0) def python_format(catalog: Catalog | None, message: Message) -> None:
35: (4) """Verify the format string placeholders in the translation."""
36: (4) if 'python-format' not in message.flags:
37: (8) return
38: (4) msgids = message.id
39: (4) if not isinstance(msgids, (list, tuple)):
40: (8) msgids = (msgids,)
41: (4) msgstrs = message.string
42: (4) if not isinstance(msgstrs, (list, tuple)):
43: (8) msgstrs = (msgstrs,)
44: (4) for msgid, msgstr in zip(msgids, msgstrs):
45: (8) if msgstr:
46: (12) _validate_format(msgid, msgstr)
47: (0) def _validate_format(format: str, alternative: str) -> None:
48: (4) """Test format string `alternative` against `format`. `format` can be the
49: (4) msgid of a message and `alternative` one of the `msgstr`\\s. The two
50: (4) arguments are not interchangeable as `alternative` may contain fewer
51: (4) placeholders if `format` uses named placeholders.
52: (4) The behavior of this function is undefined if the string does not use
53: (4) string formatting.
54: (4) If the string formatting of `alternative` is compatible to `format` the
55: (4) function returns `None`, otherwise a `TranslationError` is raised.
56: (4) Examples for compatible format strings:
57: (4) >>> _validate_format('Hello %s!', 'Hallo %s!')
58: (4) >>> _validate_format('Hello %i!', 'Hallo %d!')
59: (4) Example of incompatible format strings:
60: (4) >>> _validate_format('Hello %(name)s!', 'Hallo %s!')
61: (4) Traceback (most recent call last):
62: (6) ...
63: (4) TranslationError: the format strings are of different kinds
64: (4) This function is used by the `python_format` checker.
65: (4) :param format: The original format string
66: (4) :param alternative: The alternative format string that should be checked
67: (24) against format
68: (4) :raises TranslationError: on formatting errors
69: (4) """
70: (4) def _parse(string: str) -> list[tuple[str, str]]:
71: (8) result: list[tuple[str, str]] = []
72: (8) for match in PYTHON_FORMAT.finditer(string):
73: (12) name, format, typechar = match.groups()
74: (12) if typechar == '%' and name is None:
75: (16) continue
76: (12) result.append((name, str(typechar)))
77: (8) return result
78: (4) def _compatible(a: str, b: str) -> bool:
79: (8) if a == b:
80: (12) return True
81: (8) for set in _string_format_compatibilities:
82: (12) if a in set and b in set:
83: (16) return True
84: (8) return False
85: (4) def _check_positional(results: list[tuple[str, str]]) -> bool:
86: (8) positional = None
87: (8) for name, _char in results:
88: (12) if positional is None:
89: (16) positional = name is None
90: (12) else:
91: (16) if (name is None) != positional:
92: (20) raise TranslationError('format string mixes positional '
93: (43) 'and named placeholders')
94: (8) return bool(positional)
95: (4) a, b = map(_parse, (format, alternative))
96: (4) # now check if both strings are positional or named
97: (4) a_positional, b_positional = map(_check_positional, (a, b))
98: (4) if a_positional and not b_positional and not b:
99: (8) raise TranslationError('placeholders are incompatible')
100: (4) elif a_positional != b_positional:
101: (8) raise TranslationError('the format strings are of different kinds')
102: (4) # if we are operating on positional strings both must have the
103: (4) # same number of format chars and those must be compatible
104: (4) if a_positional:
105: (8) if len(a) != len(b):
106: (12) raise TranslationError('positional format placeholders are '
107: (35) 'unbalanced')
108: (8) for idx, ((_, first), (_, second)) in enumerate(zip(a, b)):
109: (12) if not _compatible(first, second):
110: (16) raise TranslationError('incompatible format for placeholder '
111: (39) '%d: %r and %r are not compatible' %
112: (39) (idx + 1, first, second))
113: (4) # otherwise the second string must not have names the first one
114: (4) # doesn't have and the types of those included must be compatible
115: (4) else:
116: (8) type_map = dict(a)
117: (8) for name, typechar in b:
118: (12) if name not in type_map:
119: (16) raise TranslationError(f'unknown named placeholder {name!r}')
120: (12) elif not _compatible(typechar, type_map[name]):
121: (16) raise TranslationError(
122: (20) f'incompatible format for placeholder {name!r}: '
123: (20) f'{typechar!r} and {type_map[name]!r} are not compatible',
124: (16) )
125: (0) def _find_checkers() -> list[Callable[[Catalog | None, Message], object]]:
126: (4) from babel.messages._compat import find_entrypoints
127: (4) checkers: list[Callable[[Catalog | None, Message], object]] = []
128: (4) checkers.extend(load() for (name, load) in find_entrypoints('babel.checkers'))
129: (4) if len(checkers) == 0:
130: (8) # if entrypoints are not available or no usable egg-info was found
131: (8) # (see #230), just resort to hard-coded checkers
132: (8) return [num_plurals, python_format]
133: (4) return checkers
134: (0) checkers: list[Callable[[Catalog | None, Message], object]] = _find_checkers()
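A small sketch exercising the python_format checker defined above: the msgstr uses a named placeholder the msgid does not define, so _validate_format() raises a TranslationError.
    from babel.messages.catalog import Message, TranslationError
    from babel.messages.checkers import python_format
    message = Message('Hello %(name)s!', string='Hallo %(nmae)s!',
                      flags=['python-format'])
    try:
        python_format(None, message)
    except TranslationError as exc:
        print(exc)  # unknown named placeholder 'nmae'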
----------------------------------------
File 25 - .\messages \frontend.py:
1: (0) """
2: (4) babel.messages.frontend
3: (4) ~~~~~~~~~~~~~~~~~~~~~~~
4: (4) Frontends for the message extraction functionality.
5: (4) :copyright: (c) 2013-2024 by the Babel Team.
6: (4) :license: BSD, see LICENSE for more details.
7: (0) """
8: (0) from __future__ import annotations
9: (0) import datetime
10: (0) import fnmatch
11: (0) import logging
12: (0) import optparse
13: (0) import os
14: (0) import re
15: (0) import shutil
16: (0) import sys
17: (0) import tempfile
18: (0) import warnings
19: (0) from collections import OrderedDict
20: (0) from configparser import RawConfigParser
21: (0) from io import StringIO
22: (0) from typing import BinaryIO, Iterable, Literal
23: (0) from babel import Locale, localedata
24: (0) from babel import __version__ as VERSION
25: (0) from babel.core import UnknownLocaleError
26: (0) from babel.messages.catalog import DEFAULT_HEADER, Catalog
27: (0) from babel.messages.extract import (
28: (4) DEFAULT_KEYWORDS,
29: (4) DEFAULT_MAPPING,
30: (4) check_and_call_extract_file,
31: (4) extract_from_dir,
32: (0) )
33: (0) from babel.messages.mofile import write_mo
34: (0) from babel.messages.pofile import read_po, write_po
35: (0) from babel.util import LOCALTZ
36: (0) log = logging.getLogger('babel')
37: (0) class BaseError(Exception):
38: (4) pass
39: (0) class OptionError(BaseError):
40: (4) pass
41: (0) class SetupError(BaseError):
42: (4) pass
43: (0) class ConfigurationError(BaseError):
44: (4) """
45: (4) Raised for errors in configuration files.
46: (4) """
47: (0) def listify_value(arg, split=None):
48: (4) """
49: (4) Make a list out of an argument.
50: (4) Values from `distutils` argument parsing are always single strings;
51: (4) values from `optparse` parsing may be lists of strings that may need
52: (4) to be further split.
53: (4) No matter the input, this function returns a flat list of whitespace-trimmed
54: (4) strings, with `None` values filtered out.
55: (4) >>> listify_value("foo bar")
56: (4) ['foo', 'bar']
57: (4) >>> listify_value(["foo bar"])
58: (4) ['foo', 'bar']
59: (4) >>> listify_value([["foo"], "bar"])
60: (4) ['foo', 'bar']
61: (4) >>> listify_value([["foo"], ["bar", None, "foo"]])
62: (4) ['foo', 'bar', 'foo']
63: (4) >>> listify_value("foo, bar, quux", ",")
64: (4) ['foo', 'bar', 'quux']
65: (4) :param arg: A string or a list of strings
66: (4) :param split: The argument to pass to `str.split()`.
67: (4) :return:
68: (4) """
69: (4) out = []
70: (4) if not isinstance(arg, (list, tuple)):
71: (8) arg = [arg]
72: (4) for val in arg:
73: (8) if val is None:
74: (12) continue
75: (8) if isinstance(val, (list, tuple)):
76: (12) out.extend(listify_value(val, split=split))
77: (12) continue
78: (8) out.extend(s.strip() for s in str(val).split(split))
79: (4) assert all(isinstance(val, str) for val in out)
80: (4) return out
81: (0) class CommandMixin:
82: (4) # This class is a small shim between Distutils commands and
83: (4) # optparse option parsing in the frontend command line.
84: (4) #: Option name to be input as `args` on the script command line.
85: (4) as_args = None
86: (4) #: Options which allow multiple values.
87: (4) #: This is used by the `optparse` transmogrification code.
88: (4) multiple_value_options = ()
89: (4) #: Options which are booleans.
90: (4) #: This is used by the `optparse` transmogrification code.
91: (4) # (This is actually used by distutils code too, but is never
92: (4) # declared in the base class.)
93: (4) boolean_options = ()
94: (4) #: Option aliases, to retain standalone command compatibility.
95: (4) #: Distutils does not support option aliases, but optparse does.
96: (4) #: This maps the distutils argument name to an iterable of aliases
97: (4) #: that are usable with optparse.
98: (4) option_aliases = {}
99: (4) #: Choices for options that need to be restricted to a specific
100: (4) #: list of choices.
101: (4) option_choices = {}
102: (4) #: Log object. To allow replacement in the script command line runner.
103: (4) log = log
104: (4) def __init__(self, dist=None):
105: (8) # A less strict version of distutils' `__init__`.
106: (8) self.distribution = dist
107: (8) self.initialize_options()
108: (8) self._dry_run = None
109: (8) self.verbose = False
110: (8) self.force = None
111: (8) self.help = 0
112: (8) self.finalized = 0
113: (4) def initialize_options(self):
114: (8) pass
115: (4) def ensure_finalized(self):
116: (8) if not self.finalized:
117: (12) self.finalize_options()
118: (8) self.finalized = 1
119: (4) def finalize_options(self):
120: (8) raise RuntimeError(
121: (12) f"abstract method -- subclass {self.__class__} must override",
122: (8) )
123: (0) class CompileCatalog(CommandMixin):
124: (4) description = 'compile message catalogs to binary MO files'
125: (4) user_options = [
126: (8) ('domain=', 'D',
127: (9) "domains of PO files (space separated list, default 'messages')"),
128: (8) ('directory=', 'd',
129: (9) 'path to base directory containing the catalogs'),
130: (8) ('input-file=', 'i',
131: (9) 'name of the input file'),
132: (8) ('output-file=', 'o',
133: (9) "name of the output file (default "
134: (9) "'<output_dir>/<locale>/LC_MESSAGES/<domain>.mo')"),
135: (8) ('locale=', 'l',
136: (9) 'locale of the catalog to compile'),
137: (8) ('use-fuzzy', 'f',
138: (9) 'also include fuzzy translations'),
139: (8) ('statistics', None,
140: (9) 'print statistics about translations'),
141: (4) ]
142: (4) boolean_options = ['use-fuzzy', 'statistics']
143: (4) def initialize_options(self):
144: (8) self.domain = 'messages'
145: (8) self.directory = None
146: (8) self.input_file = None
147: (8) self.output_file = None
148: (8) self.locale = None
149: (8) self.use_fuzzy = False
150: (8) self.statistics = False
151: (4) def finalize_options(self):
152: (8) self.domain = listify_value(self.domain)
153: (8) if not self.input_file and not self.directory:
154: (12) raise OptionError('you must specify either the input file or the base directory')
155: (8) if not self.output_file and not self.directory:
156: (12) raise OptionError('you must specify either the output file or the base directory')
157: (4) def run(self):
158: (8) n_errors = 0
159: (8) for domain in self.domain:
160: (12) for errors in self._run_domain(domain).values():
161: (16) n_errors += len(errors)
162: (8) if n_errors:
163: (12) self.log.error('%d errors encountered.', n_errors)
164: (8) return (1 if n_errors else 0)
165: (4) def _run_domain(self, domain):
166: (8) po_files = []
167: (8) mo_files = []
168: (8) if not self.input_file:
169: (12) if self.locale:
170: (16) po_files.append((self.locale,
171: (33) os.path.join(self.directory, self.locale,
172: (46) 'LC_MESSAGES',
173: (46) f"{domain}.po")))
174: (16) mo_files.append(os.path.join(self.directory, self.locale,
175: (45) 'LC_MESSAGES',
176: (45) f"{domain}.mo"))
177: (12) else:
178: (16) for locale in os.listdir(self.directory):
179: (20) po_file = os.path.join(self.directory, locale,
180: (43) 'LC_MESSAGES', f"{domain}.po")
181: (20) if os.path.exists(po_file):
182: (24) po_files.append((locale, po_file))
183: (24) mo_files.append(os.path.join(self.directory, locale,
184: (53) 'LC_MESSAGES',
185: (53) f"{domain}.mo"))
186: (8) else:
187: (12) po_files.append((self.locale, self.input_file))
188: (12) if self.output_file:
189: (16) mo_files.append(self.output_file)
190: (12) else:
191: (16) mo_files.append(os.path.join(self.directory, self.locale,
192: (45) 'LC_MESSAGES',
193: (45) f"{domain}.mo"))
194: (8) if not po_files:
195: (12) raise OptionError('no message catalogs found')
196: (8) catalogs_and_errors = {}
197: (8) for idx, (locale, po_file) in enumerate(po_files):
198: (12) mo_file = mo_files[idx]
199: (12) with open(po_file, 'rb') as infile:
200: (16) catalog = read_po(infile, locale)
201: (12) if self.statistics:
202: (16) translated = 0
203: (16) for message in list(catalog)[1:]:
204: (20) if message.string:
205: (24) translated += 1
206: (16) percentage = 0
207: (16) if len(catalog):
208: (20) percentage = translated * 100 // len(catalog)
209: (16) self.log.info(
210: (20) '%d of %d messages (%d%%) translated in %s',
211: (20) translated, len(catalog), percentage, po_file,
212: (16) )
213: (12) if catalog.fuzzy and not self.use_fuzzy:
214: (16) self.log.info('catalog %s is marked as fuzzy, skipping', po_file)
215: (16) continue
216: (12) catalogs_and_errors[catalog] = catalog_errors = list(catalog.check())
217: (12) for message, errors in catalog_errors:
218: (16) for error in errors:
219: (20) self.log.error(
220: (24) 'error: %s:%d: %s', po_file, message.lineno, error,
221: (20) )
222: (12) self.log.info('compiling catalog %s to %s', po_file, mo_file)
223: (12) with open(mo_file, 'wb') as outfile:
224: (16) write_mo(outfile, catalog, use_fuzzy=self.use_fuzzy)
225: (8) return catalogs_and_errors
226: (0) def _make_directory_filter(ignore_patterns):
227: (4) """
228: (4) Build a directory_filter function based on a list of ignore patterns.
229: (4) """
230: (4) def cli_directory_filter(dirname):
231: (8) basename = os.path.basename(dirname)
232: (8) return not any(
233: (12) fnmatch.fnmatch(basename, ignore_pattern)
234: (12) for ignore_pattern
235: (12) in ignore_patterns
236: (8) )
237: (4) return cli_directory_filter
238: (0) class ExtractMessages(CommandMixin):
239: (4) description = 'extract localizable strings from the project code'
240: (4) user_options = [
241: (8) ('charset=', None,
242: (9) 'charset to use in the output file (default "utf-8")'),
243: (8) ('keywords=', 'k',
244: (9) 'space-separated list of keywords to look for in addition to the '
245: (9) 'defaults (may be repeated multiple times)'),
246: (8) ('no-default-keywords', None,
247: (9) 'do not include the default keywords'),
248: (8) ('mapping-file=', 'F',
249: (9) 'path to the mapping configuration file'),
250: (8) ('no-location', None,
251: (9) 'do not include location comments with filename and line number'),
252: (8) ('add-location=', None,
253: (9) 'location lines format. If it is not given or "full", it generates '
254: (9) 'the lines with both file name and line number. If it is "file", '
255: (9) 'the line number part is omitted. If it is "never", it completely '
256: (9) 'suppresses the lines (same as --no-location).'),
257: (8) ('omit-header', None,
258: (9) 'do not include msgid "" entry in header'),
259: (8) ('output-file=', 'o',
260: (9) 'name of the output file'),
261: (8) ('width=', 'w',
262: (9) 'set output line width (default 76)'),
263: (8) ('no-wrap', None,
264: (9) 'do not break long message lines, longer than the output line width, '
265: (9) 'into several lines'),
266: (8) ('sort-output', None,
267: (9) 'generate sorted output (default False)'),
268: (8) ('sort-by-file', None,
269: (9) 'sort output by file location (default False)'),
270: (8) ('msgid-bugs-address=', None,
271: (9) 'set report address for msgid'),
272: (8) ('copyright-holder=', None,
273: (9) 'set copyright holder in output'),
274: (8) ('project=', None,
275: (9) 'set project name in output'),
276: (8) ('version=', None,
277: (9) 'set project version in output'),
278: (8) ('add-comments=', 'c',
279: (9) 'place comment block with TAG (or those preceding keyword lines) in '
280: (9) 'output file. Separate multiple TAGs with commas (,)'), # TODO: Support repetition of this argument
281: (8) ('strip-comments', 's',
282: (9) 'strip the comment TAGs from the comments.'),
283: (8) ('input-paths=', None,
284: (9) 'files or directories that should be scanned for messages. Separate multiple '
285: (9) 'files or directories with commas (,)'), # TODO: Support repetition of this argument
286: (8) ('input-dirs=', None, # TODO (3.x): Remove me.
287: (9) 'alias for input-paths (does allow files as well as directories).'),
288: (8) ('ignore-dirs=', None,
289: (9) 'Patterns for directories to ignore when scanning for messages. '
290: (9) 'Separate multiple patterns with spaces (default ".* ._")'),
291: (8) ('header-comment=', None,
292: (9) 'header comment for the catalog'),
293: (8) ('last-translator=', None,
294: (9) 'set the name and email of the last translator in output'),
295: (4) ]
296: (4) boolean_options = [
297: (8) 'no-default-keywords', 'no-location', 'omit-header', 'no-wrap',
298: (8) 'sort-output', 'sort-by-file', 'strip-comments',
299: (4) ]
300: (4) as_args = 'input-paths'
301: (4) multiple_value_options = (
302: (8) 'add-comments',
303: (8) 'keywords',
304: (8) 'ignore-dirs',
305: (4) )
306: (4) option_aliases = {
307: (8) 'keywords': ('--keyword',),
308: (8) 'mapping-file': ('--mapping',),
309: (8) 'output-file': ('--output',),
310: (8) 'strip-comments': ('--strip-comment-tags',),
311: (8) 'last-translator': ('--last-translator',),
312: (4) }
313: (4) option_choices = {
314: (8) 'add-location': ('full', 'file', 'never'),
315: (4) }
316: (4) def initialize_options(self):
317: (8) self.charset = 'utf-8'
318: (8) self.keywords = None
319: (8) self.no_default_keywords = False
320: (8) self.mapping_file = None
321: (8) self.no_location = False
322: (8) self.add_location = None
323: (8) self.omit_header = False
324: (8) self.output_file = None
325: (8) self.input_dirs = None
326: (8) self.input_paths = None
327: (8) self.width = None
328: (8) self.no_wrap = False
329: (8) self.sort_output = False
330: (8) self.sort_by_file = False
331: (8) self.msgid_bugs_address = None
332: (8) self.copyright_holder = None
333: (8) self.project = None
334: (8) self.version = None
335: (8) self.add_comments = None
336: (8) self.strip_comments = False
337: (8) self.include_lineno = True
338: (8) self.ignore_dirs = None
339: (8) self.header_comment = None
340: (8) self.last_translator = None
341: (4) def finalize_options(self):
342: (8) if self.input_dirs:
343: (12) if not self.input_paths:
344: (16) self.input_paths = self.input_dirs
345: (12) else:
346: (16) raise OptionError(
347: (20) 'input-dirs and input-paths are mutually exclusive',
348: (16) )
349: (8) keywords = {} if self.no_default_keywords else DEFAULT_KEYWORDS.copy()
350: (8) keywords.update(parse_keywords(listify_value(self.keywords)))
351: (8) self.keywords = keywords
352: (8) if not self.keywords:
353: (12) raise OptionError(
354: (16) 'you must specify new keywords if you disable the default ones',
355: (12) )
356: (8) if not self.output_file:
357: (12) raise OptionError('no output file specified')
358: (8) if self.no_wrap and self.width:
359: (12) raise OptionError(
360: (16) "'--no-wrap' and '--width' are mutually exclusive",
361: (12) )
362: (8) if not self.no_wrap and not self.width:
363: (12) self.width = 76
364: (8) elif self.width is not None:
365: (12) self.width = int(self.width)
366: (8) if self.sort_output and self.sort_by_file:
367: (12) raise OptionError(
368: (16) "'--sort-output' and '--sort-by-file' are mutually exclusive",
369: (12) )
370: (8) if self.input_paths:
371: (12) if isinstance(self.input_paths, str):
372: (16) self.input_paths = re.split(r',\s*', self.input_paths)
373: (8) elif self.distribution is not None:
374: (12) self.input_paths = dict.fromkeys([
375: (16) k.split('.', 1)[0]
376: (16) for k in (self.distribution.packages or ())
377: (12) ]).keys()
378: (8) else:
379: (12) self.input_paths = []
380: (8) if not self.input_paths:
381: (12) raise OptionError("no input files or directories specified")
382: (8) for path in self.input_paths:
383: (12) if not os.path.exists(path):
384: (16) raise OptionError(f"Input path: {path} does not exist")
385: (8) self.add_comments = listify_value(self.add_comments or (), ",")
386: (8) if self.distribution:
387: (12) if not self.project:
388: (16) self.project = self.distribution.get_name()
389: (12) if not self.version:
390: (16) self.version = self.distribution.get_version()
391: (8) if self.add_location == 'never':
392: (12) self.no_location = True
393: (8) elif self.add_location == 'file':
394: (12) self.include_lineno = False
395: (8) ignore_dirs = listify_value(self.ignore_dirs)
396: (8) if ignore_dirs:
397: (12) self.directory_filter = _make_directory_filter(ignore_dirs)
398: (8) else:
399: (12) self.directory_filter = None
400: (4) def _build_callback(self, path: str):
401: (8) def callback(filename: str, method: str, options: dict):
402: (12) if method == 'ignore':
403: (16) return
404: (12) # If we explicitly provide a full filepath, just use that.
405: (12) # Otherwise, path will be the directory path and filename
406: (12) # is the relative path from that dir to the file.
407: (12) # So we can join those to get the full filepath.
408: (12) if os.path.isfile(path):
409: (16) filepath = path
410: (12) else:
411: (16) filepath = os.path.normpath(os.path.join(path, filename))
412: (12) optstr = ''
413: (12) if options:
414: (16) opt_values = ", ".join(f'{k}="{v}"' for k, v in options.items())
415: (16) optstr = f" ({opt_values})"
416: (12) self.log.info('extracting messages from %s%s', filepath, optstr)
417: (8) return callback
418: (4) def run(self):
419: (8) mappings = self._get_mappings()
420: (8) with open(self.output_file, 'wb') as outfile:
421: (12) catalog = Catalog(project=self.project,
422: (30) version=self.version,
423: (30) msgid_bugs_address=self.msgid_bugs_address,
424: (30) copyright_holder=self.copyright_holder,
425: (30) charset=self.charset,
426: (30) header_comment=(self.header_comment or DEFAULT_HEADER),
427: (30) last_translator=self.last_translator)
428: (12) for path, method_map, options_map in mappings:
429: (16) callback = self._build_callback(path)
430: (16) if os.path.isfile(path):
431: (20) current_dir = os.getcwd()
432: (20) extracted = check_and_call_extract_file(
433: (24) path, method_map, options_map,
434: (24) callback, self.keywords, self.add_comments,
435: (24) self.strip_comments, current_dir,
436: (20) )
437: (16) else:
438: (20) extracted = extract_from_dir(
439: (24) path, method_map, options_map,
440: (24) keywords=self.keywords,
441: (24) comment_tags=self.add_comments,
442: (24) callback=callback,
443: (24) strip_comment_tags=self.strip_comments,
444: (24) directory_filter=self.directory_filter,
445: (20) )
446: (16) for filename, lineno, message, comments, context in extracted:
447: (20) if os.path.isfile(path):
448: (24) filepath = filename # already normalized
449: (20) else:
450: (24) filepath = os.path.normpath(os.path.join(path, filename))
451: (20) catalog.add(message, None, [(filepath, lineno)],
452: (32) auto_comments=comments, context=context)
453: (12) self.log.info('writing PO template file to %s', self.output_file)
454: (12) write_po(outfile, catalog, width=self.width,
455: (21) no_location=self.no_location,
456: (21) omit_header=self.omit_header,
457: (21) sort_output=self.sort_output,
458: (21) sort_by_file=self.sort_by_file,
459: (21) include_lineno=self.include_lineno)
460: (4) def _get_mappings(self):
461: (8) mappings = []
462: (8) if self.mapping_file:
463: (12) if self.mapping_file.endswith(".toml"):
464: (16) with open(self.mapping_file, "rb") as fileobj:
465: (20) file_style = (
466: (24) "pyproject.toml"
467: (24) if os.path.basename(self.mapping_file) == "pyproject.toml"
468: (24) else "standalone"
469: (20) )
470: (20) method_map, options_map = _parse_mapping_toml(
471: (24) fileobj,
472: (24) filename=self.mapping_file,
473: (24) style=file_style,
474: (20) )
475: (12) else:
476: (16) with open(self.mapping_file) as fileobj:
477: (20) method_map, options_map = parse_mapping_cfg(fileobj, filename=self.mapping_file)
478: (12) for path in self.input_paths:
479: (16) mappings.append((path, method_map, options_map))
480: (8) elif getattr(self.distribution, 'message_extractors', None):
481: (12) message_extractors = self.distribution.message_extractors
482: (12) for path, mapping in message_extractors.items():
483: (16) if isinstance(mapping, str):
484: (20) method_map, options_map = parse_mapping_cfg(StringIO(mapping))
485: (16) else:
486: (20) method_map, options_map = [], {}
487: (20) for pattern, method, options in mapping:
488: (24) method_map.append((pattern, method))
489: (24) options_map[pattern] = options or {}
490: (16) mappings.append((path, method_map, options_map))
491: (8) else:
492: (12) for path in self.input_paths:
493: (16) mappings.append((path, DEFAULT_MAPPING, {}))
494: (8) return mappings
495: (0) class InitCatalog(CommandMixin):
496: (4) description = 'create a new catalog based on a POT file'
497: (4) user_options = [
498: (8) ('domain=', 'D',
499: (9) "domain of PO file (default 'messages')"),
500: (8) ('input-file=', 'i',
501: (9) 'name of the input file'),
502: (8) ('output-dir=', 'd',
503: (9) 'path to output directory'),
504: (8) ('output-file=', 'o',
505: (9) "name of the output file (default "
506: (9) "'<output_dir>/<locale>/LC_MESSAGES/<domain>.po')"),
507: (8) ('locale=', 'l',
508: (9) 'locale for the new localized catalog'),
509: (8) ('width=', 'w',
510: (9) 'set output line width (default 76)'),
511: (8) ('no-wrap', None,
512: (9) 'do not break long message lines, longer than the output line width, '
513: (9) 'into several lines'),
514: (4) ]
515: (4) boolean_options = ['no-wrap']
516: (4) def initialize_options(self):
517: (8) self.output_dir = None
518: (8) self.output_file = None
519: (8) self.input_file = None
520: (8) self.locale = None
521: (8) self.domain = 'messages'
522: (8) self.no_wrap = False
523: (8) self.width = None
524: (4) def finalize_options(self):
525: (8) if not self.input_file:
526: (12) raise OptionError('you must specify the input file')
527: (8) if not self.locale:
528: (12) raise OptionError('you must provide a locale for the new catalog')
529: (8) try:
530: (12) self._locale = Locale.parse(self.locale)
531: (8) except UnknownLocaleError as e:
532: (12) raise OptionError(e) from e
533: (8) if not self.output_file and not self.output_dir:
534: (12) raise OptionError('you must specify the output directory')
535: (8) if not self.output_file:
536: (12) self.output_file = os.path.join(self.output_dir, self.locale,
537: (44) 'LC_MESSAGES', f"{self.domain}.po")
538: (8) if not os.path.exists(os.path.dirname(self.output_file)):
539: (12) os.makedirs(os.path.dirname(self.output_file))
540: (8) if self.no_wrap and self.width:
541: (12) raise OptionError("'--no-wrap' and '--width' are mutually exclusive")
542: (8) if not self.no_wrap and not self.width:
543: (12) self.width = 76
544: (8) elif self.width is not None:
545: (12) self.width = int(self.width)
546: (4) def run(self):
547: (8) self.log.info(
548: (12) 'creating catalog %s based on %s', self.output_file, self.input_file,
549: (8) )
550: (8) with open(self.input_file, 'rb') as infile:
551: (12) # Although reading from the catalog template, read_po must be fed
552: (12) # the locale in order to correctly calculate plurals
553: (12) catalog = read_po(infile, locale=self.locale)
554: (8) catalog.locale = self._locale
555: (8) catalog.revision_date = datetime.datetime.now(LOCALTZ)
556: (8) catalog.fuzzy = False
557: (8) with open(self.output_file, 'wb') as outfile:
558: (12) write_po(outfile, catalog, width=self.width)
559: (0) class UpdateCatalog(CommandMixin):
560: (4) description = 'update message catalogs from a POT file'
561: (4) user_options = [
562: (8) ('domain=', 'D',
563: (9) "domain of PO file (default 'messages')"),
564: (8) ('input-file=', 'i',
565: (9) 'name of the input file'),
566: (8) ('output-dir=', 'd',
567: (9) 'path to base directory containing the catalogs'),
568: (8) ('output-file=', 'o',
569: (9) "name of the output file (default "
570: (9) "'<output_dir>/<locale>/LC_MESSAGES/<domain>.po')"),
571: (8) ('omit-header', None,
572: (9) "do not include msgid "" entry in header"),
573: (8) ('locale=', 'l',
574: (9) 'locale of the catalog to compile'),
575: (8) ('width=', 'w',
576: (9) 'set output line width (default 76)'),
577: (8) ('no-wrap', None,
578: (9) 'do not break long message lines, longer than the output line width, '
579: (9) 'into several lines'),
580: (8) ('ignore-obsolete=', None,
581: (9) 'whether to omit obsolete messages from the output'),
582: (8) ('init-missing=', None,
583: (9) 'if any output files are missing, initialize them first'),
584: (8) ('no-fuzzy-matching', 'N',
585: (9) 'do not use fuzzy matching'),
586: (8) ('update-header-comment', None,
587: (9) 'update target header comment'),
588: (8) ('previous', None,
589: (9) 'keep previous msgids of translated messages'),
590: (8) ('check=', None,
591: (9) 'don\'t update the catalog, just return the status. Return code 0 '
592: (9) 'means nothing would change. Return code 1 means that the catalog '
593: (9) 'would be updated'),
594: (8) ('ignore-pot-creation-date=', None,
595: (9) 'ignore changes to POT-Creation-Date when updating or checking'),
596: (4) ]
597: (4) boolean_options = [
598: (8) 'omit-header', 'no-wrap', 'ignore-obsolete', 'init-missing',
599: (8) 'no-fuzzy-matching', 'previous', 'update-header-comment',
600: (8) 'check', 'ignore-pot-creation-date',
601: (4) ]
602: (4) def initialize_options(self):
603: (8) self.domain = 'messages'
604: (8) self.input_file = None
605: (8) self.output_dir = None
606: (8) self.output_file = None
607: (8) self.omit_header = False
608: (8) self.locale = None
609: (8) self.width = None
610: (8) self.no_wrap = False
611: (8) self.ignore_obsolete = False
612: (8) self.init_missing = False
613: (8) self.no_fuzzy_matching = False
614: (8) self.update_header_comment = False
615: (8) self.previous = False
616: (8) self.check = False
617: (8) self.ignore_pot_creation_date = False
618: (4) def finalize_options(self):
619: (8) if not self.input_file:
620: (12) raise OptionError('you must specify the input file')
621: (8) if not self.output_file and not self.output_dir:
622: (12) raise OptionError('you must specify the output file or directory')
623: (8) if self.output_file and not self.locale:
624: (12) raise OptionError('you must specify the locale')
625: (8) if self.init_missing:
626: (12) if not self.locale:
627: (16) raise OptionError(
628: (20) 'you must specify the locale for '
629: (20) 'the init-missing option to work',
630: (16) )
631: (12) try:
632: (16) self._locale = Locale.parse(self.locale)
633: (12) except UnknownLocaleError as e:
634: (16) raise OptionError(e) from e
635: (8) else:
636: (12) self._locale = None
637: (8) if self.no_wrap and self.width:
638: (12) raise OptionError("'--no-wrap' and '--width' are mutually exclusive")
639: (8) if not self.no_wrap and not self.width:
640: (12) self.width = 76
641: (8) elif self.width is not None:
642: (12) self.width = int(self.width)
643: (8) if self.no_fuzzy_matching and self.previous:
644: (12) self.previous = False
645: (4) def run(self):
646: (8) check_status = {}
647: (8) po_files = []
648: (8) if not self.output_file:
649: (12) if self.locale:
650: (16) po_files.append((self.locale,
651: (33) os.path.join(self.output_dir, self.locale,
652: (46) 'LC_MESSAGES',
653: (46) f"{self.domain}.po")))
654: (12) else:
655: (16) for locale in os.listdir(self.output_dir):
656: (20) po_file = os.path.join(self.output_dir, locale,
657: (43) 'LC_MESSAGES',
658: (43) f"{self.domain}.po")
659: (20) if os.path.exists(po_file):
660: (24) po_files.append((locale, po_file))
661: (8) else:
662: (12) po_files.append((self.locale, self.output_file))
663: (8) if not po_files:
664: (12) raise OptionError('no message catalogs found')
665: (8) domain = self.domain
666: (8) if not domain:
667: (12) domain = os.path.splitext(os.path.basename(self.input_file))[0]
668: (8) with open(self.input_file, 'rb') as infile:
669: (12) template = read_po(infile)
670: (8) for locale, filename in po_files:
671: (12) if self.init_missing and not os.path.exists(filename):
672: (16) if self.check:
673: (20) check_status[filename] = False
674: (20) continue
675: (16) self.log.info(
676: (20) 'creating catalog %s based on %s', filename, self.input_file,
677: (16) )
678: (16) with open(self.input_file, 'rb') as infile:
679: (20) # Although reading from the catalog template, read_po must
680: (20) # be fed the locale in order to correctly calculate plurals
681: (20) catalog = read_po(infile, locale=self.locale)
682: (16) catalog.locale = self._locale
683: (16) catalog.revision_date = datetime.datetime.now(LOCALTZ)
684: (16) catalog.fuzzy = False
685: (16) with open(filename, 'wb') as outfile:
686: (20) write_po(outfile, catalog)
687: (12) self.log.info('updating catalog %s based on %s', filename, self.input_file)
688: (12) with open(filename, 'rb') as infile:
689: (16) catalog = read_po(infile, locale=locale, domain=domain)
690: (12) catalog.update(
691: (16) template, self.no_fuzzy_matching,
692: (16) update_header_comment=self.update_header_comment,
693: (16) update_creation_date=not self.ignore_pot_creation_date,
694: (12) )
695: (12) tmpname = os.path.join(os.path.dirname(filename),
696: (35) tempfile.gettempprefix() +
697: (35) os.path.basename(filename))
698: (12) try:
699: (16) with open(tmpname, 'wb') as tmpfile:
700: (20) write_po(tmpfile, catalog,
701: (29) omit_header=self.omit_header,
702: (29) ignore_obsolete=self.ignore_obsolete,
703: (29) include_previous=self.previous, width=self.width)
704: (12) except Exception:
705: (16) os.remove(tmpname)
706: (16) raise
707: (12) if self.check:
708: (16) with open(filename, "rb") as origfile:
709: (20) original_catalog = read_po(origfile)
710: (16) with open(tmpname, "rb") as newfile:
711: (20) updated_catalog = read_po(newfile)
712: (16) updated_catalog.revision_date = original_catalog.revision_date
713: (16) check_status[filename] = updated_catalog.is_identical(original_catalog)
714: (16) os.remove(tmpname)
715: (16) continue
716: (12) try:
717: (16) os.rename(tmpname, filename)
718: (12) except OSError:
719: (16) # We're probably on Windows, which doesn't support atomic
720: (16) # renames, at least not through Python.
721: (16) # If the error is in fact due to a permissions problem, that
722: (16) # same error is going to be raised from one of the following
723: (16) # operations.
724: (16) os.remove(filename)
725: (16) shutil.copy(tmpname, filename)
726: (16) os.remove(tmpname)
727: (8) if self.check:
728: (12) for filename, up_to_date in check_status.items():
729: (16) if up_to_date:
730: (20) self.log.info('Catalog %s is up to date.', filename)
731: (16) else:
732: (20) self.log.warning('Catalog %s is out of date.', filename)
733: (12) if not all(check_status.values()):
734: (16) raise BaseError("Some catalogs are out of date.")
735: (12) else:
736: (16) self.log.info("All the catalogs are up-to-date.")
737: (12) return
738: (0) class CommandLineInterface:
739: (4) """Command-line interface.
740: (4) This class provides a simple command-line interface to the message
741: (4) extraction and PO file generation functionality.
742: (4) """
743: (4) usage = '%%prog %s [options] %s'
744: (4) version = f'%prog {VERSION}'
745: (4) commands = {
746: (8) 'compile': 'compile message catalogs to MO files',
747: (8) 'extract': 'extract messages from source files and generate a POT file',
748: (8) 'init': 'create new message catalogs from a POT file',
749: (8) 'update': 'update existing message catalogs from a POT file',
750: (4) }
751: (4) command_classes = {
752: (8) 'compile': CompileCatalog,
753: (8) 'extract': ExtractMessages,
754: (8) 'init': InitCatalog,
755: (8) 'update': UpdateCatalog,
756: (4) }
757: (4) log = None # Replaced on instance level
758: (4) def run(self, argv=None):
759: (8) """Main entry point of the command-line interface.
760: (8) :param argv: list of arguments passed on the command-line
761: (8) """
762: (8) if argv is None:
763: (12) argv = sys.argv
764: (8) self.parser = optparse.OptionParser(usage=self.usage % ('command', '[args]'),
765: (44) version=self.version)
766: (8) self.parser.disable_interspersed_args()
767: (8) self.parser.print_help = self._help
768: (8) self.parser.add_option('--list-locales', dest='list_locales',
769: (31) action='store_true',
770: (31) help="print all known locales and exit")
771: (8) self.parser.add_option('-v', '--verbose', action='store_const',
772: (31) dest='loglevel', const=logging.DEBUG,
773: (31) help='print as much as possible')
774: (8) self.parser.add_option('-q', '--quiet', action='store_const',
775: (31) dest='loglevel', const=logging.ERROR,
776: (31) help='print as little as possible')
777: (8) self.parser.set_defaults(list_locales=False, loglevel=logging.INFO)
778: (8) options, args = self.parser.parse_args(argv[1:])
779: (8) self._configure_logging(options.loglevel)
780: (8) if options.list_locales:
781: (12) identifiers = localedata.locale_identifiers()
782: (12) id_width = max(len(identifier) for identifier in identifiers) + 1
783: (12) for identifier in sorted(identifiers):
784: (16) locale = Locale.parse(identifier)
785: (16) print(f"{identifier:<{id_width}} {locale.english_name}")
786: (12) return 0
787: (8) if not args:
788: (12) self.parser.error('no valid command or option passed. '
789: (30) 'Try the -h/--help option for more information.')
790: (8) cmdname = args[0]
791: (8) if cmdname not in self.commands:
792: (12) self.parser.error(f'unknown command "{cmdname}"')
793: (8) cmdinst = self._configure_command(cmdname, args[1:])
794: (8) return cmdinst.run()
795: (4) def _configure_logging(self, loglevel):
796: (8) self.log = log
797: (8) self.log.setLevel(loglevel)
798: (8) # Don't add a new handler for every instance initialization (#227), this
799: (8) # would cause duplicated output when the CommandLineInterface is used
800: (8) # as a normal Python class.
801: (8) if self.log.handlers:
802: (12) handler = self.log.handlers[0]
803: (8) else:
804: (12) handler = logging.StreamHandler()
805: (12) self.log.addHandler(handler)
806: (8) handler.setLevel(loglevel)
807: (8) formatter = logging.Formatter('%(message)s')
808: (8) handler.setFormatter(formatter)
809: (4) def _help(self):
810: (8) print(self.parser.format_help())
811: (8) print("commands:")
812: (8) cmd_width = max(8, max(len(command) for command in self.commands) + 1)
813: (8) for name, description in sorted(self.commands.items()):
814: (12) print(f" {name:<{cmd_width}} {description}")
815: (4) def _configure_command(self, cmdname, argv):
816: (8) """
817: (8) :type cmdname: str
818: (8) :type argv: list[str]
819: (8) """
820: (8) cmdclass = self.command_classes[cmdname]
821: (8) cmdinst = cmdclass()
822: (8) if self.log:
823: (12) cmdinst.log = self.log # Use our logger, not distutils'.
824: (8) assert isinstance(cmdinst, CommandMixin)
825: (8) cmdinst.initialize_options()
826: (8) parser = optparse.OptionParser(
827: (12) usage=self.usage % (cmdname, ''),
828: (12) description=self.commands[cmdname],
829: (8) )
830: (8) as_args = getattr(cmdclass, "as_args", ())
831: (8) for long, short, help in cmdclass.user_options:
832: (12) name = long.strip("=")
833: (12) default = getattr(cmdinst, name.replace("-", "_"))
834: (12) strs = [f"--{name}"]
835: (12) if short:
836: (16) strs.append(f"-{short}")
837: (12) strs.extend(cmdclass.option_aliases.get(name, ()))
838: (12) choices = cmdclass.option_choices.get(name, None)
839: (12) if name == as_args:
840: (16) parser.usage += f"<{name}>"
841: (12) elif name in cmdclass.boolean_options:
842: (16) parser.add_option(*strs, action="store_true", help=help)
843: (12) elif name in cmdclass.multiple_value_options:
844: (16) parser.add_option(*strs, action="append", help=help, choices=choices)
845: (12) else:
846: (16) parser.add_option(*strs, help=help, default=default, choices=choices)
847: (8) options, args = parser.parse_args(argv)
848: (8) if as_args:
849: (12) setattr(options, as_args.replace('-', '_'), args)
850: (8) for key, value in vars(options).items():
851: (12) setattr(cmdinst, key, value)
852: (8) try:
853: (12) cmdinst.ensure_finalized()
854: (8) except OptionError as err:
855: (12) parser.error(str(err))
856: (8) return cmdinst
857: (0) def main():
858: (4) return CommandLineInterface().run(sys.argv)
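A hedged usage sketch: the interface that backs the pybabel console script can also be driven programmatically; argv[0] is ignored, the first positional argument selects the command and the rest become its options ('myproject' below is a hypothetical source directory that must exist).
    from babel.messages.frontend import CommandLineInterface
    # equivalent to running: pybabel extract -o messages.pot myproject
    CommandLineInterface().run(
        ['pybabel', 'extract', '--output-file', 'messages.pot', 'myproject'],
    )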
859: (0) def parse_mapping(fileobj, filename=None):
860: (4) warnings.warn(
861: (8) "parse_mapping is deprecated, use parse_mapping_cfg instead",
862: (8) DeprecationWarning,
863: (8) stacklevel=2,
864: (4) )
865: (4) return parse_mapping_cfg(fileobj, filename)
866: (0) def parse_mapping_cfg(fileobj, filename=None):
867: (4) """Parse an extraction method mapping from a file-like object.
868: (4) :param fileobj: a readable file-like object containing the configuration
869: (20) text to parse
870: (4) """
871: (4) extractors = {}
872: (4) method_map = []
873: (4) options_map = {}
874: (4) parser = RawConfigParser()
875: (4) parser._sections = OrderedDict(parser._sections) # We need ordered sections
876: (4) parser.read_file(fileobj, filename)
877: (4) for section in parser.sections():
878: (8) if section == 'extractors':
879: (12) extractors = dict(parser.items(section))
880: (8) else:
881: (12) method, pattern = (part.strip() for part in section.split(':', 1))
882: (12) method_map.append((pattern, method))
883: (12) options_map[pattern] = dict(parser.items(section))
884: (4) if extractors:
885: (8) for idx, (pattern, method) in enumerate(method_map):
886: (12) if method in extractors:
887: (16) method = extractors[method]
888: (12) method_map[idx] = (pattern, method)
889: (4) return method_map, options_map
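A minimal sketch of the mapping-file syntax accepted by parse_mapping_cfg(): section names are "method: pattern" pairs, section options become the options_map entry for that pattern, and the optional [extractors] section maps method aliases to dotted callables (mypackage.extract:extract_custom is hypothetical).
    from io import StringIO
    from babel.messages.frontend import parse_mapping_cfg
    cfg = (
        "[extractors]\n"
        "custom = mypackage.extract:extract_custom\n"
        "[python: **.py]\n"
        "[custom: **/templates/**.tmpl]\n"
        "some_option = value\n"
    )
    method_map, options_map = parse_mapping_cfg(StringIO(cfg))
    print(method_map[1])  # ('**/templates/**.tmpl', 'mypackage.extract:extract_custom')
    print(options_map['**/templates/**.tmpl'])  # {'some_option': 'value'}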
890: (0) def _parse_config_object(config: dict, *, filename="(unknown)"):
891: (4) extractors = {}
892: (4) method_map = []
893: (4) options_map = {}
894: (4) extractors_read = config.get("extractors", {})
895: (4) if not isinstance(extractors_read, dict):
896: (8) raise ConfigurationError(f"{filename}: extractors: Expected a dictionary, got {type(extractors_read)!r}")
897: (4) for method, callable_spec in extractors_read.items():
898: (8) if not isinstance(method, str):
899: (12) # Impossible via TOML, but could happen with a custom object.
900: (12) raise ConfigurationError(f"{filename}: extractors: Extraction method must be a string, got {method!r}")
901: (8) if not isinstance(callable_spec, str):
902: (12) raise ConfigurationError(f"{filename}: extractors: Callable specification must be a string, got {callable_spec!r}")
903: (8) extractors[method] = callable_spec
904: (4) if "mapping" in config:
905: (8) raise ConfigurationError(f"{filename}: 'mapping' is not a valid key, did you mean 'mappings'?")
906: (4) mappings_read = config.get("mappings", [])
907: (4) if not isinstance(mappings_read, list):
908: (8) raise ConfigurationError(f"{filename}: mappings: Expected a list, got {type(mappings_read)!r}")
909: (4) for idx, entry in enumerate(mappings_read):
910: (8) if not isinstance(entry, dict):
911: (12) raise ConfigurationError(f"{filename}: mappings[{idx}]: Expected a dictionary, got {type(entry)!r}")
912: (8) entry = entry.copy()
913: (8) method = entry.pop("method", None)
914: (8) if not isinstance(method, str):
915: (12) raise ConfigurationError(f"{filename}: mappings[{idx}]: 'method' must be a string, got {method!r}")
916: (8) method = extractors.get(method, method) # Map the extractor name to the callable now
917: (8) pattern = entry.pop("pattern", None)
918: (8) if not isinstance(pattern, (list, str)):
919: (12) raise ConfigurationError(f"{filename}: mappings[{idx}]: 'pattern' must be a list or a string, got {pattern!r}")
920: (8) if not isinstance(pattern, list):
921: (12) pattern = [pattern]
922: (8) for pat in pattern:
923: (12) if not isinstance(pat, str):
924: (16) raise ConfigurationError(f"{filename}: mappings[{idx}]: 'pattern' elements must be strings, got {pat!r}")
925: (12) method_map.append((pat, method))
926: (12) options_map[pat] = entry
927: (4) return method_map, options_map
928: (0) def _parse_mapping_toml(
929: (4) fileobj: BinaryIO,
930: (4) filename: str = "(unknown)",
931: (4) style: Literal["standalone", "pyproject.toml"] = "standalone",
932: (0) ):
933: (4) """Parse an extraction method mapping from a binary file-like object.
934: (4) .. warning:: As of this version of Babel, this is a private API subject to change.
935: (4) :param fileobj: a readable binary file-like object containing the configuration TOML to parse
936: (4) :param filename: the name of the file being parsed, for error messages
937: (4) :param style: whether the file is in the style of a `pyproject.toml` file, i.e. whether to look for `tool.babel`.
938: (4) """
939: (4) try:
940: (8) import tomllib
941: (4) except ImportError:
942: (8) try:
943: (12) import tomli as tomllib
944: (8) except ImportError as ie: # pragma: no cover
945: (12) raise ImportError("tomli or tomllib is required to parse TOML files") from ie
946: (4) try:
947: (8) parsed_data = tomllib.load(fileobj)
948: (4) except tomllib.TOMLDecodeError as e:
949: (8) raise ConfigurationError(f"{filename}: Error parsing TOML file: {e}") from e
950: (4) if style == "pyproject.toml":
951: (8) try:
952: (12) babel_data = parsed_data["tool"]["babel"]
953: (8) except (TypeError, KeyError) as e:
954: (12) raise ConfigurationError(f"{filename}: No 'tool.babel' section found in file") from e
955: (4) elif style == "standalone":
956: (8) babel_data = parsed_data
957: (8) if "babel" in babel_data:
958: (12) raise ConfigurationError(f"{filename}: 'babel' should not be present in a stand-alone configuration file")
959: (4) else: # pragma: no cover
960: (8) raise ValueError(f"Unknown TOML style {style!r}")
961: (4) return _parse_config_object(babel_data, filename=filename)
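# A minimal sketch of the "pyproject.toml" style accepted above, assuming Babel
# (and, on Python < 3.11, tomli) is installed; the mapping tables are
# illustrative, and _parse_mapping_toml is a private helper as warned above.
from io import BytesIO
from babel.messages.frontend import _parse_mapping_toml
PYPROJECT_TOML = b"""\
[[tool.babel.mappings]]
method = "python"
pattern = "**.py"
[[tool.babel.mappings]]
method = "ignore"
pattern = ["**/build/**", "**/dist/**"]
"""
method_map, options_map = _parse_mapping_toml(
    BytesIO(PYPROJECT_TOML), "pyproject.toml", style="pyproject.toml",
)
# method_map -> [('**.py', 'python'), ('**/build/**', 'ignore'), ('**/dist/**', 'ignore')]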
962: (0) def _parse_spec(s: str) -> tuple[int | None, tuple[int | tuple[int, str], ...]]:
963: (4) inds = []
964: (4) number = None
965: (4) for x in s.split(','):
966: (8) if x[-1] == 't':  # "Nt": only match calls with exactly N arguments
967: (12) number = int(x[:-1])
968: (8) elif x[-1] == 'c':  # "Nc": argument N is the message context
969: (12) inds.append((int(x[:-1]), 'c'))
970: (8) else:
971: (12) inds.append(int(x))
972: (4) return number, tuple(inds)
973: (0) def parse_keywords(strings: Iterable[str] = ()):
974: (4) """Parse keywords specifications from the given list of strings.
975: (4) >>> import pprint
976: (4) >>> keywords = ['_', 'dgettext:2', 'dngettext:2,3', 'pgettext:1c,2',
977: (4) ... 'polymorphic:1', 'polymorphic:2,2t', 'polymorphic:3c,3t']
978: (4) >>> pprint.pprint(parse_keywords(keywords))
979: (4) {'_': None,
980: (5) 'dgettext': (2,),
981: (5) 'dngettext': (2, 3),
982: (5) 'pgettext': ((1, 'c'), 2),
983: (5) 'polymorphic': {None: (1,), 2: (2,), 3: ((3, 'c'),)}}
984: (4) The input keywords are in GNU Gettext style; see :doc:`cmdline` for details.
985: (4) The output is a dictionary mapping keyword names to a dictionary of specifications.
986: (4) Keys in this dictionary are numbers of arguments, where ``None`` means that all numbers
987: (4) of arguments are matched, and a number means only calls with that number of arguments
988: (4) are matched (which happens when using the "t" specifier). However, as a special
989: (4) case for backwards compatibility, if the dictionary of specifications would
990: (4) be ``{None: x}``, i.e., there is only one specification and it matches all argument
991: (4) counts, then it is collapsed into just ``x``.
992: (4) A specification is either a tuple or None. If a tuple, each element can be either a number
993: (4) ``n``, meaning that the nth argument should be extracted as a message, or the tuple
994: (4) ``(n, 'c')``, meaning that the nth argument should be extracted as context for the
995: (4) messages. A ``None`` specification is equivalent to ``(1,)``, extracting the first
996: (4) argument.
997: (4) """
998: (4) keywords = {}
999: (4) for string in strings:
1000: (8) if ':' in string:
1001: (12) funcname, spec_str = string.split(':')
1002: (12) number, spec = _parse_spec(spec_str)
1003: (8) else:
1004: (12) funcname = string
1005: (12) number = None
1006: (12) spec = None
1007: (8) keywords.setdefault(funcname, {})[number] = spec
1008: (4) # For best backwards compatibility, collapse {None: x} into x.
1009: (4) for k, v in keywords.items():
1010: (8) if set(v) == {None}:
1011: (12) keywords[k] = v[None]
1012: (4) return keywords
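# A minimal sketch of feeding the parsed keywords to the extractor, assuming
# Babel is installed; the keyword list and source snippet are illustrative.
from io import BytesIO
from babel.messages.extract import extract
from babel.messages.frontend import parse_keywords
keywords = parse_keywords(['_', 'ngettext:1,2'])
source = BytesIO(b"print(_('Hello'))\nprint(ngettext('%d item', '%d items', n))\n")
for lineno, message, comments, context in extract('python', source, keywords=keywords):
    print(lineno, message, context)
# -> 1 Hello None
#    2 ('%d item', '%d items') None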
1013: (0) def __getattr__(name: str):
1014: (4) # Re-exports for backwards compatibility;
1015: (4) # `setuptools_frontend` is the canonical import location.
1016: (4) if name in {'check_message_extractors', 'compile_catalog', 'extract_messages', 'init_catalog', 'update_catalog'}:
1017: (8) from babel.messages import setuptools_frontend
1018: (8) return getattr(setuptools_frontend, name)
1019: (4) raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
1020: (0) if __name__ == '__main__':
1021: (4) main()
----------------------------------------
File 26 - .\messages \__init__.py:
1: (0) """
2: (4) babel.messages
3: (4) ~~~~~~~~~~~~~~
4: (4) Support for ``gettext`` message catalogs.
5: (4) :copyright: (c) 2013-2024 by the Babel Team.
6: (4) :license: BSD, see LICENSE for more details.
7: (0) """
8: (0) from babel.messages.catalog import (
9: (4) Catalog,
10: (4) Message,
11: (4) TranslationError,
12: (0) )
13: (0) __all__ = [
14: (4) "Catalog",
15: (4) "Message",
16: (4) "TranslationError",
17: (0) ]
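# A minimal sketch of the re-exported Catalog API, assuming Babel is installed;
# the locale, project name, and messages are illustrative.
from babel.messages import Catalog
catalog = Catalog(locale="de_DE", project="myproject")
catalog.add("Hello, world!", locations=[("main.py", 1)])
catalog.add("Goodbye", "Auf Wiedersehen", locations=[("main.py", 2)])
for message in catalog:
    if message.id:  # skip the catalog header, whose id is the empty string
        print(message.id, "->", message.string)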
----------------------------------------
File 27 - .\messages \setuptools_frontend.py:
1: (0) from __future__ import annotations
2: (0) from babel.messages import frontend
3: (0) try:
4: (4) # See: https://setuptools.pypa.io/en/latest/deprecated/distutils-legacy.html
5: (4) from setuptools import Command
6: (4) try:
7: (8) from setuptools.errors import BaseError, OptionError, SetupError
8: (4) except ImportError: # Error aliases only added in setuptools 59 (2021-11).
9: (8) OptionError = SetupError = BaseError = Exception
10: (0) except ImportError:
11: (4) from distutils.cmd import Command
12: (4) from distutils.errors import DistutilsSetupError as SetupError
13: (0) def check_message_extractors(dist, name, value):
14: (4) """Validate the ``message_extractors`` keyword argument to ``setup()``.
15: (4) :param dist: the distutils/setuptools ``Distribution`` object
16: (4) :param name: the name of the keyword argument (should always be
17: (17) "message_extractors")
18: (4) :param value: the value of the keyword argument
19: (4) :raise `DistutilsSetupError`: if the value is not valid
20: (4) """
21: (4) assert name == "message_extractors"
22: (4) if not isinstance(value, dict):
23: (8) raise SetupError(
24: (12) 'the value of the "message_extractors" parameter must be a dictionary',
25: (8) )
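# A minimal sketch of a ``message_extractors`` value this hook accepts in a
# setup.py; the package name and mapping triples (pattern, method, options)
# are illustrative.
from setuptools import setup
setup(
    name="myproject",
    message_extractors={
        "myproject": [
            ("**.py", "python", None),
            ("**/templates/**.html", "ignore", None),
        ],
    },
)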
26: (0) class compile_catalog(frontend.CompileCatalog, Command):
27: (4) """Catalog compilation command for use in ``setup.py`` scripts.
28: (4) If correctly installed, this command is available to Setuptools-using
29: (4) setup scripts automatically. For projects using plain old ``distutils``,
30: (4) the command needs to be registered explicitly in ``setup.py``::
31: (8) from babel.messages.setuptools_frontend import compile_catalog
32: (8) setup(
33: (12) ...
34: (12) cmdclass = {'compile_catalog': compile_catalog}
35: (8) )
36: (4) .. versionadded:: 0.9
37: (4) """
38: (0) class extract_messages(frontend.ExtractMessages, Command):
39: (4) """Message extraction command for use in ``setup.py`` scripts.
40: (4) If correctly installed, this command is available to Setuptools-using
41: (4) setup scripts automatically. For projects using plain old ``distutils``,
42: (4) the command needs to be registered explicitly in ``setup.py``::
43: (8) from babel.messages.setuptools_frontend import extract_messages
44: (8) setup(
45: (12) ...
46: (12) cmdclass = {'extract_messages': extract_messages}
47: (8) )
48: (4) """
49: (0) class init_catalog(frontend.InitCatalog, Command):
50: (4) """New catalog initialization command for use in ``setup.py`` scripts.
51: (4) If correctly installed, this command is available to Setuptools-using
52: (4) setup scripts automatically. For projects using plain old ``distutils``,
53: (4) the command needs to be registered explicitly in ``setup.py``::
54: (8) from babel.messages.setuptools_frontend import init_catalog
55: (8) setup(
56: (12) ...
57: (12) cmdclass = {'init_catalog': init_catalog}
58: (8) )
59: (4) """
60: (0) class update_catalog(frontend.UpdateCatalog, Command):
61: (4) """Catalog merging command for use in ``setup.py`` scripts.
62: (4) If correctly installed, this command is available to Setuptools-using
63: (4) setup scripts automatically. For projects using plain old ``distutils``,
64: (4) the command needs to be registered explicitly in ``setup.py``::
65: (8) from babel.messages.setuptools_frontend import update_catalog
66: (8) setup(
67: (12) ...
68: (12) cmdclass = {'update_catalog': update_catalog}
69: (8) )
70: (4) .. versionadded:: 0.9
71: (4) """
72: (0) COMMANDS = {
73: (4) "compile_catalog": compile_catalog,
74: (4) "extract_messages": extract_messages,
75: (4) "init_catalog": init_catalog,
76: (4) "update_catalog": update_catalog,
77: (0) }
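# A minimal sketch of registering all four commands at once through the
# COMMANDS mapping, equivalent to the per-command cmdclass examples in the
# docstrings above; the project name is illustrative.
from setuptools import setup
from babel.messages.setuptools_frontend import COMMANDS
setup(
    name="myproject",
    cmdclass=dict(COMMANDS),
)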
----------------------------------------
File 28 - . \SANJOYNATHQHENOMENOLOGYGEOMETRIFYINGTRIGONOMETRYCOMBINER_aligner_20_characters_for_pythons_codes.py:
1: (0) import os
2: (0) from datetime import datetime
3: (0) def get_file_info(root_folder):
4: (4) file_info_list = []
5: (4) for root, dirs, files in os.walk(root_folder):
6: (8) for file in files:
7: (12) try:
8: (16) # Check if the file is a Python file
9: (16) if file.endswith('.py'):
10: (20) file_path = os.path.join(root, file)
11: (20) # Get file times
12: (20) creation_time = datetime.fromtimestamp(os.path.getctime(file_path))
13: (20) modified_time = datetime.fromtimestamp(os.path.getmtime(file_path))
14: (20) # Get file extension
15: (20) file_extension = os.path.splitext(file)[1].lower()
16: (20) # Append file info to list
17: (20) file_info_list.append([file, file_path, creation_time, modified_time, file_extension, root])
18: (12) except Exception as e:
19: (16) print(f"Error processing file {file}: {e}")
20: (4) # Sort the files by multiple criteria
21: (4) file_info_list.sort(key=lambda x: (x[2], x[3], len(x[0]), x[4])) # Sort by creation, modification time, name length, extension
22: (4) return file_info_list
23: (0) def process_file(file_info_list):
24: (4) combined_output = []
25: (4) for idx, (file_name, file_path, creation_time, modified_time, file_extension, root) in enumerate(file_info_list):
26: (8) with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
27: (12) content = f.read()
28: (12) # Remove blank lines; Python comments are intentionally kept (the stripping variant is disabled below)
29: (10) ### content = "\n".join([line for line in content.split('\n') if line.strip() and not line.strip().startswith("#")])
30: (12) content = "\n".join([line for line in content.split('\n') if line.strip()])
31: (12) # Replace tabs with spaces
32: (12) content = content.replace('\t', ' ')
33: (12) # Process each line
34: (12) processed_lines = []
35: (12) for i, line in enumerate(content.split('\n')):
36: (16) # Count the number of starting blank spaces
37: (16) leading_spaces = len(line) - len(line.lstrip(' '))
38: (16) # Create the line with line number and leading spaces count
39: (16) line_number_str = f"{i+1}: ({leading_spaces})"
40: (16) # Pad the prefix to 20 characters so the original code starts at column 21
41: (16) padding = ' ' * (20 - len(line_number_str))
42: (16) processed_line = f"{line_number_str}{padding}{line}"
43: (16) processed_lines.append(processed_line)
44: (12) content_with_line_numbers = "\n".join(processed_lines)
45: (12) # Add file listing order and line number
46: (12) combined_output.append(f"File {idx + 1} - {root} \\{file_name}:\n")
47: (12) combined_output.append(content_with_line_numbers)
48: (12) combined_output.append("\n" + "-"*40 + "\n")
49: (4) return combined_output
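# A small illustration of the prefix format produced by process_file: each line
# is prefixed with "N: (leading_spaces)" padded to 20 characters, so the
# original code starts at column 21.
line_number_str = "12: (8)"
padding = ' ' * (20 - len(line_number_str))
print(f"{line_number_str}{padding}x = compute()")  # code begins at column 21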
50: (0) # Set the root folder path
51: (0) root_folder_path = '.' # Set this to the desired folder
52: (0) # Get file information and process files
53: (0) file_info_list = get_file_info(root_folder_path)
54: (0) combined_output = process_file(file_info_list)
55: (0) # Save the processed data to an output file
56: (0) output_file = 'WITHRELPATH_COMMENTSKEPT_SANJOYNATHQHENOMENOLOGYGEOMETRIFYINGTRIGONOMETRY_combined_python_files_20_chars.txt'
57: (0) with open(output_file, 'w', encoding='utf-8') as logfile:
58: (4) logfile.write("\n".join(combined_output))
59: (0) print(f"Processed file info logged to {output_file}")
----------------------------------------