mirror of
https://github.com/crystalidea/qt6windows7.git
synced 2025-07-02 23:35:28 +08:00
qt 6.5.1 original
This commit is contained in:
14
util/locale_database/README
Normal file
14
util/locale_database/README
Normal file
@ -0,0 +1,14 @@
|
||||
locale_database is used to generate qlocale data from CLDR.
|
||||
|
||||
CLDR is the Common Locale Data Repository, a database for localized
|
||||
data (like date formats, country names etc). It is provided by the
|
||||
Unicode consortium.
|
||||
|
||||
See cldr2qlocalexml.py for how to run it and qlocalexml2cpp.py to
|
||||
update the locale data tables (principally text/qlocale_data_p.h and
|
||||
time/q*calendar_data_p.h under src/corelib/). See enumdata.py for when
|
||||
and how to update the data it provides. You will definitely need to
|
||||
pass --no-verify or -n to git commit for these changes.
|
||||
|
||||
See cldr2qtimezone.py for how to update tables of Windows-specific
|
||||
names for zones and UTC-offset zone names.
|
760
util/locale_database/cldr.py
Normal file
760
util/locale_database/cldr.py
Normal file
@ -0,0 +1,760 @@
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Digesting the CLDR's data.
|
||||
|
||||
Provides two classes:
|
||||
CldrReader -- driver for reading CLDR data
|
||||
CldrAccess -- used by the reader to access the tree of data files
|
||||
|
||||
The former should normally be all you need to access.
|
||||
See individual classes for further detail.
|
||||
"""
|
||||
|
||||
from typing import Iterable, TextIO
|
||||
from xml.dom import minidom
|
||||
from weakref import WeakValueDictionary as CacheDict
|
||||
from pathlib import Path
|
||||
|
||||
from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
|
||||
from qlocalexml import Locale
|
||||
|
||||
class CldrReader (object):
    """Driver for reading CLDR data.

    Wraps a CldrAccess (as self.root) and digests its raw data into
    the form the QLocaleXML writer wants: likely sub-tag expansions
    and per-locale Locale objects."""
    def __init__(self, root: Path, grumble = lambda msg: None, whitter = lambda msg: None):
        """Set up a reader object for reading CLDR data.

        Single parameter, root, is the file-system path to the root of
        the unpacked CLDR archive; its common/ sub-directory should
        contain dtd/, main/ and supplemental/ sub-directories.

        Optional second argument, grumble, is a callable that logs
        warnings and complaints, e.g. sys.stderr.write would be a
        suitable callable. The default is a no-op that ignores its
        single argument. Optional third argument is similar, used for
        less interesting output; pass sys.stderr.write for it for
        verbose output."""
        self.root = CldrAccess(root)
        self.whitter, self.grumble = whitter, grumble
        self.root.checkEnumData(grumble)

    def likelySubTags(self):
        """Generator for likely subtag information.

        Yields pairs (have, give) of 4-tuples; if what you have
        matches the left member, giving the right member is probably
        sensible. Each 4-tuple's entries are the full names of a
        language, a script, a territory (usually a country) and a
        variant (currently ignored)."""
        unknown = []
        for got, use in self.root.likelySubTags():
            try:
                have = self.__parseTags(got)
                give = self.__parseTags(use)
            except Error as err:
                # Unknown-code failures for self-extending (or und_*)
                # rules are just noted; anything else gets grumbled.
                if ((use.startswith(got) or got.startswith('und_'))
                        and err.message.startswith('Unknown ') and ' code ' in err.message):
                    unknown.append(use)
                else:
                    self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({err})\n')
                continue

            # A rule whose "have" side is all Any-placeholders tells us nothing:
            if all(name.startswith('Any') and name[3].isupper() for name in have[:-1]):
                continue

            # Substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
            script = have[1] if give[1] == 'AnyScript' else give[1]
            land = have[2] if give[2] == 'AnyTerritory' else give[2]
            yield have, (give[0], script, land, give[3]) # AnyVariant similarly ?

        if unknown:
            # TODO: look at LDML's reserved locale tag names; they
            # show up a lot in this, and may be grounds for filtering
            # more out.
            pass # self.__wrapped(self.whitter, 'Skipping likelySubtags (for unknown codes): ', unknown)

    def readLocales(self, calendars = ('gregorian',)):
        """Map (language, script, territory, variant) IDs to Locale objects."""
        return {(loc.language_id, loc.script_id, loc.territory_id, loc.variant_code): loc
                for loc in self.__allLocales(calendars)}

    def __allLocales(self, calendars):
        """Generate Locale objects for all locales CLDR describes.

        Visits the default-content locales first, then those with
        their own data files; duplicates are resolved by readLocales'
        dict construction."""
        def skip(locale, reason):
            return f'Skipping defaultContent locale "{locale}" ({reason})\n'

        for locale in self.root.defaultContentLocales:
            try:
                language, script, territory, variant = self.__splitLocale(locale)
            except ValueError:
                self.whitter(skip(locale, 'only language tag'))
                continue

            if not (script or territory):
                self.grumble(skip(locale, 'second tag is neither script nor territory'))
                continue

            if not (language and territory):
                continue

            try:
                yield self.__getLocaleData(self.root.locale(locale), calendars,
                                           language, script, territory, variant)
            except Error as err:
                self.grumble(skip(locale, err.message))

        for locale in self.root.fileLocales:
            try:
                chain = self.root.locale(locale)
                language, script, territory, variant = chain.tagCodes()
                assert language
                # TODO: this skip should probably be based on likely
                # sub-tags, instead of empty territory: if locale has a
                # likely-subtag expansion, that's what QLocale uses,
                # and we'll be saving its data for the expanded locale
                # anyway, so don't need to record it for itself.
                # See also QLocaleXmlReader.loadLocaleMap's grumble.
                if not territory:
                    continue
                yield self.__getLocaleData(chain, calendars, language, script, territory, variant)
            except Error as err:
                self.grumble(f'Skipping file locale "{locale}" ({err})\n')

    import textwrap
    @staticmethod
    def __wrapped(writer, prefix, tokens, wrap = textwrap.wrap):
        # Wrap a comma-joined token list to 80 columns for log output.
        writer('\n'.join(wrap(prefix + ', '.join(tokens),
                              subsequent_indent=' ', width=80)) + '\n')
    del textwrap

    def __parseTags(self, locale):
        """Convert a locale name to a 4-tuple of full tag names.

        Raises (via codesToIdName) if any of the tags is unknown."""
        fields = self.__splitLocale(locale)
        language = next(fields)
        script = territory = variant = ''
        try:
            script, territory, variant = fields
        except ValueError:
            # __splitLocale yielded only the language tag:
            pass
        return tuple(pair[1] for pair in
                     self.root.codesToIdName(language, script, territory, variant))

    def __splitLocale(self, name):
        """Generate (language, script, territory, variant) from a locale name

        Ignores any trailing fields (with a warning), leaves script (a
        capitalised four-letter token), territory (either a number or
        an all-uppercase token) or variant (upper case and digits)
        empty if unspecified. Only generates one entry if name is a
        single tag (i.e. contains no underscores). Always yields 1 or
        4 values, never 2 or 3."""
        fields = iter(name.split('_'))
        yield next(fields) # Language

        try:
            token = next(fields)
        except StopIteration:
            return

        # Script is always four letters, always capitalised:
        if len(token) == 4 and token[0].isupper() and token[1:].islower():
            yield token
            try:
                token = next(fields)
            except StopIteration:
                token = ''
        else:
            yield ''

        # Territory is upper-case or numeric:
        if token and token.isupper() or token.isdigit():
            yield token
            try:
                token = next(fields)
            except StopIteration:
                token = ''
        else:
            yield ''

        # Variant can be any mixture of upper-case and digits.
        if token and all(ch.isupper() or ch.isdigit() for ch in token):
            yield token
            token = ''
        else:
            yield ''

        leftover = [token] if token else []
        leftover.extend(fields)

        if leftover:
            self.grumble(f'Ignoring unparsed cruft {"_".join(leftover)} in {name}\n')

    def __getLocaleData(self, scan, calendars, language, script, territory, variant):
        """Assemble a Locale from all the sources of data about it.

        Combines supplemental data (week, currency) from self.root
        with the per-locale scan's own data."""
        ids, names = zip(*self.root.codesToIdName(language, script, territory, variant))
        assert ids[0] > 0 and ids[2] > 0, (language, script, territory, variant)
        locale = Locale(
            language = names[0], language_code = language, language_id = ids[0],
            script = names[1], script_code = script, script_id = ids[1],
            territory = names[2], territory_code = territory, territory_id = ids[2],
            variant_code = variant)

        firstDay, weStart, weEnd = self.root.weekData(territory)
        assert all(day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
                   for day in (firstDay, weStart, weEnd))

        locale.update(firstDayOfWeek = firstDay,
                      weekendStart = weStart,
                      weekendEnd = weEnd)

        iso, digits, rounding = self.root.currencyData(territory)
        locale.update(currencyIsoCode = iso,
                      currencyDigits = int(digits),
                      currencyRounding = int(rounding))

        locale.update(scan.currencyData(iso))
        locale.update(scan.numericData(self.root.numberSystem, self.whitter))
        locale.update(scan.textPatternData())
        locale.update(scan.endonyms(language, script, territory, variant))
        locale.update(scan.unitData()) # byte, kB, MB, GB, ..., KiB, MiB, GiB, ...
        locale.update(scan.calendarNames(calendars)) # Names of days and months

        return locale
|
||||
|
||||
# Note: various caches assume this class is a singleton, so the
# "default" value for a parameter no caller should pass can serve as
# the cache. If a process were to instantiate this class with distinct
# roots, each cache would be filled by the first to need it !
class CldrAccess (object):
    def __init__(self, root: Path):
        """Set up a master object for accessing CLDR data.

        Single parameter, root, is the file-system path to the root of
        the unpacked CLDR archive; its common/ sub-directory should
        contain dtd/, main/ and supplemental/ sub-directories."""
        self.root = root

    def xml(self, relative_path: str):
        """Load a single XML file and return its root element as an XmlScanner.

        The path is interpreted relative to self.root"""
        return XmlScanner(Node(self.__xml(relative_path)))

    def supplement(self, name):
        """Loads supplemental data as a Supplement object.

        The name should be that of a file in common/supplemental/, without path.
        """
        return Supplement(Node(self.__xml(f'common/supplemental/{name}')))

    def locale(self, name):
        """Loads all data for a locale as a LocaleScanner object.

        The name should be a locale name; adding suffix '.xml' to it
        should usually yield a file in common/main/. The returned
        LocaleScanner object packages this file along with all those
        from which it inherits; its methods know how to handle that
        inheritance, where relevant."""
        return LocaleScanner(name, self.__localeRoots(name), self.__rootLocale)

    @property
    def fileLocales(self) -> Iterable[str]:
        """Generator for locale IDs seen in file-names.

        All *.xml other than root.xml in common/main/ are assumed to
        identify locales."""
        for path in self.root.joinpath('common/main').glob('*.xml'):
            if path.stem != 'root':
                yield path.stem

    @property
    def defaultContentLocales(self):
        """Generator for the default content locales."""
        for name, attrs in self.supplement('supplementalMetadata.xml').find('metadata/defaultContent'):
            try:
                locales = attrs['locales']
            except KeyError:
                pass
            else:
                for locale in locales.split():
                    yield locale

    def likelySubTags(self):
        """Generator for (from, to) pairs of likely sub-tag expansions."""
        for ignore, attrs in self.supplement('likelySubtags.xml').find('likelySubtags'):
            yield attrs['from'], attrs['to']

    def numberSystem(self, system):
        """Get a description of a numbering system.

        Returns a mapping, with keys 'digits', 'type' and 'id'; the
        value for this last is system. Raises KeyError for unknown
        number system, ldml.Error on failure to load data."""
        try:
            return self.__numberSystems[system]
        except KeyError:
            raise Error(f'Unsupported number system: {system}')

    def weekData(self, territory):
        """Data on the weekly cycle.

        Returns a triple (W, S, E) of en's short names for week-days;
        W is the first day of the week, S the start of the week-end
        and E the end of the week-end. Where data for a territory is
        unavailable, the data for CLDR's territory 001 (The World) is
        used."""
        try:
            return self.__weekData[territory]
        except KeyError:
            return self.__weekData['001']

    def currencyData(self, territory):
        """Returns currency data for the given territory code.

        Return value is a tuple (ISO4217 code, digit count, rounding
        mode). If CLDR provides no data for this territory, ('', 2, 1)
        is the default result.
        """
        try:
            return self.__currencyData[territory]
        except KeyError:
            return '', 2, 1

    def codesToIdName(self, language, script, territory, variant = ''):
        """Maps each code to the appropriate ID and name.

        Returns a 4-tuple of (ID, name) pairs corresponding to the
        language, script, territory and variant given. Raises a
        suitable error if any of them is unknown, indicating all that
        are unknown plus suitable names for any that could sensibly be
        added to enumdata.py to make them known.

        Until we implement variant support (QTBUG-81051), the fourth
        member of the returned tuple is always 0 paired with a string
        that should not be used."""
        enum = self.__enumMap
        try:
            return (enum('language')[language],
                    enum('script')[script],
                    enum('territory')[territory],
                    enum('variant')[variant])
        except KeyError:
            pass

        # At least one code is unknown; build a report naming them all:
        parts, values = [], [language, script, territory, variant]
        for index, key in enumerate(('language', 'script', 'territory', 'variant')):
            naming, enums = self.__codeMap(key), enum(key)
            value = values[index]
            if value not in enums:
                text = f'{key} code {value}'
                name = naming.get(value)
                if name and value != 'POSIX':
                    text += f' (could add {name})'
                parts.append(text)
        if len(parts) > 1:
            parts[-1] = 'and ' + parts[-1]
        assert parts
        raise Error('Unknown ' + ', '.join(parts),
                    language, script, territory, variant)

    @staticmethod
    def __checkEnum(given, proper, scraps,
                    remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ü': 'u'},
                    prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
                    suffixes = ( 'Han', ),
                    skip = '\u02bc'):
        """Yield (our name, suggested name) pairs where enumdata.py disagrees with CLDR.

        Each of given and proper is a { code: full name } mapping;
        scraps is a set of codes that supplementalData's parentLocale
        believes in, for which a missing en.xml name is tolerated.
        Names that differ only in punctuation, parenthesised clauses,
        known prefixes/suffixes or accents are treated as matching."""
        # Each is a { code: full name } mapping
        for code, name in given.items():
            try: right = proper[code]
            except KeyError:
                # No en.xml name for this code, but supplementalData's
                # parentLocale may still believe in it:
                if code not in scraps:
                    yield name, f'[Found no CLDR name for code {code}]'
                continue
            if name == right: continue
            ok = right.replace('&', 'And')
            for k, v in prefix.items():
                if ok.startswith(k + ' '):
                    ok = v + ok[len(k):]
            while '(' in ok:
                try: f, t = ok.index('('), ok.index(')')
                except ValueError: break
                # Excise the parenthesised clause, including the ')'
                # itself (t + 1; using ok[t:] would keep the ')'):
                ok = ok[:f].rstrip() + ' ' + ok[t + 1:].lstrip()
            if any(name == ok + ' ' + s for s in suffixes):
                continue
            if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
                    remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
                continue
            yield name, ok

    def checkEnumData(self, grumble):
        """Report naming mismatches between enumdata.py and CLDR.

        Single argument, grumble, is a callable that receives one
        string at a time; it is only called if a mismatch is found."""
        scraps = set()
        for k in self.__parentLocale.keys():
            for f in k.split('_'):
                scraps.add(f)
        from enumdata import language_map, territory_map, script_map
        language = dict((v, k) for k, v in language_map.values() if not v.isspace())
        territory = dict((v, k) for k, v in territory_map.values() if v != 'ZZ')
        script = dict((v, k) for k, v in script_map.values() if v != 'Zzzz')
        lang = dict(self.__checkEnum(language, self.__codeMap('language'), scraps))
        land = dict(self.__checkEnum(territory, self.__codeMap('territory'), scraps))
        text = dict(self.__checkEnum(script, self.__codeMap('script'), scraps))
        if lang or land or text:
            grumble("""\
Using names that don't match CLDR: consider updating the name(s) in
enumdata.py (keeping the old name as an alias):
""")
            if lang:
                grumble('Language:\n\t'
                        + '\n\t'.join(f'{k} -> {v}' for k, v in lang.items())
                        + '\n')
            if land:
                grumble('Territory:\n\t'
                        + '\n\t'.join(f'{k} -> {v}' for k, v in land.items())
                        + '\n')
            if text:
                grumble('Script:\n\t'
                        + '\n\t'.join(f'{k} -> {v}' for k, v in text.items())
                        + '\n')
            grumble('\n')

    def readWindowsTimeZones(self, lookup): # For use by cldr2qtimezone.py
        """Digest CLDR's MS-Win time-zone name mapping.

        MS-Win have their own eccentric names for time-zones. CLDR
        helpfully provides a translation to more orthodox names.

        Single argument, lookup, is a mapping from known MS-Win names
        for locales to a unique integer index (starting at 1).

        The XML structure we read has the form:

 <supplementalData>
     <windowsZones>
         <mapTimezones otherVersion="..." typeVersion="...">
             <!-- (UTC-08:00) Pacific Time (US & Canada) -->
             <mapZone other="Pacific Standard Time" territory="001" type="America/Los_Angeles"/>
             <mapZone other="Pacific Standard Time" territory="CA" type="America/Vancouver America/Dawson America/Whitehorse"/>
             <mapZone other="Pacific Standard Time" territory="US" type="America/Los_Angeles America/Metlakatla"/>
             <mapZone other="Pacific Standard Time" territory="ZZ" type="PST8PDT"/>
         </mapTimezones>
     </windowsZones>
 </supplementalData>
"""
        zones = self.supplement('windowsZones.xml')
        enum = self.__enumMap('territory')
        badZones, unLands, defaults, windows = set(), set(), {}, {}

        for name, attrs in zones.find('windowsZones/mapTimezones'):
            if name != 'mapZone':
                continue

            wid, code = attrs['other'], attrs['territory']
            data = dict(windowsId = wid,
                        territoryCode = code,
                        ianaList = attrs['type'])

            try:
                key = lookup[wid]
            except KeyError:
                badZones.add(wid)
                key = 0
            data['windowsKey'] = key

            if code == '001':
                defaults[key] = data['ianaList']
            else:
                try:
                    cid, name = enum[code]
                except KeyError:
                    # Bug fix: unLands is a set, so use add(), not
                    # append() (which raised AttributeError here):
                    unLands.add(code)
                    continue
                data.update(territoryId = cid, territory = name)
                windows[key, cid] = data

        if unLands:
            raise Error('Unknown territory codes, please add to enumdata.py: '
                        + ', '.join(sorted(unLands)))

        if badZones:
            raise Error('Unknown Windows IDs, please add to cldr2qtimezone.py: '
                        + ', '.join(sorted(badZones)))

        return self.cldrVersion, defaults, windows

    @property
    def cldrVersion(self):
        # Evaluate so as to ensure __cldrVersion is set:
        self.__unDistinguishedAttributes
        return self.__cldrVersion

    # Implementation details
    def __xml(self, relative_path: str, cache = CacheDict(), read = minidom.parse):
        # Parse (and weakly cache) an XML file, returning its document element.
        try:
            doc = cache[relative_path]
        except KeyError:
            cache[relative_path] = doc = read(str(self.root.joinpath(relative_path))).documentElement
        return doc

    def __open(self, relative_path: str) -> TextIO:
        # Open a text file relative to the CLDR root.
        return self.root.joinpath(relative_path).open()

    @property
    def __rootLocale(self, cache = []):
        # Cached scanner for the root locale's data file.
        if not cache:
            cache.append(self.xml('common/main/root.xml'))
        return cache[0]

    @property
    def __supplementalData(self, cache = []):
        # Cached Supplement for supplementalData.xml.
        if not cache:
            cache.append(self.supplement('supplementalData.xml'))
        return cache[0]

    @property
    def __numberSystems(self, cache = {}):
        # Cached { id: attributes } mapping from numberingSystems.xml.
        if not cache:
            for ignore, attrs in self.supplement('numberingSystems.xml').find('numberingSystems'):
                cache[attrs['id']] = attrs
            assert cache
        return cache

    @property
    def __weekData(self, cache = {}):
        # Cached { territory: (firstDay, weekendStart, weekendEnd) }.
        if not cache:
            firstDay, weStart, weEnd = self.__getWeekData()
            # Massage those into an easily-consulted form:
            # World defaults given for code '001':
            mon, sat, sun = firstDay['001'], weStart['001'], weEnd['001']
            lands = set(firstDay) | set(weStart) | set(weEnd)
            cache.update((land,
                          (firstDay.get(land, mon), weStart.get(land, sat), weEnd.get(land, sun)))
                         for land in lands)
            assert cache
        return cache

    def __getWeekData(self):
        """Scan for data on the weekly cycle.

        Yields three mappings from locales to en's short names for
        week-days; if a locale isn't a key of a given mapping, it
        should use the '001' (world) locale's value. The first mapping
        gives the day on which the week starts, the second gives the
        day on which the week-end starts, the third gives the last day
        of the week-end."""
        source = self.__supplementalData
        for key in ('firstDay', 'weekendStart', 'weekendEnd'):
            result = {}
            for ignore, attrs in source.find(f'weekData/{key}'):
                assert ignore == key
                day = attrs['day']
                assert day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'), day
                if 'alt' in attrs:
                    continue
                for loc in attrs.get('territories', '').split():
                    result[loc] = day
            yield result

    @property
    def __currencyData(self, cache = {}):
        # Cached { territory: (iso4217 code, digits, rounding) }.
        if not cache:
            source = self.__supplementalData
            for elt in source.findNodes('currencyData/region'):
                iso, digits, rounding = '', 2, 1
                try:
                    territory = elt.dom.attributes['iso3166'].nodeValue
                except KeyError:
                    continue
                # First currency that's legal tender and not expired:
                for child in elt.findAllChildren('currency'):
                    try:
                        if child.dom.attributes['tender'].nodeValue == 'false':
                            continue
                    except KeyError:
                        pass
                    try:
                        child.dom.attributes['to'] # Is set if this element has gone out of date.
                    except KeyError:
                        iso = child.dom.attributes['iso4217'].nodeValue
                        break
                if iso:
                    for tag, data in source.find(
                            f'currencyData/fractions/info[iso4217={iso}]'):
                        digits = data['digits']
                        rounding = data['rounding']
                cache[territory] = iso, digits, rounding
            assert cache

        return cache

    @property
    def __unDistinguishedAttributes(self, cache = {}):
        """Mapping from tag names to lists of attributes.

        LDML defines some attributes as 'distinguishing': if a node
        has distinguishing attributes that weren't specified in an
        XPath, a search on that XPath should exclude the node's
        children.

        This property is a mapping from tag names to tuples of
        attribute names that *aren't* distinguishing for that tag.
        Its value is cached (so its costly computation is only done
        once) and there's a side-effect of populating its cache: it
        sets self.__cldrVersion to the value found in ldml.dtd, during
        parsing."""
        if not cache:
            cache.update(self.__scanLdmlDtd())
            assert cache

        return cache

    def __scanLdmlDtd(self):
        """Scan the LDML DTD, record CLDR version

        Yields (tag, attrs) pairs: on elements with a given tag,
        attributes named in its attrs (a tuple) may be ignored in an
        XPath search; other attributes are distinguished attributes,
        in the terminology of LDML's locale-inheritance rules.

        Sets self.__cldrVersion as a side-effect, since this
        information is found in the same file."""
        with self.__open('common/dtd/ldml.dtd') as dtd:
            tag, ignored, last = None, None, None

            for line in dtd:
                if line.startswith('<!ELEMENT '):
                    if ignored:
                        assert tag
                        yield tag, tuple(ignored)
                    tag, ignored, last = line.split()[1], [], None
                    continue

                if line.startswith('<!ATTLIST '):
                    assert tag is not None
                    parts = line.split()
                    assert parts[1] == tag
                    last = parts[2]
                    if parts[1:5] == ['version', 'cldrVersion', 'CDATA', '#FIXED']:
                        # parts[5] is the version, in quotes, although the final > might be stuck on its end:
                        self.__cldrVersion = parts[5].split('"')[1]
                    continue

                # <!ELEMENT...>s can also be @METADATA, but not @VALUE:
                if '<!--@VALUE-->' in line or (last and '<!--@METADATA-->' in line):
                    assert last is not None
                    assert ignored is not None
                    assert tag is not None
                    ignored.append(last)
                    last = None # No attribute is both value and metadata

            if tag and ignored:
                yield tag, tuple(ignored)

    def __enumMap(self, key, cache = {}):
        # Cached { code: (ID, name) } mappings derived from enumdata.py.
        if not cache:
            cache['variant'] = {'': (0, 'This should never be seen outside ldml.py')}
            # They're not actually lists: mappings from numeric value
            # to pairs of full name and short code. What we want, in
            # each case, is a mapping from code to the other two.
            from enumdata import language_map, script_map, territory_map
            for form, book, empty in (('language', language_map, 'AnyLanguage'),
                                      ('script', script_map, 'AnyScript'),
                                      ('territory', territory_map, 'AnyTerritory')):
                cache[form] = dict((pair[1], (num, pair[0]))
                                   for num, pair in book.items() if pair[0] != 'C')
                # (Have to filter out the C locale, as we give it the
                # same (all space) code as AnyLanguage, whose code
                # should probably be 'und' instead.)

                # Map empty to zero and the any value:
                cache[form][''] = (0, empty)
            # and map language code 'und' also to (0, any):
            cache['language']['und'] = (0, 'AnyLanguage')

        return cache[key]

    def __codeMap(self, key, cache = {},
                  # Maps our name for it to CLDR's name:
                  naming = {'language': 'languages', 'script': 'scripts',
                            'territory': 'territories', 'variant': 'variants'}):
        # Cached { code: full name } mappings scanned from en.xml.
        if not cache:
            root = self.xml('common/main/en.xml').root.findUniqueChild('localeDisplayNames')
            for dst, src in naming.items():
                cache[dst] = dict(self.__codeMapScan(root.findUniqueChild(src)))
            assert cache

        return cache[key]

    def __codeMapScan(self, node):
        """Get mapping from codes to element values.

        Passed in node is a <languages>, <scripts>, <territories> or
        <variants> node, each child of which is a <language>,
        <script>, <territory> or <variant> node as appropriate, whose
        type is a code (of the appropriate flavour) and content is its
        full name. In some cases, two child nodes have the same type;
        in these cases, one always has an alt attribute and we should
        prefer the other. Yields all such type, content pairs found
        in node's children (skipping any with an alt attribute, if
        their type has been seen previously)."""
        seen = set()
        for elt in node.dom.childNodes:
            try:
                key, value = elt.attributes['type'].nodeValue, elt.childNodes[0].wholeText
            except (KeyError, ValueError, TypeError):
                pass
            else:
                if key not in seen or 'alt' not in elt.attributes:
                    yield key, value
                    seen.add(key)

    # CLDR uses inheritance between locales to save repetition:
    @property
    def __parentLocale(self, cache = {}):
        # see http://www.unicode.org/reports/tr35/#Parent_Locales
        if not cache:
            for tag, attrs in self.__supplementalData.find('parentLocales'):
                parent = attrs.get('parent', '')
                for child in attrs['locales'].split():
                    cache[child] = parent
            assert cache

        return cache

    def __localeAsDoc(self, name: str, aliasFor = None):
        # Load a locale's document element, chasing <alias source=...>
        # indirections; returns None when the file doesn't exist and
        # we weren't led here by an alias.
        path = f'common/main/{name}.xml'
        if self.root.joinpath(path).exists():
            elt = self.__xml(path)
            for child in Node(elt).findAllChildren('alias'):
                try:
                    alias = child.dom.attributes['source'].nodeValue
                except (KeyError, AttributeError):
                    pass
                else:
                    return self.__localeAsDoc(alias, aliasFor or name)
            # No alias child with a source:
            return elt

        if aliasFor:
            raise Error(f'Fatal error: found an alias "{aliasFor}" -> "{name}", '
                        'but found no file for the alias')

    def __scanLocaleRoots(self, name):
        # Yield Node wrappers for name's data file and each ancestor
        # it inherits from, following parentLocale data where present
        # and otherwise trimming trailing tags off the name.
        while name and name != 'root':
            doc = self.__localeAsDoc(name)
            if doc is not None:
                yield Node(doc, self.__unDistinguishedAttributes)

            try:
                name = self.__parentLocale[name]
            except KeyError:
                try:
                    name, tail = name.rsplit('_', 1)
                except ValueError: # No tail to discard: we're done
                    break

    class __Seq (list): pass # No weakref for tuple and list, but list sub-class is ok.
    def __localeRoots(self, name, cache = CacheDict()):
        # Weakly cache the inheritance chain of Nodes for each locale.
        try:
            chain = cache[name]
        except KeyError:
            cache[name] = chain = self.__Seq(self.__scanLocaleRoots(name))
        return chain

# Unpollute the namespace: we don't need to export these.
del minidom, CacheDict
|
87
util/locale_database/cldr2qlocalexml.py
Normal file
87
util/locale_database/cldr2qlocalexml.py
Normal file
@ -0,0 +1,87 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Convert CLDR data to QLocaleXML
|
||||
|
||||
The CLDR data can be downloaded from CLDR_, which has a sub-directory
|
||||
for each version; you need the ``core.zip`` file for your version of
|
||||
choice (typically the latest). This script has had updates to cope up
|
||||
to v38.1; for later versions, we may need adaptations. Unpack the
|
||||
downloaded ``core.zip`` and check it has a common/main/ sub-directory:
|
||||
pass the path of that root of the download to this script as its first
|
||||
command-line argument. Pass the name of the file in which to write
|
||||
output as the second argument; either omit it or use '-' to select the
|
||||
standard output. This file is the input needed by
|
||||
``./qlocalexml2cpp.py``
|
||||
|
||||
When you update the CLDR data, be sure to also update
|
||||
src/corelib/text/qt_attribution.json's entry for unicode-cldr. Check
|
||||
this script's output for unknown language, territory or script messages;
|
||||
if any can be resolved, use their entry in common/main/en.xml to
|
||||
append new entries to enumdata.py's lists and update documentation in
|
||||
src/corelib/text/qlocale.qdoc, adding the new entries in alphabetic
|
||||
order.
|
||||
|
||||
While updating the locale data, check also for updates to MS-Win's
|
||||
time zone names; see cldr2qtimezone.py for details.
|
||||
|
||||
All the scripts mentioned support --help to tell you how to use them.
|
||||
|
||||
.. _CLDR: https://unicode.org/Public/cldr/
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from cldr import CldrReader
|
||||
from qlocalexml import QLocaleXmlWriter
|
||||
|
||||
|
||||
def main(out, err):
    """Generate a QLocaleXML file from an unpacked CLDR tree.

    Arguments are the streams (normally sys.stdout and sys.stderr) to
    use for output and for progress/warning messages respectively.
    Command-line arguments (see --help) select the CLDR root, the
    output file and the calendars to emit. Returns 0 on success.
    """
    # Calendars for which Qt has locale data tables; 'hebrew' is not
    # yet supported.
    all_calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew'

    parser = argparse.ArgumentParser(
        description='Generate QLocaleXML from CLDR data.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
    parser.add_argument('out_file', help='output XML file name',
                        nargs='?', metavar='out-file.xml')
    parser.add_argument('--calendars', help='select calendars to emit data for',
                        nargs='+', metavar='CALENDAR',
                        choices=all_calendars, default=all_calendars)

    args = parser.parse_args()

    root = Path(args.cldr_path)
    # Presence of common/main/root.xml is used as a sanity check that
    # the first argument really is an unpacked CLDR tree.
    root_xml_path = 'common/main/root.xml'

    if not root.joinpath(root_xml_path).exists():
        parser.error('First argument is the root of the CLDR tree: '
                     f'found no {root_xml_path} under {root}')

    xml = args.out_file
    if not xml or xml == '-':
        # Omitted or '-' selects the standard-output stream.
        emit = out
    elif not xml.endswith('.xml'):
        parser.error(f'Please use a .xml extension on your output file name, not {xml}')
    else:
        try:
            # NOTE(review): never explicitly closed; the interpreter's
            # exit flushes it, but a with-block would be tidier.
            emit = open(xml, 'w')
        except IOError as e:
            parser.error(f'Failed to open "{xml}" to write output to it')

    # TODO - command line options to tune choice of grumble and whitter:
    # Both message channels currently go to the error stream.
    reader = CldrReader(root, err.write, err.write)
    writer = QLocaleXmlWriter(emit.write)

    writer.version(reader.root.cldrVersion)
    writer.enumData()
    writer.likelySubTags(reader.likelySubTags())
    writer.locales(reader.readLocales(args.calendars), args.calendars)

    writer.close(err.write)
    return 0

if __name__ == '__main__':
    sys.exit(main(sys.stdout, sys.stderr))
|
361
util/locale_database/cldr2qtimezone.py
Normal file
361
util/locale_database/cldr2qtimezone.py
Normal file
@ -0,0 +1,361 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Parse CLDR data for QTimeZone use with MS-Windows
|
||||
|
||||
Script to parse the CLDR common/supplemental/windowsZones.xml file and
|
||||
prepare its data for use in QTimeZone. See ``./cldr2qlocalexml.py`` for
|
||||
where to get the CLDR data. Pass its root directory as first parameter
|
||||
to this script. You can optionally pass the qtbase root directory as
|
||||
second parameter; it defaults to the root of the checkout containing
|
||||
this script. This script updates qtbase's
|
||||
src/corelib/time/qtimezoneprivate_data_p.h with the new data.
|
||||
"""
|
||||
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
import textwrap
|
||||
import argparse
|
||||
|
||||
from localetools import unicode2hex, wrap_list, Error, SourceFileEditor, qtbase_root
|
||||
from cldr import CldrAccess
|
||||
|
||||
### Data that may need updates in response to new entries in the CLDR file ###
|
||||
|
||||
# This script shall report the updates you need to make, if any arise.
|
||||
# However, you may need to research the relevant zone's standard offset.
|
||||
|
||||
# List of currently known Windows IDs.
|
||||
# If this script reports missing IDs, please add them here.
|
||||
# Look up the offset using (google and) timeanddate.com.
|
||||
# Not public so may safely be changed. Please keep in alphabetic order by ID.
|
||||
# ( Windows Id, Offset Seconds )
|
||||
# Each entry pairs a Windows time-zone display ID (as used in the
# registry and in CLDR's windowsZones.xml) with its standard (non-DST)
# offset from UTC in seconds.
windowsIdList = (
    ('Afghanistan Standard Time', 16200),
    ('Alaskan Standard Time', -32400),
    ('Aleutian Standard Time', -36000),
    ('Altai Standard Time', 25200),
    ('Arab Standard Time', 10800),
    ('Arabian Standard Time', 14400),
    ('Arabic Standard Time', 10800),
    ('Argentina Standard Time', -10800),
    ('Astrakhan Standard Time', 14400),
    ('Atlantic Standard Time', -14400),
    ('AUS Central Standard Time', 34200),
    ('Aus Central W. Standard Time', 31500),
    ('AUS Eastern Standard Time', 36000),
    ('Azerbaijan Standard Time', 14400),
    ('Azores Standard Time', -3600),
    ('Bahia Standard Time', -10800),
    ('Bangladesh Standard Time', 21600),
    ('Belarus Standard Time', 10800),
    ('Bougainville Standard Time', 39600),
    ('Canada Central Standard Time', -21600),
    ('Cape Verde Standard Time', -3600),
    ('Caucasus Standard Time', 14400),
    ('Cen. Australia Standard Time', 34200),
    ('Central America Standard Time', -21600),
    ('Central Asia Standard Time', 21600),
    ('Central Brazilian Standard Time', -14400),
    ('Central Europe Standard Time', 3600),
    ('Central European Standard Time', 3600),
    ('Central Pacific Standard Time', 39600),
    ('Central Standard Time (Mexico)', -21600),
    ('Central Standard Time', -21600),
    ('China Standard Time', 28800),
    ('Chatham Islands Standard Time', 45900),
    ('Cuba Standard Time', -18000),
    ('Dateline Standard Time', -43200),
    ('E. Africa Standard Time', 10800),
    ('E. Australia Standard Time', 36000),
    ('E. Europe Standard Time', 7200),
    ('E. South America Standard Time', -10800),
    ('Easter Island Standard Time', -21600),
    ('Eastern Standard Time', -18000),
    ('Eastern Standard Time (Mexico)', -18000),
    ('Egypt Standard Time', 7200),
    ('Ekaterinburg Standard Time', 18000),
    ('Fiji Standard Time', 43200),
    ('FLE Standard Time', 7200),
    ('Georgian Standard Time', 14400),
    ('GMT Standard Time', 0),
    ('Greenland Standard Time', -10800),
    ('Greenwich Standard Time', 0),
    ('GTB Standard Time', 7200),
    ('Haiti Standard Time', -18000),
    ('Hawaiian Standard Time', -36000),
    ('India Standard Time', 19800),
    ('Iran Standard Time', 12600),
    ('Israel Standard Time', 7200),
    ('Jordan Standard Time', 7200),
    ('Kaliningrad Standard Time', 7200),
    ('Korea Standard Time', 32400),
    ('Libya Standard Time', 7200),
    ('Line Islands Standard Time', 50400),
    ('Lord Howe Standard Time', 37800),
    ('Magadan Standard Time', 36000),
    ('Magallanes Standard Time', -10800), # permanent DST
    ('Marquesas Standard Time', -34200),
    ('Mauritius Standard Time', 14400),
    ('Middle East Standard Time', 7200),
    ('Montevideo Standard Time', -10800),
    ('Morocco Standard Time', 0),
    ('Mountain Standard Time (Mexico)', -25200),
    ('Mountain Standard Time', -25200),
    ('Myanmar Standard Time', 23400),
    ('N. Central Asia Standard Time', 21600),
    ('Namibia Standard Time', 3600),
    ('Nepal Standard Time', 20700),
    ('New Zealand Standard Time', 43200),
    ('Newfoundland Standard Time', -12600),
    ('Norfolk Standard Time', 39600),
    ('North Asia East Standard Time', 28800),
    ('North Asia Standard Time', 25200),
    ('North Korea Standard Time', 30600),
    ('Omsk Standard Time', 21600),
    ('Pacific SA Standard Time', -10800),
    ('Pacific Standard Time', -28800),
    ('Pacific Standard Time (Mexico)', -28800),
    ('Pakistan Standard Time', 18000),
    ('Paraguay Standard Time', -14400),
    ('Qyzylorda Standard Time', 18000), # a.k.a. Kyzylorda, in Kazakhstan
    ('Romance Standard Time', 3600),
    ('Russia Time Zone 3', 14400),
    ('Russia Time Zone 10', 39600),
    ('Russia Time Zone 11', 43200),
    ('Russian Standard Time', 10800),
    ('SA Eastern Standard Time', -10800),
    ('SA Pacific Standard Time', -18000),
    ('SA Western Standard Time', -14400),
    ('Saint Pierre Standard Time', -10800), # New France
    ('Sakhalin Standard Time', 39600),
    ('Samoa Standard Time', 46800),
    ('Sao Tome Standard Time', 0),
    ('Saratov Standard Time', 14400),
    ('SE Asia Standard Time', 25200),
    ('Singapore Standard Time', 28800),
    ('South Africa Standard Time', 7200),
    ('South Sudan Standard Time', 7200),
    ('Sri Lanka Standard Time', 19800),
    ('Sudan Standard Time', 7200), # unless they mean South Sudan, +03:00
    ('Syria Standard Time', 7200),
    ('Taipei Standard Time', 28800),
    ('Tasmania Standard Time', 36000),
    ('Tocantins Standard Time', -10800),
    ('Tokyo Standard Time', 32400),
    ('Tomsk Standard Time', 25200),
    ('Tonga Standard Time', 46800),
    ('Transbaikal Standard Time', 32400), # Yakutsk
    ('Turkey Standard Time', 7200),
    ('Turks And Caicos Standard Time', -14400),
    ('Ulaanbaatar Standard Time', 28800),
    ('US Eastern Standard Time', -18000),
    ('US Mountain Standard Time', -25200),
    ('UTC-11', -39600),
    ('UTC-09', -32400),
    ('UTC-08', -28800),
    ('UTC-02', -7200),
    ('UTC', 0),
    ('UTC+12', 43200),
    ('UTC+13', 46800),
    ('Venezuela Standard Time', -16200),
    ('Vladivostok Standard Time', 36000),
    ('Volgograd Standard Time', 14400),
    ('W. Australia Standard Time', 28800),
    ('W. Central Africa Standard Time', 3600),
    ('W. Europe Standard Time', 3600),
    ('W. Mongolia Standard Time', 25200), # Hovd
    ('West Asia Standard Time', 18000),
    ('West Bank Standard Time', 7200),
    ('West Pacific Standard Time', 36000),
    ('Yakutsk Standard Time', 32400),
    ('Yukon Standard Time', -25200), # Non-DST Mountain Standard Time since 2020-11-01
)
|
||||
|
||||
# List of standard UTC IDs to use. Not public so may be safely changed.
|
||||
# Do not remove IDs, as each entry is part of the API/behavior guarantee.
|
||||
# ( UTC Id, Offset Seconds )
|
||||
# Each entry pairs a fixed-offset zone ID with its offset from UTC in
# seconds; entry order (after the first) is by increasing offset.
utcIdList = (
    ('UTC', 0), # Goes first so is default
    ('UTC-14:00', -50400),
    ('UTC-13:00', -46800),
    ('UTC-12:00', -43200),
    ('UTC-11:00', -39600),
    ('UTC-10:00', -36000),
    ('UTC-09:00', -32400),
    ('UTC-08:00', -28800),
    ('UTC-07:00', -25200),
    ('UTC-06:00', -21600),
    ('UTC-05:00', -18000),
    ('UTC-04:30', -16200),
    ('UTC-04:00', -14400),
    ('UTC-03:30', -12600),
    ('UTC-03:00', -10800),
    ('UTC-02:00', -7200),
    ('UTC-01:00', -3600),
    ('UTC-00:00', 0),
    ('UTC+00:00', 0),
    ('UTC+01:00', 3600),
    ('UTC+02:00', 7200),
    ('UTC+03:00', 10800),
    ('UTC+03:30', 12600),
    ('UTC+04:00', 14400),
    ('UTC+04:30', 16200),
    ('UTC+05:00', 18000),
    ('UTC+05:30', 19800),
    ('UTC+05:45', 20700),
    ('UTC+06:00', 21600),
    ('UTC+06:30', 23400),
    ('UTC+07:00', 25200),
    ('UTC+08:00', 28800),
    ('UTC+08:30', 30600),
    ('UTC+09:00', 32400),
    ('UTC+09:30', 34200),
    ('UTC+10:00', 36000),
    ('UTC+11:00', 39600),
    ('UTC+12:00', 43200),
    ('UTC+13:00', 46800),
    ('UTC+14:00', 50400),
)
|
||||
|
||||
### End of data that may need updates in response to CLDR ###
|
||||
|
||||
class ByteArrayData:
    """Pool of NUL-terminated strings, rendered as one C char array.

    Strings are deduplicated: appending the same text a second time
    returns the index of the existing copy. Start indices must fit in
    a uint16, since the generated tables store them in 16-bit fields.
    """
    def __init__(self):
        self.data = []  # hex-encoded bytes of every distinct string appended
        self.hash = {}  # NUL-terminated string -> its start index in data

    def append(self, s):
        """Add s (NUL-terminated) to the pool; return its start index."""
        s += '\0'
        known = self.hash.get(s)
        if known is not None:
            return known

        encoded = unicode2hex(s)
        start = len(self.data)
        if start > 0xffff:
            raise Error(f'Index ({start}) outside the uint16 range !')
        self.hash[s] = start
        self.data.extend(encoded)
        return start

    def write(self, out, name):
        """Emit the pooled bytes as a C array declaration called name."""
        out(f'\nstatic constexpr char {name}[] = {{\n')
        out(wrap_list(self.data))
        out('\n};\n')
||||
|
||||
class ZoneIdWriter (SourceFileEditor):
    """Fills in the generated zone-data tables of qtimezoneprivate_data_p.h.

    Relies on its SourceFileEditor base (from localetools) for the
    mechanics of rewriting the target file — presumably a context
    manager, as main() uses it in a with-statement (confirm in
    localetools); self.writer is the handle the generated text is
    written through.
    """
    def write(self, version, defaults, windowsIds):
        # version: CLDR version string, quoted in the generated banner.
        # defaults: maps 1-based index into windowsIdList to the default
        #   IANA ID list for that Windows zone.
        # windowsIds: maps sortable keys to dicts carrying 'windowsKey',
        #   'territoryId', 'ianaList', 'windowsId' and 'territory'.
        self.__writeWarning(version)
        windows, iana = self.__writeTables(self.writer.write, defaults, windowsIds)
        windows.write(self.writer.write, 'windowsIdData')
        iana.write(self.writer.write, 'ianaIdData')

    def __writeWarning(self, version):
        # Do-not-edit banner recording today's date and the CLDR version.
        self.writer.write(f"""
/*
    This part of the file was generated on {datetime.date.today()} from the
    Common Locale Data Repository v{version} file supplemental/windowsZones.xml

    http://www.unicode.org/cldr/

    Do not edit this code: run cldr2qtimezone.py on updated (or
    edited) CLDR data; see qtbase/util/locale_database/.
*/

""")

    @staticmethod
    def __writeTables(out, defaults, windowsIds):
        # Writes the three C tables via out(); string payloads go into two
        # ByteArrayData pools, returned so the caller can emit them after
        # the tables that index into them.
        windowsIdData, ianaIdData = ByteArrayData(), ByteArrayData()

        # Write Windows/IANA table
        out('// Windows ID Key, Territory Enum, IANA ID Index\n')
        out('static constexpr QZoneData zoneDataTable[] = {\n')
        for index, data in sorted(windowsIds.items()):
            out('    {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'.format(
                data['windowsKey'], data['territoryId'],
                ianaIdData.append(data['ianaList']),
                data['windowsId'], data['territory']))
        out('};\n\n')

        # Write Windows ID key table
        out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
        out('static constexpr QWindowsData windowsDataTable[] = {\n')
        # Keys are 1-based positions in windowsIdList, matching the dict
        # main() builds for CldrAccess.readWindowsTimeZones().
        for index, pair in enumerate(windowsIdList, 1):
            out('    {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'.format(
                index,
                windowsIdData.append(pair[0]),
                ianaIdData.append(defaults[index]),
                pair[1], pair[0]))
        out('};\n\n')

        # Write UTC ID key table
        out('// IANA ID Index, UTC Offset\n')
        out('static constexpr QUtcData utcDataTable[] = {\n')
        for pair in utcIdList:
            out('    {{ {:6d},{:6d} }}, // {}\n'.format(
                ianaIdData.append(pair[0]), pair[1], pair[0]))
        out('};\n')

        return windowsIdData, ianaIdData
|
||||
|
||||
|
||||
def main(out, err):
    """Parses CLDR's data and updates Qt's representation of it.

    Takes sys.stdout, sys.stderr (or equivalents) as
    arguments. Expects two command-line options: the root of the
    unpacked CLDR data-file tree and the root of the qtbase module's
    checkout. Updates QTimeZone's private data about Windows time-zone
    IDs. Returns 0 on success, 1 on failure."""
    parser = argparse.ArgumentParser(
        description="Update Qt's CLDR-derived timezone data.")
    parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
    parser.add_argument('qtbase_path',
                        help='path to the root of the qtbase source tree',
                        nargs='?', default=qtbase_root)

    args = parser.parse_args()

    cldrPath = Path(args.cldr_path)
    qtPath = Path(args.qtbase_path)

    if not qtPath.is_dir():
        parser.error(f"No such Qt directory: {qtPath}")

    if not cldrPath.is_dir():
        parser.error(f"No such CLDR directory: {cldrPath}")

    # The generated file this script rewrites:
    dataFilePath = qtPath.joinpath('src/corelib/time/qtimezoneprivate_data_p.h')

    if not dataFilePath.is_file():
        parser.error(f'No such file: {dataFilePath}')

    try:
        # Map each known Windows ID to its 1-based position in
        # windowsIdList; readWindowsTimeZones() keys its results on these.
        version, defaults, winIds = CldrAccess(cldrPath).readWindowsTimeZones(
            dict((name, ind) for ind, name in enumerate((x[0] for x in windowsIdList), 1)))
    except IOError as e:
        parser.error(
            f'Failed to open common/supplemental/windowsZones.xml: {e}')
        # NOTE(review): parser.error() raises SystemExit, so this return
        # is unreachable dead code.
        return 1
    except Error as e:
        err.write('\n'.join(textwrap.wrap(
            f'Failed to read windowsZones.xml: {e}',
            subsequent_indent=' ', width=80)) + '\n')
        return 1

    out.write('Input file parsed, now writing data\n')

    try:
        with ZoneIdWriter(dataFilePath, qtPath) as writer:
            writer.write(version, defaults, winIds)
    except Exception as e:
        err.write(f'\nError while updating timezone data: {e}\n')
        return 1

    out.write(f'Data generation completed, please check the new file at {dataFilePath}\n')
    return 0


if __name__ == '__main__':
    import sys
    sys.exit(main(sys.stdout, sys.stderr))
|
81
util/locale_database/dateconverter.py
Normal file
81
util/locale_database/dateconverter.py
Normal file
@ -0,0 +1,81 @@
|
||||
# Copyright (C) 2016 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
|
||||
import re
|
||||
|
||||
def _convert_pattern(pattern):
|
||||
# patterns from http://www.unicode.org/reports/tr35/#Date_Format_Patterns
|
||||
qt_regexps = {
|
||||
r"yyy{3,}" : "yyyy", # more that three digits hence convert to four-digit year
|
||||
r"L" : "M", # stand-alone month names. not supported.
|
||||
r"g{1,}": "", # modified julian day. not supported.
|
||||
r"S{1,}" : "", # fractional seconds. not supported.
|
||||
r"A{1,}" : "" # milliseconds in day. not supported.
|
||||
}
|
||||
qt_patterns = {
|
||||
"G" : "", "GG" : "", "GGG" : "", "GGGG" : "", "GGGGG" : "", # Era. not supported.
|
||||
"y" : "yyyy", # four-digit year without leading zeroes
|
||||
"Q" : "", "QQ" : "", "QQQ" : "", "QQQQ" : "", # quarter. not supported.
|
||||
"q" : "", "qq" : "", "qqq" : "", "qqqq" : "", # quarter. not supported.
|
||||
"MMMMM" : "MMM", # narrow month name.
|
||||
"LLLLL" : "MMM", # stand-alone narrow month name.
|
||||
"l" : "", # special symbol for chinese leap month. not supported.
|
||||
"w" : "", "W" : "", # week of year/month. not supported.
|
||||
"D" : "", "DD" : "", "DDD" : "", # day of year. not supported.
|
||||
"F" : "", # day of week in month. not supported.
|
||||
"E" : "ddd", "EE" : "ddd", "EEE" : "ddd", "EEEEE" : "ddd", "EEEE" : "dddd", # day of week
|
||||
"e" : "ddd", "ee" : "ddd", "eee" : "ddd", "eeeee" : "ddd", "eeee" : "dddd", # local day of week
|
||||
"c" : "ddd", "cc" : "ddd", "ccc" : "ddd", "ccccc" : "ddd", "cccc" : "dddd", # stand-alone local day of week
|
||||
"a" : "AP", # AM/PM
|
||||
"K" : "h", # Hour 0-11
|
||||
"k" : "H", # Hour 1-24
|
||||
"j" : "", # special reserved symbol.
|
||||
"z" : "t", "zz" : "t", "zzz" : "t", "zzzz" : "t", # timezone
|
||||
"Z" : "t", "ZZ" : "t", "ZZZ" : "t", "ZZZZ" : "t", # timezone
|
||||
"v" : "t", "vv" : "t", "vvv" : "t", "vvvv" : "t", # timezone
|
||||
"V" : "t", "VV" : "t", "VVV" : "t", "VVVV" : "t" # timezone
|
||||
}
|
||||
if pattern in qt_patterns:
|
||||
return qt_patterns[pattern]
|
||||
for r,v in qt_regexps.items():
|
||||
pattern = re.sub(r, v, pattern)
|
||||
return pattern
|
||||
|
||||
def convert_date(input):
    """Convert a CLDR date/time format string to a Qt format string.

    Scans input, gathering each run of consecutive identical pattern
    letters and translating it via _convert_pattern(); every other
    character outside single quotes is copied through as a literal
    separator. Characters inside single quotes are dropped
    (NOTE(review): the closing quote itself is appended to the result
    — confirm that is intended). Whenever a field translates to
    nothing, separator characters left dangling before it are stripped
    too, and leading separators are stripped from the final result.
    """
    result = ""
    # All field letters defined by CLDR TR35.
    patterns = "GyYuQqMLlwWdDFgEecahHKkjmsSAzZvV"
    last = ""  # the current run of one repeated pattern letter
    inquote = 0  # count of "'" seen so far; odd means inside quotes
    chars_to_strip = " -"  # separators to discard beside dropped fields
    for c in input:
        if c == "'":
            inquote = inquote + 1
        if inquote % 2 == 0:
            if c in patterns:
                if not last:
                    last = c
                else:
                    if c in last:  # same letter: the run continues
                        last += c
                    else:
                        # pattern changed
                        converted = _convert_pattern(last)
                        result += converted
                        if not converted:
                            # Field dropped: drop separators before it too.
                            result = result.rstrip(chars_to_strip)
                        last = c
                continue
            if last:
                # pattern ended
                converted = _convert_pattern(last)
                result += converted
                if not converted:
                    result = result.rstrip(chars_to_strip)
                last = ""
            result += c
    if last:
        # Flush the final run of pattern letters.
        converted = _convert_pattern(last)
        result += converted
        if not converted:
            result = result.rstrip(chars_to_strip)
    return result.lstrip(chars_to_strip)
|
850
util/locale_database/enumdata.py
Normal file
850
util/locale_database/enumdata.py
Normal file
@ -0,0 +1,850 @@
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
|
||||
# A run of cldr2qlocalexml.py will produce output reporting any
|
||||
# language, script and territory codes it sees, in data, for which it
|
||||
# can find a name (taken always from en.xml) that could potentially be
|
||||
# used. There is no point adding a mapping for such a code unless the
|
||||
# CLDR's common/main/ contains an XML file for at least one locale
|
||||
# that exercises it.
|
||||
|
||||
# Each *_list reflects the current values of its enums in qlocale.h;
|
||||
# if new xml language files are available in CLDR, these languages and
|
||||
# territories need to be *appended* to this list (for compatibility
|
||||
# between versions). Include any spaces present in names (scripts
|
||||
# shall squish them out for the enum entries) in *_list, but use the
|
||||
# squished forms of names in the *_aliases mappings.
|
||||
|
||||
# For a new major version (and only then), we can change the
|
||||
# numbering, so re-sort each list into alphabetic order (e.g. using
|
||||
# sort -k2); but keep the Any and C entries first. That's why those
|
||||
# are offset with a blank line, below. After doing that, regenerate
|
||||
# locale data as usual; this will cause a binary-incompatible change.
|
||||
|
||||
# Note on "macrolanguage" comments: see "ISO 639 macrolanguage" on
|
||||
# Wikipedia. A "macrolanguage" is (loosely-speaking) a group of
|
||||
# languages so closely related to one another that they could also be
|
||||
# regarded as divergent dialects of the macrolanguage.
|
||||
|
||||
# Maps QLocale::Language enum numeric value to a pair of the language's
# en.xml display name (spaces included; the generator squishes them for
# the enum entry) and its ISO 639 / BCP 47 code.
language_map = {
    0: ("AnyLanguage", " "),
    1: ("C", " "),

    2: ("Abkhazian", "ab"),
    3: ("Afar", "aa"),
    4: ("Afrikaans", "af"),
    5: ("Aghem", "agq"),
    6: ("Akan", "ak"), # macrolanguage
    7: ("Akkadian", "akk"),
    8: ("Akoose", "bss"),
    9: ("Albanian", "sq"), # macrolanguage
    10: ("American Sign Language", "ase"),
    11: ("Amharic", "am"),
    12: ("Ancient Egyptian", "egy"),
    13: ("Ancient Greek", "grc"),
    14: ("Arabic", "ar"), # macrolanguage
    15: ("Aragonese", "an"),
    16: ("Aramaic", "arc"),
    17: ("Armenian", "hy"),
    18: ("Assamese", "as"),
    19: ("Asturian", "ast"),
    20: ("Asu", "asa"),
    21: ("Atsam", "cch"),
    22: ("Avaric", "av"),
    23: ("Avestan", "ae"),
    24: ("Aymara", "ay"), # macrolanguage
    25: ("Azerbaijani", "az"), # macrolanguage
    26: ("Bafia", "ksf"),
    27: ("Balinese", "ban"),
    28: ("Bambara", "bm"),
    29: ("Bamun", "bax"),
    30: ("Bangla", "bn"),
    31: ("Basaa", "bas"),
    32: ("Bashkir", "ba"),
    33: ("Basque", "eu"),
    34: ("Batak Toba", "bbc"),
    35: ("Belarusian", "be"),
    36: ("Bemba", "bem"),
    37: ("Bena", "bez"),
    38: ("Bhojpuri", "bho"),
    39: ("Bislama", "bi"),
    40: ("Blin", "byn"),
    41: ("Bodo", "brx"),
    42: ("Bosnian", "bs"),
    43: ("Breton", "br"),
    44: ("Buginese", "bug"),
    45: ("Bulgarian", "bg"),
    46: ("Burmese", "my"),
    47: ("Cantonese", "yue"),
    48: ("Catalan", "ca"),
    49: ("Cebuano", "ceb"),
    50: ("Central Atlas Tamazight", "tzm"),
    51: ("Central Kurdish", "ckb"),
    52: ("Chakma", "ccp"),
    53: ("Chamorro", "ch"),
    54: ("Chechen", "ce"),
    55: ("Cherokee", "chr"),
    56: ("Chickasaw", "cic"),
    57: ("Chiga", "cgg"),
    58: ("Chinese", "zh"), # macrolanguage
    59: ("Church", "cu"), # macrolanguage
    60: ("Chuvash", "cv"),
    61: ("Colognian", "ksh"),
    62: ("Coptic", "cop"),
    63: ("Cornish", "kw"),
    64: ("Corsican", "co"),
    65: ("Cree", "cr"), # macrolanguage
    66: ("Croatian", "hr"),
    67: ("Czech", "cs"),
    68: ("Danish", "da"),
    69: ("Divehi", "dv"),
    70: ("Dogri", "doi"), # macrolanguage
    71: ("Duala", "dua"),
    72: ("Dutch", "nl"),
    73: ("Dzongkha", "dz"),
    74: ("Embu", "ebu"),
    75: ("English", "en"),
    76: ("Erzya", "myv"),
    77: ("Esperanto", "eo"),
    78: ("Estonian", "et"), # macrolanguage
    79: ("Ewe", "ee" ),
    80: ("Ewondo", "ewo"),
    81: ("Faroese", "fo"),
    82: ("Fijian", "fj"),
    83: ("Filipino", "fil"),
    84: ("Finnish", "fi"),
    85: ("French", "fr"),
    86: ("Friulian", "fur"),
    87: ("Fulah", "ff"), # macrolanguage
    88: ("Gaelic", "gd"),
    89: ("Ga", "gaa"),
    90: ("Galician", "gl"),
    91: ("Ganda", "lg"),
    92: ("Geez", "gez"),
    93: ("Georgian", "ka"),
    94: ("German", "de"),
    95: ("Gothic", "got"),
    96: ("Greek", "el"),
    97: ("Guarani", "gn"), # macrolanguage
    98: ("Gujarati", "gu"),
    99: ("Gusii", "guz"),
    100: ("Haitian", "ht"),
    101: ("Hausa", "ha"),
    102: ("Hawaiian", "haw"),
    103: ("Hebrew", "he"),
    104: ("Herero", "hz"),
    105: ("Hindi", "hi"),
    106: ("Hiri Motu", "ho"),
    107: ("Hungarian", "hu"),
    108: ("Icelandic", "is"),
    109: ("Ido", "io"),
    110: ("Igbo", "ig" ),
    111: ("Inari Sami", "smn"),
    112: ("Indonesian", "id"),
    113: ("Ingush", "inh"),
    114: ("Interlingua", "ia"),
    115: ("Interlingue", "ie"),
    116: ("Inuktitut", "iu"), # macrolanguage
    117: ("Inupiaq", "ik"), # macrolanguage
    118: ("Irish", "ga"),
    119: ("Italian", "it"),
    120: ("Japanese", "ja"),
    121: ("Javanese", "jv"),
    122: ("Jju", "kaj"),
    123: ("Jola Fonyi", "dyo"),
    124: ("Kabuverdianu", "kea"),
    125: ("Kabyle", "kab"),
    126: ("Kako", "kkj"),
    127: ("Kalaallisut", "kl"),
    128: ("Kalenjin", "kln"),
    129: ("Kamba", "kam"),
    130: ("Kannada", "kn"),
    131: ("Kanuri", "kr"), # macrolanguage
    132: ("Kashmiri", "ks"),
    133: ("Kazakh", "kk"),
    134: ("Kenyang", "ken"),
    135: ("Khmer", "km"),
    136: ("Kiche", "quc"),
    137: ("Kikuyu", "ki"),
    138: ("Kinyarwanda", "rw"),
    139: ("Komi", "kv"), # macrolanguage
    140: ("Kongo", "kg"), # macrolanguage
    141: ("Konkani", "kok"),
    142: ("Korean", "ko"),
    143: ("Koro", "kfo"),
    144: ("Koyraboro Senni", "ses"),
    145: ("Koyra Chiini", "khq"),
    146: ("Kpelle", "kpe"),
    147: ("Kuanyama", "kj"),
    148: ("Kurdish", "ku"), # macrolanguage
    149: ("Kwasio", "nmg"),
    150: ("Kyrgyz", "ky"),
    151: ("Lakota", "lkt"),
    152: ("Langi", "lag"),
    153: ("Lao", "lo"),
    154: ("Latin", "la"),
    155: ("Latvian", "lv"), # macrolanguage
    156: ("Lezghian", "lez"),
    157: ("Limburgish", "li"),
    158: ("Lingala", "ln"),
    159: ("Literary Chinese", "lzh"),
    160: ("Lithuanian", "lt"),
    161: ("Lojban", "jbo"),
    162: ("Lower Sorbian", "dsb"),
    163: ("Low German", "nds"),
    164: ("Luba Katanga", "lu"),
    165: ("Lule Sami", "smj"),
    166: ("Luo", "luo"),
    167: ("Luxembourgish", "lb"),
    168: ("Luyia", "luy"),
    169: ("Macedonian", "mk"),
    170: ("Machame", "jmc"),
    171: ("Maithili", "mai"),
    172: ("Makhuwa Meetto", "mgh"),
    173: ("Makonde", "kde"),
    174: ("Malagasy", "mg"), # macrolanguage
    175: ("Malayalam", "ml"),
    176: ("Malay", "ms"), # macrolanguage
    177: ("Maltese", "mt"),
    178: ("Mandingo", "man"), # macrolanguage
    179: ("Manipuri", "mni"),
    180: ("Manx", "gv"),
    181: ("Maori", "mi"),
    182: ("Mapuche", "arn"),
    183: ("Marathi", "mr"),
    184: ("Marshallese", "mh"),
    185: ("Masai", "mas"),
    186: ("Mazanderani", "mzn"),
    187: ("Mende", "men"),
    188: ("Meru", "mer"),
    189: ("Meta", "mgo"),
    190: ("Mohawk", "moh"),
    191: ("Mongolian", "mn"), # macrolanguage
    192: ("Morisyen", "mfe"),
    193: ("Mundang", "mua"),
    194: ("Muscogee", "mus"),
    195: ("Nama", "naq"),
    196: ("Nauru", "na"),
    197: ("Navajo", "nv"),
    198: ("Ndonga", "ng"),
    199: ("Nepali", "ne"), # macrolanguage
    200: ("Newari", "new"),
    201: ("Ngiemboon", "nnh"),
    202: ("Ngomba", "jgo"),
    203: ("Nigerian Pidgin", "pcm"),
    204: ("Nko", "nqo"),
    205: ("Northern Luri", "lrc"),
    206: ("Northern Sami", "se" ),
    207: ("Northern Sotho", "nso"),
    208: ("North Ndebele", "nd"),
    209: ("Norwegian Bokmal", "nb"),
    210: ("Norwegian Nynorsk", "nn"),
    211: ("Nuer", "nus"),
    212: ("Nyanja", "ny" ),
    213: ("Nyankole", "nyn"),
    214: ("Occitan", "oc"),
    215: ("Odia", "or"), # macrolanguage
    216: ("Ojibwa", "oj"), # macrolanguage
    217: ("Old Irish", "sga"),
    218: ("Old Norse", "non"),
    219: ("Old Persian", "peo"),
    220: ("Oromo", "om"), # macrolanguage
    221: ("Osage", "osa"),
    222: ("Ossetic", "os"),
    223: ("Pahlavi", "pal"),
    224: ("Palauan", "pau"),
    225: ("Pali", "pi"), # macrolanguage
    226: ("Papiamento", "pap"),
    227: ("Pashto", "ps"), # macrolanguage
    228: ("Persian", "fa"), # macrolanguage
    229: ("Phoenician", "phn"),
    230: ("Polish", "pl"),
    231: ("Portuguese", "pt"),
    232: ("Prussian", "prg"),
    233: ("Punjabi", "pa"),
    234: ("Quechua", "qu"), # macrolanguage
    235: ("Romanian", "ro"),
    236: ("Romansh", "rm"),
    237: ("Rombo", "rof"),
    238: ("Rundi", "rn"),
    239: ("Russian", "ru"),
    240: ("Rwa", "rwk"),
    241: ("Saho", "ssy"),
    242: ("Sakha", "sah"),
    243: ("Samburu", "saq"),
    244: ("Samoan", "sm"),
    245: ("Sango", "sg"),
    246: ("Sangu", "sbp"),
    247: ("Sanskrit", "sa"),
    248: ("Santali", "sat"),
    249: ("Sardinian", "sc"), # macrolanguage
    250: ("Saurashtra", "saz"),
    251: ("Sena", "seh"),
    252: ("Serbian", "sr"),
    253: ("Shambala", "ksb"),
    254: ("Shona", "sn"),
    255: ("Sichuan Yi", "ii" ),
    256: ("Sicilian", "scn"),
    257: ("Sidamo", "sid"),
    258: ("Silesian", "szl"),
    259: ("Sindhi", "sd"),
    260: ("Sinhala", "si"),
    261: ("Skolt Sami", "sms"),
    262: ("Slovak", "sk"),
    263: ("Slovenian", "sl"),
    264: ("Soga", "xog"),
    265: ("Somali", "so"),
    266: ("Southern Kurdish", "sdh"),
    267: ("Southern Sami", "sma"),
    268: ("Southern Sotho", "st"),
    269: ("South Ndebele", "nr" ),
    270: ("Spanish", "es"),
    271: ("Standard Moroccan Tamazight", "zgh"),
    272: ("Sundanese", "su"),
    273: ("Swahili", "sw"), # macrolanguage
    274: ("Swati", "ss"),
    275: ("Swedish", "sv"),
    276: ("Swiss German", "gsw"),
    277: ("Syriac", "syr"),
    278: ("Tachelhit", "shi"),
    279: ("Tahitian", "ty"),
    280: ("Tai Dam", "blt"),
    281: ("Taita", "dav"),
    282: ("Tajik", "tg"),
    283: ("Tamil", "ta"),
    284: ("Taroko", "trv"),
    285: ("Tasawaq", "twq"),
    286: ("Tatar", "tt"),
    287: ("Telugu", "te"),
    288: ("Teso", "teo"),
    289: ("Thai", "th"),
    290: ("Tibetan", "bo"),
    291: ("Tigre", "tig"),
    292: ("Tigrinya", "ti"),
    293: ("Tokelau", "tkl"),
    294: ("Tok Pisin", "tpi"),
    295: ("Tongan", "to"),
    296: ("Tsonga", "ts"),
    297: ("Tswana", "tn"),
    298: ("Turkish", "tr"),
    299: ("Turkmen", "tk"),
    300: ("Tuvalu", "tvl"),
    301: ("Tyap", "kcg"),
    302: ("Ugaritic", "uga"),
    303: ("Ukrainian", "uk"),
    304: ("Upper Sorbian", "hsb"),
    305: ("Urdu", "ur"),
    306: ("Uyghur", "ug"),
    307: ("Uzbek", "uz"), # macrolanguage
    308: ("Vai", "vai"),
    309: ("Venda", "ve" ),
    310: ("Vietnamese", "vi"),
    311: ("Volapuk", "vo"),
    312: ("Vunjo", "vun"),
    313: ("Walloon", "wa"),
    314: ("Walser", "wae"),
    315: ("Warlpiri", "wbp"),
    316: ("Welsh", "cy"),
    317: ("Western Balochi", "bgn"),
    318: ("Western Frisian", "fy"),
    319: ("Wolaytta", "wal"),
    320: ("Wolof", "wo"),
    321: ("Xhosa", "xh"),
    322: ("Yangben", "yav"),
    323: ("Yiddish", "yi"), # macrolanguage
    324: ("Yoruba", "yo"),
    325: ("Zarma", "dje"),
    326: ("Zhuang", "za"), # macrolanguage
    327: ("Zulu", "zu"),
    # added in CLDR v40
    328: ("Kaingang", "kgp"),
    329: ("Nheengatu", "yrl"),
    # added in CLDR v42
    330: ("Haryanvi", "bgc"),
    331: ("Moksha", "mdf"),
    332: ("Northern Frisian", "frr"),
    333: ("Obolo", "ann"),
    334: ("Pijin", "pis"),
    335: ("Rajasthani", "raj"),
    336: ("Toki Pona", "tok"),
}
|
||||
|
||||
# Map from an old (pre-rename) language enum key to its current name.
# NOTE(review): presumably consumed by the enum-generating scripts so the
# old QLocale::Language names remain valid aliases for the renamed
# entries — confirm against the code that reads this table.
language_aliases = {
    # Renamings prior to Qt 6.0 (CLDR v37):
    'Afan': 'Oromo',
    'Byelorussian': 'Belarusian',
    'Bhutani': 'Dzongkha',
    'Cambodian': 'Khmer',
    'Kurundi': 'Rundi',
    'RhaetoRomance': 'Romansh',
    'Chewa': 'Nyanja',
    'Frisian': 'WesternFrisian',
    'Uigur': 'Uyghur',
    # Renamings:
    'Uighur': 'Uyghur',
    'Kwanyama': 'Kuanyama',
    'Inupiak': 'Inupiaq',
    'Bengali': 'Bangla',
    'CentralMoroccoTamazight': 'CentralAtlasTamazight',
    'Greenlandic': 'Kalaallisut',
    'Walamo': 'Wolaytta',
    'Navaho': 'Navajo',
    'Oriya': 'Odia',
    'Kirghiz': 'Kyrgyz'
}
|
||||
|
||||
territory_map = {
|
||||
0: ("AnyTerritory", "ZZ"),
|
||||
|
||||
1: ("Afghanistan", "AF"),
|
||||
2: ("Aland Islands", "AX"),
|
||||
3: ("Albania", "AL"),
|
||||
4: ("Algeria", "DZ"),
|
||||
5: ("American Samoa", "AS"),
|
||||
6: ("Andorra", "AD"),
|
||||
7: ("Angola", "AO"),
|
||||
8: ("Anguilla", "AI"),
|
||||
9: ("Antarctica", "AQ"),
|
||||
10: ("Antigua And Barbuda", "AG"),
|
||||
11: ("Argentina", "AR"),
|
||||
12: ("Armenia", "AM"),
|
||||
13: ("Aruba", "AW"),
|
||||
14: ("Ascension Island", "AC"),
|
||||
15: ("Australia", "AU"),
|
||||
16: ("Austria", "AT"),
|
||||
17: ("Azerbaijan", "AZ"),
|
||||
18: ("Bahamas", "BS"),
|
||||
19: ("Bahrain", "BH"),
|
||||
20: ("Bangladesh", "BD"),
|
||||
21: ("Barbados", "BB"),
|
||||
22: ("Belarus", "BY"),
|
||||
23: ("Belgium", "BE"),
|
||||
24: ("Belize", "BZ"),
|
||||
25: ("Benin", "BJ"),
|
||||
26: ("Bermuda", "BM"),
|
||||
27: ("Bhutan", "BT"),
|
||||
28: ("Bolivia", "BO"),
|
||||
29: ("Bosnia And Herzegovina", "BA"),
|
||||
30: ("Botswana", "BW"),
|
||||
31: ("Bouvet Island", "BV"),
|
||||
32: ("Brazil", "BR"),
|
||||
33: ("British Indian Ocean Territory", "IO"),
|
||||
34: ("British Virgin Islands", "VG"),
|
||||
35: ("Brunei", "BN"),
|
||||
36: ("Bulgaria", "BG"),
|
||||
37: ("Burkina Faso", "BF"),
|
||||
38: ("Burundi", "BI"),
|
||||
39: ("Cambodia", "KH"),
|
||||
40: ("Cameroon", "CM"),
|
||||
41: ("Canada", "CA"),
|
||||
42: ("Canary Islands", "IC"),
|
||||
43: ("Cape Verde", "CV"),
|
||||
44: ("Caribbean Netherlands", "BQ"),
|
||||
45: ("Cayman Islands", "KY"),
|
||||
46: ("Central African Republic", "CF"),
|
||||
47: ("Ceuta And Melilla", "EA"),
|
||||
48: ("Chad", "TD"),
|
||||
49: ("Chile", "CL"),
|
||||
50: ("China", "CN"),
|
||||
51: ("Christmas Island", "CX"),
|
||||
52: ("Clipperton Island", "CP"),
|
||||
53: ("Cocos Islands", "CC"),
|
||||
54: ("Colombia", "CO"),
|
||||
55: ("Comoros", "KM"),
|
||||
56: ("Congo Brazzaville", "CG"),
|
||||
57: ("Congo Kinshasa", "CD"),
|
||||
58: ("Cook Islands", "CK"),
|
||||
59: ("Costa Rica", "CR"),
|
||||
60: ("Croatia", "HR"),
|
||||
61: ("Cuba", "CU"),
|
||||
62: ("Curacao", "CW"),
|
||||
63: ("Cyprus", "CY"),
|
||||
64: ("Czechia", "CZ"),
|
||||
65: ("Denmark", "DK"),
|
||||
66: ("Diego Garcia", "DG"),
|
||||
67: ("Djibouti", "DJ"),
|
||||
68: ("Dominica", "DM"),
|
||||
69: ("Dominican Republic", "DO"),
|
||||
70: ("Ecuador", "EC"),
|
||||
71: ("Egypt", "EG"),
|
||||
72: ("El Salvador", "SV"),
|
||||
73: ("Equatorial Guinea", "GQ"),
|
||||
74: ("Eritrea", "ER"),
|
||||
75: ("Estonia", "EE"),
|
||||
76: ("Eswatini", "SZ"),
|
||||
77: ("Ethiopia", "ET"),
|
||||
78: ("Europe", "150"),
|
||||
79: ("European Union", "EU"),
|
||||
80: ("Falkland Islands", "FK"),
|
||||
81: ("Faroe Islands", "FO"),
|
||||
82: ("Fiji", "FJ"),
|
||||
83: ("Finland", "FI"),
|
||||
84: ("France", "FR"),
|
||||
85: ("French Guiana", "GF"),
|
||||
86: ("French Polynesia", "PF"),
|
||||
87: ("French Southern Territories", "TF"),
|
||||
88: ("Gabon", "GA"),
|
||||
89: ("Gambia", "GM"),
|
||||
90: ("Georgia", "GE"),
|
||||
91: ("Germany", "DE"),
|
||||
92: ("Ghana", "GH"),
|
||||
93: ("Gibraltar", "GI"),
|
||||
94: ("Greece", "GR"),
|
||||
95: ("Greenland", "GL"),
|
||||
96: ("Grenada", "GD"),
|
||||
97: ("Guadeloupe", "GP"),
|
||||
98: ("Guam", "GU"),
|
||||
99: ("Guatemala", "GT"),
|
||||
100: ("Guernsey", "GG"),
|
||||
101: ("Guinea Bissau", "GW"),
|
||||
102: ("Guinea", "GN"),
|
||||
103: ("Guyana", "GY"),
|
||||
104: ("Haiti", "HT"),
|
||||
105: ("Heard And McDonald Islands", "HM"),
|
||||
106: ("Honduras", "HN"),
|
||||
107: ("Hong Kong", "HK"),
|
||||
108: ("Hungary", "HU"),
|
||||
109: ("Iceland", "IS"),
|
||||
110: ("India", "IN"),
|
||||
111: ("Indonesia", "ID"),
|
||||
112: ("Iran", "IR"),
|
||||
113: ("Iraq", "IQ"),
|
||||
114: ("Ireland", "IE"),
|
||||
115: ("Isle Of Man", "IM"),
|
||||
116: ("Israel", "IL"),
|
||||
117: ("Italy", "IT"),
|
||||
# Officially Côte d’Ivoire, which we'd ned to map to CotedIvoire
|
||||
# or CoteDIvoire, either failing to make the d' separate from
|
||||
# Cote or messing with its case. So stick with Ivory Coast:
|
||||
118: ("Ivory Coast", "CI"),
|
||||
119: ("Jamaica", "JM"),
|
||||
120: ("Japan", "JP"),
|
||||
121: ("Jersey", "JE"),
|
||||
122: ("Jordan", "JO"),
|
||||
123: ("Kazakhstan", "KZ"),
|
||||
124: ("Kenya", "KE"),
|
||||
125: ("Kiribati", "KI"),
|
||||
126: ("Kosovo", "XK"),
|
||||
127: ("Kuwait", "KW"),
|
||||
128: ("Kyrgyzstan", "KG"),
|
||||
129: ("Laos", "LA"),
|
||||
130: ("Latin America", "419"),
|
||||
131: ("Latvia", "LV"),
|
||||
132: ("Lebanon", "LB"),
|
||||
133: ("Lesotho", "LS"),
|
||||
134: ("Liberia", "LR"),
|
||||
135: ("Libya", "LY"),
|
||||
136: ("Liechtenstein", "LI"),
|
||||
137: ("Lithuania", "LT"),
|
||||
138: ("Luxembourg", "LU"),
|
||||
139: ("Macao", "MO"),
|
||||
140: ("Macedonia", "MK"),
|
||||
141: ("Madagascar", "MG"),
|
||||
142: ("Malawi", "MW"),
|
||||
143: ("Malaysia", "MY"),
|
||||
144: ("Maldives", "MV"),
|
||||
145: ("Mali", "ML"),
|
||||
146: ("Malta", "MT"),
|
||||
147: ("Marshall Islands", "MH"),
|
||||
148: ("Martinique", "MQ"),
|
||||
149: ("Mauritania", "MR"),
|
||||
150: ("Mauritius", "MU"),
|
||||
151: ("Mayotte", "YT"),
|
||||
152: ("Mexico", "MX"),
|
||||
153: ("Micronesia", "FM"),
|
||||
154: ("Moldova", "MD"),
|
||||
155: ("Monaco", "MC"),
|
||||
156: ("Mongolia", "MN"),
|
||||
157: ("Montenegro", "ME"),
|
||||
158: ("Montserrat", "MS"),
|
||||
159: ("Morocco", "MA"),
|
||||
160: ("Mozambique", "MZ"),
|
||||
161: ("Myanmar", "MM"),
|
||||
162: ("Namibia", "NA"),
|
||||
163: ("Nauru", "NR"),
|
||||
164: ("Nepal", "NP"),
|
||||
165: ("Netherlands", "NL"),
|
||||
166: ("New Caledonia", "NC"),
|
||||
167: ("New Zealand", "NZ"),
|
||||
168: ("Nicaragua", "NI"),
|
||||
169: ("Nigeria", "NG"),
|
||||
170: ("Niger", "NE"),
|
||||
171: ("Niue", "NU"),
|
||||
172: ("Norfolk Island", "NF"),
|
||||
173: ("Northern Mariana Islands", "MP"),
|
||||
174: ("North Korea", "KP"),
|
||||
175: ("Norway", "NO"),
|
||||
176: ("Oman", "OM"),
|
||||
177: ("Outlying Oceania", "QO"),
|
||||
178: ("Pakistan", "PK"),
|
||||
179: ("Palau", "PW"),
|
||||
180: ("Palestinian Territories", "PS"),
|
||||
181: ("Panama", "PA"),
|
||||
182: ("Papua New Guinea", "PG"),
|
||||
183: ("Paraguay", "PY"),
|
||||
184: ("Peru", "PE"),
|
||||
185: ("Philippines", "PH"),
|
||||
186: ("Pitcairn", "PN"),
|
||||
187: ("Poland", "PL"),
|
||||
188: ("Portugal", "PT"),
|
||||
189: ("Puerto Rico", "PR"),
|
||||
190: ("Qatar", "QA"),
|
||||
191: ("Reunion", "RE"),
|
||||
192: ("Romania", "RO"),
|
||||
193: ("Russia", "RU"),
|
||||
194: ("Rwanda", "RW"),
|
||||
195: ("Saint Barthelemy", "BL"),
|
||||
196: ("Saint Helena", "SH"),
|
||||
197: ("Saint Kitts And Nevis", "KN"),
|
||||
198: ("Saint Lucia", "LC"),
|
||||
199: ("Saint Martin", "MF"),
|
||||
200: ("Saint Pierre And Miquelon", "PM"),
|
||||
201: ("Saint Vincent And Grenadines", "VC"),
|
||||
202: ("Samoa", "WS"),
|
||||
203: ("San Marino", "SM"),
|
||||
204: ("Sao Tome And Principe", "ST"),
|
||||
205: ("Saudi Arabia", "SA"),
|
||||
206: ("Senegal", "SN"),
|
||||
207: ("Serbia", "RS"),
|
||||
208: ("Seychelles", "SC"),
|
||||
209: ("Sierra Leone", "SL"),
|
||||
210: ("Singapore", "SG"),
|
||||
211: ("Sint Maarten", "SX"),
|
||||
212: ("Slovakia", "SK"),
|
||||
213: ("Slovenia", "SI"),
|
||||
214: ("Solomon Islands", "SB"),
|
||||
215: ("Somalia", "SO"),
|
||||
216: ("South Africa", "ZA"),
|
||||
217: ("South Georgia And South Sandwich Islands", "GS"),
|
||||
218: ("South Korea", "KR"),
|
||||
219: ("South Sudan", "SS"),
|
||||
220: ("Spain", "ES"),
|
||||
221: ("Sri Lanka", "LK"),
|
||||
222: ("Sudan", "SD"),
|
||||
223: ("Suriname", "SR"),
|
||||
224: ("Svalbard And Jan Mayen", "SJ"),
|
||||
225: ("Sweden", "SE"),
|
||||
226: ("Switzerland", "CH"),
|
||||
227: ("Syria", "SY"),
|
||||
228: ("Taiwan", "TW"),
|
||||
229: ("Tajikistan", "TJ"),
|
||||
230: ("Tanzania", "TZ"),
|
||||
231: ("Thailand", "TH"),
|
||||
232: ("Timor-Leste", "TL"),
|
||||
233: ("Togo", "TG"),
|
||||
234: ("Tokelau", "TK"),
|
||||
235: ("Tonga", "TO"),
|
||||
236: ("Trinidad And Tobago", "TT"),
|
||||
237: ("Tristan Da Cunha", "TA"),
|
||||
238: ("Tunisia", "TN"),
|
||||
239: ("Turkey", "TR"),
|
||||
240: ("Turkmenistan", "TM"),
|
||||
241: ("Turks And Caicos Islands", "TC"),
|
||||
242: ("Tuvalu", "TV"),
|
||||
243: ("Uganda", "UG"),
|
||||
244: ("Ukraine", "UA"),
|
||||
245: ("United Arab Emirates", "AE"),
|
||||
246: ("United Kingdom", "GB"),
|
||||
247: ("United States Outlying Islands", "UM"),
|
||||
248: ("United States", "US"),
|
||||
249: ("United States Virgin Islands", "VI"),
|
||||
250: ("Uruguay", "UY"),
|
||||
251: ("Uzbekistan", "UZ"),
|
||||
252: ("Vanuatu", "VU"),
|
||||
253: ("Vatican City", "VA"),
|
||||
254: ("Venezuela", "VE"),
|
||||
255: ("Vietnam", "VN"),
|
||||
256: ("Wallis And Futuna", "WF"),
|
||||
257: ("Western Sahara", "EH"),
|
||||
258: ("World", "001"),
|
||||
259: ("Yemen", "YE"),
|
||||
260: ("Zambia", "ZM"),
|
||||
261: ("Zimbabwe", "ZW"),
|
||||
}
|
||||
|
||||
# Map from an old (pre-rename) territory enum key to its current name.
# NOTE(review): presumably used to keep old QLocale::Territory (and the
# pre-6.2 Country) enum names working as aliases — confirm against the
# code that reads this table.
territory_aliases = {
    # Renamings prior to Qt 6.0 (CLDR v37):
    'DemocraticRepublicOfCongo': 'CongoKinshasa',
    'PeoplesRepublicOfCongo': 'CongoBrazzaville',
    'DemocraticRepublicOfKorea': 'NorthKorea',
    'RepublicOfKorea': 'SouthKorea',
    'RussianFederation': 'Russia',
    'SyrianArabRepublic': 'Syria',
    'LatinAmericaAndTheCaribbean': 'LatinAmerica',
    # Renamings:
    'EastTimor': 'TimorLeste',
    'Bonaire': 'CaribbeanNetherlands',
    'Macau': 'Macao',
    'SouthGeorgiaAndTheSouthSandwichIslands': 'SouthGeorgiaAndSouthSandwichIslands',
    'WallisAndFutunaIslands': 'WallisAndFutuna',
    'SaintVincentAndTheGrenadines': 'SaintVincentAndGrenadines',
    'BosniaAndHerzegowina': 'BosniaAndHerzegovina',
    'SvalbardAndJanMayenIslands': 'SvalbardAndJanMayen',
    'VaticanCityState': 'VaticanCity',
    'Swaziland': 'Eswatini',
    'UnitedStatesMinorOutlyingIslands': 'UnitedStatesOutlyingIslands',
    'CuraSao': 'Curacao',
    'CzechRepublic': 'Czechia',

    # Backwards compatibility with old Country enum, prior to Qt 6.2:
    'AnyCountry': 'AnyTerritory',
    'NauruCountry': 'NauruTerritory',
    'TokelauCountry': 'TokelauTerritory',
    'TuvaluCountry': 'TuvaluTerritory',
}
|
||||
|
||||
script_map = {
|
||||
0: ("AnyScript", "Zzzz"),
|
||||
|
||||
1: ("Adlam", "Adlm"),
|
||||
2: ("Ahom", "Ahom"),
|
||||
3: ("Anatolian Hieroglyphs", "Hluw"),
|
||||
4: ("Arabic", "Arab"),
|
||||
5: ("Armenian", "Armn"),
|
||||
6: ("Avestan", "Avst"),
|
||||
7: ("Balinese", "Bali"),
|
||||
8: ("Bamum", "Bamu"),
|
||||
9: ("Bangla", "Beng"),
|
||||
10: ("Bassa Vah", "Bass"),
|
||||
11: ("Batak", "Batk"),
|
||||
12: ("Bhaiksuki", "Bhks"),
|
||||
13: ("Bopomofo", "Bopo"),
|
||||
14: ("Brahmi", "Brah"),
|
||||
15: ("Braille", "Brai"),
|
||||
16: ("Buginese", "Bugi"),
|
||||
17: ("Buhid", "Buhd"),
|
||||
18: ("Canadian Aboriginal", "Cans"),
|
||||
19: ("Carian", "Cari"),
|
||||
20: ("Caucasian Albanian", "Aghb"),
|
||||
21: ("Chakma", "Cakm"),
|
||||
22: ("Cham", "Cham"),
|
||||
23: ("Cherokee", "Cher"),
|
||||
24: ("Coptic", "Copt"),
|
||||
25: ("Cuneiform", "Xsux"),
|
||||
26: ("Cypriot", "Cprt"),
|
||||
27: ("Cyrillic", "Cyrl"),
|
||||
28: ("Deseret", "Dsrt"),
|
||||
29: ("Devanagari", "Deva"),
|
||||
30: ("Duployan", "Dupl"),
|
||||
31: ("Egyptian Hieroglyphs", "Egyp"),
|
||||
32: ("Elbasan", "Elba"),
|
||||
33: ("Ethiopic", "Ethi"),
|
||||
34: ("Fraser", "Lisu"),
|
||||
35: ("Georgian", "Geor"),
|
||||
36: ("Glagolitic", "Glag"),
|
||||
37: ("Gothic", "Goth"),
|
||||
38: ("Grantha", "Gran"),
|
||||
39: ("Greek", "Grek"),
|
||||
40: ("Gujarati", "Gujr"),
|
||||
41: ("Gurmukhi", "Guru"),
|
||||
42: ("Hangul", "Hang"),
|
||||
43: ("Han", "Hani"),
|
||||
44: ("Hanunoo", "Hano"),
|
||||
45: ("Han with Bopomofo", "Hanb"),
|
||||
46: ("Hatran", "Hatr"),
|
||||
47: ("Hebrew", "Hebr"),
|
||||
48: ("Hiragana", "Hira"),
|
||||
49: ("Imperial Aramaic", "Armi"),
|
||||
50: ("Inscriptional Pahlavi", "Phli"),
|
||||
51: ("Inscriptional Parthian", "Prti"),
|
||||
52: ("Jamo", "Jamo"),
|
||||
53: ("Japanese", "Jpan"),
|
||||
54: ("Javanese", "Java"),
|
||||
55: ("Kaithi", "Kthi"),
|
||||
56: ("Kannada", "Knda"),
|
||||
57: ("Katakana", "Kana"),
|
||||
58: ("Kayah Li", "Kali"),
|
||||
59: ("Kharoshthi", "Khar"),
|
||||
60: ("Khmer", "Khmr"),
|
||||
61: ("Khojki", "Khoj"),
|
||||
62: ("Khudawadi", "Sind"),
|
||||
63: ("Korean", "Kore"),
|
||||
64: ("Lanna", "Lana"),
|
||||
65: ("Lao", "Laoo"),
|
||||
66: ("Latin", "Latn"),
|
||||
67: ("Lepcha", "Lepc"),
|
||||
68: ("Limbu", "Limb"),
|
||||
69: ("Linear A", "Lina"),
|
||||
70: ("Linear B", "Linb"),
|
||||
71: ("Lycian", "Lyci"),
|
||||
72: ("Lydian", "Lydi"),
|
||||
73: ("Mahajani", "Mahj"),
|
||||
74: ("Malayalam", "Mlym"),
|
||||
75: ("Mandaean", "Mand"),
|
||||
76: ("Manichaean", "Mani"),
|
||||
77: ("Marchen", "Marc"),
|
||||
78: ("Meitei Mayek", "Mtei"),
|
||||
79: ("Mende", "Mend"),
|
||||
80: ("Meroitic Cursive", "Merc"),
|
||||
81: ("Meroitic", "Mero"),
|
||||
82: ("Modi", "Modi"),
|
||||
83: ("Mongolian", "Mong"),
|
||||
84: ("Mro", "Mroo"),
|
||||
85: ("Multani", "Mult"),
|
||||
86: ("Myanmar", "Mymr"),
|
||||
87: ("Nabataean", "Nbat"),
|
||||
88: ("Newa", "Newa"),
|
||||
89: ("New Tai Lue", "Talu"),
|
||||
90: ("Nko", "Nkoo"),
|
||||
91: ("Odia", "Orya"),
|
||||
92: ("Ogham", "Ogam"),
|
||||
93: ("Ol Chiki", "Olck"),
|
||||
94: ("Old Hungarian", "Hung"),
|
||||
95: ("Old Italic", "Ital"),
|
||||
96: ("Old North Arabian", "Narb"),
|
||||
97: ("Old Permic", "Perm"),
|
||||
98: ("Old Persian", "Xpeo"),
|
||||
99: ("Old South Arabian", "Sarb"),
|
||||
100: ("Orkhon", "Orkh"),
|
||||
101: ("Osage", "Osge"),
|
||||
102: ("Osmanya", "Osma"),
|
||||
103: ("Pahawh Hmong", "Hmng"),
|
||||
104: ("Palmyrene", "Palm"),
|
||||
105: ("Pau Cin Hau", "Pauc"),
|
||||
106: ("Phags Pa", "Phag"),
|
||||
107: ("Phoenician", "Phnx"),
|
||||
108: ("Pollard Phonetic", "Plrd"),
|
||||
109: ("Psalter Pahlavi", "Phlp"),
|
||||
110: ("Rejang", "Rjng"),
|
||||
111: ("Runic", "Runr"),
|
||||
112: ("Samaritan", "Samr"),
|
||||
113: ("Saurashtra", "Saur"),
|
||||
114: ("Sharada", "Shrd"),
|
||||
115: ("Shavian", "Shaw"),
|
||||
116: ("Siddham", "Sidd"),
|
||||
117: ("Sign Writing", "Sgnw"),
|
||||
118: ("Simplified Han", "Hans"),
|
||||
119: ("Sinhala", "Sinh"),
|
||||
120: ("Sora Sompeng", "Sora"),
|
||||
121: ("Sundanese", "Sund"),
|
||||
122: ("Syloti Nagri", "Sylo"),
|
||||
123: ("Syriac", "Syrc"),
|
||||
124: ("Tagalog", "Tglg"),
|
||||
125: ("Tagbanwa", "Tagb"),
|
||||
126: ("Tai Le", "Tale"),
|
||||
127: ("Tai Viet", "Tavt"),
|
||||
128: ("Takri", "Takr"),
|
||||
129: ("Tamil", "Taml"),
|
||||
130: ("Tangut", "Tang"),
|
||||
131: ("Telugu", "Telu"),
|
||||
132: ("Thaana", "Thaa"),
|
||||
133: ("Thai", "Thai"),
|
||||
134: ("Tibetan", "Tibt"),
|
||||
135: ("Tifinagh", "Tfng"),
|
||||
136: ("Tirhuta", "Tirh"),
|
||||
137: ("Traditional Han", "Hant"),
|
||||
138: ("Ugaritic", "Ugar"),
|
||||
139: ("Vai", "Vaii"),
|
||||
140: ("Varang Kshiti", "Wara"),
|
||||
141: ("Yi", "Yiii"),
|
||||
}
|
||||
|
||||
# Map from an old (pre-rename) script enum key to its current name.
# NOTE(review): presumably keeps old QLocale::Script enum names valid as
# aliases for the renamed entries — confirm against the code that reads
# this table.
script_aliases = {
    # Renamings prior to Qt 6.0 (CLDR v37):
    'SimplifiedChineseScript': 'SimplifiedHanScript',
    'TraditionalChineseScript': 'TraditionalHanScript',
    # Renamings:
    'OriyaScript': 'OdiaScript',
    'MendeKikakuiScript': 'MendeScript',
    'BengaliScript': 'BanglaScript',
}
|
23
util/locale_database/formattags.txt
Normal file
23
util/locale_database/formattags.txt
Normal file
@ -0,0 +1,23 @@
|
||||
d
|
||||
dd
|
||||
ddd
|
||||
dddd
|
||||
M
|
||||
MM
|
||||
MMM
|
||||
MMMM
|
||||
yy
|
||||
yyyy
|
||||
h the hour without a leading zero (0 to 23 or 1 to 12 if AM/PM display)
|
||||
hh the hour with a leading zero (00 to 23 or 01 to 12 if AM/PM display)
|
||||
H the hour without a leading zero (0 to 23, even with AM/PM display)
|
||||
HH the hour with a leading zero (00 to 23, even with AM/PM display)
|
||||
m
|
||||
mm
|
||||
s
|
||||
ss
|
||||
z the milliseconds without leading zeroes (0 to 999)
|
||||
zzz the milliseconds with leading zeroes (000 to 999)
|
||||
AP or A Interpret as an AM/PM time. AP must be either "AM" or "PM"
|
||||
ap or a Interpret as an AM/PM time. ap must be either "am" or "pm"
|
||||
t time zone
|
80
util/locale_database/iso639_3.py
Normal file
80
util/locale_database/iso639_3.py
Normal file
@ -0,0 +1,80 @@
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
@dataclass
class LanguageCodeEntry:
    """One ISO 639 language with its codes from the various parts of the standard.

    Each of the Part 2B, Part 2T and Part 1 codes may be None when the
    language has no code in that part of the standard."""
    part3Code: str
    part2BCode: Optional[str]
    part2TCode: Optional[str]
    part1Code: Optional[str]

    def id(self) -> str:
        """Return the preferred code: Part 1 if set, else Part 2B, else Part 3."""
        return self.part1Code or self.part2BCode or self.part3Code

    def __repr__(self) -> str:
        # Only mention codes that add information beyond the Part 3 code.
        text = f'{self.__class__.__name__}({self.id()!r}, part3Code={self.part3Code!r}'
        if self.part2BCode is not None and self.part2BCode != self.part3Code:
            text += f', part2BCode={self.part2BCode!r}'
        if self.part2TCode != self.part2BCode:
            text += f', part2TCode={self.part2TCode!r}'
        if self.part1Code is not None:
            text += f', part1Code={self.part1Code!r}'
        return text + ')'
|
||||
|
||||
|
||||
class LanguageCodeData:
    """
    Representation of ISO639-2 language code data.
    """
    def __init__(self, fileName: str):
        """
        Construct the object populating the data from the given file.

        The file is read as UTF-8 tab-separated data whose first line
        is a header; each following line supplies the Part 3, Part 2B,
        Part 2T and Part 1 codes as its first four fields.
        """
        self.__codeMap: Dict[str, LanguageCodeEntry] = {}

        with open(fileName, 'r', encoding='utf-8') as stream:
            stream.readline() # skip the header
            for line in stream:
                part3Code, part2BCode, part2TCode, part1Code, _ = line.split('\t', 4)
                self.__sanityCheck(part3Code, part2BCode, part2TCode, part1Code)

                entry = LanguageCodeEntry(part3Code, part2BCode or None,
                                          part2TCode or None, part1Code or None)
                # Index by the entry's preferred code (Part 1 if any,
                # else Part 2B, else Part 3):
                self.__codeMap[entry.id()] = entry

    @staticmethod
    def __sanityCheck(part3Code: str, part2BCode: str,
                      part2TCode: str, part1Code: str) -> None:
        """Assert the structural invariants of one row of the code table."""
        assert all(p.isascii() for p in (part3Code, part2BCode, part2TCode, part1Code)), \
            f'Non-ascii characters in code names: {part3Code!r} {part2BCode!r} '\
            f'{part2TCode!r} {part1Code!r}'

        assert len(part3Code) == 3, f'Invalid Part 3 code length for {part3Code!r}'
        assert not part1Code or len(part1Code) == 2, \
            f'Invalid Part 1 code length for {part3Code!r}: {part1Code!r}'
        assert not part2BCode or len(part2BCode) == 3, \
            f'Invalid Part 2B code length for {part3Code!r}: {part2BCode!r}'
        assert not part2TCode or len(part2TCode) == 3, \
            f'Invalid Part 2T code length for {part3Code!r}: {part2TCode!r}'

        # Part 2B and Part 2T are either both present or both absent:
        assert (part2BCode == '') == (part2TCode == ''), \
            f'Only one Part 2 code is specified for {part3Code!r}: ' \
            f'{part2BCode!r} vs {part2TCode!r}'
        assert not part2TCode or part2TCode == part3Code, \
            f'Part 3 code {part3Code!r} does not match Part 2T code {part2TCode!r}'

    def query(self, code: str) -> Optional[LanguageCodeEntry]:
        """
        Lookup the entry with the given code and return it.

        The entries can be looked up by using either the Alpha2 code or the bibliographical
        Alpha3 code.
        """
        return self.__codeMap.get(code)
|
599
util/locale_database/ldml.py
Normal file
599
util/locale_database/ldml.py
Normal file
@ -0,0 +1,599 @@
|
||||
# Copyright (C) 2020 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Parsing the Locale Data Markup Language
|
||||
|
||||
It's an XML format, so the raw parsing of XML is, of course, delegated
|
||||
to xml.dom.minidom; but it has its own specific schemata and some
|
||||
funky rules for combining data from various files (inheritance between
|
||||
locales). The use of it we're interested in is extraction of CLDR's
|
||||
data, so some of the material here is specific to CLDR; see cldr.py
|
||||
for how it is mainly used.
|
||||
|
||||
Provides various classes to wrap xml.dom's objects, specifically those
|
||||
returned by minidom.parse() and their child-nodes:
|
||||
Node -- wraps any node in the DOM tree
|
||||
XmlScanner -- wraps the root element of a stand-alone XML file
|
||||
Supplement -- specializes XmlScanner for supplemental data files
|
||||
LocaleScanner -- wraps a locale's inheritance-chain of file roots
|
||||
|
||||
See individual classes for further detail.
|
||||
"""
|
||||
from localetools import Error
|
||||
from dateconverter import convert_date
|
||||
|
||||
class Node (object):
    """Wrapper for an arbitrary DOM node.

    Provides various ways to select children of a node. Selected child
    nodes are returned wrapped as Node objects. A Node exposes the raw
    DOM node it wraps via its .dom attribute."""

    def __init__(self, elt, dullAttrs = None, draft = 0):
        """Wraps a DOM node for ease of access.

        First argument, elt, is the DOM node to wrap.

        Optional second argument, dullAttrs, should either be None or
        map each LDML tag name to a list of the names of
        non-distinguishing attributes for nodes with the given tag
        name. If None is given, no distinguishing attribute checks are
        performed.

        (Optional third argument, draft, should only be supplied by
        this class's creation of child nodes; it is the maximum draft
        score of any ancestor of the new node.)"""
        self.dom = elt
        self.__dull = dullAttrs
        # A node's draft score is the draftiest of its own draft
        # attribute (if present) and those of all its ancestors:
        try:
            level = elt.attributes['draft'].nodeValue
        except KeyError:
            self.draft = draft
        else:
            self.draft = max(draft, self.draftScore(level))

    def findAllChildren(self, tag, wanted = None, allDull = False):
        """Yields all children with the given tag and attributes.

        First argument is the tag: children with any other tag are
        ignored.

        Optional second argument, wanted, should either be None or map
        attribute names to the values they must have. Only child nodes
        with these attributes set to the given values are yielded.

        By default, nodes that have distinguishing attributes, other
        than those specified in wanted, are ignored. Pass the allDull
        parameter a true value to suppress this check."""

        if self.__dull is None:
            allDull = True
        dull = () if allDull else self.__dull[tag]

        for kid in self.dom.childNodes:
            if kid.nodeType != kid.ELEMENT_NODE or kid.nodeName != tag:
                continue

            if wanted:
                try:
                    mismatched = any(kid.attributes[key].nodeValue != value
                                     for key, value in wanted.items())
                except KeyError: # Some wanted attribute is missing
                    continue
                if mismatched:
                    continue

                # Skip children with distinguishing attributes beyond
                # those asked for:
                if not (allDull or all(key in dull or key in wanted
                                       for key in kid.attributes.keys())):
                    continue

            # Skip children with any distinguishing attribute:
            elif not (allDull or all(key in dull
                                     for key in kid.attributes.keys())):
                continue

            yield Node(kid, self.__dull, self.draft)

    def findUniqueChild(self, tag):
        """Returns the single child with the given nodeName.

        Raises Error if there is no such child or there is more than
        one."""
        found = tuple(self.findAllChildren(tag))
        if not found:
            raise Error('No child found where one was expected', tag)
        if len(found) > 1:
            raise Error('Many children found where only one was expected', tag)
        return found[0]

    @classmethod
    def draftScore(cls, level):
        """Maps draft level names to numeric scores.

        Single parameter, level, is the least sure value of the draft
        attribute on a node that you're willing to accept; returns a
        numeric value (lower is less drafty).

        Tempting as it is to insist on low draft scores, there are
        many locales in which pretty much every leaf is
        unconfirmed. So, for the moment, callers mostly accept all
        elements regardless of draft values (the one exception is
        am/pm indicators)."""
        if not level:
            return 0
        # Unknown level names score 5, draftier than any known level:
        return cls.__draftScores.get(level, 5)

    # Implementation details:
    __draftScores = {'true': 4, 'unconfirmed': 3, 'provisional': 2,
                     'contributed': 1, 'approved': 0, 'false': 0}
|
||||
|
||||
def _parseXPath(selector):
    """Split one XPath step "tag[attr=val][...]" into (tag, attributes).

    A bare "[val]" qualifier is shorthand for "[type=val]". Returns the
    tag name and a dict mapping attribute names to required values."""
    pieces = selector.split('[')
    tag = pieces.pop(0)
    if pieces:
        pieces = [piece.strip() for piece in pieces]
        assert all(piece.endswith(']') for piece in pieces)
        pairs = [piece[:-1].split('=') for piece in pieces]
        assert all(len(pair) in (1, 2) for pair in pairs)
        # A lone value means an implicit 'type' attribute:
        pieces = (('type', pair[0]) if len(pair) == 1 else pair for pair in pairs)
    return tag, dict(pieces)
|
||||
|
||||
def _iterateEach(iters):
    """Flatten a two-layer iterator into one lazy iterator.

    Single argument, iters, is an iterable of iterables; yields each
    item of each inner iterable in turn (the same contract as
    itertools.chain.from_iterable, kept as a named local helper)."""
    for it in iters:
        # Idiomatic generator delegation instead of a manual inner loop:
        yield from it
|
||||
|
||||
class XmlScanner (object):
    """Wrap an XML file to enable XPath access to its nodes.
    """
    def __init__(self, node):
        # node is the (Node-wrapped) root element of the document:
        self.root = node

    def findNodes(self, xpath):
        """Return all nodes under self.root matching this xpath.

        Ignores any excess attributes."""
        matches = (self.root,)
        # Narrow the candidate set one path component at a time:
        for step in xpath.split('/'):
            tag, attrs = _parseXPath(step)
            matches = tuple(_iterateEach(node.findAllChildren(tag, attrs)
                                         for node in matches))
            if not matches:
                break
        return matches
|
||||
|
||||
class Supplement (XmlScanner):
    """XmlScanner specialised for CLDR supplemental data files."""
    def find(self, xpath):
        """Yield (tag, attributes) pairs for elements matching xpath.

        For each matching node, scans its child nodes (or the node
        itself, if it has no children) and yields, for every element
        that carries attributes, its tag name and a dict of its
        attributes' values."""
        for node in self.findNodes(xpath):
            kids = node.dom.childNodes if node.dom.childNodes else (node.dom,)
            for elt in kids:
                if elt.attributes:
                    yield (elt.nodeName,
                           {k: v if isinstance(v, str) else v.nodeValue
                            for k, v in elt.attributes.items()})
|
||||
|
||||
class LocaleScanner (object):
|
||||
def __init__(self, name, nodes, root):
|
||||
self.name, self.nodes, self.base = name, nodes, root
|
||||
|
||||
def find(self, xpath, default = None, draft = None):
|
||||
"""XPath search for the content of an element.
|
||||
|
||||
Required argument, xpath, is the XPath to search for. Optional
|
||||
second argument is a default value to use, if no such node is
|
||||
found. Optional third argument is a draft score (see
|
||||
Node.draftScore() for details); if given, leaf elements with
|
||||
higher draft scores are ignored."""
|
||||
try:
|
||||
for elt in self.__find(xpath):
|
||||
try:
|
||||
if draft is None or elt.draft <= draft:
|
||||
return elt.dom.firstChild.nodeValue
|
||||
except (AttributeError, KeyError):
|
||||
pass
|
||||
except Error as e:
|
||||
if default is None:
|
||||
raise
|
||||
return default
|
||||
|
||||
def tagCodes(self):
|
||||
"""Yields four tag codes
|
||||
|
||||
The tag codes are language, script, territory and variant; an
|
||||
empty value for any of them indicates that no value was
|
||||
provided. The values are obtained from the primary file's
|
||||
top-level <identity> element. An Error is raised if any
|
||||
top-level <alias> element of this file has a non-empty source
|
||||
attribute; that attribute value is mentioned in the error's
|
||||
message."""
|
||||
root = self.nodes[0]
|
||||
for alias in root.findAllChildren('alias', allDull=True):
|
||||
try:
|
||||
source = alias.dom.attributes['source'].nodeValue
|
||||
except (KeyError, AttributeError):
|
||||
pass
|
||||
else:
|
||||
raise Error(f'Alias to {source}')
|
||||
|
||||
ids = root.findUniqueChild('identity')
|
||||
for code in ('language', 'script', 'territory', 'variant'):
|
||||
for node in ids.findAllChildren(code, allDull=True):
|
||||
try:
|
||||
yield node.dom.attributes['type'].nodeValue
|
||||
except (KeyError, AttributeError):
|
||||
pass
|
||||
else:
|
||||
break # only want one value for each code
|
||||
else: # No value for this code, use empty
|
||||
yield ''
|
||||
|
||||
def currencyData(self, isoCode):
|
||||
"""Fetches currency data for this locale.
|
||||
|
||||
Single argument, isoCode, is the ISO currency code for the
|
||||
currency in use in the territory. See also numericData, which
|
||||
includes some currency formats.
|
||||
"""
|
||||
if isoCode:
|
||||
stem = f'numbers/currencies/currency[{isoCode}]/'
|
||||
symbol = self.find(f'{stem}symbol', '')
|
||||
name = self.__currencyDisplayName(stem)
|
||||
else:
|
||||
symbol = name = ''
|
||||
yield 'currencySymbol', symbol
|
||||
yield 'currencyDisplayName', name
|
||||
|
||||
def numericData(self, lookup, complain = lambda text: None):
|
||||
"""Generate assorted numeric data for the locale.
|
||||
|
||||
First argument, lookup, is a callable that maps a numbering
|
||||
system's name to certain data about the system, as a mapping;
|
||||
we expect this to have 'digits' as a key.
|
||||
"""
|
||||
system = self.find('numbers/defaultNumberingSystem')
|
||||
stem = f'numbers/symbols[numberSystem={system}]/'
|
||||
decimal = self.find(f'{stem}decimal')
|
||||
group = self.find(f'{stem}group')
|
||||
assert decimal != group, (self.name, system, decimal)
|
||||
yield 'decimal', decimal
|
||||
yield 'group', group
|
||||
yield 'percent', self.find(f'{stem}percentSign')
|
||||
yield 'list', self.find(f'{stem}list')
|
||||
yield 'exp', self.find(f'{stem}exponential')
|
||||
yield 'groupSizes', self.__numberGrouping(system)
|
||||
|
||||
digits = lookup(system)['digits']
|
||||
assert len(digits) == 10
|
||||
zero = digits[0]
|
||||
# Qt's number-formatting code assumes digits are consecutive
|
||||
# (except Suzhou, CLDR's hanidec - see QTBUG-85409):
|
||||
assert all(ord(c) == i + (0x3020 if ord(zero) == 0x3007 else ord(zero))
|
||||
for i, c in enumerate(digits[1:], 1))
|
||||
yield 'zero', zero
|
||||
|
||||
plus = self.find(f'{stem}plusSign')
|
||||
minus = self.find(f'{stem}minusSign')
|
||||
yield 'plus', plus
|
||||
yield 'minus', minus
|
||||
|
||||
# Currency formatting:
|
||||
xpath = 'numbers/currencyFormats/currencyFormatLength/currencyFormat[accounting]/pattern'
|
||||
try:
|
||||
money = self.find(xpath.replace('Formats/',
|
||||
f'Formats[numberSystem={system}]/'))
|
||||
except Error:
|
||||
money = self.find(xpath)
|
||||
money = self.__currencyFormats(money, plus, minus)
|
||||
yield 'currencyFormat', next(money)
|
||||
neg = ''
|
||||
for it in money:
|
||||
assert not neg, 'There should be at most one more pattern'
|
||||
neg = it
|
||||
yield 'currencyNegativeFormat', neg
|
||||
|
||||
def textPatternData(self):
    """Generate text-pattern data for the locale.

    Yields (key, value) pairs for quotation delimiters, list-joining
    patterns, AM/PM texts and the long/short date and time formats
    (converted via convert_date — presumably imported at module level;
    confirm against the file's imports).
    """
    for key in ('quotationStart', 'alternateQuotationEnd',
                'quotationEnd', 'alternateQuotationStart'):
        yield key, self.find(f'delimiters/{key}')

    for key in ('start', 'middle', 'end'):
        yield (f'listPatternPart{key.capitalize()}',
               self.__fromLdmlListPattern(self.find(
                   f'listPatterns/listPattern/listPatternPart[{key}]')))
    # The two-element list pattern has its own LDML spelling:
    yield ('listPatternPartTwo',
           self.__fromLdmlListPattern(self.find(
               'listPatterns/listPattern/listPatternPart[2]')))

    stem = 'dates/calendars/calendar[gregorian]/'
    # TODO: is wide really the right width to use here ?
    # abbreviated might be an option ... or try both ?
    meridiem = f'{stem}dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/'
    for key in ('am', 'pm'):
        # Accept entries of draft status down to 'contributed':
        yield key, self.find(f'{meridiem}dayPeriod[{key}]',
                             draft = Node.draftScore('contributed'))

    for pair in (('long', 'full'), ('short', 'short')):
        for key in ('time', 'date'):
            yield (f'{pair[0]}{key.capitalize()}Format',
                   convert_date(self.find(
                       f'{stem}{key}Formats/{key}FormatLength[{pair[1]}]/{key}Format/pattern')))
|
||||
|
||||
def endonyms(self, language, script, territory, variant):
    """Yield the locale's own names for its language and territory.

    Tries the most specific language name first (joining the non-empty
    parts of language, script, territory with '_'), falling back to
    less specific combinations; yields ('languageEndonym', '') if none
    is found.  The territory endonym falls back to '' via find()'s
    second argument.
    """
    # TODO: take variant into account ?
    for seq in ((language, script, territory),
                (language, script), (language, territory), (language,)):
        if not all(seq):
            # Skip combinations with an empty component:
            continue
        try:
            yield ('languageEndonym',
                   self.find(f'localeDisplayNames/languages/language[{"_".join(seq)}]'))
        except Error:
            pass
        else:
            # Found one; stop trying less specific forms:
            break
    else:
        # grumble(failed to find endonym for language)
        yield 'languageEndonym', ''

    yield ('territoryEndonym',
           self.find(f'localeDisplayNames/territories/territory[{territory}]', ''))
|
||||
|
||||
def unitData(self):
    """Yield the locale's byte-unit names.

    Produces ('byte_unit', name) plus semicolon-joined lists of the
    SI-quantified ('byte_si_quantified': kB, MB, ...) and
    IEC-quantified ('byte_iec_quantified': KiB, MiB, ...) forms.
    """
    yield ('byte_unit',
           self.find('units/unitLength[long]/unit[digital-byte]/displayName',
                     'bytes'))

    unit = self.__findUnit('', 'B')
    cache = [] # Populated by the SI call, to give hints to the IEC call
    yield ('byte_si_quantified',
           ';'.join(self.__unitCount('', unit, cache)))
    # IEC 60027-2
    # http://physics.nist.gov/cuu/Units/binary.html
    yield ('byte_iec_quantified',
           ';'.join(self.__unitCount('bi', 'iB', cache)))
|
||||
|
||||
def calendarNames(self, calendars):
    """Yield month names for each given calendar, then day names.

    Month names are emitted per calendar for every (key, context,
    width) combination in __nameForms, as semicolon-joined lists of
    the twelve months; day names come from the Gregorian calendar
    only, Sunday first.
    """
    namings = self.__nameForms
    for cal in calendars:
        stem = f'dates/calendars/calendar[{cal}]/months/'
        for key, mode, size in namings:
            prop = f'monthContext[{mode}]/monthWidth[{size}]/'
            yield (f'{key}Months_{cal}',
                   ';'.join(self.find(f'{stem}{prop}month[{i}]')
                            for i in range(1, 13)))

    # Day data (for Gregorian, at least):
    stem = 'dates/calendars/calendar[gregorian]/days/'
    days = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat')
    for (key, mode, size) in namings:
        prop = f'dayContext[{mode}]/dayWidth[{size}]/day'
        yield (f'{key}Days',
               ';'.join(self.find(f'{stem}{prop}[{day}]')
                        for day in days))
|
||||
|
||||
# Implementation details
# Triples of (key-prefix, LDML context, LDML width), used by
# calendarNames() to build both month-name and day-name entries:
__nameForms = (
    ('standaloneLong', 'stand-alone', 'wide'),
    ('standaloneShort', 'stand-alone', 'abbreviated'),
    ('standaloneNarrow', 'stand-alone', 'narrow'),
    ('long', 'format', 'wide'),
    ('short', 'format', 'abbreviated'),
    ('narrow', 'format', 'narrow'),
) # Used for month and day names
|
||||
|
||||
def __find(self, xpath):
    """Yield all nodes matching xpath for this locale.

    First searches this locale's chain of nodes (self.nodes); then,
    separately, searches from the root document, following any
    source='locale' <alias> elements by queueing rewritten paths onto
    the retries work-list.  Raises Error if nothing matches anywhere.
    """
    retries = [ xpath.split('/') ]
    while retries:
        tags, elts, roots = retries.pop(), self.nodes, (self.base.root,)
        # Walk the locale's own nodes down the selector chain:
        for selector in tags:
            tag, attrs = _parseXPath(selector)
            elts = tuple(_iterateEach(e.findAllChildren(tag, attrs) for e in elts))
            if not elts:
                break

        else: # Found matching elements
            # Possibly filter elts to prefer the least drafty ?
            for elt in elts:
                yield elt

        # Process roots separately: otherwise the alias-processing
        # is excessive.
        for i, selector in enumerate(tags):
            tag, attrs = _parseXPath(selector)

            # Any locale-sourced alias at this level supplies an
            # alternative path to retry later:
            for alias in tuple(_iterateEach(r.findAllChildren('alias', allDull=True)
                                            for r in roots)):
                if alias.dom.attributes['source'].nodeValue == 'locale':
                    replace = alias.dom.attributes['path'].nodeValue.split('/')
                    retries.append(self.__xpathJoin(tags[:i], replace, tags[i:]))

            roots = tuple(_iterateEach(r.findAllChildren(tag, attrs) for r in roots))
            if not roots:
                if retries: # Let outer loop fall back on an alias path:
                    break
                # Nothing left to try — report what we were looking for:
                sought = '/'.join(tags)
                if sought != xpath:
                    sought += f' (for {xpath})'
                raise Error(f'All lack child {selector} for {sought} in {self.name}')

        else: # Found matching elements
            for elt in roots:
                yield elt

    sought = '/'.join(tags)
    if sought != xpath:
        sought += f' (for {xpath})'
    raise Error(f'No {sought} in {self.name}')
|
||||
|
||||
def __currencyDisplayName(self, stem):
    """Return the currency's display name, or '' if none is found.

    Tries the plain displayName first, then each count-qualified
    variant in turn.
    """
    tails = ['displayName']
    tails.extend(f'displayName[count={count}]'
                 for count in ('zero', 'one', 'two', 'few', 'many', 'other'))
    for tail in tails:
        try:
            return self.find(stem + tail)
        except Error:
            continue
    return ''
|
||||
|
||||
def __findUnit(self, keySuffix, quantify, fallback=''):
    """Find a localized name for a (possibly quantified) byte unit.

    Returns the first suitable unitPattern (with its {0} placeholder
    pruned) or displayName found, else fallback.
    """
    # The displayName for a quantified unit in en.xml is kByte
    # (even for unitLength[narrow]) instead of kB (etc.), so
    # prefer any unitPattern provided, but prune its placeholder:
    for size in ('short', 'narrow'): # TODO: reverse order ?
        stem = f'units/unitLength[{size}{keySuffix}]/unit[digital-{quantify}byte]/'
        for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
            try:
                ans = self.find(f'{stem}unitPattern[count={count}]')
            except Error:
                continue

            # TODO: do count-handling, instead of discarding placeholders
            if False: # TODO: do it this way, instead !
                ans = ans.replace('{0}', '').strip()
            elif ans.startswith('{0}'):
                ans = ans[3:].lstrip()
            if ans:
                return ans

        try:
            return self.find(f'{stem}displayName')
        except Error:
            pass

    return fallback
|
||||
|
||||
def __unitCount(self, keySuffix, suffix, cache,
                # Stop at exa/exbi: 16 exbi = 2^{64} < zetta =
                # 1000^7 < zebi = 2^{70}, the next quantifiers up:
                siQuantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')):
    """Work out the unit quantifiers.

    Unfortunately, the CLDR data only go up to terabytes and we
    want all the way to exabytes; but we can recognize the SI
    quantifiers as prefixes, strip and identify the tail as the
    localized translation for 'B' (e.g. French has 'octet' for
    'byte' and uses ko, Mo, Go, To from which we can extrapolate
    Po, Eo).

    Should be called first for the SI quantifiers, with suffix =
    'B', then for the IEC ones, with suffix = 'iB'; the list cache
    (initially empty before first call) is used to let the second
    call know what the first learned about the localized unit.
    """
    if suffix == 'iB': # second call, re-using first's cache
        if cache:
            byte = cache.pop()
            if all(byte == k for k in cache):
                # All SI forms agreed on the localized 'byte' letter;
                # reuse it for the IEC suffix:
                suffix = f'i{byte}'
        for q in siQuantifiers:
            # Those don't (yet, v36) exist in CLDR, so we always get the fall-back:
            yield self.__findUnit(keySuffix, q[:2], f'{q[0].upper()}{suffix}')
    else: # first call
        tail = suffix = suffix or 'B'
        for q in siQuantifiers:
            it = self.__findUnit(keySuffix, q)
            # kB for kilobyte, in contrast with KiB for IEC:
            q = q[0] if q == 'kilo' else q[0].upper()
            if not it:
                # No localized form: synthesize from quantifier + tail.
                it = q + tail
            elif it.startswith(q):
                # Remember what followed the quantifier letter, as a
                # hint about the localized unit:
                rest = it[1:]
                tail = rest if all(rest == k for k in cache) else suffix
                cache.append(rest)
            yield it
|
||||
|
||||
def __numberGrouping(self, system):
    """Sizes of groups of digits within a number.

    Returns a triple (least, higher, top) for which:
      * least is the number of digits after the last grouping
        separator;
      * higher is the number of digits between grouping
        separators;
      * top is the fewest digits that can appear before the first
        grouping separator.

    Thus (4, 3, 2) would want 1e7 as 1000,0000 but 1e8 as 10,000,0000.

    Note: CLDR does countenance the possibility of grouping also
    in the fractional part.  This is not presently attempted.  Nor
    is placement of the sign character anywhere but at the start
    of the number (some formats may place it at the end, possibly
    elsewhere)."""
    top = int(self.find('numbers/minimumGroupingDigits'))
    assert top < 4, top # We store it in a 2-bit field
    grouping = self.find(f'numbers/decimalFormats[numberSystem={system}]/'
                         'decimalFormatLength/decimalFormat/pattern')
    # Only the integer part of the pattern matters; the last two
    # comma-separated groups give the group sizes:
    groups = grouping.split('.')[0].split(',')[-3:]
    assert all(len(x) < 8 for x in groups[-2:]), grouping # we store them in 3-bit fields
    if len(groups) > 2:
        return len(groups[-1]), len(groups[-2]), top

    # Fewer than two separators in the pattern: assume a uniform
    # group size (defaulting to 3 when there is no separator at all).
    size = len(groups[-1]) if len(groups) == 2 else 3
    return size, size, top
|
||||
|
||||
@staticmethod
def __currencyFormats(patterns, plus, minus):
    """Convert a CLDR currency pattern to Qt's format.

    First argument is the raw pattern, possibly a pair of
    'positive;negative' patterns; the remaining arguments are the
    locale's plus and minus signs.  Yields one converted pattern per
    part, with %1 standing for the amount and %2 for the currency
    symbol.
    """
    for p in patterns.split(';'):
        # Collapse digit placeholders to '#', drop group/decimal marks:
        p = p.replace('0', '#').replace(',', '').replace('.', '')
        try:
            # Fix: str.find() returns -1 rather than raising, which made
            # the except-clause below dead code; str.index() does raise,
            # so a pattern with no '#' genuinely skips the pruning.
            cut = p.index('#') + 1
        except ValueError:
            pass
        else:
            # Keep only the first '#'; it stands for the whole number:
            p = p[:cut] + p[cut:].replace('#', '')
        p = p.replace('#', "%1")
        # According to http://www.unicode.org/reports/tr35/#Number_Format_Patterns
        # there can be doubled or trippled currency sign, however none of the
        # locales use that.
        p = p.replace('\xa4', "%2")
        # Single quote goes away, but double goes to single:
        p = p.replace("''", '###').replace("'", '').replace('###', "'")
        # Use number system's signs:
        p = p.replace('+', plus).replace('-', minus)
        yield p
|
||||
|
||||
@staticmethod
def __fromLdmlListPattern(pattern):
    """Map LDML's {0}/{1}/{2} placeholders onto Qt's %1/%2/%3.

    This is a very limited parsing of the format for list pattern
    part only.
    """
    for ldml, qt in (('{0}', '%1'), ('{1}', '%2'), ('{2}', '%3')):
        pattern = pattern.replace(ldml, qt)
    return pattern
|
||||
|
||||
@staticmethod
def __fromLdmlPath(seq): # tool function for __xpathJoin()
    """Convert LDML's [@name='value'] to our [name=value] form."""
    for selector in seq:
        # Dismember the selector into tag and attribute specs:
        tag, *specs = selector.split('[')
        if not specs: # Short-cut the easy case:
            yield selector
            continue

        assert all(spec.endswith(']') for spec in specs)
        fixed = [tag]
        for spec in specs:
            pieces = spec[:-1].split('=')
            name, value = pieces[0], pieces[1]
            # Strip LDML's leading '@' and surrounding quotes:
            if name.startswith('@'):
                name = name[1:]
            if value.startswith("'") and value.endswith("'"):
                value = value[1:-1]
            fixed.append(f'{name}={value}]')
        yield '['.join(fixed)
|
||||
|
||||
@classmethod
def __xpathJoin(cls, head, insert, tail):
    """Join three lists of XPath selectors into one.

    Leading '..' components of insert are resolved against the tail
    of head (both lists are consumed in place); insert's selectors
    are converted from LDML's attribute format to ours.
    """
    while insert and insert[0] == '..':
        del insert[0]
        del head[-1]
    return head + list(cls.__fromLdmlPath(insert)) + tail
|
184
util/locale_database/localetools.py
Normal file
184
util/locale_database/localetools.py
Normal file
@ -0,0 +1,184 @@
|
||||
# Copyright (C) 2020 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Utilities shared among the CLDR extraction tools.
|
||||
|
||||
Functions:
|
||||
unicode2hex() -- converts unicode text to UCS-2 in hex form.
|
||||
wrap_list() -- map list to comma-separated string, 20 entries per line.
|
||||
|
||||
Classes:
|
||||
Error -- A shared error class.
|
||||
Transcriber -- edit a file by writing a temporary file, then renaming.
|
||||
SourceFileEditor -- adds standard prelude and tail handling to Transcriber.
|
||||
"""
|
||||
|
||||
from contextlib import ExitStack, contextmanager
|
||||
from pathlib import Path
|
||||
from tempfile import NamedTemporaryFile
|
||||
|
||||
qtbase_root = Path(__file__).parents[2]
|
||||
assert qtbase_root.name == 'qtbase'
|
||||
|
||||
class Error (Exception):
    """Common base-class for errors raised by these locale tools.

    The first constructor argument is retained as .message and used
    as the str() form; any further arguments are kept in .args, as
    usual for exceptions.
    """
    def __init__(self, msg, *args):
        super().__init__(msg, *args)
        self.message = msg

    def __str__(self):
        return self.message
|
||||
|
||||
def unicode2hex(s):
    """Map text to a list of hex UTF-16 code units.

    Characters outside the BMP are split into a surrogate pair
    (formula copied from qchar.h); each unit is rendered via hex().
    """
    out = []
    for ch in s:
        code = ord(ch)
        if code > 0xFFFF:
            # make a surrogate pair
            out.append(hex((code >> 10) + 0xd7c0))
            out.append(hex((code % 0x400) + 0xdc00))
        else:
            out.append(hex(code))
    return out
|
||||
|
||||
def wrap_list(lst):
    """Render lst as comma-joined text, twenty entries per line."""
    rows = (lst[i:i + 20] for i in range(0, len(lst), 20))
    return ",\n".join(", ".join(row) for row in rows)
|
||||
|
||||
|
||||
@contextmanager
def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path):
    """Context manager for safe file update via a temporary file.

    Accepts path to the file to be updated. Yields a temporary file to the user
    code, open for writing.

    On success closes the temporary file and moves its content to the original
    location. On error, removes temporary file, without disturbing the original.

    NOTE(review): Path.rename() presumes the temporary and the target
    live on the same filesystem — callers should pass a dir near the
    original; confirm at call sites.
    """
    # delete=False: we rename (or unlink) the file ourselves below.
    tempFile = NamedTemporaryFile('w', prefix=prefix, dir=dir, delete=False)
    try:
        yield tempFile
        tempFile.close()
        # Move the modified file to the original location
        Path(tempFile.name).rename(originalLocation)
    except Exception:
        # delete the temporary file in case of error
        tempFile.close()
        Path(tempFile.name).unlink()
        raise
|
||||
|
||||
|
||||
class Transcriber:
    """Context manager base-class to manage source file rewrites.

    Derived classes need to implement transcribing of the content, with
    whatever modifications they may want.  Members reader and writer
    are exposed; use writer.write() to output to the new file; use
    reader.readline() or iterate reader to read the original.

    This class is intended to be used as context manager only (inside a
    `with` statement).

    Reimplement onEnter() to write any preamble the file may have,
    onExit() to write any tail.  The body of the with statement takes
    care of anything in between, using methods provided by derived classes.

    The data is written to a temporary file first.  The temporary file data
    is then moved to the original location if there were no errors.  Otherwise
    the temporary file is removed and the original is left unchanged.
    """
    def __init__(self, path: Path, temp_dir: Path):
        # path: the file to rewrite; temp_dir: where the temporary file
        # is created (so the final rename stays on one filesystem).
        self.path = path
        self.tempDir = temp_dir

    def onEnter(self) -> None:
        """
        Called before transferring control to user code.

        This function can be overridden in derived classes to perform actions
        before transferring control to the user code.

        The default implementation does nothing.
        """
        pass

    def onExit(self) -> None:
        """
        Called after return from user code.

        This function can be overridden in derived classes to perform actions
        after successful return from user code.

        The default implementation does nothing.
        """
        pass

    def __enter__(self):
        with ExitStack() as resources:
            # Create a temp file to write the new data into
            self.writer = resources.enter_context(
                AtomicRenameTemporaryFile(self.path, prefix=self.path.name, dir=self.tempDir))
            # Open the old file
            self.reader = resources.enter_context(open(self.path))

            self.onEnter()

            # Prevent resources from being closed on normal return from this
            # method and make them available inside __exit__():
            self.__resources = resources.pop_all()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is None:
            # Success: let onExit() write any tail, then close both
            # files; closing the temporary triggers the atomic rename.
            with self.__resources:
                self.onExit()
        else:
            # Failure: close both files; the temporary is deleted and
            # the original left untouched.
            self.__resources.__exit__(exc_type, exc_value, traceback)

        # Never suppress the exception:
        return False
|
||||
|
||||
|
||||
class SourceFileEditor (Transcriber):
    """Transcriber for files with a generated block amid fixed text.

    Many source files consist of hand-maintained code surrounding a
    machine-generated section delimited by a standard pair of marker
    comments.  On entering the context this editor copies everything
    up to and including the start marker; on exit it discards the
    stale generated section, then copies the end marker and the rest
    of the file verbatim.

    Only usable as a context manager (see Transcriber).  Code in the
    body of the with statement should use self.writer to emit the new
    content between the markers.
    """
    GENERATED_BLOCK_START = '// GENERATED PART STARTS HERE'
    GENERATED_BLOCK_END = '// GENERATED PART ENDS HERE'

    def onEnter(self) -> None:
        # Transcribe the leading fixed section, start marker included.
        for line in self.reader:
            self.writer.write(line)
            if line.strip() == self.GENERATED_BLOCK_START:
                break

    def onExit(self) -> None:
        # Drop the old generated content, then copy the remainder.
        skipping = True
        for line in self.reader:
            if not skipping:
                self.writer.write(line)
            elif line.strip() == self.GENERATED_BLOCK_END:
                self.writer.write(line)
                skipping = False
|
627
util/locale_database/qlocalexml.py
Normal file
627
util/locale_database/qlocalexml.py
Normal file
@ -0,0 +1,627 @@
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Shared serialization-scanning code for QLocaleXML format.
|
||||
|
||||
Provides classes:
|
||||
Locale -- common data-type representing one locale as a namespace
|
||||
QLocaleXmlWriter -- helper to write a QLocaleXML file
|
||||
QLocaleXmlReader -- helper to read a QLocaleXML file back in
|
||||
|
||||
Support:
|
||||
Spacer -- provides control over indentation of the output.
|
||||
|
||||
RelaxNG schema for the used file format can be found in qlocalexml.rnc.
|
||||
QLocaleXML files can be validated using:
|
||||
|
||||
jing -c qlocalexml.rnc <file.xml>
|
||||
|
||||
You can download jing from https://relaxng.org/jclark/jing.html if your
|
||||
package manager lacks the jing package.
|
||||
"""
|
||||
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
from localetools import Error
|
||||
|
||||
# Tools used by Locale:
|
||||
def camel(seq):
    """Yield words from an iterator, capitalizing all but the first."""
    it = iter(seq)
    yield next(it)
    for word in it:
        yield word.capitalize()

def camelCase(words):
    """Join a sequence of words into one camelCase identifier."""
    return ''.join(camel(iter(words)))
|
||||
|
||||
def addEscapes(s):
    """Replace each non-ASCII character of s with a \\xNN escape."""
    return ''.join(c if ord(c) < 128 else f'\\x{ord(c):02x}' for c in s)
|
||||
|
||||
def startCount(c, text): # strspn
    """First index in text where it doesn't have a character in c"""
    assert text and text[0] in c
    for i, ch in enumerate(text):
        if ch not in c:
            return i
    return len(text)
|
||||
|
||||
def convertFormat(format):
    """Convert date/time format-specifier from CLDR to Qt

    Match up (as best we can) the differences between:
    * https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
    * QDateTimeParser::parseFormat() and QLocalePrivate::dateTimeToString()
    """
    # Compare and contrast dateconverter.py's convert_date().
    # Need to (check consistency and) reduce redundancy !
    result = ""
    i = 0
    while i < len(format):
        if format[i] == "'":
            # Quoted literal text: copy through, keeping the quotes.
            result += "'"
            i += 1
            while i < len(format) and format[i] != "'":
                result += format[i]
                i += 1
            if i < len(format):
                result += "'"
                i += 1
        else:
            # Inspect the run of identical field letters starting here:
            s = format[i:]
            if s.startswith('E'): # week-day
                n = startCount('E', s)
                if n < 3:
                    result += 'ddd'
                elif n == 4:
                    result += 'dddd'
                else: # 5: narrow, 6 short; but should be name, not number :-(
                    result += 'd' if n < 6 else 'dd'
                i += n
            elif s[0] in 'ab': # am/pm
                # 'b' should distinguish noon/midnight, too :-(
                result += "AP"
                i += startCount('ab', s)
            elif s.startswith('S'): # fractions of seconds: count('S') == number of decimals to show
                result += 'z'
                i += startCount('S', s)
            elif s.startswith('V'): # long time zone specifiers (and a deprecated short ID)
                result += 't'
                i += startCount('V', s)
            elif s[0] in 'zv': # zone
                # Should use full name, e.g. "Central European Time", if 'zzzz' :-(
                # 'v' should get generic non-location format, e.g. PT for "Pacific Time", no DST indicator
                result += "t"
                i += startCount('zv', s)
            else:
                # Everything else passes through unchanged.
                result += format[i]
                i += 1

    return result
|
||||
|
||||
class QLocaleXmlReader (object):
    """Read a QLocaleXML file back in and expose its contents.

    Public attributes: languages, scripts, territories (each a mapping
    {ID: (name, code)}), dupes (names shared between the language and
    territory lists) and cldrVersion (text of the <version> element).
    """
    def __init__(self, filename):
        self.root = self.__parse(filename)
        # Lists of (id, name, code) triples:
        languages = tuple(self.__loadMap('language'))
        scripts = tuple(self.__loadMap('script'))
        territories = tuple(self.__loadMap('territory'))
        self.__likely = tuple(self.__likelySubtagsMap())
        # Mappings {ID: (name, code)}
        self.languages = dict((v[0], v[1:]) for v in languages)
        self.scripts = dict((v[0], v[1:]) for v in scripts)
        self.territories = dict((v[0], v[1:]) for v in territories)
        # Private mappings {name: (ID, code)}
        self.__langByName = dict((v[1], (v[0], v[2])) for v in languages)
        self.__textByName = dict((v[1], (v[0], v[2])) for v in scripts)
        self.__landByName = dict((v[1], (v[0], v[2])) for v in territories)
        # Other properties:
        self.dupes = set(v[1] for v in languages) & set(v[1] for v in territories)
        self.cldrVersion = self.__firstChildText(self.root, "version")

    def loadLocaleMap(self, calendars, grumble = lambda text: None):
        """Yield ((language, script, territory) IDs, Locale) pairs.

        When a non-C locale has no script, tries to fill one in from
        the likely-subtags data.  Complaints go to grumble.
        """
        kid = self.__firstChildText
        likely = dict(self.__likely)
        for elt in self.__eachEltInGroup(self.root, 'localeList', 'locale'):
            locale = Locale.fromXmlData(lambda k: kid(elt, k), calendars)
            language = self.__langByName[locale.language][0]
            script = self.__textByName[locale.script][0]
            territory = self.__landByName[locale.territory][0]

            if language != 1: # C
                if territory == 0:
                    grumble(f'loadLocaleMap: No territory id for "{locale.language}"\n')

                if script == 0:
                    # Find default script for the given language and territory - see:
                    # http://www.unicode.org/reports/tr35/#Likely_Subtags
                    try:
                        try:
                            to = likely[(locale.language, 'AnyScript', locale.territory)]
                        except KeyError:
                            to = likely[(locale.language, 'AnyScript', 'AnyTerritory')]
                    except KeyError:
                        pass
                    else:
                        locale.script = to[1]
                        script = self.__textByName[locale.script][0]

            yield (language, script, territory), locale

    def languageIndices(self, locales):
        """Yield (index, language-name) pairs for each known language.

        locales is a list of language names (with repeats); index is
        the offset of the language's first entry in that list, or 0
        if it has no entries.
        """
        index = 0
        for key, value in self.languages.items():
            i, count = 0, locales.count(key)
            if count > 0:
                i = index
            index += count
            yield i, value[0]

    def likelyMap(self):
        """Yield (have-tag, have-ids, give-tag, give-ids) quadruples.

        Tags are underscore-joined locale names ('und' for an unknown
        language); ids are the matching numeric ID triples.
        """
        def tag(t):
            lang, script, land = t
            yield lang[1] if lang[0] else 'und'
            if script[0]: yield script[1]
            if land[0]: yield land[1]

        def ids(t):
            return tuple(x[0] for x in t)

        for pair in self.__likely:
            have = self.__fromNames(pair[0])
            give = self.__fromNames(pair[1])
            yield ('_'.join(tag(have)), ids(have),
                   '_'.join(tag(give)), ids(give))

    def defaultMap(self):
        """Map language and script to their default territory by ID.

        Yields ((language, script), territory) wherever the likely
        sub-tags mapping says language's default locale uses the given
        script and territory."""
        for have, give in self.__likely:
            if have[1:] == ('AnyScript', 'AnyTerritory') and give[2] != 'AnyTerritory':
                assert have[0] == give[0], (have, give)
                yield ((self.__langByName[give[0]][0],
                        self.__textByName[give[1]][0]),
                       self.__landByName[give[2]][0])

    # Implementation details:
    def __loadMap(self, category):
        # Yield (id, name, code) triples from the <categoryList> element.
        kid = self.__firstChildText
        for element in self.__eachEltInGroup(self.root, f'{category}List', category):
            yield int(kid(element, 'id')), kid(element, 'name'), kid(element, 'code')

    def __likelySubtagsMap(self):
        # Yield pairs of (language, script, territory) name-triples,
        # from each <likelySubtag>'s <from> and <to> children.
        def triplet(element, keys=('language', 'script', 'territory'), kid = self.__firstChildText):
            return tuple(kid(element, key) for key in keys)

        kid = self.__firstChildElt
        for elt in self.__eachEltInGroup(self.root, 'likelySubtags', 'likelySubtag'):
            yield triplet(kid(elt, "from")), triplet(kid(elt, "to"))

    def __fromNames(self, names):
        # Map a (language, script, territory) name-triple to the
        # matching (ID, code) pairs.
        return self.__langByName[names[0]], self.__textByName[names[1]], self.__landByName[names[2]]

    # DOM access:
    from xml.dom import minidom
    @staticmethod
    def __parse(filename, read = minidom.parse):
        # Parse the file, returning its document element.
        return read(filename).documentElement

    @staticmethod
    def __isNodeNamed(elt, name, TYPE=minidom.Node.ELEMENT_NODE):
        return elt.nodeType == TYPE and elt.nodeName == name
    del minidom

    @staticmethod
    def __eltWords(elt):
        # Yield the direct text-node contents of elt.
        child = elt.firstChild
        while child:
            if child.nodeType == elt.TEXT_NODE:
                yield child.nodeValue
            child = child.nextSibling

    @classmethod
    def __firstChildElt(cls, parent, name):
        # First child element of parent with the given tag name;
        # raises Error if there is none.
        child = parent.firstChild
        while child:
            if cls.__isNodeNamed(child, name):
                return child
            child = child.nextSibling

        raise Error(f'No {name} child found')

    @classmethod
    def __firstChildText(cls, elt, key):
        # Space-joined text content of elt's first <key> child.
        return ' '.join(cls.__eltWords(cls.__firstChildElt(elt, key)))

    @classmethod
    def __eachEltInGroup(cls, parent, group, key):
        # Yield each <key> child of parent's <group> child, if any.
        try:
            element = cls.__firstChildElt(parent, group).firstChild
        except Error:
            element = None

        while element:
            if cls.__isNodeNamed(element, key):
                yield element
            element = element.nextSibling
||||
|
||||
|
||||
class Spacer (object):
    def __init__(self, indent = None, initial = ''):
        """Prepare to manage indentation and line breaks.

        Arguments are both optional.

        First argument, indent, is either None (its default, for
        'minifying'), an integer (number of spaces) or the unit of
        text to use per indentation level (e.g. '\t' for tabs).  If
        indent is None, lines pass through untouched; otherwise each
        non-empty line gains leading indentation and a trailing
        newline.

        Second argument, initial, is the starting indentation; it is
        ignored if indent is None.  Indentation grows after a line
        that opens a tag without closing it and shrinks on a line
        starting with an end-tag; the parsing is deliberately naive.
        """
        if indent is None:
            self.__call = lambda x: x
        else:
            self.__each = ' ' * indent if isinstance(indent, int) else indent
            self.current = initial
            self.__call = self.__wrap

    def __wrap(self, line):
        if not line:
            return '\n'

        prefix = self.current
        if line.startswith('</'):
            # Closing tag: dedent before emitting this line.
            prefix = self.current = prefix[:-len(self.__each)]
            return prefix + line + '\n'

        if line.startswith('<') and not line.startswith('<!'):
            gt = line.find('>')
            name = (line[1:] if gt < 0 else line[1 : gt]).strip().split()[0]
            # Indent further only if the tag isn't closed on this line:
            if f'</{name}>' not in line:
                self.current += self.__each
        return prefix + line + '\n'

    def __call__(self, line):
        return self.__call(line)
|
||||
|
||||
class QLocaleXmlWriter (object):
|
||||
def __init__(self, save = None, space = Spacer(4)):
    """Set up to write digested CLDR data as QLocale XML.

    Arguments are both optional.

    First argument, save, is None (its default) or a callable that
    will write content to where you intend to save it. If None, it
    is replaced with a callable that prints the given content,
    suppressing the newline (but see the following); this is
    equivalent to passing sys.stdout.write.

    Second argument, space, is an object to call on each text
    output to prepend indentation and append newlines, or not as
    the case may be. The default is a Spacer(4), which grows
    indent by four spaces after each unmatched new tag and shrinks
    back on a close-tag (its parsing is naive, but adequate to how
    this class uses it), while adding a newline to each line.
    """
    self.__rawOutput = self.__printit if save is None else save
    self.__wrap = space
    # Open the document element; close() writes the matching end-tag.
    self.__write('<localeDatabase>')
|
||||
|
||||
# Output of various sections, in their usual order:
def enumData(self):
    """Write the language, script and territory enum tables.

    Also records the full sets of language, script and territory
    codes (minus each list's unknown-marker), which later writing
    whittles down so close() can report any that go unused.
    """
    from enumdata import language_map, script_map, territory_map
    self.__enumTable('language', language_map)
    self.__enumTable('script', script_map)
    self.__enumTable('territory', territory_map)
    # Prepare to detect any unused codes (see __writeLocale(), close()):
    self.__languages = set(p[1] for p in language_map.values()
                           if not p[1].isspace())
    # Fix: script codes are four letters with 'Zzzz' as the unknown
    # marker, territory codes two letters with 'ZZ' unknown; the
    # original compared each set against the *other* list's marker,
    # so neither filter ever excluded anything.
    self.__scripts = set(p[1] for p in script_map.values()
                         if p[1] != 'Zzzz')
    self.__territories = set(p[1] for p in territory_map.values()
                             if p[1] != 'ZZ')
|
||||
|
||||
def likelySubTags(self, entries):
    """Write the likely-subtag mapping.

    Each entry is a (from, to) pair, written as the <from> and <to>
    children of a <likelySubtag> element.
    """
    self.__openTag('likelySubtags')
    for have, give in entries:
        self.__openTag('likelySubtag')
        self.__likelySubTag('from', have)
        self.__likelySubTag('to', give)
        self.__closeTag('likelySubtag')
    self.__closeTag('likelySubtags')
|
||||
|
||||
def locales(self, locales, calendars):
|
||||
self.__openTag('localeList')
|
||||
self.__openTag('locale')
|
||||
self.__writeLocale(Locale.C(calendars), calendars)
|
||||
self.__closeTag('locale')
|
||||
for key in sorted(locales.keys()):
|
||||
self.__openTag('locale')
|
||||
self.__writeLocale(locales[key], calendars)
|
||||
self.__closeTag('locale')
|
||||
self.__closeTag('localeList')
|
||||
|
||||
    def version(self, cldrVersion):
        # Record which CLDR release the data was digested from.
        self.inTag('version', cldrVersion)
|
||||
|
||||
    def inTag(self, tag, text):
        """Writes a complete element: open tag, text content, close tag."""
        self.__write(f'<{tag}>{text}</{tag}>')
|
||||
|
||||
def close(self, grumble):
|
||||
"""Finish writing and grumble any issues discovered."""
|
||||
if self.__rawOutput != self.__complain:
|
||||
self.__write('</localeDatabase>')
|
||||
self.__rawOutput = self.__complain
|
||||
|
||||
if self.__languages or self.__scripts or self.territories:
|
||||
grumble('Some enum members are unused, corresponding to these tags:\n')
|
||||
import textwrap
|
||||
def kvetch(kind, seq, g = grumble, w = textwrap.wrap):
|
||||
g('\n\t'.join(w(f' {kind}: {", ".join(sorted(seq))}', width=80)) + '\n')
|
||||
if self.__languages:
|
||||
kvetch('Languages', self.__languages)
|
||||
if self.__scripts:
|
||||
kvetch('Scripts', self.__scripts)
|
||||
if self.__territories:
|
||||
kvetch('Territories', self.__territories)
|
||||
grumble('It may make sense to deprecate them.\n')
|
||||
|
||||
# Implementation details
|
||||
    @staticmethod
    def __printit(text):
        # Default sink: stdout, without adding a newline (the Spacer
        # passed to __init__ already appends one per line).
        print(text, end='')
|
||||
    @staticmethod
    def __complain(text):
        # Sink installed by close(); any later write is a caller bug.
        raise Error('Attempted to write data after closing :-(')
|
||||
|
||||
def __enumTable(self, tag, table):
|
||||
self.__openTag(f'{tag}List')
|
||||
for key, value in table.items():
|
||||
self.__openTag(tag)
|
||||
self.inTag('name', value[0])
|
||||
self.inTag('id', key)
|
||||
self.inTag('code', value[1])
|
||||
self.__closeTag(tag)
|
||||
self.__closeTag(f'{tag}List')
|
||||
|
||||
def __likelySubTag(self, tag, likely):
|
||||
self.__openTag(tag)
|
||||
self.inTag('language', likely[0])
|
||||
self.inTag('script', likely[1])
|
||||
self.inTag('territory', likely[2])
|
||||
# self.inTag('variant', likely[3])
|
||||
self.__closeTag(tag)
|
||||
|
||||
def __writeLocale(self, locale, calendars):
|
||||
locale.toXml(self.inTag, calendars)
|
||||
self.__languages.discard(locale.language_code)
|
||||
self.__scripts.discard(locale.script_code)
|
||||
self.__territories.discard(locale.territory_code)
|
||||
|
||||
    def __openTag(self, tag):
        # Start-tag only; the Spacer grows indent until __closeTag().
        self.__write(f'<{tag}>')
    def __closeTag(self, tag):
        # Matching end-tag; the Spacer shrinks indent back again.
        self.__write(f'</{tag}>')
|
||||
|
||||
    def __write(self, line):
        # All output funnels through here: __wrap supplies indentation
        # and newline, __rawOutput delivers to the chosen destination.
        self.__rawOutput(self.__wrap(line))
|
||||
|
||||
class Locale (object):
    """Holder for the assorted data representing one locale.

    Implemented as a namespace; its constructor and update() have the
    same signatures as those of a dict, acting on the instance's
    __dict__, so the results are accessed as attributes rather than
    mapping keys."""
    def __init__(self, data=None, **kw):
        self.update(data, **kw)

    def update(self, data=None, **kw):
        # Mirrors dict.update(): data may be a mapping or an iterable
        # of key-value pairs; keyword arguments override it.
        if data: self.__dict__.update(data)
        if kw: self.__dict__.update(kw)

    def __len__(self): # Used when testing as a boolean
        return len(self.__dict__)

    @staticmethod
    def propsMonthDay(scale, lengths=('long', 'short', 'narrow')):
        # Yields the attribute names for day or month name data (pass
        # 'days' or 'months' as scale), plain then standalone form,
        # for each of the given lengths, via the camelCase() helper.
        for L in lengths:
            yield camelCase((L, scale))
            yield camelCase(('standalone', L, scale))

    # Expected to be numbers, read with int():
    __asint = ("currencyDigits", "currencyRounding")
    # Convert day-name to Qt day-of-week number:
    __asdow = ("firstDayOfWeek", "weekendStart", "weekendEnd")
    # Convert from CLDR format-strings to QDateTimeParser ones:
    __asfmt = ("longDateFormat", "shortDateFormat", "longTimeFormat", "shortTimeFormat")
    # Just use the raw text:
    __astxt = ("language", "languageEndonym", "script", "territory", "territoryEndonym",
               "decimal", "group", "zero",
               "list", "percent", "minus", "plus", "exp",
               "quotationStart", "quotationEnd",
               "alternateQuotationStart", "alternateQuotationEnd",
               "listPatternPartStart", "listPatternPartMiddle",
               "listPatternPartEnd", "listPatternPartTwo", "am", "pm",
               'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
               "currencyIsoCode", "currencySymbol", "currencyDisplayName",
               "currencyFormat", "currencyNegativeFormat")

    # Day-of-Week numbering used by Qt:
    __qDoW = {"mon": 1, "tue": 2, "wed": 3, "thu": 4, "fri": 5, "sat": 6, "sun": 7}

    @classmethod
    def fromXmlData(cls, lookup, calendars=('gregorian',)):
        """Constructor from the contents of XML elements.

        Single parameter, lookup, is called with the names of XML
        elements that should contain the relevant data, within a CLDR
        locale element (within a localeList element); these names are
        used for the attributes of the object constructed. Attribute
        values are obtained by suitably digesting the returned element
        texts.\n"""
        data = {}
        for k in cls.__asint:
            data[k] = int(lookup(k))

        for k in cls.__asdow:
            data[k] = cls.__qDoW[lookup(k)]

        for k in cls.__asfmt:
            data[k] = convertFormat(lookup(k))

        for k in cls.__astxt + tuple(cls.propsMonthDay('days')):
            # The 'list' element's value is stored as attribute 'listDelim':
            data['listDelim' if k == 'list' else k] = lookup(k)

        for k in cls.propsMonthDay('months'):
            # Month-name attributes hold a dict mapping calendar to names:
            data[k] = dict((cal, lookup('_'.join((k, cal)))) for cal in calendars)

        # groupSizes element is three ';'-joined numbers:
        grouping = lookup('groupSizes').split(';')
        data.update(groupLeast = int(grouping[0]),
                    groupHigher = int(grouping[1]),
                    groupTop = int(grouping[2]))

        return cls(data)

    def toXml(self, write, calendars=('gregorian',)):
        """Writes its data as QLocale XML.

        First argument, write, is a callable taking the name and
        content of an XML element; it is expected to be the inTag
        bound method of a QLocaleXmlWriter instance.

        Optional second argument is a list of calendar names, in the
        form used by CLDR; its default is ('gregorian',).
        """
        get = lambda k: getattr(self, k)
        for key in ('language', 'script', 'territory'):
            write(key, get(key))
            write(f'{key}code', get(f'{key}_code'))

        for key in ('decimal', 'group', 'zero', 'list',
                    'percent', 'minus', 'plus', 'exp'):
            write(key, get(key))

        # These go through escape(); presumably the xml.sax.saxutils
        # escape imported at the top of the file — confirm there.
        for key in ('languageEndonym', 'territoryEndonym',
                    'quotationStart', 'quotationEnd',
                    'alternateQuotationStart', 'alternateQuotationEnd',
                    'listPatternPartStart', 'listPatternPartMiddle',
                    'listPatternPartEnd', 'listPatternPartTwo',
                    'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
                    'am', 'pm', 'firstDayOfWeek',
                    'weekendStart', 'weekendEnd',
                    'longDateFormat', 'shortDateFormat',
                    'longTimeFormat', 'shortTimeFormat',
                    'currencyIsoCode', 'currencySymbol', 'currencyDisplayName',
                    'currencyFormat', 'currencyNegativeFormat'
                    ) + tuple(self.propsMonthDay('days')) + tuple(
                '_'.join((k, cal))
                for k in self.propsMonthDay('months')
                for cal in calendars):
            write(key, escape(get(key)))

        write('groupSizes', ';'.join(str(x) for x in get('groupSizes')))
        for key in ('currencyDigits', 'currencyRounding'):
            write(key, get(key))

    # Tools used by __monthNames: each maps (index, full name) to the
    # text to use for that month at some length.
    def fullName(i, name): return name
    def firstThree(i, name): return name[:3]
    def initial(i, name): return name[:1]
    def number(i, name): return str(i + 1)
    def islamicShort(i, name):
        if not name: return name
        if name == 'Shawwal': return 'Shaw.'
        words = name.split()
        if words[0].startswith('Dhu'):
            words[0] = words[0][:7] + '.'
        elif len(words[0]) > 3:
            words[0] = words[0][:3] + '.'
        return ' '.join(words)
    @staticmethod
    def __monthNames(calendars,
                     known={ # Map calendar to (names, extractors...):
                         # TODO: do we even need these ? CLDR's root.xml seems to
                         # have them, complete with yeartype="leap" handling for
                         # Hebrew's extra.
                         'gregorian': (('January', 'February', 'March', 'April', 'May', 'June', 'July',
                                        'August', 'September', 'October', 'November', 'December'),
                                       # Extractor pairs, (plain, standalone)
                                       (fullName, fullName), # long
                                       (firstThree, firstThree), # short
                                       (number, initial)), # narrow
                         'persian': (('Farvardin', 'Ordibehesht', 'Khordad', 'Tir', 'Mordad',
                                      'Shahrivar', 'Mehr', 'Aban', 'Azar', 'Dey', 'Bahman', 'Esfand'),
                                     (fullName, fullName),
                                     (firstThree, firstThree),
                                     (number, initial)),
                         'islamic': (('Muharram', 'Safar', 'Rabiʻ I', 'Rabiʻ II', 'Jumada I',
                                      'Jumada II', 'Rajab', 'Shaʻban', 'Ramadan', 'Shawwal',
                                      'Dhuʻl-Qiʻdah', 'Dhuʻl-Hijjah'),
                                     (fullName, fullName),
                                     (islamicShort, islamicShort),
                                     (number, number)),
                         'hebrew': (('Tishri', 'Heshvan', 'Kislev', 'Tevet', 'Shevat', 'Adar I',
                                     'Adar', 'Nisan', 'Iyar', 'Sivan', 'Tamuz', 'Av'),
                                    (fullName, fullName),
                                    (fullName, fullName),
                                    (number, number)),
                     },
                     sizes=('long', 'short', 'narrow')):
        # Yields (attribute-name, ';'-joined month names) pairs for each
        # calendar at each size, plain then standalone, for Locale.C().
        for cal in calendars:
            try:
                data = known[cal]
            except KeyError as e: # Need to add an entry to known, above.
                e.args += ('Unsupported calendar:', cal)
                raise
            names, get = data[0], data[1:]
            for n, size in enumerate(sizes):
                yield ('_'.join((camelCase((size, 'months')), cal)),
                       ';'.join(get[n][0](i, x) for i, x in enumerate(names)))
                yield ('_'.join((camelCase(('standalone', size, 'months')), cal)),
                       ';'.join(get[n][1](i, x) for i, x in enumerate(names)))
    # The tools were only needed while building __monthNames's default:
    del fullName, firstThree, initial, number, islamicShort

    @classmethod
    def C(cls, calendars=('gregorian',),
          days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday',
                  'Thursday', 'Friday', 'Saturday'),
          quantifiers=('k', 'M', 'G', 'T', 'P', 'E')):
        """Returns an object representing the C locale."""
        return cls(cls.__monthNames(calendars),
                   language='C', language_code='0', languageEndonym='',
                   script='AnyScript', script_code='0',
                   territory='AnyTerritory', territory_code='0', territoryEndonym='',
                   groupSizes=(3, 3, 1),
                   decimal='.', group=',', list=';', percent='%',
                   zero='0', minus='-', plus='+', exp='e',
                   quotationStart='"', quotationEnd='"',
                   alternateQuotationStart='\'', alternateQuotationEnd='\'',
                   listPatternPartStart='%1, %2',
                   listPatternPartMiddle='%1, %2',
                   listPatternPartEnd='%1, %2',
                   listPatternPartTwo='%1, %2',
                   byte_unit='bytes',
                   byte_si_quantified=';'.join(q + 'B' for q in quantifiers),
                   byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers),
                   am='AM', pm='PM', firstDayOfWeek='mon',
                   weekendStart='sat', weekendEnd='sun',
                   longDateFormat='EEEE, d MMMM yyyy', shortDateFormat='d MMM yyyy',
                   longTimeFormat='HH:mm:ss z', shortTimeFormat='HH:mm:ss',
                   longDays=';'.join(days),
                   shortDays=';'.join(d[:3] for d in days),
                   narrowDays='7;1;2;3;4;5;6',
                   standaloneLongDays=';'.join(days),
                   standaloneShortDays=';'.join(d[:3] for d in days),
                   standaloneNarrowDays=';'.join(d[:1] for d in days),
                   currencyIsoCode='', currencySymbol='',
                   currencyDisplayName='',
                   currencyDigits=2, currencyRounding=1,
                   currencyFormat='%1%2', currencyNegativeFormat='')
|
119
util/locale_database/qlocalexml.rnc
Normal file
119
util/locale_database/qlocalexml.rnc
Normal file
@ -0,0 +1,119 @@
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
# This is RelaxNG compact schema for qLocaleXML interemediate locale data
|
||||
# representation format produced and consumed by the qlocalexml module.
|
||||
#
|
||||
# To validate an xml file run:
|
||||
#
|
||||
# jing -c qlocalexml.rnc <your-file.xml>
|
||||
#
|
||||
# You can download jing from https://relaxng.org/jclark/jing.html if your
|
||||
# package manager lacks the jing package.
|
||||
|
||||
# Root: the whole QLocaleXML document.
start = element localeDatabase {
    element version { text },
    element languageList { Language+ },
    element scriptList { Script+ },
    element territoryList { Territory+ },
    element likelySubtags { LikelySubtag+ },
    element localeList { Locale+ }
}

# Enum tables: each entry names a code and its numeric QLocale enum id.
Language = element language { TagDescriptor }
Script = element script { TagDescriptor }
Territory = element territory { TagDescriptor }
TagDescriptor = (
    element name { text },
    element id { xsd:nonNegativeInteger },
    element code { text }
)

# CLDR's likely sub-tag mappings, as pairs of locale triplets.
LikelySubtag = element likelySubtag {
    element from { LocaleTriplet },
    element to { LocaleTriplet }
}

LocaleTriplet = (
    element language { text },
    element script { text },
    element territory { text }
)

WeekDay = ("sun" | "mon" | "tue" | "wed" | "thu" | "fri" | "sat")
Digit = xsd:string { pattern = "\d" }
Punctuation = xsd:string { pattern = "\p{P}" }
GroupSizes = xsd:string { pattern = "\d;\d;\d" }

# One locale's digested data; element order matters (this is a sequence).
Locale = element locale {
    element language { text },
    element languagecode { text },
    element script { text },
    element scriptcode { text },
    element territory { text },
    element territorycode { text },
    element decimal { Punctuation },
    element group { text },
    element zero { Digit },
    element list { Punctuation },
    element percent { text },
    element minus { text },
    element plus { text },
    element exp { text },
    element languageEndonym { text },
    element territoryEndonym { text },
    element quotationStart { Punctuation },
    element quotationEnd { Punctuation },
    element alternateQuotationStart { Punctuation },
    element alternateQuotationEnd { Punctuation },
    element listPatternPartStart { text },
    element listPatternPartMiddle { text },
    element listPatternPartEnd { text },
    element listPatternPartTwo { text },
    element byte_unit { text },
    element byte_si_quantified { text },
    element byte_iec_quantified { text },
    element am { text },
    element pm { text },
    element firstDayOfWeek { text },
    element weekendStart { WeekDay },
    element weekendEnd { WeekDay },
    element longDateFormat { text },
    element shortDateFormat { text },
    element longTimeFormat { text },
    element shortTimeFormat { text },
    element currencyIsoCode { text },
    element currencySymbol { text },
    element currencyDisplayName { text },
    element currencyFormat { text },
    element currencyNegativeFormat { text },
    element longDays { text },
    element standaloneLongDays { text },
    element shortDays { text },
    element standaloneShortDays { text },
    element narrowDays { text },
    element standaloneNarrowDays { text },

    # Some of these entries may be absent depending on command line arguments
    element longMonths_gregorian { text }?,
    element longMonths_persian { text }?,
    element longMonths_islamic { text }?,
    element standaloneLongMonths_gregorian { text }?,
    element standaloneLongMonths_persian { text }?,
    element standaloneLongMonths_islamic { text }?,
    element shortMonths_gregorian { text }?,
    element shortMonths_persian { text }?,
    element shortMonths_islamic { text }?,
    element standaloneShortMonths_gregorian { text }?,
    element standaloneShortMonths_persian { text }?,
    element standaloneShortMonths_islamic { text }?,
    element narrowMonths_gregorian { text }?,
    element narrowMonths_persian { text }?,
    element narrowMonths_islamic { text }?,
    element standaloneNarrowMonths_gregorian { text }?,
    element standaloneNarrowMonths_persian { text }?,
    element standaloneNarrowMonths_islamic { text }?,

    element groupSizes { GroupSizes },
    element currencyDigits { xsd:nonNegativeInteger },
    element currencyRounding { xsd:nonNegativeInteger }
}
|
618
util/locale_database/qlocalexml2cpp.py
Normal file
618
util/locale_database/qlocalexml2cpp.py
Normal file
@ -0,0 +1,618 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Script to generate C++ code from CLDR data in QLocaleXML form
|
||||
|
||||
See ``cldr2qlocalexml.py`` for how to generate the QLocaleXML data itself.
|
||||
Pass the output file from that as first parameter to this script; pass the ISO
|
||||
639-3 data file as second parameter. You can optionally pass the root of the
|
||||
qtbase check-out as third parameter; it defaults to the root of the qtbase
|
||||
check-out containing this script.
|
||||
|
||||
The ISO 639-3 data file can be downloaded from the SIL website:
|
||||
|
||||
https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from qlocalexml import QLocaleXmlReader
|
||||
from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor, qtbase_root
|
||||
from iso639_3 import LanguageCodeData
|
||||
|
||||
class LocaleKeySorter:
    """Sort-ordering representation of a locale key.

    This is for passing to a sorting algorithm as key-function, that
    it applies to each entry in the list to decide which belong
    earlier. It adds an entry to the (language, script, territory)
    triple, just before script, that sorts earlier if the territory is
    the default for the given language and script, later otherwise.
    """

    # TODO: study the relationship between this and CLDR's likely
    # sub-tags algorithm. Work out how locale sort-order impacts
    # QLocale's likely sub-tag matching algorithms. Make sure this is
    # sorting in an order compatible with those algorithms.

    def __init__(self, defaults):
        # Maps (language, script) to its default territory.
        self.map = dict(defaults)

    def foreign(self, key):
        """True unless key's territory is the default for its first two entries."""
        preferred = self.map.get(key[:2])
        if preferred is None:
            return True
        return preferred != key[2]

    def __call__(self, key):
        # TODO: should we compare territory before or after script ?
        return (key[0], self.foreign(key)) + key[1:]
|
||||
|
||||
class StringDataToken:
    """Start-index and length of one slice of a StringData table.

    The index must fit a quint16 field; the length must fit the
    bit-field of the given width that the generated C struct allots it.
    """
    def __init__(self, index, length, bits):
        if not index <= 0xffff:
            raise ValueError(f'Start-index ({index}) exceeds the uint16 range!')
        if not length < (1 << bits):
            raise ValueError(f'Data size ({length}) exceeds the {bits}-bit range!')

        self.index, self.length = index, length
|
||||
|
||||
class StringData:
    # Accumulates the contents of one generated char16_t[] table,
    # de-duplicating repeated (and embedded) strings where it can.
    def __init__(self, name):
        self.data = []   # The UCS-2 code units stored so far (via unicode2hex)
        self.hash = {}   # Maps already-stored string to its StringDataToken
        self.name = name # Name of the C array write() shall emit
        self.text = '' # Used in quick-search for matches in data

    def append(self, s, bits = 8):
        # Returns the (possibly cached) token locating s in the table;
        # bits is the width of the length bit-field in the C struct.
        try:
            token = self.hash[s]
        except KeyError:
            token = self.__store(s, bits)
            self.hash[s] = token
        return token

    def __store(self, s, bits):
        """Add string s to known data.

        Seeks to avoid duplication, where possible.
        For example, short-forms may be prefixes of long-forms.
        """
        if not s:
            # Empty string: zero-length token at index 0.
            return StringDataToken(0, 0, bits)
        ucs2 = unicode2hex(s)
        try:
            # self.text.index(s) raises ValueError when s was never seen,
            # skipping straight to the plain-append path below.
            index = self.text.index(s) - 1
            matched = 0
            while matched < len(ucs2):
                index, matched = self.data.index(ucs2[0], index + 1), 1
                if index + len(ucs2) >= len(self.data):
                    raise ValueError # not found after all !
                while matched < len(ucs2) and self.data[index + matched] == ucs2[matched]:
                    matched += 1
        except ValueError:
            # No earlier copy found; append at the end.
            index = len(self.data)
            self.data += ucs2
            self.text += s

        assert index >= 0
        try:
            return StringDataToken(index, len(ucs2), bits)
        except ValueError as e:
            # Report which table and string overflowed the token's fields:
            e.args += (self.name, s)
            raise

    def write(self, fd):
        # The generated index tables use quint16, so the data must fit:
        if len(self.data) > 0xffff:
            raise ValueError(f'Data is too big ({len(self.data)}) for quint16 index to its end!',
                             self.name)
        fd.write(f"\nstatic constexpr char16_t {self.name}[] = {{\n")
        fd.write(wrap_list(self.data))
        fd.write("\n};\n")
|
||||
|
||||
def currencyIsoCodeData(s):
    """Render an ISO currency code as a C char-array initializer.

    Returns '{c1,c2,...}' listing the code points of the characters of
    s, or '{0,0,0}' when s is empty or otherwise falsy.
    """
    if not s:
        return "{0,0,0}"
    return '{' + ",".join(str(ord(ch)) for ch in s) + '}'
|
||||
|
||||
class LocaleSourceEditor (SourceFileEditor):
    # Shared base for the generated-source writers below: on entry it
    # stamps the output with the CLDR version and a do-not-edit notice.
    def __init__(self, path: Path, temp: Path, version: str):
        super().__init__(path, temp)
        # CLDR release the data came from; quoted in the header below.
        self.version = version

    def onEnter(self) -> None:
        super().onEnter()
        self.writer.write(f"""
/*
    This part of the file was generated on {datetime.date.today()} from the
    Common Locale Data Repository v{self.version}

    http://www.unicode.org/cldr/

    Do not edit this section: instead regenerate it using
    cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or
    edited) CLDR data; see qtbase/util/locale_database/.
*/

""")
|
||||
|
||||
class LocaleDataWriter (LocaleSourceEditor):
|
||||
def likelySubtags(self, likely):
|
||||
# First sort likely, so that we can use binary search in C++
|
||||
# code. Although the entries are (lang, script, region), sort
|
||||
# as (lang, region, script) and sort 0 after all non-zero
|
||||
# values. This ensures that, when several mappings partially
|
||||
# match a requested locale, the one we should prefer to use
|
||||
# appears first.
|
||||
huge = 0x10000 # > any ushort; all tag values are ushort
|
||||
def keyLikely(entry):
|
||||
have = entry[1] # Numeric id triple
|
||||
return have[0] or huge, have[2] or huge, have[1] or huge # language, region, script
|
||||
likely = sorted(likely, key=keyLikely)
|
||||
|
||||
i = 0
|
||||
self.writer.write('static constexpr QLocaleId likely_subtags[] = {\n')
|
||||
for had, have, got, give in likely:
|
||||
i += 1
|
||||
self.writer.write(' {{ {:3d}, {:3d}, {:3d} }}'.format(*have))
|
||||
self.writer.write(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give))
|
||||
self.writer.write(' ' if i == len(likely) else ',')
|
||||
self.writer.write(f' // {had} -> {got}\n')
|
||||
self.writer.write('};\n\n')
|
||||
|
||||
def localeIndex(self, indices):
|
||||
self.writer.write('static constexpr quint16 locale_index[] = {\n')
|
||||
for index, name in indices:
|
||||
self.writer.write(f'{index:6d}, // {name}\n')
|
||||
self.writer.write(' 0 // trailing 0\n')
|
||||
self.writer.write('};\n\n')
|
||||
|
||||
def localeData(self, locales, names):
|
||||
list_pattern_part_data = StringData('list_pattern_part_data')
|
||||
single_character_data = StringData('single_character_data')
|
||||
date_format_data = StringData('date_format_data')
|
||||
time_format_data = StringData('time_format_data')
|
||||
days_data = StringData('days_data')
|
||||
am_data = StringData('am_data')
|
||||
pm_data = StringData('pm_data')
|
||||
byte_unit_data = StringData('byte_unit_data')
|
||||
currency_symbol_data = StringData('currency_symbol_data')
|
||||
currency_display_name_data = StringData('currency_display_name_data')
|
||||
currency_format_data = StringData('currency_format_data')
|
||||
endonyms_data = StringData('endonyms_data')
|
||||
|
||||
# Locale data
|
||||
self.writer.write('static constexpr QLocaleData locale_data[] = {\n')
|
||||
# Table headings: keep each label centred in its field, matching line_format:
|
||||
self.writer.write(' // '
|
||||
# Width 6 + comma
|
||||
' lang ' # IDs
|
||||
'script '
|
||||
' terr '
|
||||
|
||||
# Range entries (all start-indices, then all sizes)
|
||||
# Width 5 + comma
|
||||
'lStrt ' # List pattern
|
||||
'lpMid '
|
||||
'lpEnd '
|
||||
'lPair '
|
||||
'lDelm ' # List delimiter
|
||||
# Representing numbers
|
||||
' dec '
|
||||
'group '
|
||||
'prcnt '
|
||||
' zero '
|
||||
'minus '
|
||||
'plus '
|
||||
' exp '
|
||||
# Quotation marks
|
||||
'qtOpn '
|
||||
'qtEnd '
|
||||
'altQO '
|
||||
'altQE '
|
||||
'lDFmt ' # Date format
|
||||
'sDFmt '
|
||||
'lTFmt ' # Time format
|
||||
'sTFmt '
|
||||
'slDay ' # Day names
|
||||
'lDays '
|
||||
'ssDys '
|
||||
'sDays '
|
||||
'snDay '
|
||||
'nDays '
|
||||
' am ' # am/pm indicators
|
||||
' pm '
|
||||
' byte '
|
||||
'siQnt '
|
||||
'iecQn '
|
||||
'crSym ' # Currency formatting
|
||||
'crDsp '
|
||||
'crFmt '
|
||||
'crFNg '
|
||||
'ntLng ' # Name of language in itself, and of territory
|
||||
'ntTer '
|
||||
# Width 3 + comma for each size; no header
|
||||
+ ' ' * 37 +
|
||||
|
||||
# Strays (char array, bit-fields):
|
||||
# Width 10 + 2 spaces + comma
|
||||
' currISO '
|
||||
# Width 6 + comma
|
||||
'curDgt ' # Currency digits
|
||||
'curRnd ' # Currencty rounding (unused: QTBUG-81343)
|
||||
'dow1st ' # First day of week
|
||||
' wknd+ ' # Week-end start/end days
|
||||
' wknd- '
|
||||
'grpTop '
|
||||
'grpMid '
|
||||
'grpEnd'
|
||||
# No trailing space on last entry (be sure to
|
||||
# pad before adding anything after it).
|
||||
'\n')
|
||||
|
||||
formatLine = ''.join((
|
||||
' {{ ',
|
||||
# Locale-identifier
|
||||
'{:6d},' * 3,
|
||||
# List patterns, date/time formats, day names, am/pm
|
||||
# SI/IEC byte-unit abbreviations
|
||||
# Currency and endonyms
|
||||
# Range starts
|
||||
'{:5d},' * 37,
|
||||
# Range sizes
|
||||
'{:3d},' * 37,
|
||||
|
||||
# Currency ISO code
|
||||
' {:>10s}, ',
|
||||
# Currency formatting
|
||||
'{:6d},{:6d}',
|
||||
# Day of week and week-end
|
||||
',{:6d}' * 3,
|
||||
# Number group sizes
|
||||
',{:6d}' * 3,
|
||||
' }}')).format
|
||||
for key in names:
|
||||
locale = locales[key]
|
||||
# Sequence of StringDataToken:
|
||||
ranges = (tuple(list_pattern_part_data.append(p) for p in # 5 entries:
|
||||
(locale.listPatternPartStart, locale.listPatternPartMiddle,
|
||||
locale.listPatternPartEnd, locale.listPatternPartTwo,
|
||||
locale.listDelim)) +
|
||||
tuple(single_character_data.append(p) for p in # 11 entries
|
||||
(locale.decimal, locale.group, locale.percent, locale.zero,
|
||||
locale.minus, locale.plus, locale.exp,
|
||||
locale.quotationStart, locale.quotationEnd,
|
||||
locale.alternateQuotationStart, locale.alternateQuotationEnd)) +
|
||||
tuple (date_format_data.append(f) for f in # 2 entries:
|
||||
(locale.longDateFormat, locale.shortDateFormat)) +
|
||||
tuple(time_format_data.append(f) for f in # 2 entries:
|
||||
(locale.longTimeFormat, locale.shortTimeFormat)) +
|
||||
tuple(days_data.append(d) for d in # 6 entries:
|
||||
(locale.standaloneLongDays, locale.longDays,
|
||||
locale.standaloneShortDays, locale.shortDays,
|
||||
locale.standaloneNarrowDays, locale.narrowDays)) +
|
||||
(am_data.append(locale.am), pm_data.append(locale.pm)) + # 2 entries
|
||||
tuple(byte_unit_data.append(b) for b in # 3 entries:
|
||||
(locale.byte_unit,
|
||||
locale.byte_si_quantified,
|
||||
locale.byte_iec_quantified)) +
|
||||
(currency_symbol_data.append(locale.currencySymbol),
|
||||
currency_display_name_data.append(locale.currencyDisplayName),
|
||||
currency_format_data.append(locale.currencyFormat),
|
||||
currency_format_data.append(locale.currencyNegativeFormat),
|
||||
endonyms_data.append(locale.languageEndonym),
|
||||
endonyms_data.append(locale.territoryEndonym)) # 6 entries
|
||||
) # Total: 37 entries
|
||||
assert len(ranges) == 37
|
||||
|
||||
self.writer.write(formatLine(*(
|
||||
key +
|
||||
tuple(r.index for r in ranges) +
|
||||
tuple(r.length for r in ranges) +
|
||||
(currencyIsoCodeData(locale.currencyIsoCode),
|
||||
locale.currencyDigits,
|
||||
locale.currencyRounding, # unused (QTBUG-81343)
|
||||
locale.firstDayOfWeek, locale.weekendStart, locale.weekendEnd,
|
||||
locale.groupTop, locale.groupHigher, locale.groupLeast) ))
|
||||
+ f', // {locale.language}/{locale.script}/{locale.territory}\n')
|
||||
self.writer.write(formatLine(*( # All zeros, matching the format:
|
||||
(0,) * 3 + (0,) * 37 * 2
|
||||
+ (currencyIsoCodeData(0),)
|
||||
+ (0,) * 8 ))
|
||||
+ ' // trailing zeros\n')
|
||||
self.writer.write('};\n')
|
||||
|
||||
# StringData tables:
|
||||
for data in (list_pattern_part_data, single_character_data,
|
||||
date_format_data, time_format_data, days_data,
|
||||
byte_unit_data, am_data, pm_data, currency_symbol_data,
|
||||
currency_display_name_data, currency_format_data,
|
||||
endonyms_data):
|
||||
data.write(self.writer)
|
||||
|
||||
@staticmethod
|
||||
def __writeNameData(out, book, form):
|
||||
out(f'static constexpr char {form}_name_list[] =\n')
|
||||
out('"Default\\0"\n')
|
||||
for key, value in book.items():
|
||||
if key == 0:
|
||||
continue
|
||||
out(f'"{value[0]}\\0"\n')
|
||||
out(';\n\n')
|
||||
|
||||
out(f'static constexpr quint16 {form}_name_index[] = {{\n')
|
||||
out(f' 0, // Any{form.capitalize()}\n')
|
||||
index = 8
|
||||
for key, value in book.items():
|
||||
if key == 0:
|
||||
continue
|
||||
name = value[0]
|
||||
out(f'{index:6d}, // {name}\n')
|
||||
index += len(name) + 1
|
||||
out('};\n\n')
|
||||
|
||||
@staticmethod
|
||||
def __writeCodeList(out, book, form, width):
|
||||
out(f'static constexpr unsigned char {form}_code_list[] =\n')
|
||||
for key, value in book.items():
|
||||
code = value[1]
|
||||
code += r'\0' * max(width - len(code), 0)
|
||||
out(f'"{code}" // {value[0]}\n')
|
||||
out(';\n\n')
|
||||
|
||||
def languageNames(self, languages):
|
||||
self.__writeNameData(self.writer.write, languages, 'language')
|
||||
|
||||
def scriptNames(self, scripts):
|
||||
self.__writeNameData(self.writer.write, scripts, 'script')
|
||||
|
||||
def territoryNames(self, territories):
|
||||
self.__writeNameData(self.writer.write, territories, 'territory')
|
||||
|
||||
# TODO: unify these next three into the previous three; kept
|
||||
# separate for now to verify we're not changing data.
|
||||
|
||||
def languageCodes(self, languages, code_data: LanguageCodeData):
|
||||
out = self.writer.write
|
||||
|
||||
out(f'constexpr std::array<LanguageCodeEntry, {len(languages)}> languageCodeList {{\n')
|
||||
|
||||
def q(val: Optional[str], size: int) -> str:
|
||||
"""Quote the value and adjust the result for tabular view."""
|
||||
chars = []
|
||||
if val is not None:
|
||||
for c in val:
|
||||
chars.append(f"'{c}'")
|
||||
s = ', '.join(chars)
|
||||
s = f'{{{s}}}'
|
||||
else:
|
||||
s = ''
|
||||
if size == 0:
|
||||
return f'{{{s}}}'
|
||||
else:
|
||||
return f'{{{s}}},'.ljust(size * 5 + 4)
|
||||
|
||||
for key, value in languages.items():
|
||||
code = value[1]
|
||||
if key < 2:
|
||||
result = code_data.query('und')
|
||||
else:
|
||||
result = code_data.query(code)
|
||||
assert code == result.id()
|
||||
assert result is not None
|
||||
|
||||
codeString = q(result.part1Code, 2)
|
||||
codeString += q(result.part2BCode, 3)
|
||||
codeString += q(result.part2TCode, 3)
|
||||
codeString += q(result.part3Code, 0)
|
||||
out(f' LanguageCodeEntry {{{codeString}}}, // {value[0]}\n')
|
||||
|
||||
out('};\n\n')
|
||||
|
||||
def scriptCodes(self, scripts):
|
||||
self.__writeCodeList(self.writer.write, scripts, 'script', 4)
|
||||
|
||||
    def territoryCodes(self, territories): # TODO: unify with territoryNames()
        """Emit the table of (up to) three-character territory codes."""
        self.__writeCodeList(self.writer.write, territories, 'territory', 3)
|
||||
|
||||
class CalendarDataWriter (LocaleSourceEditor):
    """Updates one calendar's q*calendar_data_p.h header.

    Emits a locale_data table mapping each locale's IDs to the start
    indices and lengths of its six month-name lists within a shared
    months_data string table, followed by that string table.
    """
    # Row format: three ushort IDs, six start-indices, six lengths.
    formatCalendar = (
        ' {{'
        + ','.join(('{:6d}',) * 3 + ('{:5d}',) * 6 + ('{:3d}',) * 6)
        + ' }},').format

    def write(self, calendar, locales, names):
        """Write the month-name data for the given calendar.

        calendar -- CLDR calendar name (e.g. 'gregorian');
        locales -- mapping from ID-triple keys to Locale objects;
        names -- the keys of locales, in the order rows are emitted.
        """
        months_data = StringData('months_data')

        self.writer.write('static constexpr QCalendarLocale locale_data[] = {\n')
        self.writer.write(
            ' //'
            # IDs, width 7 (6 + comma)
            ' lang '
            ' script'
            ' terr '
            # Month-name start-indices, width 6 (5 + comma)
            'sLong '
            ' long '
            'sShrt '
            'short '
            'sNarw '
            'narow '
            # No individual headers for the sizes.
            'Sizes...'
            '\n')
        for key in names:
            locale = locales[key]
            # Sequence of StringDataToken:
            try:
                # Twelve long month names can add up to more than 256 (e.g. kde_TZ: 264)
                ranges = (tuple(months_data.append(m[calendar], 16) for m in
                                (locale.standaloneLongMonths, locale.longMonths)) +
                          tuple(months_data.append(m[calendar]) for m in
                                (locale.standaloneShortMonths, locale.shortMonths,
                                 locale.standaloneNarrowMonths, locale.narrowMonths)))
            except ValueError as e:
                # Identify which locale overflowed before re-raising.
                e.args += (locale.language, locale.script, locale.territory)
                raise

            self.writer.write(
                self.formatCalendar(*(
                    key +
                    tuple(r.index for r in ranges) +
                    tuple(r.length for r in ranges) ))
                + f'// {locale.language}/{locale.script}/{locale.territory}\n')
        # Sentinel row of zeros terminates the table.
        self.writer.write(self.formatCalendar(*( (0,) * (3 + 6 * 2) ))
                          + '// trailing zeros\n')
        self.writer.write('};\n')
        months_data.write(self.writer)
|
||||
|
||||
class LocaleHeaderWriter (SourceFileEditor):
    """Updates the Language, Script and Country enums in qlocale.h.

    dupes is the set of names that appear in more than one of the three
    enums; such members get the enum's name appended to disambiguate.
    """
    def __init__(self, path, temp, dupes):
        super().__init__(path, temp)
        self.__dupes = dupes

    def languages(self, languages):
        """Emit the Language enum."""
        self.__enum('Language', languages, self.__language)
        self.writer.write('\n')

    def territories(self, territories):
        """Emit the Country enum (with Territory aliases for Qt 6)."""
        self.writer.write(" // ### Qt 7: Rename to Territory\n")
        self.__enum('Country', territories, self.__territory, 'Territory')

    def scripts(self, scripts):
        """Emit the Script enum."""
        self.__enum('Script', scripts, self.__script)
        self.writer.write('\n')

    # Implementation details
    # Class-level import binds the alias tables as (name-mangled)
    # private class attributes.
    from enumdata import (language_aliases as __language,
                          territory_aliases as __territory,
                          script_aliases as __script)

    def __enum(self, name, book, alias, suffix = None):
        # book: {enum value: (name, code, ...)}; alias: {old: new} for
        # backwards-compatible deprecated members.
        assert book

        if suffix is None:
            suffix = name

        out, dupes = self.writer.write, self.__dupes
        out(f' enum {name} : ushort {{\n')
        for key, value in book.items():
            member = value[0].replace('-', ' ')
            if name == 'Script':
                # Don't .capitalize() as some names are already camel-case (see enumdata.py):
                member = ''.join(word[0].upper() + word[1:] for word in member.split())
                if not member.endswith('Script'):
                    member += 'Script'
                if member in dupes:
                    raise Error(f'The script name "{member}" is messy')
            else:
                member = ''.join(member.split())
                # Disambiguate cross-enum clashes by appending the suffix.
                member = member + suffix if member in dupes else member
            out(f' {member} = {key},\n')

        # After the loop, member is the last (highest-valued) entry.
        out('\n '
            + ',\n '.join(f'{k} = {v}' for k, v in sorted(alias.items()))
            + f',\n\n Last{suffix} = {member}')

        # for "LastCountry = LastTerritory"
        # ### Qt 7: Remove
        if suffix != name:
            out(f',\n Last{name} = Last{suffix}')

        out('\n };\n')
|
||||
|
||||
|
||||
def main(out, err):
    """Generate C++ locale data from a QLocaleXML file.

    Parses the command line, then rewrites qlocale_data_p.h, the
    selected calendars' data headers, qlocale.h and qlocale.qdoc under
    the given qtbase source tree.  Returns the process exit status:
    0 on success, 1 on failure.  (out is accepted for symmetry with
    err but is currently unused.)
    """
    calendars_map = {
        # CLDR name: Qt file name fragment
        'gregorian': 'roman',
        'persian': 'jalali',
        'islamic': 'hijri',
        # 'hebrew': 'hebrew'
    }
    all_calendars = list(calendars_map.keys())

    parser = argparse.ArgumentParser(
        description='Generate C++ code from CLDR data in QLocaleXML form.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input_file', help='input XML file name',
                        metavar='input-file.xml')
    parser.add_argument('iso_path', help='path to the ISO 639-3 data file',
                        metavar='iso-639-3.tab')
    parser.add_argument('qtbase_path', help='path to the root of the qtbase source tree',
                        nargs='?', default=qtbase_root)
    parser.add_argument('--calendars', help='select calendars to emit data for',
                        nargs='+', metavar='CALENDAR',
                        choices=all_calendars, default=all_calendars)
    args = parser.parse_args()

    qlocalexml = args.input_file
    qtsrcdir = Path(args.qtbase_path)
    calendars = {cal: calendars_map[cal] for cal in args.calendars}

    # Sanity-check the target tree before writing anything.
    if not (qtsrcdir.is_dir()
            and all(qtsrcdir.joinpath('src/corelib/text', leaf).is_file()
                    for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))):
        parser.error(f'Missing expected files under qtbase source root {qtsrcdir}')

    reader = QLocaleXmlReader(qlocalexml)
    locale_map = dict(reader.loadLocaleMap(calendars, err.write))
    locale_keys = sorted(locale_map.keys(), key=LocaleKeySorter(reader.defaultMap()))

    code_data = LanguageCodeData(args.iso_path)

    try:
        with LocaleDataWriter(qtsrcdir.joinpath('src/corelib/text/qlocale_data_p.h'),
                              qtsrcdir, reader.cldrVersion) as writer:
            writer.likelySubtags(reader.likelyMap())
            writer.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map)))
            writer.localeData(locale_map, locale_keys)
            writer.writer.write('\n')
            writer.languageNames(reader.languages)
            writer.scriptNames(reader.scripts)
            writer.territoryNames(reader.territories)
            # TODO: merge the next three into the previous three
            writer.languageCodes(reader.languages, code_data)
            writer.scriptCodes(reader.scripts)
            writer.territoryCodes(reader.territories)
    except Exception as e:
        err.write(f'\nError updating locale data: {e}\n')
        return 1

    # Generate calendar data
    for calendar, stem in calendars.items():
        try:
            with CalendarDataWriter(
                    qtsrcdir.joinpath(f'src/corelib/time/q{stem}calendar_data_p.h'),
                    qtsrcdir, reader.cldrVersion) as writer:
                writer.write(calendar, locale_map, locale_keys)
        except Exception as e:
            # NOTE(review): best-effort — remaining calendars are still
            # attempted and the exit status stays 0; confirm this is
            # intended rather than a missing `return 1`.
            err.write(f'\nError updating {calendar} locale data: {e}\n')

    # qlocale.h
    try:
        with LocaleHeaderWriter(qtsrcdir.joinpath('src/corelib/text/qlocale.h'),
                                qtsrcdir, reader.dupes) as writer:
            writer.languages(reader.languages)
            writer.scripts(reader.scripts)
            writer.territories(reader.territories)
    except Exception as e:
        # NOTE(review): also best-effort (no `return 1`) — confirm intended.
        err.write(f'\nError updating qlocale.h: {e}\n')

    # qlocale.qdoc
    try:
        with Transcriber(qtsrcdir.joinpath('src/corelib/text/qlocale.qdoc'), qtsrcdir) as qdoc:
            DOCSTRING = " QLocale's data is based on Common Locale Data Repository "
            for line in qdoc.reader:
                if DOCSTRING in line:
                    # Refresh the CLDR version mentioned in the docs.
                    qdoc.writer.write(f'{DOCSTRING}v{reader.cldrVersion}.\n')
                else:
                    qdoc.writer.write(line)
    except Exception as e:
        # Fixed: this handler previously misreported "qlocale.h".
        err.write(f'\nError updating qlocale.qdoc: {e}\n')
        return 1

    return 0
|
||||
|
||||
if __name__ == "__main__":
    # Exit status propagates main()'s result: 0 on success, 1 on failure.
    import sys
    sys.exit(main(sys.stdout, sys.stderr))
|
424
util/locale_database/testlocales/localemodel.cpp
Normal file
424
util/locale_database/testlocales/localemodel.cpp
Normal file
@ -0,0 +1,424 @@
|
||||
// Copyright (C) 2016 The Qt Company Ltd.
|
||||
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
#include "localemodel.h"
|
||||
|
||||
#include <QLocale>
|
||||
#include <QDate>
|
||||
#include <qdebug.h>
|
||||
|
||||
// Number of model columns: double, long/short date, long/short time, name.
static const int g_model_cols = 6;

// One row's locale: QLocale::Language / QLocale::Territory enum values
// stored as plain ints.
struct LocaleListItem
{
    int language;
    int territory;
};
|
||||
|
||||
// Hard-coded list of the locales the model displays, one table row
// each, as (language, territory) enum-value pairs.
const LocaleListItem g_locale_list[] = {
    {      1,     0 }, // C/AnyTerritory
    {      3,    69 }, // Afan/Ethiopia
    {      3,   111 }, // Afan/Kenya
    {      4,    59 }, // Afar/Djibouti
    {      4,    67 }, // Afar/Eritrea
    {      4,    69 }, // Afar/Ethiopia
    {      5,   195 }, // Afrikaans/SouthAfrica
    {      5,   148 }, // Afrikaans/Namibia
    {      6,     2 }, // Albanian/Albania
    {      7,    69 }, // Amharic/Ethiopia
    {      8,   186 }, // Arabic/SaudiArabia
    {      8,     3 }, // Arabic/Algeria
    {      8,    17 }, // Arabic/Bahrain
    {      8,    64 }, // Arabic/Egypt
    {      8,   103 }, // Arabic/Iraq
    {      8,   109 }, // Arabic/Jordan
    {      8,   115 }, // Arabic/Kuwait
    {      8,   119 }, // Arabic/Lebanon
    {      8,   122 }, // Arabic/LibyanArabJamahiriya
    {      8,   145 }, // Arabic/Morocco
    {      8,   162 }, // Arabic/Oman
    {      8,   175 }, // Arabic/Qatar
    {      8,   201 }, // Arabic/Sudan
    {      8,   207 }, // Arabic/SyrianArabRepublic
    {      8,   216 }, // Arabic/Tunisia
    {      8,   223 }, // Arabic/UnitedArabEmirates
    {      8,   237 }, // Arabic/Yemen
    {      9,    11 }, // Armenian/Armenia
    {     10,   100 }, // Assamese/India
    {     12,    15 }, // Azerbaijani/Azerbaijan
    {     14,   197 }, // Basque/Spain
    {     15,    18 }, // Bengali/Bangladesh
    {     15,   100 }, // Bengali/India
    {     16,    25 }, // Bhutani/Bhutan
    {     20,    33 }, // Bulgarian/Bulgaria
    {     22,    20 }, // Byelorussian/Belarus
    {     23,    36 }, // Cambodian/Cambodia
    {     24,   197 }, // Catalan/Spain
    {     25,    44 }, // Chinese/China
    {     25,    97 }, // Chinese/HongKong
    {     25,   126 }, // Chinese/Macau
    {     25,   190 }, // Chinese/Singapore
    {     25,   208 }, // Chinese/Taiwan
    {     27,    54 }, // Croatian/Croatia
    {     28,    57 }, // Czech/CzechRepublic
    {     29,    58 }, // Danish/Denmark
    {     30,   151 }, // Dutch/Netherlands
    {     30,    21 }, // Dutch/Belgium
    {     31,   225 }, // English/UnitedStates
    {     31,     4 }, // English/AmericanSamoa
    {     31,    13 }, // English/Australia
    {     31,    21 }, // English/Belgium
    {     31,    22 }, // English/Belize
    {     31,    28 }, // English/Botswana
    {     31,    38 }, // English/Canada
    {     31,    89 }, // English/Guam
    {     31,    97 }, // English/HongKong
    {     31,   100 }, // English/India
    {     31,   104 }, // English/Ireland
    {     31,   107 }, // English/Jamaica
    {     31,   133 }, // English/Malta
    {     31,   134 }, // English/MarshallIslands
    {     31,   148 }, // English/Namibia
    {     31,   154 }, // English/NewZealand
    {     31,   160 }, // English/NorthernMarianaIslands
    {     31,   163 }, // English/Pakistan
    {     31,   170 }, // English/Philippines
    {     31,   190 }, // English/Singapore
    {     31,   195 }, // English/SouthAfrica
    {     31,   215 }, // English/TrinidadAndTobago
    {     31,   224 }, // English/UnitedKingdom
    {     31,   226 }, // English/UnitedStatesMinorOutlyingIslands
    {     31,   234 }, // English/USVirginIslands
    {     31,   240 }, // English/Zimbabwe
    {     33,    68 }, // Estonian/Estonia
    {     34,    71 }, // Faroese/FaroeIslands
    {     36,    73 }, // Finnish/Finland
    {     37,    74 }, // French/France
    {     37,    21 }, // French/Belgium
    {     37,    38 }, // French/Canada
    {     37,   125 }, // French/Luxembourg
    {     37,   142 }, // French/Monaco
    {     37,   206 }, // French/Switzerland
    {     40,   197 }, // Galician/Spain
    {     41,    81 }, // Georgian/Georgia
    {     42,    82 }, // German/Germany
    {     42,    14 }, // German/Austria
    {     42,    21 }, // German/Belgium
    {     42,   123 }, // German/Liechtenstein
    {     42,   125 }, // German/Luxembourg
    {     42,   206 }, // German/Switzerland
    {     43,    85 }, // Greek/Greece
    {     43,    56 }, // Greek/Cyprus
    {     44,    86 }, // Greenlandic/Greenland
    {     46,   100 }, // Gujarati/India
    {     47,    83 }, // Hausa/Ghana
    {     47,   156 }, // Hausa/Niger
    {     47,   157 }, // Hausa/Nigeria
    {     48,   105 }, // Hebrew/Israel
    {     49,   100 }, // Hindi/India
    {     50,    98 }, // Hungarian/Hungary
    {     51,    99 }, // Icelandic/Iceland
    {     52,   101 }, // Indonesian/Indonesia
    {     57,   104 }, // Irish/Ireland
    {     58,   106 }, // Italian/Italy
    {     58,   206 }, // Italian/Switzerland
    {     59,   108 }, // Japanese/Japan
    {     61,   100 }, // Kannada/India
    {     63,   110 }, // Kazakh/Kazakhstan
    {     64,   179 }, // Kinyarwanda/Rwanda
    {     65,   116 }, // Kirghiz/Kyrgyzstan
    {     66,   114 }, // Korean/RepublicOfKorea
    {     67,   102 }, // Kurdish/Iran
    {     67,   103 }, // Kurdish/Iraq
    {     67,   207 }, // Kurdish/SyrianArabRepublic
    {     67,   217 }, // Kurdish/Turkey
    {     69,   117 }, // Laothian/Lao
    {     71,   118 }, // Latvian/Latvia
    {     72,    49 }, // Lingala/DemocraticRepublicOfCongo
    {     72,    50 }, // Lingala/PeoplesRepublicOfCongo
    {     73,   124 }, // Lithuanian/Lithuania
    {     74,   127 }, // Macedonian/Macedonia
    {     76,   130 }, // Malay/Malaysia
    {     76,    32 }, // Malay/BruneiDarussalam
    {     77,   100 }, // Malayalam/India
    {     78,   133 }, // Maltese/Malta
    {     80,   100 }, // Marathi/India
    {     82,   143 }, // Mongolian/Mongolia
    {     84,   150 }, // Nepali/Nepal
    {     85,   161 }, // Norwegian/Norway
    {     87,   100 }, // Oriya/India
    {     88,     1 }, // Pashto/Afghanistan
    {     89,   102 }, // Persian/Iran
    {     89,     1 }, // Persian/Afghanistan
    {     90,   172 }, // Polish/Poland
    {     91,   173 }, // Portuguese/Portugal
    {     91,    30 }, // Portuguese/Brazil
    {     92,   100 }, // Punjabi/India
    {     92,   163 }, // Punjabi/Pakistan
    {     95,   177 }, // Romanian/Romania
    {     96,   178 }, // Russian/RussianFederation
    {     96,   222 }, // Russian/Ukraine
    {     99,   100 }, // Sanskrit/India
    {    100,   241 }, // Serbian/SerbiaAndMontenegro
    {    100,    27 }, // Serbian/BosniaAndHerzegowina
    {    100,   238 }, // Serbian/Yugoslavia
    {    101,   241 }, // SerboCroatian/SerbiaAndMontenegro
    {    101,    27 }, // SerboCroatian/BosniaAndHerzegowina
    {    101,   238 }, // SerboCroatian/Yugoslavia
    {    102,   195 }, // Sesotho/SouthAfrica
    {    103,   195 }, // Setswana/SouthAfrica
    {    107,   195 }, // Siswati/SouthAfrica
    {    108,   191 }, // Slovak/Slovakia
    {    109,   192 }, // Slovenian/Slovenia
    {    110,   194 }, // Somali/Somalia
    {    110,    59 }, // Somali/Djibouti
    {    110,    69 }, // Somali/Ethiopia
    {    110,   111 }, // Somali/Kenya
    {    111,   197 }, // Spanish/Spain
    {    111,    10 }, // Spanish/Argentina
    {    111,    26 }, // Spanish/Bolivia
    {    111,    43 }, // Spanish/Chile
    {    111,    47 }, // Spanish/Colombia
    {    111,    52 }, // Spanish/CostaRica
    {    111,    61 }, // Spanish/DominicanRepublic
    {    111,    63 }, // Spanish/Ecuador
    {    111,    65 }, // Spanish/ElSalvador
    {    111,    90 }, // Spanish/Guatemala
    {    111,    96 }, // Spanish/Honduras
    {    111,   139 }, // Spanish/Mexico
    {    111,   155 }, // Spanish/Nicaragua
    {    111,   166 }, // Spanish/Panama
    {    111,   168 }, // Spanish/Paraguay
    {    111,   169 }, // Spanish/Peru
    {    111,   174 }, // Spanish/PuertoRico
    {    111,   225 }, // Spanish/UnitedStates
    {    111,   227 }, // Spanish/Uruguay
    {    111,   231 }, // Spanish/Venezuela
    {    113,   111 }, // Swahili/Kenya
    {    113,   210 }, // Swahili/Tanzania
    {    114,   205 }, // Swedish/Sweden
    {    114,    73 }, // Swedish/Finland
    {    116,   209 }, // Tajik/Tajikistan
    {    117,   100 }, // Tamil/India
    {    118,   178 }, // Tatar/RussianFederation
    {    119,   100 }, // Telugu/India
    {    120,   211 }, // Thai/Thailand
    {    122,    67 }, // Tigrinya/Eritrea
    {    122,    69 }, // Tigrinya/Ethiopia
    {    124,   195 }, // Tsonga/SouthAfrica
    {    125,   217 }, // Turkish/Turkey
    {    129,   222 }, // Ukrainian/Ukraine
    {    130,   100 }, // Urdu/India
    {    130,   163 }, // Urdu/Pakistan
    {    131,   228 }, // Uzbek/Uzbekistan
    {    131,     1 }, // Uzbek/Afghanistan
    {    132,   232 }, // Vietnamese/VietNam
    {    134,   224 }, // Welsh/UnitedKingdom
    {    136,   195 }, // Xhosa/SouthAfrica
    {    138,   157 }, // Yoruba/Nigeria
    {    140,   195 }, // Zulu/SouthAfrica
    {    141,   161 }, // Nynorsk/Norway
    {    142,    27 }, // Bosnian/BosniaAndHerzegowina
    {    143,   131 }, // Divehi/Maldives
    {    144,   224 }, // Manx/UnitedKingdom
    {    145,   224 }, // Cornish/UnitedKingdom
    {    146,    83 }, // Akan/Ghana
    {    147,   100 }, // Konkani/India
    {    148,    83 }, // Ga/Ghana
    {    149,   157 }, // Igbo/Nigeria
    {    150,   111 }, // Kamba/Kenya
    {    151,   207 }, // Syriac/SyrianArabRepublic
    {    152,    67 }, // Blin/Eritrea
    {    153,    67 }, // Geez/Eritrea
    {    153,    69 }, // Geez/Ethiopia
    {    154,   157 }, // Koro/Nigeria
    {    155,    69 }, // Sidamo/Ethiopia
    {    156,   157 }, // Atsam/Nigeria
    {    157,    67 }, // Tigre/Eritrea
    {    158,   157 }, // Jju/Nigeria
    {    159,   106 }, // Friulian/Italy
    {    160,   195 }, // Venda/SouthAfrica
    {    161,    83 }, // Ewe/Ghana
    {    161,   212 }, // Ewe/Togo
    {    163,   225 }, // Hawaiian/UnitedStates
    {    164,   157 }, // Tyap/Nigeria
    {    165,   129 }, // Chewa/Malawi
};
static const int g_locale_list_count = sizeof(g_locale_list)/sizeof(g_locale_list[0]);
|
||||
|
||||
LocaleModel::LocaleModel(QObject *parent)
    : QAbstractItemModel(parent)
{
    // Editable sample values for row 0, one per data column (the last,
    // "Name", column has no input).  Dates and times are appended twice
    // so the long- and short-format columns can be edited independently.
    m_data_list.append(1234.5678);
    m_data_list.append(QDate::currentDate());
    m_data_list.append(QDate::currentDate());
    m_data_list.append(QTime::currentTime());
    m_data_list.append(QTime::currentTime());
}
|
||||
|
||||
QVariant LocaleModel::data(const QModelIndex &index, int role) const
{
    // Serve only display, edit and tooltip roles, within bounds.
    // (&& binds tighter than ||, so the role test forms one clause.)
    if (!index.isValid()
        || role != Qt::DisplayRole && role != Qt::EditRole && role != Qt::ToolTipRole
        || index.column() >= g_model_cols
        || index.row() >= g_locale_list_count + 2)
        return QVariant();

    // The sample value backing this column; the final "Name" column
    // has no backing sample.
    QVariant data;
    if (index.column() < g_model_cols - 1)
        data = m_data_list.at(index.column());

    if (index.row() == 0) {
        // Row 0 shows the raw, unformatted input values.
        if (role == Qt::ToolTipRole)
            return QVariant();
        switch (index.column()) {
        case 0:
            return data.toDouble();
        case 1:
            return data.toDate();
        case 2:
            return data.toDate();
        case 3:
            return data.toTime();
        case 4:
            return data.toTime();
        case 5:
            return QVariant();
        default:
            break;
        }
    } else {
        // Row 1 is the system locale; rows 2+ come from g_locale_list.
        QLocale locale;
        if (index.row() == 1) {
            locale = QLocale::system();
        } else {
            LocaleListItem item = g_locale_list[index.row() - 2];
            locale = QLocale((QLocale::Language)item.language, (QLocale::Territory)item.territory);
        }

        // Tooltips show the locale's format string; display/edit roles
        // show the sample value formatted by that locale.
        switch (index.column()) {
        case 0:
            if (role == Qt::ToolTipRole)
                return QVariant();
            return locale.toString(data.toDouble());
        case 1:
            if (role == Qt::ToolTipRole)
                return locale.dateFormat(QLocale::LongFormat);
            return locale.toString(data.toDate(), QLocale::LongFormat);
        case 2:
            if (role == Qt::ToolTipRole)
                return locale.dateFormat(QLocale::ShortFormat);
            return locale.toString(data.toDate(), QLocale::ShortFormat);
        case 3:
            if (role == Qt::ToolTipRole)
                return locale.timeFormat(QLocale::LongFormat);
            return locale.toString(data.toTime(), QLocale::LongFormat);
        case 4:
            if (role == Qt::ToolTipRole)
                return locale.timeFormat(QLocale::ShortFormat);
            return locale.toString(data.toTime(), QLocale::ShortFormat);
        case 5:
            if (role == Qt::ToolTipRole)
                return QVariant();
            return locale.name();
        default:
            break;
        }
    }

    return QVariant();
}
|
||||
|
||||
QVariant LocaleModel::headerData(int section, Qt::Orientation orientation, int role) const
{
    if (role != Qt::DisplayRole)
        return QVariant();

    if (orientation == Qt::Horizontal) {
        // Column titles.
        switch (section) {
        case 0:
            return QLatin1String("Double");
        case 1:
            return QLatin1String("Long Date");
        case 2:
            return QLatin1String("Short Date");
        case 3:
            return QLatin1String("Long Time");
        case 4:
            return QLatin1String("Short Time");
        case 5:
            return QLatin1String("Name");
        default:
            break;
        }
    } else {
        // Row titles: 0 = raw input, 1 = system locale,
        // 2+ = "Language/Territory" from g_locale_list.
        if (section >= g_locale_list_count + 2)
            return QVariant();
        if (section == 0) {
            return QLatin1String("Input");
        } else if (section == 1) {
            return QLatin1String("System");
        } else {
            LocaleListItem item = g_locale_list[section - 2];
            return QLocale::languageToString((QLocale::Language)item.language)
                   + QLatin1Char('/')
                   + QLocale::territoryToString((QLocale::Territory)item.territory);
        }
    }

    return QVariant();
}
|
||||
|
||||
QModelIndex LocaleModel::index(int row, int column,
                               const QModelIndex &parent) const
{
    // Flat table: items only exist at the root, within bounds.
    if (parent.isValid()
        || row >= g_locale_list_count + 2
        || column >= g_model_cols)
        return QModelIndex();

    return createIndex(row, column);
}
|
||||
|
||||
// Flat model: no item has a parent.
QModelIndex LocaleModel::parent(const QModelIndex&) const
{
    return QModelIndex();
}
|
||||
|
||||
// Fixed column count: double, long/short date, long/short time, name.
int LocaleModel::columnCount(const QModelIndex&) const
{
    return g_model_cols;
}
|
||||
|
||||
int LocaleModel::rowCount(const QModelIndex &parent) const
{
    // Flat model: children only at the root.
    if (parent.isValid())
        return 0;
    // One row per locale, plus the input row (0) and system row (1).
    return g_locale_list_count + 2;
}
|
||||
|
||||
// Only row 0's sample cells are editable; its "Name" cell is inert.
Qt::ItemFlags LocaleModel::flags(const QModelIndex &index) const
{
    // Use the named empty flag rather than a bare 0 literal.
    if (!index.isValid())
        return Qt::NoItemFlags;
    if (index.row() == 0 && index.column() == g_model_cols - 1)
        return Qt::NoItemFlags;
    if (index.row() == 0)
        return QAbstractItemModel::flags(index) | Qt::ItemIsEditable;
    return QAbstractItemModel::flags(index);
}
|
||||
|
||||
bool LocaleModel::setData(const QModelIndex &index, const QVariant &value, int role)
|
||||
{
|
||||
if (!index.isValid()
|
||||
|| index.row() != 0
|
||||
|| index.column() >= g_model_cols - 1
|
||||
|| role != Qt::EditRole
|
||||
|| m_data_list.at(index.column()).type() != value.type())
|
||||
return false;
|
||||
|
||||
m_data_list[index.column()] = value;
|
||||
emit dataChanged(createIndex(1, index.column()),
|
||||
createIndex(g_locale_list_count, index.column()));
|
||||
|
||||
return true;
|
||||
}
|
31
util/locale_database/testlocales/localemodel.h
Normal file
31
util/locale_database/testlocales/localemodel.h
Normal file
@ -0,0 +1,31 @@
|
||||
// Copyright (C) 2016 The Qt Company Ltd.
|
||||
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
#ifndef LOCALEMODEL_H
|
||||
#define LOCALEMODEL_H
|
||||
|
||||
#include <QAbstractItemModel>
|
||||
#include <QList>
|
||||
#include <QVariant>
|
||||
|
||||
class LocaleModel : public QAbstractItemModel
|
||||
{
|
||||
Q_OBJECT
|
||||
public:
|
||||
LocaleModel(QObject *parent = nullptr);
|
||||
|
||||
virtual int columnCount(const QModelIndex &parent = QModelIndex()) const;
|
||||
virtual QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const;
|
||||
virtual QModelIndex index(int row, int column,
|
||||
const QModelIndex &parent = QModelIndex()) const;
|
||||
virtual QModelIndex parent(const QModelIndex &index) const;
|
||||
virtual int rowCount(const QModelIndex &parent = QModelIndex()) const;
|
||||
virtual QVariant headerData(int section, Qt::Orientation orientation,
|
||||
int role = Qt::DisplayRole ) const;
|
||||
virtual Qt::ItemFlags flags(const QModelIndex &index) const;
|
||||
virtual bool setData(const QModelIndex &index, const QVariant &value,
|
||||
int role = Qt::EditRole);
|
||||
private:
|
||||
QList<QVariant> m_data_list;
|
||||
};
|
||||
|
||||
#endif // LOCALEMODEL_H
|
51
util/locale_database/testlocales/localewidget.cpp
Normal file
51
util/locale_database/testlocales/localewidget.cpp
Normal file
@ -0,0 +1,51 @@
|
||||
// Copyright (C) 2016 The Qt Company Ltd.
|
||||
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
#include <QTableView>
|
||||
#include <QVBoxLayout>
|
||||
#include <QItemDelegate>
|
||||
#include <QItemEditorFactory>
|
||||
#include <QDoubleSpinBox>
|
||||
|
||||
#include "localewidget.h"
|
||||
#include "localemodel.h"
|
||||
|
||||
class DoubleEditorCreator : public QItemEditorCreatorBase
|
||||
{
|
||||
public:
|
||||
QWidget *createWidget(QWidget *parent) const {
|
||||
QDoubleSpinBox *w = new QDoubleSpinBox(parent);
|
||||
w->setDecimals(4);
|
||||
w->setRange(-10000.0, 10000.0);
|
||||
return w;
|
||||
}
|
||||
virtual QByteArray valuePropertyName() const {
|
||||
return QByteArray("value");
|
||||
}
|
||||
};
|
||||
|
||||
// Item-editor factory routing QVariant::Double to DoubleEditorCreator;
// other types fall back to the default editors.
class EditorFactory : public QItemEditorFactory
{
public:
    EditorFactory() {
        // registerEditor() stores the pointer, so the creator must
        // outlive the factory; a function-local static guarantees that.
        static DoubleEditorCreator double_editor_creator;
        registerEditor(QVariant::Double, &double_editor_creator);
    }
};
|
||||
|
||||
// Builds the table view over LocaleModel and installs the custom
// double editor on the view's delegate.
LocaleWidget::LocaleWidget(QWidget *parent)
    : QWidget(parent)
{
    m_model = new LocaleModel(this);
    m_view = new QTableView(this);

    // NOTE(review): Qt 6 views default to QStyledItemDelegate, which is
    // not a QItemDelegate — confirm this cast still succeeds here.
    QItemDelegate *delegate = qobject_cast<QItemDelegate*>(m_view->itemDelegate());
    Q_ASSERT(delegate != nullptr);
    static EditorFactory editor_factory;
    delegate->setItemEditorFactory(&editor_factory);

    m_view->setModel(m_model);

    QVBoxLayout *layout = new QVBoxLayout(this);
    // QLayout::setMargin() was removed in Qt 6; setContentsMargins()
    // is the equivalent replacement.
    layout->setContentsMargins(0, 0, 0, 0);
    layout->addWidget(m_view);
}
|
21
util/locale_database/testlocales/localewidget.h
Normal file
21
util/locale_database/testlocales/localewidget.h
Normal file
@ -0,0 +1,21 @@
|
||||
// Copyright (C) 2016 The Qt Company Ltd.
|
||||
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
#ifndef LOCALEWIDGET_H
|
||||
#define LOCALEWIDGET_H
|
||||
|
||||
#include <QWidget>
|
||||
|
||||
class LocaleModel;
|
||||
class QTableView;
|
||||
|
||||
// Top-level widget: a table view over LocaleModel showing how each
// locale formats the row-0 sample values.
class LocaleWidget : public QWidget
{
    Q_OBJECT
public:
    LocaleWidget(QWidget *parent = nullptr);
private:
    LocaleModel *m_model;   // owned via QObject parenting
    QTableView *m_view;     // owned via QObject parenting
};
|
||||
|
||||
#endif // LOCALEWIDGET_H
|
13
util/locale_database/testlocales/main.cpp
Normal file
13
util/locale_database/testlocales/main.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
// Copyright (C) 2016 The Qt Company Ltd.
|
||||
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
#include <QApplication>
|
||||
|
||||
#include "localewidget.h"
|
||||
|
||||
// Entry point: show the locale-table widget and run the event loop.
int main(int argc, char *argv[])
{
    QApplication app(argc, argv);
    LocaleWidget wgt;
    wgt.show();
    return app.exec();
}
|
4
util/locale_database/testlocales/testlocales.pro
Normal file
4
util/locale_database/testlocales/testlocales.pro
Normal file
@ -0,0 +1,4 @@
|
||||
# qmake project for the QLocale data inspection tool.
TARGET = testlocales
# Widgets are a separate module since Qt 5; QApplication/QTableView
# will not link without it.
QT += widgets
CONFIG += debug
SOURCES += localemodel.cpp localewidget.cpp main.cpp
HEADERS += localemodel.h localewidget.h
|
Reference in New Issue
Block a user