qt 6.5.1 original

This commit is contained in:
kleuter
2023-10-29 23:33:08 +01:00
parent 71d22ab6b0
commit 85d238dfda
21202 changed files with 5499099 additions and 0 deletions


@ -0,0 +1,14 @@
locale_database is used to generate qlocale data from CLDR.
CLDR is the Common Locale Data Repository, a database for localized
data (like date formats, country names etc). It is provided by the
Unicode consortium.
See cldr2qlocalexml.py for how to run it and qlocalexml2cpp.py to
update the locale data tables (principally text/qlocale_data_p.h and
time/q*calendar_data_p.h under src/corelib/). See enumdata.py for when
and how to update the data it provides. You will need to pass
--no-verify or -n to git commit for these changes.
See cldr2qtimezone.py for how to update the tables of Windows-specific
names for zones and UTC-offset zone names.
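
A rough sketch of the QLocaleXML generation step (the CLDR path below is
a placeholder; cldr2qlocalexml.py takes the CLDR root and an output file
name, and all the scripts mentioned support --help):

    ./cldr2qlocalexml.py /path/to/cldr/root locale.xml

Feed the resulting locale.xml to qlocalexml2cpp.py (see its --help) to
regenerate the data tables named above.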


@ -0,0 +1,760 @@
# Copyright (C) 2021 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Digesting the CLDR's data.
Provides two classes:
CldrReader -- driver for reading CLDR data
CldrAccess -- used by the reader to access the tree of data files
The former should normally be all you need to access.
See individual classes for further detail.
"""
from typing import Iterable, TextIO
from xml.dom import minidom
from weakref import WeakValueDictionary as CacheDict
from pathlib import Path
from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
from qlocalexml import Locale
class CldrReader (object):
def __init__(self, root: Path, grumble = lambda msg: None, whitter = lambda msg: None):
"""Set up a reader object for reading CLDR data.
Single parameter, root, is the file-system path to the root of
the unpacked CLDR archive; its common/ sub-directory should
contain dtd/, main/ and supplemental/ sub-directories.
Optional second argument, grumble, is a callable that logs
warnings and complaints, e.g. sys.stderr.write would be a
suitable callable. The default is a no-op that ignores its
single argument. Optional third argument is similar, used for
less interesting output; pass sys.stderr.write for it for
verbose output."""
self.root = CldrAccess(root)
self.whitter, self.grumble = whitter, grumble
self.root.checkEnumData(grumble)
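    # Illustrative use, mirroring cldr2qlocalexml.py (a sketch; the path is
    # a placeholder and sys is assumed to be imported by the caller):
    #   reader = CldrReader(Path('/path/to/cldr/root'),
    #                       grumble=sys.stderr.write, whitter=sys.stderr.write)
    #   locales = reader.readLocales(('gregorian',))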
def likelySubTags(self):
"""Generator for likely subtag information.
Yields pairs (have, give) of 4-tuples; if what you have
matches the left member, giving the right member is probably
sensible. Each 4-tuple's entries are the full names of a
language, a script, a territory (usually a country) and a
variant (currently ignored)."""
skips = []
for got, use in self.root.likelySubTags():
try:
have = self.__parseTags(got)
give = self.__parseTags(use)
except Error as e:
if ((use.startswith(got) or got.startswith('und_'))
and e.message.startswith('Unknown ') and ' code ' in e.message):
skips.append(use)
else:
self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({e})\n')
continue
if all(code.startswith('Any') and code[3].isupper() for code in have[:-1]):
continue
give = (give[0],
# Substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
have[1] if give[1] == 'AnyScript' else give[1],
have[2] if give[2] == 'AnyTerritory' else give[2],
give[3]) # AnyVariant similarly ?
yield have, give
if skips:
# TODO: look at LDML's reserved locale tag names; they
# show up a lot in this, and may be grounds for filtering
# more out.
pass # self.__wrapped(self.whitter, 'Skipping likelySubtags (for unknown codes): ', skips)
def readLocales(self, calendars = ('gregorian',)):
locales = tuple(self.__allLocales(calendars))
return dict(((k.language_id, k.script_id, k.territory_id, k.variant_code),
k) for k in locales)
def __allLocales(self, calendars):
def skip(locale, reason):
return f'Skipping defaultContent locale "{locale}" ({reason})\n'
for locale in self.root.defaultContentLocales:
try:
language, script, territory, variant = self.__splitLocale(locale)
except ValueError:
self.whitter(skip(locale, 'only language tag'))
continue
if not (script or territory):
self.grumble(skip(locale, 'second tag is neither script nor territory'))
continue
if not (language and territory):
continue
try:
yield self.__getLocaleData(self.root.locale(locale), calendars,
language, script, territory, variant)
except Error as e:
self.grumble(skip(locale, e.message))
for locale in self.root.fileLocales:
try:
chain = self.root.locale(locale)
language, script, territory, variant = chain.tagCodes()
assert language
# TODO: this skip should probably be based on likely
# sub-tags, instead of empty territory: if locale has a
# likely-subtag expansion, that's what QLocale uses,
# and we'll be saving its data for the expanded locale
# anyway, so don't need to record it for itself.
# See also QLocaleXmlReader.loadLocaleMap's grumble.
if not territory:
continue
yield self.__getLocaleData(chain, calendars, language, script, territory, variant)
except Error as e:
self.grumble(f'Skipping file locale "{locale}" ({e})\n')
import textwrap
@staticmethod
def __wrapped(writer, prefix, tokens, wrap = textwrap.wrap):
writer('\n'.join(wrap(prefix + ', '.join(tokens),
subsequent_indent=' ', width=80)) + '\n')
del textwrap
def __parseTags(self, locale):
tags = self.__splitLocale(locale)
language = next(tags)
script = territory = variant = ''
try:
script, territory, variant = tags
except ValueError:
pass
return tuple(p[1] for p in self.root.codesToIdName(language, script, territory, variant))
def __splitLocale(self, name):
"""Generate (language, script, territory, variant) from a locale name
Ignores any trailing fields (with a warning), leaves script (a
capitalised four-letter token), territory (either a number or
an all-uppercase token) or variant (upper case and digits)
empty if unspecified. Only generates one entry if name is a
single tag (i.e. contains no underscores). Always yields 1 or
4 values, never 2 or 3."""
tags = iter(name.split('_'))
yield next(tags) # Language
try:
tag = next(tags)
except StopIteration:
return
# Script is always four letters, always capitalised:
if len(tag) == 4 and tag[0].isupper() and tag[1:].islower():
yield tag
try:
tag = next(tags)
except StopIteration:
tag = ''
else:
yield ''
# Territory is upper-case or numeric:
if tag and tag.isupper() or tag.isdigit():
yield tag
try:
tag = next(tags)
except StopIteration:
tag = ''
else:
yield ''
# Variant can be any mixture of upper-case and digits.
if tag and all(c.isupper() or c.isdigit() for c in tag):
yield tag
tag = ''
else:
yield ''
rest = [tag] if tag else []
rest.extend(tags)
if rest:
self.grumble(f'Ignoring unparsed cruft {"_".join(rest)} in {name}\n')
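    # Worked examples of the parsing above, as traced from the code:
    #   tuple(self.__splitLocale('sr_Cyrl_RS')) -> ('sr', 'Cyrl', 'RS', '')
    #   tuple(self.__splitLocale('de_DE'))      -> ('de', '', 'DE', '')
    #   tuple(self.__splitLocale('en'))         -> ('en',)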
def __getLocaleData(self, scan, calendars, language, script, territory, variant):
ids, names = zip(*self.root.codesToIdName(language, script, territory, variant))
assert ids[0] > 0 and ids[2] > 0, (language, script, territory, variant)
locale = Locale(
language = names[0], language_code = language, language_id = ids[0],
script = names[1], script_code = script, script_id = ids[1],
territory = names[2], territory_code = territory, territory_id = ids[2],
variant_code = variant)
firstDay, weStart, weEnd = self.root.weekData(territory)
assert all(day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
for day in (firstDay, weStart, weEnd))
locale.update(firstDayOfWeek = firstDay,
weekendStart = weStart,
weekendEnd = weEnd)
iso, digits, rounding = self.root.currencyData(territory)
locale.update(currencyIsoCode = iso,
currencyDigits = int(digits),
currencyRounding = int(rounding))
locale.update(scan.currencyData(iso))
locale.update(scan.numericData(self.root.numberSystem, self.whitter))
locale.update(scan.textPatternData())
locale.update(scan.endonyms(language, script, territory, variant))
locale.update(scan.unitData()) # byte, kB, MB, GB, ..., KiB, MiB, GiB, ...
locale.update(scan.calendarNames(calendars)) # Names of days and months
return locale
# Note: various caches assume this class is a singleton, so the
# "default" value for a parameter no caller should pass can serve as
# the cache. If a process were to instantiate this class with distinct
# roots, each cache would be filled by the first to need it !
class CldrAccess (object):
def __init__(self, root: Path):
"""Set up a master object for accessing CLDR data.
Single parameter, root, is the file-system path to the root of
the unpacked CLDR archive; its common/ sub-directory should
contain dtd/, main/ and supplemental/ sub-directories."""
self.root = root
def xml(self, relative_path: str):
"""Load a single XML file and return its root element as an XmlScanner.
The path is interpreted relative to self.root"""
return XmlScanner(Node(self.__xml(relative_path)))
def supplement(self, name):
"""Loads supplemental data as a Supplement object.
The name should be that of a file in common/supplemental/, without path.
"""
return Supplement(Node(self.__xml(f'common/supplemental/{name}')))
def locale(self, name):
"""Loads all data for a locale as a LocaleScanner object.
The name should be a locale name; adding suffix '.xml' to it
should usually yield a file in common/main/. The returned
LocaleScanner object packages this file along with all those
from which it inherits; its methods know how to handle that
inheritance, where relevant."""
return LocaleScanner(name, self.__localeRoots(name), self.__rootLocale)
@property
def fileLocales(self) -> Iterable[str]:
"""Generator for locale IDs seen in file-names.
All *.xml other than root.xml in common/main/ are assumed to
identify locales."""
for path in self.root.joinpath('common/main').glob('*.xml'):
if path.stem != 'root':
yield path.stem
@property
def defaultContentLocales(self):
"""Generator for the default content locales."""
for name, attrs in self.supplement('supplementalMetadata.xml').find('metadata/defaultContent'):
try:
locales = attrs['locales']
except KeyError:
pass
else:
for locale in locales.split():
yield locale
def likelySubTags(self):
for ignore, attrs in self.supplement('likelySubtags.xml').find('likelySubtags'):
yield attrs['from'], attrs['to']
def numberSystem(self, system):
"""Get a description of a numbering system.
Returns a mapping, with keys 'digits', 'type' and 'id'; the
value for this last is system. Raises KeyError for unknown
number system, ldml.Error on failure to load data."""
try:
return self.__numberSystems[system]
except KeyError:
raise Error(f'Unsupported number system: {system}')
def weekData(self, territory):
"""Data on the weekly cycle.
Returns a triple (W, S, E) of en's short names for week-days;
W is the first day of the week, S the start of the week-end
and E the end of the week-end. Where data for a territory is
unavailable, the data for CLDR's territory 001 (The World) is
used."""
try:
return self.__weekData[territory]
except KeyError:
return self.__weekData['001']
def currencyData(self, territory):
"""Returns currency data for the given territory code.
Return value is a tuple (ISO4217 code, digit count, rounding
mode). If CLDR provides no data for this territory, ('', 2, 1)
is the default result.
"""
try:
return self.__currencyData[territory]
except KeyError:
return '', 2, 1
def codesToIdName(self, language, script, territory, variant = ''):
"""Maps each code to the appropriate ID and name.
Returns a 4-tuple of (ID, name) pairs corresponding to the
language, script, territory and variant given. Raises a
suitable error if any of them is unknown, indicating all that
are unknown plus suitable names for any that could sensibly be
added to enumdata.py to make them known.
Until we implement variant support (QTBUG-81051), the fourth
member of the returned tuple is always 0 paired with a string
that should not be used."""
enum = self.__enumMap
try:
return (enum('language')[language],
enum('script')[script],
enum('territory')[territory],
enum('variant')[variant])
except KeyError:
pass
parts, values = [], [language, script, territory, variant]
for index, key in enumerate(('language', 'script', 'territory', 'variant')):
naming, enums = self.__codeMap(key), enum(key)
value = values[index]
if value not in enums:
text = f'{key} code {value}'
name = naming.get(value)
if name and value != 'POSIX':
text += f' (could add {name})'
parts.append(text)
if len(parts) > 1:
parts[-1] = 'and ' + parts[-1]
assert parts
raise Error('Unknown ' + ', '.join(parts),
language, script, territory, variant)
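    # For example, with the enumdata.py in this commit,
    # codesToIdName('en', '', 'US') returns
    #   ((75, 'English'), (0, 'AnyScript'), (248, 'United States'),
    #    (0, 'This should never be seen outside ldml.py'))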
@staticmethod
def __checkEnum(given, proper, scraps,
remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ü': 'u'},
prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
suffixes = ( 'Han', ),
skip = '\u02bc'):
# Each is a { code: full name } mapping
for code, name in given.items():
try: right = proper[code]
except KeyError:
# No en.xml name for this code, but supplementalData's
# parentLocale may still believe in it:
if code not in scraps:
yield name, f'[Found no CLDR name for code {code}]'
continue
if name == right: continue
ok = right.replace('&', 'And')
for k, v in prefix.items():
if ok.startswith(k + ' '):
ok = v + ok[len(k):]
while '(' in ok:
try: f, t = ok.index('('), ok.index(')')
except ValueError: break
ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip()
if any(name == ok + ' ' + s for s in suffixes):
continue
if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
continue
yield name, ok
def checkEnumData(self, grumble):
scraps = set()
for k in self.__parentLocale.keys():
for f in k.split('_'):
scraps.add(f)
from enumdata import language_map, territory_map, script_map
language = dict((v, k) for k, v in language_map.values() if not v.isspace())
territory = dict((v, k) for k, v in territory_map.values() if v != 'ZZ')
script = dict((v, k) for k, v in script_map.values() if v != 'Zzzz')
lang = dict(self.__checkEnum(language, self.__codeMap('language'), scraps))
land = dict(self.__checkEnum(territory, self.__codeMap('territory'), scraps))
text = dict(self.__checkEnum(script, self.__codeMap('script'), scraps))
if lang or land or text:
grumble("""\
Using names that don't match CLDR: consider updating the name(s) in
enumdata.py (keeping the old name as an alias):
""")
if lang:
grumble('Language:\n\t'
+ '\n\t'.join(f'{k} -> {v}' for k, v in lang.items())
+ '\n')
if land:
grumble('Territory:\n\t'
+ '\n\t'.join(f'{k} -> {v}' for k, v in land.items())
+ '\n')
if text:
grumble('Script:\n\t'
+ '\n\t'.join(f'{k} -> {v}' for k, v in text.items())
+ '\n')
grumble('\n')
def readWindowsTimeZones(self, lookup): # For use by cldr2qtimezone.py
"""Digest CLDR's MS-Win time-zone name mapping.
        MS-Win has its own eccentric names for time-zones. CLDR
helpfully provides a translation to more orthodox names.
Single argument, lookup, is a mapping from known MS-Win names
for locales to a unique integer index (starting at 1).
The XML structure we read has the form:
<supplementalData>
<windowsZones>
<mapTimezones otherVersion="..." typeVersion="...">
<!-- (UTC-08:00) Pacific Time (US & Canada) -->
<mapZone other="Pacific Standard Time" territory="001" type="America/Los_Angeles"/>
<mapZone other="Pacific Standard Time" territory="CA" type="America/Vancouver America/Dawson America/Whitehorse"/>
<mapZone other="Pacific Standard Time" territory="US" type="America/Los_Angeles America/Metlakatla"/>
<mapZone other="Pacific Standard Time" territory="ZZ" type="PST8PDT"/>
</mapTimezones>
</windowsZones>
</supplementalData>
"""
zones = self.supplement('windowsZones.xml')
enum = self.__enumMap('territory')
badZones, unLands, defaults, windows = set(), set(), {}, {}
for name, attrs in zones.find('windowsZones/mapTimezones'):
if name != 'mapZone':
continue
wid, code = attrs['other'], attrs['territory']
data = dict(windowsId = wid,
territoryCode = code,
ianaList = attrs['type'])
try:
key = lookup[wid]
except KeyError:
badZones.add(wid)
key = 0
data['windowsKey'] = key
if code == '001':
defaults[key] = data['ianaList']
else:
try:
cid, name = enum[code]
except KeyError:
                    unLands.add(code)
continue
data.update(territoryId = cid, territory = name)
windows[key, cid] = data
if unLands:
raise Error('Unknown territory codes, please add to enumdata.py: '
+ ', '.join(sorted(unLands)))
if badZones:
raise Error('Unknown Windows IDs, please add to cldr2qtimezone.py: '
+ ', '.join(sorted(badZones)))
return self.cldrVersion, defaults, windows
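    # cldr2qtimezone.py (below) drives this roughly as:
    #   lookup = dict((name, ind) for ind, name
    #                 in enumerate((x[0] for x in windowsIdList), 1))
    #   version, defaults, windows = CldrAccess(cldrPath).readWindowsTimeZones(lookup)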
@property
def cldrVersion(self):
# Evaluate so as to ensure __cldrVersion is set:
self.__unDistinguishedAttributes
return self.__cldrVersion
# Implementation details
def __xml(self, relative_path: str, cache = CacheDict(), read = minidom.parse):
try:
doc = cache[relative_path]
except KeyError:
cache[relative_path] = doc = read(str(self.root.joinpath(relative_path))).documentElement
return doc
def __open(self, relative_path: str) -> TextIO:
return self.root.joinpath(relative_path).open()
@property
def __rootLocale(self, cache = []):
if not cache:
cache.append(self.xml('common/main/root.xml'))
return cache[0]
@property
def __supplementalData(self, cache = []):
if not cache:
cache.append(self.supplement('supplementalData.xml'))
return cache[0]
@property
def __numberSystems(self, cache = {}):
if not cache:
for ignore, attrs in self.supplement('numberingSystems.xml').find('numberingSystems'):
cache[attrs['id']] = attrs
assert cache
return cache
@property
def __weekData(self, cache = {}):
if not cache:
firstDay, weStart, weEnd = self.__getWeekData()
# Massage those into an easily-consulted form:
# World defaults given for code '001':
mon, sat, sun = firstDay['001'], weStart['001'], weEnd['001']
lands = set(firstDay) | set(weStart) | set(weEnd)
cache.update((land,
(firstDay.get(land, mon), weStart.get(land, sat), weEnd.get(land, sun)))
for land in lands)
assert cache
return cache
def __getWeekData(self):
"""Scan for data on the weekly cycle.
Yields three mappings from locales to en's short names for
week-days; if a locale isn't a key of a given mapping, it
should use the '001' (world) locale's value. The first mapping
gives the day on which the week starts, the second gives the
day on which the week-end starts, the third gives the last day
of the week-end."""
source = self.__supplementalData
for key in ('firstDay', 'weekendStart', 'weekendEnd'):
result = {}
for ignore, attrs in source.find(f'weekData/{key}'):
assert ignore == key
day = attrs['day']
assert day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'), day
if 'alt' in attrs:
continue
for loc in attrs.get('territories', '').split():
result[loc] = day
yield result
@property
def __currencyData(self, cache = {}):
if not cache:
source = self.__supplementalData
for elt in source.findNodes('currencyData/region'):
iso, digits, rounding = '', 2, 1
try:
territory = elt.dom.attributes['iso3166'].nodeValue
except KeyError:
continue
for child in elt.findAllChildren('currency'):
try:
if child.dom.attributes['tender'].nodeValue == 'false':
continue
except KeyError:
pass
try:
child.dom.attributes['to'] # Is set if this element has gone out of date.
except KeyError:
iso = child.dom.attributes['iso4217'].nodeValue
break
if iso:
for tag, data in source.find(
f'currencyData/fractions/info[iso4217={iso}]'):
digits = data['digits']
rounding = data['rounding']
cache[territory] = iso, digits, rounding
assert cache
return cache
@property
def __unDistinguishedAttributes(self, cache = {}):
"""Mapping from tag names to lists of attributes.
LDML defines some attributes as 'distinguishing': if a node
has distinguishing attributes that weren't specified in an
XPath, a search on that XPath should exclude the node's
children.
This property is a mapping from tag names to tuples of
attribute names that *aren't* distinguishing for that tag.
        Its value is cached (so its costly computation is only done
once) and there's a side-effect of populating its cache: it
sets self.__cldrVersion to the value found in ldml.dtd, during
parsing."""
if not cache:
cache.update(self.__scanLdmlDtd())
assert cache
return cache
def __scanLdmlDtd(self):
"""Scan the LDML DTD, record CLDR version
Yields (tag, attrs) pairs: on elements with a given tag,
attributes named in its attrs (a tuple) may be ignored in an
XPath search; other attributes are distinguished attributes,
in the terminology of LDML's locale-inheritance rules.
Sets self.__cldrVersion as a side-effect, since this
information is found in the same file."""
with self.__open('common/dtd/ldml.dtd') as dtd:
tag, ignored, last = None, None, None
for line in dtd:
if line.startswith('<!ELEMENT '):
if ignored:
assert tag
yield tag, tuple(ignored)
tag, ignored, last = line.split()[1], [], None
continue
if line.startswith('<!ATTLIST '):
assert tag is not None
parts = line.split()
assert parts[1] == tag
last = parts[2]
if parts[1:5] == ['version', 'cldrVersion', 'CDATA', '#FIXED']:
# parts[5] is the version, in quotes, although the final > might be stuck on its end:
self.__cldrVersion = parts[5].split('"')[1]
continue
# <!ELEMENT...>s can also be @METADATA, but not @VALUE:
if '<!--@VALUE-->' in line or (last and '<!--@METADATA-->' in line):
assert last is not None
assert ignored is not None
assert tag is not None
ignored.append(last)
last = None # No attribute is both value and metadata
if tag and ignored:
yield tag, tuple(ignored)
def __enumMap(self, key, cache = {}):
if not cache:
cache['variant'] = {'': (0, 'This should never be seen outside ldml.py')}
# They're not actually lists: mappings from numeric value
# to pairs of full name and short code. What we want, in
# each case, is a mapping from code to the other two.
from enumdata import language_map, script_map, territory_map
for form, book, empty in (('language', language_map, 'AnyLanguage'),
('script', script_map, 'AnyScript'),
('territory', territory_map, 'AnyTerritory')):
cache[form] = dict((pair[1], (num, pair[0]))
for num, pair in book.items() if pair[0] != 'C')
# (Have to filter out the C locale, as we give it the
# same (all space) code as AnyLanguage, whose code
# should probably be 'und' instead.)
# Map empty to zero and the any value:
cache[form][''] = (0, empty)
# and map language code 'und' also to (0, any):
cache['language']['und'] = (0, 'AnyLanguage')
return cache[key]
def __codeMap(self, key, cache = {},
# Maps our name for it to CLDR's name:
naming = {'language': 'languages', 'script': 'scripts',
'territory': 'territories', 'variant': 'variants'}):
if not cache:
root = self.xml('common/main/en.xml').root.findUniqueChild('localeDisplayNames')
for dst, src in naming.items():
cache[dst] = dict(self.__codeMapScan(root.findUniqueChild(src)))
assert cache
return cache[key]
def __codeMapScan(self, node):
"""Get mapping from codes to element values.
Passed in node is a <languages>, <scripts>, <territories> or
<variants> node, each child of which is a <language>,
<script>, <territory> or <variant> node as appropriate, whose
type is a code (of the appropriate flavour) and content is its
full name. In some cases, two child nodes have the same type;
in these cases, one always has an alt attribute and we should
prefer the other. Yields all such type, content pairs found
in node's children (skipping any with an alt attribute, if
their type has been seen previously)."""
seen = set()
for elt in node.dom.childNodes:
try:
key, value = elt.attributes['type'].nodeValue, elt.childNodes[0].wholeText
except (KeyError, ValueError, TypeError):
pass
else:
if key not in seen or 'alt' not in elt.attributes:
yield key, value
seen.add(key)
# CLDR uses inheritance between locales to save repetition:
@property
def __parentLocale(self, cache = {}):
# see http://www.unicode.org/reports/tr35/#Parent_Locales
if not cache:
for tag, attrs in self.__supplementalData.find('parentLocales'):
parent = attrs.get('parent', '')
for child in attrs['locales'].split():
cache[child] = parent
assert cache
return cache
def __localeAsDoc(self, name: str, aliasFor = None):
path = f'common/main/{name}.xml'
if self.root.joinpath(path).exists():
elt = self.__xml(path)
for child in Node(elt).findAllChildren('alias'):
try:
alias = child.dom.attributes['source'].nodeValue
except (KeyError, AttributeError):
pass
else:
return self.__localeAsDoc(alias, aliasFor or name)
# No alias child with a source:
return elt
if aliasFor:
raise Error(f'Fatal error: found an alias "{aliasFor}" -> "{name}", '
'but found no file for the alias')
def __scanLocaleRoots(self, name):
while name and name != 'root':
doc = self.__localeAsDoc(name)
if doc is not None:
yield Node(doc, self.__unDistinguishedAttributes)
try:
name = self.__parentLocale[name]
except KeyError:
try:
name, tail = name.rsplit('_', 1)
except ValueError: # No tail to discard: we're done
break
class __Seq (list): pass # No weakref for tuple and list, but list sub-class is ok.
def __localeRoots(self, name, cache = CacheDict()):
try:
chain = cache[name]
except KeyError:
cache[name] = chain = self.__Seq(self.__scanLocaleRoots(name))
return chain
    # Unpollute the namespace: we don't need to export these.
del minidom, CacheDict


@ -0,0 +1,87 @@
#!/usr/bin/env python3
# Copyright (C) 2021 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Convert CLDR data to QLocaleXML
The CLDR data can be downloaded from CLDR_, which has a sub-directory
for each version; you need the ``core.zip`` file for your version of
choice (typically the latest). This script has been updated to cope with
versions up to v38.1; later versions may need further adaptation. Unpack the
downloaded ``core.zip`` and check it has a common/main/ sub-directory:
pass the path of that root of the download to this script as its first
command-line argument. Pass the name of the file in which to write
output as the second argument; either omit it or use '-' to select the
standard output. This file is the input needed by
``./qlocalexml2cpp.py``
When you update the CLDR data, be sure to also update
src/corelib/text/qt_attribution.json's entry for unicode-cldr. Check
this script's output for unknown language, territory or script messages;
if any can be resolved, use their entry in common/main/en.xml to
append new entries to enumdata.py's lists and update documentation in
src/corelib/text/qlocale.qdoc, adding the new entries in alphabetic
order.
While updating the locale data, check also for updates to MS-Win's
time zone names; see cldr2qtimezone.py for details.
All the scripts mentioned support --help to tell you how to use them.
.. _CLDR: https://unicode.org/Public/cldr/
"""
from pathlib import Path
import sys
import argparse
from cldr import CldrReader
from qlocalexml import QLocaleXmlWriter
def main(out, err):
all_calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew'
parser = argparse.ArgumentParser(
description='Generate QLocaleXML from CLDR data.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
parser.add_argument('out_file', help='output XML file name',
nargs='?', metavar='out-file.xml')
parser.add_argument('--calendars', help='select calendars to emit data for',
nargs='+', metavar='CALENDAR',
choices=all_calendars, default=all_calendars)
args = parser.parse_args()
root = Path(args.cldr_path)
root_xml_path = 'common/main/root.xml'
if not root.joinpath(root_xml_path).exists():
parser.error('First argument is the root of the CLDR tree: '
f'found no {root_xml_path} under {root}')
xml = args.out_file
if not xml or xml == '-':
emit = out
elif not xml.endswith('.xml'):
parser.error(f'Please use a .xml extension on your output file name, not {xml}')
else:
try:
emit = open(xml, 'w')
except IOError as e:
            parser.error(f'Failed to open "{xml}" to write output: {e}')
# TODO - command line options to tune choice of grumble and whitter:
reader = CldrReader(root, err.write, err.write)
writer = QLocaleXmlWriter(emit.write)
writer.version(reader.root.cldrVersion)
writer.enumData()
writer.likelySubTags(reader.likelySubTags())
writer.locales(reader.readLocales(args.calendars), args.calendars)
writer.close(err.write)
return 0
if __name__ == '__main__':
sys.exit(main(sys.stdout, sys.stderr))


@ -0,0 +1,361 @@
#!/usr/bin/env python3
# Copyright (C) 2021 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Parse CLDR data for QTimeZone use with MS-Windows
Script to parse the CLDR common/supplemental/windowsZones.xml file and
prepare its data for use in QTimeZone. See ``./cldr2qlocalexml.py`` for
where to get the CLDR data. Pass its root directory as first parameter
to this script. You can optionally pass the qtbase root directory as
second parameter; it defaults to the root of the checkout containing
this script. This script updates qtbase's
src/corelib/time/qtimezoneprivate_data_p.h with the new data.
"""
import datetime
from pathlib import Path
import textwrap
import argparse
from localetools import unicode2hex, wrap_list, Error, SourceFileEditor, qtbase_root
from cldr import CldrAccess
### Data that may need updates in response to new entries in the CLDR file ###
# This script shall report the updates you need to make, if any arise.
# However, you may need to research the relevant zone's standard offset.
# List of currently known Windows IDs.
# If this script reports missing IDs, please add them here.
# Look up the offset using (google and) timeanddate.com.
# Not public so may safely be changed. Please keep in alphabetic order by ID.
# ( Windows Id, Offset Seconds )
windowsIdList = (
('Afghanistan Standard Time', 16200),
('Alaskan Standard Time', -32400),
('Aleutian Standard Time', -36000),
('Altai Standard Time', 25200),
('Arab Standard Time', 10800),
('Arabian Standard Time', 14400),
('Arabic Standard Time', 10800),
('Argentina Standard Time', -10800),
('Astrakhan Standard Time', 14400),
('Atlantic Standard Time', -14400),
('AUS Central Standard Time', 34200),
('Aus Central W. Standard Time', 31500),
('AUS Eastern Standard Time', 36000),
('Azerbaijan Standard Time', 14400),
('Azores Standard Time', -3600),
('Bahia Standard Time', -10800),
('Bangladesh Standard Time', 21600),
('Belarus Standard Time', 10800),
('Bougainville Standard Time', 39600),
('Canada Central Standard Time', -21600),
('Cape Verde Standard Time', -3600),
('Caucasus Standard Time', 14400),
('Cen. Australia Standard Time', 34200),
('Central America Standard Time', -21600),
('Central Asia Standard Time', 21600),
('Central Brazilian Standard Time', -14400),
('Central Europe Standard Time', 3600),
('Central European Standard Time', 3600),
('Central Pacific Standard Time', 39600),
('Central Standard Time (Mexico)', -21600),
('Central Standard Time', -21600),
('China Standard Time', 28800),
('Chatham Islands Standard Time', 45900),
('Cuba Standard Time', -18000),
('Dateline Standard Time', -43200),
('E. Africa Standard Time', 10800),
('E. Australia Standard Time', 36000),
('E. Europe Standard Time', 7200),
('E. South America Standard Time', -10800),
('Easter Island Standard Time', -21600),
('Eastern Standard Time', -18000),
('Eastern Standard Time (Mexico)', -18000),
('Egypt Standard Time', 7200),
('Ekaterinburg Standard Time', 18000),
('Fiji Standard Time', 43200),
('FLE Standard Time', 7200),
('Georgian Standard Time', 14400),
('GMT Standard Time', 0),
('Greenland Standard Time', -10800),
('Greenwich Standard Time', 0),
('GTB Standard Time', 7200),
('Haiti Standard Time', -18000),
('Hawaiian Standard Time', -36000),
('India Standard Time', 19800),
('Iran Standard Time', 12600),
('Israel Standard Time', 7200),
('Jordan Standard Time', 7200),
('Kaliningrad Standard Time', 7200),
('Korea Standard Time', 32400),
('Libya Standard Time', 7200),
('Line Islands Standard Time', 50400),
('Lord Howe Standard Time', 37800),
('Magadan Standard Time', 36000),
('Magallanes Standard Time', -10800), # permanent DST
('Marquesas Standard Time', -34200),
('Mauritius Standard Time', 14400),
('Middle East Standard Time', 7200),
('Montevideo Standard Time', -10800),
('Morocco Standard Time', 0),
('Mountain Standard Time (Mexico)', -25200),
('Mountain Standard Time', -25200),
('Myanmar Standard Time', 23400),
('N. Central Asia Standard Time', 21600),
('Namibia Standard Time', 3600),
('Nepal Standard Time', 20700),
('New Zealand Standard Time', 43200),
('Newfoundland Standard Time', -12600),
('Norfolk Standard Time', 39600),
('North Asia East Standard Time', 28800),
('North Asia Standard Time', 25200),
('North Korea Standard Time', 30600),
('Omsk Standard Time', 21600),
('Pacific SA Standard Time', -10800),
('Pacific Standard Time', -28800),
('Pacific Standard Time (Mexico)', -28800),
('Pakistan Standard Time', 18000),
('Paraguay Standard Time', -14400),
('Qyzylorda Standard Time', 18000), # a.k.a. Kyzylorda, in Kazakhstan
('Romance Standard Time', 3600),
('Russia Time Zone 3', 14400),
('Russia Time Zone 10', 39600),
('Russia Time Zone 11', 43200),
('Russian Standard Time', 10800),
('SA Eastern Standard Time', -10800),
('SA Pacific Standard Time', -18000),
('SA Western Standard Time', -14400),
('Saint Pierre Standard Time', -10800), # New France
('Sakhalin Standard Time', 39600),
('Samoa Standard Time', 46800),
('Sao Tome Standard Time', 0),
('Saratov Standard Time', 14400),
('SE Asia Standard Time', 25200),
('Singapore Standard Time', 28800),
('South Africa Standard Time', 7200),
('South Sudan Standard Time', 7200),
('Sri Lanka Standard Time', 19800),
('Sudan Standard Time', 7200), # unless they mean South Sudan, +03:00
('Syria Standard Time', 7200),
('Taipei Standard Time', 28800),
('Tasmania Standard Time', 36000),
('Tocantins Standard Time', -10800),
('Tokyo Standard Time', 32400),
('Tomsk Standard Time', 25200),
('Tonga Standard Time', 46800),
('Transbaikal Standard Time', 32400), # Yakutsk
('Turkey Standard Time', 7200),
('Turks And Caicos Standard Time', -14400),
('Ulaanbaatar Standard Time', 28800),
('US Eastern Standard Time', -18000),
('US Mountain Standard Time', -25200),
('UTC-11', -39600),
('UTC-09', -32400),
('UTC-08', -28800),
('UTC-02', -7200),
('UTC', 0),
('UTC+12', 43200),
('UTC+13', 46800),
('Venezuela Standard Time', -16200),
('Vladivostok Standard Time', 36000),
('Volgograd Standard Time', 14400),
('W. Australia Standard Time', 28800),
('W. Central Africa Standard Time', 3600),
('W. Europe Standard Time', 3600),
('W. Mongolia Standard Time', 25200), # Hovd
('West Asia Standard Time', 18000),
('West Bank Standard Time', 7200),
('West Pacific Standard Time', 36000),
('Yakutsk Standard Time', 32400),
('Yukon Standard Time', -25200), # Non-DST Mountain Standard Time since 2020-11-01
)
# List of standard UTC IDs to use. Not public so may be safely changed.
# Do not remove IDs, as each entry is part of the API/behavior guarantee.
# ( UTC Id, Offset Seconds )
utcIdList = (
('UTC', 0), # Goes first so is default
('UTC-14:00', -50400),
('UTC-13:00', -46800),
('UTC-12:00', -43200),
('UTC-11:00', -39600),
('UTC-10:00', -36000),
('UTC-09:00', -32400),
('UTC-08:00', -28800),
('UTC-07:00', -25200),
('UTC-06:00', -21600),
('UTC-05:00', -18000),
('UTC-04:30', -16200),
('UTC-04:00', -14400),
('UTC-03:30', -12600),
('UTC-03:00', -10800),
('UTC-02:00', -7200),
('UTC-01:00', -3600),
('UTC-00:00', 0),
('UTC+00:00', 0),
('UTC+01:00', 3600),
('UTC+02:00', 7200),
('UTC+03:00', 10800),
('UTC+03:30', 12600),
('UTC+04:00', 14400),
('UTC+04:30', 16200),
('UTC+05:00', 18000),
('UTC+05:30', 19800),
('UTC+05:45', 20700),
('UTC+06:00', 21600),
('UTC+06:30', 23400),
('UTC+07:00', 25200),
('UTC+08:00', 28800),
('UTC+08:30', 30600),
('UTC+09:00', 32400),
('UTC+09:30', 34200),
('UTC+10:00', 36000),
('UTC+11:00', 39600),
('UTC+12:00', 43200),
('UTC+13:00', 46800),
('UTC+14:00', 50400),
)
### End of data that may need updates in response to CLDR ###
class ByteArrayData:
def __init__(self):
self.data = []
self.hash = {}
def append(self, s):
s = s + '\0'
if s in self.hash:
return self.hash[s]
lst = unicode2hex(s)
index = len(self.data)
if index > 0xffff:
raise Error(f'Index ({index}) outside the uint16 range !')
self.hash[s] = index
self.data += lst
return index
def write(self, out, name):
out(f'\nstatic constexpr char {name}[] = {{\n')
out(wrap_list(self.data))
out('\n};\n')
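    # A sketch of the pooling behaviour, assuming (as holds for ASCII) that
    # unicode2hex() contributes one entry per character, including the
    # appended '\0':
    #   pool = ByteArrayData()
    #   pool.append('UTC')      # -> 0: new entry at the start of the pool
    #   pool.append('UTC')      # -> 0 again: deduplicated via self.hash
    #   pool.append('PST8PDT')  # -> 4: starts just past 'UTC\0'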
class ZoneIdWriter (SourceFileEditor):
def write(self, version, defaults, windowsIds):
self.__writeWarning(version)
windows, iana = self.__writeTables(self.writer.write, defaults, windowsIds)
windows.write(self.writer.write, 'windowsIdData')
iana.write(self.writer.write, 'ianaIdData')
def __writeWarning(self, version):
self.writer.write(f"""
/*
This part of the file was generated on {datetime.date.today()} from the
Common Locale Data Repository v{version} file supplemental/windowsZones.xml
http://www.unicode.org/cldr/
Do not edit this code: run cldr2qtimezone.py on updated (or
edited) CLDR data; see qtbase/util/locale_database/.
*/
""")
@staticmethod
def __writeTables(out, defaults, windowsIds):
windowsIdData, ianaIdData = ByteArrayData(), ByteArrayData()
# Write Windows/IANA table
out('// Windows ID Key, Territory Enum, IANA ID Index\n')
out('static constexpr QZoneData zoneDataTable[] = {\n')
for index, data in sorted(windowsIds.items()):
out(' {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'.format(
data['windowsKey'], data['territoryId'],
ianaIdData.append(data['ianaList']),
data['windowsId'], data['territory']))
out('};\n\n')
# Write Windows ID key table
out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
out('static constexpr QWindowsData windowsDataTable[] = {\n')
for index, pair in enumerate(windowsIdList, 1):
out(' {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'.format(
index,
windowsIdData.append(pair[0]),
ianaIdData.append(defaults[index]),
pair[1], pair[0]))
out('};\n\n')
# Write UTC ID key table
out('// IANA ID Index, UTC Offset\n')
out('static constexpr QUtcData utcDataTable[] = {\n')
for pair in utcIdList:
out(' {{ {:6d},{:6d} }}, // {}\n'.format(
ianaIdData.append(pair[0]), pair[1], pair[0]))
out('};\n')
return windowsIdData, ianaIdData
def main(out, err):
"""Parses CLDR's data and updates Qt's representation of it.
Takes sys.stdout, sys.stderr (or equivalents) as
    arguments. Expects one or two command-line arguments: the root of the
    unpacked CLDR data-file tree and, optionally, the root of the qtbase
    module's checkout. Updates QTimeZone's private data about Windows time-zone
IDs."""
parser = argparse.ArgumentParser(
description="Update Qt's CLDR-derived timezone data.")
parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
parser.add_argument('qtbase_path',
help='path to the root of the qtbase source tree',
nargs='?', default=qtbase_root)
args = parser.parse_args()
cldrPath = Path(args.cldr_path)
qtPath = Path(args.qtbase_path)
if not qtPath.is_dir():
parser.error(f"No such Qt directory: {qtPath}")
if not cldrPath.is_dir():
parser.error(f"No such CLDR directory: {cldrPath}")
dataFilePath = qtPath.joinpath('src/corelib/time/qtimezoneprivate_data_p.h')
if not dataFilePath.is_file():
parser.error(f'No such file: {dataFilePath}')
try:
version, defaults, winIds = CldrAccess(cldrPath).readWindowsTimeZones(
dict((name, ind) for ind, name in enumerate((x[0] for x in windowsIdList), 1)))
except IOError as e:
parser.error(
f'Failed to open common/supplemental/windowsZones.xml: {e}')
return 1
except Error as e:
err.write('\n'.join(textwrap.wrap(
f'Failed to read windowsZones.xml: {e}',
subsequent_indent=' ', width=80)) + '\n')
return 1
out.write('Input file parsed, now writing data\n')
try:
with ZoneIdWriter(dataFilePath, qtPath) as writer:
writer.write(version, defaults, winIds)
except Exception as e:
err.write(f'\nError while updating timezone data: {e}\n')
return 1
out.write(f'Data generation completed, please check the new file at {dataFilePath}\n')
return 0
if __name__ == '__main__':
import sys
sys.exit(main(sys.stdout, sys.stderr))


@ -0,0 +1,81 @@
# Copyright (C) 2016 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
import re
def _convert_pattern(pattern):
# patterns from http://www.unicode.org/reports/tr35/#Date_Format_Patterns
qt_regexps = {
r"yyy{3,}" : "yyyy", # more that three digits hence convert to four-digit year
r"L" : "M", # stand-alone month names. not supported.
r"g{1,}": "", # modified julian day. not supported.
r"S{1,}" : "", # fractional seconds. not supported.
r"A{1,}" : "" # milliseconds in day. not supported.
}
qt_patterns = {
"G" : "", "GG" : "", "GGG" : "", "GGGG" : "", "GGGGG" : "", # Era. not supported.
"y" : "yyyy", # four-digit year without leading zeroes
"Q" : "", "QQ" : "", "QQQ" : "", "QQQQ" : "", # quarter. not supported.
"q" : "", "qq" : "", "qqq" : "", "qqqq" : "", # quarter. not supported.
"MMMMM" : "MMM", # narrow month name.
"LLLLL" : "MMM", # stand-alone narrow month name.
"l" : "", # special symbol for chinese leap month. not supported.
"w" : "", "W" : "", # week of year/month. not supported.
"D" : "", "DD" : "", "DDD" : "", # day of year. not supported.
"F" : "", # day of week in month. not supported.
"E" : "ddd", "EE" : "ddd", "EEE" : "ddd", "EEEEE" : "ddd", "EEEE" : "dddd", # day of week
"e" : "ddd", "ee" : "ddd", "eee" : "ddd", "eeeee" : "ddd", "eeee" : "dddd", # local day of week
"c" : "ddd", "cc" : "ddd", "ccc" : "ddd", "ccccc" : "ddd", "cccc" : "dddd", # stand-alone local day of week
"a" : "AP", # AM/PM
"K" : "h", # Hour 0-11
"k" : "H", # Hour 1-24
"j" : "", # special reserved symbol.
"z" : "t", "zz" : "t", "zzz" : "t", "zzzz" : "t", # timezone
"Z" : "t", "ZZ" : "t", "ZZZ" : "t", "ZZZZ" : "t", # timezone
"v" : "t", "vv" : "t", "vvv" : "t", "vvvv" : "t", # timezone
"V" : "t", "VV" : "t", "VVV" : "t", "VVVV" : "t" # timezone
}
if pattern in qt_patterns:
return qt_patterns[pattern]
for r,v in qt_regexps.items():
pattern = re.sub(r, v, pattern)
return pattern
def convert_date(input):
result = ""
patterns = "GyYuQqMLlwWdDFgEecahHKkjmsSAzZvV"
last = ""
inquote = 0
chars_to_strip = " -"
for c in input:
if c == "'":
inquote = inquote + 1
if inquote % 2 == 0:
if c in patterns:
if not last:
last = c
else:
if c in last:
last += c
else:
# pattern changed
converted = _convert_pattern(last)
result += converted
if not converted:
result = result.rstrip(chars_to_strip)
last = c
continue
if last:
# pattern ended
converted = _convert_pattern(last)
result += converted
if not converted:
result = result.rstrip(chars_to_strip)
last = ""
result += c
if last:
converted = _convert_pattern(last)
result += converted
if not converted:
result = result.rstrip(chars_to_strip)
return result.lstrip(chars_to_strip)
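# Worked examples, tracing the code above:
#   convert_date("EEEE, d MMMM y") -> "dddd, d MMMM yyyy"
#   convert_date("h:mm a")         -> "h:mm AP"
#   convert_date("y-MM-dd")        -> "yyyy-MM-dd"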


@ -0,0 +1,850 @@
# Copyright (C) 2021 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
# A run of cldr2qlocalexml.py will produce output reporting any
# language, script and territory codes it sees, in data, for which it
# can find a name (taken always from en.xml) that could potentially be
# used. There is no point adding a mapping for such a code unless the
# CLDR's common/main/ contains an XML file for at least one locale
# that exercises it.
# Each *_list reflects the current values of its enums in qlocale.h;
# if new xml language files are available in CLDR, these languages and
# territories need to be *appended* to this list (for compatibility
# between versions). Include any spaces present in names (scripts
# shall squish them out for the enum entries) in *_list, but use the
# squished forms of names in the *_aliases mappings.
# For a new major version (and only then), we can change the
# numbering, so re-sort each list into alphabetic order (e.g. using
# sort -k2); but keep the Any and C entries first. That's why those
# are offset with a blank line, below. After doing that, regenerate
# locale data as usual; this will cause a binary-incompatible change.
# Note on "macrolanguage" comments: see "ISO 639 macrolanguage" on
# Wikipedia. A "macrolanguage" is (loosely-speaking) a group of
# languages so closely related to one another that they could also be
# regarded as divergent dialects of the macrolanguage.
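# A purely hypothetical example of such an append (337 is the next free
# number in language_map below; the name and code are made up):
#   337: ("New Langish", "nlx"),
# with the squished form "NewLangish" used for the enum entry and in any
# *_aliases mapping.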
language_map = {
0: ("AnyLanguage", " "),
1: ("C", " "),
2: ("Abkhazian", "ab"),
3: ("Afar", "aa"),
4: ("Afrikaans", "af"),
5: ("Aghem", "agq"),
6: ("Akan", "ak"), # macrolanguage
7: ("Akkadian", "akk"),
8: ("Akoose", "bss"),
9: ("Albanian", "sq"), # macrolanguage
10: ("American Sign Language", "ase"),
11: ("Amharic", "am"),
12: ("Ancient Egyptian", "egy"),
13: ("Ancient Greek", "grc"),
14: ("Arabic", "ar"), # macrolanguage
15: ("Aragonese", "an"),
16: ("Aramaic", "arc"),
17: ("Armenian", "hy"),
18: ("Assamese", "as"),
19: ("Asturian", "ast"),
20: ("Asu", "asa"),
21: ("Atsam", "cch"),
22: ("Avaric", "av"),
23: ("Avestan", "ae"),
24: ("Aymara", "ay"), # macrolanguage
25: ("Azerbaijani", "az"), # macrolanguage
26: ("Bafia", "ksf"),
27: ("Balinese", "ban"),
28: ("Bambara", "bm"),
29: ("Bamun", "bax"),
30: ("Bangla", "bn"),
31: ("Basaa", "bas"),
32: ("Bashkir", "ba"),
33: ("Basque", "eu"),
34: ("Batak Toba", "bbc"),
35: ("Belarusian", "be"),
36: ("Bemba", "bem"),
37: ("Bena", "bez"),
38: ("Bhojpuri", "bho"),
39: ("Bislama", "bi"),
40: ("Blin", "byn"),
41: ("Bodo", "brx"),
42: ("Bosnian", "bs"),
43: ("Breton", "br"),
44: ("Buginese", "bug"),
45: ("Bulgarian", "bg"),
46: ("Burmese", "my"),
47: ("Cantonese", "yue"),
48: ("Catalan", "ca"),
49: ("Cebuano", "ceb"),
50: ("Central Atlas Tamazight", "tzm"),
51: ("Central Kurdish", "ckb"),
52: ("Chakma", "ccp"),
53: ("Chamorro", "ch"),
54: ("Chechen", "ce"),
55: ("Cherokee", "chr"),
56: ("Chickasaw", "cic"),
57: ("Chiga", "cgg"),
58: ("Chinese", "zh"), # macrolanguage
59: ("Church", "cu"), # macrolanguage
60: ("Chuvash", "cv"),
61: ("Colognian", "ksh"),
62: ("Coptic", "cop"),
63: ("Cornish", "kw"),
64: ("Corsican", "co"),
65: ("Cree", "cr"), # macrolanguage
66: ("Croatian", "hr"),
67: ("Czech", "cs"),
68: ("Danish", "da"),
69: ("Divehi", "dv"),
70: ("Dogri", "doi"), # macrolanguage
71: ("Duala", "dua"),
72: ("Dutch", "nl"),
73: ("Dzongkha", "dz"),
74: ("Embu", "ebu"),
75: ("English", "en"),
76: ("Erzya", "myv"),
77: ("Esperanto", "eo"),
78: ("Estonian", "et"), # macrolanguage
79: ("Ewe", "ee" ),
80: ("Ewondo", "ewo"),
81: ("Faroese", "fo"),
82: ("Fijian", "fj"),
83: ("Filipino", "fil"),
84: ("Finnish", "fi"),
85: ("French", "fr"),
86: ("Friulian", "fur"),
87: ("Fulah", "ff"), # macrolanguage
88: ("Gaelic", "gd"),
89: ("Ga", "gaa"),
90: ("Galician", "gl"),
91: ("Ganda", "lg"),
92: ("Geez", "gez"),
93: ("Georgian", "ka"),
94: ("German", "de"),
95: ("Gothic", "got"),
96: ("Greek", "el"),
97: ("Guarani", "gn"), # macrolanguage
98: ("Gujarati", "gu"),
99: ("Gusii", "guz"),
100: ("Haitian", "ht"),
101: ("Hausa", "ha"),
102: ("Hawaiian", "haw"),
103: ("Hebrew", "he"),
104: ("Herero", "hz"),
105: ("Hindi", "hi"),
106: ("Hiri Motu", "ho"),
107: ("Hungarian", "hu"),
108: ("Icelandic", "is"),
109: ("Ido", "io"),
110: ("Igbo", "ig" ),
111: ("Inari Sami", "smn"),
112: ("Indonesian", "id"),
113: ("Ingush", "inh"),
114: ("Interlingua", "ia"),
115: ("Interlingue", "ie"),
116: ("Inuktitut", "iu"), # macrolanguage
117: ("Inupiaq", "ik"), # macrolanguage
118: ("Irish", "ga"),
119: ("Italian", "it"),
120: ("Japanese", "ja"),
121: ("Javanese", "jv"),
122: ("Jju", "kaj"),
123: ("Jola Fonyi", "dyo"),
124: ("Kabuverdianu", "kea"),
125: ("Kabyle", "kab"),
126: ("Kako", "kkj"),
127: ("Kalaallisut", "kl"),
128: ("Kalenjin", "kln"),
129: ("Kamba", "kam"),
130: ("Kannada", "kn"),
131: ("Kanuri", "kr"), # macrolanguage
132: ("Kashmiri", "ks"),
133: ("Kazakh", "kk"),
134: ("Kenyang", "ken"),
135: ("Khmer", "km"),
136: ("Kiche", "quc"),
137: ("Kikuyu", "ki"),
138: ("Kinyarwanda", "rw"),
139: ("Komi", "kv"), # macrolanguage
140: ("Kongo", "kg"), # macrolanguage
141: ("Konkani", "kok"),
142: ("Korean", "ko"),
143: ("Koro", "kfo"),
144: ("Koyraboro Senni", "ses"),
145: ("Koyra Chiini", "khq"),
146: ("Kpelle", "kpe"),
147: ("Kuanyama", "kj"),
148: ("Kurdish", "ku"), # macrolanguage
149: ("Kwasio", "nmg"),
150: ("Kyrgyz", "ky"),
151: ("Lakota", "lkt"),
152: ("Langi", "lag"),
153: ("Lao", "lo"),
154: ("Latin", "la"),
155: ("Latvian", "lv"), # macrolanguage
156: ("Lezghian", "lez"),
157: ("Limburgish", "li"),
158: ("Lingala", "ln"),
159: ("Literary Chinese", "lzh"),
160: ("Lithuanian", "lt"),
161: ("Lojban", "jbo"),
162: ("Lower Sorbian", "dsb"),
163: ("Low German", "nds"),
164: ("Luba Katanga", "lu"),
165: ("Lule Sami", "smj"),
166: ("Luo", "luo"),
167: ("Luxembourgish", "lb"),
168: ("Luyia", "luy"),
169: ("Macedonian", "mk"),
170: ("Machame", "jmc"),
171: ("Maithili", "mai"),
172: ("Makhuwa Meetto", "mgh"),
173: ("Makonde", "kde"),
174: ("Malagasy", "mg"), # macrolanguage
175: ("Malayalam", "ml"),
176: ("Malay", "ms"), # macrolanguage
177: ("Maltese", "mt"),
178: ("Mandingo", "man"), # macrolanguage
179: ("Manipuri", "mni"),
180: ("Manx", "gv"),
181: ("Maori", "mi"),
182: ("Mapuche", "arn"),
183: ("Marathi", "mr"),
184: ("Marshallese", "mh"),
185: ("Masai", "mas"),
186: ("Mazanderani", "mzn"),
187: ("Mende", "men"),
188: ("Meru", "mer"),
189: ("Meta", "mgo"),
190: ("Mohawk", "moh"),
191: ("Mongolian", "mn"), # macrolanguage
192: ("Morisyen", "mfe"),
193: ("Mundang", "mua"),
194: ("Muscogee", "mus"),
195: ("Nama", "naq"),
196: ("Nauru", "na"),
197: ("Navajo", "nv"),
198: ("Ndonga", "ng"),
199: ("Nepali", "ne"), # macrolanguage
200: ("Newari", "new"),
201: ("Ngiemboon", "nnh"),
202: ("Ngomba", "jgo"),
203: ("Nigerian Pidgin", "pcm"),
204: ("Nko", "nqo"),
205: ("Northern Luri", "lrc"),
206: ("Northern Sami", "se" ),
207: ("Northern Sotho", "nso"),
208: ("North Ndebele", "nd"),
209: ("Norwegian Bokmal", "nb"),
210: ("Norwegian Nynorsk", "nn"),
211: ("Nuer", "nus"),
212: ("Nyanja", "ny" ),
213: ("Nyankole", "nyn"),
214: ("Occitan", "oc"),
215: ("Odia", "or"), # macrolanguage
216: ("Ojibwa", "oj"), # macrolanguage
217: ("Old Irish", "sga"),
218: ("Old Norse", "non"),
219: ("Old Persian", "peo"),
220: ("Oromo", "om"), # macrolanguage
221: ("Osage", "osa"),
222: ("Ossetic", "os"),
223: ("Pahlavi", "pal"),
224: ("Palauan", "pau"),
225: ("Pali", "pi"), # macrolanguage
226: ("Papiamento", "pap"),
227: ("Pashto", "ps"), # macrolanguage
228: ("Persian", "fa"), # macrolanguage
229: ("Phoenician", "phn"),
230: ("Polish", "pl"),
231: ("Portuguese", "pt"),
232: ("Prussian", "prg"),
233: ("Punjabi", "pa"),
234: ("Quechua", "qu"), # macrolanguage
235: ("Romanian", "ro"),
236: ("Romansh", "rm"),
237: ("Rombo", "rof"),
238: ("Rundi", "rn"),
239: ("Russian", "ru"),
240: ("Rwa", "rwk"),
241: ("Saho", "ssy"),
242: ("Sakha", "sah"),
243: ("Samburu", "saq"),
244: ("Samoan", "sm"),
245: ("Sango", "sg"),
246: ("Sangu", "sbp"),
247: ("Sanskrit", "sa"),
248: ("Santali", "sat"),
249: ("Sardinian", "sc"), # macrolanguage
250: ("Saurashtra", "saz"),
251: ("Sena", "seh"),
252: ("Serbian", "sr"),
253: ("Shambala", "ksb"),
254: ("Shona", "sn"),
255: ("Sichuan Yi", "ii" ),
256: ("Sicilian", "scn"),
257: ("Sidamo", "sid"),
258: ("Silesian", "szl"),
259: ("Sindhi", "sd"),
260: ("Sinhala", "si"),
261: ("Skolt Sami", "sms"),
262: ("Slovak", "sk"),
263: ("Slovenian", "sl"),
264: ("Soga", "xog"),
265: ("Somali", "so"),
266: ("Southern Kurdish", "sdh"),
267: ("Southern Sami", "sma"),
268: ("Southern Sotho", "st"),
269: ("South Ndebele", "nr" ),
270: ("Spanish", "es"),
271: ("Standard Moroccan Tamazight", "zgh"),
272: ("Sundanese", "su"),
273: ("Swahili", "sw"), # macrolanguage
274: ("Swati", "ss"),
275: ("Swedish", "sv"),
276: ("Swiss German", "gsw"),
277: ("Syriac", "syr"),
278: ("Tachelhit", "shi"),
279: ("Tahitian", "ty"),
280: ("Tai Dam", "blt"),
281: ("Taita", "dav"),
282: ("Tajik", "tg"),
283: ("Tamil", "ta"),
284: ("Taroko", "trv"),
285: ("Tasawaq", "twq"),
286: ("Tatar", "tt"),
287: ("Telugu", "te"),
288: ("Teso", "teo"),
289: ("Thai", "th"),
290: ("Tibetan", "bo"),
291: ("Tigre", "tig"),
292: ("Tigrinya", "ti"),
293: ("Tokelau", "tkl"),
294: ("Tok Pisin", "tpi"),
295: ("Tongan", "to"),
296: ("Tsonga", "ts"),
297: ("Tswana", "tn"),
298: ("Turkish", "tr"),
299: ("Turkmen", "tk"),
300: ("Tuvalu", "tvl"),
301: ("Tyap", "kcg"),
302: ("Ugaritic", "uga"),
303: ("Ukrainian", "uk"),
304: ("Upper Sorbian", "hsb"),
305: ("Urdu", "ur"),
306: ("Uyghur", "ug"),
307: ("Uzbek", "uz"), # macrolanguage
308: ("Vai", "vai"),
309: ("Venda", "ve" ),
310: ("Vietnamese", "vi"),
311: ("Volapuk", "vo"),
312: ("Vunjo", "vun"),
313: ("Walloon", "wa"),
314: ("Walser", "wae"),
315: ("Warlpiri", "wbp"),
316: ("Welsh", "cy"),
317: ("Western Balochi", "bgn"),
318: ("Western Frisian", "fy"),
319: ("Wolaytta", "wal"),
320: ("Wolof", "wo"),
321: ("Xhosa", "xh"),
322: ("Yangben", "yav"),
323: ("Yiddish", "yi"), # macrolanguage
324: ("Yoruba", "yo"),
325: ("Zarma", "dje"),
326: ("Zhuang", "za"), # macrolanguage
327: ("Zulu", "zu"),
# added in CLDR v40
328: ("Kaingang", "kgp"),
329: ("Nheengatu", "yrl"),
# added in CLDR v42
330: ("Haryanvi", "bgc"),
331: ("Moksha", "mdf"),
332: ("Northern Frisian", "frr"),
333: ("Obolo", "ann"),
334: ("Pijin", "pis"),
335: ("Rajasthani", "raj"),
336: ("Toki Pona", "tok"),
}
language_aliases = {
# Renamings prior to Qt 6.0 (CLDR v37):
'Afan': 'Oromo',
'Byelorussian': 'Belarusian',
'Bhutani': 'Dzongkha',
'Cambodian': 'Khmer',
'Kurundi': 'Rundi',
'RhaetoRomance': 'Romansh',
'Chewa': 'Nyanja',
'Frisian': 'WesternFrisian',
'Uigur': 'Uyghur',
# Renamings:
'Uighur': 'Uyghur',
'Kwanyama': 'Kuanyama',
'Inupiak': 'Inupiaq',
'Bengali': 'Bangla',
'CentralMoroccoTamazight': 'CentralAtlasTamazight',
'Greenlandic': 'Kalaallisut',
'Walamo': 'Wolaytta',
'Navaho': 'Navajo',
'Oriya': 'Odia',
'Kirghiz': 'Kyrgyz'
}
territory_map = {
0: ("AnyTerritory", "ZZ"),
1: ("Afghanistan", "AF"),
2: ("Aland Islands", "AX"),
3: ("Albania", "AL"),
4: ("Algeria", "DZ"),
5: ("American Samoa", "AS"),
6: ("Andorra", "AD"),
7: ("Angola", "AO"),
8: ("Anguilla", "AI"),
9: ("Antarctica", "AQ"),
10: ("Antigua And Barbuda", "AG"),
11: ("Argentina", "AR"),
12: ("Armenia", "AM"),
13: ("Aruba", "AW"),
14: ("Ascension Island", "AC"),
15: ("Australia", "AU"),
16: ("Austria", "AT"),
17: ("Azerbaijan", "AZ"),
18: ("Bahamas", "BS"),
19: ("Bahrain", "BH"),
20: ("Bangladesh", "BD"),
21: ("Barbados", "BB"),
22: ("Belarus", "BY"),
23: ("Belgium", "BE"),
24: ("Belize", "BZ"),
25: ("Benin", "BJ"),
26: ("Bermuda", "BM"),
27: ("Bhutan", "BT"),
28: ("Bolivia", "BO"),
29: ("Bosnia And Herzegovina", "BA"),
30: ("Botswana", "BW"),
31: ("Bouvet Island", "BV"),
32: ("Brazil", "BR"),
33: ("British Indian Ocean Territory", "IO"),
34: ("British Virgin Islands", "VG"),
35: ("Brunei", "BN"),
36: ("Bulgaria", "BG"),
37: ("Burkina Faso", "BF"),
38: ("Burundi", "BI"),
39: ("Cambodia", "KH"),
40: ("Cameroon", "CM"),
41: ("Canada", "CA"),
42: ("Canary Islands", "IC"),
43: ("Cape Verde", "CV"),
44: ("Caribbean Netherlands", "BQ"),
45: ("Cayman Islands", "KY"),
46: ("Central African Republic", "CF"),
47: ("Ceuta And Melilla", "EA"),
48: ("Chad", "TD"),
49: ("Chile", "CL"),
50: ("China", "CN"),
51: ("Christmas Island", "CX"),
52: ("Clipperton Island", "CP"),
53: ("Cocos Islands", "CC"),
54: ("Colombia", "CO"),
55: ("Comoros", "KM"),
56: ("Congo Brazzaville", "CG"),
57: ("Congo Kinshasa", "CD"),
58: ("Cook Islands", "CK"),
59: ("Costa Rica", "CR"),
60: ("Croatia", "HR"),
61: ("Cuba", "CU"),
62: ("Curacao", "CW"),
63: ("Cyprus", "CY"),
64: ("Czechia", "CZ"),
65: ("Denmark", "DK"),
66: ("Diego Garcia", "DG"),
67: ("Djibouti", "DJ"),
68: ("Dominica", "DM"),
69: ("Dominican Republic", "DO"),
70: ("Ecuador", "EC"),
71: ("Egypt", "EG"),
72: ("El Salvador", "SV"),
73: ("Equatorial Guinea", "GQ"),
74: ("Eritrea", "ER"),
75: ("Estonia", "EE"),
76: ("Eswatini", "SZ"),
77: ("Ethiopia", "ET"),
78: ("Europe", "150"),
79: ("European Union", "EU"),
80: ("Falkland Islands", "FK"),
81: ("Faroe Islands", "FO"),
82: ("Fiji", "FJ"),
83: ("Finland", "FI"),
84: ("France", "FR"),
85: ("French Guiana", "GF"),
86: ("French Polynesia", "PF"),
87: ("French Southern Territories", "TF"),
88: ("Gabon", "GA"),
89: ("Gambia", "GM"),
90: ("Georgia", "GE"),
91: ("Germany", "DE"),
92: ("Ghana", "GH"),
93: ("Gibraltar", "GI"),
94: ("Greece", "GR"),
95: ("Greenland", "GL"),
96: ("Grenada", "GD"),
97: ("Guadeloupe", "GP"),
98: ("Guam", "GU"),
99: ("Guatemala", "GT"),
100: ("Guernsey", "GG"),
101: ("Guinea Bissau", "GW"),
102: ("Guinea", "GN"),
103: ("Guyana", "GY"),
104: ("Haiti", "HT"),
105: ("Heard And McDonald Islands", "HM"),
106: ("Honduras", "HN"),
107: ("Hong Kong", "HK"),
108: ("Hungary", "HU"),
109: ("Iceland", "IS"),
110: ("India", "IN"),
111: ("Indonesia", "ID"),
112: ("Iran", "IR"),
113: ("Iraq", "IQ"),
114: ("Ireland", "IE"),
115: ("Isle Of Man", "IM"),
116: ("Israel", "IL"),
117: ("Italy", "IT"),
    # Officially Côte d'Ivoire, which we'd need to map to CotedIvoire
    # or CoteDIvoire, either failing to separate the d' from Cote or
    # messing with its case. So stick with Ivory Coast:
118: ("Ivory Coast", "CI"),
119: ("Jamaica", "JM"),
120: ("Japan", "JP"),
121: ("Jersey", "JE"),
122: ("Jordan", "JO"),
123: ("Kazakhstan", "KZ"),
124: ("Kenya", "KE"),
125: ("Kiribati", "KI"),
126: ("Kosovo", "XK"),
127: ("Kuwait", "KW"),
128: ("Kyrgyzstan", "KG"),
129: ("Laos", "LA"),
130: ("Latin America", "419"),
131: ("Latvia", "LV"),
132: ("Lebanon", "LB"),
133: ("Lesotho", "LS"),
134: ("Liberia", "LR"),
135: ("Libya", "LY"),
136: ("Liechtenstein", "LI"),
137: ("Lithuania", "LT"),
138: ("Luxembourg", "LU"),
139: ("Macao", "MO"),
140: ("Macedonia", "MK"),
141: ("Madagascar", "MG"),
142: ("Malawi", "MW"),
143: ("Malaysia", "MY"),
144: ("Maldives", "MV"),
145: ("Mali", "ML"),
146: ("Malta", "MT"),
147: ("Marshall Islands", "MH"),
148: ("Martinique", "MQ"),
149: ("Mauritania", "MR"),
150: ("Mauritius", "MU"),
151: ("Mayotte", "YT"),
152: ("Mexico", "MX"),
153: ("Micronesia", "FM"),
154: ("Moldova", "MD"),
155: ("Monaco", "MC"),
156: ("Mongolia", "MN"),
157: ("Montenegro", "ME"),
158: ("Montserrat", "MS"),
159: ("Morocco", "MA"),
160: ("Mozambique", "MZ"),
161: ("Myanmar", "MM"),
162: ("Namibia", "NA"),
163: ("Nauru", "NR"),
164: ("Nepal", "NP"),
165: ("Netherlands", "NL"),
166: ("New Caledonia", "NC"),
167: ("New Zealand", "NZ"),
168: ("Nicaragua", "NI"),
169: ("Nigeria", "NG"),
170: ("Niger", "NE"),
171: ("Niue", "NU"),
172: ("Norfolk Island", "NF"),
173: ("Northern Mariana Islands", "MP"),
174: ("North Korea", "KP"),
175: ("Norway", "NO"),
176: ("Oman", "OM"),
177: ("Outlying Oceania", "QO"),
178: ("Pakistan", "PK"),
179: ("Palau", "PW"),
180: ("Palestinian Territories", "PS"),
181: ("Panama", "PA"),
182: ("Papua New Guinea", "PG"),
183: ("Paraguay", "PY"),
184: ("Peru", "PE"),
185: ("Philippines", "PH"),
186: ("Pitcairn", "PN"),
187: ("Poland", "PL"),
188: ("Portugal", "PT"),
189: ("Puerto Rico", "PR"),
190: ("Qatar", "QA"),
191: ("Reunion", "RE"),
192: ("Romania", "RO"),
193: ("Russia", "RU"),
194: ("Rwanda", "RW"),
195: ("Saint Barthelemy", "BL"),
196: ("Saint Helena", "SH"),
197: ("Saint Kitts And Nevis", "KN"),
198: ("Saint Lucia", "LC"),
199: ("Saint Martin", "MF"),
200: ("Saint Pierre And Miquelon", "PM"),
201: ("Saint Vincent And Grenadines", "VC"),
202: ("Samoa", "WS"),
203: ("San Marino", "SM"),
204: ("Sao Tome And Principe", "ST"),
205: ("Saudi Arabia", "SA"),
206: ("Senegal", "SN"),
207: ("Serbia", "RS"),
208: ("Seychelles", "SC"),
209: ("Sierra Leone", "SL"),
210: ("Singapore", "SG"),
211: ("Sint Maarten", "SX"),
212: ("Slovakia", "SK"),
213: ("Slovenia", "SI"),
214: ("Solomon Islands", "SB"),
215: ("Somalia", "SO"),
216: ("South Africa", "ZA"),
217: ("South Georgia And South Sandwich Islands", "GS"),
218: ("South Korea", "KR"),
219: ("South Sudan", "SS"),
220: ("Spain", "ES"),
221: ("Sri Lanka", "LK"),
222: ("Sudan", "SD"),
223: ("Suriname", "SR"),
224: ("Svalbard And Jan Mayen", "SJ"),
225: ("Sweden", "SE"),
226: ("Switzerland", "CH"),
227: ("Syria", "SY"),
228: ("Taiwan", "TW"),
229: ("Tajikistan", "TJ"),
230: ("Tanzania", "TZ"),
231: ("Thailand", "TH"),
232: ("Timor-Leste", "TL"),
233: ("Togo", "TG"),
234: ("Tokelau", "TK"),
235: ("Tonga", "TO"),
236: ("Trinidad And Tobago", "TT"),
237: ("Tristan Da Cunha", "TA"),
238: ("Tunisia", "TN"),
239: ("Turkey", "TR"),
240: ("Turkmenistan", "TM"),
241: ("Turks And Caicos Islands", "TC"),
242: ("Tuvalu", "TV"),
243: ("Uganda", "UG"),
244: ("Ukraine", "UA"),
245: ("United Arab Emirates", "AE"),
246: ("United Kingdom", "GB"),
247: ("United States Outlying Islands", "UM"),
248: ("United States", "US"),
249: ("United States Virgin Islands", "VI"),
250: ("Uruguay", "UY"),
251: ("Uzbekistan", "UZ"),
252: ("Vanuatu", "VU"),
253: ("Vatican City", "VA"),
254: ("Venezuela", "VE"),
255: ("Vietnam", "VN"),
256: ("Wallis And Futuna", "WF"),
257: ("Western Sahara", "EH"),
258: ("World", "001"),
259: ("Yemen", "YE"),
260: ("Zambia", "ZM"),
261: ("Zimbabwe", "ZW"),
}
territory_aliases = {
# Renamings prior to Qt 6.0 (CLDR v37):
'DemocraticRepublicOfCongo': 'CongoKinshasa',
'PeoplesRepublicOfCongo': 'CongoBrazzaville',
'DemocraticRepublicOfKorea': 'NorthKorea',
'RepublicOfKorea': 'SouthKorea',
'RussianFederation': 'Russia',
'SyrianArabRepublic': 'Syria',
'LatinAmericaAndTheCaribbean': 'LatinAmerica',
# Renamings:
'EastTimor': 'TimorLeste',
'Bonaire': 'CaribbeanNetherlands',
'Macau': 'Macao',
'SouthGeorgiaAndTheSouthSandwichIslands': 'SouthGeorgiaAndSouthSandwichIslands',
'WallisAndFutunaIslands': 'WallisAndFutuna',
'SaintVincentAndTheGrenadines': 'SaintVincentAndGrenadines',
'BosniaAndHerzegowina': 'BosniaAndHerzegovina',
'SvalbardAndJanMayenIslands': 'SvalbardAndJanMayen',
'VaticanCityState': 'VaticanCity',
'Swaziland': 'Eswatini',
'UnitedStatesMinorOutlyingIslands': 'UnitedStatesOutlyingIslands',
'CuraSao': 'Curacao',
'CzechRepublic': 'Czechia',
# Backwards compatibility with old Country enum, prior to Qt 6.2:
'AnyCountry': 'AnyTerritory',
'NauruCountry': 'NauruTerritory',
'TokelauCountry': 'TokelauTerritory',
'TuvaluCountry': 'TuvaluTerritory',
}
script_map = {
0: ("AnyScript", "Zzzz"),
1: ("Adlam", "Adlm"),
2: ("Ahom", "Ahom"),
3: ("Anatolian Hieroglyphs", "Hluw"),
4: ("Arabic", "Arab"),
5: ("Armenian", "Armn"),
6: ("Avestan", "Avst"),
7: ("Balinese", "Bali"),
8: ("Bamum", "Bamu"),
9: ("Bangla", "Beng"),
10: ("Bassa Vah", "Bass"),
11: ("Batak", "Batk"),
12: ("Bhaiksuki", "Bhks"),
13: ("Bopomofo", "Bopo"),
14: ("Brahmi", "Brah"),
15: ("Braille", "Brai"),
16: ("Buginese", "Bugi"),
17: ("Buhid", "Buhd"),
18: ("Canadian Aboriginal", "Cans"),
19: ("Carian", "Cari"),
20: ("Caucasian Albanian", "Aghb"),
21: ("Chakma", "Cakm"),
22: ("Cham", "Cham"),
23: ("Cherokee", "Cher"),
24: ("Coptic", "Copt"),
25: ("Cuneiform", "Xsux"),
26: ("Cypriot", "Cprt"),
27: ("Cyrillic", "Cyrl"),
28: ("Deseret", "Dsrt"),
29: ("Devanagari", "Deva"),
30: ("Duployan", "Dupl"),
31: ("Egyptian Hieroglyphs", "Egyp"),
32: ("Elbasan", "Elba"),
33: ("Ethiopic", "Ethi"),
34: ("Fraser", "Lisu"),
35: ("Georgian", "Geor"),
36: ("Glagolitic", "Glag"),
37: ("Gothic", "Goth"),
38: ("Grantha", "Gran"),
39: ("Greek", "Grek"),
40: ("Gujarati", "Gujr"),
41: ("Gurmukhi", "Guru"),
42: ("Hangul", "Hang"),
43: ("Han", "Hani"),
44: ("Hanunoo", "Hano"),
45: ("Han with Bopomofo", "Hanb"),
46: ("Hatran", "Hatr"),
47: ("Hebrew", "Hebr"),
48: ("Hiragana", "Hira"),
49: ("Imperial Aramaic", "Armi"),
50: ("Inscriptional Pahlavi", "Phli"),
51: ("Inscriptional Parthian", "Prti"),
52: ("Jamo", "Jamo"),
53: ("Japanese", "Jpan"),
54: ("Javanese", "Java"),
55: ("Kaithi", "Kthi"),
56: ("Kannada", "Knda"),
57: ("Katakana", "Kana"),
58: ("Kayah Li", "Kali"),
59: ("Kharoshthi", "Khar"),
60: ("Khmer", "Khmr"),
61: ("Khojki", "Khoj"),
62: ("Khudawadi", "Sind"),
63: ("Korean", "Kore"),
64: ("Lanna", "Lana"),
65: ("Lao", "Laoo"),
66: ("Latin", "Latn"),
67: ("Lepcha", "Lepc"),
68: ("Limbu", "Limb"),
69: ("Linear A", "Lina"),
70: ("Linear B", "Linb"),
71: ("Lycian", "Lyci"),
72: ("Lydian", "Lydi"),
73: ("Mahajani", "Mahj"),
74: ("Malayalam", "Mlym"),
75: ("Mandaean", "Mand"),
76: ("Manichaean", "Mani"),
77: ("Marchen", "Marc"),
78: ("Meitei Mayek", "Mtei"),
79: ("Mende", "Mend"),
80: ("Meroitic Cursive", "Merc"),
81: ("Meroitic", "Mero"),
82: ("Modi", "Modi"),
83: ("Mongolian", "Mong"),
84: ("Mro", "Mroo"),
85: ("Multani", "Mult"),
86: ("Myanmar", "Mymr"),
87: ("Nabataean", "Nbat"),
88: ("Newa", "Newa"),
89: ("New Tai Lue", "Talu"),
90: ("Nko", "Nkoo"),
91: ("Odia", "Orya"),
92: ("Ogham", "Ogam"),
93: ("Ol Chiki", "Olck"),
94: ("Old Hungarian", "Hung"),
95: ("Old Italic", "Ital"),
96: ("Old North Arabian", "Narb"),
97: ("Old Permic", "Perm"),
98: ("Old Persian", "Xpeo"),
99: ("Old South Arabian", "Sarb"),
100: ("Orkhon", "Orkh"),
101: ("Osage", "Osge"),
102: ("Osmanya", "Osma"),
103: ("Pahawh Hmong", "Hmng"),
104: ("Palmyrene", "Palm"),
105: ("Pau Cin Hau", "Pauc"),
106: ("Phags Pa", "Phag"),
107: ("Phoenician", "Phnx"),
108: ("Pollard Phonetic", "Plrd"),
109: ("Psalter Pahlavi", "Phlp"),
110: ("Rejang", "Rjng"),
111: ("Runic", "Runr"),
112: ("Samaritan", "Samr"),
113: ("Saurashtra", "Saur"),
114: ("Sharada", "Shrd"),
115: ("Shavian", "Shaw"),
116: ("Siddham", "Sidd"),
117: ("Sign Writing", "Sgnw"),
118: ("Simplified Han", "Hans"),
119: ("Sinhala", "Sinh"),
120: ("Sora Sompeng", "Sora"),
121: ("Sundanese", "Sund"),
122: ("Syloti Nagri", "Sylo"),
123: ("Syriac", "Syrc"),
124: ("Tagalog", "Tglg"),
125: ("Tagbanwa", "Tagb"),
126: ("Tai Le", "Tale"),
127: ("Tai Viet", "Tavt"),
128: ("Takri", "Takr"),
129: ("Tamil", "Taml"),
130: ("Tangut", "Tang"),
131: ("Telugu", "Telu"),
132: ("Thaana", "Thaa"),
133: ("Thai", "Thai"),
134: ("Tibetan", "Tibt"),
135: ("Tifinagh", "Tfng"),
136: ("Tirhuta", "Tirh"),
137: ("Traditional Han", "Hant"),
138: ("Ugaritic", "Ugar"),
139: ("Vai", "Vaii"),
140: ("Varang Kshiti", "Wara"),
141: ("Yi", "Yiii"),
}
script_aliases = {
# Renamings prior to Qt 6.0 (CLDR v37):
'SimplifiedChineseScript': 'SimplifiedHanScript',
'TraditionalChineseScript': 'TraditionalHanScript',
# Renamings:
'OriyaScript': 'OdiaScript',
'MendeKikakuiScript': 'MendeScript',
'BengaliScript': 'BanglaScript',
}
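# A small illustrative sketch (not part of the generator): the QLocale enum
# identifiers appear to be derived from the English names above by dropping
# spaces and punctuation, and the *_aliases maps take legacy identifiers to
# their current spellings. The helper below is hypothetical, for illustration
# only:
#
#     def enum_name(english_name):
#         return ''.join(ch for ch in english_name if ch.isalnum())
#
#     enum_name(territory_map[29][0])                # 'BosniaAndHerzegovina'
#     territory_aliases.get('BosniaAndHerzegowina')  # 'BosniaAndHerzegovina'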

View File

@ -0,0 +1,23 @@
d
dd
ddd
dddd
M
MM
MMM
MMMM
yy
yyyy
h the hour without a leading zero (0 to 23 or 1 to 12 if AM/PM display)
hh the hour with a leading zero (00 to 23 or 01 to 12 if AM/PM display)
H the hour without a leading zero (0 to 23, even with AM/PM display)
HH the hour with a leading zero (00 to 23, even with AM/PM display)
m
mm
s
ss
z the milliseconds without leading zeroes (0 to 999)
zzz the milliseconds with leading zeroes (000 to 999)
AP or A interpret as an AM/PM time. AP must be either "AM" or "PM"
ap or a interpret as an AM/PM time. ap must be either "am" or "pm"
t time zone

View File

@ -0,0 +1,80 @@
# Copyright (C) 2021 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
from dataclasses import dataclass
from typing import Dict, Optional
@dataclass
class LanguageCodeEntry:
part3Code: str
part2BCode: Optional[str]
part2TCode: Optional[str]
part1Code: Optional[str]
def id(self) -> str:
if self.part1Code:
return self.part1Code
if self.part2BCode:
return self.part2BCode
return self.part3Code
def __repr__(self) -> str:
parts = [f'{self.__class__.__name__}({self.id()!r}, part3Code={self.part3Code!r}']
if self.part2BCode is not None and self.part2BCode != self.part3Code:
parts.append(f', part2BCode={self.part2BCode!r}')
if self.part2TCode != self.part2BCode:
parts.append(f', part2TCode={self.part2TCode!r}')
if self.part1Code is not None:
parts.append(f', part1Code={self.part1Code!r}')
parts.append(')')
return ''.join(parts)
class LanguageCodeData:
"""
Representation of ISO639-2 language code data.
"""
def __init__(self, fileName: str):
"""
Construct the object populating the data from the given file.
"""
self.__codeMap: Dict[str, LanguageCodeEntry] = {}
with open(fileName, 'r', encoding='utf-8') as stream:
stream.readline() # skip the header
for line in stream.readlines():
part3Code, part2BCode, part2TCode, part1Code, _ = line.split('\t', 4)
# sanity checks
assert all(p.isascii() for p in (part3Code, part2BCode, part2TCode, part1Code)), \
f'Non-ascii characters in code names: {part3Code!r} {part2BCode!r} '\
f'{part2TCode!r} {part1Code!r}'
assert len(part3Code) == 3, f'Invalid Part 3 code length for {part3Code!r}'
assert not part1Code or len(part1Code) == 2, \
f'Invalid Part 1 code length for {part3Code!r}: {part1Code!r}'
assert not part2BCode or len(part2BCode) == 3, \
f'Invalid Part 2B code length for {part3Code!r}: {part2BCode!r}'
assert not part2TCode or len(part2TCode) == 3, \
f'Invalid Part 2T code length for {part3Code!r}: {part2TCode!r}'
assert (part2BCode == '') == (part2TCode == ''), \
f'Only one Part 2 code is specified for {part3Code!r}: ' \
f'{part2BCode!r} vs {part2TCode!r}'
assert not part2TCode or part2TCode == part3Code, \
f'Part 3 code {part3Code!r} does not match Part 2T code {part2TCode!r}'
entry = LanguageCodeEntry(part3Code, part2BCode or None,
part2TCode or None, part1Code or None)
self.__codeMap[entry.id()] = entry
def query(self, code: str) -> Optional[LanguageCodeEntry]:
"""
Lookup the entry with the given code and return it.
The entries can be looked up by using either the Alpha2 code or the bibliographical
Alpha3 code.
"""
return self.__codeMap.get(code)
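# A minimal usage sketch, assuming a local copy of the tab-separated ISO 639-3
# code table (the file name below is illustrative):
#
#     data = LanguageCodeData('iso-639-3.tab')
#     entry = data.query('de')   # look up German by its Part 1 (Alpha2) code
#     assert entry is not None
#     entry.part3Code            # 'deu'
#     entry.part2BCode           # 'ger' (bibliographic Alpha3)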

View File

@ -0,0 +1,599 @@
# Copyright (C) 2020 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Parsing the Locale Data Markup Language
It's an XML format, so the raw parsing of XML is, of course, delegated
to xml.dom.minidom; but it has its own specific schemata and some
funky rules for combining data from various files (inheritance between
locales). The use of it we're interested in is extraction of CLDR's
data, so some of the material here is specific to CLDR; see cldr.py
for how it is mainly used.
Provides various classes to wrap xml.dom's objects, specifically those
returned by minidom.parse() and their child-nodes:
Node -- wraps any node in the DOM tree
XmlScanner -- wraps the root element of a stand-alone XML file
Supplement -- specializes XmlScanner for supplemental data files
LocaleScanner -- wraps a locale's inheritance-chain of file roots
See individual classes for further detail.
"""
from localetools import Error
from dateconverter import convert_date
class Node (object):
"""Wrapper for an arbitrary DOM node.
Provides various ways to select children of a node. Selected child
nodes are returned wrapped as Node objects. A Node exposes the
raw DOM node it wraps via its .dom attribute."""
def __init__(self, elt, dullAttrs = None, draft = 0):
"""Wraps a DOM node for ease of access.
First argument, elt, is the DOM node to wrap.
Optional second argument, dullAttrs, should either be None or
map each LDML tag name to a list of the names of
non-distinguishing attributes for nodes with the given tag
name. If None is given, no distinguishing attribute checks are
performed.
(Optional third argument, draft, should only be supplied by
this class's creation of child nodes; it is the maximum draft
score of any ancestor of the new node.)"""
self.dom, self.__dull = elt, dullAttrs
try:
attr = elt.attributes['draft'].nodeValue
except KeyError:
self.draft = draft
else:
self.draft = max(draft, self.draftScore(attr))
def findAllChildren(self, tag, wanted = None, allDull = False):
"""All children that do have the given tag and attributes.
First argument is the tag: children with any other tag are
ignored.
Optional second argument, wanted, should either be None or map
attribute names to the values they must have. Only child nodes
with these attributes set to the given values are yielded.
By default, nodes that have distinguishing attributes, other
than those specified in wanted, are ignored. Pass the allDull
parameter a true value to suppress this check."""
if self.__dull is None:
allDull = True
dull = () if allDull else self.__dull[tag]
for child in self.dom.childNodes:
if child.nodeType != child.ELEMENT_NODE:
continue
if child.nodeName != tag:
continue
if wanted:
try:
if any(child.attributes[k].nodeValue != v
for k, v in wanted.items()):
continue
except KeyError: # Some wanted attribute is missing
continue
if not (allDull or all(k in dull or k in wanted
for k in child.attributes.keys())):
continue
elif not (allDull or all(k in dull
for k in child.attributes.keys())):
continue
yield Node(child, self.__dull, self.draft)
def findUniqueChild(self, tag):
"""Returns the single child with the given nodeName.
Raises Error if there is no such child or there is more than
one."""
seq = self.findAllChildren(tag)
try:
node = next(seq)
except StopIteration:
raise Error('No child found where one was expected', tag)
for it in seq:
raise Error('Many children found where only one was expected', tag)
return node
@classmethod
def draftScore(cls, level):
"""Maps draft level names to numeric scores.
Single parameter, level, is the least sure value of the draft
attribute on a node that you're willing to accept; returns a
numeric value (lower is less drafty).
Tempting as it is to insist on low draft scores, there are
many locales in which pretty much every leaf is
unconfirmed. It may make sense to actually check each
XmlScanner object, or each node in each LocaleScanner's nodes
list, to see what its distribution of draft level looks like,
so as to set the acceptable draft score for its elements
accordingly. However, for the moment, we mostly just accept
all elements, regardless of draft values (the one exception is
am/pm indicators)."""
return cls.__draftScores.get(level, 5) if level else 0
# Implementation details:
__draftScores = dict(true = 4, unconfirmed = 3, provisional = 2,
contributed = 1, approved = 0, false = 0)
def _parseXPath(selector):
# Split "tag[attr=val][...]" into tag-name and attribute mapping
attrs = selector.split('[')
name = attrs.pop(0)
if attrs:
attrs = [x.strip() for x in attrs]
assert all(x.endswith(']') for x in attrs)
attrs = [x[:-1].split('=') for x in attrs]
assert all(len(x) in (1, 2) for x in attrs)
attrs = (('type', x[0]) if len(x) == 1 else x for x in attrs)
return name, dict(attrs)
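# For example:
#     _parseXPath('calendar[gregorian]')         # ('calendar', {'type': 'gregorian'})
#     _parseXPath('symbols[numberSystem=latn]')  # ('symbols', {'numberSystem': 'latn'})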
def _iterateEach(iters):
# Flatten a two-layer iterator.
for it in iters:
for item in it:
yield item
class XmlScanner (object):
"""Wrap an XML file to enable XPath access to its nodes.
"""
def __init__(self, node):
self.root = node
def findNodes(self, xpath):
"""Return all nodes under self.root matching this xpath.
Ignores any excess attributes."""
elts = (self.root,)
for selector in xpath.split('/'):
tag, attrs = _parseXPath(selector)
elts = tuple(_iterateEach(e.findAllChildren(tag, attrs) for e in elts))
if not elts:
break
return elts
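# A usage sketch (the file path is illustrative; the real callers in cldr.py
# construct these wrappers with additional book-keeping, such as the tables of
# non-distinguishing attributes passed to Node):
#
#     from xml.dom import minidom
#     scanner = XmlScanner(Node(minidom.parse('common/main/en.xml').documentElement))
#     for elt in scanner.findNodes('numbers/symbols[numberSystem=latn]/decimal'):
#         print(elt.dom.firstChild.nodeValue)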
class Supplement (XmlScanner):
def find(self, xpath):
elts = self.findNodes(xpath)
for elt in _iterateEach(e.dom.childNodes if e.dom.childNodes else (e.dom,)
for e in elts):
if elt.attributes:
yield (elt.nodeName,
dict((k, v if isinstance(v, str) else v.nodeValue)
for k, v in elt.attributes.items()))
class LocaleScanner (object):
def __init__(self, name, nodes, root):
self.name, self.nodes, self.base = name, nodes, root
def find(self, xpath, default = None, draft = None):
"""XPath search for the content of an element.
Required argument, xpath, is the XPath to search for. Optional
second argument is a default value to use, if no such node is
found. Optional third argument is a draft score (see
Node.draftScore() for details); if given, leaf elements with
higher draft scores are ignored."""
try:
for elt in self.__find(xpath):
try:
if draft is None or elt.draft <= draft:
return elt.dom.firstChild.nodeValue
except (AttributeError, KeyError):
pass
except Error as e:
if default is None:
raise
return default
def tagCodes(self):
"""Yields four tag codes
The tag codes are language, script, territory and variant; an
empty value for any of them indicates that no value was
provided. The values are obtained from the primary file's
top-level <identity> element. An Error is raised if any
top-level <alias> element of this file has a non-empty source
attribute; that attribute value is mentioned in the error's
message."""
root = self.nodes[0]
for alias in root.findAllChildren('alias', allDull=True):
try:
source = alias.dom.attributes['source'].nodeValue
except (KeyError, AttributeError):
pass
else:
raise Error(f'Alias to {source}')
ids = root.findUniqueChild('identity')
for code in ('language', 'script', 'territory', 'variant'):
for node in ids.findAllChildren(code, allDull=True):
try:
yield node.dom.attributes['type'].nodeValue
except (KeyError, AttributeError):
pass
else:
break # only want one value for each code
else: # No value for this code, use empty
yield ''
def currencyData(self, isoCode):
"""Fetches currency data for this locale.
Single argument, isoCode, is the ISO currency code for the
currency in use in the territory. See also numericData, which
includes some currency formats.
"""
if isoCode:
stem = f'numbers/currencies/currency[{isoCode}]/'
symbol = self.find(f'{stem}symbol', '')
name = self.__currencyDisplayName(stem)
else:
symbol = name = ''
yield 'currencySymbol', symbol
yield 'currencyDisplayName', name
def numericData(self, lookup, complain = lambda text: None):
"""Generate assorted numeric data for the locale.
First argument, lookup, is a callable that maps a numbering
system's name to certain data about the system, as a mapping;
we expect this to have 'digits' as a key.
"""
system = self.find('numbers/defaultNumberingSystem')
stem = f'numbers/symbols[numberSystem={system}]/'
decimal = self.find(f'{stem}decimal')
group = self.find(f'{stem}group')
assert decimal != group, (self.name, system, decimal)
yield 'decimal', decimal
yield 'group', group
yield 'percent', self.find(f'{stem}percentSign')
yield 'list', self.find(f'{stem}list')
yield 'exp', self.find(f'{stem}exponential')
yield 'groupSizes', self.__numberGrouping(system)
digits = lookup(system)['digits']
assert len(digits) == 10
zero = digits[0]
# Qt's number-formatting code assumes digits are consecutive
# (except Suzhou, CLDR's hanidec - see QTBUG-85409):
assert all(ord(c) == i + (0x3020 if ord(zero) == 0x3007 else ord(zero))
for i, c in enumerate(digits[1:], 1))
yield 'zero', zero
plus = self.find(f'{stem}plusSign')
minus = self.find(f'{stem}minusSign')
yield 'plus', plus
yield 'minus', minus
# Currency formatting:
xpath = 'numbers/currencyFormats/currencyFormatLength/currencyFormat[accounting]/pattern'
try:
money = self.find(xpath.replace('Formats/',
f'Formats[numberSystem={system}]/'))
except Error:
money = self.find(xpath)
money = self.__currencyFormats(money, plus, minus)
yield 'currencyFormat', next(money)
neg = ''
for it in money:
assert not neg, 'There should be at most one more pattern'
neg = it
yield 'currencyNegativeFormat', neg
def textPatternData(self):
for key in ('quotationStart', 'alternateQuotationEnd',
'quotationEnd', 'alternateQuotationStart'):
yield key, self.find(f'delimiters/{key}')
for key in ('start', 'middle', 'end'):
yield (f'listPatternPart{key.capitalize()}',
self.__fromLdmlListPattern(self.find(
f'listPatterns/listPattern/listPatternPart[{key}]')))
yield ('listPatternPartTwo',
self.__fromLdmlListPattern(self.find(
'listPatterns/listPattern/listPatternPart[2]')))
stem = 'dates/calendars/calendar[gregorian]/'
# TODO: is wide really the right width to use here ?
# abbreviated might be an option ... or try both ?
meridiem = f'{stem}dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/'
for key in ('am', 'pm'):
yield key, self.find(f'{meridiem}dayPeriod[{key}]',
draft = Node.draftScore('contributed'))
for pair in (('long', 'full'), ('short', 'short')):
for key in ('time', 'date'):
yield (f'{pair[0]}{key.capitalize()}Format',
convert_date(self.find(
f'{stem}{key}Formats/{key}FormatLength[{pair[1]}]/{key}Format/pattern')))
def endonyms(self, language, script, territory, variant):
# TODO: take variant into account ?
for seq in ((language, script, territory),
(language, script), (language, territory), (language,)):
if not all(seq):
continue
try:
yield ('languageEndonym',
self.find(f'localeDisplayNames/languages/language[{"_".join(seq)}]'))
except Error:
pass
else:
break
else:
# grumble(failed to find endonym for language)
yield 'languageEndonym', ''
yield ('territoryEndonym',
self.find(f'localeDisplayNames/territories/territory[{territory}]', ''))
def unitData(self):
yield ('byte_unit',
self.find('units/unitLength[long]/unit[digital-byte]/displayName',
'bytes'))
unit = self.__findUnit('', 'B')
cache = [] # Populated by the SI call, to give hints to the IEC call
yield ('byte_si_quantified',
';'.join(self.__unitCount('', unit, cache)))
# IEC 60027-2
# http://physics.nist.gov/cuu/Units/binary.html
yield ('byte_iec_quantified',
';'.join(self.__unitCount('bi', 'iB', cache)))
def calendarNames(self, calendars):
namings = self.__nameForms
for cal in calendars:
stem = f'dates/calendars/calendar[{cal}]/months/'
for key, mode, size in namings:
prop = f'monthContext[{mode}]/monthWidth[{size}]/'
yield (f'{key}Months_{cal}',
';'.join(self.find(f'{stem}{prop}month[{i}]')
for i in range(1, 13)))
# Day data (for Gregorian, at least):
stem = 'dates/calendars/calendar[gregorian]/days/'
days = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat')
for (key, mode, size) in namings:
prop = f'dayContext[{mode}]/dayWidth[{size}]/day'
yield (f'{key}Days',
';'.join(self.find(f'{stem}{prop}[{day}]')
for day in days))
# Implementation details
__nameForms = (
('standaloneLong', 'stand-alone', 'wide'),
('standaloneShort', 'stand-alone', 'abbreviated'),
('standaloneNarrow', 'stand-alone', 'narrow'),
('long', 'format', 'wide'),
('short', 'format', 'abbreviated'),
('narrow', 'format', 'narrow'),
) # Used for month and day names
def __find(self, xpath):
retries = [ xpath.split('/') ]
while retries:
tags, elts, roots = retries.pop(), self.nodes, (self.base.root,)
for selector in tags:
tag, attrs = _parseXPath(selector)
elts = tuple(_iterateEach(e.findAllChildren(tag, attrs) for e in elts))
if not elts:
break
else: # Found matching elements
# Possibly filter elts to prefer the least drafty ?
for elt in elts:
yield elt
# Process roots separately: otherwise the alias-processing
# is excessive.
for i, selector in enumerate(tags):
tag, attrs = _parseXPath(selector)
for alias in tuple(_iterateEach(r.findAllChildren('alias', allDull=True)
for r in roots)):
if alias.dom.attributes['source'].nodeValue == 'locale':
replace = alias.dom.attributes['path'].nodeValue.split('/')
retries.append(self.__xpathJoin(tags[:i], replace, tags[i:]))
roots = tuple(_iterateEach(r.findAllChildren(tag, attrs) for r in roots))
if not roots:
if retries: # Let outer loop fall back on an alias path:
break
sought = '/'.join(tags)
if sought != xpath:
sought += f' (for {xpath})'
raise Error(f'All lack child {selector} for {sought} in {self.name}')
else: # Found matching elements
for elt in roots:
yield elt
sought = '/'.join(tags)
if sought != xpath:
sought += f' (for {xpath})'
raise Error(f'No {sought} in {self.name}')
def __currencyDisplayName(self, stem):
try:
return self.find(stem + 'displayName')
except Error:
pass
for x in ('zero', 'one', 'two', 'few', 'many', 'other'):
try:
return self.find(f'{stem}displayName[count={x}]')
except Error:
pass
return ''
def __findUnit(self, keySuffix, quantify, fallback=''):
# The displayName for a quantified unit in en.xml is kByte
# (even for unitLength[narrow]) instead of kB (etc.), so
# prefer any unitPattern provided, but prune its placeholder:
for size in ('short', 'narrow'): # TODO: reverse order ?
stem = f'units/unitLength[{size}{keySuffix}]/unit[digital-{quantify}byte]/'
for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
try:
ans = self.find(f'{stem}unitPattern[count={count}]')
except Error:
continue
# TODO: do count-handling, instead of discarding placeholders
if False: # TODO: do it this way, instead !
ans = ans.replace('{0}', '').strip()
elif ans.startswith('{0}'):
ans = ans[3:].lstrip()
if ans:
return ans
try:
return self.find(f'{stem}displayName')
except Error:
pass
return fallback
def __unitCount(self, keySuffix, suffix, cache,
# Stop at exa/exbi: 16 exbi = 2^{64} < zetta =
# 1000^7 < zebi = 2^{70}, the next quantifiers up:
siQuantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')):
"""Work out the unit quantifiers.
Unfortunately, the CLDR data only go up to terabytes and we
want all the way to exabytes; but we can recognize the SI
quantifiers as prefixes, strip and identify the tail as the
localized translation for 'B' (e.g. French has 'octet' for
'byte' and uses ko, Mo, Go, To from which we can extrapolate
Po, Eo).
Should be called first for the SI quantifiers, with suffix =
'B', then for the IEC ones, with suffix = 'iB'; the list cache
(initially empty before first call) is used to let the second
call know what the first learned about the localized unit.
"""
if suffix == 'iB': # second call, re-using first's cache
if cache:
byte = cache.pop()
if all(byte == k for k in cache):
suffix = f'i{byte}'
for q in siQuantifiers:
# Those don't (yet, v36) exist in CLDR, so we always get the fall-back:
yield self.__findUnit(keySuffix, q[:2], f'{q[0].upper()}{suffix}')
else: # first call
tail = suffix = suffix or 'B'
for q in siQuantifiers:
it = self.__findUnit(keySuffix, q)
# kB for kilobyte, in contrast with KiB for IEC:
q = q[0] if q == 'kilo' else q[0].upper()
if not it:
it = q + tail
elif it.startswith(q):
rest = it[1:]
tail = rest if all(rest == k for k in cache) else suffix
cache.append(rest)
yield it
def __numberGrouping(self, system):
"""Sizes of groups of digits within a number.
Returns a triple (least, higher, top) for which:
* least is the number of digits after the last grouping
separator;
* higher is the number of digits between grouping
separators;
* top is the fewest digits that can appear before the first
grouping separator.
Thus (4, 3, 2) would want 1e7 as 1000,0000 but 1e8 as 10,000,0000.
Note: CLDR does countenance the possibility of grouping also
in the fractional part. This is not presently attempted. Nor
is placement of the sign character anywhere but at the start
of the number (some formats may place it at the end, possibly
elsewhere)."""
top = int(self.find('numbers/minimumGroupingDigits'))
assert top < 4, top # We store it in a 2-bit field
grouping = self.find(f'numbers/decimalFormats[numberSystem={system}]/'
'decimalFormatLength/decimalFormat/pattern')
groups = grouping.split('.')[0].split(',')[-3:]
assert all(len(x) < 8 for x in groups[-2:]), grouping # we store them in 3-bit fields
if len(groups) > 2:
return len(groups[-1]), len(groups[-2]), top
size = len(groups[-1]) if len(groups) == 2 else 3
return size, size, top
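# Worked examples (comments only): the common pattern '#,##0.###' splits into
# groups ['#', '##0'], giving (least, higher) = (3, 3); the Indian-style
# pattern '#,##,##0.###' splits into ['#', '##', '##0'], giving (3, 2). In
# both cases top comes from minimumGroupingDigits.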
@staticmethod
def __currencyFormats(patterns, plus, minus):
for p in patterns.split(';'):
p = p.replace('0', '#').replace(',', '').replace('.', '')
try:
cut = p.find('#') + 1
except ValueError:
pass
else:
p = p[:cut] + p[cut:].replace('#', '')
p = p.replace('#', "%1")
# According to http://www.unicode.org/reports/tr35/#Number_Format_Patterns
# there can be a doubled or tripled currency sign; however, none of the
# locales use that.
p = p.replace('\xa4', "%2")
# Single quote goes away, but double goes to single:
p = p.replace("''", '###').replace("'", '').replace('###', "'")
# Use number system's signs:
p = p.replace('+', plus).replace('-', minus)
yield p
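# Worked example: a typical CLDR accounting pattern such as
# '¤#,##0.00;(¤#,##0.00)' yields '%2%1' for the positive form and '(%2%1)'
# for the negative one, where %1 is the formatted number and %2 the currency
# symbol.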
@staticmethod
def __fromLdmlListPattern(pattern):
# This is a very limited parsing of the format for list pattern part only.
return pattern.replace('{0}', '%1').replace('{1}', '%2').replace('{2}', '%3')
@staticmethod
def __fromLdmlPath(seq): # tool function for __xpathJoin()
"""Convert LDML's [@name='value'] to our [name=value] form."""
for it in seq:
# First dismember it:
attrs = it.split('[')
tag = attrs.pop(0)
if not attrs: # Short-cut the easy case:
yield it
continue
assert all(x.endswith(']') for x in attrs)
attrs = [x[:-1].split('=') for x in attrs]
# Then fix each attribute specification in it:
attrs = [(x[0][1:] if x[0].startswith('@') else x[0],
x[1][1:-1] if x[1].startswith("'") and x[1].endswith("'") else x[1])
for x in attrs]
# Finally, put it all back together:
attrs = ['='.join(x) + ']' for x in attrs]
attrs.insert(0, tag)
yield '['.join(attrs)
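# For example, LDML's "monthWidth[@type='wide']" becomes "monthWidth[type=wide]".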
@classmethod
def __xpathJoin(cls, head, insert, tail):
"""Join three lists of XPath selectors.
Each of head, insert and tail is a sequence of selectors but
insert may start with some uses of '..', that we want to
resolve away, and may use LDML's attribute format, that we
want to convert to our format."""
while insert and insert[0] == '..':
insert.pop(0)
head.pop()
return head + list(cls.__fromLdmlPath(insert)) + tail

View File

@ -0,0 +1,184 @@
# Copyright (C) 2020 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Utilities shared among the CLDR extraction tools.
Functions:
unicode2hex() -- converts unicode text to UCS-2 in hex form.
wrap_list() -- map list to comma-separated string, 20 entries per line.
Classes:
Error -- A shared error class.
Transcriber -- edit a file by writing a temporary file, then renaming.
SourceFileEditor -- adds standard prelude and tail handling to Transcriber.
"""
from contextlib import ExitStack, contextmanager
from pathlib import Path
from tempfile import NamedTemporaryFile
qtbase_root = Path(__file__).parents[2]
assert qtbase_root.name == 'qtbase'
class Error (Exception):
def __init__(self, msg, *args):
super().__init__(msg, *args)
self.message = msg
def __str__(self):
return self.message
def unicode2hex(s):
lst = []
for x in s:
v = ord(x)
if v > 0xFFFF:
# make a surrogate pair
# copied from qchar.h
high = (v >> 10) + 0xd7c0
low = (v % 0x400 + 0xdc00)
lst.append(hex(high))
lst.append(hex(low))
else:
lst.append(hex(v))
return lst
def wrap_list(lst):
def split(lst, size):
while lst:
head, lst = lst[:size], lst[size:]
yield head
return ",\n".join(", ".join(x) for x in split(lst, 20))
@contextmanager
def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path):
"""Context manager for safe file update via a temporary file.
Accepts path to the file to be updated. Yields a temporary file to the user
code, open for writing.
On success closes the temporary file and moves its content to the original
location. On error, removes temporary file, without disturbing the original.
"""
tempFile = NamedTemporaryFile('w', prefix=prefix, dir=dir, delete=False)
try:
yield tempFile
tempFile.close()
# Move the modified file to the original location
Path(tempFile.name).rename(originalLocation)
except Exception:
# delete the temporary file in case of error
tempFile.close()
Path(tempFile.name).unlink()
raise
class Transcriber:
"""Context manager base-class to manage source file rewrites.
Derived classes need to implement transcribing of the content, with
whatever modifications they may want. Members reader and writer
are exposed; use writer.write() to output to the new file; use
reader.readline() or iterate reader to read the original.
This class is intended to be used as context manager only (inside a
`with` statement).
Reimplement onEnter() to write any preamble the file may have,
onExit() to write any tail. The body of the with statement takes
care of anything in between, using methods provided by derived classes.
The data is written to a temporary file first. The temporary file data
is then moved to the original location if there were no errors. Otherwise
the temporary file is removed and the original is left unchanged.
"""
def __init__(self, path: Path, temp_dir: Path):
self.path = path
self.tempDir = temp_dir
def onEnter(self) -> None:
"""
Called before transferring control to user code.
This function can be overridden in derived classes to perform actions
before transferring control to the user code.
The default implementation does nothing.
"""
pass
def onExit(self) -> None:
"""
Called after return from user code.
This function can be overridden in derived classes to perform actions
after successful return from user code.
The default implementation does nothing.
"""
pass
def __enter__(self):
with ExitStack() as resources:
# Create a temp file to write the new data into
self.writer = resources.enter_context(
AtomicRenameTemporaryFile(self.path, prefix=self.path.name, dir=self.tempDir))
# Open the old file
self.reader = resources.enter_context(open(self.path))
self.onEnter()
# Prevent resources from being closed on normal return from this
# method and make them available inside __exit__():
self.__resources = resources.pop_all()
return self
def __exit__(self, exc_type, exc_value, traceback):
if exc_type is None:
with self.__resources:
self.onExit()
else:
self.__resources.__exit__(exc_type, exc_value, traceback)
return False
class SourceFileEditor (Transcriber):
"""Transcriber with transcription of code around a gnerated block.
We have a common pattern of source files with a generated part
embedded in a context that's not touched by the regeneration
scripts. The generated part is, in each case, marked with a common
pair of start and end markers. We transcribe the old file to a new
temporary file; on success, we then remove the original and move
the new version to replace it.
This class takes care of transcribing the parts before and after
the generated content; on entering the context, an instance will
copy the preamble up to the start marker; on exit from the context
it will skip over the original's generated content and resume
transcribing with the end marker.
This class is only intended to be used as a context manager:
see Transcriber. Derived classes implement suitable methods for use in
the body of the with statement, using self.writer to rewrite the part
of the file between the start and end markers.
"""
GENERATED_BLOCK_START = '// GENERATED PART STARTS HERE'
GENERATED_BLOCK_END = '// GENERATED PART ENDS HERE'
def onEnter(self) -> None:
# Copy over the first non-generated section to the new file
for line in self.reader:
self.writer.write(line)
if line.strip() == self.GENERATED_BLOCK_START:
break
def onExit(self) -> None:
# Skip through the old generated data in the old file
for line in self.reader:
if line.strip() == self.GENERATED_BLOCK_END:
self.writer.write(line)
break
# Transcribe the remainder:
for line in self.reader:
self.writer.write(line)
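# A minimal usage sketch (the derived class and file names are illustrative;
# see the generator scripts in this directory for the real editors). The
# target file must already contain the GENERATED PART markers:
#
#     class TableEditor (SourceFileEditor):
#         def writeRows(self, rows):
#             for row in rows:
#                 self.writer.write(f'        {row},\n')
#
#     target = Path('qlocale_data_p.h')
#     with TableEditor(target, target.parent) as editor:
#         editor.writeRows(('{ 1, 2, 3 }',))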

View File

@ -0,0 +1,627 @@
# Copyright (C) 2021 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Shared serialization-scanning code for QLocaleXML format.
Provides classes:
Locale -- common data-type representing one locale as a namespace
QLocaleXmlWriter -- helper to write a QLocaleXML file
QLocaleXmlReader -- helper to read a QLocaleXML file back in
Support:
Spacer -- provides control over indentation of the output.
RelaxNG schema for the used file format can be found in qlocalexml.rnc.
QLocaleXML files can be validated using:
jing -c qlocalexml.rnc <file.xml>
You can download jing from https://relaxng.org/jclark/jing.html if your
package manager lacks the jing package.
"""
from xml.sax.saxutils import escape
from localetools import Error
# Tools used by Locale:
def camel(seq):
yield next(seq)
for word in seq:
yield word.capitalize()
def camelCase(words):
return ''.join(camel(iter(words)))
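# For example, camelCase(('standalone', 'long', 'months')) == 'standaloneLongMonths'.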
def addEscapes(s):
return ''.join(c if n < 128 else f'\\x{n:02x}'
for n, c in ((ord(c), c) for c in s))
def startCount(c, text): # strspn
"""First index in text where it doesn't have a character in c"""
assert text and text[0] in c
try:
return next((j for j, d in enumerate(text) if d not in c))
except StopIteration:
return len(text)
def convertFormat(format):
"""Convert date/time format-specier from CLDR to Qt
Match up (as best we can) the differences between:
* https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
* QDateTimeParser::parseFormat() and QLocalePrivate::dateTimeToString()
"""
# Compare and contrast dateconverter.py's convert_date().
# Need to (check consistency and) reduce redundancy !
result = ""
i = 0
while i < len(format):
if format[i] == "'":
result += "'"
i += 1
while i < len(format) and format[i] != "'":
result += format[i]
i += 1
if i < len(format):
result += "'"
i += 1
else:
s = format[i:]
if s.startswith('E'): # week-day
n = startCount('E', s)
if n < 3:
result += 'ddd'
elif n == 4:
result += 'dddd'
else: # 5: narrow, 6 short; but should be name, not number :-(
result += 'd' if n < 6 else 'dd'
i += n
elif s[0] in 'ab': # am/pm
# 'b' should distinguish noon/midnight, too :-(
result += "AP"
i += startCount('ab', s)
elif s.startswith('S'): # fractions of seconds: count('S') == number of decimals to show
result += 'z'
i += startCount('S', s)
elif s.startswith('V'): # long time zone specifiers (and a deprecated short ID)
result += 't'
i += startCount('V', s)
elif s[0] in 'zv': # zone
# Should use full name, e.g. "Central European Time", if 'zzzz' :-(
# 'v' should get generic non-location format, e.g. PT for "Pacific Time", no DST indicator
result += "t"
i += startCount('zv', s)
else:
result += format[i]
i += 1
return result
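# For example, convertFormat('EEEE, d MMMM yyyy') gives 'dddd, d MMMM yyyy',
# and convertFormat('h:mm a') gives 'h:mm AP'.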
class QLocaleXmlReader (object):
def __init__(self, filename):
self.root = self.__parse(filename)
# Lists of (id, name, code) triples:
languages = tuple(self.__loadMap('language'))
scripts = tuple(self.__loadMap('script'))
territories = tuple(self.__loadMap('territory'))
self.__likely = tuple(self.__likelySubtagsMap())
# Mappings {ID: (name, code)}
self.languages = dict((v[0], v[1:]) for v in languages)
self.scripts = dict((v[0], v[1:]) for v in scripts)
self.territories = dict((v[0], v[1:]) for v in territories)
# Private mappings {name: (ID, code)}
self.__langByName = dict((v[1], (v[0], v[2])) for v in languages)
self.__textByName = dict((v[1], (v[0], v[2])) for v in scripts)
self.__landByName = dict((v[1], (v[0], v[2])) for v in territories)
# Other properties:
self.dupes = set(v[1] for v in languages) & set(v[1] for v in territories)
self.cldrVersion = self.__firstChildText(self.root, "version")
def loadLocaleMap(self, calendars, grumble = lambda text: None):
kid = self.__firstChildText
likely = dict(self.__likely)
for elt in self.__eachEltInGroup(self.root, 'localeList', 'locale'):
locale = Locale.fromXmlData(lambda k: kid(elt, k), calendars)
language = self.__langByName[locale.language][0]
script = self.__textByName[locale.script][0]
territory = self.__landByName[locale.territory][0]
if language != 1: # C
if territory == 0:
grumble(f'loadLocaleMap: No territory id for "{locale.language}"\n')
if script == 0:
# Find default script for the given language and territory - see:
# http://www.unicode.org/reports/tr35/#Likely_Subtags
try:
try:
to = likely[(locale.language, 'AnyScript', locale.territory)]
except KeyError:
to = likely[(locale.language, 'AnyScript', 'AnyTerritory')]
except KeyError:
pass
else:
locale.script = to[1]
script = self.__textByName[locale.script][0]
yield (language, script, territory), locale
def languageIndices(self, locales):
index = 0
for key, value in self.languages.items():
i, count = 0, locales.count(key)
if count > 0:
i = index
index += count
yield i, value[0]
def likelyMap(self):
def tag(t):
lang, script, land = t
yield lang[1] if lang[0] else 'und'
if script[0]: yield script[1]
if land[0]: yield land[1]
def ids(t):
return tuple(x[0] for x in t)
for pair in self.__likely:
have = self.__fromNames(pair[0])
give = self.__fromNames(pair[1])
yield ('_'.join(tag(have)), ids(have),
'_'.join(tag(give)), ids(give))
def defaultMap(self):
"""Map language and script to their default territory by ID.
Yields ((language, script), territory) wherever the likely
sub-tags mapping says language's default locale uses the given
script and territory."""
for have, give in self.__likely:
if have[1:] == ('AnyScript', 'AnyTerritory') and give[2] != 'AnyTerritory':
assert have[0] == give[0], (have, give)
yield ((self.__langByName[give[0]][0],
self.__textByName[give[1]][0]),
self.__landByName[give[2]][0])
# Implementation details:
def __loadMap(self, category):
kid = self.__firstChildText
for element in self.__eachEltInGroup(self.root, f'{category}List', category):
yield int(kid(element, 'id')), kid(element, 'name'), kid(element, 'code')
def __likelySubtagsMap(self):
def triplet(element, keys=('language', 'script', 'territory'), kid = self.__firstChildText):
return tuple(kid(element, key) for key in keys)
kid = self.__firstChildElt
for elt in self.__eachEltInGroup(self.root, 'likelySubtags', 'likelySubtag'):
yield triplet(kid(elt, "from")), triplet(kid(elt, "to"))
def __fromNames(self, names):
return self.__langByName[names[0]], self.__textByName[names[1]], self.__landByName[names[2]]
# DOM access:
from xml.dom import minidom
@staticmethod
def __parse(filename, read = minidom.parse):
return read(filename).documentElement
@staticmethod
def __isNodeNamed(elt, name, TYPE=minidom.Node.ELEMENT_NODE):
return elt.nodeType == TYPE and elt.nodeName == name
del minidom
@staticmethod
def __eltWords(elt):
child = elt.firstChild
while child:
if child.nodeType == elt.TEXT_NODE:
yield child.nodeValue
child = child.nextSibling
@classmethod
def __firstChildElt(cls, parent, name):
child = parent.firstChild
while child:
if cls.__isNodeNamed(child, name):
return child
child = child.nextSibling
raise Error(f'No {name} child found')
@classmethod
def __firstChildText(cls, elt, key):
return ' '.join(cls.__eltWords(cls.__firstChildElt(elt, key)))
@classmethod
def __eachEltInGroup(cls, parent, group, key):
try:
element = cls.__firstChildElt(parent, group).firstChild
except Error:
element = None
while element:
if cls.__isNodeNamed(element, key):
yield element
element = element.nextSibling
class Spacer (object):
def __init__(self, indent = None, initial = ''):
"""Prepare to manage indentation and line breaks.
Arguments are both optional.
First argument, indent, is either None (its default, for
'minifying'), an integer (number of spaces) or the unit of
text that is to be used for each indentation level (e.g. '\t'
to use tabs). If indent is None, no indentation is added, nor
are line-breaks; otherwise, self(text), for non-empty text,
shall end with a newline and begin with indentation.
Second argument, initial, is the initial indentation; it is
ignored if indent is None. Indentation increases after each
call to self(text) in which text starts with a tag and doesn't
include its end-tag; indentation decreases if text starts with
an end-tag. The text is not parsed any more carefully than
just described.
"""
if indent is None:
self.__call = lambda x: x
else:
self.__each = ' ' * indent if isinstance(indent, int) else indent
self.current = initial
self.__call = self.__wrap
def __wrap(self, line):
if not line:
return '\n'
indent = self.current
if line.startswith('</'):
indent = self.current = indent[:-len(self.__each)]
elif line.startswith('<') and not line.startswith('<!'):
cut = line.find('>')
tag = (line[1:] if cut < 0 else line[1 : cut]).strip().split()[0]
if f'</{tag}>' not in line:
self.current += self.__each
return indent + line + '\n'
def __call__(self, line):
return self.__call(line)
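# For example, with s = Spacer(2):
#     s('<languageList>')          # '<languageList>\n', and the indent grows
#     s('<language>x</language>')  # '  <language>x</language>\n'
#     s('</languageList>')         # '</languageList>\n', indent shrinks back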
class QLocaleXmlWriter (object):
def __init__(self, save = None, space = Spacer(4)):
"""Set up to write digested CLDR data as QLocale XML.
Arguments are both optional.
First argument, save, is None (its default) or a callable that
will write content to where you intend to save it. If None, it
is replaced with a callable that prints the given content,
suppressing the newline (but see the following); this is
equivalent to passing sys.stdout.write.
Second argument, space, is an object to call on each text
output to prepend indentation and append newlines, or not as
the case may be. The default is a Spacer(4), which grows
indent by four spaces after each unmatched new tag and shrinks
back on a close-tag (its parsing is naive, but adequate to how
this class uses it), while adding a newline to each line.
"""
self.__rawOutput = self.__printit if save is None else save
self.__wrap = space
self.__write('<localeDatabase>')
# Output of various sections, in their usual order:
def enumData(self):
from enumdata import language_map, script_map, territory_map
self.__enumTable('language', language_map)
self.__enumTable('script', script_map)
self.__enumTable('territory', territory_map)
# Prepare to detect any unused codes (see __writeLocale(), close()):
self.__languages = set(p[1] for p in language_map.values()
if not p[1].isspace())
self.__scripts = set(p[1] for p in script_map.values()
if p[1] != 'ZZ')
self.__territories = set(p[1] for p in territory_map.values()
if p[1] != 'Zzzz')
def likelySubTags(self, entries):
self.__openTag('likelySubtags')
for have, give in entries:
self.__openTag('likelySubtag')
self.__likelySubTag('from', have)
self.__likelySubTag('to', give)
self.__closeTag('likelySubtag')
self.__closeTag('likelySubtags')
def locales(self, locales, calendars):
self.__openTag('localeList')
self.__openTag('locale')
self.__writeLocale(Locale.C(calendars), calendars)
self.__closeTag('locale')
for key in sorted(locales.keys()):
self.__openTag('locale')
self.__writeLocale(locales[key], calendars)
self.__closeTag('locale')
self.__closeTag('localeList')
def version(self, cldrVersion):
self.inTag('version', cldrVersion)
def inTag(self, tag, text):
self.__write(f'<{tag}>{text}</{tag}>')
def close(self, grumble):
"""Finish writing and grumble any issues discovered."""
if self.__rawOutput != self.__complain:
self.__write('</localeDatabase>')
self.__rawOutput = self.__complain
if self.__languages or self.__scripts or self.__territories:
grumble('Some enum members are unused, corresponding to these tags:\n')
import textwrap
def kvetch(kind, seq, g = grumble, w = textwrap.wrap):
g('\n\t'.join(w(f' {kind}: {", ".join(sorted(seq))}', width=80)) + '\n')
if self.__languages:
kvetch('Languages', self.__languages)
if self.__scripts:
kvetch('Scripts', self.__scripts)
if self.__territories:
kvetch('Territories', self.__territories)
grumble('It may make sense to deprecate them.\n')
# Implementation details
@staticmethod
def __printit(text):
print(text, end='')
@staticmethod
def __complain(text):
raise Error('Attempted to write data after closing :-(')
def __enumTable(self, tag, table):
self.__openTag(f'{tag}List')
for key, value in table.items():
self.__openTag(tag)
self.inTag('name', value[0])
self.inTag('id', key)
self.inTag('code', value[1])
self.__closeTag(tag)
self.__closeTag(f'{tag}List')
def __likelySubTag(self, tag, likely):
self.__openTag(tag)
self.inTag('language', likely[0])
self.inTag('script', likely[1])
self.inTag('territory', likely[2])
# self.inTag('variant', likely[3])
self.__closeTag(tag)
def __writeLocale(self, locale, calendars):
locale.toXml(self.inTag, calendars)
self.__languages.discard(locale.language_code)
self.__scripts.discard(locale.script_code)
self.__territories.discard(locale.territory_code)
def __openTag(self, tag):
self.__write(f'<{tag}>')
def __closeTag(self, tag):
self.__write(f'</{tag}>')
def __write(self, line):
self.__rawOutput(self.__wrap(line))
class Locale (object):
"""Holder for the assorted data representing one locale.
Implemented as a namespace; its constructor and update() have the
same signatures as those of a dict, acting on the instance's
__dict__, so the results are accessed as attributes rather than
mapping keys."""
def __init__(self, data=None, **kw):
self.update(data, **kw)
def update(self, data=None, **kw):
if data: self.__dict__.update(data)
if kw: self.__dict__.update(kw)
def __len__(self): # Used when testing as a boolean
return len(self.__dict__)
@staticmethod
def propsMonthDay(scale, lengths=('long', 'short', 'narrow')):
for L in lengths:
yield camelCase((L, scale))
yield camelCase(('standalone', L, scale))
# Expected to be numbers, read with int():
__asint = ("currencyDigits", "currencyRounding")
# Convert day-name to Qt day-of-week number:
__asdow = ("firstDayOfWeek", "weekendStart", "weekendEnd")
# Convert from CLDR format-strings to QDateTimeParser ones:
__asfmt = ("longDateFormat", "shortDateFormat", "longTimeFormat", "shortTimeFormat")
# Just use the raw text:
__astxt = ("language", "languageEndonym", "script", "territory", "territoryEndonym",
"decimal", "group", "zero",
"list", "percent", "minus", "plus", "exp",
"quotationStart", "quotationEnd",
"alternateQuotationStart", "alternateQuotationEnd",
"listPatternPartStart", "listPatternPartMiddle",
"listPatternPartEnd", "listPatternPartTwo", "am", "pm",
'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
"currencyIsoCode", "currencySymbol", "currencyDisplayName",
"currencyFormat", "currencyNegativeFormat")
# Day-of-Week numbering used by Qt:
__qDoW = {"mon": 1, "tue": 2, "wed": 3, "thu": 4, "fri": 5, "sat": 6, "sun": 7}
@classmethod
def fromXmlData(cls, lookup, calendars=('gregorian',)):
"""Constructor from the contents of XML elements.
Single parameter, lookup, is called with the names of XML
elements that should contain the relevant data, within a CLDR
locale element (within a localeList element); these names are
used for the attributes of the object constructed. Attribute
values are obtained by suitably digesting the returned element
texts.\n"""
data = {}
for k in cls.__asint:
data[k] = int(lookup(k))
for k in cls.__asdow:
data[k] = cls.__qDoW[lookup(k)]
for k in cls.__asfmt:
data[k] = convertFormat(lookup(k))
for k in cls.__astxt + tuple(cls.propsMonthDay('days')):
data['listDelim' if k == 'list' else k] = lookup(k)
for k in cls.propsMonthDay('months'):
data[k] = dict((cal, lookup('_'.join((k, cal)))) for cal in calendars)
grouping = lookup('groupSizes').split(';')
data.update(groupLeast = int(grouping[0]),
groupHigher = int(grouping[1]),
groupTop = int(grouping[2]))
return cls(data)
def toXml(self, write, calendars=('gregorian',)):
"""Writes its data as QLocale XML.
First argument, write, is a callable taking the name and
content of an XML element; it is expected to be the inTag
bound method of a QLocaleXmlWriter instance.
Optional second argument is a list of calendar names, in the
form used by CLDR; its default is ('gregorian',).
"""
get = lambda k: getattr(self, k)
for key in ('language', 'script', 'territory'):
write(key, get(key))
write(f'{key}code', get(f'{key}_code'))
for key in ('decimal', 'group', 'zero', 'list',
'percent', 'minus', 'plus', 'exp'):
write(key, get(key))
for key in ('languageEndonym', 'territoryEndonym',
'quotationStart', 'quotationEnd',
'alternateQuotationStart', 'alternateQuotationEnd',
'listPatternPartStart', 'listPatternPartMiddle',
'listPatternPartEnd', 'listPatternPartTwo',
'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
'am', 'pm', 'firstDayOfWeek',
'weekendStart', 'weekendEnd',
'longDateFormat', 'shortDateFormat',
'longTimeFormat', 'shortTimeFormat',
'currencyIsoCode', 'currencySymbol', 'currencyDisplayName',
'currencyFormat', 'currencyNegativeFormat'
) + tuple(self.propsMonthDay('days')) + tuple(
'_'.join((k, cal))
for k in self.propsMonthDay('months')
for cal in calendars):
write(key, escape(get(key)))
write('groupSizes', ';'.join(str(x) for x in get('groupSizes')))
for key in ('currencyDigits', 'currencyRounding'):
write(key, get(key))
# Tools used by __monthNames:
def fullName(i, name): return name
def firstThree(i, name): return name[:3]
def initial(i, name): return name[:1]
def number(i, name): return str(i + 1)
def islamicShort(i, name):
if not name: return name
if name == 'Shawwal': return 'Shaw.'
words = name.split()
if words[0].startswith('Dhu'):
words[0] = words[0][:7] + '.'
elif len(words[0]) > 3:
words[0] = words[0][:3] + '.'
return ' '.join(words)
@staticmethod
def __monthNames(calendars,
known={ # Map calendar to (names, extractors...):
# TODO: do we even need these ? CLDR's root.xml seems to
# have them, complete with yeartype="leap" handling for
# Hebrew's extra.
'gregorian': (('January', 'February', 'March', 'April', 'May', 'June', 'July',
'August', 'September', 'October', 'November', 'December'),
# Extractor pairs, (plain, standalone)
(fullName, fullName), # long
(firstThree, firstThree), # short
(number, initial)), # narrow
'persian': (('Farvardin', 'Ordibehesht', 'Khordad', 'Tir', 'Mordad',
'Shahrivar', 'Mehr', 'Aban', 'Azar', 'Dey', 'Bahman', 'Esfand'),
(fullName, fullName),
(firstThree, firstThree),
(number, initial)),
'islamic': (('Muharram', 'Safar', 'Rabiʻ I', 'Rabiʻ II', 'Jumada I',
'Jumada II', 'Rajab', 'Shaʻban', 'Ramadan', 'Shawwal',
'Dhuʻl-Qiʻdah', 'Dhuʻl-Hijjah'),
(fullName, fullName),
(islamicShort, islamicShort),
(number, number)),
'hebrew': (('Tishri', 'Heshvan', 'Kislev', 'Tevet', 'Shevat', 'Adar I',
'Adar', 'Nisan', 'Iyar', 'Sivan', 'Tamuz', 'Av'),
(fullName, fullName),
(fullName, fullName),
(number, number)),
},
sizes=('long', 'short', 'narrow')):
for cal in calendars:
try:
data = known[cal]
except KeyError as e: # Need to add an entry to known, above.
e.args += ('Unsupported calendar:', cal)
raise
names, get = data[0], data[1:]
for n, size in enumerate(sizes):
yield ('_'.join((camelCase((size, 'months')), cal)),
';'.join(get[n][0](i, x) for i, x in enumerate(names)))
yield ('_'.join((camelCase(('standalone', size, 'months')), cal)),
';'.join(get[n][1](i, x) for i, x in enumerate(names)))
del fullName, firstThree, initial, number, islamicShort
@classmethod
def C(cls, calendars=('gregorian',),
days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday',
'Thursday', 'Friday', 'Saturday'),
quantifiers=('k', 'M', 'G', 'T', 'P', 'E')):
"""Returns an object representing the C locale."""
return cls(cls.__monthNames(calendars),
language='C', language_code='0', languageEndonym='',
script='AnyScript', script_code='0',
territory='AnyTerritory', territory_code='0', territoryEndonym='',
groupSizes=(3, 3, 1),
decimal='.', group=',', list=';', percent='%',
zero='0', minus='-', plus='+', exp='e',
quotationStart='"', quotationEnd='"',
alternateQuotationStart='\'', alternateQuotationEnd='\'',
listPatternPartStart='%1, %2',
listPatternPartMiddle='%1, %2',
listPatternPartEnd='%1, %2',
listPatternPartTwo='%1, %2',
byte_unit='bytes',
byte_si_quantified=';'.join(q + 'B' for q in quantifiers),
byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers),
am='AM', pm='PM', firstDayOfWeek='mon',
weekendStart='sat', weekendEnd='sun',
longDateFormat='EEEE, d MMMM yyyy', shortDateFormat='d MMM yyyy',
longTimeFormat='HH:mm:ss z', shortTimeFormat='HH:mm:ss',
longDays=';'.join(days),
shortDays=';'.join(d[:3] for d in days),
narrowDays='7;1;2;3;4;5;6',
standaloneLongDays=';'.join(days),
standaloneShortDays=';'.join(d[:3] for d in days),
standaloneNarrowDays=';'.join(d[:1] for d in days),
currencyIsoCode='', currencySymbol='',
currencyDisplayName='',
currencyDigits=2, currencyRounding=1,
currencyFormat='%1%2', currencyNegativeFormat='')

View File

@ -0,0 +1,119 @@
# Copyright (C) 2021 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
# This is the RelaxNG compact schema for the qLocaleXML intermediate locale
# data representation format produced and consumed by the qlocalexml module.
#
# To validate an XML file, run:
#
# jing -c qlocalexml.rnc <your-file.xml>
#
# You can download jing from https://relaxng.org/jclark/jing.html if your
# package manager lacks the jing package.
start = element localeDatabase {
element version { text },
element languageList { Language+ },
element scriptList { Script+ },
element territoryList { Territory+ },
element likelySubtags { LikelySubtag+ },
element localeList { Locale+ }
}
Language = element language { TagDescriptor }
Script = element script { TagDescriptor }
Territory = element territory { TagDescriptor }
TagDescriptor = (
element name { text },
element id { xsd:nonNegativeInteger },
element code { text }
)
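# For illustration only (the name, id and code values here are invented; the
# real ones come from enumdata.py), a <language> entry matching TagDescriptor
# looks like:
#   <language><name>French</name><id>37</id><code>fr</code></language>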
LikelySubtag = element likelySubtag {
element from { LocaleTriplet },
element to { LocaleTriplet }
}
LocaleTriplet = (
element language { text },
element script { text },
element territory { text }
)
WeekDay = ("sun" | "mon" | "tue" | "wed" | "thu" | "fri" | "sat")
Digit = xsd:string { pattern = "\d" }
Punctuation = xsd:string { pattern = "\p{P}" }
GroupSizes = xsd:string { pattern = "\d;\d;\d" }
Locale = element locale {
element language { text },
element languagecode { text },
element script { text },
element scriptcode { text },
element territory { text },
element territorycode { text },
element decimal { Punctuation },
element group { text },
element zero { Digit },
element list { Punctuation },
element percent { text },
element minus { text },
element plus { text },
element exp { text },
element languageEndonym { text },
element territoryEndonym { text },
element quotationStart { Punctuation },
element quotationEnd { Punctuation },
element alternateQuotationStart { Punctuation },
element alternateQuotationEnd { Punctuation },
element listPatternPartStart { text },
element listPatternPartMiddle { text },
element listPatternPartEnd { text },
element listPatternPartTwo { text },
element byte_unit { text },
element byte_si_quantified { text },
element byte_iec_quantified { text },
element am { text },
element pm { text },
element firstDayOfWeek { text },
element weekendStart { WeekDay },
element weekendEnd { WeekDay },
element longDateFormat { text },
element shortDateFormat { text },
element longTimeFormat { text },
element shortTimeFormat { text },
element currencyIsoCode { text },
element currencySymbol { text },
element currencyDisplayName { text },
element currencyFormat { text },
element currencyNegativeFormat { text },
element longDays { text },
element standaloneLongDays { text },
element shortDays { text },
element standaloneShortDays { text },
element narrowDays { text },
element standaloneNarrowDays { text },
# Some of these entries may be absent depending on command line arguments
element longMonths_gregorian { text }?,
element longMonths_persian { text }?,
element longMonths_islamic { text }?,
element standaloneLongMonths_gregorian { text }?,
element standaloneLongMonths_persian { text }?,
element standaloneLongMonths_islamic { text }?,
element shortMonths_gregorian { text }?,
element shortMonths_persian { text }?,
element shortMonths_islamic { text }?,
element standaloneShortMonths_gregorian { text }?,
element standaloneShortMonths_persian { text }?,
element standaloneShortMonths_islamic { text }?,
element narrowMonths_gregorian { text }?,
element narrowMonths_persian { text }?,
element narrowMonths_islamic { text }?,
element standaloneNarrowMonths_gregorian { text }?,
element standaloneNarrowMonths_persian { text }?,
element standaloneNarrowMonths_islamic { text }?,
element groupSizes { GroupSizes },
element currencyDigits { xsd:nonNegativeInteger },
element currencyRounding { xsd:nonNegativeInteger }
}

View File

@ -0,0 +1,618 @@
#!/usr/bin/env python3
# Copyright (C) 2021 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Script to generate C++ code from CLDR data in QLocaleXML form
See ``cldr2qlocalexml.py`` for how to generate the QLocaleXML data itself.
Pass the output file from that as first parameter to this script; pass the ISO
639-3 data file as second parameter. You can optionally pass the root of the
qtbase check-out as third parameter; it defaults to the root of the qtbase
check-out containing this script.
The ISO 639-3 data file can be downloaded from the SIL website:
https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab
"""
import datetime
import argparse
from pathlib import Path
from typing import Optional
from qlocalexml import QLocaleXmlReader
from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor, qtbase_root
from iso639_3 import LanguageCodeData
class LocaleKeySorter:
"""Sort-ordering representation of a locale key.
This is for passing to a sorting algorithm as key-function, that
it applies to each entry in the list to decide which belong
earlier. It adds an entry to the (language, script, territory)
triple, just before script, that sorts earlier if the territory is
the default for the given language and script, later otherwise.
"""
# TODO: study the relationship between this and CLDR's likely
# sub-tags algorithm. Work out how locale sort-order impacts
# QLocale's likely sub-tag matching algorithms. Make sure this is
# sorting in an order compatible with those algorithms.
def __init__(self, defaults):
self.map = dict(defaults)
def foreign(self, key):
default = self.map.get(key[:2])
return default is None or default != key[2]
def __call__(self, key):
# TODO: should we compare territory before or after script ?
return (key[0], self.foreign(key)) + key[1:]
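    # Illustrative use (numeric ids are hypothetical): with defaults
    # containing ((31, 7), 225), sorted(keys, key=LocaleKeySorter(defaults))
    # places (31, 7, 225) before (31, 7, 224), since the former is the
    # default territory for that (language, script) pair and so is not
    # "foreign".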
class StringDataToken:
def __init__(self, index, length, bits):
if index > 0xffff:
raise ValueError(f'Start-index ({index}) exceeds the uint16 range!')
if length >= (1 << bits):
raise ValueError(f'Data size ({length}) exceeds the {bits}-bit range!')
self.index = index
self.length = length
class StringData:
def __init__(self, name):
self.data = []
self.hash = {}
self.name = name
self.text = '' # Used in quick-search for matches in data
def append(self, s, bits = 8):
try:
token = self.hash[s]
except KeyError:
token = self.__store(s, bits)
self.hash[s] = token
return token
def __store(self, s, bits):
"""Add string s to known data.
Seeks to avoid duplication, where possible.
For example, short-forms may be prefixes of long-forms.
"""
if not s:
return StringDataToken(0, 0, bits)
ucs2 = unicode2hex(s)
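        # Look for an existing copy of s in what has been stored already, so
        # its range can be reused: text.index() supplies a starting point and
        # the loop verifies that the UCS-2 units really do appear in
        # self.data (text and data offsets need not coincide exactly).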
try:
index = self.text.index(s) - 1
matched = 0
while matched < len(ucs2):
index, matched = self.data.index(ucs2[0], index + 1), 1
if index + len(ucs2) >= len(self.data):
raise ValueError # not found after all !
while matched < len(ucs2) and self.data[index + matched] == ucs2[matched]:
matched += 1
except ValueError:
index = len(self.data)
self.data += ucs2
self.text += s
assert index >= 0
try:
return StringDataToken(index, len(ucs2), bits)
except ValueError as e:
e.args += (self.name, s)
raise
def write(self, fd):
if len(self.data) > 0xffff:
raise ValueError(f'Data is too big ({len(self.data)}) for quint16 index to its end!',
self.name)
fd.write(f"\nstatic constexpr char16_t {self.name}[] = {{\n")
fd.write(wrap_list(self.data))
fd.write("\n};\n")
def currencyIsoCodeData(s):
if s:
return '{' + ",".join(str(ord(x)) for x in s) + '}'
return "{0,0,0}"
class LocaleSourceEditor (SourceFileEditor):
def __init__(self, path: Path, temp: Path, version: str):
super().__init__(path, temp)
self.version = version
def onEnter(self) -> None:
super().onEnter()
self.writer.write(f"""
/*
This part of the file was generated on {datetime.date.today()} from the
Common Locale Data Repository v{self.version}
http://www.unicode.org/cldr/
Do not edit this section: instead regenerate it using
cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or
edited) CLDR data; see qtbase/util/locale_database/.
*/
""")
class LocaleDataWriter (LocaleSourceEditor):
def likelySubtags(self, likely):
# First sort likely, so that we can use binary search in C++
# code. Although the entries are (lang, script, region), sort
# as (lang, region, script) and sort 0 after all non-zero
# values. This ensures that, when several mappings partially
# match a requested locale, the one we should prefer to use
# appears first.
huge = 0x10000 # > any ushort; all tag values are ushort
def keyLikely(entry):
have = entry[1] # Numeric id triple
return have[0] or huge, have[2] or huge, have[1] or huge # language, region, script
likely = sorted(likely, key=keyLikely)
i = 0
self.writer.write('static constexpr QLocaleId likely_subtags[] = {\n')
for had, have, got, give in likely:
i += 1
self.writer.write(' {{ {:3d}, {:3d}, {:3d} }}'.format(*have))
self.writer.write(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give))
self.writer.write(' ' if i == len(likely) else ',')
self.writer.write(f' // {had} -> {got}\n')
self.writer.write('};\n\n')
def localeIndex(self, indices):
self.writer.write('static constexpr quint16 locale_index[] = {\n')
for index, name in indices:
self.writer.write(f'{index:6d}, // {name}\n')
self.writer.write(' 0 // trailing 0\n')
self.writer.write('};\n\n')
def localeData(self, locales, names):
list_pattern_part_data = StringData('list_pattern_part_data')
single_character_data = StringData('single_character_data')
date_format_data = StringData('date_format_data')
time_format_data = StringData('time_format_data')
days_data = StringData('days_data')
am_data = StringData('am_data')
pm_data = StringData('pm_data')
byte_unit_data = StringData('byte_unit_data')
currency_symbol_data = StringData('currency_symbol_data')
currency_display_name_data = StringData('currency_display_name_data')
currency_format_data = StringData('currency_format_data')
endonyms_data = StringData('endonyms_data')
# Locale data
self.writer.write('static constexpr QLocaleData locale_data[] = {\n')
        # Table headings: keep each label centred in its field, matching formatLine below:
self.writer.write(' // '
# Width 6 + comma
' lang ' # IDs
'script '
' terr '
# Range entries (all start-indices, then all sizes)
# Width 5 + comma
'lStrt ' # List pattern
'lpMid '
'lpEnd '
'lPair '
'lDelm ' # List delimiter
# Representing numbers
' dec '
'group '
'prcnt '
' zero '
'minus '
'plus '
' exp '
# Quotation marks
'qtOpn '
'qtEnd '
'altQO '
'altQE '
'lDFmt ' # Date format
'sDFmt '
'lTFmt ' # Time format
'sTFmt '
'slDay ' # Day names
'lDays '
'ssDys '
'sDays '
'snDay '
'nDays '
' am ' # am/pm indicators
' pm '
' byte '
'siQnt '
'iecQn '
'crSym ' # Currency formatting
'crDsp '
'crFmt '
'crFNg '
'ntLng ' # Name of language in itself, and of territory
'ntTer '
# Width 3 + comma for each size; no header
+ ' ' * 37 +
# Strays (char array, bit-fields):
# Width 10 + 2 spaces + comma
' currISO '
# Width 6 + comma
'curDgt ' # Currency digits
                          'curRnd ' # Currency rounding (unused: QTBUG-81343)
'dow1st ' # First day of week
' wknd+ ' # Week-end start/end days
' wknd- '
'grpTop '
'grpMid '
'grpEnd'
# No trailing space on last entry (be sure to
# pad before adding anything after it).
'\n')
formatLine = ''.join((
' {{ ',
# Locale-identifier
'{:6d},' * 3,
# List patterns, date/time formats, day names, am/pm
# SI/IEC byte-unit abbreviations
# Currency and endonyms
# Range starts
'{:5d},' * 37,
# Range sizes
'{:3d},' * 37,
# Currency ISO code
' {:>10s}, ',
# Currency formatting
'{:6d},{:6d}',
# Day of week and week-end
',{:6d}' * 3,
# Number group sizes
',{:6d}' * 3,
' }}')).format
for key in names:
locale = locales[key]
# Sequence of StringDataToken:
ranges = (tuple(list_pattern_part_data.append(p) for p in # 5 entries:
(locale.listPatternPartStart, locale.listPatternPartMiddle,
locale.listPatternPartEnd, locale.listPatternPartTwo,
locale.listDelim)) +
tuple(single_character_data.append(p) for p in # 11 entries
(locale.decimal, locale.group, locale.percent, locale.zero,
locale.minus, locale.plus, locale.exp,
locale.quotationStart, locale.quotationEnd,
locale.alternateQuotationStart, locale.alternateQuotationEnd)) +
                      tuple(date_format_data.append(f) for f in # 2 entries:
(locale.longDateFormat, locale.shortDateFormat)) +
tuple(time_format_data.append(f) for f in # 2 entries:
(locale.longTimeFormat, locale.shortTimeFormat)) +
tuple(days_data.append(d) for d in # 6 entries:
(locale.standaloneLongDays, locale.longDays,
locale.standaloneShortDays, locale.shortDays,
locale.standaloneNarrowDays, locale.narrowDays)) +
(am_data.append(locale.am), pm_data.append(locale.pm)) + # 2 entries
tuple(byte_unit_data.append(b) for b in # 3 entries:
(locale.byte_unit,
locale.byte_si_quantified,
locale.byte_iec_quantified)) +
(currency_symbol_data.append(locale.currencySymbol),
currency_display_name_data.append(locale.currencyDisplayName),
currency_format_data.append(locale.currencyFormat),
currency_format_data.append(locale.currencyNegativeFormat),
endonyms_data.append(locale.languageEndonym),
endonyms_data.append(locale.territoryEndonym)) # 6 entries
) # Total: 37 entries
assert len(ranges) == 37
self.writer.write(formatLine(*(
key +
tuple(r.index for r in ranges) +
tuple(r.length for r in ranges) +
(currencyIsoCodeData(locale.currencyIsoCode),
locale.currencyDigits,
locale.currencyRounding, # unused (QTBUG-81343)
locale.firstDayOfWeek, locale.weekendStart, locale.weekendEnd,
locale.groupTop, locale.groupHigher, locale.groupLeast) ))
+ f', // {locale.language}/{locale.script}/{locale.territory}\n')
self.writer.write(formatLine(*( # All zeros, matching the format:
(0,) * 3 + (0,) * 37 * 2
+ (currencyIsoCodeData(0),)
+ (0,) * 8 ))
+ ' // trailing zeros\n')
self.writer.write('};\n')
# StringData tables:
for data in (list_pattern_part_data, single_character_data,
date_format_data, time_format_data, days_data,
byte_unit_data, am_data, pm_data, currency_symbol_data,
currency_display_name_data, currency_format_data,
endonyms_data):
data.write(self.writer)
@staticmethod
def __writeNameData(out, book, form):
out(f'static constexpr char {form}_name_list[] =\n')
out('"Default\\0"\n')
for key, value in book.items():
if key == 0:
continue
out(f'"{value[0]}\\0"\n')
out(';\n\n')
out(f'static constexpr quint16 {form}_name_index[] = {{\n')
out(f' 0, // Any{form.capitalize()}\n')
index = 8
for key, value in book.items():
if key == 0:
continue
name = value[0]
out(f'{index:6d}, // {name}\n')
index += len(name) + 1
out('};\n\n')
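    # Schematic shape of __writeNameData's output for form 'language'
    # (entries are placeholders; the real ones come from the book passed in):
    #   static constexpr char language_name_list[] =
    #   "Default\0"
    #   "Abkhazian\0"
    #   ...
    #   ;
    #
    #   static constexpr quint16 language_name_index[] = {
    #        0, // AnyLanguage
    #        8, // Abkhazian
    #      ...
    #   };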
@staticmethod
def __writeCodeList(out, book, form, width):
out(f'static constexpr unsigned char {form}_code_list[] =\n')
for key, value in book.items():
code = value[1]
code += r'\0' * max(width - len(code), 0)
out(f'"{code}" // {value[0]}\n')
out(';\n\n')
def languageNames(self, languages):
self.__writeNameData(self.writer.write, languages, 'language')
def scriptNames(self, scripts):
self.__writeNameData(self.writer.write, scripts, 'script')
def territoryNames(self, territories):
self.__writeNameData(self.writer.write, territories, 'territory')
# TODO: unify these next three into the previous three; kept
# separate for now to verify we're not changing data.
def languageCodes(self, languages, code_data: LanguageCodeData):
out = self.writer.write
out(f'constexpr std::array<LanguageCodeEntry, {len(languages)}> languageCodeList {{\n')
def q(val: Optional[str], size: int) -> str:
"""Quote the value and adjust the result for tabular view."""
chars = []
if val is not None:
for c in val:
chars.append(f"'{c}'")
s = ', '.join(chars)
s = f'{{{s}}}'
else:
s = ''
if size == 0:
return f'{{{s}}}'
else:
return f'{{{s}}},'.ljust(size * 5 + 4)
for key, value in languages.items():
code = value[1]
if key < 2:
result = code_data.query('und')
else:
result = code_data.query(code)
assert code == result.id()
assert result is not None
codeString = q(result.part1Code, 2)
codeString += q(result.part2BCode, 3)
codeString += q(result.part2TCode, 3)
codeString += q(result.part3Code, 0)
out(f' LanguageCodeEntry {{{codeString}}}, // {value[0]}\n')
out('};\n\n')
def scriptCodes(self, scripts):
self.__writeCodeList(self.writer.write, scripts, 'script', 4)
def territoryCodes(self, territories): # TODO: unify with territoryNames()
self.__writeCodeList(self.writer.write, territories, 'territory', 3)
class CalendarDataWriter (LocaleSourceEditor):
formatCalendar = (
' {{'
+ ','.join(('{:6d}',) * 3 + ('{:5d}',) * 6 + ('{:3d}',) * 6)
+ ' }},').format
def write(self, calendar, locales, names):
months_data = StringData('months_data')
self.writer.write('static constexpr QCalendarLocale locale_data[] = {\n')
self.writer.write(
' //'
# IDs, width 7 (6 + comma)
' lang '
' script'
' terr '
# Month-name start-indices, width 6 (5 + comma)
'sLong '
' long '
'sShrt '
'short '
'sNarw '
'narow '
# No individual headers for the sizes.
'Sizes...'
'\n')
for key in names:
locale = locales[key]
# Sequence of StringDataToken:
try:
# Twelve long month names can add up to more than 256 (e.g. kde_TZ: 264)
ranges = (tuple(months_data.append(m[calendar], 16) for m in
(locale.standaloneLongMonths, locale.longMonths)) +
tuple(months_data.append(m[calendar]) for m in
(locale.standaloneShortMonths, locale.shortMonths,
locale.standaloneNarrowMonths, locale.narrowMonths)))
except ValueError as e:
e.args += (locale.language, locale.script, locale.territory)
raise
self.writer.write(
self.formatCalendar(*(
key +
tuple(r.index for r in ranges) +
tuple(r.length for r in ranges) ))
+ f'// {locale.language}/{locale.script}/{locale.territory}\n')
self.writer.write(self.formatCalendar(*( (0,) * (3 + 6 * 2) ))
+ '// trailing zeros\n')
self.writer.write('};\n')
months_data.write(self.writer)
class LocaleHeaderWriter (SourceFileEditor):
def __init__(self, path, temp, dupes):
super().__init__(path, temp)
self.__dupes = dupes
def languages(self, languages):
self.__enum('Language', languages, self.__language)
self.writer.write('\n')
def territories(self, territories):
self.writer.write(" // ### Qt 7: Rename to Territory\n")
self.__enum('Country', territories, self.__territory, 'Territory')
def scripts(self, scripts):
self.__enum('Script', scripts, self.__script)
self.writer.write('\n')
# Implementation details
from enumdata import (language_aliases as __language,
territory_aliases as __territory,
script_aliases as __script)
def __enum(self, name, book, alias, suffix = None):
assert book
if suffix is None:
suffix = name
out, dupes = self.writer.write, self.__dupes
out(f' enum {name} : ushort {{\n')
for key, value in book.items():
member = value[0].replace('-', ' ')
if name == 'Script':
# Don't .capitalize() as some names are already camel-case (see enumdata.py):
member = ''.join(word[0].upper() + word[1:] for word in member.split())
if not member.endswith('Script'):
member += 'Script'
if member in dupes:
raise Error(f'The script name "{member}" is messy')
else:
member = ''.join(member.split())
member = member + suffix if member in dupes else member
out(f' {member} = {key},\n')
out('\n '
+ ',\n '.join(f'{k} = {v}' for k, v in sorted(alias.items()))
+ f',\n\n Last{suffix} = {member}')
# for "LastCountry = LastTerritory"
# ### Qt 7: Remove
if suffix != name:
out(f',\n Last{name} = Last{suffix}')
out('\n };\n')
def main(out, err):
calendars_map = {
# CLDR name: Qt file name fragment
'gregorian': 'roman',
'persian': 'jalali',
'islamic': 'hijri',
# 'hebrew': 'hebrew'
}
all_calendars = list(calendars_map.keys())
parser = argparse.ArgumentParser(
description='Generate C++ code from CLDR data in QLocaleXML form.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('input_file', help='input XML file name',
metavar='input-file.xml')
parser.add_argument('iso_path', help='path to the ISO 639-3 data file',
metavar='iso-639-3.tab')
parser.add_argument('qtbase_path', help='path to the root of the qtbase source tree',
nargs='?', default=qtbase_root)
parser.add_argument('--calendars', help='select calendars to emit data for',
nargs='+', metavar='CALENDAR',
choices=all_calendars, default=all_calendars)
args = parser.parse_args()
qlocalexml = args.input_file
qtsrcdir = Path(args.qtbase_path)
calendars = {cal: calendars_map[cal] for cal in args.calendars}
if not (qtsrcdir.is_dir()
and all(qtsrcdir.joinpath('src/corelib/text', leaf).is_file()
for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))):
parser.error(f'Missing expected files under qtbase source root {qtsrcdir}')
reader = QLocaleXmlReader(qlocalexml)
locale_map = dict(reader.loadLocaleMap(calendars, err.write))
locale_keys = sorted(locale_map.keys(), key=LocaleKeySorter(reader.defaultMap()))
code_data = LanguageCodeData(args.iso_path)
try:
with LocaleDataWriter(qtsrcdir.joinpath('src/corelib/text/qlocale_data_p.h'),
qtsrcdir, reader.cldrVersion) as writer:
writer.likelySubtags(reader.likelyMap())
writer.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map)))
writer.localeData(locale_map, locale_keys)
writer.writer.write('\n')
writer.languageNames(reader.languages)
writer.scriptNames(reader.scripts)
writer.territoryNames(reader.territories)
# TODO: merge the next three into the previous three
writer.languageCodes(reader.languages, code_data)
writer.scriptCodes(reader.scripts)
writer.territoryCodes(reader.territories)
except Exception as e:
err.write(f'\nError updating locale data: {e}\n')
return 1
# Generate calendar data
for calendar, stem in calendars.items():
try:
with CalendarDataWriter(
qtsrcdir.joinpath(f'src/corelib/time/q{stem}calendar_data_p.h'),
qtsrcdir, reader.cldrVersion) as writer:
writer.write(calendar, locale_map, locale_keys)
except Exception as e:
err.write(f'\nError updating {calendar} locale data: {e}\n')
# qlocale.h
try:
with LocaleHeaderWriter(qtsrcdir.joinpath('src/corelib/text/qlocale.h'),
qtsrcdir, reader.dupes) as writer:
writer.languages(reader.languages)
writer.scripts(reader.scripts)
writer.territories(reader.territories)
except Exception as e:
err.write(f'\nError updating qlocale.h: {e}\n')
# qlocale.qdoc
try:
with Transcriber(qtsrcdir.joinpath('src/corelib/text/qlocale.qdoc'), qtsrcdir) as qdoc:
DOCSTRING = " QLocale's data is based on Common Locale Data Repository "
for line in qdoc.reader:
if DOCSTRING in line:
qdoc.writer.write(f'{DOCSTRING}v{reader.cldrVersion}.\n')
else:
qdoc.writer.write(line)
except Exception as e:
        err.write(f'\nError updating qlocale.qdoc: {e}\n')
return 1
return 0
if __name__ == "__main__":
import sys
sys.exit(main(sys.stdout, sys.stderr))

View File

@ -0,0 +1,424 @@
// Copyright (C) 2016 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#include "localemodel.h"
#include <QLocale>
#include <QDate>
#include <qdebug.h>
static const int g_model_cols = 6;
struct LocaleListItem
{
int language;
int territory;
};
const LocaleListItem g_locale_list[] = {
{ 1, 0 }, // C/AnyTerritory
{ 3, 69 }, // Afan/Ethiopia
{ 3, 111 }, // Afan/Kenya
{ 4, 59 }, // Afar/Djibouti
{ 4, 67 }, // Afar/Eritrea
{ 4, 69 }, // Afar/Ethiopia
{ 5, 195 }, // Afrikaans/SouthAfrica
{ 5, 148 }, // Afrikaans/Namibia
{ 6, 2 }, // Albanian/Albania
{ 7, 69 }, // Amharic/Ethiopia
{ 8, 186 }, // Arabic/SaudiArabia
{ 8, 3 }, // Arabic/Algeria
{ 8, 17 }, // Arabic/Bahrain
{ 8, 64 }, // Arabic/Egypt
{ 8, 103 }, // Arabic/Iraq
{ 8, 109 }, // Arabic/Jordan
{ 8, 115 }, // Arabic/Kuwait
{ 8, 119 }, // Arabic/Lebanon
{ 8, 122 }, // Arabic/LibyanArabJamahiriya
{ 8, 145 }, // Arabic/Morocco
{ 8, 162 }, // Arabic/Oman
{ 8, 175 }, // Arabic/Qatar
{ 8, 201 }, // Arabic/Sudan
{ 8, 207 }, // Arabic/SyrianArabRepublic
{ 8, 216 }, // Arabic/Tunisia
{ 8, 223 }, // Arabic/UnitedArabEmirates
{ 8, 237 }, // Arabic/Yemen
{ 9, 11 }, // Armenian/Armenia
{ 10, 100 }, // Assamese/India
{ 12, 15 }, // Azerbaijani/Azerbaijan
{ 14, 197 }, // Basque/Spain
{ 15, 18 }, // Bengali/Bangladesh
{ 15, 100 }, // Bengali/India
{ 16, 25 }, // Bhutani/Bhutan
{ 20, 33 }, // Bulgarian/Bulgaria
{ 22, 20 }, // Byelorussian/Belarus
{ 23, 36 }, // Cambodian/Cambodia
{ 24, 197 }, // Catalan/Spain
{ 25, 44 }, // Chinese/China
{ 25, 97 }, // Chinese/HongKong
{ 25, 126 }, // Chinese/Macau
{ 25, 190 }, // Chinese/Singapore
{ 25, 208 }, // Chinese/Taiwan
{ 27, 54 }, // Croatian/Croatia
{ 28, 57 }, // Czech/CzechRepublic
{ 29, 58 }, // Danish/Denmark
{ 30, 151 }, // Dutch/Netherlands
{ 30, 21 }, // Dutch/Belgium
{ 31, 225 }, // English/UnitedStates
{ 31, 4 }, // English/AmericanSamoa
{ 31, 13 }, // English/Australia
{ 31, 21 }, // English/Belgium
{ 31, 22 }, // English/Belize
{ 31, 28 }, // English/Botswana
{ 31, 38 }, // English/Canada
{ 31, 89 }, // English/Guam
{ 31, 97 }, // English/HongKong
{ 31, 100 }, // English/India
{ 31, 104 }, // English/Ireland
{ 31, 107 }, // English/Jamaica
{ 31, 133 }, // English/Malta
{ 31, 134 }, // English/MarshallIslands
{ 31, 148 }, // English/Namibia
{ 31, 154 }, // English/NewZealand
{ 31, 160 }, // English/NorthernMarianaIslands
{ 31, 163 }, // English/Pakistan
{ 31, 170 }, // English/Philippines
{ 31, 190 }, // English/Singapore
{ 31, 195 }, // English/SouthAfrica
{ 31, 215 }, // English/TrinidadAndTobago
{ 31, 224 }, // English/UnitedKingdom
{ 31, 226 }, // English/UnitedStatesMinorOutlyingIslands
{ 31, 234 }, // English/USVirginIslands
{ 31, 240 }, // English/Zimbabwe
{ 33, 68 }, // Estonian/Estonia
{ 34, 71 }, // Faroese/FaroeIslands
{ 36, 73 }, // Finnish/Finland
{ 37, 74 }, // French/France
{ 37, 21 }, // French/Belgium
{ 37, 38 }, // French/Canada
{ 37, 125 }, // French/Luxembourg
{ 37, 142 }, // French/Monaco
{ 37, 206 }, // French/Switzerland
{ 40, 197 }, // Galician/Spain
{ 41, 81 }, // Georgian/Georgia
{ 42, 82 }, // German/Germany
{ 42, 14 }, // German/Austria
{ 42, 21 }, // German/Belgium
{ 42, 123 }, // German/Liechtenstein
{ 42, 125 }, // German/Luxembourg
{ 42, 206 }, // German/Switzerland
{ 43, 85 }, // Greek/Greece
{ 43, 56 }, // Greek/Cyprus
{ 44, 86 }, // Greenlandic/Greenland
{ 46, 100 }, // Gujarati/India
{ 47, 83 }, // Hausa/Ghana
{ 47, 156 }, // Hausa/Niger
{ 47, 157 }, // Hausa/Nigeria
{ 48, 105 }, // Hebrew/Israel
{ 49, 100 }, // Hindi/India
{ 50, 98 }, // Hungarian/Hungary
{ 51, 99 }, // Icelandic/Iceland
{ 52, 101 }, // Indonesian/Indonesia
{ 57, 104 }, // Irish/Ireland
{ 58, 106 }, // Italian/Italy
{ 58, 206 }, // Italian/Switzerland
{ 59, 108 }, // Japanese/Japan
{ 61, 100 }, // Kannada/India
{ 63, 110 }, // Kazakh/Kazakhstan
{ 64, 179 }, // Kinyarwanda/Rwanda
{ 65, 116 }, // Kirghiz/Kyrgyzstan
{ 66, 114 }, // Korean/RepublicOfKorea
{ 67, 102 }, // Kurdish/Iran
{ 67, 103 }, // Kurdish/Iraq
{ 67, 207 }, // Kurdish/SyrianArabRepublic
{ 67, 217 }, // Kurdish/Turkey
{ 69, 117 }, // Laothian/Lao
{ 71, 118 }, // Latvian/Latvia
{ 72, 49 }, // Lingala/DemocraticRepublicOfCongo
{ 72, 50 }, // Lingala/PeoplesRepublicOfCongo
{ 73, 124 }, // Lithuanian/Lithuania
{ 74, 127 }, // Macedonian/Macedonia
{ 76, 130 }, // Malay/Malaysia
{ 76, 32 }, // Malay/BruneiDarussalam
{ 77, 100 }, // Malayalam/India
{ 78, 133 }, // Maltese/Malta
{ 80, 100 }, // Marathi/India
{ 82, 143 }, // Mongolian/Mongolia
{ 84, 150 }, // Nepali/Nepal
{ 85, 161 }, // Norwegian/Norway
{ 87, 100 }, // Oriya/India
{ 88, 1 }, // Pashto/Afghanistan
{ 89, 102 }, // Persian/Iran
{ 89, 1 }, // Persian/Afghanistan
{ 90, 172 }, // Polish/Poland
{ 91, 173 }, // Portuguese/Portugal
{ 91, 30 }, // Portuguese/Brazil
{ 92, 100 }, // Punjabi/India
{ 92, 163 }, // Punjabi/Pakistan
{ 95, 177 }, // Romanian/Romania
{ 96, 178 }, // Russian/RussianFederation
{ 96, 222 }, // Russian/Ukraine
{ 99, 100 }, // Sanskrit/India
{ 100, 241 }, // Serbian/SerbiaAndMontenegro
{ 100, 27 }, // Serbian/BosniaAndHerzegowina
{ 100, 238 }, // Serbian/Yugoslavia
{ 101, 241 }, // SerboCroatian/SerbiaAndMontenegro
{ 101, 27 }, // SerboCroatian/BosniaAndHerzegowina
{ 101, 238 }, // SerboCroatian/Yugoslavia
{ 102, 195 }, // Sesotho/SouthAfrica
{ 103, 195 }, // Setswana/SouthAfrica
{ 107, 195 }, // Siswati/SouthAfrica
{ 108, 191 }, // Slovak/Slovakia
{ 109, 192 }, // Slovenian/Slovenia
{ 110, 194 }, // Somali/Somalia
{ 110, 59 }, // Somali/Djibouti
{ 110, 69 }, // Somali/Ethiopia
{ 110, 111 }, // Somali/Kenya
{ 111, 197 }, // Spanish/Spain
{ 111, 10 }, // Spanish/Argentina
{ 111, 26 }, // Spanish/Bolivia
{ 111, 43 }, // Spanish/Chile
{ 111, 47 }, // Spanish/Colombia
{ 111, 52 }, // Spanish/CostaRica
{ 111, 61 }, // Spanish/DominicanRepublic
{ 111, 63 }, // Spanish/Ecuador
{ 111, 65 }, // Spanish/ElSalvador
{ 111, 90 }, // Spanish/Guatemala
{ 111, 96 }, // Spanish/Honduras
{ 111, 139 }, // Spanish/Mexico
{ 111, 155 }, // Spanish/Nicaragua
{ 111, 166 }, // Spanish/Panama
{ 111, 168 }, // Spanish/Paraguay
{ 111, 169 }, // Spanish/Peru
{ 111, 174 }, // Spanish/PuertoRico
{ 111, 225 }, // Spanish/UnitedStates
{ 111, 227 }, // Spanish/Uruguay
{ 111, 231 }, // Spanish/Venezuela
{ 113, 111 }, // Swahili/Kenya
{ 113, 210 }, // Swahili/Tanzania
{ 114, 205 }, // Swedish/Sweden
{ 114, 73 }, // Swedish/Finland
{ 116, 209 }, // Tajik/Tajikistan
{ 117, 100 }, // Tamil/India
{ 118, 178 }, // Tatar/RussianFederation
{ 119, 100 }, // Telugu/India
{ 120, 211 }, // Thai/Thailand
{ 122, 67 }, // Tigrinya/Eritrea
{ 122, 69 }, // Tigrinya/Ethiopia
{ 124, 195 }, // Tsonga/SouthAfrica
{ 125, 217 }, // Turkish/Turkey
{ 129, 222 }, // Ukrainian/Ukraine
{ 130, 100 }, // Urdu/India
{ 130, 163 }, // Urdu/Pakistan
{ 131, 228 }, // Uzbek/Uzbekistan
{ 131, 1 }, // Uzbek/Afghanistan
{ 132, 232 }, // Vietnamese/VietNam
{ 134, 224 }, // Welsh/UnitedKingdom
{ 136, 195 }, // Xhosa/SouthAfrica
{ 138, 157 }, // Yoruba/Nigeria
{ 140, 195 }, // Zulu/SouthAfrica
{ 141, 161 }, // Nynorsk/Norway
{ 142, 27 }, // Bosnian/BosniaAndHerzegowina
{ 143, 131 }, // Divehi/Maldives
{ 144, 224 }, // Manx/UnitedKingdom
{ 145, 224 }, // Cornish/UnitedKingdom
{ 146, 83 }, // Akan/Ghana
{ 147, 100 }, // Konkani/India
{ 148, 83 }, // Ga/Ghana
{ 149, 157 }, // Igbo/Nigeria
{ 150, 111 }, // Kamba/Kenya
{ 151, 207 }, // Syriac/SyrianArabRepublic
{ 152, 67 }, // Blin/Eritrea
{ 153, 67 }, // Geez/Eritrea
{ 153, 69 }, // Geez/Ethiopia
{ 154, 157 }, // Koro/Nigeria
{ 155, 69 }, // Sidamo/Ethiopia
{ 156, 157 }, // Atsam/Nigeria
{ 157, 67 }, // Tigre/Eritrea
{ 158, 157 }, // Jju/Nigeria
{ 159, 106 }, // Friulian/Italy
{ 160, 195 }, // Venda/SouthAfrica
{ 161, 83 }, // Ewe/Ghana
{ 161, 212 }, // Ewe/Togo
{ 163, 225 }, // Hawaiian/UnitedStates
{ 164, 157 }, // Tyap/Nigeria
{ 165, 129 }, // Chewa/Malawi
};
static const int g_locale_list_count = sizeof(g_locale_list)/sizeof(g_locale_list[0]);
LocaleModel::LocaleModel(QObject *parent)
: QAbstractItemModel(parent)
{
m_data_list.append(1234.5678);
m_data_list.append(QDate::currentDate());
m_data_list.append(QDate::currentDate());
m_data_list.append(QTime::currentTime());
m_data_list.append(QTime::currentTime());
}
QVariant LocaleModel::data(const QModelIndex &index, int role) const
{
if (!index.isValid()
        || (role != Qt::DisplayRole && role != Qt::EditRole && role != Qt::ToolTipRole)
|| index.column() >= g_model_cols
|| index.row() >= g_locale_list_count + 2)
return QVariant();
QVariant data;
if (index.column() < g_model_cols - 1)
data = m_data_list.at(index.column());
if (index.row() == 0) {
if (role == Qt::ToolTipRole)
return QVariant();
switch (index.column()) {
case 0:
return data.toDouble();
case 1:
return data.toDate();
case 2:
return data.toDate();
case 3:
return data.toTime();
case 4:
return data.toTime();
case 5:
return QVariant();
default:
break;
}
} else {
QLocale locale;
if (index.row() == 1) {
locale = QLocale::system();
} else {
LocaleListItem item = g_locale_list[index.row() - 2];
locale = QLocale((QLocale::Language)item.language, (QLocale::Territory)item.territory);
}
switch (index.column()) {
case 0:
if (role == Qt::ToolTipRole)
return QVariant();
return locale.toString(data.toDouble());
case 1:
if (role == Qt::ToolTipRole)
return locale.dateFormat(QLocale::LongFormat);
return locale.toString(data.toDate(), QLocale::LongFormat);
case 2:
if (role == Qt::ToolTipRole)
return locale.dateFormat(QLocale::ShortFormat);
return locale.toString(data.toDate(), QLocale::ShortFormat);
case 3:
if (role == Qt::ToolTipRole)
return locale.timeFormat(QLocale::LongFormat);
return locale.toString(data.toTime(), QLocale::LongFormat);
case 4:
if (role == Qt::ToolTipRole)
return locale.timeFormat(QLocale::ShortFormat);
return locale.toString(data.toTime(), QLocale::ShortFormat);
case 5:
if (role == Qt::ToolTipRole)
return QVariant();
return locale.name();
default:
break;
}
}
return QVariant();
}
QVariant LocaleModel::headerData(int section, Qt::Orientation orientation, int role) const
{
if (role != Qt::DisplayRole)
return QVariant();
if (orientation == Qt::Horizontal) {
switch (section) {
case 0:
return QLatin1String("Double");
case 1:
return QLatin1String("Long Date");
case 2:
return QLatin1String("Short Date");
case 3:
return QLatin1String("Long Time");
case 4:
return QLatin1String("Short Time");
case 5:
return QLatin1String("Name");
default:
break;
}
} else {
if (section >= g_locale_list_count + 2)
return QVariant();
if (section == 0) {
return QLatin1String("Input");
} else if (section == 1) {
return QLatin1String("System");
} else {
LocaleListItem item = g_locale_list[section - 2];
return QLocale::languageToString((QLocale::Language)item.language)
+ QLatin1Char('/')
+ QLocale::territoryToString((QLocale::Territory)item.territory);
}
}
return QVariant();
}
QModelIndex LocaleModel::index(int row, int column,
const QModelIndex &parent) const
{
if (parent.isValid()
|| row >= g_locale_list_count + 2
|| column >= g_model_cols)
return QModelIndex();
return createIndex(row, column);
}
QModelIndex LocaleModel::parent(const QModelIndex&) const
{
return QModelIndex();
}
int LocaleModel::columnCount(const QModelIndex&) const
{
return g_model_cols;
}
int LocaleModel::rowCount(const QModelIndex &parent) const
{
if (parent.isValid())
return 0;
return g_locale_list_count + 2;
}
Qt::ItemFlags LocaleModel::flags(const QModelIndex &index) const
{
if (!index.isValid())
return 0;
if (index.row() == 0 && index.column() == g_model_cols - 1)
return 0;
if (index.row() == 0)
return QAbstractItemModel::flags(index) | Qt::ItemIsEditable;
return QAbstractItemModel::flags(index);
}
bool LocaleModel::setData(const QModelIndex &index, const QVariant &value, int role)
{
if (!index.isValid()
|| index.row() != 0
|| index.column() >= g_model_cols - 1
|| role != Qt::EditRole
|| m_data_list.at(index.column()).type() != value.type())
return false;
m_data_list[index.column()] = value;
    emit dataChanged(createIndex(1, index.column()),
                     createIndex(g_locale_list_count + 1, index.column()));
return true;
}

View File

@ -0,0 +1,31 @@
// Copyright (C) 2016 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#ifndef LOCALEMODEL_H
#define LOCALEMODEL_H
#include <QAbstractItemModel>
#include <QList>
#include <QVariant>
class LocaleModel : public QAbstractItemModel
{
Q_OBJECT
public:
LocaleModel(QObject *parent = nullptr);
    int columnCount(const QModelIndex &parent = QModelIndex()) const override;
    QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const override;
    QModelIndex index(int row, int column,
                      const QModelIndex &parent = QModelIndex()) const override;
    QModelIndex parent(const QModelIndex &index) const override;
    int rowCount(const QModelIndex &parent = QModelIndex()) const override;
    QVariant headerData(int section, Qt::Orientation orientation,
                        int role = Qt::DisplayRole) const override;
    Qt::ItemFlags flags(const QModelIndex &index) const override;
    bool setData(const QModelIndex &index, const QVariant &value,
                 int role = Qt::EditRole) override;
private:
QList<QVariant> m_data_list;
};
#endif // LOCALEMODEL_H

View File

@ -0,0 +1,51 @@
// Copyright (C) 2016 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#include <QTableView>
#include <QVBoxLayout>
#include <QItemDelegate>
#include <QItemEditorFactory>
#include <QDoubleSpinBox>
#include "localewidget.h"
#include "localemodel.h"
class DoubleEditorCreator : public QItemEditorCreatorBase
{
public:
QWidget *createWidget(QWidget *parent) const {
QDoubleSpinBox *w = new QDoubleSpinBox(parent);
w->setDecimals(4);
w->setRange(-10000.0, 10000.0);
return w;
}
virtual QByteArray valuePropertyName() const {
return QByteArray("value");
}
};
class EditorFactory : public QItemEditorFactory
{
public:
EditorFactory() {
static DoubleEditorCreator double_editor_creator;
registerEditor(QVariant::Double, &double_editor_creator);
}
};
LocaleWidget::LocaleWidget(QWidget *parent)
: QWidget(parent)
{
m_model = new LocaleModel(this);
m_view = new QTableView(this);
QItemDelegate *delegate = qobject_cast<QItemDelegate*>(m_view->itemDelegate());
Q_ASSERT(delegate != 0);
static EditorFactory editor_factory;
delegate->setItemEditorFactory(&editor_factory);
m_view->setModel(m_model);
QVBoxLayout *layout = new QVBoxLayout(this);
    layout->setContentsMargins(0, 0, 0, 0);
layout->addWidget(m_view);
}

View File

@ -0,0 +1,21 @@
// Copyright (C) 2016 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#ifndef LOCALEWIDGET_H
#define LOCALEWIDGET_H
#include <QWidget>
class LocaleModel;
class QTableView;
class LocaleWidget : public QWidget
{
Q_OBJECT
public:
LocaleWidget(QWidget *parent = nullptr);
private:
LocaleModel *m_model;
QTableView *m_view;
};
#endif // LOCALEWIDGET_H

View File

@ -0,0 +1,13 @@
// Copyright (C) 2016 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#include <QApplication>
#include "localewidget.h"
int main(int argc, char *argv[])
{
QApplication app(argc, argv);
LocaleWidget wgt;
wgt.show();
return app.exec();
}

View File

@ -0,0 +1,4 @@
TARGET = testlocales
CONFIG += debug
SOURCES += localemodel.cpp localewidget.cpp main.cpp
HEADERS += localemodel.h localewidget.h