mirror of
https://github.com/crystalidea/qt6windows7.git
synced 2025-07-02 23:35:28 +08:00
qt 6.5.1 original
This commit is contained in:
14
util/locale_database/README
Normal file
14
util/locale_database/README
Normal file
@ -0,0 +1,14 @@
|
||||
locale_database is used to generate qlocale data from CLDR.
|
||||
|
||||
CLDR is the Common Locale Data Repository, a database for localized
|
||||
data (like date formats, country names etc). It is provided by the
|
||||
Unicode consortium.
|
||||
|
||||
See cldr2qlocalexml.py for how to run it and qlocalexml2cpp.py to
|
||||
update the locale data tables (principally text/qlocale_data_p.h and
|
||||
time/q*calendar_data_p.h under src/corelib/). See enumdata.py for when
|
||||
and how to update the data it provides. You will definitely need to
|
||||
pass --no-verify or -n to git commit for these changes.
|
||||
|
||||
See cldr2qtimezone.py for how to update tables of Windows-specific
|
||||
names for zones and UTC-offset zone names.
|
760
util/locale_database/cldr.py
Normal file
760
util/locale_database/cldr.py
Normal file
@ -0,0 +1,760 @@
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Digesting the CLDR's data.
|
||||
|
||||
Provides two classes:
|
||||
CldrReader -- driver for reading CLDR data
|
||||
CldrAccess -- used by the reader to access the tree of data files
|
||||
|
||||
The former should normally be all you need to access.
|
||||
See individual classes for further detail.
|
||||
"""
|
||||
|
||||
from typing import Iterable, TextIO
|
||||
from xml.dom import minidom
|
||||
from weakref import WeakValueDictionary as CacheDict
|
||||
from pathlib import Path
|
||||
|
||||
from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
|
||||
from qlocalexml import Locale
|
||||
|
||||
class CldrReader (object):
    """Driver for reading CLDR data.

    Wraps a CldrAccess (as self.root) and digests its raw data into
    the form the QLocaleXML writer wants: likely sub-tag expansions
    and per-locale Locale objects."""
    def __init__(self, root: Path, grumble = lambda msg: None, whitter = lambda msg: None):
        """Set up a reader object for reading CLDR data.

        Single parameter, root, is the file-system path to the root of
        the unpacked CLDR archive; its common/ sub-directory should
        contain dtd/, main/ and supplemental/ sub-directories.

        Optional second argument, grumble, is a callable that logs
        warnings and complaints, e.g. sys.stderr.write would be a
        suitable callable. The default is a no-op that ignores its
        single argument. Optional third argument is similar, used for
        less interesting output; pass sys.stderr.write for it for
        verbose output."""
        self.root = CldrAccess(root)
        self.whitter, self.grumble = whitter, grumble
        self.root.checkEnumData(grumble)

    def likelySubTags(self):
        """Generator for likely subtag information.

        Yields pairs (have, give) of 4-tuples; if what you have
        matches the left member, giving the right member is probably
        sensible. Each 4-tuple's entries are the full names of a
        language, a script, a territory (usually a country) and a
        variant (currently ignored)."""
        unknown = []
        for got, use in self.root.likelySubTags():
            try:
                have = self.__parseTags(got)
                give = self.__parseTags(use)
            except Error as err:
                # Unknown-code failures for self-extending (or und_*)
                # rules are just noted; anything else gets grumbled.
                if ((use.startswith(got) or got.startswith('und_'))
                        and err.message.startswith('Unknown ') and ' code ' in err.message):
                    unknown.append(use)
                else:
                    self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({err})\n')
                continue

            # A rule whose "have" side is all Any-placeholders tells us nothing:
            if all(name.startswith('Any') and name[3].isupper() for name in have[:-1]):
                continue

            # Substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
            script = have[1] if give[1] == 'AnyScript' else give[1]
            land = have[2] if give[2] == 'AnyTerritory' else give[2]
            yield have, (give[0], script, land, give[3]) # AnyVariant similarly ?

        if unknown:
            # TODO: look at LDML's reserved locale tag names; they
            # show up a lot in this, and may be grounds for filtering
            # more out.
            pass # self.__wrapped(self.whitter, 'Skipping likelySubtags (for unknown codes): ', unknown)

    def readLocales(self, calendars = ('gregorian',)):
        """Map (language, script, territory, variant) IDs to Locale objects."""
        return {(loc.language_id, loc.script_id, loc.territory_id, loc.variant_code): loc
                for loc in self.__allLocales(calendars)}

    def __allLocales(self, calendars):
        """Generate Locale objects for all locales CLDR describes.

        Visits the default-content locales first, then those with
        their own data files; duplicates are resolved by readLocales'
        dict construction."""
        def skip(locale, reason):
            return f'Skipping defaultContent locale "{locale}" ({reason})\n'

        for locale in self.root.defaultContentLocales:
            try:
                language, script, territory, variant = self.__splitLocale(locale)
            except ValueError:
                self.whitter(skip(locale, 'only language tag'))
                continue

            if not (script or territory):
                self.grumble(skip(locale, 'second tag is neither script nor territory'))
                continue

            if not (language and territory):
                continue

            try:
                yield self.__getLocaleData(self.root.locale(locale), calendars,
                                           language, script, territory, variant)
            except Error as err:
                self.grumble(skip(locale, err.message))

        for locale in self.root.fileLocales:
            try:
                chain = self.root.locale(locale)
                language, script, territory, variant = chain.tagCodes()
                assert language
                # TODO: this skip should probably be based on likely
                # sub-tags, instead of empty territory: if locale has a
                # likely-subtag expansion, that's what QLocale uses,
                # and we'll be saving its data for the expanded locale
                # anyway, so don't need to record it for itself.
                # See also QLocaleXmlReader.loadLocaleMap's grumble.
                if not territory:
                    continue
                yield self.__getLocaleData(chain, calendars, language, script, territory, variant)
            except Error as err:
                self.grumble(f'Skipping file locale "{locale}" ({err})\n')

    import textwrap
    @staticmethod
    def __wrapped(writer, prefix, tokens, wrap = textwrap.wrap):
        # Wrap a comma-joined token list to 80 columns for log output.
        writer('\n'.join(wrap(prefix + ', '.join(tokens),
                              subsequent_indent=' ', width=80)) + '\n')
    del textwrap

    def __parseTags(self, locale):
        """Convert a locale name to a 4-tuple of full tag names.

        Raises (via codesToIdName) if any of the tags is unknown."""
        fields = self.__splitLocale(locale)
        language = next(fields)
        script = territory = variant = ''
        try:
            script, territory, variant = fields
        except ValueError:
            # __splitLocale yielded only the language tag:
            pass
        return tuple(pair[1] for pair in
                     self.root.codesToIdName(language, script, territory, variant))

    def __splitLocale(self, name):
        """Generate (language, script, territory, variant) from a locale name

        Ignores any trailing fields (with a warning), leaves script (a
        capitalised four-letter token), territory (either a number or
        an all-uppercase token) or variant (upper case and digits)
        empty if unspecified. Only generates one entry if name is a
        single tag (i.e. contains no underscores). Always yields 1 or
        4 values, never 2 or 3."""
        fields = iter(name.split('_'))
        yield next(fields) # Language

        try:
            token = next(fields)
        except StopIteration:
            return

        # Script is always four letters, always capitalised:
        if len(token) == 4 and token[0].isupper() and token[1:].islower():
            yield token
            try:
                token = next(fields)
            except StopIteration:
                token = ''
        else:
            yield ''

        # Territory is upper-case or numeric:
        if token and token.isupper() or token.isdigit():
            yield token
            try:
                token = next(fields)
            except StopIteration:
                token = ''
        else:
            yield ''

        # Variant can be any mixture of upper-case and digits.
        if token and all(ch.isupper() or ch.isdigit() for ch in token):
            yield token
            token = ''
        else:
            yield ''

        leftover = [token] if token else []
        leftover.extend(fields)

        if leftover:
            self.grumble(f'Ignoring unparsed cruft {"_".join(leftover)} in {name}\n')

    def __getLocaleData(self, scan, calendars, language, script, territory, variant):
        """Assemble a Locale from all the sources of data about it.

        Combines supplemental data (week, currency) from self.root
        with the per-locale scan's own data."""
        ids, names = zip(*self.root.codesToIdName(language, script, territory, variant))
        assert ids[0] > 0 and ids[2] > 0, (language, script, territory, variant)
        locale = Locale(
            language = names[0], language_code = language, language_id = ids[0],
            script = names[1], script_code = script, script_id = ids[1],
            territory = names[2], territory_code = territory, territory_id = ids[2],
            variant_code = variant)

        firstDay, weStart, weEnd = self.root.weekData(territory)
        assert all(day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
                   for day in (firstDay, weStart, weEnd))

        locale.update(firstDayOfWeek = firstDay,
                      weekendStart = weStart,
                      weekendEnd = weEnd)

        iso, digits, rounding = self.root.currencyData(territory)
        locale.update(currencyIsoCode = iso,
                      currencyDigits = int(digits),
                      currencyRounding = int(rounding))

        locale.update(scan.currencyData(iso))
        locale.update(scan.numericData(self.root.numberSystem, self.whitter))
        locale.update(scan.textPatternData())
        locale.update(scan.endonyms(language, script, territory, variant))
        locale.update(scan.unitData()) # byte, kB, MB, GB, ..., KiB, MiB, GiB, ...
        locale.update(scan.calendarNames(calendars)) # Names of days and months

        return locale
|
||||
|
||||
# Note: various caches assume this class is a singleton, so the
# "default" value for a parameter no caller should pass can serve as
# the cache. If a process were to instantiate this class with distinct
# roots, each cache would be filled by the first to need it !
class CldrAccess (object):
    def __init__(self, root: Path):
        """Set up a master object for accessing CLDR data.

        Single parameter, root, is the file-system path to the root of
        the unpacked CLDR archive; its common/ sub-directory should
        contain dtd/, main/ and supplemental/ sub-directories."""
        self.root = root

    def xml(self, relative_path: str):
        """Load a single XML file and return its root element as an XmlScanner.

        The path is interpreted relative to self.root"""
        return XmlScanner(Node(self.__xml(relative_path)))

    def supplement(self, name):
        """Loads supplemental data as a Supplement object.

        The name should be that of a file in common/supplemental/, without path.
        """
        return Supplement(Node(self.__xml(f'common/supplemental/{name}')))

    def locale(self, name):
        """Loads all data for a locale as a LocaleScanner object.

        The name should be a locale name; adding suffix '.xml' to it
        should usually yield a file in common/main/. The returned
        LocaleScanner object packages this file along with all those
        from which it inherits; its methods know how to handle that
        inheritance, where relevant."""
        return LocaleScanner(name, self.__localeRoots(name), self.__rootLocale)

    @property
    def fileLocales(self) -> Iterable[str]:
        """Generator for locale IDs seen in file-names.

        All *.xml other than root.xml in common/main/ are assumed to
        identify locales."""
        for path in self.root.joinpath('common/main').glob('*.xml'):
            if path.stem != 'root':
                yield path.stem

    @property
    def defaultContentLocales(self):
        """Generator for the default content locales."""
        for name, attrs in self.supplement('supplementalMetadata.xml').find('metadata/defaultContent'):
            try:
                locales = attrs['locales']
            except KeyError:
                pass
            else:
                for locale in locales.split():
                    yield locale

    def likelySubTags(self):
        """Generator for (from, to) pairs of likely sub-tag expansions."""
        for ignore, attrs in self.supplement('likelySubtags.xml').find('likelySubtags'):
            yield attrs['from'], attrs['to']

    def numberSystem(self, system):
        """Get a description of a numbering system.

        Returns a mapping, with keys 'digits', 'type' and 'id'; the
        value for this last is system. Raises KeyError for unknown
        number system, ldml.Error on failure to load data."""
        try:
            return self.__numberSystems[system]
        except KeyError:
            raise Error(f'Unsupported number system: {system}')

    def weekData(self, territory):
        """Data on the weekly cycle.

        Returns a triple (W, S, E) of en's short names for week-days;
        W is the first day of the week, S the start of the week-end
        and E the end of the week-end. Where data for a territory is
        unavailable, the data for CLDR's territory 001 (The World) is
        used."""
        try:
            return self.__weekData[territory]
        except KeyError:
            return self.__weekData['001']

    def currencyData(self, territory):
        """Returns currency data for the given territory code.

        Return value is a tuple (ISO4217 code, digit count, rounding
        mode). If CLDR provides no data for this territory, ('', 2, 1)
        is the default result.
        """
        try:
            return self.__currencyData[territory]
        except KeyError:
            return '', 2, 1

    def codesToIdName(self, language, script, territory, variant = ''):
        """Maps each code to the appropriate ID and name.

        Returns a 4-tuple of (ID, name) pairs corresponding to the
        language, script, territory and variant given. Raises a
        suitable error if any of them is unknown, indicating all that
        are unknown plus suitable names for any that could sensibly be
        added to enumdata.py to make them known.

        Until we implement variant support (QTBUG-81051), the fourth
        member of the returned tuple is always 0 paired with a string
        that should not be used."""
        enum = self.__enumMap
        try:
            return (enum('language')[language],
                    enum('script')[script],
                    enum('territory')[territory],
                    enum('variant')[variant])
        except KeyError:
            pass

        # At least one code is unknown; build a report naming them all:
        parts, values = [], [language, script, territory, variant]
        for index, key in enumerate(('language', 'script', 'territory', 'variant')):
            naming, enums = self.__codeMap(key), enum(key)
            value = values[index]
            if value not in enums:
                text = f'{key} code {value}'
                name = naming.get(value)
                if name and value != 'POSIX':
                    text += f' (could add {name})'
                parts.append(text)
        if len(parts) > 1:
            parts[-1] = 'and ' + parts[-1]
        assert parts
        raise Error('Unknown ' + ', '.join(parts),
                    language, script, territory, variant)

    @staticmethod
    def __checkEnum(given, proper, scraps,
                    remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ü': 'u'},
                    prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
                    suffixes = ( 'Han', ),
                    skip = '\u02bc'):
        """Yield (our name, suggested name) pairs where enumdata.py disagrees with CLDR.

        Each of given and proper is a { code: full name } mapping;
        scraps is a set of codes that supplementalData's parentLocale
        believes in, for which a missing en.xml name is tolerated.
        Names that differ only in punctuation, parenthesised clauses,
        known prefixes/suffixes or accents are treated as matching."""
        # Each is a { code: full name } mapping
        for code, name in given.items():
            try: right = proper[code]
            except KeyError:
                # No en.xml name for this code, but supplementalData's
                # parentLocale may still believe in it:
                if code not in scraps:
                    yield name, f'[Found no CLDR name for code {code}]'
                continue
            if name == right: continue
            ok = right.replace('&', 'And')
            for k, v in prefix.items():
                if ok.startswith(k + ' '):
                    ok = v + ok[len(k):]
            while '(' in ok:
                try: f, t = ok.index('('), ok.index(')')
                except ValueError: break
                # Excise the parenthesised clause, including the ')'
                # itself (t + 1; using ok[t:] would keep the ')'):
                ok = ok[:f].rstrip() + ' ' + ok[t + 1:].lstrip()
            if any(name == ok + ' ' + s for s in suffixes):
                continue
            if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
                    remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
                continue
            yield name, ok

    def checkEnumData(self, grumble):
        """Report naming mismatches between enumdata.py and CLDR.

        Single argument, grumble, is a callable that receives one
        string at a time; it is only called if a mismatch is found."""
        scraps = set()
        for k in self.__parentLocale.keys():
            for f in k.split('_'):
                scraps.add(f)
        from enumdata import language_map, territory_map, script_map
        language = dict((v, k) for k, v in language_map.values() if not v.isspace())
        territory = dict((v, k) for k, v in territory_map.values() if v != 'ZZ')
        script = dict((v, k) for k, v in script_map.values() if v != 'Zzzz')
        lang = dict(self.__checkEnum(language, self.__codeMap('language'), scraps))
        land = dict(self.__checkEnum(territory, self.__codeMap('territory'), scraps))
        text = dict(self.__checkEnum(script, self.__codeMap('script'), scraps))
        if lang or land or text:
            grumble("""\
Using names that don't match CLDR: consider updating the name(s) in
enumdata.py (keeping the old name as an alias):
""")
            if lang:
                grumble('Language:\n\t'
                        + '\n\t'.join(f'{k} -> {v}' for k, v in lang.items())
                        + '\n')
            if land:
                grumble('Territory:\n\t'
                        + '\n\t'.join(f'{k} -> {v}' for k, v in land.items())
                        + '\n')
            if text:
                grumble('Script:\n\t'
                        + '\n\t'.join(f'{k} -> {v}' for k, v in text.items())
                        + '\n')
            grumble('\n')

    def readWindowsTimeZones(self, lookup): # For use by cldr2qtimezone.py
        """Digest CLDR's MS-Win time-zone name mapping.

        MS-Win have their own eccentric names for time-zones. CLDR
        helpfully provides a translation to more orthodox names.

        Single argument, lookup, is a mapping from known MS-Win names
        for locales to a unique integer index (starting at 1).

        The XML structure we read has the form:

 <supplementalData>
     <windowsZones>
         <mapTimezones otherVersion="..." typeVersion="...">
             <!-- (UTC-08:00) Pacific Time (US & Canada) -->
             <mapZone other="Pacific Standard Time" territory="001" type="America/Los_Angeles"/>
             <mapZone other="Pacific Standard Time" territory="CA" type="America/Vancouver America/Dawson America/Whitehorse"/>
             <mapZone other="Pacific Standard Time" territory="US" type="America/Los_Angeles America/Metlakatla"/>
             <mapZone other="Pacific Standard Time" territory="ZZ" type="PST8PDT"/>
         </mapTimezones>
     </windowsZones>
 </supplementalData>
"""
        zones = self.supplement('windowsZones.xml')
        enum = self.__enumMap('territory')
        badZones, unLands, defaults, windows = set(), set(), {}, {}

        for name, attrs in zones.find('windowsZones/mapTimezones'):
            if name != 'mapZone':
                continue

            wid, code = attrs['other'], attrs['territory']
            data = dict(windowsId = wid,
                        territoryCode = code,
                        ianaList = attrs['type'])

            try:
                key = lookup[wid]
            except KeyError:
                badZones.add(wid)
                key = 0
            data['windowsKey'] = key

            if code == '001':
                defaults[key] = data['ianaList']
            else:
                try:
                    cid, name = enum[code]
                except KeyError:
                    # Bug fix: unLands is a set, so use add(), not
                    # append() (which raised AttributeError here):
                    unLands.add(code)
                    continue
                data.update(territoryId = cid, territory = name)
                windows[key, cid] = data

        if unLands:
            raise Error('Unknown territory codes, please add to enumdata.py: '
                        + ', '.join(sorted(unLands)))

        if badZones:
            raise Error('Unknown Windows IDs, please add to cldr2qtimezone.py: '
                        + ', '.join(sorted(badZones)))

        return self.cldrVersion, defaults, windows

    @property
    def cldrVersion(self):
        # Evaluate so as to ensure __cldrVersion is set:
        self.__unDistinguishedAttributes
        return self.__cldrVersion

    # Implementation details
    def __xml(self, relative_path: str, cache = CacheDict(), read = minidom.parse):
        # Parse (and weakly cache) an XML file, returning its document element.
        try:
            doc = cache[relative_path]
        except KeyError:
            cache[relative_path] = doc = read(str(self.root.joinpath(relative_path))).documentElement
        return doc

    def __open(self, relative_path: str) -> TextIO:
        # Open a text file relative to the CLDR root.
        return self.root.joinpath(relative_path).open()

    @property
    def __rootLocale(self, cache = []):
        # Cached scanner for the root locale's data file.
        if not cache:
            cache.append(self.xml('common/main/root.xml'))
        return cache[0]

    @property
    def __supplementalData(self, cache = []):
        # Cached Supplement for supplementalData.xml.
        if not cache:
            cache.append(self.supplement('supplementalData.xml'))
        return cache[0]

    @property
    def __numberSystems(self, cache = {}):
        # Cached { id: attributes } mapping from numberingSystems.xml.
        if not cache:
            for ignore, attrs in self.supplement('numberingSystems.xml').find('numberingSystems'):
                cache[attrs['id']] = attrs
            assert cache
        return cache

    @property
    def __weekData(self, cache = {}):
        # Cached { territory: (firstDay, weekendStart, weekendEnd) }.
        if not cache:
            firstDay, weStart, weEnd = self.__getWeekData()
            # Massage those into an easily-consulted form:
            # World defaults given for code '001':
            mon, sat, sun = firstDay['001'], weStart['001'], weEnd['001']
            lands = set(firstDay) | set(weStart) | set(weEnd)
            cache.update((land,
                          (firstDay.get(land, mon), weStart.get(land, sat), weEnd.get(land, sun)))
                         for land in lands)
            assert cache
        return cache

    def __getWeekData(self):
        """Scan for data on the weekly cycle.

        Yields three mappings from locales to en's short names for
        week-days; if a locale isn't a key of a given mapping, it
        should use the '001' (world) locale's value. The first mapping
        gives the day on which the week starts, the second gives the
        day on which the week-end starts, the third gives the last day
        of the week-end."""
        source = self.__supplementalData
        for key in ('firstDay', 'weekendStart', 'weekendEnd'):
            result = {}
            for ignore, attrs in source.find(f'weekData/{key}'):
                assert ignore == key
                day = attrs['day']
                assert day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'), day
                if 'alt' in attrs:
                    continue
                for loc in attrs.get('territories', '').split():
                    result[loc] = day
            yield result

    @property
    def __currencyData(self, cache = {}):
        # Cached { territory: (iso4217 code, digits, rounding) }.
        if not cache:
            source = self.__supplementalData
            for elt in source.findNodes('currencyData/region'):
                iso, digits, rounding = '', 2, 1
                try:
                    territory = elt.dom.attributes['iso3166'].nodeValue
                except KeyError:
                    continue
                # First currency that's legal tender and not expired:
                for child in elt.findAllChildren('currency'):
                    try:
                        if child.dom.attributes['tender'].nodeValue == 'false':
                            continue
                    except KeyError:
                        pass
                    try:
                        child.dom.attributes['to'] # Is set if this element has gone out of date.
                    except KeyError:
                        iso = child.dom.attributes['iso4217'].nodeValue
                        break
                if iso:
                    for tag, data in source.find(
                            f'currencyData/fractions/info[iso4217={iso}]'):
                        digits = data['digits']
                        rounding = data['rounding']
                cache[territory] = iso, digits, rounding
            assert cache

        return cache

    @property
    def __unDistinguishedAttributes(self, cache = {}):
        """Mapping from tag names to lists of attributes.

        LDML defines some attributes as 'distinguishing': if a node
        has distinguishing attributes that weren't specified in an
        XPath, a search on that XPath should exclude the node's
        children.

        This property is a mapping from tag names to tuples of
        attribute names that *aren't* distinguishing for that tag.
        Its value is cached (so its costly computation is only done
        once) and there's a side-effect of populating its cache: it
        sets self.__cldrVersion to the value found in ldml.dtd, during
        parsing."""
        if not cache:
            cache.update(self.__scanLdmlDtd())
            assert cache

        return cache

    def __scanLdmlDtd(self):
        """Scan the LDML DTD, record CLDR version

        Yields (tag, attrs) pairs: on elements with a given tag,
        attributes named in its attrs (a tuple) may be ignored in an
        XPath search; other attributes are distinguished attributes,
        in the terminology of LDML's locale-inheritance rules.

        Sets self.__cldrVersion as a side-effect, since this
        information is found in the same file."""
        with self.__open('common/dtd/ldml.dtd') as dtd:
            tag, ignored, last = None, None, None

            for line in dtd:
                if line.startswith('<!ELEMENT '):
                    if ignored:
                        assert tag
                        yield tag, tuple(ignored)
                    tag, ignored, last = line.split()[1], [], None
                    continue

                if line.startswith('<!ATTLIST '):
                    assert tag is not None
                    parts = line.split()
                    assert parts[1] == tag
                    last = parts[2]
                    if parts[1:5] == ['version', 'cldrVersion', 'CDATA', '#FIXED']:
                        # parts[5] is the version, in quotes, although the final > might be stuck on its end:
                        self.__cldrVersion = parts[5].split('"')[1]
                    continue

                # <!ELEMENT...>s can also be @METADATA, but not @VALUE:
                if '<!--@VALUE-->' in line or (last and '<!--@METADATA-->' in line):
                    assert last is not None
                    assert ignored is not None
                    assert tag is not None
                    ignored.append(last)
                    last = None # No attribute is both value and metadata

            if tag and ignored:
                yield tag, tuple(ignored)

    def __enumMap(self, key, cache = {}):
        # Cached { code: (ID, name) } mappings derived from enumdata.py.
        if not cache:
            cache['variant'] = {'': (0, 'This should never be seen outside ldml.py')}
            # They're not actually lists: mappings from numeric value
            # to pairs of full name and short code. What we want, in
            # each case, is a mapping from code to the other two.
            from enumdata import language_map, script_map, territory_map
            for form, book, empty in (('language', language_map, 'AnyLanguage'),
                                      ('script', script_map, 'AnyScript'),
                                      ('territory', territory_map, 'AnyTerritory')):
                cache[form] = dict((pair[1], (num, pair[0]))
                                   for num, pair in book.items() if pair[0] != 'C')
                # (Have to filter out the C locale, as we give it the
                # same (all space) code as AnyLanguage, whose code
                # should probably be 'und' instead.)

                # Map empty to zero and the any value:
                cache[form][''] = (0, empty)
            # and map language code 'und' also to (0, any):
            cache['language']['und'] = (0, 'AnyLanguage')

        return cache[key]

    def __codeMap(self, key, cache = {},
                  # Maps our name for it to CLDR's name:
                  naming = {'language': 'languages', 'script': 'scripts',
                            'territory': 'territories', 'variant': 'variants'}):
        # Cached { code: full name } mappings scanned from en.xml.
        if not cache:
            root = self.xml('common/main/en.xml').root.findUniqueChild('localeDisplayNames')
            for dst, src in naming.items():
                cache[dst] = dict(self.__codeMapScan(root.findUniqueChild(src)))
            assert cache

        return cache[key]

    def __codeMapScan(self, node):
        """Get mapping from codes to element values.

        Passed in node is a <languages>, <scripts>, <territories> or
        <variants> node, each child of which is a <language>,
        <script>, <territory> or <variant> node as appropriate, whose
        type is a code (of the appropriate flavour) and content is its
        full name. In some cases, two child nodes have the same type;
        in these cases, one always has an alt attribute and we should
        prefer the other. Yields all such type, content pairs found
        in node's children (skipping any with an alt attribute, if
        their type has been seen previously)."""
        seen = set()
        for elt in node.dom.childNodes:
            try:
                key, value = elt.attributes['type'].nodeValue, elt.childNodes[0].wholeText
            except (KeyError, ValueError, TypeError):
                pass
            else:
                if key not in seen or 'alt' not in elt.attributes:
                    yield key, value
                    seen.add(key)

    # CLDR uses inheritance between locales to save repetition:
    @property
    def __parentLocale(self, cache = {}):
        # see http://www.unicode.org/reports/tr35/#Parent_Locales
        if not cache:
            for tag, attrs in self.__supplementalData.find('parentLocales'):
                parent = attrs.get('parent', '')
                for child in attrs['locales'].split():
                    cache[child] = parent
            assert cache

        return cache

    def __localeAsDoc(self, name: str, aliasFor = None):
        # Load a locale's document element, chasing <alias source=...>
        # indirections; returns None when the file doesn't exist and
        # we weren't led here by an alias.
        path = f'common/main/{name}.xml'
        if self.root.joinpath(path).exists():
            elt = self.__xml(path)
            for child in Node(elt).findAllChildren('alias'):
                try:
                    alias = child.dom.attributes['source'].nodeValue
                except (KeyError, AttributeError):
                    pass
                else:
                    return self.__localeAsDoc(alias, aliasFor or name)
            # No alias child with a source:
            return elt

        if aliasFor:
            raise Error(f'Fatal error: found an alias "{aliasFor}" -> "{name}", '
                        'but found no file for the alias')

    def __scanLocaleRoots(self, name):
        # Yield Node wrappers for name's data file and each ancestor
        # it inherits from, following parentLocale data where present
        # and otherwise trimming trailing tags off the name.
        while name and name != 'root':
            doc = self.__localeAsDoc(name)
            if doc is not None:
                yield Node(doc, self.__unDistinguishedAttributes)

            try:
                name = self.__parentLocale[name]
            except KeyError:
                try:
                    name, tail = name.rsplit('_', 1)
                except ValueError: # No tail to discard: we're done
                    break

    class __Seq (list): pass # No weakref for tuple and list, but list sub-class is ok.
    def __localeRoots(self, name, cache = CacheDict()):
        # Weakly cache the inheritance chain of Nodes for each locale.
        try:
            chain = cache[name]
        except KeyError:
            cache[name] = chain = self.__Seq(self.__scanLocaleRoots(name))
        return chain

# Unpollute the namespace: we don't need to export these.
del minidom, CacheDict
|
87
util/locale_database/cldr2qlocalexml.py
Normal file
87
util/locale_database/cldr2qlocalexml.py
Normal file
@ -0,0 +1,87 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Convert CLDR data to QLocaleXML
|
||||
|
||||
The CLDR data can be downloaded from CLDR_, which has a sub-directory
|
||||
for each version; you need the ``core.zip`` file for your version of
|
||||
choice (typically the latest). This script has had updates to cope up
|
||||
to v38.1; for later versions, we may need adaptations. Unpack the
|
||||
downloaded ``core.zip`` and check it has a common/main/ sub-directory:
|
||||
pass the path of that root of the download to this script as its first
|
||||
command-line argument. Pass the name of the file in which to write
|
||||
output as the second argument; either omit it or use '-' to select the
|
||||
standard output. This file is the input needed by
|
||||
``./qlocalexml2cpp.py``
|
||||
|
||||
When you update the CLDR data, be sure to also update
|
||||
src/corelib/text/qt_attribution.json's entry for unicode-cldr. Check
|
||||
this script's output for unknown language, territory or script messages;
|
||||
if any can be resolved, use their entry in common/main/en.xml to
|
||||
append new entries to enumdata.py's lists and update documentation in
|
||||
src/corelib/text/qlocale.qdoc, adding the new entries in alphabetic
|
||||
order.
|
||||
|
||||
While updating the locale data, check also for updates to MS-Win's
|
||||
time zone names; see cldr2qtimezone.py for details.
|
||||
|
||||
All the scripts mentioned support --help to tell you how to use them.
|
||||
|
||||
.. _CLDR: https://unicode.org/Public/cldr/
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from cldr import CldrReader
|
||||
from qlocalexml import QLocaleXmlWriter
|
||||
|
||||
|
||||
def main(out, err):
    """Generate a QLocaleXML file from an unpacked CLDR tree.

    Arguments are the streams (normally sys.stdout and sys.stderr) to
    use for output and for progress/warning messages respectively.
    Command-line arguments (see --help) select the CLDR root, the
    output file and the calendars to emit. Returns 0 on success.
    """
    # Calendars for which Qt has locale data tables; 'hebrew' is not
    # yet supported.
    all_calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew'

    parser = argparse.ArgumentParser(
        description='Generate QLocaleXML from CLDR data.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
    parser.add_argument('out_file', help='output XML file name',
                        nargs='?', metavar='out-file.xml')
    parser.add_argument('--calendars', help='select calendars to emit data for',
                        nargs='+', metavar='CALENDAR',
                        choices=all_calendars, default=all_calendars)

    args = parser.parse_args()

    root = Path(args.cldr_path)
    # Presence of common/main/root.xml is used as a sanity check that
    # the first argument really is an unpacked CLDR tree.
    root_xml_path = 'common/main/root.xml'

    if not root.joinpath(root_xml_path).exists():
        parser.error('First argument is the root of the CLDR tree: '
                     f'found no {root_xml_path} under {root}')

    xml = args.out_file
    if not xml or xml == '-':
        # Omitted or '-' selects the standard-output stream.
        emit = out
    elif not xml.endswith('.xml'):
        parser.error(f'Please use a .xml extension on your output file name, not {xml}')
    else:
        try:
            # NOTE(review): never explicitly closed; the interpreter's
            # exit flushes it, but a with-block would be tidier.
            emit = open(xml, 'w')
        except IOError as e:
            parser.error(f'Failed to open "{xml}" to write output to it')

    # TODO - command line options to tune choice of grumble and whitter:
    # Both message channels currently go to the error stream.
    reader = CldrReader(root, err.write, err.write)
    writer = QLocaleXmlWriter(emit.write)

    writer.version(reader.root.cldrVersion)
    writer.enumData()
    writer.likelySubTags(reader.likelySubTags())
    writer.locales(reader.readLocales(args.calendars), args.calendars)

    writer.close(err.write)
    return 0

if __name__ == '__main__':
    sys.exit(main(sys.stdout, sys.stderr))
|
361
util/locale_database/cldr2qtimezone.py
Normal file
361
util/locale_database/cldr2qtimezone.py
Normal file
@ -0,0 +1,361 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Parse CLDR data for QTimeZone use with MS-Windows
|
||||
|
||||
Script to parse the CLDR common/supplemental/windowsZones.xml file and
|
||||
prepare its data for use in QTimeZone. See ``./cldr2qlocalexml.py`` for
|
||||
where to get the CLDR data. Pass its root directory as first parameter
|
||||
to this script. You can optionally pass the qtbase root directory as
|
||||
second parameter; it defaults to the root of the checkout containing
|
||||
this script. This script updates qtbase's
|
||||
src/corelib/time/qtimezoneprivate_data_p.h with the new data.
|
||||
"""
|
||||
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
import textwrap
|
||||
import argparse
|
||||
|
||||
from localetools import unicode2hex, wrap_list, Error, SourceFileEditor, qtbase_root
|
||||
from cldr import CldrAccess
|
||||
|
||||
### Data that may need updates in response to new entries in the CLDR file ###
|
||||
|
||||
# This script shall report the updates you need to make, if any arise.
|
||||
# However, you may need to research the relevant zone's standard offset.
|
||||
|
||||
# List of currently known Windows IDs.
|
||||
# If this script reports missing IDs, please add them here.
|
||||
# Look up the offset using (google and) timeanddate.com.
|
||||
# Not public so may safely be changed. Please keep in alphabetic order by ID.
|
||||
# ( Windows Id, Offset Seconds )
|
||||
# Each entry pairs a Windows time-zone display ID (as used in the
# registry and in CLDR's windowsZones.xml) with its standard (non-DST)
# offset from UTC in seconds.
windowsIdList = (
    ('Afghanistan Standard Time', 16200),
    ('Alaskan Standard Time', -32400),
    ('Aleutian Standard Time', -36000),
    ('Altai Standard Time', 25200),
    ('Arab Standard Time', 10800),
    ('Arabian Standard Time', 14400),
    ('Arabic Standard Time', 10800),
    ('Argentina Standard Time', -10800),
    ('Astrakhan Standard Time', 14400),
    ('Atlantic Standard Time', -14400),
    ('AUS Central Standard Time', 34200),
    ('Aus Central W. Standard Time', 31500),
    ('AUS Eastern Standard Time', 36000),
    ('Azerbaijan Standard Time', 14400),
    ('Azores Standard Time', -3600),
    ('Bahia Standard Time', -10800),
    ('Bangladesh Standard Time', 21600),
    ('Belarus Standard Time', 10800),
    ('Bougainville Standard Time', 39600),
    ('Canada Central Standard Time', -21600),
    ('Cape Verde Standard Time', -3600),
    ('Caucasus Standard Time', 14400),
    ('Cen. Australia Standard Time', 34200),
    ('Central America Standard Time', -21600),
    ('Central Asia Standard Time', 21600),
    ('Central Brazilian Standard Time', -14400),
    ('Central Europe Standard Time', 3600),
    ('Central European Standard Time', 3600),
    ('Central Pacific Standard Time', 39600),
    ('Central Standard Time (Mexico)', -21600),
    ('Central Standard Time', -21600),
    ('China Standard Time', 28800),
    ('Chatham Islands Standard Time', 45900),
    ('Cuba Standard Time', -18000),
    ('Dateline Standard Time', -43200),
    ('E. Africa Standard Time', 10800),
    ('E. Australia Standard Time', 36000),
    ('E. Europe Standard Time', 7200),
    ('E. South America Standard Time', -10800),
    ('Easter Island Standard Time', -21600),
    ('Eastern Standard Time', -18000),
    ('Eastern Standard Time (Mexico)', -18000),
    ('Egypt Standard Time', 7200),
    ('Ekaterinburg Standard Time', 18000),
    ('Fiji Standard Time', 43200),
    ('FLE Standard Time', 7200),
    ('Georgian Standard Time', 14400),
    ('GMT Standard Time', 0),
    ('Greenland Standard Time', -10800),
    ('Greenwich Standard Time', 0),
    ('GTB Standard Time', 7200),
    ('Haiti Standard Time', -18000),
    ('Hawaiian Standard Time', -36000),
    ('India Standard Time', 19800),
    ('Iran Standard Time', 12600),
    ('Israel Standard Time', 7200),
    ('Jordan Standard Time', 7200),
    ('Kaliningrad Standard Time', 7200),
    ('Korea Standard Time', 32400),
    ('Libya Standard Time', 7200),
    ('Line Islands Standard Time', 50400),
    ('Lord Howe Standard Time', 37800),
    ('Magadan Standard Time', 36000),
    ('Magallanes Standard Time', -10800), # permanent DST
    ('Marquesas Standard Time', -34200),
    ('Mauritius Standard Time', 14400),
    ('Middle East Standard Time', 7200),
    ('Montevideo Standard Time', -10800),
    ('Morocco Standard Time', 0),
    ('Mountain Standard Time (Mexico)', -25200),
    ('Mountain Standard Time', -25200),
    ('Myanmar Standard Time', 23400),
    ('N. Central Asia Standard Time', 21600),
    ('Namibia Standard Time', 3600),
    ('Nepal Standard Time', 20700),
    ('New Zealand Standard Time', 43200),
    ('Newfoundland Standard Time', -12600),
    ('Norfolk Standard Time', 39600),
    ('North Asia East Standard Time', 28800),
    ('North Asia Standard Time', 25200),
    ('North Korea Standard Time', 30600),
    ('Omsk Standard Time', 21600),
    ('Pacific SA Standard Time', -10800),
    ('Pacific Standard Time', -28800),
    ('Pacific Standard Time (Mexico)', -28800),
    ('Pakistan Standard Time', 18000),
    ('Paraguay Standard Time', -14400),
    ('Qyzylorda Standard Time', 18000), # a.k.a. Kyzylorda, in Kazakhstan
    ('Romance Standard Time', 3600),
    ('Russia Time Zone 3', 14400),
    ('Russia Time Zone 10', 39600),
    ('Russia Time Zone 11', 43200),
    ('Russian Standard Time', 10800),
    ('SA Eastern Standard Time', -10800),
    ('SA Pacific Standard Time', -18000),
    ('SA Western Standard Time', -14400),
    ('Saint Pierre Standard Time', -10800), # New France
    ('Sakhalin Standard Time', 39600),
    ('Samoa Standard Time', 46800),
    ('Sao Tome Standard Time', 0),
    ('Saratov Standard Time', 14400),
    ('SE Asia Standard Time', 25200),
    ('Singapore Standard Time', 28800),
    ('South Africa Standard Time', 7200),
    ('South Sudan Standard Time', 7200),
    ('Sri Lanka Standard Time', 19800),
    ('Sudan Standard Time', 7200), # unless they mean South Sudan, +03:00
    ('Syria Standard Time', 7200),
    ('Taipei Standard Time', 28800),
    ('Tasmania Standard Time', 36000),
    ('Tocantins Standard Time', -10800),
    ('Tokyo Standard Time', 32400),
    ('Tomsk Standard Time', 25200),
    ('Tonga Standard Time', 46800),
    ('Transbaikal Standard Time', 32400), # Yakutsk
    ('Turkey Standard Time', 7200),
    ('Turks And Caicos Standard Time', -14400),
    ('Ulaanbaatar Standard Time', 28800),
    ('US Eastern Standard Time', -18000),
    ('US Mountain Standard Time', -25200),
    ('UTC-11', -39600),
    ('UTC-09', -32400),
    ('UTC-08', -28800),
    ('UTC-02', -7200),
    ('UTC', 0),
    ('UTC+12', 43200),
    ('UTC+13', 46800),
    ('Venezuela Standard Time', -16200),
    ('Vladivostok Standard Time', 36000),
    ('Volgograd Standard Time', 14400),
    ('W. Australia Standard Time', 28800),
    ('W. Central Africa Standard Time', 3600),
    ('W. Europe Standard Time', 3600),
    ('W. Mongolia Standard Time', 25200), # Hovd
    ('West Asia Standard Time', 18000),
    ('West Bank Standard Time', 7200),
    ('West Pacific Standard Time', 36000),
    ('Yakutsk Standard Time', 32400),
    ('Yukon Standard Time', -25200), # Non-DST Mountain Standard Time since 2020-11-01
)
|
||||
|
||||
# List of standard UTC IDs to use. Not public so may be safely changed.
|
||||
# Do not remove IDs, as each entry is part of the API/behavior guarantee.
|
||||
# ( UTC Id, Offset Seconds )
|
||||
# Each entry pairs a fixed-offset zone ID with its offset from UTC in
# seconds; entry order (after the first) is by increasing offset.
utcIdList = (
    ('UTC', 0), # Goes first so is default
    ('UTC-14:00', -50400),
    ('UTC-13:00', -46800),
    ('UTC-12:00', -43200),
    ('UTC-11:00', -39600),
    ('UTC-10:00', -36000),
    ('UTC-09:00', -32400),
    ('UTC-08:00', -28800),
    ('UTC-07:00', -25200),
    ('UTC-06:00', -21600),
    ('UTC-05:00', -18000),
    ('UTC-04:30', -16200),
    ('UTC-04:00', -14400),
    ('UTC-03:30', -12600),
    ('UTC-03:00', -10800),
    ('UTC-02:00', -7200),
    ('UTC-01:00', -3600),
    ('UTC-00:00', 0),
    ('UTC+00:00', 0),
    ('UTC+01:00', 3600),
    ('UTC+02:00', 7200),
    ('UTC+03:00', 10800),
    ('UTC+03:30', 12600),
    ('UTC+04:00', 14400),
    ('UTC+04:30', 16200),
    ('UTC+05:00', 18000),
    ('UTC+05:30', 19800),
    ('UTC+05:45', 20700),
    ('UTC+06:00', 21600),
    ('UTC+06:30', 23400),
    ('UTC+07:00', 25200),
    ('UTC+08:00', 28800),
    ('UTC+08:30', 30600),
    ('UTC+09:00', 32400),
    ('UTC+09:30', 34200),
    ('UTC+10:00', 36000),
    ('UTC+11:00', 39600),
    ('UTC+12:00', 43200),
    ('UTC+13:00', 46800),
    ('UTC+14:00', 50400),
)
|
||||
|
||||
### End of data that may need updates in response to CLDR ###
|
||||
|
||||
class ByteArrayData:
    """Pool of NUL-terminated strings, rendered as one C char array.

    Strings are deduplicated: appending the same text a second time
    returns the index of the existing copy. Start indices must fit in
    a uint16, since the generated tables store them in 16-bit fields.
    """
    def __init__(self):
        self.data = []  # hex-encoded bytes of every distinct string appended
        self.hash = {}  # NUL-terminated string -> its start index in data

    def append(self, s):
        """Add s (NUL-terminated) to the pool; return its start index."""
        s += '\0'
        known = self.hash.get(s)
        if known is not None:
            return known

        encoded = unicode2hex(s)
        start = len(self.data)
        if start > 0xffff:
            raise Error(f'Index ({start}) outside the uint16 range !')
        self.hash[s] = start
        self.data.extend(encoded)
        return start

    def write(self, out, name):
        """Emit the pooled bytes as a C array declaration called name."""
        out(f'\nstatic constexpr char {name}[] = {{\n')
        out(wrap_list(self.data))
        out('\n};\n')
||||
|
||||
class ZoneIdWriter (SourceFileEditor):
    """Fills in the generated zone-data tables of qtimezoneprivate_data_p.h.

    Relies on its SourceFileEditor base (from localetools) for the
    mechanics of rewriting the target file — presumably a context
    manager, as main() uses it in a with-statement (confirm in
    localetools); self.writer is the handle the generated text is
    written through.
    """
    def write(self, version, defaults, windowsIds):
        # version: CLDR version string, quoted in the generated banner.
        # defaults: maps 1-based index into windowsIdList to the default
        #   IANA ID list for that Windows zone.
        # windowsIds: maps sortable keys to dicts carrying 'windowsKey',
        #   'territoryId', 'ianaList', 'windowsId' and 'territory'.
        self.__writeWarning(version)
        windows, iana = self.__writeTables(self.writer.write, defaults, windowsIds)
        windows.write(self.writer.write, 'windowsIdData')
        iana.write(self.writer.write, 'ianaIdData')

    def __writeWarning(self, version):
        # Do-not-edit banner recording today's date and the CLDR version.
        self.writer.write(f"""
/*
    This part of the file was generated on {datetime.date.today()} from the
    Common Locale Data Repository v{version} file supplemental/windowsZones.xml

    http://www.unicode.org/cldr/

    Do not edit this code: run cldr2qtimezone.py on updated (or
    edited) CLDR data; see qtbase/util/locale_database/.
*/

""")

    @staticmethod
    def __writeTables(out, defaults, windowsIds):
        # Writes the three C tables via out(); string payloads go into two
        # ByteArrayData pools, returned so the caller can emit them after
        # the tables that index into them.
        windowsIdData, ianaIdData = ByteArrayData(), ByteArrayData()

        # Write Windows/IANA table
        out('// Windows ID Key, Territory Enum, IANA ID Index\n')
        out('static constexpr QZoneData zoneDataTable[] = {\n')
        for index, data in sorted(windowsIds.items()):
            out('    {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'.format(
                data['windowsKey'], data['territoryId'],
                ianaIdData.append(data['ianaList']),
                data['windowsId'], data['territory']))
        out('};\n\n')

        # Write Windows ID key table
        out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
        out('static constexpr QWindowsData windowsDataTable[] = {\n')
        # Keys are 1-based positions in windowsIdList, matching the dict
        # main() builds for CldrAccess.readWindowsTimeZones().
        for index, pair in enumerate(windowsIdList, 1):
            out('    {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'.format(
                index,
                windowsIdData.append(pair[0]),
                ianaIdData.append(defaults[index]),
                pair[1], pair[0]))
        out('};\n\n')

        # Write UTC ID key table
        out('// IANA ID Index, UTC Offset\n')
        out('static constexpr QUtcData utcDataTable[] = {\n')
        for pair in utcIdList:
            out('    {{ {:6d},{:6d} }}, // {}\n'.format(
                ianaIdData.append(pair[0]), pair[1], pair[0]))
        out('};\n')

        return windowsIdData, ianaIdData
|
||||
|
||||
|
||||
def main(out, err):
    """Parses CLDR's data and updates Qt's representation of it.

    Takes sys.stdout, sys.stderr (or equivalents) as
    arguments. Expects two command-line options: the root of the
    unpacked CLDR data-file tree and the root of the qtbase module's
    checkout. Updates QTimeZone's private data about Windows time-zone
    IDs. Returns 0 on success, 1 on failure."""
    parser = argparse.ArgumentParser(
        description="Update Qt's CLDR-derived timezone data.")
    parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
    parser.add_argument('qtbase_path',
                        help='path to the root of the qtbase source tree',
                        nargs='?', default=qtbase_root)

    args = parser.parse_args()

    cldrPath = Path(args.cldr_path)
    qtPath = Path(args.qtbase_path)

    if not qtPath.is_dir():
        parser.error(f"No such Qt directory: {qtPath}")

    if not cldrPath.is_dir():
        parser.error(f"No such CLDR directory: {cldrPath}")

    # The generated file this script rewrites:
    dataFilePath = qtPath.joinpath('src/corelib/time/qtimezoneprivate_data_p.h')

    if not dataFilePath.is_file():
        parser.error(f'No such file: {dataFilePath}')

    try:
        # Map each known Windows ID to its 1-based position in
        # windowsIdList; readWindowsTimeZones() keys its results on these.
        version, defaults, winIds = CldrAccess(cldrPath).readWindowsTimeZones(
            dict((name, ind) for ind, name in enumerate((x[0] for x in windowsIdList), 1)))
    except IOError as e:
        parser.error(
            f'Failed to open common/supplemental/windowsZones.xml: {e}')
        # NOTE(review): parser.error() raises SystemExit, so this return
        # is unreachable dead code.
        return 1
    except Error as e:
        err.write('\n'.join(textwrap.wrap(
            f'Failed to read windowsZones.xml: {e}',
            subsequent_indent=' ', width=80)) + '\n')
        return 1

    out.write('Input file parsed, now writing data\n')

    try:
        with ZoneIdWriter(dataFilePath, qtPath) as writer:
            writer.write(version, defaults, winIds)
    except Exception as e:
        err.write(f'\nError while updating timezone data: {e}\n')
        return 1

    out.write(f'Data generation completed, please check the new file at {dataFilePath}\n')
    return 0


if __name__ == '__main__':
    import sys
    sys.exit(main(sys.stdout, sys.stderr))
|
81
util/locale_database/dateconverter.py
Normal file
81
util/locale_database/dateconverter.py
Normal file
@ -0,0 +1,81 @@
|
||||
# Copyright (C) 2016 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
|
||||
import re
|
||||
|
||||
def _convert_pattern(pattern):
|
||||
# patterns from http://www.unicode.org/reports/tr35/#Date_Format_Patterns
|
||||
qt_regexps = {
|
||||
r"yyy{3,}" : "yyyy", # more that three digits hence convert to four-digit year
|
||||
r"L" : "M", # stand-alone month names. not supported.
|
||||
r"g{1,}": "", # modified julian day. not supported.
|
||||
r"S{1,}" : "", # fractional seconds. not supported.
|
||||
r"A{1,}" : "" # milliseconds in day. not supported.
|
||||
}
|
||||
qt_patterns = {
|
||||
"G" : "", "GG" : "", "GGG" : "", "GGGG" : "", "GGGGG" : "", # Era. not supported.
|
||||
"y" : "yyyy", # four-digit year without leading zeroes
|
||||
"Q" : "", "QQ" : "", "QQQ" : "", "QQQQ" : "", # quarter. not supported.
|
||||
"q" : "", "qq" : "", "qqq" : "", "qqqq" : "", # quarter. not supported.
|
||||
"MMMMM" : "MMM", # narrow month name.
|
||||
"LLLLL" : "MMM", # stand-alone narrow month name.
|
||||
"l" : "", # special symbol for chinese leap month. not supported.
|
||||
"w" : "", "W" : "", # week of year/month. not supported.
|
||||
"D" : "", "DD" : "", "DDD" : "", # day of year. not supported.
|
||||
"F" : "", # day of week in month. not supported.
|
||||
"E" : "ddd", "EE" : "ddd", "EEE" : "ddd", "EEEEE" : "ddd", "EEEE" : "dddd", # day of week
|
||||
"e" : "ddd", "ee" : "ddd", "eee" : "ddd", "eeeee" : "ddd", "eeee" : "dddd", # local day of week
|
||||
"c" : "ddd", "cc" : "ddd", "ccc" : "ddd", "ccccc" : "ddd", "cccc" : "dddd", # stand-alone local day of week
|
||||
"a" : "AP", # AM/PM
|
||||
"K" : "h", # Hour 0-11
|
||||
"k" : "H", # Hour 1-24
|
||||
"j" : "", # special reserved symbol.
|
||||
"z" : "t", "zz" : "t", "zzz" : "t", "zzzz" : "t", # timezone
|
||||
"Z" : "t", "ZZ" : "t", "ZZZ" : "t", "ZZZZ" : "t", # timezone
|
||||
"v" : "t", "vv" : "t", "vvv" : "t", "vvvv" : "t", # timezone
|
||||
"V" : "t", "VV" : "t", "VVV" : "t", "VVVV" : "t" # timezone
|
||||
}
|
||||
if pattern in qt_patterns:
|
||||
return qt_patterns[pattern]
|
||||
for r,v in qt_regexps.items():
|
||||
pattern = re.sub(r, v, pattern)
|
||||
return pattern
|
||||
|
||||
def convert_date(input):
    """Convert a CLDR date/time format string to a Qt format string.

    Scans input, gathering each run of consecutive identical pattern
    letters and translating it via _convert_pattern(); every other
    character outside single quotes is copied through as a literal
    separator. Characters inside single quotes are dropped
    (NOTE(review): the closing quote itself is appended to the result
    — confirm that is intended). Whenever a field translates to
    nothing, separator characters left dangling before it are stripped
    too, and leading separators are stripped from the final result.
    """
    result = ""
    # All field letters defined by CLDR TR35.
    patterns = "GyYuQqMLlwWdDFgEecahHKkjmsSAzZvV"
    last = ""  # the current run of one repeated pattern letter
    inquote = 0  # count of "'" seen so far; odd means inside quotes
    chars_to_strip = " -"  # separators to discard beside dropped fields
    for c in input:
        if c == "'":
            inquote = inquote + 1
        if inquote % 2 == 0:
            if c in patterns:
                if not last:
                    last = c
                else:
                    if c in last:  # same letter: the run continues
                        last += c
                    else:
                        # pattern changed
                        converted = _convert_pattern(last)
                        result += converted
                        if not converted:
                            # Field dropped: drop separators before it too.
                            result = result.rstrip(chars_to_strip)
                        last = c
                continue
            if last:
                # pattern ended
                converted = _convert_pattern(last)
                result += converted
                if not converted:
                    result = result.rstrip(chars_to_strip)
                last = ""
            result += c
    if last:
        # Flush the final run of pattern letters.
        converted = _convert_pattern(last)
        result += converted
        if not converted:
            result = result.rstrip(chars_to_strip)
    return result.lstrip(chars_to_strip)
|
850
util/locale_database/enumdata.py
Normal file
850
util/locale_database/enumdata.py
Normal file
@ -0,0 +1,850 @@
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
|
||||
# A run of cldr2qlocalexml.py will produce output reporting any
|
||||
# language, script and territory codes it sees, in data, for which it
|
||||
# can find a name (taken always from en.xml) that could potentially be
|
||||
# used. There is no point adding a mapping for such a code unless the
|
||||
# CLDR's common/main/ contains an XML file for at least one locale
|
||||
# that exercises it.
|
||||
|
||||
# Each *_list reflects the current values of its enums in qlocale.h;
|
||||
# if new xml language files are available in CLDR, these languages and
|
||||
# territories need to be *appended* to this list (for compatibility
|
||||
# between versions). Include any spaces present in names (scripts
|
||||
# shall squish them out for the enum entries) in *_list, but use the
|
||||
# squished forms of names in the *_aliases mappings.
|
||||
|
||||
# For a new major version (and only then), we can change the
|
||||
# numbering, so re-sort each list into alphabetic order (e.g. using
|
||||
# sort -k2); but keep the Any and C entries first. That's why those
|
||||
# are offset with a blank line, below. After doing that, regenerate
|
||||
# locale data as usual; this will cause a binary-incompatible change.
|
||||
|
||||
# Note on "macrolanguage" comments: see "ISO 639 macrolanguage" on
|
||||
# Wikipedia. A "macrolanguage" is (loosely-speaking) a group of
|
||||
# languages so closely related to one another that they could also be
|
||||
# regarded as divergent dialects of the macrolanguage.
|
||||
|
||||
# Maps QLocale::Language enum numeric value to a pair of the language's
# en.xml display name (spaces included; the generator squishes them for
# the enum entry) and its ISO 639 / BCP 47 code.
language_map = {
    0: ("AnyLanguage", " "),
    1: ("C", " "),

    2: ("Abkhazian", "ab"),
    3: ("Afar", "aa"),
    4: ("Afrikaans", "af"),
    5: ("Aghem", "agq"),
    6: ("Akan", "ak"), # macrolanguage
    7: ("Akkadian", "akk"),
    8: ("Akoose", "bss"),
    9: ("Albanian", "sq"), # macrolanguage
    10: ("American Sign Language", "ase"),
    11: ("Amharic", "am"),
    12: ("Ancient Egyptian", "egy"),
    13: ("Ancient Greek", "grc"),
    14: ("Arabic", "ar"), # macrolanguage
    15: ("Aragonese", "an"),
    16: ("Aramaic", "arc"),
    17: ("Armenian", "hy"),
    18: ("Assamese", "as"),
    19: ("Asturian", "ast"),
    20: ("Asu", "asa"),
    21: ("Atsam", "cch"),
    22: ("Avaric", "av"),
    23: ("Avestan", "ae"),
    24: ("Aymara", "ay"), # macrolanguage
    25: ("Azerbaijani", "az"), # macrolanguage
    26: ("Bafia", "ksf"),
    27: ("Balinese", "ban"),
    28: ("Bambara", "bm"),
    29: ("Bamun", "bax"),
    30: ("Bangla", "bn"),
    31: ("Basaa", "bas"),
    32: ("Bashkir", "ba"),
    33: ("Basque", "eu"),
    34: ("Batak Toba", "bbc"),
    35: ("Belarusian", "be"),
    36: ("Bemba", "bem"),
    37: ("Bena", "bez"),
    38: ("Bhojpuri", "bho"),
    39: ("Bislama", "bi"),
    40: ("Blin", "byn"),
    41: ("Bodo", "brx"),
    42: ("Bosnian", "bs"),
    43: ("Breton", "br"),
    44: ("Buginese", "bug"),
    45: ("Bulgarian", "bg"),
    46: ("Burmese", "my"),
    47: ("Cantonese", "yue"),
    48: ("Catalan", "ca"),
    49: ("Cebuano", "ceb"),
    50: ("Central Atlas Tamazight", "tzm"),
    51: ("Central Kurdish", "ckb"),
    52: ("Chakma", "ccp"),
    53: ("Chamorro", "ch"),
    54: ("Chechen", "ce"),
    55: ("Cherokee", "chr"),
    56: ("Chickasaw", "cic"),
    57: ("Chiga", "cgg"),
    58: ("Chinese", "zh"), # macrolanguage
    59: ("Church", "cu"), # macrolanguage
    60: ("Chuvash", "cv"),
    61: ("Colognian", "ksh"),
    62: ("Coptic", "cop"),
    63: ("Cornish", "kw"),
    64: ("Corsican", "co"),
    65: ("Cree", "cr"), # macrolanguage
    66: ("Croatian", "hr"),
    67: ("Czech", "cs"),
    68: ("Danish", "da"),
    69: ("Divehi", "dv"),
    70: ("Dogri", "doi"), # macrolanguage
    71: ("Duala", "dua"),
    72: ("Dutch", "nl"),
    73: ("Dzongkha", "dz"),
    74: ("Embu", "ebu"),
    75: ("English", "en"),
    76: ("Erzya", "myv"),
    77: ("Esperanto", "eo"),
    78: ("Estonian", "et"), # macrolanguage
    79: ("Ewe", "ee" ),
    80: ("Ewondo", "ewo"),
    81: ("Faroese", "fo"),
    82: ("Fijian", "fj"),
    83: ("Filipino", "fil"),
    84: ("Finnish", "fi"),
    85: ("French", "fr"),
    86: ("Friulian", "fur"),
    87: ("Fulah", "ff"), # macrolanguage
    88: ("Gaelic", "gd"),
    89: ("Ga", "gaa"),
    90: ("Galician", "gl"),
    91: ("Ganda", "lg"),
    92: ("Geez", "gez"),
    93: ("Georgian", "ka"),
    94: ("German", "de"),
    95: ("Gothic", "got"),
    96: ("Greek", "el"),
    97: ("Guarani", "gn"), # macrolanguage
    98: ("Gujarati", "gu"),
    99: ("Gusii", "guz"),
    100: ("Haitian", "ht"),
    101: ("Hausa", "ha"),
    102: ("Hawaiian", "haw"),
    103: ("Hebrew", "he"),
    104: ("Herero", "hz"),
    105: ("Hindi", "hi"),
    106: ("Hiri Motu", "ho"),
    107: ("Hungarian", "hu"),
    108: ("Icelandic", "is"),
    109: ("Ido", "io"),
    110: ("Igbo", "ig" ),
    111: ("Inari Sami", "smn"),
    112: ("Indonesian", "id"),
    113: ("Ingush", "inh"),
    114: ("Interlingua", "ia"),
    115: ("Interlingue", "ie"),
    116: ("Inuktitut", "iu"), # macrolanguage
    117: ("Inupiaq", "ik"), # macrolanguage
    118: ("Irish", "ga"),
    119: ("Italian", "it"),
    120: ("Japanese", "ja"),
    121: ("Javanese", "jv"),
    122: ("Jju", "kaj"),
    123: ("Jola Fonyi", "dyo"),
    124: ("Kabuverdianu", "kea"),
    125: ("Kabyle", "kab"),
    126: ("Kako", "kkj"),
    127: ("Kalaallisut", "kl"),
    128: ("Kalenjin", "kln"),
    129: ("Kamba", "kam"),
    130: ("Kannada", "kn"),
    131: ("Kanuri", "kr"), # macrolanguage
    132: ("Kashmiri", "ks"),
    133: ("Kazakh", "kk"),
    134: ("Kenyang", "ken"),
    135: ("Khmer", "km"),
    136: ("Kiche", "quc"),
    137: ("Kikuyu", "ki"),
    138: ("Kinyarwanda", "rw"),
    139: ("Komi", "kv"), # macrolanguage
    140: ("Kongo", "kg"), # macrolanguage
    141: ("Konkani", "kok"),
    142: ("Korean", "ko"),
    143: ("Koro", "kfo"),
    144: ("Koyraboro Senni", "ses"),
    145: ("Koyra Chiini", "khq"),
    146: ("Kpelle", "kpe"),
    147: ("Kuanyama", "kj"),
    148: ("Kurdish", "ku"), # macrolanguage
    149: ("Kwasio", "nmg"),
    150: ("Kyrgyz", "ky"),
    151: ("Lakota", "lkt"),
    152: ("Langi", "lag"),
    153: ("Lao", "lo"),
    154: ("Latin", "la"),
    155: ("Latvian", "lv"), # macrolanguage
    156: ("Lezghian", "lez"),
    157: ("Limburgish", "li"),
    158: ("Lingala", "ln"),
    159: ("Literary Chinese", "lzh"),
    160: ("Lithuanian", "lt"),
    161: ("Lojban", "jbo"),
    162: ("Lower Sorbian", "dsb"),
    163: ("Low German", "nds"),
    164: ("Luba Katanga", "lu"),
    165: ("Lule Sami", "smj"),
    166: ("Luo", "luo"),
    167: ("Luxembourgish", "lb"),
    168: ("Luyia", "luy"),
    169: ("Macedonian", "mk"),
    170: ("Machame", "jmc"),
    171: ("Maithili", "mai"),
    172: ("Makhuwa Meetto", "mgh"),
    173: ("Makonde", "kde"),
    174: ("Malagasy", "mg"), # macrolanguage
    175: ("Malayalam", "ml"),
    176: ("Malay", "ms"), # macrolanguage
    177: ("Maltese", "mt"),
    178: ("Mandingo", "man"), # macrolanguage
    179: ("Manipuri", "mni"),
    180: ("Manx", "gv"),
    181: ("Maori", "mi"),
    182: ("Mapuche", "arn"),
    183: ("Marathi", "mr"),
    184: ("Marshallese", "mh"),
    185: ("Masai", "mas"),
    186: ("Mazanderani", "mzn"),
    187: ("Mende", "men"),
    188: ("Meru", "mer"),
    189: ("Meta", "mgo"),
    190: ("Mohawk", "moh"),
    191: ("Mongolian", "mn"), # macrolanguage
    192: ("Morisyen", "mfe"),
    193: ("Mundang", "mua"),
    194: ("Muscogee", "mus"),
    195: ("Nama", "naq"),
    196: ("Nauru", "na"),
    197: ("Navajo", "nv"),
    198: ("Ndonga", "ng"),
    199: ("Nepali", "ne"), # macrolanguage
    200: ("Newari", "new"),
    201: ("Ngiemboon", "nnh"),
    202: ("Ngomba", "jgo"),
    203: ("Nigerian Pidgin", "pcm"),
    204: ("Nko", "nqo"),
    205: ("Northern Luri", "lrc"),
    206: ("Northern Sami", "se" ),
    207: ("Northern Sotho", "nso"),
    208: ("North Ndebele", "nd"),
    209: ("Norwegian Bokmal", "nb"),
    210: ("Norwegian Nynorsk", "nn"),
    211: ("Nuer", "nus"),
    212: ("Nyanja", "ny" ),
    213: ("Nyankole", "nyn"),
    214: ("Occitan", "oc"),
    215: ("Odia", "or"), # macrolanguage
    216: ("Ojibwa", "oj"), # macrolanguage
    217: ("Old Irish", "sga"),
    218: ("Old Norse", "non"),
    219: ("Old Persian", "peo"),
    220: ("Oromo", "om"), # macrolanguage
    221: ("Osage", "osa"),
    222: ("Ossetic", "os"),
    223: ("Pahlavi", "pal"),
    224: ("Palauan", "pau"),
    225: ("Pali", "pi"), # macrolanguage
    226: ("Papiamento", "pap"),
    227: ("Pashto", "ps"), # macrolanguage
    228: ("Persian", "fa"), # macrolanguage
    229: ("Phoenician", "phn"),
    230: ("Polish", "pl"),
    231: ("Portuguese", "pt"),
    232: ("Prussian", "prg"),
    233: ("Punjabi", "pa"),
    234: ("Quechua", "qu"), # macrolanguage
    235: ("Romanian", "ro"),
    236: ("Romansh", "rm"),
    237: ("Rombo", "rof"),
    238: ("Rundi", "rn"),
    239: ("Russian", "ru"),
    240: ("Rwa", "rwk"),
    241: ("Saho", "ssy"),
    242: ("Sakha", "sah"),
    243: ("Samburu", "saq"),
    244: ("Samoan", "sm"),
    245: ("Sango", "sg"),
    246: ("Sangu", "sbp"),
    247: ("Sanskrit", "sa"),
    248: ("Santali", "sat"),
    249: ("Sardinian", "sc"), # macrolanguage
    250: ("Saurashtra", "saz"),
    251: ("Sena", "seh"),
    252: ("Serbian", "sr"),
    253: ("Shambala", "ksb"),
    254: ("Shona", "sn"),
    255: ("Sichuan Yi", "ii" ),
    256: ("Sicilian", "scn"),
    257: ("Sidamo", "sid"),
    258: ("Silesian", "szl"),
    259: ("Sindhi", "sd"),
    260: ("Sinhala", "si"),
    261: ("Skolt Sami", "sms"),
    262: ("Slovak", "sk"),
    263: ("Slovenian", "sl"),
    264: ("Soga", "xog"),
    265: ("Somali", "so"),
    266: ("Southern Kurdish", "sdh"),
    267: ("Southern Sami", "sma"),
    268: ("Southern Sotho", "st"),
    269: ("South Ndebele", "nr" ),
    270: ("Spanish", "es"),
    271: ("Standard Moroccan Tamazight", "zgh"),
    272: ("Sundanese", "su"),
    273: ("Swahili", "sw"), # macrolanguage
    274: ("Swati", "ss"),
    275: ("Swedish", "sv"),
    276: ("Swiss German", "gsw"),
    277: ("Syriac", "syr"),
    278: ("Tachelhit", "shi"),
    279: ("Tahitian", "ty"),
    280: ("Tai Dam", "blt"),
    281: ("Taita", "dav"),
    282: ("Tajik", "tg"),
    283: ("Tamil", "ta"),
    284: ("Taroko", "trv"),
    285: ("Tasawaq", "twq"),
    286: ("Tatar", "tt"),
    287: ("Telugu", "te"),
    288: ("Teso", "teo"),
    289: ("Thai", "th"),
    290: ("Tibetan", "bo"),
    291: ("Tigre", "tig"),
    292: ("Tigrinya", "ti"),
    293: ("Tokelau", "tkl"),
    294: ("Tok Pisin", "tpi"),
    295: ("Tongan", "to"),
    296: ("Tsonga", "ts"),
    297: ("Tswana", "tn"),
    298: ("Turkish", "tr"),
    299: ("Turkmen", "tk"),
    300: ("Tuvalu", "tvl"),
    301: ("Tyap", "kcg"),
    302: ("Ugaritic", "uga"),
    303: ("Ukrainian", "uk"),
    304: ("Upper Sorbian", "hsb"),
    305: ("Urdu", "ur"),
    306: ("Uyghur", "ug"),
    307: ("Uzbek", "uz"), # macrolanguage
    308: ("Vai", "vai"),
    309: ("Venda", "ve" ),
    310: ("Vietnamese", "vi"),
    311: ("Volapuk", "vo"),
    312: ("Vunjo", "vun"),
    313: ("Walloon", "wa"),
    314: ("Walser", "wae"),
    315: ("Warlpiri", "wbp"),
    316: ("Welsh", "cy"),
    317: ("Western Balochi", "bgn"),
    318: ("Western Frisian", "fy"),
    319: ("Wolaytta", "wal"),
    320: ("Wolof", "wo"),
    321: ("Xhosa", "xh"),
    322: ("Yangben", "yav"),
    323: ("Yiddish", "yi"), # macrolanguage
    324: ("Yoruba", "yo"),
    325: ("Zarma", "dje"),
    326: ("Zhuang", "za"), # macrolanguage
    327: ("Zulu", "zu"),
    # added in CLDR v40
    328: ("Kaingang", "kgp"),
    329: ("Nheengatu", "yrl"),
    # added in CLDR v42
    330: ("Haryanvi", "bgc"),
    331: ("Moksha", "mdf"),
    332: ("Northern Frisian", "frr"),
    333: ("Obolo", "ann"),
    334: ("Pijin", "pis"),
    335: ("Rajasthani", "raj"),
    336: ("Toki Pona", "tok"),
}
|
||||
|
||||
# Map from an old (pre-rename) language enum key to its current name.
# NOTE(review): presumably consumed by the enum-generating scripts so the
# old QLocale::Language names remain valid aliases for the renamed
# entries — confirm against the code that reads this table.
language_aliases = {
    # Renamings prior to Qt 6.0 (CLDR v37):
    'Afan': 'Oromo',
    'Byelorussian': 'Belarusian',
    'Bhutani': 'Dzongkha',
    'Cambodian': 'Khmer',
    'Kurundi': 'Rundi',
    'RhaetoRomance': 'Romansh',
    'Chewa': 'Nyanja',
    'Frisian': 'WesternFrisian',
    'Uigur': 'Uyghur',
    # Renamings:
    'Uighur': 'Uyghur',
    'Kwanyama': 'Kuanyama',
    'Inupiak': 'Inupiaq',
    'Bengali': 'Bangla',
    'CentralMoroccoTamazight': 'CentralAtlasTamazight',
    'Greenlandic': 'Kalaallisut',
    'Walamo': 'Wolaytta',
    'Navaho': 'Navajo',
    'Oriya': 'Odia',
    'Kirghiz': 'Kyrgyz'
}
|
||||
|
||||
territory_map = {
|
||||
0: ("AnyTerritory", "ZZ"),
|
||||
|
||||
1: ("Afghanistan", "AF"),
|
||||
2: ("Aland Islands", "AX"),
|
||||
3: ("Albania", "AL"),
|
||||
4: ("Algeria", "DZ"),
|
||||
5: ("American Samoa", "AS"),
|
||||
6: ("Andorra", "AD"),
|
||||
7: ("Angola", "AO"),
|
||||
8: ("Anguilla", "AI"),
|
||||
9: ("Antarctica", "AQ"),
|
||||
10: ("Antigua And Barbuda", "AG"),
|
||||
11: ("Argentina", "AR"),
|
||||
12: ("Armenia", "AM"),
|
||||
13: ("Aruba", "AW"),
|
||||
14: ("Ascension Island", "AC"),
|
||||
15: ("Australia", "AU"),
|
||||
16: ("Austria", "AT"),
|
||||
17: ("Azerbaijan", "AZ"),
|
||||
18: ("Bahamas", "BS"),
|
||||
19: ("Bahrain", "BH"),
|
||||
20: ("Bangladesh", "BD"),
|
||||
21: ("Barbados", "BB"),
|
||||
22: ("Belarus", "BY"),
|
||||
23: ("Belgium", "BE"),
|
||||
24: ("Belize", "BZ"),
|
||||
25: ("Benin", "BJ"),
|
||||
26: ("Bermuda", "BM"),
|
||||
27: ("Bhutan", "BT"),
|
||||
28: ("Bolivia", "BO"),
|
||||
29: ("Bosnia And Herzegovina", "BA"),
|
||||
30: ("Botswana", "BW"),
|
||||
31: ("Bouvet Island", "BV"),
|
||||
32: ("Brazil", "BR"),
|
||||
33: ("British Indian Ocean Territory", "IO"),
|
||||
34: ("British Virgin Islands", "VG"),
|
||||
35: ("Brunei", "BN"),
|
||||
36: ("Bulgaria", "BG"),
|
||||
37: ("Burkina Faso", "BF"),
|
||||
38: ("Burundi", "BI"),
|
||||
39: ("Cambodia", "KH"),
|
||||
40: ("Cameroon", "CM"),
|
||||
41: ("Canada", "CA"),
|
||||
42: ("Canary Islands", "IC"),
|
||||
43: ("Cape Verde", "CV"),
|
||||
44: ("Caribbean Netherlands", "BQ"),
|
||||
45: ("Cayman Islands", "KY"),
|
||||
46: ("Central African Republic", "CF"),
|
||||
47: ("Ceuta And Melilla", "EA"),
|
||||
48: ("Chad", "TD"),
|
||||
49: ("Chile", "CL"),
|
||||
50: ("China", "CN"),
|
||||
51: ("Christmas Island", "CX"),
|
||||
52: ("Clipperton Island", "CP"),
|
||||
53: ("Cocos Islands", "CC"),
|
||||
54: ("Colombia", "CO"),
|
||||
55: ("Comoros", "KM"),
|
||||
56: ("Congo Brazzaville", "CG"),
|
||||
57: ("Congo Kinshasa", "CD"),
|
||||
58: ("Cook Islands", "CK"),
|
||||
59: ("Costa Rica", "CR"),
|
||||
60: ("Croatia", "HR"),
|
||||
61: ("Cuba", "CU"),
|
||||
62: ("Curacao", "CW"),
|
||||
63: ("Cyprus", "CY"),
|
||||
64: ("Czechia", "CZ"),
|
||||
65: ("Denmark", "DK"),
|
||||
66: ("Diego Garcia", "DG"),
|
||||
67: ("Djibouti", "DJ"),
|
||||
68: ("Dominica", "DM"),
|
||||
69: ("Dominican Republic", "DO"),
|
||||
70: ("Ecuador", "EC"),
|
||||
71: ("Egypt", "EG"),
|
||||
72: ("El Salvador", "SV"),
|
||||
73: ("Equatorial Guinea", "GQ"),
|
||||
74: ("Eritrea", "ER"),
|
||||
75: ("Estonia", "EE"),
|
||||
76: ("Eswatini", "SZ"),
|
||||
77: ("Ethiopia", "ET"),
|
||||
78: ("Europe", "150"),
|
||||
79: ("European Union", "EU"),
|
||||
80: ("Falkland Islands", "FK"),
|
||||
81: ("Faroe Islands", "FO"),
|
||||
82: ("Fiji", "FJ"),
|
||||
83: ("Finland", "FI"),
|
||||
84: ("France", "FR"),
|
||||
85: ("French Guiana", "GF"),
|
||||
86: ("French Polynesia", "PF"),
|
||||
87: ("French Southern Territories", "TF"),
|
||||
88: ("Gabon", "GA"),
|
||||
89: ("Gambia", "GM"),
|
||||
90: ("Georgia", "GE"),
|
||||
91: ("Germany", "DE"),
|
||||
92: ("Ghana", "GH"),
|
||||
93: ("Gibraltar", "GI"),
|
||||
94: ("Greece", "GR"),
|
||||
95: ("Greenland", "GL"),
|
||||
96: ("Grenada", "GD"),
|
||||
97: ("Guadeloupe", "GP"),
|
||||
98: ("Guam", "GU"),
|
||||
99: ("Guatemala", "GT"),
|
||||
100: ("Guernsey", "GG"),
|
||||
101: ("Guinea Bissau", "GW"),
|
||||
102: ("Guinea", "GN"),
|
||||
103: ("Guyana", "GY"),
|
||||
104: ("Haiti", "HT"),
|
||||
105: ("Heard And McDonald Islands", "HM"),
|
||||
106: ("Honduras", "HN"),
|
||||
107: ("Hong Kong", "HK"),
|
||||
108: ("Hungary", "HU"),
|
||||
109: ("Iceland", "IS"),
|
||||
110: ("India", "IN"),
|
||||
111: ("Indonesia", "ID"),
|
||||
112: ("Iran", "IR"),
|
||||
113: ("Iraq", "IQ"),
|
||||
114: ("Ireland", "IE"),
|
||||
115: ("Isle Of Man", "IM"),
|
||||
116: ("Israel", "IL"),
|
||||
117: ("Italy", "IT"),
|
||||
# Officially Côte d’Ivoire, which we'd ned to map to CotedIvoire
|
||||
# or CoteDIvoire, either failing to make the d' separate from
|
||||
# Cote or messing with its case. So stick with Ivory Coast:
|
||||
118: ("Ivory Coast", "CI"),
|
||||
119: ("Jamaica", "JM"),
|
||||
120: ("Japan", "JP"),
|
||||
121: ("Jersey", "JE"),
|
||||
122: ("Jordan", "JO"),
|
||||
123: ("Kazakhstan", "KZ"),
|
||||
124: ("Kenya", "KE"),
|
||||
125: ("Kiribati", "KI"),
|
||||
126: ("Kosovo", "XK"),
|
||||
127: ("Kuwait", "KW"),
|
||||
128: ("Kyrgyzstan", "KG"),
|
||||
129: ("Laos", "LA"),
|
||||
130: ("Latin America", "419"),
|
||||
131: ("Latvia", "LV"),
|
||||
132: ("Lebanon", "LB"),
|
||||
133: ("Lesotho", "LS"),
|
||||
134: ("Liberia", "LR"),
|
||||
135: ("Libya", "LY"),
|
||||
136: ("Liechtenstein", "LI"),
|
||||
137: ("Lithuania", "LT"),
|
||||
138: ("Luxembourg", "LU"),
|
||||
139: ("Macao", "MO"),
|
||||
140: ("Macedonia", "MK"),
|
||||
141: ("Madagascar", "MG"),
|
||||
142: ("Malawi", "MW"),
|
||||
143: ("Malaysia", "MY"),
|
||||
144: ("Maldives", "MV"),
|
||||
145: ("Mali", "ML"),
|
||||
146: ("Malta", "MT"),
|
||||
147: ("Marshall Islands", "MH"),
|
||||
148: ("Martinique", "MQ"),
|
||||
149: ("Mauritania", "MR"),
|
||||
150: ("Mauritius", "MU"),
|
||||
151: ("Mayotte", "YT"),
|
||||
152: ("Mexico", "MX"),
|
||||
153: ("Micronesia", "FM"),
|
||||
154: ("Moldova", "MD"),
|
||||
155: ("Monaco", "MC"),
|
||||
156: ("Mongolia", "MN"),
|
||||
157: ("Montenegro", "ME"),
|
||||
158: ("Montserrat", "MS"),
|
||||
159: ("Morocco", "MA"),
|
||||
160: ("Mozambique", "MZ"),
|
||||
161: ("Myanmar", "MM"),
|
||||
162: ("Namibia", "NA"),
|
||||
163: ("Nauru", "NR"),
|
||||
164: ("Nepal", "NP"),
|
||||
165: ("Netherlands", "NL"),
|
||||
166: ("New Caledonia", "NC"),
|
||||
167: ("New Zealand", "NZ"),
|
||||
168: ("Nicaragua", "NI"),
|
||||
169: ("Nigeria", "NG"),
|
||||
170: ("Niger", "NE"),
|
||||
171: ("Niue", "NU"),
|
||||
172: ("Norfolk Island", "NF"),
|
||||
173: ("Northern Mariana Islands", "MP"),
|
||||
174: ("North Korea", "KP"),
|
||||
175: ("Norway", "NO"),
|
||||
176: ("Oman", "OM"),
|
||||
177: ("Outlying Oceania", "QO"),
|
||||
178: ("Pakistan", "PK"),
|
||||
179: ("Palau", "PW"),
|
||||
180: ("Palestinian Territories", "PS"),
|
||||
181: ("Panama", "PA"),
|
||||
182: ("Papua New Guinea", "PG"),
|
||||
183: ("Paraguay", "PY"),
|
||||
184: ("Peru", "PE"),
|
||||
185: ("Philippines", "PH"),
|
||||
186: ("Pitcairn", "PN"),
|
||||
187: ("Poland", "PL"),
|
||||
188: ("Portugal", "PT"),
|
||||
189: ("Puerto Rico", "PR"),
|
||||
190: ("Qatar", "QA"),
|
||||
191: ("Reunion", "RE"),
|
||||
192: ("Romania", "RO"),
|
||||
193: ("Russia", "RU"),
|
||||
194: ("Rwanda", "RW"),
|
||||
195: ("Saint Barthelemy", "BL"),
|
||||
196: ("Saint Helena", "SH"),
|
||||
197: ("Saint Kitts And Nevis", "KN"),
|
||||
198: ("Saint Lucia", "LC"),
|
||||
199: ("Saint Martin", "MF"),
|
||||
200: ("Saint Pierre And Miquelon", "PM"),
|
||||
201: ("Saint Vincent And Grenadines", "VC"),
|
||||
202: ("Samoa", "WS"),
|
||||
203: ("San Marino", "SM"),
|
||||
204: ("Sao Tome And Principe", "ST"),
|
||||
205: ("Saudi Arabia", "SA"),
|
||||
206: ("Senegal", "SN"),
|
||||
207: ("Serbia", "RS"),
|
||||
208: ("Seychelles", "SC"),
|
||||
209: ("Sierra Leone", "SL"),
|
||||
210: ("Singapore", "SG"),
|
||||
211: ("Sint Maarten", "SX"),
|
||||
212: ("Slovakia", "SK"),
|
||||
213: ("Slovenia", "SI"),
|
||||
214: ("Solomon Islands", "SB"),
|
||||
215: ("Somalia", "SO"),
|
||||
216: ("South Africa", "ZA"),
|
||||
217: ("South Georgia And South Sandwich Islands", "GS"),
|
||||
218: ("South Korea", "KR"),
|
||||
219: ("South Sudan", "SS"),
|
||||
220: ("Spain", "ES"),
|
||||
221: ("Sri Lanka", "LK"),
|
||||
222: ("Sudan", "SD"),
|
||||
223: ("Suriname", "SR"),
|
||||
224: ("Svalbard And Jan Mayen", "SJ"),
|
||||
225: ("Sweden", "SE"),
|
||||
226: ("Switzerland", "CH"),
|
||||
227: ("Syria", "SY"),
|
||||
228: ("Taiwan", "TW"),
|
||||
229: ("Tajikistan", "TJ"),
|
||||
230: ("Tanzania", "TZ"),
|
||||
231: ("Thailand", "TH"),
|
||||
232: ("Timor-Leste", "TL"),
|
||||
233: ("Togo", "TG"),
|
||||
234: ("Tokelau", "TK"),
|
||||
235: ("Tonga", "TO"),
|
||||
236: ("Trinidad And Tobago", "TT"),
|
||||
237: ("Tristan Da Cunha", "TA"),
|
||||
238: ("Tunisia", "TN"),
|
||||
239: ("Turkey", "TR"),
|
||||
240: ("Turkmenistan", "TM"),
|
||||
241: ("Turks And Caicos Islands", "TC"),
|
||||
242: ("Tuvalu", "TV"),
|
||||
243: ("Uganda", "UG"),
|
||||
244: ("Ukraine", "UA"),
|
||||
245: ("United Arab Emirates", "AE"),
|
||||
246: ("United Kingdom", "GB"),
|
||||
247: ("United States Outlying Islands", "UM"),
|
||||
248: ("United States", "US"),
|
||||
249: ("United States Virgin Islands", "VI"),
|
||||
250: ("Uruguay", "UY"),
|
||||
251: ("Uzbekistan", "UZ"),
|
||||
252: ("Vanuatu", "VU"),
|
||||
253: ("Vatican City", "VA"),
|
||||
254: ("Venezuela", "VE"),
|
||||
255: ("Vietnam", "VN"),
|
||||
256: ("Wallis And Futuna", "WF"),
|
||||
257: ("Western Sahara", "EH"),
|
||||
258: ("World", "001"),
|
||||
259: ("Yemen", "YE"),
|
||||
260: ("Zambia", "ZM"),
|
||||
261: ("Zimbabwe", "ZW"),
|
||||
}
|
||||
|
||||
# Map from an old (pre-rename) territory enum key to its current name.
# NOTE(review): presumably used to keep old QLocale::Territory (and the
# pre-6.2 Country) enum names working as aliases — confirm against the
# code that reads this table.
territory_aliases = {
    # Renamings prior to Qt 6.0 (CLDR v37):
    'DemocraticRepublicOfCongo': 'CongoKinshasa',
    'PeoplesRepublicOfCongo': 'CongoBrazzaville',
    'DemocraticRepublicOfKorea': 'NorthKorea',
    'RepublicOfKorea': 'SouthKorea',
    'RussianFederation': 'Russia',
    'SyrianArabRepublic': 'Syria',
    'LatinAmericaAndTheCaribbean': 'LatinAmerica',
    # Renamings:
    'EastTimor': 'TimorLeste',
    'Bonaire': 'CaribbeanNetherlands',
    'Macau': 'Macao',
    'SouthGeorgiaAndTheSouthSandwichIslands': 'SouthGeorgiaAndSouthSandwichIslands',
    'WallisAndFutunaIslands': 'WallisAndFutuna',
    'SaintVincentAndTheGrenadines': 'SaintVincentAndGrenadines',
    'BosniaAndHerzegowina': 'BosniaAndHerzegovina',
    'SvalbardAndJanMayenIslands': 'SvalbardAndJanMayen',
    'VaticanCityState': 'VaticanCity',
    'Swaziland': 'Eswatini',
    'UnitedStatesMinorOutlyingIslands': 'UnitedStatesOutlyingIslands',
    'CuraSao': 'Curacao',
    'CzechRepublic': 'Czechia',

    # Backwards compatibility with old Country enum, prior to Qt 6.2:
    'AnyCountry': 'AnyTerritory',
    'NauruCountry': 'NauruTerritory',
    'TokelauCountry': 'TokelauTerritory',
    'TuvaluCountry': 'TuvaluTerritory',
}
|
||||
|
||||
script_map = {
|
||||
0: ("AnyScript", "Zzzz"),
|
||||
|
||||
1: ("Adlam", "Adlm"),
|
||||
2: ("Ahom", "Ahom"),
|
||||
3: ("Anatolian Hieroglyphs", "Hluw"),
|
||||
4: ("Arabic", "Arab"),
|
||||
5: ("Armenian", "Armn"),
|
||||
6: ("Avestan", "Avst"),
|
||||
7: ("Balinese", "Bali"),
|
||||
8: ("Bamum", "Bamu"),
|
||||
9: ("Bangla", "Beng"),
|
||||
10: ("Bassa Vah", "Bass"),
|
||||
11: ("Batak", "Batk"),
|
||||
12: ("Bhaiksuki", "Bhks"),
|
||||
13: ("Bopomofo", "Bopo"),
|
||||
14: ("Brahmi", "Brah"),
|
||||
15: ("Braille", "Brai"),
|
||||
16: ("Buginese", "Bugi"),
|
||||
17: ("Buhid", "Buhd"),
|
||||
18: ("Canadian Aboriginal", "Cans"),
|
||||
19: ("Carian", "Cari"),
|
||||
20: ("Caucasian Albanian", "Aghb"),
|
||||
21: ("Chakma", "Cakm"),
|
||||
22: ("Cham", "Cham"),
|
||||
23: ("Cherokee", "Cher"),
|
||||
24: ("Coptic", "Copt"),
|
||||
25: ("Cuneiform", "Xsux"),
|
||||
26: ("Cypriot", "Cprt"),
|
||||
27: ("Cyrillic", "Cyrl"),
|
||||
28: ("Deseret", "Dsrt"),
|
||||
29: ("Devanagari", "Deva"),
|
||||
30: ("Duployan", "Dupl"),
|
||||
31: ("Egyptian Hieroglyphs", "Egyp"),
|
||||
32: ("Elbasan", "Elba"),
|
||||
33: ("Ethiopic", "Ethi"),
|
||||
34: ("Fraser", "Lisu"),
|
||||
35: ("Georgian", "Geor"),
|
||||
36: ("Glagolitic", "Glag"),
|
||||
37: ("Gothic", "Goth"),
|
||||
38: ("Grantha", "Gran"),
|
||||
39: ("Greek", "Grek"),
|
||||
40: ("Gujarati", "Gujr"),
|
||||
41: ("Gurmukhi", "Guru"),
|
||||
42: ("Hangul", "Hang"),
|
||||
43: ("Han", "Hani"),
|
||||
44: ("Hanunoo", "Hano"),
|
||||
45: ("Han with Bopomofo", "Hanb"),
|
||||
46: ("Hatran", "Hatr"),
|
||||
47: ("Hebrew", "Hebr"),
|
||||
48: ("Hiragana", "Hira"),
|
||||
49: ("Imperial Aramaic", "Armi"),
|
||||
50: ("Inscriptional Pahlavi", "Phli"),
|
||||
51: ("Inscriptional Parthian", "Prti"),
|
||||
52: ("Jamo", "Jamo"),
|
||||
53: ("Japanese", "Jpan"),
|
||||
54: ("Javanese", "Java"),
|
||||
55: ("Kaithi", "Kthi"),
|
||||
56: ("Kannada", "Knda"),
|
||||
57: ("Katakana", "Kana"),
|
||||
58: ("Kayah Li", "Kali"),
|
||||
59: ("Kharoshthi", "Khar"),
|
||||
60: ("Khmer", "Khmr"),
|
||||
61: ("Khojki", "Khoj"),
|
||||
62: ("Khudawadi", "Sind"),
|
||||
63: ("Korean", "Kore"),
|
||||
64: ("Lanna", "Lana"),
|
||||
65: ("Lao", "Laoo"),
|
||||
66: ("Latin", "Latn"),
|
||||
67: ("Lepcha", "Lepc"),
|
||||
68: ("Limbu", "Limb"),
|
||||
69: ("Linear A", "Lina"),
|
||||
70: ("Linear B", "Linb"),
|
||||
71: ("Lycian", "Lyci"),
|
||||
72: ("Lydian", "Lydi"),
|
||||
73: ("Mahajani", "Mahj"),
|
||||
74: ("Malayalam", "Mlym"),
|
||||
75: ("Mandaean", "Mand"),
|
||||
76: ("Manichaean", "Mani"),
|
||||
77: ("Marchen", "Marc"),
|
||||
78: ("Meitei Mayek", "Mtei"),
|
||||
79: ("Mende", "Mend"),
|
||||
80: ("Meroitic Cursive", "Merc"),
|
||||
81: ("Meroitic", "Mero"),
|
||||
82: ("Modi", "Modi"),
|
||||
83: ("Mongolian", "Mong"),
|
||||
84: ("Mro", "Mroo"),
|
||||
85: ("Multani", "Mult"),
|
||||
86: ("Myanmar", "Mymr"),
|
||||
87: ("Nabataean", "Nbat"),
|
||||
88: ("Newa", "Newa"),
|
||||
89: ("New Tai Lue", "Talu"),
|
||||
90: ("Nko", "Nkoo"),
|
||||
91: ("Odia", "Orya"),
|
||||
92: ("Ogham", "Ogam"),
|
||||
93: ("Ol Chiki", "Olck"),
|
||||
94: ("Old Hungarian", "Hung"),
|
||||
95: ("Old Italic", "Ital"),
|
||||
96: ("Old North Arabian", "Narb"),
|
||||
97: ("Old Permic", "Perm"),
|
||||
98: ("Old Persian", "Xpeo"),
|
||||
99: ("Old South Arabian", "Sarb"),
|
||||
100: ("Orkhon", "Orkh"),
|
||||
101: ("Osage", "Osge"),
|
||||
102: ("Osmanya", "Osma"),
|
||||
103: ("Pahawh Hmong", "Hmng"),
|
||||
104: ("Palmyrene", "Palm"),
|
||||
105: ("Pau Cin Hau", "Pauc"),
|
||||
106: ("Phags Pa", "Phag"),
|
||||
107: ("Phoenician", "Phnx"),
|
||||
108: ("Pollard Phonetic", "Plrd"),
|
||||
109: ("Psalter Pahlavi", "Phlp"),
|
||||
110: ("Rejang", "Rjng"),
|
||||
111: ("Runic", "Runr"),
|
||||
112: ("Samaritan", "Samr"),
|
||||
113: ("Saurashtra", "Saur"),
|
||||
114: ("Sharada", "Shrd"),
|
||||
115: ("Shavian", "Shaw"),
|
||||
116: ("Siddham", "Sidd"),
|
||||
117: ("Sign Writing", "Sgnw"),
|
||||
118: ("Simplified Han", "Hans"),
|
||||
119: ("Sinhala", "Sinh"),
|
||||
120: ("Sora Sompeng", "Sora"),
|
||||
121: ("Sundanese", "Sund"),
|
||||
122: ("Syloti Nagri", "Sylo"),
|
||||
123: ("Syriac", "Syrc"),
|
||||
124: ("Tagalog", "Tglg"),
|
||||
125: ("Tagbanwa", "Tagb"),
|
||||
126: ("Tai Le", "Tale"),
|
||||
127: ("Tai Viet", "Tavt"),
|
||||
128: ("Takri", "Takr"),
|
||||
129: ("Tamil", "Taml"),
|
||||
130: ("Tangut", "Tang"),
|
||||
131: ("Telugu", "Telu"),
|
||||
132: ("Thaana", "Thaa"),
|
||||
133: ("Thai", "Thai"),
|
||||
134: ("Tibetan", "Tibt"),
|
||||
135: ("Tifinagh", "Tfng"),
|
||||
136: ("Tirhuta", "Tirh"),
|
||||
137: ("Traditional Han", "Hant"),
|
||||
138: ("Ugaritic", "Ugar"),
|
||||
139: ("Vai", "Vaii"),
|
||||
140: ("Varang Kshiti", "Wara"),
|
||||
141: ("Yi", "Yiii"),
|
||||
}
|
||||
|
||||
# Map from an old (pre-rename) script enum key to its current name.
# NOTE(review): presumably keeps old QLocale::Script enum names valid as
# aliases for the renamed entries — confirm against the code that reads
# this table.
script_aliases = {
    # Renamings prior to Qt 6.0 (CLDR v37):
    'SimplifiedChineseScript': 'SimplifiedHanScript',
    'TraditionalChineseScript': 'TraditionalHanScript',
    # Renamings:
    'OriyaScript': 'OdiaScript',
    'MendeKikakuiScript': 'MendeScript',
    'BengaliScript': 'BanglaScript',
}
|
23
util/locale_database/formattags.txt
Normal file
23
util/locale_database/formattags.txt
Normal file
@ -0,0 +1,23 @@
|
||||
d
|
||||
dd
|
||||
ddd
|
||||
dddd
|
||||
M
|
||||
MM
|
||||
MMM
|
||||
MMMM
|
||||
yy
|
||||
yyyy
|
||||
h the hour without a leading zero (0 to 23 or 1 to 12 if AM/PM display)
|
||||
hh the hour with a leading zero (00 to 23 or 01 to 12 if AM/PM display)
|
||||
H the hour without a leading zero (0 to 23, even with AM/PM display)
|
||||
HH the hour with a leading zero (00 to 23, even with AM/PM display)
|
||||
m
|
||||
mm
|
||||
s
|
||||
ss
|
||||
z the milliseconds without leading zeroes (0 to 999)
|
||||
zzz the milliseconds with leading zeroes (000 to 999)
|
||||
AP or A Interpret as an AM/PM time. AP must be either "AM" or "PM"
|
||||
ap or a Interpret as an AM/PM time. ap must be either "am" or "pm"
|
||||
t time zone
|
80
util/locale_database/iso639_3.py
Normal file
80
util/locale_database/iso639_3.py
Normal file
@ -0,0 +1,80 @@
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
@dataclass
class LanguageCodeEntry:
    """One ISO 639 language with its codes from the various parts of the standard.

    Each of the Part 2B, Part 2T and Part 1 codes may be None when the
    language has no code in that part of the standard."""
    part3Code: str
    part2BCode: Optional[str]
    part2TCode: Optional[str]
    part1Code: Optional[str]

    def id(self) -> str:
        """Return the preferred code: Part 1 if set, else Part 2B, else Part 3."""
        return self.part1Code or self.part2BCode or self.part3Code

    def __repr__(self) -> str:
        # Only mention codes that add information beyond the Part 3 code.
        text = f'{self.__class__.__name__}({self.id()!r}, part3Code={self.part3Code!r}'
        if self.part2BCode is not None and self.part2BCode != self.part3Code:
            text += f', part2BCode={self.part2BCode!r}'
        if self.part2TCode != self.part2BCode:
            text += f', part2TCode={self.part2TCode!r}'
        if self.part1Code is not None:
            text += f', part1Code={self.part1Code!r}'
        return text + ')'
|
||||
|
||||
|
||||
class LanguageCodeData:
    """
    Representation of ISO639-2 language code data.
    """
    def __init__(self, fileName: str):
        """
        Construct the object populating the data from the given file.

        The file is read as UTF-8 tab-separated data whose first line
        is a header; each following line supplies the Part 3, Part 2B,
        Part 2T and Part 1 codes as its first four fields.
        """
        self.__codeMap: Dict[str, LanguageCodeEntry] = {}

        with open(fileName, 'r', encoding='utf-8') as stream:
            stream.readline() # skip the header
            for line in stream:
                part3Code, part2BCode, part2TCode, part1Code, _ = line.split('\t', 4)
                self.__sanityCheck(part3Code, part2BCode, part2TCode, part1Code)

                entry = LanguageCodeEntry(part3Code, part2BCode or None,
                                          part2TCode or None, part1Code or None)
                # Index by the entry's preferred code (Part 1 if any,
                # else Part 2B, else Part 3):
                self.__codeMap[entry.id()] = entry

    @staticmethod
    def __sanityCheck(part3Code: str, part2BCode: str,
                      part2TCode: str, part1Code: str) -> None:
        """Assert the structural invariants of one row of the code table."""
        assert all(p.isascii() for p in (part3Code, part2BCode, part2TCode, part1Code)), \
            f'Non-ascii characters in code names: {part3Code!r} {part2BCode!r} '\
            f'{part2TCode!r} {part1Code!r}'

        assert len(part3Code) == 3, f'Invalid Part 3 code length for {part3Code!r}'
        assert not part1Code or len(part1Code) == 2, \
            f'Invalid Part 1 code length for {part3Code!r}: {part1Code!r}'
        assert not part2BCode or len(part2BCode) == 3, \
            f'Invalid Part 2B code length for {part3Code!r}: {part2BCode!r}'
        assert not part2TCode or len(part2TCode) == 3, \
            f'Invalid Part 2T code length for {part3Code!r}: {part2TCode!r}'

        # Part 2B and Part 2T are either both present or both absent:
        assert (part2BCode == '') == (part2TCode == ''), \
            f'Only one Part 2 code is specified for {part3Code!r}: ' \
            f'{part2BCode!r} vs {part2TCode!r}'
        assert not part2TCode or part2TCode == part3Code, \
            f'Part 3 code {part3Code!r} does not match Part 2T code {part2TCode!r}'

    def query(self, code: str) -> Optional[LanguageCodeEntry]:
        """
        Lookup the entry with the given code and return it.

        The entries can be looked up by using either the Alpha2 code or the bibliographical
        Alpha3 code.
        """
        return self.__codeMap.get(code)
|
599
util/locale_database/ldml.py
Normal file
599
util/locale_database/ldml.py
Normal file
@ -0,0 +1,599 @@
|
||||
# Copyright (C) 2020 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Parsing the Locale Data Markup Language
|
||||
|
||||
It's an XML format, so the raw parsing of XML is, of course, delegated
|
||||
to xml.dom.minidom; but it has its own specific schemata and some
|
||||
funky rules for combining data from various files (inheritance between
|
||||
locales). The use of it we're interested in is extraction of CLDR's
|
||||
data, so some of the material here is specific to CLDR; see cldr.py
|
||||
for how it is mainly used.
|
||||
|
||||
Provides various classes to wrap xml.dom's objects, specifically those
|
||||
returned by minidom.parse() and their child-nodes:
|
||||
Node -- wraps any node in the DOM tree
|
||||
XmlScanner -- wraps the root element of a stand-alone XML file
|
||||
Supplement -- specializes XmlScanner for supplemental data files
|
||||
LocaleScanner -- wraps a locale's inheritance-chain of file roots
|
||||
|
||||
See individual classes for further detail.
|
||||
"""
|
||||
from localetools import Error
|
||||
from dateconverter import convert_date
|
||||
|
||||
class Node (object):
    """Wrapper for an arbitrary DOM node.

    Provides various ways to select children of a node. Selected child
    nodes are returned wrapped as Node objects. A Node exposes the raw
    DOM node it wraps via its .dom attribute."""

    def __init__(self, elt, dullAttrs = None, draft = 0):
        """Wraps a DOM node for ease of access.

        First argument, elt, is the DOM node to wrap.

        Optional second argument, dullAttrs, should either be None or
        map each LDML tag name to a list of the names of
        non-distinguishing attributes for nodes with the given tag
        name. If None is given, no distinguishing attribute checks are
        performed.

        (Optional third argument, draft, should only be supplied by
        this class's creation of child nodes; it is the maximum draft
        score of any ancestor of the new node.)"""
        self.dom = elt
        self.__dull = dullAttrs
        # A node's draft score is the draftiest of its own draft
        # attribute (if present) and those of all its ancestors:
        try:
            level = elt.attributes['draft'].nodeValue
        except KeyError:
            self.draft = draft
        else:
            self.draft = max(draft, self.draftScore(level))

    def findAllChildren(self, tag, wanted = None, allDull = False):
        """Yields all children with the given tag and attributes.

        First argument is the tag: children with any other tag are
        ignored.

        Optional second argument, wanted, should either be None or map
        attribute names to the values they must have. Only child nodes
        with these attributes set to the given values are yielded.

        By default, nodes that have distinguishing attributes, other
        than those specified in wanted, are ignored. Pass the allDull
        parameter a true value to suppress this check."""

        if self.__dull is None:
            allDull = True
        dull = () if allDull else self.__dull[tag]

        for kid in self.dom.childNodes:
            if kid.nodeType != kid.ELEMENT_NODE or kid.nodeName != tag:
                continue

            if wanted:
                try:
                    mismatched = any(kid.attributes[key].nodeValue != value
                                     for key, value in wanted.items())
                except KeyError: # Some wanted attribute is missing
                    continue
                if mismatched:
                    continue

                # Skip children with distinguishing attributes beyond
                # those asked for:
                if not (allDull or all(key in dull or key in wanted
                                       for key in kid.attributes.keys())):
                    continue

            # Skip children with any distinguishing attribute:
            elif not (allDull or all(key in dull
                                     for key in kid.attributes.keys())):
                continue

            yield Node(kid, self.__dull, self.draft)

    def findUniqueChild(self, tag):
        """Returns the single child with the given nodeName.

        Raises Error if there is no such child or there is more than
        one."""
        found = tuple(self.findAllChildren(tag))
        if not found:
            raise Error('No child found where one was expected', tag)
        if len(found) > 1:
            raise Error('Many children found where only one was expected', tag)
        return found[0]

    @classmethod
    def draftScore(cls, level):
        """Maps draft level names to numeric scores.

        Single parameter, level, is the least sure value of the draft
        attribute on a node that you're willing to accept; returns a
        numeric value (lower is less drafty).

        Tempting as it is to insist on low draft scores, there are
        many locales in which pretty much every leaf is
        unconfirmed. So, for the moment, callers mostly accept all
        elements regardless of draft values (the one exception is
        am/pm indicators)."""
        if not level:
            return 0
        # Unknown level names score 5, draftier than any known level:
        return cls.__draftScores.get(level, 5)

    # Implementation details:
    __draftScores = {'true': 4, 'unconfirmed': 3, 'provisional': 2,
                     'contributed': 1, 'approved': 0, 'false': 0}
|
||||
|
||||
def _parseXPath(selector):
    """Split one XPath step "tag[attr=val][...]" into (tag, attributes).

    A bare "[val]" qualifier is shorthand for "[type=val]". Returns the
    tag name and a dict mapping attribute names to required values."""
    pieces = selector.split('[')
    tag = pieces.pop(0)
    if pieces:
        pieces = [piece.strip() for piece in pieces]
        assert all(piece.endswith(']') for piece in pieces)
        pairs = [piece[:-1].split('=') for piece in pieces]
        assert all(len(pair) in (1, 2) for pair in pairs)
        # A lone value means an implicit 'type' attribute:
        pieces = (('type', pair[0]) if len(pair) == 1 else pair for pair in pairs)
    return tag, dict(pieces)
|
||||
|
||||
def _iterateEach(iters):
    """Flatten a two-layer iterator into one lazy iterator.

    Single argument, iters, is an iterable of iterables; yields each
    item of each inner iterable in turn (the same contract as
    itertools.chain.from_iterable, kept as a named local helper)."""
    for it in iters:
        # Idiomatic generator delegation instead of a manual inner loop:
        yield from it
|
||||
|
||||
class XmlScanner (object):
    """Wrap an XML file to enable XPath access to its nodes.
    """
    def __init__(self, node):
        # node is the (Node-wrapped) root element of the document:
        self.root = node

    def findNodes(self, xpath):
        """Return all nodes under self.root matching this xpath.

        Ignores any excess attributes."""
        matches = (self.root,)
        # Narrow the candidate set one path component at a time:
        for step in xpath.split('/'):
            tag, attrs = _parseXPath(step)
            matches = tuple(_iterateEach(node.findAllChildren(tag, attrs)
                                         for node in matches))
            if not matches:
                break
        return matches
|
||||
|
||||
class Supplement (XmlScanner):
    """XmlScanner specialised for CLDR supplemental data files."""
    def find(self, xpath):
        """Yield (tag, attributes) pairs for elements matching xpath.

        For each matching node, scans its child nodes (or the node
        itself, if it has no children) and yields, for every element
        that carries attributes, its tag name and a dict of its
        attributes' values."""
        for node in self.findNodes(xpath):
            kids = node.dom.childNodes if node.dom.childNodes else (node.dom,)
            for elt in kids:
                if elt.attributes:
                    yield (elt.nodeName,
                           {k: v if isinstance(v, str) else v.nodeValue
                            for k, v in elt.attributes.items()})
|
||||
|
||||
class LocaleScanner (object):
|
||||
def __init__(self, name, nodes, root):
|
||||
self.name, self.nodes, self.base = name, nodes, root
|
||||
|
||||
def find(self, xpath, default = None, draft = None):
|
||||
"""XPath search for the content of an element.
|
||||
|
||||
Required argument, xpath, is the XPath to search for. Optional
|
||||
second argument is a default value to use, if no such node is
|
||||
found. Optional third argument is a draft score (see
|
||||
Node.draftScore() for details); if given, leaf elements with
|
||||
higher draft scores are ignored."""
|
||||
try:
|
||||
for elt in self.__find(xpath):
|
||||
try:
|
||||
if draft is None or elt.draft <= draft:
|
||||
return elt.dom.firstChild.nodeValue
|
||||
except (AttributeError, KeyError):
|
||||
pass
|
||||
except Error as e:
|
||||
if default is None:
|
||||
raise
|
||||
return default
|
||||
|
||||
def tagCodes(self):
|
||||
"""Yields four tag codes
|
||||
|
||||
The tag codes are language, script, territory and variant; an
|
||||
empty value for any of them indicates that no value was
|
||||
provided. The values are obtained from the primary file's
|
||||
top-level <identity> element. An Error is raised if any
|
||||
top-level <alias> element of this file has a non-empty source
|
||||
attribute; that attribute value is mentioned in the error's
|
||||
message."""
|
||||
root = self.nodes[0]
|
||||
for alias in root.findAllChildren('alias', allDull=True):
|
||||
try:
|
||||
source = alias.dom.attributes['source'].nodeValue
|
||||
except (KeyError, AttributeError):
|
||||
pass
|
||||
else:
|
||||
raise Error(f'Alias to {source}')
|
||||
|
||||
ids = root.findUniqueChild('identity')
|
||||
for code in ('language', 'script', 'territory', 'variant'):
|
||||
for node in ids.findAllChildren(code, allDull=True):
|
||||
try:
|
||||
yield node.dom.attributes['type'].nodeValue
|
||||
except (KeyError, AttributeError):
|
||||
pass
|
||||
else:
|
||||
break # only want one value for each code
|
||||
else: # No value for this code, use empty
|
||||
yield ''
|
||||
|
||||
def currencyData(self, isoCode):
|
||||
"""Fetches currency data for this locale.
|
||||
|
||||
Single argument, isoCode, is the ISO currency code for the
|
||||
currency in use in the territory. See also numericData, which
|
||||
includes some currency formats.
|
||||
"""
|
||||
if isoCode:
|
||||
stem = f'numbers/currencies/currency[{isoCode}]/'
|
||||
symbol = self.find(f'{stem}symbol', '')
|
||||
name = self.__currencyDisplayName(stem)
|
||||
else:
|
||||
symbol = name = ''
|
||||
yield 'currencySymbol', symbol
|
||||
yield 'currencyDisplayName', name
|
||||
|
||||
def numericData(self, lookup, complain = lambda text: None):
|
||||
"""Generate assorted numeric data for the locale.
|
||||
|
||||
First argument, lookup, is a callable that maps a numbering
|
||||
system's name to certain data about the system, as a mapping;
|
||||
we expect this to have 'digits' as a key.
|
||||
"""
|
||||
system = self.find('numbers/defaultNumberingSystem')
|
||||
stem = f'numbers/symbols[numberSystem={system}]/'
|
||||
decimal = self.find(f'{stem}decimal')
|
||||
group = self.find(f'{stem}group')
|
||||
assert decimal != group, (self.name, system, decimal)
|
||||
yield 'decimal', decimal
|
||||
yield 'group', group
|
||||
yield 'percent', self.find(f'{stem}percentSign')
|
||||
yield 'list', self.find(f'{stem}list')
|
||||
yield 'exp', self.find(f'{stem}exponential')
|
||||
yield 'groupSizes', self.__numberGrouping(system)
|
||||
|
||||
digits = lookup(system)['digits']
|
||||
assert len(digits) == 10
|
||||
zero = digits[0]
|
||||
# Qt's number-formatting code assumes digits are consecutive
|
||||
# (except Suzhou, CLDR's hanidec - see QTBUG-85409):
|
||||
assert all(ord(c) == i + (0x3020 if ord(zero) == 0x3007 else ord(zero))
|
||||
for i, c in enumerate(digits[1:], 1))
|
||||
yield 'zero', zero
|
||||
|
||||
plus = self.find(f'{stem}plusSign')
|
||||
minus = self.find(f'{stem}minusSign')
|
||||
yield 'plus', plus
|
||||
yield 'minus', minus
|
||||
|
||||
# Currency formatting:
|
||||
xpath = 'numbers/currencyFormats/currencyFormatLength/currencyFormat[accounting]/pattern'
|
||||
try:
|
||||
money = self.find(xpath.replace('Formats/',
|
||||
f'Formats[numberSystem={system}]/'))
|
||||
except Error:
|
||||
money = self.find(xpath)
|
||||
money = self.__currencyFormats(money, plus, minus)
|
||||
yield 'currencyFormat', next(money)
|
||||
neg = ''
|
||||
for it in money:
|
||||
assert not neg, 'There should be at most one more pattern'
|
||||
neg = it
|
||||
yield 'currencyNegativeFormat', neg
|
||||
|
||||
def textPatternData(self):
    """Generate text-pattern data for the locale.

    Yields (key, value) pairs for quotation delimiters, list-joining
    patterns, AM/PM texts and the long/short date and time formats
    (converted via convert_date — presumably imported at module level;
    confirm against the file's imports).
    """
    for key in ('quotationStart', 'alternateQuotationEnd',
                'quotationEnd', 'alternateQuotationStart'):
        yield key, self.find(f'delimiters/{key}')

    for key in ('start', 'middle', 'end'):
        yield (f'listPatternPart{key.capitalize()}',
               self.__fromLdmlListPattern(self.find(
                   f'listPatterns/listPattern/listPatternPart[{key}]')))
    # The two-element list pattern has its own LDML spelling:
    yield ('listPatternPartTwo',
           self.__fromLdmlListPattern(self.find(
               'listPatterns/listPattern/listPatternPart[2]')))

    stem = 'dates/calendars/calendar[gregorian]/'
    # TODO: is wide really the right width to use here ?
    # abbreviated might be an option ... or try both ?
    meridiem = f'{stem}dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/'
    for key in ('am', 'pm'):
        # Accept entries of draft status down to 'contributed':
        yield key, self.find(f'{meridiem}dayPeriod[{key}]',
                             draft = Node.draftScore('contributed'))

    for pair in (('long', 'full'), ('short', 'short')):
        for key in ('time', 'date'):
            yield (f'{pair[0]}{key.capitalize()}Format',
                   convert_date(self.find(
                       f'{stem}{key}Formats/{key}FormatLength[{pair[1]}]/{key}Format/pattern')))
|
||||
|
||||
def endonyms(self, language, script, territory, variant):
    """Yield the locale's own names for its language and territory.

    Tries the most specific language name first (joining the non-empty
    parts of language, script, territory with '_'), falling back to
    less specific combinations; yields ('languageEndonym', '') if none
    is found.  The territory endonym falls back to '' via find()'s
    second argument.
    """
    # TODO: take variant into account ?
    for seq in ((language, script, territory),
                (language, script), (language, territory), (language,)):
        if not all(seq):
            # Skip combinations with an empty component:
            continue
        try:
            yield ('languageEndonym',
                   self.find(f'localeDisplayNames/languages/language[{"_".join(seq)}]'))
        except Error:
            pass
        else:
            # Found one; stop trying less specific forms:
            break
    else:
        # grumble(failed to find endonym for language)
        yield 'languageEndonym', ''

    yield ('territoryEndonym',
           self.find(f'localeDisplayNames/territories/territory[{territory}]', ''))
|
||||
|
||||
def unitData(self):
    """Yield the locale's byte-unit names.

    Produces ('byte_unit', name) plus semicolon-joined lists of the
    SI-quantified ('byte_si_quantified': kB, MB, ...) and
    IEC-quantified ('byte_iec_quantified': KiB, MiB, ...) forms.
    """
    yield ('byte_unit',
           self.find('units/unitLength[long]/unit[digital-byte]/displayName',
                     'bytes'))

    unit = self.__findUnit('', 'B')
    cache = [] # Populated by the SI call, to give hints to the IEC call
    yield ('byte_si_quantified',
           ';'.join(self.__unitCount('', unit, cache)))
    # IEC 60027-2
    # http://physics.nist.gov/cuu/Units/binary.html
    yield ('byte_iec_quantified',
           ';'.join(self.__unitCount('bi', 'iB', cache)))
|
||||
|
||||
def calendarNames(self, calendars):
    """Yield month names for each given calendar, then day names.

    Month names are emitted per calendar for every (key, context,
    width) combination in __nameForms, as semicolon-joined lists of
    the twelve months; day names come from the Gregorian calendar
    only, Sunday first.
    """
    namings = self.__nameForms
    for cal in calendars:
        stem = f'dates/calendars/calendar[{cal}]/months/'
        for key, mode, size in namings:
            prop = f'monthContext[{mode}]/monthWidth[{size}]/'
            yield (f'{key}Months_{cal}',
                   ';'.join(self.find(f'{stem}{prop}month[{i}]')
                            for i in range(1, 13)))

    # Day data (for Gregorian, at least):
    stem = 'dates/calendars/calendar[gregorian]/days/'
    days = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat')
    for (key, mode, size) in namings:
        prop = f'dayContext[{mode}]/dayWidth[{size}]/day'
        yield (f'{key}Days',
               ';'.join(self.find(f'{stem}{prop}[{day}]')
                        for day in days))
|
||||
|
||||
# Implementation details
# Triples of (key-prefix, LDML context, LDML width), used by
# calendarNames() to build both month-name and day-name entries:
__nameForms = (
    ('standaloneLong', 'stand-alone', 'wide'),
    ('standaloneShort', 'stand-alone', 'abbreviated'),
    ('standaloneNarrow', 'stand-alone', 'narrow'),
    ('long', 'format', 'wide'),
    ('short', 'format', 'abbreviated'),
    ('narrow', 'format', 'narrow'),
) # Used for month and day names
|
||||
|
||||
def __find(self, xpath):
    """Yield all nodes matching xpath for this locale.

    First searches this locale's chain of nodes (self.nodes); then,
    separately, searches from the root document, following any
    source='locale' <alias> elements by queueing rewritten paths onto
    the retries work-list.  Raises Error if nothing matches anywhere.
    """
    retries = [ xpath.split('/') ]
    while retries:
        tags, elts, roots = retries.pop(), self.nodes, (self.base.root,)
        # Walk the locale's own nodes down the selector chain:
        for selector in tags:
            tag, attrs = _parseXPath(selector)
            elts = tuple(_iterateEach(e.findAllChildren(tag, attrs) for e in elts))
            if not elts:
                break

        else: # Found matching elements
            # Possibly filter elts to prefer the least drafty ?
            for elt in elts:
                yield elt

        # Process roots separately: otherwise the alias-processing
        # is excessive.
        for i, selector in enumerate(tags):
            tag, attrs = _parseXPath(selector)

            # Any locale-sourced alias at this level supplies an
            # alternative path to retry later:
            for alias in tuple(_iterateEach(r.findAllChildren('alias', allDull=True)
                                            for r in roots)):
                if alias.dom.attributes['source'].nodeValue == 'locale':
                    replace = alias.dom.attributes['path'].nodeValue.split('/')
                    retries.append(self.__xpathJoin(tags[:i], replace, tags[i:]))

            roots = tuple(_iterateEach(r.findAllChildren(tag, attrs) for r in roots))
            if not roots:
                if retries: # Let outer loop fall back on an alias path:
                    break
                # Nothing left to try — report what we were looking for:
                sought = '/'.join(tags)
                if sought != xpath:
                    sought += f' (for {xpath})'
                raise Error(f'All lack child {selector} for {sought} in {self.name}')

        else: # Found matching elements
            for elt in roots:
                yield elt

    sought = '/'.join(tags)
    if sought != xpath:
        sought += f' (for {xpath})'
    raise Error(f'No {sought} in {self.name}')
|
||||
|
||||
def __currencyDisplayName(self, stem):
    """Return the currency's display name, or '' if none is found.

    Tries the plain displayName first, then each count-qualified
    variant in turn.
    """
    tails = ['displayName']
    tails.extend(f'displayName[count={count}]'
                 for count in ('zero', 'one', 'two', 'few', 'many', 'other'))
    for tail in tails:
        try:
            return self.find(stem + tail)
        except Error:
            continue
    return ''
|
||||
|
||||
def __findUnit(self, keySuffix, quantify, fallback=''):
    """Find a localized name for a (possibly quantified) byte unit.

    Returns the first suitable unitPattern (with its {0} placeholder
    pruned) or displayName found, else fallback.
    """
    # The displayName for a quantified unit in en.xml is kByte
    # (even for unitLength[narrow]) instead of kB (etc.), so
    # prefer any unitPattern provided, but prune its placeholder:
    for size in ('short', 'narrow'): # TODO: reverse order ?
        stem = f'units/unitLength[{size}{keySuffix}]/unit[digital-{quantify}byte]/'
        for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
            try:
                ans = self.find(f'{stem}unitPattern[count={count}]')
            except Error:
                continue

            # TODO: do count-handling, instead of discarding placeholders
            if False: # TODO: do it this way, instead !
                ans = ans.replace('{0}', '').strip()
            elif ans.startswith('{0}'):
                ans = ans[3:].lstrip()
            if ans:
                return ans

        try:
            return self.find(f'{stem}displayName')
        except Error:
            pass

    return fallback
|
||||
|
||||
def __unitCount(self, keySuffix, suffix, cache,
                # Stop at exa/exbi: 16 exbi = 2^{64} < zetta =
                # 1000^7 < zebi = 2^{70}, the next quantifiers up:
                siQuantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')):
    """Work out the unit quantifiers.

    Unfortunately, the CLDR data only go up to terabytes and we
    want all the way to exabytes; but we can recognize the SI
    quantifiers as prefixes, strip and identify the tail as the
    localized translation for 'B' (e.g. French has 'octet' for
    'byte' and uses ko, Mo, Go, To from which we can extrapolate
    Po, Eo).

    Should be called first for the SI quantifiers, with suffix =
    'B', then for the IEC ones, with suffix = 'iB'; the list cache
    (initially empty before first call) is used to let the second
    call know what the first learned about the localized unit.
    """
    if suffix == 'iB': # second call, re-using first's cache
        if cache:
            byte = cache.pop()
            if all(byte == k for k in cache):
                # All SI forms agreed on the localized 'byte' letter;
                # reuse it for the IEC suffix:
                suffix = f'i{byte}'
        for q in siQuantifiers:
            # Those don't (yet, v36) exist in CLDR, so we always get the fall-back:
            yield self.__findUnit(keySuffix, q[:2], f'{q[0].upper()}{suffix}')
    else: # first call
        tail = suffix = suffix or 'B'
        for q in siQuantifiers:
            it = self.__findUnit(keySuffix, q)
            # kB for kilobyte, in contrast with KiB for IEC:
            q = q[0] if q == 'kilo' else q[0].upper()
            if not it:
                # No localized form: synthesize from quantifier + tail.
                it = q + tail
            elif it.startswith(q):
                # Remember what followed the quantifier letter, as a
                # hint about the localized unit:
                rest = it[1:]
                tail = rest if all(rest == k for k in cache) else suffix
                cache.append(rest)
            yield it
|
||||
|
||||
def __numberGrouping(self, system):
    """Sizes of groups of digits within a number.

    Returns a triple (least, higher, top) for which:
      * least is the number of digits after the last grouping
        separator;
      * higher is the number of digits between grouping
        separators;
      * top is the fewest digits that can appear before the first
        grouping separator.

    Thus (4, 3, 2) would want 1e7 as 1000,0000 but 1e8 as 10,000,0000.

    Note: CLDR does countenance the possibility of grouping also
    in the fractional part.  This is not presently attempted.  Nor
    is placement of the sign character anywhere but at the start
    of the number (some formats may place it at the end, possibly
    elsewhere)."""
    top = int(self.find('numbers/minimumGroupingDigits'))
    assert top < 4, top # We store it in a 2-bit field
    grouping = self.find(f'numbers/decimalFormats[numberSystem={system}]/'
                         'decimalFormatLength/decimalFormat/pattern')
    # Only the integer part of the pattern matters; the last two
    # comma-separated groups give the group sizes:
    groups = grouping.split('.')[0].split(',')[-3:]
    assert all(len(x) < 8 for x in groups[-2:]), grouping # we store them in 3-bit fields
    if len(groups) > 2:
        return len(groups[-1]), len(groups[-2]), top

    # Fewer than two separators in the pattern: assume a uniform
    # group size (defaulting to 3 when there is no separator at all).
    size = len(groups[-1]) if len(groups) == 2 else 3
    return size, size, top
|
||||
|
||||
@staticmethod
def __currencyFormats(patterns, plus, minus):
    """Convert a CLDR currency pattern to Qt's format.

    First argument is the raw pattern, possibly a pair of
    'positive;negative' patterns; the remaining arguments are the
    locale's plus and minus signs.  Yields one converted pattern per
    part, with %1 standing for the amount and %2 for the currency
    symbol.
    """
    for p in patterns.split(';'):
        # Collapse digit placeholders to '#', drop group/decimal marks:
        p = p.replace('0', '#').replace(',', '').replace('.', '')
        try:
            # Fix: str.find() returns -1 rather than raising, which made
            # the except-clause below dead code; str.index() does raise,
            # so a pattern with no '#' genuinely skips the pruning.
            cut = p.index('#') + 1
        except ValueError:
            pass
        else:
            # Keep only the first '#'; it stands for the whole number:
            p = p[:cut] + p[cut:].replace('#', '')
        p = p.replace('#', "%1")
        # According to http://www.unicode.org/reports/tr35/#Number_Format_Patterns
        # there can be doubled or trippled currency sign, however none of the
        # locales use that.
        p = p.replace('\xa4', "%2")
        # Single quote goes away, but double goes to single:
        p = p.replace("''", '###').replace("'", '').replace('###', "'")
        # Use number system's signs:
        p = p.replace('+', plus).replace('-', minus)
        yield p
|
||||
|
||||
@staticmethod
def __fromLdmlListPattern(pattern):
    """Map LDML's {0}/{1}/{2} placeholders onto Qt's %1/%2/%3.

    This is a very limited parsing of the format for list pattern
    part only.
    """
    for ldml, qt in (('{0}', '%1'), ('{1}', '%2'), ('{2}', '%3')):
        pattern = pattern.replace(ldml, qt)
    return pattern
|
||||
|
||||
@staticmethod
def __fromLdmlPath(seq): # tool function for __xpathJoin()
    """Convert LDML's [@name='value'] to our [name=value] form."""
    for selector in seq:
        # Dismember the selector into tag and attribute specs:
        tag, *specs = selector.split('[')
        if not specs: # Short-cut the easy case:
            yield selector
            continue

        assert all(spec.endswith(']') for spec in specs)
        fixed = [tag]
        for spec in specs:
            pieces = spec[:-1].split('=')
            name, value = pieces[0], pieces[1]
            # Strip LDML's leading '@' and surrounding quotes:
            if name.startswith('@'):
                name = name[1:]
            if value.startswith("'") and value.endswith("'"):
                value = value[1:-1]
            fixed.append(f'{name}={value}]')
        yield '['.join(fixed)
|
||||
|
||||
@classmethod
def __xpathJoin(cls, head, insert, tail):
    """Join three lists of XPath selectors into one.

    Leading '..' components of insert are resolved against the tail
    of head (both lists are consumed in place); insert's selectors
    are converted from LDML's attribute format to ours.
    """
    while insert and insert[0] == '..':
        del insert[0]
        del head[-1]
    return head + list(cls.__fromLdmlPath(insert)) + tail
|
184
util/locale_database/localetools.py
Normal file
184
util/locale_database/localetools.py
Normal file
@ -0,0 +1,184 @@
|
||||
# Copyright (C) 2020 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Utilities shared among the CLDR extraction tools.
|
||||
|
||||
Functions:
|
||||
unicode2hex() -- converts unicode text to UCS-2 in hex form.
|
||||
wrap_list() -- map list to comma-separated string, 20 entries per line.
|
||||
|
||||
Classes:
|
||||
Error -- A shared error class.
|
||||
Transcriber -- edit a file by writing a temporary file, then renaming.
|
||||
SourceFileEditor -- adds standard prelude and tail handling to Transcriber.
|
||||
"""
|
||||
|
||||
from contextlib import ExitStack, contextmanager
|
||||
from pathlib import Path
|
||||
from tempfile import NamedTemporaryFile
|
||||
|
||||
qtbase_root = Path(__file__).parents[2]
|
||||
assert qtbase_root.name == 'qtbase'
|
||||
|
||||
class Error (Exception):
    """Common base-class for errors raised by these locale tools.

    The first constructor argument is retained as .message and used
    as the str() form; any further arguments are kept in .args, as
    usual for exceptions.
    """
    def __init__(self, msg, *args):
        super().__init__(msg, *args)
        self.message = msg

    def __str__(self):
        return self.message
|
||||
|
||||
def unicode2hex(s):
    """Map text to a list of hex UTF-16 code units.

    Characters outside the BMP are split into a surrogate pair
    (formula copied from qchar.h); each unit is rendered via hex().
    """
    out = []
    for ch in s:
        code = ord(ch)
        if code > 0xFFFF:
            # make a surrogate pair
            out.append(hex((code >> 10) + 0xd7c0))
            out.append(hex((code % 0x400) + 0xdc00))
        else:
            out.append(hex(code))
    return out
|
||||
|
||||
def wrap_list(lst):
    """Render lst as comma-joined text, twenty entries per line."""
    rows = (lst[i:i + 20] for i in range(0, len(lst), 20))
    return ",\n".join(", ".join(row) for row in rows)
|
||||
|
||||
|
||||
@contextmanager
def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path):
    """Context manager for safe file update via a temporary file.

    Accepts path to the file to be updated. Yields a temporary file to the user
    code, open for writing.

    On success closes the temporary file and moves its content to the original
    location. On error, removes temporary file, without disturbing the original.

    NOTE(review): Path.rename() presumes the temporary and the target
    live on the same filesystem — callers should pass a dir near the
    original; confirm at call sites.
    """
    # delete=False: we rename (or unlink) the file ourselves below.
    tempFile = NamedTemporaryFile('w', prefix=prefix, dir=dir, delete=False)
    try:
        yield tempFile
        tempFile.close()
        # Move the modified file to the original location
        Path(tempFile.name).rename(originalLocation)
    except Exception:
        # delete the temporary file in case of error
        tempFile.close()
        Path(tempFile.name).unlink()
        raise
|
||||
|
||||
|
||||
class Transcriber:
    """Context manager base-class to manage source file rewrites.

    Derived classes need to implement transcribing of the content, with
    whatever modifications they may want.  Members reader and writer
    are exposed; use writer.write() to output to the new file; use
    reader.readline() or iterate reader to read the original.

    This class is intended to be used as context manager only (inside a
    `with` statement).

    Reimplement onEnter() to write any preamble the file may have,
    onExit() to write any tail.  The body of the with statement takes
    care of anything in between, using methods provided by derived classes.

    The data is written to a temporary file first.  The temporary file data
    is then moved to the original location if there were no errors.  Otherwise
    the temporary file is removed and the original is left unchanged.
    """
    def __init__(self, path: Path, temp_dir: Path):
        # path: the file to rewrite; temp_dir: where the temporary file
        # is created (so the final rename stays on one filesystem).
        self.path = path
        self.tempDir = temp_dir

    def onEnter(self) -> None:
        """
        Called before transferring control to user code.

        This function can be overridden in derived classes to perform actions
        before transferring control to the user code.

        The default implementation does nothing.
        """
        pass

    def onExit(self) -> None:
        """
        Called after return from user code.

        This function can be overridden in derived classes to perform actions
        after successful return from user code.

        The default implementation does nothing.
        """
        pass

    def __enter__(self):
        with ExitStack() as resources:
            # Create a temp file to write the new data into
            self.writer = resources.enter_context(
                AtomicRenameTemporaryFile(self.path, prefix=self.path.name, dir=self.tempDir))
            # Open the old file
            self.reader = resources.enter_context(open(self.path))

            self.onEnter()

            # Prevent resources from being closed on normal return from this
            # method and make them available inside __exit__():
            self.__resources = resources.pop_all()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is None:
            # Success: let onExit() write any tail, then close both
            # files; closing the temporary triggers the atomic rename.
            with self.__resources:
                self.onExit()
        else:
            # Failure: close both files; the temporary is deleted and
            # the original left untouched.
            self.__resources.__exit__(exc_type, exc_value, traceback)

        # Never suppress the exception:
        return False
|
||||
|
||||
|
||||
class SourceFileEditor (Transcriber):
    """Transcriber for files with a generated block amid fixed text.

    Many source files consist of hand-maintained code surrounding a
    machine-generated section delimited by a standard pair of marker
    comments.  On entering the context this editor copies everything
    up to and including the start marker; on exit it discards the
    stale generated section, then copies the end marker and the rest
    of the file verbatim.

    Only usable as a context manager (see Transcriber).  Code in the
    body of the with statement should use self.writer to emit the new
    content between the markers.
    """
    GENERATED_BLOCK_START = '// GENERATED PART STARTS HERE'
    GENERATED_BLOCK_END = '// GENERATED PART ENDS HERE'

    def onEnter(self) -> None:
        # Transcribe the leading fixed section, start marker included.
        for line in self.reader:
            self.writer.write(line)
            if line.strip() == self.GENERATED_BLOCK_START:
                break

    def onExit(self) -> None:
        # Drop the old generated content, then copy the remainder.
        skipping = True
        for line in self.reader:
            if not skipping:
                self.writer.write(line)
            elif line.strip() == self.GENERATED_BLOCK_END:
                self.writer.write(line)
                skipping = False
|
627
util/locale_database/qlocalexml.py
Normal file
627
util/locale_database/qlocalexml.py
Normal file
@ -0,0 +1,627 @@
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Shared serialization-scanning code for QLocaleXML format.
|
||||
|
||||
Provides classes:
|
||||
Locale -- common data-type representing one locale as a namespace
|
||||
QLocaleXmlWriter -- helper to write a QLocaleXML file
|
||||
QLocaleXmlReader -- helper to read a QLocaleXML file back in
|
||||
|
||||
Support:
|
||||
Spacer -- provides control over indentation of the output.
|
||||
|
||||
RelaxNG schema for the used file format can be found in qlocalexml.rnc.
|
||||
QLocaleXML files can be validated using:
|
||||
|
||||
jing -c qlocalexml.rnc <file.xml>
|
||||
|
||||
You can download jing from https://relaxng.org/jclark/jing.html if your
|
||||
package manager lacks the jing package.
|
||||
"""
|
||||
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
from localetools import Error
|
||||
|
||||
# Tools used by Locale:
|
||||
def camel(seq):
    """Yield words from an iterator, capitalizing all but the first."""
    it = iter(seq)
    yield next(it)
    for word in it:
        yield word.capitalize()

def camelCase(words):
    """Join a sequence of words into one camelCase identifier."""
    return ''.join(camel(iter(words)))
|
||||
|
||||
def addEscapes(s):
    """Replace each non-ASCII character of s with a \\xNN escape."""
    return ''.join(c if ord(c) < 128 else f'\\x{ord(c):02x}' for c in s)
|
||||
|
||||
def startCount(c, text): # strspn
    """First index in text where it doesn't have a character in c"""
    assert text and text[0] in c
    for i, ch in enumerate(text):
        if ch not in c:
            return i
    return len(text)
|
||||
|
||||
def convertFormat(format):
    """Convert date/time format-specifier from CLDR to Qt

    Match up (as best we can) the differences between:
    * https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
    * QDateTimeParser::parseFormat() and QLocalePrivate::dateTimeToString()
    """
    # Compare and contrast dateconverter.py's convert_date().
    # Need to (check consistency and) reduce redundancy !
    result = ""
    i = 0
    while i < len(format):
        if format[i] == "'":
            # Quoted literal text: copy through, keeping the quotes.
            result += "'"
            i += 1
            while i < len(format) and format[i] != "'":
                result += format[i]
                i += 1
            if i < len(format):
                result += "'"
                i += 1
        else:
            # Inspect the run of identical field letters starting here:
            s = format[i:]
            if s.startswith('E'): # week-day
                n = startCount('E', s)
                if n < 3:
                    result += 'ddd'
                elif n == 4:
                    result += 'dddd'
                else: # 5: narrow, 6 short; but should be name, not number :-(
                    result += 'd' if n < 6 else 'dd'
                i += n
            elif s[0] in 'ab': # am/pm
                # 'b' should distinguish noon/midnight, too :-(
                result += "AP"
                i += startCount('ab', s)
            elif s.startswith('S'): # fractions of seconds: count('S') == number of decimals to show
                result += 'z'
                i += startCount('S', s)
            elif s.startswith('V'): # long time zone specifiers (and a deprecated short ID)
                result += 't'
                i += startCount('V', s)
            elif s[0] in 'zv': # zone
                # Should use full name, e.g. "Central European Time", if 'zzzz' :-(
                # 'v' should get generic non-location format, e.g. PT for "Pacific Time", no DST indicator
                result += "t"
                i += startCount('zv', s)
            else:
                # Everything else passes through unchanged.
                result += format[i]
                i += 1

    return result
|
||||
|
||||
class QLocaleXmlReader (object):
    """Read a QLocaleXML file back in and expose its contents.

    Public attributes: languages, scripts, territories (each a mapping
    {ID: (name, code)}), dupes (names shared between the language and
    territory lists) and cldrVersion (text of the <version> element).
    """
    def __init__(self, filename):
        self.root = self.__parse(filename)
        # Lists of (id, name, code) triples:
        languages = tuple(self.__loadMap('language'))
        scripts = tuple(self.__loadMap('script'))
        territories = tuple(self.__loadMap('territory'))
        self.__likely = tuple(self.__likelySubtagsMap())
        # Mappings {ID: (name, code)}
        self.languages = dict((v[0], v[1:]) for v in languages)
        self.scripts = dict((v[0], v[1:]) for v in scripts)
        self.territories = dict((v[0], v[1:]) for v in territories)
        # Private mappings {name: (ID, code)}
        self.__langByName = dict((v[1], (v[0], v[2])) for v in languages)
        self.__textByName = dict((v[1], (v[0], v[2])) for v in scripts)
        self.__landByName = dict((v[1], (v[0], v[2])) for v in territories)
        # Other properties:
        self.dupes = set(v[1] for v in languages) & set(v[1] for v in territories)
        self.cldrVersion = self.__firstChildText(self.root, "version")

    def loadLocaleMap(self, calendars, grumble = lambda text: None):
        """Yield ((language, script, territory) IDs, Locale) pairs.

        When a non-C locale has no script, tries to fill one in from
        the likely-subtags data.  Complaints go to grumble.
        """
        kid = self.__firstChildText
        likely = dict(self.__likely)
        for elt in self.__eachEltInGroup(self.root, 'localeList', 'locale'):
            locale = Locale.fromXmlData(lambda k: kid(elt, k), calendars)
            language = self.__langByName[locale.language][0]
            script = self.__textByName[locale.script][0]
            territory = self.__landByName[locale.territory][0]

            if language != 1: # C
                if territory == 0:
                    grumble(f'loadLocaleMap: No territory id for "{locale.language}"\n')

                if script == 0:
                    # Find default script for the given language and territory - see:
                    # http://www.unicode.org/reports/tr35/#Likely_Subtags
                    try:
                        try:
                            to = likely[(locale.language, 'AnyScript', locale.territory)]
                        except KeyError:
                            to = likely[(locale.language, 'AnyScript', 'AnyTerritory')]
                    except KeyError:
                        pass
                    else:
                        locale.script = to[1]
                        script = self.__textByName[locale.script][0]

            yield (language, script, territory), locale

    def languageIndices(self, locales):
        """Yield (index, language-name) pairs for each known language.

        locales is a list of language names (with repeats); index is
        the offset of the language's first entry in that list, or 0
        if it has no entries.
        """
        index = 0
        for key, value in self.languages.items():
            i, count = 0, locales.count(key)
            if count > 0:
                i = index
            index += count
            yield i, value[0]

    def likelyMap(self):
        """Yield (have-tag, have-ids, give-tag, give-ids) quadruples.

        Tags are underscore-joined locale names ('und' for an unknown
        language); ids are the matching numeric ID triples.
        """
        def tag(t):
            lang, script, land = t
            yield lang[1] if lang[0] else 'und'
            if script[0]: yield script[1]
            if land[0]: yield land[1]

        def ids(t):
            return tuple(x[0] for x in t)

        for pair in self.__likely:
            have = self.__fromNames(pair[0])
            give = self.__fromNames(pair[1])
            yield ('_'.join(tag(have)), ids(have),
                   '_'.join(tag(give)), ids(give))

    def defaultMap(self):
        """Map language and script to their default territory by ID.

        Yields ((language, script), territory) wherever the likely
        sub-tags mapping says language's default locale uses the given
        script and territory."""
        for have, give in self.__likely:
            if have[1:] == ('AnyScript', 'AnyTerritory') and give[2] != 'AnyTerritory':
                assert have[0] == give[0], (have, give)
                yield ((self.__langByName[give[0]][0],
                        self.__textByName[give[1]][0]),
                       self.__landByName[give[2]][0])

    # Implementation details:
    def __loadMap(self, category):
        # Yield (id, name, code) triples from the <categoryList> element.
        kid = self.__firstChildText
        for element in self.__eachEltInGroup(self.root, f'{category}List', category):
            yield int(kid(element, 'id')), kid(element, 'name'), kid(element, 'code')

    def __likelySubtagsMap(self):
        # Yield pairs of (language, script, territory) name-triples,
        # from each <likelySubtag>'s <from> and <to> children.
        def triplet(element, keys=('language', 'script', 'territory'), kid = self.__firstChildText):
            return tuple(kid(element, key) for key in keys)

        kid = self.__firstChildElt
        for elt in self.__eachEltInGroup(self.root, 'likelySubtags', 'likelySubtag'):
            yield triplet(kid(elt, "from")), triplet(kid(elt, "to"))

    def __fromNames(self, names):
        # Map a (language, script, territory) name-triple to the
        # matching (ID, code) pairs.
        return self.__langByName[names[0]], self.__textByName[names[1]], self.__landByName[names[2]]

    # DOM access:
    from xml.dom import minidom
    @staticmethod
    def __parse(filename, read = minidom.parse):
        # Parse the file, returning its document element.
        return read(filename).documentElement

    @staticmethod
    def __isNodeNamed(elt, name, TYPE=minidom.Node.ELEMENT_NODE):
        return elt.nodeType == TYPE and elt.nodeName == name
    del minidom

    @staticmethod
    def __eltWords(elt):
        # Yield the direct text-node contents of elt.
        child = elt.firstChild
        while child:
            if child.nodeType == elt.TEXT_NODE:
                yield child.nodeValue
            child = child.nextSibling

    @classmethod
    def __firstChildElt(cls, parent, name):
        # First child element of parent with the given tag name;
        # raises Error if there is none.
        child = parent.firstChild
        while child:
            if cls.__isNodeNamed(child, name):
                return child
            child = child.nextSibling

        raise Error(f'No {name} child found')

    @classmethod
    def __firstChildText(cls, elt, key):
        # Space-joined text content of elt's first <key> child.
        return ' '.join(cls.__eltWords(cls.__firstChildElt(elt, key)))

    @classmethod
    def __eachEltInGroup(cls, parent, group, key):
        # Yield each <key> child of parent's <group> child, if any.
        try:
            element = cls.__firstChildElt(parent, group).firstChild
        except Error:
            element = None

        while element:
            if cls.__isNodeNamed(element, key):
                yield element
            element = element.nextSibling
||||
|
||||
|
||||
class Spacer (object):
    def __init__(self, indent = None, initial = ''):
        """Prepare to manage indentation and line breaks.

        Arguments are both optional.

        First argument, indent, is either None (its default, for
        'minifying'), an integer (number of spaces) or the unit of
        text to use per indentation level (e.g. '\t' for tabs).  If
        indent is None, lines pass through untouched; otherwise each
        non-empty line gains leading indentation and a trailing
        newline.

        Second argument, initial, is the starting indentation; it is
        ignored if indent is None.  Indentation grows after a line
        that opens a tag without closing it and shrinks on a line
        starting with an end-tag; the parsing is deliberately naive.
        """
        if indent is None:
            self.__call = lambda x: x
        else:
            self.__each = ' ' * indent if isinstance(indent, int) else indent
            self.current = initial
            self.__call = self.__wrap

    def __wrap(self, line):
        if not line:
            return '\n'

        prefix = self.current
        if line.startswith('</'):
            # Closing tag: dedent before emitting this line.
            prefix = self.current = prefix[:-len(self.__each)]
            return prefix + line + '\n'

        if line.startswith('<') and not line.startswith('<!'):
            gt = line.find('>')
            name = (line[1:] if gt < 0 else line[1 : gt]).strip().split()[0]
            # Indent further only if the tag isn't closed on this line:
            if f'</{name}>' not in line:
                self.current += self.__each
        return prefix + line + '\n'

    def __call__(self, line):
        return self.__call(line)
|
||||
|
||||
class QLocaleXmlWriter (object):
|
||||
def __init__(self, save = None, space = Spacer(4)):
    """Set up to write digested CLDR data as QLocale XML.

    Arguments are both optional.

    First argument, save, is None (its default) or a callable that
    will write content to where you intend to save it. If None, it
    is replaced with a callable that prints the given content,
    suppressing the newline (but see the following); this is
    equivalent to passing sys.stdout.write.

    Second argument, space, is an object to call on each text
    output to prepend indentation and append newlines, or not as
    the case may be. The default is a Spacer(4), which grows
    indent by four spaces after each unmatched new tag and shrinks
    back on a close-tag (its parsing is naive, but adequate to how
    this class uses it), while adding a newline to each line.
    """
    self.__rawOutput = self.__printit if save is None else save
    self.__wrap = space
    # Open the document element; close() writes the matching end-tag.
    self.__write('<localeDatabase>')
|
||||
|
||||
# Output of various sections, in their usual order:
def enumData(self):
    """Write the language, script and territory enum tables.

    Also records the full sets of language, script and territory
    codes (minus each list's unknown-marker), which later writing
    whittles down so close() can report any that go unused.
    """
    from enumdata import language_map, script_map, territory_map
    self.__enumTable('language', language_map)
    self.__enumTable('script', script_map)
    self.__enumTable('territory', territory_map)
    # Prepare to detect any unused codes (see __writeLocale(), close()):
    self.__languages = set(p[1] for p in language_map.values()
                           if not p[1].isspace())
    # Fix: script codes are four letters with 'Zzzz' as the unknown
    # marker, territory codes two letters with 'ZZ' unknown; the
    # original compared each set against the *other* list's marker,
    # so neither filter ever excluded anything.
    self.__scripts = set(p[1] for p in script_map.values()
                         if p[1] != 'Zzzz')
    self.__territories = set(p[1] for p in territory_map.values()
                             if p[1] != 'ZZ')
|
||||
|
||||
def likelySubTags(self, entries):
    """Write the likely-subtag mapping.

    Each entry is a (from, to) pair, written as the <from> and <to>
    children of a <likelySubtag> element.
    """
    self.__openTag('likelySubtags')
    for have, give in entries:
        self.__openTag('likelySubtag')
        self.__likelySubTag('from', have)
        self.__likelySubTag('to', give)
        self.__closeTag('likelySubtag')
    self.__closeTag('likelySubtags')
|
||||
|
||||
def locales(self, locales, calendars):
|
||||
self.__openTag('localeList')
|
||||
self.__openTag('locale')
|
||||
self.__writeLocale(Locale.C(calendars), calendars)
|
||||
self.__closeTag('locale')
|
||||
for key in sorted(locales.keys()):
|
||||
self.__openTag('locale')
|
||||
self.__writeLocale(locales[key], calendars)
|
||||
self.__closeTag('locale')
|
||||
self.__closeTag('localeList')
|
||||
|
||||
    def version(self, cldrVersion):
        # Record which CLDR release the data was digested from.
        self.inTag('version', cldrVersion)
|
||||
|
||||
    def inTag(self, tag, text):
        """Writes a complete element: open tag, text content, close tag."""
        self.__write(f'<{tag}>{text}</{tag}>')
|
||||
|
||||
def close(self, grumble):
|
||||
"""Finish writing and grumble any issues discovered."""
|
||||
if self.__rawOutput != self.__complain:
|
||||
self.__write('</localeDatabase>')
|
||||
self.__rawOutput = self.__complain
|
||||
|
||||
if self.__languages or self.__scripts or self.territories:
|
||||
grumble('Some enum members are unused, corresponding to these tags:\n')
|
||||
import textwrap
|
||||
def kvetch(kind, seq, g = grumble, w = textwrap.wrap):
|
||||
g('\n\t'.join(w(f' {kind}: {", ".join(sorted(seq))}', width=80)) + '\n')
|
||||
if self.__languages:
|
||||
kvetch('Languages', self.__languages)
|
||||
if self.__scripts:
|
||||
kvetch('Scripts', self.__scripts)
|
||||
if self.__territories:
|
||||
kvetch('Territories', self.__territories)
|
||||
grumble('It may make sense to deprecate them.\n')
|
||||
|
||||
# Implementation details
|
||||
    @staticmethod
    def __printit(text):
        # Default sink: stdout, without adding a newline (the Spacer
        # passed to __init__ already appends one per line).
        print(text, end='')
|
||||
    @staticmethod
    def __complain(text):
        # Sink installed by close(); any later write is a caller bug.
        raise Error('Attempted to write data after closing :-(')
|
||||
|
||||
def __enumTable(self, tag, table):
|
||||
self.__openTag(f'{tag}List')
|
||||
for key, value in table.items():
|
||||
self.__openTag(tag)
|
||||
self.inTag('name', value[0])
|
||||
self.inTag('id', key)
|
||||
self.inTag('code', value[1])
|
||||
self.__closeTag(tag)
|
||||
self.__closeTag(f'{tag}List')
|
||||
|
||||
def __likelySubTag(self, tag, likely):
|
||||
self.__openTag(tag)
|
||||
self.inTag('language', likely[0])
|
||||
self.inTag('script', likely[1])
|
||||
self.inTag('territory', likely[2])
|
||||
# self.inTag('variant', likely[3])
|
||||
self.__closeTag(tag)
|
||||
|
||||
def __writeLocale(self, locale, calendars):
|
||||
locale.toXml(self.inTag, calendars)
|
||||
self.__languages.discard(locale.language_code)
|
||||
self.__scripts.discard(locale.script_code)
|
||||
self.__territories.discard(locale.territory_code)
|
||||
|
||||
    def __openTag(self, tag):
        # Start-tag only; the Spacer grows indent until __closeTag().
        self.__write(f'<{tag}>')
    def __closeTag(self, tag):
        # Matching end-tag; the Spacer shrinks indent back again.
        self.__write(f'</{tag}>')
|
||||
|
||||
    def __write(self, line):
        # All output funnels through here: __wrap supplies indentation
        # and newline, __rawOutput delivers to the chosen destination.
        self.__rawOutput(self.__wrap(line))
|
||||
|
||||
class Locale (object):
    """Holder for the assorted data representing one locale.

    Implemented as a namespace; its constructor and update() have the
    same signatures as those of a dict, acting on the instance's
    __dict__, so the results are accessed as attributes rather than
    mapping keys."""
    def __init__(self, data=None, **kw):
        self.update(data, **kw)

    def update(self, data=None, **kw):
        # Mirrors dict.update(): data may be a mapping or an iterable
        # of key-value pairs; keyword arguments override it.
        if data: self.__dict__.update(data)
        if kw: self.__dict__.update(kw)

    def __len__(self): # Used when testing as a boolean
        return len(self.__dict__)

    @staticmethod
    def propsMonthDay(scale, lengths=('long', 'short', 'narrow')):
        # Yields the attribute names for day or month name data (pass
        # 'days' or 'months' as scale), plain then standalone form,
        # for each of the given lengths, via the camelCase() helper.
        for L in lengths:
            yield camelCase((L, scale))
            yield camelCase(('standalone', L, scale))

    # Expected to be numbers, read with int():
    __asint = ("currencyDigits", "currencyRounding")
    # Convert day-name to Qt day-of-week number:
    __asdow = ("firstDayOfWeek", "weekendStart", "weekendEnd")
    # Convert from CLDR format-strings to QDateTimeParser ones:
    __asfmt = ("longDateFormat", "shortDateFormat", "longTimeFormat", "shortTimeFormat")
    # Just use the raw text:
    __astxt = ("language", "languageEndonym", "script", "territory", "territoryEndonym",
               "decimal", "group", "zero",
               "list", "percent", "minus", "plus", "exp",
               "quotationStart", "quotationEnd",
               "alternateQuotationStart", "alternateQuotationEnd",
               "listPatternPartStart", "listPatternPartMiddle",
               "listPatternPartEnd", "listPatternPartTwo", "am", "pm",
               'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
               "currencyIsoCode", "currencySymbol", "currencyDisplayName",
               "currencyFormat", "currencyNegativeFormat")

    # Day-of-Week numbering used by Qt:
    __qDoW = {"mon": 1, "tue": 2, "wed": 3, "thu": 4, "fri": 5, "sat": 6, "sun": 7}

    @classmethod
    def fromXmlData(cls, lookup, calendars=('gregorian',)):
        """Constructor from the contents of XML elements.

        Single parameter, lookup, is called with the names of XML
        elements that should contain the relevant data, within a CLDR
        locale element (within a localeList element); these names are
        used for the attributes of the object constructed. Attribute
        values are obtained by suitably digesting the returned element
        texts.\n"""
        data = {}
        for k in cls.__asint:
            data[k] = int(lookup(k))

        for k in cls.__asdow:
            data[k] = cls.__qDoW[lookup(k)]

        for k in cls.__asfmt:
            data[k] = convertFormat(lookup(k))

        for k in cls.__astxt + tuple(cls.propsMonthDay('days')):
            # The 'list' element's value is stored as attribute 'listDelim':
            data['listDelim' if k == 'list' else k] = lookup(k)

        for k in cls.propsMonthDay('months'):
            # Month-name attributes hold a dict mapping calendar to names:
            data[k] = dict((cal, lookup('_'.join((k, cal)))) for cal in calendars)

        # groupSizes element is three ';'-joined numbers:
        grouping = lookup('groupSizes').split(';')
        data.update(groupLeast = int(grouping[0]),
                    groupHigher = int(grouping[1]),
                    groupTop = int(grouping[2]))

        return cls(data)

    def toXml(self, write, calendars=('gregorian',)):
        """Writes its data as QLocale XML.

        First argument, write, is a callable taking the name and
        content of an XML element; it is expected to be the inTag
        bound method of a QLocaleXmlWriter instance.

        Optional second argument is a list of calendar names, in the
        form used by CLDR; its default is ('gregorian',).
        """
        get = lambda k: getattr(self, k)
        for key in ('language', 'script', 'territory'):
            write(key, get(key))
            write(f'{key}code', get(f'{key}_code'))

        for key in ('decimal', 'group', 'zero', 'list',
                    'percent', 'minus', 'plus', 'exp'):
            write(key, get(key))

        # These go through escape(); presumably the xml.sax.saxutils
        # escape imported at the top of the file — confirm there.
        for key in ('languageEndonym', 'territoryEndonym',
                    'quotationStart', 'quotationEnd',
                    'alternateQuotationStart', 'alternateQuotationEnd',
                    'listPatternPartStart', 'listPatternPartMiddle',
                    'listPatternPartEnd', 'listPatternPartTwo',
                    'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
                    'am', 'pm', 'firstDayOfWeek',
                    'weekendStart', 'weekendEnd',
                    'longDateFormat', 'shortDateFormat',
                    'longTimeFormat', 'shortTimeFormat',
                    'currencyIsoCode', 'currencySymbol', 'currencyDisplayName',
                    'currencyFormat', 'currencyNegativeFormat'
                    ) + tuple(self.propsMonthDay('days')) + tuple(
                '_'.join((k, cal))
                for k in self.propsMonthDay('months')
                for cal in calendars):
            write(key, escape(get(key)))

        write('groupSizes', ';'.join(str(x) for x in get('groupSizes')))
        for key in ('currencyDigits', 'currencyRounding'):
            write(key, get(key))

    # Tools used by __monthNames: each maps (index, full name) to the
    # text to use for that month at some length.
    def fullName(i, name): return name
    def firstThree(i, name): return name[:3]
    def initial(i, name): return name[:1]
    def number(i, name): return str(i + 1)
    def islamicShort(i, name):
        if not name: return name
        if name == 'Shawwal': return 'Shaw.'
        words = name.split()
        if words[0].startswith('Dhu'):
            words[0] = words[0][:7] + '.'
        elif len(words[0]) > 3:
            words[0] = words[0][:3] + '.'
        return ' '.join(words)
    @staticmethod
    def __monthNames(calendars,
                     known={ # Map calendar to (names, extractors...):
                         # TODO: do we even need these ? CLDR's root.xml seems to
                         # have them, complete with yeartype="leap" handling for
                         # Hebrew's extra.
                         'gregorian': (('January', 'February', 'March', 'April', 'May', 'June', 'July',
                                        'August', 'September', 'October', 'November', 'December'),
                                       # Extractor pairs, (plain, standalone)
                                       (fullName, fullName), # long
                                       (firstThree, firstThree), # short
                                       (number, initial)), # narrow
                         'persian': (('Farvardin', 'Ordibehesht', 'Khordad', 'Tir', 'Mordad',
                                      'Shahrivar', 'Mehr', 'Aban', 'Azar', 'Dey', 'Bahman', 'Esfand'),
                                     (fullName, fullName),
                                     (firstThree, firstThree),
                                     (number, initial)),
                         'islamic': (('Muharram', 'Safar', 'Rabiʻ I', 'Rabiʻ II', 'Jumada I',
                                      'Jumada II', 'Rajab', 'Shaʻban', 'Ramadan', 'Shawwal',
                                      'Dhuʻl-Qiʻdah', 'Dhuʻl-Hijjah'),
                                     (fullName, fullName),
                                     (islamicShort, islamicShort),
                                     (number, number)),
                         'hebrew': (('Tishri', 'Heshvan', 'Kislev', 'Tevet', 'Shevat', 'Adar I',
                                     'Adar', 'Nisan', 'Iyar', 'Sivan', 'Tamuz', 'Av'),
                                    (fullName, fullName),
                                    (fullName, fullName),
                                    (number, number)),
                     },
                     sizes=('long', 'short', 'narrow')):
        # Yields (attribute-name, ';'-joined month names) pairs for each
        # calendar at each size, plain then standalone, for Locale.C().
        for cal in calendars:
            try:
                data = known[cal]
            except KeyError as e: # Need to add an entry to known, above.
                e.args += ('Unsupported calendar:', cal)
                raise
            names, get = data[0], data[1:]
            for n, size in enumerate(sizes):
                yield ('_'.join((camelCase((size, 'months')), cal)),
                       ';'.join(get[n][0](i, x) for i, x in enumerate(names)))
                yield ('_'.join((camelCase(('standalone', size, 'months')), cal)),
                       ';'.join(get[n][1](i, x) for i, x in enumerate(names)))
    # The tools were only needed while building __monthNames's default:
    del fullName, firstThree, initial, number, islamicShort

    @classmethod
    def C(cls, calendars=('gregorian',),
          days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday',
                  'Thursday', 'Friday', 'Saturday'),
          quantifiers=('k', 'M', 'G', 'T', 'P', 'E')):
        """Returns an object representing the C locale."""
        return cls(cls.__monthNames(calendars),
                   language='C', language_code='0', languageEndonym='',
                   script='AnyScript', script_code='0',
                   territory='AnyTerritory', territory_code='0', territoryEndonym='',
                   groupSizes=(3, 3, 1),
                   decimal='.', group=',', list=';', percent='%',
                   zero='0', minus='-', plus='+', exp='e',
                   quotationStart='"', quotationEnd='"',
                   alternateQuotationStart='\'', alternateQuotationEnd='\'',
                   listPatternPartStart='%1, %2',
                   listPatternPartMiddle='%1, %2',
                   listPatternPartEnd='%1, %2',
                   listPatternPartTwo='%1, %2',
                   byte_unit='bytes',
                   byte_si_quantified=';'.join(q + 'B' for q in quantifiers),
                   byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers),
                   am='AM', pm='PM', firstDayOfWeek='mon',
                   weekendStart='sat', weekendEnd='sun',
                   longDateFormat='EEEE, d MMMM yyyy', shortDateFormat='d MMM yyyy',
                   longTimeFormat='HH:mm:ss z', shortTimeFormat='HH:mm:ss',
                   longDays=';'.join(days),
                   shortDays=';'.join(d[:3] for d in days),
                   narrowDays='7;1;2;3;4;5;6',
                   standaloneLongDays=';'.join(days),
                   standaloneShortDays=';'.join(d[:3] for d in days),
                   standaloneNarrowDays=';'.join(d[:1] for d in days),
                   currencyIsoCode='', currencySymbol='',
                   currencyDisplayName='',
                   currencyDigits=2, currencyRounding=1,
                   currencyFormat='%1%2', currencyNegativeFormat='')
|
119
util/locale_database/qlocalexml.rnc
Normal file
119
util/locale_database/qlocalexml.rnc
Normal file
@ -0,0 +1,119 @@
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
# This is RelaxNG compact schema for qLocaleXML interemediate locale data
|
||||
# representation format produced and consumed by the qlocalexml module.
|
||||
#
|
||||
# To validate an xml file run:
|
||||
#
|
||||
# jing -c qlocalexml.rnc <your-file.xml>
|
||||
#
|
||||
# You can download jing from https://relaxng.org/jclark/jing.html if your
|
||||
# package manager lacks the jing package.
|
||||
|
||||
# Root: the whole QLocaleXML document.
start = element localeDatabase {
    element version { text },
    element languageList { Language+ },
    element scriptList { Script+ },
    element territoryList { Territory+ },
    element likelySubtags { LikelySubtag+ },
    element localeList { Locale+ }
}

# Enum tables: each entry names a code and its numeric QLocale enum id.
Language = element language { TagDescriptor }
Script = element script { TagDescriptor }
Territory = element territory { TagDescriptor }
TagDescriptor = (
    element name { text },
    element id { xsd:nonNegativeInteger },
    element code { text }
)

# CLDR's likely sub-tag mappings, as pairs of locale triplets.
LikelySubtag = element likelySubtag {
    element from { LocaleTriplet },
    element to { LocaleTriplet }
}

LocaleTriplet = (
    element language { text },
    element script { text },
    element territory { text }
)

WeekDay = ("sun" | "mon" | "tue" | "wed" | "thu" | "fri" | "sat")
Digit = xsd:string { pattern = "\d" }
Punctuation = xsd:string { pattern = "\p{P}" }
GroupSizes = xsd:string { pattern = "\d;\d;\d" }

# One locale's digested data; element order matters (this is a sequence).
Locale = element locale {
    element language { text },
    element languagecode { text },
    element script { text },
    element scriptcode { text },
    element territory { text },
    element territorycode { text },
    element decimal { Punctuation },
    element group { text },
    element zero { Digit },
    element list { Punctuation },
    element percent { text },
    element minus { text },
    element plus { text },
    element exp { text },
    element languageEndonym { text },
    element territoryEndonym { text },
    element quotationStart { Punctuation },
    element quotationEnd { Punctuation },
    element alternateQuotationStart { Punctuation },
    element alternateQuotationEnd { Punctuation },
    element listPatternPartStart { text },
    element listPatternPartMiddle { text },
    element listPatternPartEnd { text },
    element listPatternPartTwo { text },
    element byte_unit { text },
    element byte_si_quantified { text },
    element byte_iec_quantified { text },
    element am { text },
    element pm { text },
    element firstDayOfWeek { text },
    element weekendStart { WeekDay },
    element weekendEnd { WeekDay },
    element longDateFormat { text },
    element shortDateFormat { text },
    element longTimeFormat { text },
    element shortTimeFormat { text },
    element currencyIsoCode { text },
    element currencySymbol { text },
    element currencyDisplayName { text },
    element currencyFormat { text },
    element currencyNegativeFormat { text },
    element longDays { text },
    element standaloneLongDays { text },
    element shortDays { text },
    element standaloneShortDays { text },
    element narrowDays { text },
    element standaloneNarrowDays { text },

    # Some of these entries may be absent depending on command line arguments
    element longMonths_gregorian { text }?,
    element longMonths_persian { text }?,
    element longMonths_islamic { text }?,
    element standaloneLongMonths_gregorian { text }?,
    element standaloneLongMonths_persian { text }?,
    element standaloneLongMonths_islamic { text }?,
    element shortMonths_gregorian { text }?,
    element shortMonths_persian { text }?,
    element shortMonths_islamic { text }?,
    element standaloneShortMonths_gregorian { text }?,
    element standaloneShortMonths_persian { text }?,
    element standaloneShortMonths_islamic { text }?,
    element narrowMonths_gregorian { text }?,
    element narrowMonths_persian { text }?,
    element narrowMonths_islamic { text }?,
    element standaloneNarrowMonths_gregorian { text }?,
    element standaloneNarrowMonths_persian { text }?,
    element standaloneNarrowMonths_islamic { text }?,

    element groupSizes { GroupSizes },
    element currencyDigits { xsd:nonNegativeInteger },
    element currencyRounding { xsd:nonNegativeInteger }
}
|
618
util/locale_database/qlocalexml2cpp.py
Normal file
618
util/locale_database/qlocalexml2cpp.py
Normal file
@ -0,0 +1,618 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2021 The Qt Company Ltd.
|
||||
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
"""Script to generate C++ code from CLDR data in QLocaleXML form
|
||||
|
||||
See ``cldr2qlocalexml.py`` for how to generate the QLocaleXML data itself.
|
||||
Pass the output file from that as first parameter to this script; pass the ISO
|
||||
639-3 data file as second parameter. You can optionally pass the root of the
|
||||
qtbase check-out as third parameter; it defaults to the root of the qtbase
|
||||
check-out containing this script.
|
||||
|
||||
The ISO 639-3 data file can be downloaded from the SIL website:
|
||||
|
||||
https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from qlocalexml import QLocaleXmlReader
|
||||
from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor, qtbase_root
|
||||
from iso639_3 import LanguageCodeData
|
||||
|
||||
class LocaleKeySorter:
    """Sort-ordering representation of a locale key.

    This is for passing to a sorting algorithm as key-function, that
    it applies to each entry in the list to decide which belong
    earlier. It adds an entry to the (language, script, territory)
    triple, just before script, that sorts earlier if the territory is
    the default for the given language and script, later otherwise.
    """

    # TODO: study the relationship between this and CLDR's likely
    # sub-tags algorithm. Work out how locale sort-order impacts
    # QLocale's likely sub-tag matching algorithms. Make sure this is
    # sorting in an order compatible with those algorithms.

    def __init__(self, defaults):
        # Maps (language, script) to its default territory.
        self.map = dict(defaults)

    def foreign(self, key):
        """True unless key's territory is the default for its first two entries."""
        preferred = self.map.get(key[:2])
        if preferred is None:
            return True
        return preferred != key[2]

    def __call__(self, key):
        # TODO: should we compare territory before or after script ?
        return (key[0], self.foreign(key)) + key[1:]
|
||||
|
||||
class StringDataToken:
    """Start-index and length of one slice of a StringData table.

    The index must fit a quint16 field; the length must fit the
    bit-field of the given width that the generated C struct allots it.
    """
    def __init__(self, index, length, bits):
        if not index <= 0xffff:
            raise ValueError(f'Start-index ({index}) exceeds the uint16 range!')
        if not length < (1 << bits):
            raise ValueError(f'Data size ({length}) exceeds the {bits}-bit range!')

        self.index, self.length = index, length
|
||||
|
||||
class StringData:
    # Accumulates the contents of one generated char16_t[] table,
    # de-duplicating repeated (and embedded) strings where it can.
    def __init__(self, name):
        self.data = []   # The UCS-2 code units stored so far (via unicode2hex)
        self.hash = {}   # Maps already-stored string to its StringDataToken
        self.name = name # Name of the C array write() shall emit
        self.text = '' # Used in quick-search for matches in data

    def append(self, s, bits = 8):
        # Returns the (possibly cached) token locating s in the table;
        # bits is the width of the length bit-field in the C struct.
        try:
            token = self.hash[s]
        except KeyError:
            token = self.__store(s, bits)
            self.hash[s] = token
        return token

    def __store(self, s, bits):
        """Add string s to known data.

        Seeks to avoid duplication, where possible.
        For example, short-forms may be prefixes of long-forms.
        """
        if not s:
            # Empty string: zero-length token at index 0.
            return StringDataToken(0, 0, bits)
        ucs2 = unicode2hex(s)
        try:
            # self.text.index(s) raises ValueError when s was never seen,
            # skipping straight to the plain-append path below.
            index = self.text.index(s) - 1
            matched = 0
            while matched < len(ucs2):
                index, matched = self.data.index(ucs2[0], index + 1), 1
                if index + len(ucs2) >= len(self.data):
                    raise ValueError # not found after all !
                while matched < len(ucs2) and self.data[index + matched] == ucs2[matched]:
                    matched += 1
        except ValueError:
            # No earlier copy found; append at the end.
            index = len(self.data)
            self.data += ucs2
            self.text += s

        assert index >= 0
        try:
            return StringDataToken(index, len(ucs2), bits)
        except ValueError as e:
            # Report which table and string overflowed the token's fields:
            e.args += (self.name, s)
            raise

    def write(self, fd):
        # The generated index tables use quint16, so the data must fit:
        if len(self.data) > 0xffff:
            raise ValueError(f'Data is too big ({len(self.data)}) for quint16 index to its end!',
                             self.name)
        fd.write(f"\nstatic constexpr char16_t {self.name}[] = {{\n")
        fd.write(wrap_list(self.data))
        fd.write("\n};\n")
|
||||
|
||||
def currencyIsoCodeData(s):
    """Render an ISO currency code as a C char-array initializer.

    Returns '{c1,c2,...}' listing the code points of the characters of
    s, or '{0,0,0}' when s is empty or otherwise falsy.
    """
    if not s:
        return "{0,0,0}"
    return '{' + ",".join(str(ord(ch)) for ch in s) + '}'
|
||||
|
||||
class LocaleSourceEditor (SourceFileEditor):
    # Shared base for the generated-source writers below: on entry it
    # stamps the output with the CLDR version and a do-not-edit notice.
    def __init__(self, path: Path, temp: Path, version: str):
        super().__init__(path, temp)
        # CLDR release the data came from; quoted in the header below.
        self.version = version

    def onEnter(self) -> None:
        super().onEnter()
        self.writer.write(f"""
/*
    This part of the file was generated on {datetime.date.today()} from the
    Common Locale Data Repository v{self.version}

    http://www.unicode.org/cldr/

    Do not edit this section: instead regenerate it using
    cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or
    edited) CLDR data; see qtbase/util/locale_database/.
*/

""")
|
||||
|
||||
class LocaleDataWriter (LocaleSourceEditor):
|
||||
def likelySubtags(self, likely):
|
||||
# First sort likely, so that we can use binary search in C++
|
||||
# code. Although the entries are (lang, script, region), sort
|
||||
# as (lang, region, script) and sort 0 after all non-zero
|
||||
# values. This ensures that, when several mappings partially
|
||||
# match a requested locale, the one we should prefer to use
|
||||
# appears first.
|
||||
huge = 0x10000 # > any ushort; all tag values are ushort
|
||||
def keyLikely(entry):
|
||||
have = entry[1] # Numeric id triple
|
||||
return have[0] or huge, have[2] or huge, have[1] or huge # language, region, script
|
||||
likely = sorted(likely, key=keyLikely)
|
||||
|
||||
i = 0
|
||||
self.writer.write('static constexpr QLocaleId likely_subtags[] = {\n')
|
||||
for had, have, got, give in likely:
|
||||
i += 1
|
||||
self.writer.write(' {{ {:3d}, {:3d}, {:3d} }}'.format(*have))
|
||||
self.writer.write(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give))
|
||||
self.writer.write(' ' if i == len(likely) else ',')
|
||||
self.writer.write(f' // {had} -> {got}\n')
|
||||
self.writer.write('};\n\n')
|
||||
|
||||
def localeIndex(self, indices):
|
||||
self.writer.write('static constexpr quint16 locale_index[] = {\n')
|
||||
for index, name in indices:
|
||||
self.writer.write(f'{index:6d}, // {name}\n')
|
||||
self.writer.write(' 0 // trailing 0\n')
|
||||
self.writer.write('};\n\n')
|
||||
|
||||
def localeData(self, locales, names):
|
||||
list_pattern_part_data = StringData('list_pattern_part_data')
|
||||
single_character_data = StringData('single_character_data')
|
||||
date_format_data = StringData('date_format_data')
|
||||
time_format_data = StringData('time_format_data')
|
||||
days_data = StringData('days_data')
|
||||
am_data = StringData('am_data')
|
||||
pm_data = StringData('pm_data')
|
||||
byte_unit_data = StringData('byte_unit_data')
|
||||
currency_symbol_data = StringData('currency_symbol_data')
|
||||
currency_display_name_data = StringData('currency_display_name_data')
|
||||
currency_format_data = StringData('currency_format_data')
|
||||
endonyms_data = StringData('endonyms_data')
|
||||
|
||||
# Locale data
|
||||
self.writer.write('static constexpr QLocaleData locale_data[] = {\n')
|
||||
# Table headings: keep each label centred in its field, matching line_format:
|
||||
self.writer.write(' // '
|
||||
# Width 6 + comma
|
||||
' lang ' # IDs
|
||||
'script '
|
||||
' terr '
|
||||
|
||||
# Range entries (all start-indices, then all sizes)
|
||||
# Width 5 + comma
|
||||
'lStrt ' # List pattern
|
||||
'lpMid '
|
||||
'lpEnd '
|
||||
'lPair '
|
||||
'lDelm ' # List delimiter
|
||||
# Representing numbers
|
||||
' dec '
|
||||
'group '
|
||||
'prcnt '
|
||||
' zero '
|
||||
'minus '
|
||||
'plus '
|
||||
' exp '
|
||||
# Quotation marks
|
||||
'qtOpn '
|
||||
'qtEnd '
|
||||
'altQO '
|
||||
'altQE '
|
||||
'lDFmt ' # Date format
|
||||
'sDFmt '
|
||||
'lTFmt ' # Time format
|
||||
'sTFmt '
|
||||
'slDay ' # Day names
|
||||
'lDays '
|
||||
'ssDys '
|
||||
'sDays '
|
||||
'snDay '
|
||||
'nDays '
|
||||
' am ' # am/pm indicators
|
||||
' pm '
|
||||
' byte '
|
||||
'siQnt '
|
||||
'iecQn '
|
||||
'crSym ' # Currency formatting
|
||||
'crDsp '
|
||||
'crFmt '
|
||||
'crFNg '
|
||||
'ntLng ' # Name of language in itself, and of territory
|
||||
'ntTer '
|
||||
# Width 3 + comma for each size; no header
|
||||
+ ' ' * 37 +
|
||||
|
||||
# Strays (char array, bit-fields):
|
||||
# Width 10 + 2 spaces + comma
|
||||
' currISO '
|
||||
# Width 6 + comma
|
||||
'curDgt ' # Currency digits
|
||||
'curRnd ' # Currencty rounding (unused: QTBUG-81343)
|
||||
'dow1st ' # First day of week
|
||||
' wknd+ ' # Week-end start/end days
|
||||
' wknd- '
|
||||
'grpTop '
|
||||
'grpMid '
|
||||
'grpEnd'
|
||||
# No trailing space on last entry (be sure to
|
||||
# pad before adding anything after it).
|
||||
'\n')
|
||||
|
||||
formatLine = ''.join((
|
||||
' {{ ',
|
||||
# Locale-identifier
|
||||
'{:6d},' * 3,
|
||||
# List patterns, date/time formats, day names, am/pm
|
||||
# SI/IEC byte-unit abbreviations
|
||||
# Currency and endonyms
|
||||
# Range starts
|
||||
'{:5d},' * 37,
|
||||
# Range sizes
|
||||
'{:3d},' * 37,
|
||||
|
||||
# Currency ISO code
|
||||
' {:>10s}, ',
|
||||
# Currency formatting
|
||||
'{:6d},{:6d}',
|
||||
# Day of week and week-end
|
||||
',{:6d}' * 3,
|
||||
# Number group sizes
|
||||
',{:6d}' * 3,
|
||||
' }}')).format
|
||||
for key in names:
|
||||
locale = locales[key]
|
||||
# Sequence of StringDataToken:
|
||||
ranges = (tuple(list_pattern_part_data.append(p) for p in # 5 entries:
|
||||
(locale.listPatternPartStart, locale.listPatternPartMiddle,
|
||||
locale.listPatternPartEnd, locale.listPatternPartTwo,
|
||||
locale.listDelim)) +
|
||||
tuple(single_character_data.append(p) for p in # 11 entries
|
||||
(locale.decimal, locale.group, locale.percent, locale.zero,
|
||||
locale.minus, locale.plus, locale.exp,
|
||||
locale.quotationStart, locale.quotationEnd,
|
||||
locale.alternateQuotationStart, locale.alternateQuotationEnd)) +
|
||||
tuple (date_format_data.append(f) for f in # 2 entries:
|
||||
(locale.longDateFormat, locale.shortDateFormat)) +
|
||||
tuple(time_format_data.append(f) for f in # 2 entries:
|
||||
(locale.longTimeFormat, locale.shortTimeFormat)) +
|
||||
tuple(days_data.append(d) for d in # 6 entries:
|
||||
(locale.standaloneLongDays, locale.longDays,
|
||||
locale.standaloneShortDays, locale.shortDays,
|
||||
locale.standaloneNarrowDays, locale.narrowDays)) +
|
||||
(am_data.append(locale.am), pm_data.append(locale.pm)) + # 2 entries
|
||||
tuple(byte_unit_data.append(b) for b in # 3 entries:
|
||||
(locale.byte_unit,
|
||||
locale.byte_si_quantified,
|
||||
locale.byte_iec_quantified)) +
|
||||
(currency_symbol_data.append(locale.currencySymbol),
|
||||
currency_display_name_data.append(locale.currencyDisplayName),
|
||||
currency_format_data.append(locale.currencyFormat),
|
||||
currency_format_data.append(locale.currencyNegativeFormat),
|
||||
endonyms_data.append(locale.languageEndonym),
|
||||
endonyms_data.append(locale.territoryEndonym)) # 6 entries
|
||||
) # Total: 37 entries
|
||||
assert len(ranges) == 37
|
||||
|
||||
self.writer.write(formatLine(*(
|
||||
key +
|
||||
tuple(r.index for r in ranges) +
|
||||
tuple(r.length for r in ranges) +
|
||||
(currencyIsoCodeData(locale.currencyIsoCode),
|
||||
locale.currencyDigits,
|
||||
locale.currencyRounding, # unused (QTBUG-81343)
|
||||
locale.firstDayOfWeek, locale.weekendStart, locale.weekendEnd,
|
||||
locale.groupTop, locale.groupHigher, locale.groupLeast) ))
|
||||
+ f', // {locale.language}/{locale.script}/{locale.territory}\n')
|
||||
self.writer.write(formatLine(*( # All zeros, matching the format:
|
||||
(0,) * 3 + (0,) * 37 * 2
|
||||
+ (currencyIsoCodeData(0),)
|
||||
+ (0,) * 8 ))
|
||||
+ ' // trailing zeros\n')
|
||||
self.writer.write('};\n')
|
||||
|
||||
# StringData tables:
|
||||
for data in (list_pattern_part_data, single_character_data,
|
||||
date_format_data, time_format_data, days_data,
|
||||
byte_unit_data, am_data, pm_data, currency_symbol_data,
|
||||
currency_display_name_data, currency_format_data,
|
||||
endonyms_data):
|
||||
data.write(self.writer)
|
||||
|
||||
@staticmethod
|
||||
def __writeNameData(out, book, form):
|
||||
out(f'static constexpr char {form}_name_list[] =\n')
|
||||
out('"Default\\0"\n')
|
||||
for key, value in book.items():
|
||||
if key == 0:
|
||||
continue
|
||||
out(f'"{value[0]}\\0"\n')
|
||||
out(';\n\n')
|
||||
|
||||
out(f'static constexpr quint16 {form}_name_index[] = {{\n')
|
||||
out(f' 0, // Any{form.capitalize()}\n')
|
||||
index = 8
|
||||
for key, value in book.items():
|
||||
if key == 0:
|
||||
continue
|
||||
name = value[0]
|
||||
out(f'{index:6d}, // {name}\n')
|
||||
index += len(name) + 1
|
||||
out('};\n\n')
|
||||
|
||||
@staticmethod
|
||||
def __writeCodeList(out, book, form, width):
|
||||
out(f'static constexpr unsigned char {form}_code_list[] =\n')
|
||||
for key, value in book.items():
|
||||
code = value[1]
|
||||
code += r'\0' * max(width - len(code), 0)
|
||||
out(f'"{code}" // {value[0]}\n')
|
||||
out(';\n\n')
|
||||
|
||||
def languageNames(self, languages):
|
||||
self.__writeNameData(self.writer.write, languages, 'language')
|
||||
|
||||
def scriptNames(self, scripts):
|
||||
self.__writeNameData(self.writer.write, scripts, 'script')
|
||||
|
||||
def territoryNames(self, territories):
|
||||
self.__writeNameData(self.writer.write, territories, 'territory')
|
||||
|
||||
# TODO: unify these next three into the previous three; kept
|
||||
# separate for now to verify we're not changing data.
|
||||
|
||||
def languageCodes(self, languages, code_data: LanguageCodeData):
|
||||
out = self.writer.write
|
||||
|
||||
out(f'constexpr std::array<LanguageCodeEntry, {len(languages)}> languageCodeList {{\n')
|
||||
|
||||
def q(val: Optional[str], size: int) -> str:
|
||||
"""Quote the value and adjust the result for tabular view."""
|
||||
chars = []
|
||||
if val is not None:
|
||||
for c in val:
|
||||
chars.append(f"'{c}'")
|
||||
s = ', '.join(chars)
|
||||
s = f'{{{s}}}'
|
||||
else:
|
||||
s = ''
|
||||
if size == 0:
|
||||
return f'{{{s}}}'
|
||||
else:
|
||||
return f'{{{s}}},'.ljust(size * 5 + 4)
|
||||
|
||||
for key, value in languages.items():
|
||||
code = value[1]
|
||||
if key < 2:
|
||||
result = code_data.query('und')
|
||||
else:
|
||||
result = code_data.query(code)
|
||||
assert code == result.id()
|
||||
assert result is not None
|
||||
|
||||
codeString = q(result.part1Code, 2)
|
||||
codeString += q(result.part2BCode, 3)
|
||||
codeString += q(result.part2TCode, 3)
|
||||
codeString += q(result.part3Code, 0)
|
||||
out(f' LanguageCodeEntry {{{codeString}}}, // {value[0]}\n')
|
||||
|
||||
out('};\n\n')
|
||||
|
||||
def scriptCodes(self, scripts):
|
||||
self.__writeCodeList(self.writer.write, scripts, 'script', 4)
|
||||
|
||||
    def territoryCodes(self, territories): # TODO: unify with territoryNames()
        """Emit the table of (up to) three-character territory codes."""
        self.__writeCodeList(self.writer.write, territories, 'territory', 3)
|
||||
|
||||
class CalendarDataWriter (LocaleSourceEditor):
    """Updates one calendar's q*calendar_data_p.h header.

    Emits a locale_data table mapping each locale's IDs to the start
    indices and lengths of its six month-name lists within a shared
    months_data string table, followed by that string table.
    """
    # Row format: three ushort IDs, six start-indices, six lengths.
    formatCalendar = (
        ' {{'
        + ','.join(('{:6d}',) * 3 + ('{:5d}',) * 6 + ('{:3d}',) * 6)
        + ' }},').format

    def write(self, calendar, locales, names):
        """Write the month-name data for the given calendar.

        calendar -- CLDR calendar name (e.g. 'gregorian');
        locales -- mapping from ID-triple keys to Locale objects;
        names -- the keys of locales, in the order rows are emitted.
        """
        months_data = StringData('months_data')

        self.writer.write('static constexpr QCalendarLocale locale_data[] = {\n')
        self.writer.write(
            ' //'
            # IDs, width 7 (6 + comma)
            ' lang '
            ' script'
            ' terr '
            # Month-name start-indices, width 6 (5 + comma)
            'sLong '
            ' long '
            'sShrt '
            'short '
            'sNarw '
            'narow '
            # No individual headers for the sizes.
            'Sizes...'
            '\n')
        for key in names:
            locale = locales[key]
            # Sequence of StringDataToken:
            try:
                # Twelve long month names can add up to more than 256 (e.g. kde_TZ: 264)
                ranges = (tuple(months_data.append(m[calendar], 16) for m in
                                (locale.standaloneLongMonths, locale.longMonths)) +
                          tuple(months_data.append(m[calendar]) for m in
                                (locale.standaloneShortMonths, locale.shortMonths,
                                 locale.standaloneNarrowMonths, locale.narrowMonths)))
            except ValueError as e:
                # Identify which locale overflowed before re-raising.
                e.args += (locale.language, locale.script, locale.territory)
                raise

            self.writer.write(
                self.formatCalendar(*(
                    key +
                    tuple(r.index for r in ranges) +
                    tuple(r.length for r in ranges) ))
                + f'// {locale.language}/{locale.script}/{locale.territory}\n')
        # Sentinel row of zeros terminates the table.
        self.writer.write(self.formatCalendar(*( (0,) * (3 + 6 * 2) ))
                          + '// trailing zeros\n')
        self.writer.write('};\n')
        months_data.write(self.writer)
|
||||
|
||||
class LocaleHeaderWriter (SourceFileEditor):
    """Updates the Language, Script and Country enums in qlocale.h.

    dupes is the set of names that appear in more than one of the three
    enums; such members get the enum's name appended to disambiguate.
    """
    def __init__(self, path, temp, dupes):
        super().__init__(path, temp)
        self.__dupes = dupes

    def languages(self, languages):
        """Emit the Language enum."""
        self.__enum('Language', languages, self.__language)
        self.writer.write('\n')

    def territories(self, territories):
        """Emit the Country enum (with Territory aliases for Qt 6)."""
        self.writer.write(" // ### Qt 7: Rename to Territory\n")
        self.__enum('Country', territories, self.__territory, 'Territory')

    def scripts(self, scripts):
        """Emit the Script enum."""
        self.__enum('Script', scripts, self.__script)
        self.writer.write('\n')

    # Implementation details
    # Class-level import binds the alias tables as (name-mangled)
    # private class attributes.
    from enumdata import (language_aliases as __language,
                          territory_aliases as __territory,
                          script_aliases as __script)

    def __enum(self, name, book, alias, suffix = None):
        # book: {enum value: (name, code, ...)}; alias: {old: new} for
        # backwards-compatible deprecated members.
        assert book

        if suffix is None:
            suffix = name

        out, dupes = self.writer.write, self.__dupes
        out(f' enum {name} : ushort {{\n')
        for key, value in book.items():
            member = value[0].replace('-', ' ')
            if name == 'Script':
                # Don't .capitalize() as some names are already camel-case (see enumdata.py):
                member = ''.join(word[0].upper() + word[1:] for word in member.split())
                if not member.endswith('Script'):
                    member += 'Script'
                if member in dupes:
                    raise Error(f'The script name "{member}" is messy')
            else:
                member = ''.join(member.split())
                # Disambiguate cross-enum clashes by appending the suffix.
                member = member + suffix if member in dupes else member
            out(f' {member} = {key},\n')

        # After the loop, member is the last (highest-valued) entry.
        out('\n '
            + ',\n '.join(f'{k} = {v}' for k, v in sorted(alias.items()))
            + f',\n\n Last{suffix} = {member}')

        # for "LastCountry = LastTerritory"
        # ### Qt 7: Remove
        if suffix != name:
            out(f',\n Last{name} = Last{suffix}')

        out('\n };\n')
|
||||
|
||||
|
||||
def main(out, err):
    """Generate C++ locale data from a QLocaleXML file.

    Parses the command line, then rewrites qlocale_data_p.h, the
    selected calendars' data headers, qlocale.h and qlocale.qdoc under
    the given qtbase source tree.  Returns the process exit status:
    0 on success, 1 on failure.  (out is accepted for symmetry with
    err but is currently unused.)
    """
    calendars_map = {
        # CLDR name: Qt file name fragment
        'gregorian': 'roman',
        'persian': 'jalali',
        'islamic': 'hijri',
        # 'hebrew': 'hebrew'
    }
    all_calendars = list(calendars_map.keys())

    parser = argparse.ArgumentParser(
        description='Generate C++ code from CLDR data in QLocaleXML form.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input_file', help='input XML file name',
                        metavar='input-file.xml')
    parser.add_argument('iso_path', help='path to the ISO 639-3 data file',
                        metavar='iso-639-3.tab')
    parser.add_argument('qtbase_path', help='path to the root of the qtbase source tree',
                        nargs='?', default=qtbase_root)
    parser.add_argument('--calendars', help='select calendars to emit data for',
                        nargs='+', metavar='CALENDAR',
                        choices=all_calendars, default=all_calendars)
    args = parser.parse_args()

    qlocalexml = args.input_file
    qtsrcdir = Path(args.qtbase_path)
    calendars = {cal: calendars_map[cal] for cal in args.calendars}

    # Sanity-check the target tree before writing anything.
    if not (qtsrcdir.is_dir()
            and all(qtsrcdir.joinpath('src/corelib/text', leaf).is_file()
                    for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))):
        parser.error(f'Missing expected files under qtbase source root {qtsrcdir}')

    reader = QLocaleXmlReader(qlocalexml)
    locale_map = dict(reader.loadLocaleMap(calendars, err.write))
    locale_keys = sorted(locale_map.keys(), key=LocaleKeySorter(reader.defaultMap()))

    code_data = LanguageCodeData(args.iso_path)

    try:
        with LocaleDataWriter(qtsrcdir.joinpath('src/corelib/text/qlocale_data_p.h'),
                              qtsrcdir, reader.cldrVersion) as writer:
            writer.likelySubtags(reader.likelyMap())
            writer.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map)))
            writer.localeData(locale_map, locale_keys)
            writer.writer.write('\n')
            writer.languageNames(reader.languages)
            writer.scriptNames(reader.scripts)
            writer.territoryNames(reader.territories)
            # TODO: merge the next three into the previous three
            writer.languageCodes(reader.languages, code_data)
            writer.scriptCodes(reader.scripts)
            writer.territoryCodes(reader.territories)
    except Exception as e:
        err.write(f'\nError updating locale data: {e}\n')
        return 1

    # Generate calendar data
    for calendar, stem in calendars.items():
        try:
            with CalendarDataWriter(
                    qtsrcdir.joinpath(f'src/corelib/time/q{stem}calendar_data_p.h'),
                    qtsrcdir, reader.cldrVersion) as writer:
                writer.write(calendar, locale_map, locale_keys)
        except Exception as e:
            # NOTE(review): best-effort — remaining calendars are still
            # attempted and the exit status stays 0; confirm this is
            # intended rather than a missing `return 1`.
            err.write(f'\nError updating {calendar} locale data: {e}\n')

    # qlocale.h
    try:
        with LocaleHeaderWriter(qtsrcdir.joinpath('src/corelib/text/qlocale.h'),
                                qtsrcdir, reader.dupes) as writer:
            writer.languages(reader.languages)
            writer.scripts(reader.scripts)
            writer.territories(reader.territories)
    except Exception as e:
        # NOTE(review): also best-effort (no `return 1`) — confirm intended.
        err.write(f'\nError updating qlocale.h: {e}\n')

    # qlocale.qdoc
    try:
        with Transcriber(qtsrcdir.joinpath('src/corelib/text/qlocale.qdoc'), qtsrcdir) as qdoc:
            DOCSTRING = " QLocale's data is based on Common Locale Data Repository "
            for line in qdoc.reader:
                if DOCSTRING in line:
                    # Refresh the CLDR version mentioned in the docs.
                    qdoc.writer.write(f'{DOCSTRING}v{reader.cldrVersion}.\n')
                else:
                    qdoc.writer.write(line)
    except Exception as e:
        # Fixed: this handler previously misreported "qlocale.h".
        err.write(f'\nError updating qlocale.qdoc: {e}\n')
        return 1

    return 0
|
||||
|
||||
if __name__ == "__main__":
    # Exit status propagates main()'s result: 0 on success, 1 on failure.
    import sys
    sys.exit(main(sys.stdout, sys.stderr))
|
424
util/locale_database/testlocales/localemodel.cpp
Normal file
424
util/locale_database/testlocales/localemodel.cpp
Normal file
@ -0,0 +1,424 @@
|
||||
// Copyright (C) 2016 The Qt Company Ltd.
|
||||
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
#include "localemodel.h"
|
||||
|
||||
#include <QLocale>
|
||||
#include <QDate>
|
||||
#include <qdebug.h>
|
||||
|
||||
// Number of model columns: double, long/short date, long/short time, name.
static const int g_model_cols = 6;

// One row's locale: QLocale::Language / QLocale::Territory enum values
// stored as plain ints.
struct LocaleListItem
{
    int language;
    int territory;
};
|
||||
|
||||
// Hard-coded list of the locales the model displays, one table row
// each, as (language, territory) enum-value pairs.
const LocaleListItem g_locale_list[] = {
    {      1,     0 }, // C/AnyTerritory
    {      3,    69 }, // Afan/Ethiopia
    {      3,   111 }, // Afan/Kenya
    {      4,    59 }, // Afar/Djibouti
    {      4,    67 }, // Afar/Eritrea
    {      4,    69 }, // Afar/Ethiopia
    {      5,   195 }, // Afrikaans/SouthAfrica
    {      5,   148 }, // Afrikaans/Namibia
    {      6,     2 }, // Albanian/Albania
    {      7,    69 }, // Amharic/Ethiopia
    {      8,   186 }, // Arabic/SaudiArabia
    {      8,     3 }, // Arabic/Algeria
    {      8,    17 }, // Arabic/Bahrain
    {      8,    64 }, // Arabic/Egypt
    {      8,   103 }, // Arabic/Iraq
    {      8,   109 }, // Arabic/Jordan
    {      8,   115 }, // Arabic/Kuwait
    {      8,   119 }, // Arabic/Lebanon
    {      8,   122 }, // Arabic/LibyanArabJamahiriya
    {      8,   145 }, // Arabic/Morocco
    {      8,   162 }, // Arabic/Oman
    {      8,   175 }, // Arabic/Qatar
    {      8,   201 }, // Arabic/Sudan
    {      8,   207 }, // Arabic/SyrianArabRepublic
    {      8,   216 }, // Arabic/Tunisia
    {      8,   223 }, // Arabic/UnitedArabEmirates
    {      8,   237 }, // Arabic/Yemen
    {      9,    11 }, // Armenian/Armenia
    {     10,   100 }, // Assamese/India
    {     12,    15 }, // Azerbaijani/Azerbaijan
    {     14,   197 }, // Basque/Spain
    {     15,    18 }, // Bengali/Bangladesh
    {     15,   100 }, // Bengali/India
    {     16,    25 }, // Bhutani/Bhutan
    {     20,    33 }, // Bulgarian/Bulgaria
    {     22,    20 }, // Byelorussian/Belarus
    {     23,    36 }, // Cambodian/Cambodia
    {     24,   197 }, // Catalan/Spain
    {     25,    44 }, // Chinese/China
    {     25,    97 }, // Chinese/HongKong
    {     25,   126 }, // Chinese/Macau
    {     25,   190 }, // Chinese/Singapore
    {     25,   208 }, // Chinese/Taiwan
    {     27,    54 }, // Croatian/Croatia
    {     28,    57 }, // Czech/CzechRepublic
    {     29,    58 }, // Danish/Denmark
    {     30,   151 }, // Dutch/Netherlands
    {     30,    21 }, // Dutch/Belgium
    {     31,   225 }, // English/UnitedStates
    {     31,     4 }, // English/AmericanSamoa
    {     31,    13 }, // English/Australia
    {     31,    21 }, // English/Belgium
    {     31,    22 }, // English/Belize
    {     31,    28 }, // English/Botswana
    {     31,    38 }, // English/Canada
    {     31,    89 }, // English/Guam
    {     31,    97 }, // English/HongKong
    {     31,   100 }, // English/India
    {     31,   104 }, // English/Ireland
    {     31,   107 }, // English/Jamaica
    {     31,   133 }, // English/Malta
    {     31,   134 }, // English/MarshallIslands
    {     31,   148 }, // English/Namibia
    {     31,   154 }, // English/NewZealand
    {     31,   160 }, // English/NorthernMarianaIslands
    {     31,   163 }, // English/Pakistan
    {     31,   170 }, // English/Philippines
    {     31,   190 }, // English/Singapore
    {     31,   195 }, // English/SouthAfrica
    {     31,   215 }, // English/TrinidadAndTobago
    {     31,   224 }, // English/UnitedKingdom
    {     31,   226 }, // English/UnitedStatesMinorOutlyingIslands
    {     31,   234 }, // English/USVirginIslands
    {     31,   240 }, // English/Zimbabwe
    {     33,    68 }, // Estonian/Estonia
    {     34,    71 }, // Faroese/FaroeIslands
    {     36,    73 }, // Finnish/Finland
    {     37,    74 }, // French/France
    {     37,    21 }, // French/Belgium
    {     37,    38 }, // French/Canada
    {     37,   125 }, // French/Luxembourg
    {     37,   142 }, // French/Monaco
    {     37,   206 }, // French/Switzerland
    {     40,   197 }, // Galician/Spain
    {     41,    81 }, // Georgian/Georgia
    {     42,    82 }, // German/Germany
    {     42,    14 }, // German/Austria
    {     42,    21 }, // German/Belgium
    {     42,   123 }, // German/Liechtenstein
    {     42,   125 }, // German/Luxembourg
    {     42,   206 }, // German/Switzerland
    {     43,    85 }, // Greek/Greece
    {     43,    56 }, // Greek/Cyprus
    {     44,    86 }, // Greenlandic/Greenland
    {     46,   100 }, // Gujarati/India
    {     47,    83 }, // Hausa/Ghana
    {     47,   156 }, // Hausa/Niger
    {     47,   157 }, // Hausa/Nigeria
    {     48,   105 }, // Hebrew/Israel
    {     49,   100 }, // Hindi/India
    {     50,    98 }, // Hungarian/Hungary
    {     51,    99 }, // Icelandic/Iceland
    {     52,   101 }, // Indonesian/Indonesia
    {     57,   104 }, // Irish/Ireland
    {     58,   106 }, // Italian/Italy
    {     58,   206 }, // Italian/Switzerland
    {     59,   108 }, // Japanese/Japan
    {     61,   100 }, // Kannada/India
    {     63,   110 }, // Kazakh/Kazakhstan
    {     64,   179 }, // Kinyarwanda/Rwanda
    {     65,   116 }, // Kirghiz/Kyrgyzstan
    {     66,   114 }, // Korean/RepublicOfKorea
    {     67,   102 }, // Kurdish/Iran
    {     67,   103 }, // Kurdish/Iraq
    {     67,   207 }, // Kurdish/SyrianArabRepublic
    {     67,   217 }, // Kurdish/Turkey
    {     69,   117 }, // Laothian/Lao
    {     71,   118 }, // Latvian/Latvia
    {     72,    49 }, // Lingala/DemocraticRepublicOfCongo
    {     72,    50 }, // Lingala/PeoplesRepublicOfCongo
    {     73,   124 }, // Lithuanian/Lithuania
    {     74,   127 }, // Macedonian/Macedonia
    {     76,   130 }, // Malay/Malaysia
    {     76,    32 }, // Malay/BruneiDarussalam
    {     77,   100 }, // Malayalam/India
    {     78,   133 }, // Maltese/Malta
    {     80,   100 }, // Marathi/India
    {     82,   143 }, // Mongolian/Mongolia
    {     84,   150 }, // Nepali/Nepal
    {     85,   161 }, // Norwegian/Norway
    {     87,   100 }, // Oriya/India
    {     88,     1 }, // Pashto/Afghanistan
    {     89,   102 }, // Persian/Iran
    {     89,     1 }, // Persian/Afghanistan
    {     90,   172 }, // Polish/Poland
    {     91,   173 }, // Portuguese/Portugal
    {     91,    30 }, // Portuguese/Brazil
    {     92,   100 }, // Punjabi/India
    {     92,   163 }, // Punjabi/Pakistan
    {     95,   177 }, // Romanian/Romania
    {     96,   178 }, // Russian/RussianFederation
    {     96,   222 }, // Russian/Ukraine
    {     99,   100 }, // Sanskrit/India
    {    100,   241 }, // Serbian/SerbiaAndMontenegro
    {    100,    27 }, // Serbian/BosniaAndHerzegowina
    {    100,   238 }, // Serbian/Yugoslavia
    {    101,   241 }, // SerboCroatian/SerbiaAndMontenegro
    {    101,    27 }, // SerboCroatian/BosniaAndHerzegowina
    {    101,   238 }, // SerboCroatian/Yugoslavia
    {    102,   195 }, // Sesotho/SouthAfrica
    {    103,   195 }, // Setswana/SouthAfrica
    {    107,   195 }, // Siswati/SouthAfrica
    {    108,   191 }, // Slovak/Slovakia
    {    109,   192 }, // Slovenian/Slovenia
    {    110,   194 }, // Somali/Somalia
    {    110,    59 }, // Somali/Djibouti
    {    110,    69 }, // Somali/Ethiopia
    {    110,   111 }, // Somali/Kenya
    {    111,   197 }, // Spanish/Spain
    {    111,    10 }, // Spanish/Argentina
    {    111,    26 }, // Spanish/Bolivia
    {    111,    43 }, // Spanish/Chile
    {    111,    47 }, // Spanish/Colombia
    {    111,    52 }, // Spanish/CostaRica
    {    111,    61 }, // Spanish/DominicanRepublic
    {    111,    63 }, // Spanish/Ecuador
    {    111,    65 }, // Spanish/ElSalvador
    {    111,    90 }, // Spanish/Guatemala
    {    111,    96 }, // Spanish/Honduras
    {    111,   139 }, // Spanish/Mexico
    {    111,   155 }, // Spanish/Nicaragua
    {    111,   166 }, // Spanish/Panama
    {    111,   168 }, // Spanish/Paraguay
    {    111,   169 }, // Spanish/Peru
    {    111,   174 }, // Spanish/PuertoRico
    {    111,   225 }, // Spanish/UnitedStates
    {    111,   227 }, // Spanish/Uruguay
    {    111,   231 }, // Spanish/Venezuela
    {    113,   111 }, // Swahili/Kenya
    {    113,   210 }, // Swahili/Tanzania
    {    114,   205 }, // Swedish/Sweden
    {    114,    73 }, // Swedish/Finland
    {    116,   209 }, // Tajik/Tajikistan
    {    117,   100 }, // Tamil/India
    {    118,   178 }, // Tatar/RussianFederation
    {    119,   100 }, // Telugu/India
    {    120,   211 }, // Thai/Thailand
    {    122,    67 }, // Tigrinya/Eritrea
    {    122,    69 }, // Tigrinya/Ethiopia
    {    124,   195 }, // Tsonga/SouthAfrica
    {    125,   217 }, // Turkish/Turkey
    {    129,   222 }, // Ukrainian/Ukraine
    {    130,   100 }, // Urdu/India
    {    130,   163 }, // Urdu/Pakistan
    {    131,   228 }, // Uzbek/Uzbekistan
    {    131,     1 }, // Uzbek/Afghanistan
    {    132,   232 }, // Vietnamese/VietNam
    {    134,   224 }, // Welsh/UnitedKingdom
    {    136,   195 }, // Xhosa/SouthAfrica
    {    138,   157 }, // Yoruba/Nigeria
    {    140,   195 }, // Zulu/SouthAfrica
    {    141,   161 }, // Nynorsk/Norway
    {    142,    27 }, // Bosnian/BosniaAndHerzegowina
    {    143,   131 }, // Divehi/Maldives
    {    144,   224 }, // Manx/UnitedKingdom
    {    145,   224 }, // Cornish/UnitedKingdom
    {    146,    83 }, // Akan/Ghana
    {    147,   100 }, // Konkani/India
    {    148,    83 }, // Ga/Ghana
    {    149,   157 }, // Igbo/Nigeria
    {    150,   111 }, // Kamba/Kenya
    {    151,   207 }, // Syriac/SyrianArabRepublic
    {    152,    67 }, // Blin/Eritrea
    {    153,    67 }, // Geez/Eritrea
    {    153,    69 }, // Geez/Ethiopia
    {    154,   157 }, // Koro/Nigeria
    {    155,    69 }, // Sidamo/Ethiopia
    {    156,   157 }, // Atsam/Nigeria
    {    157,    67 }, // Tigre/Eritrea
    {    158,   157 }, // Jju/Nigeria
    {    159,   106 }, // Friulian/Italy
    {    160,   195 }, // Venda/SouthAfrica
    {    161,    83 }, // Ewe/Ghana
    {    161,   212 }, // Ewe/Togo
    {    163,   225 }, // Hawaiian/UnitedStates
    {    164,   157 }, // Tyap/Nigeria
    {    165,   129 }, // Chewa/Malawi
};
static const int g_locale_list_count = sizeof(g_locale_list)/sizeof(g_locale_list[0]);
|
||||
|
||||
LocaleModel::LocaleModel(QObject *parent)
    : QAbstractItemModel(parent)
{
    // Editable sample values for row 0, one per data column (the last,
    // "Name", column has no input).  Dates and times are appended twice
    // so the long- and short-format columns can be edited independently.
    m_data_list.append(1234.5678);
    m_data_list.append(QDate::currentDate());
    m_data_list.append(QDate::currentDate());
    m_data_list.append(QTime::currentTime());
    m_data_list.append(QTime::currentTime());
}
|
||||
|
||||
QVariant LocaleModel::data(const QModelIndex &index, int role) const
{
    // Serve only display, edit and tooltip roles, within bounds.
    // (&& binds tighter than ||, so the role test forms one clause.)
    if (!index.isValid()
        || role != Qt::DisplayRole && role != Qt::EditRole && role != Qt::ToolTipRole
        || index.column() >= g_model_cols
        || index.row() >= g_locale_list_count + 2)
        return QVariant();

    // The sample value backing this column; the final "Name" column
    // has no backing sample.
    QVariant data;
    if (index.column() < g_model_cols - 1)
        data = m_data_list.at(index.column());

    if (index.row() == 0) {
        // Row 0 shows the raw, unformatted input values.
        if (role == Qt::ToolTipRole)
            return QVariant();
        switch (index.column()) {
        case 0:
            return data.toDouble();
        case 1:
            return data.toDate();
        case 2:
            return data.toDate();
        case 3:
            return data.toTime();
        case 4:
            return data.toTime();
        case 5:
            return QVariant();
        default:
            break;
        }
    } else {
        // Row 1 is the system locale; rows 2+ come from g_locale_list.
        QLocale locale;
        if (index.row() == 1) {
            locale = QLocale::system();
        } else {
            LocaleListItem item = g_locale_list[index.row() - 2];
            locale = QLocale((QLocale::Language)item.language, (QLocale::Territory)item.territory);
        }

        // Tooltips show the locale's format string; display/edit roles
        // show the sample value formatted by that locale.
        switch (index.column()) {
        case 0:
            if (role == Qt::ToolTipRole)
                return QVariant();
            return locale.toString(data.toDouble());
        case 1:
            if (role == Qt::ToolTipRole)
                return locale.dateFormat(QLocale::LongFormat);
            return locale.toString(data.toDate(), QLocale::LongFormat);
        case 2:
            if (role == Qt::ToolTipRole)
                return locale.dateFormat(QLocale::ShortFormat);
            return locale.toString(data.toDate(), QLocale::ShortFormat);
        case 3:
            if (role == Qt::ToolTipRole)
                return locale.timeFormat(QLocale::LongFormat);
            return locale.toString(data.toTime(), QLocale::LongFormat);
        case 4:
            if (role == Qt::ToolTipRole)
                return locale.timeFormat(QLocale::ShortFormat);
            return locale.toString(data.toTime(), QLocale::ShortFormat);
        case 5:
            if (role == Qt::ToolTipRole)
                return QVariant();
            return locale.name();
        default:
            break;
        }
    }

    return QVariant();
}
|
||||
|
||||
QVariant LocaleModel::headerData(int section, Qt::Orientation orientation, int role) const
{
    if (role != Qt::DisplayRole)
        return QVariant();

    if (orientation == Qt::Horizontal) {
        // Column titles.
        switch (section) {
        case 0:
            return QLatin1String("Double");
        case 1:
            return QLatin1String("Long Date");
        case 2:
            return QLatin1String("Short Date");
        case 3:
            return QLatin1String("Long Time");
        case 4:
            return QLatin1String("Short Time");
        case 5:
            return QLatin1String("Name");
        default:
            break;
        }
    } else {
        // Row titles: 0 = raw input, 1 = system locale,
        // 2+ = "Language/Territory" from g_locale_list.
        if (section >= g_locale_list_count + 2)
            return QVariant();
        if (section == 0) {
            return QLatin1String("Input");
        } else if (section == 1) {
            return QLatin1String("System");
        } else {
            LocaleListItem item = g_locale_list[section - 2];
            return QLocale::languageToString((QLocale::Language)item.language)
                   + QLatin1Char('/')
                   + QLocale::territoryToString((QLocale::Territory)item.territory);
        }
    }

    return QVariant();
}
|
||||
|
||||
QModelIndex LocaleModel::index(int row, int column,
                               const QModelIndex &parent) const
{
    // Flat table: items only exist at the root, within bounds.
    if (parent.isValid()
        || row >= g_locale_list_count + 2
        || column >= g_model_cols)
        return QModelIndex();

    return createIndex(row, column);
}
|
||||
|
||||
// Flat model: no item has a parent.
QModelIndex LocaleModel::parent(const QModelIndex&) const
{
    return QModelIndex();
}
|
||||
|
||||
// Fixed column count: double, long/short date, long/short time, name.
int LocaleModel::columnCount(const QModelIndex&) const
{
    return g_model_cols;
}
|
||||
|
||||
int LocaleModel::rowCount(const QModelIndex &parent) const
{
    // Flat model: children only at the root.
    if (parent.isValid())
        return 0;
    // One row per locale, plus the input row (0) and system row (1).
    return g_locale_list_count + 2;
}
|
||||
|
||||
// Only row 0's sample cells are editable; its "Name" cell is inert.
Qt::ItemFlags LocaleModel::flags(const QModelIndex &index) const
{
    // Use the named empty flag rather than a bare 0 literal.
    if (!index.isValid())
        return Qt::NoItemFlags;
    if (index.row() == 0 && index.column() == g_model_cols - 1)
        return Qt::NoItemFlags;
    if (index.row() == 0)
        return QAbstractItemModel::flags(index) | Qt::ItemIsEditable;
    return QAbstractItemModel::flags(index);
}
|
||||
|
||||
bool LocaleModel::setData(const QModelIndex &index, const QVariant &value, int role)
|
||||
{
|
||||
if (!index.isValid()
|
||||
|| index.row() != 0
|
||||
|| index.column() >= g_model_cols - 1
|
||||
|| role != Qt::EditRole
|
||||
|| m_data_list.at(index.column()).type() != value.type())
|
||||
return false;
|
||||
|
||||
m_data_list[index.column()] = value;
|
||||
emit dataChanged(createIndex(1, index.column()),
|
||||
createIndex(g_locale_list_count, index.column()));
|
||||
|
||||
return true;
|
||||
}
|
31
util/locale_database/testlocales/localemodel.h
Normal file
31
util/locale_database/testlocales/localemodel.h
Normal file
@ -0,0 +1,31 @@
|
||||
// Copyright (C) 2016 The Qt Company Ltd.
|
||||
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
#ifndef LOCALEMODEL_H
|
||||
#define LOCALEMODEL_H
|
||||
|
||||
#include <QAbstractItemModel>
|
||||
#include <QList>
|
||||
#include <QVariant>
|
||||
|
||||
class LocaleModel : public QAbstractItemModel
|
||||
{
|
||||
Q_OBJECT
|
||||
public:
|
||||
LocaleModel(QObject *parent = nullptr);
|
||||
|
||||
virtual int columnCount(const QModelIndex &parent = QModelIndex()) const;
|
||||
virtual QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const;
|
||||
virtual QModelIndex index(int row, int column,
|
||||
const QModelIndex &parent = QModelIndex()) const;
|
||||
virtual QModelIndex parent(const QModelIndex &index) const;
|
||||
virtual int rowCount(const QModelIndex &parent = QModelIndex()) const;
|
||||
virtual QVariant headerData(int section, Qt::Orientation orientation,
|
||||
int role = Qt::DisplayRole ) const;
|
||||
virtual Qt::ItemFlags flags(const QModelIndex &index) const;
|
||||
virtual bool setData(const QModelIndex &index, const QVariant &value,
|
||||
int role = Qt::EditRole);
|
||||
private:
|
||||
QList<QVariant> m_data_list;
|
||||
};
|
||||
|
||||
#endif // LOCALEMODEL_H
|
51
util/locale_database/testlocales/localewidget.cpp
Normal file
51
util/locale_database/testlocales/localewidget.cpp
Normal file
@ -0,0 +1,51 @@
|
||||
// Copyright (C) 2016 The Qt Company Ltd.
|
||||
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
#include <QTableView>
|
||||
#include <QVBoxLayout>
|
||||
#include <QItemDelegate>
|
||||
#include <QItemEditorFactory>
|
||||
#include <QDoubleSpinBox>
|
||||
|
||||
#include "localewidget.h"
|
||||
#include "localemodel.h"
|
||||
|
||||
class DoubleEditorCreator : public QItemEditorCreatorBase
|
||||
{
|
||||
public:
|
||||
QWidget *createWidget(QWidget *parent) const {
|
||||
QDoubleSpinBox *w = new QDoubleSpinBox(parent);
|
||||
w->setDecimals(4);
|
||||
w->setRange(-10000.0, 10000.0);
|
||||
return w;
|
||||
}
|
||||
virtual QByteArray valuePropertyName() const {
|
||||
return QByteArray("value");
|
||||
}
|
||||
};
|
||||
|
||||
// Item-editor factory routing QVariant::Double to DoubleEditorCreator;
// other types fall back to the default editors.
class EditorFactory : public QItemEditorFactory
{
public:
    EditorFactory() {
        // registerEditor() stores the pointer, so the creator must
        // outlive the factory; a function-local static guarantees that.
        static DoubleEditorCreator double_editor_creator;
        registerEditor(QVariant::Double, &double_editor_creator);
    }
};
|
||||
|
||||
// Builds the table view over LocaleModel and installs the custom
// double editor on the view's delegate.
LocaleWidget::LocaleWidget(QWidget *parent)
    : QWidget(parent)
{
    m_model = new LocaleModel(this);
    m_view = new QTableView(this);

    // NOTE(review): Qt 6 views default to QStyledItemDelegate, which is
    // not a QItemDelegate — confirm this cast still succeeds here.
    QItemDelegate *delegate = qobject_cast<QItemDelegate*>(m_view->itemDelegate());
    Q_ASSERT(delegate != nullptr);
    static EditorFactory editor_factory;
    delegate->setItemEditorFactory(&editor_factory);

    m_view->setModel(m_model);

    QVBoxLayout *layout = new QVBoxLayout(this);
    // QLayout::setMargin() was removed in Qt 6; setContentsMargins()
    // is the equivalent replacement.
    layout->setContentsMargins(0, 0, 0, 0);
    layout->addWidget(m_view);
}
|
21
util/locale_database/testlocales/localewidget.h
Normal file
21
util/locale_database/testlocales/localewidget.h
Normal file
@ -0,0 +1,21 @@
|
||||
// Copyright (C) 2016 The Qt Company Ltd.
|
||||
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
#ifndef LOCALEWIDGET_H
|
||||
#define LOCALEWIDGET_H
|
||||
|
||||
#include <QWidget>
|
||||
|
||||
class LocaleModel;
|
||||
class QTableView;
|
||||
|
||||
// Top-level widget: a table view over LocaleModel showing how each
// locale formats the row-0 sample values.
class LocaleWidget : public QWidget
{
    Q_OBJECT
public:
    LocaleWidget(QWidget *parent = nullptr);
private:
    LocaleModel *m_model;   // owned via QObject parenting
    QTableView *m_view;     // owned via QObject parenting
};
|
||||
|
||||
#endif // LOCALEWIDGET_H
|
13
util/locale_database/testlocales/main.cpp
Normal file
13
util/locale_database/testlocales/main.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
// Copyright (C) 2016 The Qt Company Ltd.
|
||||
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
||||
#include <QApplication>
|
||||
|
||||
#include "localewidget.h"
|
||||
|
||||
// Entry point: show the locale-table widget and run the event loop.
int main(int argc, char *argv[])
{
    QApplication app(argc, argv);
    LocaleWidget wgt;
    wgt.show();
    return app.exec();
}
|
4
util/locale_database/testlocales/testlocales.pro
Normal file
4
util/locale_database/testlocales/testlocales.pro
Normal file
@ -0,0 +1,4 @@
|
||||
# qmake project for the QLocale data inspection tool.
TARGET = testlocales
# Widgets are a separate module since Qt 5; QApplication/QTableView
# will not link without it.
QT += widgets
CONFIG += debug
SOURCES += localemodel.cpp localewidget.cpp main.cpp
HEADERS += localemodel.h localewidget.h
|
Reference in New Issue
Block a user