qt 6.5.1 original

2025-07-04 16:25:27 +08:00 · 2023-10-29 23:33:08 +01:00
parent 71d22ab6b0
commit 85d238dfda
21202 changed files with 5499099 additions and 0 deletions
--- a/tests/auto/corelib/io/qurlinternal/CMakeLists.txt
+++ b/tests/auto/corelib/io/qurlinternal/CMakeLists.txt
@ -0,0 +1,18 @@
+# Copyright (C) 2022 The Qt Company Ltd.
+# SPDX-License-Identifier: BSD-3-Clause
+
+# Skip this directory entirely unless the build enables private tests;
+# the test target below links against Qt::CorePrivate.
+if(NOT QT_FEATURE_private_tests)
+    return()
+endif()
+
+#####################################################################
+## tst_qurlinternal Test:
+#####################################################################
+
+# Defines the tst_qurlinternal test from its two source files.
+qt_internal_add_test(tst_qurlinternal
+    SOURCES
+        tst_qurlinternal.cpp
+        utf8data.cpp
+    LIBRARIES
+        Qt::CorePrivate
+)
--- a/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp
+++ b/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp
@ -0,0 +1,730 @@
+// Copyright (C) 2016 The Qt Company Ltd.
+// Copyright (C) 2016 Intel Corporation.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+
+#include <QtCore/QUrl>
+#include <QTest>
+
+#include "private/qurl_p.h"
+
+// For testsuites
+// IDNA_ACE_PREFIX and IDNA_SUCCESS are used when building the IDNA data rows.
+// NOTE(review): the STRINGPREP_* values are presumably kept so that imported
+// test vectors can be pasted unchanged; they are not referenced in the data
+// tables reviewed here -- confirm before removing them.
+#define IDNA_ACE_PREFIX "xn--"
+#define IDNA_SUCCESS 1
+#define STRINGPREP_NO_UNASSIGNED 1
+#define STRINGPREP_CONTAINS_UNASSIGNED 2
+#define STRINGPREP_CONTAINS_PROHIBITED 3
+#define STRINGPREP_BIDI_BOTH_L_AND_RAL 4
+#define STRINGPREP_BIDI_LEADTRAIL_NOT_RAL 5
+
+using namespace Qt::StringLiterals;
+
+// Fixed-capacity buffer of UTF-16 code units that can be stored in a QTest
+// data table. Only the first N units copied from the source array are
+// meaningful; the remaining units are zero.
+struct char16array {
+    // Maximum number of UTF-16 code units the buffer can hold.
+    static constexpr size_t Capacity = 100;
+
+    // Creates an all-zero buffer.
+    char16array() = default;
+
+    // Copies the N code units of \a array into the start of the buffer.
+    // Arrays larger than Capacity are rejected at compile time instead of
+    // silently overflowing the buffer.
+    template <size_t N>
+    char16array(char16_t (&array)[N])
+    {
+        static_assert(N <= Capacity, "char16array: source array exceeds the buffer capacity");
+        memcpy(points, array, N*sizeof(char16_t));
+    }
+
+    // Zero-initialized so that units past the copied prefix are never indeterminate.
+    char16_t points[Capacity] = {};
+};
+
+// Make these types known to Qt's meta-type system; char16array and
+// QUrl::ComponentFormattingOptions are used as QTest::addColumn<>() column
+// types by the data functions in this file.
+Q_DECLARE_METATYPE(char16array)
+Q_DECLARE_METATYPE(QUrl::FormattingOptions)
+Q_DECLARE_METATYPE(QUrl::ComponentFormattingOptions)
+
+// Test fixture for QUrl internals. Each test slot is paired with a "_data"
+// slot that fills the QTest data table the test reads with QFETCH.
+class tst_QUrlInternal : public QObject
+{
+    Q_OBJECT
+
+private Q_SLOTS:
+    // IDNA internals
+    // The Punycode encoder/decoder test is only declared when
+    // QT_BUILD_INTERNAL is defined.
+#ifdef QT_BUILD_INTERNAL
+    void idna_testsuite_data();
+    void idna_testsuite();
+#endif
+    // QUrl::toAce() / QUrl::fromAce() and host handling
+    void ace_testsuite_data();
+    void ace_testsuite();
+    // host names that must be rejected by QUrl::setHost()
+    void std3violations_data();
+    void std3violations();
+    // host names that must be accepted by QUrl::toAce() and QUrl::setHost()
+    void std3deviations_data();
+    void std3deviations();
+
+    // percent-encoding internals
+    void correctEncodedMistakes_data();
+    void correctEncodedMistakes();
+    void encodingRecode_data();
+    void encodingRecode();
+    void encodingRecodeInvalidUtf8_data();
+    void encodingRecodeInvalidUtf8();
+};
+#include "tst_qurlinternal.moc"
+
+#ifdef QT_BUILD_INTERNAL
+// Data table for idna_testsuite(): each row pairs a UTF-16 sequence with the
+// Punycode label (including the ACE prefix) it is expected to encode to.
+//
+// Columns:
+//   numchars           number of meaningful code units in "unicode"
+//   unicode            the UTF-16 input, stored in a fixed-size char16array
+//   punycode           the expected encoder output
+//   allowunassigned, usestd3asciirules, toasciirc, tounicoderc
+//                      carried along with each test vector; idna_testsuite()
+//                      only fetches the first three columns
+//
+// Every row fills all seven columns and every row tag is unique, so that a
+// failure report identifies exactly one row.
+void tst_QUrlInternal::idna_testsuite_data()
+{
+    QTest::addColumn<int>("numchars");
+    QTest::addColumn<char16array>("unicode");
+    QTest::addColumn<QByteArray>("punycode");
+    QTest::addColumn<int>("allowunassigned");
+    QTest::addColumn<int>("usestd3asciirules");
+    QTest::addColumn<int>("toasciirc");
+    QTest::addColumn<int>("tounicoderc");
+
+    char16_t d1[] = { 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
+                      0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
+                      0x061F };
+    QTest::newRow("Arabic (Egyptian)") << 17 << char16array(d1)
+                                    << QByteArray(IDNA_ACE_PREFIX "egbpdaj6bu4bxfgehfvwxn")
+                                    << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d2[] = { 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D,
+                      0x6587 };
+    QTest::newRow("Chinese (simplified)") << 9 << char16array(d2)
+                                       << QByteArray(IDNA_ACE_PREFIX "ihqwcrb4cv8a8dqg056pqjye")
+                                       << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d3[] = { 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D,
+                      0x6587 };
+    QTest::newRow("Chinese (traditional)") << 9 << char16array(d3)
+                                        << QByteArray(IDNA_ACE_PREFIX "ihqwctvzc91f659drss3x8bo0yb")
+                                        << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d4[] = { 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
+                      0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
+                      0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079 };
+    QTest::newRow("Czech") << 22 << char16array(d4)
+                        << QByteArray(IDNA_ACE_PREFIX "Proprostnemluvesky-uyb24dma41a")
+                        << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d5[] = { 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
+                      0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
+                      0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA };
+    QTest::newRow("Hebrew") << 22 << char16array(d5)
+                         << QByteArray(IDNA_ACE_PREFIX "4dbcagdahymbxekheh6e0a7fei0b")
+                         << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d6[] = { 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
+                      0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
+                      0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
+                      0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902 };
+    QTest::newRow("Hindi (Devanagari)") << 30 << char16array(d6)
+                                     << QByteArray(IDNA_ACE_PREFIX "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd")
+                                     << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d7[] = { 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
+                      0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
+                      0x306E, 0x304B };
+    QTest::newRow("Japanese (kanji and hiragana)") << 18 << char16array(d7)
+                                                << QByteArray(IDNA_ACE_PREFIX "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa")
+                                                << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d8[] = { 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
+                      0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
+                      0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
+                      0x0441, 0x0441, 0x043A, 0x0438 };
+    QTest::newRow("Russian (Cyrillic)") << 28 << char16array(d8)
+                                     << QByteArray(IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l")
+                                     << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d9[] = { 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
+                      0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
+                      0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
+                      0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
+                      0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C };
+    QTest::newRow("Spanish") << 40 << char16array(d9)
+                          << QByteArray(IDNA_ACE_PREFIX "PorqunopuedensimplementehablarenEspaol-fmd56a")
+                          << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d10[] = { 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
+                       0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
+                       0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
+                       0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074 };
+    QTest::newRow("Vietnamese") << 31 << char16array(d10)
+                             << QByteArray(IDNA_ACE_PREFIX "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g")
+                             << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d11[] = { 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F };
+    QTest::newRow("Japanese") << 8 << char16array(d11)
+                           << QByteArray(IDNA_ACE_PREFIX "3B-ww4c5e180e575a65lsy2b")
+                           << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    // this test does NOT include nameprepping, so the capitals will remain
+    char16_t d12[] = { 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
+                       0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
+                       0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053 };
+    QTest::newRow("Japanese2") << 24 << char16array(d12)
+                            << QByteArray(IDNA_ACE_PREFIX "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n")
+                            << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d13[] = { 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
+                       0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
+                       0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
+                       0x6240 };
+    QTest::newRow("Japanese3") << 25 << char16array(d13)
+                            << QByteArray(IDNA_ACE_PREFIX "Hello-Another-Way--fc4qua05auwb3674vfr0b")
+                            << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d14[] = { 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032 };
+    QTest::newRow("Japanese4") << 8 << char16array(d14)
+                            << QByteArray(IDNA_ACE_PREFIX "2-u9tlzr9756bt3uc0v")
+                            << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d15[] = { 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
+                       0x3059, 0x308B, 0x0035, 0x79D2, 0x524D };
+    QTest::newRow("Japanese5") << 13 << char16array(d15)
+                            << QByteArray(IDNA_ACE_PREFIX "MajiKoi5-783gue6qz075azm5e")
+                            << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d16[] = { 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0 };
+    QTest::newRow("Japanese6") << 9 << char16array(d16)
+                            << QByteArray(IDNA_ACE_PREFIX "de-jg4avhby1noc0d")
+                            << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d17[] = { 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067 };
+    QTest::newRow("Japanese7") << 7 << char16array(d17)
+                            << QByteArray(IDNA_ACE_PREFIX "d9juau41awczczp")
+                            << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d18[] = { 0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac };
+    QTest::newRow("Greek") << 8 << char16array(d18)
+                        << QByteArray(IDNA_ACE_PREFIX "hxargifdar")
+                        << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    char16_t d19[] = { 0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
+                       0x0127, 0x0061 };
+    QTest::newRow("Maltese (Malti)") << 10 << char16array(d19)
+                                  << QByteArray(IDNA_ACE_PREFIX "bonusaa-5bb1da")
+                                  << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    // Same code points and expected output as the "Russian (Cyrillic)" row
+    // above; the tag carries a suffix so the two rows stay distinguishable.
+    char16_t d20[] = {0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
+                      0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
+                      0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
+                      0x0441, 0x0441, 0x043a, 0x0438 };
+    QTest::newRow("Russian (Cyrillic) 2") << 28 << char16array(d20)
+                                       << QByteArray(IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l")
+                                       << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+
+    // A surrogate pair: two UTF-16 code units forming one code point.
+    char16_t d21[] = { 0xd800, 0xdef7 };
+    QTest::newRow("U+102F7") << 2 << char16array(d21) << QByteArray(IDNA_ACE_PREFIX "r97c")
+                             << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
+}
+#endif
+
+#ifdef QT_BUILD_INTERNAL
+// Round-trips one data row through the Punycode encoder and decoder:
+// encoding the UTF-16 input must give the expected label, and decoding that
+// label must give the input back.
+void tst_QUrlInternal::idna_testsuite()
+{
+    QFETCH(char16array, unicode);
+    QFETCH(int, numchars);
+    QFETCH(QByteArray, punycode);
+
+    // Only the first numchars units of the fixed-size buffer belong to the row.
+    const QStringView codeUnits{unicode.points, numchars};
+
+    QString result;
+    qt_punycodeEncoder(codeUnits, &result);
+    QCOMPARE(result.toLatin1(), punycode);
+
+    QCOMPARE(qt_punycodeDecoder(result), QString::fromUtf16(unicode.points, numchars));
+}
+#endif
+
+// Data table for ace_testsuite().
+//
+// Columns:
+//   in       the host label (or full domain) handed to the conversions
+//   toace    expected QUrl::toAce() result
+//   fromace  expected QUrl::fromAce() result for the Latin-1 form of "in";
+//            the value "." tells ace_testsuite() to skip that check
+//   unicode  expected result of fromAce(toAce(in)) and of QUrl::host()
+//
+// Rows whose "toace" contains a dot are complete domains; ace_testsuite()
+// does not append its own suffix to them.
+void tst_QUrlInternal::ace_testsuite_data()
+{
+    QTest::addColumn<QString>("in");
+    QTest::addColumn<QString>("toace");
+    QTest::addColumn<QString>("fromace");
+    QTest::addColumn<QString>("unicode");
+
+    QTest::newRow("ascii-lower") << "fluke" << "fluke" << "fluke" << "fluke";
+    QTest::newRow("ascii-mixed") << "FLuke" << "fluke" << "fluke" << "fluke";
+    QTest::newRow("ascii-upper") << "FLUKE" << "fluke" << "fluke" << "fluke";
+
+    // U+FB01 LATIN SMALL LIGATURE FI
+    QTest::newRow("asciifolded") << u"\uFB01le"_s << "file" << "." << "file";
+    QTest::newRow("asciifolded-dotcom") << u"\uFB01le.example.com"_s << "file.example.com" << "." << "file.example.com";
+    QTest::newRow("greek-mu") << QString::fromLatin1("\265V")
+                              <<"xn--v-lmb"
+                              << "."
+                              << QString::fromUtf8("\316\274v");
+
+    QTest::newRow("non-ascii-lower") << QString::fromLatin1("alqualond\353")
+                                     << "xn--alqualond-34a"
+                                     << "."
+                                     << QString::fromLatin1("alqualond\353");
+    QTest::newRow("non-ascii-mixed") << QString::fromLatin1("Alqualond\353")
+                                     << "xn--alqualond-34a"
+                                     << "."
+                                     << QString::fromLatin1("alqualond\353");
+    QTest::newRow("non-ascii-upper") << QString::fromLatin1("ALQUALOND\313")
+                                     << "xn--alqualond-34a"
+                                     << "."
+                                     << QString::fromLatin1("alqualond\353");
+
+    QTest::newRow("idn-lower") << "xn--alqualond-34a" << "xn--alqualond-34a"
+                               << QString::fromLatin1("alqualond\353")
+                               << QString::fromLatin1("alqualond\353");
+    QTest::newRow("idn-mixed") << "Xn--alqualond-34a" << "xn--alqualond-34a"
+                               << QString::fromLatin1("alqualond\353")
+                               << QString::fromLatin1("alqualond\353");
+    QTest::newRow("idn-mixed2") << "XN--alqualond-34a" << "xn--alqualond-34a"
+                                << QString::fromLatin1("alqualond\353")
+                                << QString::fromLatin1("alqualond\353");
+    QTest::newRow("idn-mixed3") << "xn--ALQUALOND-34a" << "xn--alqualond-34a"
+                                << QString::fromLatin1("alqualond\353")
+                                << QString::fromLatin1("alqualond\353");
+    QTest::newRow("idn-mixed4") << "xn--alqualond-34A" << "xn--alqualond-34a"
+                                << QString::fromLatin1("alqualond\353")
+                                << QString::fromLatin1("alqualond\353");
+    QTest::newRow("idn-upper") << "XN--ALQUALOND-34A" << "xn--alqualond-34a"
+                               << QString::fromLatin1("alqualond\353")
+                               << QString::fromLatin1("alqualond\353");
+
+    QTest::newRow("separator-3002") << QString::fromUtf8("example\343\200\202com")
+                                    << "example.com" << "." << "example.com";
+
+    QString egyptianIDN =
+        QString::fromUtf8("\331\210\330\262\330\247\330\261\330\251\055\330\247\331\204\330"
+                          "\243\330\252\330\265\330\247\331\204\330\247\330\252.\331\205"
+                          "\330\265\330\261");
+    QTest::newRow("egyptian-tld-ace")
+        << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c"
+        << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c"
+        << "."
+        << egyptianIDN;
+    QTest::newRow("egyptian-tld-unicode")
+        << egyptianIDN
+        << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c"
+        << "."
+        << egyptianIDN;
+    QTest::newRow("egyptian-tld-mix1")
+        << QString::fromUtf8("\331\210\330\262\330\247\330\261\330\251\055\330\247\331\204\330"
+                             "\243\330\252\330\265\330\247\331\204\330\247\330\252.xn--wgbh1c")
+        << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c"
+        << "."
+        << egyptianIDN;
+    QTest::newRow("egyptian-tld-mix2")
+        << QString::fromUtf8("xn----rmckbbajlc6dj7bxne2c.\331\205\330\265\330\261")
+        << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c"
+        << "."
+        << egyptianIDN;
+
+    QString russianIDN = QString::fromUtf8("\321\217\320\275\320\264\320\265\320\272\321\201.\321\200\321\204");
+    QTest::newRow("russian-tld-ace")
+        << "xn--d1acpjx3f.xn--p1ai"
+        << "xn--d1acpjx3f.xn--p1ai"
+        << "."
+        << russianIDN;
+
+    QString taiwaneseIDN = QString::fromUtf8("\345\217\260\345\214\227\346\214\211\346\221\251.\345\217\260\347\201\243");
+    QTest::newRow("taiwanese-tld-ace")
+        << "xn--djrptm67aikb.xn--kpry57d"
+        << "xn--djrptm67aikb.xn--kpry57d"
+        << "."
+        << taiwaneseIDN;
+
+    // violations / invalids
+    // An invalid ACE label is expected to pass through every conversion
+    // unchanged, so all four columns carry the same text.
+    auto badRow = [](const char *name, const char *text) {
+        QTest::newRow(name) << text << text << text << text;
+    };
+
+    badRow("invalid-punycode", "xn--z");
+
+    // U+00A0 NO-BREAK SPACE encodes to Punycode "6a"
+    // but it is prohibited and should have caused encoding failure
+    badRow("invalid-nameprep-prohibited", "xn--6a");
+
+    // U+00AD SOFT HYPHEN between "a" and "b" encodes to Punycode "ab-5da"
+    // but it should have been removed in the nameprep stage.
+    // The label needs the full "xn--" ACE prefix: with a single hyphen it is
+    // an ordinary ASCII label and the Punycode path is never exercised.
+    badRow("invalid-nameprep-maptonothing", "xn--ab-5da");
+
+    // U+00C1 LATIN CAPITAL LETTER A WITH ACUTE encodes to Punycode "4ba"
+    // but it should have nameprepped to lowercase first
+    badRow("invalid-nameprep-uppercase", "xn--4ba");
+
+    // U+00B5 MICRO SIGN encodes to Punycode "sba"
+    // but it should have nameprepped to NFKC U+03BC GREEK SMALL LETTER MU
+    badRow("invalid-nameprep-nonnfkc", "xn--sba");
+
+    // Decodes to "a" in some versions, see QTBUG-95689
+    badRow("punycode-overflow-1", "xn--5p32g");
+    // Decodes to the same string as "xn--097c" in some versions, see QTBUG-95689
+    badRow("punycode-overflow-2", "xn--400595c");
+
+    // Encodes 2**32, decodes to empty string in some versions
+    badRow("punycode-overflow-3", "xn--l0902716a");
+}
+
+// Checks QUrl::toAce(), QUrl::fromAce() and QUrl host handling for one row.
+// Single-label rows get ".troll.no" appended (also in two other letter
+// casings); rows that already are a full domain are used as they are.
+// A "fromace" value of "." means the direct fromAce() check is skipped.
+void tst_QUrlInternal::ace_testsuite()
+{
+    QFETCH(QString, in);
+    QFETCH(QString, toace);
+    QFETCH(QString, fromace);
+    QFETCH(QString, unicode);
+
+    static const char canonsuffix[] = ".troll.no";
+
+    // A null suffix marks "row is already a full domain"; appending it to a
+    // QString or QByteArray adds nothing.
+    const char *suffix = toace.contains('.') ? nullptr : canonsuffix;
+
+    QString domain = in + suffix;
+    QCOMPARE(QString::fromLatin1(QUrl::toAce(domain)), toace + suffix);
+    if (fromace != ".")
+        QCOMPARE(QUrl::fromAce(domain.toLatin1()), fromace + suffix);
+    QCOMPARE(QUrl::fromAce(QUrl::toAce(domain)), unicode + suffix);
+
+    // The same domain set as the host of a QUrl.
+    QUrl u;
+    u.setHost(domain);
+    QVERIFY(u.isValid());
+    QCOMPARE(u.host(), unicode + suffix);
+    QCOMPARE(u.host(QUrl::EncodeUnicode), toace + suffix);
+    QCOMPARE(u.toEncoded(), "//" + toace.toLatin1() + suffix);
+    QCOMPARE(u.toDisplayString(), "//" + unicode + suffix);
+
+    // Repeat the conversions with differently-cased suffixes; the expected
+    // results still use the lowercase canonical suffix.
+    for (const char *casedSuffix : { ".troll.No", ".troll.NO" }) {
+        domain = in + (suffix ? casedSuffix : "");
+        QCOMPARE(QString::fromLatin1(QUrl::toAce(domain)), toace + suffix);
+        if (fromace != ".")
+            QCOMPARE(QUrl::fromAce(domain.toLatin1()), fromace + suffix);
+        QCOMPARE(QUrl::fromAce(QUrl::toAce(domain)), unicode + suffix);
+    }
+}
+
+// Data table for std3violations(): host strings that QUrl::setHost() must
+// reject. "validUrl" is true for rows where the follow-up check that embeds
+// the string in a full URL is skipped.
+void tst_QUrlInternal::std3violations_data()
+{
+    QTest::addColumn<QString>("source");
+    QTest::addColumn<bool>("validUrl");
+
+    const auto addHost = [](const char *tag, const QString &host, bool validUrl) {
+        QTest::newRow(tag) << host << validUrl;
+    };
+
+    // length and hyphen placement
+    addHost("too-long", "this-domain-is-far-too-long-for-its-own-good-and-should-have-been-limited-to-63-chars", false);
+    addHost("dash-begin", "-x-foo", false);
+    addHost("dash-end", "x-foo-", false);
+    addHost("dash-begin-end", "-foo-", false);
+
+    // characters outside letters, digits and hyphen
+    addHost("control", "\033foo", false);
+    addHost("bang", "foo!", false);
+    addHost("plus", "foo+bar", false);
+    addHost("slash", "foo/bar", true);
+    addHost("colon", "foo:80", true);
+    addHost("question", "foo?bar", true);
+    addHost("at", "foo@bar", true);
+    addHost("backslash", "foo\\bar", false);
+
+    // these characters are transformed by NFKC to non-LDH characters
+    addHost("dot-like", QString::fromUtf8("foo\342\200\244bar"), false);    // U+2024 ONE DOT LEADER
+    addHost("slash-like", QString::fromUtf8("foo\357\274\217bar"), false);  // U+FF0F FULLWIDTH SOLIDUS
+
+    // The following should be invalid but isn't:
+    // the DIVISION SLASH doesn't case-fold to a slash.
+    // Is this a problem with RFC 3490?
+    //addHost("slash-like2", QString::fromUtf8("foo\342\210\225bar"), false); // U+2215 DIVISION SLASH
+}
+
+// Verifies that QUrl::setHost() rejects the row's host string (the host stays
+// empty) and, unless the row is flagged validUrl, that a full URL built
+// around the same string is invalid.
+void tst_QUrlInternal::std3violations()
+{
+    QFETCH(QString, source);
+    QFETCH(bool, validUrl);
+
+    QUrl url;
+    url.setHost(source);
+    QVERIFY(url.host().isEmpty());
+
+    // For validUrl rows the test ends after the setHost() check.
+    if (!validUrl) {
+        url = QUrl("http://" + source + "/some/path");
+        QVERIFY(!url.isValid());
+    }
+}
+
+// Data table for std3deviations(): host strings that must be accepted by
+// both QUrl::toAce() and QUrl::setHost().
+void tst_QUrlInternal::std3deviations_data()
+{
+    QTest::addColumn<QString>("source");
+
+    // "example.com" followed by U+3002
+    QString endingDot3002("example.com");
+    endingDot3002 += QChar(0x3002);
+
+    QTest::newRow("ending-dot") << QString("example.com.");
+    QTest::newRow("ending-dot3002") << endingDot3002;
+    QTest::newRow("underline") << QString("foo_bar");  // QTBUG-7434
+}
+
+// Verifies that the row's host string converts to a non-empty ACE form and
+// is accepted as the host of a QUrl.
+void tst_QUrlInternal::std3deviations()
+{
+    QFETCH(QString, source);
+
+    // Conversion to ACE must produce something.
+    QVERIFY(!QUrl::toAce(source).isEmpty());
+
+    // The host must be kept after setHost().
+    QUrl url;
+    url.setHost(source);
+    QVERIFY(!url.host().isEmpty());
+}
+
+// Data table for correctEncodedMistakes(): inputs containing malformed
+// percent sequences, paired with the expected corrected form in which each
+// offending '%' has become "%25". Except for the "empty" row, the row tag is
+// the input text itself.
+void tst_QUrlInternal::correctEncodedMistakes_data()
+{
+    QTest::addColumn<QString>("input");
+    QTest::addColumn<QString>("expected");
+
+    const auto addCase = [](const char *input, const char *expected) {
+        QTest::newRow(input) << QString(input) << QString(expected);
+    };
+
+    QTest::newRow("empty") << QString("") << QString("");
+
+    // these contain one invalid percent
+    addCase("%", "%25");
+    addCase("3%", "3%25");
+    addCase("13%", "13%25");
+    addCase("13%!", "13%25!");
+    addCase("13%!!", "13%25!!");
+    addCase("13%a", "13%25a");
+    addCase("13%az", "13%25az");
+
+    // two invalid percents
+    addCase("13%%", "13%25%25");
+    addCase("13%a%a", "13%25a%25a");
+    addCase("13%az%az", "13%25az%25az");
+
+    // these are correct (idempotent)
+    addCase("13%25", "13%25");
+    addCase("13%25%25", "13%25%25");
+
+    // these contain one invalid and one valid
+    // the code assumes they are all invalid
+    addCase("13%13..%", "13%2513..%25");
+    addCase("13%..%13", "13%25..%2513");
+
+    // three percents, one invalid
+    addCase("%01%02%3", "%2501%2502%253");
+
+    // now mix bad percents with Unicode decoding
+    addCase("%C2%", "%25C2%25");
+    addCase("%C2%A", "%25C2%25A");
+    addCase("%C2%Az", "%25C2%25Az");
+    addCase("%E2%A0%", "%25E2%25A0%25");
+    addCase("%E2%A0%A", "%25E2%25A0%25A");
+    addCase("%E2%A0%Az", "%25E2%25A0%25Az");
+    addCase("%F2%A0%A0%", "%25F2%25A0%25A0%25");
+    addCase("%F2%A0%A0%A", "%25F2%25A0%25A0%25A");
+    addCase("%F2%A0%A0%Az", "%25F2%25A0%25A0%25Az");
+}
+
+// Runs qt_urlRecode() on the row's input twice -- once with default options
+// and once with QUrl::FullyDecoded -- appending to a string that already
+// holds the row tag, and checks the result in both modes. When
+// qt_urlRecode() returns zero the input is appended verbatim.
+void tst_QUrlInternal::correctEncodedMistakes()
+{
+    QFETCH(QString, input);
+    QFETCH(QString, expected);
+
+    // prepend some data to be sure that it remains there
+    const QString dataTag = QTest::currentDataTag();
+
+    // default mode: expect the corrected, still percent-encoded form
+    QString output = dataTag;
+    const bool recodedDefault = qt_urlRecode(output, input, { });
+    if (!recodedDefault)
+        output += input;
+    QCOMPARE(output, dataTag + expected);
+
+    // full decode mode: expect the corrected form with its percent-encoding
+    // decoded
+    const QString expected2 = QUrl::fromPercentEncoding(expected.toLatin1());
+    output = dataTag;
+    const bool recodedFully = qt_urlRecode(output, input, QUrl::FullyDecoded);
+    if (!recodedFully)
+        output += input;
+    QCOMPARE(output, dataTag + expected2);
+}
+
+// Adds two rows for the UTF-8 byte string \a data to the current data table:
+//   "decode-<name>": percent-encoded input, QUrl::PrettyDecoded, decoded text
+//   "encode-<name>": decoded text, QUrl::FullyEncoded, percent-encoded output
+// The table must already have its (input, mode, expected) columns.
+static void addUtf8Data(const char *name, const char *data)
+{
+    using Options = QUrl::ComponentFormattingOptions;
+
+    const QByteArray tag(name);
+    const QString percentEncoded = QByteArray(data).toPercentEncoding();
+    const QString unicodeText = QString::fromUtf8(data);
+
+    // The decode row deliberately uses PrettyDecoded rather than FullyDecoded.
+    // NOTE(review): the stated reason is that FullyDecoded does not work for
+    // this data by design -- confirm against qt_urlRecode().
+    QTest::newRow("decode-" + tag) << percentEncoded << Options(QUrl::PrettyDecoded) << unicodeText;
+    QTest::newRow("encode-" + tag) << unicodeText << Options(QUrl::FullyEncoded) << percentEncoded;
+}
+
+// Data table for encodingRecode().
+//
+// Columns:
+//   input         text to recode
+//   encodingMode  QUrl::ComponentFormattingOptions to recode with
+//   expected      expected output
+//
+// The first part adds rows that must come out unchanged under each listed
+// formatting mode (the mode value, in hex, is part of the row tag); the rest
+// adds specific encode/decode cases. Row tags are unique so that a failure
+// report identifies exactly one row.
+void tst_QUrlInternal::encodingRecode_data()
+{
+    typedef QUrl::ComponentFormattingOptions F;
+    QTest::addColumn<QString>("input");
+    QTest::addColumn<F>("encodingMode");
+    QTest::addColumn<QString>("expected");
+
+    // -- idempotent tests --
+    static const int modes[] = { QUrl::PrettyDecoded,
+                                 QUrl::EncodeSpaces,
+                                 QUrl::EncodeSpaces | QUrl::EncodeUnicode,
+                                 QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::EncodeDelimiters,
+                                 QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::EncodeDelimiters | QUrl::EncodeReserved,
+                                 QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::EncodeDelimiters | QUrl::DecodeReserved,
+                                 QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::EncodeReserved,
+                                 QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::DecodeReserved,
+                                 QUrl::EncodeSpaces | QUrl::EncodeDelimiters,
+                                 QUrl::EncodeSpaces | QUrl::EncodeDelimiters | QUrl::EncodeReserved,
+                                 QUrl::EncodeSpaces | QUrl::EncodeDelimiters | QUrl::DecodeReserved,
+                                 QUrl::EncodeSpaces | QUrl::EncodeReserved,
+                                 QUrl::EncodeSpaces | QUrl::DecodeReserved,
+
+                                 QUrl::EncodeUnicode,
+                                 QUrl::EncodeUnicode | QUrl::EncodeDelimiters,
+                                 QUrl::EncodeUnicode | QUrl::EncodeDelimiters | QUrl::EncodeReserved,
+                                 QUrl::EncodeUnicode | QUrl::EncodeDelimiters | QUrl::DecodeReserved,
+                                 QUrl::EncodeUnicode | QUrl::EncodeReserved,
+
+                                 QUrl::EncodeDelimiters,
+                                 QUrl::EncodeDelimiters | QUrl::EncodeReserved,
+                                 QUrl::EncodeDelimiters | QUrl::DecodeReserved,
+                                 QUrl::EncodeReserved,
+                                 QUrl::DecodeReserved };
+    for (const int rawMode : modes) {
+        QByteArray code = QByteArray::number(rawMode, 16);
+        F mode = QUrl::ComponentFormattingOption(rawMode);
+
+        QTest::newRow("null-0x" + code) << QString() << mode << QString();
+        QTest::newRow("empty-0x" + code) << "" << mode << "";
+
+        //    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+        // Unreserved characters are never encoded
+        QTest::newRow("alpha-0x" + code) << "abcABCZZzz" << mode << "abcABCZZzz";
+        QTest::newRow("digits-0x" + code) << "01234567890" << mode << "01234567890";
+        QTest::newRow("otherunreserved-0x" + code) << "-._~" << mode << "-._~";
+
+        // Control characters are always encoded
+        // Use uppercase because the output is also uppercased
+        QTest::newRow("control-nul-0x" + code) << "%00" << mode << "%00";
+        QTest::newRow("control-0x" + code) << "%0D%0A%1F%1A%7F" << mode << "%0D%0A%1F%1A%7F";
+
+        // The percent is always encoded
+        QTest::newRow("percent-0x" + code) << "25%2525" << mode << "25%2525";
+
+        // mixed control and unreserved
+        QTest::newRow("control-unreserved-0x" + code) << "Foo%00Bar%0D%0Abksp%7F" << mode << "Foo%00Bar%0D%0Abksp%7F";
+    }
+
+    // however, control characters and the percent *are* decoded in FullyDecoded mode
+    // this is the only exception
+    QTest::newRow("control-nul-fullydecoded") << "%00" << F(QUrl::FullyDecoded) << QStringLiteral("\0");
+    QTest::newRow("control-fullydecoded") << "%0D%0A%1F%1A%7F" << F(QUrl::FullyDecoded) << "\r\n\x1f\x1a\x7f";
+    QTest::newRow("percent-fullydecoded") << "25%2525" << F(QUrl::FullyDecoded) << "25%25";
+    QTest::newRow("control-unreserved-fullydecoded") << "Foo%00Bar%0D%0Abksp%7F" << F(QUrl::FullyDecoded)
+                                                     << QStringLiteral("Foo\0Bar\r\nbksp\x7F");
+
+    //    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+    //    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
+    //                  / "*" / "+" / "," / ";" / "="
+    // in the default operation, delimiters don't get encoded or decoded
+    static const char delimiters[] =  ":/?#[]@" "!$&'()*+,;=";
+    for (const char *c = delimiters; *c; ++c) {
+        QByteArray code = QByteArray::number(*c, 16);
+        QString encoded = QString("abc%") + code.toUpper() + "def" ;
+        QString decoded = QString("abc") + *c + "def" ;
+        QTest::newRow("delimiter-encoded-" + code) << encoded << F(QUrl::FullyEncoded) << encoded;
+        QTest::newRow("delimiter-decoded-" + code) << decoded << F(QUrl::FullyEncoded) << decoded;
+    }
+
+    // encode control characters
+    QTest::newRow("encode-control") << "\1abc\2\033esc" << F(QUrl::PrettyDecoded) << "%01abc%02%1Besc";
+    QTest::newRow("encode-nul") << QString::fromLatin1("abc\0def", 7) << F(QUrl::PrettyDecoded) << "abc%00def";
+
+    // space
+    QTest::newRow("space-leave-decoded") << "Hello World " << F(QUrl::PrettyDecoded) << "Hello World ";
+    QTest::newRow("space-leave-encoded") << "Hello%20World%20" << F(QUrl::FullyEncoded) << "Hello%20World%20";
+    QTest::newRow("space-encode") << "Hello World " << F(QUrl::FullyEncoded) << "Hello%20World%20";
+    QTest::newRow("space-decode") << "Hello%20World%20" << F(QUrl::PrettyDecoded) << "Hello World ";
+
+    // decode unreserved
+    QTest::newRow("unreserved-decode") << "%66%6f%6f%42a%72" << F(QUrl::FullyEncoded) << "fooBar";
+
+    // mix encoding with decoding
+    QTest::newRow("encode-control-decode-space") << "\1\2%200" << F(QUrl::PrettyDecoded) << "%01%02 0";
+    QTest::newRow("decode-space-encode-control") << "%20\1\2" << F(QUrl::PrettyDecoded) << " %01%02";
+
+    // decode and encode valid UTF-8 data
+    // invalid is tested in encodingRecodeInvalidUtf8
+    addUtf8Data("utf8-2char-1", "\xC2\x80"); // U+0080
+    addUtf8Data("utf8-2char-2", "\xDF\xBF"); // U+07FF
+    addUtf8Data("utf8-3char-1", "\xE0\xA0\x80"); // U+0800
+    addUtf8Data("utf8-3char-2", "\xED\x9F\xBF"); // U+D7FF
+    addUtf8Data("utf8-3char-3", "\xEE\x80\x80"); // U+E000
+    addUtf8Data("utf8-3char-4", "\xEF\xBF\xBD"); // U+FFFD
+    addUtf8Data("utf8-4char-1", "\xF0\x90\x80\x80"); // U+10000
+    addUtf8Data("utf8-4char-2", "\xF4\x8F\xBF\xBD"); // U+10FFFD
+
+    // longer UTF-8 sequences, mixed with unreserved
+    addUtf8Data("utf8-string-1", "R\xc3\xa9sum\xc3\xa9");
+    addUtf8Data("utf8-string-2", "\xDF\xBF\xE0\xA0\x80""A");
+    addUtf8Data("utf8-string-3", "\xE0\xA0\x80\xDF\xBF...");
+
+    QTest::newRow("encode-unicode-noncharacter") << QString(QChar(0xffff)) << F(QUrl::FullyEncoded) << "%EF%BF%BF";
+    QTest::newRow("decode-unicode-noncharacter") << QString(QChar(0xffff)) << F(QUrl::PrettyDecoded) << QString::fromUtf8("\xEF\xBF\xBF");
+
+    // special cases: stuff we can encode, but not decode
+    // NOTE: in Unicode terminology U+D800 is a high (leading) surrogate and
+    // U+DC00 a low (trailing) one, i.e. the reverse of the "lo"/"hi" in these
+    // row tags; the tags are kept as they are.
+    QTest::newRow("unicode-lo-surrogate") << QString(QChar(0xD800)) << F(QUrl::FullyEncoded) << "%ED%A0%80";
+    QTest::newRow("unicode-hi-surrogate") << QString(QChar(0xDC00)) << F(QUrl::FullyEncoded) << "%ED%B0%80";
+
+    // a couple of Unicode strings with leading spaces
+    QTest::newRow("space-unicode") << QString::fromUtf8(" \xc2\xa0") << F(QUrl::FullyEncoded) << "%20%C2%A0";
+    QTest::newRow("space-space-unicode") << QString::fromUtf8("  \xc2\xa0") << F(QUrl::FullyEncoded) << "%20%20%C2%A0";
+    QTest::newRow("space-space-space-unicode") << QString::fromUtf8("   \xc2\xa0") << F(QUrl::FullyEncoded) << "%20%20%20%C2%A0";
+
+    // hex case testing
+    QTest::newRow("FF") << "%FF" << F(QUrl::FullyEncoded) << "%FF";
+    QTest::newRow("Ff") << "%Ff" << F(QUrl::FullyEncoded) << "%FF";
+    QTest::newRow("fF") << "%fF" << F(QUrl::FullyEncoded) << "%FF";
+    QTest::newRow("ff") << "%ff" << F(QUrl::FullyEncoded) << "%FF";
+
+    // decode UTF-8 mixed with non-UTF-8 and unreserved
+    QTest::newRow("utf8-mix-1") << "%80%C2%80" << F(QUrl::PrettyDecoded) << QString::fromUtf8("%80\xC2\x80");
+    QTest::newRow("utf8-mix-2") << "%C2%C2%80" << F(QUrl::PrettyDecoded) << QString::fromUtf8("%C2\xC2\x80");
+    QTest::newRow("utf8-mix-3") << "%E0%C2%80" << F(QUrl::PrettyDecoded) << QString::fromUtf8("%E0\xC2\x80");
+    QTest::newRow("utf8-mix-4") << "A%C2%80" << F(QUrl::PrettyDecoded) << QString::fromUtf8("A\xC2\x80");
+    QTest::newRow("utf8-mix-5") << "%C2%80A" << F(QUrl::PrettyDecoded) << QString::fromUtf8("\xC2\x80""A");
+}
+
+void tst_QUrlInternal::encodingRecode()
+{
+    // Each row supplies the text to recode, the text expected afterwards and
+    // the formatting options to recode with.
+    QFETCH(QString, input);
+    QFETCH(QString, expected);
+    QFETCH(QUrl::ComponentFormattingOptions, encodingMode);
+
+    // Seed the output buffer with the row's tag, and expect that tag in front
+    // of the result, so the comparison also checks that content already in
+    // the buffer is left in place.
+    const QString prefix = QTest::currentDataTag();
+    QString output = prefix;
+    expected = prefix + expected;
+
+    // When qt_urlRecode() returns zero, the input is appended verbatim.
+    const bool recoded = qt_urlRecode(output, input, encodingMode);
+    if (!recoded)
+        output.append(input);
+
+    QCOMPARE(output, expected);
+}
+
+void tst_QUrlInternal::encodingRecodeInvalidUtf8_data()
+{
+    // Column layout used by every row added below.
+    QTest::addColumn<QByteArray>("utf8");
+    QTest::addColumn<QString>("utf16");
+
+    // Most rows come from two loader functions that are defined in utf8data.cpp.
+    extern void loadInvalidUtf8Rows();
+    extern void loadNonCharactersRows();
+    loadInvalidUtf8Rows();
+    loadNonCharactersRows();
+
+    // Extra rows: a three-byte lead byte (0xE0) whose following bytes are
+    // interrupted or replaced by plain ASCII characters.
+    static const struct {
+        const char *tag;
+        const char *bytes;
+    } mixedRows[] = {
+        { "utf8-mix-4", "\xE0.A2\x80" },
+        { "utf8-mix-5", "\xE0\xA2.80" },
+        { "utf8-mix-6", "\xE0\xA2\x33" },
+    };
+    for (const auto &row : mixedRows)
+        QTest::newRow(row.tag) << QByteArray(row.bytes);
+}
+
+void tst_QUrlInternal::encodingRecodeInvalidUtf8()
+{
+    QFETCH(QByteArray, utf8);
+    // The raw bytes are handed to the recoder in percent-encoded form.
+    const QString input = utf8.toPercentEncoding();
+
+    // Recodes `input` into a buffer that already holds the row's tag (to be
+    // sure that existing content remains there); when qt_urlRecode() returns
+    // zero, the input is appended verbatim instead.
+    const auto recodeAfterTag = [&input](QUrl::ComponentFormattingOptions mode) {
+        QString result = QTest::currentDataTag();
+        if (!qt_urlRecode(result, input, mode))
+            result += input;
+        return result;
+    };
+
+    // decoding must leave the percent-encoded input as it is
+    QString output = recodeAfterTag(QUrl::PrettyDecoded);
+    QCOMPARE(output, QTest::currentDataTag() + input);
+
+    // control pass: encoding must not alter it either
+    output = recodeAfterTag(QUrl::FullyEncoded);
+    QCOMPARE(output, QTest::currentDataTag() + input);
+
+    // verify for security reasons that everything after the tag is either
+    // ASCII or QChar::ReplacementCharacter
+    // NOTE(review): this pass uses the same mode as the control pass above, so
+    // the loop is expected to see the same text - confirm the intended mode.
+    output = recodeAfterTag(QUrl::FullyEncoded);
+    const int tagLength = int(strlen(QTest::currentDataTag()));
+    for (int i = tagLength; i < output.size(); ++i) {
+        QVERIFY2(output.at(i).unicode() < 0x80 || output.at(i) == QChar::ReplacementCharacter,
+                 qPrintable(QString("Character at i == %1 was U+%2").arg(i).arg(output.at(i).unicode(), 4, 16, QLatin1Char('0'))));
+    }
+}
+
+// Entry point of the test executable for the tst_QUrlInternal test class.
+// NOTE(review): the APPLESS variant presumably runs the test without creating
+// an application object - confirm against the Qt Test documentation.
+QTEST_APPLESS_MAIN(tst_QUrlInternal)
--- a/tests/auto/corelib/io/qurlinternal/utf8data.cpp
+++ b/tests/auto/corelib/io/qurlinternal/utf8data.cpp
@ -0,0 +1,135 @@
+// Copyright (C) 2018 The Qt Company Ltd.
+// Copyright (C) 2018 Intel Corporation.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+#include <QTest>
+
+// Adds one data row per malformed UTF-8 byte sequence to the current QTest
+// data table. Each row streams a single QByteArray value; the caller is
+// responsible for having declared the table's columns beforehand.
+//
+// Notation used in the bit-layout comments below: 'x' marks the fixed 1 bits
+// of a lead or continuation byte, 'z' the fixed 0 bit that ends that marker,
+// and the remaining digits are the payload bits of the code point.
+void loadInvalidUtf8Rows()
+{
+    // Wrong continuations
+    QTest::newRow("bad-continuation-1char") << QByteArray("\x80");
+    QTest::newRow("bad-continuation-2chars-1") << QByteArray("\xC2\xC0");
+    QTest::newRow("bad-continuation-2chars-2") << QByteArray("\xC3\xDF");
+    QTest::newRow("bad-continuation-2chars-3") << QByteArray("\xC7\xF0");
+    QTest::newRow("bad-continuation-3chars-1") << QByteArray("\xE0\xA0\xC0");
+    QTest::newRow("bad-continuation-3chars-2") << QByteArray("\xE0\xC0\xA0");
+    QTest::newRow("bad-continuation-4chars-1") << QByteArray("\xF0\x90\x80\xC0");
+    QTest::newRow("bad-continuation-4chars-2") << QByteArray("\xF0\x90\xC0\x80");
+    QTest::newRow("bad-continuation-4chars-3") << QByteArray("\xF0\xC0\x80\x80");
+
+    // Too short
+    QTest::newRow("too-short-2chars") << QByteArray("\xC2");
+    QTest::newRow("too-short-3chars-1") << QByteArray("\xE0");
+    QTest::newRow("too-short-3chars-2") << QByteArray("\xE0\xA0");
+    QTest::newRow("too-short-4chars-1") << QByteArray("\xF0");
+    QTest::newRow("too-short-4chars-2") << QByteArray("\xF0\x90");
+    QTest::newRow("too-short-4chars-3") << QByteArray("\xF0\x90\x80");
+
+    // Surrogates must not be present either
+    // U+D800:        1101   10 0000   00 0000
+    // encoding: xxxz:1101 xz10:0000 xz00:0000
+    QTest::newRow("hi-surrogate") << QByteArray("\xED\xA0\x80");
+    // U+DC00:        1101   11 0000   00 0000
+    // encoding: xxxz:1101 xz11:0000 xz00:0000
+    QTest::newRow("lo-surrogate") << QByteArray("\xED\xB0\x80");
+
+    // not even as a pair:
+    QTest::newRow("surrogate-pair") << QByteArray("\xED\xA0\x80\xED\xB0\x80");
+
+    // Characters outside the Unicode range:
+    // 0x110000:   00 0100   01 0000   00 0000   00 0000
+    // encoding: xxxx:z100 xz01:0000 xz00:0000 xz00:0000
+    QTest::newRow("non-unicode-1") << QByteArray("\xF4\x90\x80\x80");
+    // 0x200000:             00 1000   00 0000   00 0000   00 0000
+    // encoding: xxxx:xz00 xz00:1000 xz00:0000 xz00:0000 xz00:0000
+    QTest::newRow("non-unicode-2") << QByteArray("\xF8\x88\x80\x80\x80");
+    // 0x04000000:              0100   00 0000   00 0000   00 0000   00 0000
+    // encoding: xxxx:xxz0 xz00:0100 xz00:0000 xz00:0000 xz00:0000 xz00:0000
+    QTest::newRow("non-unicode-3") << QByteArray("\xFC\x84\x80\x80\x80\x80");
+    // 0x7fffffff:       1   11 1111   11 1111   11 1111   11 1111   11 1111
+    // encoding: xxxx:xxz1 xz11:1111 xz11:1111 xz11:1111 xz11:1111 xz11:1111
+    QTest::newRow("non-unicode-4") << QByteArray("\xFD\xBF\xBF\xBF\xBF\xBF");
+
+    // As seen above, 0xFE and 0xFF never appear:
+    QTest::newRow("fe") << QByteArray("\xFE");
+    QTest::newRow("fe-bis") << QByteArray("\xFE\xBF\xBF\xBF\xBF\xBF\xBF");
+    QTest::newRow("ff") << QByteArray("\xFF");
+    QTest::newRow("ff-bis") << QByteArray("\xFF\xBF\xBF\xBF\xBF\xBF\xBF\xBF");
+
+    // some combinations in UTF-8 are invalid even though they have the proper bits set
+    // these are known as overlong sequences
+    // (rows are tagged overlong-N-M: a code point whose proper encoding takes
+    // N bytes, encoded in M bytes)
+
+    // "A": U+0041:                                               01   00 0001
+    // overlong 2:                                         xxz0:0001 xz00:0001
+    QTest::newRow("overlong-1-2") << QByteArray("\xC1\x81");
+    // overlong 3:                               xxxz:0000 xz00:0001 xz00:0001
+    QTest::newRow("overlong-1-3") << QByteArray("\xE0\x81\x81");
+    // overlong 4:                     xxxx:z000 xz00:0000 xz00:0001 xz00:0001
+    QTest::newRow("overlong-1-4") << QByteArray("\xF0\x80\x81\x81");
+    // overlong 5:           xxxx:xz00 xz00:0000 xz00:0000 xz00:0001 xz00:0001
+    QTest::newRow("overlong-1-5") << QByteArray("\xF8\x80\x80\x81\x81");
+    // overlong 6: xxxx:xxz0 xz00:0000 xz00:0000 xz00:0000 xz00:0001 xz00:0001
+    QTest::newRow("overlong-1-6") << QByteArray("\xFC\x80\x80\x80\x81\x81");
+
+    // U+0080:                                                    10   00 0000
+    // proper encoding:                                    xxz0:0010 xz00:0000
+    // overlong 3:                               xxxz:0000 xz00:0010 xz00:0000
+    QTest::newRow("overlong-2-3") << QByteArray("\xE0\x82\x80");
+    // overlong 4:                     xxxx:z000 xz00:0000 xz00:0010 xz00:0000
+    QTest::newRow("overlong-2-4") << QByteArray("\xF0\x80\x82\x80");
+    // overlong 5:           xxxx:xz00 xz00:0000 xz00:0000 xz00:0010 xz00:0000
+    QTest::newRow("overlong-2-5") << QByteArray("\xF8\x80\x80\x82\x80");
+    // overlong 6: xxxx:xxz0 xz00:0000 xz00:0000 xz00:0000 xz00:0010 xz00:0000
+    QTest::newRow("overlong-2-6") << QByteArray("\xFC\x80\x80\x80\x82\x80");
+
+    // U+0800:                                               10 0000   00 0000
+    // proper encoding:                          xxxz:0000 xz10:0000 xz00:0000
+    // overlong 4:                     xxxx:z000 xz00:0000 xz10:0000 xz00:0000
+    QTest::newRow("overlong-3-4") << QByteArray("\xF0\x80\xA0\x80");
+    // overlong 5:           xxxx:xz00 xz00:0000 xz00:0000 xz10:0000 xz00:0000
+    QTest::newRow("overlong-3-5") << QByteArray("\xF8\x80\x80\xA0\x80");
+    // overlong 6: xxxx:xxz0 xz00:0000 xz00:0000 xz00:0000 xz10:0000 xz00:0000
+    QTest::newRow("overlong-3-6") << QByteArray("\xFC\x80\x80\x80\xA0\x80");
+
+    // U+010000:                                   01 0000   00 0000   00 0000
+    // proper encoding:                xxxx:z000 xz01:0000 xz00:0000 xz00:0000
+    // overlong 5:           xxxx:xz00 xz00:0000 xz01:0000 xz00:0000 xz00:0000
+    // (payload byte is 0x90; 0x84 there would encode U+4000 instead)
+    QTest::newRow("overlong-4-5") << QByteArray("\xF8\x80\x90\x80\x80");
+    // overlong 6: xxxx:xxz0 xz00:0000 xz00:0000 xz01:0000 xz00:0000 xz00:0000
+    QTest::newRow("overlong-4-6") << QByteArray("\xFC\x80\x80\x90\x80\x80");
+
+}
+
+// Adds one data row per Unicode non-character to the current QTest data
+// table. Each row is tagged with the code point in lowercase hex and streams
+// two values: the UTF-8 bytes (QByteArray) and the same code point as UTF-16
+// (QString). The caller is responsible for having declared the columns.
+void loadNonCharactersRows()
+{
+    // Unicode has a couple of "non-characters" that one can use internally
+    // These characters are allowed for text-interchange (see http://www.unicode.org/versions/corrigendum9.html)
+    //
+    // Those are the last two entries of each Unicode Plane (U+FFFE, U+FFFF,
+    // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
+    // U+FDEF (inclusive)
+
+    // U+FDD0 through U+FDEF: 32 code points, all encoded as EF B7 xx, with the
+    // last byte running from 0x90 (0220 octal) up to 0xAF
+    for (int i = 0; i < 32; ++i) {
+        char utf8[] = { char(0357), char(0267), char(0220 + i), 0 };
+        QString utf16 = QChar(0xfdd0 + i);
+        QTest::newRow(qPrintable(QString::number(0xfdd0 + i, 16))) << QByteArray(utf8) << utf16;
+    }
+
+    // the last two in Planes 1 through 16
+    for (uint plane = 1; plane <= 16; ++plane) {
+        for (uint lower = 0xfffe; lower < 0x10000; ++lower) {
+            uint ucs4 = (plane << 16) | lower;
+            // four-byte form: 3 payload bits in the lead byte, then 6 bits in
+            // each of the three continuation bytes
+            char utf8[] = { char(0xf0 | uchar(ucs4 >> 18)),
+                            char(0x80 | (uchar(ucs4 >> 12) & 0x3f)),
+                            char(0x80 | (uchar(ucs4 >> 6) & 0x3f)),
+                            char(0x80 | (uchar(ucs4) & 0x3f)),
+                            0 };
+            const auto utf16 = QChar::fromUcs4(ucs4);
+
+            QTest::newRow(qPrintable(QString::number(ucs4, 16))) << QByteArray(utf8) << QStringView{utf16}.toString();
+        }
+    }
+
+    // the last two in Plane 0, which the loop above does not cover
+    QTest::newRow("fffe") << QByteArray("\xEF\xBF\xBE") << QString(QChar(0xfffe));
+    QTest::newRow("ffff") << QByteArray("\xEF\xBF\xBF") << QString(QChar(0xffff));
+}