Remove string combining character list. (#616)

It was possible to access it from the WordBreakProperty list
This commit is contained in:
Arthur Sonzogni 2023-04-10 17:00:02 +02:00 committed by GitHub
parent 4bdbf6352d
commit 1397c10234
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 115 deletions

View File

@ -48,6 +48,7 @@ enum class WordBreakProperty {
WSegSpace, WSegSpace,
ZWJ, ZWJ,
}; };
WordBreakProperty CodepointToWordBreakProperty(uint32_t codepoint);
std::vector<WordBreakProperty> Utf8ToWordBreakProperty( std::vector<WordBreakProperty> Utf8ToWordBreakProperty(
const std::string& input); const std::string& input);

View File

@ -21,120 +21,6 @@ struct Interval {
uint32_t last; uint32_t last;
}; };
// Sorted list of non-overlapping intervals of non-spacing characters
// generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c"
// As of Unicode 13.0.0
const std::array<Interval, 324> g_combining_characters = {{
{0x00300, 0x0036f}, {0x00483, 0x00489}, {0x00591, 0x005bd},
{0x005bf, 0x005bf}, {0x005c1, 0x005c2}, {0x005c4, 0x005c5},
{0x005c7, 0x005c7}, {0x00610, 0x0061a}, {0x0064b, 0x0065f},
{0x00670, 0x00670}, {0x006d6, 0x006dc}, {0x006df, 0x006e4},
{0x006e7, 0x006e8}, {0x006ea, 0x006ed}, {0x00711, 0x00711},
{0x00730, 0x0074a}, {0x007a6, 0x007b0}, {0x007eb, 0x007f3},
{0x007fd, 0x007fd}, {0x00816, 0x00819}, {0x0081b, 0x00823},
{0x00825, 0x00827}, {0x00829, 0x0082d}, {0x00859, 0x0085b},
{0x008d3, 0x008e1}, {0x008e3, 0x00902}, {0x0093a, 0x0093a},
{0x0093c, 0x0093c}, {0x00941, 0x00948}, {0x0094d, 0x0094d},
{0x00951, 0x00957}, {0x00962, 0x00963}, {0x00981, 0x00981},
{0x009bc, 0x009bc}, {0x009c1, 0x009c4}, {0x009cd, 0x009cd},
{0x009e2, 0x009e3}, {0x009fe, 0x009fe}, {0x00a01, 0x00a02},
{0x00a3c, 0x00a3c}, {0x00a41, 0x00a42}, {0x00a47, 0x00a48},
{0x00a4b, 0x00a4d}, {0x00a51, 0x00a51}, {0x00a70, 0x00a71},
{0x00a75, 0x00a75}, {0x00a81, 0x00a82}, {0x00abc, 0x00abc},
{0x00ac1, 0x00ac5}, {0x00ac7, 0x00ac8}, {0x00acd, 0x00acd},
{0x00ae2, 0x00ae3}, {0x00afa, 0x00aff}, {0x00b01, 0x00b01},
{0x00b3c, 0x00b3c}, {0x00b3f, 0x00b3f}, {0x00b41, 0x00b44},
{0x00b4d, 0x00b4d}, {0x00b55, 0x00b56}, {0x00b62, 0x00b63},
{0x00b82, 0x00b82}, {0x00bc0, 0x00bc0}, {0x00bcd, 0x00bcd},
{0x00c00, 0x00c00}, {0x00c04, 0x00c04}, {0x00c3e, 0x00c40},
{0x00c46, 0x00c48}, {0x00c4a, 0x00c4d}, {0x00c55, 0x00c56},
{0x00c62, 0x00c63}, {0x00c81, 0x00c81}, {0x00cbc, 0x00cbc},
{0x00cbf, 0x00cbf}, {0x00cc6, 0x00cc6}, {0x00ccc, 0x00ccd},
{0x00ce2, 0x00ce3}, {0x00d00, 0x00d01}, {0x00d3b, 0x00d3c},
{0x00d41, 0x00d44}, {0x00d4d, 0x00d4d}, {0x00d62, 0x00d63},
{0x00d81, 0x00d81}, {0x00dca, 0x00dca}, {0x00dd2, 0x00dd4},
{0x00dd6, 0x00dd6}, {0x00e31, 0x00e31}, {0x00e34, 0x00e3a},
{0x00e47, 0x00e4e}, {0x00eb1, 0x00eb1}, {0x00eb4, 0x00ebc},
{0x00ec8, 0x00ecd}, {0x00f18, 0x00f19}, {0x00f35, 0x00f35},
{0x00f37, 0x00f37}, {0x00f39, 0x00f39}, {0x00f71, 0x00f7e},
{0x00f80, 0x00f84}, {0x00f86, 0x00f87}, {0x00f8d, 0x00f97},
{0x00f99, 0x00fbc}, {0x00fc6, 0x00fc6}, {0x0102d, 0x01030},
{0x01032, 0x01037}, {0x01039, 0x0103a}, {0x0103d, 0x0103e},
{0x01058, 0x01059}, {0x0105e, 0x01060}, {0x01071, 0x01074},
{0x01082, 0x01082}, {0x01085, 0x01086}, {0x0108d, 0x0108d},
{0x0109d, 0x0109d}, {0x0135d, 0x0135f}, {0x01712, 0x01714},
{0x01732, 0x01734}, {0x01752, 0x01753}, {0x01772, 0x01773},
{0x017b4, 0x017b5}, {0x017b7, 0x017bd}, {0x017c6, 0x017c6},
{0x017c9, 0x017d3}, {0x017dd, 0x017dd}, {0x0180b, 0x0180d},
{0x01885, 0x01886}, {0x018a9, 0x018a9}, {0x01920, 0x01922},
{0x01927, 0x01928}, {0x01932, 0x01932}, {0x01939, 0x0193b},
{0x01a17, 0x01a18}, {0x01a1b, 0x01a1b}, {0x01a56, 0x01a56},
{0x01a58, 0x01a5e}, {0x01a60, 0x01a60}, {0x01a62, 0x01a62},
{0x01a65, 0x01a6c}, {0x01a73, 0x01a7c}, {0x01a7f, 0x01a7f},
{0x01ab0, 0x01ac0}, {0x01b00, 0x01b03}, {0x01b34, 0x01b34},
{0x01b36, 0x01b3a}, {0x01b3c, 0x01b3c}, {0x01b42, 0x01b42},
{0x01b6b, 0x01b73}, {0x01b80, 0x01b81}, {0x01ba2, 0x01ba5},
{0x01ba8, 0x01ba9}, {0x01bab, 0x01bad}, {0x01be6, 0x01be6},
{0x01be8, 0x01be9}, {0x01bed, 0x01bed}, {0x01bef, 0x01bf1},
{0x01c2c, 0x01c33}, {0x01c36, 0x01c37}, {0x01cd0, 0x01cd2},
{0x01cd4, 0x01ce0}, {0x01ce2, 0x01ce8}, {0x01ced, 0x01ced},
{0x01cf4, 0x01cf4}, {0x01cf8, 0x01cf9}, {0x01dc0, 0x01df9},
{0x01dfb, 0x01dff}, {0x020d0, 0x020f0}, {0x02cef, 0x02cf1},
{0x02d7f, 0x02d7f}, {0x02de0, 0x02dff}, {0x0302a, 0x0302d},
{0x03099, 0x0309a}, {0x0a66f, 0x0a672}, {0x0a674, 0x0a67d},
{0x0a69e, 0x0a69f}, {0x0a6f0, 0x0a6f1}, {0x0a802, 0x0a802},
{0x0a806, 0x0a806}, {0x0a80b, 0x0a80b}, {0x0a825, 0x0a826},
{0x0a82c, 0x0a82c}, {0x0a8c4, 0x0a8c5}, {0x0a8e0, 0x0a8f1},
{0x0a8ff, 0x0a8ff}, {0x0a926, 0x0a92d}, {0x0a947, 0x0a951},
{0x0a980, 0x0a982}, {0x0a9b3, 0x0a9b3}, {0x0a9b6, 0x0a9b9},
{0x0a9bc, 0x0a9bd}, {0x0a9e5, 0x0a9e5}, {0x0aa29, 0x0aa2e},
{0x0aa31, 0x0aa32}, {0x0aa35, 0x0aa36}, {0x0aa43, 0x0aa43},
{0x0aa4c, 0x0aa4c}, {0x0aa7c, 0x0aa7c}, {0x0aab0, 0x0aab0},
{0x0aab2, 0x0aab4}, {0x0aab7, 0x0aab8}, {0x0aabe, 0x0aabf},
{0x0aac1, 0x0aac1}, {0x0aaec, 0x0aaed}, {0x0aaf6, 0x0aaf6},
{0x0abe5, 0x0abe5}, {0x0abe8, 0x0abe8}, {0x0abed, 0x0abed},
{0x0fb1e, 0x0fb1e}, {0x0fe00, 0x0fe0f}, {0x0fe20, 0x0fe2f},
{0x101fd, 0x101fd}, {0x102e0, 0x102e0}, {0x10376, 0x1037a},
{0x10a01, 0x10a03}, {0x10a05, 0x10a06}, {0x10a0c, 0x10a0f},
{0x10a38, 0x10a3a}, {0x10a3f, 0x10a3f}, {0x10ae5, 0x10ae6},
{0x10d24, 0x10d27}, {0x10eab, 0x10eac}, {0x10f46, 0x10f50},
{0x11001, 0x11001}, {0x11038, 0x11046}, {0x1107f, 0x11081},
{0x110b3, 0x110b6}, {0x110b9, 0x110ba}, {0x11100, 0x11102},
{0x11127, 0x1112b}, {0x1112d, 0x11134}, {0x11173, 0x11173},
{0x11180, 0x11181}, {0x111b6, 0x111be}, {0x111c9, 0x111cc},
{0x111cf, 0x111cf}, {0x1122f, 0x11231}, {0x11234, 0x11234},
{0x11236, 0x11237}, {0x1123e, 0x1123e}, {0x112df, 0x112df},
{0x112e3, 0x112ea}, {0x11300, 0x11301}, {0x1133b, 0x1133c},
{0x11340, 0x11340}, {0x11366, 0x1136c}, {0x11370, 0x11374},
{0x11438, 0x1143f}, {0x11442, 0x11444}, {0x11446, 0x11446},
{0x1145e, 0x1145e}, {0x114b3, 0x114b8}, {0x114ba, 0x114ba},
{0x114bf, 0x114c0}, {0x114c2, 0x114c3}, {0x115b2, 0x115b5},
{0x115bc, 0x115bd}, {0x115bf, 0x115c0}, {0x115dc, 0x115dd},
{0x11633, 0x1163a}, {0x1163d, 0x1163d}, {0x1163f, 0x11640},
{0x116ab, 0x116ab}, {0x116ad, 0x116ad}, {0x116b0, 0x116b5},
{0x116b7, 0x116b7}, {0x1171d, 0x1171f}, {0x11722, 0x11725},
{0x11727, 0x1172b}, {0x1182f, 0x11837}, {0x11839, 0x1183a},
{0x1193b, 0x1193c}, {0x1193e, 0x1193e}, {0x11943, 0x11943},
{0x119d4, 0x119d7}, {0x119da, 0x119db}, {0x119e0, 0x119e0},
{0x11a01, 0x11a0a}, {0x11a33, 0x11a38}, {0x11a3b, 0x11a3e},
{0x11a47, 0x11a47}, {0x11a51, 0x11a56}, {0x11a59, 0x11a5b},
{0x11a8a, 0x11a96}, {0x11a98, 0x11a99}, {0x11c30, 0x11c36},
{0x11c38, 0x11c3d}, {0x11c3f, 0x11c3f}, {0x11c92, 0x11ca7},
{0x11caa, 0x11cb0}, {0x11cb2, 0x11cb3}, {0x11cb5, 0x11cb6},
{0x11d31, 0x11d36}, {0x11d3a, 0x11d3a}, {0x11d3c, 0x11d3d},
{0x11d3f, 0x11d45}, {0x11d47, 0x11d47}, {0x11d90, 0x11d91},
{0x11d95, 0x11d95}, {0x11d97, 0x11d97}, {0x11ef3, 0x11ef4},
{0x16af0, 0x16af4}, {0x16b30, 0x16b36}, {0x16f4f, 0x16f4f},
{0x16f8f, 0x16f92}, {0x16fe4, 0x16fe4}, {0x1bc9d, 0x1bc9e},
{0x1d167, 0x1d169}, {0x1d17b, 0x1d182}, {0x1d185, 0x1d18b},
{0x1d1aa, 0x1d1ad}, {0x1d242, 0x1d244}, {0x1da00, 0x1da36},
{0x1da3b, 0x1da6c}, {0x1da75, 0x1da75}, {0x1da84, 0x1da84},
{0x1da9b, 0x1da9f}, {0x1daa1, 0x1daaf}, {0x1e000, 0x1e006},
{0x1e008, 0x1e018}, {0x1e01b, 0x1e021}, {0x1e023, 0x1e024},
{0x1e026, 0x1e02a}, {0x1e130, 0x1e136}, {0x1e2ec, 0x1e2ef},
{0x1e8d0, 0x1e8d6}, {0x1e944, 0x1e94a}, {0xe0100, 0xe01ef},
}};
// As of Unicode 13.0.0 // As of Unicode 13.0.0
const std::array<Interval, 116> g_full_width_characters = {{ const std::array<Interval, 116> g_full_width_characters = {{
{0x01100, 0x0115f}, {0x0231a, 0x0231b}, {0x02329, 0x0232a}, {0x01100, 0x0115f}, {0x0231a, 0x0231b}, {0x02329, 0x0232a},
@ -1526,7 +1412,7 @@ bool Bisearch(uint32_t ucs, const std::array<C, N> table, C* out) {
} }
bool IsCombining(uint32_t ucs) { bool IsCombining(uint32_t ucs) {
return Bisearch(ucs, g_combining_characters); return ftxui::CodepointToWordBreakProperty(ucs) == WBP::Extend;
} }
bool IsFullWidth(uint32_t ucs) { bool IsFullWidth(uint32_t ucs) {
@ -1680,6 +1566,13 @@ bool EatCodePoint(const std::wstring& input,
} // namespace } // namespace
namespace ftxui { namespace ftxui {
WordBreakProperty CodepointToWordBreakProperty(uint32_t codepoint) {
WordBreakPropertyInterval interval = {0, 0, WBP::ALetter};
std::ignore = Bisearch(codepoint, g_word_break_intervals, &interval);
return interval.property;
}
int wchar_width(wchar_t ucs) { int wchar_width(wchar_t ucs) {
return codepoint_width(uint32_t(ucs)); return codepoint_width(uint32_t(ucs));
} }