Modified: trunk/Source/WebCore/platform/text/LocaleToScriptMapping.cpp (276224 => 276225)
--- trunk/Source/WebCore/platform/text/LocaleToScriptMapping.cpp 2021-04-18 07:14:07 UTC (rev 276224)
+++ trunk/Source/WebCore/platform/text/LocaleToScriptMapping.cpp 2021-04-18 07:43:33 UTC (rev 276225)
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2011 Google Inc. All rights reserved.
+ * Copyright (C) 2021 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -37,368 +38,460 @@
namespace WebCore {
-struct ScriptNameCode {
- ASCIILiteral name;
- UScriptCode code;
-};
+template<typename StorageInteger>
+class PackedASCIILowerCodes {
+public:
+ static_assert(std::is_unsigned_v<StorageInteger>);
-// This generally maps an ISO 15924 script code to its UScriptCode, but certain families of script codes are
-// treated as a single script for assigning a per-script font in Settings. For example, "hira" is mapped to
-// USCRIPT_KATAKANA_OR_HIRAGANA instead of USCRIPT_HIRAGANA, since we want all Japanese scripts to be rendered
-// using the same font setting.
-static const ScriptNameCode scriptNameCodeList[] = {
- { "zyyy"_s, USCRIPT_COMMON },
- { "qaai"_s, USCRIPT_INHERITED },
- { "arab"_s, USCRIPT_ARABIC },
- { "armn"_s, USCRIPT_ARMENIAN },
- { "beng"_s, USCRIPT_BENGALI },
- { "bopo"_s, USCRIPT_BOPOMOFO },
- { "cher"_s, USCRIPT_CHEROKEE },
- { "copt"_s, USCRIPT_COPTIC },
- { "cyrl"_s, USCRIPT_CYRILLIC },
- { "dsrt"_s, USCRIPT_DESERET },
- { "deva"_s, USCRIPT_DEVANAGARI },
- { "ethi"_s, USCRIPT_ETHIOPIC },
- { "geor"_s, USCRIPT_GEORGIAN },
- { "goth"_s, USCRIPT_GOTHIC },
- { "grek"_s, USCRIPT_GREEK },
- { "gujr"_s, USCRIPT_GUJARATI },
- { "guru"_s, USCRIPT_GURMUKHI },
- { "hani"_s, USCRIPT_HAN },
- { "hang"_s, USCRIPT_HANGUL },
- { "hebr"_s, USCRIPT_HEBREW },
- { "hira"_s, USCRIPT_KATAKANA_OR_HIRAGANA },
- { "knda"_s, USCRIPT_KANNADA },
- { "kana"_s, USCRIPT_KATAKANA_OR_HIRAGANA },
- { "khmr"_s, USCRIPT_KHMER },
- { "laoo"_s, USCRIPT_LAO },
- { "latn"_s, USCRIPT_LATIN },
- { "mlym"_s, USCRIPT_MALAYALAM },
- { "mong"_s, USCRIPT_MONGOLIAN },
- { "mymr"_s, USCRIPT_MYANMAR },
- { "ogam"_s, USCRIPT_OGHAM },
- { "ital"_s, USCRIPT_OLD_ITALIC },
- { "orya"_s, USCRIPT_ORIYA },
- { "runr"_s, USCRIPT_RUNIC },
- { "sinh"_s, USCRIPT_SINHALA },
- { "syrc"_s, USCRIPT_SYRIAC },
- { "taml"_s, USCRIPT_TAMIL },
- { "telu"_s, USCRIPT_TELUGU },
- { "thaa"_s, USCRIPT_THAANA },
- { "thai"_s, USCRIPT_THAI },
- { "tibt"_s, USCRIPT_TIBETAN },
- { "cans"_s, USCRIPT_CANADIAN_ABORIGINAL },
- { "yiii"_s, USCRIPT_YI },
- { "tglg"_s, USCRIPT_TAGALOG },
- { "hano"_s, USCRIPT_HANUNOO },
- { "buhd"_s, USCRIPT_BUHID },
- { "tagb"_s, USCRIPT_TAGBANWA },
- { "brai"_s, USCRIPT_BRAILLE },
- { "cprt"_s, USCRIPT_CYPRIOT },
- { "limb"_s, USCRIPT_LIMBU },
- { "linb"_s, USCRIPT_LINEAR_B },
- { "osma"_s, USCRIPT_OSMANYA },
- { "shaw"_s, USCRIPT_SHAVIAN },
- { "tale"_s, USCRIPT_TAI_LE },
- { "ugar"_s, USCRIPT_UGARITIC },
- { "hrkt"_s, USCRIPT_KATAKANA_OR_HIRAGANA },
- { "bugi"_s, USCRIPT_BUGINESE },
- { "glag"_s, USCRIPT_GLAGOLITIC },
- { "khar"_s, USCRIPT_KHAROSHTHI },
- { "sylo"_s, USCRIPT_SYLOTI_NAGRI },
- { "talu"_s, USCRIPT_NEW_TAI_LUE },
- { "tfng"_s, USCRIPT_TIFINAGH },
- { "xpeo"_s, USCRIPT_OLD_PERSIAN },
- { "bali"_s, USCRIPT_BALINESE },
- { "batk"_s, USCRIPT_BATAK },
- { "blis"_s, USCRIPT_BLISSYMBOLS },
- { "brah"_s, USCRIPT_BRAHMI },
- { "cham"_s, USCRIPT_CHAM },
- { "cirt"_s, USCRIPT_CIRTH },
- { "cyrs"_s, USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC },
- { "egyd"_s, USCRIPT_DEMOTIC_EGYPTIAN },
- { "egyh"_s, USCRIPT_HIERATIC_EGYPTIAN },
- { "egyp"_s, USCRIPT_EGYPTIAN_HIEROGLYPHS },
- { "geok"_s, USCRIPT_KHUTSURI },
- { "hans"_s, USCRIPT_SIMPLIFIED_HAN },
- { "hant"_s, USCRIPT_TRADITIONAL_HAN },
- { "hmng"_s, USCRIPT_PAHAWH_HMONG },
- { "hung"_s, USCRIPT_OLD_HUNGARIAN },
- { "inds"_s, USCRIPT_HARAPPAN_INDUS },
- { "java"_s, USCRIPT_JAVANESE },
- { "kali"_s, USCRIPT_KAYAH_LI },
- { "latf"_s, USCRIPT_LATIN_FRAKTUR },
- { "latg"_s, USCRIPT_LATIN_GAELIC },
- { "lepc"_s, USCRIPT_LEPCHA },
- { "lina"_s, USCRIPT_LINEAR_A },
- { "mand"_s, USCRIPT_MANDAEAN },
- { "maya"_s, USCRIPT_MAYAN_HIEROGLYPHS },
- { "mero"_s, USCRIPT_MEROITIC },
- { "nkoo"_s, USCRIPT_NKO },
- { "orkh"_s, USCRIPT_ORKHON },
- { "perm"_s, USCRIPT_OLD_PERMIC },
- { "phag"_s, USCRIPT_PHAGS_PA },
- { "phnx"_s, USCRIPT_PHOENICIAN },
- { "plrd"_s, USCRIPT_PHONETIC_POLLARD },
- { "roro"_s, USCRIPT_RONGORONGO },
- { "sara"_s, USCRIPT_SARATI },
- { "syre"_s, USCRIPT_ESTRANGELO_SYRIAC },
- { "syrj"_s, USCRIPT_WESTERN_SYRIAC },
- { "syrn"_s, USCRIPT_EASTERN_SYRIAC },
- { "teng"_s, USCRIPT_TENGWAR },
- { "vaii"_s, USCRIPT_VAI },
- { "visp"_s, USCRIPT_VISIBLE_SPEECH },
- { "xsux"_s, USCRIPT_CUNEIFORM },
- { "jpan"_s, USCRIPT_KATAKANA_OR_HIRAGANA },
- { "kore"_s, USCRIPT_HANGUL },
- { "zxxx"_s, USCRIPT_UNWRITTEN_LANGUAGES },
- { "zzzz"_s, USCRIPT_UNKNOWN }
+    // Packs an ASCII string literal into StorageInteger with the first character in the most
+    // significant byte, so comparing packed values as integers orders them like the strings.
+    // Unused low-order bytes stay zero. The characters are stored exactly as written: unlike
+    // parse(), no case folding happens here, so the literal is expected to be lowercase already.
+    template<unsigned characterCountPlusOne>
+    constexpr PackedASCIILowerCodes(const char (&string)[characterCountPlusOne])
+    {
+        constexpr unsigned length = characterCountPlusOne - 1;
+        // length is a constant expression, so an over-long literal is rejected at compile time
+        // rather than by an assertion macro; otherwise the shift amount below would wrap around.
+        static_assert(length <= sizeof(StorageInteger), "string literal does not fit in StorageInteger");
+        ASSERT_UNDER_CONSTEXPR_CONTEXT(!string[length]);
+        StorageInteger result = 0;
+        for (unsigned index = 0; index < length; ++index) {
+            uint8_t code = static_cast<uint8_t>(string[index]);
+            result |= static_cast<StorageInteger>(code) << ((sizeof(StorageInteger) - index - 1) * 8);
+        }
+        m_value = result;
+    }
+
+    // Packs |string| with the same layout as the literal constructor, folding ASCII uppercase
+    // letters to lowercase. Returns nullopt when the string has more than sizeof(StorageInteger)
+    // characters or contains a NUL or non-ASCII character. An empty string packs to zero.
+    static Optional<PackedASCIILowerCodes> parse(StringView string)
+    {
+        if (string.length() > sizeof(StorageInteger))
+            return WTF::nullopt;
+        StorageInteger result = 0;
+        for (unsigned index = 0; index < string.length(); ++index) {
+            UChar code = string[index];
+            // Unused bytes of the packed value are zero, so a NUL character would be
+            // indistinguishable from padding ("ar\0" would compare equal to "ar"). Reject it.
+            if (!code || !isASCII(code))
+                return WTF::nullopt;
+            result |= static_cast<StorageInteger>(toASCIILower(code)) << ((sizeof(StorageInteger) - index - 1) * 8);
+        }
+        return PackedASCIILowerCodes(result);
+    }
+
+    // Equality and ordering are exactly those of the packed integer values.
+    friend constexpr bool operator==(PackedASCIILowerCodes lhs, PackedASCIILowerCodes rhs) { return lhs.m_value == rhs.m_value; }
+
+    friend constexpr bool operator!=(PackedASCIILowerCodes lhs, PackedASCIILowerCodes rhs) { return !(lhs.m_value == rhs.m_value); }
+
+    friend constexpr bool operator<(PackedASCIILowerCodes lhs, PackedASCIILowerCodes rhs) { return lhs.m_value < rhs.m_value; }
+
+    // lhs <= rhs exactly when rhs is not below lhs.
+    friend constexpr bool operator<=(PackedASCIILowerCodes lhs, PackedASCIILowerCodes rhs) { return !(rhs.m_value < lhs.m_value); }
+
+    friend constexpr bool operator>(PackedASCIILowerCodes lhs, PackedASCIILowerCodes rhs) { return rhs.m_value < lhs.m_value; }
+
+    // lhs >= rhs exactly when lhs is not below rhs.
+    friend constexpr bool operator>=(PackedASCIILowerCodes lhs, PackedASCIILowerCodes rhs) { return !(lhs.m_value < rhs.m_value); }
+
+    // Returns the packed integer representation.
+    constexpr StorageInteger value() const
+    {
+        return m_value;
+    }
+
+private:
+ explicit constexpr PackedASCIILowerCodes(StorageInteger value)
+ : m_value(value)
+ {
+ }
+
+ StorageInteger m_value { 0 };
};
-struct ScriptNameCodeMapHashTraits : public HashTraits<String> {
- static const int minimumTableSize = WTF::HashTableCapacityForSize<WTF_ARRAY_LENGTH(scriptNameCodeList)>::value;
+using ScriptName = PackedASCIILowerCodes<uint32_t>; // Script names of up to four ASCII characters, packed into 32 bits.
+struct ScriptNameCode {
+ ScriptName name; // Packed script name; the key the table is sorted and searched by.
+ UScriptCode code; // Script code that scriptNameToCode() returns for this name.
};
UScriptCode scriptNameToCode(const String& scriptName) // Looks up a script name (ASCII case-insensitively) in the table below; USCRIPT_INVALID_CODE if it is not listed.
{
- static const auto scriptNameCodeMap = makeNeverDestroyed([] {
- HashMap<String, UScriptCode, ASCIICaseInsensitiveHash, ScriptNameCodeMapHashTraits> map;
- for (auto& nameAndCode : scriptNameCodeList)
- map.add(nameAndCode.name, nameAndCode.code);
- return map;
- }());
+ // This generally maps an ISO 15924 script code to its UScriptCode, but certain families of script codes are
+ // treated as a single script for assigning a per-script font in Settings. For example, "hira" is mapped to
+ // USCRIPT_KATAKANA_OR_HIRAGANA instead of USCRIPT_HIRAGANA, since we want all Japanese scripts to be rendered
+ // using the same font setting.
+ static constexpr ScriptNameCode scriptNameCodeList[] = { // Entries must stay sorted by name: the lookup below is a binary search.
+ { "arab", USCRIPT_ARABIC },
+ { "armn", USCRIPT_ARMENIAN },
+ { "bali", USCRIPT_BALINESE },
+ { "batk", USCRIPT_BATAK },
+ { "beng", USCRIPT_BENGALI },
+ { "blis", USCRIPT_BLISSYMBOLS },
+ { "bopo", USCRIPT_BOPOMOFO },
+ { "brah", USCRIPT_BRAHMI },
+ { "brai", USCRIPT_BRAILLE },
+ { "bugi", USCRIPT_BUGINESE },
+ { "buhd", USCRIPT_BUHID },
+ { "cans", USCRIPT_CANADIAN_ABORIGINAL },
+ { "cham", USCRIPT_CHAM },
+ { "cher", USCRIPT_CHEROKEE },
+ { "cirt", USCRIPT_CIRTH },
+ { "copt", USCRIPT_COPTIC },
+ { "cprt", USCRIPT_CYPRIOT },
+ { "cyrl", USCRIPT_CYRILLIC },
+ { "cyrs", USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC },
+ { "deva", USCRIPT_DEVANAGARI },
+ { "dsrt", USCRIPT_DESERET },
+ { "egyd", USCRIPT_DEMOTIC_EGYPTIAN },
+ { "egyh", USCRIPT_HIERATIC_EGYPTIAN },
+ { "egyp", USCRIPT_EGYPTIAN_HIEROGLYPHS },
+ { "ethi", USCRIPT_ETHIOPIC },
+ { "geok", USCRIPT_KHUTSURI },
+ { "geor", USCRIPT_GEORGIAN },
+ { "glag", USCRIPT_GLAGOLITIC },
+ { "goth", USCRIPT_GOTHIC },
+ { "grek", USCRIPT_GREEK },
+ { "gujr", USCRIPT_GUJARATI },
+ { "guru", USCRIPT_GURMUKHI },
+ { "hang", USCRIPT_HANGUL },
+ { "hani", USCRIPT_HAN },
+ { "hano", USCRIPT_HANUNOO },
+ { "hans", USCRIPT_SIMPLIFIED_HAN },
+ { "hant", USCRIPT_TRADITIONAL_HAN },
+ { "hebr", USCRIPT_HEBREW },
+ { "hira", USCRIPT_KATAKANA_OR_HIRAGANA },
+ { "hmng", USCRIPT_PAHAWH_HMONG },
+ { "hrkt", USCRIPT_KATAKANA_OR_HIRAGANA },
+ { "hung", USCRIPT_OLD_HUNGARIAN },
+ { "inds", USCRIPT_HARAPPAN_INDUS },
+ { "ital", USCRIPT_OLD_ITALIC },
+ { "java", USCRIPT_JAVANESE },
+ { "jpan", USCRIPT_KATAKANA_OR_HIRAGANA },
+ { "kali", USCRIPT_KAYAH_LI },
+ { "kana", USCRIPT_KATAKANA_OR_HIRAGANA },
+ { "khar", USCRIPT_KHAROSHTHI },
+ { "khmr", USCRIPT_KHMER },
+ { "knda", USCRIPT_KANNADA },
+ { "kore", USCRIPT_HANGUL },
+ { "laoo", USCRIPT_LAO },
+ { "latf", USCRIPT_LATIN_FRAKTUR },
+ { "latg", USCRIPT_LATIN_GAELIC },
+ { "latn", USCRIPT_LATIN },
+ { "lepc", USCRIPT_LEPCHA },
+ { "limb", USCRIPT_LIMBU },
+ { "lina", USCRIPT_LINEAR_A },
+ { "linb", USCRIPT_LINEAR_B },
+ { "mand", USCRIPT_MANDAEAN },
+ { "maya", USCRIPT_MAYAN_HIEROGLYPHS },
+ { "mero", USCRIPT_MEROITIC },
+ { "mlym", USCRIPT_MALAYALAM },
+ { "mong", USCRIPT_MONGOLIAN },
+ { "mymr", USCRIPT_MYANMAR },
+ { "nkoo", USCRIPT_NKO },
+ { "ogam", USCRIPT_OGHAM },
+ { "orkh", USCRIPT_ORKHON },
+ { "orya", USCRIPT_ORIYA },
+ { "osma", USCRIPT_OSMANYA },
+ { "perm", USCRIPT_OLD_PERMIC },
+ { "phag", USCRIPT_PHAGS_PA },
+ { "phnx", USCRIPT_PHOENICIAN },
+ { "plrd", USCRIPT_PHONETIC_POLLARD },
+ { "qaai", USCRIPT_INHERITED },
+ { "roro", USCRIPT_RONGORONGO },
+ { "runr", USCRIPT_RUNIC },
+ { "sara", USCRIPT_SARATI },
+ { "shaw", USCRIPT_SHAVIAN },
+ { "sinh", USCRIPT_SINHALA },
+ { "sylo", USCRIPT_SYLOTI_NAGRI },
+ { "syrc", USCRIPT_SYRIAC },
+ { "syre", USCRIPT_ESTRANGELO_SYRIAC },
+ { "syrj", USCRIPT_WESTERN_SYRIAC },
+ { "syrn", USCRIPT_EASTERN_SYRIAC },
+ { "tagb", USCRIPT_TAGBANWA },
+ { "tale", USCRIPT_TAI_LE },
+ { "talu", USCRIPT_NEW_TAI_LUE },
+ { "taml", USCRIPT_TAMIL },
+ { "telu", USCRIPT_TELUGU },
+ { "teng", USCRIPT_TENGWAR },
+ { "tfng", USCRIPT_TIFINAGH },
+ { "tglg", USCRIPT_TAGALOG },
+ { "thaa", USCRIPT_THAANA },
+ { "thai", USCRIPT_THAI },
+ { "tibt", USCRIPT_TIBETAN },
+ { "ugar", USCRIPT_UGARITIC },
+ { "vaii", USCRIPT_VAI },
+ { "visp", USCRIPT_VISIBLE_SPEECH },
+ { "xpeo", USCRIPT_OLD_PERSIAN },
+ { "xsux", USCRIPT_CUNEIFORM },
+ { "yiii", USCRIPT_YI },
+ { "zxxx", USCRIPT_UNWRITTEN_LANGUAGES },
+ { "zyyy", USCRIPT_COMMON },
+ { "zzzz", USCRIPT_UNKNOWN },
+ };
- auto it = scriptNameCodeMap.get().find(scriptName);
- if (it != scriptNameCodeMap.get().end())
- return it->value;
+ static_assert(ScriptName("arab").value() == 0x61726162U); // Compile-time check of the packing: first character lands in the most significant byte.
+ static_assert(ScriptName("zzzz").value() == 0x7a7a7a7aU);
+
+ ASSERT( // Checks that the table really is sorted by packed name.
+ std::is_sorted(std::begin(scriptNameCodeList), std::end(scriptNameCodeList),
+ [](const ScriptNameCode& a, const ScriptNameCode& b) {
+ return a.name < b.name;
+ }));
+
+ auto name = ScriptName::parse(scriptName); // Lowercases and packs the input; nullopt if it is too long or not ASCII.
+ if (!name)
+ return USCRIPT_INVALID_CODE;
+
+ auto* element = tryBinarySearch<ScriptNameCode>(scriptNameCodeList, std::size(scriptNameCodeList), name.value(),
+ [](const ScriptNameCode* scriptNameCode) {
+ return scriptNameCode->name; // Key extractor: entries are compared by their packed name.
+ });
+ if (element) // NOTE(review): a null result presumably means no entry matched - confirm against tryBinarySearch.
+ return element->code;
return USCRIPT_INVALID_CODE;
}
+using LocaleName = PackedASCIILowerCodes<uint64_t>; // Locale names of up to eight ASCII characters, packed into 64 bits.
struct LocaleScript {
- ASCIILiteral locale;
+ LocaleName locale; // Packed locale name; the key the locale table is sorted and searched by.
UScriptCode script; // Script code associated with this locale.
};
-static const LocaleScript localeScriptList[] = {
- { "aa"_s, USCRIPT_LATIN },
- { "ab"_s, USCRIPT_CYRILLIC },
- { "ady"_s, USCRIPT_CYRILLIC },
- { "af"_s, USCRIPT_LATIN },
- { "ak"_s, USCRIPT_LATIN },
- { "am"_s, USCRIPT_ETHIOPIC },
- { "ar"_s, USCRIPT_ARABIC },
- { "as"_s, USCRIPT_BENGALI },
- { "ast"_s, USCRIPT_LATIN },
- { "av"_s, USCRIPT_CYRILLIC },
- { "ay"_s, USCRIPT_LATIN },
- { "az"_s, USCRIPT_LATIN },
- { "ba"_s, USCRIPT_CYRILLIC },
- { "be"_s, USCRIPT_CYRILLIC },
- { "bg"_s, USCRIPT_CYRILLIC },
- { "bi"_s, USCRIPT_LATIN },
- { "bn"_s, USCRIPT_BENGALI },
- { "bo"_s, USCRIPT_TIBETAN },
- { "bs"_s, USCRIPT_LATIN },
- { "ca"_s, USCRIPT_LATIN },
- { "ce"_s, USCRIPT_CYRILLIC },
- { "ceb"_s, USCRIPT_LATIN },
- { "ch"_s, USCRIPT_LATIN },
- { "chk"_s, USCRIPT_LATIN },
- { "cs"_s, USCRIPT_LATIN },
- { "cy"_s, USCRIPT_LATIN },
- { "da"_s, USCRIPT_LATIN },
- { "de"_s, USCRIPT_LATIN },
- { "dv"_s, USCRIPT_THAANA },
- { "dz"_s, USCRIPT_TIBETAN },
- { "ee"_s, USCRIPT_LATIN },
- { "efi"_s, USCRIPT_LATIN },
- { "el"_s, USCRIPT_GREEK },
- { "en"_s, USCRIPT_LATIN },
- { "es"_s, USCRIPT_LATIN },
- { "et"_s, USCRIPT_LATIN },
- { "eu"_s, USCRIPT_LATIN },
- { "fa"_s, USCRIPT_ARABIC },
- { "fi"_s, USCRIPT_LATIN },
- { "fil"_s, USCRIPT_LATIN },
- { "fj"_s, USCRIPT_LATIN },
- { "fo"_s, USCRIPT_LATIN },
- { "fr"_s, USCRIPT_LATIN },
- { "fur"_s, USCRIPT_LATIN },
- { "fy"_s, USCRIPT_LATIN },
- { "ga"_s, USCRIPT_LATIN },
- { "gaa"_s, USCRIPT_LATIN },
- { "gd"_s, USCRIPT_LATIN },
- { "gil"_s, USCRIPT_LATIN },
- { "gl"_s, USCRIPT_LATIN },
- { "gn"_s, USCRIPT_LATIN },
- { "gsw"_s, USCRIPT_LATIN },
- { "gu"_s, USCRIPT_GUJARATI },
- { "ha"_s, USCRIPT_LATIN },
- { "haw"_s, USCRIPT_LATIN },
- { "he"_s, USCRIPT_HEBREW },
- { "hi"_s, USCRIPT_DEVANAGARI },
- { "hil"_s, USCRIPT_LATIN },
- { "ho"_s, USCRIPT_LATIN },
- { "hr"_s, USCRIPT_LATIN },
- { "ht"_s, USCRIPT_LATIN },
- { "hu"_s, USCRIPT_LATIN },
- { "hy"_s, USCRIPT_ARMENIAN },
- { "id"_s, USCRIPT_LATIN },
- { "ig"_s, USCRIPT_LATIN },
- { "ii"_s, USCRIPT_YI },
- { "ilo"_s, USCRIPT_LATIN },
- { "inh"_s, USCRIPT_CYRILLIC },
- { "is"_s, USCRIPT_LATIN },
- { "it"_s, USCRIPT_LATIN },
- { "iu"_s, USCRIPT_CANADIAN_ABORIGINAL },
- { "ja"_s, USCRIPT_KATAKANA_OR_HIRAGANA },
- { "jv"_s, USCRIPT_LATIN },
- { "ka"_s, USCRIPT_GEORGIAN },
- { "kaj"_s, USCRIPT_LATIN },
- { "kam"_s, USCRIPT_LATIN },
- { "kbd"_s, USCRIPT_CYRILLIC },
- { "kha"_s, USCRIPT_LATIN },
- { "kk"_s, USCRIPT_CYRILLIC },
- { "kl"_s, USCRIPT_LATIN },
- { "km"_s, USCRIPT_KHMER },
- { "kn"_s, USCRIPT_KANNADA },
- { "ko"_s, USCRIPT_HANGUL },
- { "kok"_s, USCRIPT_DEVANAGARI },
- { "kos"_s, USCRIPT_LATIN },
- { "kpe"_s, USCRIPT_LATIN },
- { "krc"_s, USCRIPT_CYRILLIC },
- { "ks"_s, USCRIPT_ARABIC },
- { "ku"_s, USCRIPT_ARABIC },
- { "kum"_s, USCRIPT_CYRILLIC },
- { "ky"_s, USCRIPT_CYRILLIC },
- { "la"_s, USCRIPT_LATIN },
- { "lah"_s, USCRIPT_ARABIC },
- { "lb"_s, USCRIPT_LATIN },
- { "lez"_s, USCRIPT_CYRILLIC },
- { "ln"_s, USCRIPT_LATIN },
- { "lo"_s, USCRIPT_LAO },
- { "lt"_s, USCRIPT_LATIN },
- { "lv"_s, USCRIPT_LATIN },
- { "mai"_s, USCRIPT_DEVANAGARI },
- { "mdf"_s, USCRIPT_CYRILLIC },
- { "mg"_s, USCRIPT_LATIN },
- { "mh"_s, USCRIPT_LATIN },
- { "mi"_s, USCRIPT_LATIN },
- { "mk"_s, USCRIPT_CYRILLIC },
- { "ml"_s, USCRIPT_MALAYALAM },
- { "mn"_s, USCRIPT_CYRILLIC },
- { "mr"_s, USCRIPT_DEVANAGARI },
- { "ms"_s, USCRIPT_LATIN },
- { "mt"_s, USCRIPT_LATIN },
- { "my"_s, USCRIPT_MYANMAR },
- { "myv"_s, USCRIPT_CYRILLIC },
- { "na"_s, USCRIPT_LATIN },
- { "nb"_s, USCRIPT_LATIN },
- { "ne"_s, USCRIPT_DEVANAGARI },
- { "niu"_s, USCRIPT_LATIN },
- { "nl"_s, USCRIPT_LATIN },
- { "nn"_s, USCRIPT_LATIN },
- { "nr"_s, USCRIPT_LATIN },
- { "nso"_s, USCRIPT_LATIN },
- { "ny"_s, USCRIPT_LATIN },
- { "oc"_s, USCRIPT_LATIN },
- { "om"_s, USCRIPT_LATIN },
- { "or"_s, USCRIPT_ORIYA },
- { "os"_s, USCRIPT_CYRILLIC },
- { "pa"_s, USCRIPT_GURMUKHI },
- { "pag"_s, USCRIPT_LATIN },
- { "pap"_s, USCRIPT_LATIN },
- { "pau"_s, USCRIPT_LATIN },
- { "pl"_s, USCRIPT_LATIN },
- { "pon"_s, USCRIPT_LATIN },
- { "ps"_s, USCRIPT_ARABIC },
- { "pt"_s, USCRIPT_LATIN },
- { "qu"_s, USCRIPT_LATIN },
- { "rm"_s, USCRIPT_LATIN },
- { "rn"_s, USCRIPT_LATIN },
- { "ro"_s, USCRIPT_LATIN },
- { "ru"_s, USCRIPT_CYRILLIC },
- { "rw"_s, USCRIPT_LATIN },
- { "sa"_s, USCRIPT_DEVANAGARI },
- { "sah"_s, USCRIPT_CYRILLIC },
- { "sat"_s, USCRIPT_LATIN },
- { "sd"_s, USCRIPT_ARABIC },
- { "se"_s, USCRIPT_LATIN },
- { "sg"_s, USCRIPT_LATIN },
- { "si"_s, USCRIPT_SINHALA },
- { "sid"_s, USCRIPT_LATIN },
- { "sk"_s, USCRIPT_LATIN },
- { "sl"_s, USCRIPT_LATIN },
- { "sm"_s, USCRIPT_LATIN },
- { "so"_s, USCRIPT_LATIN },
- { "sq"_s, USCRIPT_LATIN },
- { "sr"_s, USCRIPT_CYRILLIC },
- { "ss"_s, USCRIPT_LATIN },
- { "st"_s, USCRIPT_LATIN },
- { "su"_s, USCRIPT_LATIN },
- { "sv"_s, USCRIPT_LATIN },
- { "sw"_s, USCRIPT_LATIN },
- { "ta"_s, USCRIPT_TAMIL },
- { "te"_s, USCRIPT_TELUGU },
- { "tet"_s, USCRIPT_LATIN },
- { "tg"_s, USCRIPT_CYRILLIC },
- { "th"_s, USCRIPT_THAI },
- { "ti"_s, USCRIPT_ETHIOPIC },
- { "tig"_s, USCRIPT_ETHIOPIC },
- { "tk"_s, USCRIPT_LATIN },
- { "tkl"_s, USCRIPT_LATIN },
- { "tl"_s, USCRIPT_LATIN },
- { "tn"_s, USCRIPT_LATIN },
- { "to"_s, USCRIPT_LATIN },
- { "tpi"_s, USCRIPT_LATIN },
- { "tr"_s, USCRIPT_LATIN },
- { "trv"_s, USCRIPT_LATIN },
- { "ts"_s, USCRIPT_LATIN },
- { "tt"_s, USCRIPT_CYRILLIC },
- { "tvl"_s, USCRIPT_LATIN },
- { "tw"_s, USCRIPT_LATIN },
- { "ty"_s, USCRIPT_LATIN },
- { "tyv"_s, USCRIPT_CYRILLIC },
- { "udm"_s, USCRIPT_CYRILLIC },
- { "ug"_s, USCRIPT_ARABIC },
- { "uk"_s, USCRIPT_CYRILLIC },
- { "und"_s, USCRIPT_LATIN },
- { "ur"_s, USCRIPT_ARABIC },
- { "uz"_s, USCRIPT_CYRILLIC },
- { "ve"_s, USCRIPT_LATIN },
- { "vi"_s, USCRIPT_LATIN },
- { "wal"_s, USCRIPT_ETHIOPIC },
- { "war"_s, USCRIPT_LATIN },
- { "wo"_s, USCRIPT_LATIN },
- { "xh"_s, USCRIPT_LATIN },
- { "yap"_s, USCRIPT_LATIN },
- { "yo"_s, USCRIPT_LATIN },
- { "za"_s, USCRIPT_LATIN },
- { "zh"_s, USCRIPT_HAN },
- { "zh_hk"_s, USCRIPT_TRADITIONAL_HAN },
- { "zh_tw"_s, USCRIPT_TRADITIONAL_HAN },
- { "zu"_s, USCRIPT_LATIN }
-};
-
-struct LocaleScriptMapHashTraits : public HashTraits<String> {
- static const int minimumTableSize = WTF::HashTableCapacityForSize<WTF_ARRAY_LENGTH(localeScriptList)>::value;
-};
-
UScriptCode localeToScriptCodeForFontSelection(const String& locale)
{
- static const auto localeScriptMap = makeNeverDestroyed([] {
- HashMap<String, UScriptCode, ASCIICaseInsensitiveHash, LocaleScriptMapHashTraits> map;
- for (auto& localeAndScript : localeScriptList)
- map.add(localeAndScript.locale, localeAndScript.script);
- return map;
- }());
+ static constexpr LocaleScript localeScriptList[] = {
+ { "aa", USCRIPT_LATIN },
+ { "ab", USCRIPT_CYRILLIC },
+ { "ady", USCRIPT_CYRILLIC },
+ { "af", USCRIPT_LATIN },
+ { "ak", USCRIPT_LATIN },
+ { "am", USCRIPT_ETHIOPIC },
+ { "ar", USCRIPT_ARABIC },
+ { "as", USCRIPT_BENGALI },
+ { "ast", USCRIPT_LATIN },
+ { "av", USCRIPT_CYRILLIC },
+ { "ay", USCRIPT_LATIN },
+ { "az", USCRIPT_LATIN },
+ { "ba", USCRIPT_CYRILLIC },
+ { "be", USCRIPT_CYRILLIC },
+ { "bg", USCRIPT_CYRILLIC },
+ { "bi", USCRIPT_LATIN },
+ { "bn", USCRIPT_BENGALI },
+ { "bo", USCRIPT_TIBETAN },
+ { "bs", USCRIPT_LATIN },
+ { "ca", USCRIPT_LATIN },
+ { "ce", USCRIPT_CYRILLIC },
+ { "ceb", USCRIPT_LATIN },
+ { "ch", USCRIPT_LATIN },
+ { "chk", USCRIPT_LATIN },
+ { "cs", USCRIPT_LATIN },
+ { "cy", USCRIPT_LATIN },
+ { "da", USCRIPT_LATIN },
+ { "de", USCRIPT_LATIN },
+ { "dv", USCRIPT_THAANA },
+ { "dz", USCRIPT_TIBETAN },
+ { "ee", USCRIPT_LATIN },
+ { "efi", USCRIPT_LATIN },
+ { "el", USCRIPT_GREEK },
+ { "en", USCRIPT_LATIN },
+ { "es", USCRIPT_LATIN },
+ { "et", USCRIPT_LATIN },
+ { "eu", USCRIPT_LATIN },
+ { "fa", USCRIPT_ARABIC },
+ { "fi", USCRIPT_LATIN },
+ { "fil", USCRIPT_LATIN },
+ { "fj", USCRIPT_LATIN },
+ { "fo", USCRIPT_LATIN },
+ { "fr", USCRIPT_LATIN },
+ { "fur", USCRIPT_LATIN },
+ { "fy", USCRIPT_LATIN },
+ { "ga", USCRIPT_LATIN },
+ { "gaa", USCRIPT_LATIN },
+ { "gd", USCRIPT_LATIN },
+ { "gil", USCRIPT_LATIN },
+ { "gl", USCRIPT_LATIN },
+ { "gn", USCRIPT_LATIN },
+ { "gsw", USCRIPT_LATIN },
+ { "gu", USCRIPT_GUJARATI },
+ { "ha", USCRIPT_LATIN },
+ { "haw", USCRIPT_LATIN },
+ { "he", USCRIPT_HEBREW },
+ { "hi", USCRIPT_DEVANAGARI },
+ { "hil", USCRIPT_LATIN },
+ { "ho", USCRIPT_LATIN },
+ { "hr", USCRIPT_LATIN },
+ { "ht", USCRIPT_LATIN },
+ { "hu", USCRIPT_LATIN },
+ { "hy", USCRIPT_ARMENIAN },
+ { "id", USCRIPT_LATIN },
+ { "ig", USCRIPT_LATIN },
+ { "ii", USCRIPT_YI },
+ { "ilo", USCRIPT_LATIN },
+ { "inh", USCRIPT_CYRILLIC },
+ { "is", USCRIPT_LATIN },
+ { "it", USCRIPT_LATIN },
+ { "iu", USCRIPT_CANADIAN_ABORIGINAL },
+ { "ja", USCRIPT_KATAKANA_OR_HIRAGANA },
+ { "jv", USCRIPT_LATIN },
+ { "ka", USCRIPT_GEORGIAN },
+ { "kaj", USCRIPT_LATIN },
+ { "kam", USCRIPT_LATIN },
+ { "kbd", USCRIPT_CYRILLIC },
+ { "kha", USCRIPT_LATIN },
+ { "kk", USCRIPT_CYRILLIC },
+ { "kl", USCRIPT_LATIN },
+ { "km", USCRIPT_KHMER },
+ { "kn", USCRIPT_KANNADA },
+ { "ko", USCRIPT_HANGUL },
+ { "kok", USCRIPT_DEVANAGARI },
+ { "kos", USCRIPT_LATIN },
+ { "kpe", USCRIPT_LATIN },
+ { "krc", USCRIPT_CYRILLIC },
+ { "ks", USCRIPT_ARABIC },
+ { "ku", USCRIPT_ARABIC },
+ { "kum", USCRIPT_CYRILLIC },
+ { "ky", USCRIPT_CYRILLIC },
+ { "la", USCRIPT_LATIN },
+ { "lah", USCRIPT_ARABIC },
+ { "lb", USCRIPT_LATIN },
+ { "lez", USCRIPT_CYRILLIC },
+ { "ln", USCRIPT_LATIN },
+ { "lo", USCRIPT_LAO },
+ { "lt", USCRIPT_LATIN },
+ { "lv", USCRIPT_LATIN },
+ { "mai", USCRIPT_DEVANAGARI },
+ { "mdf", USCRIPT_CYRILLIC },
+ { "mg", USCRIPT_LATIN },
+ { "mh", USCRIPT_LATIN },
+ { "mi", USCRIPT_LATIN },
+ { "mk", USCRIPT_CYRILLIC },
+ { "ml", USCRIPT_MALAYALAM },
+ { "mn", USCRIPT_CYRILLIC },
+ { "mr", USCRIPT_DEVANAGARI },
+ { "ms", USCRIPT_LATIN },
+ { "mt", USCRIPT_LATIN },
+ { "my", USCRIPT_MYANMAR },
+ { "myv", USCRIPT_CYRILLIC },
+ { "na", USCRIPT_LATIN },
+ { "nb", USCRIPT_LATIN },
+ { "ne", USCRIPT_DEVANAGARI },
+ { "niu", USCRIPT_LATIN },
+ { "nl", USCRIPT_LATIN },
+ { "nn", USCRIPT_LATIN },
+ { "nr", USCRIPT_LATIN },
+ { "nso", USCRIPT_LATIN },
+ { "ny", USCRIPT_LATIN },
+ { "oc", USCRIPT_LATIN },
+ { "om", USCRIPT_LATIN },
+ { "or", USCRIPT_ORIYA },
+ { "os", USCRIPT_CYRILLIC },
+ { "pa", USCRIPT_GURMUKHI },
+ { "pag", USCRIPT_LATIN },
+ { "pap", USCRIPT_LATIN },
+ { "pau", USCRIPT_LATIN },
+ { "pl", USCRIPT_LATIN },
+ { "pon", USCRIPT_LATIN },
+ { "ps", USCRIPT_ARABIC },
+ { "pt", USCRIPT_LATIN },
+ { "qu", USCRIPT_LATIN },
+ { "rm", USCRIPT_LATIN },
+ { "rn", USCRIPT_LATIN },
+ { "ro", USCRIPT_LATIN },
+ { "ru", USCRIPT_CYRILLIC },
+ { "rw", USCRIPT_LATIN },
+ { "sa", USCRIPT_DEVANAGARI },
+ { "sah", USCRIPT_CYRILLIC },
+ { "sat", USCRIPT_LATIN },
+ { "sd", USCRIPT_ARABIC },
+ { "se", USCRIPT_LATIN },
+ { "sg", USCRIPT_LATIN },
+ { "si", USCRIPT_SINHALA },
+ { "sid", USCRIPT_LATIN },
+ { "sk", USCRIPT_LATIN },
+ { "sl", USCRIPT_LATIN },
+ { "sm", USCRIPT_LATIN },
+ { "so", USCRIPT_LATIN },
+ { "sq", USCRIPT_LATIN },
+ { "sr", USCRIPT_CYRILLIC },
+ { "ss", USCRIPT_LATIN },
+ { "st", USCRIPT_LATIN },
+ { "su", USCRIPT_LATIN },
+ { "sv", USCRIPT_LATIN },
+ { "sw", USCRIPT_LATIN },
+ { "ta", USCRIPT_TAMIL },
+ { "te", USCRIPT_TELUGU },
+ { "tet", USCRIPT_LATIN },
+ { "tg", USCRIPT_CYRILLIC },
+ { "th", USCRIPT_THAI },
+ { "ti", USCRIPT_ETHIOPIC },
+ { "tig", USCRIPT_ETHIOPIC },
+ { "tk", USCRIPT_LATIN },
+ { "tkl", USCRIPT_LATIN },
+ { "tl", USCRIPT_LATIN },
+ { "tn", USCRIPT_LATIN },
+ { "to", USCRIPT_LATIN },
+ { "tpi", USCRIPT_LATIN },
+ { "tr", USCRIPT_LATIN },
+ { "trv", USCRIPT_LATIN },
+ { "ts", USCRIPT_LATIN },
+ { "tt", USCRIPT_CYRILLIC },
+ { "tvl", USCRIPT_LATIN },
+ { "tw", USCRIPT_LATIN },
+ { "ty", USCRIPT_LATIN },
+ { "tyv", USCRIPT_CYRILLIC },
+ { "udm", USCRIPT_CYRILLIC },
+ { "ug", USCRIPT_ARABIC },
+ { "uk", USCRIPT_CYRILLIC },
+ { "und", USCRIPT_LATIN },
+ { "ur", USCRIPT_ARABIC },
+ { "uz", USCRIPT_CYRILLIC },
+ { "ve", USCRIPT_LATIN },
+ { "vi", USCRIPT_LATIN },
+ { "wal", USCRIPT_ETHIOPIC },
+ { "war", USCRIPT_LATIN },
+ { "wo", USCRIPT_LATIN },
+ { "xh", USCRIPT_LATIN },
+ { "yap", USCRIPT_LATIN },
+ { "yo", USCRIPT_LATIN },
+ { "za", USCRIPT_LATIN },
+ { "zh", USCRIPT_HAN },
+ { "zh_hk", USCRIPT_TRADITIONAL_HAN },
+ { "zh_tw", USCRIPT_TRADITIONAL_HAN },
+ { "zu", USCRIPT_LATIN },
+ };
+ static_assert(LocaleName("aa").value() == 0x6161000000000000ULL);
+ static_assert(LocaleName("zh_tw").value() == 0x7a685f7477000000ULL);
+
+ ASSERT(
+ std::is_sorted(std::begin(localeScriptList), std::end(localeScriptList),
+ [](const LocaleScript& a, const LocaleScript& b) {
+ return a.locale < b.locale;
+ }));
+
+ auto tryFindScriptCode = [&] (const String& string) -> Optional<UScriptCode> {
+ auto localeName = LocaleName::parse(string);
+ if (!localeName)
+ return WTF::nullopt;
+
+ auto* element = tryBinarySearch<LocaleScript>(localeScriptList, std::size(localeScriptList), localeName.value(),
+ [](const LocaleScript* localeScript) {
+ return localeScript->locale;
+ });
+ if (element)
+ return element->script;
+ return WTF::nullopt;
+ };
+
String canonicalLocale = locale;
canonicalLocale.replace('-', '_');
while (!canonicalLocale.isEmpty()) {
- auto it = localeScriptMap.get().find(canonicalLocale);
- if (it != localeScriptMap.get().end())
- return it->value;
+ if (auto scriptCode = tryFindScriptCode(canonicalLocale))
+ return scriptCode.value();
auto underscorePosition = canonicalLocale.reverseFind('_');
if (underscorePosition == notFound)
break;