Title: [275115] trunk
Revision
275115
Author
[email protected]
Date
2021-03-26 15:00:33 -0700 (Fri, 26 Mar 2021)

Log Message

[JSC] Use AppleICU SPI for canonicalization
https://bugs.webkit.org/show_bug.cgi?id=223552

Reviewed by Ryosuke Niwa.

JSTests:

* stress/intl-canonical-locale-alias-mapping.js: Added.
(shouldBe):
(Intl.getCanonicalLocales):

Source/JavaScriptCore:

uloc_canonicalize does not perform alias mapping, which differs from ECMA-402's canonicalization requirement.
The ICU C++ class icu::Locale can canonicalize a locale ID with alias mapping, but this is not exposed through the C API.

In this patch, we adopt the AppleICU SPI "ualoc_canonicalForm" added in rdar://74314220. This canonicalization also performs
alias mapping. We do not extend uloc_canonicalize because its documentation explicitly says "It does NOT map aliased names in any way."[1].
Upstreaming of this new SPI is tracked in [2]. Once it is upstreamed to mainline ICU, we will switch to that.

[1]: https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/uloc_8h.html#a69b148194cf57ac40d4bb15c5b905260
[2]: https://unicode-org.atlassian.net/browse/ICU-21506

* runtime/IntlLocale.cpp:
(JSC::LocaleIDBuilder::initialize):
(JSC::LocaleIDBuilder::toCanonical):
* runtime/IntlObject.cpp:
(JSC::localeIDBufferForLanguageTagWithNullTerminator):
(JSC::canonicalizeLanguageTag):
(JSC::canonicalizeLocaleIDWithoutNullTerminator):
(JSC::localeIDBufferForLanguageTag): Deleted.
* runtime/IntlObject.h:

Modified Paths

Added Paths

Diff

Modified: trunk/JSTests/ChangeLog (275114 => 275115)


--- trunk/JSTests/ChangeLog	2021-03-26 21:28:11 UTC (rev 275114)
+++ trunk/JSTests/ChangeLog	2021-03-26 22:00:33 UTC (rev 275115)
@@ -1,3 +1,14 @@
+2021-03-26  Yusuke Suzuki  <[email protected]>
+
+        [JSC] Use AppleICU SPI for canonicalization
+        https://bugs.webkit.org/show_bug.cgi?id=223552
+
+        Reviewed by Ryosuke Niwa.
+
+        * stress/intl-canonical-locale-alias-mapping.js: Added.
+        (shouldBe):
+        (Intl.getCanonicalLocales):
+
 2021-03-25  Truitt Savell  <[email protected]>
 
         Unreviewed, reverting r275056.

Added: trunk/JSTests/stress/intl-canonical-locale-alias-mapping.js (0 => 275115)


--- trunk/JSTests/stress/intl-canonical-locale-alias-mapping.js	                        (rev 0)
+++ trunk/JSTests/stress/intl-canonical-locale-alias-mapping.js	2021-03-26 22:00:33 UTC (rev 275115)
@@ -0,0 +1,13 @@
+function shouldBe(actual, expected) {
+    if (actual !== expected)
+        throw new Error(`expected ${expected} but got ${actual}`);
+}
+
+if (Intl.getCanonicalLocales('tl')[0] === 'fil') {
+    shouldBe(Intl.getCanonicalLocales('nb')[0], 'nb');
+    shouldBe(Intl.getCanonicalLocales('no')[0], 'nb');
+    shouldBe(Intl.getCanonicalLocales('iw')[0], 'he');
+    shouldBe(Intl.getCanonicalLocales('prs')[0], 'fa-AF');
+    shouldBe(Intl.getCanonicalLocales('swc')[0], 'sw-CD');
+    shouldBe(Intl.getCanonicalLocales('tl')[0], 'fil');
+}

Modified: trunk/Source/JavaScriptCore/ChangeLog (275114 => 275115)


--- trunk/Source/JavaScriptCore/ChangeLog	2021-03-26 21:28:11 UTC (rev 275114)
+++ trunk/Source/JavaScriptCore/ChangeLog	2021-03-26 22:00:33 UTC (rev 275115)
@@ -1,3 +1,30 @@
+2021-03-26  Yusuke Suzuki  <[email protected]>
+
+        [JSC] Use AppleICU SPI for canonicalization
+        https://bugs.webkit.org/show_bug.cgi?id=223552
+
+        Reviewed by Ryosuke Niwa.
+
+        uloc_canonicalize does not perform alias mapping. This is different from ECMA402's canonicalization requirement.
+        ICU C++ icu::Locale can canonicalize locale ID with alias mapping, but this is not exposed to C API.
+
+        In this patch, we adopt AppleICU SPI "ualoc_canonicalForm" added in rdar://74314220. This canonicalization can perform
+        alias mapping too. We do not extend uloc_canonicalize since this API explicitly says "It does NOT map aliased names in any way."[1].
+        In [2], we are tracking upstreaming of this new SPI. Once it is upstreamed to the mainline ICU, we will switch to that.
+
+        [1]: https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/uloc_8h.html#a69b148194cf57ac40d4bb15c5b905260
+        [2]: https://unicode-org.atlassian.net/browse/ICU-21506
+
+        * runtime/IntlLocale.cpp:
+        (JSC::LocaleIDBuilder::initialize):
+        (JSC::LocaleIDBuilder::toCanonical):
+        * runtime/IntlObject.cpp:
+        (JSC::localeIDBufferForLanguageTagWithNullTerminator):
+        (JSC::canonicalizeLanguageTag):
+        (JSC::canonicalizeLocaleIDWithoutNullTerminator):
+        (JSC::localeIDBufferForLanguageTag): Deleted.
+        * runtime/IntlObject.h:
+
 2021-03-26  Don Olmstead  <[email protected]>
 
         [CMake] Deprecate using DERIVED_SOURCES_DIR/FOWARDING_HEADERS_DIR directly

Modified: trunk/Source/JavaScriptCore/runtime/IntlLocale.cpp (275114 => 275115)


--- trunk/Source/JavaScriptCore/runtime/IntlLocale.cpp	2021-03-26 21:28:11 UTC (rev 275114)
+++ trunk/Source/JavaScriptCore/runtime/IntlLocale.cpp	2021-03-26 22:00:33 UTC (rev 275115)
@@ -87,7 +87,7 @@
     if (!isStructurallyValidLanguageTag(tag))
         return false;
     ASSERT(tag.isAllASCII());
-    m_buffer = localeIDBufferForLanguageTag(tag.ascii());
+    m_buffer = localeIDBufferForLanguageTagWithNullTerminator(tag.ascii());
     return m_buffer.size();
 }
 
@@ -95,12 +95,11 @@
 {
     ASSERT(m_buffer.size());
 
-    Vector<char, 32> buffer;
-    auto status = callBufferProducingFunction(uloc_canonicalize, m_buffer.data(), buffer);
-    if (U_FAILURE(status))
+    auto buffer = canonicalizeLocaleIDWithoutNullTerminator(m_buffer.data());
+    if (!buffer)
         return CString();
 
-    auto result = canonicalizeUnicodeExtensionsAfterICULocaleCanonicalization(WTFMove(buffer));
+    auto result = canonicalizeUnicodeExtensionsAfterICULocaleCanonicalization(WTFMove(buffer.value()));
     return CString(result.data(), result.size());
 }
 

Modified: trunk/Source/JavaScriptCore/runtime/IntlObject.cpp (275114 => 275115)


--- trunk/Source/JavaScriptCore/runtime/IntlObject.cpp	2021-03-26 21:28:11 UTC (rev 275114)
+++ trunk/Source/JavaScriptCore/runtime/IntlObject.cpp	2021-03-26 22:00:33 UTC (rev 275115)
@@ -246,7 +246,7 @@
     return subtags;
 }
 
-Vector<char, 32> localeIDBufferForLanguageTag(const CString& tag)
+Vector<char, 32> localeIDBufferForLanguageTagWithNullTerminator(const CString& tag)
 {
     if (!tag.length())
         return { };
@@ -603,11 +603,15 @@
 // https://tc39.es/ecma402/#sec-canonicalizeunicodelocaleid
 static String canonicalizeLanguageTag(const CString& tag)
 {
-    auto buffer = localeIDBufferForLanguageTag(tag);
+    auto buffer = localeIDBufferForLanguageTagWithNullTerminator(tag);
     if (buffer.isEmpty())
         return String();
-
-    return languageTagForLocaleID(buffer.data());
+    auto canonicalized = canonicalizeLocaleIDWithoutNullTerminator(buffer.data());
+    if (!canonicalized)
+        return String();
+    canonicalized->append('\0');
+    ASSERT(canonicalized->contains('\0'));
+    return languageTagForLocaleID(canonicalized->data());
 }
 
 Vector<String> canonicalizeLocaleList(JSGlobalObject* globalObject, JSValue locales)
@@ -1409,6 +1413,26 @@
     return currency.length() == 3 && currency.isAllSpecialCharacters<isASCIIAlpha>();
 }
 
+Optional<Vector<char, 32>> canonicalizeLocaleIDWithoutNullTerminator(const char* localeID)
+{
+    ASSERT(localeID);
+    Vector<char, 32> buffer;
+#if U_ICU_VERSION_MAJOR_NUM >= 68 && USE(APPLE_INTERNAL_SDK)
+    // Use ualoc_canonicalForm AppleICU SPI, which can perform mapping of aliases.
+    // ICU-21506 is a bug upstreaming this SPI to ICU.
+    // https://unicode-org.atlassian.net/browse/ICU-21506
+    auto status = callBufferProducingFunction(ualoc_canonicalForm, localeID, buffer);
+    if (U_FAILURE(status))
+        return WTF::nullopt;
+    return buffer;
+#else
+    auto status = callBufferProducingFunction(uloc_canonicalize, localeID, buffer);
+    if (U_FAILURE(status))
+        return WTF::nullopt;
+    return buffer;
+#endif
+}
+
 JSC_DEFINE_HOST_FUNCTION(intlObjectFuncGetCanonicalLocales, (JSGlobalObject* globalObject, CallFrame* callFrame))
 {
     // Intl.getCanonicalLocales(locales)

Modified: trunk/Source/JavaScriptCore/runtime/IntlObject.h (275114 => 275115)


--- trunk/Source/JavaScriptCore/runtime/IntlObject.h	2021-03-26 21:28:11 UTC (rev 275114)
+++ trunk/Source/JavaScriptCore/runtime/IntlObject.h	2021-03-26 22:00:33 UTC (rev 275115)
@@ -95,7 +95,7 @@
 String intlStringOption(JSGlobalObject*, Optional<JSObject&> options, PropertyName, std::initializer_list<const char*> values, const char* notFound, const char* fallback);
 unsigned intlNumberOption(JSGlobalObject*, Optional<JSObject&> options, PropertyName, unsigned minimum, unsigned maximum, unsigned fallback);
 unsigned intlDefaultNumberOption(JSGlobalObject*, JSValue, PropertyName, unsigned minimum, unsigned maximum, unsigned fallback);
-Vector<char, 32> localeIDBufferForLanguageTag(const CString&);
+Vector<char, 32> localeIDBufferForLanguageTagWithNullTerminator(const CString&);
 String languageTagForLocaleID(const char*, bool isImmortal = false);
 Vector<String> canonicalizeLocaleList(JSGlobalObject*, JSValue locales);
 
@@ -127,6 +127,8 @@
 
 bool isWellFormedCurrencyCode(StringView);
 
+Optional<Vector<char, 32>> canonicalizeLocaleIDWithoutNullTerminator(const char* localeID);
+
 struct UFieldPositionIteratorDeleter {
     void operator()(UFieldPositionIterator*) const;
 };
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to