i18npool/qa/cppunit/test_characterclassification.cxx |  102 +++++++++++++++++++
 i18nutil/source/utility/casefolding.cxx              |   39 +++++++
 i18nutil/source/utility/casefolding_data.h           |    6 -
 3 files changed, 143 insertions(+), 4 deletions(-)

New commits:
commit 88790291ddc00bfa18d3f7bcc0825b1ce14cb490
Author:     Khaled Hosny <kha...@libreoffice.org>
AuthorDate: Mon Jul 24 16:29:40 2023 +0300
Commit:     خالد حسني <kha...@libreoffice.org>
CommitDate: Mon Jul 24 20:20:21 2023 +0200

    tdf#97152: Fix upper case mapping of lunate sigma (U+03F2)
    
    It was mapped to uppercase sigma (U+03A3) while it should be mapped to
    uppercase lunate sigma (U+03F9). Fix by letting this slot fall back to
    ICU case folding.
    
    Change-Id: I14ffa0151c740779b67af14be8c7af8c51c3a1e0
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/154845
    Tested-by: Jenkins
    Reviewed-by: خالد حسني <kha...@libreoffice.org>

diff --git a/i18npool/qa/cppunit/test_characterclassification.cxx b/i18npool/qa/cppunit/test_characterclassification.cxx
index 5b01f73bcf35..206bcec83882 100644
--- a/i18npool/qa/cppunit/test_characterclassification.cxx
+++ b/i18npool/qa/cppunit/test_characterclassification.cxx
@@ -190,8 +190,8 @@ CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf97152)
         OUString sTest(u"ͲͰϽϾϿͿϏϹ");
         OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {});
         CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", OUString(u"ͳͱͻͼͽϳϗϲ"), 
sLowerCase);
-        //OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, 
sLowerCase.getLength(), {});
-        //CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase);
+        OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, 
sLowerCase.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase);
     }
 
     {
@@ -199,10 +199,10 @@ CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf97152)
         OUString sTest(u"ͳͱͻͼͽϳϗϲ");
         OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {});
         CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", OUString(u"Ͳͱͻͼͽϳϗϲ"), 
sTitleCase);
-        //OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), 
{});
-        //CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", 
OUString(u"ͲͰϽϾϿͿϏϹ"), sUpperCase);
-        //OUString sLowerCase = m_xCC->toLower(sUpperCase, 0, 
sUpperCase.getLength(), {});
-        //CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sTest, sLowerCase);
+        OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", OUString(u"ͲͰϽϾϿͿϏϹ"), 
sUpperCase);
+        OUString sLowerCase = m_xCC->toLower(sUpperCase, 0, 
sUpperCase.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sTest, sLowerCase);
     }
 
     {
@@ -210,8 +210,8 @@ CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf97152)
         OUString sTest(u"Ͳͱͻͼͽϳϗϲ");
         OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {});
         CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", OUString(u"Ͳͱͻͼͽϳϗϲ"), 
sTitleCase);
-        //OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), 
{});
-        //CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", 
OUString(u"ͲͰϽϾϿͿϏϹ"), sUpperCase);
+        OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", OUString(u"ͲͰϽϾϿͿϏϹ"), 
sUpperCase);
         OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {});
         CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", OUString(u"ͳͱͻͼͽϳϗϲ"), 
sLowerCase);
     }
diff --git a/i18nutil/source/utility/casefolding_data.h b/i18nutil/source/utility/casefolding_data.h
index 3ea677e83405..f3f22c734e24 100644
--- a/i18nutil/source/utility/casefolding_data.h
+++ b/i18nutil/source/utility/casefolding_data.h
@@ -219,7 +219,7 @@ const Value CaseMappingValue[] = {
     {0x00, 0x0000}, {0x00, 0x0000}, {0x6a, 0x03DB}, {0x15, 0x03DA}, {0x6a, 
0x03DD}, {0x15, 0x03DC}, {0x6a, 0x03DF}, {0x15, 0x03DE}, // 03d8 - 03df
     {0x6a, 0x03E1}, {0x15, 0x03E0}, {0x6a, 0x03E3}, {0x15, 0x03E2}, {0x6a, 
0x03E5}, {0x15, 0x03E4}, {0x6a, 0x03E7}, {0x15, 0x03E6}, // 03e0 - 03e7
     {0x6a, 0x03E9}, {0x15, 0x03E8}, {0x6a, 0x03EB}, {0x15, 0x03EA}, {0x6a, 
0x03ED}, {0x15, 0x03EC}, {0x6a, 0x03EF}, {0x15, 0x03EE}, // 03e8 - 03ef
-    {0xf5, 0x0044}, {0xf5, 0x0046}, {0xf5, 0x0048}, {0x00, 0x0000}, {0x6a, 
0x03B8}, {0xf5, 0x004A}, {0x00, 0x0000}, {0x00, 0x0000}, // 03f0 - 03f7
+    {0xf5, 0x0044}, {0xf5, 0x0046}, {0x00, 0x0000}, {0x00, 0x0000}, {0x6a, 
0x03B8}, {0xf5, 0x004A}, {0x00, 0x0000}, {0x00, 0x0000}, // 03f0 - 03f7
     {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 
0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 03f8 - 03ff
 
     {0x6a, 0x0450}, {0x6a, 0x0451}, {0x6a, 0x0452}, {0x6a, 0x0453}, {0x6a, 
0x0454}, {0x6a, 0x0455}, {0x6a, 0x0456}, {0x6a, 0x0457}, // 0400 - 0407
@@ -761,8 +761,8 @@ Mapping const CaseMappingExtra[] = {
     {0x60, 1, {0x03BA, 0x0000, 0x0000}}, // 0x03f0 (69  0x0045)
     {0x15, 1, {0x03A1, 0x0000, 0x0000}}, // 0x03f1 (70  0x0046)
     {0x60, 1, {0x03C1, 0x0000, 0x0000}}, // 0x03f1 (71  0x0047)
-    {0x15, 1, {0x03A3, 0x0000, 0x0000}}, // 0x03f2 (72  0x0048)
-    {0x60, 1, {0x03C3, 0x0000, 0x0000}}, // 0x03f2 (73  0x0049)
+    {0x15, 1, {0x03A3, 0x0000, 0x0000}}, // 0x03f2 (72  0x0048) -- not hit, 
but keep slot (tdf#97152)
+    {0x60, 1, {0x03C3, 0x0000, 0x0000}}, // 0x03f2 (73  0x0049) -- not hit, 
but keep slot (tdf#97152)
     {0x15, 1, {0x0395, 0x0000, 0x0000}}, // 0x03f5 (74  0x004A)
     {0x60, 1, {0x03B5, 0x0000, 0x0000}}, // 0x03f5 (75  0x004B)
     {0x05, 2, {0x0535, 0x0552, 0x0000}}, // 0x0587 (76  0x004C)
commit 9eb88d78c8bc9e942814eb6fc4fe06a4e5736256
Author:     Khaled Hosny <kha...@libreoffice.org>
AuthorDate: Sun Jul 23 14:45:34 2023 +0300
Commit:     خالد حسني <kha...@libreoffice.org>
CommitDate: Mon Jul 24 20:20:07 2023 +0200

    tdf#96343, tdf#134766, tdf#97152: Fallback to ICU for case mapping
    
    If we are requested to case map a character not present in our case
    mapping data, fall back to ICU case mapping functions.
    
    We should switch completely to ICU at some point, but we need to
    evaluate our case mapping data and see if it differs from ICU and if
    there is a reason for it.
    
    This does not handle U+03F2 (lunate sigma) being upper-cased to
    Sigma (U+03A3), the case reported in tdf#97152.
    
    Change-Id: Icf13ac7aab6d07b2a90fc0ff5ef1c4f50c7a7f8c
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/154803
    Tested-by: Jenkins
    Reviewed-by: خالد حسني <kha...@libreoffice.org>

diff --git a/i18npool/qa/cppunit/test_characterclassification.cxx b/i18npool/qa/cppunit/test_characterclassification.cxx
index dc8b361fc53c..5b01f73bcf35 100644
--- a/i18npool/qa/cppunit/test_characterclassification.cxx
+++ b/i18npool/qa/cppunit/test_characterclassification.cxx
@@ -115,6 +115,108 @@ CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testSigma)
     }
 }
 
+CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf96343)
+{
+    {
+        // From upper case
+        OUString sTest(u"ꙊꙌꙖ");
+        OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", OUString(u"ꙋꙍꙗ"), 
sLowerCase);
+        OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, 
sLowerCase.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase);
+    }
+
+    {
+        // From lower case
+        OUString sTest(u"ꙋꙍꙗ");
+        OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", OUString(u"Ꙋꙍꙗ"), 
sTitleCase);
+        OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", OUString(u"ꙊꙌꙖ"), 
sUpperCase);
+        OUString sLowerCase = m_xCC->toLower(sUpperCase, 0, 
sUpperCase.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sTest, sLowerCase);
+    }
+
+    {
+        // From title case
+        OUString sTest(u"Ꙋꙍꙗ");
+        OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", OUString(u"Ꙋꙍꙗ"), 
sTitleCase);
+        OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", OUString(u"ꙊꙌꙖ"), 
sUpperCase);
+        OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", OUString(u"ꙋꙍꙗ"), 
sLowerCase);
+    }
+}
+
+CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf134766)
+{
+    {
+        // From upper case
+        OUString sTest(u"QꞋORBꞋAL");
+        OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", OUString(u"qꞌorbꞌal"), 
sLowerCase);
+        OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, 
sLowerCase.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase);
+    }
+
+    {
+        // From lower case
+        OUString sTest(u"qꞌorbꞌal");
+        OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", OUString(u"Qꞌorbꞌal"), 
sTitleCase);
+        OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", OUString(u"QꞋORBꞋAL"), 
sUpperCase);
+        OUString sLowerCase = m_xCC->toLower(sUpperCase, 0, 
sUpperCase.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sTest, sLowerCase);
+    }
+
+    {
+        // From title case
+        OUString sTest(u"Qꞌorbꞌal");
+        OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", OUString(u"Qꞌorbꞌal"), 
sTitleCase);
+        OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", OUString(u"QꞋORBꞋAL"), 
sUpperCase);
+        OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", OUString(u"qꞌorbꞌal"), 
sLowerCase);
+    }
+}
+
+CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf97152)
+{
+    {
+        // From upper case
+        OUString sTest(u"ͲͰϽϾϿͿϏϹ");
+        OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", OUString(u"ͳͱͻͼͽϳϗϲ"), 
sLowerCase);
+        //OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, 
sLowerCase.getLength(), {});
+        //CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase);
+    }
+
+    {
+        // From lower case
+        OUString sTest(u"ͳͱͻͼͽϳϗϲ");
+        OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", OUString(u"Ͳͱͻͼͽϳϗϲ"), 
sTitleCase);
+        //OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), 
{});
+        //CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", 
OUString(u"ͲͰϽϾϿͿϏϹ"), sUpperCase);
+        //OUString sLowerCase = m_xCC->toLower(sUpperCase, 0, 
sUpperCase.getLength(), {});
+        //CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sTest, sLowerCase);
+    }
+
+    {
+        // From title case
+        OUString sTest(u"Ͳͱͻͼͽϳϗϲ");
+        OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", OUString(u"Ͳͱͻͼͽϳϗϲ"), 
sTitleCase);
+        //OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), 
{});
+        //CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", 
OUString(u"ͲͰϽϾϿͿϏϹ"), sUpperCase);
+        OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {});
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", OUString(u"ͳͱͻͼͽϳϗϲ"), 
sLowerCase);
+    }
+}
+
 void TestCharacterClassification::setUp()
 {
     BootstrapFixtureBase::setUp();
diff --git a/i18nutil/source/utility/casefolding.cxx b/i18nutil/source/utility/casefolding.cxx
index 432de0bf1228..d4f79927c131 100644
--- a/i18nutil/source/utility/casefolding.cxx
+++ b/i18nutil/source/utility/casefolding.cxx
@@ -26,6 +26,8 @@
 #include <com/sun/star/uno/RuntimeException.hpp>
 #include <rtl/character.hxx>
 
+#include <unicode/uchar.h>
+
 using namespace com::sun::star::lang;
 using namespace com::sun::star::uno;
 
@@ -125,10 +127,45 @@ Mapping casefolding::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 l
                     // Should not come here
                     throw RuntimeException();
                 }
-            } else
+            }
+            else
+            {
                 dummy.map[0] = CaseMappingValue[address].value;
+                return dummy;
+            }
         }
     }
+
+    // If the code point is not supported by our case mapping tables,
+    // fallback to ICU functions.
+    // TODO: this does not handle special case mapping as these require
+    // using ustring.h APIs, which work on the whole string not character
+    // by character.
+    // TODO: what is the difference between ToLower and UpperToLower etc.?
+    sal_uInt32 value = 0;
+    switch (nMappingType)
+    {
+        case MappingType::ToLower:
+        case MappingType::UpperToLower:
+            value = u_tolower(c);
+            break;
+        case MappingType::ToUpper:
+        case MappingType::LowerToUpper:
+            value = u_toupper(c);
+            break;
+        case MappingType::ToTitle:
+            value = u_totitle(c);
+            break;
+        case MappingType::SimpleFolding:
+        case MappingType::FullFolding:
+            value = u_foldCase(c, U_FOLD_CASE_DEFAULT);
+            break;
+        default: break;
+    }
+
+    if (value && value != c)
+        dummy.nmap = rtl::splitSurrogates(value, dummy.map);
+
     return dummy;
 }
 

Reply via email to