i18npool/qa/cppunit/test_breakiterator.cxx | 88 +++-------------------------- i18npool/source/localedata/data/da_DK.xml | 7 ++ 2 files changed, 18 insertions(+), 77 deletions(-)
New commits: commit f23c1baa4646957ad8a7060376638935a5e87889 Author: Jonathan Clark <[email protected]> AuthorDate: Thu Nov 28 11:08:20 2024 -0700 Commit: Jonathan Clark <[email protected]> CommitDate: Fri Nov 29 22:09:00 2024 +0100 tdf#164062 i18npool: Added da_DK as a prepostdash language Previously, Danish words ending with hyphens were not handled correctly during spell checking. This change fixes the issue by modifying the da_DK locale to reference the existing breakiterator rules for languages with word-final hyphens. Change-Id: I9b08fb5246b5b846814dc3b4b9ba249b29fc7a84 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/177504 Tested-by: Jenkins Reviewed-by: Jonathan Clark <[email protected]> diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx index 9d9712e54e71..24666ca4ac80 100644 --- a/i18npool/qa/cppunit/test_breakiterator.cxx +++ b/i18npool/qa/cppunit/test_breakiterator.cxx @@ -44,11 +44,8 @@ public: void testJapanese(); void testChinese(); - void testLegacyDictWordPrepostDash_de_DE(); - void testLegacyDictWordPrepostDash_nds_DE(); - void testLegacyDictWordPrepostDash_nl_NL(); - void testLegacyDictWordPrepostDash_sv_SE(); void testDictWordAbbreviation(); + void testDictWordPrepostDash(); void testHebrewGereshGershaim(); void testLegacySurrogatePairs(); void testWordCount(); @@ -68,11 +65,8 @@ public: #endif CPPUNIT_TEST(testJapanese); CPPUNIT_TEST(testChinese); - CPPUNIT_TEST(testLegacyDictWordPrepostDash_de_DE); - CPPUNIT_TEST(testLegacyDictWordPrepostDash_nds_DE); - CPPUNIT_TEST(testLegacyDictWordPrepostDash_nl_NL); - CPPUNIT_TEST(testLegacyDictWordPrepostDash_sv_SE); CPPUNIT_TEST(testDictWordAbbreviation); + CPPUNIT_TEST(testDictWordPrepostDash); CPPUNIT_TEST(testHebrewGereshGershaim); CPPUNIT_TEST(testLegacySurrogatePairs); CPPUNIT_TEST(testWordCount); @@ -1647,22 +1641,25 @@ void TestBreakIterator::testChinese() } } -void TestBreakIterator::testLegacyDictWordPrepostDash_de_DE() +void TestBreakIterator::testDictWordPrepostDash() { - lang::Locale aLocale; - aLocale.Language = "de"; - aLocale.Country = "DE"; + std::vector<lang::Locale> aLocale{ { "de", "DE", "" }, + { "nds", "DE", "" }, + { "nl", "NL", "" }, + { "sv", "SE", "" }, + { "da", "DK", "" } }; + for (const auto& rLocale : aLocale) { auto aTest = u"Arbeits- -nehmer"_ustr; i18n::Boundary aBounds - = m_xBreak->getWordBoundary(aTest, 3, aLocale, i18n::WordType::DICTIONARY_WORD, false); + = m_xBreak->getWordBoundary(aTest, 3, rLocale, i18n::WordType::DICTIONARY_WORD, false); CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.endPos); aBounds - = m_xBreak->getWordBoundary(aTest, 13, aLocale, i18n::WordType::DICTIONARY_WORD, false); + = m_xBreak->getWordBoundary(aTest, 13, rLocale, i18n::WordType::DICTIONARY_WORD, false); CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos); CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos); } @@ -1711,69 +1708,6 @@ void TestBreakIterator::testDictWordAbbreviation() } } -void TestBreakIterator::testLegacyDictWordPrepostDash_nds_DE() -{ - lang::Locale aLocale; - aLocale.Language = "nds"; - aLocale.Country = "DE"; - - { - auto aTest = u"Arbeits- -nehmer"_ustr; - - i18n::Boundary aBounds - = m_xBreak->getWordBoundary(aTest, 3, aLocale, i18n::WordType::DICTIONARY_WORD, false); - CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); - CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.endPos); - - aBounds - = m_xBreak->getWordBoundary(aTest, 13, aLocale, i18n::WordType::DICTIONARY_WORD, false); - CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos); - CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos); - } -} - -void TestBreakIterator::testLegacyDictWordPrepostDash_nl_NL() -{ - lang::Locale aLocale; - aLocale.Language = "nl"; - aLocale.Country = "NL"; - - { - auto aTest = u"Arbeits- -nehmer"_ustr; - - i18n::Boundary aBounds - = m_xBreak->getWordBoundary(aTest, 3, aLocale, i18n::WordType::DICTIONARY_WORD, false); - CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); - CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.endPos); - - aBounds - = m_xBreak->getWordBoundary(aTest, 13, aLocale, i18n::WordType::DICTIONARY_WORD, false); - CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos); - CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos); - } -} - -void TestBreakIterator::testLegacyDictWordPrepostDash_sv_SE() -{ - lang::Locale aLocale; - aLocale.Language = "sv"; - aLocale.Country = "SE"; - - { - auto aTest = u"Arbeits- -nehmer"_ustr; - - i18n::Boundary aBounds - = m_xBreak->getWordBoundary(aTest, 3, aLocale, i18n::WordType::DICTIONARY_WORD, false); - CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); - CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.endPos); - - aBounds - = m_xBreak->getWordBoundary(aTest, 13, aLocale, i18n::WordType::DICTIONARY_WORD, false); - CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos); - CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos); - } -} - void TestBreakIterator::testHebrewGereshGershaim() { // In Hebrew documents, there are multiple valid ways to represent the geresh and gershaim diff --git a/i18npool/source/localedata/data/da_DK.xml b/i18npool/source/localedata/data/da_DK.xml index 05e2278857da..ae76f1e9fd38 100644 --- a/i18npool/source/localedata/data/da_DK.xml +++ b/i18npool/source/localedata/data/da_DK.xml @@ -367,6 +367,13 @@ </LC_CURRENCY> <LC_TRANSLITERATION ref="en_US"/> <LC_MISC> + <BreakIteratorRules> + <EditMode/> + <DictionaryMode>dict_word_prepostdash</DictionaryMode> + <WordCountMode/> + <CharacterMode/> + <LineMode/> + </BreakIteratorRules> <ReservedWords> <trueWord>sand</trueWord> <falseWord>falsk</falseWord>
