 i18nlangtag/Library_i18nlangtag.mk             |    1
 i18nlangtag/source/languagetag/languagetag.cxx |   91 +++++++++++++++++--------
 2 files changed, 64 insertions(+), 28 deletions(-)
New commits: commit 7c032aa60eaccde88a9064a80bb69fe8076a040b Author: Eike Rathke <[email protected]> Date: Tue Sep 3 14:17:54 2013 +0200 resolve all known fallbacks Needed for rsc during build time to not pull in liblangtag and its data. Change-Id: I1d4dd32b04ed93ec75720132a30b66ef63fec179 diff --git a/i18nlangtag/Library_i18nlangtag.mk b/i18nlangtag/Library_i18nlangtag.mk index fd1548b..30b6e01 100644 --- a/i18nlangtag/Library_i18nlangtag.mk +++ b/i18nlangtag/Library_i18nlangtag.mk @@ -21,6 +21,7 @@ $(eval $(call gb_Library_use_libraries,i18nlangtag,\ )) $(eval $(call gb_Library_use_externals,i18nlangtag,\ + boost_headers \ icu_headers \ icuuc \ )) diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx index 65236ce..de48d06 100644 --- a/i18nlangtag/source/languagetag/languagetag.cxx +++ b/i18nlangtag/source/languagetag/languagetag.cxx @@ -16,6 +16,7 @@ #include <osl/file.hxx> #include <rtl/instance.hxx> #include <rtl/locale.h> +#include <boost/unordered_set.hpp> //#define erDEBUG @@ -46,13 +47,44 @@ struct myLtError ~myLtError() { if (p) lt_error_unref( p); } }; - // "statics" to be returned as const reference to an empty locale and string. 
namespace { struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {}; struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {}; } +typedef ::boost::unordered_set< OUString, OUStringHash > KnownTagSet; +namespace { +struct theKnowns : public rtl::Static< KnownTagSet, theKnowns > {}; +struct theMutex : public rtl::Static< osl::Mutex, theMutex > {}; +} + +static const KnownTagSet & getKnowns() +{ + KnownTagSet & rKnowns = theKnowns::get(); + if (rKnowns.empty()) + { + osl::MutexGuard aGuard( theMutex::get()); + if (rKnowns.empty()) + { + ::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags()); + for (::std::vector< MsLangId::LanguagetagMapping >::const_iterator it( aDefined.begin()); + it != aDefined.end(); ++it) + { + // Do not use the BCP47 string here to initialize the + // LanguageTag because then canonicalize() would call this + // getKnowns() again.. + ::std::vector< OUString > aFallbacks( LanguageTag( (*it).mnLang).getFallbackStrings()); + for (::std::vector< OUString >::const_iterator fb( aFallbacks.begin()); fb != aFallbacks.end(); ++fb) + { + rKnowns.insert( *fb); + } + } + } + } + return rKnowns; +} + /** A reference holder for liblangtag data de/initialization, one static instance. Currently implemented such that the first "ref" inits and dtor @@ -448,7 +480,7 @@ bool LanguageTag::canonicalize() // Now this is getting funny.. we only have some BCP47 string // and want to determine if parsing it would be possible // without using liblangtag just to see if it is a simple known - // locale. + // locale or could fall back to one. 
OUString aLanguage, aScript, aCountry, aVariants; Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants); if (eExt != EXTRACTED_NONE) @@ -512,6 +544,12 @@ bool LanguageTag::canonicalize() } if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM) meIsLiblangtagNeeded = DECISION_NO; // known locale + else + { + const KnownTagSet& rKnowns = getKnowns(); + if (rKnowns.find( maBcp47) != rKnowns.end()) + meIsLiblangtagNeeded = DECISION_NO; // known fallback + } } } if (bTemporaryLocale) commit c318f19c492f76e3b7d557257b3706f05b6fed62 Author: Eike Rathke <[email protected]> Date: Tue Sep 3 11:31:39 2013 +0200 simplified simpleExtract() and made some cases work Change-Id: I5d39a020a4bb6164c0d7695d2f2e43785869b345 diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx index 22f82fe..65236ce 100644 --- a/i18nlangtag/source/languagetag/languagetag.cxx +++ b/i18nlangtag/source/languagetag/languagetag.cxx @@ -1302,9 +1302,9 @@ LanguageTag::Extraction LanguageTag::simpleExtract( const OUString& rBcp47, Extraction eRet = EXTRACTED_NONE; const sal_Int32 nLen = rBcp47.getLength(); const sal_Int32 nHyph1 = rBcp47.indexOf( '-'); - const sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1)); - const sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1)); - const sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1)); + sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1)); + sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1)); + sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1)); if (nLen == 1 && rBcp47[0] == '*') // * the dreaded jolly joker { // It's f*d up but we need to recognize this. @@ -1362,45 +1362,42 @@ LanguageTag::Extraction LanguageTag::simpleExtract( const OUString& rBcp47, else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15) // ll-Ssss-CC-vvvv[vvvv][-...] 
|| (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16)) // lll-Ssss-CC-vvvv[vvvv][-...] { - if (nHyph4 < 0 || (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)) + if (nHyph4 < 0) + nHyph4 = rBcp47.getLength(); + if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9) { + rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); + rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase(); + rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase(); rVariants = rBcp47.copy( nHyph3 + 1); - if (nHyph4 < 0 && (rVariants.getLength() < 4 || 8 < rVariants.getLength())) - { - rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); - rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase(); - rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase(); - eRet = EXTRACTED_LV; - } + eRet = EXTRACTED_LV; } } else if ( (nHyph1 == 2 && nHyph2 == 5 && nLen >= 10) // ll-CC-vvvv[vvvv][-...] || (nHyph1 == 3 && nHyph2 == 6 && nLen >= 11)) // lll-CC-vvvv[vvvv][-...] { - if (nHyph3 < 0 || (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)) + if (nHyph3 < 0) + nHyph3 = rBcp47.getLength(); + if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9) { + rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); + rScript = OUString(); + rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase(); rVariants = rBcp47.copy( nHyph2 + 1); - if (nHyph3 < 0 && (rVariants.getLength() < 4 || 8 < rVariants.getLength())) - { - rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); - rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase(); - rScript = OUString(); - eRet = EXTRACTED_LV; - } + eRet = EXTRACTED_LV; } } else if ( (nHyph1 == 2 && nLen >= 8) // ll-vvvvv[vvv][-...] || (nHyph1 == 3 && nLen >= 9)) // lll-vvvvv[vvv][-...] 
{ - if (nHyph2 < 0 || (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9)) + if (nHyph2 < 0) + nHyph2 = rBcp47.getLength(); + if (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9) { + rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); + rScript = rCountry = OUString(); rVariants = rBcp47.copy( nHyph1 + 1); - if (nHyph2 < 0 && (rVariants.getLength() < 5 || 8 < rVariants.getLength())) - { - rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase(); - rScript = rCountry = OUString(); - eRet = EXTRACTED_LV; - } + eRet = EXTRACTED_LV; } } if (eRet == EXTRACTED_NONE) _______________________________________________ Libreoffice-commits mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
