[Libreoffice-commits] .: Branch 'libreoffice-4-0' - i18npool/inc i18npool/source

2012-12-21 Thread Libreoffice Gerrit user
 i18npool/inc/breakiterator_unicode.hxx  |7 -
 i18npool/source/breakiterator/breakiterator_unicode.cxx |   80 +++-
 2 files changed, 41 insertions(+), 46 deletions(-)

New commits:
commit dd0af402771c3e7fada4fd8dc69fa12066c6766e
Author: Michael Meeks michael.me...@suse.com
Date:   Thu Dec 20 23:04:15 2012 +0000

fdo#58590 - cleanup and accelerate break-iterators.

Doing word-count by switching per-word between two different
kinds of word iterator was insanely slow. This preserves an
ICU break-iterator for each type of word-breaking.

diff --git a/i18npool/inc/breakiterator_unicode.hxx 
b/i18npool/inc/breakiterator_unicode.hxx
index 26046ea..fe226d4 100644
--- a/i18npool/inc/breakiterator_unicode.hxx
+++ b/i18npool/inc/breakiterator_unicode.hxx
@@ -80,6 +80,7 @@ protected:
 rtl::OUString aICUText;
 UText *ut;
 icu::BreakIterator *aBreakIterator;
+com::sun::star::lang::Locale maLocale;
 
 BI_Data()
 : ut(NULL)
@@ -91,10 +92,10 @@ protected:
 utext_close(ut);
 }
 
-} character, word, sentence, line, *icuBI;
+} character, sentence, line, *icuBI;
+BI_Data words[4]; // 4 is css::i18n::WordType enumeration size
 
-com::sun::star::lang::Locale aLocale;
-sal_Int16 aBreakType, aWordType;
+sal_Int16 aBreakType;
 
 void SAL_CALL loadICUBreakIterator(const com::sun::star::lang::Locale& 
rLocale,
 sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char* name, const 
rtl::OUString& rText) throw(com::sun::star::uno::RuntimeException);
diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx 
b/i18npool/source/breakiterator/breakiterator_unicode.cxx
index 242cfa6..77ca831 100644
--- a/i18npool/source/breakiterator/breakiterator_unicode.cxx
+++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx
@@ -44,29 +44,17 @@ BreakIterator_Unicode::BreakIterator_Unicode() :
 cBreakIterator( "com.sun.star.i18n.BreakIterator_Unicode" ),// 
implementation name
 wordRule( "word" ),
 lineRule( "line" ),
-result(),
-character(),
-word(),
-sentence(),
-line(),
-icuBI( NULL ),
-aLocale(),
-aBreakType(),
-aWordType()
+icuBI( NULL )
 {
 }
 
-
 BreakIterator_Unicode::~BreakIterator_Unicode()
 {
-if (icuBI && icuBI->aBreakIterator) {
-delete icuBI->aBreakIterator;
-icuBI->aBreakIterator=NULL;
-}
-if (character.aBreakIterator) delete character.aBreakIterator;
-if (word.aBreakIterator) delete word.aBreakIterator;
-if (sentence.aBreakIterator) delete sentence.aBreakIterator;
-if (line.aBreakIterator) delete line.aBreakIterator;
+delete character.aBreakIterator;
+delete sentence.aBreakIterator;
+delete line.aBreakIterator;
+for (size_t i = 0; i < SAL_N_ELEMENTS(words); i++)
+delete words[i].aBreakIterator;
 }
 
 /*
@@ -86,26 +74,34 @@ class OOoRuleBasedBreakIterator : public 
RuleBasedBreakIterator {
 
 // loading ICU breakiterator on demand.
 void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const 
com::sun::star::lang::Locale& rLocale,
-sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char *rule, const 
OUString& rText) throw(uno::RuntimeException)
+sal_Int16 rBreakType, sal_Int16 nWordType, const sal_Char *rule, const 
OUString& rText) throw(uno::RuntimeException)
 {
 sal_Bool newBreak = sal_False;
 UErrorCode status = U_ZERO_ERROR;
 sal_Int16 breakType = 0;
 switch (rBreakType) {
 case LOAD_CHARACTER_BREAKITERATOR: icuBI=&character; breakType = 3; 
break;
-case LOAD_WORD_BREAKITERATOR: icuBI=&word;
-switch (rWordType) {
-case WordType::ANYWORD_IGNOREWHITESPACES: breakType = 0; 
rule=wordRule = "edit_word"; break;
-case WordType::DICTIONARY_WORD: breakType = 1; rule=wordRule = 
"dict_word"; break;
-case WordType::WORD_COUNT: breakType = 2; rule=wordRule = 
"count_word"; break;
+case LOAD_WORD_BREAKITERATOR:
+assert (nWordType >= 0 && nWordType<= WordType::WORD_COUNT);
+icuBI=&words[nWordType];
+switch (nWordType) {
+case WordType::ANY_WORD: break; // odd but previous behavior
+case WordType::ANYWORD_IGNOREWHITESPACES:
+breakType = 0; rule = wordRule = "edit_word"; break;
+case WordType::DICTIONARY_WORD:
+breakType = 1; rule = wordRule = "dict_word"; break;
+default:
+case WordType::WORD_COUNT:
+breakType = 2; rule = wordRule = "count_word"; break;
 }
 break;
 case LOAD_SENTENCE_BREAKITERATOR: icuBI=&sentence; breakType = 5; 
break;
 case LOAD_LINE_BREAKITERATOR: icuBI=&line; breakType = 4; break;
 }
 }
-if (!icuBI-aBreakIterator || rWordType != aWordType ||
-rLocale.Language != aLocale.Language || 

[Libreoffice-commits] .: Branch 'libreoffice-4-0' - i18npool/inc i18npool/source

2012-12-13 Thread Libreoffice Gerrit user
 i18npool/inc/i18npool/lang.h|2 ++
 i18npool/source/isolang/isolang.cxx |2 ++
 2 files changed, 4 insertions(+)

New commits:
commit ebcc02052d6fdfad59cdbcb4f18df0c007033999
Author: Eike Rathke er...@redhat.com
Date:   Thu Dec 13 13:45:14 2012 +0100

introduced [mul] multiple and [und] undetermined language codes

Various places use an empty locale or string to indicate different
meanings and/or variants of "this is not a real locale but I didn't know
what to use else" or abuse LANGUAGE_DONTKNOW in one or the other way. In
preparation of changing that awkward situation now offer the ISO 639
codes for multiple languages [mul] and language undetermined [und],
mapping to LANGUAGE_MULTIPLE and LANGUAGE_UNDETERMINED.

Change-Id: I687de23ffc00a9a056f2837b024e0a62658e3df2
(cherry picked from commit 09bc1464ec9dde61e69cca393e65e72143bdd383)

diff --git a/i18npool/inc/i18npool/lang.h b/i18npool/inc/i18npool/lang.h
index cf4ec6b..932f92b 100644
--- a/i18npool/inc/i18npool/lang.h
+++ b/i18npool/inc/i18npool/lang.h
@@ -566,6 +566,8 @@ typedef unsigned short LanguageType;
 #define LANGUAGE_USER_YOMBE 0x0684
 #define LANGUAGE_USER_YOMBE_CONGO   0x8284  /* makeLangID( 0x20, 
getPrimaryLanguage( LANGUAGE_USER_YOMBE)) */
 #define LANGUAGE_USER_SIDAMA0x0685
+#define LANGUAGE_MULTIPLE   0xFFEF  /* multiple languages, 
primary 0x3ef, sub 0x3f */
+#define LANGUAGE_UNDETERMINED   0xFFF0  /* undetermined language, 
primary 0x3f0, sub 0x3f */
 #define LANGUAGE_USER_SYSTEM_CONFIG 0xFFFE  /* not a locale, to be 
used only in configuration context to obtain system default, primary 0x3fe, sub 
0x3f */
 
 #endif /* INCLUDED_I18NPOOL_LANG_H */
diff --git a/i18npool/source/isolang/isolang.cxx 
b/i18npool/source/isolang/isolang.cxx
index 439590a..bbe3e0f 100644
--- a/i18npool/source/isolang/isolang.cxx
+++ b/i18npool/source/isolang/isolang.cxx
@@ -511,6 +511,8 @@ static MsLangId::IsoLangEntry const aImplIsoLangEntries[] =
 { LANGUAGE_USER_YOMBE, "yom", "CD" },
 { LANGUAGE_USER_YOMBE_CONGO,   "yom", "CG" },
 { LANGUAGE_USER_SIDAMA,"sid", "ET" },
+{ LANGUAGE_MULTIPLE,   "mul", "" },   // multiple 
languages, many languages are used
+{ LANGUAGE_UNDETERMINED,   "und", "" },   // undetermined 
language, language cannot be identified
 { LANGUAGE_NONE,   "zxx", "" },   // added to ISO 
639-2 on 2006-01-11: Used to declare the absence of linguistic information
 { LANGUAGE_DONTKNOW, "", "" }// marks end of 
table
 };
___
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits