i18npool/inc/breakiterator_unicode.hxx |7 -
i18npool/source/breakiterator/breakiterator_unicode.cxx | 80 +++-
2 files changed, 41 insertions(+), 46 deletions(-)
New commits:
commit dd0af402771c3e7fada4fd8dc69fa12066c6766e
Author: Michael Meeks michael.me...@suse.com
Date: Thu Dec 20 23:04:15 2012 +
fdo#58590 - cleanup and accelerate break-iterators.
Doing word-count by switching per-word between two different
kinds of word iterator was insanely slow. This preserves an
ICU break-iterator for each type of word-breaking.
diff --git a/i18npool/inc/breakiterator_unicode.hxx
b/i18npool/inc/breakiterator_unicode.hxx
index 26046ea..fe226d4 100644
--- a/i18npool/inc/breakiterator_unicode.hxx
+++ b/i18npool/inc/breakiterator_unicode.hxx
@@ -80,6 +80,7 @@ protected:
rtl::OUString aICUText;
UText *ut;
icu::BreakIterator *aBreakIterator;
+com::sun::star::lang::Locale maLocale;
BI_Data()
: ut(NULL)
@@ -91,10 +92,10 @@ protected:
utext_close(ut);
}
-} character, word, sentence, line, *icuBI;
+} character, sentence, line, *icuBI;
+BI_Data words[4]; // 4 is css::i18n::WordType enumeration size
-com::sun::star::lang::Locale aLocale;
-sal_Int16 aBreakType, aWordType;
+sal_Int16 aBreakType;
void SAL_CALL loadICUBreakIterator(const com::sun::star::lang::Locale&
rLocale,
sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char* name, const
rtl::OUString& rText) throw(com::sun::star::uno::RuntimeException);
diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx
b/i18npool/source/breakiterator/breakiterator_unicode.cxx
index 242cfa6..77ca831 100644
--- a/i18npool/source/breakiterator/breakiterator_unicode.cxx
+++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx
@@ -44,29 +44,17 @@ BreakIterator_Unicode::BreakIterator_Unicode() :
cBreakIterator( "com.sun.star.i18n.BreakIterator_Unicode" ),//
implementation name
wordRule( "word" ),
lineRule( "line" ),
-result(),
-character(),
-word(),
-sentence(),
-line(),
-icuBI( NULL ),
-aLocale(),
-aBreakType(),
-aWordType()
+icuBI( NULL )
{
}
-
BreakIterator_Unicode::~BreakIterator_Unicode()
{
-if (icuBI && icuBI->aBreakIterator) {
-delete icuBI->aBreakIterator;
-icuBI->aBreakIterator=NULL;
-}
-if (character.aBreakIterator) delete character.aBreakIterator;
-if (word.aBreakIterator) delete word.aBreakIterator;
-if (sentence.aBreakIterator) delete sentence.aBreakIterator;
-if (line.aBreakIterator) delete line.aBreakIterator;
+delete character.aBreakIterator;
+delete sentence.aBreakIterator;
+delete line.aBreakIterator;
+for (size_t i = 0; i < SAL_N_ELEMENTS(words); i++)
+delete words[i].aBreakIterator;
}
/*
@@ -86,26 +74,34 @@ class OOoRuleBasedBreakIterator : public
RuleBasedBreakIterator {
// loading ICU breakiterator on demand.
void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const
com::sun::star::lang::Locale& rLocale,
-sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char *rule, const
OUString& rText) throw(uno::RuntimeException)
+sal_Int16 rBreakType, sal_Int16 nWordType, const sal_Char *rule, const
OUString& rText) throw(uno::RuntimeException)
{
sal_Bool newBreak = sal_False;
UErrorCode status = U_ZERO_ERROR;
sal_Int16 breakType = 0;
switch (rBreakType) {
case LOAD_CHARACTER_BREAKITERATOR: icuBI=&character; breakType = 3;
break;
-case LOAD_WORD_BREAKITERATOR: icuBI=&word;
-switch (rWordType) {
-case WordType::ANYWORD_IGNOREWHITESPACES: breakType = 0;
rule=wordRule = "edit_word"; break;
-case WordType::DICTIONARY_WORD: breakType = 1; rule=wordRule =
"dict_word"; break;
-case WordType::WORD_COUNT: breakType = 2; rule=wordRule =
"count_word"; break;
+case LOAD_WORD_BREAKITERATOR:
+assert (nWordType >= 0 && nWordType<= WordType::WORD_COUNT);
+icuBI=&words[nWordType];
+switch (nWordType) {
+case WordType::ANY_WORD: break; // odd but previous behavior
+case WordType::ANYWORD_IGNOREWHITESPACES:
+breakType = 0; rule = wordRule = "edit_word"; break;
+case WordType::DICTIONARY_WORD:
+breakType = 1; rule = wordRule = "dict_word"; break;
+default:
+case WordType::WORD_COUNT:
+breakType = 2; rule = wordRule = "count_word"; break;
}
break;
case LOAD_SENTENCE_BREAKITERATOR: icuBI=&sentence; breakType = 5;
break;
case LOAD_LINE_BREAKITERATOR: icuBI=&line; breakType = 4; break;
}
-if (!icuBI->aBreakIterator || rWordType != aWordType ||
-rLocale.Language != aLocale.Language ||