Author: hdu
Date: Mon Jan 23 13:14:56 2012
New Revision: 1234777

URL: http://svn.apache.org/viewvc?rev=1234777&view=rev
Log:
emulate word boundary matching of old regex engine

The new ICU regex engine has much-improved Unicode capabilities.
The old regex engine had the extensions \< and \> for matching word boundaries.
For a smooth upgrade experience these legacy escapes are now
mapped to \b, which is supported by almost all regex engines.

Modified:
    incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx
    incubator/ooo/trunk/main/i18npool/source/search/textsearch.hxx

Modified: incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx
URL: 
http://svn.apache.org/viewvc/incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx?rev=1234777&r1=1234776&r2=1234777&view=diff
==============================================================================
--- incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx (original)
+++ incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx Mon Jan 23 
13:14:56 2012
@@ -176,27 +176,8 @@ void TextSearch::setOptions( const Searc
                case SearchAlgorithms_REGEXP:
                        fnForward = &TextSearch::RESrchFrwrd;
                        fnBackward = &TextSearch::RESrchBkwrd;
-
-                       {
-                       sal_uInt32 nIcuSearchFlags = 0;
-                       // map com::sun::star::util::SearchFlags to ICU 
uregex.h flags
-                       // TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, 
REG_NOT_ENDOFLINE
-                       // REG_NEWLINE is neither defined properly nor used 
anywhere => not implemented
-                       // REG_NOSUB is not used anywhere => not implemented
-                       // NORM_WORD_ONLY is only used for 
SearchAlgorithm==Absolute
-                       // LEV_RELAXED is only used for 
SearchAlgorithm==Approximate
-                       // why is even ALL_IGNORE_CASE deprecated in UNO? 
because of transliteration taking care of it???
-                       if( (aSrchPara.searchFlag & 
com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0)
-                               nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
-                       UErrorCode nIcuErr = U_ZERO_ERROR;
-                       // assumption: transliteration doesn't mangle regexp 
control chars
-                       OUString& rPatternStr = (aSrchPara.transliterateFlags & 
SIMPLE_TRANS_MASK) ? sSrchStr
-                                       : ((aSrchPara.transliterateFlags & 
COMPLEX_TRANS_MASK) ? sSrchStr2 : aSrchPara.searchString);
-                       const IcuUniString aIcuSearchPatStr( 
rPatternStr.getStr(), rPatternStr.getLength());
-                       pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, 
nIcuSearchFlags, nIcuErr);
-                       if( nIcuErr)
-                               { delete pRegexMatcher; pRegexMatcher = NULL;}
-                       } break;
+                       RESrchPrepare( aSrchPara);
+                       break;
 
                case SearchAlgorithms_APPROXIMATE:
             fnForward = &TextSearch::ApproxSrchFrwrd;
@@ -720,6 +701,41 @@ SearchResult TextSearch::NSrchBkwrd( con
     return aRet;
 }
 
+/** Compile the regular expression search pattern into pRegexMatcher.
+ *
+ *  Picks the pattern string according to the transliteration flags, maps the
+ *  UNO search flags to ICU regex flags, rewrites the old engine's word
+ *  boundary escapes \< and \> to \b and creates the ICU matcher from the
+ *  result.
+ *
+ *  @param rOptions  search options; transliterateFlags, searchFlag and
+ *                   searchString are read here
+ *
+ *  If ICU reports a failure pRegexMatcher is deleted and set to NULL.
+ */
+void TextSearch::RESrchPrepare( const ::com::sun::star::util::SearchOptions& rOptions)
+{
+       // select the transliterated pattern string
+       const OUString& rPatternStr =
+               (rOptions.transliterateFlags & SIMPLE_TRANS_MASK) ? sSrchStr
+               : ((rOptions.transliterateFlags & COMPLEX_TRANS_MASK) ? sSrchStr2 : rOptions.searchString);
+
+       sal_uInt32 nIcuSearchFlags = 0;
+       // map com::sun::star::util::SearchFlags to ICU uregex.h flags
+       // TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE
+       // REG_NEWLINE is neither properly defined nor used anywhere => not implemented
+       // REG_NOSUB is not used anywhere => not implemented
+       // NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute
+       // LEV_RELAXED is only used for SearchAlgorithm==Approximate
+       // why is even ALL_IGNORE_CASE deprecated in UNO? because of transliteration taking care of it???
+       if( (rOptions.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0)
+               nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
+       UErrorCode nIcuErr = U_ZERO_ERROR;
+       // assumption: transliteration didn't mangle regexp control chars
+       IcuUniString aIcuSearchPatStr( rPatternStr.getStr(), rPatternStr.getLength());
+#if 1
+       // for convenience specific syntax elements of the old regex engine are emulated
+       // by using regular word boundary matching \b to replace \< and \>
+       //
+       // The chevron pattern has to match a literal backslash followed by < or >.
+       // The backslash is therefore escaped once for the regex engine and once
+       // more for the C++ literal; a single-escaped chevron would merely be a
+       // quoted '<' or '>' and match the bare character.
+       static const IcuUniString aChevronPattern( "\\\\<|\\\\>", -1, IcuUniString::kInvariant);
+       // In ICU replacement text a backslash quotes the following character, so
+       // the backslash must be doubled there too or a plain 'b' would be inserted.
+       static const IcuUniString aChevronReplace( "\\\\b", -1, IcuUniString::kInvariant);
+       // NOTE(review): this function-local static matcher is shared by every
+       // TextSearch instance and is mutated below - confirm callers are serialized
+       static RegexMatcher aChevronMatcher( aChevronPattern, 0, nIcuErr);
+       aChevronMatcher.reset( aIcuSearchPatStr);
+       aIcuSearchPatStr = aChevronMatcher.replaceAll( aChevronReplace, nIcuErr);
+       aChevronMatcher.reset();
+#endif
+       pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
+       // ICU warnings are non-zero too, only real failures invalidate the matcher
+       if( U_FAILURE( nIcuErr))
+               { delete pRegexMatcher; pRegexMatcher = NULL;}
+}
+
 //---------------------------------------------------------------------------
 
 SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,

Modified: incubator/ooo/trunk/main/i18npool/source/search/textsearch.hxx
URL: 
http://svn.apache.org/viewvc/incubator/ooo/trunk/main/i18npool/source/search/textsearch.hxx?rev=1234777&r1=1234776&r2=1234777&view=diff
==============================================================================
--- incubator/ooo/trunk/main/i18npool/source/search/textsearch.hxx (original)
+++ incubator/ooo/trunk/main/i18npool/source/search/textsearch.hxx Mon Jan 23 
13:14:56 2012
@@ -101,6 +101,7 @@ class TextSearch: public cppu::WeakImplH
                RESrchBkwrd( const ::rtl::OUString& searchStr,
                                                                sal_Int32 
startPos, sal_Int32 endPos )
                                                        
throw(::com::sun::star::uno::RuntimeException);
+       void RESrchPrepare( const ::com::sun::star::util::SearchOptions&);
 
        // Members and methods for the "Weight Levenshtein-Distance" search
        int nLimit;


Reply via email to