i18npool/source/search/textsearch.cxx | 81 ++++++++++++++++++++++++++-------- sw/source/core/crsr/findtxt.cxx | 13 +++-- 2 files changed, 71 insertions(+), 23 deletions(-)
New commits: commit ccc349d3abb70ef38cd2b7706da51b060a385908 Author: Eike Rathke <er...@redhat.com> Date: Sat Mar 9 18:47:29 2013 +0100 regex: don't loop 10000 identical matches to find a single $ anchor Change-Id: Ic130ecc4b0b6d58ba7ef063040bd9a11a90db425 diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx index 14ecdcc..997b01d 100644 --- a/i18npool/source/search/textsearch.cxx +++ b/i18npool/source/search/textsearch.cxx @@ -752,6 +752,11 @@ SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr, int nEndOfs = pRegexMatcher->end( nIcuErr); if( nStartOfs < nEndOfs) break; + // If the zero-length match is behind the string, do not match it again + // and again until startPos reaches there. A match behind the string is + // a "$" anchor. + if (nStartOfs == endPos) + break; // try at next position if there was a zero-length match if( ++startPos >= endPos) return aRet; commit 3bc5cb3c485d67f1ce0541d349d11637f52ebda5 Author: Eike Rathke <er...@redhat.com> Date: Sat Mar 9 18:41:01 2013 +0100 regex: handle zero-length matches, fdo#60259 related Also in backward search ignore all zero-length matches except the text end single $ anchor search. The anchor search is a valid match, treat it as such in Writer. This still doesn't solve the backward $ anchor search; the convoluted Writer code in that place apparently never worked. Someone more familiar with those internals should straighten out the mess. 
Change-Id: I2bc83cdcfe6425f9bebf06e23e879bc79b82daa9 diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx index 72e1d60..14ecdcc 100644 --- a/i18npool/source/search/textsearch.cxx +++ b/i18npool/source/search/textsearch.cxx @@ -797,17 +797,35 @@ SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr, // find the last match int nLastPos = 0; int nFoundEnd = 0; + int nGoodPos = 0, nGoodEnd = 0; + bool bFirst = true; do { nLastPos = pRegexMatcher->start( nIcuErr); nFoundEnd = pRegexMatcher->end( nIcuErr); + if (nLastPos < nFoundEnd) + { + // remember last non-zero-length match + nGoodPos = nLastPos; + nGoodEnd = nFoundEnd; + } if( nFoundEnd >= startPos) break; + bFirst = false; if( nFoundEnd == nLastPos) ++nFoundEnd; } while( pRegexMatcher->find( nFoundEnd, nIcuErr)); + // Ignore all zero-length matches except "$" anchor on first match. + if (nGoodPos == nGoodEnd) + { + if (bFirst && nLastPos == startPos) + nGoodPos = nLastPos; + else + return aRet; + } + // find last match again to get its details - pRegexMatcher->find( nLastPos, nIcuErr); + pRegexMatcher->find( nGoodPos, nIcuErr); // fill in the details of the last match const int nGroupCount = pRegexMatcher->groupCount(); diff --git a/sw/source/core/crsr/findtxt.cxx b/sw/source/core/crsr/findtxt.cxx index b2fbea8..e1c9358 100644 --- a/sw/source/core/crsr/findtxt.cxx +++ b/sw/source/core/crsr/findtxt.cxx @@ -450,8 +450,9 @@ bool SwPaM::DoSearch( const SearchOptions& rSearchOpt, utl::TextSearch& rSTxt, } xub_StrLen nStringEnd = nEnd; - while ( (bSrchForward && nStart < nStringEnd) || - (! bSrchForward && nStart > nStringEnd) ) + bool bZeroMatch = false; // zero-length match, i.e. only $ anchor as regex + while ( ((bSrchForward && nStart < nStringEnd) || + (! 
bSrchForward && nStart > nStringEnd)) && !bZeroMatch ) { // SearchAlgorithms_APPROXIMATE works on a per word base so we have to // provide the text searcher with the correct locale, because it uses @@ -479,7 +480,8 @@ bool SwPaM::DoSearch( const SearchOptions& rSearchOpt, utl::TextSearch& rSTxt, } if( nSearchScript == nCurrScript && - (rSTxt.*fnMove->fnSearch)( sCleanStr, &nStart, &nEnd, 0 )) + (rSTxt.*fnMove->fnSearch)( sCleanStr, &nStart, &nEnd, 0 ) && + !(bZeroMatch = (nStart == nEnd))) { // set section correctly *GetPoint() = *pPam->GetPoint(); @@ -522,11 +524,14 @@ bool SwPaM::DoSearch( const SearchOptions& rSearchOpt, utl::TextSearch& rSTxt, if ( bFound ) return true; - else if( ( bChkEmptyPara && !nStart && !nTxtLen ) || bChkParaEnd ) + else if( ( bChkEmptyPara && !nStart && !nTxtLen ) || (bChkParaEnd && bZeroMatch && nEnd == nTxtLen)) { *GetPoint() = *pPam->GetPoint(); GetPoint()->nContent = bChkParaEnd ? nTxtLen : 0; SetMark(); + /* FIXME: this condition does not work for !bSrchForward backward + * search, it probably never did. (pSttNd != &rNdIdx.GetNode()) + * is never true in this case. */ if( (bSrchForward || pSttNd != &rNdIdx.GetNode()) && Move( fnMoveForward, fnGoCntnt ) && (!bSrchForward || pSttNd != &GetPoint()->nNode.GetNode()) && commit c00601dab0f5533b152cd63cec0a89bfec1ba95f Author: Eike Rathke <er...@redhat.com> Date: Fri Mar 8 21:36:44 2013 +0100 fdo#60259 prevent crash when searching backward for $ anchor regex Old code wasn't prepared that searching for $ may actually return a result set pointing behind the search string which it does with the ICU regex engine. 
Change-Id: I95612f676008cf6758a5ade3d674f38168944a4d diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx index 97f6da4..72e1d60 100644 --- a/i18npool/source/search/textsearch.cxx +++ b/i18npool/source/search/textsearch.cxx @@ -208,16 +208,26 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta sres = (this->*fnForward)( in_str, newStartPos, newEndPos ); - for ( int k = 0; k < sres.startOffset.getLength(); k++ ) + // Map offsets back to untransliterated string. + const sal_Int32 nOffsets = offset.getLength(); + if (nOffsets) { - if (sres.startOffset[k]) - sres.startOffset[k] = offset[sres.startOffset[k]]; - // JP 20.6.2001: end is ever exclusive and then don't return - // the position of the next character - return the - // next position behind the last found character! - // "a b c" find "b" must return 2,3 and not 2,4!!! - if (sres.endOffset[k]) - sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1; + // For regex nGroups is the number of groups+1 with group 0 being + // the entire match. + const sal_Int32 nGroups = sres.startOffset.getLength(); + for ( sal_Int32 k = 0; k < nGroups; k++ ) + { + const sal_Int32 nStart = sres.startOffset[k]; + if (nStart > 0) + sres.startOffset[k] = (nStart < nOffsets ? offset[nStart] : (offset[nOffsets - 1] + 1)); + // JP 20.6.2001: end is ever exclusive and then don't return + // the position of the next character - return the + // next position behind the last found character! + // "a b c" find "b" must return 2,3 and not 2,4!!! + const sal_Int32 nStop = sres.endOffset[k]; + if (nStop > 0) + sres.endOffset[k] = offset[(nStop <= nOffsets ? nStop : nOffsets) - 1] + 1; + } } } else @@ -297,16 +307,26 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st sres = (this->*fnBackward)( in_str, newStartPos, newEndPos ); - for ( int k = 0; k < sres.startOffset.getLength(); k++ ) + // Map offsets back to untransliterated string. 
+ const sal_Int32 nOffsets = offset.getLength(); + if (nOffsets) { - if (sres.startOffset[k]) - sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1; - // JP 20.6.2001: end is ever exclusive and then don't return - // the position of the next character - return the - // next position behind the last found character! - // "a b c" find "b" must return 2,3 and not 2,4!!! - if (sres.endOffset[k]) - sres.endOffset[k] = offset[sres.endOffset[k]]; + // For regex nGroups is the number of groups+1 with group 0 being + // the entire match. + const sal_Int32 nGroups = sres.startOffset.getLength(); + for ( sal_Int32 k = 0; k < nGroups; k++ ) + { + const sal_Int32 nStart = sres.startOffset[k]; + if (nStart > 0) + sres.startOffset[k] = offset[(nStart <= nOffsets ? nStart : nOffsets) - 1] + 1; + // JP 20.6.2001: end is ever exclusive and then don't return + // the position of the next character - return the + // next position behind the last found character! + // "a b c" find "b" must return 2,3 and not 2,4!!! + const sal_Int32 nStop = sres.endOffset[k]; + if (nStop > 0) + sres.endOffset[k] = (nStop < nOffsets ? offset[nStop] : (offset[nOffsets - 1] + 1)); + } } } else _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits