Paul J. Lucas has proposed merging lp:~paul-lucas/zorba/bug-924063 into lp:zorba.
Requested reviews:
  Paul J. Lucas (paul-lucas)
  Matthias Brantner (matthias-brantner)

Related bugs:
  Bug #924063 in Zorba: "Sentence is incorrectly incremented when token characters end without sentence terminator, take 2"
  https://bugs.launchpad.net/zorba/+bug/924063

For more details, see:
https://code.launchpad.net/~paul-lucas/zorba/bug-924063/+merge/90887

Real fix this time.

--
https://code.launchpad.net/~paul-lucas/zorba/bug-924063/+merge/90887
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/runtime/full_text/icu_tokenizer.cpp' --- src/runtime/full_text/icu_tokenizer.cpp 2011-12-21 14:40:33 +0000 +++ src/runtime/full_text/icu_tokenizer.cpp 2012-01-31 15:33:31 +0000 @@ -357,7 +357,7 @@ t.append( utf8_buf, utf8_len ); else { # if DEBUG_TOKENIZER - cout << "setting token" << endl; + cout << " setting token" << endl; # endif t.set( utf8_buf, utf8_len, numbers().token, numbers().sent, numbers().para @@ -369,15 +369,24 @@ next: # if DEBUG_TOKENIZER cout << "at next" << endl; + cout << " word_start = " << word_start << endl; + cout << " word_end = " << word_end << endl; + cout << " sent_end = " << sent_end << endl; # endif word_start = word_end, word_end = word_it_->next(); +# if DEBUG_TOKENIZER + cout << " word_start = " << word_start << endl; + cout << " word_end = " << word_end << endl; +# endif + if ( word_end >= sent_end && sent_end != BreakIterator::DONE ) { sent_end = sent_it_->next(); +# if DEBUG_TOKENIZER + cout << " sent_end = " << sent_end << endl; +# endif // The addition of the "if" fixes: // https://bugs.launchpad.net/bugs/863320 -#if 0 if ( sent_end != BreakIterator::DONE ) -#endif ++numbers().sent; } } // while @@ -389,9 +398,7 @@ t.send( payload, callback ); // Incrementing "sent" here fixes: // https://bugs.launchpad.net/bugs/897800 -#if 0 ++numbers().sent; -#endif #if DEBUG_TOKENIZER cout << "--------------------\n"; #endif /* DEBUG_TOKENIZER */ === modified file 'test/rbkt/Queries/CMakeLists.txt' --- test/rbkt/Queries/CMakeLists.txt 2012-01-25 11:57:01 +0000 +++ test/rbkt/Queries/CMakeLists.txt 2012-01-31 15:33:31 +0000 @@ -539,7 +539,3 @@ EXPECTED_FAILURE(test/rbkt/zorba/windowing/tumbling20 899364) EXPECTED_FAILURE(test/rbkt/zorba/windowing/tumbling17 899364) EXPECTED_FAILURE(test/rbkt/zorba/windowing/tumbling21 899364) - -IF (NOT ZORBA_NO_FULL_TEXT) - EXPECTED_FAILURE(test/rbkt/zorba/fulltext/ft-same-sentence-true-4 900552) -ENDIF (NOT ZORBA_NO_FULL_TEXT) === modified file 
'test/rbkt/Queries/zorba/fulltext/ft-same-sentence-false-2.xq' --- test/rbkt/Queries/zorba/fulltext/ft-same-sentence-false-2.xq 2011-12-21 14:40:33 +0000 +++ test/rbkt/Queries/zorba/fulltext/ft-same-sentence-false-2.xq 2012-01-31 15:33:31 +0000 @@ -1,2 +1,2 @@ -let $x := <msg>hello. world</msg> +let $x := <msg>Hello. World</msg> return $x contains text "hello" ftand "world" same sentence === modified file 'test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-2.xq' --- test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-2.xq 2011-06-14 14:21:49 +0000 +++ test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-2.xq 2012-01-31 15:33:31 +0000 @@ -1,2 +1,2 @@ -let $x := <msg>hello world.</msg> +let $x := <msg>Hello world.</msg> return $x contains text "hello" ftand "world" same sentence === modified file 'test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-3.xq' --- test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-3.xq 2011-06-14 14:21:49 +0000 +++ test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-3.xq 2012-01-31 15:33:31 +0000 @@ -1,3 +1,3 @@ -let $x := <msg>hello +let $x := <msg>Hello world.</msg> return $x contains text "hello" ftand "world" same sentence === modified file 'test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-4.xq' --- test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-4.xq 2011-09-30 14:31:41 +0000 +++ test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-4.xq 2012-01-31 15:33:31 +0000 @@ -1,2 +1,2 @@ -let $x := <msg>hello world</msg> +let $x := <msg>Hello world</msg> return $x contains text "hello" ftand "world" same sentence
--
Mailing list: https://launchpad.net/~zorba-coders
Post to     : zorba-coders@lists.launchpad.net
Unsubscribe : https://launchpad.net/~zorba-coders
More help   : https://help.launchpad.net/ListHelp