Paul J. Lucas has proposed merging lp:~paul-lucas/zorba/bug-924063 into 
lp:zorba.

Requested reviews:
  Paul J. Lucas (paul-lucas)
  Matthias Brantner (matthias-brantner)
Related bugs:
  Bug #924063 in Zorba: "Sentence is incorrectly incremented when token 
characters end without sentence terminator, take 2"
  https://bugs.launchpad.net/zorba/+bug/924063

For more details, see:
https://code.launchpad.net/~paul-lucas/zorba/bug-924063/+merge/90887

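Real fix this time. In icu_tokenizer.cpp, this re-enables both the
"sent_end != BreakIterator::DONE" guard around the in-loop sentence increment
and the final "++numbers().sent" after the last token is sent (each had been
disabled with "#if 0"), and adds extra DEBUG_TOKENIZER output. It also removes
the EXPECTED_FAILURE entry for ft-same-sentence-true-4 and capitalizes the
sentence-initial words in the ft-same-sentence-* test inputs.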
-- 
https://code.launchpad.net/~paul-lucas/zorba/bug-924063/+merge/90887
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/runtime/full_text/icu_tokenizer.cpp'
--- src/runtime/full_text/icu_tokenizer.cpp	2011-12-21 14:40:33 +0000
+++ src/runtime/full_text/icu_tokenizer.cpp	2012-01-31 15:33:31 +0000
@@ -357,7 +357,7 @@
         t.append( utf8_buf, utf8_len );
       else {
 #       if DEBUG_TOKENIZER
-        cout << "setting token" << endl;
+        cout << "  setting token" << endl;
 #       endif
         t.set(
           utf8_buf, utf8_len, numbers().token, numbers().sent, numbers().para
@@ -369,15 +369,24 @@
 next:
 #   if DEBUG_TOKENIZER
     cout << "at next" << endl;
+    cout << "  word_start = " << word_start << endl;
+    cout << "  word_end   = " << word_end   << endl;
+    cout << "  sent_end   = " << sent_end   << endl;
 #   endif
     word_start = word_end, word_end = word_it_->next();
+#   if DEBUG_TOKENIZER
+    cout << "  word_start = " << word_start << endl;
+    cout << "  word_end   = " << word_end   << endl;
+#   endif
+
     if ( word_end >= sent_end && sent_end != BreakIterator::DONE ) {
       sent_end = sent_it_->next();
+#     if DEBUG_TOKENIZER
+      cout << "  sent_end   = " << sent_end   << endl;
+#     endif
       // The addition of the "if" fixes:
       // https://bugs.launchpad.net/bugs/863320
-#if 0
       if ( sent_end != BreakIterator::DONE )
-#endif
         ++numbers().sent;
     }
   } // while
@@ -389,9 +398,7 @@
   t.send( payload, callback );
   // Incrementing "sent" here fixes:
   // https://bugs.launchpad.net/bugs/897800
-#if 0
   ++numbers().sent;
-#endif
 #if DEBUG_TOKENIZER
   cout << "--------------------\n";
 #endif /* DEBUG_TOKENIZER */

=== modified file 'test/rbkt/Queries/CMakeLists.txt'
--- test/rbkt/Queries/CMakeLists.txt	2012-01-25 11:57:01 +0000
+++ test/rbkt/Queries/CMakeLists.txt	2012-01-31 15:33:31 +0000
@@ -539,7 +539,3 @@
 EXPECTED_FAILURE(test/rbkt/zorba/windowing/tumbling20 899364)
 EXPECTED_FAILURE(test/rbkt/zorba/windowing/tumbling17 899364)
 EXPECTED_FAILURE(test/rbkt/zorba/windowing/tumbling21 899364)
-
-IF (NOT ZORBA_NO_FULL_TEXT)
-  EXPECTED_FAILURE(test/rbkt/zorba/fulltext/ft-same-sentence-true-4 900552)
-ENDIF (NOT ZORBA_NO_FULL_TEXT)

=== modified file 'test/rbkt/Queries/zorba/fulltext/ft-same-sentence-false-2.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-same-sentence-false-2.xq	2011-12-21 14:40:33 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-same-sentence-false-2.xq	2012-01-31 15:33:31 +0000
@@ -1,2 +1,2 @@
-let $x := <msg>hello. world</msg>
+let $x := <msg>Hello. World</msg>
 return $x contains text "hello" ftand "world" same sentence

=== modified file 'test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-2.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-2.xq	2011-06-14 14:21:49 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-2.xq	2012-01-31 15:33:31 +0000
@@ -1,2 +1,2 @@
-let $x := <msg>hello world.</msg>
+let $x := <msg>Hello world.</msg>
 return $x contains text "hello" ftand "world" same sentence

=== modified file 'test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-3.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-3.xq	2011-06-14 14:21:49 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-3.xq	2012-01-31 15:33:31 +0000
@@ -1,3 +1,3 @@
-let $x := <msg>hello
+let $x := <msg>Hello
 world.</msg>
 return $x contains text "hello" ftand "world" same sentence

=== modified file 'test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-4.xq'
--- test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-4.xq	2011-09-30 14:31:41 +0000
+++ test/rbkt/Queries/zorba/fulltext/ft-same-sentence-true-4.xq	2012-01-31 15:33:31 +0000
@@ -1,2 +1,2 @@
-let $x := <msg>hello world</msg>
+let $x := <msg>Hello world</msg>
 return $x contains text "hello" ftand "world" same sentence

-- 
Mailing list: https://launchpad.net/~zorba-coders
Post to     : zorba-coders@lists.launchpad.net
Unsubscribe : https://launchpad.net/~zorba-coders
More help   : https://help.launchpad.net/ListHelp

Reply via email to