I'll backport this to the 3.x branch tomorrow.

Koji

--
http://www.rondhuit.com/en/

(10/09/28 1:10), [email protected] wrote:
Author: koji
Date: Mon Sep 27 16:10:29 2010
New Revision: 1001796

URL: http://svn.apache.org/viewvc?rev=1001796&view=rev
Log:
LUCENE-2529, LUCENE-2668: always apply position increment gap and offset gap 
between values

Modified:
     lucene/dev/trunk/lucene/CHANGES.txt
     
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
     
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
     
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
     
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java
     
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
     
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
     
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
     
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
     
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java
     
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Sep 27 16:10:29 2010
@@ -108,6 +108,11 @@ Changes in backwards compatibility polic
  * LUCENE-2600: Remove IndexReader.isDeleted in favor of
    IndexReader.getDeletedDocs().  (Mike McCandless)

+* LUCENE-2529, LUCENE-2668: Position increment gap and offset gap of empty
+  values in multi-valued fields have been changed for some cases in the index.
+  If you index empty fields and use positions/offsets information on those
+  fields, reindexing is recommended. (David Smiley, Koji Sekiguchi)
+
  Changes in Runtime Behavior

  * LUCENE-2650: The behavior of FSDirectory.open has changed. On 64-bit

Modified: 
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
--- 
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
 (original)
+++ 
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
 Mon Sep 27 16:10:29 2010
@@ -156,8 +156,7 @@ public abstract class BaseFragmentsBuild
        int startOffset, int endOffset ){
      while( buffer.length()<  endOffset&&  index[0]<  values.length ){
        buffer.append( values[index[0]] );
-      if( values[index[0]].length()>  0&&  index[0] + 1<  values.length )
-        buffer.append( multiValuedSeparator );
+      buffer.append( multiValuedSeparator );
        index[0]++;
      }
      int eo = buffer.length()<  endOffset ? buffer.length() : endOffset;
@@ -168,7 +167,7 @@ public abstract class BaseFragmentsBuild
        int startOffset, int endOffset ){
      while( buffer.length()<  endOffset&&  index[0]<  values.length ){
        buffer.append( values[index[0]].stringValue() );
-      if( values[index[0]].isTokenized()&&  values[index[0]].stringValue().length()>  
0&&  index[0] + 1<  values.length )
+      if( values[index[0]].isTokenized() )
          buffer.append( multiValuedSeparator );
        index[0]++;
      }

Modified: 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
--- 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
 (original)
+++ 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
 Mon Sep 27 16:10:29 2010
@@ -355,16 +355,20 @@ public abstract class AbstractTestCase e

    protected void makeIndexShortMV() throws Exception {

+    //  0
      // ""
+    //  1
      // ""

-    //  012345
+    //  234567
      // "a b c"
      //  0 1 2
-
+
+    //  8
      // ""

-    //  6789
+    //   111
+    //  9012
      // "d e"
      //  3 4
      make1dmfIndex( shortMVValues );

Modified: 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
--- 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
 (original)
+++ 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
 Mon Sep 27 16:10:29 2010
@@ -165,7 +165,7 @@ public class FieldPhraseListTest extends
      FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
      FieldPhraseList fpl = new FieldPhraseList( stack, fq );
      assertEquals( 1, fpl.phraseList.size() );
-    assertEquals( "d(1.0)((6,7))", fpl.phraseList.get( 0 ).toString() );
+    assertEquals( "d(1.0)((9,10))", fpl.phraseList.get( 0 ).toString() );
    }

    public void test1PhraseLongMV() throws Exception {

Modified: 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
--- 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java
 (original)
+++ 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java
 Mon Sep 27 16:10:29 2010
@@ -132,7 +132,7 @@ public class FieldTermStackTest extends
      FieldQuery fq = new FieldQuery( tq( "d" ), true, true );
      FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
      assertEquals( 1, stack.termList.size() );
-    assertEquals( "d(6,7,3)", stack.pop().toString() );
+    assertEquals( "d(9,10,3)", stack.pop().toString() );
    }

    public void test1PhraseLongMV() throws Exception {

Modified: 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
--- 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
 (original)
+++ 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
 Mon Sep 27 16:10:29 2010
@@ -27,7 +27,7 @@ public class ScoreOrderFragmentsBuilderT
      String[] f = sofb.createFragments( reader, 0, F, ffl, 3 );
      assertEquals( 3, f.length );
      // check score order
-    assertEquals( "<b>c</b>  <b>a</b>  <b>a</b>  b b", f[0] );
+    assertEquals( "<b>c</b>  <b>a</b>  <b>a</b>  b b ", f[0] );
      assertEquals( "b b<b>a</b>  b<b>a</b>  b b b b b ", f[1] );
      assertEquals( "<b>a</b>  b b b b b b b b b ", f[2] );
    }

Modified: 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
--- 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
 (original)
+++ 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
 Mon Sep 27 16:10:29 2010
@@ -143,7 +143,7 @@ public class SimpleFragListBuilderTest e
      SimpleFragListBuilder sflb = new SimpleFragListBuilder();
      FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
      assertEquals( 1, ffl.fragInfos.size() );
-    assertEquals( "subInfos=(d((6,7)))/1.0(0,100)", ffl.fragInfos.get( 0 
).toString() );
+    assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.fragInfos.get( 0 
).toString() );
    }

    public void test1PhraseLongMV() throws Exception {

Modified: 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
--- 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
 (original)
+++ 
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
 Mon Sep 27 16:10:29 2010
@@ -34,11 +34,11 @@ public class SimpleFragmentsBuilderTest
    public void test1TermIndex() throws Exception {
      FieldFragList ffl = ffl( "a", "a" );
      SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
-    assertEquals( "<b>a</b>", sfb.createFragment( reader, 0, F, ffl ) );
+    assertEquals( "<b>a</b>  ", sfb.createFragment( reader, 0, F, ffl ) );

      // change tags
      sfb = new SimpleFragmentsBuilder( new String[]{ "[" }, new String[]{ "]" 
} );
-    assertEquals( "[a]", sfb.createFragment( reader, 0, F, ffl ) );
+    assertEquals( "[a] ", sfb.createFragment( reader, 0, F, ffl ) );
    }

    public void test2Frags() throws Exception {
@@ -48,7 +48,7 @@ public class SimpleFragmentsBuilderTest
      // 3 snippets requested, but should be 2
      assertEquals( 2, f.length );
      assertEquals( "<b>a</b>  b b b b b b b b b ", f[0] );
-    assertEquals( "b b<b>a</b>  b<b>a</b>  b", f[1] );
+    assertEquals( "b b<b>a</b>  b<b>a</b>  b ", f[1] );
    }

    public void test3Frags() throws Exception {
@@ -58,7 +58,7 @@ public class SimpleFragmentsBuilderTest
      assertEquals( 3, f.length );
      assertEquals( "<b>a</b>  b b b b b b b b b ", f[0] );
      assertEquals( "b b<b>a</b>  b<b>a</b>  b b b b b ", f[1] );
-    assertEquals( "<b>c</b>  <b>a</b>  <b>a</b>  b b", f[2] );
+    assertEquals( "<b>c</b>  <b>a</b>  <b>a</b>  b b ", f[2] );
    }

    public void testTagsAndEncoder() throws Exception {
@@ -66,7 +66,7 @@ public class SimpleFragmentsBuilderTest
      SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
      String[] preTags = { "[" };
      String[] postTags = { "]" };
-    assertEquals( "&lt;h1&gt; [a]&lt;/h1&gt;",
+    assertEquals( "&lt;h1&gt; [a]&lt;/h1&gt; ",
          sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new 
SimpleHTMLEncoder() ) );
    }

@@ -88,7 +88,7 @@ public class SimpleFragmentsBuilderTest
      SimpleFragListBuilder sflb = new SimpleFragListBuilder();
      FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
      SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
-    assertEquals( "a b c<b>d</b>  e", sfb.createFragment( reader, 0, F, ffl ) 
);
+    assertEquals( " b c<b>d</b>  e ", sfb.createFragment( reader, 0, F, ffl ) 
);
    }

    public void test1PhraseLongMV() throws Exception {
@@ -113,7 +113,7 @@ public class SimpleFragmentsBuilderTest
      SimpleFragListBuilder sflb = new SimpleFragListBuilder();
      FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
      SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
-    assertEquals( "ssing<b>speed</b>, the", sfb.createFragment( reader, 0, F, 
ffl ) );
+    assertEquals( "ssing<b>speed</b>, the ", sfb.createFragment( reader, 0, F, 
ffl ) );
    }

    public void testUnstoredField() throws Exception {
@@ -163,6 +163,6 @@ public class SimpleFragmentsBuilderTest
      FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
      SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
      sfb.setMultiValuedSeparator( '/' );
-    assertEquals( "a b c/<b>d</b>  e", sfb.createFragment( reader, 0, F, ffl ) 
);
+    assertEquals( " b c//<b>d</b>  e/", sfb.createFragment( reader, 0, F, ffl 
) );
    }
  }

Modified: 
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
--- 
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
 (original)
+++ 
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
 Mon Sep 27 16:10:29 2010
@@ -75,10 +75,8 @@ final class DocInverterPerField extends
        // consumer if it wants to see this particular field
        // tokenized.
        if (field.isIndexed()&&  doInvert) {
-
-        final boolean anyToken;

-        if (fieldState.length>  0)
+        if (i>  0)
            fieldState.position += 
docState.analyzer.getPositionIncrementGap(fieldInfo.name);

          if (!field.isTokenized()) {             // un-tokenized field
@@ -99,7 +97,6 @@ final class DocInverterPerField extends
            fieldState.offset += valueLength;
            fieldState.length++;
            fieldState.position++;
-          anyToken = valueLength>  0;
          } else {                                  // tokenized field
            final TokenStream stream;
            final TokenStream streamValue = field.tokenStreamValue();
@@ -189,14 +186,12 @@ final class DocInverterPerField extends
              stream.end();

              fieldState.offset += offsetAttribute.endOffset();
-            anyToken = fieldState.length>  startLength;
            } finally {
              stream.close();
            }
          }

-        if (anyToken)
-          fieldState.offset += docState.analyzer.getOffsetGap(field);
+        fieldState.offset += docState.analyzer.getOffsetGap(field);
          fieldState.boost *= field.getBoost();
        }


Modified: 
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
--- 
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java 
(original)
+++ 
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java 
Mon Sep 27 16:10:29 2010
@@ -30,6 +30,7 @@ public final class MockAnalyzer extends
    private final boolean lowerCase;
    private final CharacterRunAutomaton filter;
    private final boolean enablePositionIncrements;
+  private int positionIncrementGap;

    /**
     * Creates a new MockAnalyzer.
@@ -89,4 +90,13 @@ public final class MockAnalyzer extends
        return saved.filter;
      }
    }
+
+  public void setPositionIncrementGap(int positionIncrementGap){
+    this.positionIncrementGap = positionIncrementGap;
+  }
+
+  @Override
+  public int getPositionIncrementGap(String fieldName){
+    return positionIncrementGap;
+  }
  }
\ No newline at end of file

Modified: 
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
--- 
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java 
(original)
+++ 
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java 
Mon Sep 27 16:10:29 2010
@@ -4266,11 +4266,11 @@ public class TestIndexWriter extends Luc
      TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, 
"field"));
      TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0);
      assertEquals(1, termOffsets.length);
-    assertEquals(0, termOffsets[0].getStartOffset());
-    assertEquals(6, termOffsets[0].getEndOffset());
+    assertEquals(1, termOffsets[0].getStartOffset());
+    assertEquals(7, termOffsets[0].getEndOffset());
      termOffsets = tpv.getOffsets(1);
-    assertEquals(7, termOffsets[0].getStartOffset());
-    assertEquals(10, termOffsets[0].getEndOffset());
+    assertEquals(8, termOffsets[0].getStartOffset());
+    assertEquals(11, termOffsets[0].getEndOffset());
      r.close();
      dir.close();
    }
@@ -4301,8 +4301,37 @@ public class TestIndexWriter extends Luc
      assertEquals(0, termOffsets[0].getStartOffset());
      assertEquals(4, termOffsets[0].getEndOffset());
      termOffsets = tpv.getOffsets(1);
-    assertEquals(5, termOffsets[0].getStartOffset());
-    assertEquals(11, termOffsets[0].getEndOffset());
+    assertEquals(6, termOffsets[0].getStartOffset());
+    assertEquals(12, termOffsets[0].getEndOffset());
+    r.close();
+    dir.close();
+  }
+
+  // LUCENE-2529
+  public void testPositionIncrementGapEmptyField() throws Exception {
+    Directory dir = newDirectory();
+    MockAnalyzer analyzer = new MockAnalyzer();
+    analyzer.setPositionIncrementGap( 100 );
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, analyzer));
+    Document doc = new Document();
+    Field f = newField("field", "", Field.Store.NO,
+                        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
+    Field f2 = newField("field", "crunch man", Field.Store.NO,
+        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
+    doc.add(f);
+    doc.add(f2);
+    w.addDocument(doc);
+    w.close();
+
+    IndexReader r = IndexReader.open(dir, true);
+    TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, 
"field"));
+    int[] poss = tpv.getTermPositions(0);
+    assertEquals(1, poss.length);
+    assertEquals(100, poss[0]);
+    poss = tpv.getTermPositions(1);
+    assertEquals(1, poss.length);
+    assertEquals(101, poss[0]);
      r.close();
      dir.close();
    }





---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to