Author: koji
Date: Thu Dec 25 17:08:18 2008
New Revision: 729450

URL: http://svn.apache.org/viewvc?rev=729450&view=rev
Log:
SOLR-925: Fixed highlighting on fields with multiValued="true" and 
termOffsets="true"

Modified:
    lucene/solr/trunk/CHANGES.txt
    
lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
    lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java
    lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml

Modified: lucene/solr/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=729450&r1=729449&r2=729450&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Thu Dec 25 17:08:18 2008
@@ -118,7 +118,7 @@
     optimized distributed faceting refinement by lowering parsing overhead and
     by making requests and responses smaller.
 
-25. SOLR-876: WOrdDelimiterFilter now supports a splitOnNumerics 
+25. SOLR-876: WordDelimiterFilter now supports a splitOnNumerics 
     option, as well as a list of protected terms.
     (Dan Rosher via hossman)
 
@@ -200,6 +200,8 @@
 
 22. SOLR-897: Fixed Argument list too long error when there are lots of 
snapshots/backups (Dan Rosher via billa)
 
+23. SOLR-925: Fixed highlighting on fields with multiValued="true" and 
termOffsets="true" (koji)
+
 
 Other Changes
 ----------------------

Modified: 
lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java?rev=729450&r1=729449&r2=729450&view=diff
==============================================================================
--- 
lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
 (original)
+++ 
lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
 Thu Dec 25 17:08:18 2008
@@ -271,11 +271,14 @@
 
           String[] summaries = null;
           List<TextFragment> frags = new ArrayList<TextFragment>();
+          TermOffsetsTokenStream tots = null;
           for (int j = 0; j < docTexts.length; j++) {
             // create TokenStream
             try {
               // attempt term vectors
-              tstream = TokenSources.getTokenStream(searcher.getReader(), 
docId, fieldName);
+              if( tots == null )
+                tots = new TermOffsetsTokenStream( 
TokenSources.getTokenStream(searcher.getReader(), docId, fieldName) );
+              tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
             }
             catch (IllegalArgumentException e) {
              // fall back to analyzer
@@ -410,3 +413,44 @@
     return queue.isEmpty() ? null : queue.removeFirst();
   }
 }
+
+class TermOffsetsTokenStream {
+
+  TokenStream bufferedTokenStream = null;
+  Token bufferedToken;
+  int startOffset;
+  int endOffset;
+
+  public TermOffsetsTokenStream( TokenStream tstream ){
+    bufferedTokenStream = tstream;
+    startOffset = 0;
+    bufferedToken = null;
+  }
+
+  public TokenStream getMultiValuedTokenStream( final int length ){
+    endOffset = startOffset + length;
+    return new TokenStream(){
+      Token token;
+      public Token next() throws IOException {
+        while( true ){
+          if( bufferedToken == null )
+            bufferedToken = bufferedTokenStream.next();
+          if( bufferedToken == null ) return null;
+          if( startOffset <= bufferedToken.startOffset() &&
+              bufferedToken.endOffset() <= endOffset ){
+            token = bufferedToken;
+            bufferedToken = null;
+            token.setStartOffset( token.startOffset() - startOffset );
+            token.setEndOffset( token.endOffset() - startOffset );
+            return token;
+          }
+          else if( bufferedToken.endOffset() > endOffset ){
+            startOffset += length + 1;
+            return null;
+          }
+          bufferedToken = null;
+        }
+      }
+    };
+  }
+}

Modified: 
lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java?rev=729450&r1=729449&r2=729450&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java 
(original)
+++ lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java 
Thu Dec 25 17:08:18 2008
@@ -17,10 +17,16 @@
 
 package org.apache.solr.highlight;
 
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.util.*;
 import org.apache.solr.common.params.HighlightParams;
 
+import java.io.IOException;
+import java.io.StringReader;
 import java.util.HashMap;
 
 /**
@@ -140,6 +146,47 @@
             "//a...@name='tv_text']/str[.=' <em>long</em> fragments.']"
             );
   }
+  
+  public void testTermOffsetsTokenStream() throws Exception {
+    String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
+    Analyzer a1 = new WhitespaceAnalyzer();
+    TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
+        a1.tokenStream( "", new StringReader( "a b c d e f g h i j k l m n" ) 
) );
+    for( String v : multivalued ){
+      TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
+      Analyzer a2 = new WhitespaceAnalyzer();
+      TokenStream ts2 = a2.tokenStream( "", new StringReader( v ) );
+      Token t1 = new Token();
+      Token t2 = new Token();
+      for( t1 = ts1.next( t1 ); t1 != null; t1 = ts1.next( t1 ) ){
+        t2 = ts2.next( t2 );
+        assertEquals( t2, t1 );
+      }
+    }
+  }
+
+  public void testTermVecMultiValuedHighlight() throws Exception {
+
+    // do summarization using term vectors on multivalued field
+    HashMap<String,String> args = new HashMap<String,String>();
+    args.put("hl", "true");
+    args.put("hl.fl", "tv_mv_text");
+    args.put("hl.snippets", "2");
+    TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
+      "standard",0,200,args);
+    
+    assertU(adoc("tv_mv_text", LONG_TEXT, 
+                 "tv_mv_text", LONG_TEXT, 
+                 "id", "1"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Basic summarization",
+            sumLRF.makeRequest("tv_mv_text:long"),
+            "//l...@name='highlighting']/l...@name='1']",
+            "//l...@name='1']/a...@name='tv_mv_text']/str[.='a <em>long</em> 
days night this should be a piece of text which']",
+            "//a...@name='tv_mv_text']/str[.=' <em>long</em> fragments.']"
+            );
+  }
 
   public void testDisMaxHighlight() {
 

Modified: lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml?rev=729450&r1=729449&r2=729450&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml Thu Dec 25 
17:08:18 2008
@@ -455,6 +455,8 @@
    <dynamicField name="t_*"  type="text"    indexed="true"  stored="true"/>
    <dynamicField name="tv_*"  type="text" indexed="true"  stored="true" 
       termVectors="true" termPositions="true" termOffsets="true"/>
+   <dynamicField name="tv_mv_*"  type="text" indexed="true"  stored="true" 
multiValued="true"
+      termVectors="true" termPositions="true" termOffsets="true"/>
 
    <!-- special fields for dynamic copyField test -->
    <dynamicField name="dynamic_*" type="string" indexed="true" stored="true"/>


Reply via email to