Author: koji
Date: Thu Dec 25 17:08:18 2008
New Revision: 729450
URL: http://svn.apache.org/viewvc?rev=729450&view=rev
Log:
SOLR-925: Fixed highlighting on fields with multiValued="true" and
termOffsets="true"
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java
lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml
Modified: lucene/solr/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=729450&r1=729449&r2=729450&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Thu Dec 25 17:08:18 2008
@@ -118,7 +118,7 @@
optimized distributed faceting refinement by lowering parsing overhead and
by making requests and responses smaller.
-25. SOLR-876: WOrdDelimiterFilter now supports a splitOnNumerics
+25. SOLR-876: WordDelimiterFilter now supports a splitOnNumerics
option, as well as a list of protected terms.
(Dan Rosher via hossman)
@@ -200,6 +200,8 @@
22. SOLR-897: Fixed Argument list too long error when there are lots of
snapshots/backups (Dan Rosher via billa)
+23. SOLR-925: Fixed highlighting on fields with multiValued="true" and
termOffsets="true" (koji)
+
Other Changes
----------------------
Modified:
lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java?rev=729450&r1=729449&r2=729450&view=diff
==============================================================================
---
lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
(original)
+++
lucene/solr/trunk/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
Thu Dec 25 17:08:18 2008
@@ -271,11 +271,14 @@
String[] summaries = null;
List<TextFragment> frags = new ArrayList<TextFragment>();
+ TermOffsetsTokenStream tots = null;
for (int j = 0; j < docTexts.length; j++) {
// create TokenStream
try {
// attempt term vectors
- tstream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
+ if( tots == null )
+ tots = new TermOffsetsTokenStream( TokenSources.getTokenStream(searcher.getReader(), docId, fieldName) );
+ tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
}
catch (IllegalArgumentException e) {
// fall back to anaylzer
@@ -410,3 +413,44 @@
return queue.isEmpty() ? null : queue.removeFirst();
}
}
+
+class TermOffsetsTokenStream {
+
+ TokenStream bufferedTokenStream = null;
+ Token bufferedToken;
+ int startOffset;
+ int endOffset;
+
+ public TermOffsetsTokenStream( TokenStream tstream ){
+ bufferedTokenStream = tstream;
+ startOffset = 0;
+ bufferedToken = null;
+ }
+
+ public TokenStream getMultiValuedTokenStream( final int length ){
+ endOffset = startOffset + length;
+ return new TokenStream(){
+ Token token;
+ public Token next() throws IOException {
+ while( true ){
+ if( bufferedToken == null )
+ bufferedToken = bufferedTokenStream.next();
+ if( bufferedToken == null ) return null;
+ if( startOffset <= bufferedToken.startOffset() &&
+ bufferedToken.endOffset() <= endOffset ){
+ token = bufferedToken;
+ bufferedToken = null;
+ token.setStartOffset( token.startOffset() - startOffset );
+ token.setEndOffset( token.endOffset() - startOffset );
+ return token;
+ }
+ else if( bufferedToken.endOffset() > endOffset ){
+ startOffset += length + 1;
+ return null;
+ }
+ bufferedToken = null;
+ }
+ }
+ };
+ }
+}
Modified:
lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java?rev=729450&r1=729449&r2=729450&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java
(original)
+++ lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java
Thu Dec 25 17:08:18 2008
@@ -17,10 +17,16 @@
package org.apache.solr.highlight;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.solr.core.SolrCore;
import org.apache.solr.util.*;
import org.apache.solr.common.params.HighlightParams;
+import java.io.IOException;
+import java.io.StringReader;
import java.util.HashMap;
/**
@@ -140,6 +146,47 @@
"//arr[@name='tv_text']/str[.=' <em>long</em> fragments.']"
);
}
+
+ public void testTermOffsetsTokenStream() throws Exception {
+ String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
+ Analyzer a1 = new WhitespaceAnalyzer();
+ TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
+ a1.tokenStream( "", new StringReader( "a b c d e f g h i j k l m n" ) ) );
+ for( String v : multivalued ){
+ TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
+ Analyzer a2 = new WhitespaceAnalyzer();
+ TokenStream ts2 = a2.tokenStream( "", new StringReader( v ) );
+ Token t1 = new Token();
+ Token t2 = new Token();
+ for( t1 = ts1.next( t1 ); t1 != null; t1 = ts1.next( t1 ) ){
+ t2 = ts2.next( t2 );
+ assertEquals( t2, t1 );
+ }
+ }
+ }
+
+ public void testTermVecMultiValuedHighlight() throws Exception {
+
+ // do summarization using term vectors on multivalued field
+ HashMap<String,String> args = new HashMap<String,String>();
+ args.put("hl", "true");
+ args.put("hl.fl", "tv_mv_text");
+ args.put("hl.snippets", "2");
+ TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
+ "standard",0,200,args);
+
+ assertU(adoc("tv_mv_text", LONG_TEXT,
+ "tv_mv_text", LONG_TEXT,
+ "id", "1"));
+ assertU(commit());
+ assertU(optimize());
+ assertQ("Basic summarization",
+ sumLRF.makeRequest("tv_mv_text:long"),
+ "//lst[@name='highlighting']/lst[@name='1']",
+ "//lst[@name='1']/arr[@name='tv_mv_text']/str[.='a <em>long</em> days night this should be a piece of text which']",
+ "//arr[@name='tv_mv_text']/str[.=' <em>long</em> fragments.']"
+ );
+ }
public void testDisMaxHighlight() {
Modified: lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml?rev=729450&r1=729449&r2=729450&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml Thu Dec 25
17:08:18 2008
@@ -455,6 +455,8 @@
<dynamicField name="t_*" type="text" indexed="true" stored="true"/>
<dynamicField name="tv_*" type="text" indexed="true" stored="true"
termVectors="true" termPositions="true" termOffsets="true"/>
+ <dynamicField name="tv_mv_*" type="text" indexed="true" stored="true" multivalued="true"
+ termVectors="true" termPositions="true" termOffsets="true"/>
<!-- special fields for dynamic copyField test -->
<dynamicField name="dynamic_*" type="string" indexed="true" stored="true"/>