Author: klaas
Date: Tue Jan 8 14:02:46 2008
New Revision: 610191
URL: http://svn.apache.org/viewvc?rev=610191&view=rev
Log:
SOLR-452 commit: hl.mergeContiguous
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java
lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java
lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java
Modified: lucene/solr/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=610191&r1=610190&r2=610191&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Tue Jan 8 14:02:46 2008
@@ -87,9 +87,11 @@
13. SOLR-225: Enable pluggable highlighting classes. Allow configurable
highlighting formatters and Fragmenters. (ryan)
-14. SOLR-273/376: Added hl.maxAnalyzedChars highlighting parameter, defaulting
to
- 50k. Also add hl.alternateField, which allows the specification of a
backup
- field to use as summary if no keywords are matched. (klaas)
+14. SOLR-273/376/452: Added hl.maxAnalyzedChars highlighting parameter,
defaulting
+ to 50k, hl.alternateField, which allows the specification of a backup
+ field to use as summary if no keywords are matched, and hl.mergeContiguous,
+ which combines fragments if they are adjacent in the source document.
+ (klaas, Grant Ingersoll via klaas)
15. SOLR-291: Control maximum number of documents to cache for any entry
in the queryResultCache via queryResultMaxDocsCached solrconfig.xml
Modified:
lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java?rev=610191&r1=610190&r2=610191&view=diff
==============================================================================
---
lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java
(original)
+++
lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java
Tue Jan 8 14:02:46 2008
@@ -33,6 +33,7 @@
public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch";
public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";
+ public static final String MERGE_CONTIGUOUS_FRAGMENTS = HIGHLIGHT +
".mergeContiguous";
// Formatter
public static final String SIMPLE = "simple";
public static final String SIMPLE_PRE = HIGHLIGHT+"."+SIMPLE+".pre";
Modified:
lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java?rev=610191&r1=610190&r2=610191&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java
(original)
+++ lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java
Tue Jan 8 14:02:46 2008
@@ -187,6 +187,15 @@
protected int getMaxSnippets(String fieldName, SolrParams params) {
return params.getFieldInt(fieldName, HighlightParams.SNIPPETS,1);
}
+
+ /**
+ * Return whether adjacent fragments should be merged.
+ * @param fieldName The name of the field
+ * @param params The params controlling Highlighting
+ */
+ protected boolean isMergeContiguousFragments(String fieldName, SolrParams
params){
+ return params.getFieldBool(fieldName,
HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, false);
+ }
/**
* Return a formatter appropriate for this field. If a formatter
@@ -260,20 +269,22 @@
searcher.readDocs(readDocs, docs, fset);
}
- // Highlight each document
- DocIterator iterator = docs.iterator();
- for (int i = 0; i < docs.size(); i++) {
- int docId = iterator.nextDoc();
- Document doc = readDocs[i];
- NamedList docSummaries = new SimpleOrderedMap();
- for (String fieldName : fieldNames) {
- fieldName = fieldName.trim();
- String[] docTexts = doc.getValues(fieldName);
- if (docTexts == null) continue;
- // get highlighter, and number of fragments for this field
- Highlighter highlighter = getHighlighter(query, fieldName, req);
- int numFragments = getMaxSnippets(fieldName, params);
+ // Highlight each document
+ DocIterator iterator = docs.iterator();
+ for (int i = 0; i < docs.size(); i++) {
+ int docId = iterator.nextDoc();
+ Document doc = readDocs[i];
+ NamedList docSummaries = new SimpleOrderedMap();
+ for (String fieldName : fieldNames) {
+ fieldName = fieldName.trim();
+ String[] docTexts = doc.getValues(fieldName);
+ if (docTexts == null) continue;
+
+ // get highlighter, and number of fragments for this field
+ Highlighter highlighter = getHighlighter(query, fieldName, req);
+ int numFragments = getMaxSnippets(fieldName, params);
+ boolean mergeContiguousFragments =
isMergeContiguousFragments(fieldName, params);
String[] summaries = null;
TextFragment[] frag;
@@ -288,7 +299,7 @@
// fall back to analyzer
tstream = new
TokenOrderingFilter(schema.getAnalyzer().tokenStream(fieldName, new
StringReader(docTexts[0])), 10);
}
- frag = highlighter.getBestTextFragments(tstream, docTexts[0],
false, numFragments);
+ frag = highlighter.getBestTextFragments(tstream, docTexts[0],
mergeContiguousFragments, numFragments);
}
else {
// multi-valued field
Modified:
lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java?rev=610191&r1=610190&r2=610191&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java
(original)
+++ lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java
Tue Jan 8 14:02:46 2008
@@ -19,6 +19,8 @@
import org.apache.solr.core.SolrCore;
import org.apache.solr.util.*;
+import org.apache.solr.common.params.HighlightParams;
+
import java.util.HashMap;
/**
@@ -27,7 +29,11 @@
*/
public class HighlighterTest extends AbstractSolrTestCase {
- private static String LONG_TEXT = "a long days night this should be a piece
of text which is is is is is is is is is is is is is is is is is is is is is is
is is isis is is is is is is is is is is is is is is is is is is is is is is is
is is is is is is is is is is is is is is is is is is is is is is is is is is
is is is is is is is is is is is is is is is is is is is is is is is is is
sufficiently lengthly to produce multiple fragments which are not concatenated
at all--we want two disjoint long fragments.";
+ private static String LONG_TEXT = "a long days night this should be a piece
of text which is is is is is is is is is is is is is is is is is is is " +
+ "is is is is is isis is is is is is is is is is is is is is is is is
is is is is is is is is is is is is is is is is is is is is is is is is is " +
+ "is is is is is is is is is is is is is " +
+ "is is is is is is is is is is is is is is is is is is is is
sufficiently lengthly to produce multiple fragments which are not concatenated
" +
+ "at all--we want two disjoint long fragments.";
@Override public String getSchemaFile() { return "schema.xml"; }
@Override public String getSolrConfigFile() { return "solrconfig.xml"; }
@@ -65,6 +71,52 @@
assertSame( gap, frag );
assertTrue( gap instanceof GapFragmenter );
assertTrue( regex instanceof RegexFragmenter );
+ }
+
+ public void testMergeContiguous() throws Exception {
+ HashMap<String,String> args = new HashMap<String,String>();
+ args.put(HighlightParams.HIGHLIGHT, "true");
+ args.put("df", "t_text");
+ args.put(HighlightParams.FIELDS, "");
+ args.put(HighlightParams.SNIPPETS, String.valueOf(4));
+ args.put(HighlightParams.FRAGSIZE, String.valueOf(40));
+ args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");
+ TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
+ "standard", 0, 200, args);
+ String input = "this is some long text. It has the word long in many
places. In fact, it has long on some different fragments. " +
+ "Let us see what happens to long in this case.";
+ String gold = "this is some <em>long</em> text. It has the word
<em>long</em> in many places. In fact, it has <em>long</em> on some different
fragments. " +
+ "Let us see what happens to <em>long</em> in this case.";
+ assertU(adoc("t_text", input, "id", "1"));
+ assertU(commit());
+ assertU(optimize());
+ assertQ("Merge Contiguous",
+ sumLRF.makeRequest("t_text:long"),
+ "//lst[@name='highlighting']/lst[@name='1']",
+ "//lst[@name='1']/arr[@name='t_text']/str[.='" +
gold + "']"
+ );
+ args.put("f.t_text." + HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");
+ assertU(adoc("t_text", input, "id", "1"));
+ assertU(commit());
+ assertU(optimize());
+ assertQ("Merge Contiguous",
+ sumLRF.makeRequest("t_text:long"),
+ "//lst[@name='highlighting']/lst[@name='1']",
+ "//lst[@name='1']/arr[@name='t_text']/str[.='" +
gold + "']"
+ );
+
+ args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "false");
+ args.put("f.t_text." + HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS,
"false");
+ sumLRF = h.getRequestFactory(
+ "standard", 0, 200, args);
+ assertQ("Merge Contiguous",
+ sumLRF.makeRequest("t_text:long"),
+ "//lst[@name='highlighting']/lst[@name='1']",
+ "//lst[@name='1']/arr[@name='t_text']/str[.='this is
some <em>long</em> text. It has']",
+ "//lst[@name='1']/arr[@name='t_text']/str[.=' the
word <em>long</em> in many places. In fact, it has']",
+ "//lst[@name='1']/arr[@name='t_text']/str[.='
<em>long</em> on some different fragments. Let us']",
+ "//lst[@name='1']/arr[@name='t_text']/str[.=' see
what happens to <em>long</em> in this case.']"
+ );
}
public void testTermVecHighlight() {