Author: klaas
Date: Tue Jan  8 14:02:46 2008
New Revision: 610191

URL: http://svn.apache.org/viewvc?rev=610191&view=rev
Log:
SOLR-452 commit: hl.mergeContiguous

Modified:
    lucene/solr/trunk/CHANGES.txt
    lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java
    lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java
    lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java

Modified: lucene/solr/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=610191&r1=610190&r2=610191&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Tue Jan  8 14:02:46 2008
@@ -87,9 +87,11 @@
 13. SOLR-225: Enable pluggable highlighting classes.  Allow configurable
     highlighting formatters and Fragmenters.  (ryan)
 
-14. SOLR-273/376: Added hl.maxAnalyzedChars highlighting parameter, defaulting 
to
-    50k.  Also add hl.alternateField, which allows the specification of a 
backup
-    field to use as summary if no keywords are matched. (klaas)
+14. SOLR-273/376/452: Added hl.maxAnalyzedChars highlighting parameter, 
defaulting 
+    to 50k, hl.alternateField, which allows the specification of a backup
+    field to use as summary if no keywords are matched, and hl.mergeContiguous,
+    which combines fragments if they are adjacent in the source document.
+    (klaas, Grant Ingersoll via klaas)
 
 15. SOLR-291: Control maximum number of documents to cache for any entry
     in the queryResultCache via queryResultMaxDocsCached solrconfig.xml 

Modified: 
lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java?rev=610191&r1=610190&r2=610191&view=diff
==============================================================================
--- 
lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java 
(original)
+++ 
lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java 
Tue Jan  8 14:02:46 2008
@@ -33,6 +33,7 @@
   public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch";
   public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";
 
+  public static final String MERGE_CONTIGUOUS_FRAGMENTS = HIGHLIGHT + 
".mergeContiguous";
   // Formatter
   public static final String SIMPLE = "simple";
   public static final String SIMPLE_PRE  = HIGHLIGHT+"."+SIMPLE+".pre";

Modified: 
lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java?rev=610191&r1=610190&r2=610191&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java 
(original)
+++ lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java 
Tue Jan  8 14:02:46 2008
@@ -187,6 +187,15 @@
   protected int getMaxSnippets(String fieldName, SolrParams params) {
      return params.getFieldInt(fieldName, HighlightParams.SNIPPETS,1);
   }
+
+  /**
+   * Return whether adjacent fragments should be merged.
+   * @param fieldName The name of the field
+   * @param params The params controlling Highlighting
+   */
+  protected boolean isMergeContiguousFragments(String fieldName, SolrParams 
params){
+    return params.getFieldBool(fieldName, 
HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, false);
+  }
   
   /**
    * Return a formatter appropriate for this field. If a formatter
@@ -260,20 +269,22 @@
        searcher.readDocs(readDocs, docs, fset);
      }
 
-     // Highlight each document
-     DocIterator iterator = docs.iterator();
-     for (int i = 0; i < docs.size(); i++) {
-        int docId = iterator.nextDoc();
-        Document doc = readDocs[i];
-        NamedList docSummaries = new SimpleOrderedMap();
-        for (String fieldName : fieldNames) {
-           fieldName = fieldName.trim();
-           String[] docTexts = doc.getValues(fieldName);
-           if (docTexts == null) continue;
 
-           // get highlighter, and number of fragments for this field
-           Highlighter highlighter = getHighlighter(query, fieldName, req);
-           int numFragments = getMaxSnippets(fieldName, params);
+    // Highlight each document
+    DocIterator iterator = docs.iterator();
+    for (int i = 0; i < docs.size(); i++) {
+       int docId = iterator.nextDoc();
+       Document doc = readDocs[i];
+       NamedList docSummaries = new SimpleOrderedMap();
+       for (String fieldName : fieldNames) {
+          fieldName = fieldName.trim();
+          String[] docTexts = doc.getValues(fieldName);
+          if (docTexts == null) continue;
+
+          // get highlighter, and number of fragments for this field
+          Highlighter highlighter = getHighlighter(query, fieldName, req);
+          int numFragments = getMaxSnippets(fieldName, params);
+          boolean mergeContiguousFragments = 
isMergeContiguousFragments(fieldName, params);
 
            String[] summaries = null;
            TextFragment[] frag;
@@ -288,7 +299,7 @@
                  // fall back to analyzer
                  tstream = new 
TokenOrderingFilter(schema.getAnalyzer().tokenStream(fieldName, new 
StringReader(docTexts[0])), 10);
               }
-              frag = highlighter.getBestTextFragments(tstream, docTexts[0], 
false, numFragments);
+              frag = highlighter.getBestTextFragments(tstream, docTexts[0], 
mergeContiguousFragments, numFragments);
            }
            else {
               // multi-valued field

Modified: 
lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java?rev=610191&r1=610190&r2=610191&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java 
(original)
+++ lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java 
Tue Jan  8 14:02:46 2008
@@ -19,6 +19,8 @@
 
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.util.*;
+import org.apache.solr.common.params.HighlightParams;
+
 import java.util.HashMap;
 
 /**
@@ -27,7 +29,11 @@
  */
 public class HighlighterTest extends AbstractSolrTestCase {
 
-  private static String LONG_TEXT = "a long days night this should be a piece 
of text which is is is is is is is is is is is is is is is is is is is is is is 
is is isis is is is is is is is is is is is is is is is is is is is is is is is 
is is is is is is is is is is is is is is is is is is is is is is is is is is 
is is is is is is is is is is is is is is is is is is is is is is is is is 
sufficiently lengthly to produce multiple fragments which are not concatenated 
at all--we want two disjoint long fragments.";
+  private static String LONG_TEXT = "a long days night this should be a piece 
of text which is is is is is is is is is is is is is is is is is is is " +
+          "is is is is is isis is is is is is is is is is is is is is is is is 
is is is is is is is is is is is is is is is is is is is is is is is is is " +
+          "is is is is is is is is is is is is is " +
+          "is is is is is is is is is is is is is is is is is is is is 
sufficiently lengthly to produce multiple fragments which are not concatenated 
" +
+          "at all--we want two disjoint long fragments.";
 
   @Override public String getSchemaFile() { return "schema.xml"; }
   @Override public String getSolrConfigFile() { return "solrconfig.xml"; }
@@ -65,6 +71,52 @@
     assertSame( gap, frag );
     assertTrue( gap instanceof GapFragmenter );
     assertTrue( regex instanceof RegexFragmenter );
+  }
+
+  public void testMergeContiguous() throws Exception {
+    HashMap<String,String> args = new HashMap<String,String>();
+    args.put(HighlightParams.HIGHLIGHT, "true");
+    args.put("df", "t_text");
+    args.put(HighlightParams.FIELDS, "");
+    args.put(HighlightParams.SNIPPETS, String.valueOf(4));
+    args.put(HighlightParams.FRAGSIZE, String.valueOf(40));
+    args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");
+    TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
+      "standard", 0, 200, args);
+    String input = "this is some long text.  It has the word long in many 
places.  In fact, it has long on some different fragments.  " +
+            "Let us see what happens to long in this case.";
+    String gold = "this is some <em>long</em> text.  It has the word 
<em>long</em> in many places.  In fact, it has <em>long</em> on some different 
fragments.  " +
+            "Let us see what happens to <em>long</em> in this case.";
+    assertU(adoc("t_text", input, "id", "1"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Merge Contiguous",
+            sumLRF.makeRequest("t_text:long"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='t_text']/str[.='" + gold + "']"
+            );
+    args.put("f.t_text." + HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");
+    assertU(adoc("t_text", input, "id", "1"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Merge Contiguous",
+            sumLRF.makeRequest("t_text:long"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='t_text']/str[.='" + gold + "']"
+            );
+
+    args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "false");
+    args.put("f.t_text." + HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, 
"false");
+    sumLRF = h.getRequestFactory(
+      "standard", 0, 200, args);
+    assertQ("Merge Contiguous",
+            sumLRF.makeRequest("t_text:long"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='t_text']/str[.='this is some <em>long</em> text.  It has']",
+            "//lst[@name='1']/arr[@name='t_text']/str[.=' the word <em>long</em> in many places.  In fact, it has']",
+            "//lst[@name='1']/arr[@name='t_text']/str[.=' <em>long</em> on some different fragments.  Let us']",
+            "//lst[@name='1']/arr[@name='t_text']/str[.=' see what happens to <em>long</em> in this case.']"
+            );
   }
 
   public void testTermVecHighlight() {


Reply via email to