Author: dnaber
Date: Sat Feb 19 11:08:52 2005
New Revision: 154444

URL: http://svn.apache.org/viewcvs?view=rev&rev=154444
Log:
offer additional methods that take analyzer + text instead of tokenstream; fix 
some unused imports and variables

Modified:
    
lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
    
lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java

Modified: 
lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
URL: 
http://svn.apache.org/viewcvs/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java?view=diff&r1=154443&r2=154444
==============================================================================
--- 
lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
 (original)
+++ 
lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
 Sat Feb 19 11:08:52 2005
@@ -16,9 +16,11 @@
  */
 
 import java.io.IOException;
+import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Iterator;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.util.PriorityQueue;
 
@@ -57,8 +59,24 @@
                this.fragmentScorer = fragmentScorer;
        }
 
-
-
+       /**
+        * Highlights chosen terms in a text, extracting the most relevant section.
+        * This is a convenience method that calls
+        * {@link #getBestFragment(TokenStream, String)}
+        *
+        * @param analyzer   the analyzer that will be used to split <code>text</code>
+        * into chunks  
+        * @param text text to highlight terms in
+        *
+        * @return highlighted text fragment or null if no terms found
+        */
+       public final String getBestFragment(Analyzer analyzer, String text)
+               throws IOException
+       {
+               TokenStream tokenStream = analyzer.tokenStream("field", new 
StringReader(text));
+               return getBestFragment(tokenStream, text);
+       }
+  
        /**
         * Highlights chosen terms in a text, extracting the most relevant 
section.
         * The document text is analysed in chunks to record hit statistics
@@ -84,6 +102,29 @@
                }
                return null;
        }
+
+       /**
+        * Highlights chosen terms in a text, extracting the most relevant sections.
+        * This is a convenience method that calls
+        * {@link #getBestFragments(TokenStream, String, int)}
+        *
+        * @param analyzer   the analyzer that will be used to split <code>text</code>
+        * into chunks  
+        * @param text          text to highlight terms in
+        * @param maxNumFragments  the maximum number of fragments.
+        *
+        * @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
+        */
+       public final String[] getBestFragments(
+               Analyzer analyzer,      
+               String text,
+               int maxNumFragments)
+               throws IOException
+       {
+               TokenStream tokenStream = analyzer.tokenStream("field", new 
StringReader(text));
+               return getBestFragments(tokenStream, text, maxNumFragments);
+       }
+       
        /**
         * Highlights chosen terms in a text, extracting the most relevant 
sections.
         * The document text is analysed in chunks to record hit statistics

Modified: 
lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: 
http://svn.apache.org/viewcvs/lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?view=diff&r1=154443&r2=154444
==============================================================================
--- 
lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
 (original)
+++ 
lucene/java/trunk/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
 Sat Feb 19 11:08:52 2005
@@ -17,7 +17,6 @@
  */
 
 import java.io.ByteArrayInputStream;
-import java.io.File;
 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
@@ -27,7 +26,6 @@
 
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
 
 import junit.framework.TestCase;
 
@@ -50,7 +48,6 @@
 import org.apache.lucene.store.RAMDirectory;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
-import org.xml.sax.SAXException;
 
 /**
  * JUnit Test for Highlighter class.
@@ -157,7 +154,6 @@
                assertTrue("Failed to find correct number of highlights " + 
numHighlights + " found", numHighlights == 5);
        }
 
-
        public void testGetBestSingleFragment() throws Exception
        {
                doSearching("Kennedy");
@@ -172,6 +168,23 @@
                        System.out.println("\t" + result);
                }
                assertTrue("Failed to find correct number of highlights " + 
numHighlights + " found", numHighlights == 4);
+
+               numHighlights = 0;
+               for (int i = 0; i < hits.length(); i++)
+               {
+               String text = hits.doc(i).get(FIELD_NAME);
+               highlighter.getBestFragment(analyzer, text);
+               }
+               assertTrue("Failed to find correct number of highlights " + 
numHighlights + " found", numHighlights == 4);
+
+               numHighlights = 0;
+               for (int i = 0; i < hits.length(); i++)
+               {
+               String text = hits.doc(i).get(FIELD_NAME);
+               highlighter.getBestFragments(analyzer, text, 10);
+               }
+               assertTrue("Failed to find correct number of highlights " + 
numHighlights + " found", numHighlights == 4);
+
        }
 
        public void testGetBestSingleFragmentWithWeights() throws Exception
@@ -278,7 +291,7 @@
                TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new 
StringReader(texts[0]));
                String result = 
highlighter.getBestFragment(tokenStream,texts[0]);
                assertTrue("Setting MaxDocBytesToAnalyze should have prevented 
" +
-                       "us from finding matches for this record" + 
numHighlights +
+                       "us from finding matches for this record: " + 
numHighlights +
                         " found", numHighlights == 0);
        }
 
@@ -322,7 +335,6 @@
                Highlighter highlighter =
                        new Highlighter(this,new QueryScorer(query));
 
-               int highlightFragmentSizeInBytes = 40;
                for (int i = 0; i < texts.length; i++)
                {
                        String text = texts[i];
@@ -568,8 +580,8 @@
 //========== THESE LOOK LIKE, WITH SOME MORE EFFORT THESE COULD BE
 //========== MADE MORE GENERALLY USEFUL.
 // TODO - make synonyms all interchangeable with each other and produce
-// a version that does antonyms(?) - the "is a specialised type of ...."
-// so that car=audi, bmw and volkswagen but bmw != audi so different
+// a version that does hyponyms - the "is a specialised type of ...."
+// so that car = audi, bmw and volkswagen but bmw != audi so different
 // behaviour to synonyms
 //===================================================================
 
@@ -587,7 +599,6 @@
         */
        public TokenStream tokenStream(String arg0, Reader arg1)
        {
-
                return new SynonymTokenizer(new LowerCaseTokenizer(arg1), 
synonyms);
        }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to