Hi all,
I spent some time tracking down strange OutOfMemory errors in Nutch, and once I found the cause I discovered two issues on the Nutch issue tracker that are related to it:
[ 1110947 ] Serious bug: OutOfMemoryError: Java heap space
[ 957684 ] endless loop with unbalanced quote in query


I have prepared a JUnit test and a patch for NutchAnalysis.jj. The JUnit test throws an OutOfMemoryError with the current NutchAnalysis. After applying the patch, all tests succeed. At first I decided to throw a ParseException on an unbalanced quote, but after looking at how Google handles it and a short discussion with a friend, I chose to automatically add the missing quote at the end of the query. If you think throwing an exception is better, I can submit the earlier version. I think my solution should not change any other behavior of NutchAnalysis, but I hope it can be quickly reviewed by a more experienced JavaCC developer.
After applying the patch, you can close both bugs mentioned earlier.
Regards
Piotr Kosiorowski



Index: NutchAnalysis.jj
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/analysis/NutchAnalysis.jj,v
retrieving revision 1.10
diff -u -r1.10 NutchAnalysis.jj
--- NutchAnalysis.jj    29 Sep 2004 18:55:35 -0000      1.10
+++ NutchAnalysis.jj    22 Feb 2005 20:22:42 -0000
@@ -226,7 +226,7 @@
 
   { end = token.endColumn; }
 
-  <QUOTE>
+  (<QUOTE>|<EOF>)
     
   {
     if (QueryFilters.isRawField(field)) {
@@ -281,7 +281,13 @@
 void nonTerm() :
 {}
 {
-  <WHITE> | infix() | <EOF>
+  <WHITE> | infix() 
+}
+
+void nonTermOrEOF() :
+{}
+{
+  nonTerm() | <EOF>
 }
 
 
@@ -289,7 +295,7 @@
 void nonOpOrTerm() :
 {}
 {
-  (LOOKAHEAD(2) (<WHITE> | nonOpInfix() | ((<PLUS>|<MINUS>) nonTerm())))*
+  (LOOKAHEAD(2) (<WHITE> | nonOpInfix() | ((<PLUS>|<MINUS>) nonTermOrEOF())))*
 }
 
 /** Characters which can be used to form compound terms. */
/* Copyright (c) 2003 The Nutch Organization.  All rights reserved.   */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */

package net.nutch.analysis;

import java.io.IOException;

import net.nutch.searcher.Query;
import junit.framework.TestCase;

/**
 * JUnit tests for {@link NutchAnalysis#parseQuery(String)}.
 *
 * <p>Each test parses a query string and checks the string rendering of the
 * resulting {@link Query} together with its terms or clauses. The
 * <code>testBrokenQuote*</code> cases feed queries whose double quotes are
 * unbalanced and expect them to parse as if the phrase had been closed.
 *
 * @author Piotr Kosiorowski
 */
public class TestNutchAnalysis extends TestCase {

    /** Runs this test class with the JUnit text runner. */
    public static void main(String[] args) {
        junit.textui.TestRunner.run(TestNutchAnalysis.class);
    }

    /**
     * Parses <code>input</code>, asserts that a query was produced and that
     * its string form equals <code>rendering</code>.
     *
     * @return the parsed query, for further checks by the caller
     */
    private static Query parseExpecting(String input, String rendering)
            throws IOException {
        Query parsed = NutchAnalysis.parseQuery(input);
        assertNotNull(parsed);
        assertEquals(rendering, parsed.toString());
        return parsed;
    }

    /**
     * Asserts that the terms of <code>parsed</code> match
     * <code>expected</code>: first the count, then each term in order.
     */
    private static void expectTerms(Query parsed, String[] expected) {
        String[] actual = parsed.getTerms();
        assertEquals(expected.length, actual.length);
        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], actual[i]);
        }
    }

    /**
     * Asserts that <code>parsed</code> holds exactly one clause and
     * returns it.
     */
    private static Query.Clause onlyClause(Query parsed) {
        Query.Clause[] all = parsed.getClauses();
        assertEquals(1, all.length);
        return all[0];
    }

    /** A single word yields a single term. */
    public void testSimpleQuery() throws IOException {
        Query parsed = parseExpecting("test", "test");
        expectTerms(parsed, new String[] { "test" });
    }

    /** Two whitespace-separated words yield two terms, in order. */
    public void testTwoTokenQuery() throws IOException {
        Query parsed = parseExpecting("test abc", "test abc");
        expectTerms(parsed, new String[] { "test", "abc" });
    }

    /** A minus directly before a word marks the clause as prohibited. */
    public void testMinusQuery() throws IOException {
        Query parsed = parseExpecting("-def  ", "-def");
        assertTrue(onlyClause(parsed).isProhibited());
    }

    /** A minus separated from the word by whitespace is ignored. */
    public void testIgnoredMinusQuery() throws IOException {
        Query parsed = parseExpecting("- def ", "def");
        assertFalse(onlyClause(parsed).isProhibited());
    }

    /** A properly quoted phrase keeps its quotes and exposes both terms. */
    public void testPhraseQuery() throws IOException {
        Query parsed = parseExpecting("\"abc def\"", "\"abc def\"");
        expectTerms(parsed, new String[] { "abc", "def" });
    }

    /** A lone quote parses to an empty query with no terms. */
    public void testBrokenQuote() throws IOException {
        Query parsed = parseExpecting("\"", "");
        expectTerms(parsed, new String[0]);
    }

    /** A phrase missing its closing quote is rendered as a closed phrase. */
    public void testBrokenQuote2() throws IOException {
        Query parsed = parseExpecting("\" abc def ", "\"abc def\"");
        expectTerms(parsed, new String[] { "abc", "def" });
    }

    /** A stray quote after a closed phrase adds nothing to the query. */
    public void testBrokenQuote3() throws IOException {
        Query parsed = parseExpecting("\" abc def \"\"", "\"abc def\"");
        expectTerms(parsed, new String[] { "abc", "def" });
    }

    /** A closed phrase followed by an unterminated one yields two phrases. */
    public void testBrokenQuote5() throws IOException {
        Query parsed = parseExpecting("\" abc def \" \" def ghi ",
                                      "\"abc def\" \"def ghi\"");
        expectTerms(parsed, new String[] { "abc", "def", "def", "ghi" });
    }
}

Reply via email to