Author: sshafroi
Date: 2008-11-03 15:09:42 +0100 (Mon, 03 Nov 2008)
New Revision: 6902

Modified:
   trunk/query-api/pom.xml
   
trunk/query-api/src/main/java/no/sesat/search/query/parser/AbstractQueryParser.java
   trunk/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java
   trunk/query-api/src/main/javacc/QueryParserImpl.jj
Log:
Update parser to handle empty () in a query, as well as mismatched () and "". Also 
improve error handling when sub-parsing the content of a quoted string.

Related to: Issue SKER4951:  (Encoding problems when characters like ,() are 
used in a query)



Modified: trunk/query-api/pom.xml
===================================================================
--- trunk/query-api/pom.xml     2008-10-29 21:38:16 UTC (rev 6901)
+++ trunk/query-api/pom.xml     2008-11-03 14:09:42 UTC (rev 6902)
@@ -29,6 +29,7 @@
                 <configuration>
                     
<sourceDirectory>${basedir}/src/main/javacc</sourceDirectory>
                     <packageName>no.sesat.search.query.parser</packageName>
+                    <!-- <debugTokenManager>true</debugTokenManager> -->
                 </configuration>
             </plugin>
          </plugins>
@@ -91,4 +92,4 @@
     </reporting>
 
 
-</project>
\ No newline at end of file
+</project>

Modified: 
trunk/query-api/src/main/java/no/sesat/search/query/parser/AbstractQueryParser.java
===================================================================
--- 
trunk/query-api/src/main/java/no/sesat/search/query/parser/AbstractQueryParser.java
 2008-10-29 21:38:16 UTC (rev 6901)
+++ 
trunk/query-api/src/main/java/no/sesat/search/query/parser/AbstractQueryParser.java
 2008-11-03 14:09:42 UTC (rev 6902)
@@ -198,16 +198,25 @@
      *
      * @param method the name of the method
      */
-    protected final void enterMethod(final String method){
+    protected final void enterMethod(final String method, final Token token){
         if( LOG.isTraceEnabled() ){
             methodStack.push(method);
+
             final StringBuilder sb = new StringBuilder();
             for( Iterator it = methodStack.iterator(); it.hasNext(); ){
                 final String m = (String)it.next();
                 sb.append("." + m );
             }
+            Token t = token; // cursor over the token chain, starting at the supplied token
+            while (t != null) {
+                sb.append(" " + QueryParserImplConstants.tokenImage[t.kind]);
+                if (t.image != null) { // use the cursor's own image, not the head token's
+                    sb.append("(" + t.image + ")");
+                }
+                t = t.next;
+            }
             LOG.trace(sb.toString());
-        }
+         }
     }
 
     /**

Modified: 
trunk/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java
===================================================================
--- trunk/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java 
2008-10-29 21:38:16 UTC (rev 6901)
+++ trunk/query-api/src/main/java/no/sesat/search/query/parser/QueryParser.java 
2008-11-03 14:09:42 UTC (rev 6902)
@@ -51,7 +51,7 @@
 
     /**
      * Duplication of the parser's definition of SKIP. Must be kept uptodate!
-     * It's actually a duplication of the WORD_SEPARATOR (but that is itself a 
duplication of SKIP.
+     * It's actually a duplication of SKIP.
      */
     char[][] SKIP_CHARACTER_RANGES = {
         {' ', ' '},

Modified: trunk/query-api/src/main/javacc/QueryParserImpl.jj
===================================================================
--- trunk/query-api/src/main/javacc/QueryParserImpl.jj  2008-10-29 21:38:16 UTC 
(rev 6901)
+++ trunk/query-api/src/main/javacc/QueryParserImpl.jj  2008-11-03 14:09:42 UTC 
(rev 6902)
@@ -29,6 +29,7 @@
 import java.util.ArrayList;
 import no.sesat.search.query.*;
 
+
 /** This class is NOT synchronized. You must use a separate instance for each 
query.
  *
  * @version $Id$
@@ -41,12 +42,11 @@
         context = cxt;
         //checks Query String is ok
         String qs = cxt.getQueryString();
-        qs = balance(qs, '(', ')');
-        qs = even(qs, '\"');
         qs = numberNeedsTrailingSpace(qs);
         qs = fixFloatingHyphon(qs);
+
         token_source.SwitchTo(DEFAULT);
-        //LOG.setLevel(org.apache.log4j.Level.TRACE);
+//        LOG.setLevel(org.apache.log4j.Level.TRACE);
     }
 
     public QueryParserImpl(final Context cxt, final int lexicalState){
@@ -54,10 +54,9 @@
         context = cxt;
         //checks Query String is ok
         String qs = cxt.getQueryString();
-        qs = balance(qs, '(', ')');
-        qs = even(qs, '\"');
         qs = numberNeedsTrailingSpace(qs);
         qs = fixFloatingHyphon(qs);
+
         token_source.SwitchTo(lexicalState);
     }
 
@@ -71,8 +70,9 @@
  * WARNING!! These ranges are duplicated in QueryParser.java
  *             !! Always update it after any changes here !!
 **/
+
 <*>SKIP : {
-      " " | "!"
+      " " | "!" | "\""
     | < [ "\u0023"-"\u0027" ] >
     | < [ "\u002a"-"\u002c" ] >
     | < [ "\u002e"-"\u002f" ] >
@@ -111,9 +111,8 @@
 TOKEN : { <NUMBER_GROUP: ((<DIGIT>)+(" ")+){2,}> }
 
 
<DEFAULT,URL_DISABLED,EMAIL_DISABLED,PHONE_NUMBER_DISABLED,NUMBER_GROUP_DISABLED>
-TOKEN : { <QUOTED_WORD: "\"" (~[])* "\""> }
+TOKEN : { <QUOTED_WORD: "\"" (~["\""])+ "\""> }
 
-
 <*>TOKEN : {
       <AND: ("AND"|"+")>
     | <OR:  ("OR"|"|")>
@@ -127,16 +126,6 @@
     | <#PHONE_SYMBOL: (".")|("-")|("/")>
     | <#WORD_SYMBOL_PREFIX: (".")|("<")|("=")|(">")>
     | <#WORD_SYMBOL_MIDDLE: (".")|<HYPON>|("_")|("+")>
-    | <#WORD_SEPARATOR: [ // just a copy of the SKIP declaration. see SKIP 
comment!
-            " ", "!",
-            "\u0023"-"\u0029",
-            "\u003b"-"\u0040",
-            "\u005b"-"\u0060",
-            "\u007b"-"\u00bf",
-            "\u00d7",
-            "\u00f7",
-            "\u2010"-"\u2015"
-        ]>
     | <#HYPON: // Different types of hypons, 
http://www.cs.tut.fi/~jkorpela/dashes.html
         [
             "-",
@@ -172,27 +161,33 @@
            "\u0ed0"-"\u0ed9",
            "\u1040"-"\u1049"
           ]>
+    | <OPENP: "(">
+    | <CLOSEP: ")">
 }
 
 Clause parse() : {
   Clause clause;
   LOG.info("parsing: "+context.getQueryString());
 }{
-        (clause = rootPrecedence()) {return clause;}
+        (clause = rootPrecedence()) 
+ {
+        LOG.info("parsing of " + context.getQueryString() + " resulted in: " + 
clause);
+        return clause;
+ }
 }
 
 /** PRECEDENCES **/
 
 Clause rootPrecedence() :{
     Clause clause;
-    enterMethod("rootPrecedence()");
+    enterMethod("rootPrecedence()", token);
 }{
     (clause = noPrecedence()) { try{return clause;}finally{exitMethod();} }
-}
+ }
 
 Clause noPrecedence() :{
     Clause clause;
-    enterMethod("noPrecedence()");
+    enterMethod("noPrecedence()", token);
 }{
     (clause = hiddenDefaultOperation()) { try{return 
clause;}finally{exitMethod();} }
     |
@@ -203,7 +198,7 @@
 Clause looseJoinPrecedence() :{
     Clause clause = null;
     Token field = null;
-    enterMethod("looseJoinPrecedence()");
+    enterMethod("looseJoinPrecedence()", token);
 }{
     // Quotes or phrases take higher precedence than an OrOperation created by 
()'s
     ((field=<WORD>)<FIELD_SUFFIX>)(clause = fieldedQuote(field)) {try{return 
clause;}finally{exitMethod();}}
@@ -217,7 +212,7 @@
 
 Clause strongJoinPrecedence() :{
     Clause clause;
-    enterMethod("strongJoinPrecedence()");
+    enterMethod("strongJoinPrecedence()", token);
 }{
     (clause = andOperation()) { try{return clause;}finally{exitMethod();} }
     |
@@ -228,7 +223,7 @@
 
 Clause leafPrecedence() :{
     Clause clause;
-    enterMethod("leafPrecedence()");
+    enterMethod("leafPrecedence()", token);
 }{
     (clause = notOperation()) { try{return clause;}finally{exitMethod();} }
     |
@@ -241,7 +236,7 @@
 
 DefaultOperatorClause hiddenDefaultOperation() :{
     Clause left,right;
-    enterMethod("hiddenDefaultOperation()");
+    enterMethod("hiddenDefaultOperation()", token);
 }{
     ((left = looseJoinPrecedence())(right = noPrecedence()))
         { try{return 
context.createDefaultOperatorClause(left,right);}finally{exitMethod();} }
@@ -250,7 +245,7 @@
 
 AndNotClause andNotOperation() :{
     Clause right;
-    enterMethod("andNotOperation()");
+    enterMethod("andNotOperation()", token);
 }{
     (<ANDNOT>(right = noPrecedence())) { try{return 
context.createAndNotClause(right);}finally{exitMethod();} }
 }
@@ -258,11 +253,11 @@
 Clause orOperation() :{
     Clause left,right;
     Clause clause;
-    enterMethod("orOperation()");
+    enterMethod("orOperation()", token);
 }{
-    ("("(clause = hiddenOrOperation())")") { try{return 
clause;}finally{exitMethod();} }
+    ( <OPENP> (clause = hiddenOrOperation()) <CLOSEP>) { try{return 
clause;}finally{exitMethod();} }
     |
-    ("("(clause = orOperation())")") { try{return 
clause;}finally{exitMethod();} }
+    ( <OPENP> (clause = orOperation()) <CLOSEP>) { try{return 
clause;}finally{exitMethod();} }
     |
     ((left = strongJoinPrecedence())<OR>(right = looseJoinPrecedence()))
         { try{return 
context.createOrClause(left,right);}finally{exitMethod();} }
@@ -270,7 +265,7 @@
 
 AndClause andOperation() :{
     Clause left,right;
-    enterMethod("andOperation()");
+    enterMethod("andOperation()", token);
 }{
     ((left = leafPrecedence())<AND>(right = strongJoinPrecedence()))
         { try{return 
context.createAndClause(left,right);}finally{exitMethod();} }
@@ -278,7 +273,7 @@
 
 AndClause hiddenAndOperation() :{
     Clause left,right;
-    enterMethod("hiddenAndOperation()");
+    enterMethod("hiddenAndOperation()", token);
 }{
     ((left = leafPrecedence())(right = hiddenAndOperation()))
         { try{return 
context.createAndClause(left,right);}finally{exitMethod();} }
@@ -289,7 +284,7 @@
 
 Clause hiddenOrOperation() :{
     Clause left,right;
-    enterMethod("hiddenOrOperation()");
+    enterMethod("hiddenOrOperation()", token);
 }{
 
     // These are the real hidden or operands
@@ -306,7 +301,7 @@
 
 NotClause notOperation() :{
     Clause left;
-    enterMethod("notOperation()");
+    enterMethod("notOperation()", token);
 }{
     (<NOT>(left = leaf()))  { try{return 
context.createNotClause(left);}finally{exitMethod();} }
 }
@@ -317,22 +312,22 @@
 Clause leaf() :{
     Token field = null;
     Clause clause, left, right = null;
-    enterMethod("leaf()");
+    enterMethod("leaf()", token);
 }{
     // A real field
-    ((field=<WORD>)<FIELD_SUFFIX>)(clause = fieldedLeaf(field)) {try{return 
clause;}finally{exitMethod();}}
+    ((<OPENP>|<CLOSEP>)* (field=<WORD>)<FIELD_SUFFIX>)(clause = 
fieldedLeaf(field)) {try{return clause;}finally{exitMethod();}}
     |
     // An accidential field.
     // XXX This could cause problems as it destroys the construction of a 
right-leaning forests.
-    (left = fieldedLeaf(null)<FIELD_SUFFIX>)(right = fieldedLeaf(null))
+    ((<OPENP>|<CLOSEP>)* left = fieldedLeaf(null)<FIELD_SUFFIX>)(right = 
fieldedLeaf(null))
         {try{return 
context.createDefaultOperatorClause(left,right);}finally{exitMethod();}}
     |
-    ((<FIELD_SUFFIX>)?(clause = fieldedLeaf(null))) {try{return 
clause;}finally{exitMethod();}}
+    ((<OPENP>|<CLOSEP>)* (<FIELD_SUFFIX>)?(clause = fieldedLeaf(null))) 
{try{return clause;}finally{exitMethod();}}
 }
 
 
 Clause fieldedLeaf(final Token field) :{
-    enterMethod("fieldedLeaf()");
+    enterMethod("fieldedLeaf()", token);
     Clause clause = null;
 }{
     <PHONE_NUMBER>
@@ -406,8 +401,8 @@
         }
 }
 
-XorClause fieldedQuote(final Token field) :{
-    enterMethod("fieldedQuote()");
+Clause fieldedQuote(final Token field) :{
+    enterMethod("fieldedQuote()", token);
 }{
     <QUOTED_WORD>
         {
@@ -421,12 +416,19 @@
                                         .replaceAll(SKIP_REGEX, " ")
                                         .replaceAll(OPERATOR_REGEX, " ")
                                         .trim();
-                final QueryParserImpl p = new 
QueryParserImpl(createContext(term), QUOTED_WORD_DISABLED);
-                final Clause altClause = p.parse();
 
-                // Create a XorClause
-                return context.createXorClause(phClause, altClause, 
XorClause.Hint.PHRASE_ON_LEFT);
+                try { // if we can parse the content again, then make an 
xorclause
+                    final QueryParserImpl p = new 
QueryParserImpl(createContext(term), QUOTED_WORD_DISABLED);
+                    final Clause altClause = p.parse();
+                    return context.createXorClause(phClause, altClause, 
XorClause.Hint.PHRASE_ON_LEFT);
 
+                }
+                catch (ParseException e) {
+                    LOG.warn("Parsing content of QUOTED_WORD: " + term, e);
+                }
+
+                return phClause;
+
             }finally{exitMethod();}
         }
 }

_______________________________________________
Kernel-commits mailing list
[email protected]
http://sesat.no/mailman/listinfo/kernel-commits

Reply via email to