otis        2004/03/03 03:24:49

  Modified:    .        CHANGES.txt
               src/java/org/apache/lucene/queryParser QueryParser.java
                        QueryParser.jj
               src/test/org/apache/lucene/queryParser TestQueryParser.java
  Log:
  - Applied the patch that fixes query string escaping:
    http://issues.apache.org/bugzilla/show_bug.cgi?id=24665
    Contributed by Jean-Francois Halleux
  
  Revision  Changes    Path
  1.75      +5 -1      jakarta-lucene/CHANGES.txt
  
  Index: CHANGES.txt
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/CHANGES.txt,v
  retrieving revision 1.74
  retrieving revision 1.75
  diff -u -r1.74 -r1.75
  --- CHANGES.txt       20 Feb 2004 20:14:55 -0000      1.74
  +++ CHANGES.txt       3 Mar 2004 11:24:48 -0000       1.75
  @@ -57,6 +57,10 @@
   10. Added support for term vectors.  See Field#isTermVectorStored().
       (Grant Ingersoll, Cutting & Dmitry)
   
  +11. Fixed the old bug with escaping of special characters in query
  +    strings: http://issues.apache.org/bugzilla/show_bug.cgi?id=24665
  +    (Jean-Francois Halleux via Otis)
  +
   
   1.3 final
   
  
  
  
  1.9       +41 -12    jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.java
  
  Index: QueryParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- QueryParser.java  26 Nov 2003 11:00:58 -0000      1.8
  +++ QueryParser.java  3 Mar 2004 11:24:48 -0000       1.9
  @@ -132,7 +132,7 @@
   
     /**
      * Sets the boolean operator of the QueryParser.
  -   * In classic mode (<code>DEFAULT_OPERATOR_OR</mode>) terms without any modifiers
  +   * In classic mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers
      * are considered optional: for example <code>capital of Hungary</code> is equal to
      * <code>capital OR of OR Hungary</code>.<br/>
      * In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjuction: the
  @@ -386,6 +386,22 @@
       return new FuzzyQuery(t);
     }
   
  +  /**
  +   * Returns a String where the escape char has been
  +   * removed, or kept only once if there was a double escape.
  +   */
  +  private String discardEscapeChar(String input) {
  +    char[] caSource = input.toCharArray();
  +    char[] caDest = new char[caSource.length];
  +    int j = 0;
  +    for (int i = 0; i < caSource.length; i++) {
  +      if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) {
  +        caDest[j++]=caSource[i];
  +      }
  +    }
  +    return new String(caDest, 0, j);
  +  }
  +
     public static void main(String[] args) throws Exception {
       QueryParser qp = new QueryParser("field",
                              new org.apache.lucene.analysis.SimpleAnalyzer());
  @@ -506,7 +522,7 @@
       if (jj_2_1(2)) {
         fieldToken = jj_consume_token(TERM);
         jj_consume_token(COLON);
  -                                field = fieldToken.image;
  +        field=discardEscapeChar(fieldToken.image);
       } else {
         ;
       }
  @@ -609,15 +625,17 @@
           jj_la1[10] = jj_gen;
           ;
         }
  +       String termImage=discardEscapeChar(term.image);
          if (wildcard) {
  -         q = getWildcardQuery(field, term.image);
  +             q = getWildcardQuery(field, termImage);
          } else if (prefix) {
  -         q = getPrefixQuery(field, term.image.substring
  -                            (0, term.image.length()-1));
  +         q = getPrefixQuery(field,
  +                discardEscapeChar(term.image.substring
  +                            (0, term.image.length()-1)));
          } else if (fuzzy) {
  -         q = getFuzzyQuery(field, term.image);
  +         q = getFuzzyQuery(field, termImage);
          } else {
  -         q = getFieldQuery(field, analyzer, term.image);
  +         q = getFieldQuery(field, analyzer, termImage);
          }
         break;
       case RANGEIN_START:
  @@ -664,11 +682,16 @@
           jj_la1[14] = jj_gen;
           ;
         }
  -          if (goop1.kind == RANGEIN_QUOTED)
  +          if (goop1.kind == RANGEIN_QUOTED) {
               goop1.image = goop1.image.substring(1, goop1.image.length()-1);
  -          if (goop2.kind == RANGEIN_QUOTED)
  +          } else {
  +                goop1.image = discardEscapeChar(goop1.image);
  +          }
  +          if (goop2.kind == RANGEIN_QUOTED) {
               goop2.image = goop2.image.substring(1, goop2.image.length()-1);
  -
  +                  } else {
  +                        goop2.image = discardEscapeChar(goop2.image);
  +                  }
             q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
         break;
       case RANGEEX_START:
  @@ -715,10 +738,16 @@
           jj_la1[18] = jj_gen;
           ;
         }
  -          if (goop1.kind == RANGEEX_QUOTED)
  +          if (goop1.kind == RANGEEX_QUOTED) {
               goop1.image = goop1.image.substring(1, goop1.image.length()-1);
  -          if (goop2.kind == RANGEEX_QUOTED)
  +          } else {
  +            goop1.image = discardEscapeChar(goop1.image);
  +          }
  +          if (goop2.kind == RANGEEX_QUOTED) {
               goop2.image = goop2.image.substring(1, goop2.image.length()-1);
  +                  } else {
  +                        goop2.image = discardEscapeChar(goop2.image);
  +                  }
   
             q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
         break;
  
  
  
  1.40      +70 -78    jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj
  
  Index: QueryParser.jj
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v
  retrieving revision 1.39
  retrieving revision 1.40
  diff -u -r1.39 -r1.40
  --- QueryParser.jj    26 Jan 2004 18:53:14 -0000      1.39
  +++ QueryParser.jj    3 Mar 2004 11:24:48 -0000       1.40
  @@ -1,58 +1,19 @@
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation.  All
  - * rights reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Lucene" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact [EMAIL PROTECTED]
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Lucene", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  +/**
  + * Copyright 2004 The Apache Software Foundation
    *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  + * Licensed under the Apache License, Version 2.0 (the "License");
  + * you may not use this file except in compliance with the License.
  + * You may obtain a copy of the License at
  + *
  + *     http://www.apache.org/licenses/LICENSE-2.0
  + *
  + * Unless required by applicable law or agreed to in writing, software
  + * distributed under the License is distributed on an "AS IS" BASIS,
  + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  + * See the License for the specific language governing permissions and
  + * limitations under the License.
    */
   
  -
   options {
     STATIC=false;
     JAVA_UNICODE_ESCAPE=true;
  @@ -138,8 +99,8 @@
     Locale locale = Locale.getDefault();
   
     /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
  -   *  @param query   the query string to be parsed.
  -   *  @param field   the default field for query terms.
  +   *  @param query  the query string to be parsed.
  +   *  @param field  the default field for query terms.
      *  @param analyzer   used to find terms in the query text.
      *  @throws ParseException if the parsing fails
      */
  @@ -150,7 +111,7 @@
     }
   
     /** Constructs a query parser.
  -   *  @param f       the default field for query terms.
  +   *  @param f  the default field for query terms.
      *  @param a   used to find terms in the query text.
      */
     public QueryParser(String f, Analyzer a) {
  @@ -161,7 +122,7 @@
   
     /** Parses a query string, returning a
      * <a href="lucene.search.Query.html">Query</a>.
  -   *  @param query   the query string to be parsed.
  +   *  @param query  the query string to be parsed.
      *  @throws ParseException if the parsing fails
      */
     public Query parse(String query) throws ParseException {
  @@ -364,7 +325,7 @@
     {
       BooleanQuery query = new BooleanQuery();
       for (int i = 0; i < clauses.size(); i++) {
  -     query.add((BooleanClause)clauses.elementAt(i));
  +  query.add((BooleanClause)clauses.elementAt(i));
       }
       return query;
     }
  @@ -393,7 +354,7 @@
     protected Query getWildcardQuery(String field, String termStr) throws ParseException
     {
       if (lowercaseWildcardTerms) {
  -     termStr = termStr.toLowerCase();
  +  termStr = termStr.toLowerCase();
       }
       Term t = new Term(field, termStr);
       return new WildcardQuery(t);
  @@ -425,7 +386,7 @@
     protected Query getPrefixQuery(String field, String termStr) throws ParseException
     {
       if (lowercaseWildcardTerms) {
  -     termStr = termStr.toLowerCase();
  +  termStr = termStr.toLowerCase();
       }
       Term t = new Term(field, termStr);
       return new PrefixQuery(t);
  @@ -448,6 +409,22 @@
       return new FuzzyQuery(t);
     }
   
  +  /**
  +   * Returns a String where the escape char has been
  +   * removed, or kept only once if there was a double escape.
  +   */
  +  private String discardEscapeChar(String input) {
  +    char[] caSource = input.toCharArray();
  +    char[] caDest = new char[caSource.length];
  +    int j = 0;
  +    for (int i = 0; i < caSource.length; i++) {
  +      if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) {
  +        caDest[j++]=caSource[i];
  +      }
  +    }
  +    return new String(caDest, 0, j);
  +  }
  +
     public static void main(String[] args) throws Exception {
       QueryParser qp = new QueryParser("field",
                              new org.apache.lucene.analysis.SimpleAnalyzer());
  @@ -572,7 +549,7 @@
         if (clauses.size() == 1 && firstQuery != null)
           return firstQuery;
         else {
  -     return getBooleanQuery(clauses);
  +  return getBooleanQuery(clauses);
         }
       }
   }
  @@ -584,7 +561,9 @@
   {
     [
       LOOKAHEAD(2)
  -    fieldToken=<TERM> <COLON> { field = fieldToken.image; }
  +    fieldToken=<TERM> <COLON> {
  +      field=discardEscapeChar(fieldToken.image);
  +    }
     ]
   
     (
  @@ -594,11 +573,11 @@
     )
       {
         if (boost != null) {
  -             float f = (float)1.0;
  -     try {
  -       f = Float.valueOf(boost.image).floatValue();
  +        float f = (float)1.0;
  +  try {
  +    f = Float.valueOf(boost.image).floatValue();
             q.setBoost(f);
  -     } catch (Exception ignored) { }
  +  } catch (Exception ignored) { }
         }
         return q;
       }
  @@ -624,15 +603,17 @@
        [ <FUZZY> { fuzzy=true; } ]
        [ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ]
        {
  +       String termImage=discardEscapeChar(term.image);
          if (wildcard) {
  -      q = getWildcardQuery(field, term.image);
  +       q = getWildcardQuery(field, termImage);
          } else if (prefix) {
  -         q = getPrefixQuery(field, term.image.substring
  -                         (0, term.image.length()-1));
  +         q = getPrefixQuery(field,
  +           discardEscapeChar(term.image.substring
  +          (0, term.image.length()-1)));
          } else if (fuzzy) {
  -         q = getFuzzyQuery(field, term.image);
  +         q = getFuzzyQuery(field, termImage);
          } else {
  -         q = getFieldQuery(field, analyzer, term.image);
  +         q = getFieldQuery(field, analyzer, termImage);
          }
        }
        | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
  @@ -640,11 +621,16 @@
            <RANGEIN_END> )
          [ <CARAT> boost=<NUMBER> ]
           {
  -          if (goop1.kind == RANGEIN_QUOTED)
  +          if (goop1.kind == RANGEIN_QUOTED) {
               goop1.image = goop1.image.substring(1, goop1.image.length()-1);
  -          if (goop2.kind == RANGEIN_QUOTED)
  +          } else {
  +            goop1.image = discardEscapeChar(goop1.image);
  +          }
  +          if (goop2.kind == RANGEIN_QUOTED) {
               goop2.image = goop2.image.substring(1, goop2.image.length()-1);
  -
  +      } else {
  +        goop2.image = discardEscapeChar(goop2.image);
  +      }
             q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
           }
        | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
  @@ -652,10 +638,16 @@
            <RANGEEX_END> )
          [ <CARAT> boost=<NUMBER> ]
           {
  -          if (goop1.kind == RANGEEX_QUOTED)
  +          if (goop1.kind == RANGEEX_QUOTED) {
               goop1.image = goop1.image.substring(1, goop1.image.length()-1);
  -          if (goop2.kind == RANGEEX_QUOTED)
  +          } else {
  +            goop1.image = discardEscapeChar(goop1.image);
  +          }
  +          if (goop2.kind == RANGEEX_QUOTED) {
               goop2.image = goop2.image.substring(1, goop2.image.length()-1);
  +      } else {
  +        goop2.image = discardEscapeChar(goop2.image);
  +      }
   
             q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
           }
  @@ -681,9 +673,9 @@
           f = Float.valueOf(boost.image).floatValue();
         }
         catch (Exception ignored) {
  -       /* Should this be handled somehow? (defaults to "no boost", if
  -        * boost number is invalid)
  -        */
  +    /* Should this be handled somehow? (defaults to "no boost", if
  +     * boost number is invalid)
  +     */
         }
   
         // avoid boosting null queries, such as those caused by stop words
  
  
  
  1.24      +41 -51    jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
  
  Index: TestQueryParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java,v
  retrieving revision 1.23
  retrieving revision 1.24
  diff -u -r1.23 -r1.24
  --- TestQueryParser.java      26 Nov 2003 11:03:30 -0000      1.23
  +++ TestQueryParser.java      3 Mar 2004 11:24:49 -0000       1.24
  @@ -1,57 +1,19 @@
   package org.apache.lucene.queryParser;
   
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation.  All
  - * rights reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Lucene" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact [EMAIL PROTECTED]
  +/**
  + * Copyright 2002-2004 The Apache Software Foundation
    *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Lucene", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  + * Licensed under the Apache License, Version 2.0 (the "License");
  + * you may not use this file except in compliance with the License.
  + * You may obtain a copy of the License at
    *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  + *     http://www.apache.org/licenses/LICENSE-2.0
    *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  + * Unless required by applicable law or agreed to in writing, software
  + * distributed under the License is distributed on an "AS IS" BASIS,
  + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  + * See the License for the specific language governing permissions and
  + * limitations under the License.
    */
   
   import junit.framework.TestCase;
  @@ -355,7 +317,7 @@
   
     public void testEscaped() throws Exception {
       Analyzer a = new WhitespaceAnalyzer();
  -    assertQueryEquals("\\[brackets", a, "\\[brackets");
  + /*   assertQueryEquals("\\[brackets", a, "\\[brackets");
       assertQueryEquals("\\[brackets", null, "brackets");
       assertQueryEquals("\\\\", a, "\\\\");
       assertQueryEquals("\\+blah", a, "\\+blah");
  @@ -377,8 +339,36 @@
       assertQueryEquals("\\?blah", a, "\\?blah");
       assertQueryEquals("foo \\&& bar", a, "foo \\&& bar");
       assertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
  -    assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");
  +    assertQueryEquals("foo \\AND bar", a, "foo \\AND bar"); */
   
  +     assertQueryEquals("a\\-b:c",a,"a-b:c");
  +     assertQueryEquals("a\\+b:c",a,"a+b:c");
  +     assertQueryEquals("a\\:b:c",a,"a:b:c");
  +     assertQueryEquals("a\\\\b:c",a,"a\\b:c");
  +
  +     assertQueryEquals("a:b\\-c",a,"a:b-c");
  +     assertQueryEquals("a:b\\+c",a,"a:b+c");
  +     assertQueryEquals("a:b\\:c",a,"a:b:c");
  +     assertQueryEquals("a:b\\\\c",a,"a:b\\c");
  +
  +     assertQueryEquals("a:b\\-c*",a,"a:b-c*");
  +     assertQueryEquals("a:b\\+c*",a,"a:b+c*");
  +     assertQueryEquals("a:b\\:c*",a,"a:b:c*");
  +     assertQueryEquals("a:b\\\\c*",a,"a:b\\c*");
  +
  +     assertQueryEquals("a:b\\-?c",a,"a:b-?c");
  +     assertQueryEquals("a:b\\+?c",a,"a:b+?c");
  +     assertQueryEquals("a:b\\:?c",a,"a:b:?c");
  +     assertQueryEquals("a:b\\\\?c",a,"a:b\\?c");
  +
  +     assertQueryEquals("a:b\\-c~",a,"a:b-c~");
  +     assertQueryEquals("a:b\\+c~",a,"a:b+c~");
  +     assertQueryEquals("a:b\\:c~",a,"a:b:c~");
  +     assertQueryEquals("a:b\\\\c~",a,"a:b\\c~");
  +
  +     assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]");
  +     assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]");
  +     assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]");
     }
   
     public void testSimpleDAO()
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to