otis 2004/03/03 03:24:49 Modified: . CHANGES.txt src/java/org/apache/lucene/queryParser QueryParser.java QueryParser.jj src/test/org/apache/lucene/queryParser TestQueryParser.java Log: - Applied the patch that fixes query string escaping: http://issues.apache.org/bugzilla/show_bug.cgi?id=24665 Contributed by Jean-Francois Halleux Revision Changes Path 1.75 +5 -1 jakarta-lucene/CHANGES.txt Index: CHANGES.txt =================================================================== RCS file: /home/cvs/jakarta-lucene/CHANGES.txt,v retrieving revision 1.74 retrieving revision 1.75 diff -u -r1.74 -r1.75 --- CHANGES.txt 20 Feb 2004 20:14:55 -0000 1.74 +++ CHANGES.txt 3 Mar 2004 11:24:48 -0000 1.75 @@ -57,6 +57,10 @@ 10. Added support for term vectors. See Field#isTermVectorStored(). (Grant Ingersoll, Cutting & Dmitry) +11. Fixed the old bug with escaping of special characters in query + strings: http://issues.apache.org/bugzilla/show_bug.cgi?id=24665 + (Jean-Francois Halleux via Otis) + 1.3 final 1.9 +41 -12 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.java Index: QueryParser.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.java,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- QueryParser.java 26 Nov 2003 11:00:58 -0000 1.8 +++ QueryParser.java 3 Mar 2004 11:24:48 -0000 1.9 @@ -132,7 +132,7 @@ /** * Sets the boolean operator of the QueryParser. 
- * In classic mode (<code>DEFAULT_OPERATOR_OR</mode>) terms without any modifiers + * In classic mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers * are considered optional: for example <code>capital of Hungary</code> is equal to * <code>capital OR of OR Hungary</code>.<br/> * In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjuction: the @@ -386,6 +386,22 @@ return new FuzzyQuery(t); } + /** + * Returns a String where the escape char has been + * removed, or kept only once if there was a double escape. + */ + private String discardEscapeChar(String input) { + char[] caSource = input.toCharArray(); + char[] caDest = new char[caSource.length]; + int j = 0; + for (int i = 0; i < caSource.length; i++) { + if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) { + caDest[j++]=caSource[i]; + } + } + return new String(caDest, 0, j); + } + public static void main(String[] args) throws Exception { QueryParser qp = new QueryParser("field", new org.apache.lucene.analysis.SimpleAnalyzer()); @@ -506,7 +522,7 @@ if (jj_2_1(2)) { fieldToken = jj_consume_token(TERM); jj_consume_token(COLON); - field = fieldToken.image; + field=discardEscapeChar(fieldToken.image); } else { ; } @@ -609,15 +625,17 @@ jj_la1[10] = jj_gen; ; } + String termImage=discardEscapeChar(term.image); if (wildcard) { - q = getWildcardQuery(field, term.image); + q = getWildcardQuery(field, termImage); } else if (prefix) { - q = getPrefixQuery(field, term.image.substring - (0, term.image.length()-1)); + q = getPrefixQuery(field, + discardEscapeChar(term.image.substring + (0, term.image.length()-1))); } else if (fuzzy) { - q = getFuzzyQuery(field, term.image); + q = getFuzzyQuery(field, termImage); } else { - q = getFieldQuery(field, analyzer, term.image); + q = getFieldQuery(field, analyzer, termImage); } break; case RANGEIN_START: @@ -664,11 +682,16 @@ jj_la1[14] = jj_gen; ; } - if (goop1.kind == RANGEIN_QUOTED) + if (goop1.kind == RANGEIN_QUOTED) { goop1.image = 
goop1.image.substring(1, goop1.image.length()-1); - if (goop2.kind == RANGEIN_QUOTED) + } else { + goop1.image = discardEscapeChar(goop1.image); + } + if (goop2.kind == RANGEIN_QUOTED) { goop2.image = goop2.image.substring(1, goop2.image.length()-1); - + } else { + goop2.image = discardEscapeChar(goop2.image); + } q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); break; case RANGEEX_START: @@ -715,10 +738,16 @@ jj_la1[18] = jj_gen; ; } - if (goop1.kind == RANGEEX_QUOTED) + if (goop1.kind == RANGEEX_QUOTED) { goop1.image = goop1.image.substring(1, goop1.image.length()-1); - if (goop2.kind == RANGEEX_QUOTED) + } else { + goop1.image = discardEscapeChar(goop1.image); + } + if (goop2.kind == RANGEEX_QUOTED) { goop2.image = goop2.image.substring(1, goop2.image.length()-1); + } else { + goop2.image = discardEscapeChar(goop2.image); + } q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); break; 1.40 +70 -78 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj Index: QueryParser.jj =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v retrieving revision 1.39 retrieving revision 1.40 diff -u -r1.39 -r1.40 --- QueryParser.jj 26 Jan 2004 18:53:14 -0000 1.39 +++ QueryParser.jj 3 Mar 2004 11:24:48 -0000 1.40 @@ -1,58 +1,19 @@ -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation. All - * rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact [EMAIL PROTECTED] - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * ==================================================================== +/** + * Copyright 2004 The Apache Software Foundation * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * <http://www.apache.org/>. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ - options { STATIC=false; JAVA_UNICODE_ESCAPE=true; @@ -138,8 +99,8 @@ Locale locale = Locale.getDefault(); /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. - * @param query the query string to be parsed. - * @param field the default field for query terms. + * @param query the query string to be parsed. + * @param field the default field for query terms. * @param analyzer used to find terms in the query text. * @throws ParseException if the parsing fails */ @@ -150,7 +111,7 @@ } /** Constructs a query parser. - * @param f the default field for query terms. + * @param f the default field for query terms. * @param a used to find terms in the query text. */ public QueryParser(String f, Analyzer a) { @@ -161,7 +122,7 @@ /** Parses a query string, returning a * <a href="lucene.search.Query.html">Query</a>. - * @param query the query string to be parsed. + * @param query the query string to be parsed. 
* @throws ParseException if the parsing fails */ public Query parse(String query) throws ParseException { @@ -364,7 +325,7 @@ { BooleanQuery query = new BooleanQuery(); for (int i = 0; i < clauses.size(); i++) { - query.add((BooleanClause)clauses.elementAt(i)); + query.add((BooleanClause)clauses.elementAt(i)); } return query; } @@ -393,7 +354,7 @@ protected Query getWildcardQuery(String field, String termStr) throws ParseException { if (lowercaseWildcardTerms) { - termStr = termStr.toLowerCase(); + termStr = termStr.toLowerCase(); } Term t = new Term(field, termStr); return new WildcardQuery(t); @@ -425,7 +386,7 @@ protected Query getPrefixQuery(String field, String termStr) throws ParseException { if (lowercaseWildcardTerms) { - termStr = termStr.toLowerCase(); + termStr = termStr.toLowerCase(); } Term t = new Term(field, termStr); return new PrefixQuery(t); @@ -448,6 +409,22 @@ return new FuzzyQuery(t); } + /** + * Returns a String where the escape char has been + * removed, or kept only once if there was a double escape. 
+ */ + private String discardEscapeChar(String input) { + char[] caSource = input.toCharArray(); + char[] caDest = new char[caSource.length]; + int j = 0; + for (int i = 0; i < caSource.length; i++) { + if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) { + caDest[j++]=caSource[i]; + } + } + return new String(caDest, 0, j); + } + public static void main(String[] args) throws Exception { QueryParser qp = new QueryParser("field", new org.apache.lucene.analysis.SimpleAnalyzer()); @@ -572,7 +549,7 @@ if (clauses.size() == 1 && firstQuery != null) return firstQuery; else { - return getBooleanQuery(clauses); + return getBooleanQuery(clauses); } } } @@ -584,7 +561,9 @@ { [ LOOKAHEAD(2) - fieldToken=<TERM> <COLON> { field = fieldToken.image; } + fieldToken=<TERM> <COLON> { + field=discardEscapeChar(fieldToken.image); + } ] ( @@ -594,11 +573,11 @@ ) { if (boost != null) { - float f = (float)1.0; - try { - f = Float.valueOf(boost.image).floatValue(); + float f = (float)1.0; + try { + f = Float.valueOf(boost.image).floatValue(); q.setBoost(f); - } catch (Exception ignored) { } + } catch (Exception ignored) { } } return q; } @@ -624,15 +603,17 @@ [ <FUZZY> { fuzzy=true; } ] [ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ] { + String termImage=discardEscapeChar(term.image); if (wildcard) { - q = getWildcardQuery(field, term.image); + q = getWildcardQuery(field, termImage); } else if (prefix) { - q = getPrefixQuery(field, term.image.substring - (0, term.image.length()-1)); + q = getPrefixQuery(field, + discardEscapeChar(term.image.substring + (0, term.image.length()-1))); } else if (fuzzy) { - q = getFuzzyQuery(field, term.image); + q = getFuzzyQuery(field, termImage); } else { - q = getFieldQuery(field, analyzer, term.image); + q = getFieldQuery(field, analyzer, termImage); } } | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> ) @@ -640,11 +621,16 @@ <RANGEIN_END> ) [ <CARAT> boost=<NUMBER> ] { - if (goop1.kind == RANGEIN_QUOTED) + if 
(goop1.kind == RANGEIN_QUOTED) { goop1.image = goop1.image.substring(1, goop1.image.length()-1); - if (goop2.kind == RANGEIN_QUOTED) + } else { + goop1.image = discardEscapeChar(goop1.image); + } + if (goop2.kind == RANGEIN_QUOTED) { goop2.image = goop2.image.substring(1, goop2.image.length()-1); - + } else { + goop2.image = discardEscapeChar(goop2.image); + } q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); } | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> ) @@ -652,10 +638,16 @@ <RANGEEX_END> ) [ <CARAT> boost=<NUMBER> ] { - if (goop1.kind == RANGEEX_QUOTED) + if (goop1.kind == RANGEEX_QUOTED) { goop1.image = goop1.image.substring(1, goop1.image.length()-1); - if (goop2.kind == RANGEEX_QUOTED) + } else { + goop1.image = discardEscapeChar(goop1.image); + } + if (goop2.kind == RANGEEX_QUOTED) { goop2.image = goop2.image.substring(1, goop2.image.length()-1); + } else { + goop2.image = discardEscapeChar(goop2.image); + } q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); } @@ -681,9 +673,9 @@ f = Float.valueOf(boost.image).floatValue(); } catch (Exception ignored) { - /* Should this be handled somehow? (defaults to "no boost", if - * boost number is invalid) - */ + /* Should this be handled somehow? 
(defaults to "no boost", if + * boost number is invalid) + */ } // avoid boosting null queries, such as those caused by stop words 1.24 +41 -51 jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java Index: TestQueryParser.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java,v retrieving revision 1.23 retrieving revision 1.24 diff -u -r1.23 -r1.24 --- TestQueryParser.java 26 Nov 2003 11:03:30 -0000 1.23 +++ TestQueryParser.java 3 Mar 2004 11:24:49 -0000 1.24 @@ -1,57 +1,19 @@ package org.apache.lucene.queryParser; -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation. All - * rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. 
For - * written permission, please contact [EMAIL PROTECTED] +/** + * Copyright 2002-2004 The Apache Software Foundation * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== + * http://www.apache.org/licenses/LICENSE-2.0 * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * <http://www.apache.org/>. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ import junit.framework.TestCase; @@ -355,7 +317,7 @@ public void testEscaped() throws Exception { Analyzer a = new WhitespaceAnalyzer(); - assertQueryEquals("\\[brackets", a, "\\[brackets"); + /* assertQueryEquals("\\[brackets", a, "\\[brackets"); assertQueryEquals("\\[brackets", null, "brackets"); assertQueryEquals("\\\\", a, "\\\\"); assertQueryEquals("\\+blah", a, "\\+blah"); @@ -377,8 +339,36 @@ assertQueryEquals("\\?blah", a, "\\?blah"); assertQueryEquals("foo \\&& bar", a, "foo \\&& bar"); assertQueryEquals("foo \\|| bar", a, "foo \\|| bar"); - assertQueryEquals("foo \\AND bar", a, "foo \\AND bar"); + assertQueryEquals("foo \\AND bar", a, "foo \\AND bar"); */ + assertQueryEquals("a\\-b:c",a,"a-b:c"); + assertQueryEquals("a\\+b:c",a,"a+b:c"); + assertQueryEquals("a\\:b:c",a,"a:b:c"); + assertQueryEquals("a\\\\b:c",a,"a\\b:c"); + + assertQueryEquals("a:b\\-c",a,"a:b-c"); + assertQueryEquals("a:b\\+c",a,"a:b+c"); + assertQueryEquals("a:b\\:c",a,"a:b:c"); + assertQueryEquals("a:b\\\\c",a,"a:b\\c"); + + assertQueryEquals("a:b\\-c*",a,"a:b-c*"); + assertQueryEquals("a:b\\+c*",a,"a:b+c*"); + assertQueryEquals("a:b\\:c*",a,"a:b:c*"); + assertQueryEquals("a:b\\\\c*",a,"a:b\\c*"); + + assertQueryEquals("a:b\\-?c",a,"a:b-?c"); + assertQueryEquals("a:b\\+?c",a,"a:b+?c"); + assertQueryEquals("a:b\\:?c",a,"a:b:?c"); + assertQueryEquals("a:b\\\\?c",a,"a:b\\?c"); + + assertQueryEquals("a:b\\-c~",a,"a:b-c~"); + assertQueryEquals("a:b\\+c~",a,"a:b+c~"); + assertQueryEquals("a:b\\:c~",a,"a:b:c~"); + assertQueryEquals("a:b\\\\c~",a,"a:b\\c~"); + + assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]"); + assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]"); + assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]"); } public void testSimpleDAO()
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]