goller 2004/10/01 02:59:23 Modified: src/java/org/apache/lucene/queryParser Tag: lucene_1_4_2_dev QueryParser.java QueryParser.jj QueryParserConstants.java QueryParserTokenManager.java src/test/org/apache/lucene/queryParser Tag: lucene_1_4_2_dev TestQueryParser.java Log: Fix for ArrayIndexOutOfBoundsException in QueryParser (patch #9110), some unused method parameters removed, minimum similarity for FuzzyQuery. (Backport) Revision Changes Path No revision No revision 1.11.2.1 +84 -45 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.java Index: QueryParser.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.java,v retrieving revision 1.11 retrieving revision 1.11.2.1 diff -u -r1.11 -r1.11.2.1 --- QueryParser.java 22 May 2004 17:34:31 -0000 1.11 +++ QueryParser.java 1 Oct 2004 09:59:23 -0000 1.11.2.1 @@ -73,6 +73,7 @@ Analyzer analyzer; String field; int phraseSlop = 0; + float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; Locale locale = Locale.getDefault(); /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. @@ -115,6 +116,33 @@ } } + /** + * @return Returns the analyzer. + */ + public Analyzer getAnalyzer() { + return analyzer; + } + + /** + * @return Returns the field. + */ + public String getField() { + return field; + } + + /** + * Get the default minimal similarity for fuzzy queries. + */ + public float getFuzzyMinSim() { + return fuzzyMinSim; + } + /** + *Set the default minimum similarity for fuzzy queries. + */ + public void setFuzzyMinSim(float fuzzyMinSim) { + this.fuzzyMinSim = fuzzyMinSim; + } + /** * Sets the default slop for phrases. If zero, then exact phrase matches * are required. Default value is zero.
@@ -172,18 +200,18 @@ return locale; } - protected void addClause(Vector clauses, int conj, int mods, Query q) { + protected void addClause(Vector clauses, int conj, int mods, Query q) { boolean required, prohibited; // If this term is introduced by AND, make the preceding term required, // unless it's already prohibited - if (conj == CONJ_AND) { + if (clauses.size() > 0 && conj == CONJ_AND) { BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); if (!c.prohibited) c.required = true; } - if (operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) { + if (clauses.size() > 0 && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) { // If this term is introduced by OR, make the preceding term optional, // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) // notice if the input is a OR b, first term is parsed as required; without @@ -218,9 +246,7 @@ /** * @exception ParseException throw in overridden method to disallow */ - protected Query getFieldQuery(String field, - Analyzer analyzer, - String queryText) throws ParseException { + protected Query getFieldQuery(String field, String queryText) throws ParseException { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count @@ -262,17 +288,15 @@ } /** - * Base implementation delegates to {@link #getFieldQuery(String,Analyzer,String)}. + * Base implementation delegates to {@link #getFieldQuery(String,String)}. * This method may be overridden, for example, to return * a SpanNearQuery instead of a PhraseQuery.
* * @exception ParseException throw in overridden method to disallow */ - protected Query getFieldQuery(String field, - Analyzer analyzer, - String queryText, - int slop) throws ParseException { - Query query = getFieldQuery(field, analyzer, queryText); + protected Query getFieldQuery(String field, String queryText, int slop) + throws ParseException { + Query query = getFieldQuery(field, queryText); if (query instanceof PhraseQuery) { ((PhraseQuery) query).setSlop(slop); @@ -285,7 +309,6 @@ * @exception ParseException throw in overridden method to disallow */ protected Query getRangeQuery(String field, - Analyzer analyzer, String part1, String part2, boolean inclusive) throws ParseException @@ -400,10 +423,10 @@ * @return Resulting {@link Query} built for the term * @exception ParseException throw in overridden method to disallow */ - protected Query getFuzzyQuery(String field, String termStr) throws ParseException + protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException { Term t = new Term(field, termStr); - return new FuzzyQuery(t); + return new FuzzyQuery(t, minSimilarity); } /** @@ -422,6 +445,25 @@ return new String(caDest, 0, j); } + /** + * Returns a String where those characters that QueryParser + * expects to be escaped are escaped, i.e. preceded by a <code>\</code>. + */ + public static String escape(String s) { + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < s.length(); i++) { + char c = s.charAt(i); + // NOTE: keep this in sync with _ESCAPED_CHAR below! + if (c == '\\' || c == '+' || c == '-' || c == '!'
|| c == '(' || c == ')' || c == ':' + || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' + || c == '*' || c == '?') { + sb.append('\\'); + } + sb.append(c); + } + return sb.toString(); + } + public static void main(String[] args) throws Exception { QueryParser qp = new QueryParser("field", new org.apache.lucene.analysis.SimpleAnalyzer()); @@ -587,7 +629,7 @@ } final public Query Term(String field) throws ParseException { - Token term, boost=null, slop=null, goop1, goop2; + Token term, boost=null, fuzzySlop=null, goop1, goop2; boolean prefix = false; boolean wildcard = false; boolean fuzzy = false; @@ -619,9 +661,9 @@ throw new ParseException(); } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case FUZZY: - jj_consume_token(FUZZY); - fuzzy=true; + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + fuzzy=true; break; default: jj_la1[8] = jj_gen; @@ -632,9 +674,9 @@ jj_consume_token(CARAT); boost = jj_consume_token(NUMBER); switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case FUZZY: - jj_consume_token(FUZZY); - fuzzy=true; + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + fuzzy=true; break; default: jj_la1[9] = jj_gen; @@ -653,9 +695,16 @@ discardEscapeChar(term.image.substring (0, term.image.length()-1))); } else if (fuzzy) { - q = getFuzzyQuery(field, termImage); + float fms = fuzzyMinSim; + try { + fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue(); + } catch (Exception ignored) { } + if(fms < 0.0f || fms > 1.0f){ + {if (true) throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");} + } + q = getFuzzyQuery(field, termImage, fms); } else { - q = getFieldQuery(field, analyzer, termImage); + q = getFieldQuery(field, termImage); } break; case RANGEIN_START: @@ -712,7 +761,7 @@ } else { goop2.image = discardEscapeChar(goop2.image); } - q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); + q = getRangeQuery(field, goop1.image, goop2.image, true); 
break; case RANGEEX_START: jj_consume_token(RANGEEX_START); @@ -769,13 +818,13 @@ goop2.image = discardEscapeChar(goop2.image); } - q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); + q = getRangeQuery(field, goop1.image, goop2.image, false); break; case QUOTED: term = jj_consume_token(QUOTED); switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case SLOP: - slop = jj_consume_token(SLOP); + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); break; default: jj_la1[19] = jj_gen; @@ -792,15 +841,13 @@ } int s = phraseSlop; - if (slop != null) { + if (fuzzySlop != null) { try { - s = Float.valueOf(slop.image.substring(1)).intValue(); + s = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); } catch (Exception ignored) { } } - q = getFieldQuery(field, analyzer, - term.image.substring(1, term.image.length()-1), - s); + q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s); break; default: jj_la1[21] = jj_gen; @@ -850,16 +897,11 @@ private int jj_gen; final private int[] jj_la1 = new int[22]; static private int[] jj_la1_0; - static private int[] jj_la1_1; static { jj_la1_0(); - jj_la1_1(); } private static void jj_la1_0() { - jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0x1f31f80,0x8000,0x1f31000,0x1320000,0x40000,0x40000,0x8000,0x18000000,0x2000000,0x18000000,0x8000,0x80000000,0x20000000,0x80000000,0x8000,0x80000,0x8000,0x1f30000,}; - } - private static void jj_la1_1() { - jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,}; + jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0xfb1f80,0x8000,0xfb1000,0x9a0000,0x40000,0x40000,0x8000,0xc000000,0x1000000,0xc000000,0x8000,0xc0000000,0x10000000,0xc0000000,0x8000,0x40000,0x8000,0xfb0000,}; } final private JJCalls[] jj_2_rtns = new JJCalls[1]; private boolean jj_rescan = false; @@ -1008,8 +1050,8 @@ public ParseException generateParseException() { jj_expentries.removeAllElements(); - boolean[] la1tokens = new boolean[33]; - for 
(int i = 0; i < 33; i++) { + boolean[] la1tokens = new boolean[32]; + for (int i = 0; i < 32; i++) { la1tokens[i] = false; } if (jj_kind >= 0) { @@ -1022,13 +1064,10 @@ if ((jj_la1_0[i] & (1<<j)) != 0) { la1tokens[j] = true; } - if ((jj_la1_1[i] & (1<<j)) != 0) { - la1tokens[32+j] = true; - } } } } - for (int i = 0; i < 33; i++) { + for (int i = 0; i < 32; i++) { if (la1tokens[i]) { jj_expentry = new int[1]; jj_expentry[0] = i; 1.43.2.1 +78 -31 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj Index: QueryParser.jj =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v retrieving revision 1.43 retrieving revision 1.43.2.1 diff -u -r1.43 -r1.43.2.1 --- QueryParser.jj 22 May 2004 17:34:31 -0000 1.43 +++ QueryParser.jj 1 Oct 2004 09:59:23 -0000 1.43.2.1 @@ -96,6 +96,7 @@ Analyzer analyzer; String field; int phraseSlop = 0; + float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; Locale locale = Locale.getDefault(); /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. @@ -137,6 +138,33 @@ throw new ParseException("Too many boolean clauses"); } } + + /** + * @return Returns the analyzer. + */ + public Analyzer getAnalyzer() { + return analyzer; + } + + /** + * @return Returns the field. + */ + public String getField() { + return field; + } + + /** + * Get the default minimal similarity for fuzzy queries. + */ + public float getFuzzyMinSim() { + return fuzzyMinSim; + } + /** + *Set the default minimum similarity for fuzzy queries. + */ + public void setFuzzyMinSim(float fuzzyMinSim) { + this.fuzzyMinSim = fuzzyMinSim; + } /** * Sets the default slop for phrases.
If zero, then exact phrase matches @@ -194,19 +222,19 @@ public Locale getLocale() { return locale; } - - protected void addClause(Vector clauses, int conj, int mods, Query q) { + + protected void addClause(Vector clauses, int conj, int mods, Query q) { boolean required, prohibited; // If this term is introduced by AND, make the preceding term required, // unless it's already prohibited - if (conj == CONJ_AND) { + if (clauses.size() > 0 && conj == CONJ_AND) { BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); if (!c.prohibited) c.required = true; } - if (operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) { + if (clauses.size() > 0 && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) { // If this term is introduced by OR, make the preceding term optional, // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) // notice if the input is a OR b, first term is parsed as required; without @@ -241,9 +269,7 @@ /** * @exception ParseException throw in overridden method to disallow */ - protected Query getFieldQuery(String field, - Analyzer analyzer, - String queryText) throws ParseException { + protected Query getFieldQuery(String field, String queryText) throws ParseException { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count @@ -285,17 +311,15 @@ } /** - * Base implementation delegates to {@link #getFieldQuery(String,Analyzer,String)}. + * Base implementation delegates to {@link #getFieldQuery(String,String)}. * This method may be overridden, for example, to return * a SpanNearQuery instead of a PhraseQuery.
* * @exception ParseException throw in overridden method to disallow */ - protected Query getFieldQuery(String field, - Analyzer analyzer, - String queryText, - int slop) throws ParseException { - Query query = getFieldQuery(field, analyzer, queryText); + protected Query getFieldQuery(String field, String queryText, int slop) + throws ParseException { + Query query = getFieldQuery(field, queryText); if (query instanceof PhraseQuery) { ((PhraseQuery) query).setSlop(slop); @@ -308,7 +332,6 @@ * @exception ParseException throw in overridden method to disallow */ protected Query getRangeQuery(String field, - Analyzer analyzer, String part1, String part2, boolean inclusive) throws ParseException @@ -423,10 +446,10 @@ * @return Resulting {@link Query} built for the term * @exception ParseException throw in overridden method to disallow */ - protected Query getFuzzyQuery(String field, String termStr) throws ParseException + protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException { Term t = new Term(field, termStr); - return new FuzzyQuery(t); + return new FuzzyQuery(t, minSimilarity); } /** @@ -445,6 +468,25 @@ return new String(caDest, 0, j); } + /** + * Returns a String where those characters that QueryParser + * expects to be escaped are escaped, i.e. preceded by a <code>\</code>. + */ + public static String escape(String s) { + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < s.length(); i++) { + char c = s.charAt(i); + // NOTE: keep this in sync with _ESCAPED_CHAR below! + if (c == '\\' || c == '+' || c == '-' || c == '!'
|| c == '(' || c == ')' || c == ':' + || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' + || c == '*' || c == '?') { + sb.append('\\'); + } + sb.append(c); + } + return sb.toString(); + } + public static void main(String[] args) throws Exception { QueryParser qp = new QueryParser("field", new org.apache.lucene.analysis.SimpleAnalyzer()); @@ -461,6 +503,7 @@ <*> TOKEN : { <#_NUM_CHAR: ["0"-"9"] > +// NOTE: keep this in sync with escape(String) above! | <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^", "[", "]", "\"", "{", "}", "~", "*", "?" ] > | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^", @@ -495,8 +538,7 @@ | <CARAT: "^" > : Boost | <QUOTED: "\"" (~["\""])+ "\""> | <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* > -| <FUZZY: "~" > -| <SLOP: "~" (<_NUM_CHAR>)+ > +| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? > | <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" > | <WILDTERM: <_TERM_START_CHAR> (<_TERM_CHAR> | ( [ "*", "?" 
] ))* > @@ -605,7 +647,7 @@ Query Term(String field) : { - Token term, boost=null, slop=null, goop1, goop2; + Token term, boost=null, fuzzySlop=null, goop1, goop2; boolean prefix = false; boolean wildcard = false; boolean fuzzy = false; @@ -620,8 +662,8 @@ | term=<WILDTERM> { wildcard=true; } | term=<NUMBER> ) - [ <FUZZY> { fuzzy=true; } ] - [ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ] + [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] + [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ] { String termImage=discardEscapeChar(term.image); if (wildcard) { @@ -631,9 +673,16 @@ discardEscapeChar(term.image.substring (0, term.image.length()-1))); } else if (fuzzy) { - q = getFuzzyQuery(field, termImage); + float fms = fuzzyMinSim; + try { + fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue(); + } catch (Exception ignored) { } + if(fms < 0.0f || fms > 1.0f){ + throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !"); + } + q = getFuzzyQuery(field, termImage, fms); } else { - q = getFieldQuery(field, analyzer, termImage); + q = getFieldQuery(field, termImage); } } | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> ) @@ -651,7 +700,7 @@ } else { goop2.image = discardEscapeChar(goop2.image); } - q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); + q = getRangeQuery(field, goop1.image, goop2.image, true); } | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> ) [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> ) @@ -669,23 +718,21 @@ goop2.image = discardEscapeChar(goop2.image); } - q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); + q = getRangeQuery(field, goop1.image, goop2.image, false); } | term=<QUOTED> - [ slop=<SLOP> ] + [ fuzzySlop=<FUZZY_SLOP> ] [ <CARAT> boost=<NUMBER> ] { int s = phraseSlop; - if (slop != null) { + if (fuzzySlop != null) { try { - s = Float.valueOf(slop.image.substring(1)).intValue(); + s = 
Float.valueOf(fuzzySlop.image.substring(1)).intValue(); } catch (Exception ignored) { } } - q = getFieldQuery(field, analyzer, - term.image.substring(1, term.image.length()-1), - s); + q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s); } ) { 1.1.2.1 +15 -17 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParserConstants.java Index: QueryParserConstants.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParserConstants.java,v retrieving revision 1.1 retrieving revision 1.1.2.1 diff -u -r1.1 -r1.1.2.1 --- QueryParserConstants.java 11 Sep 2003 01:51:33 -0000 1.1 +++ QueryParserConstants.java 1 Oct 2004 09:59:23 -0000 1.1.2.1 @@ -20,21 +20,20 @@ int CARAT = 15; int QUOTED = 16; int TERM = 17; - int FUZZY = 18; - int SLOP = 19; - int PREFIXTERM = 20; - int WILDTERM = 21; - int RANGEIN_START = 22; - int RANGEEX_START = 23; - int NUMBER = 24; - int RANGEIN_TO = 25; - int RANGEIN_END = 26; - int RANGEIN_QUOTED = 27; - int RANGEIN_GOOP = 28; - int RANGEEX_TO = 29; - int RANGEEX_END = 30; - int RANGEEX_QUOTED = 31; - int RANGEEX_GOOP = 32; + int FUZZY_SLOP = 18; + int PREFIXTERM = 19; + int WILDTERM = 20; + int RANGEIN_START = 21; + int RANGEEX_START = 22; + int NUMBER = 23; + int RANGEIN_TO = 24; + int RANGEIN_END = 25; + int RANGEIN_QUOTED = 26; + int RANGEIN_GOOP = 27; + int RANGEEX_TO = 28; + int RANGEEX_END = 29; + int RANGEEX_QUOTED = 30; + int RANGEEX_GOOP = 31; int Boost = 0; int RangeEx = 1; @@ -60,8 +59,7 @@ "\"^\"", "<QUOTED>", "<TERM>", - "\"~\"", - "<SLOP>", + "<FUZZY_SLOP>", "<PREFIXTERM>", "<WILDTERM>", "\"[\"", 1.3.2.1 +133 -117 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java Index: QueryParserTokenManager.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java,v 
retrieving revision 1.3 retrieving revision 1.3.2.1 diff -u -r1.3 -r1.3.2.1 --- QueryParserTokenManager.java 24 Mar 2004 10:12:27 -0000 1.3 +++ QueryParserTokenManager.java 1 Oct 2004 09:59:23 -0000 1.3.2.1 @@ -54,13 +54,11 @@ case 58: return jjStopAtPos(0, 14); case 91: - return jjStopAtPos(0, 22); + return jjStopAtPos(0, 21); case 94: return jjStopAtPos(0, 15); case 123: - return jjStopAtPos(0, 23); - case 126: - return jjStartNfaWithStates_3(0, 18, 18); + return jjStopAtPos(0, 22); default : return jjMoveNfa_3(0, 0); } @@ -105,7 +103,7 @@ { int[] nextStates; int startsAt = 0; - jjnewStateCnt = 31; + jjnewStateCnt = 33; int i = 1; jjstateSet[0] = startState; int j, kind = 0x7fffffff; @@ -169,56 +167,67 @@ case 18: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 19) - kind = 19; - jjstateSet[jjnewStateCnt++] = 18; + if (kind > 18) + kind = 18; + jjAddStates(7, 8); break; case 19: + if (curChar == 46) + jjCheckNAdd(20); + break; + case 20: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 18) + kind = 18; + jjCheckNAdd(20); + break; + case 21: if ((0x7bffd0f8ffffd9ffL & l) == 0L) break; if (kind > 17) kind = 17; jjCheckNAddStates(0, 6); break; - case 20: + case 22: if ((0x7bfff8f8ffffd9ffL & l) == 0L) break; if (kind > 17) kind = 17; - jjCheckNAddTwoStates(20, 21); + jjCheckNAddTwoStates(22, 23); break; - case 22: + case 24: if ((0x84002f0600000000L & l) == 0L) break; if (kind > 17) kind = 17; - jjCheckNAddTwoStates(20, 21); + jjCheckNAddTwoStates(22, 23); break; - case 23: + case 25: if ((0x7bfff8f8ffffd9ffL & l) != 0L) - jjCheckNAddStates(7, 9); - break; - case 24: - if (curChar == 42 && kind > 20) - kind = 20; + jjCheckNAddStates(9, 11); break; case 26: + if (curChar == 42 && kind > 19) + kind = 19; + break; + case 28: if ((0x84002f0600000000L & l) != 0L) - jjCheckNAddStates(7, 9); + jjCheckNAddStates(9, 11); break; - case 27: + case 29: if ((0xfbfffcf8ffffd9ffL & l) == 0L) break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(27, 28); + 
if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(29, 30); break; - case 29: + case 31: if ((0x84002f0600000000L & l) == 0L) break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(27, 28); + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(29, 30); break; default : break; } @@ -239,9 +248,13 @@ jjCheckNAddStates(0, 6); } else if (curChar == 126) + { + if (kind > 18) + kind = 18; jjstateSet[jjnewStateCnt++] = 18; + } if (curChar == 92) - jjCheckNAddStates(10, 12); + jjCheckNAddStates(12, 14); else if (curChar == 78) jjstateSet[jjnewStateCnt++] = 11; else if (curChar == 124) @@ -292,70 +305,73 @@ jjstateSet[jjnewStateCnt++] = 11; break; case 15: - jjAddStates(13, 14); + jjAddStates(15, 16); break; case 17: - if (curChar == 126) - jjstateSet[jjnewStateCnt++] = 18; + if (curChar != 126) + break; + if (kind > 18) + kind = 18; + jjstateSet[jjnewStateCnt++] = 18; break; - case 19: + case 21: if ((0x97ffffff97ffffffL & l) == 0L) break; if (kind > 17) kind = 17; jjCheckNAddStates(0, 6); break; - case 20: + case 22: if ((0x97ffffff97ffffffL & l) == 0L) break; if (kind > 17) kind = 17; - jjCheckNAddTwoStates(20, 21); + jjCheckNAddTwoStates(22, 23); break; - case 21: + case 23: if (curChar == 92) - jjCheckNAddTwoStates(22, 22); + jjCheckNAddTwoStates(24, 24); break; - case 22: + case 24: if ((0x6800000078000000L & l) == 0L) break; if (kind > 17) kind = 17; - jjCheckNAddTwoStates(20, 21); + jjCheckNAddTwoStates(22, 23); break; - case 23: + case 25: if ((0x97ffffff97ffffffL & l) != 0L) - jjCheckNAddStates(7, 9); + jjCheckNAddStates(9, 11); break; - case 25: + case 27: if (curChar == 92) - jjCheckNAddTwoStates(26, 26); + jjCheckNAddTwoStates(28, 28); break; - case 26: + case 28: if ((0x6800000078000000L & l) != 0L) - jjCheckNAddStates(7, 9); + jjCheckNAddStates(9, 11); break; - case 27: + case 29: if ((0x97ffffff97ffffffL & l) == 0L) break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(27, 28); + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(29, 30); break; - 
case 28: + case 30: if (curChar == 92) - jjCheckNAddTwoStates(29, 29); + jjCheckNAddTwoStates(31, 31); break; - case 29: + case 31: if ((0x6800000078000000L & l) == 0L) break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(27, 28); + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(29, 30); break; - case 30: + case 32: if (curChar == 92) - jjCheckNAddStates(10, 12); + jjCheckNAddStates(12, 14); break; default : break; } @@ -381,25 +397,25 @@ break; case 15: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(13, 14); + jjAddStates(15, 16); break; - case 20: + case 22: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; if (kind > 17) kind = 17; - jjCheckNAddTwoStates(20, 21); + jjCheckNAddTwoStates(22, 23); break; - case 23: + case 25: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(7, 9); + jjCheckNAddStates(9, 11); break; - case 27: + case 29: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(27, 28); + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(29, 30); break; default : break; } @@ -412,7 +428,7 @@ kind = 0x7fffffff; } ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 31 - (jjnewStateCnt = startsAt))) + if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { return curPos; } @@ -423,9 +439,9 @@ switch (pos) { case 0: - if ((active0 & 0x20000000L) != 0L) + if ((active0 & 0x10000000L) != 0L) { - jjmatchedKind = 32; + jjmatchedKind = 31; return 4; } return -1; @@ -450,9 +466,9 @@ switch(curChar) { case 84: - return jjMoveStringLiteralDfa1_1(0x20000000L); + return jjMoveStringLiteralDfa1_1(0x10000000L); case 125: - return jjStopAtPos(0, 30); + return jjStopAtPos(0, 29); default : return jjMoveNfa_1(0, 0); } @@ -467,8 +483,8 @@ switch(curChar) { case 79: - if ((active0 & 0x20000000L) != 0L) - return jjStartNfaWithStates_1(1, 29, 4); + if ((active0 & 0x10000000L) != 0L) + return 
jjStartNfaWithStates_1(1, 28, 4); break; default : break; @@ -497,8 +513,8 @@ case 0: if ((0xfffffffeffffffffL & l) != 0L) { - if (kind > 32) - kind = 32; + if (kind > 31) + kind = 31; jjCheckNAdd(4); } if ((0x100002600L & l) != 0L) @@ -518,14 +534,14 @@ jjCheckNAddTwoStates(2, 3); break; case 3: - if (curChar == 34 && kind > 31) - kind = 31; + if (curChar == 34 && kind > 30) + kind = 30; break; case 4: if ((0xfffffffeffffffffL & l) == 0L) break; - if (kind > 32) - kind = 32; + if (kind > 31) + kind = 31; jjCheckNAdd(4); break; default : break; @@ -543,12 +559,12 @@ case 4: if ((0xdfffffffffffffffL & l) == 0L) break; - if (kind > 32) - kind = 32; + if (kind > 31) + kind = 31; jjCheckNAdd(4); break; case 2: - jjAddStates(15, 16); + jjAddStates(17, 18); break; default : break; } @@ -569,13 +585,13 @@ case 4: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; - if (kind > 32) - kind = 32; + if (kind > 31) + kind = 31; jjCheckNAdd(4); break; case 2: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(15, 16); + jjAddStates(17, 18); break; default : break; } @@ -620,9 +636,9 @@ case 0: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 24) - kind = 24; - jjAddStates(17, 18); + if (kind > 23) + kind = 23; + jjAddStates(19, 20); break; case 1: if (curChar == 46) @@ -631,8 +647,8 @@ case 2: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 24) - kind = 24; + if (kind > 23) + kind = 23; jjCheckNAdd(2); break; default : break; @@ -683,9 +699,9 @@ switch (pos) { case 0: - if ((active0 & 0x2000000L) != 0L) + if ((active0 & 0x1000000L) != 0L) { - jjmatchedKind = 28; + jjmatchedKind = 27; return 4; } return -1; @@ -710,9 +726,9 @@ switch(curChar) { case 84: - return jjMoveStringLiteralDfa1_2(0x2000000L); + return jjMoveStringLiteralDfa1_2(0x1000000L); case 93: - return jjStopAtPos(0, 26); + return jjStopAtPos(0, 25); default : return jjMoveNfa_2(0, 0); } @@ -727,8 +743,8 @@ switch(curChar) { case 79: - if ((active0 & 0x2000000L) != 0L) - return 
jjStartNfaWithStates_2(1, 25, 4); + if ((active0 & 0x1000000L) != 0L) + return jjStartNfaWithStates_2(1, 24, 4); break; default : break; @@ -757,8 +773,8 @@ case 0: if ((0xfffffffeffffffffL & l) != 0L) { - if (kind > 28) - kind = 28; + if (kind > 27) + kind = 27; jjCheckNAdd(4); } if ((0x100002600L & l) != 0L) @@ -778,14 +794,14 @@ jjCheckNAddTwoStates(2, 3); break; case 3: - if (curChar == 34 && kind > 27) - kind = 27; + if (curChar == 34 && kind > 26) + kind = 26; break; case 4: if ((0xfffffffeffffffffL & l) == 0L) break; - if (kind > 28) - kind = 28; + if (kind > 27) + kind = 27; jjCheckNAdd(4); break; default : break; @@ -803,12 +819,12 @@ case 4: if ((0xffffffffdfffffffL & l) == 0L) break; - if (kind > 28) - kind = 28; + if (kind > 27) + kind = 27; jjCheckNAdd(4); break; case 2: - jjAddStates(15, 16); + jjAddStates(17, 18); break; default : break; } @@ -829,13 +845,13 @@ case 4: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; - if (kind > 28) - kind = 28; + if (kind > 27) + kind = 27; jjCheckNAdd(4); break; case 2: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(15, 16); + jjAddStates(17, 18); break; default : break; } @@ -855,8 +871,8 @@ } } static final int[] jjnextStates = { - 20, 23, 24, 27, 28, 25, 21, 23, 24, 25, 22, 26, 29, 15, 16, 2, - 3, 0, 1, + 22, 25, 26, 29, 30, 27, 23, 18, 19, 25, 26, 27, 24, 28, 31, 15, + 16, 2, 3, 0, 1, }; private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) { @@ -872,8 +888,8 @@ } public static final String[] jjstrLiteralImages = { "", null, null, null, null, null, null, null, null, null, "\53", "\55", "\50", -"\51", "\72", "\136", null, null, "\176", null, null, null, "\133", "\173", null, -"\124\117", "\135", null, null, "\124\117", "\175", null, null, }; +"\51", "\72", "\136", null, null, null, null, null, "\133", "\173", null, "\124\117", +"\135", null, null, "\124\117", "\175", null, null, }; public static final String[] lexStateNames = { "Boost", "RangeEx", @@ -881,18 +897,18 @@ 
"DEFAULT", }; public static final int[] jjnewLexState = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, 2, 1, 3, - -1, 3, -1, -1, -1, 3, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, 3, -1, + 3, -1, -1, -1, 3, -1, -1, }; static final long[] jjtoToken = { - 0x1ffffff81L, + 0xffffff81L, }; static final long[] jjtoSkip = { 0x40L, }; protected CharStream input_stream; -private final int[] jjrounds = new int[31]; -private final int[] jjstateSet = new int[62]; +private final int[] jjrounds = new int[33]; +private final int[] jjstateSet = new int[66]; protected char curChar; public QueryParserTokenManager(CharStream stream) { @@ -914,7 +930,7 @@ { int i; jjround = 0x80000001; - for (i = 31; i-- > 0;) + for (i = 33; i-- > 0;) jjrounds[i] = 0x80000000; } public void ReInit(CharStream stream, int lexState) No revision No revision 1.26.2.1 +54 -37 jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java Index: TestQueryParser.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java,v retrieving revision 1.26 retrieving revision 1.26.2.1 diff -u -r1.26 -r1.26.2.1 --- TestQueryParser.java 30 May 2004 20:24:20 -0000 1.26 +++ TestQueryParser.java 1 Oct 2004 09:59:23 -0000 1.26.2.1 @@ -89,7 +89,7 @@ super(f, a); } - protected Query getFuzzyQuery(String field, String termStr) throws ParseException { + protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException { throw new ParseException("Fuzzy queries not allowed"); } @@ -235,15 +235,29 @@ public void testWildcard() throws Exception { assertQueryEquals("term*", null, "term*"); assertQueryEquals("term*^2", null, "term*^2.0"); - assertQueryEquals("term~", null, "term~"); - assertQueryEquals("term~^2", null, "term^2.0~"); - assertQueryEquals("term^2~", null, "term^2.0~"); 
+ assertQueryEquals("term~", null, "term~0.5"); + assertQueryEquals("term~0.7", null, "term~0.7"); + assertQueryEquals("term~^2", null, "term^2.0~0.5"); + assertQueryEquals("term^2~", null, "term^2.0~0.5"); assertQueryEquals("term*germ", null, "term*germ"); assertQueryEquals("term*germ^3", null, "term*germ^3.0"); assertTrue(getQuery("term*", null) instanceof PrefixQuery); assertTrue(getQuery("term*^2", null) instanceof PrefixQuery); assertTrue(getQuery("term~", null) instanceof FuzzyQuery); + assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery); + FuzzyQuery fq = (FuzzyQuery)getQuery("term~0.7", null); + assertEquals(0.7f, fq.getMinSimilarity(), 0.1f); + assertEquals(0, fq.getPrefixLength()); + fq = (FuzzyQuery)getQuery("term~", null); + assertEquals(0.5f, fq.getMinSimilarity(), 0.1f); + assertEquals(0, fq.getPrefixLength()); + try { + getQuery("term~1.1", null); // value > 1, throws exception + fail(); + } catch(ParseException pe) { + // expected exception + } assertTrue(getQuery("term*germ", null) instanceof WildcardQuery); /* Tests to see that wild card terms are (or are not) properly @@ -317,7 +331,8 @@ public void testEscaped() throws Exception { Analyzer a = new WhitespaceAnalyzer(); - /* assertQueryEquals("\\[brackets", a, "\\[brackets"); + + /*assertQueryEquals("\\[brackets", a, "\\[brackets"); assertQueryEquals("\\[brackets", null, "brackets"); assertQueryEquals("\\\\", a, "\\\\"); assertQueryEquals("\\+blah", a, "\\+blah"); @@ -337,38 +352,40 @@ assertQueryEquals("\\~blah", a, "\\~blah"); assertQueryEquals("\\*blah", a, "\\*blah"); assertQueryEquals("\\?blah", a, "\\?blah"); - assertQueryEquals("foo \\&& bar", a, "foo \\&& bar"); - assertQueryEquals("foo \\|| bar", a, "foo \\|| bar"); - assertQueryEquals("foo \\AND bar", a, "foo \\AND bar"); */ - - assertQueryEquals("a\\-b:c",a,"a-b:c"); - assertQueryEquals("a\\+b:c",a,"a+b:c"); - assertQueryEquals("a\\:b:c",a,"a:b:c"); - assertQueryEquals("a\\\\b:c",a,"a\\b:c"); - - 
assertQueryEquals("a:b\\-c",a,"a:b-c"); - assertQueryEquals("a:b\\+c",a,"a:b+c"); - assertQueryEquals("a:b\\:c",a,"a:b:c"); - assertQueryEquals("a:b\\\\c",a,"a:b\\c"); - - assertQueryEquals("a:b\\-c*",a,"a:b-c*"); - assertQueryEquals("a:b\\+c*",a,"a:b+c*"); - assertQueryEquals("a:b\\:c*",a,"a:b:c*"); - assertQueryEquals("a:b\\\\c*",a,"a:b\\c*"); - - assertQueryEquals("a:b\\-?c",a,"a:b-?c"); - assertQueryEquals("a:b\\+?c",a,"a:b+?c"); - assertQueryEquals("a:b\\:?c",a,"a:b:?c"); - assertQueryEquals("a:b\\\\?c",a,"a:b\\?c"); - - assertQueryEquals("a:b\\-c~",a,"a:b-c~"); - assertQueryEquals("a:b\\+c~",a,"a:b+c~"); - assertQueryEquals("a:b\\:c~",a,"a:b:c~"); - assertQueryEquals("a:b\\\\c~",a,"a:b\\c~"); - - assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]"); - assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]"); - assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]"); + //assertQueryEquals("foo \\&\\& bar", a, "foo \\&\\& bar"); + //assertQueryEquals("foo \\|| bar", a, "foo \\|| bar"); + //assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/ + + assertQueryEquals("a\\-b:c", a, "a-b:c"); + assertQueryEquals("a\\+b:c", a, "a+b:c"); + assertQueryEquals("a\\:b:c", a, "a:b:c"); + assertQueryEquals("a\\\\b:c", a, "a\\b:c"); + + assertQueryEquals("a:b\\-c", a, "a:b-c"); + assertQueryEquals("a:b\\+c", a, "a:b+c"); + assertQueryEquals("a:b\\:c", a, "a:b:c"); + assertQueryEquals("a:b\\\\c", a, "a:b\\c"); + + assertQueryEquals("a:b\\-c*", a, "a:b-c*"); + assertQueryEquals("a:b\\+c*", a, "a:b+c*"); + assertQueryEquals("a:b\\:c*", a, "a:b:c*"); + + assertQueryEquals("a:b\\\\c*", a, "a:b\\c*"); + + assertQueryEquals("a:b\\-?c", a, "a:b-?c"); + assertQueryEquals("a:b\\+?c", a, "a:b+?c"); + assertQueryEquals("a:b\\:?c", a, "a:b:?c"); + + assertQueryEquals("a:b\\\\?c", a, "a:b\\?c"); + + assertQueryEquals("a:b\\-c~", a, "a:b-c~0.5"); + assertQueryEquals("a:b\\+c~", a, "a:b+c~0.5"); + assertQueryEquals("a:b\\:c~", a, "a:b:c~0.5"); + assertQueryEquals("a:b\\\\c~", 
a, "a:b\\c~0.5"); + + assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]"); + assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]"); + assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]"); } public void testTabNewlineCarriageReturn()
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]