Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/QueryParser/QueryParser.JJ URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/QueryParser/QueryParser.JJ?rev=411501&r1=411500&r2=411501&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/QueryParser/QueryParser.JJ (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/QueryParser/QueryParser.JJ Sat Jun 3 19:41:13 2006 @@ -1,710 +1,988 @@ -/** - * Copyright 2004 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -options { - STATIC=false; - JAVA_UNICODE_ESCAPE=true; - USER_CHAR_STREAM=true; -} - -PARSER_BEGIN(QueryParser) - -package org.apache.lucene.queryParser; - -import java.util.Vector; -import java.io.*; -import java.text.*; -import java.util.*; -import org.apache.lucene.index.Term; -import org.apache.lucene.analysis.*; -import org.apache.lucene.document.*; -import org.apache.lucene.search.*; - -/** - * This class is generated by JavaCC. The only method that clients should need - * to call is <a href="#parse">parse()</a>. - * - * The syntax for query strings is as follows: - * A Query is a series of clauses. 
- * A clause may be prefixed by: - * <ul> - * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating - * that the clause is required or prohibited respectively; or - * <li> a term followed by a colon, indicating the field to be searched. - * This enables one to construct queries which search multiple fields. - * </ul> - * - * A clause may be either: - * <ul> - * <li> a term, indicating all the documents that contain this term; or - * <li> a nested query, enclosed in parentheses. Note that this may be used - * with a <code>+</code>/<code>-</code> prefix to require any of a set of - * terms. - * </ul> - * - * Thus, in BNF, the query grammar is: - * <pre> - * Query ::= ( Clause )* - * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) - * </pre> - * - * <p> - * Examples of appropriately formatted queries can be found in the <a - * href="http://jakarta.apache.org/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java">test cases</a>. - * </p> - * - * @author Brian Goetz - * @author Peter Halacsy - * @author Tatu Saloranta - */ - -public class QueryParser { - - private static final int CONJ_NONE = 0; - private static final int CONJ_AND = 1; - private static final int CONJ_OR = 2; - - private static final int MOD_NONE = 0; - private static final int MOD_NOT = 10; - private static final int MOD_REQ = 11; - - public static final int DEFAULT_OPERATOR_OR = 0; - public static final int DEFAULT_OPERATOR_AND = 1; - - /** The actual operator that parser uses to combine query terms */ - private int operator = DEFAULT_OPERATOR_OR; - - /** - * Whether terms of wildcard and prefix queries are to be automatically - * lower-cased or not. Default is <code>true</code>. - */ - boolean lowercaseWildcardTerms = true; - - Analyzer analyzer; - String field; - int phraseSlop = 0; - Locale locale = Locale.getDefault(); - - /** Parses a query string, returning a [EMAIL PROTECTED] org.apache.lucene.search.Query}. 
- * @param query the query string to be parsed. - * @param field the default field for query terms. - * @param analyzer used to find terms in the query text. - * @throws ParseException if the parsing fails - */ - static public Query parse(String query, String field, Analyzer analyzer) - throws ParseException { - QueryParser parser = new QueryParser(field, analyzer); - return parser.parse(query); - } - - /** Constructs a query parser. - * @param f the default field for query terms. - * @param a used to find terms in the query text. - */ - public QueryParser(String f, Analyzer a) { - this(new FastCharStream(new StringReader(""))); - analyzer = a; - field = f; - } - - /** Parses a query string, returning a - * <a href="lucene.search.Query.html">Query</a>. - * @param query the query string to be parsed. - * @throws ParseException if the parsing fails - */ - public Query parse(String query) throws ParseException { - ReInit(new FastCharStream(new StringReader(query))); - try { - return Query(field); - } - catch (TokenMgrError tme) { - throw new ParseException(tme.getMessage()); - } - catch (BooleanQuery.TooManyClauses tmc) { - throw new ParseException("Too many boolean clauses"); - } - } - - /** - * Sets the default slop for phrases. If zero, then exact phrase matches - * are required. Default value is zero. - */ - public void setPhraseSlop(int phraseSlop) { - this.phraseSlop = phraseSlop; - } - - /** - * Gets the default slop for phrases. - */ - public int getPhraseSlop() { - return phraseSlop; - } - - /** - * Sets the boolean operator of the QueryParser. 
- * In classic mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers - * are considered optional: for example <code>capital of Hungary</code> is equal to - * <code>capital OR of OR Hungary</code>.<br/> - * In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjuction: the - * above mentioned query is parsed as <code>capital AND of AND Hungary</code> - */ - public void setOperator(int operator) { - this.operator = operator; - } - - /** - * Gets implicit operator setting, which will be either DEFAULT_OPERATOR_AND - * or DEFAULT_OPERATOR_OR. - */ - public int getOperator() { - return operator; - } - - public void setLowercaseWildcardTerms(boolean lowercaseWildcardTerms) { - this.lowercaseWildcardTerms = lowercaseWildcardTerms; - } - - public boolean getLowercaseWildcardTerms() { - return lowercaseWildcardTerms; - } - - /** - * Set locale used by date range parsing. - */ - public void setLocale(Locale locale) { - this.locale = locale; - } - - /** - * Returns current locale, allowing access by subclasses. 
- */ - public Locale getLocale() { - return locale; - } - - protected void addClause(Vector clauses, int conj, int mods, Query q) { - boolean required, prohibited; - - // If this term is introduced by AND, make the preceding term required, - // unless it's already prohibited - if (conj == CONJ_AND) { - BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); - if (!c.prohibited) - c.required = true; - } - - if (operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) { - // If this term is introduced by OR, make the preceding term optional, - // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) - // notice if the input is a OR b, first term is parsed as required; without - // this modification a OR b would parsed as +a OR b - BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); - if (!c.prohibited) - c.required = false; - } - - // We might have been passed a null query; the term might have been - // filtered away by the analyzer. - if (q == null) - return; - - if (operator == DEFAULT_OPERATOR_OR) { - // We set REQUIRED if we're introduced by AND or +; PROHIBITED if - // introduced by NOT or -; make sure not to set both. 
- prohibited = (mods == MOD_NOT); - required = (mods == MOD_REQ); - if (conj == CONJ_AND && !prohibited) { - required = true; - } - } else { - // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED - // if not PROHIBITED and not introduced by OR - prohibited = (mods == MOD_NOT); - required = (!prohibited && conj != CONJ_OR); - } - clauses.addElement(new BooleanClause(q, required, prohibited)); - } - - /** - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFieldQuery(String field, - Analyzer analyzer, - String queryText) throws ParseException { - // Use the analyzer to get all the tokens, and then build a TermQuery, - // PhraseQuery, or nothing based on the term count - - TokenStream source = analyzer.tokenStream(field, - new StringReader(queryText)); - Vector v = new Vector(); - org.apache.lucene.analysis.Token t; - - while (true) { - try { - t = source.next(); - } - catch (IOException e) { - t = null; - } - if (t == null) - break; - v.addElement(t.termText()); - } - try { - source.close(); - } - catch (IOException e) { - // ignore - } - - if (v.size() == 0) - return null; - else if (v.size() == 1) - return new TermQuery(new Term(field, (String) v.elementAt(0))); - else { - PhraseQuery q = new PhraseQuery(); - q.setSlop(phraseSlop); - for (int i=0; i<v.size(); i++) { - q.add(new Term(field, (String) v.elementAt(i))); - } - return q; - } - } - - /** - * Base implementation delegates to [EMAIL PROTECTED] #getFieldQuery(String,Analyzer,String)}. - * This method may be overridden, for example, to return - * a SpanNearQuery instead of a PhraseQuery. 
- * - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFieldQuery(String field, - Analyzer analyzer, - String queryText, - int slop) throws ParseException { - Query query = getFieldQuery(field, analyzer, queryText); - - if (query instanceof PhraseQuery) { - ((PhraseQuery) query).setSlop(slop); - } - - return query; - } - - /** - * @exception ParseException throw in overridden method to disallow - */ - protected Query getRangeQuery(String field, - Analyzer analyzer, - String part1, - String part2, - boolean inclusive) throws ParseException - { - try { - DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); - df.setLenient(true); - Date d1 = df.parse(part1); - Date d2 = df.parse(part2); - part1 = DateField.dateToString(d1); - part2 = DateField.dateToString(d2); - } - catch (Exception e) { } - - return new RangeQuery(new Term(field, part1), - new Term(field, part2), - inclusive); - } - - /** - * Factory method for generating query, given a set of clauses. - * By default creates a boolean query composed of clauses passed in. - * - * Can be overridden by extending classes, to modify query being - * returned. - * - * @param clauses Vector that contains [EMAIL PROTECTED] BooleanClause} instances - * to join. - * - * @return Resulting [EMAIL PROTECTED] Query} object. - * @exception ParseException throw in overridden method to disallow - */ - protected Query getBooleanQuery(Vector clauses) throws ParseException - { - BooleanQuery query = new BooleanQuery(); - for (int i = 0; i < clauses.size(); i++) { - query.add((BooleanClause)clauses.elementAt(i)); - } - return query; - } - - /** - * Factory method for generating a query. Called when parser - * parses an input term token that contains one or more wildcard - * characters (? and *), but is not a prefix term token (one - * that has just a single * character at the end) - *<p> - * Depending on settings, prefix term may be lower-cased - * automatically. 
It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. - *<p> - * Can be overridden by extending classes, to provide custom handling for - * wildcard queries, which may be necessary due to missing analyzer calls. - * - * @param field Name of the field query will use. - * @param termStr Term token that contains one or more wild card - * characters (? or *), but is not simple prefix term - * - * @return Resulting [EMAIL PROTECTED] Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getWildcardQuery(String field, String termStr) throws ParseException - { - if (lowercaseWildcardTerms) { - termStr = termStr.toLowerCase(); - } - Term t = new Term(field, termStr); - return new WildcardQuery(t); - } - - /** - * Factory method for generating a query (similar to - * ([EMAIL PROTECTED] #getWildcardQuery}). Called when parser parses an input term - * token that uses prefix notation; that is, contains a single '*' wildcard - * character as its last character. Since this is a special case - * of generic wildcard term, and such a query can be optimized easily, - * this usually results in a different query object. - *<p> - * Depending on settings, a prefix term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. - *<p> - * Can be overridden by extending classes, to provide custom handling for - * wild card queries, which may be necessary due to missing analyzer calls. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * (<b>without</b> trailing '*' character!) 
- * - * @return Resulting [EMAIL PROTECTED] Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getPrefixQuery(String field, String termStr) throws ParseException - { - if (lowercaseWildcardTerms) { - termStr = termStr.toLowerCase(); - } - Term t = new Term(field, termStr); - return new PrefixQuery(t); - } - - /** - * Factory method for generating a query (similar to - * ([EMAIL PROTECTED] #getWildcardQuery}). Called when parser parses - * an input term token that has the fuzzy suffix (~) appended. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * - * @return Resulting [EMAIL PROTECTED] Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFuzzyQuery(String field, String termStr) throws ParseException - { - Term t = new Term(field, termStr); - return new FuzzyQuery(t); - } - - /** - * Returns a String where the escape char has been - * removed, or kept only once if there was a double escape. - */ - private String discardEscapeChar(String input) { - char[] caSource = input.toCharArray(); - char[] caDest = new char[caSource.length]; - int j = 0; - for (int i = 0; i < caSource.length; i++) { - if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) { - caDest[j++]=caSource[i]; - } - } - return new String(caDest, 0, j); - } - - public static void main(String[] args) throws Exception { - QueryParser qp = new QueryParser("field", - new org.apache.lucene.analysis.SimpleAnalyzer()); - Query q = qp.parse(args[0]); - System.out.println(q.toString("field")); - } -} - -PARSER_END(QueryParser) - -/* ***************** */ -/* Token Definitions */ -/* ***************** */ - -<*> TOKEN : { - <#_NUM_CHAR: ["0"-"9"] > -| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^", - "[", "]", "\"", "{", "}", "~", "*", "?" 
] > -| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^", - "[", "]", "\"", "{", "}", "~", "*", "?" ] - | <_ESCAPED_CHAR> ) > -| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) > -| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") > -} - -<DEFAULT, RangeIn, RangeEx> SKIP : { - <<_WHITESPACE>> -} - -// OG: to support prefix queries: -// http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137 -// Change from: -// | <WILDTERM: <_TERM_START_CHAR> -// (<_TERM_CHAR> | ( [ "*", "?" ] ))* > -// To: -// -// | <WILDTERM: (<_TERM_CHAR> | ( [ "*", "?" ] ))* > - -<DEFAULT> TOKEN : { - <AND: ("AND" | "&&") > -| <OR: ("OR" | "||") > -| <NOT: ("NOT" | "!") > -| <PLUS: "+" > -| <MINUS: "-" > -| <LPAREN: "(" > -| <RPAREN: ")" > -| <COLON: ":" > -| <CARAT: "^" > : Boost -| <QUOTED: "\"" (~["\""])+ "\""> -| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* > -| <FUZZY: "~" > -| <SLOP: "~" (<_NUM_CHAR>)+ > -| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" > -| <WILDTERM: <_TERM_START_CHAR> - (<_TERM_CHAR> | ( [ "*", "?" ] ))* > -| <RANGEIN_START: "[" > : RangeIn -| <RANGEEX_START: "{" > : RangeEx -} - -<Boost> TOKEN : { -<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? 
> : DEFAULT -} - -<RangeIn> TOKEN : { -<RANGEIN_TO: "TO"> -| <RANGEIN_END: "]"> : DEFAULT -| <RANGEIN_QUOTED: "\"" (~["\""])+ "\""> -| <RANGEIN_GOOP: (~[ " ", "]" ])+ > -} - -<RangeEx> TOKEN : { -<RANGEEX_TO: "TO"> -| <RANGEEX_END: "}"> : DEFAULT -| <RANGEEX_QUOTED: "\"" (~["\""])+ "\""> -| <RANGEEX_GOOP: (~[ " ", "}" ])+ > -} - -// * Query ::= ( Clause )* -// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) - -int Conjunction() : { - int ret = CONJ_NONE; -} -{ - [ - <AND> { ret = CONJ_AND; } - | <OR> { ret = CONJ_OR; } - ] - { return ret; } -} - -int Modifiers() : { - int ret = MOD_NONE; -} -{ - [ - <PLUS> { ret = MOD_REQ; } - | <MINUS> { ret = MOD_NOT; } - | <NOT> { ret = MOD_NOT; } - ] - { return ret; } -} - -Query Query(String field) : -{ - Vector clauses = new Vector(); - Query q, firstQuery=null; - int conj, mods; -} -{ - mods=Modifiers() q=Clause(field) - { - addClause(clauses, CONJ_NONE, mods, q); - if (mods == MOD_NONE) - firstQuery=q; - } - ( - conj=Conjunction() mods=Modifiers() q=Clause(field) - { addClause(clauses, conj, mods, q); } - )* - { - if (clauses.size() == 1 && firstQuery != null) - return firstQuery; - else { - return getBooleanQuery(clauses); - } - } -} - -Query Clause(String field) : { - Query q; - Token fieldToken=null, boost=null; -} -{ - [ - LOOKAHEAD(2) - fieldToken=<TERM> <COLON> { - field=discardEscapeChar(fieldToken.image); - } - ] - - ( - q=Term(field) - | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? 
- - ) - { - if (boost != null) { - float f = (float)1.0; - try { - f = Float.valueOf(boost.image).floatValue(); - q.setBoost(f); - } catch (Exception ignored) { } - } - return q; - } -} - - -Query Term(String field) : { - Token term, boost=null, slop=null, goop1, goop2; - boolean prefix = false; - boolean wildcard = false; - boolean fuzzy = false; - boolean rangein = false; - Query q; -} -{ - ( - ( - term=<TERM> - | term=<PREFIXTERM> { prefix=true; } - | term=<WILDTERM> { wildcard=true; } - | term=<NUMBER> - ) - [ <FUZZY> { fuzzy=true; } ] - [ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ] - { - String termImage=discardEscapeChar(term.image); - if (wildcard) { - q = getWildcardQuery(field, termImage); - } else if (prefix) { - q = getPrefixQuery(field, - discardEscapeChar(term.image.substring - (0, term.image.length()-1))); - } else if (fuzzy) { - q = getFuzzyQuery(field, termImage); - } else { - q = getFieldQuery(field, analyzer, termImage); - } - } - | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> ) - [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> ) - <RANGEIN_END> ) - [ <CARAT> boost=<NUMBER> ] - { - if (goop1.kind == RANGEIN_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } else { - goop1.image = discardEscapeChar(goop1.image); - } - if (goop2.kind == RANGEIN_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } else { - goop2.image = discardEscapeChar(goop2.image); - } - q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); - } - | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> ) - [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> ) - <RANGEEX_END> ) - [ <CARAT> boost=<NUMBER> ] - { - if (goop1.kind == RANGEEX_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } else { - goop1.image = discardEscapeChar(goop1.image); - } - if (goop2.kind == RANGEEX_QUOTED) { - goop2.image = goop2.image.substring(1, 
goop2.image.length()-1); - } else { - goop2.image = discardEscapeChar(goop2.image); - } - - q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); - } - | term=<QUOTED> - [ slop=<SLOP> ] - [ <CARAT> boost=<NUMBER> ] - { - int s = phraseSlop; - - if (slop != null) { - try { - s = Float.valueOf(slop.image.substring(1)).intValue(); - } - catch (Exception ignored) { } - } - q = getFieldQuery(field, analyzer, - term.image.substring(1, term.image.length()-1), - s); - } - ) - { - if (boost != null) { - float f = (float) 1.0; - try { - f = Float.valueOf(boost.image).floatValue(); - } - catch (Exception ignored) { - /* Should this be handled somehow? (defaults to "no boost", if - * boost number is invalid) - */ - } - - // avoid boosting null queries, such as those caused by stop words - if (q != null) { - q.setBoost(f); - } - } - return q; - } -} +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +options { + STATIC=false; + JAVA_UNICODE_ESCAPE=true; + USER_CHAR_STREAM=true; +} + +PARSER_BEGIN(QueryParser) + +package org.apache.lucene.queryParser; + +import java.util.Vector; +import java.io.*; +import java.text.*; +import java.util.*; +import org.apache.lucene.index.Term; +import org.apache.lucene.analysis.*; +import org.apache.lucene.document.*; +import org.apache.lucene.search.*; +import org.apache.lucene.util.Parameter; + +/** + * This class is generated by JavaCC. 
The most important method is + * {@link #parse(String)}. + * + * The syntax for query strings is as follows: + * A Query is a series of clauses. + * A clause may be prefixed by: + * <ul> + * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating + * that the clause is required or prohibited respectively; or + * <li> a term followed by a colon, indicating the field to be searched. + * This enables one to construct queries which search multiple fields. + * </ul> + * + * A clause may be either: + * <ul> + * <li> a term, indicating all the documents that contain this term; or + * <li> a nested query, enclosed in parentheses. Note that this may be used + * with a <code>+</code>/<code>-</code> prefix to require any of a set of + * terms. + * </ul> + * + * Thus, in BNF, the query grammar is: + * <pre> + * Query ::= ( Clause )* + * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) + * </pre> + * + * <p> + * Examples of appropriately formatted queries can be found in the <a + * href="http://lucene.apache.org/java/docs/queryparsersyntax.html">query syntax + * documentation</a>. 
+ * </p> + * + * <p>Note that QueryParser is <em>not</em> thread-safe.</p> + * + * @author Brian Goetz + * @author Peter Halacsy + * @author Tatu Saloranta + */ + +public class QueryParser { + + private static final int CONJ_NONE = 0; + private static final int CONJ_AND = 1; + private static final int CONJ_OR = 2; + + private static final int MOD_NONE = 0; + private static final int MOD_NOT = 10; + private static final int MOD_REQ = 11; + + /** @deprecated use [EMAIL PROTECTED] #OR_OPERATOR} instead */ + public static final int DEFAULT_OPERATOR_OR = 0; + /** @deprecated use [EMAIL PROTECTED] #AND_OPERATOR} instead */ + public static final int DEFAULT_OPERATOR_AND = 1; + + // make it possible to call setDefaultOperator() without accessing + // the nested class: + /** Alternative form of QueryParser.Operator.AND */ + public static final Operator AND_OPERATOR = Operator.AND; + /** Alternative form of QueryParser.Operator.OR */ + public static final Operator OR_OPERATOR = Operator.OR; + + /** The actual operator that parser uses to combine query terms */ + private Operator operator = OR_OPERATOR; + + boolean lowercaseExpandedTerms = true; + + Analyzer analyzer; + String field; + int phraseSlop = 0; + float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; + int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; + Locale locale = Locale.getDefault(); + + /** The default operator for parsing queries. + * Use [EMAIL PROTECTED] QueryParser#setDefaultOperator} to change it. + */ + static public final class Operator extends Parameter { + private Operator(String name) { + super(name); + } + static public final Operator OR = new Operator("OR"); + static public final Operator AND = new Operator("AND"); + } + + /** Parses a query string, returning a [EMAIL PROTECTED] org.apache.lucene.search.Query}. + * @param query the query string to be parsed. + * @param field the default field for query terms. + * @param analyzer used to find terms in the query text. 
+ * @throws ParseException if the parsing fails + * + * @deprecated Use an instance of QueryParser and the [EMAIL PROTECTED] #parse(String)} method instead. + */ + static public Query parse(String query, String field, Analyzer analyzer) + throws ParseException { + QueryParser parser = new QueryParser(field, analyzer); + return parser.parse(query); + } + + /** Constructs a query parser. + * @param f the default field for query terms. + * @param a used to find terms in the query text. + */ + public QueryParser(String f, Analyzer a) { + this(new FastCharStream(new StringReader(""))); + analyzer = a; + field = f; + } + + /** Parses a query string, returning a [EMAIL PROTECTED] org.apache.lucene.search.Query}. + * @param query the query string to be parsed. + * @throws ParseException if the parsing fails + */ + public Query parse(String query) throws ParseException { + ReInit(new FastCharStream(new StringReader(query))); + try { + return Query(field); + } + catch (TokenMgrError tme) { + throw new ParseException(tme.getMessage()); + } + catch (BooleanQuery.TooManyClauses tmc) { + throw new ParseException("Too many boolean clauses"); + } + } + + /** + * @return Returns the analyzer. + */ + public Analyzer getAnalyzer() { + return analyzer; + } + + /** + * @return Returns the field. + */ + public String getField() { + return field; + } + + /** + * Get the minimal similarity for fuzzy queries. + */ + public float getFuzzyMinSim() { + return fuzzyMinSim; + } + + /** + * Set the minimum similarity for fuzzy queries. + * Default is 0.5f. + */ + public void setFuzzyMinSim(float fuzzyMinSim) { + this.fuzzyMinSim = fuzzyMinSim; + } + + /** + * Get the prefix length for fuzzy queries. + * @return Returns the fuzzyPrefixLength. + */ + public int getFuzzyPrefixLength() { + return fuzzyPrefixLength; + } + + /** + * Set the prefix length for fuzzy queries. Default is 0. + * @param fuzzyPrefixLength The fuzzyPrefixLength to set. 
+ */ + public void setFuzzyPrefixLength(int fuzzyPrefixLength) { + this.fuzzyPrefixLength = fuzzyPrefixLength; + } + + /** + * Sets the default slop for phrases. If zero, then exact phrase matches + * are required. Default value is zero. + */ + public void setPhraseSlop(int phraseSlop) { + this.phraseSlop = phraseSlop; + } + + /** + * Gets the default slop for phrases. + */ + public int getPhraseSlop() { + return phraseSlop; + } + + /** + * Sets the boolean operator of the QueryParser. + * In default mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers + * are considered optional: for example <code>capital of Hungary</code> is equal to + * <code>capital OR of OR Hungary</code>.<br/> + * In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjuction: the + * above mentioned query is parsed as <code>capital AND of AND Hungary</code> + * @deprecated use [EMAIL PROTECTED] #setDefaultOperator(QueryParser.Operator)} instead + */ + public void setOperator(int op) { + if (op == DEFAULT_OPERATOR_AND) + this.operator = AND_OPERATOR; + else if (op == DEFAULT_OPERATOR_OR) + this.operator = OR_OPERATOR; + else + throw new IllegalArgumentException("Unknown operator " + op); + } + + /** + * Sets the boolean operator of the QueryParser. + * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers + * are considered optional: for example <code>capital of Hungary</code> is equal to + * <code>capital OR of OR Hungary</code>.<br/> + * In <code>AND_OPERATOR</code> mode terms are considered to be in conjuction: the + * above mentioned query is parsed as <code>capital AND of AND Hungary</code> + */ + public void setDefaultOperator(Operator op) { + this.operator = op; + } + + /** + * Gets implicit operator setting, which will be either DEFAULT_OPERATOR_AND + * or DEFAULT_OPERATOR_OR. 
+ * @deprecated use [EMAIL PROTECTED] #getDefaultOperator()} instead + */ + public int getOperator() { + if(operator == AND_OPERATOR) + return DEFAULT_OPERATOR_AND; + else if(operator == OR_OPERATOR) + return DEFAULT_OPERATOR_OR; + else + throw new IllegalStateException("Unknown operator " + operator); + } + + /** + * Gets implicit operator setting, which will be either AND_OPERATOR + * or OR_OPERATOR. + */ + public Operator getDefaultOperator() { + return operator; + } + + /** + * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically + * lower-cased or not. Default is <code>true</code>. + * @deprecated use [EMAIL PROTECTED] #setLowercaseExpandedTerms(boolean)} instead + */ + public void setLowercaseWildcardTerms(boolean lowercaseExpandedTerms) { + this.lowercaseExpandedTerms = lowercaseExpandedTerms; + } + + /** + * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically + * lower-cased or not. Default is <code>true</code>. + */ + public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) { + this.lowercaseExpandedTerms = lowercaseExpandedTerms; + } + + /** + * @deprecated use [EMAIL PROTECTED] #getLowercaseExpandedTerms()} instead + */ + public boolean getLowercaseWildcardTerms() { + return lowercaseExpandedTerms; + } + + /** + * @see #setLowercaseExpandedTerms(boolean) + */ + public boolean getLowercaseExpandedTerms() { + return lowercaseExpandedTerms; + } + + /** + * Set locale used by date range parsing. + */ + public void setLocale(Locale locale) { + this.locale = locale; + } + + /** + * Returns current locale, allowing access by subclasses. 
+ */ + public Locale getLocale() { + return locale; + } + + protected void addClause(Vector clauses, int conj, int mods, Query q) { + boolean required, prohibited; + + // If this term is introduced by AND, make the preceding term required, + // unless it's already prohibited + if (clauses.size() > 0 && conj == CONJ_AND) { + BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); + if (!c.isProhibited()) + c.setOccur(BooleanClause.Occur.MUST); + } + + if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) { + // If this term is introduced by OR, make the preceding term optional, + // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) + // notice if the input is a OR b, first term is parsed as required; without + // this modification a OR b would parsed as +a OR b + BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); + if (!c.isProhibited()) + c.setOccur(BooleanClause.Occur.SHOULD); + } + + // We might have been passed a null query; the term might have been + // filtered away by the analyzer. + if (q == null) + return; + + if (operator == OR_OPERATOR) { + // We set REQUIRED if we're introduced by AND or +; PROHIBITED if + // introduced by NOT or -; make sure not to set both. 
+ prohibited = (mods == MOD_NOT); + required = (mods == MOD_REQ); + if (conj == CONJ_AND && !prohibited) { + required = true; + } + } else { + // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED + // if not PROHIBITED and not introduced by OR + prohibited = (mods == MOD_NOT); + required = (!prohibited && conj != CONJ_OR); + } + if (required && !prohibited) + clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST)); + else if (!required && !prohibited) + clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD)); + else if (!required && prohibited) + clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT)); + else + throw new RuntimeException("Clause cannot be both required and prohibited"); + } + + /** + * Note that parameter analyzer is ignored. Calls inside the parser always + * use class member analyzer. + * + * @exception ParseException throw in overridden method to disallow + * @deprecated use [EMAIL PROTECTED] #getFieldQuery(String, String)} + */ + protected Query getFieldQuery(String field, + Analyzer analyzer, + String queryText) throws ParseException { + return getFieldQuery(field, queryText); + } + + /** + * @exception ParseException throw in overridden method to disallow + */ + protected Query getFieldQuery(String field, String queryText) throws ParseException { + // Use the analyzer to get all the tokens, and then build a TermQuery, + // PhraseQuery, or nothing based on the term count + + TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); + Vector v = new Vector(); + org.apache.lucene.analysis.Token t; + int positionCount = 0; + boolean severalTokensAtSamePosition = false; + + while (true) { + try { + t = source.next(); + } + catch (IOException e) { + t = null; + } + if (t == null) + break; + v.addElement(t); + if (t.getPositionIncrement() != 0) + positionCount += t.getPositionIncrement(); + else + severalTokensAtSamePosition = true; + } + try { + source.close(); + } + catch 
(IOException e) { + // ignore + } + + if (v.size() == 0) + return null; + else if (v.size() == 1) { + t = (org.apache.lucene.analysis.Token) v.elementAt(0); + return new TermQuery(new Term(field, t.termText())); + } else { + if (severalTokensAtSamePosition) { + if (positionCount == 1) { + // no phrase query: + BooleanQuery q = new BooleanQuery(true); + for (int i = 0; i < v.size(); i++) { + t = (org.apache.lucene.analysis.Token) v.elementAt(i); + TermQuery currentQuery = new TermQuery( + new Term(field, t.termText())); + q.add(currentQuery, BooleanClause.Occur.SHOULD); + } + return q; + } + else { + // phrase query: + MultiPhraseQuery mpq = new MultiPhraseQuery(); + List multiTerms = new ArrayList(); + for (int i = 0; i < v.size(); i++) { + t = (org.apache.lucene.analysis.Token) v.elementAt(i); + if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) { + mpq.add((Term[])multiTerms.toArray(new Term[0])); + multiTerms.clear(); + } + multiTerms.add(new Term(field, t.termText())); + } + mpq.add((Term[])multiTerms.toArray(new Term[0])); + return mpq; + } + } + else { + PhraseQuery q = new PhraseQuery(); + q.setSlop(phraseSlop); + for (int i = 0; i < v.size(); i++) { + q.add(new Term(field, ((org.apache.lucene.analysis.Token) + v.elementAt(i)).termText())); + + } + return q; + } + } + } + + /** + * Note that parameter analyzer is ignored. Calls inside the parser always + * use class member analyzer. + * + * @exception ParseException throw in overridden method to disallow + * @deprecated use [EMAIL PROTECTED] #getFieldQuery(String, String, int)} + */ + protected Query getFieldQuery(String field, + Analyzer analyzer, + String queryText, + int slop) throws ParseException { + return getFieldQuery(field, queryText, slop); + } + + /** + * Base implementation delegates to [EMAIL PROTECTED] #getFieldQuery(String,String)}. + * This method may be overridden, for example, to return + * a SpanNearQuery instead of a PhraseQuery. 
+ * + * @exception ParseException throw in overridden method to disallow + */ + protected Query getFieldQuery(String field, String queryText, int slop) + throws ParseException { + Query query = getFieldQuery(field, queryText); + + if (query instanceof PhraseQuery) { + ((PhraseQuery) query).setSlop(slop); + } + if (query instanceof MultiPhraseQuery) { + ((MultiPhraseQuery) query).setSlop(slop); + } + + return query; + } + + /** + * Note that parameter analyzer is ignored. Calls inside the parser always + * use class member analyzer. + * + * @exception ParseException throw in overridden method to disallow + * @deprecated use [EMAIL PROTECTED] #getRangeQuery(String, String, String, boolean)} + */ + protected Query getRangeQuery(String field, + Analyzer analyzer, + String part1, + String part2, + boolean inclusive) throws ParseException { + return getRangeQuery(field, part1, part2, inclusive); + } + + /** + * @exception ParseException throw in overridden method to disallow + */ + protected Query getRangeQuery(String field, + String part1, + String part2, + boolean inclusive) throws ParseException + { + if (lowercaseExpandedTerms) { + part1 = part1.toLowerCase(); + part2 = part2.toLowerCase(); + } + try { + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); + df.setLenient(true); + Date d1 = df.parse(part1); + Date d2 = df.parse(part2); + part1 = DateField.dateToString(d1); + part2 = DateField.dateToString(d2); + } + catch (Exception e) { } + + return new RangeQuery(new Term(field, part1), + new Term(field, part2), + inclusive); + } + + /** + * Factory method for generating query, given a set of clauses. + * By default creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being + * returned. + * + * @param clauses Vector that contains [EMAIL PROTECTED] BooleanClause} instances + * to join. + * + * @return Resulting [EMAIL PROTECTED] Query} object. 
+ * @exception ParseException throw in overridden method to disallow + */ + protected Query getBooleanQuery(Vector clauses) throws ParseException { + return getBooleanQuery(clauses, false); + } + + /** + * Factory method for generating query, given a set of clauses. + * By default creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being + * returned. + * + * @param clauses Vector that contains [EMAIL PROTECTED] BooleanClause} instances + * to join. + * @param disableCoord true if coord scoring should be disabled. + * + * @return Resulting [EMAIL PROTECTED] Query} object. + * @exception ParseException throw in overridden method to disallow + */ + protected Query getBooleanQuery(Vector clauses, boolean disableCoord) + throws ParseException + { + BooleanQuery query = new BooleanQuery(disableCoord); + for (int i = 0; i < clauses.size(); i++) { + query.add((BooleanClause)clauses.elementAt(i)); + } + return query; + } + + /** + * Factory method for generating a query. Called when parser + * parses an input term token that contains one or more wildcard + * characters (? and *), but is not a prefix term token (one + * that has just a single * character at the end) + *<p> + * Depending on settings, prefix term may be lower-cased + * automatically. It will not go through the default Analyzer, + * however, since normal Analyzers are unlikely to work properly + * with wildcard templates. + *<p> + * Can be overridden by extending classes, to provide custom handling for + * wildcard queries, which may be necessary due to missing analyzer calls. + * + * @param field Name of the field query will use. + * @param termStr Term token that contains one or more wild card + * characters (? 
or *), but is not simple prefix term + * + * @return Resulting [EMAIL PROTECTED] Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getWildcardQuery(String field, String termStr) throws ParseException + { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return new WildcardQuery(t); + } + + /** + * Factory method for generating a query (similar to + * [EMAIL PROTECTED] #getWildcardQuery}). Called when parser parses an input term + * token that uses prefix notation; that is, contains a single '*' wildcard + * character as its last character. Since this is a special case + * of generic wildcard term, and such a query can be optimized easily, + * this usually results in a different query object. + *<p> + * Depending on settings, a prefix term may be lower-cased + * automatically. It will not go through the default Analyzer, + * however, since normal Analyzers are unlikely to work properly + * with wildcard templates. + *<p> + * Can be overridden by extending classes, to provide custom handling for + * wild card queries, which may be necessary due to missing analyzer calls. + * + * @param field Name of the field query will use. + * @param termStr Term token to use for building term for the query + * (<b>without</b> trailing '*' character!) 
+ * + * @return Resulting [EMAIL PROTECTED] Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getPrefixQuery(String field, String termStr) throws ParseException + { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return new PrefixQuery(t); + } + + /** + * @deprecated use [EMAIL PROTECTED] #getFuzzyQuery(String, String, float)} + */ + protected Query getFuzzyQuery(String field, String termStr) throws ParseException { + return getFuzzyQuery(field, termStr, fuzzyMinSim); + } + + /** + * Factory method for generating a query (similar to + * [EMAIL PROTECTED] #getWildcardQuery}). Called when parser parses + * an input term token that has the fuzzy suffix (~) appended. + * + * @param field Name of the field query will use. + * @param termStr Term token to use for building term for the query + * + * @return Resulting [EMAIL PROTECTED] Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException + { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength); + } + + /** + * Returns a String where the escape char has been + * removed, or kept only once if there was a double escape. + */ + private String discardEscapeChar(String input) { + char[] caSource = input.toCharArray(); + char[] caDest = new char[caSource.length]; + int j = 0; + for (int i = 0; i < caSource.length; i++) { + if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) { + caDest[j++]=caSource[i]; + } + } + return new String(caDest, 0, j); + } + + /** + * Returns a String where those characters that QueryParser + * expects to be escaped are escaped by a preceding <code>\</code>. 
+ */ + public static String escape(String s) { + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < s.length(); i++) { + char c = s.charAt(i); + // NOTE: keep this in sync with _ESCAPED_CHAR below! + if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' + || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' + || c == '*' || c == '?') { + sb.append('\\'); + } + sb.append(c); + } + return sb.toString(); + } + + /** + * Command line tool to test QueryParser, using [EMAIL PROTECTED] org.apache.lucene.analysis.SimpleAnalyzer}. + * Usage:<br> + * <code>java org.apache.lucene.queryParser.QueryParser <input></code> + */ + public static void main(String[] args) throws Exception { + if (args.length == 0) { + System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>"); + System.exit(0); + } + QueryParser qp = new QueryParser("field", + new org.apache.lucene.analysis.SimpleAnalyzer()); + Query q = qp.parse(args[0]); + System.out.println(q.toString("field")); + } +} + +PARSER_END(QueryParser) + +/* ***************** */ +/* Token Definitions */ +/* ***************** */ + +<*> TOKEN : { + <#_NUM_CHAR: ["0"-"9"] > +// NOTE: keep this in sync with escape(String) above! +| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^", + "[", "]", "\"", "{", "}", "~", "*", "?" ] > +| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^", + "[", "]", "\"", "{", "}", "~", "*", "?" ] + | <_ESCAPED_CHAR> ) > +| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) > +| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") > +} + +<DEFAULT, RangeIn, RangeEx> SKIP : { + <<_WHITESPACE>> +} + +// OG: to support prefix queries: +// http://issues.apache.org/bugzilla/show_bug.cgi?id=12137 +// Change from: +// +// | <WILDTERM: <_TERM_START_CHAR> +// (<_TERM_CHAR> | ( [ "*", "?" ] ))* > +// To: +// +// (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" 
] ))* >
+
+// Tokens of the DEFAULT lexical state: operators, grouping, and the term
+// variants. CARAT switches to the Boost state; the two range openers switch
+// to RangeIn / RangeEx.
+<DEFAULT> TOKEN : {
+  <AND:       ("AND" | "&&") >
+| <OR:        ("OR" | "||") >
+| <NOT:       ("NOT" | "!") >
+| <PLUS:      "+" >
+| <MINUS:     "-" >
+| <LPAREN:    "(" >
+| <RPAREN:    ")" >
+| <COLON:     ":" >
+| <CARAT:     "^" > : Boost
+| <QUOTED:     "\"" (~["\""])+ "\"">
+| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
+| <FUZZY_SLOP:     "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
+| <PREFIXTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
+| <WILDTERM:  <_TERM_START_CHAR>
+              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
+| <RANGEIN_START: "[" > : RangeIn
+| <RANGEEX_START: "{" > : RangeEx
+}
+
+// Boost state: only a number is recognised, then the lexer returns to DEFAULT.
+<Boost> TOKEN : {
+<NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
+}
+
+// Inside "[ ... ]": the closing bracket returns to DEFAULT.
+<RangeIn> TOKEN : {
+<RANGEIN_TO: "TO">
+| <RANGEIN_END: "]"> : DEFAULT
+| <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">
+| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
+}
+
+// Inside "{ ... }": the closing brace returns to DEFAULT.
+<RangeEx> TOKEN : {
+<RANGEEX_TO: "TO">
+| <RANGEEX_END: "}"> : DEFAULT
+| <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">
+| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
+}
+
+// *   Query  ::= ( Clause )*
+// *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
+
+// Optional AND / OR between clauses; returns CONJ_AND, CONJ_OR, or
+// CONJ_NONE when neither is present.
+int Conjunction() : {
+  int ret = CONJ_NONE;
+}
+{
+  [
+    <AND> { ret = CONJ_AND; }
+    | <OR>  { ret = CONJ_OR; }
+  ]
+  { return ret; }
+}
+
+// Optional clause prefix: "+" gives MOD_REQ, "-" and NOT both give MOD_NOT,
+// nothing gives MOD_NONE.
+int Modifiers() : {
+  int ret = MOD_NONE;
+}
+{
+  [
+     <PLUS> { ret = MOD_REQ; }
+     | <MINUS> { ret = MOD_NOT; }
+     | <NOT> { ret = MOD_NOT; }
+  ]
+  { return ret; }
+}
+
+// One clause followed by any number of (conjunction, modifiers, clause)
+// groups, each handed to addClause. A lone unmodified, non-null clause is
+// returned as-is; everything else goes through getBooleanQuery(clauses).
+Query Query(String field) :
+{
+  Vector clauses = new Vector();
+  Query q, firstQuery=null;
+  int conj, mods;
+}
+{
+  mods=Modifiers() q=Clause(field)
+  {
+    addClause(clauses, CONJ_NONE, mods, q);
+    // remember the first clause only when it carries no modifier
+    if (mods == MOD_NONE)
+        firstQuery=q;
+  }
+  (
+    conj=Conjunction() mods=Modifiers() q=Clause(field)
+    { addClause(clauses, conj, mods, q); }
+  )*
+    {
+      if (clauses.size() == 1 && firstQuery != null)
+        return firstQuery;
+      else {
+  return getBooleanQuery(clauses);
+      }
+    }
+}
+
+// Optional "field:" prefix (which replaces the field for this clause only),
+// then either a single term or a parenthesised sub-query with an optional
+// "^boost".
+Query Clause(String field) : {
+  Query q;
+  Token fieldToken=null, boost=null;
+}
+{
+  [
+    LOOKAHEAD(2)
+    fieldToken=<TERM> <COLON> {
+      field=discardEscapeChar(fieldToken.image);
+    }
+  ]
+
+  (
+   q=Term(field)
+   | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
+
+  )
+    {
+      if (boost != null) {
+        float f = (float)1.0;
+  try {
+    f = Float.valueOf(boost.image).floatValue();
+          q.setBoost(f);
+  } catch (Exception ignored) { }
+      // NOTE(review): the broad catch covers q.setBoost as well as the
+      // number parse, so any failure there is silently dropped.
+      }
+      return q;
+    }
+}
+
+
+// A single search unit: a plain / prefix / wildcard / fuzzy term, an
+// inclusive "[a TO b]" or exclusive "{a TO b}" range, or a quoted phrase,
+// each with an optional "^boost". Dispatches to the matching get*Query
+// factory method; the result may be null.
+Query Term(String field) : {
+  Token term, boost=null, fuzzySlop=null, goop1, goop2;
+  boolean prefix = false;
+  boolean wildcard = false;
+  boolean fuzzy = false;
+  // NOTE(review): rangein is never read or written again in this production.
+  boolean rangein = false;
+  Query q;
+}
+{
+  (
+     (
+       term=<TERM>
+       | term=<PREFIXTERM> { prefix=true; }
+       | term=<WILDTERM> { wildcard=true; }
+       | term=<NUMBER>
+     )
+     [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
+     [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
+     {
+       String termImage=discardEscapeChar(term.image);
+       // precedence: wildcard, then prefix, then fuzzy, then plain field query
+       if (wildcard) {
+       q = getWildcardQuery(field, termImage);
+       } else if (prefix) {
+         // strip the trailing '*' before unescaping
+         q = getPrefixQuery(field,
+           discardEscapeChar(term.image.substring
+          (0, term.image.length()-1)));
+       } else if (fuzzy) {
+          float fms = fuzzyMinSim;
+          try {
+            // text after '~'; a bare "~" fails to parse and keeps fuzzyMinSim
+            fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
+          } catch (Exception ignored) { }
+         if(fms < 0.0f || fms > 1.0f){
+           throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
+         }
+         // the two-argument overload is the deprecated one and is only
+         // called when the similarity equals fuzzyMinSim
+         if(fms == fuzzyMinSim)
+           q = getFuzzyQuery(field, termImage);
+         else
+           q = getFuzzyQuery(field, termImage, fms);
+       } else {
+         // calls the deprecated overload that still takes the analyzer
+         q = getFieldQuery(field, analyzer, termImage);
+       }
+     }
+     | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
+         [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
+         <RANGEIN_END> )
+       [ <CARAT> boost=<NUMBER> ]
+        {
+          // quoted end points lose their quotes; unquoted ones are unescaped
+          if (goop1.kind == RANGEIN_QUOTED) {
+            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+          } else {
+            goop1.image = discardEscapeChar(goop1.image);
+          }
+          if (goop2.kind == RANGEIN_QUOTED) {
+            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+      } else {
+        goop2.image = discardEscapeChar(goop2.image);
+      }
+          q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
+        }
+     | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
+         [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
+         <RANGEEX_END> )
+       [ <CARAT> boost=<NUMBER> ]
+        {
+          // same handling as the inclusive range, but inclusive=false
+          if (goop1.kind == RANGEEX_QUOTED) {
+            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+          } else {
+            goop1.image = discardEscapeChar(goop1.image);
+          }
+          if (goop2.kind == RANGEEX_QUOTED) {
+            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+      } else {
+        goop2.image = discardEscapeChar(goop2.image);
+      }
+
+          q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
+        }
+     | term=<QUOTED>
+       [ fuzzySlop=<FUZZY_SLOP> ]
+       [ <CARAT> boost=<NUMBER> ]
+       {
+         int s = phraseSlop;
+
+         // after a quoted phrase "~n" is a phrase slop, truncated to an int
+         if (fuzzySlop != null) {
+           try {
+             s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
+           }
+           catch (Exception ignored) { }
+         }
+         // the surrounding quotes are removed before the text is analyzed
+         q = getFieldQuery(field, analyzer, term.image.substring(1, term.image.length()-1), s);
+       }
+  )
+  {
+    if (boost != null) {
+      float f = (float) 1.0;
+      try {
+        f = Float.valueOf(boost.image).floatValue();
+      }
+      catch (Exception ignored) {
+    /* Should this be handled somehow? (defaults to "no boost", if
+     * boost number is invalid)
+     */
+      }
+
+      // avoid boosting null queries, such as those caused by stop words
+      if (q != null) {
+        q.setBoost(f);
+      }
+    }
+    return q;
+  }
+}