/* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2001 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and * "Apache Lucene" must not be used to endorse or promote products * derived from this software without prior written permission. For * written permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * "Apache Lucene", nor may "Apache" appear in their name, without * prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * . */ options { STATIC= false; DEBUG_PARSER=false; UNICODE_INPUT=true; } PARSER_BEGIN(QueryParser) package org.apache.lucene.queryParser; import java.util.Vector; import java.io.*; import org.apache.lucene.index.Term; import org.apache.lucene.analysis.*; import org.apache.lucene.search.*; /** * This class is generated by JavaCC. The only method that clients should need * to call is parse(). * * The syntax for query strings is as follows: * A Query is a series of clauses. * A clause may be prefixed by: *

 * <ul>
 * <li> a plus (+) or a minus (-) sign, indicating
 * that the clause is required or prohibited respectively; or
 * <li> a term followed by a colon, indicating the field to be searched.
 * This enables one to construct queries which search multiple fields.
 * </ul>

* * A clause may be either a: *

 * <ul>
 * <li> a term, indicating all the documents that contain this term; or
 * <li> a nested query, enclosed in parentheses. Note that this may be used
 * with a +/- prefix to require any of a set of
 * terms.
 * </ul>

* * Thus, in BNF, the query grammar is: *

 * <pre>
 *   Query  ::= ( Clause )*
 *   Clause ::= ["+", "-"] [&lt;TERM&gt; ":"] ( &lt;TERM&gt; | "(" Query ")" )
 * </pre>

*/ public class QueryParser { /* NOTE(review): this copy of the grammar has lost its line breaks and many angle-bracketed spans (token names among them). Each line comment below is followed by code on the same physical line, so that code is commented out as laid out here; restore the original layout before building. */ /** Parses a query string with a newly constructed parser and returns the resulting Query. * @param query the query string to be parsed. * @param field the default field for query terms. * @param analyzer used to find terms in the query text. * @throws ParseException declared here; comes from the instance parse(String) call. */ static public Query parse(String query, String field, Analyzer analyzer) throws ParseException { QueryParser parser = new QueryParser(field, analyzer); return parser.parse(query); } /* Analyzer and default field supplied to the constructor; phraseSlop is the slop set on phrase queries started in getFieldQuery. */ Analyzer analyzer; String field; int phraseSlop = 0; /** Constructs a query parser. Delegates to another constructor of this class (not visible here), passing an empty StringReader; parse(String) later re-initializes the input. * @param f the default field for query terms. * @param a used to find terms in the query text. */ public QueryParser(String f, Analyzer a) { this(new StringReader("")); analyzer = a; field = f; } /** Parses a query string, returning a Query. Calls ReInit on a reader over the text, then runs the Query production with the default field. * @param query the query string to be parsed. * @throws ParseException declared by this method. */ public Query parse(String query) throws ParseException { ReInit(new StringReader(query)); return Query(field); } /** Sets the default slop for phrases. If zero, then exact phrase matches are required. Zero by default. */ public void setPhraseSlop(int s) { phraseSlop = s; } /** Gets the default slop for phrases. */ public int getPhraseSlop() { return phraseSlop; } /** Adds a BooleanClause for q to clauses, with its required/prohibited flags derived from conj and mods; nothing is added when q is null. NOTE(review): with conj == CONJ_AND the last element is fetched via elementAt(size()-1) with no emptiness check, and since a null q is never added an empty vector looks reachable here - confirm. */ private void addClause(Vector clauses, int conj, int mods, Query q) { boolean required, prohibited; // If this term is introduced by AND, make the preceding term required, // unless it's already prohibited if (conj == CONJ_AND) { BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); if (!c.prohibited) c.required = true; } // We might have been passed a null query; the term might have been // filtered away by the analyzer. if (q == null) return; // We set REQUIRED if we're introduced by AND or +; PROHIBITED if // introduced by NOT or -; make sure not to set both. 
prohibited = (mods == MOD_NOT); required = (mods == MOD_REQ); if (conj == CONJ_AND && !prohibited) required = true; clauses.addElement(new BooleanClause(q, required, prohibited)); } /** Feeds queryText through the analyzer and collects the term text of every token. Returns null when there are no terms and a TermQuery when there is exactly one; otherwise starts a PhraseQuery with slop phraseSlop (the rest of that branch is missing from this copy). An IOException from the token stream is treated as end of input. */ private Query getFieldQuery(String field, Analyzer analyzer, String queryText) { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); Vector v = new Vector(); org.apache.lucene.analysis.Token t; while (true) { try { t = source.next(); } catch (IOException e) { t = null; } if (t == null) break; v.addElement(t.termText()); } if (v.size() == 0) return null; else if (v.size() == 1) return new TermQuery(new Term(field, (String) v.elementAt(0))); else { PhraseQuery q = new PhraseQuery(); q.setSlop(phraseSlop); for (int i=0; i TOKEN : { <#_NUM_CHAR: ["0"-"9"] > | <#_TERM_CHAR: ~["\"", " ", "\t", "(", ")", ":", "&", "|", "^", "*", "?", "~", "{", "}", "[", "]" ] > //| <#_NEWLINE: ( "\r\n" | "\r" | "\n" ) > | <#_WHITESPACE: ( " " | "\t" ) > //| <#_QCHAR: ( "\\" (<_NEWLINE> | ~["a"-"z", "A"-"Z", "0"-"9"] ) ) > //| <#_RESTOFLINE: (~["\r", "\n"])* > } TOKEN : { | | | | | | | | | | | )+ "." 
(<_NUM_CHAR>)+ > | )+ > | | ( ~["\"", " ", "\t", "(", ")", ":", "&", "|", "^", "~", "{", "}", "[", "]" ] )+ <_TERM_CHAR>> | | } /* NOTE(review): token names and some patterns in the TOKEN block that ends here are missing from this copy. The SKIP section below discards whitespace as defined by _WHITESPACE. */ SKIP : { <<_WHITESPACE>> } // * Query ::= ( Clause )* // * Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) int Conjunction() : { int ret = CONJ_NONE; } { [ { ret = CONJ_AND; } | { ret = CONJ_OR; } ] { return ret; } } int Modifiers() : { int ret = MOD_NONE; } { [ { ret = MOD_REQ; } | { ret = MOD_NOT; } | { ret = MOD_NOT; } ] { return ret; } } Query Query(String field) : { Vector clauses = new Vector(); Query q; int conj, mods; } { mods=Modifiers() q=Clause(field) { addClause(clauses, CONJ_NONE, mods, q); } ( conj=Conjunction() mods=Modifiers() q=Clause(field) { addClause(clauses, conj, mods, q); } )* { BooleanQuery query = new BooleanQuery(); for (int i = 0; i < clauses.size(); i++) query.add((BooleanClause)clauses.elementAt(i)); return query; } } Query Clause(String field) : { Query q; Token fieldToken=null; } { [ LOOKAHEAD(2) fieldToken= { field = fieldToken.image; } ] ( q=Term(field) | q=Query(field) ) { return q; } } Query Term(String field) : { Token term, boost=null; boolean prefix = false; boolean wildcard = false; boolean fuzzy = false; boolean rangein = false; Query q; } { ( (term=|term={wildcard=true;}|term=)[{prefix=true;}|{fuzzy=true;}][ boost=] { if (wildcard) q = new WildcardQuery(new Term(field, term.image)); else if (prefix) q = new PrefixQuery(new Term(field, term.image)); else if (fuzzy) q = new FuzzyQuery(new Term(field, term.image)); else q = getFieldQuery(field, analyzer, term.image); } | (term={rangein=true;}|term=) { q = getRangeQuery(field, analyzer, term.image.substring(1, term.image.length()-1), rangein); } | term= { q = getFieldQuery(field, analyzer, term.image.substring(1, term.image.length()-1)); } ) { if (boost != null) { float f = (float) 1.0; try { f = Float.valueOf(boost.image).floatValue(); } catch (Exception ignored) { } if (q instanceof TermQuery) ((TermQuery) q).setBoost(f); else if (q instanceof 
PhraseQuery) ((PhraseQuery) q).setBoost(f); else if (q instanceof MultiTermQuery) ((MultiTermQuery) q).setBoost(f); } /* The boost is applied only when q is a TermQuery, PhraseQuery or MultiTermQuery; any other result, including a null q, is returned unchanged. A boost image that fails to parse is silently ignored and the factor stays at 1.0. */ return q; } }