/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */


// JavaCC generator options for this grammar.
options {
  // Generate instance (non-static) parser members, so that more than one
  // QueryParser object can exist at a time.
  STATIC= false;
  // No parser trace output in the generated code.
  DEBUG_PARSER=false;
  // Have the generated character stream handle Unicode input.
  UNICODE_INPUT=true;
}

PARSER_BEGIN(QueryParser)

package org.apache.lucene.queryParser;

import java.util.Vector;
import java.io.*;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*;
import org.apache.lucene.search.*;

/**
 * This class is generated by JavaCC.  The only method that clients should need
 * to call is <a href="#parse">parse()</a>.
 *
 * The syntax for query strings is as follows:
 * A Query is a series of clauses.
 * A clause may be prefixed by:
 * <ul>
 * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
 * that the clause is required or prohibited respectively; or
 * <li> a term followed by a colon, indicating the field to be searched.
 * This enables one to construct queries which search multiple fields.
 * </ul>
 *
 * A clause may be either:
 * <ul>
 * <li> a term, indicating all the documents that contain this term; or
 * <li> a nested query, enclosed in parentheses.  Note that this may be used
 * with a <code>+</code>/<code>-</code> prefix to require any of a set of
 * terms.
 * </ul>
 *
 * Thus, in BNF, the query grammar is:
 * <pre>
 *   Query  ::= ( Clause )*
 *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
 * </pre>
 */

public class QueryParser {

  /** Parses a query string, returning a
   * <a href="lucene.search.Query.html">Query</a>.
   * Convenience wrapper: builds a new parser for the given field and
   * analyzer and delegates to {@link #parse(String)}.
   *  @param query	the query string to be parsed.
   *  @param field	the default field for query terms.
   *  @param analyzer   used to find terms in the query text.
   *  @throws ParseException if parsing the query string fails.
   */
  static public Query parse(String query, String field, Analyzer analyzer)
       throws ParseException {
    QueryParser parser = new QueryParser(field, analyzer);
    return parser.parse(query);
  }

  /** Analyzer used to turn query text into index terms. */
  Analyzer analyzer;
  /** Default field for terms that carry no explicit <code>field:</code> prefix. */
  String field;
  /** Slop applied to every PhraseQuery built by this parser; zero by default. */
  int phraseSlop = 0;

  /** Constructs a query parser.
   *  @param f	the default field for query terms.
   *  @param a   used to find terms in the query text.
   */
  public QueryParser(String f, Analyzer a) {
    // The generated constructor needs some stream; the real input is
    // installed by ReInit() in parse(String).
    this(new StringReader(""));
    analyzer = a;
    field = f;
  }

  /** Parses a query string, returning a
   * <a href="lucene.search.Query.html">Query</a>.
   *  @param query	the query string to be parsed.
   *  @throws ParseException if parsing the query string fails.
   */
  public Query parse(String query) throws ParseException {
    ReInit(new StringReader(query));
    return Query(field);
  }

  /** Sets the default slop for phrases.  If zero, then exact phrase matches
    are required.  Zero by default. */
  public void setPhraseSlop(int s) { phraseSlop = s; }
  /** Gets the default slop for phrases. */
  public int getPhraseSlop() { return phraseSlop; }

  /**
   * Appends a clause for <code>q</code> to <code>clauses</code>, deriving its
   * required/prohibited flags from the conjunction and modifier that
   * introduced it.
   *
   *  @param clauses  Vector of BooleanClause collected so far; modified in place.
   *  @param conj     one of the CONJ_* constants.
   *  @param mods     one of the MOD_* constants.
   *  @param q        the sub-query, or null if the analyzer removed the term;
   *                  in that case no clause is added.
   */
  private void addClause(Vector clauses, int conj, int mods,
                        Query q) {
    boolean required, prohibited;

    // If this term is introduced by AND, make the preceding term required,
    // unless it's already prohibited.  The vector can still be empty here:
    // when every earlier term was filtered away by the analyzer nothing was
    // added, so there is no preceding clause to promote (and indexing
    // element -1 would throw).
    if (conj == CONJ_AND && clauses.size() > 0) {
      BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
      if (!c.prohibited)
        c.required = true;
    }

    // We might have been passed a null query; the term might have been
    // filtered away by the analyzer.
    if (q == null)
      return;

    // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
    // introduced by NOT or -; make sure not to set both.
    prohibited = (mods == MOD_NOT);
    required = (mods == MOD_REQ);
    if (conj == CONJ_AND && !prohibited)
      required = true;
    clauses.addElement(new BooleanClause(q, required, prohibited));
  }

  /**
   * Runs <code>queryText</code> through the analyzer and builds a query from
   * the resulting tokens.
   *
   *  @param field      the field the terms belong to.
   *  @param analyzer   used to tokenize the query text.
   *  @param queryText  raw text of a single term or quoted phrase.
   *  @return null if the analyzer yields no token, a TermQuery for exactly
   *          one token, otherwise a PhraseQuery (with the parser's phrase
   *          slop) over all tokens in order.
   */
  private Query getFieldQuery(String field, Analyzer analyzer, String queryText) {
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count

    TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
    Vector v = new Vector();
    org.apache.lucene.analysis.Token t;

    while (true) {
      try {
        t = source.next();
      }
      catch (IOException e) {
        // Best effort: a read failure is treated like end of stream.
        t = null;
      }
      if (t == null)
        break;
      v.addElement(t.termText());
    }
    if (v.size() == 0)
      return null;
    else if (v.size() == 1)
      return new TermQuery(new Term(field, (String) v.elementAt(0)));
    else {
      PhraseQuery q = new PhraseQuery();
      q.setSlop(phraseSlop);
      for (int i=0; i<v.size(); i++) {
        q.add(new Term(field, (String) v.elementAt(i)));
      }
      return q;
    }
  }

  /**
   * Builds a RangeQuery from the text found between range brackets.
   * The first two tokens produced by the analyzer become the two bounds, in
   * that order; a missing token, or a token whose text is "NULL" (any case),
   * leaves that bound null.
   *
   *  @param field      the field the range applies to.
   *  @param analyzer   used to tokenize the range text.
   *  @param queryText  the range text without its enclosing brackets.
   *  @param inclusive  passed through to the RangeQuery constructor.
   */
  private Query getRangeQuery(String field, Analyzer analyzer, String queryText, boolean inclusive)
  {
    // Use the analyzer to get all the tokens.  There should be 1 or 2.
    TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
    Term[] terms = new Term[2];
    org.apache.lucene.analysis.Token t;

    for (int i = 0; i < 2; i++)
    {
      try
      {
        t = source.next();
      }
      catch (IOException e)
      {
        // Best effort: a read failure is treated like a missing token.
        t = null;
      }
      if (t != null)
      {
        String text = t.termText();
        if (!text.equalsIgnoreCase("NULL"))
        {
          terms[i] = new Term(field, text);
        }
      }
    }
    return new RangeQuery(terms[0], terms[1], inclusive);
  }

  /**
   * Command-line smoke test: parses <code>args[0]</code> against the default
   * field "field" with a SimpleAnalyzer and prints the resulting query.
   */
  public static void main(String[] args) throws Exception {
    QueryParser qp = new QueryParser("field",
                                     new org.apache.lucene.analysis.SimpleAnalyzer());
    Query q = qp.parse(args[0]);
    System.out.println(q.toString("field"));
  }

  // Values returned by the Conjunction() production.
  private static final int CONJ_NONE   = 0;
  private static final int CONJ_AND    = 1;
  private static final int CONJ_OR     = 2;

  // Values returned by the Modifiers() production.
  private static final int MOD_NONE    = 0;
  private static final int MOD_NOT     = 10;
  private static final int MOD_REQ     = 11;
}

PARSER_END(QueryParser)

/* ***************** */
/* Token Definitions */
/* ***************** */


// Private (#-prefixed) building blocks, declared for every lexical state.
// They are only referenced from the token definitions below.
<*> TOKEN : {
  <#_NUM_CHAR:   ["0"-"9"] >
// First character of a term: the _TERM_CHAR set minus "+", "-" and "!".
// The token manager always prefers the longest match, so if a term were
// allowed to start with one of these, "+foo" would be lexed as the single
// TERM "+foo" instead of <PLUS> followed by TERM "foo", and the
// required/prohibited modifier would be silently lost.
| <#_TERM_START_CHAR: ~["\"", " ", "\t", "(", ")", ":", "&", "|",
                        "^", "*", "?", "~", "{", "}", "[", "]",
                        "+", "-", "!" ] >
// Any subsequent character of a term.  "+", "-" and "!" remain legal inside
// a term (for example "e-mail").
| <#_TERM_CHAR: ~["\"", " ", "\t", "(", ")", ":", "&", "|",
                  "^", "*", "?", "~", "{", "}", "[", "]" ] >
//| <#_NEWLINE:    ( "\r\n" | "\r" | "\n" ) >
| <#_WHITESPACE: ( " " | "\t" ) >
//| <#_QCHAR:      ( "\\" (<_NEWLINE> | ~["a"-"z", "A"-"Z", "0"-"9"] ) ) >
//| <#_RESTOFLINE: (~["\r", "\n"])* >
}

// Tokens seen by the parser.  Order matters: when two definitions match the
// same longest prefix, the one listed first wins (so "AND" is <AND>, not
// <TERM>, and a plain word is <TERM>, not <WILDTERM>).
<DEFAULT> TOKEN : {
  <AND:       ("AND" | "&&") >
| <OR:        ("OR" | "||") >
| <NOT:       ("NOT" | "!") >
| <PLUS:      "+" >
| <MINUS:     "-" >
| <LPAREN:    "(" >
| <RPAREN:    ")" >
| <COLON:     ":" >
| <CARAT:     "^" >
| <STAR:      "*" >
| <QUOTED:     "\"" (~["\""])+ "\"">
| <NUMBER:    (["+","-"])? (<_NUM_CHAR>)+ "." (<_NUM_CHAR>)+ >
| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY:     "~" >
// A term with "*" or "?" allowed in the interior; it must still end in a
// regular term character, so a trailing "*" is left for <STAR>.
| <WILDTERM:  <_TERM_START_CHAR>
              ( ~["\"", " ", "\t", "(", ")", ":", "&", "|", "^", "~", "{",
"}", "[", "]" ] )+ <_TERM_CHAR>>
| <RANGEIN:   "[" (~["]"])+ "]">
| <RANGEEX:   "{" (~["}"])+ "}">
}

// Blanks and tabs between tokens are discarded.
<DEFAULT> SKIP : {
  <<_WHITESPACE>>
}


// *   Query  ::= ( Clause )*
// *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

// Consumes an optional AND/OR keyword and reports which one was seen
// (CONJ_NONE when neither is present).
int Conjunction() : {
  int conjunction = CONJ_NONE;
}
{
  (
      <AND> { conjunction = CONJ_AND; }
    | <OR>  { conjunction = CONJ_OR; }
  )?
  { return conjunction; }
}

// Consumes an optional clause modifier: "+" yields MOD_REQ, "-" and NOT both
// yield MOD_NOT, and MOD_NONE is returned when no modifier is present.
int Modifiers() : {
  int modifier = MOD_NONE;
}
{
  (
      <PLUS>  { modifier = MOD_REQ; }
    | <MINUS> { modifier = MOD_NOT; }
    | <NOT>   { modifier = MOD_NOT; }
  )?
  { return modifier; }
}

// Parses one or more clauses, each optionally preceded by a conjunction
// and/or a modifier, and wraps the collected clauses in a BooleanQuery.
Query Query(String field) :
{
  Vector collected = new Vector();
  Query sub;
  int conjunction, modifier;
}
{
  // First clause: it can carry a modifier but no conjunction.
  modifier=Modifiers() sub=Clause(field)
  { addClause(collected, CONJ_NONE, modifier, sub); }
  (
    conjunction=Conjunction() modifier=Modifiers() sub=Clause(field)
    { addClause(collected, conjunction, modifier, sub); }
  )*
  {
    BooleanQuery result = new BooleanQuery();
    int count = collected.size();
    for (int idx = 0; idx != count; idx++) {
      BooleanClause clause = (BooleanClause) collected.elementAt(idx);
      result.add(clause);
    }
    return result;
  }
}

// Parses a single clause: an optional "name:" field prefix followed by
// either a term or a parenthesised sub-query.  A field prefix replaces the
// inherited field for everything inside this clause.
Query Clause(String field) : {
  Query result;
  Token fieldName = null;
}
{
  (
    // Two tokens of lookahead are needed to tell "name:" from a bare term.
    LOOKAHEAD(2)
    fieldName=<TERM> <COLON> { field = fieldName.image; }
  )?

  (
      result=Term(field)
    | <LPAREN> result=Query(field) <RPAREN>
  )
  { return result; }
}


// Parses one leaf of the query and builds the matching Query object for
// the given field.  Three alternatives are recognised:
//   1. a TERM, WILDTERM or NUMBER, optionally followed by "*" (prefix) or
//      "~" (fuzzy), and optionally by "^" NUMBER (boost);
//   2. a bracketed range, inclusive for RANGEIN and exclusive for RANGEEX;
//   3. a double-quoted string.
// May return null when getFieldQuery() finds no tokens in the text.
Query Term(String field) : {
  Token term, boost=null;
  boolean prefix = false;
  boolean wildcard = false;
  boolean fuzzy = false;
  boolean rangein = false;
  Query q;
}
{
  (
     (term=<TERM>|term=<WILDTERM>{wildcard=true;}|term=<NUMBER>)[<STAR>{prefix=true;}|<FUZZY>{fuzzy=true;}][<CARAT> boost=<NUMBER>]
      // Wildcard takes precedence over prefix, which takes precedence over
      // fuzzy.  Only the plain case goes through the analyzer; the other
      // three use the raw token image as the term text.
      { if (wildcard)
          q = new WildcardQuery(new Term(field, term.image));
        else if (prefix)
          q = new PrefixQuery(new Term(field, term.image));
        else if (fuzzy)
          q = new FuzzyQuery(new Term(field, term.image));
        else
          q = getFieldQuery(field, analyzer, term.image); }
    | (term=<RANGEIN>{rangein=true;}|term=<RANGEEX>)
        {
          // Strip the enclosing brackets before handing the text over.
          q = getRangeQuery(field, analyzer,
                            term.image.substring(1, term.image.length()-1), rangein);
        }
    | term=<QUOTED>
      // Strip the surrounding double quotes before analysis.
      { q = getFieldQuery(field, analyzer,
                          term.image.substring(1, term.image.length()-1)); }
  )
  {
    // Only the first alternative can set a boost token; for ranges and
    // quoted strings boost stays null and this section is skipped.
    if (boost != null) {
      float f = (float) 1.0;
      try {
        f = Float.valueOf(boost.image).floatValue();
      }
      // An unparsable boost falls back to the default of 1.0.
      catch (Exception ignored) { }

      // The boost is applied only to these three query types; for any other
      // type (or a null query) it is silently dropped.
      if (q instanceof TermQuery)
        ((TermQuery) q).setBoost(f);
      else if (q instanceof PhraseQuery)
        ((PhraseQuery) q).setBoost(f);
      else if (q instanceof MultiTermQuery)
        ((MultiTermQuery) q).setBoost(f);
    }
    return q;
  }
}


