otis 2002/07/14 10:16:21 Modified: src/java/org/apache/lucene/queryParser QueryParser.jj Log: - Added Péter Halácsy's changes that allow setting of default boolean operator. Revision Changes Path 1.19 +105 -62 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj Index: QueryParser.jj =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v retrieving revision 1.18 retrieving revision 1.19 diff -u -r1.18 -r1.19 --- QueryParser.jj 25 Jun 2002 00:05:31 -0000 1.18 +++ QueryParser.jj 14 Jul 2002 17:16:21 -0000 1.19 @@ -78,7 +78,7 @@ * * The syntax for query strings is as follows: * A Query is a series of clauses. - * A clause may be prefixed by: + * A clause may be prefixed by: * <ul> * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating * that the clause is required or prohibited respectively; or @@ -121,11 +121,11 @@ QueryParser parser = new QueryParser(field, analyzer); return parser.parse(query); } - catch (TokenMgrError tme) { + catch (TokenMgrError tme) { throw new ParseException(tme.getMessage()); } } - + Analyzer analyzer; String field; int phraseSlop = 0; @@ -157,8 +157,30 @@ /** Gets the default slop for phrases. */ public int getPhraseSlop() { return phraseSlop; } - private void addClause(Vector clauses, int conj, int mods, - Query q) { + // CODE ADDED BY PETER HALACSY + + /** The actual mode that parses uses to parse queries */ + public static final int DEFAULT_OPERATOR_OR = 0; + public static final int DEFAULT_OPERATOR_AND = 1; + + private int mode = DEFAULT_OPERATOR_OR; + + /** + * Set the mode of the QueryParser. 
In classic mode (<code>DEFAULT_OPERATOR_OR</code>) + * terms without any modifiers are considered optional: for example <code> + * capital of Hungary</code> is equal to <code>capital OR of OR Hungary</code>.<br/> + * In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjunction: the + * above mentioned query is parsed as <code>capital AND of AND Hungary</code> + */ + public void setMode(int mode) { + this.mode = mode; + } + + public int getMode() { + return this.mode; + } + + private void addClause(Vector clauses, int conj, int mods, Query q) { boolean required, prohibited; // If this term is introduced by AND, make the preceding term required, @@ -168,28 +190,49 @@ if (!c.prohibited) c.required = true; } + // THIS CODE ADDED BY PETER HALACSY + if(mode == DEFAULT_OPERATOR_AND && conj == CONJ_OR) { + // If this term is introduced by OR, make the preceding term optional, + // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) + // notice if the input is a OR b, first term is parsed as required; without + // this modification a OR b would be parsed as +a OR b + BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); + if (!c.prohibited) + c.required = false; + } + // THIS CODE ADDED BY PETER HALACSY // We might have been passed a null query; the term might have been - // filtered away by the analyzer. + // filtered away by the analyzer. if (q == null) return; - // We set REQUIRED if we're introduced by AND or +; PROHIBITED if - // introduced by NOT or -; make sure not to set both. - prohibited = (mods == MOD_NOT); - required = (mods == MOD_REQ); - if (conj == CONJ_AND && !prohibited) - required = true; + if(mode == DEFAULT_OPERATOR_OR) { + // THIS IS THE ORIGINAL CODE + // We set REQUIRED if we're introduced by AND or +; PROHIBITED if + // introduced by NOT or -; make sure not to set both.
+ prohibited = (mods == MOD_NOT); + required = (mods == MOD_REQ); + if (conj == CONJ_AND && !prohibited) { + required = true; + } + } else { + // THIS CODE ADDED BY PETER HALACSY + // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED + // if not PROHIBITED and not introduced by OR + prohibited = (mods == MOD_NOT); + required = (!prohibited && conj != CONJ_OR); + } clauses.addElement(new BooleanClause(q, required, prohibited)); } - private Query getFieldQuery(String field, - Analyzer analyzer, + private Query getFieldQuery(String field, + Analyzer analyzer, String queryText) { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count - - TokenStream source = analyzer.tokenStream(field, + + TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); Vector v = new Vector(); org.apache.lucene.analysis.Token t; @@ -197,17 +240,17 @@ while (true) { try { t = source.next(); - } + } catch (IOException e) { t = null; } - if (t == null) + if (t == null) break; v.addElement(t.termText()); } - if (v.size() == 0) + if (v.size() == 0) return null; - else if (v.size() == 1) + else if (v.size() == 1) return new TermQuery(new Term(field, (String) v.elementAt(0))); else { PhraseQuery q = new PhraseQuery(); @@ -219,11 +262,11 @@ } } - private Query getRangeQuery(String field, - Analyzer analyzer, - String part1, + private Query getRangeQuery(String field, + Analyzer analyzer, + String part1, String part2, - boolean inclusive) + boolean inclusive) { boolean isDate = false, isNumber = false; @@ -242,13 +285,13 @@ // @@@ Add number support } - return new RangeQuery(new Term(field, part1), - new Term(field, part2), + return new RangeQuery(new Term(field, part1), + new Term(field, part2), inclusive); } public static void main(String[] args) throws Exception { - QueryParser qp = new QueryParser("field", + QueryParser qp = new QueryParser("field", new 
org.apache.lucene.analysis.SimpleAnalyzer()); Query q = qp.parse(args[0]); System.out.println(q.toString("field")); @@ -271,10 +314,10 @@ <*> TOKEN : { <#_NUM_CHAR: ["0"-"9"] > -| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^", +| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^", "[", "]", "\"", "{", "}", "~", "*", "?" ] > -| <#_TERM_START_CHAR: ( ~[ " ", "\t", "+", "-", "!", "(", ")", ":", "^", - "[", "]", "\"", "{", "}", "~", "*", "?" ] +| <#_TERM_START_CHAR: ( ~[ " ", "\t", "+", "-", "!", "(", ")", ":", "^", + "[", "]", "\"", "{", "}", "~", "*", "?" ] | <_ESCAPED_CHAR> ) > | <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> ) > | <#_WHITESPACE: ( " " | "\t" ) > @@ -299,7 +342,7 @@ | <FUZZY: "~" > | <SLOP: "~" (<_NUM_CHAR>)+ > | <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" > -| <WILDTERM: <_TERM_START_CHAR> +| <WILDTERM: <_TERM_START_CHAR> (<_TERM_CHAR> | ( [ "*", "?" ] ))* > | <RANGEIN_START: "[" > : RangeIn | <RANGEEX_START: "{" > : RangeEx @@ -326,23 +369,23 @@ // * Query ::= ( Clause )* // * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) -int Conjunction() : { +int Conjunction() : { int ret = CONJ_NONE; } { - [ - <AND> { ret = CONJ_AND; } + [ + <AND> { ret = CONJ_AND; } | <OR> { ret = CONJ_OR; } ] { return ret; } } -int Modifiers() : { +int Modifiers() : { int ret = MOD_NONE; } { - [ - <PLUS> { ret = MOD_REQ; } + [ + <PLUS> { ret = MOD_REQ; } | <MINUS> { ret = MOD_NOT; } | <NOT> { ret = MOD_NOT; } ] @@ -353,17 +396,17 @@ { Vector clauses = new Vector(); Query q, firstQuery=null; - int conj, mods; + int conj, mods; } { - mods=Modifiers() q=Clause(field) - { - addClause(clauses, CONJ_NONE, mods, q); - if (mods == MOD_NONE) - firstQuery=q; + mods=Modifiers() q=Clause(field) + { + addClause(clauses, CONJ_NONE, mods, q); + if (mods == MOD_NONE) + firstQuery=q; } - ( - conj=Conjunction() mods=Modifiers() q=Clause(field) + ( + conj=Conjunction() mods=Modifiers() q=Clause(field) { addClause(clauses, conj, mods, 
q); } )* { @@ -389,16 +432,16 @@ ] ( - q=Term(field) + q=Term(field) | <LPAREN> q=Query(field) <RPAREN> ) { return q; } } - -Query Term(String field) : { + +Query Term(String field) : { Token term, boost=null, slop=null, goop1, goop2; boolean prefix = false; boolean wildcard = false; @@ -407,7 +450,7 @@ Query q; } { - ( + ( ( term=<TERM> | term=<PREFIXTERM> { prefix=true; } @@ -416,19 +459,19 @@ ) [ <FUZZY> { fuzzy=true; } ] [ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ] - { + { if (wildcard) q = new WildcardQuery(new Term(field, term.image)); - else if (prefix) + else if (prefix) q = new PrefixQuery(new Term(field, term.image.substring (0, term.image.length()-1))); else if (fuzzy) q = new FuzzyQuery(new Term(field, term.image)); else - q = getFieldQuery(field, analyzer, term.image); + q = getFieldQuery(field, analyzer, term.image); } | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> ) - [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> ) + [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> ) <RANGEIN_END> ) [ <CARAT> boost=<NUMBER> ] { @@ -440,7 +483,7 @@ q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); } | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> ) - [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> ) + [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> ) <RANGEEX_END> ) [ <CARAT> boost=<NUMBER> ] { @@ -451,14 +494,14 @@ q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); } - | term=<QUOTED> + | term=<QUOTED> [ slop=<SLOP> ] [ <CARAT> boost=<NUMBER> ] - { - q = getFieldQuery(field, analyzer, - term.image.substring(1, term.image.length()-1)); + { + q = getFieldQuery(field, analyzer, + term.image.substring(1, term.image.length()-1)); if (slop != null && q instanceof PhraseQuery) { - try { + try { int s = Float.valueOf(slop.image.substring(1)).intValue(); ((PhraseQuery) q).setSlop(s); } @@ -466,16 +509,16 @@ } } ) - { + { if (boost != 
null) { float f = (float) 1.0; - try { + try { f = Float.valueOf(boost.image).floatValue(); } catch (Exception ignored) { } q.setBoost(f); } - return q; + return q; } }
-- To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]> For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>