otis 2003/03/01 17:36:38 Modified: src/java/org/apache/lucene/queryParser QueryParser.jj Log: - Added set/getLowercaseWildcardTerms methods and a few get*Query methods that make it easier to extend QueryParser. Contributed by: Tatu Saloranta Revision Changes Path 1.27 +128 -16 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj Index: QueryParser.jj =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v retrieving revision 1.26 retrieving revision 1.27 diff -u -r1.26 -r1.27 --- QueryParser.jj 23 Feb 2003 08:51:33 -0000 1.26 +++ QueryParser.jj 2 Mar 2003 01:36:38 -0000 1.27 @@ -1,8 +1,8 @@ /* ==================================================================== * The Apache Software License, Version 1.1 * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. + * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation. All + * rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -129,6 +129,11 @@ Analyzer analyzer; String field; int phraseSlop = 0; + /** + * Whether terms of wildcard and prefix queries are to be automatically + * lower-cased or not. Default is <code>true</code>. + */ + boolean lowercaseWildcardTerms = true; /** Constructs a query parser. * @param field the default field for query terms. @@ -164,7 +169,7 @@ private int operator = DEFAULT_OPERATOR_OR; /** - * Set the boolean operator of the QueryParser. + * Sets the boolean operator of the QueryParser. 
* In classic mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers * are considered optional: for example <code>capital of Hungary</code> is equal to * <code>capital OR of OR Hungary</code>.<br/> @@ -179,6 +184,14 @@ return this.operator; } + public void setLowercaseWildcardTerms(boolean b) { + lowercaseWildcardTerms = b; + } + + public boolean getLowercaseWildcardTerms() { + return lowercaseWildcardTerms; + } + private void addClause(Vector clauses, int conj, int mods, Query q) { boolean required, prohibited; @@ -288,6 +301,103 @@ inclusive); } + /** + * Factory method for generating query, given set of clauses. + * By default creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being + * returned. + * + * @param clauses Vector that contains {@link BooleanClause} instances + * to join. + * + * @return Resulting {@link Query} object. + */ + protected Query getBooleanQuery(Vector clauses) + { + BooleanQuery query = new BooleanQuery(); + for (int i = 0; i < clauses.size(); i++) { + query.add((BooleanClause)clauses.elementAt(i)); + } + return query; + } + + /** + * Factory method for generating a query. Called when parser + * parses an input term token that contains one or more wildcard + * characters (? and *), but is not a prefix term token (one + * that has just a single * character at the end) + *<p> + * Depending on settings, prefix term may be lower-cased + * automatically. It will not go through the default analyzer, + * however, since normal analyzers are unlikely to work properly + * with wildcard templates. + *<p> + * Can be overridden by extending classes, to provide custom handling for + * wild card queries (which may be necessary due to missing analyzer calls) + * + * @param field Name of the field query will use. + * @param termStr Term token that contains one or more wild card + * characters (? 
or *), but is not simple prefix term + * + * @return Resulting query built for the term + */ + protected Query getWildcardQuery(String field, String termStr) + { + if (lowercaseWildcardTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return new WildcardQuery(t); + } + + /** + * Factory method for generating a query (similar to + * {@link #getWildcardQuery}). Called when parser parses an input term + * token that uses prefix notation; that is, contains a single '*' wild + * card character as its last character. Since this is a special case + * of generic wild card term, and such a query can be optimized easily, + * this usually results in different query object. + *<p> + * Depending on settings, prefix term may be lower-cased + * automatically. It will not go through the default analyzer, + * however, since normal analyzers are unlikely to work properly + * with wildcard templates. + *<p> + * Can be overridden by extending classes, to provide custom handling for + * wild card queries (which may be necessary due to missing analyzer calls) + * + * @param field Name of the field query will use. + * @param termStr Term token to use for building term for the query + * (<b>without</b> trailing '*' character!) + * + * @return Resulting query built for the term + */ + protected Query getPrefixQuery(String field, String termStr) + { + if (lowercaseWildcardTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return new PrefixQuery(t); + } + + /** + * Factory method for generating a query (similar to + * {@link #getWildcardQuery}). Called when parser parses + * an input term token that has the fuzzy suffix (~) appended. + * + * @param field Name of the field query will use. 
+ * @param termStr Term token to use for building term for the query + * + * @return Resulting query built for the term + */ + protected Query getFuzzyQuery(String field, String termStr) + { + Term t = new Term(field, termStr); + return new FuzzyQuery(t); + } + public static void main(String[] args) throws Exception { QueryParser qp = new QueryParser("field", new org.apache.lucene.analysis.SimpleAnalyzer()); @@ -420,10 +530,7 @@ if (clauses.size() == 1 && firstQuery != null) return firstQuery; else { - BooleanQuery query = new BooleanQuery(); - for (int i = 0; i < clauses.size(); i++) - query.add((BooleanClause)clauses.elementAt(i)); - return query; + return getBooleanQuery(clauses); } } } @@ -475,15 +582,16 @@ [ <FUZZY> { fuzzy=true; } ] [ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ] { - if (wildcard) - q = new WildcardQuery(new Term(field, term.image)); - else if (prefix) - q = new PrefixQuery(new Term(field, term.image.substring - (0, term.image.length()-1))); - else if (fuzzy) - q = new FuzzyQuery(new Term(field, term.image)); - else + if (wildcard) { + q = getWildcardQuery(field, term.image); + } else if (prefix) { + q = getPrefixQuery(field, term.image.substring + (0, term.image.length()-1)); + } else if (fuzzy) { + q = getFuzzyQuery(field, term.image); + } else { q = getFieldQuery(field, analyzer, term.image); + } } | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> ) [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> ) @@ -530,7 +638,11 @@ try { f = Float.valueOf(boost.image).floatValue(); } - catch (Exception ignored) { } + catch (Exception ignored) { + /* Should this be handled somehow? (defaults to "no boost", if + * boost number is invalid) + */ + } // avoid boosting null queries, such as those caused by stop words if (q != null) {
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]