briangoetz 2002/06/24 17:05:31 Modified: src/java/org/apache/lucene/document DateField.java Field.java src/java/org/apache/lucene/queryParser QueryParser.jj src/test/org/apache/lucene/queryParser TestQueryParser.java Log: Support for new range query syntax. The delimiter is " TO ", but is optional for backward compatibility with previous syntax. If the range arguments match the format supported by DateFormat.getDateInstance(DateFormat.SHORT), then they will be converted into the appropriate date strings a la DateField. Added Field.Keyword "constructor" for Date-valued arguments. Optimized DateField.timeToString function. Submitted by: Brian Goetz Revision Changes Path 1.4 +7 -2 jakarta-lucene/src/java/org/apache/lucene/document/DateField.java Index: DateField.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/document/DateField.java,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- DateField.java 9 Jun 2002 20:47:22 -0000 1.3 +++ DateField.java 25 Jun 2002 00:05:31 -0000 1.4 @@ -105,8 +105,13 @@ if (s.length() > DATE_LEN) throw new RuntimeException("time too late"); - while (s.length() < DATE_LEN) - s = "0" + s; // pad with leading zeros + // Pad with leading zeros + if (s.length() < DATE_LEN) { + StringBuffer sb = new StringBuffer(s); + while (sb.length() < DATE_LEN) + sb.insert(0, '0'); + s = sb.toString(); + } return s; } 1.3 +8 -0 jakarta-lucene/src/java/org/apache/lucene/document/Field.java Index: Field.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/document/Field.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- Field.java 20 Nov 2001 05:22:31 -0000 1.2 +++ Field.java 25 Jun 2002 00:05:31 -0000 1.3 @@ -55,6 +55,7 @@ */ import java.io.Reader; +import java.util.Date; /** A field is a section of a Document. 
Each field has two parts, a name and a @@ -89,6 +90,13 @@ fields, like "title" or "subject". */ public static final Field Text(String name, String value) { return new Field(name, value, true, true, true); + } + + /** Constructs a Date-valued Field that is tokenized and indexed, + and is stored in the index, for return with hits. The date is encoded + as a string with DateField.dateToString. */ + public static final Field Keyword(String name, Date value) { + return new Field(name, DateField.dateToString(value), true, true, true); } /** Constructs a String-valued Field that is tokenized and indexed, 1.18 +62 -33 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj Index: QueryParser.jj =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v retrieving revision 1.17 retrieving revision 1.18 diff -u -r1.17 -r1.18 --- QueryParser.jj 20 May 2002 15:45:43 -0000 1.17 +++ QueryParser.jj 25 Jun 2002 00:05:31 -0000 1.18 @@ -65,8 +65,11 @@ import java.util.Vector; import java.io.*; +import java.text.*; +import java.util.*; import org.apache.lucene.index.Term; import org.apache.lucene.analysis.*; +import org.apache.lucene.document.*; import org.apache.lucene.search.*; /** @@ -218,35 +221,30 @@ private Query getRangeQuery(String field, Analyzer analyzer, - String queryText, + String part1, + String part2, boolean inclusive) { - // Use the analyzer to get all the tokens. There should be 1 or 2. 
- TokenStream source = analyzer.tokenStream(field, - new StringReader(queryText)); - Term[] terms = new Term[2]; - org.apache.lucene.analysis.Token t; + boolean isDate = false, isNumber = false; - for (int i = 0; i < 2; i++) - { - try - { - t = source.next(); - } - catch (IOException e) - { - t = null; - } - if (t != null) - { - String text = t.termText(); - if (!text.equalsIgnoreCase("NULL")) - { - terms[i] = new Term(field, text); - } - } + try { + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); + df.setLenient(true); + Date d1 = df.parse(part1); + Date d2 = df.parse(part2); + part1 = DateField.dateToString(d1); + part2 = DateField.dateToString(d2); + isDate = true; } - return new RangeQuery(terms[0], terms[1], inclusive); + catch (Exception e) { } + + if (!isDate) { + // @@@ Add number support + } + + return new RangeQuery(new Term(field, part1), + new Term(field, part2), + inclusive); } public static void main(String[] args) throws Exception { @@ -282,7 +280,7 @@ | <#_WHITESPACE: ( " " | "\t" ) > } -<DEFAULT> SKIP : { +<DEFAULT, RangeIn, RangeEx> SKIP : { <<_WHITESPACE>> } @@ -303,14 +301,28 @@ | <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" > | <WILDTERM: <_TERM_START_CHAR> (<_TERM_CHAR> | ( [ "*", "?" ] ))* > -| <RANGEIN: "[" ( ~[ "]" ] )+ "]"> -| <RANGEEX: "{" ( ~[ "}" ] )+ "}"> +| <RANGEIN_START: "[" > : RangeIn +| <RANGEEX_START: "{" > : RangeEx } <Boost> TOKEN : { <NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? 
> : DEFAULT } +<RangeIn> TOKEN : { +<RANGEIN_TO: "TO"> +| <RANGEIN_END: "]"> : DEFAULT +| <RANGEIN_QUOTED: "\"" (~["\""])+ "\""> +| <RANGEIN_GOOP: (~[ " ", "]" ])+ > +} + +<RangeEx> TOKEN : { +<RANGEEX_TO: "TO"> +| <RANGEEX_END: "}"> : DEFAULT +| <RANGEEX_QUOTED: "\"" (~["\""])+ "\""> +| <RANGEEX_GOOP: (~[ " ", "}" ])+ > +} + // * Query ::= ( Clause )* // * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) @@ -387,7 +399,7 @@ Query Term(String field) : { - Token term, boost=null, slop=null; + Token term, boost=null, slop=null, goop1, goop2; boolean prefix = false; boolean wildcard = false; boolean fuzzy = false; @@ -415,12 +427,29 @@ else q = getFieldQuery(field, analyzer, term.image); } - | ( term=<RANGEIN> { rangein=true; } | term=<RANGEEX> ) + | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> ) + [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> ) + <RANGEIN_END> ) + [ <CARAT> boost=<NUMBER> ] + { + if (goop1.kind == RANGEIN_QUOTED) + goop1.image = goop1.image.substring(1, goop1.image.length()-1); + if (goop2.kind == RANGEIN_QUOTED) + goop2.image = goop2.image.substring(1, goop2.image.length()-1); + + q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); + } + | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> ) + [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> ) + <RANGEEX_END> ) [ <CARAT> boost=<NUMBER> ] { - q = getRangeQuery(field, analyzer, - term.image.substring(1, term.image.length()-1), - rangein); + if (goop1.kind == RANGEEX_QUOTED) + goop1.image = goop1.image.substring(1, goop1.image.length()-1); + if (goop2.kind == RANGEEX_QUOTED) + goop2.image = goop2.image.substring(1, goop2.image.length()-1); + + q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); } | term=<QUOTED> [ slop=<SLOP> ] 1.12 +25 -10 jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java Index: TestQueryParser.java 
=================================================================== RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- TestQueryParser.java 6 May 2002 21:59:44 -0000 1.11 +++ TestQueryParser.java 25 Jun 2002 00:05:31 -0000 1.12 @@ -55,11 +55,14 @@ */ import java.io.*; +import java.text.*; +import java.util.*; import junit.framework.*; import org.apache.lucene.*; import org.apache.lucene.queryParser.*; import org.apache.lucene.search.*; +import org.apache.lucene.document.DateField; import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.standard.*; import org.apache.lucene.analysis.Token; @@ -235,16 +238,28 @@ } public void testRange() throws Exception { - assertQueryEquals("[ a z]", null, "[a-z]"); - assertTrue(getQuery("[ a z]", null) instanceof RangeQuery); - assertQueryEquals("[ a z ]", null, "[a-z]"); - assertQueryEquals("{ a z}", null, "{a-z}"); - assertQueryEquals("{ a z }", null, "{a-z}"); - assertQueryEquals("{ a z }^2.0", null, "{a-z}^2.0"); - assertQueryEquals("[ a z] OR bar", null, "[a-z] bar"); - assertQueryEquals("[ a z] AND bar", null, "+[a-z] +bar"); - assertQueryEquals("( bar blar { a z}) ", null, "bar blar {a-z}"); - assertQueryEquals("gack ( bar blar { a z}) ", null, "gack (bar blar {a-z})"); + assertQueryEquals("[ a TO z]", null, "[a-z]"); + assertTrue(getQuery("[ a TO z]", null) instanceof RangeQuery); + assertQueryEquals("[ a TO z ]", null, "[a-z]"); + assertQueryEquals("{ a TO z}", null, "{a-z}"); + assertQueryEquals("{ a TO z }", null, "{a-z}"); + assertQueryEquals("{ a TO z }^2.0", null, "{a-z}^2.0"); + assertQueryEquals("[ a TO z] OR bar", null, "[a-z] bar"); + assertQueryEquals("[ a TO z] AND bar", null, "+[a-z] +bar"); + assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a-z}"); + assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a-z})"); + } + + public String 
getDate(String s) throws Exception { + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); + return DateField.dateToString(df.parse(s)); + } + + public void testDateRange() throws Exception { + assertQueryEquals("[ 1/1/02 TO 1/4/02]", null, + "[" + getDate("1/1/02") + "-" + getDate("1/4/02") + "]"); + assertQueryEquals("{ 1/1/02 1/4/02 }", null, + "{" + getDate("1/1/02") + "-" + getDate("1/4/02") + "}"); } public void testEscaped() throws Exception {
-- To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]> For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>