tjones 2004/04/22 15:23:15 Modified: src/java/org/apache/lucene/search FieldDoc.java FieldDocSortedHitQueue.java FieldSortedHitQueue.java FloatSortedHitQueue.java IntegerSortedHitQueue.java MultiFieldSortedHitQueue.java ScoreDocComparator.java ScoreDocLookupComparator.java SortField.java StringSortedHitQueue.java src/test/org/apache/lucene/search TestSort.java Added: src/java/org/apache/lucene/search SortComparatorSource.java src/test/org/apache/lucene/search SampleComparable.java Log: new SortField type for using a custom comparator Revision Changes Path 1.4 +3 -3 jakarta-lucene/src/java/org/apache/lucene/search/FieldDoc.java Index: FieldDoc.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FieldDoc.java,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- FieldDoc.java 2 Mar 2004 14:54:55 -0000 1.3 +++ FieldDoc.java 22 Apr 2004 22:23:14 -0000 1.4 @@ -48,7 +48,7 @@ * @see Sort * @see Searchable#search(Query,Filter,int,Sort) */ - public Object[] fields; + public Comparable[] fields; /** Expert: Creates one of these objects with empty sort information. */ public FieldDoc (int doc, float score) { @@ -56,7 +56,7 @@ } /** Expert: Creates one of these objects with the given sort information. 
*/ - public FieldDoc (int doc, float score, Object[] fields) { + public FieldDoc (int doc, float score, Comparable[] fields) { super (doc, score); this.fields = fields; } 1.3 +7 -1 jakarta-lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java Index: FieldDocSortedHitQueue.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- FieldDocSortedHitQueue.java 24 Feb 2004 19:34:58 -0000 1.2 +++ FieldDocSortedHitQueue.java 22 Apr 2004 22:23:14 -0000 1.3 @@ -109,6 +109,9 @@ if (f1 > f2) c = -1; if (f1 < f2) c = 1; break; + case SortField.CUSTOM: + c = docB.fields[i].compareTo (docA.fields[i]); + break; case SortField.AUTO: // we cannot handle this - even if we determine the type of object (Float or // Integer), we don't necessarily know how to compare them (both SCORE and @@ -143,6 +146,9 @@ float f2 = ((Float)docB.fields[i]).floatValue(); if (f1 < f2) c = -1; if (f1 > f2) c = 1; + break; + case SortField.CUSTOM: + c = docA.fields[i].compareTo (docB.fields[i]); break; case SortField.AUTO: // we cannot handle this - even if we determine the type of object (Float or 1.7 +5 -4 jakarta-lucene/src/java/org/apache/lucene/search/FieldSortedHitQueue.java Index: FieldSortedHitQueue.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FieldSortedHitQueue.java,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- FieldSortedHitQueue.java 24 Mar 2004 19:23:11 -0000 1.6 +++ FieldSortedHitQueue.java 22 Apr 2004 22:23:14 -0000 1.7 @@ -103,7 +103,7 @@ * @throws IOException If an error occurs reading the index. 
* @see #determineComparator */ - static ScoreDocComparator getCachedComparator (final IndexReader reader, final String field, final int type) + static ScoreDocComparator getCachedComparator (final IndexReader reader, final String field, final int type, final SortComparatorSource factory) throws IOException { if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER; @@ -124,10 +124,11 @@ switch (type) { case SortField.SCORE: comparer = ScoreDocComparator.RELEVANCE; break; case SortField.DOC: comparer = ScoreDocComparator.INDEXORDER; break; + case SortField.AUTO: comparer = determineComparator (reader, field); break; + case SortField.STRING: comparer = StringSortedHitQueue.comparator (reader, field); break; case SortField.INT: comparer = IntegerSortedHitQueue.comparator (reader, field); break; case SortField.FLOAT: comparer = FloatSortedHitQueue.comparator (reader, field); break; - case SortField.STRING: comparer = StringSortedHitQueue.comparator (reader, field); break; - case SortField.AUTO: comparer = determineComparator (reader, field); break; + case SortField.CUSTOM: comparer = factory.newComparator (reader, field); break; default: throw new RuntimeException ("invalid sort field type: "+type); } 1.5 +2 -2 jakarta-lucene/src/java/org/apache/lucene/search/FloatSortedHitQueue.java Index: FloatSortedHitQueue.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FloatSortedHitQueue.java,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- FloatSortedHitQueue.java 29 Mar 2004 18:55:30 -0000 1.4 +++ FloatSortedHitQueue.java 22 Apr 2004 22:23:14 -0000 1.5 @@ -138,7 +138,7 @@ return fieldOrder.length == n; } - public Object sortValue (final ScoreDoc i) { + public Comparable sortValue (final ScoreDoc i) { return new Float (fieldOrder[i.doc]); } 1.5 +2 -2 jakarta-lucene/src/java/org/apache/lucene/search/IntegerSortedHitQueue.java Index: 
IntegerSortedHitQueue.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/IntegerSortedHitQueue.java,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- IntegerSortedHitQueue.java 29 Mar 2004 18:55:30 -0000 1.4 +++ IntegerSortedHitQueue.java 22 Apr 2004 22:23:14 -0000 1.5 @@ -139,7 +139,7 @@ return fieldOrder.length == n; } - public Object sortValue (final ScoreDoc i) { + public Comparable sortValue (final ScoreDoc i) { return new Integer (fieldOrder[i.doc]); } 1.4 +3 -3 jakarta-lucene/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java Index: MultiFieldSortedHitQueue.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- MultiFieldSortedHitQueue.java 23 Mar 2004 16:49:56 -0000 1.3 +++ MultiFieldSortedHitQueue.java 22 Apr 2004 22:23:14 -0000 1.4 @@ -51,7 +51,7 @@ this.fields = new SortField[n]; for (int i=0; i<n; ++i) { String fieldname = fields[i].getField(); - comparators[i] = FieldSortedHitQueue.getCachedComparator (reader, fieldname, fields[i].getType()); + comparators[i] = FieldSortedHitQueue.getCachedComparator (reader, fieldname, fields[i].getType(), fields[i].getFactory()); this.fields[i] = new SortField (fieldname, comparators[i].sortType(), fields[i].getReverse()); } initialize (size); @@ -107,7 +107,7 @@ */ FieldDoc fillFields (final FieldDoc doc) { final int n = comparators.length; - final Object[] fields = new Object[n]; + final Comparable[] fields = new Comparable[n]; for (int i=0; i<n; ++i) fields[i] = comparators[i].sortValue(doc); doc.fields = fields; 1.3 +9 -9 jakarta-lucene/src/java/org/apache/lucene/search/ScoreDocComparator.java Index: ScoreDocComparator.java =================================================================== RCS file: 
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/ScoreDocComparator.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- ScoreDocComparator.java 27 Feb 2004 12:29:31 -0000 1.2 +++ ScoreDocComparator.java 22 Apr 2004 22:23:14 -0000 1.3 @@ -26,7 +26,7 @@ * @since lucene 1.4 * @version $Id$ */ -interface ScoreDocComparator { +public interface ScoreDocComparator { /** Special comparator for sorting hits according to computed relevance (document score). */ static final ScoreDocComparator RELEVANCE = new ScoreDocComparator() { @@ -40,7 +40,7 @@ if (i.score > j.score) return 1; return 0; } - public Object sortValue (ScoreDoc i) { + public Comparable sortValue (ScoreDoc i) { return new Float (i.score); } public int sortType() { @@ -61,7 +61,7 @@ if (i.doc < j.doc) return 1; return 0; } - public Object sortValue (ScoreDoc i) { + public Comparable sortValue (ScoreDoc i) { return new Integer (i.doc); } public int sortType() { @@ -93,13 +93,13 @@ /** - * Returns the value used to sort the given document. This is - * currently always either an Integer or Float, but could be extended - * to return any object used to sort by. + * Returns the value used to sort the given document. The + * object returned must implement the java.io.Serializable + * interface. 
* @param i Document - * @return Integer or Float + * @return Serializable object */ - Object sortValue (ScoreDoc i); + Comparable sortValue (ScoreDoc i); /** 1.3 +2 -2 jakarta-lucene/src/java/org/apache/lucene/search/ScoreDocLookupComparator.java Index: ScoreDocLookupComparator.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/ScoreDocLookupComparator.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- ScoreDocLookupComparator.java 27 Feb 2004 12:29:31 -0000 1.2 +++ ScoreDocLookupComparator.java 22 Apr 2004 22:23:14 -0000 1.3 @@ -26,7 +26,7 @@ * @since lucene 1.4 * @version $Id$ */ -interface ScoreDocLookupComparator +public interface ScoreDocLookupComparator extends ScoreDocComparator { /** 1.6 +35 -2 jakarta-lucene/src/java/org/apache/lucene/search/SortField.java Index: SortField.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/SortField.java,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- SortField.java 4 Apr 2004 15:47:31 -0000 1.5 +++ SortField.java 22 Apr 2004 22:23:14 -0000 1.6 @@ -58,6 +58,10 @@ * lower values are at the front. */ public static final int FLOAT = 5; + /** Sort using a custom Comparator. Sort values are any Comparable and + * sorting is done according to natural order. */ + public static final int CUSTOM = 9; + /** Represents sorting by document score (relevancy). */ public static final SortField FIELD_SCORE = new SortField (null, SCORE); @@ -68,7 +72,7 @@ private String field; private int type = AUTO; // defaults to determining type dynamically boolean reverse = false; // defaults to natural order - + private SortComparatorSource factory; /** Creates a sort by terms in the given field where the type of term value * is determined dynamically ({@link #AUTO AUTO}).
@@ -112,6 +116,28 @@ this.reverse = reverse; } + /** Creates a sort with a custom comparison function. + * @param field Name of field to sort by; cannot be <code>null</code>. + * @param comparator Returns a comparator for sorting hits. + */ + public SortField (String field, SortComparatorSource comparator) { + this.field = (field != null) ? field.intern() : field; + this.type = CUSTOM; + this.factory = comparator; + } + + /** Creates a sort, possibly in reverse, with a custom comparison function. + * @param field Name of field to sort by; cannot be <code>null</code>. + * @param comparator Returns a comparator for sorting hits. + * @param reverse True if natural order should be reversed. + */ + public SortField (String field, SortComparatorSource comparator, boolean reverse) { + this.field = (field != null) ? field.intern() : field; + this.type = CUSTOM; + this.reverse = reverse; + this.factory = comparator; + } + /** Returns the name of the field. Could return <code>null</code> * if the sort is by SCORE or DOC. * @return Name of field, possibly <code>null</code>. 
@@ -134,6 +160,10 @@ return reverse; } + public SortComparatorSource getFactory() { + return factory; + } + public String toString() { StringBuffer buffer = new StringBuffer(); switch (type) { @@ -141,6 +171,9 @@ break; case DOC: buffer.append("<doc>"); + break; + + case CUSTOM: buffer.append ("<custom:\""+"\">"); break; default: buffer.append("\"" + field + "\""); 1.6 +2 -2 jakarta-lucene/src/java/org/apache/lucene/search/StringSortedHitQueue.java Index: StringSortedHitQueue.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/StringSortedHitQueue.java,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- StringSortedHitQueue.java 29 Mar 2004 18:55:30 -0000 1.5 +++ StringSortedHitQueue.java 22 Apr 2004 22:23:14 -0000 1.6 @@ -173,7 +173,7 @@ return fieldOrder.length == n; } - public Object sortValue (final ScoreDoc i) { + public Comparable sortValue (final ScoreDoc i) { return terms[fieldOrder[i.doc]]; } 1.1 jakarta-lucene/src/java/org/apache/lucene/search/SortComparatorSource.java Index: SortComparatorSource.java =================================================================== package org.apache.lucene.search; import org.apache.lucene.index.IndexReader; import java.io.IOException; import java.io.Serializable; /** * Expert: returns a comparator for sorting ScoreDocs. * * <p>Created: Apr 21, 2004 3:49:28 PM * * @author Tim Jones * @version $Id: SortComparatorSource.java,v 1.1 2004/04/22 22:23:14 tjones Exp $ * @since 1.4 */ public interface SortComparatorSource extends Serializable { /** * Creates a comparator for the field in the given index. * @param reader Index to create comparator for. * @param fieldname Field to create comparator for. * @return Comparator of ScoreDoc objects. * @throws IOException If an error occurs reading the index. 
*/ ScoreDocLookupComparator newComparator (IndexReader reader, String fieldname) throws IOException; } 1.5 +34 -15 jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java Index: TestSort.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- TestSort.java 24 Mar 2004 19:23:11 -0000 1.4 +++ TestSort.java 22 Apr 2004 22:23:15 -0000 1.5 @@ -17,8 +17,7 @@ */ import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; +import org.apache.lucene.index.*; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -27,6 +26,7 @@ import java.rmi.registry.LocateRegistry; import java.rmi.registry.Registry; import java.io.IOException; +import java.io.Serializable; import java.util.regex.Pattern; import java.util.HashMap; import java.util.Iterator; @@ -47,7 +47,8 @@ */ public class TestSort -extends TestCase { +extends TestCase +implements Serializable { private Searcher full; private Searcher searchX; @@ -89,17 +90,17 @@ // the float field to sort by float // the string field to sort by string private String[][] data = new String[][] { - // tracer contents int float string - { "A", "x a", "5", "4f", "c" }, - { "B", "y a", "5", "3.4028235E38", "i" }, - { "C", "x a b c", "2147483647", "1.0", "j" }, - { "D", "y a b c", "-1", "0.0f", "a" }, - { "E", "x a b c d", "5", "2f", "h" }, - { "F", "y a b c d", "2", "3.14159f", "g" }, - { "G", "x a b c d", "3", "-1.0", "f" }, - { "H", "y a b c d", "0", "1.4E-45", "e" }, - { "I", "x a b c d e f", "-2147483648", "1.0e+0", "d" }, - { "J", "y a b c d e f", "4", ".5", "b" }, + // tracer contents int float string custom + { "A", "x a", "5", "4f", "c", "A-3" }, + { "B", "y a", "5", "3.4028235E38", "i", "B-10" }, + { "C", "x a 
b c", "2147483647", "1.0", "j", "A-2" }, + { "D", "y a b c", "-1", "0.0f", "a", "C-0" }, + { "E", "x a b c d", "5", "2f", "h", "B-8" }, + { "F", "y a b c d", "2", "3.14159f", "g", "B-1" }, + { "G", "x a b c d", "3", "-1.0", "f", "C-100" }, + { "H", "y a b c d", "0", "1.4E-45", "e", "C-88" }, + { "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10" }, + { "J", "y a b c d e f", "4", ".5", "b", "C-7" }, }; // create an index of all the documents, or just the x, or just the y documents @@ -115,6 +116,7 @@ doc.add (new Field ("int", data[i][2], false, true, false)); doc.add (new Field ("float", data[i][3], false, true, false)); doc.add (new Field ("string", data[i][4], false, true, false)); + doc.add (new Field ("custom", data[i][5], false, true, false)); writer.addDocument (doc); } } @@ -249,6 +251,14 @@ assertMatches (full, queryX, sort, "GICEA"); } + + public void testCustomSorts() throws Exception { + sort.setSort (new SortField ("custom", SampleComparable.getComparator())); + assertMatches (full, queryX, sort, "CAIEG"); + sort.setSort (new SortField ("custom", SampleComparable.getComparator(), true)); + assertMatches (full, queryY, sort, "HJDBF"); + } + // test a variety of sorts using more than one searcher public void testMultiSort() throws Exception { MultiSearcher searcher = new MultiSearcher (new Searchable[] { searchX, searchY }); @@ -266,6 +276,15 @@ Searchable searcher = getRemote(); MultiSearcher multi = new MultiSearcher (new Searchable[] { searcher }); runMultiSorts (multi); + } + + public void testRemoteCustomSort() throws Exception { + Searchable searcher = getRemote(); + MultiSearcher multi = new MultiSearcher (new Searchable[] { searcher }); + sort.setSort (new SortField ("custom", SampleComparable.getComparator())); + assertMatches (multi, queryX, sort, "CAIEG"); + sort.setSort (new SortField ("custom", SampleComparable.getComparator(), true)); + assertMatches (multi, queryY, sort, "HJDBF"); } // test that the relevancy scores are the same 
even if 1.1 jakarta-lucene/src/test/org/apache/lucene/search/SampleComparable.java Index: SampleComparable.java ===================================================================
package org.apache.lucene.search;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;

import java.io.IOException;
import java.io.Serializable;

/**
 * An example Comparable for use with the custom sort tests.
 * It implements a comparable for "id" sort of values which
 * consist of an alphanumeric part and a numeric part, such as:
 *
 * <p>ABC-123, A-1, A-7, A-100, B-99999
 *
 * <p>Such values cannot be sorted as strings, since A-100 needs
 * to come after A-7.  Instances are ordered by the text before the
 * first "-" (as a String) and then by the text after it (as an Integer).
 *
 * <p>It could be argued that the "ids" should be rewritten as
 * A-0001, A-0100, etc. so they will sort as strings.  That is
 * a valid alternate way to solve it - but
 * this is only supposed to be a simple test case.
 *
 * <p>Created: Apr 21, 2004 5:34:47 PM
 *
 * @author  Tim Jones
 * @version $Id: SampleComparable.java,v 1.1 2004/04/22 22:23:15 tjones Exp $
 * @since   1.4
 */
public class SampleComparable
implements Comparable, Serializable {

  /** Text before the first "-" of the id. */
  String string_part;

  /** Number after the first "-" of the id. */
  Integer int_part;

  /**
   * Parses an id of the form <code>text-number</code>.
   * @param s  Id to parse; it is split at its first "-".
   * @throws StringIndexOutOfBoundsException If <code>s</code> contains no "-".
   * @throws NumberFormatException If the text after the first "-" is not an integer.
   */
  public SampleComparable (String s) {
    int i = s.indexOf ("-");
    string_part = s.substring (0, i);
    int_part = new Integer (s.substring (i + 1));
  }

  /**
   * Orders by the string part first and by the integer part when the
   * string parts are the same.
   * @param o  The other SampleComparable.
   * @return Negative, zero or positive as this id sorts before, with or after <code>o</code>.
   * @throws ClassCastException If <code>o</code> is not a SampleComparable.
   */
  public int compareTo (Object o) {
    SampleComparable otherid = (SampleComparable) o;
    int i = string_part.compareTo (otherid.string_part);
    if (i == 0) return int_part.compareTo (otherid.int_part);
    return i;
  }

  /**
   * Two ids are equal exactly when {@link #compareTo(Object)} would return 0
   * for them, i.e. when both parts are equal.  Keeps the natural ordering
   * consistent with equals.
   * @param o  Object to compare with; may be <code>null</code>.
   * @return True if <code>o</code> is a SampleComparable with the same two parts.
   */
  public boolean equals (Object o) {
    if (this == o) return true;
    if (!(o instanceof SampleComparable)) return false;
    SampleComparable otherid = (SampleComparable) o;
    return string_part.equals (otherid.string_part)
        && int_part.equals (otherid.int_part);
  }

  /**
   * Hash code built from the same two parts that {@link #equals(Object)} compares.
   * @return Hash code of this id.
   */
  public int hashCode () {
    return 31 * string_part.hashCode () + int_part.hashCode ();
  }

  /**
   * Returns a comparator source which sorts hits by the SampleComparable
   * value of the terms in a field.
   * @return A new comparator source on every call.
   */
  public static SortComparatorSource getComparator () {
    return new SortComparatorSource () {

      /**
       * Creates a comparator backed by an array holding one SampleComparable
       * per document, looked up by document number.
       * @param reader     Index to create comparator for.
       * @param fieldname  Field to create comparator for.
       * @return Comparator of ScoreDoc objects.
       * @throws IOException If an error occurs reading the index.
       */
      public ScoreDocLookupComparator newComparator (final IndexReader reader, String fieldname)
      throws IOException {
        final String field = fieldname.intern ();
        final TermEnum enumerator = reader.terms (new Term (fieldname, ""));
        try {
          return new ScoreDocLookupComparator () {

            // Filled while this object is being constructed, i.e. before the
            // enclosing finally block closes the enumerator.  Entries for
            // documents without a term in the field stay null, and compare()
            // and compareReverse() will fail with a NullPointerException if
            // such a document is among the hits.
            protected Comparable[] cachedValues = fillCache (reader, enumerator, field);

            public boolean sizeMatches (int n) {
              return (cachedValues.length == n);
            }

            public int compare (ScoreDoc i, ScoreDoc j) {
              return cachedValues[i.doc].compareTo (cachedValues[j.doc]);
            }

            public int compareReverse (ScoreDoc i, ScoreDoc j) {
              return cachedValues[j.doc].compareTo (cachedValues[i.doc]);
            }

            public Comparable sortValue (ScoreDoc i) {
              return cachedValues[i.doc];
            }

            public int sortType () {
              return SortField.CUSTOM;
            }
          };
        } finally {
          enumerator.close ();
        }
      }

      /**
       * Returns an array of objects which represent that natural order
       * of the term values in the given field.  The array has one slot per
       * document number, up to <code>reader.maxDoc()</code>.
       *
       * @param reader     Terms are in this index.
       * @param enumerator Use this to get the term values and TermDocs.
       * @param fieldname  Comparables should be for this field.
       * @return Array of objects representing natural order of terms in field.
       * @throws IOException If an error occurs reading the index.
       * @throws RuntimeException If the enumerator has no current term.
       */
      protected Comparable[] fillCache (IndexReader reader, TermEnum enumerator, String fieldname)
      throws IOException {
        final String field = fieldname.intern ();
        Comparable[] retArray = new Comparable[reader.maxDoc ()];
        if (retArray.length > 0) {
          TermDocs termDocs = reader.termDocs ();
          try {
            if (enumerator.term () == null) {
              throw new RuntimeException ("no terms in field " + field);
            }
            do {
              Term term = enumerator.term ();
              // Identity comparison against the interned name; presumably
              // Term field names are interned as well - confirm against Term.
              if (term.field () != field) break;
              Comparable termval = getComparable (term.text ());
              termDocs.seek (enumerator);
              while (termDocs.next ()) {
                retArray[termDocs.doc ()] = termval;
              }
            } while (enumerator.next ());
          } finally {
            termDocs.close ();
          }
        }
        return retArray;
      }

      /**
       * Converts the text of a term into the object used for sorting.
       * @param termtext  Text of a term in the sort field.
       * @return A SampleComparable parsed from <code>termtext</code>.
       */
      Comparable getComparable (String termtext) {
        return new SampleComparable (termtext);
      }
    };
  }
}
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]