Oops - I forgot to include the comments on the SampleComparable class below, which explain what it is all about.
--
I just committed some code which adds a new SortField type where the comparison function can be specified.
I also wrote another class which I have not committed - it is very convenient, but has a heavy memory footprint.
Attached below are the class and an example of its use. What do you guys think of it?
Tim
--
========= the class =========
package org.apache.lucene.search;
import org.apache.lucene.index.*; import java.io.IOException;
/** * Abstract base class for sorting hits returned by a Query. * * <p>This class should only be used if the other SortField * types (SCORE, DOC, STRING, INT, FLOAT) do not provide an * adequate sorting. It maintains an internal cache of values which * could be quite large. The cache is an array of Comparable, * one for each document in the index. There is a distinct * Comparable for each unique term in the field - if * some documents have the same term in the field, the cache * array will have entries which reference the same Comparable. */ public abstract class SortComparator implements SortComparatorSource {
// inherit javadocs public ScoreDocLookupComparator newComparator (final IndexReader reader, String fieldname) throws IOException { final String field = fieldname.intern(); final TermEnum enumerator = reader.terms (new Term (fieldname, "")); try { return new ScoreDocLookupComparator() { protected Comparable[] cachedValues = fillCache (reader, enumerator, field);
public boolean sizeMatches (int n) { return (cachedValues.length == n); }
public int compare (ScoreDoc i, ScoreDoc j) { return cachedValues[i.doc].compareTo (cachedValues[j.doc]); }
public int compareReverse (ScoreDoc i, ScoreDoc j) { return cachedValues[j.doc].compareTo (cachedValues[i.doc]); }
public Comparable sortValue (ScoreDoc i) { return cachedValues[i.doc]; }
public int sortType(){ return SortField.CUSTOM; } }; } finally { enumerator.close(); } }
/** * Returns an array of objects which represent that natural order * of the term values in the given field. * @param reader Terms are in this index. * @param enumerator Use this to get the term values and TermDocs. * @param fieldname Comparables should be for this field. * @return Array of objects representing natural order of terms in field. * @throws IOException If an error occurs reading the index. */ protected Comparable[] fillCache (IndexReader reader, TermEnum enumerator, String fieldname) throws IOException { final String field = fieldname.intern(); Comparable[] retArray = new Comparable[reader.maxDoc()]; if (retArray.length > 0) { TermDocs termDocs = reader.termDocs(); try { if (enumerator.term() == null) { throw new RuntimeException ("no terms in field " + field); } do { Term term = enumerator.term(); if (term.field() != field) break; Comparable termval = getComparable (term.text()); termDocs.seek (enumerator); while (termDocs.next()) { retArray[termDocs.doc()] = termval; } } while (enumerator.next()); } finally { termDocs.close(); } } return retArray; }
/** * Returns an object which, when sorted according to natural order, * will order the Term values in the correct order. * <p>For example, if the Terms contained integer values, this method * would return <code>new Integer(termtext)</code>. Note that this * might not always be the most efficient implementation - for this * particular example, a better implementation might be to make a * ScoreDocLookupComparator that uses an internal lookup table of int. * @param termtext The textual value of the term. * @return An object representing <code>termtext</code> that sorts according to the natural order of <code>termtext</code>. * @see Comparable * @see ScoreDocLookupComparator */ protected abstract Comparable getComparable (String termtext);
}
==================== How it would be used ==================== ... new SortField ("idfield", new SortComparator() { protected Comparable getComparable (String termtext) { return new SampleComparable (termtext); } }); ...
/**
 * An example Comparable for use with the custom sort tests.
 * It implements a comparable for "id" sort of values which
 * consist of an alphanumeric part and a numeric part, such as:
 *
 * <p>ABC-123, A-1, A-7, A-100, B-99999
 *
 * <p>Such values cannot be sorted as strings, since A-100 needs
 * to come after A-7.
 *
 * <p>It could be argued that the "ids" should be rewritten as
 * A-0001, A-0100, etc. so they will sort as strings.  That is
 * a valid alternate way to solve it - but
 * this is only supposed to be a simple test case.
 *
 * <p>Ordering is by the text before the first '-' (as a string), then
 * by the number after it.  <code>equals</code> and <code>hashCode</code>
 * are consistent with that ordering, so "A-7" and "A-07" are equal.
 */
public class SampleComparable implements Comparable, java.io.Serializable {

  /** Text before the first '-' of the id. */
  String string_part;

  /** Number parsed from the text after the first '-' of the id. */
  Integer int_part;

  /**
   * Parses an id of the form <code>text-number</code>.
   * @param s The id to parse; it is split at the first '-'.
   * @throws IllegalArgumentException If <code>s</code> contains no '-'.
   * @throws NumberFormatException If the text after the first '-' is
   *         not a valid integer.
   */
  public SampleComparable (String s) {
    int i = s.indexOf ("-");
    if (i < 0) {
      // Without this check substring (0, -1) fails with an opaque
      // StringIndexOutOfBoundsException.
      throw new IllegalArgumentException ("id has no '-' separator: " + s);
    }
    string_part = s.substring (0, i);
    int_part = Integer.valueOf (s.substring (i + 1));
  }

  /**
   * Orders by the string part first and by the numeric part second.
   * @param o The other SampleComparable.
   * @return Negative, zero or positive as this id orders before,
   *         equal to, or after <code>o</code>.
   * @throws ClassCastException If <code>o</code> is not a SampleComparable.
   */
  public int compareTo (Object o) {
    SampleComparable otherid = (SampleComparable) o;
    int i = string_part.compareTo (otherid.string_part);
    if (i == 0) return int_part.compareTo (otherid.int_part);
    return i;
  }

  /**
   * Two ids are equal when both their string parts and their numeric
   * parts are equal, i.e. exactly when <code>compareTo</code> returns 0.
   */
  public boolean equals (Object o) {
    if (this == o) return true;
    if (!(o instanceof SampleComparable)) return false;
    SampleComparable otherid = (SampleComparable) o;
    return string_part.equals (otherid.string_part)
        && int_part.equals (otherid.int_part);
  }

  /** Hash code derived from the same two parts that <code>equals</code> compares. */
  public int hashCode () {
    return 31 * string_part.hashCode() + int_part.hashCode();
  }
}
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]