tjones 2004/05/24 15:51:42 Modified: src/java/org/apache/lucene/search FieldCacheImpl.java FieldDocSortedHitQueue.java FieldSortedHitQueue.java SortField.java src/test/org/apache/lucene/search TestSort.java Log: added a SortField which uses a Locale to sort strings. also fixed the discrepancy about what happens when a document has no terms in a sorted field. added test cases for both of the above. Revision Changes Path 1.2 +10 -3 jakarta-lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java Index: FieldCacheImpl.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- FieldCacheImpl.java 19 May 2004 23:05:27 -0000 1.1 +++ FieldCacheImpl.java 24 May 2004 22:51:42 -0000 1.2 @@ -230,11 +230,18 @@ Object ret = lookup (reader, field, STRING_INDEX); if (ret == null) { final int[] retArray = new int[reader.maxDoc()]; - String[] mterms = new String[reader.maxDoc()]; + String[] mterms = new String[reader.maxDoc()+1]; if (retArray.length > 0) { TermDocs termDocs = reader.termDocs(); TermEnum termEnum = reader.terms (new Term (field, "")); int t = 0; // current term number + + // an entry for documents that have no terms in this field + // should a document with no terms be at top or bottom? + // this puts them at the top - if it is changed, FieldDocSortedHitQueue + // needs to change as well. 
+ mterms[t++] = null; + try { if (termEnum.term() == null) { throw new RuntimeException ("no terms in field " + field); @@ -264,7 +271,7 @@ // if there are no terms, make the term array // have a single null entry mterms = new String[1]; - } else if (t < mterms.length) { + } else if (t < mterms.length) { // if there are less terms than documents, // trim off the dead array space String[] terms = new String[t]; 1.5 +42 -4 jakarta-lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java Index: FieldDocSortedHitQueue.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- FieldDocSortedHitQueue.java 19 May 2004 23:05:27 -0000 1.4 +++ FieldDocSortedHitQueue.java 24 May 2004 22:51:42 -0000 1.5 @@ -19,6 +19,8 @@ import org.apache.lucene.util.PriorityQueue; import java.io.IOException; +import java.text.Collator; +import java.util.Locale; /** * Expert: Collects sorted results from Searchable's and collates them. @@ -37,6 +39,10 @@ // have been resolved by the time this class is used. volatile SortField[] fields; + // used in the case where the fields are sorted by locale + // based strings + volatile Collator[] collators; + /** * Creates a hit queue sorted by the given list of fields. @@ -47,6 +53,7 @@ FieldDocSortedHitQueue (SortField[] fields, int size) throws IOException { this.fields = fields; + this.collators = hasCollators (fields); initialize (size); } @@ -60,7 +67,10 @@ * @param fields */ synchronized void setFields (SortField[] fields) { - if (this.fields == null) this.fields = fields; + if (this.fields == null) { + this.fields = fields; + this.collators = hasCollators (fields); + } } @@ -70,6 +80,23 @@ } + /** Returns an array of collators, possibly <code>null</code>. The collators + * correspond to any SortFields which were given a specific locale. 
+ * @param fields Array of sort fields. + * @return Array, possibly <code>null</code>. + */ + private Collator[] hasCollators (final SortField[] fields) { + if (fields == null) return null; + Collator[] ret = new Collator[fields.length]; + for (int i=0; i<fields.length; ++i) { + Locale locale = fields[i].getLocale(); + if (locale != null) + ret[i] = Collator.getInstance (locale); + } + return ret; + } + + /** * Returns whether <code>a</code> is less relevant than <code>b</code>. * @param a ScoreDoc @@ -103,7 +130,11 @@ String s2 = (String) docB.fields[i]; if (s2 == null) c = -1; // could be null if there are else if (s1 == null) c = 1; // no terms in the given field - else c = s2.compareTo(s1); + else if (fields[i].getLocale() == null) { + c = s2.compareTo(s1); + } else { + c = collators[i].compare (s2, s1); + } break; case SortField.FLOAT: float f1 = ((Float)docA.fields[i]).floatValue(); @@ -141,9 +172,16 @@ case SortField.STRING: String s1 = (String) docA.fields[i]; String s2 = (String) docB.fields[i]; + // null values need to be sorted first, because of how FieldCache.getStringIndex() + // works - in that routine, any documents without a value in the given field are + // put first. 
if (s1 == null) c = -1; // could be null if there are else if (s2 == null) c = 1; // no terms in the given field - else c = s1.compareTo(s2); + else if (fields[i].getLocale() == null) { + c = s1.compareTo(s2); + } else { + c = collators[i].compare (s1, s2); + } break; case SortField.FLOAT: float f1 = ((Float)docA.fields[i]).floatValue(); 1.10 +35 -4 jakarta-lucene/src/java/org/apache/lucene/search/FieldSortedHitQueue.java Index: FieldSortedHitQueue.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FieldSortedHitQueue.java,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- FieldSortedHitQueue.java 19 May 2004 23:05:27 -0000 1.9 +++ FieldSortedHitQueue.java 24 May 2004 22:51:42 -0000 1.10 @@ -22,6 +22,8 @@ import java.io.IOException; import java.util.WeakHashMap; import java.util.Map; +import java.util.Locale; +import java.text.Collator; /** * Expert: A hit queue for sorting by hits by terms in more than one field. 
@@ -52,7 +54,7 @@ this.fields = new SortField[n]; for (int i=0; i<n; ++i) { String fieldname = fields[i].getField(); - comparators[i] = getCachedComparator (reader, fieldname, fields[i].getType(), fields[i].getFactory()); + comparators[i] = getCachedComparator (reader, fieldname, fields[i].getType(), fields[i].getLocale(), fields[i].getFactory()); this.fields[i] = new SortField (fieldname, comparators[i].sortType(), fields[i].getReverse()); } initialize (size); @@ -144,7 +146,7 @@ } } - static ScoreDocComparator getCachedComparator (IndexReader reader, String fieldname, int type, SortComparatorSource factory) + static ScoreDocComparator getCachedComparator (IndexReader reader, String fieldname, int type, Locale locale, SortComparatorSource factory) throws IOException { if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER; if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE; @@ -161,7 +163,8 @@ comparator = comparatorFloat (reader, fieldname); break; case SortField.STRING: - comparator = comparatorString (reader, fieldname); + if (locale != null) comparator = comparatorStringLocale (reader, fieldname, locale); + else comparator = comparatorString (reader, fieldname); break; case SortField.CUSTOM: comparator = factory.newComparator (reader, fieldname); @@ -261,6 +264,34 @@ public Comparable sortValue (final ScoreDoc i) { return index.lookup[index.order[i.doc]]; + } + + public int sortType() { + return SortField.STRING; + } + }; + } + + /** + * Returns a comparator for sorting hits according to a field containing strings. + * @param reader Index to use. + * @param fieldname Field containing string values. + * @return Comparator for sorting hits. + * @throws IOException If an error occurs reading the index. 
+ */ + static ScoreDocComparator comparatorStringLocale (final IndexReader reader, final String fieldname, final Locale locale) + throws IOException { + final Collator collator = Collator.getInstance (locale); + final String field = fieldname.intern(); + return new ScoreDocComparator() { + final String[] index = FieldCache.DEFAULT.getStrings (reader, field); + + public final int compare (final ScoreDoc i, final ScoreDoc j) { + return collator.compare (index[i.doc], index[j.doc]); + } + + public Comparable sortValue (final ScoreDoc i) { + return index[i.doc]; } public int sortType() { 1.9 +37 -4 jakarta-lucene/src/java/org/apache/lucene/search/SortField.java Index: SortField.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/SortField.java,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- SortField.java 19 May 2004 23:05:27 -0000 1.8 +++ SortField.java 24 May 2004 22:51:42 -0000 1.9 @@ -17,6 +17,7 @@ */ import java.io.Serializable; +import java.util.Locale; /** * Stores information about how to sort documents by terms in an individual @@ -66,7 +67,7 @@ // as the above static int values. Any new values must not have the same value // as FieldCache.STRING_INDEX. - + /** Represents sorting by document score (relevancy). */ public static final SortField FIELD_SCORE = new SortField (null, SCORE); @@ -76,6 +77,7 @@ private String field; private int type = AUTO; // defaults to determining type dynamically + private Locale locale; // defaults to "natural order" (no Locale) boolean reverse = false; // defaults to natural order private SortComparatorSource factory; @@ -121,6 +123,29 @@ this.reverse = reverse; } + /** Creates a sort by terms in the given field sorted + * according to the given locale. + * @param field Name of field to sort by, cannot be <code>null</code>. + * @param locale Locale of values in the field. 
+ */ + public SortField (String field, Locale locale) { + this.field = field.intern(); + this.type = STRING; + this.locale = locale; + } + + /** Creates a sort, possibly in reverse, by terms in the given field sorted + * according to the given locale. + * @param field Name of field to sort by, cannot be <code>null</code>. + * @param locale Locale of values in the field. + */ + public SortField (String field, Locale locale, boolean reverse) { + this.field = field.intern(); + this.type = STRING; + this.locale = locale; + this.reverse = reverse; + } + /** Creates a sort with a custom comparison function. * @param field Name of field to sort by; cannot be <code>null</code>. * @param comparator Returns a comparator for sorting hits. @@ -158,6 +183,14 @@ return type; } + /** Returns the Locale by which term values are interpreted. + * May return <code>null</code> if no Locale was specified. + * @return Locale, or <code>null</code>. + */ + public Locale getLocale() { + return locale; + } + /** Returns whether the sort should be reversed. * @return True if natural order should be reversed. 
*/ @@ -186,8 +219,8 @@ break; } - if (reverse) - buffer.append('!'); + if (locale != null) buffer.append ("("+locale+")"); + if (reverse) buffer.append('!'); return buffer.toString(); } 1.7 +65 -7 jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java Index: TestSort.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- TestSort.java 19 May 2004 23:05:27 -0000 1.6 +++ TestSort.java 24 May 2004 22:51:42 -0000 1.7 @@ -30,6 +30,7 @@ import java.util.regex.Pattern; import java.util.HashMap; import java.util.Iterator; +import java.util.Locale; import junit.framework.TestCase; import junit.framework.Test; @@ -56,6 +57,7 @@ private Query queryX; private Query queryY; private Query queryA; + private Query queryF; private Sort sort; @@ -101,6 +103,7 @@ { "H", "y a b c d", "0", "1.4E-45", "e", "C-88" }, { "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10" }, { "J", "y a b c d e f", "4", ".5", "b", "C-7" }, + { "Z", "f", null, null, null, null } }; // create an index of all the documents, or just the x, or just the y documents @@ -113,10 +116,10 @@ Document doc = new Document(); // store, index, token doc.add (new Field ("tracer", data[i][0], true, false, false)); doc.add (new Field ("contents", data[i][1], false, true, true)); - doc.add (new Field ("int", data[i][2], false, true, false)); - doc.add (new Field ("float", data[i][3], false, true, false)); - doc.add (new Field ("string", data[i][4], false, true, false)); - doc.add (new Field ("custom", data[i][5], false, true, false)); + if (data[i][2] != null) doc.add (new Field ("int", data[i][2], false, true, false)); + if (data[i][3] != null) doc.add (new Field ("float", data[i][3], false, true, false)); + if (data[i][4] != null) doc.add (new Field ("string", data[i][4], false, true, false)); + if (data[i][5] != null) doc.add (new Field 
("custom", data[i][5], false, true, false)); writer.addDocument (doc); } } @@ -152,6 +155,7 @@ queryX = new TermQuery (new Term ("contents", "x")); queryY = new TermQuery (new Term ("contents", "y")); queryA = new TermQuery (new Term ("contents", "a")); + queryF = new TermQuery (new Term ("contents", "f")); sort = new Sort(); } @@ -239,6 +243,27 @@ assertMatches (full, queryY, sort, "BFHJD"); } + // test sorting when the sort field is empty (undefined) for some of the documents + public void testEmptyFieldSort() throws Exception { + sort.setSort ("string"); + assertMatches (full, queryF, sort, "ZJI"); + + sort.setSort ("string", true); + assertMatches (full, queryF, sort, "IJZ"); + + sort.setSort ("int"); + assertMatches (full, queryF, sort, "IZJ"); + + sort.setSort ("int", true); + assertMatches (full, queryF, sort, "JZI"); + + sort.setSort ("float"); + assertMatches (full, queryF, sort, "ZJI"); + + sort.setSort ("float", true); + assertMatches (full, queryF, sort, "IJZ"); + } + // test sorts using a series of fields public void testSortCombos() throws Exception { sort.setSort (new String[] {"int","float"}); @@ -251,7 +276,18 @@ assertMatches (full, queryX, sort, "GICEA"); } + // test using a Locale for sorting strings + public void testLocaleSort() throws Exception { + sort.setSort (new SortField[] { new SortField ("string", Locale.US) }); + assertMatches (full, queryX, sort, "AIGEC"); + assertMatches (full, queryY, sort, "DJHFB"); + sort.setSort (new SortField[] { new SortField ("string", Locale.US, true) }); + assertMatches (full, queryX, sort, "CEGIA"); + assertMatches (full, queryY, sort, "BFHJD"); + } + + // test a custom sort function public void testCustomSorts() throws Exception { sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource())); assertMatches (full, queryX, sort, "CAIEG"); @@ -283,6 +319,7 @@ runMultiSorts (multi); } + // test custom search when remote public void testRemoteCustomSort() throws Exception { Searchable 
searcher = getRemote(); MultiSearcher multi = new MultiSearcher (new Searchable[] { searcher }); @@ -438,11 +475,32 @@ sort.setSort ("string", true); assertMatches (multi, queryA, sort, "CBEFGHIAJD"); + sort.setSort (new SortField[] { new SortField ("string", Locale.US) }); + assertMatches (multi, queryA, sort, "DJAIHGFEBC"); + + sort.setSort (new SortField[] { new SortField ("string", Locale.US, true) }); + assertMatches (multi, queryA, sort, "CBEFGHIAJD"); + sort.setSort (new String[] {"int","float"}); - assertMatches (full, queryA, sort, "IDHFGJEABC"); + assertMatches (multi, queryA, sort, "IDHFGJEABC"); sort.setSort (new String[] {"float","string"}); - assertMatches (full, queryA, sort, "GDHJICEFAB"); + assertMatches (multi, queryA, sort, "GDHJICEFAB"); + + sort.setSort ("int"); + assertMatches (multi, queryF, sort, "IZJ"); + + sort.setSort ("int", true); + assertMatches (multi, queryF, sort, "JZI"); + + sort.setSort ("float"); + assertMatches (multi, queryF, sort, "ZJI"); + + sort.setSort ("string"); + assertMatches (multi, queryF, sort, "ZJI"); + + sort.setSort ("string", true); + assertMatches (multi, queryF, sort, "IJZ"); } // make sure the documents returned by the search match the expected list
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]