tjones      2004/05/24 15:51:42

  Modified:    src/java/org/apache/lucene/search FieldCacheImpl.java
                        FieldDocSortedHitQueue.java
                        FieldSortedHitQueue.java SortField.java
               src/test/org/apache/lucene/search TestSort.java
  Log:
  Added a SortField which uses a Locale to sort strings.
  Also fixed the discrepancy about what happens when a document has no terms
  in a sorted field.
  Added test cases for both of the above.
  
  Revision  Changes    Path
  1.2       +10 -3     
jakarta-lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
  
  Index: FieldCacheImpl.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- FieldCacheImpl.java       19 May 2004 23:05:27 -0000      1.1
  +++ FieldCacheImpl.java       24 May 2004 22:51:42 -0000      1.2
  @@ -230,11 +230,18 @@
       Object ret = lookup (reader, field, STRING_INDEX);
       if (ret == null) {
         final int[] retArray = new int[reader.maxDoc()];
  -      String[] mterms = new String[reader.maxDoc()];
  +      String[] mterms = new String[reader.maxDoc()+1];
         if (retArray.length > 0) {
           TermDocs termDocs = reader.termDocs();
           TermEnum termEnum = reader.terms (new Term (field, ""));
           int t = 0;  // current term number
  +
  +        // an entry for documents that have no terms in this field
  +        // should a document with no terms be at top or bottom?
  +        // this puts them at the top - if it is changed, FieldDocSortedHitQueue
  +        // needs to change as well.
  +        mterms[t++] = null;
  +
           try {
             if (termEnum.term() == null) {
               throw new RuntimeException ("no terms in field " + field);
  @@ -264,7 +271,7 @@
             // if there are no terms, make the term array
             // have a single null entry
             mterms = new String[1];
  -             } else if (t < mterms.length) {
  +        } else if (t < mterms.length) {
             // if there are less terms than documents,
             // trim off the dead array space
             String[] terms = new String[t];
  
  
  
  1.5       +42 -4     
jakarta-lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java
  
  Index: FieldDocSortedHitQueue.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- FieldDocSortedHitQueue.java       19 May 2004 23:05:27 -0000      1.4
  +++ FieldDocSortedHitQueue.java       24 May 2004 22:51:42 -0000      1.5
  @@ -19,6 +19,8 @@
   import org.apache.lucene.util.PriorityQueue;
   
   import java.io.IOException;
  +import java.text.Collator;
  +import java.util.Locale;
   
   /**
    * Expert: Collects sorted results from Searchable's and collates them.
  @@ -37,6 +39,10 @@
        // have been resolved by the time this class is used.
        volatile SortField[] fields;
   
  +     // used in the case where the fields are sorted by locale
  +     // based strings
  +     volatile Collator[] collators;
  +
   
        /**
         * Creates a hit queue sorted by the given list of fields.
  @@ -47,6 +53,7 @@
        FieldDocSortedHitQueue (SortField[] fields, int size)
        throws IOException {
                this.fields = fields;
  +             this.collators = hasCollators (fields);
                initialize (size);
        }
   
  @@ -60,7 +67,10 @@
         * @param fields
         */
        synchronized void setFields (SortField[] fields) {
  -             if (this.fields == null) this.fields = fields;
  +             if (this.fields == null) {
  +                     this.fields = fields;
  +                     this.collators = hasCollators (fields);
  +             }
        }
   
   
  @@ -70,6 +80,23 @@
        }
   
   
  +     /** Returns an array of collators, possibly <code>null</code>.  The collators
  +      * correspond to any SortFields which were given a specific locale.
  +      * @param fields Array of sort fields.
  +      * @return Array, possibly <code>null</code>.
  +      */
  +     private Collator[] hasCollators (final SortField[] fields) {
  +             if (fields == null) return null;
  +             Collator[] ret = new Collator[fields.length];
  +             for (int i=0; i<fields.length; ++i) {
  +                     Locale locale = fields[i].getLocale();
  +                     if (locale != null)
  +                             ret[i] = Collator.getInstance (locale);
  +             }
  +             return ret;
  +     }
  +
  +
        /**
         * Returns whether <code>a</code> is less relevant than <code>b</code>.
         * @param a ScoreDoc
  @@ -103,7 +130,11 @@
                                                String s2 = (String) docB.fields[i];
                                                if (s2 == null) c = -1;      // could 
be null if there are
                                                else if (s1 == null) c = 1;  // no 
terms in the given field
  -                                             else c = s2.compareTo(s1);
  +                                             else if (fields[i].getLocale() == 
null) {
  +                                                     c = s2.compareTo(s1);
  +                                             } else {
  +                                                     c = collators[i].compare (s2, 
s1);
  +                                             }
                                                break;
                                        case SortField.FLOAT:
                                                float f1 = 
((Float)docA.fields[i]).floatValue();
  @@ -141,9 +172,16 @@
                                        case SortField.STRING:
                                                String s1 = (String) docA.fields[i];
                                                String s2 = (String) docB.fields[i];
  +                                             // null values need to be sorted first, because of how FieldCache.getStringIndex()
  +                                             // works - in that routine, any documents without a value in the given field are
  +                                             // put first.
                                                if (s1 == null) c = -1;      // could 
be null if there are
                                                else if (s2 == null) c = 1;  // no 
terms in the given field
  -                                             else c = s1.compareTo(s2);
  +                                             else if (fields[i].getLocale() == 
null) {
  +                                                     c = s1.compareTo(s2);
  +                                             } else {
  +                                                     c = collators[i].compare (s1, 
s2);
  +                                             }
                                                break;
                                        case SortField.FLOAT:
                                                float f1 = 
((Float)docA.fields[i]).floatValue();
  
  
  
  1.10      +35 -4     
jakarta-lucene/src/java/org/apache/lucene/search/FieldSortedHitQueue.java
  
  Index: FieldSortedHitQueue.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/FieldSortedHitQueue.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- FieldSortedHitQueue.java  19 May 2004 23:05:27 -0000      1.9
  +++ FieldSortedHitQueue.java  24 May 2004 22:51:42 -0000      1.10
  @@ -22,6 +22,8 @@
   import java.io.IOException;
   import java.util.WeakHashMap;
   import java.util.Map;
  +import java.util.Locale;
  +import java.text.Collator;
   
   /**
    * Expert: A hit queue for sorting by hits by terms in more than one field.
  @@ -52,7 +54,7 @@
       this.fields = new SortField[n];
       for (int i=0; i<n; ++i) {
         String fieldname = fields[i].getField();
  -      comparators[i] = getCachedComparator (reader, fieldname, fields[i].getType(), 
fields[i].getFactory());
  +      comparators[i] = getCachedComparator (reader, fieldname, fields[i].getType(), 
fields[i].getLocale(), fields[i].getFactory());
         this.fields[i] = new SortField (fieldname, comparators[i].sortType(), 
fields[i].getReverse());
       }
       initialize (size);
  @@ -144,7 +146,7 @@
       }
     }
   
  -  static ScoreDocComparator getCachedComparator (IndexReader reader, String 
fieldname, int type, SortComparatorSource factory)
  +  static ScoreDocComparator getCachedComparator (IndexReader reader, String 
fieldname, int type, Locale locale, SortComparatorSource factory)
     throws IOException {
       if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER;
       if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE;
  @@ -161,7 +163,8 @@
             comparator = comparatorFloat (reader, fieldname);
             break;
           case SortField.STRING:
  -          comparator = comparatorString (reader, fieldname);
  +          if (locale != null) comparator = comparatorStringLocale (reader, 
fieldname, locale);
  +          else comparator = comparatorString (reader, fieldname);
             break;
           case SortField.CUSTOM:
             comparator = factory.newComparator (reader, fieldname);
  @@ -261,6 +264,34 @@
   
         public Comparable sortValue (final ScoreDoc i) {
           return index.lookup[index.order[i.doc]];
  +      }
  +
  +      public int sortType() {
  +        return SortField.STRING;
  +      }
  +    };
  +  }
  +
  +  /**
  +   * Returns a comparator for sorting hits by a field containing strings, ordered by the given locale's Collator.
  +   * @param reader  Index to use.
  +   * @param fieldname  Field containing string values.
  +   * @return  Comparator for sorting hits.
  +   * @throws IOException If an error occurs reading the index.
  +   */
  +  static ScoreDocComparator comparatorStringLocale (final IndexReader reader, final 
String fieldname, final Locale locale)
  +  throws IOException {
  +    final Collator collator = Collator.getInstance (locale);
  +    final String field = fieldname.intern();
  +    return new ScoreDocComparator() {
  +      final String[] index = FieldCache.DEFAULT.getStrings (reader, field);
  +
  +      public final int compare (final ScoreDoc i, final ScoreDoc j) {
  +        return collator.compare (index[i.doc], index[j.doc]);
  +      }
  +
  +      public Comparable sortValue (final ScoreDoc i) {
  +        return index[i.doc];
         }
   
         public int sortType() {
  
  
  
  1.9       +37 -4     jakarta-lucene/src/java/org/apache/lucene/search/SortField.java
  
  Index: SortField.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/SortField.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- SortField.java    19 May 2004 23:05:27 -0000      1.8
  +++ SortField.java    24 May 2004 22:51:42 -0000      1.9
  @@ -17,6 +17,7 @@
    */
   
   import java.io.Serializable;
  +import java.util.Locale;
   
   /**
    * Stores information about how to sort documents by terms in an individual
  @@ -66,7 +67,7 @@
     // as the above static int values.  Any new values must not have the same value
     // as FieldCache.STRING_INDEX.
   
  -     
  +
     /** Represents sorting by document score (relevancy). */
     public static final SortField FIELD_SCORE = new SortField (null, SCORE);
   
  @@ -76,6 +77,7 @@
   
     private String field;
     private int type = AUTO;  // defaults to determining type dynamically
  +  private Locale locale;    // defaults to "natural order" (no Locale)
     boolean reverse = false;  // defaults to natural order
     private SortComparatorSource factory;
   
  @@ -121,6 +123,29 @@
       this.reverse = reverse;
     }
   
  +  /** Creates a sort by terms in the given field sorted
  +   * according to the given locale.
  +   * @param field  Name of field to sort by, cannot be <code>null</code>.
  +   * @param locale Locale of values in the field.
  +   */
  +  public SortField (String field, Locale locale) {
  +    this.field = field.intern();
  +    this.type = STRING;
  +    this.locale = locale;
  +  }
  +
  +  /** Creates a sort, possibly in reverse, by terms in the given field sorted
  +   * according to the given locale.
  +   * @param field  Name of field to sort by, cannot be <code>null</code>.
  +   * @param locale Locale of values in the field.
  +   */
  +  public SortField (String field, Locale locale, boolean reverse) {
  +    this.field = field.intern();
  +    this.type = STRING;
  +    this.locale = locale;
  +    this.reverse = reverse;
  +  }
  +
     /** Creates a sort with a custom comparison function.
      * @param field Name of field to sort by; cannot be <code>null</code>.
      * @param comparator Returns a comparator for sorting hits.
  @@ -158,6 +183,14 @@
       return type;
     }
   
  +  /** Returns the Locale by which term values are interpreted.
  +   * May return <code>null</code> if no Locale was specified.
  +   * @return Locale, or <code>null</code>.
  +   */
  +  public Locale getLocale() {
  +    return locale;
  +  }
  +
     /** Returns whether the sort should be reversed.
      * @return  True if natural order should be reversed.
      */
  @@ -186,8 +219,8 @@
                  break;
       }
   
  -    if (reverse)
  -      buffer.append('!');
  +    if (locale != null) buffer.append ("("+locale+")");
  +    if (reverse) buffer.append('!');
   
       return buffer.toString();
     }
  
  
  
  1.7       +65 -7     jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java
  
  Index: TestSort.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- TestSort.java     19 May 2004 23:05:27 -0000      1.6
  +++ TestSort.java     24 May 2004 22:51:42 -0000      1.7
  @@ -30,6 +30,7 @@
   import java.util.regex.Pattern;
   import java.util.HashMap;
   import java.util.Iterator;
  +import java.util.Locale;
   
   import junit.framework.TestCase;
   import junit.framework.Test;
  @@ -56,6 +57,7 @@
        private Query queryX;
        private Query queryY;
        private Query queryA;
  +     private Query queryF;
        private Sort sort;
   
   
  @@ -101,6 +103,7 @@
        {   "H",   "y a b c d",     "0",           "1.4E-45",      "e",     "C-88"  },
        {   "I",   "x a b c d e f", "-2147483648", "1.0e+0",       "d",     "A-10"  },
        {   "J",   "y a b c d e f", "4",           ".5",           "b",     "C-7"   },
  +     {   "Z",   "f",             null,          null,           null,    null    }
        };
   
        // create an index of all the documents, or just the x, or just the y documents
  @@ -113,10 +116,10 @@
                                Document doc = new Document();          // store, 
index, token
                                doc.add (new Field ("tracer",   data[i][0], true, 
false, false));
                                doc.add (new Field ("contents", data[i][1], false, 
true, true));
  -                             doc.add (new Field ("int",      data[i][2], false, 
true, false));
  -                             doc.add (new Field ("float",    data[i][3], false, 
true, false));
  -                             doc.add (new Field ("string",   data[i][4], false, 
true, false));
  -                             doc.add (new Field ("custom",   data[i][5], false, 
true, false));
  +                             if (data[i][2] != null) doc.add (new Field ("int",     
 data[i][2], false, true, false));
  +                             if (data[i][3] != null) doc.add (new Field ("float",   
 data[i][3], false, true, false));
  +                             if (data[i][4] != null) doc.add (new Field ("string",  
 data[i][4], false, true, false));
  +                             if (data[i][5] != null) doc.add (new Field ("custom",  
 data[i][5], false, true, false));
                                writer.addDocument (doc);
                        }
                }
  @@ -152,6 +155,7 @@
                queryX = new TermQuery (new Term ("contents", "x"));
                queryY = new TermQuery (new Term ("contents", "y"));
                queryA = new TermQuery (new Term ("contents", "a"));
  +             queryF = new TermQuery (new Term ("contents", "f"));
                sort = new Sort();
        }
   
  @@ -239,6 +243,27 @@
                assertMatches (full, queryY, sort, "BFHJD");
        }
   
  +     // test sorting when the sort field is empty (undefined) for some of the 
documents
  +     public void testEmptyFieldSort() throws Exception {
  +             sort.setSort ("string");
  +             assertMatches (full, queryF, sort, "ZJI");
  +
  +             sort.setSort ("string", true);
  +             assertMatches (full, queryF, sort, "IJZ");
  +
  +             sort.setSort ("int");
  +             assertMatches (full, queryF, sort, "IZJ");
  +
  +             sort.setSort ("int", true);
  +             assertMatches (full, queryF, sort, "JZI");
  +
  +             sort.setSort ("float");
  +             assertMatches (full, queryF, sort, "ZJI");
  +
  +             sort.setSort ("float", true);
  +             assertMatches (full, queryF, sort, "IJZ");
  +     }
  +
        // test sorts using a series of fields
        public void testSortCombos() throws Exception {
                sort.setSort (new String[] {"int","float"});
  @@ -251,7 +276,18 @@
                assertMatches (full, queryX, sort, "GICEA");
        }
   
  +     // test using a Locale for sorting strings
  +     public void testLocaleSort() throws Exception {
  +             sort.setSort (new SortField[] { new SortField ("string", Locale.US) });
  +             assertMatches (full, queryX, sort, "AIGEC");
  +             assertMatches (full, queryY, sort, "DJHFB");
   
  +             sort.setSort (new SortField[] { new SortField ("string", Locale.US, 
true) });
  +             assertMatches (full, queryX, sort, "CEGIA");
  +             assertMatches (full, queryY, sort, "BFHJD");
  +     }
  +
  +     // test a custom sort function
        public void testCustomSorts() throws Exception {
                sort.setSort (new SortField ("custom", 
SampleComparable.getComparatorSource()));
                assertMatches (full, queryX, sort, "CAIEG");
  @@ -283,6 +319,7 @@
                runMultiSorts (multi);
        }
   
  +     // test custom search when remote
        public void testRemoteCustomSort() throws Exception {
                Searchable searcher = getRemote();
                MultiSearcher multi = new MultiSearcher (new Searchable[] { searcher 
});
  @@ -438,11 +475,32 @@
                sort.setSort ("string", true);
                assertMatches (multi, queryA, sort, "CBEFGHIAJD");
   
  +             sort.setSort (new SortField[] { new SortField ("string", Locale.US) });
  +             assertMatches (multi, queryA, sort, "DJAIHGFEBC");
  +
  +             sort.setSort (new SortField[] { new SortField ("string", Locale.US, 
true) });
  +             assertMatches (multi, queryA, sort, "CBEFGHIAJD");
  +
                sort.setSort (new String[] {"int","float"});
  -             assertMatches (full, queryA, sort, "IDHFGJEABC");
  +             assertMatches (multi, queryA, sort, "IDHFGJEABC");
   
                sort.setSort (new String[] {"float","string"});
  -             assertMatches (full, queryA, sort, "GDHJICEFAB");
  +             assertMatches (multi, queryA, sort, "GDHJICEFAB");
  +
  +             sort.setSort ("int");
  +             assertMatches (multi, queryF, sort, "IZJ");
  +
  +             sort.setSort ("int", true);
  +             assertMatches (multi, queryF, sort, "JZI");
  +
  +             sort.setSort ("float");
  +             assertMatches (multi, queryF, sort, "ZJI");
  +
  +             sort.setSort ("string");
  +             assertMatches (multi, queryF, sort, "ZJI");
  +
  +             sort.setSort ("string", true);
  +             assertMatches (multi, queryF, sort, "IJZ");
        }
   
        // make sure the documents returned by the search match the expected list
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to