search StringSortedHitQueue.java

cutting Tue, 24 Feb 2004 12:41:10 -0800

cutting     2004/02/24 12:41:16

  Modified:    src/java/org/apache/lucene/search StringSortedHitQueue.java
  Log:
  Fixed problem with sorting.
  
  Revision  Changes    Path
  1.3       +62 -14    jakarta-lucene/src/java/org/apache/lucene/search/StringSortedHitQueue.java
  
  Index: StringSortedHitQueue.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/StringSortedHitQueue.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- StringSortedHitQueue.java 24 Feb 2004 19:34:58 -0000      1.2
  +++ StringSortedHitQueue.java 24 Feb 2004 20:41:16 -0000      1.3
  @@ -26,8 +26,9 @@
   /**
    * Expert: A sorted hit queue for fields that contain string values.
    * Hits are sorted into the queue by the values in the field and then by document number.
  - * The internal cache contains integers - the strings are sorted and
  - * then only their sequence number cached.
  + * Warning: The internal cache could be quite large, depending on the number of terms
  + * in the field!  All the terms are kept in memory, as well as a sorted array of
  + * integers representing their relative position.
    *
    * <p>Created: Feb 2, 2004 9:26:33 AM
    *
  @@ -68,21 +69,24 @@
        /**
         * Returns a comparator for sorting hits according to a field containing strings.
         * @param reader  Index to use.
  -      * @param field  Field containg string values.
  +      * @param fieldname  Field containg string values.
         * @return  Comparator for sorting hits.
         * @throws IOException If an error occurs reading the index.
         */
  -     static ScoreDocLookupComparator comparator (final IndexReader reader, final String field)
  +     static ScoreDocLookupComparator comparator (final IndexReader reader, final String fieldname)
        throws IOException {
  +             final String field = fieldname.intern();
                return new ScoreDocLookupComparator() {
   
                        /** The sort information being used by this instance */
                        protected final int[] fieldOrder = generateSortIndex();
  +                     protected String[] terms;
   
                        private final int[] generateSortIndex()
                        throws IOException {
   
                                final int[] retArray = new int[reader.maxDoc()];
  +                             final String[] mterms = new String[reader.maxDoc()];   // guess length
   
                                TermEnum enumerator = reader.terms (new Term (field, ""));
                                TermDocs termDocs = reader.termDocs();
  @@ -98,22 +102,43 @@
                                // following loop will automatically sort the
                                // terms in the correct order.
   
  +                             // if a given document has more than one term
  +                             // in the field, only the last one will be used.
  +
  +                             int t = 0;  // current term number
                                try {
  -                                     int t = 0;  // current term number
                                        do {
                                                Term term = enumerator.term();
                                                if (term.field() != field) break;
  -                                             t++;
  +
  +                                             // store term text
  +                                             // we expect that there is at most one term per document
  +                                             if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \""+field+"\"");
  +                                             mterms[t] = term.text();
  +
  +                                             // store which documents use this term
                                                termDocs.seek (enumerator);
                                                while (termDocs.next()) {
                                                        retArray[termDocs.doc()] = t;
                                                }
  +
  +                                             t++;
                                        } while (enumerator.next());
  +
                                } finally {
                                        enumerator.close();
                                        termDocs.close();
                                }
   
  +                             // if there are less terms than documents,
  +                             // trim off the dead array space
  +                             if (t < mterms.length) {
  +                                     terms = new String[t];
  +                                     System.arraycopy (mterms, 0, terms, 0, t);
  +                             } else {
  +                                     terms = mterms;
  +                             }
  +
                                return retArray;
                        }
   
  @@ -138,11 +163,11 @@
                        }
   
                        public Object sortValue (final ScoreDoc i) {
  -                             return new Integer(fieldOrder[i.doc]);
  +                             return terms[fieldOrder[i.doc]];
                        }
   
                        public int sortType() {
  -                             return SortField.INT;
  +                             return SortField.STRING;
                        }
                };
        }
  @@ -152,20 +177,23 @@
         * Returns a comparator for sorting hits according to a field containing strings using the given enumerator
         * to collect term values.
         * @param reader  Index to use.
  -      * @param field  Field containg string values.
  +      * @param fieldname  Field containg string values.
         * @return  Comparator for sorting hits.
         * @throws IOException If an error occurs reading the index.
         */
  -     static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String field)
  +     static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String fieldname)
        throws IOException {
  +             final String field = fieldname.intern();
                return new ScoreDocLookupComparator() {
   
                        protected final int[] fieldOrder = generateSortIndex();
  +                     protected String[] terms;
   
                        private final int[] generateSortIndex()
                        throws IOException {
   
                                final int[] retArray = new int[reader.maxDoc()];
  +                             final String[] mterms = new String[reader.maxDoc()];  // guess length
   
                                // NOTE: the contract for TermEnum says the
                                // terms will be in natural order (which is
  @@ -175,22 +203,42 @@
                                // following loop will automatically sort the
                                // terms in the correct order.
   
  +                             // if a given document has more than one term
  +                             // in the field, only the last one will be used.
  +
                                TermDocs termDocs = reader.termDocs();
  +                             int t = 0;  // current term number
                                try {
  -                                     int t = 0;  // current term number
                                        do {
                                                Term term = enumerator.term();
                                                if (term.field() != field) break;
  -                                             t++;
  +
  +                                             // store term text
  +                                             // we expect that there is at most one term per document
  +                                             if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \""+field+"\"");
  +                                             mterms[t] = term.text();
  +
  +                                             // store which documents use this term
                                                termDocs.seek (enumerator);
                                                while (termDocs.next()) {
                                                        retArray[termDocs.doc()] = t;
                                                }
  +
  +                                             t++;
                                        } while (enumerator.next());
                                } finally {
                                        termDocs.close();
                                }
   
  +                             // if there are less terms than documents,
  +                             // trim off the dead array space
  +                             if (t < mterms.length) {
  +                                     terms = new String[t];
  +                                     System.arraycopy (mterms, 0, terms, 0, t);
  +                             } else {
  +                                     terms = mterms;
  +                             }
  +
                                return retArray;
                        }
   
  @@ -215,11 +263,11 @@
                        }
   
                        public Object sortValue (final ScoreDoc i) {
  -                             return new Integer(fieldOrder[i.doc]);
  +                             return terms[fieldOrder[i.doc]];
                        }
   
                        public int sortType() {
  -                             return SortField.INT;
  +                             return SortField.STRING;
                        }
                };
        }


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: jakarta-lucene/src/java/org/apache/lucene/search StringSortedHitQueue.java

Reply via email to