Hello,
the method search(Query, Filter, int) in IndexSearcher makes use of a
Priority Queue to temporarily store hits before transfering them into an
array. This results in n X (put + pop) or a complexity of about 2 n log (n).
Using a treeset to store hits and an iterator to transfer them to an array
allows for an n (log (n) + 1) complexity. Moreover, there is no need for the
HitQueue class anymore, the parameter nDoc is no more required and possibly
a very efficient Sorted Set implementation can be plugged in later on (or is
TreeSet already fast enough?). Because search is often called, I thought
that this would be worthwile.
All required modifications follow.
KR,
Jeff Halleux
----
Index: HitQueue.java
===================================================================
RCS file: HitQueue.java
diff -N HitQueue.java
--- HitQueue.java 18 Sep 2001 16:29:56 -0000 1.1.1.1
+++ /dev/null 1 Jan 1970 00:00:00 -0000
@@ -1,72 +0,0 @@
-package org.apache.lucene.search;
-
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Lucene" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact [EMAIL PROTECTED]
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Lucene", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
-import org.apache.lucene.util.PriorityQueue;
-
-final class HitQueue extends PriorityQueue {
- HitQueue(int size) {
- initialize(size);
- }
-
- protected final boolean lessThan(Object a, Object b) {
- ScoreDoc hitA = (ScoreDoc)a;
- ScoreDoc hitB = (ScoreDoc)b;
- if (hitA.score == hitB.score)
- return hitA.doc > hitB.doc;
- else
- return hitA.score < hitB.score;
- }
-}
Index: IndexSearcher.java
===================================================================
RCS file:
/home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/IndexSearch
er.java,v
retrieving revision 1.11
diff -u -r1.11 IndexSearcher.java
--- IndexSearcher.java 16 Sep 2003 20:06:32 -0000 1.11
+++ IndexSearcher.java 14 Jan 2004 16:33:49 -0000
@@ -56,11 +56,14 @@
import java.io.IOException;
import java.util.BitSet;
+import java.util.Iterator;
+import java.util.SortedSet;
+import java.util.TreeSet;
-import org.apache.lucene.store.Directory;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
/** Implements search over a single IndexReader.
*
@@ -130,23 +133,27 @@
return new TopDocs(0, new ScoreDoc[0]);
final BitSet bits = filter != null ? filter.bits(reader) : null;
- final HitQueue hq = new HitQueue(nDocs);
- final int[] totalHits = new int[1];
+ final SortedSet ss = new TreeSet();
+
scorer.score(new HitCollector() {
public final void collect(int doc, float score) {
if (score > 0.0f && // ignore zeroed buckets
(bits==null || bits.get(doc))) { // skip docs not in bits
- totalHits[0]++;
- hq.insert(new ScoreDoc(doc, score));
+ ss.add(new ScoreDoc(doc, score));
}
}
}, reader.maxDoc());
- ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
- for (int i = hq.size()-1; i >= 0; i--) // put docs in array
- scoreDocs[i] = (ScoreDoc)hq.pop();
-
- return new TopDocs(totalHits[0], scoreDocs);
+ ScoreDoc[] scoreDocs = new ScoreDoc[ss.size()];
+
+ Iterator it=ss.iterator();
+
+ int i=0;
+ while (it.hasNext()) {
+ scoreDocs[i++]=(ScoreDoc)it.next();
+ }
+
+ return new TopDocs(ss.size(), scoreDocs);
}
/** Lower-level search API.
Index: ScoreDoc.java
===================================================================
RCS file:
/home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/ScoreDoc.ja
va,v
retrieving revision 1.3
diff -u -r1.3 ScoreDoc.java
--- ScoreDoc.java 17 Jul 2002 21:54:38 -0000 1.3
+++ ScoreDoc.java 14 Jan 2004 16:33:51 -0000
@@ -56,18 +56,28 @@
/** Expert: Returned by low-level search implementations.
* @see TopDocs */
-public class ScoreDoc implements java.io.Serializable {
- /** Expert: The score of this document for the query. */
- public float score;
+public class ScoreDoc implements java.io.Serializable, Comparable {
+ /** Expert: The score of this document for the query. */
+ public float score;
- /** Expert: A hit document's number.
- * @see Searcher#doc(int)
- */
- public int doc;
+ /** Expert: A hit document's number.
+ * @see Searcher#doc(int)
+ */
+ public int doc;
+
+ /** Expert: Constructs a ScoreDoc. */
+ public ScoreDoc(int doc, float score) {
+ this.doc = doc;
+ this.score = score;
+ }
+
+ public int compareTo(Object o) {
+ ScoreDoc other = (ScoreDoc) o;
+
+ if (score < other.score) return 1;
+ if (score > other.score) return -1;
+ if (doc > other.doc) return 1;
+ return -1;
+ }
- /** Expert: Constructs a ScoreDoc. */
- public ScoreDoc(int doc, float score) {
- this.doc = doc;
- this.score = score;
- }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]