Hi Doug,
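I think this change is not correct in the FieldDoc case. When the results are sorted by field, the score is not necessarily the criterion that decides whether a FieldDoc is inserted into the queue! The top of the FieldSortedHitQueue is the least element according to the sort fields, so its score is not a lower bound, and the new "score >= minScore" test can discard documents that belong in the top nDocs.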
Christoph
[EMAIL PROTECTED] wrote:
cutting 2004/09/22 10:03:00
Modified: src/java/org/apache/lucene/search IndexSearcher.java
Log:
Do not construct a ScoreDoc for every non-zero hit, but only for those
in the current top scoring set. This makes a substantial performance
improvement for queries that match lots of documents.
Revision Changes Path
1.20 +26 -18 jakarta-lucene/src/java/org/apache/lucene/search/IndexSearcher.java
Index: IndexSearcher.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/IndexSearcher.java,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- IndexSearcher.java 19 May 2004 23:05:27 -0000 1.19
+++ IndexSearcher.java 22 Sep 2004 17:03:00 -0000 1.20
@@ -90,17 +90,21 @@
final HitQueue hq = new HitQueue(nDocs);
final int[] totalHits = new int[1];
scorer.score(new HitCollector() {
- public final void collect(int doc, float score) {
- if (score > 0.0f && // ignore zeroed buckets
- (bits==null || bits.get(doc))) { // skip docs not in bits
- totalHits[0]++;
- hq.insert(new ScoreDoc(doc, score));
- }
- }
+ private float minScore = 0.0f;
+ public final void collect(int doc, float score) {
+ if (score > 0.0f && // ignore zeroed buckets
+ (bits==null || bits.get(doc))) { // skip docs not in bits
+ totalHits[0]++;
+ if (hq.size() < nDocs || score >= minScore) {
+ hq.insert(new ScoreDoc(doc, score));
+ minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
+ }
+ }
+ }
});
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
- for (int i = hq.size()-1; i >= 0; i--) // put docs in array
+ for (int i = hq.size()-1; i >= 0; i--) // put docs in array
scoreDocs[i] = (ScoreDoc)hq.pop();
return new TopDocs(totalHits[0], scoreDocs);
@@ -119,17 +123,21 @@
new FieldSortedHitQueue(reader, sort.fields, nDocs);
final int[] totalHits = new int[1];
scorer.score(new HitCollector() {
+ private float minScore = 0.0f;
public final void collect(int doc, float score) {
- if (score > 0.0f && // ignore zeroed buckets
- (bits==null || bits.get(doc))) { // skip docs not in bits
+ if (score > 0.0f && // ignore zeroed buckets
+ (bits==null || bits.get(doc))) { // skip docs not in bits
totalHits[0]++;
- hq.insert(new FieldDoc(doc, score));
+ if (hq.size() < nDocs || score >= minScore) {
+ hq.insert(new FieldDoc(doc, score));
+ minScore = ((FieldDoc)hq.top()).score; // maintain minScore
+ }
}
}
});
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
- for (int i = hq.size()-1; i >= 0; i--) // put docs in array
+ for (int i = hq.size()-1; i >= 0; i--) // put docs in array
scoreDocs[i] = hq.fillFields ((FieldDoc) hq.pop());
return new TopFieldDocs(totalHits[0], scoreDocs, hq.getFields());
@@ -143,12 +151,12 @@
if (filter != null) {
final BitSet bits = filter.bits(reader);
collector = new HitCollector() {
- public final void collect(int doc, float score) {
- if (bits.get(doc)) { // skip docs not in bits
- results.collect(doc, score);
- }
- }
- };
+ public final void collect(int doc, float score) {
+ if (bits.get(doc)) { // skip docs not in bits
+ results.collect(doc, score);
+ }
+ }
+ };
}
Scorer scorer = query.weight(this).scorer(reader);
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
-- ************************************************************* * Dr. Christoph Goller Tel. : +49 89 203 45734 * * Geschäftsführer Email: [EMAIL PROTECTED] * * Detego Software GmbH Mail : Keuslinstr. 13, * * 80798 München, Germany * *************************************************************
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]