Dear developers,

In MultiSearcher, the method
public TopDocs search(Query query, Filter filter, int nDocs)
contains an
else break;
which discards previous interim results.

Since I expect to need on the order of 100 best results from
20 databases on a regular basis, I don't really like this.

This is the current code:

    for (int i = 0; i < searchables.length; i++) { // search each searcher
      TopDocs docs = searchables[i].search(query, filter, nDocs);
      totalHits += docs.totalHits;                // update totalHits
      ScoreDoc[] scoreDocs = docs.scoreDocs;
      for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
        ScoreDoc scoreDoc = scoreDocs[j];
        if (scoreDoc.score >= minScore) {
          scoreDoc.doc += starts[i];              // convert doc
          hq.put(scoreDoc);                       // update hit queue
          if (hq.size() > nDocs) {                // if hit queue overfull
            hq.pop();                             // remove lowest in hit queue
            minScore = ((ScoreDoc)hq.top()).score; // reset minScore
          }
        } else
          break;                                  // no more scores > minScore
      }
    }


Attached is an untested patch for this. It works by implementing
a MultiCollector that has the state to collect results from
the subsearchers without discarding interim results.
The patch is a diff -c against current CVS.

I'd like to add some test cases, but before I do that
I'd prefer to have comments.

I checked the test cases for MultiSearcher, but they don't
seem to exercise the code in the patch.
The existing test-unit build runs fine with the patch.

Regards,
Ype
Index: jakarta-lucene/src/java/org/apache/lucene/search/MultiSearcher.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/MultiSearcher.java,v
retrieving revision 1.10
diff -c -r1.10 MultiSearcher.java
*** jakarta-lucene/src/java/org/apache/lucene/search/MultiSearcher.java	29 Jan 2003 17:18:54 -0000	1.10
--- jakarta-lucene/src/java/org/apache/lucene/search/MultiSearcher.java	3 Feb 2003 22:43:35 -0000
***************
*** 141,175 ****
      return maxDoc;
    }
  
    public TopDocs search(Query query, Filter filter, int nDocs)
        throws IOException {
!     HitQueue hq = new HitQueue(nDocs);
!     float minScore = 0.0f;
!     int totalHits = 0;
! 
!     for (int i = 0; i < searchables.length; i++) { // search each searcher
!       TopDocs docs = searchables[i].search(query, filter, nDocs);
!       totalHits += docs.totalHits;		  // update totalHits
!       ScoreDoc[] scoreDocs = docs.scoreDocs;
!       for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
! 	ScoreDoc scoreDoc = scoreDocs[j];
! 	if (scoreDoc.score >= minScore) {
! 	  scoreDoc.doc += starts[i];		  // convert doc
! 	  hq.put(scoreDoc);			  // update hit queue
! 	  if (hq.size() > nDocs) {		  // if hit queue overfull
! 	    hq.pop();				  // remove lowest in hit queue
! 	    minScore = ((ScoreDoc)hq.top()).score; // reset minScore
  	  }
! 	} else
! 	  break;				  // no more scores > minScore
        }
      }
  
      ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
!     for (int i = hq.size()-1; i >= 0; i--)	  // put docs in array
        scoreDocs[i] = (ScoreDoc)hq.pop();
  
!     return new TopDocs(totalHits, scoreDocs);
    }
  
  
--- 141,198 ----
      return maxDoc;
    }
  
+ 
    public TopDocs search(Query query, Filter filter, int nDocs)
        throws IOException {
! 
!     class MultiCollector extends HitCollector {
!       HitQueue hq;
!       int nDocs = 0;
!       int totalHits = 0;
!       int start = 0;
!       float minScore = 0.0f;
!       ScoreDoc scoreDoc = null; /* reuse last one discarded from hitqueue hq */
! 
!       public MultiCollector(int nd) {
!         nDocs = nd;
! 	hq = new HitQueue(nd);
!       }
! 
!       public void collect(int doc, float score) {
!         totalHits++;
!         System.out.println(getClass() + " hits: " + totalHits + ", start: " + start
!                                + ", docNr: " + doc + ", score: " + score);
!         if (score >= minScore) {
! 	  if (scoreDoc == null) {
! 	    scoreDoc = new ScoreDoc(doc + start, score);
! 	  } else {
! 	    scoreDoc.doc = doc + start;
! 	    scoreDoc.score = score;
! 	  }
!           hq.put(scoreDoc);
! 	  if (hq.size() > nDocs) {
! 	    scoreDoc = (ScoreDoc) hq.pop();
! 	    minScore = ((ScoreDoc)hq.top()).score;
! 	  } else {
! 	    scoreDoc = null;
  	  }
! 	}
        }
      }
  
+     MultiCollector mc = new MultiCollector(nDocs);
+ 
+     for (int i = 0; i < searchables.length; i++) {
+       mc.start = starts[i];
+       searchables[i].search(query, filter, mc);
+     }
+ 
+     HitQueue hq = mc.hq;
      ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
!     for (int i = hq.size()-1; i >= 0; i--)
        scoreDocs[i] = (ScoreDoc)hq.pop();
  
!     return new TopDocs(mc.totalHits, scoreDocs);
    }
  
  
***************
*** 201,207 ****
  
      }
    }
!   
    public Query rewrite(Query original) throws IOException {
      Query[] queries = new Query[searchables.length];
      for (int i = 0; i < searchables.length; i++) {
--- 224,230 ----
  
      }
    }
! 
    public Query rewrite(Query original) throws IOException {
      Query[] queries = new Query[searchables.length];
      for (int i = 0; i < searchables.length; i++) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to