tjones      2004/03/23 08:49:56

  Modified:    src/java/org/apache/lucene/search
                        MultiFieldSortedHitQueue.java
               src/test/org/apache/lucene/search TestSort.java
  Log:
  Fix score normalization so that scores are properly normalized even when hits are sorted.
  Also add tests to verify that scores are the same whether or not the hits are sorted.
  
  Revision  Changes    Path
  1.3       +14 -1     jakarta-lucene/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java
  
  Index: MultiFieldSortedHitQueue.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- MultiFieldSortedHitQueue.java     24 Feb 2004 19:34:58 -0000      1.2
  +++ MultiFieldSortedHitQueue.java     23 Mar 2004 16:49:56 -0000      1.3
  @@ -64,6 +64,11 @@
        /** Stores the sort criteria being used. */
        protected SortField[] fields;
   
  +     /** Stores the maximum score value encountered, for normalizing.
  +      *  We only care about scores greater than 1.0: if all the scores
  +      *  are at most 1.0, we don't have to normalize. */
  +     protected float maxscore = 1.0f;
  +
   
        /**
         * Returns whether <code>a</code> is less relevant than <code>b</code>.
  @@ -74,6 +79,12 @@
        protected final boolean lessThan (final Object a, final Object b) {
                final ScoreDoc docA = (ScoreDoc) a;
                final ScoreDoc docB = (ScoreDoc) b;
  +
  +             // keep track of maximum score
  +             if (docA.score > maxscore) maxscore = docA.score;
  +             if (docB.score > maxscore) maxscore = docB.score;
  +
  +             // run comparators
                final int n = comparators.length;
                int c = 0;
                for (int i=0; i<n && c==0; ++i) {
  @@ -100,6 +111,7 @@
                for (int i=0; i<n; ++i)
                        fields[i] = comparators[i].sortValue(doc);
                doc.fields = fields;
  +             if (maxscore > 1.0f) doc.score /= maxscore;   // normalize scores
                return doc;
        }
   
  @@ -108,4 +120,5 @@
        SortField[] getFields() {
                return fields;
        }
  +
   }
  
  
  
  1.3       +136 -1    jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java
  
  Index: TestSort.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- TestSort.java     23 Mar 2004 15:59:49 -0000      1.2
  +++ TestSort.java     23 Mar 2004 16:49:56 -0000      1.3
  @@ -28,6 +28,8 @@
   import java.rmi.registry.Registry;
   import java.io.IOException;
   import java.util.regex.Pattern;
  +import java.util.HashMap;
  +import java.util.Iterator;
   
   import junit.framework.TestCase;
   import junit.framework.Test;
  @@ -241,6 +243,115 @@
                runMultiSorts (multi);
        }
   
  +     // test that the relevancy scores are the same even if
  +     // hits are sorted
  +     public void testNormalizedScores() throws Exception {
  +
  +             // capture relevancy scores
  +             HashMap scoresX = getScores (full.search (queryX));
  +             HashMap scoresY = getScores (full.search (queryY));
  +             HashMap scoresA = getScores (full.search (queryA));
  +
  +             // we'll test searching locally, remote and multi
  +             // note: the multi test depends on each separate index containing
  +             // the same documents as our local index, so the computed normalization
  +             // will be the same.  so we make a multi searcher over two equal document
  +             // sets - not realistic, but necessary for testing.
  +             MultiSearcher remote = new MultiSearcher (new Searchable[] { getRemote() });
  +             MultiSearcher multi  = new MultiSearcher (new Searchable[] { full, full });
  +
  +             // change sorting and make sure relevancy stays the same
  +
  +             sort = new Sort();
  +             assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +             assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +             assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +             sort.setSort(SortField.FIELD_DOC);
  +             assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +             assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +             assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +             sort.setSort ("int");
  +             assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +             assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +             assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +             sort.setSort ("float");
  +             assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +             assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +             assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +             sort.setSort ("string");
  +             assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +             assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +             assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +             sort.setSort (new String[] {"int","float"});
  +             assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +             assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +             assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +             sort.setSort (new SortField[] { new SortField ("int", true), new SortField (null, SortField.DOC, true) });
  +             assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +             assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +             assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +             sort.setSort (new String[] {"float","string"});
  +             assertSameValues (scoresX, getScores(full.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(remote.search(queryX,sort)));
  +             assertSameValues (scoresX, getScores(multi.search(queryX,sort)));
  +             assertSameValues (scoresY, getScores(full.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(remote.search(queryY,sort)));
  +             assertSameValues (scoresY, getScores(multi.search(queryY,sort)));
  +             assertSameValues (scoresA, getScores(full.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(remote.search(queryA,sort)));
  +             assertSameValues (scoresA, getScores(multi.search(queryA,sort)));
  +
  +     }
  +
        // runs a variety of sorts useful for multisearchers
        private void runMultiSorts (Searcher multi) throws Exception {
                sort.setSort (SortField.FIELD_DOC);
  @@ -313,6 +424,30 @@
                assertTrue (Pattern.compile(pattern).matcher(buff.toString()).matches());
        }
   
  +     private HashMap getScores (Hits hits)
  +     throws IOException {
  +             HashMap scoreMap = new HashMap();
  +             int n = hits.length();
  +             for (int i=0; i<n; ++i) {
  +                     Document doc = hits.doc(i);
  +                     String[] v = doc.getValues("tracer");
  +                     assertEquals (v.length, 1);
  +                     scoreMap.put (v[0], new Float(hits.score(i)));
  +             }
  +             return scoreMap;
  +     }
  +
  +     // make sure all the values in the maps match
  +     private void assertSameValues (HashMap m1, HashMap m2) {
  +             int n = m1.size();
  +             int m = m2.size();
  +             assertEquals (n, m);
  +             Iterator iter = m1.keySet().iterator();
  +             while (iter.hasNext()) {
  +                     Object key = iter.next();
  +                     assertEquals (m1.get(key), m2.get(key));
  +             }
  +     }
   
        private Searchable getRemote () throws Exception {
                try {
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to