tjones 2004/03/23 08:49:56 Modified: src/java/org/apache/lucene/search MultiFieldSortedHitQueue.java src/test/org/apache/lucene/search TestSort.java Log: fix to properly normalize scores even when hits are sorted. Also wrote tests to verify scores are the same whether sorted or not. Revision Changes Path 1.3 +14 -1 jakarta-lucene/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java Index: MultiFieldSortedHitQueue.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- MultiFieldSortedHitQueue.java 24 Feb 2004 19:34:58 -0000 1.2 +++ MultiFieldSortedHitQueue.java 23 Mar 2004 16:49:56 -0000 1.3 @@ -64,6 +64,11 @@ /** Stores the sort criteria being used. */ protected SortField[] fields; + /** Stores the maximum score value encountered, for normalizing. + * we only care about scores greater than 1.0 - if all the scores + * are less than 1.0, we don't have to normalize. */ + protected float maxscore = 1.0f; + /** * Returns whether <code>a</code> is less relevant than <code>b</code>. 
@@ -74,6 +79,12 @@ protected final boolean lessThan (final Object a, final Object b) { final ScoreDoc docA = (ScoreDoc) a; final ScoreDoc docB = (ScoreDoc) b; + + // keep track of maximum score + if (docA.score > maxscore) maxscore = docA.score; + if (docB.score > maxscore) maxscore = docB.score; + + // run comparators final int n = comparators.length; int c = 0; for (int i=0; i<n && c==0; ++i) { @@ -100,6 +111,7 @@ for (int i=0; i<n; ++i) fields[i] = comparators[i].sortValue(doc); doc.fields = fields; + if (maxscore > 1.0f) doc.score /= maxscore; // normalize scores return doc; } @@ -108,4 +120,5 @@ SortField[] getFields() { return fields; } + } 1.3 +136 -1 jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java Index: TestSort.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/search/TestSort.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- TestSort.java 23 Mar 2004 15:59:49 -0000 1.2 +++ TestSort.java 23 Mar 2004 16:49:56 -0000 1.3 @@ -28,6 +28,8 @@ import java.rmi.registry.Registry; import java.io.IOException; import java.util.regex.Pattern; +import java.util.HashMap; +import java.util.Iterator; import junit.framework.TestCase; import junit.framework.Test; @@ -241,6 +243,115 @@ runMultiSorts (multi); } + // test that the relevancy scores are the same even if + // hits are sorted + public void testNormalizedScores() throws Exception { + + // capture relevancy scores + HashMap scoresX = getScores (full.search (queryX)); + HashMap scoresY = getScores (full.search (queryY)); + HashMap scoresA = getScores (full.search (queryA)); + + // we'll test searching locally, remote and multi + // note: the multi test depends on each separate index containing + // the same documents as our local index, so the computed normalization + // will be the same. so we make a multi searcher over two equal document + // sets - not realistic, but necessary for testing. 
+ MultiSearcher remote = new MultiSearcher (new Searchable[] { getRemote() }); + MultiSearcher multi = new MultiSearcher (new Searchable[] { full, full }); + + // change sorting and make sure relevancy stays the same + + sort = new Sort(); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + sort.setSort(SortField.FIELD_DOC); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + sort.setSort ("int"); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, 
getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + sort.setSort ("float"); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + sort.setSort ("string"); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + sort.setSort (new String[] {"int","float"}); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues 
(scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + sort.setSort (new SortField[] { new SortField ("int", true), new SortField (null, SortField.DOC, true) }); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + sort.setSort (new String[] {"float","string"}); + assertSameValues (scoresX, getScores(full.search(queryX,sort))); + assertSameValues (scoresX, getScores(remote.search(queryX,sort))); + assertSameValues (scoresX, getScores(multi.search(queryX,sort))); + assertSameValues (scoresY, getScores(full.search(queryY,sort))); + assertSameValues (scoresY, getScores(remote.search(queryY,sort))); + assertSameValues (scoresY, getScores(multi.search(queryY,sort))); + assertSameValues (scoresA, getScores(full.search(queryA,sort))); + assertSameValues (scoresA, getScores(remote.search(queryA,sort))); + assertSameValues (scoresA, getScores(multi.search(queryA,sort))); + + } + // runs a variety of sorts useful for multisearchers private void runMultiSorts (Searcher multi) throws Exception { sort.setSort (SortField.FIELD_DOC); @@ -313,6 +424,30 @@ assertTrue (Pattern.compile(pattern).matcher(buff.toString()).matches()); } + private HashMap getScores (Hits hits) + throws IOException { + HashMap scoreMap = new HashMap(); + int n = hits.length(); + for (int i=0; i<n; ++i) { + Document doc = hits.doc(i); + String[] v = doc.getValues("tracer"); + assertEquals 
(v.length, 1); + scoreMap.put (v[0], new Float(hits.score(i))); + } + return scoreMap; + } + + // make sure all the values in the maps match + private void assertSameValues (HashMap m1, HashMap m2) { + int n = m1.size(); + int m = m2.size(); + assertEquals (n, m); + Iterator iter = m1.keySet().iterator(); + while (iter.hasNext()) { + Object key = iter.next(); + assertEquals (m1.get(key), m2.get(key)); + } + } private Searchable getRemote () throws Exception { try {
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]