[GitHub] [lucene-solr] jpountz commented on a change in pull request #1567: LUCENE-9402: Let MultiCollector handle minCompetitiveScore
jpountz commented on a change in pull request #1567: URL: https://github.com/apache/lucene-solr/pull/1567#discussion_r440628129 ## File path: lucene/core/src/test/org/apache/lucene/search/MultiCollectorTest.java ## @@ -163,4 +163,115 @@ public void testCacheScoresIfNecessary() throws IOException { reader.close(); dir.close(); } + + public void testScorerWrappingForTopScores() throws IOException { +Directory dir = newDirectory(); +RandomIndexWriter iw = new RandomIndexWriter(random(), dir); +iw.addDocument(new Document()); +DirectoryReader reader = iw.getReader(); +iw.close(); +final LeafReaderContext ctx = reader.leaves().get(0); +Collector c1 = collector(ScoreMode.TOP_SCORES, MultiCollector.MinCompetitiveScoreAwareScorable.class); +Collector c2 = collector(ScoreMode.TOP_SCORES, MultiCollector.MinCompetitiveScoreAwareScorable.class); +MultiCollector.wrap(c1, c2).getLeafCollector(ctx).setScorer(new ScoreAndDoc()); + +c1 = collector(ScoreMode.TOP_SCORES, ScoreCachingWrappingScorer.class); +c2 = collector(ScoreMode.COMPLETE, ScoreCachingWrappingScorer.class); +MultiCollector.wrap(c1, c2).getLeafCollector(ctx).setScorer(new ScoreAndDoc()); + +reader.close(); +dir.close(); + } + + public void testMinCompetitiveScore() throws IOException { +float[] currentMinScores = new float[3]; +float[] minCompetitiveScore = new float[1]; +Scorable scorer = new Scorable() { + + @Override + public float score() throws IOException { +return 0; + } + + @Override + public int docID() { +return 0; + } + + @Override + public void setMinCompetitiveScore(float minScore) throws IOException { +minCompetitiveScore[0] = minScore; + } +}; +Scorable s0 = new MultiCollector.MinCompetitiveScoreAwareScorable(scorer, 0, currentMinScores); +Scorable s1 = new MultiCollector.MinCompetitiveScoreAwareScorable(scorer, 1, currentMinScores); +Scorable s2 = new MultiCollector.MinCompetitiveScoreAwareScorable(scorer, 2, currentMinScores); +assertEquals(0f, minCompetitiveScore[0], 0); +s0.setMinCompetitiveScore(0.5f); +assertEquals(0f, minCompetitiveScore[0], 0); +s1.setMinCompetitiveScore(0.8f); +assertEquals(0f, minCompetitiveScore[0], 0); +s2.setMinCompetitiveScore(0.3f); +assertEquals(0.3f, minCompetitiveScore[0], 0); +s2.setMinCompetitiveScore(0.1f); +assertEquals(0.3f, minCompetitiveScore[0], 0); +s1.setMinCompetitiveScore(Float.MAX_VALUE); +assertEquals(0.3f, minCompetitiveScore[0], 0); +s2.setMinCompetitiveScore(Float.MAX_VALUE); +assertEquals(0.5f, minCompetitiveScore[0], 0); +s0.setMinCompetitiveScore(Float.MAX_VALUE); +assertEquals(Float.MAX_VALUE, minCompetitiveScore[0], 0); + } + + public void testCollectionTermination() throws IOException { +Directory dir = newDirectory(); +RandomIndexWriter iw = new RandomIndexWriter(random(), dir); +iw.addDocument(new Document()); +DirectoryReader reader = iw.getReader(); +iw.close(); +final LeafReaderContext ctx = reader.leaves().get(0); +DummyCollector c1 = new DummyCollector() { + @Override + public void collect(int doc) throws IOException { +if (doc == 1) { + throw new CollectionTerminatedException(); +} +super.collect(doc); + } + +}; + +DummyCollector c2 = new DummyCollector() { + @Override + public void collect(int doc) throws IOException { +if (doc == 2) { + throw new CollectionTerminatedException(); +} +super.collect(doc); + } + +}; + +Collector mc = MultiCollector.wrap(c1, c2); +LeafCollector lc = mc.getLeafCollector(ctx); +lc.setScorer(new ScoreAndDoc()); +lc.collect(0); // OK +assertTrue("c1's collect should be called", c1.collectCalled); +assertTrue("c2's collect should be called", c2.collectCalled); +c1.collectCalled = false; +c2.collectCalled = false; +lc.collect(1); // OK, but c1 should terminate Review comment: maybe create a new variant of this test that calls setScorer again after this collect call? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] jpountz commented on a change in pull request #1567: LUCENE-9402: Let MultiCollector handle minCompetitiveScore
jpountz commented on a change in pull request #1567: URL: https://github.com/apache/lucene-solr/pull/1567#discussion_r440242303 ## File path: lucene/core/src/java/org/apache/lucene/search/MultiCollector.java ## @@ -134,69 +134,110 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept case 1: return leafCollectors.get(0); default: -return new MultiLeafCollector(leafCollectors, cacheScores); +return new MultiLeafCollector(leafCollectors, cacheScores, scoreMode() == ScoreMode.TOP_SCORES); } } private static class MultiLeafCollector implements LeafCollector { private final boolean cacheScores; private final LeafCollector[] collectors; -private int numCollectors; +private final float[] minScores; +private final boolean skipNonCompetitiveScores; -private MultiLeafCollector(List collectors, boolean cacheScores) { +private MultiLeafCollector(List collectors, boolean cacheScores, boolean skipNonCompetitive) { this.collectors = collectors.toArray(new LeafCollector[collectors.size()]); this.cacheScores = cacheScores; - this.numCollectors = this.collectors.length; + this.skipNonCompetitiveScores = skipNonCompetitive; + this.minScores = this.skipNonCompetitiveScores ? new float[this.collectors.length] : null; } @Override public void setScorer(Scorable scorer) throws IOException { if (cacheScores) { scorer = new ScoreCachingWrappingScorer(scorer); } - scorer = new FilterScorable(scorer) { -@Override -public void setMinCompetitiveScore(float minScore) { - // Ignore calls to setMinCompetitiveScore so that if we wrap two - // collectors and one of them wants to skip low-scoring hits, then - // the other collector still sees all hits. We could try to reconcile - // min scores and take the maximum min score across collectors, but - // this is very unlikely to be helpful in practice. + if (skipNonCompetitiveScores) { +for (int i = 0; i < collectors.length; ++i) { + final LeafCollector c = collectors[i]; + assert c != null; Review comment: I don't think that this assertion is right, the collector could be null if the collector already threw a CollectionTerminatedException? (we don't disallow calling `setCollector` after collection started) This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] jpountz commented on a change in pull request #1567: LUCENE-9402: Let MultiCollector handle minCompetitiveScore
jpountz commented on a change in pull request #1567: URL: https://github.com/apache/lucene-solr/pull/1567#discussion_r438575844 ## File path: lucene/core/src/test/org/apache/lucene/search/MultiCollectorTest.java ## @@ -163,4 +163,64 @@ public void testCacheScoresIfNecessary() throws IOException { reader.close(); dir.close(); } + + public void testScorerWrappingForTopScores() throws IOException { +Directory dir = newDirectory(); +RandomIndexWriter iw = new RandomIndexWriter(random(), dir); +iw.addDocument(new Document()); +iw.commit(); Review comment: no need to commit, the follow-up call to getReader() creates a NRT segment anyway ## File path: lucene/core/src/java/org/apache/lucene/search/MultiCollector.java ## @@ -143,32 +143,43 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept private final boolean cacheScores; private final LeafCollector[] collectors; private int numCollectors; +private final float[] minScores; +private final boolean skipNonCompetitiveScores; -private MultiLeafCollector(List collectors, boolean cacheScores) { +private MultiLeafCollector(List collectors, boolean cacheScores, boolean skipNonCompetitive) { this.collectors = collectors.toArray(new LeafCollector[collectors.size()]); this.cacheScores = cacheScores; this.numCollectors = this.collectors.length; + this.skipNonCompetitiveScores = skipNonCompetitive; + this.minScores = new float[this.skipNonCompetitiveScores ? this.numCollectors : 0]; Review comment: Let's make the array null when `skipNonCompetitiveScores` is false? ```suggestion this.minScores = this.skipNonCompetitiveScores ? new float[ this.numCollectors] : null; ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org