Staszek, could you add an entry to CHANGES as well? Dawid On Wed, Aug 3, 2011 at 11:08 AM, <[email protected]> wrote:
> Author: stanislaw > Date: Wed Aug 3 09:08:39 2011 > New Revision: 1153399 > > URL: http://svn.apache.org/viewvc?rev=1153399&view=rev > Log: > SOLR-1692: CarrotClusteringEngine produce summary does nothing: improved > unit tests > > Added: > > > lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java > Modified: > > > lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java > > > lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/conf/solrconfig.xml > > > lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java > > Modified: > lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=1153399&r1=1153398&r2=1153399&view=diff > > ============================================================================== > --- > lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java > (original) > +++ > lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java > Wed Aug 3 09:08:39 2011 > @@ -264,7 +264,7 @@ public class CarrotClusteringEngine exte > > SolrQueryRequest req = null; > String[] snippetFieldAry = null; > - if (produceSummary == true) { > + if (produceSummary) { > highlighter = HighlightComponent.getHighlighter(core); > if (highlighter != null){ > Map<String, Object> args = Maps.newHashMap(); > > Modified: > lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/conf/solrconfig.xml > URL: > 
http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/conf/solrconfig.xml?rev=1153399&r1=1153398&r2=1153399&view=diff > > ============================================================================== > --- > lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/conf/solrconfig.xml > (original) > +++ > lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/conf/solrconfig.xml > Wed Aug 3 09:08:39 2011 > @@ -397,6 +397,10 @@ > <str > name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str> > </lst> > <lst name="engine"> > + <str name="name">echo</str> > + <str > name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.EchoClusteringAlgorithm</str> > + </lst> > + <lst name="engine"> > <str name="name">lexical-resource-check</str> > <str > name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.LexicalResourcesCheckClusteringAlgorithm</str> > </lst> > > Modified: > lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java?rev=1153399&r1=1153398&r2=1153399&view=diff > > ============================================================================== > --- > lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java > (original) > +++ > lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java > Wed Aug 3 09:08:39 2011 > @@ -58,14 +58,52 @@ public class CarrotClusteringEngineTest > > @Test > public void testProduceSummary() throws Exception { > - ModifiableSolrParams solrParams = new ModifiableSolrParams(); > - solrParams.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet"); > - 
solrParams.add(CarrotParams.SUMMARY_FRAGSIZE, "200");//how do we > validate this? > + // We'll make two queries, one with- and another one without summary > + // and assert that documents are shorter when highlighter is in use. > + final List<NamedList<Object>> noSummaryClusters = > clusterWithHighlighting(false, 80); > + final List<NamedList<Object>> summaryClusters = > clusterWithHighlighting(true, 80); > + > + assertEquals("Equal number of clusters", noSummaryClusters.size(), > summaryClusters.size()); > + for (int i = 0; i < noSummaryClusters.size(); i++) { > + assertTrue("Summary shorter than original document", > + getLabels(noSummaryClusters.get(i)).get(1).length() > > + getLabels(summaryClusters.get(i)).get(1).length()); > + } > + } > + > + @Test > + public void testSummaryFragSize() throws Exception { > + // We'll make two queries, one short summaries and another one with > longer > + // summaries and will check that the results differ. > + final List<NamedList<Object>> shortSummaryClusters = > clusterWithHighlighting(true, 30); > + final List<NamedList<Object>> longSummaryClusters = > clusterWithHighlighting(true, 80); > + > + assertEquals("Equal number of clusters", shortSummaryClusters.size(), > longSummaryClusters.size()); > + for (int i = 0; i < shortSummaryClusters.size(); i++) { > + assertTrue("Summary shorter than original document", > + getLabels(shortSummaryClusters.get(i)).get(1).length() < > + getLabels(longSummaryClusters.get(i)).get(1).length()); > + } > + } > + > + private List<NamedList<Object>> clusterWithHighlighting( > + boolean enableHighlighting, int fragSize) throws IOException { > + > + final TermQuery query = new TermQuery(new Term("snippet", "mine")); > + // Two documents don't have mining in the snippet > + int expectedNumDocuments = numberOfDocs - 2; > + > + final ModifiableSolrParams summaryParams = new ModifiableSolrParams(); > + summaryParams.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet"); > + 
summaryParams.add(CarrotParams.PRODUCE_SUMMARY, > + Boolean.toString(enableHighlighting)); > + summaryParams > + .add(CarrotParams.SUMMARY_FRAGSIZE, Integer.toString(fragSize)); > + final List<NamedList<Object>> summaryClusters = checkEngine( > + getClusteringEngine("echo"), expectedNumDocuments, > + expectedNumDocuments, query, summaryParams); > > - // Note: the expected number of clusters may change after upgrading > Carrot2 > - // due to e.g. internal improvements or tuning of Carrot2 > clustering. > - final int expectedNumClusters = 15; > - checkEngine(getClusteringEngine("default"), numberOfDocs -2 /*two > don't have mining in the snippet*/, expectedNumClusters, new TermQuery(new > Term("snippet", "mine")), solrParams); > + return summaryClusters; > } > > @Test > @@ -227,7 +265,6 @@ public class CarrotClusteringEngineTest > assertEquals("docList size", expectedNumDocs, docList.matches()); > > ModifiableSolrParams solrParams = new ModifiableSolrParams(); > - solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true"); > solrParams.add(clusteringParams); > > // Perform clustering > > Added: > lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java > URL: > http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java?rev=1153399&view=auto > > ============================================================================== > --- > lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java > (added) > +++ > lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java > Wed Aug 3 09:08:39 2011 > @@ -0,0 +1,62 @@ > +package org.apache.solr.handler.clustering.carrot2; > +/** > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreements. 
See the NOTICE file distributed with > + * this work for additional information regarding copyright ownership. > + * The ASF licenses this file to You under the Apache License, Version 2.0 > + * (the "License"); you may not use this file except in compliance with > + * the License. You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > +import java.util.List; > + > +import org.carrot2.core.Cluster; > +import org.carrot2.core.Document; > +import org.carrot2.core.IClusteringAlgorithm; > +import org.carrot2.core.ProcessingComponentBase; > +import org.carrot2.core.ProcessingException; > +import org.carrot2.core.attribute.AttributeNames; > +import org.carrot2.core.attribute.Processing; > +import org.carrot2.util.attribute.Attribute; > +import org.carrot2.util.attribute.Bindable; > +import org.carrot2.util.attribute.Input; > +import org.carrot2.util.attribute.Output; > + > +import com.google.common.collect.Lists; > + > +/** > + * A mock Carrot2 clustering algorithm that outputs input documents as > clusters. > + * Useful only in tests. 
> + */ > +@Bindable(prefix = "EchoClusteringAlgorithm") > +public class EchoClusteringAlgorithm extends ProcessingComponentBase > implements > + IClusteringAlgorithm { > + @Input > + @Processing > + @Attribute(key = AttributeNames.DOCUMENTS) > + private List<Document> documents; > + > + @Output > + @Processing > + @Attribute(key = AttributeNames.CLUSTERS) > + private List<Cluster> clusters; > + > + @Override > + public void process() throws ProcessingException { > + clusters = Lists.newArrayListWithCapacity(documents.size()); > + > + for (Document document : documents) { > + final Cluster cluster = new Cluster(); > + cluster.addPhrases(document.getTitle(), document.getSummary()); > + cluster.addDocuments(document); > + clusters.add(cluster); > + } > + } > +} > > >
