Author: gsingers
Date: Mon Jan 4 14:28:59 2010
New Revision: 895640
URL: http://svn.apache.org/viewvc?rev=895640&view=rev
Log:
SOLR-1692: fix produceSummary issue with Carrot2 clustering
Modified:
lucene/solr/trunk/contrib/clustering/CHANGES.txt
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
Modified: lucene/solr/trunk/contrib/clustering/CHANGES.txt
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/CHANGES.txt?rev=895640&r1=895639&r2=895640&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/clustering/CHANGES.txt (original)
+++ lucene/solr/trunk/contrib/clustering/CHANGES.txt Mon Jan 4 14:28:59 2010
@@ -12,6 +12,8 @@
* SOLR-1684: Switch to use the SolrIndexSearcher.doc(int, Set<String>) method
b/c it can use the document cache (gsingers)
+* SOLR-1692: Fix bug relating to carrot.produceSummary option (gsingers)
+
================== Release 1.4.0 ==================
Solr Clustering will be released for the first time in Solr 1.4. See
http://wiki.apache.org/solr/ClusteringComponent
Modified:
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=895640&r1=895639&r2=895640&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
Mon Jan 4 14:28:59 2010
@@ -113,7 +113,7 @@
*/
private List<Document> getDocuments(DocList docList,
Query query, final SolrQueryRequest
sreq) throws IOException {
- SolrHighlighter highligher = null;
+ SolrHighlighter highlighter = null;
SolrParams solrParams = sreq.getParams();
SolrCore core = sreq.getCore();
@@ -137,17 +137,25 @@
SolrQueryRequest req = null;
String[] snippetFieldAry = null;
if (produceSummary == true) {
- highligher = core.getHighlighter();
- Map args = new HashMap();
- snippetFieldAry = new String[]{snippetField};
- args.put(HighlightParams.FIELDS, snippetFieldAry);
- args.put(HighlightParams.HIGHLIGHT, "true");
- req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
- @Override
- public SolrIndexSearcher getSearcher() {
- return sreq.getSearcher();
- }
- };
+ highlighter = core.getHighlighter();
+ if (highlighter != null){
+ Map args = new HashMap();
+ snippetFieldAry = new String[]{snippetField};
+ args.put(HighlightParams.FIELDS, snippetFieldAry);
+ args.put(HighlightParams.HIGHLIGHT, "true");
+ args.put(HighlightParams.SIMPLE_PRE, ""); //we don't care about actually highlighting the area
+ args.put(HighlightParams.SIMPLE_POST, "");
+ args.put(HighlightParams.FRAGSIZE,
solrParams.getInt(CarrotParams.SUMMARY_FRAGSIZE,
solrParams.getInt(HighlightParams.FRAGSIZE, 100)));
+ req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1,
args) {
+ @Override
+ public SolrIndexSearcher getSearcher() {
+ return sreq.getSearcher();
+ }
+ };
+ } else {
+ log.warn("No highlighter configured, cannot produce summary");
+ produceSummary = false;
+ }
}
SolrIndexSearcher searcher = sreq.getSearcher();
@@ -165,11 +173,19 @@
if (produceSummary == true) {
docsHolder[0] = id.intValue();
DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
- highligher.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
+ NamedList highlights = highlighter.doHighlighting(docAsList, theQuery,
req, snippetFieldAry);
+ if (highlights != null && highlights.size() == 1) {//should only be one value given our setup
+ //should only be one document with one field
+ NamedList tmp = (NamedList) highlights.getVal(0);
+ String [] highlt = (String[]) tmp.get(snippetField);
+ if (highlt != null && highlt.length == 1) {
+ snippet = highlt[0];
+ }
+ }
}
Document carrotDocument = new Document(getValue(doc, titleField),
snippet, doc.get(urlField));
- carrotDocument.addField("solrId", doc.get(idFieldName));
+ carrotDocument.setField("solrId", doc.get(idFieldName));
result.add(carrotDocument);
}
Modified:
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java?rev=895640&r1=895639&r2=895640&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
Mon Jan 4 14:28:59 2010
@@ -3,6 +3,7 @@
import java.util.Set;
import com.google.common.collect.ImmutableSet;
+import org.apache.solr.common.params.HighlightParams;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -33,8 +34,9 @@
String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
+ String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragzise";
public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME,
- PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS);
+ PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS,
SUMMARY_FRAGSIZE);
}
Modified:
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java?rev=895640&r1=895639&r2=895640&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
Mon Jan 4 14:28:59 2010
@@ -17,9 +17,11 @@
* limitations under the License.
*/
+import org.apache.lucene.index.Term;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TermQuery;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
@@ -43,6 +45,13 @@
checkEngine(getClusteringEngine("default"), 10);
}
+ public void testProduceSummary() throws Exception {
+ ModifiableSolrParams solrParams = new ModifiableSolrParams();
+ solrParams.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
+ solrParams.add(CarrotParams.SUMMARY_FRAGSIZE, "200");//how do we validate this?
+ checkEngine(getClusteringEngine("default"), numberOfDocs -2 /*two don't have mining in the snippet*/, 16, new TermQuery(new Term("snippet", "mine")), solrParams);
+ }
+
public void testCarrotStc() throws Exception {
checkEngine(getClusteringEngine("stc"), 1);
}
@@ -55,8 +64,7 @@
public void testWithSubclusters() throws Exception {
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CarrotParams.OUTPUT_SUB_CLUSTERS, true);
- checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
- params), 1, 1, 2);
+ checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs),
1, 1, 2);
}
public void testNumDescriptions() throws Exception {
@@ -87,21 +95,27 @@
}
private List checkEngine(CarrotClusteringEngine engine,
- int expectedNumClusters) throws IOException {
- return checkEngine(engine, expectedNumClusters, new
ModifiableSolrParams());
+ int expectedNumClusters) throws IOException {
+ return checkEngine(engine, numberOfDocs, expectedNumClusters, new
MatchAllDocsQuery(), new ModifiableSolrParams());
}
private List checkEngine(CarrotClusteringEngine engine,
- int expectedNumClusters, SolrParams
clusteringParams) throws IOException {
+ int expectedNumClusters, SolrParams
clusteringParams) throws IOException {
+ return checkEngine(engine, numberOfDocs, expectedNumClusters, new
MatchAllDocsQuery(), clusteringParams);
+ }
+
+
+ private List checkEngine(CarrotClusteringEngine engine, int expectedNumDocs,
+ int expectedNumClusters, Query query, SolrParams
clusteringParams) throws IOException {
// Get all documents to cluster
RefCounted<SolrIndexSearcher> ref = h.getCore().getSearcher();
- MatchAllDocsQuery query = new MatchAllDocsQuery();
+
DocList docList;
try {
SolrIndexSearcher searcher = ref.get();
docList = searcher.getDocList(query, (Query) null, new Sort(), 0,
numberOfDocs);
- assertEquals("docList size", this.numberOfDocs, docList.matches());
+ assertEquals("docList size", expectedNumDocs, docList.matches());
} finally {
ref.decref();
}
@@ -114,7 +128,7 @@
LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(),
solrParams);
List results = (List) engine.cluster(query, docList, req);
req.close();
- assertEquals("number of clusters", expectedNumClusters, results.size());
+ assertEquals("number of clusters: " + results, expectedNumClusters,
results.size());
checkClusters(results, false);
return results;
}