Author: yonik
Date: Thu Jul 2 14:08:37 2009
New Revision: 790599
URL: http://svn.apache.org/viewvc?rev=790599&view=rev
Log:
reformat to remove tabs
Modified:
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
Modified:
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
Thu Jul 2 14:08:37 2009
@@ -45,204 +45,206 @@
/**
* Search results clustering engine based on Carrot2 clustering algorithms.
- *
+ * <p/>
* Output from this class is subject to change.
- *
+ *
* @link http://project.carrot2.org
*/
@SuppressWarnings("unchecked")
public class CarrotClusteringEngine extends SearchClusteringEngine {
- private transient static Logger log = LoggerFactory
- .getLogger(CarrotClusteringEngine.class);
+ private transient static Logger log = LoggerFactory
+ .getLogger(CarrotClusteringEngine.class);
- /** Carrot2 controller that manages instances of clustering algorithms
*/
- private CachingController controller = new CachingController();
- private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
-
- private String idFieldName;
-
- public Object cluster(Query query, DocList docList, SolrQueryRequest
sreq) {
- try {
- // Prepare attributes for Carrot2 clustering call
- Map<String, Object> attributes = new HashMap<String,
Object>();
- List<Document> documents = getDocuments(docList, query,
sreq);
- attributes.put(AttributeNames.DOCUMENTS, documents);
- attributes.put(AttributeNames.QUERY, query.toString());
-
- // Pass extra overriding attributes from the request,
if any
- extractCarrotAttributes(sreq.getParams(), attributes);
-
- // Perform clustering and convert to named list
- return
clustersToNamedList(controller.process(attributes,
-
clusteringAlgorithmClass).getClusters(), sreq.getParams());
- } catch (Exception e) {
- log.error("Carrot2 clustering failed", e);
- throw new RuntimeException(e);
- }
- }
-
- @Override
- public String init(NamedList config, final SolrCore core) {
- String result = super.init(config, core);
- SolrParams initParams = SolrParams.toSolrParams(config);
-
- // Initialize Carrot2 controller. Pass initialization
attributes, if any.
- HashMap<String, Object> initAttributes = new HashMap<String,
Object>();
- extractCarrotAttributes(initParams, initAttributes);
- this.controller.init(initAttributes);
-
- this.idFieldName =
core.getSchema().getUniqueKeyField().getName();
-
- // Make sure the requested Carrot2 clustering algorithm class
is available
- String carrotAlgorithmClassName =
initParams.get(CarrotParams.ALGORITHM);
- try {
- Class<?> algorithmClass =
Thread.currentThread().getContextClassLoader()
- .loadClass(carrotAlgorithmClassName);
- if
(!IClusteringAlgorithm.class.isAssignableFrom(algorithmClass)) {
- throw new IllegalArgumentException("Class
provided as "
- + CarrotParams.ALGORITHM + "
must implement "
- +
IClusteringAlgorithm.class.getName());
- }
- this.clusteringAlgorithmClass = (Class<? extends
IClusteringAlgorithm>) algorithmClass;
- } catch (ClassNotFoundException e) {
- throw new RuntimeException(
- "Failed to load Carrot clustering
algorithm class", e);
- }
-
- return result;
- }
-
- /**
- * Prepares Carrot2 documents for clustering.
- */
- private List<Document> getDocuments(DocList docList,
- Query query, final SolrQueryRequest sreq) throws
IOException {
- SolrHighlighter highligher = null;
- SolrParams solrParams = sreq.getParams();
- SolrCore core = sreq.getCore();
-
- // Names of fields to deliver content for clustering
- String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME,
"url");
- String titleField =
solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
- String snippetField =
solrParams.get(CarrotParams.SNIPPET_FIELD_NAME,
- titleField);
- if (StringUtils.isBlank(snippetField)) {
- throw new
SolrException(SolrException.ErrorCode.SERVER_ERROR,
CarrotParams.SNIPPET_FIELD_NAME
- + " must not be blank.");
- }
- Set<String> fieldsToLoad = Sets.newHashSet(urlField, titleField,
- snippetField, idFieldName);
-
- // Get the documents
- DocIterator docsIter = docList.iterator();
- boolean produceSummary =
solrParams.getBool(CarrotParams.PRODUCE_SUMMARY,
- false);
-
- SolrQueryRequest req = null;
- String[] snippetFieldAry = null;
- if (produceSummary == true) {
- highligher = core.getHighlighter();
- Map args = new HashMap();
- snippetFieldAry = new String[] { snippetField };
- args.put(HighlightParams.FIELDS, snippetFieldAry);
- args.put(HighlightParams.HIGHLIGHT, "true");
- req = new LocalSolrQueryRequest(core, query.toString(),
"", 0, 1, args) {
- @Override
- public SolrIndexSearcher getSearcher() {
- return sreq.getSearcher();
- }
- };
- }
-
- SolrIndexSearcher searcher = sreq.getSearcher();
- List<Document> result = new ArrayList<Document>(docList.size());
- FieldSelector fieldSelector = new
SetBasedFieldSelector(fieldsToLoad,
- Collections.emptySet());
- float[] scores = { 1.0f };
- int[] docsHolder = new int[1];
- Query theQuery = query;
-
- while (docsIter.hasNext()) {
- Integer id = docsIter.next();
- org.apache.lucene.document.Document doc =
searcher.doc(id,
- fieldSelector);
- String snippet = getValue(doc, snippetField);
- if (produceSummary == true) {
- docsHolder[0] = id.intValue();
- DocList docAsList = new DocSlice(0, 1,
docsHolder, scores, 1, 1.0f);
- highligher.doHighlighting(docAsList,
theQuery, req, snippetFieldAry);
- }
- Document carrotDocument = new
Document(getValue(doc, titleField),
- snippet, doc.get(urlField));
- carrotDocument.addField("solrId",
doc.get(idFieldName));
- result.add(carrotDocument);
- }
-
- return result;
- }
-
- protected String getValue(org.apache.lucene.document.Document doc,
- String field) {
- StringBuilder result = new StringBuilder();
- String[] vals = doc.getValues(field);
- for (int i = 0; i < vals.length; i++) {
- // Join multiple values with a period so that Carrot2
does not pick up
- // phrases that cross field value boundaries (in most
cases it would
- // create useless phrases).
- result.append(vals[i]).append(" . ");
- }
- return result.toString().trim();
- }
-
- private List clustersToNamedList(List<Cluster> carrotClusters,
- SolrParams solrParams) {
- List result = new ArrayList();
- clustersToNamedList(carrotClusters, result, solrParams.getBool(
- CarrotParams.OUTPUT_SUB_CLUSTERS, false),
solrParams.getInt(
- CarrotParams.NUM_DESCRIPTIONS,
Integer.MAX_VALUE));
- return result;
- }
-
- private void clustersToNamedList(List<Cluster> outputClusters,
- List parent, boolean outputSubClusters, int maxLabels) {
- for (Cluster outCluster : outputClusters) {
- NamedList cluster = new SimpleOrderedMap();
- parent.add(cluster);
-
- List<String> labels = outCluster.getPhrases();
- if (labels.size() > maxLabels)
- labels = labels.subList(0,maxLabels);
- cluster.add("labels", labels);
-
- List<Document> docs = outCluster.getDocuments();
- List docList = new ArrayList();
- cluster.add("docs", docList);
- for (Document doc : docs) {
- docList.add(doc.getField("solrId"));
- }
-
- if (outputSubClusters) {
- List subclusters = new ArrayList();
- cluster.add("clusters",subclusters);
-
clustersToNamedList(outCluster.getSubclusters(), subclusters,
- outputSubClusters, maxLabels);
- }
- }
- }
-
- /**
- * Extracts parameters that can possibly match some attributes of
Carrot2 algorithms.
- */
- private void extractCarrotAttributes(SolrParams solrParams,
- Map<String, Object> attributes) {
- // Extract all non-predefined parameters. This way, we'll be
able to set all
- // parameters of Carrot2 algorithms without defining their
names as constants.
- for (Iterator<String> paramNames =
solrParams.getParameterNamesIterator(); paramNames
- .hasNext();) {
- String paramName = paramNames.next();
- if
(!CarrotParams.CARROT_PARAM_NAMES.contains(paramName)) {
- attributes.put(paramName,
solrParams.get(paramName));
- }
- }
- }
+ /**
+ * Carrot2 controller that manages instances of clustering algorithms
+ */
+ private CachingController controller = new CachingController();
+ private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
+
+ private String idFieldName;
+
+ public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
+ try {
+ // Prepare attributes for Carrot2 clustering call
+ Map<String, Object> attributes = new HashMap<String, Object>();
+ List<Document> documents = getDocuments(docList, query, sreq);
+ attributes.put(AttributeNames.DOCUMENTS, documents);
+ attributes.put(AttributeNames.QUERY, query.toString());
+
+ // Pass extra overriding attributes from the request, if any
+ extractCarrotAttributes(sreq.getParams(), attributes);
+
+ // Perform clustering and convert to named list
+ return clustersToNamedList(controller.process(attributes,
+ clusteringAlgorithmClass).getClusters(), sreq.getParams());
+ } catch (Exception e) {
+ log.error("Carrot2 clustering failed", e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public String init(NamedList config, final SolrCore core) {
+ String result = super.init(config, core);
+ SolrParams initParams = SolrParams.toSolrParams(config);
+
+ // Initialize Carrot2 controller. Pass initialization attributes, if any.
+ HashMap<String, Object> initAttributes = new HashMap<String, Object>();
+ extractCarrotAttributes(initParams, initAttributes);
+ this.controller.init(initAttributes);
+
+ this.idFieldName = core.getSchema().getUniqueKeyField().getName();
+
+ // Make sure the requested Carrot2 clustering algorithm class is available
+ String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM);
+ try {
+ Class<?> algorithmClass = Thread.currentThread().getContextClassLoader()
+ .loadClass(carrotAlgorithmClassName);
+ if (!IClusteringAlgorithm.class.isAssignableFrom(algorithmClass)) {
+ throw new IllegalArgumentException("Class provided as "
+ + CarrotParams.ALGORITHM + " must implement "
+ + IClusteringAlgorithm.class.getName());
+ }
+ this.clusteringAlgorithmClass = (Class<? extends IClusteringAlgorithm>)
algorithmClass;
+ } catch (ClassNotFoundException e) {
+ throw new RuntimeException(
+ "Failed to load Carrot clustering algorithm class", e);
+ }
+
+ return result;
+ }
+
+ /**
+ * Prepares Carrot2 documents for clustering.
+ */
+ private List<Document> getDocuments(DocList docList,
+ Query query, final SolrQueryRequest
sreq) throws IOException {
+ SolrHighlighter highligher = null;
+ SolrParams solrParams = sreq.getParams();
+ SolrCore core = sreq.getCore();
+
+ // Names of fields to deliver content for clustering
+ String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
+ String titleField = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
+ String snippetField = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME,
+ titleField);
+ if (StringUtils.isBlank(snippetField)) {
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
CarrotParams.SNIPPET_FIELD_NAME
+ + " must not be blank.");
+ }
+ Set<String> fieldsToLoad = Sets.newHashSet(urlField, titleField,
+ snippetField, idFieldName);
+
+ // Get the documents
+ DocIterator docsIter = docList.iterator();
+ boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY,
+ false);
+
+ SolrQueryRequest req = null;
+ String[] snippetFieldAry = null;
+ if (produceSummary == true) {
+ highligher = core.getHighlighter();
+ Map args = new HashMap();
+ snippetFieldAry = new String[]{snippetField};
+ args.put(HighlightParams.FIELDS, snippetFieldAry);
+ args.put(HighlightParams.HIGHLIGHT, "true");
+ req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
+ @Override
+ public SolrIndexSearcher getSearcher() {
+ return sreq.getSearcher();
+ }
+ };
+ }
+
+ SolrIndexSearcher searcher = sreq.getSearcher();
+ List<Document> result = new ArrayList<Document>(docList.size());
+ FieldSelector fieldSelector = new SetBasedFieldSelector(fieldsToLoad,
+ Collections.emptySet());
+ float[] scores = {1.0f};
+ int[] docsHolder = new int[1];
+ Query theQuery = query;
+
+ while (docsIter.hasNext()) {
+ Integer id = docsIter.next();
+ org.apache.lucene.document.Document doc = searcher.doc(id,
+ fieldSelector);
+ String snippet = getValue(doc, snippetField);
+ if (produceSummary == true) {
+ docsHolder[0] = id.intValue();
+ DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
+ highligher.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
+ }
+ Document carrotDocument = new Document(getValue(doc, titleField),
+ snippet, doc.get(urlField));
+ carrotDocument.addField("solrId", doc.get(idFieldName));
+ result.add(carrotDocument);
+ }
+
+ return result;
+ }
+
+ protected String getValue(org.apache.lucene.document.Document doc,
+ String field) {
+ StringBuilder result = new StringBuilder();
+ String[] vals = doc.getValues(field);
+ for (int i = 0; i < vals.length; i++) {
+ // Join multiple values with a period so that Carrot2 does not pick up
+ // phrases that cross field value boundaries (in most cases it would
+ // create useless phrases).
+ result.append(vals[i]).append(" . ");
+ }
+ return result.toString().trim();
+ }
+
+ private List clustersToNamedList(List<Cluster> carrotClusters,
+ SolrParams solrParams) {
+ List result = new ArrayList();
+ clustersToNamedList(carrotClusters, result, solrParams.getBool(
+ CarrotParams.OUTPUT_SUB_CLUSTERS, false), solrParams.getInt(
+ CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE));
+ return result;
+ }
+
+ private void clustersToNamedList(List<Cluster> outputClusters,
+ List parent, boolean outputSubClusters, int
maxLabels) {
+ for (Cluster outCluster : outputClusters) {
+ NamedList cluster = new SimpleOrderedMap();
+ parent.add(cluster);
+
+ List<String> labels = outCluster.getPhrases();
+ if (labels.size() > maxLabels)
+ labels = labels.subList(0, maxLabels);
+ cluster.add("labels", labels);
+
+ List<Document> docs = outCluster.getDocuments();
+ List docList = new ArrayList();
+ cluster.add("docs", docList);
+ for (Document doc : docs) {
+ docList.add(doc.getField("solrId"));
+ }
+
+ if (outputSubClusters) {
+ List subclusters = new ArrayList();
+ cluster.add("clusters", subclusters);
+ clustersToNamedList(outCluster.getSubclusters(), subclusters,
+ outputSubClusters, maxLabels);
+ }
+ }
+ }
+
+ /**
+ * Extracts parameters that can possibly match some attributes of Carrot2
algorithms.
+ */
+ private void extractCarrotAttributes(SolrParams solrParams,
+ Map<String, Object> attributes) {
+ // Extract all non-predefined parameters. This way, we'll be able to set
all
+ // parameters of Carrot2 algorithms without defining their names as
constants.
+ for (Iterator<String> paramNames = solrParams.getParameterNamesIterator();
paramNames
+ .hasNext();) {
+ String paramName = paramNames.next();
+ if (!CarrotParams.CARROT_PARAM_NAMES.contains(paramName)) {
+ attributes.put(paramName, solrParams.get(paramName));
+ }
+ }
+ }
}
Modified:
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
Thu Jul 2 14:08:37 2009
@@ -21,22 +21,20 @@
* limitations under the License.
*/
-/**
- *
- */
+
public interface CarrotParams {
- String CARROT_PREFIX = "carrot.";
+ String CARROT_PREFIX = "carrot.";
- String ALGORITHM = CARROT_PREFIX + "algorithm";
- String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
- String URL_FIELD_NAME = CARROT_PREFIX + "url";
- String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
- String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
- String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
- String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
+ String ALGORITHM = CARROT_PREFIX + "algorithm";
+ String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
+ String URL_FIELD_NAME = CARROT_PREFIX + "url";
+ String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
+ String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
+ String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
+ String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
- public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
- ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME,
SNIPPET_FIELD_NAME,
- PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS);
+ public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
+ ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME,
+ PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS);
}
Modified:
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java
Thu Jul 2 14:08:37 2009
@@ -23,21 +23,21 @@
*
*/
public class AbstractClusteringTest extends AbstractSolrTestCase {
- protected int numberOfDocs = 0;
-
+ protected int numberOfDocs = 0;
+
@Override
public void setUp() throws Exception {
super.setUp();
numberOfDocs = 0;
for (String[] doc : DOCUMENTS) {
- assertU("add failed", adoc("id", Integer.toString(numberOfDocs), "url",
doc[0], "title", doc[1], "snippet", doc[2]));
- numberOfDocs++;
- }
+ assertU("add failed", adoc("id", Integer.toString(numberOfDocs), "url",
doc[0], "title", doc[1], "snippet", doc[2]));
+ numberOfDocs++;
+ }
assertU("commit", commit());
}
- public String getSchemaFile() {
+ public String getSchemaFile() {
return "schema.xml";
}
@@ -45,154 +45,154 @@
return "solrconfig.xml";
}
- final String [][] DOCUMENTS = new String[][] {
- { "http://en.wikipedia.org/wiki/Data_mining",
- "Data Mining - Wikipedia",
- "Article about knowledge-discovery in databases (KDD), the
practice of automatically searching large stores of data for patterns." },
+ final String[][] DOCUMENTS = new String[][]{
+ {"http://en.wikipedia.org/wiki/Data_mining",
+ "Data Mining - Wikipedia",
+ "Article about knowledge-discovery in databases (KDD), the
practice of automatically searching large stores of data for patterns."},
- { "http://en.wikipedia.org/wiki/Datamining",
- "Data mining - Wikipedia, the free encyclopedia",
- "Data mining is the entire process of applying computer-based
methodology, ... Moreover, some data-mining systems such as neural networks are
inherently geared ..." },
+ {"http://en.wikipedia.org/wiki/Datamining",
+ "Data mining - Wikipedia, the free encyclopedia",
+ "Data mining is the entire process of applying
computer-based methodology, ... Moreover, some data-mining systems such as
neural networks are inherently geared ..."},
- { "http://www.statsoft.com/textbook/stdatmin.html",
- "Electronic Statistics Textbook: Data Mining Techniques",
- "Outlines the crucial concepts in data mining, defines the data
warehousing process, and offers examples of computational and graphical
exploratory data analysis techniques." },
+ {"http://www.statsoft.com/textbook/stdatmin.html",
+ "Electronic Statistics Textbook: Data Mining Techniques",
+ "Outlines the crucial concepts in data mining, defines the
data warehousing process, and offers examples of computational and graphical
exploratory data analysis techniques."},
- { "http://www.thearling.com/text/dmwhite/dmwhite.htm",
- "An Introduction to Data Mining",
- "Data mining, the extraction of hidden predictive information
from large ... Data mining tools predict future trends and behaviors, allowing
businesses to ..." },
+ {"http://www.thearling.com/text/dmwhite/dmwhite.htm",
+ "An Introduction to Data Mining",
+ "Data mining, the extraction of hidden predictive
information from large ... Data mining tools predict future trends and
behaviors, allowing businesses to ..."},
- {
"http://www.anderson.ucla.edu/faculty/jason.frand/teacher/technologies/palace/datamining.htm",
- "Data Mining: What is Data Mining?",
- "Outlines what knowledge discovery, the process of analyzing
data from different perspectives and summarizing it into useful information,
can do and how it works." },
+
{"http://www.anderson.ucla.edu/faculty/jason.frand/teacher/technologies/palace/datamining.htm",
+ "Data Mining: What is Data Mining?",
+ "Outlines what knowledge discovery, the process of analyzing
data from different perspectives and summarizing it into useful information,
can do and how it works."},
- { "http://www.spss.com/datamine",
- "Data Mining Software, Data Mining Applications and Data Mining
Solutions",
- "The patterns uncovered using data mining help organizations
make better and ... data mining customer ... Data mining applications, on the
other hand, embed ..." },
+ {"http://www.spss.com/datamine",
+ "Data Mining Software, Data Mining Applications and Data
Mining Solutions",
+ "The patterns uncovered using data mining help organizations
make better and ... data mining customer ... Data mining applications, on the
other hand, embed ..."},
- { "http://www.kdnuggets.com/",
- "KD Nuggets",
- "Newsletter on the data mining and knowledge industries,
offering information on data mining, knowledge discovery, text mining, and web
mining software, courses, jobs, publications, and meetings." },
+ {"http://www.kdnuggets.com/",
+ "KD Nuggets",
+ "Newsletter on the data mining and knowledge industries,
offering information on data mining, knowledge discovery, text mining, and web
mining software, courses, jobs, publications, and meetings."},
- { "http://www.answers.com/topic/data-mining",
- "data mining: Definition from Answers.com",
- "data mining n. The automatic extraction of useful, often
previously unknown information from large databases or data ... Data Mining For
Investing ..." },
+ {"http://www.answers.com/topic/data-mining",
+ "data mining: Definition from Answers.com",
+ "data mining n. The automatic extraction of useful, often
previously unknown information from large databases or data ... Data Mining For
Investing ..."},
- { "http://www.statsoft.com/products/dataminer.htm",
- "STATISTICA Data Mining and Predictive Modeling Solutions",
- "GRC site-wide menuing system research and development. ...
Contact a Data Mining Solutions Consultant. News and Success Stories. Events
..." },
+ {"http://www.statsoft.com/products/dataminer.htm",
+ "STATISTICA Data Mining and Predictive Modeling Solutions",
+ "GRC site-wide menuing system research and development. ...
Contact a Data Mining Solutions Consultant. News and Success Stories. Events
..."},
- { "http://datamining.typepad.com/",
- "Data Mining: Text Mining, Visualization and Social Media",
- "Commentary on text mining, data mining, social media and data
visualization. ... While mining Twitter data for business and marketing
intelligence (trend/buzz ..." },
+ {"http://datamining.typepad.com/",
+ "Data Mining: Text Mining, Visualization and Social Media",
+ "Commentary on text mining, data mining, social media and
data visualization. ... While mining Twitter data for business and marketing
intelligence (trend/buzz ..."},
- { "http://www.twocrows.com/",
- "Two Crows Corporation",
- "Dedicated to the development, marketing, sales and support of
tools for knowledge discovery to make data mining accessible and easy to use."
},
+ {"http://www.twocrows.com/",
+ "Two Crows Corporation",
+ "Dedicated to the development, marketing, sales and support
of tools for knowledge discovery to make data mining accessible and easy to
use."},
- { "http://www.thearling.com/",
- "Thearling.com",
- "Kurt Thearling's site dedicated to sharing information about
data mining, the automated extraction of hidden predictive information from
databases, and other analytic technologies." },
+ {"http://www.thearling.com/",
+ "Thearling.com",
+ "Kurt Thearling's site dedicated to sharing information
about data mining, the automated extraction of hidden predictive information
from databases, and other analytic technologies."},
- { "http://www.ccsu.edu/datamining/",
- "CCSU - Data Mining",
- "Offers degrees and certificates in data mining. Allows
students to explore cutting-edge data mining techniques and applications:
market basket analysis, decision trees, neural networks, machine learning, web
mining, and data modeling." },
+ {"http://www.ccsu.edu/datamining/",
+ "CCSU - Data Mining",
+ "Offers degrees and certificates in data mining. Allows
students to explore cutting-edge data mining techniques and applications:
market basket analysis, decision trees, neural networks, machine learning, web
mining, and data modeling."},
- { "http://www.oracle.com/technology/products/bi/odm",
- "Oracle Data Mining",
- "Oracle Data Mining Product Center ... New Oracle Data Mining
Powers New Social CRM Application (more information ... Mining High-Dimensional
Data for ..." },
+ {"http://www.oracle.com/technology/products/bi/odm",
+ "Oracle Data Mining",
+ "Oracle Data Mining Product Center ... New Oracle Data
Mining Powers New Social CRM Application (more information ... Mining
High-Dimensional Data for ..."},
- { "http://databases.about.com/od/datamining/a/datamining.htm",
- "Data Mining: An Introduction",
- "About.com article on how businesses are discovering new trends
and patterns of behavior that previously went unnoticed through data mining,
automated statistical analysis techniques." },
+ {"http://databases.about.com/od/datamining/a/datamining.htm",
+ "Data Mining: An Introduction",
+ "About.com article on how businesses are discovering new
trends and patterns of behavior that previously went unnoticed through data
mining, automated statistical analysis techniques."},
- { "http://www.dmoz.org/Computers/Software/Databases/Data_Mining/",
- "Open Directory - Computers: Software: Databases: Data Mining",
- "Data Mining and Knowledge Discovery - A peer-reviewed journal
publishing ... Data mining creates information assets that an organization can
leverage to ..." },
+ {"http://www.dmoz.org/Computers/Software/Databases/Data_Mining/",
+ "Open Directory - Computers: Software: Databases: Data
Mining",
+ "Data Mining and Knowledge Discovery - A peer-reviewed
journal publishing ... Data mining creates information assets that an
organization can leverage to ..."},
- { "http://www.cs.wisc.edu/dmi/",
- "DMI:Data Mining Institute",
- "Data Mining Institute at UW-Madison ... The Data Mining
Institute (DMI) was started on June 1, 1999 at the Computer ... of the Data
Mining Group of Microsoft ..." },
+ {"http://www.cs.wisc.edu/dmi/",
+ "DMI:Data Mining Institute",
+ "Data Mining Institute at UW-Madison ... The Data Mining
Institute (DMI) was started on June 1, 1999 at the Computer ... of the Data
Mining Group of Microsoft ..."},
- { "http://www.the-data-mine.com/",
- "The Data Mine",
- "Provides information about data mining also known as knowledge
discovery in databases (KDD) or simply knowledge discovery. List software,
events, organizations, and people working in data mining." },
+ {"http://www.the-data-mine.com/",
+ "The Data Mine",
+ "Provides information about data mining also known as
knowledge discovery in databases (KDD) or simply knowledge discovery. List
software, events, organizations, and people working in data mining."},
- { "http://www.statserv.com/datamining.html",
- "s...@tserv - About Data Mining",
- "s...@tserv Data Mining page ... Data mining in molecular
biology, by Alvis Brazma. Graham Williams page. Knowledge Discovery and Data
Mining Resources, ..." },
+ {"http://www.statserv.com/datamining.html",
+ "s...@tserv - About Data Mining",
+ "s...@tserv Data Mining page ... Data mining in molecular
biology, by Alvis Brazma. Graham Williams page. Knowledge Discovery and Data
Mining Resources, ..."},
- {
"http://ocw.mit.edu/OcwWeb/Sloan-School-of-Management/15-062Data-MiningSpring2003/CourseHome/index.htm",
- "MIT OpenCourseWare | Sloan School of Management | 15.062 Data
Mining ...",
- "Introduces students to a class of methods known as data mining
that assists managers in recognizing patterns and making intelligent use of
massive amounts of ..." },
+
{"http://ocw.mit.edu/OcwWeb/Sloan-School-of-Management/15-062Data-MiningSpring2003/CourseHome/index.htm",
+ "MIT OpenCourseWare | Sloan School of Management | 15.062
Data Mining ...",
+ "Introduces students to a class of methods known as data
mining that assists managers in recognizing patterns and making intelligent use
of massive amounts of ..."},
- { "http://www.pentaho.com/products/data_mining/",
- "Pentaho Commercial Open Source Business Intelligence: Data
Mining",
- "For example, data mining can warn you there's a high
probability a specific ... Pentaho Data Mining is differentiated by its open,
standards-compliant nature, ..." },
+ {"http://www.pentaho.com/products/data_mining/",
+ "Pentaho Commercial Open Source Business Intelligence: Data
Mining",
+ "For example, data mining can warn you there's a high
probability a specific ... Pentaho Data Mining is differentiated by its open,
standards-compliant nature, ..."},
- { "http://www.investorhome.com/mining.htm",
- "Investor Home - Data Mining",
- "Data Mining or Data Snooping is the practice of searching for
relationships and ... Data mining involves searching through databases for
correlations and patterns ..." },
+ {"http://www.investorhome.com/mining.htm",
+ "Investor Home - Data Mining",
+ "Data Mining or Data Snooping is the practice of searching
for relationships and ... Data mining involves searching through databases for
correlations and patterns ..."},
- { "http://www.datamining.com/",
- "Predictive Modeling and Predictive Analytics Solutions |
Enterprise ...",
- "Insightful Enterprise Miner - Enterprise data mining for
predictive modeling and predictive analytics." },
+ {"http://www.datamining.com/",
+ "Predictive Modeling and Predictive Analytics Solutions |
Enterprise ...",
+ "Insightful Enterprise Miner - Enterprise data mining for
predictive modeling and predictive analytics."},
- { "http://www.sourcewatch.org/index.php?title=Data_mining",
- "Data mining - SourceWatch",
- "These agencies reported 199 data mining projects, of which 68
... Office, \"DATA MINING. ... powerful technology known as data mining -- and
how, in the ..." },
+ {"http://www.sourcewatch.org/index.php?title=Data_mining",
+ "Data mining - SourceWatch",
+ "These agencies reported 199 data mining projects, of which
68 ... Office, \"DATA MINING. ... powerful technology known as data mining --
and how, in the ..."},
- { "http://www.autonlab.org/tutorials/",
- "Statistical Data Mining Tutorials",
- "Includes a set of tutorials on many aspects of statistical
data mining, including the foundations of probability, the foundations of
statistical data analysis, and most of the classic machine learning and data
mining algorithms." },
+ {"http://www.autonlab.org/tutorials/",
+ "Statistical Data Mining Tutorials",
+ "Includes a set of tutorials on many aspects of statistical
data mining, including the foundations of probability, the foundations of
statistical data analysis, and most of the classic machine learning and data
mining algorithms."},
- { "http://www.microstrategy.com/data-mining/index.asp",
- "Data Mining",
- "With MicroStrategy, data mining scoring is fully integrated
into mainstream ... The integration of data mining models from other
applications is accomplished by ..." },
+ {"http://www.microstrategy.com/data-mining/index.asp",
+ "Data Mining",
+ "With MicroStrategy, data mining scoring is fully integrated
into mainstream ... The integration of data mining models from other
applications is accomplished by ..."},
- { "http://www.datamininglab.com/",
- "Elder Research",
- "Provides consulting and short courses in data mining and
pattern discovery patterns in data." },
+ {"http://www.datamininglab.com/",
+ "Elder Research",
+ "Provides consulting and short courses in data mining and
pattern discovery patterns in data."},
- { "http://www.sqlserverdatamining.com/",
- "SQL Server Data Mining > Home",
- "SQL Server Data Mining Portal ... Data Mining as an
Application Platform (Whitepaper) Creating a Web Cross-sell Application with
SQL Server 2005 Data Mining (Article) ..." },
+ {"http://www.sqlserverdatamining.com/",
+ "SQL Server Data Mining > Home",
+ "SQL Server Data Mining Portal ... Data Mining as an
Application Platform (Whitepaper) Creating a Web Cross-sell Application with
SQL Server 2005 Data Mining (Article) ..."},
- { "http://databases.about.com/cs/datamining/g/dmining.htm",
- "Data Mining",
- "What is data mining? Find out here! ... Book Review: Data
Mining and Statistical Analysis Using SQL. What is Data Mining, and What Does
it Have to Do with ..." },
+ {"http://databases.about.com/cs/datamining/g/dmining.htm",
+ "Data Mining",
+ "What is data mining? Find out here! ... Book Review: Data
Mining and Statistical Analysis Using SQL. What is Data Mining, and What Does
it Have to Do with ..."},
- { "http://www.sas.com/technologies/analytics/datamining/index.html",
- "Data Mining Software and Text Mining | SAS",
- "... raw data to smarter ... Data Mining is an iterative
process of creating ... The knowledge gleaned from data and text mining can be
used to fuel ..." }
+ {"http://www.sas.com/technologies/analytics/datamining/index.html",
+ "Data Mining Software and Text Mining | SAS",
+ "... raw data to smarter ... Data Mining is an iterative
process of creating ... The knowledge gleaned from data and text mining can be
used to fuel ..."}
};
}
Modified:
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java
Thu Jul 2 14:08:37 2009
@@ -16,18 +16,16 @@
* limitations under the License.
*/
-import org.apache.solr.util.AbstractSolrTestCase;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.handler.component.SearchComponent;
-import org.apache.solr.handler.component.SpellCheckComponent;
-import org.apache.solr.handler.component.QueryComponent;
-import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.request.SolrRequestHandler;
-import org.apache.solr.request.SolrQueryResponse;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.component.QueryComponent;
+import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryResponse;
+import org.apache.solr.request.SolrRequestHandler;
/**
@@ -45,7 +43,7 @@
params.add(ClusteringComponent.COMPONENT_NAME, "true");
params.add(CommonParams.Q, "*:*");
-
+
params.add(ClusteringParams.USE_SEARCH_RESULTS, "true");
@@ -76,7 +74,7 @@
//System.out.println("Clusters: " + clusters);
assertTrue("clusters is null and it shouldn't be", clusters != null);
-
+
}
}
Modified:
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
Thu Jul 2 14:08:37 2009
@@ -1,7 +1,7 @@
package org.apache.solr.handler.clustering;
-import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
import org.apache.solr.search.DocSet;
@@ -9,7 +9,7 @@
*
*
**/
-public class MockDocumentClusteringEngine extends DocumentClusteringEngine{
+public class MockDocumentClusteringEngine extends DocumentClusteringEngine {
public NamedList cluster(DocSet docs, SolrParams solrParams) {
NamedList result = new NamedList();
return result;
Modified:
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
Thu Jul 2 14:08:37 2009
@@ -17,151 +17,153 @@
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.lucene.search.*;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.clustering.AbstractClusteringTest;
import org.apache.solr.handler.clustering.ClusteringComponent;
+import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
-import org.apache.solr.request.LocalSolrQueryRequest;
import org.carrot2.util.attribute.AttributeUtils;
+import java.io.IOException;
+import java.util.List;
+
/**
*
*/
@SuppressWarnings("unchecked")
public class CarrotClusteringEngineTest extends AbstractClusteringTest {
- public void testCarrotLingo() throws Exception {
- checkEngine(getClusteringEngine("default"), 9);
- }
-
- public void testCarrotStc() throws Exception {
- checkEngine(getClusteringEngine("stc"), 2);
- }
-
- public void testWithoutSubclusters() throws Exception {
- checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs),
- 1, 1, 0);
- }
-
- public void testWithSubclusters() throws Exception {
- ModifiableSolrParams params = new ModifiableSolrParams();
- params.set(CarrotParams.OUTPUT_SUB_CLUSTERS, true);
- checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
- params), 1, 1, 2);
- }
-
- public void testNumDescriptions() throws Exception {
- ModifiableSolrParams params = new ModifiableSolrParams();
- params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "labels"), 5);
- params.set(CarrotParams.NUM_DESCRIPTIONS, 3);
- checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
- params), 1, 3, 0);
- }
-
- public void testCarrotAttributePassing() throws Exception {
- ModifiableSolrParams params = new ModifiableSolrParams();
- params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "depth"), 1);
- params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "labels"), 3);
- checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
- params), 1, 3, 0);
- }
-
- private CarrotClusteringEngine getClusteringEngine(String engineName) {
- ClusteringComponent comp = (ClusteringComponent) h.getCore()
- .getSearchComponent("clustering");
- assertNotNull("clustering component should not be null", comp);
- CarrotClusteringEngine engine = (CarrotClusteringEngine) comp
- .getSearchClusteringEngines().get(engineName);
- assertNotNull("clustering engine for name: " + engineName
- + " should not be null", engine);
- return engine;
- }
-
- private List checkEngine(CarrotClusteringEngine engine,
- int expectedNumClusters) throws IOException {
- return checkEngine(engine, expectedNumClusters, new ModifiableSolrParams());
- }
-
- private List checkEngine(CarrotClusteringEngine engine,
- int expectedNumClusters, SolrParams clusteringParams) throws IOException {
- // Get all documents to cluster
- RefCounted<SolrIndexSearcher> ref = h.getCore().getSearcher();
- MatchAllDocsQuery query = new MatchAllDocsQuery();
- DocList docList;
- try {
- SolrIndexSearcher searcher = ref.get();
- docList = searcher.getDocList(query, (Query) null, new Sort(), 0,
- numberOfDocs);
- assertEquals("docList size", this.numberOfDocs, docList.matches());
- } finally {
- ref.decref();
- }
-
- ModifiableSolrParams solrParams = new ModifiableSolrParams();
- solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true");
- solrParams.add(clusteringParams);
-
- // Perform clustering
- LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
- List results = (List)engine.cluster(query, docList, req);
- req.close();
- assertEquals("number of clusters", expectedNumClusters, results.size());
- checkClusters(results, false);
- return results;
- }
-
- private void checkClusters(List results, int expectedDocCount,
- int expectedLabelCount, int expectedSubclusterCount) {
- for (int i = 0; i < results.size(); i++) {
- NamedList cluster = (NamedList) results.get(i);
- checkCluster(cluster, expectedDocCount, expectedLabelCount,
- expectedSubclusterCount);
- }
- }
-
- private void checkClusters(List results, boolean hasSubclusters) {
- for (int i = 0; i < results.size(); i++) {
- checkCluster((NamedList)results.get(i), hasSubclusters );
- }
- }
-
- private void checkCluster(NamedList cluster, boolean hasSubclusters) {
- List docs = (List)cluster.get("docs");
- assertNotNull("docs is null and it shouldn't be", docs);
- for (int j = 0; j < docs.size(); j++) {
- String id = (String) docs.get(j);
- assertNotNull("id is null and it shouldn't be", id);
- }
-
- List labels = (List) cluster.get("labels");
- assertNotNull("labels is null but it shouldn't be", labels);
-
- if (hasSubclusters) {
- List subclusters = (List) cluster.get("clusters");
- assertNotNull("subclusters is null but it shouldn't be", subclusters);
- }
- }
-
- private void checkCluster(NamedList cluster, int expectedDocCount,
- int expectedLabelCount, int expectedSubclusterCount) {
- checkCluster(cluster, expectedSubclusterCount > 0);
- assertEquals("number of docs in cluster", expectedDocCount,
- ((List) cluster.get("docs")).size());
- assertEquals("number of labels in cluster", expectedLabelCount,
- ((List) cluster.get("labels")).size());
-
- if (expectedSubclusterCount > 0) {
- List subclusters = (List) cluster.get("clusters");
- assertEquals("numClusters", expectedSubclusterCount, subclusters.size());
- assertEquals("number of subclusters in cluster",
- expectedSubclusterCount, subclusters.size());
- }
- }
+ public void testCarrotLingo() throws Exception {
+ checkEngine(getClusteringEngine("default"), 9);
+ }
+
+ public void testCarrotStc() throws Exception {
+ checkEngine(getClusteringEngine("stc"), 2);
+ }
+
+ public void testWithoutSubclusters() throws Exception {
+ checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs),
+ 1, 1, 0);
+ }
+
+ public void testWithSubclusters() throws Exception {
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.set(CarrotParams.OUTPUT_SUB_CLUSTERS, true);
+ checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
+ params), 1, 1, 2);
+ }
+
+ public void testNumDescriptions() throws Exception {
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "labels"), 5);
+ params.set(CarrotParams.NUM_DESCRIPTIONS, 3);
+ checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
+ params), 1, 3, 0);
+ }
+
+ public void testCarrotAttributePassing() throws Exception {
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "depth"), 1);
+ params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "labels"), 3);
+ checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
+ params), 1, 3, 0);
+ }
+
+ private CarrotClusteringEngine getClusteringEngine(String engineName) {
+ ClusteringComponent comp = (ClusteringComponent) h.getCore()
+ .getSearchComponent("clustering");
+ assertNotNull("clustering component should not be null", comp);
+ CarrotClusteringEngine engine = (CarrotClusteringEngine) comp
+ .getSearchClusteringEngines().get(engineName);
+ assertNotNull("clustering engine for name: " + engineName
+ + " should not be null", engine);
+ return engine;
+ }
+
+ private List checkEngine(CarrotClusteringEngine engine,
+ int expectedNumClusters) throws IOException {
+ return checkEngine(engine, expectedNumClusters, new ModifiableSolrParams());
+ }
+
+ private List checkEngine(CarrotClusteringEngine engine,
+ int expectedNumClusters, SolrParams clusteringParams) throws IOException {
+ // Get all documents to cluster
+ RefCounted<SolrIndexSearcher> ref = h.getCore().getSearcher();
+ MatchAllDocsQuery query = new MatchAllDocsQuery();
+ DocList docList;
+ try {
+ SolrIndexSearcher searcher = ref.get();
+ docList = searcher.getDocList(query, (Query) null, new Sort(), 0,
+ numberOfDocs);
+ assertEquals("docList size", this.numberOfDocs, docList.matches());
+ } finally {
+ ref.decref();
+ }
+
+ ModifiableSolrParams solrParams = new ModifiableSolrParams();
+ solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true");
+ solrParams.add(clusteringParams);
+
+ // Perform clustering
+ LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
+ List results = (List) engine.cluster(query, docList, req);
+ req.close();
+ assertEquals("number of clusters", expectedNumClusters, results.size());
+ checkClusters(results, false);
+ return results;
+ }
+
+ private void checkClusters(List results, int expectedDocCount,
+ int expectedLabelCount, int expectedSubclusterCount) {
+ for (int i = 0; i < results.size(); i++) {
+ NamedList cluster = (NamedList) results.get(i);
+ checkCluster(cluster, expectedDocCount, expectedLabelCount,
+ expectedSubclusterCount);
+ }
+ }
+
+ private void checkClusters(List results, boolean hasSubclusters) {
+ for (int i = 0; i < results.size(); i++) {
+ checkCluster((NamedList) results.get(i), hasSubclusters);
+ }
+ }
+
+ private void checkCluster(NamedList cluster, boolean hasSubclusters) {
+ List docs = (List) cluster.get("docs");
+ assertNotNull("docs is null and it shouldn't be", docs);
+ for (int j = 0; j < docs.size(); j++) {
+ String id = (String) docs.get(j);
+ assertNotNull("id is null and it shouldn't be", id);
+ }
+
+ List labels = (List) cluster.get("labels");
+ assertNotNull("labels is null but it shouldn't be", labels);
+
+ if (hasSubclusters) {
+ List subclusters = (List) cluster.get("clusters");
+ assertNotNull("subclusters is null but it shouldn't be", subclusters);
+ }
+ }
+
+ private void checkCluster(NamedList cluster, int expectedDocCount,
+ int expectedLabelCount, int expectedSubclusterCount) {
+ checkCluster(cluster, expectedSubclusterCount > 0);
+ assertEquals("number of docs in cluster", expectedDocCount,
+ ((List) cluster.get("docs")).size());
+ assertEquals("number of labels in cluster", expectedLabelCount,
+ ((List) cluster.get("labels")).size());
+
+ if (expectedSubclusterCount > 0) {
+ List subclusters = (List) cluster.get("clusters");
+ assertEquals("numClusters", expectedSubclusterCount, subclusters.size());
+ assertEquals("number of subclusters in cluster",
+ expectedSubclusterCount, subclusters.size());
+ }
+ }
}
Modified:
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
Thu Jul 2 14:08:37 2009
@@ -1,69 +1,68 @@
package org.apache.solr.handler.clustering.carrot2;
-import java.util.List;
-
+import com.google.common.collect.Lists;
import org.carrot2.core.*;
import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.core.attribute.Processing;
import org.carrot2.util.attribute.*;
import org.carrot2.util.attribute.constraint.IntRange;
-import com.google.common.collect.Lists;
+import java.util.List;
@Bindable(prefix = "MockClusteringAlgorithm")
public class MockClusteringAlgorithm extends ProcessingComponentBase implements
- IClusteringAlgorithm {
- @Input
- @Processing
- @Attribute(key = AttributeNames.DOCUMENTS)
- private List<Document> documents;
-
- @Output
- @Processing
- @Attribute(key = AttributeNames.CLUSTERS)
- private List<Cluster> clusters;
-
- @Input
- @Processing
- @Attribute
- @IntRange(min = 1, max = 5)
- private int depth = 2;
-
- @Input
- @Processing
- @Attribute
- @IntRange(min = 1, max = 5)
- private int labels = 1;
-
- @Override
- public void process() throws ProcessingException {
- clusters = Lists.newArrayList();
- if (documents == null) {
- return;
- }
-
- int documentIndex = 1;
- for (Document document : documents) {
- StringBuilder label = new StringBuilder("Cluster " + documentIndex);
- Cluster cluster = createCluster(label.toString(), document);
- clusters.add(cluster);
- for (int i = 1; i <= depth; i++) {
- label.append(".");
- label.append(i);
- Cluster newCluster = createCluster(label.toString(), document);
- cluster.addSubclusters(createCluster(label.toString(), document), newCluster);
- cluster = newCluster;
- }
- documentIndex++;
- }
- }
-
- private Cluster createCluster(String labelBase, Document... documents) {
- Cluster cluster = new Cluster();
- for (int i = 0; i < labels; i++) {
- cluster.addPhrases(labelBase + "#" + (i + 1));
- }
- cluster.addDocuments(documents);
- return cluster;
- }
+ IClusteringAlgorithm {
+ @Input
+ @Processing
+ @Attribute(key = AttributeNames.DOCUMENTS)
+ private List<Document> documents;
+
+ @Output
+ @Processing
+ @Attribute(key = AttributeNames.CLUSTERS)
+ private List<Cluster> clusters;
+
+ @Input
+ @Processing
+ @Attribute
+ @IntRange(min = 1, max = 5)
+ private int depth = 2;
+
+ @Input
+ @Processing
+ @Attribute
+ @IntRange(min = 1, max = 5)
+ private int labels = 1;
+
+ @Override
+ public void process() throws ProcessingException {
+ clusters = Lists.newArrayList();
+ if (documents == null) {
+ return;
+ }
+
+ int documentIndex = 1;
+ for (Document document : documents) {
+ StringBuilder label = new StringBuilder("Cluster " + documentIndex);
+ Cluster cluster = createCluster(label.toString(), document);
+ clusters.add(cluster);
+ for (int i = 1; i <= depth; i++) {
+ label.append(".");
+ label.append(i);
+ Cluster newCluster = createCluster(label.toString(), document);
+ cluster.addSubclusters(createCluster(label.toString(), document), newCluster);
+ cluster = newCluster;
+ }
+ documentIndex++;
+ }
+ }
+
+ private Cluster createCluster(String labelBase, Document... documents) {
+ Cluster cluster = new Cluster();
+ for (int i = 0; i < labels; i++) {
+ cluster.addPhrases(labelBase + "#" + (i + 1));
+ }
+ cluster.addDocuments(documents);
+ return cluster;
+ }
}