Author: yonik
Date: Thu Jul 2 14:08:37 2009
New Revision: 790599
URL: http://svn.apache.org/viewvc?rev=790599&view=rev
Log:
reformat to remove tabs
Modified:
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
Modified:
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
Thu Jul 2 14:08:37 2009
@@ -45,204 +45,206 @@
/**
* Search results clustering engine based on Carrot2 clustering algorithms.
- *
+ * <p/>
* Output from this class is subject to change.
- *
+ *
* @link http://project.carrot2.org
*/
@SuppressWarnings("unchecked")
public class CarrotClusteringEngine extends SearchClusteringEngine {
- private transient static Logger log = LoggerFactory
- .getLogger(CarrotClusteringEngine.class);
+ private transient static Logger log = LoggerFactory
+ .getLogger(CarrotClusteringEngine.class);
- /** Carrot2 controller that manages instances of clustering algorithms
*/
- private CachingController controller = new CachingController();
- private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
-
- private String idFieldName;
-
- public Object cluster(Query query, DocList docList, SolrQueryRequest
sreq) {
- try {
- // Prepare attributes for Carrot2 clustering call
- Map<String, Object> attributes = new HashMap<String,
Object>();
- List<Document> documents = getDocuments(docList, query,
sreq);
- attributes.put(AttributeNames.DOCUMENTS, documents);
- attributes.put(AttributeNames.QUERY, query.toString());
-
- // Pass extra overriding attributes from the request,
if any
- extractCarrotAttributes(sreq.getParams(), attributes);
-
- // Perform clustering and convert to named list
- return
clustersToNamedList(controller.process(attributes,
-
clusteringAlgorithmClass).getClusters(), sreq.getParams());
- } catch (Exception e) {
- log.error("Carrot2 clustering failed", e);
- throw new RuntimeException(e);
- }
- }
-
- @Override
- public String init(NamedList config, final SolrCore core) {
- String result = super.init(config, core);
- SolrParams initParams = SolrParams.toSolrParams(config);
-
- // Initialize Carrot2 controller. Pass initialization
attributes, if any.
- HashMap<String, Object> initAttributes = new HashMap<String,
Object>();
- extractCarrotAttributes(initParams, initAttributes);
- this.controller.init(initAttributes);
-
- this.idFieldName =
core.getSchema().getUniqueKeyField().getName();
-
- // Make sure the requested Carrot2 clustering algorithm class
is available
- String carrotAlgorithmClassName =
initParams.get(CarrotParams.ALGORITHM);
- try {
- Class<?> algorithmClass =
Thread.currentThread().getContextClassLoader()
- .loadClass(carrotAlgorithmClassName);
- if
(!IClusteringAlgorithm.class.isAssignableFrom(algorithmClass)) {
- throw new IllegalArgumentException("Class
provided as "
- + CarrotParams.ALGORITHM + "
must implement "
- +
IClusteringAlgorithm.class.getName());
- }
- this.clusteringAlgorithmClass = (Class<? extends
IClusteringAlgorithm>) algorithmClass;
- } catch (ClassNotFoundException e) {
- throw new RuntimeException(
- "Failed to load Carrot clustering
algorithm class", e);
- }
-
- return result;
- }
-
- /**
- * Prepares Carrot2 documents for clustering.
- */
- private List<Document> getDocuments(DocList docList,
- Query query, final SolrQueryRequest sreq) throws
IOException {
- SolrHighlighter highligher = null;
- SolrParams solrParams = sreq.getParams();
- SolrCore core = sreq.getCore();
-
- // Names of fields to deliver content for clustering
- String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME,
"url");
- String titleField =
solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
- String snippetField =
solrParams.get(CarrotParams.SNIPPET_FIELD_NAME,
- titleField);
- if (StringUtils.isBlank(snippetField)) {
- throw new
SolrException(SolrException.ErrorCode.SERVER_ERROR,
CarrotParams.SNIPPET_FIELD_NAME
- + " must not be blank.");
- }
- Set<String> fieldsToLoad = Sets.newHashSet(urlField, titleField,
- snippetField, idFieldName);
-
- // Get the documents
- DocIterator docsIter = docList.iterator();
- boolean produceSummary =
solrParams.getBool(CarrotParams.PRODUCE_SUMMARY,
- false);
-
- SolrQueryRequest req = null;
- String[] snippetFieldAry = null;
- if (produceSummary == true) {
- highligher = core.getHighlighter();
- Map args = new HashMap();
- snippetFieldAry = new String[] { snippetField };
- args.put(HighlightParams.FIELDS, snippetFieldAry);
- args.put(HighlightParams.HIGHLIGHT, "true");
- req = new LocalSolrQueryRequest(core, query.toString(),
"", 0, 1, args) {
- @Override
- public SolrIndexSearcher getSearcher() {
- return sreq.getSearcher();
- }
- };
- }
-
- SolrIndexSearcher searcher = sreq.getSearcher();
- List<Document> result = new ArrayList<Document>(docList.size());
- FieldSelector fieldSelector = new
SetBasedFieldSelector(fieldsToLoad,
- Collections.emptySet());
- float[] scores = { 1.0f };
- int[] docsHolder = new int[1];
- Query theQuery = query;
-
- while (docsIter.hasNext()) {
- Integer id = docsIter.next();
- org.apache.lucene.document.Document doc =
searcher.doc(id,
- fieldSelector);
- String snippet = getValue(doc, snippetField);
- if (produceSummary == true) {
- docsHolder[0] = id.intValue();
- DocList docAsList = new DocSlice(0, 1,
docsHolder, scores, 1, 1.0f);
- highligher.doHighlighting(docAsList,
theQuery, req, snippetFieldAry);
- }
- Document carrotDocument = new
Document(getValue(doc, titleField),
- snippet, doc.get(urlField));
- carrotDocument.addField("solrId",
doc.get(idFieldName));
- result.add(carrotDocument);
- }
-
- return result;
- }
-
- protected String getValue(org.apache.lucene.document.Document doc,
- String field) {
- StringBuilder result = new StringBuilder();
- String[] vals = doc.getValues(field);
- for (int i = 0; i < vals.length; i++) {
- // Join multiple values with a period so that Carrot2
does not pick up
- // phrases that cross field value boundaries (in most
cases it would
- // create useless phrases).
- result.append(vals[i]).append(" . ");
- }
- return result.toString().trim();
- }
-
- private List clustersToNamedList(List<Cluster> carrotClusters,
- SolrParams solrParams) {
- List result = new ArrayList();
- clustersToNamedList(carrotClusters, result, solrParams.getBool(
- CarrotParams.OUTPUT_SUB_CLUSTERS, false),
solrParams.getInt(
- CarrotParams.NUM_DESCRIPTIONS,
Integer.MAX_VALUE));
- return result;
- }
-
- private void clustersToNamedList(List<Cluster> outputClusters,
- List parent, boolean outputSubClusters, int maxLabels) {
- for (Cluster outCluster : outputClusters) {
- NamedList cluster = new SimpleOrderedMap();
- parent.add(cluster);
-
- List<String> labels = outCluster.getPhrases();
- if (labels.size() > maxLabels)
- labels = labels.subList(0,maxLabels);
- cluster.add("labels", labels);
-
- List<Document> docs = outCluster.getDocuments();
- List docList = new ArrayList();
- cluster.add("docs", docList);
- for (Document doc : docs) {
- docList.add(doc.getField("solrId"));
- }
-
- if (outputSubClusters) {
- List subclusters = new ArrayList();
- cluster.add("clusters",subclusters);
-
clustersToNamedList(outCluster.getSubclusters(), subclusters,
- outputSubClusters, maxLabels);
- }
- }
- }
-
- /**
- * Extracts parameters that can possibly match some attributes of
Carrot2 algorithms.
- */
- private void extractCarrotAttributes(SolrParams solrParams,
- Map<String, Object> attributes) {
- // Extract all non-predefined parameters. This way, we'll be
able to set all
- // parameters of Carrot2 algorithms without defining their
names as constants.
- for (Iterator<String> paramNames =
solrParams.getParameterNamesIterator(); paramNames
- .hasNext();) {
- String paramName = paramNames.next();
- if
(!CarrotParams.CARROT_PARAM_NAMES.contains(paramName)) {
- attributes.put(paramName,
solrParams.get(paramName));
- }
- }
- }
+ /**
+ * Carrot2 controller that manages instances of clustering algorithms
+ */
+ private CachingController controller = new CachingController();
+ private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
+
+ private String idFieldName;
+
+ public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
+ try {
+ // Prepare attributes for Carrot2 clustering call
+ Map<String, Object> attributes = new HashMap<String, Object>();
+ List<Document> documents = getDocuments(docList, query, sreq);
+ attributes.put(AttributeNames.DOCUMENTS, documents);
+ attributes.put(AttributeNames.QUERY, query.toString());
+
+ // Pass extra overriding attributes from the request, if any
+ extractCarrotAttributes(sreq.getParams(), attributes);
+
+ // Perform clustering and convert to named list
+ return clustersToNamedList(controller.process(attributes,
+ clusteringAlgorithmClass).getClusters(), sreq.getParams());
+ } catch (Exception e) {
+ log.error("Carrot2 clustering failed", e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public String init(NamedList config, final SolrCore core) {
+ String result = super.init(config, core);
+ SolrParams initParams = SolrParams.toSolrParams(config);
+
+ // Initialize Carrot2 controller. Pass initialization attributes, if any.
+ HashMap<String, Object> initAttributes = new HashMap<String, Object>();
+ extractCarrotAttributes(initParams, initAttributes);
+ this.controller.init(initAttributes);
+
+ this.idFieldName = core.getSchema().getUniqueKeyField().getName();
+
+ // Make sure the requested Carrot2 clustering algorithm class is available
+ String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM);
+ try {
+ Class<?> algorithmClass = Thread.currentThread().getContextClassLoader()
+ .loadClass(carrotAlgorithmClassName);
+ if (!IClusteringAlgorithm.class.isAssignableFrom(algorithmClass)) {
+ throw new IllegalArgumentException("Class provided as "
+ + CarrotParams.ALGORITHM + " must implement "
+ + IClusteringAlgorithm.class.getName());
+ }
+ this.clusteringAlgorithmClass = (Class<? extends IClusteringAlgorithm>)
algorithmClass;
+ } catch (ClassNotFoundException e) {
+ throw new RuntimeException(
+ "Failed to load Carrot clustering algorithm class", e);
+ }
+
+ return result;
+ }
+
+ /**
+ * Prepares Carrot2 documents for clustering.
+ */
+ private List<Document> getDocuments(DocList docList,
+ Query query, final SolrQueryRequest
sreq) throws IOException {
+ SolrHighlighter highligher = null;
+ SolrParams solrParams = sreq.getParams();
+ SolrCore core = sreq.getCore();
+
+ // Names of fields to deliver content for clustering
+ String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
+ String titleField = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
+ String snippetField = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME,
+ titleField);
+ if (StringUtils.isBlank(snippetField)) {
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
CarrotParams.SNIPPET_FIELD_NAME
+ + " must not be blank.");
+ }
+ Set<String> fieldsToLoad = Sets.newHashSet(urlField, titleField,
+ snippetField, idFieldName);
+
+ // Get the documents
+ DocIterator docsIter = docList.iterator();
+ boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY,
+ false);
+
+ SolrQueryRequest req = null;
+ String[] snippetFieldAry = null;
+ if (produceSummary == true) {
+ highligher = core.getHighlighter();
+ Map args = new HashMap();
+ snippetFieldAry = new String[]{snippetField};
+ args.put(HighlightParams.FIELDS, snippetFieldAry);
+ args.put(HighlightParams.HIGHLIGHT, "true");
+ req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
+ @Override
+ public SolrIndexSearcher getSearcher() {
+ return sreq.getSearcher();
+ }
+ };
+ }
+
+ SolrIndexSearcher searcher = sreq.getSearcher();
+ List<Document> result = new ArrayList<Document>(docList.size());
+ FieldSelector fieldSelector = new SetBasedFieldSelector(fieldsToLoad,
+ Collections.emptySet());
+ float[] scores = {1.0f};
+ int[] docsHolder = new int[1];
+ Query theQuery = query;
+
+ while (docsIter.hasNext()) {
+ Integer id = docsIter.next();
+ org.apache.lucene.document.Document doc = searcher.doc(id,
+ fieldSelector);
+ String snippet = getValue(doc, snippetField);
+ if (produceSummary == true) {
+ docsHolder[0] = id.intValue();
+ DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
+ highligher.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
+ }
+ Document carrotDocument = new Document(getValue(doc, titleField),
+ snippet, doc.get(urlField));
+ carrotDocument.addField("solrId", doc.get(idFieldName));
+ result.add(carrotDocument);
+ }
+
+ return result;
+ }
+
+ protected String getValue(org.apache.lucene.document.Document doc,
+ String field) {
+ StringBuilder result = new StringBuilder();
+ String[] vals = doc.getValues(field);
+ for (int i = 0; i < vals.length; i++) {
+ // Join multiple values with a period so that Carrot2 does not pick up
+ // phrases that cross field value boundaries (in most cases it would
+ // create useless phrases).
+ result.append(vals[i]).append(" . ");
+ }
+ return result.toString().trim();
+ }
+
+ private List clustersToNamedList(List<Cluster> carrotClusters,
+ SolrParams solrParams) {
+ List result = new ArrayList();
+ clustersToNamedList(carrotClusters, result, solrParams.getBool(
+ CarrotParams.OUTPUT_SUB_CLUSTERS, false), solrParams.getInt(
+ CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE));
+ return result;
+ }
+
+ private void clustersToNamedList(List<Cluster> outputClusters,
+ List parent, boolean outputSubClusters, int
maxLabels) {
+ for (Cluster outCluster : outputClusters) {
+ NamedList cluster = new SimpleOrderedMap();
+ parent.add(cluster);
+
+ List<String> labels = outCluster.getPhrases();
+ if (labels.size() > maxLabels)
+ labels = labels.subList(0, maxLabels);
+ cluster.add("labels", labels);
+
+ List<Document> docs = outCluster.getDocuments();
+ List docList = new ArrayList();
+ cluster.add("docs", docList);
+ for (Document doc : docs) {
+ docList.add(doc.getField("solrId"));
+ }
+
+ if (outputSubClusters) {
+ List subclusters = new ArrayList();
+ cluster.add("clusters", subclusters);
+ clustersToNamedList(outCluster.getSubclusters(), subclusters,
+ outputSubClusters, maxLabels);
+ }
+ }
+ }
+
+ /**
+ * Extracts parameters that can possibly match some attributes of Carrot2
algorithms.
+ */
+ private void extractCarrotAttributes(SolrParams solrParams,
+ Map<String, Object> attributes) {
+ // Extract all non-predefined parameters. This way, we'll be able to set
all
+ // parameters of Carrot2 algorithms without defining their names as
constants.
+ for (Iterator<String> paramNames = solrParams.getParameterNamesIterator();
paramNames
+ .hasNext();) {
+ String paramName = paramNames.next();
+ if (!CarrotParams.CARROT_PARAM_NAMES.contains(paramName)) {
+ attributes.put(paramName, solrParams.get(paramName));
+ }
+ }
+ }
}
Modified:
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
Thu Jul 2 14:08:37 2009
@@ -21,22 +21,20 @@
* limitations under the License.
*/
-/**
- *
- */
+
public interface CarrotParams {
- String CARROT_PREFIX = "carrot.";
+ String CARROT_PREFIX = "carrot.";
- String ALGORITHM = CARROT_PREFIX + "algorithm";
- String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
- String URL_FIELD_NAME = CARROT_PREFIX + "url";
- String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
- String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
- String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
- String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
+ String ALGORITHM = CARROT_PREFIX + "algorithm";
+ String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
+ String URL_FIELD_NAME = CARROT_PREFIX + "url";
+ String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
+ String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
+ String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
+ String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
- public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
- ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME,
SNIPPET_FIELD_NAME,
- PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS);
+ public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
+ ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME,
+ PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS);
}
Modified:
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java
Thu Jul 2 14:08:37 2009
@@ -23,21 +23,21 @@
*
*/
public class AbstractClusteringTest extends AbstractSolrTestCase {
- protected int numberOfDocs = 0;
-
+ protected int numberOfDocs = 0;
+
@Override
public void setUp() throws Exception {
super.setUp();
numberOfDocs = 0;
for (String[] doc : DOCUMENTS) {
- assertU("add failed", adoc("id", Integer.toString(numberOfDocs), "url",
doc[0], "title", doc[1], "snippet", doc[2]));
- numberOfDocs++;
- }
+ assertU("add failed", adoc("id", Integer.toString(numberOfDocs), "url",
doc[0], "title", doc[1], "snippet", doc[2]));
+ numberOfDocs++;
+ }
assertU("commit", commit());
}
- public String getSchemaFile() {
+ public String getSchemaFile() {
return "schema.xml";
}
@@ -45,154 +45,154 @@
return "solrconfig.xml";
}
- final String [][] DOCUMENTS = new String[][] {
- { "http://en.wikipedia.org/wiki/Data_mining",
- "Data Mining - Wikipedia",
- "Article about knowledge-discovery in databases (KDD), the
practice of automatically searching large stores of data for patterns." },
+ final String[][] DOCUMENTS = new String[][]{
+ {"http://en.wikipedia.org/wiki/Data_mining",
+ "Data Mining - Wikipedia",
+ "Article about knowledge-discovery in databases (KDD), the
practice of automatically searching large stores of data for patterns."},
- { "http://en.wikipedia.org/wiki/Datamining",
- "Data mining - Wikipedia, the free encyclopedia",
- "Data mining is the entire process of applying computer-based
methodology, ... Moreover, some data-mining systems such as neural networks are
inherently geared ..." },
+ {"http://en.wikipedia.org/wiki/Datamining",
+ "Data mining - Wikipedia, the free encyclopedia",
+ "Data mining is the entire process of applying
computer-based methodology, ... Moreover, some data-mining systems such as
neural networks are inherently geared ..."},
- { "http://www.statsoft.com/textbook/stdatmin.html",
- "Electronic Statistics Textbook: Data Mining Techniques",
- "Outlines the crucial concepts in data mining, defines the data
warehousing process, and offers examples of computational and graphical
exploratory data analysis techniques." },
+ {"http://www.statsoft.com/textbook/stdatmin.html",
+ "Electronic Statistics Textbook: Data Mining Techniques",
+ "Outlines the crucial concepts in data mining, defines the
data warehousing process, and offers examples of computational and graphical
exploratory data analysis techniques."},
- { "http://www.thearling.com/text/dmwhite/dmwhite.htm",
- "An Introduction to Data Mining",
- "Data mining, the extraction of hidden predictive information
from large ... Data mining tools predict future trends and behaviors, allowing
businesses to ..." },
+ {"http://www.thearling.com/text/dmwhite/dmwhite.htm",
+ "An Introduction to Data Mining",
+ "Data mining, the extraction of hidden predictive
information from large ... Data mining tools predict future trends and
behaviors, allowing businesses to ..."},
- {
"http://www.anderson.ucla.edu/faculty/jason.frand/teacher/technologies/palace/datamining.htm",
- "Data Mining: What is Data Mining?",
- "Outlines what knowledge discovery, the process of analyzing
data from different perspectives and summarizing it into useful information,
can do and how it works." },
+
{"http://www.anderson.ucla.edu/faculty/jason.frand/teacher/technologies/palace/datamining.htm",
+ "Data Mining: What is Data Mining?",
+ "Outlines what knowledge discovery, the process of analyzing
data from different perspectives and summarizing it into useful information,
can do and how it works."},
- { "http://www.spss.com/datamine",
- "Data Mining Software, Data Mining Applications and Data Mining
Solutions",
- "The patterns uncovered using data mining help organizations
make better and ... data mining customer ... Data mining applications, on the
other hand, embed ..." },
+ {"http://www.spss.com/datamine",
+ "Data Mining Software, Data Mining Applications and Data
Mining Solutions",
+ "The patterns uncovered using data mining help organizations
make better and ... data mining customer ... Data mining applications, on the
other hand, embed ..."},
- { "http://www.kdnuggets.com/",
- "KD Nuggets",
- "Newsletter on the data mining and knowledge industries,
offering information on data mining, knowledge discovery, text mining, and web
mining software, courses, jobs, publications, and meetings." },
+ {"http://www.kdnuggets.com/",
+ "KD Nuggets",
+ "Newsletter on the data mining and knowledge industries,
offering information on data mining, knowledge discovery, text mining, and web
mining software, courses, jobs, publications, and meetings."},
- { "http://www.answers.com/topic/data-mining",
- "data mining: Definition from Answers.com",
- "data mining n. The automatic extraction of useful, often
previously unknown information from large databases or data ... Data Mining For
Investing ..." },
+ {"http://www.answers.com/topic/data-mining",
+ "data mining: Definition from Answers.com",
+ "data mining n. The automatic extraction of useful, often
previously unknown information from large databases or data ... Data Mining For
Investing ..."},
- { "http://www.statsoft.com/products/dataminer.htm",
- "STATISTICA Data Mining and Predictive Modeling Solutions",
- "GRC site-wide menuing system research and development. ...
Contact a Data Mining Solutions Consultant. News and Success Stories. Events
..." },
+ {"http://www.statsoft.com/products/dataminer.htm",
+ "STATISTICA Data Mining and Predictive Modeling Solutions",
+ "GRC site-wide menuing system research and development. ...
Contact a Data Mining Solutions Consultant. News and Success Stories. Events
..."},
- { "http://datamining.typepad.com/",
- "Data Mining: Text Mining, Visualization and Social Media",
- "Commentary on text mining, data mining, social media and data
visualization. ... While mining Twitter data for business and marketing
intelligence (trend/buzz ..." },
+ {"http://datamining.typepad.com/",
+ "Data Mining: Text Mining, Visualization and Social Media",
+ "Commentary on text mining, data mining, social media and
data visualization. ... While mining Twitter data for business and marketing
intelligence (trend/buzz ..."},
- { "http://www.twocrows.com/",
- "Two Crows Corporation",
- "Dedicated to the development, marketing, sales and support of
tools for knowledge discovery to make data mining accessible and easy to use."
},
+ {"http://www.twocrows.com/",
+ "Two Crows Corporation",
+ "Dedicated to the development, marketing, sales and support
of tools for knowledge discovery to make data mining accessible and easy to
use."},
- { "http://www.thearling.com/",
- "Thearling.com",
- "Kurt Thearling's site dedicated to sharing information about
data mining, the automated extraction of hidden predictive information from
databases, and other analytic technologies." },
+ {"http://www.thearling.com/",
+ "Thearling.com",
+ "Kurt Thearling's site dedicated to sharing information
about data mining, the automated extraction of hidden predictive information
from databases, and other analytic technologies."},
- { "http://www.ccsu.edu/datamining/",
- "CCSU - Data Mining",
- "Offers degrees and certificates in data mining. Allows
students to explore cutting-edge data mining techniques and applications:
market basket analysis, decision trees, neural networks, machine learning, web
mining, and data modeling." },
+ {"http://www.ccsu.edu/datamining/",
+ "CCSU - Data Mining",
+ "Offers degrees and certificates in data mining. Allows
students to explore cutting-edge data mining techniques and applications:
market basket analysis, decision trees, neural networks, machine learning, web
mining, and data modeling."},
- { "http://www.oracle.com/technology/products/bi/odm",
- "Oracle Data Mining",
- "Oracle Data Mining Product Center ... New Oracle Data Mining
Powers New Social CRM Application (more information ... Mining High-Dimensional
Data for ..." },
+ {"http://www.oracle.com/technology/products/bi/odm",
+ "Oracle Data Mining",
+ "Oracle Data Mining Product Center ... New Oracle Data
Mining Powers New Social CRM Application (more information ... Mining
High-Dimensional Data for ..."},
- { "http://databases.about.com/od/datamining/a/datamining.htm",
- "Data Mining: An Introduction",
- "About.com article on how businesses are discovering new trends
and patterns of behavior that previously went unnoticed through data mining,
automated statistical analysis techniques." },
+ {"http://databases.about.com/od/datamining/a/datamining.htm",
+ "Data Mining: An Introduction",
+ "About.com article on how businesses are discovering new
trends and patterns of behavior that previously went unnoticed through data
mining, automated statistical analysis techniques."},
- { "http://www.dmoz.org/Computers/Software/Databases/Data_Mining/",
- "Open Directory - Computers: Software: Databases: Data Mining",
- "Data Mining and Knowledge Discovery - A peer-reviewed journal
publishing ... Data mining creates information assets that an organization can
leverage to ..." },
+ {"http://www.dmoz.org/Computers/Software/Databases/Data_Mining/",
+ "Open Directory - Computers: Software: Databases: Data
Mining",
+ "Data Mining and Knowledge Discovery - A peer-reviewed
journal publishing ... Data mining creates information assets that an
organization can leverage to ..."},
- { "http://www.cs.wisc.edu/dmi/",
- "DMI:Data Mining Institute",
- "Data Mining Institute at UW-Madison ... The Data Mining
Institute (DMI) was started on June 1, 1999 at the Computer ... of the Data
Mining Group of Microsoft ..." },
+ {"http://www.cs.wisc.edu/dmi/",
+ "DMI:Data Mining Institute",
+ "Data Mining Institute at UW-Madison ... The Data Mining
Institute (DMI) was started on June 1, 1999 at the Computer ... of the Data
Mining Group of Microsoft ..."},
- { "http://www.the-data-mine.com/",
- "The Data Mine",
- "Provides information about data mining also known as knowledge
discovery in databases (KDD) or simply knowledge discovery. List software,
events, organizations, and people working in data mining." },
+ {"http://www.the-data-mine.com/",
+ "The Data Mine",
+ "Provides information about data mining also known as
knowledge discovery in databases (KDD) or simply knowledge discovery. List
software, events, organizations, and people working in data mining."},
- { "http://www.statserv.com/datamining.html",
- "s...@tserv - About Data Mining",
- "s...@tserv Data Mining page ... Data mining in molecular
biology, by Alvis Brazma. Graham Williams page. Knowledge Discovery and Data
Mining Resources, ..." },
+ {"http://www.statserv.com/datamining.html",
+ "s...@tserv - About Data Mining",
+ "s...@tserv Data Mining page ... Data mining in molecular
biology, by Alvis Brazma. Graham Williams page. Knowledge Discovery and Data
Mining Resources, ..."},
- {
"http://ocw.mit.edu/OcwWeb/Sloan-School-of-Management/15-062Data-MiningSpring2003/CourseHome/index.htm",
- "MIT OpenCourseWare | Sloan School of Management | 15.062 Data
Mining ...",
- "Introduces students to a class of methods known as data mining
that assists managers in recognizing patterns and making intelligent use of
massive amounts of ..." },
+
{"http://ocw.mit.edu/OcwWeb/Sloan-School-of-Management/15-062Data-MiningSpring2003/CourseHome/index.htm",
+ "MIT OpenCourseWare | Sloan School of Management | 15.062
Data Mining ...",
+ "Introduces students to a class of methods known as data
mining that assists managers in recognizing patterns and making intelligent use
of massive amounts of ..."},
- { "http://www.pentaho.com/products/data_mining/",
- "Pentaho Commercial Open Source Business Intelligence: Data
Mining",
- "For example, data mining can warn you there's a high
probability a specific ... Pentaho Data Mining is differentiated by its open,
standards-compliant nature, ..." },
+ {"http://www.pentaho.com/products/data_mining/",
+ "Pentaho Commercial Open Source Business Intelligence: Data
Mining",
+ "For example, data mining can warn you there's a high
probability a specific ... Pentaho Data Mining is differentiated by its open,
standards-compliant nature, ..."},
- { "http://www.investorhome.com/mining.htm",
- "Investor Home - Data Mining",
- "Data Mining or Data Snooping is the practice of searching for
relationships and ... Data mining involves searching through databases for
correlations and patterns ..." },
+ {"http://www.investorhome.com/mining.htm",
+ "Investor Home - Data Mining",
+ "Data Mining or Data Snooping is the practice of searching
for relationships and ... Data mining involves searching through databases for
correlations and patterns ..."},
- { "http://www.datamining.com/",
- "Predictive Modeling and Predictive Analytics Solutions |
Enterprise ...",
- "Insightful Enterprise Miner - Enterprise data mining for
predictive modeling and predictive analytics." },
+ {"http://www.datamining.com/",
+ "Predictive Modeling and Predictive Analytics Solutions |
Enterprise ...",
+ "Insightful Enterprise Miner - Enterprise data mining for
predictive modeling and predictive analytics."},
- { "http://www.sourcewatch.org/index.php?title=Data_mining",
- "Data mining - SourceWatch",
- "These agencies reported 199 data mining projects, of which 68
... Office, \"DATA MINING. ... powerful technology known as data mining -- and
how, in the ..." },
+ {"http://www.sourcewatch.org/index.php?title=Data_mining",
+ "Data mining - SourceWatch",
+ "These agencies reported 199 data mining projects, of which
68 ... Office, \"DATA MINING. ... powerful technology known as data mining --
and how, in the ..."},
- { "http://www.autonlab.org/tutorials/",
- "Statistical Data Mining Tutorials",
- "Includes a set of tutorials on many aspects of statistical
data mining, including the foundations of probability, the foundations of
statistical data analysis, and most of the classic machine learning and data
mining algorithms." },
+ {"http://www.autonlab.org/tutorials/",
+ "Statistical Data Mining Tutorials",
+ "Includes a set of tutorials on many aspects of statistical
data mining, including the foundations of probability, the foundations of
statistical data analysis, and most of the classic machine learning and data
mining algorithms."},
- { "http://www.microstrategy.com/data-mining/index.asp",
- "Data Mining",
- "With MicroStrategy, data mining scoring is fully integrated
into mainstream ... The integration of data mining models from other
applications is accomplished by ..." },
+ {"http://www.microstrategy.com/data-mining/index.asp",
+ "Data Mining",
+ "With MicroStrategy, data mining scoring is fully integrated
into mainstream ... The integration of data mining models from other
applications is accomplished by ..."},
- { "http://www.datamininglab.com/",
- "Elder Research",
- "Provides consulting and short courses in data mining and
pattern discovery patterns in data." },
+ {"http://www.datamininglab.com/",
+ "Elder Research",
+ "Provides consulting and short courses in data mining and
pattern discovery patterns in data."},
- { "http://www.sqlserverdatamining.com/",
- "SQL Server Data Mining > Home",
- "SQL Server Data Mining Portal ... Data Mining as an
Application Platform (Whitepaper) Creating a Web Cross-sell Application with
SQL Server 2005 Data Mining (Article) ..." },
+ {"http://www.sqlserverdatamining.com/",
+ "SQL Server Data Mining > Home",
+ "SQL Server Data Mining Portal ... Data Mining as an
Application Platform (Whitepaper) Creating a Web Cross-sell Application with
SQL Server 2005 Data Mining (Article) ..."},
- { "http://databases.about.com/cs/datamining/g/dmining.htm",
- "Data Mining",
- "What is data mining? Find out here! ... Book Review: Data
Mining and Statistical Analysis Using SQL. What is Data Mining, and What Does
it Have to Do with ..." },
+ {"http://databases.about.com/cs/datamining/g/dmining.htm",
+ "Data Mining",
+ "What is data mining? Find out here! ... Book Review: Data
Mining and Statistical Analysis Using SQL. What is Data Mining, and What Does
it Have to Do with ..."},
- { "http://www.sas.com/technologies/analytics/datamining/index.html",
- "Data Mining Software and Text Mining | SAS",
- "... raw data to smarter ... Data Mining is an iterative
process of creating ... The knowledge gleaned from data and text mining can be
used to fuel ..." }
+ {"http://www.sas.com/technologies/analytics/datamining/index.html",
+ "Data Mining Software and Text Mining | SAS",
+ "... raw data to smarter ... Data Mining is an iterative
process of creating ... The knowledge gleaned from data and text mining can be
used to fuel ..."}
};
}
Modified:
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java
Thu Jul 2 14:08:37 2009
@@ -16,18 +16,16 @@
* limitations under the License.
*/
-import org.apache.solr.util.AbstractSolrTestCase;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.handler.component.SearchComponent;
-import org.apache.solr.handler.component.SpellCheckComponent;
-import org.apache.solr.handler.component.QueryComponent;
-import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.request.SolrRequestHandler;
-import org.apache.solr.request.SolrQueryResponse;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.component.QueryComponent;
+import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryResponse;
+import org.apache.solr.request.SolrRequestHandler;
/**
@@ -45,7 +43,7 @@
params.add(ClusteringComponent.COMPONENT_NAME, "true");
params.add(CommonParams.Q, "*:*");
-
+
params.add(ClusteringParams.USE_SEARCH_RESULTS, "true");
@@ -76,7 +74,7 @@
//System.out.println("Clusters: " + clusters);
assertTrue("clusters is null and it shouldn't be", clusters != null);
-
+
}
}
Modified:
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
Thu Jul 2 14:08:37 2009
@@ -1,7 +1,7 @@
package org.apache.solr.handler.clustering;
-import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
import org.apache.solr.search.DocSet;
@@ -9,7 +9,7 @@
*
*
**/
-public class MockDocumentClusteringEngine extends DocumentClusteringEngine{
+public class MockDocumentClusteringEngine extends DocumentClusteringEngine {
public NamedList cluster(DocSet docs, SolrParams solrParams) {
NamedList result = new NamedList();
return result;
Modified:
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
Thu Jul 2 14:08:37 2009
@@ -17,151 +17,153 @@
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.lucene.search.*;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.clustering.AbstractClusteringTest;
import org.apache.solr.handler.clustering.ClusteringComponent;
+import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
-import org.apache.solr.request.LocalSolrQueryRequest;
import org.carrot2.util.attribute.AttributeUtils;
+import java.io.IOException;
+import java.util.List;
+
/**
*
*/
@SuppressWarnings("unchecked")
public class CarrotClusteringEngineTest extends AbstractClusteringTest {
- public void testCarrotLingo() throws Exception {
- checkEngine(getClusteringEngine("default"), 9);
- }
-
- public void testCarrotStc() throws Exception {
- checkEngine(getClusteringEngine("stc"), 2);
- }
-
- public void testWithoutSubclusters() throws Exception {
- checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs),
- 1, 1, 0);
- }
-
- public void testWithSubclusters() throws Exception {
- ModifiableSolrParams params = new ModifiableSolrParams();
- params.set(CarrotParams.OUTPUT_SUB_CLUSTERS, true);
- checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
- params), 1, 1, 2);
- }
-
- public void testNumDescriptions() throws Exception {
- ModifiableSolrParams params = new ModifiableSolrParams();
- params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "labels"), 5);
- params.set(CarrotParams.NUM_DESCRIPTIONS, 3);
- checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
- params), 1, 3, 0);
- }
-
- public void testCarrotAttributePassing() throws Exception {
- ModifiableSolrParams params = new ModifiableSolrParams();
- params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "depth"), 1);
- params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "labels"), 3);
- checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
- params), 1, 3, 0);
- }
-
- private CarrotClusteringEngine getClusteringEngine(String engineName) {
- ClusteringComponent comp = (ClusteringComponent) h.getCore()
- .getSearchComponent("clustering");
- assertNotNull("clustering component should not be null", comp);
- CarrotClusteringEngine engine = (CarrotClusteringEngine) comp
- .getSearchClusteringEngines().get(engineName);
- assertNotNull("clustering engine for name: " + engineName
- + " should not be null", engine);
- return engine;
- }
-
- private List checkEngine(CarrotClusteringEngine engine,
- int expectedNumClusters) throws IOException {
- return checkEngine(engine, expectedNumClusters, new ModifiableSolrParams());
- }
-
- private List checkEngine(CarrotClusteringEngine engine,
- int expectedNumClusters, SolrParams clusteringParams) throws IOException {
- // Get all documents to cluster
- RefCounted<SolrIndexSearcher> ref = h.getCore().getSearcher();
- MatchAllDocsQuery query = new MatchAllDocsQuery();
- DocList docList;
- try {
- SolrIndexSearcher searcher = ref.get();
- docList = searcher.getDocList(query, (Query) null, new Sort(), 0,
- numberOfDocs);
- assertEquals("docList size", this.numberOfDocs, docList.matches());
- } finally {
- ref.decref();
- }
-
- ModifiableSolrParams solrParams = new ModifiableSolrParams();
- solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true");
- solrParams.add(clusteringParams);
-
- // Perform clustering
- LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
- List results = (List)engine.cluster(query, docList, req);
- req.close();
- assertEquals("number of clusters", expectedNumClusters, results.size());
- checkClusters(results, false);
- return results;
- }
-
- private void checkClusters(List results, int expectedDocCount,
- int expectedLabelCount, int expectedSubclusterCount) {
- for (int i = 0; i < results.size(); i++) {
- NamedList cluster = (NamedList) results.get(i);
- checkCluster(cluster, expectedDocCount, expectedLabelCount,
- expectedSubclusterCount);
- }
- }
-
- private void checkClusters(List results, boolean hasSubclusters) {
- for (int i = 0; i < results.size(); i++) {
- checkCluster((NamedList)results.get(i), hasSubclusters );
- }
- }
-
- private void checkCluster(NamedList cluster, boolean hasSubclusters) {
- List docs = (List)cluster.get("docs");
- assertNotNull("docs is null and it shouldn't be", docs);
- for (int j = 0; j < docs.size(); j++) {
- String id = (String) docs.get(j);
- assertNotNull("id is null and it shouldn't be", id);
- }
-
- List labels = (List) cluster.get("labels");
- assertNotNull("labels is null but it shouldn't be", labels);
-
- if (hasSubclusters) {
- List subclusters = (List) cluster.get("clusters");
- assertNotNull("subclusters is null but it shouldn't be", subclusters);
- }
- }
-
- private void checkCluster(NamedList cluster, int expectedDocCount,
- int expectedLabelCount, int expectedSubclusterCount) {
- checkCluster(cluster, expectedSubclusterCount > 0);
- assertEquals("number of docs in cluster", expectedDocCount,
- ((List) cluster.get("docs")).size());
- assertEquals("number of labels in cluster", expectedLabelCount,
- ((List) cluster.get("labels")).size());
-
- if (expectedSubclusterCount > 0) {
- List subclusters = (List) cluster.get("clusters");
- assertEquals("numClusters", expectedSubclusterCount, subclusters.size());
- assertEquals("number of subclusters in cluster",
- expectedSubclusterCount, subclusters.size());
- }
- }
+ public void testCarrotLingo() throws Exception {
+ checkEngine(getClusteringEngine("default"), 9);
+ }
+
+ public void testCarrotStc() throws Exception {
+ checkEngine(getClusteringEngine("stc"), 2);
+ }
+
+ public void testWithoutSubclusters() throws Exception {
+ checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs),
+ 1, 1, 0);
+ }
+
+ public void testWithSubclusters() throws Exception {
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.set(CarrotParams.OUTPUT_SUB_CLUSTERS, true);
+ checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
+ params), 1, 1, 2);
+ }
+
+ public void testNumDescriptions() throws Exception {
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "labels"), 5);
+ params.set(CarrotParams.NUM_DESCRIPTIONS, 3);
+ checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
+ params), 1, 3, 0);
+ }
+
+ public void testCarrotAttributePassing() throws Exception {
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "depth"), 1);
+ params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "labels"), 3);
+ checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
+ params), 1, 3, 0);
+ }
+
+ private CarrotClusteringEngine getClusteringEngine(String engineName) {
+ ClusteringComponent comp = (ClusteringComponent) h.getCore()
+ .getSearchComponent("clustering");
+ assertNotNull("clustering component should not be null", comp);
+ CarrotClusteringEngine engine = (CarrotClusteringEngine) comp
+ .getSearchClusteringEngines().get(engineName);
+ assertNotNull("clustering engine for name: " + engineName
+ + " should not be null", engine);
+ return engine;
+ }
+
+ private List checkEngine(CarrotClusteringEngine engine,
+ int expectedNumClusters) throws IOException {
+ return checkEngine(engine, expectedNumClusters, new ModifiableSolrParams());
+ }
+
+ private List checkEngine(CarrotClusteringEngine engine,
+ int expectedNumClusters, SolrParams clusteringParams) throws IOException {
+ // Get all documents to cluster
+ RefCounted<SolrIndexSearcher> ref = h.getCore().getSearcher();
+ MatchAllDocsQuery query = new MatchAllDocsQuery();
+ DocList docList;
+ try {
+ SolrIndexSearcher searcher = ref.get();
+ docList = searcher.getDocList(query, (Query) null, new Sort(), 0,
+ numberOfDocs);
+ assertEquals("docList size", this.numberOfDocs, docList.matches());
+ } finally {
+ ref.decref();
+ }
+
+ ModifiableSolrParams solrParams = new ModifiableSolrParams();
+ solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true");
+ solrParams.add(clusteringParams);
+
+ // Perform clustering
+ LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
+ List results = (List) engine.cluster(query, docList, req);
+ req.close();
+ assertEquals("number of clusters", expectedNumClusters, results.size());
+ checkClusters(results, false);
+ return results;
+ }
+
+ private void checkClusters(List results, int expectedDocCount,
+ int expectedLabelCount, int expectedSubclusterCount) {
+ for (int i = 0; i < results.size(); i++) {
+ NamedList cluster = (NamedList) results.get(i);
+ checkCluster(cluster, expectedDocCount, expectedLabelCount,
+ expectedSubclusterCount);
+ }
+ }
+
+ private void checkClusters(List results, boolean hasSubclusters) {
+ for (int i = 0; i < results.size(); i++) {
+ checkCluster((NamedList) results.get(i), hasSubclusters);
+ }
+ }
+
+ private void checkCluster(NamedList cluster, boolean hasSubclusters) {
+ List docs = (List) cluster.get("docs");
+ assertNotNull("docs is null and it shouldn't be", docs);
+ for (int j = 0; j < docs.size(); j++) {
+ String id = (String) docs.get(j);
+ assertNotNull("id is null and it shouldn't be", id);
+ }
+
+ List labels = (List) cluster.get("labels");
+ assertNotNull("labels is null but it shouldn't be", labels);
+
+ if (hasSubclusters) {
+ List subclusters = (List) cluster.get("clusters");
+ assertNotNull("subclusters is null but it shouldn't be", subclusters);
+ }
+ }
+
+ private void checkCluster(NamedList cluster, int expectedDocCount,
+ int expectedLabelCount, int expectedSubclusterCount) {
+ checkCluster(cluster, expectedSubclusterCount > 0);
+ assertEquals("number of docs in cluster", expectedDocCount,
+ ((List) cluster.get("docs")).size());
+ assertEquals("number of labels in cluster", expectedLabelCount,
+ ((List) cluster.get("labels")).size());
+
+ if (expectedSubclusterCount > 0) {
+ List subclusters = (List) cluster.get("clusters");
+ assertEquals("numClusters", expectedSubclusterCount, subclusters.size());
+ assertEquals("number of subclusters in cluster",
+ expectedSubclusterCount, subclusters.size());
+ }
+ }
}
Modified:
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
---
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
(original)
+++
lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
Thu Jul 2 14:08:37 2009
@@ -1,69 +1,68 @@
package org.apache.solr.handler.clustering.carrot2;
-import java.util.List;
-
+import com.google.common.collect.Lists;
import org.carrot2.core.*;
import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.core.attribute.Processing;
import org.carrot2.util.attribute.*;
import org.carrot2.util.attribute.constraint.IntRange;
-import com.google.common.collect.Lists;
+import java.util.List;
@Bindable(prefix = "MockClusteringAlgorithm")
public class MockClusteringAlgorithm extends ProcessingComponentBase implements
- IClusteringAlgorithm {
- @Input
- @Processing
- @Attribute(key = AttributeNames.DOCUMENTS)
- private List<Document> documents;
-
- @Output
- @Processing
- @Attribute(key = AttributeNames.CLUSTERS)
- private List<Cluster> clusters;
-
- @Input
- @Processing
- @Attribute
- @IntRange(min = 1, max = 5)
- private int depth = 2;
-
- @Input
- @Processing
- @Attribute
- @IntRange(min = 1, max = 5)
- private int labels = 1;
-
- @Override
- public void process() throws ProcessingException {
- clusters = Lists.newArrayList();
- if (documents == null) {
- return;
- }
-
- int documentIndex = 1;
- for (Document document : documents) {
- StringBuilder label = new StringBuilder("Cluster " + documentIndex);
- Cluster cluster = createCluster(label.toString(), document);
- clusters.add(cluster);
- for (int i = 1; i <= depth; i++) {
- label.append(".");
- label.append(i);
- Cluster newCluster = createCluster(label.toString(), document);
- cluster.addSubclusters(createCluster(label.toString(), document), newCluster);
- cluster = newCluster;
- }
- documentIndex++;
- }
- }
-
- private Cluster createCluster(String labelBase, Document... documents) {
- Cluster cluster = new Cluster();
- for (int i = 0; i < labels; i++) {
- cluster.addPhrases(labelBase + "#" + (i + 1));
- }
- cluster.addDocuments(documents);
- return cluster;
- }
+ IClusteringAlgorithm {
+ @Input
+ @Processing
+ @Attribute(key = AttributeNames.DOCUMENTS)
+ private List<Document> documents;
+
+ @Output
+ @Processing
+ @Attribute(key = AttributeNames.CLUSTERS)
+ private List<Cluster> clusters;
+
+ @Input
+ @Processing
+ @Attribute
+ @IntRange(min = 1, max = 5)
+ private int depth = 2;
+
+ @Input
+ @Processing
+ @Attribute
+ @IntRange(min = 1, max = 5)
+ private int labels = 1;
+
+ @Override
+ public void process() throws ProcessingException {
+ clusters = Lists.newArrayList();
+ if (documents == null) {
+ return;
+ }
+
+ int documentIndex = 1;
+ for (Document document : documents) {
+ StringBuilder label = new StringBuilder("Cluster " + documentIndex);
+ Cluster cluster = createCluster(label.toString(), document);
+ clusters.add(cluster);
+ for (int i = 1; i <= depth; i++) {
+ label.append(".");
+ label.append(i);
+ Cluster newCluster = createCluster(label.toString(), document);
+ cluster.addSubclusters(createCluster(label.toString(), document), newCluster);
+ cluster = newCluster;
+ }
+ documentIndex++;
+ }
+ }
+
+ private Cluster createCluster(String labelBase, Document... documents) {
+ Cluster cluster = new Cluster();
+ for (int i = 0; i < labels; i++) {
+ cluster.addPhrases(labelBase + "#" + (i + 1));
+ }
+ cluster.addDocuments(documents);
+ return cluster;
+ }
}