Author: siren
Date: Sun May 28 11:18:13 2006
New Revision: 409972

URL: http://svn.apache.org/viewvc?rev=409972&view=rev
Log:
clustering web-ui-plugin initial version

Added:
    lucene/nutch/trunk/contrib/web2/plugins/web-clustering/
    lucene/nutch/trunk/contrib/web2/plugins/web-clustering/build.xml
    lucene/nutch/trunk/contrib/web2/plugins/web-clustering/lib/
    lucene/nutch/trunk/contrib/web2/plugins/web-clustering/plugin.xml
    lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/
    lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/conf/
    
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/conf/tiles-defs.xml
    lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/
    lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/
    lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/
    
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/
    
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/
    
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusterResult.java
    
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusteringPresearchExtension.java
    
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/Clusters.java
    
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/webapp/
    
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/webapp/controller/
    
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/webapp/controller/ClusteringController.java
    lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/resources/
    lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/test/
    lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/web/
    
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/web/web-clustering/
    
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/web/web-clustering/cluster.jsp
Modified:
    lucene/nutch/trunk/contrib/web2/plugins/build.xml

Modified: lucene/nutch/trunk/contrib/web2/plugins/build.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/build.xml?rev=409972&r1=409971&r2=409972&view=diff
==============================================================================
--- lucene/nutch/trunk/contrib/web2/plugins/build.xml (original)
+++ lucene/nutch/trunk/contrib/web2/plugins/build.xml Sun May 28 11:18:13 2006
@@ -11,9 +11,10 @@
   <!-- Build & deploy all the plugin jars.                    -->
   <!-- ====================================================== -->
   <target name="deploy">
-     <ant dir="web-caching-oscache" target="deploy"/>
+    <ant dir="web-caching-oscache" target="deploy"/>
     <ant dir="web-more" target="deploy"/>
     <ant dir="web-resources" target="deploy"/>
+    <ant dir="web-clustering" target="deploy"/>
   </target>
 
   <!-- ====================================================== -->
@@ -32,6 +33,7 @@
     <ant dir="web-caching-oscache" target="clean"/>
     <ant dir="web-resources" target="clean"/>
     <ant dir="web-more" target="clean"/>
+    <ant dir="web-clustering" target="clean"/>
   </target>
 
 </project>

Added: lucene/nutch/trunk/contrib/web2/plugins/web-clustering/build.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/build.xml?rev=409972&view=auto
==============================================================================
--- lucene/nutch/trunk/contrib/web2/plugins/web-clustering/build.xml (added)
+++ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/build.xml Sun May 28 
11:18:13 2006
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<!-- Ant build file for the web-clustering web UI plugin. -->
+<project name="web-clustering" default="jar-core">
+       <!-- Shared plugin build logic; NOTE(review): presumably defines jar-core, deploy and clean - confirm in build-plugin.xml -->
+       <import file="../build-plugin.xml" />
+       <!-- Location of the Nutch source tree, four levels above ${root} -->
+       <property name="nutch.root" location="${root}/../../../../" />
+       <!-- Copies the plugin's resources, UI configuration and UI templates into the build and deploy directories -->
+       <target name="init-plugin">
+               <echo>Copying resources templates</echo>
+               <copy todir="${build.classes}/resources">
+                       <fileset dir="${resources.dir}" includes="**/*" />
+               </copy>
+               <echo>Copying UI configuration</echo>
+               <!-- src/conf holds tiles-defs.xml; it is copied to the root of the classes directory -->
+               <copy todir="${build.classes}">
+                       <fileset dir="src/conf" includes="**/*" />
+               </copy>
+               <echo>Copying UI templates</echo>
+               <!-- src/web holds web-clustering/cluster.jsp -->
+               <copy todir="${deploy.dir}/web">
+                       <fileset dir="src/web" includes="**/*" />
+               </copy>
+       </target>
+</project>

Added: lucene/nutch/trunk/contrib/web2/plugins/web-clustering/plugin.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/plugin.xml?rev=409972&view=auto
==============================================================================
--- lucene/nutch/trunk/contrib/web2/plugins/web-clustering/plugin.xml (added)
+++ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/plugin.xml Sun May 
28 11:18:13 2006
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Plugin descriptor for the clustering extension of the web UI. -->
+<plugin
+   id="web-clustering"
+   name="Clustering web interface extension"
+   version="1.0.0"
+   provider-name="apache.org">
+
+   <runtime>
+      <library name="web-clustering.jar">
+         <export name="*"/>
+      </library>
+   </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+        <!--
+        The following is not a proper requirement: this plugin should
+        really require a generic clustering plugin, not any specific
+        implementation.
+
+        (TODO: remove the generic clustering stuff from core and move it
+        into a plugin?)
+        -->
+      <import plugin="clustering-carrot2"/>
+   </requires>
+
+    <!-- Registers this plugin at the UI extension point. -->
+    <extension id="org.apache.nutch.webapp.extension.UIExtensionPoint"
+      name="Clustering extension for Web UI"
+      point="org.apache.nutch.webapp.extension.UIExtensionPoint">
+      <implementation id="web-clustering"
+                      
class="org.apache.nutch.webapp.extension.UIExtension.VoidImplementation"/>
+   </extension>
+   
+   <!-- NOTE(review): the pre-search extension point is declared by this plugin itself; confirm it is not also declared elsewhere. -->
+   <extension-point
+      id="org.apache.nutch.webapp.extension.PreSearchExtensionPoint"
+      name="Pre search extension"/>
+      
+   <!-- Hooks ClusteringPresearchExtension into the pre-search extension point declared above. -->
+   <extension id="org.apache.nutch.webapp.extension.PreSearchExtensionPoint"
+       name="Clustering extension for Web UI"
+       point="org.apache.nutch.webapp.extension.PreSearchExtensionPoint">
+       <implementation id="web-clustering-presearch"
+               class="org.apache.nutch.clustering.ClusteringPresearchExtension"
+               />
+               </extension>
+</plugin>

Added: 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/conf/tiles-defs.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/conf/tiles-defs.xml?rev=409972&view=auto
==============================================================================
--- 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/conf/tiles-defs.xml 
(added)
+++ 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/conf/tiles-defs.xml 
Sun May 28 11:18:13 2006
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE tiles-definitions PUBLIC "-//Apache Software Foundation//DTD Tiles Configuration 1.1//EN"
+        "http://struts.apache.org/dtds/tiles-config_1_1.dtd">
+<tiles-definitions>
+       <!-- "cluster" tile: renders cluster.jsp after ClusteringController has run -->
+       <definition name="cluster"
+         path="/plugin/web-clustering/cluster.jsp"
+         controllerClass="org.apache.nutch.webapp.controller.ClusteringController" />
+</tiles-definitions>
\ No newline at end of file

Added: 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusterResult.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusterResult.java?rev=409972&view=auto
==============================================================================
--- 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusterResult.java
 (added)
+++ 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusterResult.java
 Sun May 28 11:18:13 2006
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.clustering;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.nutch.webapp.common.SearchResultBean;
+
+/**
+ * Wrapper for a single {@link HitsCluster}: exposes a comma-separated label
+ * and a bounded list of sample documents.
+ */
+public class ClusterResult {
+
+  /** Upper bound on the number of documents returned by {@link #getDocs()}. */
+  private int numDocs;
+  /** Comma-separated description labels, built once in the constructor. */
+  private String labelString="";
+  HitsCluster cluster;
+  
+  /**
+   * @param cluster the cluster to wrap
+   * @param numLabels maximum number of description labels joined into the label
+   * @param numDocs maximum number of documents returned by {@link #getDocs()}
+   */
+  public ClusterResult(HitsCluster cluster, int numLabels, int numDocs){
+    this.numDocs=numDocs;
+    this.cluster=cluster;
+    // Fetch the labels once; a cluster without labels keeps the empty label.
+    String[] labels=cluster.getDescriptionLabels();
+    if(labels!=null){
+      int maxSize=Math.min(numLabels, labels.length);
+      StringBuffer buffer=new StringBuffer();
+      for(int i=0;i<maxSize;i++){
+        if(i>0) {
+          buffer.append(", ");
+        }
+        buffer.append(labels[i]);
+      }
+      labelString=buffer.toString();
+    }
+  }
+  
+  /**
+   * @return the comma-separated labels of the cluster, possibly empty
+   */
+  public String getLabel(){
+    return labelString;
+  }  
+  
+  /** Return document samples of the cluster.
+   * 
+   * @return list of SearchResultBean objects, at most numDocs long; empty
+   *         when the cluster has no hits of its own
+   */
+  public List getDocs(){
+    // A cluster may carry no hits at all; guard instead of failing with a
+    // NullPointerException while the page is rendered.
+    if(cluster.getHits()==null){
+      return new ArrayList(0);
+    }
+    // Clamp at zero so a negative limit cannot reach the ArrayList constructor.
+    int maxNumDocs=Math.max(0, Math.min(numDocs,cluster.getHits().length));
+    List docs=new ArrayList(maxNumDocs);
+    for(int i=0;i<maxNumDocs;i++){
+      docs.add(new SearchResultBean(null, null, cluster.getHits()[i],null));
+    }
+    return docs;
+  }
+
+}

Added: 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusteringPresearchExtension.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusteringPresearchExtension.java?rev=409972&view=auto
==============================================================================
--- 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusteringPresearchExtension.java
 (added)
+++ 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusteringPresearchExtension.java
 Sun May 28 11:18:13 2006
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.clustering;
+
+import org.apache.nutch.webapp.common.SearchContext;
+import org.apache.nutch.webapp.extension.PreSearchExtensionPoint;
+
+/**
+ * Pre-search hook that widens the result window so that the clusterer
+ * has enough hits to work with.
+ */
+public class ClusteringPresearchExtension implements PreSearchExtensionPoint {
+
+  /** Minimum number of hits requested from the search. */
+  //TODO set this configurable
+  private static final int MIN_HITS_REQUIRED = 100;
+
+  /** 
+   * This hook is executed before the actual search,
+   * so we have a chance to expand the result window
+   * for the clusterer.
+   *
+   * @param context search context whose required hit count is raised to
+   *        {@link #MIN_HITS_REQUIRED} when it is lower; larger values are
+   *        left untouched
+   */
+  public void doPreSearch(SearchContext context) {
+    int orig=context.getSearch().getHitsRequired();
+
+    if(orig < MIN_HITS_REQUIRED){
+      context.getSearch().setHitsRequired(MIN_HITS_REQUIRED);
+    }
+  }
+}

Added: 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/Clusters.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/Clusters.java?rev=409972&view=auto
==============================================================================
--- 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/Clusters.java
 (added)
+++ 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/Clusters.java
 Sun May 28 11:18:13 2006
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.clustering;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Wrapper object to clustering results: holds one {@link ClusterResult}
+ * per cluster that should be shown.
+ */
+public class Clusters {
+  
+  /** ClusterResult objects, in the order of the source array. */
+  private List clusters;
+
+  /**
+   * @param clustersArray clusters to wrap; null is treated as "no clusters"
+   * @param numClusters maximum number of clusters to wrap
+   * @param numDocs maximum number of sample documents per cluster
+   * @param numLabels maximum number of labels per cluster
+   */
+  public Clusters(final HitsCluster [] clustersArray, final int numClusters,
+      final int numDocs, final int numLabels){
+    clusters=new ArrayList();
+    if(clustersArray==null){
+      return;
+    }
+    // Never read past the end of the array, whatever the caller asked for.
+    final int limit=Math.min(numClusters, clustersArray.length);
+    for(int i=0;i<limit;i++){
+      // ClusterResult expects (cluster, numLabels, numDocs), in that order.
+      clusters.add(new ClusterResult(clustersArray[i], numLabels, numDocs));
+    }
+  }
+  
+  /**
+   * @return list of ClusterResult objects, never null
+   */
+  public List getClusters(){
+    return clusters;
+  }
+  
+  /**
+   * @return true when at least one cluster is available
+   */
+  public boolean getHasClusters(){
+    return clusters.size()>0;
+  }
+
+
+}

Added: 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/webapp/controller/ClusteringController.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/webapp/controller/ClusteringController.java?rev=409972&view=auto
==============================================================================
--- 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/webapp/controller/ClusteringController.java
 (added)
+++ 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/webapp/controller/ClusteringController.java
 Sun May 28 11:18:13 2006
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webapp.controller;
+
+import java.io.IOException;
+
+import javax.servlet.ServletContext;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.nutch.clustering.Clusters;
+import org.apache.nutch.clustering.HitsCluster;
+import org.apache.nutch.clustering.OnlineClusterer;
+import org.apache.nutch.clustering.OnlineClustererFactory;
+import org.apache.nutch.plugin.PluginRuntimeException;
+import org.apache.nutch.searcher.HitDetails;
+import org.apache.nutch.searcher.Summary;
+import org.apache.nutch.webapp.common.ServiceLocator;
+import org.apache.struts.tiles.ComponentContext;
+
+/**
+ * Tiles controller that clusters the hits of the current search and stores
+ * the result in the request under {@link #REQ_ATTR_CLUSTERS}. When
+ * clustering is not possible the attribute is left unset.
+ */
+public class ClusteringController extends NutchController {
+
+  /** Name of the request attribute holding the Clusters object. */
+  public static final String REQ_ATTR_CLUSTERS = "clusters";
+
+  /**
+   * Runs the clusterer over the details and summaries of the current search.
+   *
+   * @param tileContext tiles component context (not used here)
+   * @param request request that receives the clusters attribute
+   * @param response servlet response (not used here)
+   * @param servletContext servlet context (not used here)
+   */
+  public void nutchPerform(ComponentContext tileContext,
+      HttpServletRequest request, HttpServletResponse response,
+      ServletContext servletContext) throws ServletException, IOException {
+
+    ServiceLocator locator = getServiceLocator(request);
+
+    // display top N clusters and top Q documents inside them.
+    int N = locator.getConfiguration().getInt(
+        "extension.clustering.cluster-count", 10);
+    int Q = locator.getConfiguration().getInt(
+        "extension.clustering.cluster-top-documents-count", 3);
+    // TODO move this to configuration
+    int maxLabels = 2;
+
+    OnlineClusterer clusterer = null;
+    try {
+      clusterer = new OnlineClustererFactory(locator.getConfiguration())
+          .getOnlineClusterer();
+    } catch (PluginRuntimeException e) {
+      LOG.info("Could not ionitialize Clusterer, is the plugin enabled?");
+      return;
+    }
+
+    // Without a clusterer there is nothing to do; returning here avoids
+    // dereferencing a null cluster array further down.
+    if (clusterer == null) {
+      LOG.info("No clusterer available, skipping clustering.");
+      return;
+    }
+
+    HitDetails[] details = locator.getSearch().getDetails();
+    Summary[] summaries = locator.getSearch().getSummaries();
+
+    HitsCluster[] clusters = null;
+    final long clusteringStart = System.currentTimeMillis();
+    try {
+      clusters = clusterer.clusterHits(details, Summary.toStrings(summaries));
+      final long clusteringDuration = System.currentTimeMillis()
+          - clusteringStart;
+      LOG.info("Clustering took: " + clusteringDuration + " milliseconds.");
+    } catch (Exception e) {
+      LOG.info("Could not do clustering???" + e);
+      return;
+    }
+
+    // Defensive: treat a null result like a failed clustering run.
+    if (clusters == null) {
+      return;
+    }
+
+    // set new limit if fewer than N results
+    N = Math.min(N, clusters.length);
+
+    //set to request
+    Clusters clusterResult = new Clusters(clusters, N, Q, maxLabels);
+    request.setAttribute(REQ_ATTR_CLUSTERS, clusterResult);
+  }
+}

Added: 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/web/web-clustering/cluster.jsp
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/web/web-clustering/cluster.jsp?rev=409972&view=auto
==============================================================================
--- 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/web/web-clustering/cluster.jsp
 (added)
+++ 
lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/web/web-clustering/cluster.jsp
 Sun May 28 11:18:13 2006
@@ -0,0 +1,51 @@
+<%@ page session="false"%>
+<%@ taglib prefix="tiles" uri="http://jakarta.apache.org/struts/tags-tiles" %>
+<%@ taglib prefix="c" uri="http://java.sun.com/jstl/core" %>
+<%@ taglib prefix="fmt" uri="http://java.sun.com/jstl/fmt" %>
+<%
+// @author Dawid Weiss
+//
+// PERFORMANCE/USER INTERFACE NOTE:
+//
+// What I do here is merely a demonstration. In real life the clustering
+// process should be done in a separate "processing" stream, most likely
+// a separate HTML frame that the user's browser requests data to.
+// We don't want the user to wait with plain snippets until the clusters
+// are created.
+//
+// Also: clustering is resource consuming, so a cache of recent queries 
+// would be in place. Besides, such cache would also be beneficial for the
+// purpose of re-querying existing clusters (remember that the
+// clustering extension may be a heuristic returning a DIFFERENT set of
+// clusters for an identical input).
+// See www.vivisimo.com for details of how this can be done using frames, or
+// http://carrot.cs.put.poznan.pl for an example of a Javascript solution.
+%>
+<%-- Renders the "clusters" request attribute: one label per cluster,
+     followed by a list of links to its sample documents. --%>
+<div id="cluster"><c:choose>
+ <c:when test="${clusters!=null}">
+
+  <c:choose>
+   <c:when test="${clusters.hasClusters}">
+    <c:forEach var="cluster" items="${clusters.clusters}">
+     <div style="margin: 0px; padding: 0px; font-weight: bold;"><c:out
+      value="${cluster.label}"></c:out><br />
+     </div>
+     <%-- list items need an enclosing list element to be valid HTML --%>
+     <ul>
+      <c:forEach var="doc" items="${cluster.docs}">
+       <li><a href="<c:out value="${doc.url}"/>"><c:out
+        value="${doc.title}" /></a></li>
+      </c:forEach>
+     </ul>
+    </c:forEach>
+   </c:when>
+   <c:otherwise>
+    <!--  todo: i18n -->
+No clusters available
+</c:otherwise>
+  </c:choose>
+
+ </c:when>
+ <c:otherwise>
+  <!--  todo: i18n -->
+Unable to do clustering.
+</c:otherwise>
+</c:choose>
+</div>




-------------------------------------------------------
All the advantages of Linux Managed Hosting--Without the Cost and Risk!
Fully trained technicians. The highest number of Red Hat certifications in
the hosting industry. Fanatical Support. Click to learn more
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=107521&bid=248729&dat=121642
_______________________________________________
Nutch-cvs mailing list
Nutch-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nutch-cvs

Reply via email to