Author: siren Date: Sun May 28 11:18:13 2006 New Revision: 409972 URL: http://svn.apache.org/viewvc?rev=409972&view=rev Log: clustering web-ui-plugin initial version
Added: lucene/nutch/trunk/contrib/web2/plugins/web-clustering/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/build.xml lucene/nutch/trunk/contrib/web2/plugins/web-clustering/lib/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/plugin.xml lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/conf/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/conf/tiles-defs.xml lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusterResult.java lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusteringPresearchExtension.java lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/Clusters.java lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/webapp/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/webapp/controller/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/webapp/controller/ClusteringController.java lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/resources/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/test/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/web/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/web/web-clustering/ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/web/web-clustering/cluster.jsp Modified: lucene/nutch/trunk/contrib/web2/plugins/build.xml Modified: lucene/nutch/trunk/contrib/web2/plugins/build.xml URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/build.xml?rev=409972&r1=409971&r2=409972&view=diff ============================================================================== --- lucene/nutch/trunk/contrib/web2/plugins/build.xml (original) +++ lucene/nutch/trunk/contrib/web2/plugins/build.xml Sun May 28 11:18:13 2006 @@ -11,9 +11,10 @@ <!-- Build & deploy all the plugin jars. --> <!-- ====================================================== --> <target name="deploy"> - <ant dir="web-caching-oscache" target="deploy"/> + <ant dir="web-caching-oscache" target="deploy"/> <ant dir="web-more" target="deploy"/> <ant dir="web-resources" target="deploy"/> + <ant dir="web-clustering" target="deploy"/> </target> <!-- ====================================================== --> @@ -32,6 +33,7 @@ <ant dir="web-caching-oscache" target="clean"/> <ant dir="web-resources" target="clean"/> <ant dir="web-more" target="clean"/> + <ant dir="web-clustering" target="clean"/> </target> </project> Added: lucene/nutch/trunk/contrib/web2/plugins/web-clustering/build.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/build.xml?rev=409972&view=auto ============================================================================== --- lucene/nutch/trunk/contrib/web2/plugins/web-clustering/build.xml (added) +++ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/build.xml Sun May 28 11:18:13 2006 @@ -0,0 +1,19 @@ +<?xml version="1.0"?> +<project name="web-clustering" default="jar-core"> + <import file="../build-plugin.xml" /> + <property name="nutch.root" location="${root}/../../../../" /> + <target name="init-plugin"> + <echo>Copying resources templates</echo> + <copy todir="${build.classes}/resources"> + <fileset dir="${resources.dir}" includes="**/*" /> + </copy> + <echo>Copying UI configuration</echo> + <copy todir="${build.classes}"> + <fileset dir="src/conf" includes="**/*" /> + </copy> + <echo>Copying UI templates</echo> + <copy 
todir="${deploy.dir}/web"> + <fileset dir="src/web" includes="**/*" /> + </copy> + </target> +</project> Added: lucene/nutch/trunk/contrib/web2/plugins/web-clustering/plugin.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/plugin.xml?rev=409972&view=auto ============================================================================== --- lucene/nutch/trunk/contrib/web2/plugins/web-clustering/plugin.xml (added) +++ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/plugin.xml Sun May 28 11:18:13 2006 @@ -0,0 +1,45 @@ +<?xml version="1.0" encoding="UTF-8"?> +<plugin + id="web-clustering" + name="Clustering web interface extension" + version="1.0.0" + provider-name="apache.org"> + + <runtime> + <library name="web-clustering.jar"> + <export name="*"/> + </library> + </runtime> + + <requires> + <import plugin="nutch-extensionpoints"/> + <!-- + The following is not a proper requirement: this should really require + a generic clustering plugin, not any specific implementation. + + (TODO: remove generic clustering stuff from core and move it + into a plugin?) 
+ + --> + <import plugin="clustering-carrot2"/> + </requires> + + <extension id="org.apache.nutch.webapp.extension.UIExtensionPoint" + name="Clustering extension for Web UI" + point="org.apache.nutch.webapp.extension.UIExtensionPoint"> + <implementation id="web-clustering" + class="org.apache.nutch.webapp.extension.UIExtension.VoidImplementation"/> + </extension> + + <extension-point + id="org.apache.nutch.webapp.extension.PreSearchExtensionPoint" + name="Pre search extension"/> + + <extension id="org.apache.nutch.webapp.extension.PreSearchExtensionPoint" + name="Clustering extension for Web UI" + point="org.apache.nutch.webapp.extension.PreSearchExtensionPoint"> + <implementation id="web-clustering-presearch" + class="org.apache.nutch.clustering.ClusteringPresearchExtension" + /> + </extension> +</plugin> Added: lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/conf/tiles-defs.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/conf/tiles-defs.xml?rev=409972&view=auto ============================================================================== --- lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/conf/tiles-defs.xml (added) +++ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/conf/tiles-defs.xml Sun May 28 11:18:13 2006 @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE tiles-definitions PUBLIC "-//Apache Software Foundation//DTD Tiles Configuration 1.1//EN" + "http://struts.apache.org/dtds/tiles-config_1_1.dtd"> +<tiles-definitions> + <definition name="cluster" + path="/plugin/web-clustering/cluster.jsp" + controllerClass="org.apache.nutch.webapp.controller.ClusteringController" /> +</tiles-definitions> \ No newline at end of file Added: lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusterResult.java URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusterResult.java?rev=409972&view=auto ============================================================================== --- lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusterResult.java (added) +++ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusterResult.java Sun May 28 11:18:13 2006 @@ -0,0 +1,61 @@ +/* + * Copyright 2006 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nutch.clustering; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.nutch.webapp.common.SearchResultBean; + +/** Wrapper for HitsCluster + * + */ +public class ClusterResult { + + private int numDocs; + private String labelString=""; + HitsCluster cluster; + + public ClusterResult(HitsCluster cluster, int numLabels, int numDocs){ + this.numDocs=numDocs; + this.cluster=cluster; + int maxSize=Math.min(numLabels, cluster.getDescriptionLabels().length); + for(int i=0;i<maxSize;i++){ + if(i>0) { + labelString+=", "; + } + labelString+=cluster.getDescriptionLabels()[i]; + } + } + + public String getLabel(){ + return labelString; + } + + /** return document samples of cluster + * + * @return + */ + public List getDocs(){ + int maxNumDocs=Math.min(numDocs,cluster.getHits().length); + List docs=new ArrayList(maxNumDocs); + for(int i=0;i<maxNumDocs;i++){ + docs.add(new SearchResultBean(null, null, cluster.getHits()[i],null)); + } + return docs; + } + +} Added: lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusteringPresearchExtension.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusteringPresearchExtension.java?rev=409972&view=auto ============================================================================== --- lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusteringPresearchExtension.java (added) +++ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/ClusteringPresearchExtension.java Sun May 28 11:18:13 2006 @@ -0,0 +1,37 @@ +/* + * Copyright 2006 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nutch.clustering; + +import org.apache.nutch.webapp.common.SearchContext; +import org.apache.nutch.webapp.extension.PreSearchExtensionPoint; + +public class ClusteringPresearchExtension implements PreSearchExtensionPoint { + + /** + * This hook is executed before actual search + * so we have a change to expand the result window + * for clusterer + */ + public void doPreSearch(SearchContext context) { + System.out.println("Woohoo, executing presearch for clustering"); + int orig=context.getSearch().getHitsRequired(); + + //TODO set this configurable + if(orig < 100){ + context.getSearch().setHitsRequired(100); + } + } +} Added: lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/Clusters.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/Clusters.java?rev=409972&view=auto ============================================================================== --- lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/Clusters.java (added) +++ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/clustering/Clusters.java Sun May 28 11:18:13 2006 @@ -0,0 +1,44 @@ +/* + * Copyright 2006 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nutch.clustering; + +import java.util.ArrayList; +import java.util.List; + +/** + * Wrapper object to clustering results + */ +public class Clusters { + + private List clusters; + + public Clusters(final HitsCluster [] clustersArray, final int numClusters, final int numDocs, final int numLabels){ + clusters=new ArrayList(); + for(int i=0;i<numClusters;i++){ + clusters.add(new ClusterResult(clustersArray[i], numDocs, numLabels)); + } + } + + public List getClusters(){ + return clusters; + } + + public boolean getHasClusters(){ + return clusters.size()>0; + } + + +} Added: lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/webapp/controller/ClusteringController.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/webapp/controller/ClusteringController.java?rev=409972&view=auto ============================================================================== --- lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/webapp/controller/ClusteringController.java (added) +++ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/java/org/apache/nutch/webapp/controller/ClusteringController.java Sun May 28 11:18:13 2006 @@ -0,0 +1,90 @@ +/* + * Copyright 2006 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nutch.webapp.controller; + +import java.io.IOException; + +import javax.servlet.ServletContext; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.nutch.clustering.Clusters; +import org.apache.nutch.clustering.HitsCluster; +import org.apache.nutch.clustering.OnlineClusterer; +import org.apache.nutch.clustering.OnlineClustererFactory; +import org.apache.nutch.plugin.PluginRuntimeException; +import org.apache.nutch.searcher.HitDetails; +import org.apache.nutch.searcher.Summary; +import org.apache.nutch.webapp.common.ServiceLocator; +import org.apache.struts.tiles.ComponentContext; + +public class ClusteringController extends NutchController { + + public static final String REQ_ATTR_CLUSTERS = "clusters"; + + public void nutchPerform(ComponentContext tileContext, + HttpServletRequest request, HttpServletResponse response, + ServletContext servletContext) throws ServletException, IOException { + + ServiceLocator locator = getServiceLocator(request); + + int HITS_TO_CLUSTER = locator.getConfiguration().getInt( + "extension.clustering.hits-to-cluster", 100); + + // display top N clusters and top Q documents inside them. 
+ // TODO move these to configuration + int N = locator.getConfiguration().getInt( + "extension.clustering.cluster-count", 10); + int Q = locator.getConfiguration().getInt( + "extension.clustering.cluster-top-documents-count", 3); + int maxLabels = 2; + + OnlineClusterer clusterer = null; + try { + clusterer = new OnlineClustererFactory(locator.getConfiguration()) + .getOnlineClusterer(); + } catch (PluginRuntimeException e) { + LOG.info("Could not ionitialize Clusterer, is the plugin enabled?"); + return; + } + + HitDetails[] details = locator.getSearch().getDetails(); + Summary[] summaries = locator.getSearch().getSummaries(); + + HitsCluster[] clusters = null; + if (clusterer != null) { + final long clusteringStart = System.currentTimeMillis(); + try { + clusters = clusterer.clusterHits(details, Summary.toStrings(summaries)); + final long clusteringDuration = System.currentTimeMillis() + - clusteringStart; + LOG.info("Clustering took: " + clusteringDuration + " milliseconds."); + + } catch (Exception e) { + LOG.info("Could not do clustering???" 
+ e); + return; + } + } + + // set new limit if fever than N results + N = Math.min(N, clusters.length); + + //set to request + Clusters clusterResult = new Clusters(clusters, N, Q, maxLabels); + request.setAttribute(REQ_ATTR_CLUSTERS, clusterResult); + } +} Added: lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/web/web-clustering/cluster.jsp URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/web/web-clustering/cluster.jsp?rev=409972&view=auto ============================================================================== --- lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/web/web-clustering/cluster.jsp (added) +++ lucene/nutch/trunk/contrib/web2/plugins/web-clustering/src/web/web-clustering/cluster.jsp Sun May 28 11:18:13 2006 @@ -0,0 +1,51 @@ +<%@ page session="false"%> +<%@ taglib prefix="tiles" uri="http://jakarta.apache.org/struts/tags-tiles" %> +<%@ taglib prefix="c" uri="http://java.sun.com/jstl/core" %> +<%@ taglib prefix="fmt" uri="http://java.sun.com/jstl/fmt" %> +<% +// @author Dawid Weiss +// +// PERFORMANCE/USER INTERFACE NOTE: +// +// What I do here is merely a demonstration. In real life the clustering +// process should be done in a separate "processing" stream, most likely +// a separate HTML frame that the user's browser requests data to. +// We don't want the user to wait with plain snippets until the clusters +// are created. +// +// Also: clustering is resource consuming, so a cache of recent queries +// would be in place. Besides, such cache would also be beneficial for the +// purpose of re-querying existing clusters (remember that the +// clustering extension may be a heuristic returning a DIFFERENT set of +// clusters for an identical input). +// See www.vivisimo.com for details of how this can be done using frames, or +// http://carrot.cs.put.poznan.pl for an example of a Javascript solution. 
+%>
+<%-- Renders the "clusters" request attribute: one bold label per cluster
+     followed by a list of links to its sample documents. --%>
+<div id="cluster"><c:choose>
+  <c:when test="${clusters!=null}">
+
+    <c:choose>
+      <c:when test="${clusters.hasClusters}">
+        <c:forEach var="cluster" items="${clusters.clusters}">
+          <div style="margin: 0px; padding: 0px; font-weight: bold;"><c:out
+            value="${cluster.label}"></c:out><br />
+          </div>
+          <%-- list items must be children of a list element to be valid HTML --%>
+          <ul>
+          <c:forEach var="doc" items="${cluster.docs}">
+            <li><a href="<c:out value="${doc.url}"/>"><c:out
+              value="${doc.title}" /></a></li>
+          </c:forEach>
+          </ul>
+        </c:forEach>
+      </c:when>
+      <c:otherwise>
+        <!-- todo: i18n -->
+No clusters available
+</c:otherwise>
+    </c:choose>
+
+  </c:when>
+  <c:otherwise>
+    <!-- todo: i18n -->
+Unable to do clustering.
+</c:otherwise>
+</c:choose>
+</div>
-------------------------------------------------------
All the advantages of Linux Managed Hosting--Without the Cost and Risk! Fully trained technicians. The highest number of Red Hat certifications in the hosting industry. Fanatical Support. Click to learn more http://sel.as-us.falkag.net/sel?cmd=lnk&kid=107521&bid=248729&dat=121642
_______________________________________________
Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs