Author: siren
Date: Sun Jun 4 12:43:47 2006
New Revision: 411593
URL: http://svn.apache.org/viewvc?rev=411593&view=rev
Log:
initial import of web-keymatch plugin
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/README.txt
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/build.xml
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/keymatches.xml
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/lib/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/plugin.xml
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/conf/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/conf/tiles-defs.xml
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/AbstractFilter.java
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/CountFilter.java
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/DomUtil.java
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/KeyMatch.java
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/KeyMatchFilter.java
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/SimpleKeyMatcher.java
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/ViewCountSorter.java
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/package.html
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/webapp/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/webapp/controller/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/webapp/controller/KeyMatchController.java
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/resources/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/org/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/org/apache/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/org/apache/nutch/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/org/apache/nutch/keymatch/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/org/apache/nutch/keymatch/TestSimpleKeyMatcher.java
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/org/apache/nutch/keymatch/TestViewCountSorter.java
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/web/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/web/web-keymatch/
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/web/web-keymatch/keymatch.jsp
Added: lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/README.txt
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/README.txt?rev=411593&view=auto
==============================================================================
--- lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/README.txt (added)
+++ lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/README.txt Sun Jun 4
12:43:47 2006
@@ -0,0 +1,9 @@
+Instructions
+
+0. see general instructions from web2 README.txt
+1. Copy your keymatches.xml to nutch config directory.
+2. enable the web-keymatch plugin
+3. insert <tiles:insert name="keymatch"/> into web page to enable
+functionality
+4. ant war
+5. deploy war
Added: lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/build.xml
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/build.xml?rev=411593&view=auto
==============================================================================
--- lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/build.xml (added)
+++ lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/build.xml Sun Jun 4
12:43:47 2006
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<project name="web-keymatch" default="jar-core">
+ <import file="../build-plugin.xml" />
+ <property name="nutch.root" location="${root}/../../../../" />
+ <target name="init-plugin">
+ <echo>Copying resources templates</echo>
+ <copy todir="${build.classes}/resources">
+ <fileset dir="${resources.dir}" includes="**/*" />
+ </copy>
+ <echo>Copying UI configuration</echo>
+ <copy todir="${build.classes}">
+ <fileset dir="src/conf" includes="**/*"/>
+ </copy>
+ <echo>Copying UI templates</echo>
+ <copy todir="${deploy.dir}/web">
+ <fileset dir="src/web" includes="**/*"/>
+ </copy>
+ </target>
+</project>
Added: lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/keymatches.xml
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/keymatches.xml?rev=411593&view=auto
==============================================================================
--- lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/keymatches.xml (added)
+++ lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/keymatches.xml Sun Jun
4 12:43:47 2006
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<keymatches>
+ <keymatch type="keyword">
+ <term>mapred</term>
+ <url>http://lucene.apache.org/hadoop/</url>
+ <title>Try Hadoop today!</title>
+ </keymatch>
+ <keymatch type="phrase">
+ <term>search engine</term>
+ <url>http://lucene.apache.org/nutch/</url>
+ <title>Try nutch!</title>
+ </keymatch>
+ <keymatch type="exact">
+ <term>apache search engine</term>
+ <url>http://lucene.apache.org/nutch/</url>
+ <title>Try apache nutch!</title>
+ </keymatch>
+ <keymatch type="exact">
+ <term>kw1 kw2 kw3 kw4</term>
+ <url>url/</url>
+ <title>title</title>
+ </keymatch>
+</keymatches>
\ No newline at end of file
Added: lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/plugin.xml
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/plugin.xml?rev=411593&view=auto
==============================================================================
--- lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/plugin.xml (added)
+++ lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/plugin.xml Sun Jun 4
12:43:47 2006
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<plugin
+ id="web-keymatch"
+ name="KeyMatcher for promoting urls"
+ version="1.0.0"
+ provider-name="apache.org">
+
+ <runtime>
+ <library name="web-keymatch.jar">
+ <export name="*"/>
+ </library>
+ </runtime>
+
+ <requires>
+ <import plugin="nutch-extensionpoints"/>
+ </requires>
+
+ <extension id="org.apache.nutch.webapp.extension.UIExtensionPoint"
+ name="Nutch ui extension point"
+ point="org.apache.nutch.webapp.extension.UIExtensionPoint">
+ <implementation id="web-keymatch"
+
class="org.apache.nutch.webapp.extension.UIExtension.VoidImplementation"/>
+ </extension>
+
+</plugin>
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/conf/tiles-defs.xml
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/conf/tiles-defs.xml?rev=411593&view=auto
==============================================================================
---
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/conf/tiles-defs.xml
(added)
+++
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/conf/tiles-defs.xml
Sun Jun 4 12:43:47 2006
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE tiles-definitions PUBLIC "-//Apache Software Foundation//DTD Tiles
Configuration 1.1//EN"
+ "http://struts.apache.org/dtds/tiles-config_1_1.dtd">
+<tiles-definitions>
+ <definition name="keymatch" extends="decoratedDefinition"
controllerClass="org.apache.nutch.webapp.controller.KeyMatchController">
+ <put name="name" type="string">keymatch</put>
+ <put name="decorator" type="string"
value="/plugin/web-keymatch/keymatch.jsp"/>
+ </definition>
+</tiles-definitions>
\ No newline at end of file
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/AbstractFilter.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/AbstractFilter.java?rev=411593&view=auto
==============================================================================
---
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/AbstractFilter.java
(added)
+++
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/AbstractFilter.java
Sun Jun 4 12:43:47 2006
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.keymatch;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Base class for {@link KeyMatchFilter} implementations that are linked
+ * together into a chain. Subclasses do their own work and then call
+ * <code>super.filter(...)</code> to hand the matches on.
+ */
+public abstract class AbstractFilter implements KeyMatchFilter {
+
+  /** The filter that runs after this one, or null at the end of chain. */
+  KeyMatchFilter next = null;
+
+  /**
+   * Sets the filter this one delegates to.
+   *
+   * @param next the following filter, or null to end the chain here
+   * @see KeyMatchFilter#setNext(KeyMatchFilter)
+   */
+  public void setNext(KeyMatchFilter next) {
+    this.next = next;
+  }
+
+  /**
+   * Passes the matches to the next filter. When there is no next filter
+   * the view count of every match is incremented and the matches are
+   * returned as an array.
+   *
+   * @param matches current List of KeyMatch objects
+   * @param context the evaluation context
+   * @return the result of the next filter, or the matches themselves
+   *         when this filter is the last one
+   * @see KeyMatchFilter#filter(List, Map)
+   */
+  public KeyMatch[] filter(List matches, Map context) {
+    if (next != null) {
+      return next.filter(matches, context);
+    }
+
+    // End of the chain: each match handed back counts as one view.
+    final int total = matches.size();
+    for (int idx = 0; idx < total; idx++) {
+      final KeyMatch shown = (KeyMatch) matches.get(idx);
+      shown.viewCount++;
+    }
+
+    return (KeyMatch[]) matches.toArray(new KeyMatch[total]);
+  }
+}
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/CountFilter.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/CountFilter.java?rev=411593&view=auto
==============================================================================
---
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/CountFilter.java
(added)
+++
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/CountFilter.java
Sun Jun 4 12:43:47 2006
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.keymatch;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * <p>Implementation of KeyMatchFilter that crops the list of matches to
+ * at most a configured number of entries before handing it on.</p>
+ *
+ * <p>The limit is read from the evaluation context under the key
+ * "count". When the value is missing, cannot be read as a number or is
+ * negative, the default of 3 is used.</p>
+ *
+ * @author Sami Siren
+ */
+public class CountFilter extends AbstractFilter {
+
+  /** Context key holding the maximum number of matches to return. */
+  public static final String KEY_COUNT = "count";
+
+  /** Limit applied when the context does not supply a usable one. */
+  public static final int DEFAULT_COUNT = 3;
+
+  /**
+   * Keeps the first <code>count</code> matches and passes them on.
+   *
+   * @param matches current List of matches
+   * @param context the evaluation context, may be null
+   * @return the filtered matches
+   */
+  public KeyMatch[] filter(List matches, Map context) {
+    final int count = resolveCount(context);
+
+    if (matches.size() > count) {
+      return super.filter(matches.subList(0, count), context);
+    }
+    return super.filter(matches, context);
+  }
+
+  /**
+   * Reads the limit from the context.
+   *
+   * @param context the evaluation context, may be null
+   * @return a non-negative limit, DEFAULT_COUNT when none is usable
+   */
+  private static int resolveCount(Map context) {
+    if (context == null) {
+      return DEFAULT_COUNT;
+    }
+
+    final Object value = context.get(KEY_COUNT);
+    int count = DEFAULT_COUNT;
+    if (value instanceof Number) {
+      count = ((Number) value).intValue();
+    } else if (value != null) {
+      try {
+        count = Integer.parseInt(value.toString().trim());
+      } catch (NumberFormatException e) {
+        // Not a number: keep the default instead of failing the request.
+        count = DEFAULT_COUNT;
+      }
+    }
+
+    // subList(0, n) rejects a negative n, so treat it as "not set".
+    return count < 0 ? DEFAULT_COUNT : count;
+  }
+}
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/DomUtil.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/DomUtil.java?rev=411593&view=auto
==============================================================================
---
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/DomUtil.java
(added)
+++
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/DomUtil.java
Sun Jun 4 12:43:47 2006
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.keymatch;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.xerces.parsers.DOMParser;
+import org.w3c.dom.Element;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+/**
+ * Helpers for reading a DOM tree from a stream and writing one back.
+ * Failures are reported on standard error and otherwise swallowed.
+ */
+public class DomUtil {
+
+  /**
+   * Parses the stream as UTF-8 encoded XML.
+   *
+   * @param is stream to read the document from
+   * @return the document (root) element, or null if the stream could
+   *         not be read or parsed
+   */
+  public static Element getDom(InputStream is) {
+    final DOMParser parser = new DOMParser();
+
+    try {
+      final InputSource input = new InputSource(is);
+      input.setEncoding("UTF-8");
+      parser.parse(input);
+      // The first child of a document may be a comment, a processing
+      // instruction or the doctype; getDocumentElement() always yields
+      // the root element itself.
+      return parser.getDocument().getDocumentElement();
+    } catch (SAXException e) {
+      e.printStackTrace();
+    } catch (IOException e) {
+      // also covers FileNotFoundException
+      e.printStackTrace();
+    }
+    return null;
+  }
+
+  /**
+   * Serializes the element, indented, into the output stream and
+   * flushes the stream. The stream is not closed.
+   *
+   * @param os stream to write to
+   * @param e element to serialize
+   */
+  public static void saveDom(OutputStream os, Element e) {
+    final TransformerFactory transFactory = TransformerFactory.newInstance();
+
+    try {
+      final Transformer transformer = transFactory.newTransformer();
+      transformer.setOutputProperty("indent", "yes");
+      transformer.transform(new DOMSource(e), new StreamResult(os));
+      os.flush();
+    } catch (IOException ex) {
+      // also covers UnsupportedEncodingException
+      ex.printStackTrace();
+    } catch (TransformerException ex) {
+      // also covers TransformerConfigurationException
+      ex.printStackTrace();
+    }
+  }
+}
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/KeyMatch.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/KeyMatch.java?rev=411593&view=auto
==============================================================================
---
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/KeyMatch.java
(added)
+++
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/KeyMatch.java
Sun Jun 4 12:43:47 2006
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.keymatch;
+
+import org.apache.xerces.util.DOMUtil;
+import org.w3c.dom.Element;
+
+public class KeyMatch {
+
+ public static final String TAG_TERM = "term";
+
+ public static final String TAG_URL = "url";
+
+ public static final String TAG_TITLE = "title";
+
+ public static final String ATTR_TYPE = "type";
+
+ public static final String TYPES[] = { "keyword", "phrase", "exact" };
+
+ public static final int TYPE_KEYWORD=0;
+ public static final int TYPE_PHRASE=1;
+ public static final int TYPE_EXACT=2;
+
+ static int counter = 0;
+
+ String term;
+ String url;
+ String title;
+ int type;
+ int viewCount=0;
+
+ transient String identifier;
+
+ public KeyMatch() {
+ // generate unique id
+ this.identifier = "m-" + counter++;
+ }
+
+ /**
+  * Creates a keymatch with the given content.
+  *
+  * @param terms the term(s) that trigger this keymatch
+  * @param url the url to promote
+  * @param title the title shown for the url
+  * @param type one of TYPE_KEYWORD, TYPE_PHRASE or TYPE_EXACT; any
+  *        other value is replaced with TYPE_KEYWORD
+  */
+ public KeyMatch(String terms, String url, String title, int type) {
+   this();
+   this.term = terms;
+   this.url = url;
+   this.title = title;
+
+   // Valid indexes into TYPES are 0 .. TYPES.length - 1. The previous
+   // "type > TYPES.length" check let TYPES.length and negative values
+   // through, which made TYPES[type] fail later on.
+   if (type < 0 || type >= TYPES.length) {
+     this.type = TYPE_KEYWORD;
+   } else {
+     this.type = type;
+   }
+ }
+
+ /**
+ * Initialize object from Element.
+ *
+ * <p>Reads, in this order: the trimmed text of the first "term"
+ * element, the "type" attribute, and the trimmed text of the first
+ * "url" and "title" elements below the given element. A type attribute
+ * that is not listed in TYPES leaves the current type untouched.</p>
+ *
+ * <p>Any exception (for example caused by a missing child element) is
+ * swallowed: fields assigned before the failure keep their new value,
+ * the remaining ones keep their old value.</p>
+ *
+ * @param element the element to read term, type, url and title from
+ */
+ public void initialize(final Element element) {
+ try {
+ term = DOMUtil.getChildText(
+ element.getElementsByTagName(TAG_TERM).item(0)).trim();
+
+ // map the attribute text to its index in TYPES
+ String stype = element.getAttribute(ATTR_TYPE);
+ for (int i = 0; i < TYPES.length; i++) {
+ if (TYPES[i].equals(stype)) {
+ type = i;
+ }
+ }
+
+ url = DOMUtil.getChildText(element.getElementsByTagName(TAG_URL).item(0))
+ .trim();
+ title = DOMUtil.getChildText(
+ element.getElementsByTagName(TAG_TITLE).item(0)).trim();
+ } catch (Exception ex) {
+ // ignore: the object is left partially initialized
+ }
+ }
+
+ /**
+  * Fill in element with data from this object: sets the type attribute
+  * and appends term, url and title child elements carrying the values
+  * of this keymatch as text.
+  *
+  * @param element the element to fill in; it must belong to a document
+  */
+ public void populateElement(final Element element) {
+   appendTextChild(element, TAG_TERM, this.term);
+   element.setAttribute(ATTR_TYPE, TYPES[type]);
+   appendTextChild(element, TAG_URL, this.url);
+   appendTextChild(element, TAG_TITLE, this.title);
+ }
+
+ /**
+  * Appends a child element with the given tag name and text content.
+  *
+  * @param parent element to append to
+  * @param tag tag name of the new child
+  * @param text text of the new child, null creates an empty element
+  */
+ private static void appendTextChild(final Element parent,
+     final String tag, final String text) {
+   final Element child = parent.getOwnerDocument().createElement(tag);
+   if (text != null) {
+     // setNodeValue() has no effect on an Element; the text has to be
+     // attached as a text node to end up in the serialized document.
+     child.appendChild(parent.getOwnerDocument().createTextNode(text));
+   }
+   parent.appendChild(child);
+ }
+
+ /**
+ * @return Returns the term.
+ */
+ public String getTerm() {
+ return term;
+ }
+
+ /**
+ * @param term
+ * The term to set.
+ */
+ public void setTerm(final String term) {
+ this.term = term;
+ }
+
+ /**
+ * @return Returns the title.
+ */
+ public String getTitle() {
+ return title;
+ }
+
+ /**
+ * @param title
+ * The title to set.
+ */
+ public void setTitle(final String title) {
+ this.title = title;
+ }
+
+ /**
+ * @return Returns the url.
+ */
+ public String getUrl() {
+ return url;
+ }
+
+ /**
+ * @param url
+ * The url to set.
+ */
+ public void setUrl(final String url) {
+ this.url = url;
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#equals(java.lang.Object)
+ */
+ public boolean equals(Object obj) {
+ if(obj instanceof KeyMatch) {
+ KeyMatch other=(KeyMatch) obj;
+ return (other.type==type && other.term.equals(term) &&
other.title.equals(title) && other.url.equals(url));
+ } else
+ return super.equals(obj);
+ }
+
+ /**
+ * @return Returns the type.
+ */
+ public int getType() {
+ return type;
+ }
+
+ /**
+ * @param type The type to set.
+ */
+ public void setType(int type) {
+ this.type = type;
+ }
+}
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/KeyMatchFilter.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/KeyMatchFilter.java?rev=411593&view=auto
==============================================================================
---
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/KeyMatchFilter.java
(added)
+++
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/KeyMatchFilter.java
Sun Jun 4 12:43:47 2006
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.keymatch;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * <p>A step in a chain of filters that is applied to the keymatches
+ * found for a query before they are returned.</p>
+ * <p>All implementing classes should extend AbstractFilter
+ * </p>
+ */
+public interface KeyMatchFilter {
+
+ /**
+ * Do filtering for matches
+ * @param matches current List of matches
+ * @param context the evaluation context
+ * @return the matches that are left after filtering
+ */
+ public KeyMatch[] filter(List matches, Map context);
+
+ /**
+ * <p>Set the next filter that is processed after this
+ * one</p>
+ * @param filter the filter to set
+ */
+ public void setNext(KeyMatchFilter filter);
+
+}
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/SimpleKeyMatcher.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/SimpleKeyMatcher.java?rev=411593&view=auto
==============================================================================
---
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/SimpleKeyMatcher.java
(added)
+++
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/SimpleKeyMatcher.java
Sun Jun 4 12:43:47 2006
@@ -0,0 +1,339 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.keymatch;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Iterator;
+import java.util.logging.Logger;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.nutch.searcher.Query;
+import org.apache.xerces.dom.DocumentImpl;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+/**
+ * <p>SimpleKeyMatcher is responsible for targeting predefined links for
+ * defined keywords, for example to promote some urls that are not yet
+ * part of the production index.</p>
+ * <p>SimpleKeyMatcher is not a text ad targeting system.</p>
+ * <p>KeyMatcher is configured with xml configuration file:
+ * <br><pre>
+ * <?xml version="1.0"?>
+ * <keymatches>
+ * <keymatch type="keyword|phrase|exact">
+ * <term>search engine</term>
+ * <url>http://lucene.apache.org/nutch</url>
+ * <title>Your favourite search engine!</title>
+ * </keymatch>
+ * </keymatches></pre>
+ * By default Keymatcher expects the file be named keymatches.xml
+ * </p>
+ * <p>Match type can be one of the following: keyword, phrase, exact match.
+ * Terms of a query are produced by the Query object and none of the
+ * matches is case sensitive</p>
+ * <b>keyword</b><br>
+ * Matches on keyword level, for example query "search engine" would match both
+ * keywords search and engine<br>
+ * <br>
+ * <b>phrase</b><br>
+ * Matches phrase, for example: query "open source search engine" "search
engine watch"
+ * would match "search engine", but query "search from engine" would not.<br>
+ * <br>
+ * <b>exact</b><br>
+ * Query "open source search engine" would match "open source search engine",
but not
+ * "search engine" nor "best open source engine"<br>
+ *
+ */
+public class SimpleKeyMatcher extends Configured {
+
+ static final char PREFIX_KEYWORD='k';
+ static final char PREFIX_PHRASE='p';
+ static final char PREFIX_EXACT='e';
+
+ /**
+  * Counts, per number of terms, how many phrase keymatches have been
+  * added, so that lookups can skip phrase lengths that cannot match.
+  */
+ class KeyMatcherStats {
+   /** terms[n] is the number of added phrases consisting of n terms. */
+   int terms[];
+
+   /**
+    * Records one phrase with the given number of terms. Numbers outside
+    * 0 .. terms.length - 1 are not tracked and are ignored.
+    *
+    * @param numTerms number of terms in the phrase
+    */
+   void addStats(int numTerms) {
+     // "<=" allowed numTerms == terms.length and overran the array.
+     if (numTerms >= 0 && numTerms < terms.length) {
+       terms[numTerms]++;
+     }
+   }
+
+   /**
+    * @param size number of slots, phrases with fewer than
+    *        <code>size</code> terms are tracked
+    */
+   public KeyMatcherStats(int size) {
+     // a new int[] is already filled with zeros
+     terms = new int[size];
+   }
+ }
+
+ static final Logger LOG = LogFormatter.getLogger(SimpleKeyMatcher.class
+ .getName());
+
+ public static final String TAG_KEYMATCH = "keymatch";
+
+ public static final String TAG_KEYMATCHES = "keymatches";
+
+ static final String DEFAULT_CONFIG_FILE = "keymatches.xml";
+
+ static final int MAX_TERMS = 5;
+
+ KeyMatcherStats stats;
+ KeyMatchFilter currentFilter;
+
+ HashMap matches = new HashMap();
+ private String configName;
+
+ public SimpleKeyMatcher(Configuration conf) {
+ this(DEFAULT_CONFIG_FILE,conf);
+ }
+
+ /**
+ * Sets currentFilter
+ * @param filter the filter to set
+ */
+ public void setFilter(KeyMatchFilter filter) {
+ this.currentFilter=filter;
+ }
+
+ /**
+ * Construct new SimpleKeyMatcher with provided filename and configuration
+ * @param resourceName
+ * @param conf
+ */
+ public SimpleKeyMatcher(String resourceName, Configuration conf) {
+ super(conf);
+ configName=resourceName;
+ stats = new KeyMatcherStats(MAX_TERMS);
+ currentFilter=new ViewCountSorter();
+ init();
+ }
+
+ /**
+  * Initialize keyword matcher: loads the keymatch entries from the
+  * configuration resource and replaces the current keymatches with
+  * them. When the resource is missing or cannot be parsed the current
+  * keymatches are left as they are.
+  */
+ protected void init() {
+   final InputStream input =
+       getConf().getConfResourceAsInputStream(configName);
+   if (input == null) {
+     return;
+   }
+
+   final Element root = DomUtil.getDom(input);
+   try {
+     input.close();
+   } catch (IOException e1) {
+     LOG.warning("Error closing '" + configName + "': " + e1);
+   }
+
+   // DomUtil.getDom() returns null when the file could not be parsed.
+   if (root == null) {
+     LOG.warning("Could not parse '" + configName
+         + "', no keymatches loaded.");
+     return;
+   }
+
+   final NodeList nodeList = root.getElementsByTagName(TAG_KEYMATCH);
+   LOG.fine("Configuration file has " + nodeList.getLength()
+       + " KeyMatch entries.");
+
+   // Build into a temporary map so that the live map is swapped in one
+   // assignment once everything has been read.
+   final HashMap tempMap = new HashMap();
+   for (int i = 0; i < nodeList.getLength(); i++) {
+     final Element element = (Element) nodeList.item(i);
+     final KeyMatch keyMatch = new KeyMatch();
+     keyMatch.initialize(element);
+     addKeyMatch(tempMap, keyMatch);
+   }
+
+   matches = tempMap;
+ }
+
+ /**
+  * Get keymatches for query. Keyword, phrase and exact keymatches are
+  * collected in that order and then run through the current filter.
+  *
+  * @param query parsed query
+  * @param context evaluation context
+  * @return array of keymatches
+  */
+ public KeyMatch[] getMatches(final Query query, Map context) {
+   final ArrayList currentMatches = new ArrayList();
+   final String terms[] = query.getTerms();
+
+   // "keyword": every term of the query is looked up on its own
+   for (int i = 0; i < terms.length; i++) {
+     LOG.fine("keyword: '" + terms[i] + "'");
+     addMatches(currentMatches, matches.get(PREFIX_KEYWORD + terms[i]));
+   }
+
+   // "phrase": try every run of l consecutive terms, but only for
+   // lengths that stats knows phrases for. stats tracks lengths below
+   // stats.terms.length only, so longer queries must not index past it.
+   final int maxLength = Math.min(terms.length, stats.terms.length - 1);
+   for (int l = 2; l <= maxLength; l++) {
+     if (stats.terms[l] > 0) {
+       for (int p = 0; p <= terms.length - l; p++) {
+         final StringBuffer phrase = new StringBuffer();
+         for (int i = p; i < p + l; i++) {
+           if (i > p) {
+             phrase.append(' ');
+           }
+           phrase.append(terms[i]);
+         }
+         final String key = phrase.toString();
+
+         LOG.fine("phrase key: '" + key + "'");
+         addMatches(currentMatches, matches.get(PREFIX_PHRASE + key));
+       }
+     }
+   }
+
+   // "exact": the whole query as one key
+   final String key = query.toString();
+   LOG.fine("exact key: '" + key + "'");
+   addMatches(currentMatches, matches.get(PREFIX_EXACT + key));
+
+   return currentFilter.filter(currentMatches, context);
+ }
+
+ void addMatches(ArrayList currentMatches, Object match){
+ if(match!=null) {
+ if(match instanceof ArrayList) {
+ currentMatches.addAll(((ArrayList)match));
+ } else {
+ currentMatches.add(match);
+ }
+ }
+ }
+
+ /** Get tokens of a string with nutch Query parser
+ *
+ * @param string
+ * @return
+ */
+ private String[] getTokens(final String string){
+ org.apache.nutch.searcher.Query q;
+ try {
+ q = org.apache.nutch.searcher.Query.parse(string, getConf());
+ return q.getTerms();
+ } catch (IOException e) {
+ LOG.info("Error getting terms from query:" + e);
+ }
+ return new String[0];
+ }
+
+ /**
+ * add new keymatch
+ *
+ * @param keymatch
+ */
+ protected void addKeyMatch(Map map, final KeyMatch keymatch) {
+ String key="";
+
+ LOG.info("Adding keymatch: MATCHTYPE=" + KeyMatch.TYPES[keymatch.type] +
", TERM='" + keymatch.term + "', TITLE='"
+ + keymatch.title + "' ,URL='" + keymatch.url + "'");
+
+ keymatch.term=keymatch.term.toLowerCase();
+ switch (keymatch.type) {
+ case KeyMatch.TYPE_EXACT: key+=PREFIX_EXACT;break;
+ case KeyMatch.TYPE_PHRASE: key+=PREFIX_PHRASE;break;
+ default: key+=PREFIX_KEYWORD;break;
+ }
+
+ //add info obout kw count for optimization
+ if(keymatch.type==KeyMatch.TYPE_PHRASE) {
+ stats.addStats(getTokens(keymatch.term).length);
+ }
+
+ key+=keymatch.term;
+
+ if(map.containsKey(key)) {
+ ArrayList l;
+
+ Object o = matches.get(key);
+ if(o instanceof ArrayList) {
+ l=(ArrayList) o;
+ } else {
+ KeyMatch temp=(KeyMatch)o;
+ l=new ArrayList();
+ l.add(temp);
+ }
+ l.add(keymatch);
+ map.put(key,l);
+ } else {
+ map.put(key, keymatch);
+ }
+ }
+
+ /**
+ * Add Keymatch
+ *
+ */
+ public void addKeyMatch(KeyMatch match){
+ addKeyMatch(matches, match);
+ }
+
+ /**
+  * Saves keymatch configuration into the file the configuration
+  * resource was loaded from.
+  *
+  * @throws IOException if the resource cannot be located or the file
+  *         cannot be opened or closed
+  */
+ public void save() throws IOException {
+   final URL url = getConf().getResource(configName);
+   if (url == null) {
+     throw new IOException("Resource not found: " + configName);
+   }
+
+   // A map value is either a single KeyMatch or an ArrayList of them
+   // when several share a key; flatten before writing.
+   final List all = new ArrayList();
+   final Iterator values = matches.values().iterator();
+   while (values.hasNext()) {
+     final Object value = values.next();
+     if (value instanceof List) {
+       all.addAll((List) value);
+     } else {
+       all.add(value);
+     }
+   }
+
+   // Build the whole tree first so that the file is only opened (and
+   // thereby truncated) once there is something to write.
+   final DocumentImpl doc = new DocumentImpl();
+   final Element keymatches = doc.createElement(TAG_KEYMATCHES);
+   final Iterator iterator = all.iterator();
+   while (iterator.hasNext()) {
+     final KeyMatch keyMatch = (KeyMatch) iterator.next();
+     if (keyMatch == null) {
+       continue;
+     }
+     final Element keymatch = doc.createElement(TAG_KEYMATCH);
+     keyMatch.populateElement(keymatch);
+     keymatches.appendChild(keymatch);
+   }
+
+   // FileNotFoundException is an IOException and propagates as is.
+   final FileOutputStream fos =
+       new FileOutputStream(new File(url.getFile()));
+   try {
+     DomUtil.saveDom(fos, keymatches);
+     fos.flush();
+   } finally {
+     fos.close();
+   }
+ }
+
+ /**
+ * Clear keymatches from this SimpleKeyMatcher instance
+ *
+ */
+ public void clear(){
+ matches=new HashMap();
+ }
+
+ public void setKeyMatches(List keymatches){
+ HashMap hm=new HashMap();
+ Iterator i=keymatches.iterator();
+ while(i.hasNext()) {
+ KeyMatch km=(KeyMatch)i.next();
+ addKeyMatch(hm,km);
+ }
+ matches=hm;
+ }
+
+}
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/ViewCountSorter.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/ViewCountSorter.java?rev=411593&view=auto
==============================================================================
---
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/ViewCountSorter.java
(added)
+++
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/ViewCountSorter.java
Sun Jun 4 12:43:47 2006
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.keymatch;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * <p>Implementation of KeyMatchFilter that sorts
+ * keymatches based on KeyMatch view counts. It allows
+ * simple rotation of matches (keymatches with lower view
+ * counts are "prioritized").</p>
+ */
+public class ViewCountSorter extends AbstractFilter {
+
+ public class ViewCountComparator implements Comparator {
+
+ public int compare(Object o1, Object o2) {
+ return ((KeyMatch)o1).viewCount-((KeyMatch)o2).viewCount;
+ }
+ }
+
+ public KeyMatch[] filter(final List currentMatches, final Map context){
+ Collections.sort(currentMatches, new
ViewCountSorter.ViewCountComparator());
+ return super.filter(currentMatches, context);
+ }
+}
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/package.html
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/package.html?rev=411593&view=auto
==============================================================================
---
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/package.html
(added)
+++
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/keymatch/package.html
Sun Jun 4 12:43:47 2006
@@ -0,0 +1,6 @@
+<html>
+<body>
+ SimpleKeyMatcher is a utility for promoting certain web pages in your
+ search which are not yet part of the production index or have a low score.
+</body>
+</html>
\ No newline at end of file
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/webapp/controller/KeyMatchController.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/webapp/controller/KeyMatchController.java?rev=411593&view=auto
==============================================================================
---
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/webapp/controller/KeyMatchController.java
(added)
+++
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/java/org/apache/nutch/webapp/controller/KeyMatchController.java
Sun Jun 4 12:43:47 2006
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webapp.controller;
+
+import java.io.IOException;
+import java.util.HashMap;
+
+import javax.servlet.ServletContext;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.nutch.keymatch.CountFilter;
+import org.apache.nutch.keymatch.KeyMatch;
+import org.apache.nutch.keymatch.SimpleKeyMatcher;
+import org.apache.nutch.webapp.common.ServiceLocator;
+import org.apache.nutch.webapp.common.Startable;
+import org.apache.struts.tiles.ComponentContext;
+
+ /**
+  * Controller that looks up keymatches for the query of the current search
+  * and exposes them to the view as the request attribute
+  * {@link #ATTR_KEYMATCHES}.
+  */
+ public class KeyMatchController extends NutchController implements Startable {
+
+   /** Name of the request attribute holding the KeyMatch array. */
+   public static final String ATTR_KEYMATCHES = "keymatches";
+
+   /** Matcher created in {@link #start(ServletContext)}. */
+   static SimpleKeyMatcher keymatcher;
+
+   /** Default filter context populated in {@link #start(ServletContext)}. */
+   static HashMap context;
+
+   /**
+    * Resolves the keymatches for the current query and stores them in the
+    * request under {@link #ATTR_KEYMATCHES}.
+    */
+   public void nutchPerform(ComponentContext tileContext,
+       HttpServletRequest request, HttpServletResponse response,
+       ServletContext servletContext) throws ServletException, IOException {
+     ServiceLocator serviceLocator = getServiceLocator(request);
+     // Use the defaults configured in start() instead of shadowing them with
+     // an empty map; each request gets its own copy of the shared map.
+     final HashMap requestContext =
+         (context == null) ? new HashMap() : new HashMap(context);
+     KeyMatch[] matches = keymatcher.getMatches(
+         serviceLocator.getSearch().getQuery(), requestContext);
+     request.setAttribute(ATTR_KEYMATCHES, matches);
+   }
+
+   /**
+    * Creates the shared matcher from the application configuration and
+    * prepares the default filter context.
+    */
+   public void start(ServletContext servletContext) {
+     LOG.info("Starting keymatcher");
+     ServiceLocator serviceLocator = getServiceLocator(servletContext);
+     keymatcher = new SimpleKeyMatcher(serviceLocator.getConfiguration());
+     context = new HashMap();
+     //how many matches
+     context.put(CountFilter.KEY_COUNT, "1");
+     LOG.info("Starting keymatcher ok");
+   }
+ }
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/org/apache/nutch/keymatch/TestSimpleKeyMatcher.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/org/apache/nutch/keymatch/TestSimpleKeyMatcher.java?rev=411593&view=auto
==============================================================================
---
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/org/apache/nutch/keymatch/TestSimpleKeyMatcher.java
(added)
+++
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/org/apache/nutch/keymatch/TestSimpleKeyMatcher.java
Sun Jun 4 12:43:47 2006
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.keymatch;
+
+import java.util.HashMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.searcher.Query;
+import org.apache.nutch.util.NutchConfiguration;
+
+import junit.framework.TestCase;
+
+ /**
+  * Tests for SimpleKeyMatcher, run against the matcher built from the
+  * default Nutch configuration.
+  */
+ public class TestSimpleKeyMatcher extends TestCase {
+
+   SimpleKeyMatcher km;
+
+   Configuration conf;
+
+   /* (non-Javadoc)
+    * @see junit.framework.TestCase#setUp()
+    */
+   protected void setUp() throws Exception {
+     super.setUp();
+     conf = NutchConfiguration.create();
+     km = new SimpleKeyMatcher(conf);
+   }
+
+   /*
+    * Test method for 'org.apache.nutch.keymatch.SimpleKeyMatcher.getMatches',
+    * called with a parsed Query and a context map.
+    */
+   public void testGetMatches() throws Exception {
+     //keyword
+     KeyMatch[] matches = getKeyMatchesForString("kw1 kw2 kw3 auto");
+     assertEquals(1, matches.length);
+
+     //phrase
+     matches = getKeyMatchesForString("search engine");
+     assertEquals(1, matches.length);
+
+     //exact + phrase
+     matches = getKeyMatchesForString("apache search engine");
+     assertEquals(2, matches.length);
+
+     //exact
+     matches = getKeyMatchesForString("kw1 kw2 kw3 kw4");
+     assertEquals(1, matches.length);
+
+     matches = getKeyMatchesForString("kw2 kw2 kw3 kw4");
+     assertEquals(0, matches.length);
+   }
+
+   /*
+    * Test method for
+    * 'org.apache.nutch.keymatch.SimpleKeyMatcher.addKeyMatch(KeyMatch)'
+    */
+   public void testAddKeyMatch() throws Exception {
+     KeyMatch keymatch = new KeyMatch("httpd", "http://www.apache.org/", "apache",
+         KeyMatch.TYPE_KEYWORD);
+     km.addKeyMatch(keymatch);
+     KeyMatch[] matched = getKeyMatchesForString("httpd");
+     // fail with a message instead of an ArrayIndexOutOfBoundsException
+     assertTrue("no keymatch returned for 'httpd'", matched.length > 0);
+     assertEquals(keymatch, matched[0]);
+   }
+
+   /**
+    * Parses the given string into a query and returns its keymatches.
+    * Exceptions are propagated so that a parsing or matching failure fails
+    * the test instead of being reported as "no matches".
+    */
+   private KeyMatch[] getKeyMatchesForString(String string) throws Exception {
+     HashMap context = new HashMap();
+     context.put(CountFilter.KEY_COUNT, "1");
+     Query q = Query.parse(string, conf);
+     return km.getMatches(q, context);
+   }
+ }
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/org/apache/nutch/keymatch/TestViewCountSorter.java
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/org/apache/nutch/keymatch/TestViewCountSorter.java?rev=411593&view=auto
==============================================================================
---
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/org/apache/nutch/keymatch/TestViewCountSorter.java
(added)
+++
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/test/org/apache/nutch/keymatch/TestViewCountSorter.java
Sun Jun 4 12:43:47 2006
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.keymatch;
+
+import java.util.HashMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.searcher.Query;
+import org.apache.nutch.util.NutchConfiguration;
+
+import junit.framework.TestCase;
+
+ /**
+  * Tests ViewCountSorter chained with a CountFilter: three keymatches share
+  * the keyword "kw1" and three consecutive lookups are expected to return
+  * three different ones.
+  */
+ public class TestViewCountSorter extends TestCase {
+
+   SimpleKeyMatcher km;
+   Configuration conf;
+
+   /* (non-Javadoc)
+    * @see junit.framework.TestCase#setUp()
+    */
+   protected void setUp() throws Exception {
+     super.setUp();
+     conf = NutchConfiguration.create();
+     km = new SimpleKeyMatcher(conf);
+     km.clear();
+     km.addKeyMatch(new KeyMatch("kw1", "u1", "t1", KeyMatch.TYPE_KEYWORD));
+     km.addKeyMatch(new KeyMatch("kw1", "u2", "t2", KeyMatch.TYPE_KEYWORD));
+     km.addKeyMatch(new KeyMatch("kw1", "u3", "t3", KeyMatch.TYPE_KEYWORD));
+     ViewCountSorter vcs = new ViewCountSorter();
+     vcs.setNext(new CountFilter());
+     km.setFilter(vcs);
+   }
+
+   /*
+    * Test method for
+    * 'org.apache.nutch.keymatch.ViewCountSorter.filter(List, Map)'
+    */
+   public void testFilter() throws Exception {
+     KeyMatch m1 = firstMatchFor("kw1");
+     KeyMatch m2 = firstMatchFor("kw1");
+     KeyMatch m3 = firstMatchFor("kw1");
+
+     assertFalse(m1.equals(m2));
+     assertFalse(m2.equals(m3));
+     assertFalse(m1.equals(m3));
+   }
+
+   /**
+    * Returns the first keymatch for the given query string, failing the
+    * test with a message when there is none.
+    */
+   private KeyMatch firstMatchFor(String string) throws Exception {
+     KeyMatch[] matches = getKeyMatchesForString(string);
+     assertTrue("no keymatch returned for '" + string + "'", matches.length > 0);
+     assertNotNull(matches[0]);
+     return matches[0];
+   }
+
+   /**
+    * Parses the given string into a query and returns its keymatches.
+    * Exceptions are propagated so that a parsing or matching failure fails
+    * the test instead of being reported as "no matches".
+    */
+   private KeyMatch[] getKeyMatchesForString(String string) throws Exception {
+     HashMap context = new HashMap();
+     context.put(CountFilter.KEY_COUNT, "1");
+     Query q = Query.parse(string, conf);
+     return km.getMatches(q, context);
+   }
+
+ }
Added:
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/web/web-keymatch/keymatch.jsp
URL:
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/web/web-keymatch/keymatch.jsp?rev=411593&view=auto
==============================================================================
---
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/web/web-keymatch/keymatch.jsp
(added)
+++
lucene/nutch/trunk/contrib/web2/plugins/web-keymatch/src/web/web-keymatch/keymatch.jsp
Sun Jun 4 12:43:47 2006
@@ -0,0 +1,11 @@
+ <%@ page session="false"%>
+ <%@ taglib prefix="tiles" uri="http://jakarta.apache.org/struts/tags-tiles"%>
+ <%@ taglib prefix="c" uri="http://java.sun.com/jstl/core"%>
+ <%@ taglib prefix="fmt" uri="http://java.sun.com/jstl/fmt"%>
+ <%--
+   Renders every entry of the "keymatches" attribute as a link followed by a
+   line break, using the entry's "title" as the link text.
+   NOTE(review): the href is built as search.do?query= followed by the
+   entry's "url". c:out XML-escapes its value by default but does not
+   URL-encode it; confirm whether the url should be URL-encoded here, or
+   whether the link was meant to point at the url directly.
+   NOTE(review): the "tiles" and "fmt" tag libraries are declared but not
+   used in this fragment.
+ --%>
+ <div id="keymatch">
+ <c:forEach var="keymatch" items="${keymatches}">
+ <a href="search.do?query=<c:out value="${keymatch.url}"/>">
+ <c:out value="${keymatch.title}"/>
+ </a><br/>
+ </c:forEach>
+ </div>