Author: siren
Date: Fri May 12 11:47:30 2006
New Revision: 405831
URL: http://svn.apache.org/viewcvs?rev=405831&view=rev
Log:
Keeping up with Nutch changes, cleaning up, fixed URLs in OpenSearchServlet
Added:
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/NutchHttpServlet.java
Modified:
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Preferences.java
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/SearchResultBean.java
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/CachedServlet.java
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/OpenSearchServlet.java
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/tiles/ExtendableDefinitionsFactory.java
Modified:
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Preferences.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Preferences.java?rev=405831&r1=405830&r2=405831&view=diff
==============================================================================
---
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Preferences.java
(original)
+++
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Preferences.java
Fri May 12 11:47:30 2006
@@ -80,10 +80,8 @@
public static void setPreferencesCookie(HttpServletRequest request,
HttpServletResponse response, Preferences prefs) {
if (defaults.equals(prefs)) {
- System.out.println("default qeuals prefs, removing");
removeCookie(response);
} else {
- System.out.println("setting preferences to cookie");
setPreferencesCookie(response, prefs);
}
}
@@ -142,17 +140,13 @@
* @return parsed Preferences
*/
public static Preferences parse(String data, String valueValueSeparator,
String keyValueSeparator) {
-
- System.out.println("data:" + data);
Preferences p = new Preferences();
p.putAll(defaults);
String[] dataitems = data.split(valueValueSeparator);
- System.out.println(dataitems.length + " dataitems submitted");
for (int i = 0; i < dataitems.length; i++) {
String keyvalue[] = dataitems[i].split(keyValueSeparator);
if (keyvalue.length == 2) {
try {
- System.out.println("adding:" + keyvalue[0] + "=" + keyvalue[1]);
p.put(keyvalue[0], URLDecoder.decode((String)keyvalue[1],"UTF-8"));
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
@@ -201,8 +195,6 @@
txt.append(DEFAULTVALVALSEPARATOR);
}
- System.out.println("toString():" + txt.toString());
-
return txt.toString();
}
Modified:
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java?rev=405831&r1=405830&r2=405831&view=diff
==============================================================================
---
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java
(original)
+++
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java
Fri May 12 11:47:30 2006
@@ -27,6 +27,7 @@
import org.apache.nutch.searcher.Hits;
import org.apache.nutch.searcher.NutchBean;
import org.apache.nutch.searcher.Query;
+import org.apache.nutch.searcher.Summary;
/**
* Search is a bean that represents an ongoing search.
@@ -45,8 +46,6 @@
Query query;
- int maxHits;
-
int startOffset;
int hitsPerDup;
@@ -63,7 +62,7 @@
HitDetails[] details;
- String[] summaries;
+ Summary[] summaries;
ArrayList results = null;
@@ -83,14 +82,14 @@
public void performSearch(NutchBean bean) {
try {
- hits = bean.search(getQuery(), getStartOffset() + getMaxHits(),
+ hits = bean.search(getQuery(), getStartOffset() + getHitsPerPage(),
getHitsPerSite(), getDupField(), getSortColumn(), isSortDesc());
} catch (IOException e) {
hits = new Hits(0, new Hit[0]);
}
int realEnd = (int) Math.min(hits.getLength(), getStartOffset()
- + getMaxHits());
+ + getHitsPerPage());
int endOffset=hits.getLength();
@@ -158,8 +157,6 @@
hitsPerDup = parseInt(form.getValueString(SearchForm.NAME_HITSPERDUP),
prefs.getInt(
Preferences.KEY_HITS_PER_DUP, 2));
- maxHits = hitsPerPage;
-
sortColumn = form.getValueString(SearchForm.NAME_SORTCOLUMN);
sortDesc = (sortColumn != null && "true".equals(form
@@ -207,22 +204,7 @@
protected void setHitsPerSite(int hitsPerSite) {
this.hitsPerDup = hitsPerSite;
}
-
- /**
- * @return Returns the maxHits.
- */
- public int getMaxHits() {
- return maxHits;
- }
-
- /**
- * @param maxHits
- * The maxHits to set.
- */
- protected void setMaxHits(int maxHits) {
- this.maxHits = maxHits;
- }
-
+
/**
* @return Returns the query.
*/
@@ -365,7 +347,7 @@
/**
* @return Returns the summaries.
*/
- public String[] getSummaries() {
+ public Summary[] getSummaries() {
return summaries;
}
@@ -373,7 +355,7 @@
* @param summaries
* The summaries to set.
*/
- protected void setSummaries(String[] summaries) {
+ protected void setSummaries(Summary[] summaries) {
this.summaries = summaries;
}
Modified:
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/SearchResultBean.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/SearchResultBean.java?rev=405831&r1=405830&r2=405831&view=diff
==============================================================================
---
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/SearchResultBean.java
(original)
+++
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/SearchResultBean.java
Fri May 12 11:47:30 2006
@@ -21,6 +21,8 @@
import org.apache.nutch.html.Entities;
import org.apache.nutch.searcher.Hit;
import org.apache.nutch.searcher.HitDetails;
+import org.apache.nutch.searcher.Summary;
+import org.apache.nutch.searcher.Summary.Fragment;
/**
* SearchResultBean contains information about one search result in easily
@@ -30,14 +32,14 @@
Hit hit;
- String summary;
+ Summary summary;
HitDetails details;
Search search;
public SearchResultBean(Search search, Hit hit, HitDetails details,
- String summary) {
+ Summary summary) {
this.search = search;
this.hit = hit;
this.details = details;
@@ -78,7 +80,21 @@
* @return
*/
public String getSummary() {
- return summary;
+
+ StringBuffer sum = new StringBuffer();
+ Fragment[] fragments = summary.getFragments();
+ for (int j=0; j<fragments.length; j++) {
+ if (fragments[j].isHighlight()) {
+ sum.append("<span class=\"highlight\">")
+ .append(Entities.encode(fragments[j].getText()))
+ .append("</span>");
+ } else if (fragments[j].isEllipsis()) {
+ sum.append("<span class=\"ellipsis\"> ... </span>");
+ } else {
+ sum.append(Entities.encode(fragments[j].getText()));
+ }
+ }
+ return sum.toString();
}
/**
@@ -167,7 +183,7 @@
*
* @return true if more dups available
*/
- public Boolean getHasMore() {
- return new Boolean(hit.moreFromDupExcluded());
+ public boolean getHasMore() {
+ return hit.moreFromDupExcluded();
}
}
Modified:
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/CachedServlet.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/CachedServlet.java?rev=405831&r1=405830&r2=405831&view=diff
==============================================================================
---
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/CachedServlet.java
(original)
+++
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/CachedServlet.java
Fri May 12 11:47:30 2006
@@ -21,11 +21,9 @@
import org.apache.nutch.searcher.NutchBean;
import org.apache.nutch.searcher.Hit;
import org.apache.nutch.searcher.HitDetails;
-import org.apache.nutch.webapp.common.ServiceLocator;
-import org.apache.nutch.webapp.common.ServletContextServiceLocator;
import javax.servlet.ServletConfig;
-import javax.servlet.http.HttpServlet;
+import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
@@ -41,16 +39,12 @@
* @author John Xing
*/
-public class CachedServlet extends HttpServlet {
+public class CachedServlet extends NutchHttpServlet {
private static final long serialVersionUID = 1L;
- NutchBean bean = null;
-
- public void init(ServletConfig conf) {
- ServiceLocator locator = ServletContextServiceLocator.getInstance(conf
- .getServletContext());
- bean = locator.getNutchBean();
+ public void init(ServletConfig conf) throws ServletException {
+ super.init(conf);
}
public void destroy() {
@@ -61,21 +55,17 @@
public void doGet(HttpServletRequest request, HttpServletResponse response)
throws IOException {
- // quit if no bean
- if (bean == null)
- return;
-
NutchBean.LOG.info("request from " + request.getRemoteAddr());
Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")), Integer
.parseInt(request.getParameter("id")));
- HitDetails details = bean.getDetails(hit);
+ HitDetails details = getServiceLocator().getNutchBean().getDetails(hit);
// raw bytes
- byte[] bytes = bean.getContent(details);
+ byte[] bytes = getServiceLocator().getNutchBean().getContent(details);
// pass all original headers? only these for now.
- Metadata metadata = bean.getParseData(details).getContentMeta();
+ Metadata metadata =
getServiceLocator().getNutchBean().getParseData(details).getContentMeta();
String contentType = metadata.get(Response.CONTENT_TYPE);
// String lastModified = metadata.get(Metadata.LAST_MODIFIED);
// String contentLength = metadata.get(Metadata.CONTENT_LENGTH);
Added:
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/NutchHttpServlet.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/NutchHttpServlet.java?rev=405831&view=auto
==============================================================================
---
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/NutchHttpServlet.java
(added)
+++
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/NutchHttpServlet.java
Fri May 12 11:47:30 2006
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.webapp.servlet;
+
+import javax.servlet.ServletConfig;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+
+import org.apache.nutch.webapp.common.ServiceLocator;
+import org.apache.nutch.webapp.common.ServletContextServiceLocator;
+
+/**
+ * Abstract base servlet for Nutch; resolves the ServiceLocator in init().
+ */
+public abstract class NutchHttpServlet extends HttpServlet{
+  private static final long serialVersionUID = 1L;
+
+  private ServiceLocator locator;
+
+  public void init(ServletConfig servletConfig) throws ServletException {
+    super.init(servletConfig); // stores config so getServletContext() works
+    locator = ServletContextServiceLocator.getInstance(servletConfig.getServletContext());
+  }
+
+  /** @return the locator resolved in init(); null before init() has run */
+  public ServiceLocator getServiceLocator(){
+    return locator;
+  }
+}
Modified:
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/OpenSearchServlet.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/OpenSearchServlet.java?rev=405831&r1=405830&r2=405831&view=diff
==============================================================================
---
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/OpenSearchServlet.java
(original)
+++
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/OpenSearchServlet.java
Fri May 12 11:47:30 2006
@@ -26,20 +26,17 @@
import javax.servlet.ServletException;
import javax.servlet.ServletConfig;
-import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.xml.parsers.*;
-import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.searcher.Hit;
import org.apache.nutch.searcher.HitDetails;
import org.apache.nutch.searcher.Hits;
import org.apache.nutch.searcher.NutchBean;
import org.apache.nutch.searcher.Query;
-import org.apache.nutch.webapp.common.ServiceLocator;
-import org.apache.nutch.webapp.common.ServletContextServiceLocator;
+import org.apache.nutch.searcher.Summary;
import org.w3c.dom.*;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.Transformer;
@@ -50,7 +47,7 @@
* Present search results using A9's OpenSearch extensions to RSS, plus a few
* Nutch-specific extensions.
*/
-public class OpenSearchServlet extends HttpServlet {
+public class OpenSearchServlet extends NutchHttpServlet {
private static final long serialVersionUID = 1L;
private static final Map NS_MAP = new HashMap();
@@ -66,15 +63,8 @@
SKIP_DETAILS.add("title"); // redundant with RSS title
}
- private NutchBean bean;
-
- private Configuration conf;
-
- public void init(ServletConfig conf) {
- ServiceLocator locator = ServletContextServiceLocator.getInstance(conf
- .getServletContext());
- bean = locator.getNutchBean();
- this.conf = locator.getConfiguration();
+ public void init(ServletConfig conf) throws ServletException {
+ super.init(conf);
}
public void doGet(HttpServletRequest request, HttpServletResponse response)
@@ -129,13 +119,13 @@
+ (reverse ? "&reverse=true" : "")
+ (dedupField == null ? "" : "&dedupField=" + dedupField));
- Query query = Query.parse(queryString, this.conf);
+ Query query = Query.parse(queryString,
getServiceLocator().getConfiguration());
NutchBean.LOG.info("query: " + queryString);
// execute the query
Hits hits;
try {
- hits = bean.search(query, start + hitsPerPage, hitsPerDup, dedupField,
+ hits = getServiceLocator().getNutchBean().search(query, start +
hitsPerPage, hitsPerDup, dedupField,
sort, reverse);
} catch (IOException e) {
NutchBean.LOG.log(Level.WARNING, "Search Error", e);
@@ -149,8 +139,8 @@
int length = end - start;
Hit[] show = hits.getHits(start, end - start);
- HitDetails[] details = bean.getDetails(show);
- String[] summaries = bean.getSummary(details, query);
+ HitDetails[] details = getServiceLocator().getNutchBean().getDetails(show);
+ Summary[] summaries =
getServiceLocator().getNutchBean().getSummary(details, query);
String requestUrl = request.getRequestURL().toString();
String base = requestUrl.substring(0, requestUrl.lastIndexOf('/'));
@@ -204,13 +194,13 @@
Element item = addNode(doc, channel, "item");
addNode(doc, item, "title", title);
- addNode(doc, item, "description", summaries[i]);
+ addNode(doc, item, "description", summaries[i].toString());
addNode(doc, item, "link", url);
addNode(doc, item, "nutch", "site", hit.getDedupValue());
- addNode(doc, item, "nutch", "cache", base + "/cached.jsp?" + id);
- addNode(doc, item, "nutch", "explain", base + "/explain.jsp?" + id
+ addNode(doc, item, "nutch", "cache", base + "/cached.do?" + id);
+ addNode(doc, item, "nutch", "explain", base + "/explain.do?" + id
+ "&query=" + urlQuery);
if (hit.moreFromDupExcluded()) {
Modified:
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/tiles/ExtendableDefinitionsFactory.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/tiles/ExtendableDefinitionsFactory.java?rev=405831&r1=405830&r2=405831&view=diff
==============================================================================
---
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/tiles/ExtendableDefinitionsFactory.java
(original)
+++
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/tiles/ExtendableDefinitionsFactory.java
Fri May 12 11:47:30 2006
@@ -25,7 +25,6 @@
import javax.servlet.ServletContext;
import javax.servlet.ServletRequest;
-import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.LogFormatter;
import org.apache.nutch.plugin.Extension;
@@ -89,8 +88,6 @@
} catch (Exception e) {
e.printStackTrace(System.out);
}
- LOG.fine("Restoring ClassLoader.");
-
}
protected XmlDefinitionsSet getDefinitions() {
@@ -181,10 +178,6 @@
+ definitions.toString());
}
- protected Configuration getNutchConfig(ServletContext context) {
- return
ServletContextServiceLocator.getInstance(context).getConfiguration();
- }
-
/*
* (non-Javadoc)
*
@@ -204,7 +197,6 @@
PluginResourceLoader loader = ServletContextServiceLocator.getInstance(
servletContext).getPluginResourceLoader(current);
- //TODO: fix this!!!
Thread.currentThread().setContextClassLoader(loader);
initDefinitions(definitions);
Thread.currentThread().setContextClassLoader(current);