Author: siren Date: Fri May 12 11:47:30 2006 New Revision: 405831 URL: http://svn.apache.org/viewcvs?rev=405831&view=rev Log: keeping up with Nutch changes, cleaning up, fixed URLs in OpenSearchServlet
Added: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/NutchHttpServlet.java Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Preferences.java lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/SearchResultBean.java lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/CachedServlet.java lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/OpenSearchServlet.java lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/tiles/ExtendableDefinitionsFactory.java Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Preferences.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Preferences.java?rev=405831&r1=405830&r2=405831&view=diff ============================================================================== --- lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Preferences.java (original) +++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Preferences.java Fri May 12 11:47:30 2006 @@ -80,10 +80,8 @@ public static void setPreferencesCookie(HttpServletRequest request, HttpServletResponse response, Preferences prefs) { if (defaults.equals(prefs)) { - System.out.println("default qeuals prefs, removing"); removeCookie(response); } else { - System.out.println("setting preferences to cookie"); setPreferencesCookie(response, prefs); } } @@ -142,17 +140,13 @@ * @return parsed Preferences */ public static Preferences parse(String data, String valueValueSeparator, String keyValueSeparator) { - - System.out.println("data:" + data); Preferences p = new Preferences(); p.putAll(defaults); String[] dataitems = data.split(valueValueSeparator); - System.out.println(dataitems.length + " 
dataitems submitted"); for (int i = 0; i < dataitems.length; i++) { String keyvalue[] = dataitems[i].split(keyValueSeparator); if (keyvalue.length == 2) { try { - System.out.println("adding:" + keyvalue[0] + "=" + keyvalue[1]); p.put(keyvalue[0], URLDecoder.decode((String)keyvalue[1],"UTF-8")); } catch (UnsupportedEncodingException e) { e.printStackTrace(); @@ -201,8 +195,6 @@ txt.append(DEFAULTVALVALSEPARATOR); } - System.out.println("toString():" + txt.toString()); - return txt.toString(); } Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java?rev=405831&r1=405830&r2=405831&view=diff ============================================================================== --- lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java (original) +++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java Fri May 12 11:47:30 2006 @@ -27,6 +27,7 @@ import org.apache.nutch.searcher.Hits; import org.apache.nutch.searcher.NutchBean; import org.apache.nutch.searcher.Query; +import org.apache.nutch.searcher.Summary; /** * Search is a bean that represents an ongoing search. 
@@ -45,8 +46,6 @@ Query query; - int maxHits; - int startOffset; int hitsPerDup; @@ -63,7 +62,7 @@ HitDetails[] details; - String[] summaries; + Summary[] summaries; ArrayList results = null; @@ -83,14 +82,14 @@ public void performSearch(NutchBean bean) { try { - hits = bean.search(getQuery(), getStartOffset() + getMaxHits(), + hits = bean.search(getQuery(), getStartOffset() + getHitsPerPage(), getHitsPerSite(), getDupField(), getSortColumn(), isSortDesc()); } catch (IOException e) { hits = new Hits(0, new Hit[0]); } int realEnd = (int) Math.min(hits.getLength(), getStartOffset() - + getMaxHits()); + + getHitsPerPage()); int endOffset=hits.getLength(); @@ -158,8 +157,6 @@ hitsPerDup = parseInt(form.getValueString(SearchForm.NAME_HITSPERDUP), prefs.getInt( Preferences.KEY_HITS_PER_DUP, 2)); - maxHits = hitsPerPage; - sortColumn = form.getValueString(SearchForm.NAME_SORTCOLUMN); sortDesc = (sortColumn != null && "true".equals(form @@ -207,22 +204,7 @@ protected void setHitsPerSite(int hitsPerSite) { this.hitsPerDup = hitsPerSite; } - - /** - * @return Returns the maxHits. - */ - public int getMaxHits() { - return maxHits; - } - - /** - * @param maxHits - * The maxHits to set. - */ - protected void setMaxHits(int maxHits) { - this.maxHits = maxHits; - } - + /** * @return Returns the query. */ @@ -365,7 +347,7 @@ /** * @return Returns the summaries. */ - public String[] getSummaries() { + public Summary[] getSummaries() { return summaries; } @@ -373,7 +355,7 @@ * @param summaries * The summaries to set. 
*/ - protected void setSummaries(String[] summaries) { + protected void setSummaries(Summary[] summaries) { this.summaries = summaries; } Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/SearchResultBean.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/SearchResultBean.java?rev=405831&r1=405830&r2=405831&view=diff ============================================================================== --- lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/SearchResultBean.java (original) +++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/SearchResultBean.java Fri May 12 11:47:30 2006 @@ -21,6 +21,8 @@ import org.apache.nutch.html.Entities; import org.apache.nutch.searcher.Hit; import org.apache.nutch.searcher.HitDetails; +import org.apache.nutch.searcher.Summary; +import org.apache.nutch.searcher.Summary.Fragment; /** * SearchResultBean contains information about one search result in easily @@ -30,14 +32,14 @@ Hit hit; - String summary; + Summary summary; HitDetails details; Search search; public SearchResultBean(Search search, Hit hit, HitDetails details, - String summary) { + Summary summary) { this.search = search; this.hit = hit; this.details = details; @@ -78,7 +80,21 @@ * @return */ public String getSummary() { - return summary; + + StringBuffer sum = new StringBuffer(); + Fragment[] fragments = summary.getFragments(); + for (int j=0; j<fragments.length; j++) { + if (fragments[j].isHighlight()) { + sum.append("<span class=\"highlight\">") + .append(Entities.encode(fragments[j].getText())) + .append("</span>"); + } else if (fragments[j].isEllipsis()) { + sum.append("<span class=\"ellipsis\"> ... 
</span>"); + } else { + sum.append(Entities.encode(fragments[j].getText())); + } + } + return sum.toString(); } /** @@ -167,7 +183,7 @@ * * @return true if more dups available */ - public Boolean getHasMore() { - return new Boolean(hit.moreFromDupExcluded()); + public boolean getHasMore() { + return hit.moreFromDupExcluded(); } } Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/CachedServlet.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/CachedServlet.java?rev=405831&r1=405830&r2=405831&view=diff ============================================================================== --- lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/CachedServlet.java (original) +++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/CachedServlet.java Fri May 12 11:47:30 2006 @@ -21,11 +21,9 @@ import org.apache.nutch.searcher.NutchBean; import org.apache.nutch.searcher.Hit; import org.apache.nutch.searcher.HitDetails; -import org.apache.nutch.webapp.common.ServiceLocator; -import org.apache.nutch.webapp.common.ServletContextServiceLocator; import javax.servlet.ServletConfig; -import javax.servlet.http.HttpServlet; +import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; @@ -41,16 +39,12 @@ * @author John Xing */ -public class CachedServlet extends HttpServlet { +public class CachedServlet extends NutchHttpServlet { private static final long serialVersionUID = 1L; - NutchBean bean = null; - - public void init(ServletConfig conf) { - ServiceLocator locator = ServletContextServiceLocator.getInstance(conf - .getServletContext()); - bean = locator.getNutchBean(); + public void init(ServletConfig conf) throws ServletException { + super.init(conf); } public void destroy() { @@ -61,21 +55,17 @@ public void doGet(HttpServletRequest request, 
HttpServletResponse response) throws IOException { - // quit if no bean - if (bean == null) - return; - NutchBean.LOG.info("request from " + request.getRemoteAddr()); Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")), Integer .parseInt(request.getParameter("id"))); - HitDetails details = bean.getDetails(hit); + HitDetails details = getServiceLocator().getNutchBean().getDetails(hit); // raw bytes - byte[] bytes = bean.getContent(details); + byte[] bytes = getServiceLocator().getNutchBean().getContent(details); // pass all original headers? only these for now. - Metadata metadata = bean.getParseData(details).getContentMeta(); + Metadata metadata = getServiceLocator().getNutchBean().getParseData(details).getContentMeta(); String contentType = metadata.get(Response.CONTENT_TYPE); // String lastModified = metadata.get(Metadata.LAST_MODIFIED); // String contentLength = metadata.get(Metadata.CONTENT_LENGTH); Added: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/NutchHttpServlet.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/NutchHttpServlet.java?rev=405831&view=auto ============================================================================== --- lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/NutchHttpServlet.java (added) +++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/NutchHttpServlet.java Fri May 12 11:47:30 2006 @@ -0,0 +1,42 @@ +/* + * Copyright 2006 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nutch.webapp.servlet; + +import javax.servlet.ServletConfig; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; + +import org.apache.nutch.webapp.common.ServiceLocator; +import org.apache.nutch.webapp.common.ServletContextServiceLocator; + +/** + * Abstract base Servlet for nutch + */ +public abstract class NutchHttpServlet extends HttpServlet{ + + private static final long serialVersionUID = 1L; + + private ServiceLocator locator; + + public void init(ServletConfig servletConfig) throws ServletException { + locator = ServletContextServiceLocator.getInstance(servletConfig + .getServletContext()); + } + + public ServiceLocator getServiceLocator(){ + return locator; + } +} Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/OpenSearchServlet.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/OpenSearchServlet.java?rev=405831&r1=405830&r2=405831&view=diff ============================================================================== --- lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/OpenSearchServlet.java (original) +++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/servlet/OpenSearchServlet.java Fri May 12 11:47:30 2006 @@ -26,20 +26,17 @@ import javax.servlet.ServletException; import javax.servlet.ServletConfig; -import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; 
import javax.xml.parsers.*; -import org.apache.hadoop.conf.Configuration; import org.apache.nutch.searcher.Hit; import org.apache.nutch.searcher.HitDetails; import org.apache.nutch.searcher.Hits; import org.apache.nutch.searcher.NutchBean; import org.apache.nutch.searcher.Query; -import org.apache.nutch.webapp.common.ServiceLocator; -import org.apache.nutch.webapp.common.ServletContextServiceLocator; +import org.apache.nutch.searcher.Summary; import org.w3c.dom.*; import javax.xml.transform.TransformerFactory; import javax.xml.transform.Transformer; @@ -50,7 +47,7 @@ * Present search results using A9's OpenSearch extensions to RSS, plus a few * Nutch-specific extensions. */ -public class OpenSearchServlet extends HttpServlet { +public class OpenSearchServlet extends NutchHttpServlet { private static final long serialVersionUID = 1L; private static final Map NS_MAP = new HashMap(); @@ -66,15 +63,8 @@ SKIP_DETAILS.add("title"); // redundant with RSS title } - private NutchBean bean; - - private Configuration conf; - - public void init(ServletConfig conf) { - ServiceLocator locator = ServletContextServiceLocator.getInstance(conf - .getServletContext()); - bean = locator.getNutchBean(); - this.conf = locator.getConfiguration(); + public void init(ServletConfig conf) throws ServletException { + super.init(conf); } public void doGet(HttpServletRequest request, HttpServletResponse response) @@ -129,13 +119,13 @@ + (reverse ? "&reverse=true" : "") + (dedupField == null ? 
"" : "&dedupField=" + dedupField)); - Query query = Query.parse(queryString, this.conf); + Query query = Query.parse(queryString, getServiceLocator().getConfiguration()); NutchBean.LOG.info("query: " + queryString); // execute the query Hits hits; try { - hits = bean.search(query, start + hitsPerPage, hitsPerDup, dedupField, + hits = getServiceLocator().getNutchBean().search(query, start + hitsPerPage, hitsPerDup, dedupField, sort, reverse); } catch (IOException e) { NutchBean.LOG.log(Level.WARNING, "Search Error", e); @@ -149,8 +139,8 @@ int length = end - start; Hit[] show = hits.getHits(start, end - start); - HitDetails[] details = bean.getDetails(show); - String[] summaries = bean.getSummary(details, query); + HitDetails[] details = getServiceLocator().getNutchBean().getDetails(show); + Summary[] summaries = getServiceLocator().getNutchBean().getSummary(details, query); String requestUrl = request.getRequestURL().toString(); String base = requestUrl.substring(0, requestUrl.lastIndexOf('/')); @@ -204,13 +194,13 @@ Element item = addNode(doc, channel, "item"); addNode(doc, item, "title", title); - addNode(doc, item, "description", summaries[i]); + addNode(doc, item, "description", summaries[i].toString()); addNode(doc, item, "link", url); addNode(doc, item, "nutch", "site", hit.getDedupValue()); - addNode(doc, item, "nutch", "cache", base + "/cached.jsp?" + id); - addNode(doc, item, "nutch", "explain", base + "/explain.jsp?" + id + addNode(doc, item, "nutch", "cache", base + "/cached.do?" + id); + addNode(doc, item, "nutch", "explain", base + "/explain.do?" 
+ id + "&query=" + urlQuery); if (hit.moreFromDupExcluded()) { Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/tiles/ExtendableDefinitionsFactory.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/tiles/ExtendableDefinitionsFactory.java?rev=405831&r1=405830&r2=405831&view=diff ============================================================================== --- lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/tiles/ExtendableDefinitionsFactory.java (original) +++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/tiles/ExtendableDefinitionsFactory.java Fri May 12 11:47:30 2006 @@ -25,7 +25,6 @@ import javax.servlet.ServletContext; import javax.servlet.ServletRequest; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.LogFormatter; import org.apache.nutch.plugin.Extension; @@ -89,8 +88,6 @@ } catch (Exception e) { e.printStackTrace(System.out); } - LOG.fine("Restoring ClassLoader."); - } protected XmlDefinitionsSet getDefinitions() { @@ -181,10 +178,6 @@ + definitions.toString()); } - protected Configuration getNutchConfig(ServletContext context) { - return ServletContextServiceLocator.getInstance(context).getConfiguration(); - } - /* * (non-Javadoc) * @@ -204,7 +197,6 @@ PluginResourceLoader loader = ServletContextServiceLocator.getInstance( servletContext).getPluginResourceLoader(current); - //TODO: fix this!!! Thread.currentThread().setContextClassLoader(loader); initDefinitions(definitions); Thread.currentThread().setContextClassLoader(current);