resp...

kubes Fri, 02 Jan 2009 13:39:37 -0800

Author: kubes
Date: Fri Jan  2 13:38:58 2009
New Revision: 730845

URL: http://svn.apache.org/viewvc?rev=730845&view=rev
Log:
NUTCH-594: Serve Nutch search results in multiple formats including XML and 
JSON.


Added:
    lucene/nutch/trunk/lib/commons-beanutils-1.8.0.jar   (with props)
    lucene/nutch/trunk/lib/commons-collections-3.2.1.jar   (with props)
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/
    
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java
    
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java
    
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java
    
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java
    
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java
    lucene/nutch/trunk/src/plugin/response-json/
    lucene/nutch/trunk/src/plugin/response-json/build.xml
    lucene/nutch/trunk/src/plugin/response-json/lib/
    lucene/nutch/trunk/src/plugin/response-json/lib/ezmorph-1.0.6.jar   (with 
props)
    lucene/nutch/trunk/src/plugin/response-json/lib/json-lib-2.2.2-jdk15.jar   
(with props)
    lucene/nutch/trunk/src/plugin/response-json/plugin.xml
    lucene/nutch/trunk/src/plugin/response-json/src/
    lucene/nutch/trunk/src/plugin/response-json/src/java/
    lucene/nutch/trunk/src/plugin/response-json/src/java/org/
    lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/
    lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/
    
lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/
    
lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/
    
lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/
    
lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java
    lucene/nutch/trunk/src/plugin/response-xml/
    lucene/nutch/trunk/src/plugin/response-xml/build.xml
    lucene/nutch/trunk/src/plugin/response-xml/plugin.xml
    lucene/nutch/trunk/src/plugin/response-xml/src/
    lucene/nutch/trunk/src/plugin/response-xml/src/java/
    lucene/nutch/trunk/src/plugin/response-xml/src/java/org/
    lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/
    lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/
    
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/
    
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/
    
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/
    
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java
Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/build.xml
    lucene/nutch/trunk/conf/nutch-default.xml
    lucene/nutch/trunk/src/plugin/build.xml
    lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml
    lucene/nutch/trunk/src/web/web.xml

Modified: lucene/nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=730845&r1=730844&r2=730845&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Fri Jan  2 13:38:58 2009
@@ -300,6 +300,9 @@
 111. NUTCH-646 -  New Indexing Framework for Nutch. (kubes)
 
 112. NUTCH-668 -  Domain URL Filter. (kubes)
+
+113. NUTCH-594 -  Serve Nutch search results in multiple formats including 
+                  XML and JSON. (kubes)
      
 Release 0.9 - 2007-04-02
 

Modified: lucene/nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/build.xml?rev=730845&r1=730844&r2=730845&view=diff
==============================================================================
--- lucene/nutch/trunk/build.xml (original)
+++ lucene/nutch/trunk/build.xml Fri Jan  2 13:38:58 2009
@@ -172,32 +172,34 @@
        <outputproperty name="indent" value="yes"/>
     </xslt>
     <war destfile="${build.dir}/${final.name}.war"
-        webxml="${web.src.dir}/web.xml">
+       webxml="${web.src.dir}/web.xml">
       <fileset dir="${web.src.dir}/jsp"/>
       <zipfileset dir="${docs.src}" includes="include/*.html"/>
       <zipfileset dir="${build.docs}" includes="*/include/*.html"/>
       <fileset dir="${docs.dir}"/>
       <lib dir="${lib.dir}">
-       <include name="lucene*.jar"/>
-       <include name="taglibs-*.jar"/>
-       <include name="hadoop-*.jar"/>
-       <include name="dom4j-*.jar"/>
-       <include name="xerces-*.jar"/>
-        <include name="tika-*.jar"/>           
+        <include name="lucene*.jar"/>
+        <include name="taglibs-*.jar"/>
+        <include name="hadoop-*.jar"/>
+        <include name="dom4j-*.jar"/>
+        <include name="xerces-*.jar"/>
+        <include name="tika-*.jar"/>
+        <include name="commons-collections-*.jar"/>
+        <include name="commons-beanutils-*.jar"/>
         <include name="commons-cli-*.jar"/>
         <include name="commons-lang-*.jar"/>
         <include name="commons-logging-*.jar"/>
         <include name="log4j-*.jar"/>
       </lib>
       <lib dir="${build.dir}">
-       <include name="${final.name}.jar"/>
+             <include name="${final.name}.jar"/>
       </lib>
       <classes dir="${conf.dir}" excludes="**/*.template"/>
       <classes dir="${web.src.dir}/locale"/>
       <classes file="${web.src.dir}/log4j.properties"/>
       <zipfileset prefix="WEB-INF/classes/plugins" dir="${build.plugins}"/>
       <webinf dir="${lib.dir}">
-       <include name="taglibs-*.tld"/>
+             <include name="taglibs-*.tld"/>
       </webinf>
     </war>
    </target>

Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/conf/nutch-default.xml?rev=730845&r1=730844&r2=730845&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Fri Jan  2 13:38:58 2009
@@ -886,7 +886,7 @@
 
 <property>
   <name>plugin.includes</name>
-  
<value>protocol-http|urlfilter-regex|parse-(text|html|js)|index-(basic|anchor)|query-(basic|site|url)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value>
+  
<value>protocol-http|urlfilter-regex|parse-(text|html|js)|index-(basic|anchor)|query-(basic|site|url)|response-(json|xml)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value>
   <description>Regular expression naming plugin directory names to
   include.  Any plugin not matching this expression is excluded.
   In any case you need at least include the nutch-extensionpoints plugin. By
@@ -1209,4 +1209,63 @@
   </description>
 </property>
 
+<!-- response writer properties -->
+
+<property>
+  <name>search.response.default.type</name>
+  <value>xml</value>
+  <description>
+  The default response type returned if none is specified.
+  </description>
+</property>
+
+<property>
+  <name>search.response.default.lang</name>
+  <value>en</value>
+  <description>
+  The default response language if none is specified.
+  </description>
+</property>
+
+<property>
+  <name>search.response.default.numrows</name>
+  <value>10</value>
+  <description>
+  The default number of rows to return if none is specified.
+  </description>
+</property>
+
+<property>
+  <name>search.response.default.dedupfield</name>
+  <value>site</value>
+  <description>
+  The default dedup field if none is specified.
+  </description>
+</property>
+
+<property>
+  <name>search.response.default.numdupes</name>
+  <value>1</value>
+  <description>
+  The default number of duplicates returned if none is specified.
+  </description>
+</property>
+
+<property>
+  <name>searcher.response.maxage</name>
+  <value>86400</value>
+  <description>
+  The maxage of a response in seconds. Used in caching headers.
+  </description>
+</property>
+
+<property>
+  <name>searcher.response.prettyprint</name>
+  <value>true</value>
+  <description>
+  Should the response output be pretty printed.  Setting to true enables better
+  debugging, false removes unneeded spaces and gives better throughput.
+  </description>
+</property>
+
 </configuration>

Added: lucene/nutch/trunk/lib/commons-beanutils-1.8.0.jar
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/commons-beanutils-1.8.0.jar?rev=730845&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/commons-beanutils-1.8.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/lib/commons-collections-3.2.1.jar
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/commons-collections-3.2.1.jar?rev=730845&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/commons-collections-3.2.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java?rev=730845&view=auto
==============================================================================
--- 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java
 (added)
+++ 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java
 Fri Jan  2 13:38:58 2009
@@ -0,0 +1,72 @@
+package org.apache.nutch.searcher.response;
+
+import javax.servlet.http.HttpServletRequest;
+
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * A set of utility methods for getting request parameters.
+ */
+public class RequestUtils {
+
+  public static boolean parameterExists(HttpServletRequest request, String 
param) {
+    String value = request.getParameter(param);
+    return value != null;
+  }
+
+  public static Integer getIntegerParameter(HttpServletRequest request,
+    String param) {
+    if (parameterExists(request, param)) {
+      String value = request.getParameter(param);
+      if (StringUtils.isNotBlank(value) && StringUtils.isNumeric(value)) {
+        return new Integer(value);
+      }
+    }
+    return null;
+  }
+
+  public static Integer getIntegerParameter(HttpServletRequest request,
+    String param, Integer def) {
+    Integer value = getIntegerParameter(request, param);
+    return (value == null) ? def : value;
+  }
+
+  public static String getStringParameter(HttpServletRequest request,
+    String param) {
+    if (parameterExists(request, param)) {
+      return request.getParameter(param);
+    }
+    return null;
+  }
+
+  public static String getStringParameter(HttpServletRequest request,
+    String param, String def) {
+    String value = getStringParameter(request, param);
+    return (value == null) ? def : value;
+  }
+
+  /**
+   * Returns the named request parameter interpreted as a boolean.
+   *
+   * @param request The HttpServletRequest to read the parameter from.
+   * @param param The name of the request parameter.
+   *
+   * @return true if the parameter value is "1", "true" or "yes" (the latter
+   * two ignoring case), false if it is missing or has any other value.
+   */
+  public static Boolean getBooleanParameter(HttpServletRequest request,
+    String param) {
+    // compare the parameter's value, not its name; the StringUtils
+    // comparisons are null safe so a missing parameter yields false
+    String value = request.getParameter(param);
+    return StringUtils.equals(value, "1")
+      || StringUtils.equalsIgnoreCase(value, "true")
+      || StringUtils.equalsIgnoreCase(value, "yes");
+  }
+
+  /**
+   * Returns the named request parameter interpreted as a boolean, falling
+   * back to a default when the parameter is not present in the request.
+   *
+   * @param request The HttpServletRequest to read the parameter from.
+   * @param param The name of the request parameter.
+   * @param def The value to return if the parameter is missing.
+   *
+   * @return def if the parameter is missing; otherwise true if its value is
+   * "1", "true" or "yes" (the latter two ignoring case) and false for any
+   * other value.
+   */
+  public static Boolean getBooleanParameter(HttpServletRequest request,
+    String param, Boolean def) {
+    String value = request.getParameter(param);
+    if (value == null) {
+      return def;
+    }
+    // compare the parameter's value, not its name
+    return StringUtils.equals(value, "1")
+      || StringUtils.equalsIgnoreCase(value, "true")
+      || StringUtils.equalsIgnoreCase(value, "yes");
+  }
+}

Added: 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java?rev=730845&view=auto
==============================================================================
--- 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java
 (added)
+++ 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java
 Fri Jan  2 13:38:58 2009
@@ -0,0 +1,43 @@
+package org.apache.nutch.searcher.response;
+
+import java.io.IOException;
+
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.nutch.plugin.Pluggable;
+
+/**
+ * Nutch extension point which allow writing search results in many different
+ * output formats.
+ */
+public interface ResponseWriter
+  extends Pluggable, Configurable {
+
+  public final static String X_POINT_ID = ResponseWriter.class.getName();
+  
+  /**
+   * Sets the returned content MIME type.  Populated through variables set in
+   * the plugin.xml file of the ResponseWriter.  This allows easily changing
+   * output content types, for example for JSON from text/plain during testing
+   * and debugging to application/json in production.
+   * 
+   * @param contentType The MIME content type to set.
+   */
+  public void setContentType(String contentType);
+
+  /**
+   * Writes out the search results response to the HttpServletResponse.
+   * 
+   * @param results The SearchResults object containing hits and other info.
+   * @param request The HttpServletRequest object.
+   * @param response The HttpServletResponse object.
+   * 
+   * @throws IOException If an error occurs while writing out the response.
+   */
+  public void writeResponse(SearchResults results, HttpServletRequest request,
+    HttpServletResponse response)
+    throws IOException;
+
+}

Added: 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java?rev=730845&view=auto
==============================================================================
--- 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java
 (added)
+++ 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java
 Fri Jan  2 13:38:58 2009
@@ -0,0 +1,90 @@
+package org.apache.nutch.searcher.response;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.plugin.Extension;
+import org.apache.nutch.plugin.ExtensionPoint;
+import org.apache.nutch.plugin.PluginRepository;
+import org.apache.nutch.plugin.PluginRuntimeException;
+import org.apache.nutch.util.ObjectCache;
+
+/**
+ * Utility class for getting all ResponseWriter implementations and for
+ * returning the correct ResponseWriter for a given request type.
+ */
+public class ResponseWriters {
+
+  private Map<String, ResponseWriter> responseWriters;
+
+  /**
+   * Constructor that configures the cache of ResponseWriter objects.  The
+   * map of response type to ResponseWriter is built once from the plugin
+   * extensions and stored in the ObjectCache under the ResponseWriter class
+   * name; later instances reuse the cached map.
+   * 
+   * @param conf The Nutch configuration object.
+   * 
+   * @throws RuntimeException If the ResponseWriter extension point is not
+   * found or an extension instance cannot be obtained.
+   */
+  public ResponseWriters(Configuration conf) {
+
+    // get the cache and the cache key
+    String cacheKey = ResponseWriter.class.getName();
+    ObjectCache objectCache = ObjectCache.get(conf);
+
+    // only this class stores a value under cacheKey, so the cast is safe
+    @SuppressWarnings("unchecked")
+    Map<String, ResponseWriter> cached =
+      (Map<String, ResponseWriter>)objectCache.getObject(cacheKey);
+
+    // if already populated there is nothing more to do
+    if (cached != null) {
+      this.responseWriters = cached;
+      return;
+    }
+
+    Map<String, ResponseWriter> writers =
+      new HashMap<String, ResponseWriter>();
+    try {
+
+      // get the extension point and all ResponseWriter extensions
+      ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+        ResponseWriter.X_POINT_ID);
+      if (point == null) {
+        throw new RuntimeException(ResponseWriter.X_POINT_ID + " not found.");
+      }
+
+      // populate content type on the ResponseWriter classes, each response
+      // writer can handle more than one response type
+      Extension[] extensions = point.getExtensions();
+      for (int i = 0; i < extensions.length; i++) {
+        Extension extension = extensions[i];
+        ResponseWriter writer =
+          (ResponseWriter)extension.getExtensionInstance();
+        String[] responseTypes =
+          extension.getAttribute("responseType").split(",");
+        String contentType = extension.getAttribute("contentType");
+        writer.setContentType(contentType);
+        for (int k = 0; k < responseTypes.length; k++) {
+          // trim so that "xml, json" in plugin.xml registers "json" rather
+          // than " json"
+          writers.put(responseTypes[k].trim(), writer);
+        }
+      }
+    }
+    catch (PluginRuntimeException e) {
+      throw new RuntimeException(e);
+    }
+
+    // cache the (possibly empty) map and use it for this instance
+    objectCache.setObject(cacheKey, writers);
+    this.responseWriters = writers;
+  }
+
+  /**
+   * Return the correct ResponseWriter object for the response type.
+   * 
+   * @param respType The response type, such as xml or json. Must correspond to
+   * the value set in the plugin.xml file for the ResponseWriter extension.
+   * 
+   * @return The ResponseWriter that handles that response type or null if no
+   * such object exists.
+   */
+  public ResponseWriter getResponseWriter(String respType) {
+    return responseWriters.get(respType);
+  }
+}

Added: 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java?rev=730845&view=auto
==============================================================================
--- 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java
 (added)
+++ 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java
 Fri Jan  2 13:38:58 2009
@@ -0,0 +1,140 @@
+package org.apache.nutch.searcher.response;
+
+import org.apache.nutch.searcher.Hit;
+import org.apache.nutch.searcher.HitDetails;
+import org.apache.nutch.searcher.Summary;
+
+public class SearchResults {
+
+  private String[] fields;
+  private String responseType;
+  private String query;
+  private String lang;
+  private String sort;
+  private boolean reverse;
+  private boolean withSummary = true;
+  private int start;
+  private int rows;
+  private int end;
+  private long totalHits;
+  private Hit[] hits;
+  private HitDetails[] details;
+  private Summary[] summaries;
+
+  public SearchResults() {
+
+  }
+
+  public String[] getFields() {
+    return fields;
+  }
+
+  public void setFields(String[] fields) {
+    this.fields = fields;
+  }
+
+  public boolean isWithSummary() {
+    return withSummary;
+  }
+
+  public void setWithSummary(boolean withSummary) {
+    this.withSummary = withSummary;
+  }
+
+  public String getResponseType() {
+    return responseType;
+  }
+
+  public void setResponseType(String responseType) {
+    this.responseType = responseType;
+  }
+
+  public String getQuery() {
+    return query;
+  }
+
+  public void setQuery(String query) {
+    this.query = query;
+  }
+
+  public String getLang() {
+    return lang;
+  }
+
+  public void setLang(String lang) {
+    this.lang = lang;
+  }
+
+  public String getSort() {
+    return sort;
+  }
+
+  public void setSort(String sort) {
+    this.sort = sort;
+  }
+
+  public boolean isReverse() {
+    return reverse;
+  }
+
+  public void setReverse(boolean reverse) {
+    this.reverse = reverse;
+  }
+
+  public int getStart() {
+    return start;
+  }
+
+  public void setStart(int start) {
+    this.start = start;
+  }
+
+  public int getRows() {
+    return rows;
+  }
+
+  public void setRows(int rows) {
+    this.rows = rows;
+  }
+
+  public int getEnd() {
+    return end;
+  }
+
+  public void setEnd(int end) {
+    this.end = end;
+  }
+
+  public long getTotalHits() {
+    return totalHits;
+  }
+
+  public void setTotalHits(long totalHits) {
+    this.totalHits = totalHits;
+  }
+
+  public Hit[] getHits() {
+    return hits;
+  }
+
+  public void setHits(Hit[] hits) {
+    this.hits = hits;
+  }
+
+  public HitDetails[] getDetails() {
+    return details;
+  }
+
+  public void setDetails(HitDetails[] details) {
+    this.details = details;
+  }
+
+  public Summary[] getSummaries() {
+    return summaries;
+  }
+
+  public void setSummaries(Summary[] summaries) {
+    this.summaries = summaries;
+  }
+
+}

Added: 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java?rev=730845&view=auto
==============================================================================
--- 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java
 (added)
+++ 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java
 Fri Jan  2 13:38:58 2009
@@ -0,0 +1,196 @@
+package org.apache.nutch.searcher.response;
+
+import java.io.IOException;
+
+import javax.servlet.ServletConfig;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.searcher.Hit;
+import org.apache.nutch.searcher.HitDetails;
+import org.apache.nutch.searcher.Hits;
+import org.apache.nutch.searcher.NutchBean;
+import org.apache.nutch.searcher.Query;
+import org.apache.nutch.searcher.Summary;
+import org.apache.nutch.util.NutchConfiguration;
+
+/**
+ * Servlet that allows returning search results in multiple different formats
+ * through a ResponseWriter Nutch extension point.
+ * 
+ * @see org.apache.nutch.searcher.response.ResponseWriter
+ */
+public class SearchServlet
+  extends HttpServlet {
+
+  public static final Log LOG = LogFactory.getLog(SearchServlet.class);
+  private NutchBean bean;
+  private Configuration conf;
+  private ResponseWriters writers;
+
+  private String defaultRespType = "xml";
+  private String defaultLang = null;
+  private int defaultNumRows = 10;
+  private String defaultDedupField = "site";
+  private int defaultNumDupes = 1;
+
+  public static final String RESPONSE_TYPE = "rt";
+  public static final String QUERY = "query";
+  public static final String LANG = "lang";
+  public static final String START = "start";
+  public static final String ROWS = "rows";
+  public static final String SORT = "sort";
+  public static final String REVERSE = "reverse";
+  public static final String DEDUPE = "ddf";
+  public static final String NUM_DUPES = "dupes";
+  public static final String SUMMARY = "summary";
+  public static final String FIELDS = "field";
+
+  /**
+   * Initializes servlet configuration default values.  Gets NutchBean and 
+   * ResponseWriters.
+   */
+  public void init(ServletConfig config)
+    throws ServletException {
+
+    // set sensible defaults for response writer values and cache NutchBean.
+    // Also get and cache all ResponseWriter implementations.
+    super.init(config);
+    try {
+      this.conf = NutchConfiguration.get(config.getServletContext());
+      this.defaultRespType = conf.get("search.response.default.type", "xml");
+      this.defaultLang = conf.get("search.response.default.lang");
+      this.defaultNumRows = conf.getInt("search.response.default.numrows", 10);
+      this.defaultDedupField = conf.get("search.response.default.dedupfield",
+        "site");
+      this.defaultNumDupes = conf.getInt("search.response.default.numdupes", 
1);
+      bean = NutchBean.get(config.getServletContext(), this.conf);
+      writers = new ResponseWriters(conf);
+    }
+    catch (IOException e) {
+      throw new ServletException(e);
+    }
+  }
+
+  /**
+   * Forwards all requests to doGet.
+   */
+  protected void doPost(HttpServletRequest request, HttpServletResponse 
response)
+    throws ServletException, IOException {
+    doGet(request, response);
+  }
+
+  /**
+   * Handles all search requests.  Gets parameter input.  Does the search and 
+   * gets Hits, details, and summaries.  Passes off to ResponseWriter classes
+   * to write different output formats directly to HttpServletResponse.
+   */
+  protected void doGet(HttpServletRequest request, HttpServletResponse 
response)
+    throws ServletException, IOException {
+
+    if (NutchBean.LOG.isInfoEnabled()) {
+      NutchBean.LOG.info("Query request from " + request.getRemoteAddr());
+    }
+
+    // get the response type, used to call the correct ResponseWriter
+    String respType = RequestUtils.getStringParameter(request, RESPONSE_TYPE,
+      defaultRespType);
+    ResponseWriter writer = writers.getResponseWriter(respType);
+    if (writer == null) {
+      throw new IOException("Unknown response type " + respType);
+    }
+
+    // get the query
+    String query = RequestUtils.getStringParameter(request, QUERY);
+    if (StringUtils.isBlank(query)) {
+      throw new IOException("Query cannot be empty!");
+    }
+    
+    // get the language from parameter, then request, then finally 
configuration
+    String lang = RequestUtils.getStringParameter(request, LANG);
+    if (StringUtils.isBlank(lang)) {
+      lang = request.getLocale().getLanguage();
+      if (StringUtils.isBlank(lang)) {
+        lang = defaultLang;
+      }
+    }
+
+    // get various other search parameters, fields allows only returning a 
+    // given set of fields
+    boolean withSummary = RequestUtils.getBooleanParameter(request, SUMMARY,
+      true);
+    String sort = RequestUtils.getStringParameter(request, SORT);
+    int start = RequestUtils.getIntegerParameter(request, START, 0);
+    int rows = RequestUtils.getIntegerParameter(request, ROWS, defaultNumRows);
+    boolean reverse = RequestUtils.getBooleanParameter(request, REVERSE, 
false);
+    String dedup = RequestUtils.getStringParameter(request, DEDUPE,
+      defaultDedupField);
+    int numDupes = RequestUtils.getIntegerParameter(request, NUM_DUPES,
+      defaultNumDupes);
+    String[] fields = request.getParameterValues(FIELDS);
+
+    // parse out the query
+    Query queryObj = Query.parse(query, lang, this.conf);
+    if (NutchBean.LOG.isInfoEnabled()) {
+      NutchBean.LOG.info("query: " + query);
+      NutchBean.LOG.info("lang: " + lang);
+    }
+
+    // search and return hits
+    Hits hits;
+    try {
+      hits = bean.search(queryObj, start + rows, numDupes, dedup, sort, 
reverse);
+    }
+    catch (IOException e) {
+      if (NutchBean.LOG.isWarnEnabled()) {
+        NutchBean.LOG.warn("Search Error", e);
+      }
+      hits = new Hits(0, new Hit[0]);
+    }
+
+    // get the total number of hits, the hits to show, and the hit details
+    long totalHits = hits.getTotal();
+    int end = (int)Math.min(hits.getLength(), start + rows);
+    int numHits = (end > start) ? (end - start) : 0;
+    Hit[] show = hits.getHits(start, numHits);
+    HitDetails[] details = bean.getDetails(show);
+
+    // setup the SearchResults object, used in response writing
+    SearchResults results = new SearchResults();
+    results.setResponseType(respType);
+    results.setQuery(query);
+    results.setLang(lang);
+    results.setSort(sort);
+    results.setReverse(reverse);
+    results.setStart(start);
+    results.setRows(rows);
+    results.setEnd(end);
+    results.setTotalHits(totalHits);
+    results.setHits(show);
+    results.setDetails(details);
+
+    // are we returning summaries with results, if not avoid network hit
+    if (withSummary) {
+      Summary[] summaries = bean.getSummary(details, queryObj);
+      results.setSummaries(summaries);
+      results.setWithSummary(true);
+    }
+    else {
+      results.setWithSummary(false);
+    }
+
+    // set return fields if any specified, if not all fields are returned
+    if (fields != null && fields.length > 0) {
+      results.setFields(fields);
+    }
+
+    // call the response writer to write out content to HttpResponse directly
+    writer.writeResponse(results, request, response);
+  }
+}

Modified: lucene/nutch/trunk/src/plugin/build.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/build.xml?rev=730845&r1=730844&r2=730845&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/build.xml (original)
+++ lucene/nutch/trunk/src/plugin/build.xml Fri Jan  2 13:38:58 2009
@@ -68,6 +68,8 @@
      <ant dir="query-site" target="deploy"/>
         <ant dir="query-custom" target="deploy"/>
      <ant dir="query-url" target="deploy"/>
+     <ant dir="response-json" target="deploy"/>
+     <ant dir="response-xml" target="deploy"/>
      <ant dir="scoring-opic" target="deploy"/>
         <ant dir="scoring-link" target="deploy"/>
      <ant dir="summary-basic" target="deploy"/>
@@ -105,7 +107,7 @@
      <ant dir="parse-pdf" target="test"/>
      <ant dir="parse-rss" target="test"/>
      <ant dir="feed" target="test"/>
- <!-- <ant dir="parse-rtf" target="test"/> -->
+     <!-- <ant dir="parse-rtf" target="test"/> -->
      <ant dir="parse-swf" target="test"/>
      <ant dir="parse-zip" target="test"/>
      <ant dir="query-url" target="test"/>
@@ -168,6 +170,8 @@
     <ant dir="query-site" target="clean"/>
     <ant dir="query-url" target="clean"/>
        <ant dir="query-custom" target="clean"/>
+    <ant dir="response-json" target="clean"/>
+    <ant dir="response-xml" target="clean"/>
     <ant dir="scoring-opic" target="clean"/>
        <ant dir="scoring-link" target="clean"/>
     <ant dir="subcollection" target="clean"/>

Modified: lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml?rev=730845&r1=730844&r2=730845&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml Fri Jan  2 
13:38:58 2009
@@ -69,6 +69,10 @@
       name="Nutch Analysis"/>
 
 <extension-point
+      id="org.apache.nutch.searcher.response.ResponseWriter"
+      name="Nutch Search Results Response Writer"/>
+      
+<extension-point
       id="org.apache.nutch.searcher.Summarizer"
       name="Nutch Summarizer"/>
 

Added: lucene/nutch/trunk/src/plugin/response-json/build.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/build.xml?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/response-json/build.xml (added)
+++ lucene/nutch/trunk/src/plugin/response-json/build.xml Fri Jan  2 13:38:58 
2009
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="response-json" default="jar-core">
+
+  <import file="../build-plugin.xml"/>
+
+</project>

Added: lucene/nutch/trunk/src/plugin/response-json/lib/ezmorph-1.0.6.jar
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/lib/ezmorph-1.0.6.jar?rev=730845&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/response-json/lib/ezmorph-1.0.6.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/response-json/lib/json-lib-2.2.2-jdk15.jar
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/lib/json-lib-2.2.2-jdk15.jar?rev=730845&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
lucene/nutch/trunk/src/plugin/response-json/lib/json-lib-2.2.2-jdk15.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/response-json/plugin.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/plugin.xml?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/response-json/plugin.xml (added)
+++ lucene/nutch/trunk/src/plugin/response-json/plugin.xml Fri Jan  2 13:38:58 
2009
@@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!--
+ ! JSON-lib is a java library for transforming beans, maps, collections, java 
+ ! arrays and XML to JSON and back again.  EZMorph is one of its dependencies.
+ ! Both are licensed under the apache license.
+ ! 
+ ! JSON-lib Project: http://json-lib.sourceforge.net/index.html 
+ ! JSON-lib Download: http://sourceforge.net/project/showfiles.php?group_id=171425
+ ! License: http://json-lib.sourceforge.net/license.html
+ !
+ ! EZMorph Project: http://ezmorph.sourceforge.net/
+ ! EZMorph Download: http://sourceforge.net/project/showfiles.php?group_id=174866
+ ! License: http://ezmorph.sourceforge.net/license.html
+ !-->
+<plugin
+   id="response-json"
+   name="JSON Response Writer Plug-in"
+   version="1.0.0"
+   provider-name="nutch.org">
+
+   <runtime>
+      <library name="response-json.jar">
+         <export name="*"/>
+      </library>
+      <library name="ezmorph-1.0.6.jar"/>
+      <library name="json-lib-2.2.2-jdk15.jar"/>
+   </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
+   <extension id="org.apache.nutch.searcher.response"
+              name="ResponseWriter"
+              point="org.apache.nutch.searcher.response.ResponseWriter">
+
+      <implementation id="org.apache.nutch.searcher.response.json.JSONResponseWriter"
+        class="org.apache.nutch.searcher.response.json.JSONResponseWriter">
+        <parameter name="responseType" value="json"/>
+        <!--<parameter name="contentType" value="application/json"/>-->
+        <parameter name="contentType" value="text/plain"/>
+      </implementation>
+
+   </extension>
+
+</plugin>

Added: 
lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java?rev=730845&view=auto
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java
 (added)
+++ 
lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java
 Fri Jan  2 13:38:58 2009
@@ -0,0 +1,141 @@
+package org.apache.nutch.searcher.response.json;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.HashSet;
+import java.util.Set;
+
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import net.sf.json.JSONArray;
+import net.sf.json.JSONObject;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.searcher.Hit;
+import org.apache.nutch.searcher.HitDetails;
+import org.apache.nutch.searcher.Summary;
+import org.apache.nutch.searcher.response.RequestUtils;
+import org.apache.nutch.searcher.response.ResponseWriter;
+import org.apache.nutch.searcher.response.SearchResults;
+
+/**
+ * A ResponseWriter implementation that returns search results in JSON format.
+ */
+public class JSONResponseWriter
+  implements ResponseWriter {
+
+  private String contentType = null;
+  private Configuration conf;
+  private int maxAgeInSeconds;
+  private boolean prettyPrint = true;
+
+  public void setContentType(String contentType) {
+    this.contentType = contentType;
+  }
+
+  public Configuration getConf() {
+    return conf;
+  }
+
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    this.maxAgeInSeconds = conf.getInt("searcher.response.maxage", 86400);
+    this.prettyPrint = conf.getBoolean("searcher.response.prettyprint", true);
+  }
+
+  public void writeResponse(SearchResults results, HttpServletRequest request,
+    HttpServletResponse response)
+    throws IOException {
+
+    // the function name, if any wrapping the JSON output
+    String func = RequestUtils.getStringParameter(request, "func");
+
+    // create the JSON object and add common values
+    JSONObject jsonObj = new JSONObject();
+    jsonObj.accumulate("query", results.getQuery());
+    jsonObj.accumulate("lang", results.getLang());
+    jsonObj.accumulate("sort", results.getSort());
+    jsonObj.accumulate("reverse", results.isReverse());
+    jsonObj.accumulate("start", results.getStart());
+    jsonObj.accumulate("end", results.getEnd());
+    jsonObj.accumulate("rows", results.getRows());
+    jsonObj.accumulate("totalhits", results.getTotalHits());
+    jsonObj.accumulate("withSummary", results.isWithSummary());
+
+    String[] searchFields = results.getFields();
+    Set<String> fieldSet = new HashSet<String>();
+    if (searchFields != null && searchFields.length > 0) {
+      jsonObj.accumulate("fields", StringUtils.join(searchFields, ","));
+      for (int i = 0; i < searchFields.length; i++) {
+        fieldSet.add(searchFields[i]);
+      }
+    }
+
+    // add the documents from search hits
+    JSONArray docsAr = new JSONArray();
+    HitDetails[] details = results.getDetails();
+    Hit[] hits = results.getHits();
+    Summary[] summaries = results.getSummaries();
+    for (int i = 0; i < details.length; i++) {
+      
+      // every document has an indexno and an indexdocno
+      JSONObject result = new JSONObject();
+      HitDetails detail = details[i];
+      Hit hit = hits[i];
+      result.accumulate("indexno", hit.getIndexNo());
+      result.accumulate("indexdocno", hit.getIndexDocNo());
+      
+      // only add a summary when summaries were requested and are available
+      if (summaries != null && results.isWithSummary()) {
+        Summary summary = summaries[i];
+        result.accumulate("summary", summary.toString());
+      }
+      
+      // add the fields from hit details
+      JSONObject fields = new JSONObject();
+      for (int k = 0; k < detail.getLength(); k++) {
+        String name = detail.getField(k);
+        String[] values = detail.getValues(name);
+        
+        // if we specified fields to return, only return those fields
+        if (fieldSet.size() == 0 || fieldSet.contains(name)) {
+          JSONArray valuesAr = new JSONArray();
+          for (int m = 0; m < values.length; m++) {
+            valuesAr.add(values[m]);
+          }
+          fields.accumulate(name, valuesAr);
+        }
+      }
+      result.accumulate("fields", fields);
+      docsAr.add(result);
+    }
+
+    jsonObj.accumulate("documents", docsAr);
+    
+    // pretty printing can be set through configuration, write out the wrapper
+    // function if there is one
+    StringBuilder builder = new StringBuilder();
+    if (StringUtils.isNotBlank(func)) {
+      builder.append(func + "(");
+    }    
+    builder.append(prettyPrint ? jsonObj.toString(2) : jsonObj.toString());
+    if (StringUtils.isNotBlank(func)) {
+      builder.append(")");
+    }
+
+    // Cache control headers
+    SimpleDateFormat sdf = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss 'GMT'");
+    long relExpiresInMillis = System.currentTimeMillis()
+      + (1000 * maxAgeInSeconds);
+    response.setContentType(contentType);
+    response.setHeader("Cache-Control", "max-age=" + maxAgeInSeconds);
+    response.setHeader("Expires", sdf.format(relExpiresInMillis));
+    
+    // write out the content to the response
+    response.getOutputStream().write(builder.toString().getBytes());
+    response.flushBuffer();
+  }
+
+}

Added: lucene/nutch/trunk/src/plugin/response-xml/build.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-xml/build.xml?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/response-xml/build.xml (added)
+++ lucene/nutch/trunk/src/plugin/response-xml/build.xml Fri Jan  2 13:38:58 
2009
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="response-xml" default="jar-core">
+
+  <import file="../build-plugin.xml"/>
+
+</project>

Added: lucene/nutch/trunk/src/plugin/response-xml/plugin.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-xml/plugin.xml?rev=730845&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/response-xml/plugin.xml (added)
+++ lucene/nutch/trunk/src/plugin/response-xml/plugin.xml Fri Jan  2 13:38:58 
2009
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<plugin
+   id="response-xml"
+   name="XML Response Writer Plug-in"
+   version="1.0.0"
+   provider-name="nutch.org">
+
+   <runtime>
+      <library name="response-xml.jar">
+         <export name="*"/>
+      </library>    
+   </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
+   <extension id="org.apache.nutch.searcher.response"
+              name="ResponseWriter"
+              point="org.apache.nutch.searcher.response.ResponseWriter">
+
+      <implementation id="org.apache.nutch.searcher.response.xml.XMLResponseWriter"
+        class="org.apache.nutch.searcher.response.xml.XMLResponseWriter">
+        <parameter name="responseType" value="xml"/>
+        <parameter name="contentType" value="text/xml"/>
+      </implementation>
+
+   </extension>
+
+</plugin>

Added: 
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java?rev=730845&view=auto
==============================================================================
--- 
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java
 (added)
+++ 
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java
 Fri Jan  2 13:38:58 2009
@@ -0,0 +1,267 @@
+package org.apache.nutch.searcher.response.xml;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.HashSet;
+import java.util.Set;
+
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.html.Entities;
+import org.apache.nutch.searcher.Hit;
+import org.apache.nutch.searcher.HitDetails;
+import org.apache.nutch.searcher.Summary;
+import org.apache.nutch.searcher.response.ResponseWriter;
+import org.apache.nutch.searcher.response.SearchResults;
+import org.w3c.dom.Attr;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+
+/**
+ * A ResponseWriter implementation that returns search results in XML format.
+ */
+public class XMLResponseWriter
+  implements ResponseWriter {
+
+  private String contentType = null;
+  private Configuration conf;
+  private int maxAgeInSeconds;
+  private boolean prettyPrint;
+
+  /**
+   * Creates and returns a new node within the XML document.
+   * 
+   * @param doc The XML document.
+   * @param parent The parent Node.
+   * @param name The name of the new node.
+   * 
+   * @return The newly created node Element.
+   */
+  private static Element addNode(Document doc, Node parent, String name) {
+    Element child = doc.createElement(name);
+    parent.appendChild(child);
+    return child;
+  }
+
+  /**
+   * Creates and returns a new node within the XML document.  The node contains
+   * the text supplied as a child node.
+   * 
+   * @param doc The XML document.
+   * @param parent The parent Node.
+   * @param name The name of the new node.
+   * @param text A text string to append as a child node.
+   * 
+   * @return The newly created node Element.
+   */
+  private static void addNode(Document doc, Node parent, String name,
+    String text) {
+    Element child = doc.createElement(name);
+    child.appendChild(doc.createTextNode(getLegalXml(text)));
+    parent.appendChild(child);
+  }
+
+  /**
+   * Adds an attribute name and value to a node Element in the XML document.
+   * 
+   * @param doc The XML document.
+   * @param node The node Element on which to attach the attribute.
+   * @param name The name of the attribute.
+   * @param value The value of the attribute.
+   */
+  private static void addAttribute(Document doc, Element node, String name,
+    String value) {
+    Attr attribute = doc.createAttribute(name);
+    attribute.setValue(getLegalXml(value));
+    node.getAttributes().setNamedItem(attribute);
+  }
+
+  /**
+   * Transforms and returns the text string as legal XML text.
+   * 
+   * @param text The text to transform.
+   * 
+   * @return The text string in the form of legal XML text.
+   */
+  protected static String getLegalXml(String text) {
+    
+    if (text == null) {
+      return null;
+    }
+    StringBuffer buffer = null;
+    for (int i = 0; i < text.length(); i++) {
+      char c = text.charAt(i);
+      if (!isLegalXml(c)) {
+        if (buffer == null) {
+          buffer = new StringBuffer(text.length());
+          buffer.append(text.substring(0, i));
+        }
+      }
+      else {
+        if (buffer != null) {
+          buffer.append(c);
+        }
+      }
+    }
+    return (buffer != null) ? buffer.toString() : text;
+  }
+
+  /**
+   * Determines if the character is a legal XML character.
+   * 
+   * @param c The character to check.
+   * 
+   * @return True if the character is legal xml, false otherwise.
+   */
+  private static boolean isLegalXml(final char c) {
+    return c == 0x9 || c == 0xa || c == 0xd || (c >= 0x20 && c <= 0xd7ff)
+      || (c >= 0xe000 && c <= 0xfffd) || (c >= 0x10000 && c <= 0x10ffff);
+  }
+
+  public void setContentType(String contentType) {
+    this.contentType = contentType;
+  }
+
+  public Configuration getConf() {
+    return conf;
+  }
+
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    this.maxAgeInSeconds = conf.getInt("searcher.response.maxage", 86400);
+    this.prettyPrint = conf.getBoolean("searcher.response.prettyprint", true);
+  }
+
+  public void writeResponse(SearchResults results, HttpServletRequest request,
+    HttpServletResponse response)
+    throws IOException {
+
+    try {
+      
+      // create the xml document and add the results and search nodes
+      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+      Document xmldoc = factory.newDocumentBuilder().newDocument();
+      Element resEl = addNode(xmldoc, xmldoc, "results");
+      Element searchEl = addNode(xmldoc, resEl, "search");
+      
+      // add common nodes
+      String query = results.getQuery();
+      addNode(xmldoc, searchEl, "query", query);
+      addNode(xmldoc, searchEl, "totalhits",
+        String.valueOf(results.getTotalHits()));
+      String lang = results.getLang();
+      if (lang != null) {
+        addNode(xmldoc, searchEl, "lang", lang);
+      }
+      String sort = results.getSort();
+      if (sort != null) {
+        addNode(xmldoc, searchEl, "sort", sort);
+      }
+      addNode(xmldoc, searchEl, "reverse", results.isReverse() ? "true"
+        : "false");
+      addNode(xmldoc, searchEl, "start", String.valueOf(results.getStart()));
+      addNode(xmldoc, searchEl, "end", String.valueOf(results.getEnd()));
+      addNode(xmldoc, searchEl, "rows", String.valueOf(results.getRows()));
+      addNode(xmldoc, searchEl, "totalhits",
+        String.valueOf(results.getTotalHits()));
+      addNode(xmldoc, searchEl, "withSummary",
+        String.valueOf(results.isWithSummary()));
+
+      String[] searchFields = results.getFields();
+      Set<String> fieldSet = new HashSet<String>();
+      if (searchFields != null && searchFields.length > 0) {
+        addNode(xmldoc, searchEl, "fields", StringUtils.join(searchFields, ","));
+        for (int i = 0; i < searchFields.length; i++) {
+          fieldSet.add(searchFields[i]);
+        }
+      }
+
+      // add documents
+      Element documents = addNode(xmldoc, resEl, "documents");
+      HitDetails[] details = results.getDetails();
+      Hit[] hits = results.getHits();
+      Summary[] summaries = results.getSummaries();
+      for (int i = 0; i < details.length; i++) {
+
+        // every document has an indexno and an indexdocno
+        Element document = addNode(xmldoc, documents, "document");
+        addAttribute(xmldoc, document, "indexno",
+          String.valueOf(hits[i].getIndexNo()));
+        addAttribute(xmldoc, document, "indexdocno",
+          String.valueOf(hits[i].getIndexDocNo()));
+        
+        // only add a summary when summaries were requested and are available
+        if (summaries != null && results.isWithSummary()) {
+          String encSumm = Entities.encode(summaries[i].toString());
+          addNode(xmldoc, document, "summary", encSumm);
+        }
+
+        // add the fields from hit details
+        Element fields = addNode(xmldoc, document, "fields");
+        HitDetails detail = details[i];
+        for (int j = 0; j < detail.getLength(); j++) {
+          String fieldName = detail.getField(j);
+          String[] fieldValues = detail.getValues(fieldName);
+          
+          // if we specified fields to return, only return those fields
+          if (fieldSet.size() == 0 || fieldSet.contains(fieldName)) {
+            Element field = addNode(xmldoc, fields, "field");
+            addAttribute(xmldoc, field, "name", fieldName);
+            for (int k = 0; k < fieldValues.length; k++) {
+              String encFieldVal = Entities.encode(fieldValues[k]);
+              addNode(xmldoc, field, "value", encFieldVal);
+            }
+          }
+        }
+      }
+
+      // get the xml source and a transformer to print it out
+      DOMSource source = new DOMSource(xmldoc);
+      TransformerFactory transFactory = TransformerFactory.newInstance();
+      Transformer transformer = transFactory.newTransformer();
+      
+      // pretty printing can be set through configuration
+      if (prettyPrint) {
+        transformer.setOutputProperty("indent", "yes");
+        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+        transformer.setOutputProperty(
+          "{http://xml.apache.org/xslt}indent-amount", "2");
+      }
+      
+      // write out the content to a byte array
+      ByteArrayOutputStream baos = new ByteArrayOutputStream();
+      StreamResult result = new StreamResult(baos);
+      transformer.transform(source, result);
+      baos.flush();
+      baos.close();
+
+      // cache control headers
+      SimpleDateFormat sdf = new SimpleDateFormat(
+        "E, d MMM yyyy HH:mm:ss 'GMT'");
+      long relExpiresInMillis = System.currentTimeMillis()
+        + (1000 * maxAgeInSeconds);
+      response.setContentType(contentType);
+      response.setHeader("Cache-Control", "max-age=" + maxAgeInSeconds);
+      response.setHeader("Expires", sdf.format(relExpiresInMillis));
+      
+      // write out the content to the response
+      response.getOutputStream().write(baos.toByteArray());
+      response.flushBuffer();
+    }
+    catch (Exception e) {
+      throw new IOException(e);
+    }
+
+  }
+}

Modified: lucene/nutch/trunk/src/web/web.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/web/web.xml?rev=730845&r1=730844&r2=730845&view=diff
==============================================================================
--- lucene/nutch/trunk/src/web/web.xml (original)
+++ lucene/nutch/trunk/src/web/web.xml Fri Jan  2 13:38:58 2009
@@ -36,6 +36,11 @@
   <servlet-class>org.apache.nutch.searcher.OpenSearchServlet</servlet-class>
 </servlet>
 
+<servlet>
+  <servlet-name>SearchServlet</servlet-name>
+  <servlet-class>org.apache.nutch.searcher.response.SearchServlet</servlet-class>
+</servlet>
+
 <servlet-mapping>
   <servlet-name>Cached</servlet-name>
   <url-pattern>/servlet/cached</url-pattern>
@@ -46,6 +51,11 @@
   <url-pattern>/opensearch</url-pattern>
 </servlet-mapping>
 
+<servlet-mapping>
+  <servlet-name>SearchServlet</servlet-name>
+  <url-pattern>/search</url-pattern>
+</servlet-mapping>
+
 <welcome-file-list>
   <welcome-file>search.html</welcome-file>
   <welcome-file>index.html</welcome-file>

svn commit: r730845 - in /lucene/nutch/trunk: ./ conf/ lib/ src/java/org/apache/nutch/searcher/response/ src/plugin/ src/plugin/nutch-extensionpoints/ src/plugin/response-json/ src/plugin/response-json/lib/ src/plugin/response-json/src/ src/plugin/resp...

Reply via email to