Author: kubes Date: Fri Jan 2 13:38:58 2009 New Revision: 730845 URL: http://svn.apache.org/viewvc?rev=730845&view=rev Log: NUTCH-594: Serve Nutch search results in multiple formats including XML and JSON.
Added: lucene/nutch/trunk/lib/commons-beanutils-1.8.0.jar (with props) lucene/nutch/trunk/lib/commons-collections-3.2.1.jar (with props) lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java lucene/nutch/trunk/src/plugin/response-json/ lucene/nutch/trunk/src/plugin/response-json/build.xml lucene/nutch/trunk/src/plugin/response-json/lib/ lucene/nutch/trunk/src/plugin/response-json/lib/ezmorph-1.0.6.jar (with props) lucene/nutch/trunk/src/plugin/response-json/lib/json-lib-2.2.2-jdk15.jar (with props) lucene/nutch/trunk/src/plugin/response-json/plugin.xml lucene/nutch/trunk/src/plugin/response-json/src/ lucene/nutch/trunk/src/plugin/response-json/src/java/ lucene/nutch/trunk/src/plugin/response-json/src/java/org/ lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/ lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/ lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/ lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/ lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/ lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java lucene/nutch/trunk/src/plugin/response-xml/ lucene/nutch/trunk/src/plugin/response-xml/build.xml lucene/nutch/trunk/src/plugin/response-xml/plugin.xml lucene/nutch/trunk/src/plugin/response-xml/src/ lucene/nutch/trunk/src/plugin/response-xml/src/java/ lucene/nutch/trunk/src/plugin/response-xml/src/java/org/ 
lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/ lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/ lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/ lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/ lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/ lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/build.xml lucene/nutch/trunk/conf/nutch-default.xml lucene/nutch/trunk/src/plugin/build.xml lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml lucene/nutch/trunk/src/web/web.xml Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=730845&r1=730844&r2=730845&view=diff ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Fri Jan 2 13:38:58 2009 @@ -300,6 +300,9 @@ 111. NUTCH-646 - New Indexing Framework for Nutch. (kubes) 112. NUTCH-668 - Domain URL Filter. (kubes) + +113. NUTCH-594 - Serve Nutch search results in multiple formats including + XML and JSON. 
(kubes) Release 0.9 - 2007-04-02 Modified: lucene/nutch/trunk/build.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/build.xml?rev=730845&r1=730844&r2=730845&view=diff ============================================================================== --- lucene/nutch/trunk/build.xml (original) +++ lucene/nutch/trunk/build.xml Fri Jan 2 13:38:58 2009 @@ -172,32 +172,34 @@ <outputproperty name="indent" value="yes"/> </xslt> <war destfile="${build.dir}/${final.name}.war" - webxml="${web.src.dir}/web.xml"> + webxml="${web.src.dir}/web.xml"> <fileset dir="${web.src.dir}/jsp"/> <zipfileset dir="${docs.src}" includes="include/*.html"/> <zipfileset dir="${build.docs}" includes="*/include/*.html"/> <fileset dir="${docs.dir}"/> <lib dir="${lib.dir}"> - <include name="lucene*.jar"/> - <include name="taglibs-*.jar"/> - <include name="hadoop-*.jar"/> - <include name="dom4j-*.jar"/> - <include name="xerces-*.jar"/> - <include name="tika-*.jar"/> + <include name="lucene*.jar"/> + <include name="taglibs-*.jar"/> + <include name="hadoop-*.jar"/> + <include name="dom4j-*.jar"/> + <include name="xerces-*.jar"/> + <include name="tika-*.jar"/> + <include name="commons-collections-*.jar"/> + <include name="commons-beanutils-*.jar"/> <include name="commons-cli-*.jar"/> <include name="commons-lang-*.jar"/> <include name="commons-logging-*.jar"/> <include name="log4j-*.jar"/> </lib> <lib dir="${build.dir}"> - <include name="${final.name}.jar"/> + <include name="${final.name}.jar"/> </lib> <classes dir="${conf.dir}" excludes="**/*.template"/> <classes dir="${web.src.dir}/locale"/> <classes file="${web.src.dir}/log4j.properties"/> <zipfileset prefix="WEB-INF/classes/plugins" dir="${build.plugins}"/> <webinf dir="${lib.dir}"> - <include name="taglibs-*.tld"/> + <include name="taglibs-*.tld"/> </webinf> </war> </target> Modified: lucene/nutch/trunk/conf/nutch-default.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/conf/nutch-default.xml?rev=730845&r1=730844&r2=730845&view=diff 
============================================================================== --- lucene/nutch/trunk/conf/nutch-default.xml (original) +++ lucene/nutch/trunk/conf/nutch-default.xml Fri Jan 2 13:38:58 2009 @@ -886,7 +886,7 @@ <property> <name>plugin.includes</name> - <value>protocol-http|urlfilter-regex|parse-(text|html|js)|index-(basic|anchor)|query-(basic|site|url)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value> + <value>protocol-http|urlfilter-regex|parse-(text|html|js)|index-(basic|anchor)|query-(basic|site|url)|response-(json|xml)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value> <description>Regular expression naming plugin directory names to include. Any plugin not matching this expression is excluded. In any case you need at least include the nutch-extensionpoints plugin. By @@ -1209,4 +1209,63 @@ </description> </property> +<!-- response writer properties --> + +<property> + <name>search.response.default.type</name> + <value>xml</value> + <description> + The default response type returned if none is specified. + </description> +</property> + +<property> + <name>search.response.default.lang</name> + <value>en</value> + <description> + The default response language if none is specified. + </description> +</property> + +<property> + <name>search.response.default.numrows</name> + <value>10</value> + <description> + The default number of rows to return if none is specified. + </description> +</property> + +<property> + <name>search.response.default.dedupfield</name> + <value>site</value> + <description> + The default dedup field if none is specified. + </description> +</property> + +<property> + <name>search.response.default.numdupes</name> + <value>1</value> + <description> + The default number of duplicates returned if none is specified. + </description> +</property> + +<property> + <name>searcher.response.maxage</name> + <value>86400</value> + <description> + The maxage of a response in seconds. Used in caching headers. 
+ </description> +</property> + +<property> + <name>searcher.response.prettyprint</name> + <value>true</value> + <description> + Should the response output be pretty printed. Setting to true enables better + debugging, false removes unneeded spaces and gives better throughput. + </description> +</property> + </configuration> Added: lucene/nutch/trunk/lib/commons-beanutils-1.8.0.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/commons-beanutils-1.8.0.jar?rev=730845&view=auto ============================================================================== Binary file - no diff available. Propchange: lucene/nutch/trunk/lib/commons-beanutils-1.8.0.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: lucene/nutch/trunk/lib/commons-collections-3.2.1.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/commons-collections-3.2.1.jar?rev=730845&view=auto ============================================================================== Binary file - no diff available. Propchange: lucene/nutch/trunk/lib/commons-collections-3.2.1.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java?rev=730845&view=auto ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java (added) +++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/RequestUtils.java Fri Jan 2 13:38:58 2009 @@ -0,0 +1,72 @@ +package org.apache.nutch.searcher.response; + +import javax.servlet.http.HttpServletRequest; + +import org.apache.commons.lang.StringUtils; + +/** + * A set of utility methods for getting request parameters. 
+ */ +public class RequestUtils { + + /** + * Tells whether the request carries the named parameter at all. A parameter + * sent with an empty value still counts as existing. + * + * @param request The HttpServletRequest object. + * @param param The name of the request parameter. + * + * @return True if the parameter is present, false otherwise. + */ + public static boolean parameterExists(HttpServletRequest request, String param) { + String value = request.getParameter(param); + return value != null; + } + + /** + * Reads a request parameter as an Integer. Only values made up solely of + * digits are accepted. + * + * @param request The HttpServletRequest object. + * @param param The name of the request parameter. + * + * @return The parsed value, or null if the parameter is missing, blank, + * contains anything but digits, or does not fit in an int. + */ + public static Integer getIntegerParameter(HttpServletRequest request, + String param) { + String value = request.getParameter(param); + if (StringUtils.isNotBlank(value) && StringUtils.isNumeric(value)) { + try { + return Integer.valueOf(value); + } + catch (NumberFormatException e) { + // all digits but outside the int range, report it as unparsable + // instead of failing the whole request + return null; + } + } + return null; + } + + /** + * Reads a request parameter as an Integer, falling back to a default. + * + * @param request The HttpServletRequest object. + * @param param The name of the request parameter. + * @param def The value returned when the parameter is missing or unparsable. + * + * @return The parsed value or def. + */ + public static Integer getIntegerParameter(HttpServletRequest request, + String param, Integer def) { + Integer value = getIntegerParameter(request, param); + return (value == null) ? def : value; + } + + /** + * Reads a request parameter as a String. + * + * @param request The HttpServletRequest object. + * @param param The name of the request parameter. + * + * @return The parameter value, possibly empty, or null if it is missing. + */ + public static String getStringParameter(HttpServletRequest request, + String param) { + return request.getParameter(param); + } + + /** + * Reads a request parameter as a String, falling back to a default. + * + * @param request The HttpServletRequest object. + * @param param The name of the request parameter. + * @param def The value returned when the parameter is missing. + * + * @return The parameter value, possibly empty, or def if it is missing. + */ + public static String getStringParameter(HttpServletRequest request, + String param, String def) { + String value = getStringParameter(request, param); + return (value == null) ? def : value; + } +
+ /** + * Reads a request parameter as a Boolean. + * + * @param request The HttpServletRequest object. + * @param param The name of the request parameter. + * + * @return True if the parameter value is 1, true or yes (the last two + * ignoring case), false in every other case including a missing parameter. + */ + public static Boolean getBooleanParameter(HttpServletRequest request, + String param) { + return isTrue(request.getParameter(param)); + } + + /** + * Reads a request parameter as a Boolean, falling back to a default. + * + * @param request The HttpServletRequest object. + * @param param The name of the request parameter. + * @param def The value returned when the parameter is missing, may be null. + * + * @return def if the parameter is missing, otherwise true if its value is 1, + * true or yes (the last two ignoring case) and false for anything else. + */ + public static Boolean getBooleanParameter(HttpServletRequest request, + String param, Boolean def) { + String value = request.getParameter(param); + if (value == null) { + // plain if instead of a conditional expression so a null def is returned + // as is and never unboxed + return def; + } + return isTrue(value); + } + + /** + * Decides whether a parameter value means true. The check runs on the value + * that was sent, not on the name of the parameter. + */ + private static boolean isTrue(String value) { + return StringUtils.equals(value, "1") + || StringUtils.equalsIgnoreCase(value, "true") + || StringUtils.equalsIgnoreCase(value, "yes"); + } +} Added: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java?rev=730845&view=auto ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java (added) +++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriter.java Fri Jan 2 13:38:58 2009 @@ -0,0 +1,43 @@ +package org.apache.nutch.searcher.response; + +import java.io.IOException; + +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.hadoop.conf.Configurable; +import org.apache.nutch.plugin.Pluggable; + +/** + * Nutch extension point which allows writing search results in many different + * output formats. + */ +public interface ResponseWriter + extends Pluggable, Configurable { + + public final static String X_POINT_ID = ResponseWriter.class.getName(); + + /** + * Sets the returned content MIME type. Populated through variables set in + * the plugin.xml file of the ResponseWriter. 
This allows easily changing + * output content types, for example for JSON from text/plain during testing + * and debugging to application/json in production. + * + * @param contentType The MIME content type to set. + */ + public void setContentType(String contentType); + + /** + * Writes out the search results response to the HttpServletResponse. + * + * @param results The SearchResults object containing hits and other info. + * @param request The HttpServletRequest object. + * @param response The HttpServletResponse object. + * + * @throws IOException If an error occurs while writing out the response. + */ + public void writeResponse(SearchResults results, HttpServletRequest request, + HttpServletResponse response) + throws IOException; + +} Added: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java?rev=730845&view=auto ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java (added) +++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/ResponseWriters.java Fri Jan 2 13:38:58 2009 @@ -0,0 +1,90 @@ +package org.apache.nutch.searcher.response; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.nutch.plugin.Extension; +import org.apache.nutch.plugin.ExtensionPoint; +import org.apache.nutch.plugin.PluginRepository; +import org.apache.nutch.plugin.PluginRuntimeException; +import org.apache.nutch.util.ObjectCache; + +/** + * Utility class for getting all ResponseWriter implementations and for + * returning the correct ResponseWriter for a given request type. + */ +public class ResponseWriters { + + private Map<String, ResponseWriter> responseWriters; + + /** + * Constructor that configures the cache of ResponseWriter objects. 
+ * + * @param conf The Nutch configuration object. + */ + public ResponseWriters(Configuration conf) { + + // get the cache and the cache key + String cacheKey = ResponseWriter.class.getName(); + ObjectCache objectCache = ObjectCache.get(conf); + this.responseWriters = (Map<String, ResponseWriter>)objectCache.getObject(cacheKey); + + // if already populated do nothing + if (this.responseWriters == null) { + + try { + + // get the extension point and all ResponseWriter extensions + ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint( + ResponseWriter.X_POINT_ID); + if (point == null) { + throw new RuntimeException(ResponseWriter.X_POINT_ID + " not found."); + } + + // populate content type on the ResponseWriter classes, each response + // writer can handle more than one response type + Extension[] extensions = point.getExtensions(); + Map<String, ResponseWriter> writers = new HashMap<String, ResponseWriter>(); + for (int i = 0; i < extensions.length; i++) { + Extension extension = extensions[i]; + ResponseWriter writer = (ResponseWriter)extension.getExtensionInstance(); + String[] responseTypes = extension.getAttribute("responseType").split( + ","); + String contentType = extension.getAttribute("contentType"); + writer.setContentType(contentType); + for (int k = 0; k < responseTypes.length; k++) { + writers.put(responseTypes[k], writer); + } + } + + // set null object if no writers, otherwise set the writers + if (writers == null) { + objectCache.setObject(cacheKey, new HashMap<String, ResponseWriter>()); + } + else { + objectCache.setObject(cacheKey, writers); + } + } + catch (PluginRuntimeException e) { + throw new RuntimeException(e); + } + + // set the response writers map + this.responseWriters = (Map<String, ResponseWriter>)objectCache.getObject(cacheKey); + } + } + + /** + * Return the correct ResponseWriter object for the response type. + * + * @param respType The response type, such as xml or json. 
Must correspond to + * the value set in the plugin.xml file for the ResponseWriter extension. + * + * @return The ResponseWriter that handles that response type or null if no + * such object exists. + */ + public ResponseWriter getResponseWriter(String respType) { + return responseWriters.get(respType); + } +} Added: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java?rev=730845&view=auto ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java (added) +++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchResults.java Fri Jan 2 13:38:58 2009 @@ -0,0 +1,140 @@ +package org.apache.nutch.searcher.response; + +import org.apache.nutch.searcher.Hit; +import org.apache.nutch.searcher.HitDetails; +import org.apache.nutch.searcher.Summary; +/** + * Simple bean that carries the request values (query, language, sort, paging) + * together with the search output (hits, details and summaries). It only + * stores what is set on it, the accessors perform no validation or copying. + */ +public class SearchResults { + + private String[] fields; /* names of the fields to return, null unless setFields is called */ + private String responseType; + private String query; + private String lang; + private String sort; + private boolean reverse; + private boolean withSummary = true; /* summaries are wanted unless a caller switches this off */ + private int start; + private int rows; + private int end; + private long totalHits; + private Hit[] hits; + private HitDetails[] details; + private Summary[] summaries; /* NOTE(review): presumably aligned index by index with hits and details - confirm */ + + public SearchResults() { + + } + + public String[] getFields() { + return fields; + } + + public void setFields(String[] fields) { + this.fields = fields; + } + + public boolean isWithSummary() { + return withSummary; + } + + public void setWithSummary(boolean withSummary) { + this.withSummary = withSummary; + } + + public String getResponseType() { + return responseType; + } + + public void setResponseType(String responseType) { + this.responseType = responseType; + } + + public String getQuery() { + return query; + } + + public void setQuery(String query) { + this.query = 
query; + } + + public String getLang() { + return lang; + } + + public void setLang(String lang) { + this.lang = lang; + } + + public String getSort() { + return sort; + } + + public void setSort(String sort) { + this.sort = sort; + } + + public boolean isReverse() { + return reverse; + } + + public void setReverse(boolean reverse) { + this.reverse = reverse; + } + + public int getStart() { + return start; + } + + public void setStart(int start) { + this.start = start; + } + + public int getRows() { + return rows; + } + + public void setRows(int rows) { + this.rows = rows; + } + + public int getEnd() { + return end; + } + + public void setEnd(int end) { + this.end = end; + } + + public long getTotalHits() { + return totalHits; + } + + public void setTotalHits(long totalHits) { + this.totalHits = totalHits; + } + + public Hit[] getHits() { + return hits; + } + + public void setHits(Hit[] hits) { + this.hits = hits; + } + + public HitDetails[] getDetails() { + return details; + } + + public void setDetails(HitDetails[] details) { + this.details = details; + } + + public Summary[] getSummaries() { + return summaries; + } + + public void setSummaries(Summary[] summaries) { + this.summaries = summaries; + } + +} Added: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java?rev=730845&view=auto ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java (added) +++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/response/SearchServlet.java Fri Jan 2 13:38:58 2009 @@ -0,0 +1,196 @@ +package org.apache.nutch.searcher.response; + +import java.io.IOException; + +import javax.servlet.ServletConfig; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; 
+import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.nutch.searcher.Hit; +import org.apache.nutch.searcher.HitDetails; +import org.apache.nutch.searcher.Hits; +import org.apache.nutch.searcher.NutchBean; +import org.apache.nutch.searcher.Query; +import org.apache.nutch.searcher.Summary; +import org.apache.nutch.util.NutchConfiguration; + +/** + * Servlet that allows returning search results in multiple different formats + * through a ResponseWriter Nutch extension point. + * + * @see org.apache.nutch.searcher.response.ResponseWriter + */ +public class SearchServlet + extends HttpServlet { + + public static final Log LOG = LogFactory.getLog(SearchServlet.class); /* NOTE(review): not referenced in this class, doGet logs through NutchBean.LOG */ + private NutchBean bean; + private Configuration conf; + private ResponseWriters writers; + + private String defaultRespType = "xml"; + private String defaultLang = null; + private int defaultNumRows = 10; + private String defaultDedupField = "site"; + private int defaultNumDupes = 1; /* the five defaults above are overwritten from the configuration in init */ + + public static final String RESPONSE_TYPE = "rt"; + public static final String QUERY = "query"; + public static final String LANG = "lang"; + public static final String START = "start"; + public static final String ROWS = "rows"; + public static final String SORT = "sort"; + public static final String REVERSE = "reverse"; + public static final String DEDUPE = "ddf"; + public static final String NUM_DUPES = "dupes"; + public static final String SUMMARY = "summary"; + public static final String FIELDS = "field"; /* RESPONSE_TYPE through FIELDS are the request parameter names read in doGet */ + + /** + * Initializes servlet configuration default values. Gets NutchBean and + * ResponseWriters. + * + * @param config The ServletConfig whose ServletContext supplies the Nutch + * configuration and the NutchBean. + * + * @throws ServletException If an IOException occurs during initialization. + */ + public void init(ServletConfig config) + throws ServletException { + + // set sensible defaults for response writer values and cache NutchBean. + // Also get and cache all ResponseWriter implementations. 
+ super.init(config); + try { + this.conf = NutchConfiguration.get(config.getServletContext()); + this.defaultRespType = conf.get("search.response.default.type", "xml"); + this.defaultLang = conf.get("search.response.default.lang"); + this.defaultNumRows = conf.getInt("search.response.default.numrows", 10); + this.defaultDedupField = conf.get("search.response.default.dedupfield", + "site"); + this.defaultNumDupes = conf.getInt("search.response.default.numdupes", 1); + bean = NutchBean.get(config.getServletContext(), this.conf); + writers = new ResponseWriters(conf); + } + catch (IOException e) { + throw new ServletException(e); + } + } + + /** + * Forwards all POST requests to doGet. + */ + protected void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + doGet(request, response); + } + + /** + * Handles all search requests. Gets parameter input. Does the search and + * gets Hits, details, and summaries. Passes off to ResponseWriter classes + * to write different output formats directly to HttpServletResponse. + * + * @param request The HttpServletRequest carrying the search parameters. + * @param response The HttpServletResponse the ResponseWriter writes to. + * + * @throws IOException If the response type is unknown, the query is blank, + * or the ResponseWriter fails while writing. 
+ */ + protected void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + if (NutchBean.LOG.isInfoEnabled()) { + NutchBean.LOG.info("Query request from " + request.getRemoteAddr()); + } + + // get the response type, used to call the correct ResponseWriter + String respType = RequestUtils.getStringParameter(request, RESPONSE_TYPE, + defaultRespType); + ResponseWriter writer = writers.getResponseWriter(respType); + if (writer == null) { + throw new IOException("Unknown response type " + respType); /* NOTE(review): bad client input is reported as an IOException; presumably the container answers with a 500 rather than a 400 - confirm */ + } + + // get the query + String query = RequestUtils.getStringParameter(request, QUERY); + if (StringUtils.isBlank(query)) { + throw new IOException("Query cannot be empty!"); + } + + // get the language from parameter, then request, then finally configuration + String lang = RequestUtils.getStringParameter(request, LANG); + if (StringUtils.isBlank(lang)) { + lang = request.getLocale().getLanguage(); + if (StringUtils.isBlank(lang)) { + lang = defaultLang; + } + } + + // get various other search parameters, fields allows only returning a + // given set of fields + boolean withSummary = RequestUtils.getBooleanParameter(request, SUMMARY, + true); + String sort = RequestUtils.getStringParameter(request, SORT); + int start = RequestUtils.getIntegerParameter(request, START, 0); + int rows = RequestUtils.getIntegerParameter(request, ROWS, defaultNumRows); + boolean reverse = RequestUtils.getBooleanParameter(request, REVERSE, false); + String dedup = RequestUtils.getStringParameter(request, DEDUPE, + defaultDedupField); + int numDupes = RequestUtils.getIntegerParameter(request, NUM_DUPES, + defaultNumDupes); + String[] fields = request.getParameterValues(FIELDS); + + // parse out the query + Query queryObj = Query.parse(query, lang, this.conf); + if (NutchBean.LOG.isInfoEnabled()) { + NutchBean.LOG.info("query: " + query); + NutchBean.LOG.info("lang: " + lang); + } + + // search and return hits + Hits hits; + try { + hits 
= bean.search(queryObj, start + rows, numDupes, dedup, sort, reverse); + } + catch (IOException e) { + if (NutchBean.LOG.isWarnEnabled()) { + NutchBean.LOG.warn("Search Error", e); + } + hits = new Hits(0, new Hit[0]); /* a failed search is logged and answered as an empty result */ + } + + // get the total number of hits, the hits to show, and the hit details + long totalHits = hits.getTotal(); + int end = (int)Math.min(hits.getLength(), start + rows); + int numHits = (end > start) ? (end - start) : 0; /* zero when start is at or beyond end */ + Hit[] show = hits.getHits(start, numHits); + HitDetails[] details = bean.getDetails(show); + + // setup the SearchResults object, used in response writing + SearchResults results = new SearchResults(); + results.setResponseType(respType); + results.setQuery(query); + results.setLang(lang); + results.setSort(sort); + results.setReverse(reverse); + results.setStart(start); + results.setRows(rows); + results.setEnd(end); + results.setTotalHits(totalHits); + results.setHits(show); + results.setDetails(details); + + // are we returning summaries with results, if not avoid network hit + if (withSummary) { + Summary[] summaries = bean.getSummary(details, queryObj); + results.setSummaries(summaries); + results.setWithSummary(true); + } + else { + results.setWithSummary(false); + } + + // set return fields if any specified, if not all fields are returned + if (fields != null && fields.length > 0) { + results.setFields(fields); + } + + // call the response writer to write out content to HttpResponse directly + writer.writeResponse(results, request, response); + } +} Modified: lucene/nutch/trunk/src/plugin/build.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/build.xml?rev=730845&r1=730844&r2=730845&view=diff ============================================================================== --- lucene/nutch/trunk/src/plugin/build.xml (original) +++ lucene/nutch/trunk/src/plugin/build.xml Fri Jan 2 13:38:58 2009 @@ -68,6 +68,8 @@ <ant dir="query-site" target="deploy"/> <ant dir="query-custom" target="deploy"/> <ant 
dir="query-url" target="deploy"/> + <ant dir="response-json" target="deploy"/> + <ant dir="response-xml" target="deploy"/> <ant dir="scoring-opic" target="deploy"/> <ant dir="scoring-link" target="deploy"/> <ant dir="summary-basic" target="deploy"/> @@ -105,7 +107,7 @@ <ant dir="parse-pdf" target="test"/> <ant dir="parse-rss" target="test"/> <ant dir="feed" target="test"/> - <!-- <ant dir="parse-rtf" target="test"/> --> + <!-- <ant dir="parse-rtf" target="test"/> --> <ant dir="parse-swf" target="test"/> <ant dir="parse-zip" target="test"/> <ant dir="query-url" target="test"/> @@ -168,6 +170,8 @@ <ant dir="query-site" target="clean"/> <ant dir="query-url" target="clean"/> <ant dir="query-custom" target="clean"/> + <ant dir="response-json" target="clean"/> + <ant dir="response-xml" target="clean"/> <ant dir="scoring-opic" target="clean"/> <ant dir="scoring-link" target="clean"/> <ant dir="subcollection" target="clean"/> Modified: lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml?rev=730845&r1=730844&r2=730845&view=diff ============================================================================== --- lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml (original) +++ lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml Fri Jan 2 13:38:58 2009 @@ -69,6 +69,10 @@ name="Nutch Analysis"/> <extension-point + id="org.apache.nutch.searcher.response.ResponseWriter" + name="Nutch Search Results Response Writer"/> + +<extension-point id="org.apache.nutch.searcher.Summarizer" name="Nutch Summarizer"/> Added: lucene/nutch/trunk/src/plugin/response-json/build.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/build.xml?rev=730845&view=auto ============================================================================== --- lucene/nutch/trunk/src/plugin/response-json/build.xml (added) +++ 
lucene/nutch/trunk/src/plugin/response-json/build.xml Fri Jan 2 13:38:58 2009 @@ -0,0 +1,22 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<project name="response-json" default="jar-core"> + + <import file="../build-plugin.xml"/> + +</project> Added: lucene/nutch/trunk/src/plugin/response-json/lib/ezmorph-1.0.6.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/lib/ezmorph-1.0.6.jar?rev=730845&view=auto ============================================================================== Binary file - no diff available. Propchange: lucene/nutch/trunk/src/plugin/response-json/lib/ezmorph-1.0.6.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: lucene/nutch/trunk/src/plugin/response-json/lib/json-lib-2.2.2-jdk15.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/lib/json-lib-2.2.2-jdk15.jar?rev=730845&view=auto ============================================================================== Binary file - no diff available. 
Propchange: lucene/nutch/trunk/src/plugin/response-json/lib/json-lib-2.2.2-jdk15.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: lucene/nutch/trunk/src/plugin/response-json/plugin.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/plugin.xml?rev=730845&view=auto ============================================================================== --- lucene/nutch/trunk/src/plugin/response-json/plugin.xml (added) +++ lucene/nutch/trunk/src/plugin/response-json/plugin.xml Fri Jan 2 13:38:58 2009 @@ -0,0 +1,62 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<!-- + ! JSON-lib is a java library for transforming beans, maps, collections, java + ! arrays and XML to JSON and back again. EZMorph is one of its dependencies. + ! Both are licensed under the apache license. + ! + ! JSON-lib Project: http://json-lib.sourceforge.net/index.html + ! JSON-lib Download: http://sourceforge.net/project/showfiles.php?group_id=171425 + ! License: http://json-lib.sourceforge.net/license.html + ! + ! EZMorph Project: http://ezmorph.sourceforge.net/index.html + ! 
EZMorph Download: http://sourceforge.net/project/showfiles.php?group_id=174866 + ! License: http://ezmorph.sourceforge.net/license.html + !--> +<plugin + id="response-json" + name="JSON Response Writer Plug-in" + version="1.0.0" + provider-name="nutch.org"> + + <runtime> + <library name="response-json.jar"> + <export name="*"/> + </library> + <library name="ezmorph-1.0.6.jar"/> + <library name="json-lib-2.2.2-jdk15.jar"/> + </runtime> + + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + + <extension id="org.apache.nutch.searcher.response" + name="ResponseWriter" + point="org.apache.nutch.searcher.response.ResponseWriter"> + + <implementation id="org.apache.nutch.searcher.response.json.JSONResponseWriter" + class="org.apache.nutch.searcher.response.json.JSONResponseWriter"> + <parameter name="responseType" value="json"/> + <!--<parameter name="contentType" value="application/json"/>--> + <parameter name="contentType" value="text/plain"/> + </implementation> + + </extension> + +</plugin> Added: lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java?rev=730845&view=auto ============================================================================== --- lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java (added) +++ lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java Fri Jan 2 13:38:58 2009 @@ -0,0 +1,141 @@ +package org.apache.nutch.searcher.response.json; + +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.HashSet; +import java.util.Set; + +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import net.sf.json.JSONArray; +import 
net.sf.json.JSONObject; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.nutch.searcher.Hit; +import org.apache.nutch.searcher.HitDetails; +import org.apache.nutch.searcher.Summary; +import org.apache.nutch.searcher.response.RequestUtils; +import org.apache.nutch.searcher.response.ResponseWriter; +import org.apache.nutch.searcher.response.SearchResults; + +/** + * A ResponseWriter implementation that returns search results in JSON format. + */ +public class JSONResponseWriter + implements ResponseWriter { + + private String contentType = null; + private Configuration conf; + private int maxAgeInSeconds; + private boolean prettyPrint = true; + + public void setContentType(String contentType) { + this.contentType = contentType; + } + + public Configuration getConf() { + return conf; + } + + public void setConf(Configuration conf) { + this.conf = conf; + this.maxAgeInSeconds = conf.getInt("searcher.response.maxage", 86400); + this.prettyPrint = conf.getBoolean("searcher.response.prettyprint", true); + } + + public void writeResponse(SearchResults results, HttpServletRequest request, + HttpServletResponse response) + throws IOException { + + // the function name, if any wrapping the JSON output + String func = RequestUtils.getStringParameter(request, "func"); + + // create the JSON object and add common values + JSONObject jsonObj = new JSONObject(); + jsonObj.accumulate("query", results.getQuery()); + jsonObj.accumulate("lang", results.getLang()); + jsonObj.accumulate("sort", results.getSort()); + jsonObj.accumulate("reverse", results.isReverse()); + jsonObj.accumulate("start", results.getStart()); + jsonObj.accumulate("end", results.getEnd()); + jsonObj.accumulate("rows", results.getRows()); + jsonObj.accumulate("totalhits", results.getTotalHits()); + jsonObj.accumulate("withSummary", results.isWithSummary()); + + String[] searchFields = results.getFields(); + Set<String> fieldSet = new HashSet<String>(); 
+ if (searchFields != null && searchFields.length > 0) { + jsonObj.accumulate("fields", StringUtils.join(searchFields, ",")); + for (int i = 0; i < searchFields.length; i++) { + fieldSet.add(searchFields[i]); + } + } + + // add the documents from search hits + JSONArray docsAr = new JSONArray(); + HitDetails[] details = results.getDetails(); + Hit[] hits = results.getHits(); + Summary[] summaries = results.getSummaries(); + for (int i = 0; i < details.length; i++) { + + // every document has an indexno and an indexdocno + JSONObject result = new JSONObject(); + HitDetails detail = details[i]; + Hit hit = hits[i]; + result.accumulate("indexno", hit.getIndexNo()); + result.accumulate("indexdocno", hit.getIndexDocNo()); + + // only add the summary if summaries were requested and are available + if (summaries != null && results.isWithSummary()) { + Summary summary = summaries[i]; + result.accumulate("summary", summary.toString()); + } + + // add the fields from hit details + JSONObject fields = new JSONObject(); + for (int k = 0; k < detail.getLength(); k++) { + String name = detail.getField(k); + String[] values = detail.getValues(name); + + // if we specified fields to return, only return those fields + if (fieldSet.size() == 0 || fieldSet.contains(name)) { + JSONArray valuesAr = new JSONArray(); + for (int m = 0; m < values.length; m++) { + valuesAr.add(values[m]); + } + fields.accumulate(name, valuesAr); + } + } + result.accumulate("fields", fields); + docsAr.add(result); + } + + jsonObj.accumulate("documents", docsAr); + + // pretty printing can be set through configuration, write out the wrapper + // function if there is one + StringBuilder builder = new StringBuilder(); + if (StringUtils.isNotBlank(func)) { + builder.append(func + "("); + } + builder.append(prettyPrint ? 
jsonObj.toString(2) : jsonObj.toString()); + if (StringUtils.isNotBlank(func)) { + builder.append(")"); + } + + // Cache control headers + SimpleDateFormat sdf = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss 'GMT'"); + long relExpiresInMillis = System.currentTimeMillis() + + (1000 * maxAgeInSeconds); + response.setContentType(contentType); + response.setHeader("Cache-Control", "max-age=" + maxAgeInSeconds); + response.setHeader("Expires", sdf.format(relExpiresInMillis)); + + // write out the content to the response + response.getOutputStream().write(builder.toString().getBytes()); + response.flushBuffer(); + } + +} Added: lucene/nutch/trunk/src/plugin/response-xml/build.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-xml/build.xml?rev=730845&view=auto ============================================================================== --- lucene/nutch/trunk/src/plugin/response-xml/build.xml (added) +++ lucene/nutch/trunk/src/plugin/response-xml/build.xml Fri Jan 2 13:38:58 2009 @@ -0,0 +1,22 @@ +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<project name="response-xml" default="jar-core"> + + <import file="../build-plugin.xml"/> + +</project> Added: lucene/nutch/trunk/src/plugin/response-xml/plugin.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-xml/plugin.xml?rev=730845&view=auto ============================================================================== --- lucene/nutch/trunk/src/plugin/response-xml/plugin.xml (added) +++ lucene/nutch/trunk/src/plugin/response-xml/plugin.xml Fri Jan 2 13:38:58 2009 @@ -0,0 +1,46 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<plugin + id="response-xml" + name="XML Response Writer Plug-in" + version="1.0.0" + provider-name="nutch.org"> + + <runtime> + <library name="response-xml.jar"> + <export name="*"/> + </library> + </runtime> + + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + + <extension id="org.apache.nutch.searcher.response" + name="ResponseWriter" + point="org.apache.nutch.searcher.response.ResponseWriter"> + + <implementation id="org.apache.nutch.searcher.response.xml.XMLResponseWriter" + class="org.apache.nutch.searcher.response.xml.XMLResponseWriter"> + <parameter name="responseType" value="xml"/> + <parameter name="contentType" value="text/xml"/> + </implementation> + + </extension> + +</plugin> Added: lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java?rev=730845&view=auto ============================================================================== --- lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java (added) +++ lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java Fri Jan 2 13:38:58 2009 @@ -0,0 +1,267 @@ +package org.apache.nutch.searcher.response.xml; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.HashSet; +import java.util.Set; + +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; + +import org.apache.commons.lang.StringUtils; 
+import org.apache.hadoop.conf.Configuration; +import org.apache.nutch.html.Entities; +import org.apache.nutch.searcher.Hit; +import org.apache.nutch.searcher.HitDetails; +import org.apache.nutch.searcher.Summary; +import org.apache.nutch.searcher.response.ResponseWriter; +import org.apache.nutch.searcher.response.SearchResults; +import org.w3c.dom.Attr; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; + +/** + * A ResponseWriter implementation that returns search results in XML format. + */ +public class XMLResponseWriter + implements ResponseWriter { + + private String contentType = null; + private Configuration conf; + private int maxAgeInSeconds; + private boolean prettyPrint; + + /** + * Creates and returns a new node within the XML document. + * + * @param doc The XML document. + * @param parent The parent Node. + * @param name The name of the new node. + * + * @return The newly created node Element. + */ + private static Element addNode(Document doc, Node parent, String name) { + Element child = doc.createElement(name); + parent.appendChild(child); + return child; + } + + /** + * Creates a new node within the XML document and appends it to the parent. + * The node contains the text supplied as a child node. + * + * This overload does not return the newly created node. + * + * @param doc The XML document. + * @param parent The parent Node. + * @param name The name of the new node. + * @param text A text string to append as a child node. + */ + private static void addNode(Document doc, Node parent, String name, + String text) { + Element child = doc.createElement(name); + child.appendChild(doc.createTextNode(getLegalXml(text))); + parent.appendChild(child); + } + + /** + * Adds an attribute name and value to a node Element in the XML document. + * + * @param doc The XML document. + * @param node The node Element on which to attach the attribute. + * @param name The name of the attribute. + * @param value The value of the attribute. 
+ */ + private static void addAttribute(Document doc, Element node, String name, + String value) { + Attr attribute = doc.createAttribute(name); + attribute.setValue(getLegalXml(value)); + node.getAttributes().setNamedItem(attribute); + } + + /** + * Transforms and returns the text string as legal XML text. + * + * @param text The text to transform. + * + * @return The text string in the form of legal XML text. + */ + protected static String getLegalXml(String text) { + + if (text == null) { + return null; + } + StringBuffer buffer = null; + for (int i = 0; i < text.length(); i++) { + char c = text.charAt(i); + if (!isLegalXml(c)) { + if (buffer == null) { + buffer = new StringBuffer(text.length()); + buffer.append(text.substring(0, i)); + } + } + else { + if (buffer != null) { + buffer.append(c); + } + } + } + return (buffer != null) ? buffer.toString() : text; + } + + /** + * Determines if the character is a legal XML character. + * + * @param c The character to check. + * + * @return True if the character is legal xml, false otherwise. 
+ */ + private static boolean isLegalXml(final char c) { + return c == 0x9 || c == 0xa || c == 0xd || (c >= 0x20 && c <= 0xd7ff) + || (c >= 0xe000 && c <= 0xfffd) || (c >= 0x10000 && c <= 0x10ffff); + } + + public void setContentType(String contentType) { + this.contentType = contentType; + } + + public Configuration getConf() { + return conf; + } + + public void setConf(Configuration conf) { + this.conf = conf; + this.maxAgeInSeconds = conf.getInt("searcher.response.maxage", 86400); + this.prettyPrint = conf.getBoolean("searcher.response.prettyprint", true); + } + + public void writeResponse(SearchResults results, HttpServletRequest request, + HttpServletResponse response) + throws IOException { + + try { + + // create the xml document and add the results and search nodes + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + Document xmldoc = factory.newDocumentBuilder().newDocument(); + Element resEl = addNode(xmldoc, xmldoc, "results"); + Element searchEl = addNode(xmldoc, resEl, "search"); + + // add common nodes + String query = results.getQuery(); + addNode(xmldoc, searchEl, "query", query); + addNode(xmldoc, searchEl, "totalhits", + String.valueOf(results.getTotalHits())); + String lang = results.getLang(); + if (lang != null) { + addNode(xmldoc, searchEl, "lang", lang); + } + String sort = results.getSort(); + if (sort != null) { + addNode(xmldoc, searchEl, "sort", sort); + } + addNode(xmldoc, searchEl, "reverse", results.isReverse() ? 
"true" + : "false"); + addNode(xmldoc, searchEl, "start", String.valueOf(results.getStart())); + addNode(xmldoc, searchEl, "end", String.valueOf(results.getEnd())); + addNode(xmldoc, searchEl, "rows", String.valueOf(results.getRows())); + addNode(xmldoc, searchEl, "totalhits", + String.valueOf(results.getTotalHits())); + addNode(xmldoc, searchEl, "withSummary", + String.valueOf(results.isWithSummary())); + + String[] searchFields = results.getFields(); + Set<String> fieldSet = new HashSet<String>(); + if (searchFields != null && searchFields.length > 0) { + addNode(xmldoc, searchEl, "fields", StringUtils.join(searchFields, ",")); + for (int i = 0; i < searchFields.length; i++) { + fieldSet.add(searchFields[i]); + } + } + + // add documents + Element documents = addNode(xmldoc, resEl, "documents"); + HitDetails[] details = results.getDetails(); + Hit[] hits = results.getHits(); + Summary[] summaries = results.getSummaries(); + for (int i = 0; i < details.length; i++) { + + // every document has an indexno and an indexdocno + Element document = addNode(xmldoc, documents, "document"); + addAttribute(xmldoc, document, "indexno", + String.valueOf(hits[i].getIndexNo())); + addAttribute(xmldoc, document, "indexdocno", + String.valueOf(hits[i].getIndexDocNo())); + + // don't add summaries not including summaries + if (summaries != null && results.isWithSummary()) { + String encSumm = Entities.encode(summaries[i].toString()); + addNode(xmldoc, document, "summary", encSumm); + } + + // add the fields from hit details + Element fields = addNode(xmldoc, document, "fields"); + HitDetails detail = details[i]; + for (int j = 0; j < detail.getLength(); j++) { + String fieldName = detail.getField(j); + String[] fieldValues = detail.getValues(fieldName); + + // if we specified fields to return, only return those fields + if (fieldSet.size() == 0 || fieldSet.contains(fieldName)) { + Element field = addNode(xmldoc, fields, "field"); + addAttribute(xmldoc, field, "name", fieldName); + 
for (int k = 0; k < fieldValues.length; k++) { + String encFieldVal = Entities.encode(fieldValues[k]); + addNode(xmldoc, field, "value", encFieldVal); + } + } + } + } + + // get the xml source and a transformer to print it out + DOMSource source = new DOMSource(xmldoc); + TransformerFactory transFactory = TransformerFactory.newInstance(); + Transformer transformer = transFactory.newTransformer(); + + // pretty printing can be set through configuration + if (prettyPrint) { + transformer.setOutputProperty("indent", "yes"); + transformer.setOutputProperty(OutputKeys.INDENT, "yes"); + transformer.setOutputProperty( + "{http://xml.apache.org/xslt}indent-amount", "2"); + } + + // write out the content to a byte array + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + StreamResult result = new StreamResult(baos); + transformer.transform(source, result); + baos.flush(); + baos.close(); + + // cache control headers + SimpleDateFormat sdf = new SimpleDateFormat( + "E, d MMM yyyy HH:mm:ss 'GMT'"); + long relExpiresInMillis = System.currentTimeMillis() + + (1000 * maxAgeInSeconds); + response.setContentType(contentType); + response.setHeader("Cache-Control", "max-age=" + maxAgeInSeconds); + response.setHeader("Expires", sdf.format(relExpiresInMillis)); + + // write out the content to the response + response.getOutputStream().write(baos.toByteArray()); + response.flushBuffer(); + } + catch (Exception e) { + throw new IOException(e); + } + + } +} Modified: lucene/nutch/trunk/src/web/web.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/web/web.xml?rev=730845&r1=730844&r2=730845&view=diff ============================================================================== --- lucene/nutch/trunk/src/web/web.xml (original) +++ lucene/nutch/trunk/src/web/web.xml Fri Jan 2 13:38:58 2009 @@ -36,6 +36,11 @@ <servlet-class>org.apache.nutch.searcher.OpenSearchServlet</servlet-class> </servlet> +<servlet> + <servlet-name>SearchServlet</servlet-name> + 
<servlet-class>org.apache.nutch.searcher.response.SearchServlet</servlet-class> +</servlet> + <servlet-mapping> <servlet-name>Cached</servlet-name> <url-pattern>/servlet/cached</url-pattern> @@ -46,6 +51,11 @@ <url-pattern>/opensearch</url-pattern> </servlet-mapping> +<servlet-mapping> + <servlet-name>SearchServlet</servlet-name> + <url-pattern>/search</url-pattern> +</servlet-mapping> + <welcome-file-list> <welcome-file>search.html</welcome-file> <welcome-file>index.html</welcome-file>