Author: siren
Date: Thu May 4 09:37:25 2006
New Revision: 399758

URL: http://svn.apache.org/viewcvs?rev=399758&view=rev
Log:
fixed cached page

Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java?rev=399758&r1=399757&r2=399758&view=diff ============================================================================== --- lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java (original) +++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/CachedController.java Thu May 4 09:37:25 2006 @@ -16,16 +16,17 @@ package org.apache.nutch.webapp.controller; import java.io.IOException; +import java.io.UnsupportedEncodingException; import javax.servlet.ServletContext; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import org.apache.nutch.metadata.Metadata; import org.apache.nutch.searcher.Hit; import org.apache.nutch.searcher.HitDetails; import org.apache.nutch.searcher.NutchBean; -import org.apache.nutch.searcher.Query; import org.apache.nutch.webapp.common.ServiceLocator; import org.apache.struts.tiles.ComponentContext; @@ -34,17 +35,56 @@ public void nutchPerform(ComponentContext tileContext, HttpServletRequest request, HttpServletResponse response, ServletContext servletContext) throws ServletException, IOException { + + ServiceLocator locator = getServiceLocator(request); NutchBean bean = locator.getNutchBean(); - Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")), Integer - 
.parseInt(request.getParameter("id"))); + LOG.info("Cache request from " + request.getRemoteAddr()); + + Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")), + Integer.parseInt(request.getParameter("id"))); + HitDetails details = bean.getDetails(hit); - Query query = Query.parse(request.getParameter("query"), locator - .getConfiguration()); + String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo(); - request.setAttribute("explanation", bean.getExplanation(query, hit)); - request.setAttribute("hitDetails", details); - logRequestAttributes(request); + Metadata metaData = bean.getParseData(details).getContentMeta(); + + String content = null; + String contentType = (String) metaData.get(Metadata.CONTENT_TYPE); + + + if (contentType.startsWith("text/html")) { + // FIXME : it's better to emit the original 'byte' sequence + // with 'charset' set to the value of 'CharEncoding', + // but I don't know how to emit 'byte sequence' in JSP. + // out.getOutputStream().write(bean.getContent(details)) may work, + // but I'm not sure. 
+ String encoding = (String) metaData.get("CharEncodingForConversion"); + if (encoding != null) { + try { + content = new String(bean.getContent(details), encoding); + } + catch (UnsupportedEncodingException e) { + //fallback to configured charset + content = new String(bean.getContent(details), locator.getConfiguration().get("parser.character.encoding.default")); + } + } + else { + //construct String with system default encoding + content = new String(bean.getContent(details)); + } + } + + // page content + request.setAttribute("content", content); + // page content type + request.setAttribute("contentType", contentType); + // page url + request.setAttribute("url", details.getValue("url")); + // page id + request.setAttribute("id", id); + // page content if html + request.setAttribute("isHtml", new Boolean(contentType.startsWith("text/html"))); } } Modified: lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties?rev=399758&r1=399757&r2=399758&view=diff ============================================================================== --- lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties (original) +++ lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties Thu May 4 09:37:25 2006 @@ -14,6 +14,7 @@ cached.title=nutch cache cached.page=page: <a href="{0}">{0}</a> cached.noContent=Sorry, no content is cached for this page. +cached.notHtml=The cached content has mime type {0}, click this <a href="servlet/cached?{1}">link</a> to download it directly. 
#explain page title explain.title=score explanation Modified: lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp?rev=399758&r1=399757&r2=399758&view=diff ============================================================================== --- lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp (original) +++ lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/cached.jsp Thu May 4 09:37:25 2006 @@ -1,67 +1,28 @@ -<%@ page - session="false" - contentType="text/html; charset=UTF-8" - import="java.io.*" - import="java.util.*" - - import="org.apache.nutch.searcher.*" - import="org.apache.nutch.parse.ParseData" - import="org.apache.nutch.metadata.Metadata" - import="org.apache.hadoop.conf.Configuration" - import="org.apache.nutch.util.NutchConfiguration" -%><%@ taglib prefix="bean" uri="/tags/struts-bean" %><% - Configuration nutchConf = (Configuration) application.getAttribute(Configuration.class.getName()); - if (nutchConf == null) { - nutchConf = NutchConfiguration.create(); - application.setAttribute(Configuration.class.getName(), nutchConf); - } - NutchBean bean = NutchBean.get(application, nutchConf); - bean.LOG.info("cache request from " + request.getRemoteAddr()); - Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")), - Integer.parseInt(request.getParameter("id"))); - HitDetails details = bean.getDetails(hit); - String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo(); - - Metadata metaData = bean.getParseData(details).getContentMeta(); - - String content = null; - String contentType = (String) metaData.get(Metadata.CONTENT_TYPE); - if (contentType.startsWith("text/html")) { - // FIXME : it's better to emit the original 'byte' sequence - // with 'charset' set to the value of 'CharEncoding', - // but I don't know how to emit 'byte sequence' in JSP. 
- // out.getOutputStream().write(bean.getContent(details)) may work, - // but I'm not sure. - String encoding = (String) metaData.get("CharEncodingForConversion"); - if (encoding != null) { - try { - content = new String(bean.getContent(details), encoding); - } - catch (UnsupportedEncodingException e) { - // fallback to windows-1252 - content = new String(bean.getContent(details), "windows-1252"); - } - } - else - content = new String(bean.getContent(details)); - } -%> -<base href="<%=details.getValue("url")%>"> +<%@ include file="common.jsp"%> +<c:if test="${isHtml}"> + <base href="<c:out value="${url}"/>"> +</c:if> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> -<h2 style="{color: rgb(255, 153, 0)}"><bean:message key="cached.title"/></h2> -<h3><bean:message key="cached.page" arg0="<%=details.getValue("url")%>" /></h3> -<hr> -<% if (contentType.startsWith("text/html")) {%> - -<% if (content != null && !content.equals("")) {%> -<%= content %> -<% } else { %> -<bean:message key="cached.noContent"/> -<% } %> - -<% } else { %> - -The cached content has mime type "<%=contentType%>", -click this <a href="servlet/cached?<%=id%>">link</a> to download it directly. 
- -<% } %> +<h2 style="{color: rgb(255, 153, 0)}"><fmt:message key="cached.title" /></h2> +<h3><fmt:message key="cached.page"> + <fmt:param value="${url}" /> +</fmt:message></h3> +<hr /> +<c:choose> + <c:when test="${isHtml}"> + <c:choose> + <c:when test="${content!=null && content!=''}"> + <c:out value="${content}" escapeXml="false"/> + </c:when> + <c:otherwise> + <fmt:message key="cached.noContent" /> + </c:otherwise> + </c:choose> + </c:when> + <c:otherwise> + <fmt:message key="cached.notHtml"> + <fmt:param value="${contentType}" /> + <fmt:param value="${id}" /> + </fmt:message> + </c:otherwise> +</c:choose> Modified: lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml?rev=399758&r1=399757&r2=399758&view=diff ============================================================================== --- lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml (original) +++ lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml Thu May 4 09:37:25 2006 @@ -17,7 +17,8 @@ <put name="pageBody" value="/WEB-INF/jsp/search.jsp" /> </definition> <!-- Cached --> - <definition name="cachedPage" path="/WEB-INF/jsp/cached.jsp"> + <definition name="cachedPage" path="/WEB-INF/jsp/cached.jsp" + controllerClass="org.apache.nutch.webapp.controller.CachedController"> <put name="title" value="cached.title" /> </definition> <!-- Explain --> ------------------------------------------------------- Using Tomcat but need to do more? Need to support web services, security? 
Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs