nicolaken 2002/06/14 09:19:14 Modified: . changes.xml src/webapp/welcome welcome.xhtml Added: src/webapp/samples/linkstatus linkstatus.xsl sitemap.xmap src/java/org/apache/cocoon/generation LinkStatusGenerator.java Removed: src/scratchpad/webapp/mount/linkstatus linkstatus.xsl sitemap.xmap src/scratchpad/src/org/apache/cocoon/generation LinkStatusGenerator.java Log: <action dev="NKB" type="update"> Moved linkstatus sample and LinkStatusGenerator to core from scratchpad; added a link to test the documentation links on the Cocoon demo webapp frontpage. </action> <action dev="NKB" type="fix"> Removed Java 1.3+ only method (URL.getPath()) from LinkStatusGenerator. </action> Revision Changes Path 1.188 +8 -1 xml-cocoon2/changes.xml Index: changes.xml =================================================================== RCS file: /home/cvs/xml-cocoon2/changes.xml,v retrieving revision 1.187 retrieving revision 1.188 diff -u -r1.187 -r1.188 --- changes.xml 11 Jun 2002 13:47:36 -0000 1.187 +++ changes.xml 14 Jun 2002 16:19:13 -0000 1.188 @@ -38,6 +38,13 @@ </devs> <release version="@version@" date="@date@"> + <action dev="NKB" type="update"> + Moved linkstatus sample and LinkStatusGenerator to core from scratchpad; + added a link to test the documentation links on the Cocoon demo webapp frontpage. + </action> + <action dev="NKB" type="fix"> + Removed Java 1.3+ only method (URL.getPath()) from LinkStatusGenerator. + </action> <action dev="SW" type="update"> CocoonServlet no longer builds its own classloader. Also, it no more sets the thread's context classloader unless the "init-classloader" parameter is true. To have Cocoon use its own classloader, 1.1 xml-cocoon2/src/webapp/samples/linkstatus/linkstatus.xsl Index: linkstatus.xsl =================================================================== <?xml version="1.0"?> <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:linkstatus="http://apache.org/cocoon/linkstatus/2.0"> <xsl:template match="linkstatus:linkstatus"> <html> <body> <table border="1"> <tr><th>URL</th><th>referrer</th><th>content-type</th><th>status</th><th>message</th></tr> <xsl:apply-templates/> </table> </body> </html> </xsl:template> <xsl:template match="linkstatus:link"> <tr><xsl:attribute name = "bgcolor" ><xsl:choose> <xsl:when test="normalize-space(@status)='200'">#00ff00</xsl:when> <xsl:when test="normalize-space(@status)='404'">#ffff00</xsl:when> <xsl:otherwise>#ff0000</xsl:otherwise></xsl:choose> </xsl:attribute> <td><a><xsl:attribute name="href"><xsl:value-of select="@href"/></xsl:attribute> <xsl:value-of select="@href"/></a></td> <td><a><xsl:attribute name="href"><xsl:value-of select="@referrer"/></xsl:attribute> referrer</a></td> <td><xsl:value-of select="@content"/></td> <td><xsl:value-of select="@status"/></td> <td><xsl:value-of select="@message"/></td> </tr> </xsl:template> </xsl:stylesheet> 1.1 xml-cocoon2/src/webapp/samples/linkstatus/sitemap.xmap Index: sitemap.xmap =================================================================== <?xml version="1.0"?> <map:sitemap xmlns:map="http://apache.org/cocoon/sitemap/1.0"> <!-- =========================== Components ================================ --> <map:components> <map:generators default="file"> <map:generator name="linkstatus" logger="sitemap.generator.linkstatus" label="content,data" src="org.apache.cocoon.generation.LinkStatusGenerator"/> </map:generators> <map:transformers default="xslt"/> <map:readers default="resource"/> <map:serializers default="html"/> <map:selectors default="browser"/> <map:matchers default="wildcard"> <map:matcher name="wildcard" src="org.apache.cocoon.matching.WildcardURIMatcherFactory"/> </map:matchers> </map:components> <map:views> <map:view name="links" from-position="last"> <map:serialize type="links"/> </map:view> </map:views> <!-- =========================== Pipelines ================================= --> <map:pipelines> <map:pipeline> <map:match pattern=""> <map:redirect-to uri="linkstatus/http/localhost/8080/cocoon/documents/index.html"/> </map:match> <map:match pattern="linkstatus/*/*/*/**"> <map:generate type="linkstatus" src="{1}://{2}:{3}/{4}"/> <map:transform src="linkstatus.xsl"/> <map:serialize/> </map:match> </map:pipeline> </map:pipelines> </map:sitemap> <!-- end of file --> 1.1 xml-cocoon2/src/java/org/apache/cocoon/generation/LinkStatusGenerator.java Index: LinkStatusGenerator.java =================================================================== package org.apache.cocoon.generation; import org.apache.avalon.excalibur.pool.Recyclable; import org.apache.avalon.framework.parameters.Parameters; import org.apache.avalon.framework.configuration.Configurable; import org.apache.avalon.framework.configuration.Configuration; import org.apache.avalon.framework.configuration.ConfigurationException; import org.apache.cocoon.ProcessingException; import org.apache.cocoon.ResourceNotFoundException; import org.apache.cocoon.environment.SourceResolver; import org.apache.cocoon.Constants; import org.apache.cocoon.util.Tokenizer; import org.apache.regexp.RE; import org.apache.regexp.RESyntaxException; import org.apache.log.Logger; import org.xml.sax.SAXException; import org.xml.sax.helpers.AttributesImpl; import java.io.IOException; import java.io.InputStream; import java.io.BufferedReader; import java.io.InputStreamReader; import java.net.URLConnection; import java.net.HttpURLConnection; import java.net.URL; import java.util.Map; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.ArrayList; /** * Generates a list of links that are reachable from the src and their status. * * @author Michael Homeijer * @author Nicola Ken Barozzi ([EMAIL PROTECTED]) */ public class LinkStatusGenerator extends ComposerGenerator implements Recyclable, Configurable { /** The URI of the namespace of this generator. */ protected static final String URI = "http://apache.org/cocoon/linkstatus/2.0"; /** The namespace prefix for this namespace. */ protected static final String PREFIX = "linkstatus"; /* Node and attribute names */ protected static final String TOP_NODE_NAME = "linkstatus"; protected static final String LINK_NODE_NAME = "link"; protected static final String HREF_ATTR_NAME = "href"; protected static final String REFERRER_ATTR_NAME = "referrer"; protected static final String CONTENT_ATTR_NAME = "content"; protected static final String STATUS_ATTR_NAME = "status"; protected static final String MESSAGE_ATTR_NAME = "message"; protected AttributesImpl attributes = new AttributesImpl(); /** * Config element name specifying expected link content-typ. * <p> * Its value is <code>link-content-type</code>. * </p> * * @since */ public final static String LINK_CONTENT_TYPE_CONFIG = "link-content-type"; /** * Default value of <code>link-content-type</code> configuration value. * <p> * Its value is <code>application/x-cocoon-links</code>. * </p> * * @since */ public final String LINK_CONTENT_TYPE_DEFAULT = "application/x-cocoon-links"; /** * Config element name specifying query-string appendend for requesting links * of an URL. * <p> * Its value is <code>link-view-query</code>. * </p> * * @since */ public final static String LINK_VIEW_QUERY_CONFIG = "link-view-query"; /** * Default value of <code>link-view-query</code> configuration value. * <p> * Its value is <code>?cocoon-view=links</code>. * </p> * * @since */ public final static String LINK_VIEW_QUERY_DEFAULT = "cocoon-view=links"; /** * Config element name specifying excluding regular expression pattern. * <p> * Its value is <code>exclude</code>. * </p> * * @since */ public final static String EXCLUDE_CONFIG = "exclude"; /** * Config element name specifying including regular expression pattern. * <p> * Its value is <code>include</code>. * </p> * * @since */ public final static String INCLUDE_CONFIG = "include"; /** * Config element name specifying http header value for user-Agent. * <p> * Its value is <code>user-agent</code>. * </p> * * @since */ public final static String USER_AGENT_CONFIG = "user-agent"; /** * Default value of <code>user-agent</code> configuration value. * <p> * Its value is @see org.apache.cocoon.Constants#COMPLETE_NAME. * </p> * * @since */ public final static String USER_AGENT_DEFAULT = Constants.COMPLETE_NAME; /** * Config element name specifying http header value for accept. * <p> * Its value is <code>accept</code>. * </p> * * @since */ public final static String ACCEPT_CONFIG = "accept"; /** * Default value of <code>accept</code> configuration value. * <p> * Its value is <code>* / *</code> * </p> * * @since */ public final static String ACCEPT_DEFAULT = "*/*"; private String linkViewQuery = LINK_VIEW_QUERY_DEFAULT; private String linkContentType = LINK_CONTENT_TYPE_DEFAULT; private HashSet excludeCrawlingURL; private HashSet includeCrawlingURL; private String userAgent = USER_AGENT_DEFAULT; private String accept = ACCEPT_DEFAULT; private HashSet crawled; private HashSet linksToProcess; /** * Stores links to process and the referrer links */ private class Link { private URL url; private String referrer; public Link( URL url, String referrer ) { this.url = url; this.referrer = referrer; } public URL getURL() { return url; } public String getReferrer() { return referrer; } public boolean equals( Link l ) { return url.equals( l.getURL()); } } /** * Configure the crawler component. * <p> * Configure can specify which URI to include, and which URI to exclude * from crawling. You specify the patterns as regular expressions. * </p> * <p> * Morover you can configure * the required content-type of crawling request, and the * query-string appended to each crawling request. * </p> * <pre><tt> * <include>.*\.html?</exclude> or <exclude>.*\.html?, .*\.xsp</exclude> * <exclude>.*\.gif</exclude> or <exclude>.*\.gif, .*\.jpe?g</exclude> * <link-content-type> application/x-cocoon-links </link-content-type> * <link-view-query> ?cocoon-view=links </link-view-query> * </tt></pre> * * @param configuration XML configuration of this avalon component. * @exception ConfigurationException is throwing if configuration is invalid. * @since */ public void configure(Configuration configuration) throws ConfigurationException { Configuration[] children; children = configuration.getChildren(INCLUDE_CONFIG); if (children != null && children.length > 0) { includeCrawlingURL = new HashSet(); for (int i = 0; i < children.length; i++) { String pattern = children[i].getValue(); try { Tokenizer t = new Tokenizer(pattern, ", "); while (t.hasMoreTokens()) { String tokenized_pattern = t.nextToken(); this.includeCrawlingURL.add(new RE(tokenized_pattern)); } } catch (RESyntaxException rese) { getLogger().error("Cannot create includeing regular-expression for " + pattern, rese); } } } children = configuration.getChildren(EXCLUDE_CONFIG); if (children != null && children.length > 0) { excludeCrawlingURL = new HashSet(); for (int i = 0; i < children.length; i++) { String pattern = children[i].getValue(); try { Tokenizer t = new Tokenizer(pattern, ", "); while (t.hasMoreTokens()) { String tokenized_pattern = t.nextToken(); this.excludeCrawlingURL.add(new RE(tokenized_pattern)); } } catch (RESyntaxException rese) { getLogger().error("Cannot create excluding regular-expression for " + pattern, rese); } } } else { excludeCrawlingURL = new HashSet(); setDefaultExcludeFromCrawling(); } Configuration child; String value; child = configuration.getChild(LINK_CONTENT_TYPE_CONFIG, false); if (child != null) { value = child.getValue(); if (value != null && value.length() > 0) { this.linkContentType = value.trim(); } } child = configuration.getChild(LINK_VIEW_QUERY_CONFIG, false); if (child != null) { value = child.getValue(); if (value != null && value.length() > 0) { this.linkViewQuery = value.trim(); } } child = configuration.getChild(USER_AGENT_CONFIG, false); if (child != null) { value = child.getValue(); if (value != null && value.length() > 0) { this.userAgent = value; } } child = configuration.getChild(ACCEPT_CONFIG, false); if (child != null) { value = child.getValue(); if (value != null && value.length() > 0) { this.accept = value; } } } public void setup(SourceResolver resolver, Map objectModel, String src, Parameters par) throws ProcessingException, SAXException, IOException { super.setup(resolver, objectModel, src, par); /* Create a reusable attributes for creating nodes */ this.attributes = new AttributesImpl(); excludeCrawlingURL = new HashSet(); this.setDefaultExcludeFromCrawling(); } /** * Generate XML data. * * @throws SAXException * if an error occurs while outputting the document * @throws ProcessingException * if the requsted URI wasn't found */ public void generate() throws SAXException, ProcessingException { try { crawled = new HashSet(); linksToProcess = new HashSet(); URL root = new URL(source); linksToProcess.add(new Link( root, "")); if (getLogger().isDebugEnabled()) { getLogger().debug("crawl URL " + root); } this.contentHandler.startDocument(); this.contentHandler.startPrefixMapping(PREFIX,URI); attributes.clear(); super.contentHandler.startElement(URI, TOP_NODE_NAME, URI+':'+TOP_NODE_NAME, attributes); while (linksToProcess.size() > 0) { Iterator i = linksToProcess.iterator(); if (i.hasNext()) { // fetch a URL Link link = (Link) i.next(); URL url = link.getURL(); // remove it from the to-do list linksToProcess.remove(link); URLConnection conn = processURL(url, link.getReferrer()); // calc all links from this url if (conn != null) { List url_links = getLinksFromConnection(conn, url); if (url_links != null) { // add links of this url to the to-do list linksToProcess.addAll(url_links); } } } } super.contentHandler.endElement(URI, TOP_NODE_NAME, URI+':'+TOP_NODE_NAME); this.contentHandler.endPrefixMapping(PREFIX); this.contentHandler.endDocument(); } catch (IOException ioe) { getLogger().warn("Could not read source ", ioe); throw new ResourceNotFoundException("Could not read source ", ioe); } } /** * Default exclude patterns. * <p> * By default URLs matching following patterns are excluded: * </p> * <ul> * <li>.*\\.gif(\\?.*)?$ - exclude gif images</li> * <li>.*\\.png(\\?.*)?$ - exclude png images</li> * <li>.*\\.jpe?g(\\?.*)?$ - exclude jpeg images</li> * <li>.*\\.js(\\?.*)?$ - exclude javascript </li> * <li>.*\\.css(\\?.*)?$ - exclude cascaded stylesheets</li> * </ul> * * @since */ private void setDefaultExcludeFromCrawling() { String[] EXCLUDE_FROM_CRAWLING_DEFAULT = { ".*\\.gif(\\?.*)?$", ".*\\.png(\\?.*)?$", ".*\\.jpe?g(\\?.*)?$", ".*\\.js(\\?.*)?$", ".*\\.css(\\?.*)?$" }; for (int i = 0; i < EXCLUDE_FROM_CRAWLING_DEFAULT.length; i++) { String pattern = EXCLUDE_FROM_CRAWLING_DEFAULT[i]; try { excludeCrawlingURL.add(new RE(pattern)); } catch (RESyntaxException rese) { getLogger().error("Cannot create excluding regular-expression for " + pattern, rese); } } } protected List getLinksFromConnection(URLConnection conn, URL url) { List url_links = null; try { String content_type = conn.getContentType(); if (getLogger().isDebugEnabled()) { getLogger().debug("Content-type: " + content_type); } if (content_type.equals(linkContentType)) { url_links = new ArrayList(); InputStream is = conn.getInputStream(); BufferedReader br = new BufferedReader(new InputStreamReader(is)); // content is supposed to be a list of links, // relative to current URL String line; String referrer = url.toString(); while ((line = br.readLine()) != null) { URL new_url = new URL(url, line); boolean add_url = true; // don't add new_url twice if (add_url) { add_url &= !url_links.contains(new_url); } // don't add new_url if it has been crawled already if (add_url) { add_url &= !crawled.contains(new_url.toString()); } Link new_link = new Link( new_url, referrer ); if (add_url) { add_url &= !linksToProcess.contains(new_link); } // don't add if is not matched by existing include definition if (add_url) { add_url &= isIncludedURL(new_url.toString()); } if (add_url) { if (getLogger().isDebugEnabled()) { getLogger().debug("Add URL: " + new_url.toString()); } url_links.add(new_link); } } // now we have a list of URL which should be examined } } catch (IOException ioe) { getLogger().warn("Problems get links of " + url, ioe); } return url_links; } protected URLConnection processURL(URL url, String referrer) throws SAXException { if (getLogger().isDebugEnabled()) { getLogger().debug("getLinks URL " + url); } URLConnection result = null; // don't try to investigate url which has been crawled already if (crawled.contains(url.toString())) { return null; } // mark it as crawled crawled.add(url.toString()); attributes.clear(); attributes.addAttribute("", HREF_ATTR_NAME, HREF_ATTR_NAME, "CDATA", url.toString()); attributes.addAttribute("", REFERRER_ATTR_NAME, REFERRER_ATTR_NAME, "CDATA", referrer); // don't try to get links for url which is excluded from crawling if (isExcludedURL(url.toString())) { // Check for status and output it. try { URLConnection links_url_connection = url.openConnection(); HttpURLConnection h = (HttpURLConnection)links_url_connection; String content_type = links_url_connection.getContentType(); attributes.addAttribute("", CONTENT_ATTR_NAME, CONTENT_ATTR_NAME, "CDATA", content_type); attributes.addAttribute("", MESSAGE_ATTR_NAME, MESSAGE_ATTR_NAME, "CDATA", h.getResponseMessage()); attributes.addAttribute("", STATUS_ATTR_NAME, STATUS_ATTR_NAME, "CDATA", String.valueOf(h.getResponseCode())); } catch (IOException ioe) { attributes.addAttribute("", MESSAGE_ATTR_NAME, MESSAGE_ATTR_NAME, "CDATA", ioe.getMessage()); } } else { // Output url, referrer, content-type, status, message for traversable url's // add prefix and query to get data from the linkserializer. try { URL links_url = new URL(url.toExternalForm() + ((url.toExternalForm().indexOf("?") == -1) ? "?" : "&") + linkViewQuery); URLConnection links_url_connection = links_url.openConnection(); HttpURLConnection h = (HttpURLConnection)links_url_connection; result = links_url_connection; attributes.addAttribute("", CONTENT_ATTR_NAME, CONTENT_ATTR_NAME, "CDATA", links_url_connection.getContentType()); attributes.addAttribute("", MESSAGE_ATTR_NAME, MESSAGE_ATTR_NAME, "CDATA", h.getResponseMessage()); attributes.addAttribute("", STATUS_ATTR_NAME, STATUS_ATTR_NAME, "CDATA", String.valueOf(h.getResponseCode())); } catch(IOException ioe ) { // Output url referrer status message attributes.addAttribute("", MESSAGE_ATTR_NAME, MESSAGE_ATTR_NAME, "CDATA", ioe.getMessage()); } } super.contentHandler.startElement(URI, LINK_NODE_NAME, URI+':'+LINK_NODE_NAME, attributes); super.contentHandler.endElement(URI, LINK_NODE_NAME, URI+':'+LINK_NODE_NAME); return result; } /** * check if URL is a candidate for indexing * * @param url Description of Parameter * @return The excludedURL value * @since */ private boolean isExcludedURL(String url) { // by default include URL for crawling if (excludeCrawlingURL == null) { if (getLogger().isDebugEnabled()) { getLogger().debug("exclude no URL " + url); } return false; } final String s = url.toString(); Iterator i = excludeCrawlingURL.iterator(); while (i.hasNext()) { RE pattern = (RE) i.next(); if (pattern.match(s)) { if (getLogger().isDebugEnabled()) { getLogger().debug("exclude URL " + url); } return true; } } if (getLogger().isDebugEnabled()) { getLogger().debug("exclude not URL " + url); } return false; } /** * check if URL is a candidate for indexing * * @param url Description of Parameter * @return The includedURL value * @since */ private boolean isIncludedURL(String url) { // by default include URL for crawling if (includeCrawlingURL == null) { if (getLogger().isDebugEnabled()) { getLogger().debug("include all URL " + url); } return true; } final String s = url.toString(); Iterator i = includeCrawlingURL.iterator(); while (i.hasNext()) { RE pattern = (RE) i.next(); if (pattern.match(s)) { if (getLogger().isDebugEnabled()) { getLogger().debug("include URL " + url); } return true; } } if (getLogger().isDebugEnabled()) { getLogger().debug("include not URL " + url); } return false; } public void recycle() { super.recycle(); this.attributes = null; this.excludeCrawlingURL = null; } } 1.8 +12 -6 xml-cocoon2/src/webapp/welcome/welcome.xhtml Index: welcome.xhtml =================================================================== RCS file: /home/cvs/xml-cocoon2/src/webapp/welcome/welcome.xhtml,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- welcome.xhtml 4 Jun 2002 23:44:40 -0000 1.7 +++ welcome.xhtml 14 Jun 2002 16:19:14 -0000 1.8 @@ -2,7 +2,7 @@ <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> - <title>Apache Cocoon @version@</title> + <title>Apache Cocoon 2.1-dev</title> <link href="favicon.ico" rel="SHORTCUT ICON" /> </head> @@ -25,7 +25,7 @@ <tr> <td align="center" width="30%"> <font color="#000000" face="arial,helvetica,sanserif"> - <b>version @version@</b> + <b>version 2.1-dev</b> </font> </td> </tr> @@ -52,7 +52,7 @@ <font color="#000000" face="arial,helvetica,sanserif" size="+0">It seems you got me running :) <br /> - Here are some links to the relevant portions of the demo webapp.</font> + Here are some links to the relevant portions of the default cocoon webapp.</font> <br /> @@ -75,7 +75,12 @@ <font color="#000000" face="arial,helvetica,sanserif" size="+0"> <a href="documents/index">Documentation</a> - - Who am I? Information, tutorials and references about me are here.</font> + - Who am <a href="http://xml.apache.org/cocoon/">I</a>? + <a href="documents/index">Information</a>, + <a href="tutorial/home.html">tutorials</a> and + <a href="documents/doclist.html">references</a> about me + <a href="documents/index">are here</a> + (<a href="samples/linkstatus/">check links</a>).</font> </td> </tr> @@ -84,7 +89,8 @@ <font color="#000000" face="arial,helvetica,sanserif" size="+0"> <a href="search/welcome">Search</a> - - Need to search for a specific thing about me? Let me do the work for you.</font> + - Need to <a href="search/welcome">search</a> for a specific + thing about me? Let me do the work for you.</font> </td> </tr> @@ -107,7 +113,7 @@ <p align="center"> <br /> - <font size="-1">Copyright © @year@ + <font size="-1">Copyright © 1999-2002 <a href="http://www.apache.org/">The Apache Software Foundation</a> .
---------------------------------------------------------------------- In case of troubles, e-mail: [EMAIL PROTECTED] To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]