Author: upayavira
Date: Tue Oct 12 04:08:27 2004
New Revision: 54666

Modified:
   cocoon/trunk/src/java/org/apache/cocoon/bean/BeanListener.java
   cocoon/trunk/src/java/org/apache/cocoon/bean/CocoonBean.java
   cocoon/trunk/src/java/org/apache/cocoon/bean/Target.java
   cocoon/trunk/src/java/org/apache/cocoon/bean/helpers/BeanConfigurator.java
   cocoon/trunk/src/java/org/apache/cocoon/bean/helpers/Crawler.java
   cocoon/trunk/src/java/org/apache/cocoon/bean/helpers/OutputStreamListener.java
Log:
Broken link reporting now includes referring pages (requested by Forrest)

Modified: cocoon/trunk/src/java/org/apache/cocoon/bean/BeanListener.java ============================================================================== --- cocoon/trunk/src/java/org/apache/cocoon/bean/BeanListener.java (original) +++ cocoon/trunk/src/java/org/apache/cocoon/bean/BeanListener.java Tue Oct 12 04:08:27 2004 @@ -15,12 +15,14 @@ */ package org.apache.cocoon.bean; +import java.util.List; + /** * Interface allowing caller to install a listener so that it can be informed * as the bean makes progress through the links to be called. * * @author <a href="mailto:[EMAIL PROTECTED]">Upayavira</a> - * @version CVS $Id: BeanListener.java,v 1.5 2004/03/05 13:02:45 bdelacretaz Exp $ + * @version CVS $Id$ */ public interface BeanListener { @@ -72,6 +74,7 @@ * @param message A reason why the link was not generated */ public void brokenLinkFound(String uri, String parentURI, String message, Throwable t); + public void brokenLinkFound(String uri, List parentURIs, String message, Throwable t); /** * Signals completion of the generation process. 
This method can Modified: cocoon/trunk/src/java/org/apache/cocoon/bean/CocoonBean.java ============================================================================== --- cocoon/trunk/src/java/org/apache/cocoon/bean/CocoonBean.java (original) +++ cocoon/trunk/src/java/org/apache/cocoon/bean/CocoonBean.java Tue Oct 12 04:08:27 2004 @@ -60,7 +60,7 @@ * @author <a href="mailto:[EMAIL PROTECTED]">Nicola Ken Barozzi</a> * @author <a href="mailto:[EMAIL PROTECTED]">Vadim Gritsenko</a> * @author <a href="mailto:[EMAIL PROTECTED]">Upayavira</a> - * @version CVS $Id: CocoonBean.java,v 1.45 2004/07/11 23:02:54 antonio Exp $ + * @version CVS $Id$ */ public class CocoonBean extends CocoonWrapper { @@ -309,6 +309,14 @@ } } + public void sendBrokenLinkWarning(String uri, List referrers, String warning, Throwable t) { + Iterator i = listeners.iterator(); + while (i.hasNext()) { + BeanListener l = (BeanListener) i.next(); + l.brokenLinkFound(uri, referrers, warning, t); + } + } + public void pageSkipped(String uri, String message) { Iterator i = listeners.iterator(); while (i.hasNext()) { @@ -460,7 +468,7 @@ newLinkCount++; } } catch (ProcessingException pe) { - this.sendBrokenLinkWarning(linkTarget.getSourceURI(), pe.getMessage()); + this.sendBrokenLinkWarning(linkTarget.getSourceURI(), linkTarget.getReferringURIs(), pe.getMessage(), pe); if (this.brokenLinkGenerate) { if (crawler.addTarget(linkTarget)) { newLinkCount++; @@ -530,7 +538,9 @@ output = null; this.resourceUnavailable(target); this.sendBrokenLinkWarning(target.getSourceURI(), - DefaultNotifyingBuilder.getRootCause(pe).getMessage()); + target.getReferringURIs(), + DefaultNotifyingBuilder.getRootCause(pe).getMessage(), + DefaultNotifyingBuilder.getRootCause(pe)); } finally { if (output != null && status != -1) { Modified: cocoon/trunk/src/java/org/apache/cocoon/bean/Target.java ============================================================================== --- cocoon/trunk/src/java/org/apache/cocoon/bean/Target.java 
(original) +++ cocoon/trunk/src/java/org/apache/cocoon/bean/Target.java Tue Oct 12 04:08:27 2004 @@ -15,6 +15,8 @@ */ package org.apache.cocoon.bean; +import java.util.ArrayList; +import java.util.List; import java.util.TreeMap; import org.apache.cocoon.Constants; @@ -29,7 +31,7 @@ * written (the destination URI). * * @author <a href="mailto:[EMAIL PROTECTED]">Upayavira</a> - * @version CVS $Id: Target.java,v 1.14 2004/05/06 19:32:37 upayavira Exp $ + * @version CVS $Id$ */ public class Target { // Defult type is append @@ -50,7 +52,8 @@ private String defaultFilename = Constants.INDEX_URI; private String finalDestinationURI = null; private String extension = null; - + private List referringURIs = null; + private boolean followLinks; private boolean confirmExtension; private String logger; @@ -80,6 +83,7 @@ sourceURI = NetUtils.normalize(root + sourceURI); this.deparameterizedSourceURI = NetUtils.deparameterize(sourceURI, this.parameters); this.sourceURI = NetUtils.parameterize(this.deparameterizedSourceURI, this.parameters); + this.referringURIs = new ArrayList(); } public Target(String type, String sourceURI, String destURI) @@ -111,7 +115,7 @@ Target target = new Target(this.type, this.root, linkURI, this.destURI); target.setOriginalURI(originalLinkURI); - target.setParentURI(this.sourceURI); + target.addReferringURI(this.sourceURI); target.setConfirmExtension(this.confirmExtension); target.setFollowLinks(this.followLinks); target.setDefaultFilename(this.defaultFilename); @@ -132,9 +136,10 @@ /** * Sets the URI of the page that contained the link to this * URI. Used for reporting purposes. + * @deprecated Use the addPerentURIs method instead */ public void setParentURI(String uri) { - this.parentURI = uri; + this.referringURIs.add(uri); } /** @@ -181,6 +186,30 @@ this.defaultFilename = filename; } + /** + * Adds a URI for a referring page. 
This will be used later if + * this page causes a broken link in order to list all pages + * that refer to this broken link + * @param uri + */ + public void addReferringURI(String uri) { + this.referringURIs.add(uri); + } + + /** + * Returns the first referring URI. If this method is + * called, their should only be one entry in the list + */ + public String getReferringURI() { + return (String)referringURIs.get(0); + } + + /** + * Get all referring URIs. + */ + public List getReferringURIs() { + return referringURIs; + } /** * Gets the filename from the source URI, without the path. * This is used to fill out relative URIs that have Modified: cocoon/trunk/src/java/org/apache/cocoon/bean/helpers/BeanConfigurator.java ============================================================================== --- cocoon/trunk/src/java/org/apache/cocoon/bean/helpers/BeanConfigurator.java (original) +++ cocoon/trunk/src/java/org/apache/cocoon/bean/helpers/BeanConfigurator.java Tue Oct 12 04:08:27 2004 @@ -33,7 +33,7 @@ * Static class for configuring a CocoonBean from a DOM Document object * * @author <a href="mailto:[EMAIL PROTECTED]">Upayavira</a> - * @version CVS $Id: BeanConfigurator.java,v 1.8 2004/03/28 20:51:24 antonio Exp $ + * @version CVS $Id$ */ public class BeanConfigurator { @@ -67,6 +67,7 @@ private static final String ATTR_BROKEN_LINK_REPORT_FILE = "file"; private static final String ATTR_BROKEN_LINK_GENERATE = "generate"; private static final String ATTR_BROKEN_LINK_EXTENSION = "extension"; + private static final String ATTR_BROKEN_LINK_SHOW_REFERRERS="show-referrers"; private static final String NODE_AGENT = "user-agent"; private static final String NODE_ACCEPT = "accept"; @@ -257,10 +258,13 @@ listener.setReportType(getAttributeValue(node, ATTR_BROKEN_LINK_REPORT_TYPE)); } if (hasAttribute(node, ATTR_BROKEN_LINK_GENERATE)) { - cocoon.setBrokenLinkGenerate(getBooleanAttributeValue(node, ATTR_BROKEN_LINK_GENERATE)); + 
cocoon.setBrokenLinkGenerate(getBooleanAttributeValue(node, ATTR_BROKEN_LINK_GENERATE)); } if (hasAttribute(node, ATTR_BROKEN_LINK_EXTENSION)) { - cocoon.setBrokenLinkExtension(getAttributeValue(node, ATTR_BROKEN_LINK_EXTENSION)); + cocoon.setBrokenLinkExtension(getAttributeValue(node, ATTR_BROKEN_LINK_EXTENSION)); + } + if (hasAttribute(node, ATTR_BROKEN_LINK_SHOW_REFERRERS)) { + listener.setIsShowingReferrers(getBooleanAttributeValue(node, ATTR_BROKEN_LINK_SHOW_REFERRERS)); } NodeList nodes = node.getChildNodes(); if (nodes.getLength()!=0) { Modified: cocoon/trunk/src/java/org/apache/cocoon/bean/helpers/Crawler.java ============================================================================== --- cocoon/trunk/src/java/org/apache/cocoon/bean/helpers/Crawler.java (original) +++ cocoon/trunk/src/java/org/apache/cocoon/bean/helpers/Crawler.java Tue Oct 12 04:08:27 2004 @@ -28,17 +28,17 @@ * A simple Cocoon crawler * * @author <a href="mailto:[EMAIL PROTECTED]">Upayavira</a> - * @version CVS $Id: Crawler.java,v 1.4 2004/03/05 13:02:45 bdelacretaz Exp $ + * @version CVS $Id$ */ public class Crawler { private Map allTranslatedLinks; private Map stillNotVisited; - private Set visitedAlready; + private Map visitedAlready; public Crawler() { - visitedAlready = new HashSet(); + visitedAlready = new HashMap(); stillNotVisited = new HashMap(); allTranslatedLinks = new HashMap(); } @@ -48,10 +48,13 @@ */ public boolean addTarget(Target target) { String targetString = target.toString(); - if (!visitedAlready.contains(targetString)) { + if (!visitedAlready.containsKey(targetString)) { if (!stillNotVisited.containsKey(targetString)) { stillNotVisited.put(targetString, target); return true; + } else { + Target existingTarget = (Target)stillNotVisited.get(targetString); + existingTarget.addReferringURI(target.getReferringURI()); } } return false; @@ -94,9 +97,9 @@ public class CrawlingIterator implements Iterator { private Map stillNotVisited; - private Set visitedAlready; + 
private Map visitedAlready; - public CrawlingIterator(Set visitedAlready, Map stillNotVisited) { + public CrawlingIterator(Map visitedAlready, Map stillNotVisited) { this.visitedAlready = visitedAlready; this.stillNotVisited = stillNotVisited; } @@ -128,7 +131,7 @@ // could this be simpler: Object nextKey = stillNotVisited.keySet().toArray()[0]; Object nextElement = stillNotVisited.remove(nextKey); - visitedAlready.add(nextKey); + visitedAlready.put(nextKey, nextElement); return nextElement; } } Modified: cocoon/trunk/src/java/org/apache/cocoon/bean/helpers/OutputStreamListener.java ============================================================================== --- cocoon/trunk/src/java/org/apache/cocoon/bean/helpers/OutputStreamListener.java (original) +++ cocoon/trunk/src/java/org/apache/cocoon/bean/helpers/OutputStreamListener.java Tue Oct 12 04:08:27 2004 @@ -22,8 +22,10 @@ import java.io.PrintWriter; import java.text.DecimalFormat; import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import org.apache.cocoon.bean.BeanListener; @@ -32,18 +34,19 @@ * with file destination. 
* * @author <a href="mailto:[EMAIL PROTECTED]">Upayavira</a> - * @version CVS $Id: OutputStreamListener.java,v 1.8 2004/05/10 12:28:27 sylvain Exp $ + * @version CVS $Id$ */ public class OutputStreamListener implements BeanListener { private final PrintWriter writer; - private final List brokenLinks = new ArrayList(); + private final Map brokenLinks = new HashMap(); private final long startTimeMillis; private String reportFile = null; private String reportType = "text"; private long siteSize = 0L; private int sitePages = 0; - + private boolean isShowingReferrers = false; + public OutputStreamListener(OutputStream os) { writer = new PrintWriter(os); startTimeMillis = System.currentTimeMillis(); @@ -57,6 +60,10 @@ reportType = type; } + public void setIsShowingReferrers(boolean isShowingReferrers) { + this.isShowingReferrers = isShowingReferrers; + } + public void pageGenerated(String sourceURI, String destinationURI, int pageSize, @@ -96,13 +103,18 @@ this.print("Warning: "+warning + " when generating " + uri); } + /** + * @deprecated use brokenLinkFound(String, List, String, Throwable) instead + */ public void brokenLinkFound(String uri, String parentURI, String message, Throwable t) { - this.print(pad(42,"X [0] ")+uri+"\tBROKEN: "+message); - brokenLinks.add(uri + "\t" + message); + List parents = new ArrayList(1); + parents.add(parentURI); + brokenLinkFound(uri, parents, message, t); + } -// StringWriter sw = new StringWriter(); -// t.printStackTrace(new PrintWriter(sw)); -// System.out.println(sw.toString()); + public void brokenLinkFound(String uri, List parentURIs, String message, Throwable t) { + this.print(pad(42,"X [0] ")+uri+"\tBROKEN: "+message); + brokenLinks.put(uri + "\t" + message, parentURIs); } @@ -145,7 +157,7 @@ new PrintWriter( new FileWriter(new File(reportFile)), true); - for (Iterator i = brokenLinks.iterator(); i.hasNext();) { + for (Iterator i = brokenLinks.keySet().iterator(); i.hasNext();) { writer.println((String) i.next()); } 
writer.close(); @@ -161,11 +173,21 @@ new FileWriter(new File(reportFile)), true); writer.println("<broken-links>"); - for (Iterator i = brokenLinks.iterator(); i.hasNext();) { + for (Iterator i = brokenLinks.keySet().iterator(); i.hasNext();) { String linkMsg = (String) i.next(); String uri = linkMsg.substring(0,linkMsg.indexOf('\t')); String msg = linkMsg.substring(linkMsg.indexOf('\t')+1); - writer.println(" <link message=\"" + msg + "\">" + uri + "</link>"); + if (!isShowingReferrers) { + writer.println(" <link message=\"" + msg + "\">" + uri + "</link>"); + } else { + writer.println(" <link message=\"" + msg + "\" uri=\"" + uri + "\">"); + List referrers = (List)brokenLinks.get(linkMsg); + for (Iterator j = referrers.iterator(); j.hasNext();) { + String referrer=(String) j.next(); + writer.println(" <referrer uri=\"" + referrer + "\"/>"); + } + writer.println(" </link>"); + } } writer.println("</broken-links>"); writer.close();