On Thursday 08 December 2005 23:48, Roy, Ansuman wrote:
> I would be very grateful to you michael for this.
> Actually i tried to fix this myself but failed
> bitterly.
>

Attached is a patch with the changes I made to StaticHTMLExporter. I also created 
a StaticHTMLDeactivator and an associated WDelete class.

As usual, I don't guarantee the patch will apply or compile; it's meant more as a 
guide on how to accomplish this yourself :)

Michael
diff -Nwurd /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/cms/publishing/StaticHTMLDeactivator.java _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/cms/publishing/StaticHTMLDeactivator.java
--- /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/cms/publishing/StaticHTMLDeactivator.java	1970-01-01 10:00:00.000000000 +1000
+++ _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/cms/publishing/StaticHTMLDeactivator.java	2005-12-08 23:37:49.000000000 +1100
@@ -0,0 +1,172 @@
+/**
+ * 
+ */
+package org.apache.lenya.cms.publishing;
+
+import java.io.File;
+import java.net.URL;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.StringTokenizer;
+
+import org.apache.avalon.framework.parameters.Parameters;
+import org.apache.lenya.cms.publication.SiteTree;
+import org.apache.lenya.cms.publication.SiteTreeNode;
+import org.apache.lenya.cms.publication.file.FilePublication;
+import org.apache.lenya.cms.task.ExecutionException;
+import org.apache.log4j.Category;
+
+/**
+ * This class will export any page which is a parent or sibling to the page being deactivated.
+ */
+public class StaticHTMLDeactivator extends StaticHTMLExporter {
+
+    /** Logger for this task; registered under this class rather than the parent exporter. */
+    private static Category log = Category.getInstance(StaticHTMLDeactivator.class);
+
+    /** Task parameter holding the id of the document being deactivated. */
+    public static final String PARAMETER_DOCUMENT_ID = "document-id";
+
+    /**
+     * This method is the complement of export:
+     * it deletes the exported file of a deactivated page from the destination.
+     *
+     * @param serverURI The base URL of the server which lenya is running on.
+     * @param serverPort The port which lenya is running on
+     * @param publicationPath The path which the publication can be found under
+     * @param exportPath The path the publication was exported to; used as-is when
+     *        absolute, otherwise appended to publicationPath
+     * @param uri The page we want deactivated (without server + port)
+     * @param substituteExpression What to find
+     * @param substituteReplacement What to replace
+     *
+     * @throws ExportException Wraps any failure, e.g. a malformed URL or a failed delete
+     */
+    public void deactivate(URL serverURI, int serverPort, String publicationPath, String exportPath, String uri, String substituteExpression, String substituteReplacement)
+        throws ExportException {
+        try {
+            String exportDirectory = publicationPath + exportPath;
+
+            if (new File(exportPath).isAbsolute()) {
+                exportDirectory = exportPath;
+            }
+
+            log.info(".deactivate(): Export directory: " + exportDirectory + " (" + publicationPath +
+                " , " + exportPath + ")");
+
+            org.apache.lenya.net.WDelete wdelete = new org.apache.lenya.net.WDelete();
+            wdelete.setDirectoryPrefix(exportDirectory);
+
+            URL url = new URL(serverURI + ":" + serverPort + uri);
+            log.info(".deactivate(): Delete static HTML: " + uri);
+
+            wdelete.delete(url, substituteExpression, substituteReplacement);
+        } catch (Exception e) {
+            throw new ExportException(e);
+        }
+    }
+
+    /**
+     * This is called by org.apache.lenya.cms.task.TaskSequence when a user deactivates a page.
+     * It re-exports the live pages related to the deactivated document (its live siblings
+     * and its parent), then deletes the exported file of the document itself.
+     *
+     * @param contextPath The directory where the lenya webapp lives
+     * @throws ExecutionException Wraps any failure while reading parameters, exporting or deleting
+     */
+    public void execute(String contextPath) throws ExecutionException {
+        try {
+            String publicationId = getParameters().getParameter(PARAMETER_PUBLICATION_ID);
+
+            Parameters taskParameters = new Parameters();
+            PublishingEnvironment environment = new PublishingEnvironment(contextPath, publicationId);
+
+            // read default parameters from PublishingEnvironment
+            taskParameters.setParameter(PublishingEnvironment.PARAMETER_EXPORT_PATH,
+                environment.getExportDirectory());
+            taskParameters.setParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REGEXP,
+                environment.getSubstituteExpression());
+            taskParameters.setParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REPLACEMENT,
+                environment.getSubstituteReplacement());
+
+            taskParameters.merge(getParameters());
+            parameterize(taskParameters);
+
+            String publicationPath = PublishingEnvironment.getPublicationPath(contextPath,
+                    publicationId);
+
+            int serverPort = getParameters().getParameterAsInteger(PARAMETER_SERVER_PORT);
+            log.debug(".execute(): Server Port: " + serverPort);
+
+            URL serverURL = new URL(getParameters().getParameter(PARAMETER_SERVER_URI));
+            String documentId = getParameters().getParameter(PARAMETER_DOCUMENT_ID);
+            String exportPath = getParameters().getParameter(PublishingEnvironment.PARAMETER_EXPORT_PATH);
+            String substituteExpression = getParameters().getParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REGEXP);
+            String substituteReplacement = getParameters().getParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REPLACEMENT);
+
+            // find pages that are parents or siblings of this page in the authoring tree; those also
+            // in the live tree are rewritten so they no longer have links to the removed page.
+            FilePublication fp = new FilePublication(publicationId, contextPath);
+            Set rewriteUris = findRelatedLiveUris(fp.getTree("authoring"),
+                fp.getTree("live"), publicationId, documentId);
+            String[] writeUris = (String[]) rewriteUris.toArray(new String[rewriteUris.size()]);
+
+            // re-export the related pages first ...
+            export(serverURL, serverPort, publicationPath, exportPath, writeUris,
+                substituteExpression, substituteReplacement);
+
+            // ... then remove the exported file of the deactivated page itself
+            deactivate(serverURL, serverPort, publicationPath, exportPath,
+                liveUri(publicationId, documentId), substituteExpression, substituteReplacement);
+        } catch (Exception e) {
+            throw new ExecutionException(e);
+        }
+    }
+
+    /**
+     * Collects the live URIs to re-export when documentId is deactivated: every sibling
+     * that also exists in the live tree, plus the parent. The result is empty when the
+     * node or its parent is missing or not visible in the navigation.
+     *
+     * @param authoringTree The tree in which the document and its relatives are looked up
+     * @param liveTree The tree used to check which siblings are live
+     * @param publicationId The publication id, used as the first URI segment
+     * @param documentId The id of the document being deactivated
+     * @return A set of URI strings (without server + port)
+     */
+    private Set findRelatedLiveUris(SiteTree authoringTree, SiteTree liveTree,
+        String publicationId, String documentId) {
+        Set rewriteUris = new HashSet();
+
+        SiteTreeNode node = authoringTree.getNode(documentId);
+        if (node == null || !node.visibleInNav()) {
+            log.debug(".execute(): Node not found or not visible in navigation: " + documentId);
+            return rewriteUris;
+        }
+
+        SiteTreeNode parent = node.getParent();
+        if (parent == null || !parent.visibleInNav()) {
+            return rewriteUris;
+        }
+        log.debug(".execute(): Node has parent");
+
+        // the children of the parent are the siblings of the original node;
+        // only those present in the live tree have an exported page to rewrite
+        SiteTreeNode[] siblings = parent.getChildren();
+        for (int i = 0; siblings != null && i < siblings.length; i++) {
+            String relatedDocumentId = siblings[i].getAbsoluteId();
+            if (liveTree.getNode(relatedDocumentId) != null) {
+                rewriteUris.add(liveUri(publicationId, relatedDocumentId));
+            }
+        }
+        // the parent is assumed to be in the live tree (this is not checked)
+        rewriteUris.add(liveUri(publicationId, parent.getAbsoluteId()));
+        return rewriteUris;
+    }
+
+    /** Builds the URI (without server + port) of a document's exported page in the live area. */
+    private static String liveUri(String publicationId, String documentId) {
+        return "/" + publicationId + "/" + "live" + documentId + ".html";
+    }
+}
diff -Nwurd /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/cms/publishing/StaticHTMLExporter.java _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/cms/publishing/StaticHTMLExporter.java
--- /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/cms/publishing/StaticHTMLExporter.java	2005-06-26 10:52:32.000000000 +1000
+++ _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/cms/publishing/StaticHTMLExporter.java	2005-12-08 23:37:49.000000000 +1100
@@ -21,43 +21,70 @@
 
 import java.io.File;
 import java.net.URL;
+import java.util.Iterator;
 import java.util.StringTokenizer;
+import java.util.HashSet;
+import java.util.Set;
 
 import org.apache.avalon.framework.parameters.Parameters;
+import org.apache.lenya.cms.publication.SiteTree;
+import org.apache.lenya.cms.publication.SiteTreeNode;
+import org.apache.lenya.cms.publication.file.FilePublication;
 import org.apache.lenya.cms.task.ExecutionException;
 import org.apache.log4j.Category;
 
-
 /**
- * This Exporter uses WGet to download HTML files from URIs and saves them. The Task parameters
- * are: <code><strong>server-uri</strong></code>: the server uri<br/>
- * <code><strong>server-port</strong></code>: the server port<br/>
+ * This Exporter uses WGet to download HTML files from URIs and saves them. The
+ * Task parameters are: <code><strong>server-uri</strong></code>: the server
+ * uri<br/> <code><strong>server-port</strong></code>: the server port<br/>
  * <code><strong>publication-id</strong></code>: the publication id<br/>
- * <code><strong>export-path-prefix</strong></code>: the path to save the files to<br/>
- * <code><strong>uris</strong></code>: a comma-separated list of uris to download (without server
- * + port)<br/>
- * <code><strong>substitute-regexp</strong></code>: a regular expression to substitute a part of
- * the path<br/>
+ * <code><strong>export-path-prefix</strong></code>: the path to save the
+ * files to<br/> <code><strong>uris</strong></code>: a comma-separated list
+ * of uris to download (without server + port)<br/>
+ * <code><strong>substitute-regexp</strong></code>: a regular expression to
+ * substitute a part of the path<br/>
  */
 public class StaticHTMLExporter extends AbstractExporter {
-    private static Category log = Category.getInstance(StaticHTMLExporter.class);
+	private static Category log = Category
+			.getInstance(StaticHTMLExporter.class);
+
     public static final String PARAMETER_URIS = "uris";
+	public static final String PARAMETER_SCHEDULER_URL = "scheduler.document-url";
+
+	public void export(URL serverURI, int serverPort, String publicationPath,
+			String exportPath, String[] uris, String substituteExpression,
+			String substituteReplacement) throws ExportException {
+		Set uriSet=new HashSet();
+		for (int i=0; i<uris.length; i++) {
+			uriSet.add(uris[i]);
+		}
+		export(serverURI, serverPort, publicationPath, exportPath, uriSet, substituteExpression, substituteReplacement);
+	}
 
     /**
      * DOCUMENT ME!
      *
-     * @param serverURI DOCUMENT ME!
-     * @param serverPort DOCUMENT ME!
-     * @param publicationPath DOCUMENT ME!
-     * @param exportPath DOCUMENT ME!
-     * @param uris DOCUMENT ME!
-     * @param substituteExpression DOCUMENT ME!
+	 * @param serverURI
+	 *            The hostname of the server which lenya is running on.
+	 * @param serverPort
+	 *            The port which lenya is running on
+	 * @param publicationPath
+	 *            The path which the publication can be found under
+	 * @param exportPath
+	 *            The path we want the publication exported to
+	 * @param uris
+	 *            The pages we want exported
+	 * @param substituteExpression
+	 *            What to find
+	 * @param substituteReplacement
+	 *            What to replace
      *
-     * @throws ExportException DOCUMENT ME!
+	 * @throws ExportException
+	 *             If anything doesn't work we throw one of these?
      */
-    public void export(URL serverURI, int serverPort, String publicationPath, String exportPath,
-        String[] uris, String substituteExpression, String substituteReplacement)
-        throws ExportException {
+	public void export(URL serverURI, int serverPort, String publicationPath,
+			String exportPath, Set uris, String substituteExpression,
+			String substituteReplacement) throws ExportException {
         try {
             String exportDirectory = publicationPath + exportPath;
 
@@ -65,72 +92,182 @@
                 exportDirectory = exportPath;
             }
 
-            log.info(".export(): Export directory: " + exportDirectory + " (" + publicationPath +
-                " , " + exportPath + ")");
+			log.info(".export(): Export directory: " + exportDirectory + " ("
+					+ publicationPath + " , " + exportPath + ")");
 
             org.apache.lenya.net.WGet wget = new org.apache.lenya.net.WGet();
             wget.setDirectoryPrefix(exportDirectory);
 
             String fullServerURI = serverURI + ":" + serverPort;
+            Set resources=new HashSet();
 
-            for (int i = 0; i < uris.length; i++) {
-                URL uri = new URL(fullServerURI + uris[i]);
+            for (Iterator iter=uris.iterator(); iter.hasNext();) {
+                URL uri = new URL(fullServerURI + ((String)iter.next()));
                 log.info(".export(): Export static HTML: " + uri);
 
                 wget.download(uri, substituteExpression, substituteReplacement);
+                resources.addAll(wget.getLinks(uri));
             }
+            wget.downloadResources(resources,substituteExpression, substituteReplacement);
         } catch (Exception e) {
             throw new ExportException(e);
         }
     }
 
     /**
-     * DOCUMENT ME!
+	 * This is called by org.apache.lenya.cms.task.TaskSequence when a user
+	 * clicks publish.
      *
-     * @param contextPath DOCUMENT ME!
+	 * @param contextPath
+	 *            The directory where the lenya webapp lives
      */
     public void execute(String contextPath) throws ExecutionException {
+		String publicationId = null;
+		String publicationPath = null;
+		String serverURI = null;
+		int serverPort = 0;
         try {
-            String publicationId = getParameters().getParameter(PARAMETER_PUBLICATION_ID);
+			publicationId = getParameters().getParameter(
+					PARAMETER_PUBLICATION_ID);
 
             Parameters taskParameters = new Parameters();
 
-            PublishingEnvironment environment = new PublishingEnvironment(contextPath, publicationId);
+			PublishingEnvironment environment = new PublishingEnvironment(
+					contextPath, publicationId);
 
             // read default parameters from PublishingEnvironment
-            taskParameters.setParameter(PublishingEnvironment.PARAMETER_EXPORT_PATH,
-                environment.getExportDirectory());
-            taskParameters.setParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REGEXP,
+			taskParameters.setParameter(
+					PublishingEnvironment.PARAMETER_EXPORT_PATH, environment
+							.getExportDirectory());
+			taskParameters.setParameter(
+					PublishingEnvironment.PARAMETER_SUBSTITUTE_REGEXP,
                 environment.getSubstituteExpression());
-            taskParameters.setParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REPLACEMENT,
+			taskParameters.setParameter(
+					PublishingEnvironment.PARAMETER_SUBSTITUTE_REPLACEMENT,
                 environment.getSubstituteReplacement());
 
             taskParameters.merge(getParameters());
             parameterize(taskParameters);
 
-            String publicationPath = PublishingEnvironment.getPublicationPath(contextPath,
-                    publicationId);
+			publicationPath = PublishingEnvironment.getPublicationPath(
+					contextPath, publicationId);
 
-            int serverPort = getParameters().getParameterAsInteger(PARAMETER_SERVER_PORT);
+			serverPort = getParameters().getParameterAsInteger(
+					PARAMETER_SERVER_PORT);
             log.debug(".execute(): Server Port: " + serverPort);
 
-            String serverURI = getParameters().getParameter(PARAMETER_SERVER_URI);
+			serverURI = getParameters().getParameter(PARAMETER_SERVER_URI);
+		} catch (Exception e) {
+			throw new ExecutionException(e);
+		}
 
+		// when scheduled export look for 'scheduler.document-url'
+		Set requestedUris = new HashSet();
+		try {
             String urisString = getParameters().getParameter(PARAMETER_URIS);
+
             StringTokenizer st = new StringTokenizer(urisString, ",");
-            String[] uris = new String[st.countTokens()];
-            int i = 0;
 
             while (st.hasMoreTokens()) {
-                uris[i++] = st.nextToken();
+				requestedUris.add(st.nextToken());
+			}
+		} catch (Exception e) {
+			System.out.println("StaticHTMLExported, couldn't find parameter: "+PARAMETER_URIS+" - "+e);
+		}
+		// if uris is empty, then we may be running from a scheduled publish, as we didn't find the uris above.
+		if (requestedUris.isEmpty()) {
+			try {
+				String url=getParameters().getParameter(this.PARAMETER_SCHEDULER_URL);
+				requestedUris.add(url);
+			} catch (Exception e) {
+				System.out.println("StaticHTMLExported, couldn't find parameter: "+PARAMETER_SCHEDULER_URL+" - "+e);
+			}
             }
 
-            export(new URL(serverURI), serverPort, publicationPath,
-                getParameters().getParameter(PublishingEnvironment.PARAMETER_EXPORT_PATH), uris,
-                getParameters().getParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REGEXP),
-                getParameters().getParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REPLACEMENT));
+		FilePublication fp = null;
+		SiteTree tree = null;
+		try {
+			// find pages that are parents or siblings of this page
+			fp = new FilePublication(publicationId, contextPath);
+			tree = fp.getTree("live");
         } catch (Exception e) {
             throw new ExecutionException(e);
         }
+
+		Set relatedUris = new HashSet();
+
+		for (Iterator iter = requestedUris.iterator(); iter.hasNext();) {
+			String uri = (String) iter.next();
+			int endPubStartArea = uri.indexOf('/', 1);
+			int endAreaStartDoc = uri.indexOf('/', endPubStartArea + 1);
+			String documentIdPlusXML = uri.substring(endAreaStartDoc);
+			String documentId = documentIdPlusXML.substring(0,
+					documentIdPlusXML.indexOf('.'));
+			if (documentId.equals("/footer") || documentId.startsWith("/promo")) {
+				// rewrite everything when the footer changes
+				Set nodes = this.getAllTreeNodes(tree.getTopNodes());
+				for (Iterator innerIter = nodes.iterator(); innerIter.hasNext();) {
+					SiteTreeNode node = (SiteTreeNode) innerIter.next();
+					String nodeUri = "/" + publicationId + "/live"
+							+ node.getAbsoluteId() + ".html";
+					relatedUris.add(nodeUri);
+				}
+			} else {
+				SiteTreeNode node = tree.getNode(documentId);
+				if (node != null && node.visibleInNav()) {
+					SiteTreeNode parent = node.getParent();
+					if (parent != null && parent.visibleInNav()) {
+						System.out.println("Node has parent");
+						SiteTreeNode[] siblings = parent.getChildren();
+						for (int i = 0; i < siblings.length; i++) {
+							String relatedUri = "/" + publicationId + "/live"
+									+ siblings[i].getAbsoluteId() + ".html";
+							relatedUris.add(relatedUri);
+						}
+						relatedUris.add("/" + publicationId + "/live"
+								+ parent.getAbsoluteId() + ".html");
+					}
+				} else {
+					System.out.println("Node not visible");
+				}
+			}
+		}
+
+		Set joinedUris = new HashSet();
+		joinedUris.addAll(requestedUris);
+		joinedUris.addAll(relatedUris);
+		joinedUris.add("/" + publicationId + "/live/sitemap.html");
+
+		// export the page we have been asked to export
+		try {
+			export(
+					new URL(serverURI),
+					serverPort,
+					publicationPath,
+					getParameters().getParameter(
+							PublishingEnvironment.PARAMETER_EXPORT_PATH),
+					joinedUris,
+					getParameters().getParameter(
+							PublishingEnvironment.PARAMETER_SUBSTITUTE_REGEXP),
+					getParameters()
+							.getParameter(
+									PublishingEnvironment.PARAMETER_SUBSTITUTE_REPLACEMENT));
+
+		} catch (Exception e) {
+			throw new ExecutionException(e);
+		}
+	}
+
+	private Set getAllTreeNodes(SiteTreeNode[] nodes) {
+		Set result = new HashSet();
+
+		for (int i = 0; i < nodes.length; i++) {
+			result.add(nodes[i]);
+			if (nodes[i].getChildren() != null) {
+				result.addAll(this.getAllTreeNodes(nodes[i].getChildren()));
+			}
+		}
+
+		return result;
     }
 }
diff -Nwurd /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/net/WDelete.java _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/net/WDelete.java
--- /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/net/WDelete.java	1970-01-01 10:00:00.000000000 +1000
+++ _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/net/WDelete.java	2005-12-08 23:37:49.000000000 +1100
@@ -0,0 +1,62 @@
+/**
+ * 
+ */
+package org.apache.lenya.net;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+
+import org.apache.log4j.Category;
+
+/**
+ * Similar to wget, but deletes files instead of writing them :)
+ *
+ */
+public class WDelete {
+
+    static Category log = Category.getInstance(WDelete.class);
+    String directory_prefix = null;
+
+    /** Creates a deleter whose directory prefix is the "user.dir" system property. */
+    public WDelete() {
+        directory_prefix = System.getProperty("user.dir");
+    }
+
+    /** @param directory_prefix The directory in which we will be deleting stuff */
+    public void setDirectoryPrefix(String directory_prefix) {
+        this.directory_prefix = directory_prefix;
+    }
+
+    /**
+     * Deletes the local file that corresponds to the given url (see deleteFileName).
+     *
+     * @param url The url of the resource to DELETE
+     * @param prefixSubstitute Regexp which shall be replaced
+     * @param substituteReplacement Replacement of the regexp
+     * @throws IOException If the file could not be deleted
+     */
+    public void delete(URL url, String prefixSubstitute, String substituteReplacement)
+        throws IOException {
+        log.debug(".delete(): " + url + " " + prefixSubstitute + " " + substituteReplacement);
+
+        File deleteThisFile = new File(deleteFileName(url, prefixSubstitute, substituteReplacement));
+
+        if (!deleteThisFile.delete()) {
+            throw new IOException("Failed to delete " + deleteThisFile.getAbsolutePath());
+        }
+        log.info(".delete(): Deleted " + deleteThisFile.getAbsolutePath());
+    }
+
+    /**
+     * @param url URL of resource, which has been downloaded and shall be DELETED
+     * @param prefixSubstitute Regexp which shall be replaced in the file part of the url
+     * @param substituteReplacement Replacement of the regexp
+     * @return Absolute substituted filename
+     */
+    public String deleteFileName(URL url, String prefixSubstitute, String substituteReplacement) {
+        File file = new File(directory_prefix + File.separator + url.getFile().replaceAll(prefixSubstitute, substituteReplacement));
+        return file.getAbsolutePath();
+    }
+
+}
diff -Nwurd /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/net/WGet.java _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/net/WGet.java
--- /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/net/WGet.java	2005-06-26 10:52:31.000000000 +1000
+++ _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/net/WGet.java	2005-12-08 23:37:49.000000000 +1100
@@ -28,17 +28,19 @@
 import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.log4j.Category;
 
-
 /**
  * Similar to the UNIX wget
  */
 public class WGet {
     static Category log = Category.getInstance(WGet.class);
+	
     String directory_prefix = null;
 
     /**
@@ -51,11 +53,13 @@
     /**
      * DOCUMENT ME!
      *
-     * @param args DOCUMENT ME!
+	 * @param args
+	 *            DOCUMENT ME!
      */
     public static void main(String[] args) {
         if (args.length == 0) {
-            System.out.println("Usage: org.apache.lenya.net.WGet [URL] -P/home/lenya/download");
+			System.out
+			.println("Usage: org.apache.lenya.net.WGet [URL] -P/home/lenya/download");
 
             return;
         }
@@ -65,11 +69,14 @@
 
             for (int i = 0; i < args.length; i++) {
                 if (args[i].indexOf("-P") == 0) {
-                    wget.setDirectoryPrefix(args[i].substring(2)); // -P/home/lenya/download, 2: remove "-P"
+					wget.setDirectoryPrefix(args[i].substring(2)); // -P/home/lenya/download,
+					// 2: remove
+					// "-P"
                 }
             }
 
-            byte[] response = wget.download(new URL(args[0]), "s/\\/lenya\\/oscom//g", "");
+			byte[] response = wget.download(new URL(args[0]),
+					"s/\\/lenya\\/oscom//g", "");
         } catch (MalformedURLException e) {
             System.err.println(e);
         } catch (Exception e) {
@@ -80,33 +87,42 @@
     /**
      * -P
      *
-     * @param directory_prefix DOCUMENT ME!
+	 * @param directory_prefix
+	 *            DOCUMENT ME!
      */
     public void setDirectoryPrefix(String directory_prefix) {
         this.directory_prefix = directory_prefix;
     }
 
     /**
-     * @param url The url of the resource to download
-     * @param prefixSubstitute Regexp which shall be replaced
-     * @param substituteReplacement Replacement of the regexp
+	 * @param url
+	 *            The url of the resource to download
+	 * @param prefixSubstitute
+	 *            Regexp which shall be replaced
+	 * @param substituteReplacement
+	 *            Replacement of the regexp
      *
      * @return bytes of downloaded resource
      *
-     * @throws IOException URL might not exist
+	 * @throws IOException
+	 *             URL might not exist
      */
-    public byte[] download(URL url, String prefixSubstitute, String substituteReplacement)
-        throws IOException {
-        log.debug(".download(): " + url + " " + prefixSubstitute + " " + substituteReplacement);
+	public byte[] download(URL url, String prefixSubstitute,
+			String substituteReplacement) throws IOException {
+		log.debug(".download(): " + url + " " + prefixSubstitute + " "
+				+ substituteReplacement);
 
-        return downloadUsingHttpClient(url, prefixSubstitute, substituteReplacement);
+		return downloadUsingHttpClient(url, prefixSubstitute,
+				substituteReplacement);
     }
 
     /**
      * DOCUMENT ME!
      *
-     * @param url DOCUMENT ME!
-     * @param prefixSubstitute DOCUMENT ME!
+	 * @param url
+	 *            DOCUMENT ME!
+	 * @param prefixSubstitute
+	 *            DOCUMENT ME!
      *
      * @return DOCUMENT ME!
      */
@@ -119,11 +135,13 @@
         try {
             sresponse = getResource(url);
 
-            File file = new File(createFileName(url, prefixSubstitute, substituteReplacement));
+			File file = new File(createFileName(url, prefixSubstitute,
+					substituteReplacement));
 
             saveToFile(file.getAbsolutePath(), sresponse);
 
-            substitutePrefix(file.getAbsolutePath(), prefixSubstitute, substituteReplacement);
+			substitutePrefix(file.getAbsolutePath(), prefixSubstitute,
+					substituteReplacement);
         } catch (MalformedURLException e) {
             log.error(".downloadUsingHttpClient(): ", e);
         } catch (FileNotFoundException e) {
@@ -131,44 +149,34 @@
         } catch (IOException e) {
             log.error(".downloadUsingHttpClient(): ", e);
         }
-
-        List links = null;
-
-        try {
-            links = getLinks(url);
-        } catch (IOException ioe) {
-            log.error(".downloadUsingHttpClient(): ", ioe);
+		return sresponse;
         }
 
-        if (links != null) {
+	public void downloadResources(Set links, String prefixSubstitute,
+			String substituteReplacement) {
             Iterator iterator = links.iterator();
 
             while (iterator.hasNext()) {
-                String link = (String) iterator.next();
-
                 try {
-                    URL child_url = new URL(org.apache.lenya.util.URLUtil.complete(url.toString(),
-                                link));
+				URL child_url = (URL) iterator.next();
 
                     byte[] child_sresponse = getResource(child_url);
-                    saveToFile(createFileName(child_url, prefixSubstitute, substituteReplacement),
-                        child_sresponse);
+				saveToFile(createFileName(child_url, prefixSubstitute,
+						substituteReplacement), child_sresponse);
                 } catch (Exception e) {
                     log.error(".downloadUsingHttpClient(): ", e);
                 }
             }
         }
 
-        return sresponse;
-    }
-
     /**
      *
      */
     public byte[] getResource(URL url) throws IOException {
         log.debug(".getResource(): " + url);
 
-        HttpURLConnection httpConnection = (HttpURLConnection) url.openConnection();
+		HttpURLConnection httpConnection = (HttpURLConnection) url
+		.openConnection();
         InputStream in = httpConnection.getInputStream();
         byte[] buffer = new byte[1024];
         int bytes_read;
@@ -179,6 +187,8 @@
         }
 
         byte[] sresponse = bufferOut.toByteArray();
+		bufferOut.close();
+		in.close();
         httpConnection.disconnect();
 
         return sresponse;
@@ -187,13 +197,14 @@
     /**
      *
      */
-    public List getLinks(URL url) throws IOException {
+	public Set getLinks(URL url) throws IOException {
         log.debug(".getLinks(): Get links from " + url);
 
         List links = null;
 
         try {
-            org.apache.lenya.util.HTML html = new org.apache.lenya.util.HTML(url.toString());
+			org.apache.lenya.util.HTML html = new org.apache.lenya.util.HTML(
+					url.toString());
             links = html.getImageSrcs(false);
             links.addAll(html.getLinkHRefs(false));
         } catch (Exception e) {
@@ -203,23 +214,38 @@
         if (links != null) {
             log.debug(".getLinks(): Number of links found: " + links.size());
         }
+		Set result = new HashSet();
 
-        return links;
+		for (Iterator iter = links.iterator(); iter.hasNext();) {
+			URL child_url = new URL(org.apache.lenya.util.URLUtil.complete(url
+					.toString(), (String) iter.next()));
+			result.add(child_url);
+		}
+		
+		return result;
     }
 
     /**
      * Substitute prefix, e.g. "/lenya/blog/live/" by "/"
      *
-     * @param filename Filename
-     * @param prefixSubstitute Prefix which shall be replaced
-     * @param substituteReplacement Prefix which is going to replace the original
+	 * @param filename
+	 *            Filename
+	 * @param prefixSubstitute
+	 *            Prefix which shall be replaced
+	 * @param substituteReplacement
+	 *            Prefix which is going to replace the original
      *
-     * @throws IOException DOCUMENT ME!
+	 * @throws IOException
+	 *             DOCUMENT ME!
      */
-    public void substitutePrefix(String filename, String prefixSubstitute, String substituteReplacement) throws IOException {
-        log.debug("Replace " + prefixSubstitute + " by " + substituteReplacement);
+	public void substitutePrefix(String filename, String prefixSubstitute,
+			String substituteReplacement) throws IOException {
+		log.debug("Replace " + prefixSubstitute + " by "
+				+ substituteReplacement);
 
-	org.apache.lenya.util.SED.replaceAll(new File(filename), escapeSlashes(prefixSubstitute), escapeSlashes(substituteReplacement));
+		org.apache.lenya.util.SED.replaceAll(new File(filename),
+				escapeSlashes(prefixSubstitute),
+				escapeSlashes(substituteReplacement));
     }
 
     /**
@@ -259,9 +285,11 @@
         File parent = new File(file.getParent());
 
         if (!parent.exists()) {
-            log.warn(".saveToFile(): Directory will be created: " + parent.getAbsolutePath());
+			log.warn(".saveToFile(): Directory will be created: "
+					+ parent.getAbsolutePath());
             parent.mkdirs();
         }
+		System.out.println("Writing to: " + file.getAbsolutePath());
 
         FileOutputStream out = new FileOutputStream(file.getAbsolutePath());
         out.write(bytes);
@@ -269,13 +297,17 @@
     }
 
     /**
-     * @param url URL of resource, which has been downloaded and shall be saved
+	 * @param url
+	 *            URL of resource, which has been downloaded and shall be saved
      * @return Absolute substituted filename
      */
-    public String createFileName(URL url, String prefixSubstitute, String substituteReplacement) {
-        File file = new File(directory_prefix + File.separator + url.getFile());
-
-        return file.getAbsolutePath().replaceAll(prefixSubstitute, substituteReplacement);
+	public String createFileName(URL url, String prefixSubstitute,
+			String substituteReplacement) {
+		// prefixSubstitute is used as a regular expression (String.replaceAll)
+		// on the URL's file part only; directory_prefix is left untouched.
+		String substituted = url.getFile().replaceAll(prefixSubstitute,
+				substituteReplacement);
+		File file = new File(directory_prefix + File.separator + substituted);
+		return file.getAbsolutePath();
     }
 
     /**
@@ -292,10 +324,11 @@
         while ((bytes_read = in.read(buffer)) != -1) {
             baout.write(buffer, 0, bytes_read);
         }
+		in.close();
 
         if (baout.toString().length() > 0) {
-            log.debug(".runProcess(): %%%InputStream:START" + baout.toString() +
-                "END:InputStream%%%");
+			log.debug(".runProcess(): %%%InputStream:START" + baout.toString()
+					+ "END:InputStream%%%");
         }
 
         java.io.InputStream in_e = process.getErrorStream();
@@ -304,12 +337,18 @@
         while ((bytes_read = in_e.read(buffer)) != -1) {
             baout_e.write(buffer, 0, bytes_read);
         }
+		in_e.close();
 
         if (baout_e.toString().length() > 0) {
-            log.error(".runProcess(): ###ErrorStream:START" + baout_e.toString() +
-                "END:ErrorStream###");
+			log.error(".runProcess(): ###ErrorStream:START"
+					+ baout_e.toString() + "END:ErrorStream###");
         }
+		baout_e.close();
 
-        return baout.toByteArray();
+		byte[] result = baout.toByteArray();
+		
+		baout.close();
+		
+		return result;
     }
 }
diff -Nwurd /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/util/HTMLHandler.java _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/util/HTMLHandler.java
--- /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/util/HTMLHandler.java	2005-06-26 10:52:32.000000000 +1000
+++ _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/util/HTMLHandler.java	2005-12-08 23:37:49.000000000 +1100
@@ -19,32 +19,71 @@
 
 package org.apache.lenya.util;
 
+import org.apache.lenya.cms.publication.URLInformation;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.IOException;
+import java.net.HttpURLConnection;
 import java.util.ArrayList;
+import java.util.Iterator;
+import java.net.URL;
+import java.net.MalformedURLException;
+import java.net.URLConnection;
 
 import javax.swing.text.MutableAttributeSet;
 import javax.swing.text.html.HTML;
 import javax.swing.text.html.HTML.Tag;
 import javax.swing.text.html.HTMLEditorKit.ParserCallback;
 
-import org.apache.log4j.Category;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import org.xml.sax.SAXException;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
 
+import org.apache.log4j.Category;
 
 /**
  * DOCUMENT ME!
  */
 public class HTMLHandler extends ParserCallback {
     Category log = Category.getInstance(HTMLHandler.class);
+	
+	private String uri;
+	
     private ArrayList img_src;
+	
     private ArrayList img_src_all;
+	
     private ArrayList a_href;
+	
     private ArrayList a_href_all;
+	
     private ArrayList link_href;
+	
     private ArrayList link_href_all;
 
+	protected void addImageSrc(String src) {
+		if (src == null) { // nothing to record
+			return;
+		}
+		img_src_all.add(src); // every occurrence, duplicates included
+		if (!img_src.contains(src)) {
+			img_src.add(src); // first occurrence only
+		}
+	}
+	
     /**
      * Creates a new HTMLHandler object.
      */
-    public HTMLHandler() {
+	public HTMLHandler(String uri) {
+		this.uri = uri;
         img_src_all = new ArrayList();
         img_src = new ArrayList();
         a_href_all = new ArrayList();
@@ -105,6 +144,29 @@
                 }
             }
         }
+		
+		// looking for a flash param tag; its FlashVars value names the XML
+		// file handed to processGallery(). name and value may both be null.
+		if (tag.equals(HTML.Tag.PARAM)) {
+			String name = (String) attributes.getAttribute(HTML.Attribute.NAME);
+			String value = (String) attributes
+					.getAttribute(HTML.Attribute.VALUE);
+			log.debug("Found a param tag, name = " + name);
+
+			// constant-first equals() tolerates a <param> without a name
+			if ("FlashVars".equals(name) && value != null) {
+				// keep what follows the first '=' (the whole value if none)
+				String xmlFile = value.substring(value.indexOf('=') + 1);
+				log.debug("Found FlashVars, xmlFile = " + xmlFile);
+
+				link_href_all.add(xmlFile);
+				if (!link_href.contains(xmlFile)) {
+					link_href.add(xmlFile);
+				}
+
+				this.processGallery(xmlFile);
+			}
+		}
     }
 
     /**
@@ -160,4 +222,64 @@
     public ArrayList getAllAHRefs() {
         return a_href_all;
     }
+	
+	/**
+	 * Fetches the gallery XML referenced by a <code>FlashVars</code> param
+	 * and records the <code>source</code> and <code>thumb</code> attribute of
+	 * every <code>image</code> element as an image source (see
+	 * {@link #addImageSrc(String)}). I/O and XML failures are logged and
+	 * otherwise ignored.
+	 *
+	 * @param surl
+	 *            path of the gallery XML; only server-absolute paths (starting
+	 *            with "/") are processed, anything else is ignored
+	 */
+	private void processGallery(String surl) {
+		if (surl == null || !surl.startsWith("/")) {
+			return;
+		}
+
+		InputStream is = null;
+		try {
+			// Resolving against the page URI keeps scheme, host and port and
+			// also works when that URI has no path component.
+			URL galUrl = new URL(new URL(this.uri), surl);
+			log.debug(".processGallery(): Fetching gallery: " + galUrl);
+
+			is = galUrl.openConnection().getInputStream();
+			DocumentBuilder dbuild = DocumentBuilderFactory.newInstance()
+					.newDocumentBuilder();
+			Document doc = dbuild.parse(is);
+
+			NodeList nodes = doc.getElementsByTagName("image");
+			for (int i = 0; i < nodes.getLength(); i++) {
+				Element image = (Element) nodes.item(i);
+				String source = image.getAttribute("source");
+				String thumb = image.getAttribute("thumb");
+				// getAttribute() returns "" for a missing attribute
+				if (source.length() > 0) {
+					addImageSrc(source);
+				}
+				if (thumb.length() > 0) {
+					addImageSrc(thumb);
+				}
+			}
+		} catch (MalformedURLException mfurle) {
+			log.error(".processGallery(): Bad gallery URL: " + surl, mfurle);
+		} catch (IOException ioe) {
+			log.error(".processGallery(): Cannot read gallery: " + surl, ioe);
+		} catch (ParserConfigurationException pce) {
+			log.error(".processGallery(): No XML parser available", pce);
+		} catch (SAXException saxe) {
+			log.error(".processGallery(): Cannot parse gallery: " + surl, saxe);
+		} finally {
+			if (is != null) {
+				try {
+					is.close();
+				} catch (IOException ignored) {
+					// best effort: nothing useful can be done if close fails
+				}
+			}
+		}
+	}
 }
diff -Nwurd /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/util/HTML.java _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/util/HTML.java
--- /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/util/HTML.java	2005-06-26 10:52:33.000000000 +1000
+++ _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/util/HTML.java	2005-12-08 23:37:49.000000000 +1100
@@ -45,9 +45,15 @@
      * @throws IOException DOCUMENT ME!
      */
     public HTML(String uri) throws IOException {
         ParserDelegator pd = new ParserDelegator();
-        htmlHandler = new HTMLHandler();
-        pd.parse(getReader(uri), htmlHandler, true);
+        htmlHandler = new HTMLHandler(uri);
+        Reader rdr = this.getReader(uri);
+        try {
+            pd.parse(rdr, htmlHandler, true);
+        } finally {
+            // Release the underlying stream even when parsing fails.
+            rdr.close();
+        }
     }
 
     /**
diff -Nwurd /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/util/SED.java _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/util/SED.java
--- /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/util/SED.java	2005-06-26 10:52:31.000000000 +1000
+++ _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/util/SED.java	2005-12-08 23:37:49.000000000 +1100
@@ -90,5 +90,6 @@
         ps.print(outString);
         ps.close();
         fos.close();
+        fis.close();
     }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to