On Thursday 08 December 2005 23:48, Roy, Ansuman wrote:
> I would be very grateful to you michael for this.
> Actually i tried to fix this myself but failed
> bitterly.
>
Attached is a patch with the changes I made to StaticHTMLExporter; I also created
a StaticHTMLDeactivator and an associated WDelete class.
As usual, I don't guarantee the patch will apply or compile; it's more of a
guide on how to accomplish this yourself :)
Michael
diff -Nwurd /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/cms/publishing/StaticHTMLDeactivator.java _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/cms/publishing/StaticHTMLDeactivator.java
--- /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/cms/publishing/StaticHTMLDeactivator.java 1970-01-01 10:00:00.000000000 +1000
+++ _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/cms/publishing/StaticHTMLDeactivator.java 2005-12-08 23:37:49.000000000 +1100
@@ -0,0 +1,172 @@
+/**
+ *
+ */
+package org.apache.lenya.cms.publishing;
+
+import java.io.File;
+import java.net.URL;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.StringTokenizer;
+
+import org.apache.avalon.framework.parameters.Parameters;
+import org.apache.lenya.cms.publication.SiteTree;
+import org.apache.lenya.cms.publication.SiteTreeNode;
+import org.apache.lenya.cms.publication.file.FilePublication;
+import org.apache.lenya.cms.task.ExecutionException;
+import org.apache.log4j.Category;
+
+/**
+ * This class will export any page which is a parent or sibling to the page being deactivated.
+ */
+public class StaticHTMLDeactivator extends StaticHTMLExporter {
+
+ private static Category log = Category.getInstance(StaticHTMLDeactivator.class); // log under this class's own category
+ public static final String PARAMETER_DOCUMENT_ID = "document-id"; // task parameter: id of the document being deactivated
+
+ /**
+ * This method is the complement of export:
+ * it deletes the exported file of a deactivated page from the destination.
+ *
+ * @param serverURI The hostname of the server which lenya is running on.
+ * @param serverPort The port which lenya is running on
+ * @param publicationPath The path which the publication can be found under
+ * @param exportPath The path the publication is exported to; used as-is when absolute
+ * @param uri The page we want deactivated (path without server + port)
+ * @param substituteExpression What to find
+ * @param substituteReplacement What to replace
+ *
+ * @throws ExportException Wraps any exception raised while deleting the file.
+ */
+ public void deactivate(URL serverURI, int serverPort, String publicationPath, String exportPath, String uri, String substituteExpression, String substituteReplacement)
+ throws ExportException {
+ try {
+ String exportDirectory = publicationPath + exportPath;
+
+ if (new File(exportPath).isAbsolute()) {
+ exportDirectory = exportPath;
+ }
+
+ log.info(".deactivate(): Export directory: " + exportDirectory + " (" + publicationPath +
+ " , " + exportPath + ")");
+
+ org.apache.lenya.net.WDelete wdelete = new org.apache.lenya.net.WDelete();
+ wdelete.setDirectoryPrefix(exportDirectory);
+
+ String fullServerURI = serverURI + ":" + serverPort;
+
+ URL url = new URL(fullServerURI + uri);
+ log.info(".deactivate(): Delete static HTML: " + url);
+
+ wdelete.delete(url, substituteExpression, substituteReplacement);
+ } catch (Exception e) {
+ throw new ExportException(e);
+ }
+ }
+
+
+ /**
+ * This is called by org.apache.lenya.cms.task.TaskSequence when a user deactivates a page.
+ * It re-exports the parent and the live siblings of the page, then deletes the page's exported file.
+ *
+ * @param contextPath The directory where the lenya webapp lives
+ *
+ * @throws ExecutionException Wraps any exception raised while exporting or deleting.
+ */
+ public void execute(String contextPath) throws ExecutionException {
+ try {
+ String publicationId = getParameters().getParameter(PARAMETER_PUBLICATION_ID);
+
+ Parameters taskParameters = new Parameters();
+
+ PublishingEnvironment environment = new PublishingEnvironment(contextPath, publicationId);
+
+ // read default parameters from PublishingEnvironment
+ taskParameters.setParameter(PublishingEnvironment.PARAMETER_EXPORT_PATH,
+ environment.getExportDirectory());
+ taskParameters.setParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REGEXP,
+ environment.getSubstituteExpression());
+ taskParameters.setParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REPLACEMENT,
+ environment.getSubstituteReplacement());
+
+ taskParameters.merge(getParameters());
+ parameterize(taskParameters);
+
+ String publicationPath = PublishingEnvironment.getPublicationPath(contextPath,
+ publicationId);
+
+ int serverPort = getParameters().getParameterAsInteger(PARAMETER_SERVER_PORT);
+ log.debug(".execute(): Server Port: " + serverPort);
+
+ String serverURI = getParameters().getParameter(PARAMETER_SERVER_URI);
+
+ String documentId = getParameters().getParameter(PARAMETER_DOCUMENT_ID);
+
+ // find pages that are parents or siblings of this page
+ // we look for them in the authoring tree and then check if they are also in the live tree
+ // we rewrite pages in the live tree so they no longer have links to the removed page.
+ FilePublication fp=new FilePublication(publicationId,contextPath);
+ SiteTree authoringTree=fp.getTree("authoring");
+ SiteTree liveTree=fp.getTree("live");
+
+ Set rewriteUris = new HashSet();
+
+
+
+ // look up the document in the authoring tree
+ SiteTreeNode node=authoringTree.getNode(documentId);
+
+ if (node!=null && node.visibleInNav()) {
+ // if we found it and it's visible, get its parent
+ SiteTreeNode parent=node.getParent();
+
+ if (parent!=null && parent.visibleInNav()) {
+ log.debug(".execute(): Node has parent");
+
+ // get the children of the parent, which are
+ // obviously siblings of the original node
+ // check if the sibling is in the live tree,
+ // and add it to the list to rewrite if it is
+ SiteTreeNode[] siblings=parent.getChildren();
+ for (int i=0; i<siblings.length; i++) {
+ String relatedDocumentId=siblings[i].getAbsoluteId();
+
+ SiteTreeNode liveNode=liveTree.getNode(relatedDocumentId);
+ if (liveNode != null) {
+ String rewriteUri="/"+publicationId+"/"+"live"+relatedDocumentId+".html";
+ rewriteUris.add(rewriteUri);
+ }
+ }
+ // it's safe to assume the parent node is in the live tree
+ rewriteUris.add("/"+publicationId+"/"+"live"+parent.getAbsoluteId()+".html");
+ }
+ } else {
+ log.debug(".execute(): Node not found or not visible: " + documentId);
+ }
+
+ // every entry added above is a String, so the set can be copied
+ // directly into the array form that export() takes; this cannot
+ // leave null slots behind the way a filtered copy loop could
+ String[] writeUris=(String[])rewriteUris.toArray(new String[rewriteUris.size()]);
+
+ // re-export the related live pages so they no longer link to the removed page
+ export(new URL(serverURI), serverPort, publicationPath,
+ getParameters().getParameter(PublishingEnvironment.PARAMETER_EXPORT_PATH), writeUris,
+ getParameters().getParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REGEXP),
+ getParameters().getParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REPLACEMENT));
+
+ // finally delete the exported file of the deactivated page itself
+ String deleteUri="/"+publicationId+"/"+"live"+documentId+".html";
+
+ deactivate(new URL(serverURI), serverPort, publicationPath,
+ getParameters().getParameter(PublishingEnvironment.PARAMETER_EXPORT_PATH), deleteUri,
+ getParameters().getParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REGEXP),
+ getParameters().getParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REPLACEMENT));
+
+ } catch (Exception e) {
+ throw new ExecutionException(e);
+ }
+ }
+
+}
diff -Nwurd /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/cms/publishing/StaticHTMLExporter.java _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/cms/publishing/StaticHTMLExporter.java
--- /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/cms/publishing/StaticHTMLExporter.java 2005-06-26 10:52:32.000000000 +1000
+++ _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/cms/publishing/StaticHTMLExporter.java 2005-12-08 23:37:49.000000000 +1100
@@ -21,43 +21,70 @@
import java.io.File;
import java.net.URL;
+import java.util.Iterator;
import java.util.StringTokenizer;
+import java.util.HashSet;
+import java.util.Set;
import org.apache.avalon.framework.parameters.Parameters;
+import org.apache.lenya.cms.publication.SiteTree;
+import org.apache.lenya.cms.publication.SiteTreeNode;
+import org.apache.lenya.cms.publication.file.FilePublication;
import org.apache.lenya.cms.task.ExecutionException;
import org.apache.log4j.Category;
-
/**
- * This Exporter uses WGet to download HTML files from URIs and saves them. The Task parameters
- * are: <code><strong>server-uri</strong></code>: the server uri<br/>
- * <code><strong>server-port</strong></code>: the server port<br/>
+ * This Exporter uses WGet to download HTML files from URIs and saves them. The
+ * Task parameters are: <code><strong>server-uri</strong></code>: the server
+ * uri<br/> <code><strong>server-port</strong></code>: the server port<br/>
* <code><strong>publication-id</strong></code>: the publication id<br/>
- * <code><strong>export-path-prefix</strong></code>: the path to save the files to<br/>
- * <code><strong>uris</strong></code>: a comma-separated list of uris to download (without server
- * + port)<br/>
- * <code><strong>substitute-regexp</strong></code>: a regular expression to substitute a part of
- * the path<br/>
+ * <code><strong>export-path-prefix</strong></code>: the path to save the
+ * files to<br/> <code><strong>uris</strong></code>: a comma-separated list
+ * of uris to download (without server + port)<br/>
+ * <code><strong>substitute-regexp</strong></code>: a regular expression to
+ * substitute a part of the path<br/>
*/
public class StaticHTMLExporter extends AbstractExporter {
- private static Category log = Category.getInstance(StaticHTMLExporter.class);
+ private static Category log = Category
+ .getInstance(StaticHTMLExporter.class);
+
public static final String PARAMETER_URIS = "uris";
+ public static final String PARAMETER_SCHEDULER_URL = "scheduler.document-url";
+
+ // Array-based convenience overload: gathers the uris into a Set
+ // (dropping duplicates) and hands over to the Set-based export.
+ public void export(URL serverURI, int serverPort, String publicationPath,
+ String exportPath, String[] uris, String substituteExpression,
+ String substituteReplacement) throws ExportException {
+ Set uniqueUris = new HashSet(java.util.Arrays.asList(uris));
+ export(serverURI, serverPort, publicationPath, exportPath, uniqueUris,
+ substituteExpression, substituteReplacement);
+ }
/**
* DOCUMENT ME!
*
- * @param serverURI DOCUMENT ME!
- * @param serverPort DOCUMENT ME!
- * @param publicationPath DOCUMENT ME!
- * @param exportPath DOCUMENT ME!
- * @param uris DOCUMENT ME!
- * @param substituteExpression DOCUMENT ME!
+ * @param serverURI
+ * The hostname of the server which lenya is running on.
+ * @param serverPort
+ * The port which lenya is running on
+ * @param publicationPath
+ * The path which the publication can be found under
+ * @param exportPath
+ * The path we want the publication exported to
+ * @param uris
+ * The pages we want exported
+ * @param substituteExpression
+ * What to find
+ * @param substituteReplacement
+ * What to replace
*
- * @throws ExportException DOCUMENT ME!
+ * @throws ExportException
+ * If anything doesn't work we throw one of these?
*/
- public void export(URL serverURI, int serverPort, String publicationPath, String exportPath,
- String[] uris, String substituteExpression, String substituteReplacement)
- throws ExportException {
+ public void export(URL serverURI, int serverPort, String publicationPath,
+ String exportPath, Set uris, String substituteExpression,
+ String substituteReplacement) throws ExportException {
try {
String exportDirectory = publicationPath + exportPath;
@@ -65,72 +92,182 @@
exportDirectory = exportPath;
}
- log.info(".export(): Export directory: " + exportDirectory + " (" + publicationPath +
- " , " + exportPath + ")");
+ log.info(".export(): Export directory: " + exportDirectory + " ("
+ + publicationPath + " , " + exportPath + ")");
org.apache.lenya.net.WGet wget = new org.apache.lenya.net.WGet();
wget.setDirectoryPrefix(exportDirectory);
String fullServerURI = serverURI + ":" + serverPort;
+ Set resources=new HashSet();
- for (int i = 0; i < uris.length; i++) {
- URL uri = new URL(fullServerURI + uris[i]);
+ for (Iterator iter=uris.iterator(); iter.hasNext();) {
+ URL uri = new URL(fullServerURI + ((String)iter.next()));
log.info(".export(): Export static HTML: " + uri);
wget.download(uri, substituteExpression, substituteReplacement);
+ resources.addAll(wget.getLinks(uri));
}
+ wget.downloadResources(resources,substituteExpression, substituteReplacement);
} catch (Exception e) {
throw new ExportException(e);
}
}
/**
- * DOCUMENT ME!
+ * This is called by org.apache.lenya.cms.task.TaskSequence when a use
+ * clicks publish.
*
- * @param contextPath DOCUMENT ME!
+ * @param contextPath
+ * The directory when the lenya webapp lives
*/
public void execute(String contextPath) throws ExecutionException {
+ String publicationId = null;
+ String publicationPath = null;
+ String serverURI = null;
+ int serverPort = 0;
try {
- String publicationId = getParameters().getParameter(PARAMETER_PUBLICATION_ID);
+ publicationId = getParameters().getParameter(
+ PARAMETER_PUBLICATION_ID);
Parameters taskParameters = new Parameters();
- PublishingEnvironment environment = new PublishingEnvironment(contextPath, publicationId);
+ PublishingEnvironment environment = new PublishingEnvironment(
+ contextPath, publicationId);
// read default parameters from PublishingEnvironment
- taskParameters.setParameter(PublishingEnvironment.PARAMETER_EXPORT_PATH,
- environment.getExportDirectory());
- taskParameters.setParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REGEXP,
+ taskParameters.setParameter(
+ PublishingEnvironment.PARAMETER_EXPORT_PATH, environment
+ .getExportDirectory());
+ taskParameters.setParameter(
+ PublishingEnvironment.PARAMETER_SUBSTITUTE_REGEXP,
environment.getSubstituteExpression());
- taskParameters.setParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REPLACEMENT,
+ taskParameters.setParameter(
+ PublishingEnvironment.PARAMETER_SUBSTITUTE_REPLACEMENT,
environment.getSubstituteReplacement());
taskParameters.merge(getParameters());
parameterize(taskParameters);
- String publicationPath = PublishingEnvironment.getPublicationPath(contextPath,
- publicationId);
+ publicationPath = PublishingEnvironment.getPublicationPath(
+ contextPath, publicationId);
- int serverPort = getParameters().getParameterAsInteger(PARAMETER_SERVER_PORT);
+ serverPort = getParameters().getParameterAsInteger(
+ PARAMETER_SERVER_PORT);
log.debug(".execute(): Server Port: " + serverPort);
- String serverURI = getParameters().getParameter(PARAMETER_SERVER_URI);
+ serverURI = getParameters().getParameter(PARAMETER_SERVER_URI);
+ } catch (Exception e) {
+ throw new ExecutionException(e);
+ }
+ // when scheduled export look for 'scheduler.document-url'
+ Set requestedUris = new HashSet();
+ try {
String urisString = getParameters().getParameter(PARAMETER_URIS);
+
StringTokenizer st = new StringTokenizer(urisString, ",");
- String[] uris = new String[st.countTokens()];
- int i = 0;
while (st.hasMoreTokens()) {
- uris[i++] = st.nextToken();
+ requestedUris.add(st.nextToken());
+ }
+ } catch (Exception e) {
+ System.out.println("StaticHTMLExported, couldn't find parameter: "+PARAMETER_URIS+" - "+e);
+ }
+ // if uris is empty, then we may be running from a scheduled published, as we didn't find the uris above.
+ if (requestedUris.isEmpty()) {
+ try {
+ String url=getParameters().getParameter(this.PARAMETER_SCHEDULER_URL);
+ requestedUris.add(url);
+ } catch (Exception e) {
+ System.out.println("StaticHTMLExported, couldn't find parameter: "+PARAMETER_SCHEDULER_URL+" - "+e);
+ }
}
- export(new URL(serverURI), serverPort, publicationPath,
- getParameters().getParameter(PublishingEnvironment.PARAMETER_EXPORT_PATH), uris,
- getParameters().getParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REGEXP),
- getParameters().getParameter(PublishingEnvironment.PARAMETER_SUBSTITUTE_REPLACEMENT));
+ FilePublication fp = null;
+ SiteTree tree = null;
+ try {
+ // find pages that are parents or siblings of this page
+ fp = new FilePublication(publicationId, contextPath);
+ tree = fp.getTree("live");
} catch (Exception e) {
throw new ExecutionException(e);
}
+
+ Set relatedUris = new HashSet();
+
+ for (Iterator iter = requestedUris.iterator(); iter.hasNext();) {
+ String uri = (String) iter.next();
+ int endPubStartArea = uri.indexOf('/', 1);
+ int endAreaStartDoc = uri.indexOf('/', endPubStartArea + 1);
+ String documentIdPlusXML = uri.substring(endAreaStartDoc);
+ String documentId = documentIdPlusXML.substring(0,
+ documentIdPlusXML.indexOf('.'));
+ if (documentId.equals("/footer") || documentId.startsWith("/promo")) {
+ // rewrite everything when the footer changes
+ Set nodes = this.getAllTreeNodes(tree.getTopNodes());
+ for (Iterator innerIter = nodes.iterator(); innerIter.hasNext();) {
+ SiteTreeNode node = (SiteTreeNode) innerIter.next();
+ String nodeUri = "/" + publicationId + "/live"
+ + node.getAbsoluteId() + ".html";
+ relatedUris.add(nodeUri);
+ }
+ } else {
+ SiteTreeNode node = tree.getNode(documentId);
+ if (node != null && node.visibleInNav()) {
+ SiteTreeNode parent = node.getParent();
+ if (parent != null && parent.visibleInNav()) {
+ System.out.println("Node has parent");
+ SiteTreeNode[] siblings = parent.getChildren();
+ for (int i = 0; i < siblings.length; i++) {
+ String relatedUri = "/" + publicationId + "/live"
+ + siblings[i].getAbsoluteId() + ".html";
+ relatedUris.add(relatedUri);
+ }
+ relatedUris.add("/" + publicationId + "/live"
+ + parent.getAbsoluteId() + ".html");
+ }
+ } else {
+ System.out.println("Node not visible");
+ }
+ }
+ }
+
+ Set joinedUris = new HashSet();
+ joinedUris.addAll(requestedUris);
+ joinedUris.addAll(relatedUris);
+ joinedUris.add("/" + publicationId + "/live/sitemap.html");
+
+ // export the page we have been asked to export
+ try {
+ export(
+ new URL(serverURI),
+ serverPort,
+ publicationPath,
+ getParameters().getParameter(
+ PublishingEnvironment.PARAMETER_EXPORT_PATH),
+ joinedUris,
+ getParameters().getParameter(
+ PublishingEnvironment.PARAMETER_SUBSTITUTE_REGEXP),
+ getParameters()
+ .getParameter(
+ PublishingEnvironment.PARAMETER_SUBSTITUTE_REPLACEMENT));
+
+ } catch (Exception e) {
+ throw new ExecutionException(e);
+ }
+ }
+
+ // Collects the given nodes and, recursively, all of their descendants.
+ private Set getAllTreeNodes(SiteTreeNode[] nodes) {
+ Set collected = new HashSet();
+ for (int index = 0; index < nodes.length; index++) {
+ collected.add(nodes[index]);
+ SiteTreeNode[] children = nodes[index].getChildren();
+ if (children != null) {
+ collected.addAll(this.getAllTreeNodes(children));
+ }
+ }
+ return collected;
}
}
diff -Nwurd /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/net/WDelete.java _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/net/WDelete.java
--- /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/net/WDelete.java 1970-01-01 10:00:00.000000000 +1000
+++ _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/net/WDelete.java 2005-12-08 23:37:49.000000000 +1100
@@ -0,0 +1,62 @@
+/**
+ *
+ */
+package org.apache.lenya.net;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+
+import org.apache.log4j.Category;
+
+/**
+ * Similar to wget, but deletes files instead of writing them :)
+ * Used to remove the local file that corresponds to a URL below a directory prefix.
+ */
+public class WDelete {
+
+ static Category log = Category.getInstance(WDelete.class);
+ String directory_prefix = null;
+
+ /**
+ * Creates a WDelete whose directory prefix defaults to the "user.dir" system property.
+ */
+ public WDelete() {
+ directory_prefix = System.getProperty("user.dir");
+ }
+
+ // the directory in which we will be deleting stuff
+ public void setDirectoryPrefix(String directory_prefix) {
+ this.directory_prefix = directory_prefix;
+ }
+
+ /**
+ * @param url The url of the resource to DELETE
+ * @param prefixSubstitute Regexp which shall be replaced
+ * @param substituteReplacement Replacement of the regexp
+ * @throws IOException if the file could not be deleted
+ */
+ public void delete(URL url, String prefixSubstitute, String substituteReplacement)
+ throws IOException {
+ log.debug(".delete(): " + url + " " + prefixSubstitute + " " + substituteReplacement);
+
+ File deleteThisFile=new File(deleteFileName(url, prefixSubstitute, substituteReplacement));
+
+ if (deleteThisFile.delete()) {
+ log.info(".delete(): successfully deleted: "+deleteThisFile.getAbsolutePath());
+ } else {
+ log.error(".delete(): FAILED TO DELETE: "+deleteThisFile.getAbsolutePath());
+ throw new IOException("Failed to delete "+deleteThisFile.getAbsolutePath());
+ }
+ }
+
+ /**
+ * @param url URL of resource, which has been downloaded and shall be DELETED
+ * @return Absolute substituted filename
+ */
+ public String deleteFileName(URL url, String prefixSubstitute, String substituteReplacement) {
+ File file = new File(directory_prefix + File.separator + url.getFile().replaceAll(prefixSubstitute, substituteReplacement));
+ return file.getAbsolutePath();
+ }
+
+}
diff -Nwurd /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/net/WGet.java _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/net/WGet.java
--- /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/net/WGet.java 2005-06-26 10:52:31.000000000 +1000
+++ _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/net/WGet.java 2005-12-08 23:37:49.000000000 +1100
@@ -28,17 +28,19 @@
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
+import java.util.Set;
import org.apache.log4j.Category;
-
/**
* Similar to the UNIX wget
*/
public class WGet {
static Category log = Category.getInstance(WGet.class);
+
String directory_prefix = null;
/**
@@ -51,11 +53,13 @@
/**
* DOCUMENT ME!
*
- * @param args DOCUMENT ME!
+ * @param args
+ * DOCUMENT ME!
*/
public static void main(String[] args) {
if (args.length == 0) {
- System.out.println("Usage: org.apache.lenya.net.WGet [URL] -P/home/lenya/download");
+ System.out
+ .println("Usage: org.apache.lenya.net.WGet [URL] -P/home/lenya/download");
return;
}
@@ -65,11 +69,14 @@
for (int i = 0; i < args.length; i++) {
if (args[i].indexOf("-P") == 0) {
- wget.setDirectoryPrefix(args[i].substring(2)); // -P/home/lenya/download, 2: remove "-P"
+ wget.setDirectoryPrefix(args[i].substring(2)); // -P/home/lenya/download,
+ // 2: remove
+ // "-P"
}
}
- byte[] response = wget.download(new URL(args[0]), "s/\\/lenya\\/oscom//g", "");
+ byte[] response = wget.download(new URL(args[0]),
+ "s/\\/lenya\\/oscom//g", "");
} catch (MalformedURLException e) {
System.err.println(e);
} catch (Exception e) {
@@ -80,33 +87,42 @@
/**
* -P
*
- * @param directory_prefix DOCUMENT ME!
+ * @param directory_prefix
+ * DOCUMENT ME!
*/
public void setDirectoryPrefix(String directory_prefix) {
this.directory_prefix = directory_prefix;
}
/**
- * @param url The url of the resource to download
- * @param prefixSubstitute Regexp which shall be replaced
- * @param substituteReplacement Replacement of the regexp
+ * @param url
+ * The url of the resource to download
+ * @param prefixSubstitute
+ * Regexp which shall be replaced
+ * @param substituteReplacement
+ * Replacement of the regexp
*
* @return bytes of downloaded resource
*
- * @throws IOException URL might not exist
+ * @throws IOException
+ * URL might not exist
*/
- public byte[] download(URL url, String prefixSubstitute, String substituteReplacement)
- throws IOException {
- log.debug(".download(): " + url + " " + prefixSubstitute + " " + substituteReplacement);
+ public byte[] download(URL url, String prefixSubstitute,
+ String substituteReplacement) throws IOException {
+ log.debug(".download(): " + url + " " + prefixSubstitute + " "
+ + substituteReplacement);
- return downloadUsingHttpClient(url, prefixSubstitute, substituteReplacement);
+ return downloadUsingHttpClient(url, prefixSubstitute,
+ substituteReplacement);
}
/**
* DOCUMENT ME!
*
- * @param url DOCUMENT ME!
- * @param prefixSubstitute DOCUMENT ME!
+ * @param url
+ * DOCUMENT ME!
+ * @param prefixSubstitute
+ * DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
@@ -119,11 +135,13 @@
try {
sresponse = getResource(url);
- File file = new File(createFileName(url, prefixSubstitute, substituteReplacement));
+ File file = new File(createFileName(url, prefixSubstitute,
+ substituteReplacement));
saveToFile(file.getAbsolutePath(), sresponse);
- substitutePrefix(file.getAbsolutePath(), prefixSubstitute, substituteReplacement);
+ substitutePrefix(file.getAbsolutePath(), prefixSubstitute,
+ substituteReplacement);
} catch (MalformedURLException e) {
log.error(".downloadUsingHttpClient(): ", e);
} catch (FileNotFoundException e) {
@@ -131,44 +149,34 @@
} catch (IOException e) {
log.error(".downloadUsingHttpClient(): ", e);
}
-
- List links = null;
-
- try {
- links = getLinks(url);
- } catch (IOException ioe) {
- log.error(".downloadUsingHttpClient(): ", ioe);
+ return sresponse;
}
- if (links != null) {
+ public void downloadResources(Set links, String prefixSubstitute,
+ String substituteReplacement) {
Iterator iterator = links.iterator();
while (iterator.hasNext()) {
- String link = (String) iterator.next();
-
try {
- URL child_url = new URL(org.apache.lenya.util.URLUtil.complete(url.toString(),
- link));
+ URL child_url = (URL) iterator.next();
byte[] child_sresponse = getResource(child_url);
- saveToFile(createFileName(child_url, prefixSubstitute, substituteReplacement),
- child_sresponse);
+ saveToFile(createFileName(child_url, prefixSubstitute,
+ substituteReplacement), child_sresponse);
} catch (Exception e) {
log.error(".downloadUsingHttpClient(): ", e);
}
}
}
- return sresponse;
- }
-
/**
*
*/
public byte[] getResource(URL url) throws IOException {
log.debug(".getResource(): " + url);
- HttpURLConnection httpConnection = (HttpURLConnection) url.openConnection();
+ HttpURLConnection httpConnection = (HttpURLConnection) url
+ .openConnection();
InputStream in = httpConnection.getInputStream();
byte[] buffer = new byte[1024];
int bytes_read;
@@ -179,6 +187,8 @@
}
byte[] sresponse = bufferOut.toByteArray();
+ bufferOut.close();
+ in.close();
httpConnection.disconnect();
return sresponse;
@@ -187,13 +197,14 @@
/**
*
*/
- public List getLinks(URL url) throws IOException {
+ public Set getLinks(URL url) throws IOException {
log.debug(".getLinks(): Get links from " + url);
List links = null;
try {
- org.apache.lenya.util.HTML html = new org.apache.lenya.util.HTML(url.toString());
+ org.apache.lenya.util.HTML html = new org.apache.lenya.util.HTML(
+ url.toString());
links = html.getImageSrcs(false);
links.addAll(html.getLinkHRefs(false));
} catch (Exception e) {
@@ -203,23 +214,38 @@
if (links != null) {
log.debug(".getLinks(): Number of links found: " + links.size());
}
+ Set result = new HashSet();
- return links;
+ for (Iterator iter = links.iterator(); iter.hasNext();) {
+ URL child_url = new URL(org.apache.lenya.util.URLUtil.complete(url
+ .toString(), (String) iter.next()));
+ result.add(child_url);
+ }
+
+ return result;
}
/**
* Substitute prefix, e.g. "/lenya/blog/live/" by "/"
*
- * @param filename Filename
- * @param prefixSubstitute Prefix which shall be replaced
- * @param substituteReplacement Prefix which is going to replace the original
+ * @param filename
+ * Filename
+ * @param prefixSubstitute
+ * Prefix which shall be replaced
+ * @param substituteReplacement
+ * Prefix which is going to replace the original
*
- * @throws IOException DOCUMENT ME!
+ * @throws IOException
+ * DOCUMENT ME!
*/
- public void substitutePrefix(String filename, String prefixSubstitute, String substituteReplacement) throws IOException {
- log.debug("Replace " + prefixSubstitute + " by " + substituteReplacement);
+ public void substitutePrefix(String filename, String prefixSubstitute,
+ String substituteReplacement) throws IOException {
+ log.debug("Replace " + prefixSubstitute + " by "
+ + substituteReplacement);
- org.apache.lenya.util.SED.replaceAll(new File(filename), escapeSlashes(prefixSubstitute), escapeSlashes(substituteReplacement));
+ org.apache.lenya.util.SED.replaceAll(new File(filename),
+ escapeSlashes(prefixSubstitute),
+ escapeSlashes(substituteReplacement));
}
/**
@@ -259,9 +285,11 @@
File parent = new File(file.getParent());
if (!parent.exists()) {
- log.warn(".saveToFile(): Directory will be created: " + parent.getAbsolutePath());
+ log.warn(".saveToFile(): Directory will be created: "
+ + parent.getAbsolutePath());
parent.mkdirs();
}
+ System.out.println("Writing to: " + file.getAbsolutePath());
FileOutputStream out = new FileOutputStream(file.getAbsolutePath());
out.write(bytes);
@@ -269,13 +297,17 @@
}
/**
- * @param url URL of resource, which has been downloaded and shall be saved
+ * @param url
+ * URL of resource, which has been downloaded and shall be saved
* @return Absolute substituted filename
*/
- public String createFileName(URL url, String prefixSubstitute, String substituteReplacement) {
- File file = new File(directory_prefix + File.separator + url.getFile());
-
- return file.getAbsolutePath().replaceAll(prefixSubstitute, substituteReplacement);
+ public String createFileName(URL url, String prefixSubstitute,
+ String substituteReplacement) {
+ File file = new File(directory_prefix
+ + File.separator
+ + url.getFile().toString().replaceAll(prefixSubstitute,
+ substituteReplacement));
+ return file.getAbsolutePath();
}
/**
@@ -292,10 +324,11 @@
while ((bytes_read = in.read(buffer)) != -1) {
baout.write(buffer, 0, bytes_read);
}
+ in.close();
if (baout.toString().length() > 0) {
- log.debug(".runProcess(): %%%InputStream:START" + baout.toString() +
- "END:InputStream%%%");
+ log.debug(".runProcess(): %%%InputStream:START" + baout.toString()
+ + "END:InputStream%%%");
}
java.io.InputStream in_e = process.getErrorStream();
@@ -304,12 +337,18 @@
while ((bytes_read = in_e.read(buffer)) != -1) {
baout_e.write(buffer, 0, bytes_read);
}
+ in_e.close();
if (baout_e.toString().length() > 0) {
- log.error(".runProcess(): ###ErrorStream:START" + baout_e.toString() +
- "END:ErrorStream###");
+ log.error(".runProcess(): ###ErrorStream:START"
+ + baout_e.toString() + "END:ErrorStream###");
}
+ baout_e.close();
- return baout.toByteArray();
+ byte[] result = baout.toByteArray();
+
+ baout.close();
+
+ return result;
}
}
diff -Nwurd /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/util/HTMLHandler.java _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/util/HTMLHandler.java
--- /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/util/HTMLHandler.java 2005-06-26 10:52:32.000000000 +1000
+++ _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/util/HTMLHandler.java 2005-12-08 23:37:49.000000000 +1100
@@ -19,32 +19,71 @@
package org.apache.lenya.util;
+import org.apache.lenya.cms.publication.URLInformation;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.IOException;
+import java.net.HttpURLConnection;
import java.util.ArrayList;
+import java.util.Iterator;
+import java.net.URL;
+import java.net.MalformedURLException;
+import java.net.URLConnection;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTML.Tag;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;
-import org.apache.log4j.Category;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import org.xml.sax.SAXException;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import org.apache.log4j.Category;
/**
* DOCUMENT ME!
*/
public class HTMLHandler extends ParserCallback {
Category log = Category.getInstance(HTMLHandler.class);
+
+ private String uri;
+
private ArrayList img_src;
+
private ArrayList img_src_all;
+
private ArrayList a_href;
+
private ArrayList a_href_all;
+
private ArrayList link_href;
+
private ArrayList link_href_all;
+ // Records an image source: always in the complete list, and in the
+ // distinct list only if it is not there yet. A null src is ignored.
+ protected void addImageSrc(String src) {
+ if (src == null) {
+ return;
+ }
+ img_src_all.add(src);
+ boolean alreadyKnown = img_src.contains(src);
+ if (!alreadyKnown) {
+ img_src.add(src);
+ }
+ }
+
/**
* Creates a new HTMLHandler object.
*/
- public HTMLHandler() {
+ public HTMLHandler(String uri) {
+ this.uri = uri;
img_src_all = new ArrayList();
img_src = new ArrayList();
a_href_all = new ArrayList();
@@ -105,6 +144,29 @@
}
}
}
+
+ // looking for a flash param tag
+ if (tag.equals(HTML.Tag.PARAM)) {
+ String name = (String) attributes.getAttribute(HTML.Attribute.NAME);
+ System.out.println("%%% Found a param tag, name = " + name);
+
+ if (name.equals("FlashVars")) {
+ String value = (String) attributes
+ .getAttribute(HTML.Attribute.VALUE);
+ System.out.println("$$$ Found FlashVars, value = " + value);
+
+ String xmlFile = value.substring(value.indexOf('=') + 1, value
+ .length());
+ System.out.println("### xmlFile = " + xmlFile);
+
+ link_href_all.add(xmlFile);
+ if (!link_href.contains(xmlFile)) {
+ link_href.add(xmlFile);
+ }
+ this.processGallery(xmlFile);
+
+ }
+ }
}
/**
@@ -160,4 +222,43 @@
public ArrayList getAllAHRefs() {
return a_href_all;
}
+
+ /**
+ * Fetches the gallery XML named by a FlashVars value and registers the
+ * "source" and "thumb" attributes of every "image" element as image sources.
+ * Only paths starting with "/" are processed; failures are logged and
+ * otherwise ignored.
+ *
+ * @param surl path of the gallery XML, relative to the server root
+ */
+ private void processGallery(String surl) {
+ if (!surl.startsWith("/")) {
+ return;
+ }
+ // 8 skips past the "http://" or "https://" prefix, so the next '/' starts the path;
+ // when the page uri has no path at all, the whole uri is the server part
+ int pathStart = this.uri.indexOf('/', 8);
+ String serverName = (pathStart < 0) ? this.uri : this.uri.substring(0, pathStart);
+ log.debug(".processGallery(): server = " + serverName);
+
+ InputStream is = null;
+ try {
+ URL galUrl = new URL(serverName + surl);
+ URLConnection connection = galUrl.openConnection();
+
+ // NOTE(review): the parser runs with default settings on fetched XML;
+ // consider disabling DTDs/external entities if the source is not trusted
+ DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+ DocumentBuilder dbuild = dbf.newDocumentBuilder();
+
+ is = connection.getInputStream();
+ Document doc = dbuild.parse(is);
+
+ NodeList nodes = doc.getElementsByTagName("image");
+ for (int i = 0; i < nodes.getLength(); i++) {
+ Element image = (Element) nodes.item(i);
+ // getAttribute returns "" for a missing attribute; don't record that as an image
+ String source = image.getAttribute("source");
+ if (source.length() > 0) {
+ this.addImageSrc(source);
+ }
+ String thumb = image.getAttribute("thumb");
+ if (thumb.length() > 0) {
+ this.addImageSrc(thumb);
+ }
+ }
+ } catch (MalformedURLException mfurle) {
+ log.error(".processGallery(): ", mfurle);
+ } catch (IOException ioe) {
+ log.error(".processGallery(): ", ioe);
+ } catch (ParserConfigurationException pce) {
+ log.error(".processGallery(): ", pce);
+ } catch (SAXException saxe) {
+ log.error(".processGallery(): ", saxe);
+ } finally {
+ // close the stream even when parsing failed
+ if (is != null) {
+ try {
+ is.close();
+ } catch (IOException ignored) {
+ log.warn(".processGallery(): could not close stream", ignored);
+ }
+ }
+ }
+ }
}
diff -Nwurd /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/util/HTML.java _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/util/HTML.java
--- /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/util/HTML.java 2005-06-26 10:52:33.000000000 +1000
+++ _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/util/HTML.java 2005-12-08 23:37:49.000000000 +1100
@@ -45,9 +45,12 @@
* @throws IOException DOCUMENT ME!
*/
public HTML(String uri) throws IOException {
+ System.out.println("HTML Constructor uri = "+uri);
ParserDelegator pd = new ParserDelegator();
- htmlHandler = new HTMLHandler();
- pd.parse(getReader(uri), htmlHandler, true);
+ htmlHandler = new HTMLHandler(uri);
+ Reader rdr=this.getReader(uri);
+ pd.parse(rdr, htmlHandler, true);
+ rdr.close();
}
/**
diff -Nwurd /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/util/SED.java _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/util/SED.java
--- /home/michael/Java/lenya-1.2.4/src/java/org/apache/lenya/util/SED.java 2005-06-26 10:52:31.000000000 +1000
+++ _home_michael_apache-lenya-1.2.4-ben_src_java/org/apache/lenya/util/SED.java 2005-12-08 23:37:49.000000000 +1100
@@ -90,5 +90,6 @@
ps.print(outString);
ps.close();
fos.close();
+ fis.close();
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]