Update of
/var/cvs/contributions/CMSContainer_Modules/staticdownload/src/java/com/finalist/cmsc/staticdownload
In directory james.mmbase.org:/tmp/cvs-serv14824
Modified Files:
DownloadThread.java
Log Message:
CMSC-1086: improve the static download module, using wget
See also:
http://cvs.mmbase.org/viewcvs/contributions/CMSContainer_Modules/staticdownload/src/java/com/finalist/cmsc/staticdownload
See also: http://www.mmbase.org/jira/browse/CMSC-1086
Index: DownloadThread.java
===================================================================
RCS file:
/var/cvs/contributions/CMSContainer_Modules/staticdownload/src/java/com/finalist/cmsc/staticdownload/DownloadThread.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -b -r1.8 -r1.9
--- DownloadThread.java 15 Jul 2008 10:24:55 -0000 1.8
+++ DownloadThread.java 14 Oct 2008 03:28:54 -0000 1.9
@@ -5,6 +5,7 @@
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
+import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FilenameFilter;
@@ -34,7 +35,7 @@
private static Log log = LogFactory.getLog(DownloadThread.class);
- private String url;
+ private String filePath;
private DownloadSettings downloadSettings;
private String currentFile;
private Exception exception;
@@ -46,6 +47,7 @@
private String webPath;
private String webappName = "";
private String[] suffix;
+ private static ArrayList<String> cssFiles = new ArrayList<String>();
private static final String[] EXCLUDE_FILES = new String[] { "admin",
"data", "editors", "htmlarea", "jsp", "META-INF",
"mmbase",
@@ -57,7 +59,7 @@
public DownloadThread(String url, DownloadSettings downloadSettings) {
super("Downloading " + url);
- this.url = url;
+ this.filePath = url;
this.downloadSettings = downloadSettings;
this.webappName = downloadSettings.getWebappName();
}
@@ -100,9 +102,11 @@
File file = new
File(downloadSettings.getTempPath());//get the files which wget last download
setupSuffix();
modifyDownloadPath();
+ downloadAssociatedCss(file,new FileNameFilter(".css",".html"));
findAssociatedFiles(file, new MyFilenameFilter(".css",
".js", ".html"));
redownload(redownloadfiles);
redownloadfiles.clear();//this will avoid to redownload
picture in different request
+ replaceAllLinks(file,new FileNameFilter(".html"));
zip();
} catch (Exception e) {
exception = e;
@@ -321,7 +325,7 @@
}
private void download() throws IOException {
- pubDownload(url);
+ pubDownload(filePath);
}
private void processErrors(String text) {
@@ -384,7 +388,7 @@
public final static String fixFlash(String string) {
int startIndex = 0;
while ((startIndex = string.indexOf("<object", startIndex)) !=
-1) {
- System.out.println("before >> " + string);
+
int endIndex = string.indexOf("</object", startIndex);
if (endIndex == -1) {
endIndex = string.length();
@@ -412,7 +416,6 @@
string = string.substring(0, swfStart[to]) +
swfString[from]
+ string.substring(swfEnd[to]);
- System.out.println("after >>> " + string);
startIndex = string.indexOf("</object", startIndex);
}
return string;
@@ -442,6 +445,32 @@
}
}
}
+ static class FileNameFilter implements FilenameFilter {
+
+ ArrayList<String> list = new ArrayList<String>();
+
+ public FileNameFilter(String... suffixs) {
+ for(String suffix:suffixs){
+ if(suffix != null && suffix.trim().length() != 0){
+ list.add(suffix);
+ }
+ }
+ }
+ public boolean accept(File dir, String name) {
+ if (new File(dir, name).isFile()) {
+ for(String suffix : list) {
+ if(name.endsWith(suffix)){
+ return true;
+ }
+ }
+ return false;
+ }
+ else {
+ return true;
+ }
+
+ }
+ }
private void spellPath(String targetPath, String directoryName) {
String regEx1 = "\\.\\.";
@@ -490,6 +519,95 @@
}
}
+ /**
+ * Replace absolute "http://" + webPath links in the accepted files with relative links
+ */
+ private void replaceAllLinks(File file,FileNameFilter fileNameFilter) {
+ if (!file.exists()) {
+ return;
+ }
+ File[] files = file.listFiles(fileNameFilter);
+ for (int i = 0; i < files.length; i++) {
+ String outStrings = "";
+ if (files[i].isFile()) {
+ BufferedReader reader = null;
+ try {
+ reader = new BufferedReader(new FileReader(files[i]));
+ String tempString = null;
+ while ((tempString = reader.readLine()) != null) {
+ String regex = "http://"+webPath+"";
+ Pattern p2 = Pattern.compile(regex);
+ Matcher matcher = p2.matcher(tempString);
+ if(matcher.find()){
+ String filePath = files[i].getAbsolutePath();
+ String liveUrl = downloadSettings.getLiveUrl();
+ filePath = regularReplace(filePath, "\\\\", "/");
+
+ String urls = "";
+ if(tempString.indexOf(liveUrl) > -1) {
+ String temp =
tempString.substring(tempString.indexOf("http://"+liveUrl)) ;
+ urls = temp.substring(0, temp.indexOf("\""));
+ }
+ else {
+ continue;
+ }
+ filePath =
filePath.substring(filePath.indexOf(liveUrl)+liveUrl.length()+1);
+ String path = filePath;
+ if(path.lastIndexOf("/") > -1) {
+ path = path.substring(0,path.lastIndexOf("/"));
+ }
+ else {
+ if(path.indexOf(".") > -1) {
+ path = "";
+ }
+ }
+ String[] grades = path.split("/");
+ String newUrl = urls.replace("http://"+webPath, "");
+ int iCount = grades.length;
+ if(grades != null && grades.length > 0) {
+ for(int j = 0 ; j < grades.length ; j++) {
+ if(grades[j] == null ||
grades[j].trim().length() == 0) {
+ iCount--;
+ }
+ if(grades[j] != null &&
grades[j].trim().length() > 0 && newUrl.startsWith(grades[j])) {
+
if(newUrl.indexOf(grades[j])+grades[j].length()+1 < newUrl.length()) {
+ newUrl =
newUrl.substring(newUrl.indexOf(grades[j])+grades[j].length()+1);
+ iCount--;
+ }
+ }
+ else{
+ break;
+ }
+ }
+ }
+
+ if(iCount > 0) {
+ for(int j = 0 ; j < iCount; j++) {
+ newUrl = "../"+newUrl;
+ }
+ }
+ if(newUrl.indexOf(".") == -1) {
+ newUrl += ".html";
+ }
+ tempString = tempString.replace(urls, newUrl);
+ log.info("#################
newUrl=========>"+newUrl);
+ }
+ outStrings += tempString + "\r\n";
+ }
+ writeFile(files[i], outStrings);
+ }
+ catch (FileNotFoundException e) {
+ log.info("File not found --->"+files[i].getName());
+ }
+ catch (IOException e) {
+ log.info("IOException-------->"+e.getMessage());
+ }
+
+ } else if (files[i].isDirectory()) {
+ replaceAllLinks(files[i], fileNameFilter);
+ }
+ }
+ }
/**Regular expressions for judging*/
private String regularJudge(String regEx, String targetString) {
String targetPath = "";
@@ -526,15 +644,19 @@
downloadPath = webPath + targetStringNew.substring(1);
} else {
downloadPath = webPath
- +
targetStringNew.substring(webappName.length() + 2);
+ + targetStringNew.substring(targetStringNew.indexOf(webappName)
+webappName.length()+ 1);
}
if (!redownloadfiles.contains(downloadPath))
redownloadfiles.add(downloadPath);
String filePath = file.getParent();
- int beginIndex = filePath.indexOf(getUrl());
+ String lurl = changeUrl(getUrl());
+ if(lurl.contains("/")){
+ lurl = regularReplace(lurl,"/","\\\\");
+ }
+ int beginIndex = filePath.indexOf(lurl);
String filePathIntercept = filePath.substring(beginIndex
- + getUrl().length());
+ + lurl.length());
String[] strs = filePathIntercept.split("\\\\");
int num = strs.length;
String docs = "";
@@ -566,7 +688,11 @@
private void addDownloadPath(String str, File file, String targetPath) {
String filepath = file.getAbsolutePath();
- String url = downloadSettings.getLiveUrl();
+ String url = changeUrl(getUrl());
+// String url = "web.finalist.hk\\finalist";
+ if(url.contains("/")){
+ url = regularReplace(url,"/","\\\\");
+ }
int tagStart = filepath.indexOf(url);
String u = filepath.substring(tagStart);
int tagEnd = u.indexOf("\\" + str);
@@ -577,7 +703,13 @@
if (!redownloadfiles.contains(str1))
redownloadfiles.add(str1);
}
-
+ private String changeUrl(String url){
+ String exUrl = url;
+ if(url.contains("//")){
+ exUrl = url.substring(url.indexOf("//")+2);
+ }
+ return exUrl;
+ }
/**
* Use regular expressions to get the paths of pictures
* and rewrite the path into the file
@@ -601,23 +733,23 @@
String fileParentPath =
file.getParent() + File.separator;
String directoryName = "";
if (StringUtils.isNotEmpty(webappName)
- && (tagStart =
fileParentPath.indexOf(webappName)) >= 0
+ && (tagStart = fileParentPath.indexOf("\\"+webappName))
>= 0
&&
fileParentPath.contains("js")) {
int tagEnd =
fileParentPath.indexOf("\\js", tagStart
- +
webappName.length() + 1);
+ + webappName.length() + 2);
if (tagEnd >= 0) {
directoryName =
fileParentPath.substring(tagStart
- +
webappName.length() + 1, tagEnd);
+ + webappName.length() + 2, tagEnd);
this.spellPath(targetPath, directoryName);
}
} else if
(StringUtils.isNotEmpty(webappName)
- && (tagStart =
fileParentPath.indexOf(webappName)) >= 0
+ && (tagStart = fileParentPath.indexOf("\\"+webappName))
>= 0
&&
fileParentPath.contains("css")) {
int tagEnd =
fileParentPath.indexOf("\\css", tagStart
- +
webappName.length() + 1);
+ + webappName.length() + 2);
if (tagEnd >= 0) {
directoryName =
fileParentPath.substring(tagStart
- +
webappName.length() + 1, tagEnd);
+ + webappName.length() + 2, tagEnd);
this.spellPath(targetPath, directoryName);
}
} else if
(StringUtils.isEmpty(webappName)
@@ -664,6 +796,13 @@
}
}
}
+ str = regularJudge("[\\w/_:.]*#top_anchor",tempString);
+ if(StringUtils.isNotEmpty(str)){
+ tempString = regularReplace(tempString,str,"#top_anchor");
+ }
+ if(tempString.indexOf(":80") > -1) {
+ tempString = tempString.replace(":80", "");
+ }
outputData += tempString + "\r\n";
}
} catch (IOException e) {
@@ -675,6 +814,64 @@
}
return outputData;
}
+ /**
+ * download associated css files, e.g. a css file imported using @import url("...")
+ * @throws IOException
+ */
+ private void downloadAssociatedCss(File file,FileNameFilter fileNameFilter)
throws IOException{
+ recurFindCss(file,fileNameFilter);
+ redownload(cssFiles);
+ }
+
+
+ private void recurFindCss(File file,FileNameFilter fileNameFilter) {
+ if (!file.exists()) {
+ return;
+ }
+ File[] files = file.listFiles(fileNameFilter);
+ for (int i = 0; i < files.length; i++) {
+
+ if (files[i].isFile()) {
+ BufferedReader reader = null;
+ try {
+ reader = new BufferedReader(new FileReader(files[i]));
+ String tempString = null;
+ while ((tempString = reader.readLine()) != null) {
+ String regex = "[EMAIL
PROTECTED](\"(\\S+)\"\\)\\s*;?\\s*";
+ Pattern p2 = Pattern.compile(regex);
+ Matcher matcher = p2.matcher(tempString);
+ // System.out.print("--->"+matcher.group(1));
+ String str1 = "" ;
+ if(matcher.find()){
+ String filepath = file.getAbsolutePath();
+ String url = downloadSettings.getLiveUrl();
+ filepath = regularReplace(filepath, "\\\\", "/");
+ int tagStart = filepath.indexOf(url);
+ if(tagStart != -1)
+ {
+ String u = filepath.substring(tagStart);
+ String ss = regularReplace(u, "\\\\", "/");
+ str1 = ss;
+ }
+ if(!str1.endsWith("/")) {
+ str1 += "/";
+ }
+ cssFiles.add(str1+matcher.group(1));
+ }
+ }
+ }
+ catch (FileNotFoundException e) {
+ log.info("File not found --->"+files[i].getName());
+ }
+ catch (IOException e) {
+ log.info("IOException-------->"+e.getMessage());
+ }
+
+ } else if (files[i].isDirectory()) {
+ recurFindCss(files[i], fileNameFilter);
+ }
+ }
+ }
public boolean isDownloading() {
return (endTime == -1);
_______________________________________________
Cvs mailing list
[email protected]
http://lists.mmbase.org/mailman/listinfo/cvs