Author: danielr
Date: 2008-03-15 22:57:55 +0100 (Sat, 15 Mar 2008)
New Revision: 4567

Modified:
   trunk/source/de/anomic/net/ftpc.java
   trunk/source/de/anomic/plasma/crawler/plasmaFTPLoader.java
Log:
FTP:
- report connection status (to break if no connection possible)
- fixed isFolder()
- additional error output
- fixed paths with encoded symbols (e.g. a%20file.txt)
- refactoring


Modified: trunk/source/de/anomic/net/ftpc.java
===================================================================
--- trunk/source/de/anomic/net/ftpc.java        2008-03-15 10:56:47 UTC (rev 4566)
+++ trunk/source/de/anomic/net/ftpc.java        2008-03-15 21:57:55 UTC (rev 4567)
@@ -249,11 +249,11 @@
                         .booleanValue());
             } catch (final InvocationTargetException e) {
                 if (e.getMessage() == null) {
-                } else if (ControlSocket == null) {
+                } else if (notConnected()) {
                     // the error was probably caused because there is no
                     // connection
                     errPrintln("not connected. no effect.");
-                    e.printStackTrace();
+                    e.printStackTrace(err);
                     return ret;
                 } else {
                     errPrintln("ftp internal exception: target exception " + 
e);
@@ -266,7 +266,7 @@
                 // consider first that the user attempted to execute a java
                 // command from
                 // the current path; either local or remote
-                if (ControlSocket == null) {
+                if (notConnected()) {
                     // try a local exec
                     try {
                         javaexec(cmd);
@@ -470,7 +470,7 @@
             errPrintln("Syntax: CD <path>");
             return true;
         }
-        if (ControlSocket == null) {
+        if (notConnected()) {
             return LCD();
         }
         try {
@@ -531,7 +531,7 @@
             errPrintln("Syntax: DEL <file>");
             return true;
         }
-        if (ControlSocket == null) {
+        if (notConnected()) {
             return LDEL();
         }
         try {
@@ -551,7 +551,7 @@
             errPrintln("Syntax: DIR [<path>|<file>]");
             return true;
         }
-        if (ControlSocket == null) {
+        if (notConnected()) {
             return LDIR();
         }
         try {
@@ -575,18 +575,21 @@
         } catch (final IOException e) {
             errPrintln("Connection to server lost.");
         }
-        ControlSocket = null;
-        DataSocketActive = null;
-        DataSocketPassive = null;
-        clientInput = null;
-        clientOutput = null;
+        try {
+            closeConnection();
+        } catch (final IOException e) {
+            ControlSocket = null;
+            DataSocketActive = null;
+            DataSocketPassive = null;
+            clientInput = null;
+            clientOutput = null;
+        }
         prompt = "ftp [local]>";
         return true;
     }
 
     private String quit() throws IOException {
 
-        // send delete command
         send("QUIT");
 
         // read status reply
@@ -595,25 +598,8 @@
             throw new IOException(reply);
         }
 
-        // cleanup
-        if (ControlSocket != null) {
-            clientOutput.close();
-            clientInput.close();
-            ControlSocket.close();
-            ControlSocket = null;
-        }
+        closeConnection();
 
-        if (DataSocketActive != null) {
-            DataSocketActive.close();
-            DataSocketActive = null;
-        }
-        if (DataSocketPassive != null) {
-            DataSocketPassive.close();
-            DataSocketPassive = null; // "Once a socket has been closed, it is
-            // not available for further networking
-            // use"
-        }
-
         return reply;
     }
 
@@ -633,8 +619,8 @@
         final File local = absoluteLocalFile(localFilename);
 
         if (local.exists()) {
-            errPrintln("Error: local file " + local.toString() + " already 
exists.");
-            errPrintln(logPrefix + "            File " + remote + " not 
retrieved. Local file unchanged.");
+            errPrintln("Error: local file " + local.toString() + " already 
exists.\n" + "               File " + remote
+                    + " not retrieved. Local file unchanged.");
         } else {
             if (withoutLocalFile) {
                 retrieveFilesRecursively(remote, false);
@@ -732,7 +718,8 @@
             }
             // check if we actually changed into the folder
             final String changedPath = pwd();
-            if (!(changedPath.equals(path) || changedPath.equals(currentFolder 
+ "/" + path))) {
+            if (!(changedPath.equals(path) || changedPath.equals(currentFolder
+                    + (currentFolder.endsWith("/") ? "" : "/") + path))) {
                 throw new IOException("folder is '" + changedPath + "' should 
be '" + path + "'");
             }
             // return to last folder
@@ -1172,7 +1159,7 @@
             final String dateString = tokens.group(3) + " " + tokens.group(4) 
+ " " + year + " " + time;
             try {
                 date = lsDateFormat.parse(dateString);
-            } catch (ParseException e) {
+            } catch (final ParseException e) {
                 errPrintln(logPrefix + "---- Error: not ls date-format '" + 
dateString + "': " + e.getMessage());
                 date = new Date();
             }
@@ -1391,7 +1378,7 @@
             errPrintln("Syntax: LS [<path>|<file>]");
             return true;
         }
-        if (ControlSocket == null) {
+        if (notConnected()) {
             return LLS();
         }
         try {
@@ -1415,7 +1402,6 @@
         outPrintln("---- 
v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v");
         for (final String element : list) {
             outPrintln(element);
-            outPrintln("--> " + parseListData(element));
         }
         outPrintln("---- 
^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^");
     }
@@ -1477,7 +1463,7 @@
             errPrintln("Syntax: MKDIR <folder-name>");
             return true;
         }
-        if (ControlSocket == null) {
+        if (notConnected()) {
             return LMKDIR();
         }
         try {
@@ -1545,7 +1531,7 @@
             errPrintln("Syntax: MV <from> <to>");
             return true;
         }
-        if (ControlSocket == null) {
+        if (notConnected()) {
             return LMV();
         }
         try {
@@ -1604,7 +1590,7 @@
             outPrintln("---- Connection to " + cmd[1] + " established.");
             prompt = "ftp [" + cmd[1] + "]>";
         } catch (final IOException e) {
-            errPrintln("Error: connecting " + cmd[1] + " on port " + port + " 
failed.");
+            errPrintln("Error: connecting " + cmd[1] + " on port " + port + " 
failed: " + e.getMessage());
         }
         return true;
     }
@@ -1614,20 +1600,59 @@
             exec("close", false); // close any existing connections first
         }
 
-        ControlSocket = new Socket(host, port);
-        ControlSocket.setSoTimeout(getTimeout());
-        clientInput = new BufferedReader(new 
InputStreamReader(ControlSocket.getInputStream()));
-        clientOutput = new DataOutputStream(new 
BufferedOutputStream(ControlSocket.getOutputStream()));
+        try {
+            ControlSocket = new Socket(host, port);
+            ControlSocket.setSoTimeout(getTimeout());
+            clientInput = new BufferedReader(new 
InputStreamReader(ControlSocket.getInputStream()));
+            clientOutput = new DataOutputStream(new 
BufferedOutputStream(ControlSocket.getOutputStream()));
 
-        // read and return server message
-        this.host = host;
-        this.port = port;
-        remotemessage = receive();
-        if ((remotemessage != null) && (remotemessage.length() > 3)) {
-            remotemessage = remotemessage.substring(4);
+            // read and return server message
+            this.host = host;
+            this.port = port;
+            remotemessage = receive();
+            if ((remotemessage != null) && (remotemessage.length() > 3)) {
+                remotemessage = remotemessage.substring(4);
+            }
+        } catch (final IOException e) {
+            // if a connection was opened, it should not be used
+            closeConnection();
+            throw new IOException(e);
         }
     }
 
+    /**
+     * @return
+     */
+    public boolean notConnected() {
+        return ControlSocket == null;
+    }
+
+    /**
+     * close all sockets
+     * 
+     * @throws IOException
+     */
+    private void closeConnection() throws IOException {
+        // cleanup
+        if (ControlSocket != null) {
+            clientOutput.close();
+            clientInput.close();
+            ControlSocket.close();
+            ControlSocket = null;
+        }
+
+        if (DataSocketActive != null) {
+            DataSocketActive.close();
+            DataSocketActive = null;
+        }
+        if (DataSocketPassive != null) {
+            DataSocketPassive.close();
+            DataSocketPassive = null; // "Once a socket has been closed, it is
+            // not available for further networking
+            // use"
+        }
+    }
+
     public boolean PROMPT() {
         errPrintln("prompt is always off");
         return true;
@@ -1658,7 +1683,7 @@
             errPrintln("Syntax: PWD  (no parameter)");
             return true;
         }
-        if (ControlSocket == null) {
+        if (notConnected()) {
             return LPWD();
         }
         try {
@@ -1701,7 +1726,7 @@
             errPrintln("Syntax: RMDIR <folder-name>");
             return true;
         }
-        if (ControlSocket == null) {
+        if (notConnected()) {
             return LRMDIR();
         }
         try {
@@ -1713,7 +1738,7 @@
     }
 
     public boolean QUIT() {
-        if (ControlSocket != null) {
+        if (!notConnected()) {
             exec("close", false);
         }
         return false;
@@ -1780,7 +1805,7 @@
             login(cmd[1], cmd[2]);
             outPrintln("---- Granted access for user " + cmd[1] + ".");
         } catch (final IOException e) {
-            errPrintln("Error: authorization of user " + cmd[1] + " failed.");
+            errPrintln("Error: authorization of user " + cmd[1] + " failed: " 
+ e.getMessage());
         }
         return true;
     }
@@ -2354,6 +2379,7 @@
      * @throws IOException
      */
     private void login(final String account, final String password) throws 
IOException {
+        unsetLoginData();
 
         // send user name
         send("USER " + account);
@@ -2381,6 +2407,15 @@
     }
 
     /**
+     * we are authorized to use the server
+     * 
+     * @return
+     */
+    public boolean isLoggedIn() {
+        return (account != null && password != null && remotegreeting != null);
+    }
+
+    /**
      * remember username and password which were used to login
      * 
      * @param account
@@ -2394,6 +2429,12 @@
         remotegreeting = reply;
     }
 
+    private void unsetLoginData() {
+        account = null;
+        password = null;
+        remotegreeting = null;
+    }
+
     public void sys() throws IOException {
         // send system command
         send("SYST");
@@ -2451,7 +2492,7 @@
      * @param timeout
      *                in seconds, 0 = infinite
      */
-    public void setDataSocketTimeout(int timeout) {
+    public void setDataSocketTimeout(final int timeout) {
         DataSocketTimeout = timeout;
 
         try {

Modified: trunk/source/de/anomic/plasma/crawler/plasmaFTPLoader.java
===================================================================
--- trunk/source/de/anomic/plasma/crawler/plasmaFTPLoader.java  2008-03-15 10:56:47 UTC (rev 4566)
+++ trunk/source/de/anomic/plasma/crawler/plasmaFTPLoader.java  2008-03-15 21:57:55 UTC (rev 4567)
@@ -90,8 +90,12 @@
      */
     public plasmaHTCache.Entry load(final plasmaCrawlEntry entry) {
         final yacyURL entryUrl = entry.url();
-        final String fullPath = entryUrl.getPath();
+        final String fullPath = getPath(entryUrl);
+        final File cacheFile = createCachefile(entryUrl);
 
+        // the return value
+        plasmaHTCache.Entry htCache = null;
+
         // determine filename and path
         String file, path;
         if (fullPath.endsWith("/")) {
@@ -113,72 +117,78 @@
         final ByteArrayOutputStream berr = new ByteArrayOutputStream();
         final ftpc ftpClient = createFTPClient(berr);
 
-        plasmaHTCache.Entry htCache = null;
-        try {
-            openConnection(ftpClient, entryUrl);
+        if (openConnection(ftpClient, entryUrl)) {
+            // ftp stuff
+            try {
+                // testing if the specified file is a directory
+                if (file.length() > 0) {
+                    ftpClient.exec("cd \"" + path + "\"", false);
 
-            // testing if the specified file is a directory
-            if (file.length() > 0) {
-                ftpClient.exec("cd \"" + path + "\"", false);
+                    final boolean isFolder = ftpClient.isFolder(file);
+                    if (isFolder) {
+                        path = fullPath + "/";
+                        file = "";
+                    }
+                }
 
-                // testing if the current name is a directoy
-                final boolean isFolder = ftpClient.isFolder(file);
-                if (isFolder) {
-                    path = fullPath + "/";
-                    file = "";
+                if (file.length() == 0) {
+                    // directory -> get list of files
+                    // create a htcache entry
+                    htCache = createCacheEntry(entry, "text/html", new Date());
+                    if (!generateDirlist(ftpClient, entry, path, cacheFile)) {
+                        htCache = null;
+                    }
+                } else {
+                    // file -> download
+                    try {
+                        htCache = getFile(ftpClient, entry, cacheFile);
+                    } catch (final Exception e) {
+                        // add message to errorLog
+                        (new PrintStream(berr)).print(e.getMessage());
+                    }
                 }
+            } finally {
+                closeConnection(ftpClient);
             }
+        }
 
-            // creating a cache file object
-            final File cacheFile = plasmaHTCache.getCachePath(entryUrl);
+        // pass the downloaded resource to the cache manager
+        if (berr.size() > 0 || htCache == null) {
+            // some error logging
+            final String detail = (berr.size() > 0) ? "\n    Errorlog: " + 
berr.toString() : "";
+            log.logWarning("Unable to download URL " + entry.url().toString() 
+ detail);
+            sb.crawlQueues.errorURL.newEntry(entry, null, new Date(), 1, 
plasmaCrawlEURL.DENIED_SERVER_DOWNLOAD_ERROR);
 
-            // TODO: invalid file path check
-
-            // testing if the file already exists
-            if (cacheFile.isFile()) {
-                // delete the file if it already exists
-                plasmaHTCache.deleteURLfromCache(entryUrl);
-            } else {
-                // create parent directories
-                cacheFile.getParentFile().mkdirs();
+            // an error has occured. cleanup
+            if (cacheFile.exists()) {
+                cacheFile.delete();
             }
+        } else {
+            // announce the file
+            plasmaHTCache.writeFileAnnouncement(cacheFile);
+        }
 
-            if (file.length() == 0) {
-                // directory -> get list of files
-                // create a htcache entry
-                htCache = createCacheEntry(entry, "text/html", new Date());
-                if (!generateDirlist(ftpClient, entry, path, cacheFile)) {
-                    htCache = null;
-                }
-            } else {
-                // file -> download
-                try {
-                    htCache = getFile(ftpClient, entry, cacheFile);
-                } catch (final Exception e) {
-                }
-            }
+        return htCache;
+    }
 
-            // pass the downloaded resource to the cache manager
-            if (berr.size() > 0 || htCache == null) {
-                // some error logging
-                final String detail = (berr.size() > 0) ? "\n    Errorlog: " + 
berr.toString() : "";
-                log.logWarning("Unable to download URL " + 
entry.url().toString() + detail);
-                sb.crawlQueues.errorURL.newEntry(entry, null, new Date(), 1,
-                        plasmaCrawlEURL.DENIED_SERVER_DOWNLOAD_ERROR);
+    /**
+     * creating a cache file object
+     * 
+     * @param entryUrl
+     * @return
+     */
+    private File createCachefile(final yacyURL entryUrl) {
+        final File cacheFile = plasmaHTCache.getCachePath(entryUrl);
 
-                // an error has occured. cleanup
-                if (cacheFile.exists()) {
-                    cacheFile.delete();
-                }
-            } else {
-                // announce the file
-                plasmaHTCache.writeFileAnnouncement(cacheFile);
-            }
-
-            return htCache;
-        } finally {
-            closeConnection(ftpClient);
+        // testing if the file already exists
+        if (cacheFile.isFile()) {
+            // delete the file if it already exists
+            plasmaHTCache.deleteURLfromCache(entryUrl);
+        } else {
+            // create parent directories
+            cacheFile.getParentFile().mkdirs();
         }
+        return cacheFile;
     }
 
     /**
@@ -196,8 +206,9 @@
      * @param ftpClient
      * @param host
      * @param port
+     * @return success
      */
-    private void openConnection(final ftpc ftpClient, final yacyURL entryUrl) {
+    private boolean openConnection(final ftpc ftpClient, final yacyURL 
entryUrl) {
         // get username and password
         final String userInfo = entryUrl.getUserInfo();
         String userName = "anonymous", userPwd = "anonymous";
@@ -218,12 +229,20 @@
         } else {
             ftpClient.exec("open " + host + " " + port, false);
         }
+        if (ftpClient.notConnected()) {
+            return false;
+        }
 
         // login to the server
         ftpClient.exec("user " + userName + " " + userPwd, false);
 
-        // change transfer mode to binary
-        ftpClient.exec("binary", false);
+        if (ftpClient.isLoggedIn()) {
+            // change transfer mode to binary
+            ftpClient.exec("binary", false);
+        } else {
+            return false;
+        }
+        return true;
     }
 
     /**
@@ -240,7 +259,7 @@
         final yacyURL entryUrl = entry.url();
         final String extension = plasmaParser.getFileExt(entryUrl);
         final String mimeType = plasmaParser.getMimeTypeByFileExt(extension);
-        final String path = entryUrl.getPath();
+        final String path = getPath(entryUrl);
 
         // if the mimetype and file extension is supported we start to download
         // the file
@@ -264,7 +283,7 @@
                 log.logInfo("REJECTED TOO BIG FILE with size " + size + " 
Bytes for URL " + entry.url().toString());
                 sb.crawlQueues.errorURL.newEntry(entry, null, new Date(), 1,
                         plasmaCrawlEURL.DENIED_FILESIZE_LIMIT_EXCEEDED);
-                throw new Exception("filesize too big: " + size + " bytes");
+                throw new Exception("file size exceeds limit");
             }
         } else {
             // if the response has not the right file type then reject file
@@ -276,6 +295,16 @@
     }
 
     /**
+     * gets path suitable for FTP (url-decoded, double-quotes escaped)
+     * 
+     * @param entryUrl
+     * @return
+     */
+    private String getPath(final yacyURL entryUrl) {
+        return yacyURL.unescape(entryUrl.getPath()).replace("\"", "\"\"");
+    }
+
+    /**
      * @param ftpClient
      * @param entry
      * @param cacheFile

_______________________________________________
YaCy-svn mailing list
YaCy-svn@lists.berlios.de
https://lists.berlios.de/mailman/listinfo/yacy-svn

Antwort per Email an