I'm aware that there's a desire to rewrite the FTP portion of wget, but here is a patch against CVS that has so far allowed me to spider FTP URLs. It's a dirty hack that simply uses the opt.spider variable to keep from downloading files by returning RETROK (or maybe it was RETRFINISHED) after observing whether there was a 505 or 200 response from the "RETR" command. Also using opt.spider, I attempted to stop any calculation or display of downloads. Thus, I didn't really verify whether this is the proper protocol to "spider" in FTP, or whether all handles were closed properly.
And AFAICT, it appears to be spidering when --recursive is used. Right now it will create the directories to write the ".listing" files (which can be shut off with --no-directories). I've been validating URLs in the GNU Free Software Directory's CVS repository, and so far nothing has been downloaded into the working directory (I do have --output-document set to /dev/null to make sure). I didn't completely verify whether --verbose or --debug are still outputting legitimate information with --spider. I think that's everything I know. wget is great (especially --spider). /a ChangeLog and diff to ftp.c follow and are also attached. 2003-02-06 Aaron Hawley <[EMAIL PROTECTED]> * ftp.c (getftp): --spider option should now work with FTP. (ftp_loop_internal): --spider option will not calculate or show what was downloaded (nor delete from using --delete-after). (ftp_loop): --spider will not HTML-ify listing. Index: ftp.c =================================================================== RCS file: /pack/anoncvs/wget/src/ftp.c,v retrieving revision 1.61 diff -u -r1.61 ftp.c --- ftp.c 2003/01/11 20:12:35 1.61 +++ ftp.c 2003/02/07 01:48:37 @@ -818,6 +818,9 @@ expected_bytes = ftp_expected_bytes (ftp_last_respline); } /* cmd & DO_LIST */ + if (!(cmd & (DO_LIST | DO_RETR)) || (opt.spider && !(cmd & DO_LIST))) + return RETRFINISHED; + /* Some FTP servers return the total length of file after REST command, others just return the remaining size. */ if (*len && restval && expected_bytes @@ -828,9 +831,6 @@ } /* If no transmission was required, then everything is OK. */ - if (!(cmd & (DO_LIST | DO_RETR))) - return RETRFINISHED; - if (!pasv_mode_open) /* we are not using pasive mode so we need to accept */ { @@ -1153,7 +1153,8 @@ } /* Time? */ tms = time_str (NULL); - tmrate = retr_rate (len - restval, con->dltime, 0); + if (!opt.spider) + tmrate = retr_rate (len - restval, con->dltime, 0); /* If we get out of the switch above without continue'ing, we've successfully downloaded a file. 
Remember this fact. */ @@ -1164,8 +1165,9 @@ CLOSE (RBUF_FD (&con->rbuf)); rbuf_uninitialize (&con->rbuf); } - logprintf (LOG_VERBOSE, _("%s (%s) - `%s' saved [%ld]\n\n"), - tms, tmrate, locf, len); + if (!opt.spider) + logprintf (LOG_VERBOSE, _("%s (%s) - `%s' saved [%ld]\n\n"), + tms, tmrate, locf, len); if (!opt.verbose && !opt.quiet) { /* Need to hide the password from the URL. The `if' is here @@ -1192,7 +1194,7 @@ by the more specific option --dont-remove-listing, and the code to do this deletion is in another function. */ } - else + else if (!opt.spider) /* This is not a directory listing file. */ { /* Unlike directory listing files, don't pretend normal files weren't @@ -1718,7 +1720,7 @@ if (res == RETROK) { - if (opt.htmlify) + if (opt.htmlify && !opt.spider) { char *filename = (opt.output_document ? xstrdup (opt.output_document)
Index: ChangeLog =================================================================== RCS file: /pack/anoncvs/wget/src/ChangeLog,v retrieving revision 1.417 diff -u -r1.417 ChangeLog --- ChangeLog 2003/01/11 20:12:35 1.417 +++ ChangeLog 2003/02/07 01:49:49 @@ -1,3 +1,11 @@ +2003-02-06 Aaron Hawley <[EMAIL PROTECTED]> + + * ftp.c + (getftp): --spider option should now work with FTP. + (ftp_loop_internal): --spider option will not calculate or + show what was downloaded (nor delete from using --delete-after). + (ftp_loop): --spider will not HTML-ify listing. + 2003-01-11 Ian Abbott <[EMAIL PROTECTED]> * ftp.c (ftp_retrieve_glob): Reject insecure filenames as determined
Index: ftp.c =================================================================== RCS file: /pack/anoncvs/wget/src/ftp.c,v retrieving revision 1.61 diff -u -r1.61 ftp.c --- ftp.c 2003/01/11 20:12:35 1.61 +++ ftp.c 2003/02/07 01:48:37 @@ -818,6 +818,9 @@ expected_bytes = ftp_expected_bytes (ftp_last_respline); } /* cmd & DO_LIST */ + if (!(cmd & (DO_LIST | DO_RETR)) || (opt.spider && !(cmd & DO_LIST))) + return RETRFINISHED; + /* Some FTP servers return the total length of file after REST command, others just return the remaining size. */ if (*len && restval && expected_bytes @@ -828,9 +831,6 @@ } /* If no transmission was required, then everything is OK. */ - if (!(cmd & (DO_LIST | DO_RETR))) - return RETRFINISHED; - if (!pasv_mode_open) /* we are not using pasive mode so we need to accept */ { @@ -1153,7 +1153,8 @@ } /* Time? */ tms = time_str (NULL); - tmrate = retr_rate (len - restval, con->dltime, 0); + if (!opt.spider) + tmrate = retr_rate (len - restval, con->dltime, 0); /* If we get out of the switch above without continue'ing, we've successfully downloaded a file. Remember this fact. */ @@ -1164,8 +1165,9 @@ CLOSE (RBUF_FD (&con->rbuf)); rbuf_uninitialize (&con->rbuf); } - logprintf (LOG_VERBOSE, _("%s (%s) - `%s' saved [%ld]\n\n"), - tms, tmrate, locf, len); + if (!opt.spider) + logprintf (LOG_VERBOSE, _("%s (%s) - `%s' saved [%ld]\n\n"), + tms, tmrate, locf, len); if (!opt.verbose && !opt.quiet) { /* Need to hide the password from the URL. The `if' is here @@ -1192,7 +1194,7 @@ by the more specific option --dont-remove-listing, and the code to do this deletion is in another function. */ } - else + else if (!opt.spider) /* This is not a directory listing file. */ { /* Unlike directory listing files, don't pretend normal files weren't @@ -1718,7 +1720,7 @@ if (res == RETROK) { - if (opt.htmlify) + if (opt.htmlify && !opt.spider) { char *filename = (opt.output_document ? xstrdup (opt.output_document)