Hello,
after reading so much about regex support for wget (especially the lack
of it) and experiencing myself how annoying it can be if you have
downloaded a hundred /thumbs/ directories, I tried to implement regex
support myself.
I used the PCRE library from http://www.pcre.org, which was pretty easy to
use, given the fact that I had never touched a single line of C (or
C++) code before.
Unfortunately I don't know jack about autoconf, makefiles etc.
The patch in its current form is only useful with MSVC as I didn't alter
any other makefiles.
I hope someone can do that for me and include the pcre license from
http://www.pcre.org/license.txt
As you can see, pcre.h and pcre.lib need to be somewhere the compiler can
find them, and HAVE_REGEX needs to be defined.
Files and directories are ignored if the regex given on the command line
matches. For the syntax, see wget --help.
The patch was made against current cvs code.
Hope this helps somehow.
Tobias
diff -ruwb wget-regex2/src/ftp.c wget-regex3/src/ftp.c
--- wget-regex2/src/ftp.c Sat Apr 02 02:41:04 2005
+++ wget-regex3/src/ftp.c Wed Apr 06 18:55:24 2005
@@ -1749,7 +1749,11 @@
return res;
/* First: weed out that do not conform the global rules given in
opt.accepts and opt.rejects. */
+#ifdef HAVE_REGEX
+ if (opt.accepts || opt.rejects || opt.exclregfile)
+#else
if (opt.accepts || opt.rejects)
+#endif /* HAVE_REGEX */
{
f = start;
while (f)
diff -ruwb wget-regex2/src/init.c wget-regex3/src/init.c
--- wget-regex2/src/init.c Sun Mar 20 17:07:38 2005
+++ wget-regex3/src/init.c Wed Apr 06 19:37:13 2005
@@ -137,6 +137,10 @@
#endif
{ excludedirectories, opt.excludes, cmd_directory_vector },
{ excludedomains, opt.exclude_domains, cmd_vector },
+#ifdef HAVE_REGEX
+ { excluderegexdir, opt.exclregdir,cmd_string },
+ { excluderegexfile, opt.exclregfile, cmd_string },
+#endif /* HAVE_REGEX */
{ followftp, opt.follow_ftp,cmd_boolean },
{ followtags, opt.follow_tags, cmd_vector },
{ forcehtml, opt.force_html,cmd_boolean },
@@ -1367,6 +1371,12 @@
xfree_null (opt.sslcertkey);
xfree_null (opt.sslcertfile);
#endif /* HAVE_SSL */
+#ifdef HAVE_REGEX
+ xfree_null (opt.exclregdir_c)
+ xfree_null (opt.exclregfile_c)
+ xfree_null (opt.exclregdir);
+ xfree_null (opt.exclregfile);
+#endif /* HAVE_REGEX */
xfree_null (opt.bind_address);
xfree_null (opt.cookies_input);
xfree_null (opt.cookies_output);
diff -ruwb wget-regex2/src/main.c wget-regex3/src/main.c
--- wget-regex2/src/main.c Tue Mar 22 15:20:02 2005
+++ wget-regex3/src/main.c Wed Apr 06 19:03:56 2005
@@ -68,6 +68,10 @@
/* On GNU system this will include system-wide getopt.h. */
#include getopt.h
+#ifdef HAVE_REGEX
+#include pcre.h
+#endif /* HAVE_REGEX */
+
#ifndef PATH_SEPARATOR
# define PATH_SEPARATOR '/'
#endif
@@ -176,6 +180,10 @@
{ egd-file, 0, OPT_VALUE, egdfile, -1 },
{ exclude-directories, 'X', OPT_VALUE, excludedirectories, -1 },
{ exclude-domains, 0, OPT_VALUE, excludedomains, -1 },
+#ifdef HAVE_REGEX
+{ exclude-regex-dirs, 0, OPT_VALUE, excluderegexdir, -1 },
+{ exclude-regex-files, 0, OPT_VALUE, excluderegexfile, -1 },
+#endif
{ execute, 'e', OPT__EXECUTE, NULL, required_argument },
{ follow-ftp, 0, OPT_BOOLEAN, followftp, -1 },
{ follow-tags, 0, OPT_VALUE, followtags, -1 },
@@ -591,6 +599,12 @@
-D, --domains=LIST comma-separated list of accepted
domains.\n),
N_(\
--exclude-domains=LIST comma-separated list of rejected
domains.\n),
+#ifdef HAVE_REGEX
+N_(\
+ --exclude-regex-dirs=PATTERN pattern of directories to reject.\n),
+ N_(\
+ --exclude-regex-files=PATTERN pattern of files to reject.\n),
+#endif /* HAVE_REGEX */
N_(\
--follow-ftpfollow FTP links from HTML documents.\n),
N_(\
@@ -647,6 +661,7 @@
int i, ret, longindex;
int nurl, status;
int append_to_log = 0;
+ const char *error;
i18n_initialize ();
@@ -819,6 +834,40 @@
exit (1);
}
#endif
+
+#ifdef HAVE_REGEX
+ if (opt.exclregdir)
+{
+ opt.exclregdir_c = pcre_compile(
+opt.exclregdir, /* the pattern */
+0,/* default options */
+error, /* for error message */
+i, /* for error offset */
+NULL);/* use default character tables */
+
+ if (opt.exclregdir_c == NULL)
+{
+ printf (_(Directory RegEx compilation failed at offset %d: %s\n),
i, error);
+ exit (1);
+}
+}
+
+if (opt.exclregfile)
+{
+ opt.exclregfile_c = pcre_compile(
+opt.exclregfile, /* the pattern */
+0,/* default options */
+error, /* for error message */
+i,