Hello,
I created a patch to store the URL inside the user xattrs of the downloaded
file; this way, its origin can be identified afterwards.
I pushed the change to my GitHub account and attached the diff below. I am
still working on portability issues, but I would like to hear some opinions
on it:
http://github.com/wertarbyte/wget/tree/xattrurl
Sincerely,
Stefan Tomanek
diff --git a/src/Makefile.am b/src/Makefile.am
index ac9c9c9..31d0032 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -50,7 +50,7 @@ wget_SOURCES = cmpt.c connect.c convert.c cookies.c ftp.c \
http.h http-ntlm.h init.h log.h mswindows.h netrc.h \
options.h progress.h ptimer.h recur.h res.h retr.h \
spider.h ssl.h sysdep.h url.h utils.h wget.h iri.h \
- exits.h gettext.h
+ exits.h gettext.h xattr.c xattr.h
nodist_wget_SOURCES = version.c
EXTRA_wget_SOURCES = mswindows.c iri.c
LDADD = $(LIBOBJS) ../lib/libgnu.a @MD5_LDADD@
diff --git a/src/Makefile.in b/src/Makefile.in
index 46e9b28..d3b451f 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -120,7 +120,7 @@ am__libunittest_a_SOURCES_DIST = cmpt.c connect.c convert.c cookies.c \
hash.h host.h html-parse.h html-url.h http.h http-ntlm.h \
init.h log.h mswindows.h netrc.h options.h progress.h ptimer.h \
recur.h res.h retr.h spider.h ssl.h sysdep.h url.h utils.h \
- wget.h iri.h exits.h gettext.h test.c test.h
+ wget.h iri.h exits.h gettext.h test.c test.h xattr.c xattr.h
@iri_is_enabled_t...@am__objects_1 = libunittest_a-iri.$(OBJEXT)
am__objects_2 = libunittest_a-cmpt.$(OBJEXT) \
libunittest_a-connect.$(OBJEXT) \
@@ -159,7 +159,7 @@ am__wget_SOURCES_DIST = cmpt.c connect.c convert.c cookies.c ftp.c \
hash.h host.h html-parse.h html-url.h http.h http-ntlm.h \
init.h log.h mswindows.h netrc.h options.h progress.h ptimer.h \
recur.h res.h retr.h spider.h ssl.h sysdep.h url.h utils.h \
- wget.h iri.h exits.h gettext.h
+ wget.h iri.h exits.h gettext.h xattr.h xattr.c
@iri_is_enabled_t...@am__objects_3 = iri.$(OBJEXT)
am_wget_OBJECTS = cmpt.$(OBJEXT) connect.$(OBJEXT) convert.$(OBJEXT) \
cookies.$(OBJEXT) ftp.$(OBJEXT) css.$(OBJEXT) \
@@ -170,6 +170,7 @@ am_wget_OBJECTS = cmpt.$(OBJEXT) connect.$(OBJEXT) convert.$(OBJEXT) \
ptimer.$(OBJEXT) recur.$(OBJEXT) res.$(OBJEXT) retr.$(OBJEXT) \
snprintf.$(OBJEXT) spider.$(OBJEXT) url.$(OBJEXT) \
utils.$(OBJEXT) exits.$(OBJEXT) build_info.$(OBJEXT) \
+ xattr.$(OBJEXT) \
$(am__objects_3)
nodist_wget_OBJECTS = version.$(OBJEXT)
wget_OBJECTS = $(am_wget_OBJECTS) $(nodist_wget_OBJECTS)
@@ -678,7 +679,7 @@ wget_SOURCES = cmpt.c connect.c convert.c cookies.c ftp.c \
http.h http-ntlm.h init.h log.h mswindows.h netrc.h \
options.h progress.h ptimer.h recur.h res.h retr.h \
spider.h ssl.h sysdep.h url.h utils.h wget.h iri.h \
- exits.h gettext.h
+ exits.h gettext.h xattr.h xattr.c
nodist_wget_SOURCES = version.c
EXTRA_wget_SOURCES = mswindows.c iri.c
diff --git a/src/ftp.c b/src/ftp.c
index dfdd83c..4a4e84a 100644
--- a/src/ftp.c
+++ b/src/ftp.c
@@ -51,6 +51,8 @@ as that of the covered work. */
#include "convert.h" /* for downloaded_file */
#include "recur.h" /* for INFINITE_RECURSION */
+#include "xattr.h"
+
#ifdef __VMS
# include "vms.h"
#endif /* def __VMS */
@@ -1206,6 +1208,11 @@ Error in server response, closing control connection.\n"));
else
fp = output_stream;
+ if (opt.xattr_url && file_exists_p(con->target))
+ {
+ set_xattr( u, con->target );
+ }
+
if (passed_expected_bytes)
{
print_length (passed_expected_bytes, restval, true);
diff --git a/src/http.c b/src/http.c
index 3a46764..42f7900 100644
--- a/src/http.c
+++ b/src/http.c
@@ -62,6 +62,8 @@ as that of the covered work. */
#include "convert.h"
#include "spider.h"
+#include "xattr.h"
+
#ifdef TESTING
#include "test.h"
#endif
@@ -2359,6 +2361,11 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
else
fp = output_stream;
+ if (opt.xattr_url && file_exists_p(hs->local_file))
+ {
+ set_xattr( u, hs->local_file );
+ }
+
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
diff --git a/src/init.c b/src/init.c
index 5a05d03..6565655 100644
--- a/src/init.c
+++ b/src/init.c
@@ -252,6 +252,7 @@ static const struct {
#ifdef USE_WATT32
{ "wdebug", &opt.wdebug, cmd_boolean },
#endif
+ { "xattrurl", &opt.xattr_url, cmd_boolean },
};
/* Look up CMDNAME in the commands[] and return its position in the
diff --git a/src/main.c b/src/main.c
index dddc4b2..06e3504 100644
--- a/src/main.c
+++ b/src/main.c
@@ -55,6 +55,7 @@ as that of the covered work. */
#include "convert.h"
#include "spider.h"
#include "http.h" /* for save_cookies */
+#include "xattr.h"
#include <getopt.h>
#include <getpass.h>
@@ -258,6 +259,7 @@ static struct cmdline_option option_data[] =
{ "retry-connrefused", 0, OPT_BOOLEAN, "retryconnrefused", -1 },
{ "save-cookies", 0, OPT_VALUE, "savecookies", -1 },
{ "save-headers", 0, OPT_BOOLEAN, "saveheaders", -1 },
+ { "xattr-url", 0, OPT_BOOLEAN, "xattrurl", -1 },
{ IF_SSL ("secure-protocol"), 0, OPT_VALUE, "secureprotocol", -1 },
{ "server-response", 'S', OPT_BOOLEAN, "serverresponse", -1 },
{ "span-hosts", 'H', OPT_BOOLEAN, "spanhosts", -1 },
@@ -555,6 +557,8 @@ HTTP options:\n"),
N_("\
--save-headers save the HTTP headers to file.\n"),
N_("\
+ --xattr-url save the URL to extended file attributes.\n"),
+ N_("\
-U, --user-agent=AGENT identify as AGENT instead of Wget/VERSION.\n"),
N_("\
--no-http-keep-alive disable HTTP keep-alive (persistent connections).\n"),
diff --git a/src/options.h b/src/options.h
index a895863..5e4b6f4 100644
--- a/src/options.h
+++ b/src/options.h
@@ -127,6 +127,7 @@ struct options
bool server_response; /* Do we print server response? */
bool save_headers; /* Do we save headers together with
file? */
+ bool xattr_url;
#ifdef ENABLE_DEBUG
bool debug; /* Debugging on/off */
diff --git a/src/xattr.c b/src/xattr.c
new file mode 100644
index 0000000..5eba265
--- /dev/null
+++ b/src/xattr.c
@@ -0,0 +1,29 @@
+/* Store the origin URL of a downloaded file in its extended attributes.  */
+
+#include "wget.h"
+#include "url.h"
+#include "xattr.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <attr/xattr.h>
+
+/* Write the string form of ORIGIN (as produced by url_string() with
+   URL_AUTH_HIDE) into the "user.wget.origin" extended attribute of
+   FILENAME.  Returns the setxattr() result: 0 on success, nonzero on
+   failure, in which case the error is also logged.  */
+int set_xattr( struct url *origin, const char *filename ) {
+  char *url = url_string(origin, URL_AUTH_HIDE);
+  int err;
+  logprintf (LOG_VERBOSE, _("Placing URL '%s' in extended attributes.\n"), url);
+  err = setxattr( filename, "user.wget.origin", url, strlen(url), 0 );
+  if (err) {
+    /* Snapshot errno before any further library call can clobber it.  */
+    int saved_errno = errno;
+    logprintf (LOG_NOTQUIET, "setxattr: %s\n", strerror (saved_errno));
+  }
+  /* url_string() returns a heap-allocated string owned by the caller;
+     release it so every download does not leak a URL-sized buffer.  */
+  free (url);
+  return err;
+}
diff --git a/src/xattr.h b/src/xattr.h
new file mode 100644
index 0000000..d0f5e06
--- /dev/null
+++ b/src/xattr.h
@@ -0,0 +1,11 @@
+#ifndef XATTR_H
+#define XATTR_H
+
+/* Forward declaration so this header does not depend on include order.  */
+struct url;
+
+/* Store the string form of ORIGIN in the "user.wget.origin" extended
+   attribute of FILENAME.  Returns 0 on success, nonzero on failure.  */
+int set_xattr( struct url *origin, const char *filename );
+
+#endif /* XATTR_H */
--
1.7.1