Hi,

Sorry, my previous replies to various concerns were all direct rather than
to the list.  In summary, I promised to fix the style issues and I also
agreed that DEBUG on failure to set xattr was a good approach.  Also, I
confirmed that I work for Google.

Attached is a patch for the style issues and DEBUG.



On Fri, Jul 22, 2016 at 10:39 PM, Ander Juaristi <[email protected]> wrote:

> Also, shouldn't we print something when xattrs are not supported? In
> Linux fsetxattr() returns -1 when that is the case.
>
> <pedantic_comment>
> Extended attributes are not supported by all filesystems, eg:
>
> dd if=/dev/zero bs=64M count=1 of=./minix.img
> mkfs.minix ./minix.img
> mount -o loop -t minix minix.img /tmp/minix-test
>
> And then:
> wget --xattr -O /tmp/minix-test/archive.html http://archive.org
> getfattr -d /tmp/minix-test/archive.html
> <empty>
>
> Yeah, Minix is not used in the real world, but NTFS is (sometimes in the
> form of portable USB drives) and I think it also does not support
> extended attributes. Didn't try it - more hassle to set up in 5 minutes.
>
> If we trust Wikipedia [1], other filesystems from the MS world (such as
> FAT) do not support xattrs either.
> </pedantic_comment>
>
> [1] https://en.wikipedia.org/wiki/Extended_file_attributes#Linux
>
> On 21/07/16 06:33, Sean Burford wrote:
> > Hi,
> >
> > I find it useful to keep track of where files are downloaded from.  POSIX
> > extended attributes provide a lightweight portable method of keeping this
> > information across Linux, OS/X, FreeBSD and many other platforms.
> >
> > This compliments wget's existing WARC support, which serves a related but
> > different use case closer to tcpdump or tar for web pages.  Extended
> > attributes can provide a quick answer to "where did I get this file from
> > again?"
> >
> > This patch changes:
> > *   autoconf detects whether extended attributes are available and
> enables
> > the code if they are.
> > *   The new flags --xattr and --no-xattr control whether xattr is
> enabled.
> > *   The new command "xattr = (on|off)" can be used in ~/.wgetrc or
> > /etc/wgetrc
> > *   The original and redirected URLs are recorded as shown below.
> > *   This works for both single fetches and recursive mode.
> >
> > Here is an example, where http://archive.org redirects to
> > https://archive.org:
> > $ wget --xattr http://archive.org
> > ...
> > $ getfattr -d index.html
> > user.xdg.origin.url="https://archive.org/";
> > user.xdg.referrer.url="http://archive.org/";
> >
> > These attributes were chosen based on those stored by Google Chrome (
> > https://bugs.chromium.org/p/chromium/issues/detail?id=45903) and curl (
> > https://github.com/curl/curl/blob/master/src/tool_xattr.c)
> >
>
>


-- 
Sean Burford <[email protected]>
From c564c41d8369465e224a4d3349b45c65b1b64572 Mon Sep 17 00:00:00 2001
From: Sean Burford <[email protected]>
Date: Wed, 27 Jul 2016 09:30:41 +1000
Subject: [PATCH] [xattr] Style fixes and DEBUG on setxattr failure.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ander requested style fixes:
 - Don't use braces on single line blocks after if.
 - Use a space before function () and ?
I fixed a few more space style problems, let me know if that is too much.

Tim suggested adding DEBUG on failure so I've added that.

$ dd if=/dev/zero of=/tmp/vfat bs=1M count=1024
$ mkfs.vfat /tmp/vfat
$ sudo mount -o loop -o user=$USER /tmp/vfat /tmp/tmpfs

$ ./src/wget --debug -O /tmp/robots.txt http://www.google.com/robots.txt
$ getfattr /tmp/robots.txt
user.xdg.origin.url

$ ./src/wget --debug -O /tmp/tmpfs/robots.txt http://www.google.com/robots.txt
Failed to set xattr ‘user.xdg.origin.url’.
$ getfattr /tmp/tmpfs/robots.txt
---
 src/ftp.c   | 14 ++++++-------
 src/http.c  | 70 +++++++++++++++++++++++++++++--------------------------------
 src/xattr.c |  8 +++----
 src/xattr.h |  8 +++----
 4 files changed, 47 insertions(+), 53 deletions(-)

diff --git a/src/ftp.c b/src/ftp.c
index 27d90d6..d5d0032 100644
--- a/src/ftp.c
+++ b/src/ftp.c
@@ -242,7 +242,7 @@ print_length (wgint size, wgint start, bool authoritative)
 static uerr_t ftp_get_listing (struct url *, struct url *, ccon *, struct fileinfo **);
 
 static uerr_t
-get_ftp_greeting(int csock, ccon *con)
+get_ftp_greeting (int csock, ccon *con)
 {
   uerr_t err = 0;
 
@@ -875,7 +875,7 @@ Error in server response, closing control connection.\n"));
              Unlike the rest of this block, this particular behavior
              _is_ VMS-specific, so it gets its own VMS test.
           */
-          if ((con->rs == ST_VMS) && (strchr( target, '/') != NULL))
+          if ((con->rs == ST_VMS) && (strchr (target, '/') != NULL))
             {
               cwd_end = 3;
               DEBUGP (("Using extra \"CWD []\" step for VMS server.\n"));
@@ -1551,9 +1551,7 @@ Error in server response, closing control connection.\n"));
 
 #ifdef ENABLE_XATTR
   if (opt.enable_xattr)
-    {
-      set_file_metadata (u->url, NULL, fp);
-    }
+    set_file_metadata (u->url, NULL, fp);
 #endif
 
   fd_close (local_sock);
@@ -1651,10 +1649,10 @@ Error in server response, closing control connection.\n"));
 #ifdef __VMS
       char *targ;
 
-      targ = ods_conform( con->target);
+      targ = ods_conform (con->target);
       if (targ != con->target)
         {
-          xfree( con->target);
+          xfree (con->target);
           con->target = targ;
         }
 #endif /* def __VMS */
@@ -2513,7 +2511,7 @@ is_invalid_entry (struct fileinfo *f)
   while (cur->next)
     {
       cur = cur->next;
-      if (strcmp(f_name, cur->name) == 0)
+      if (strcmp (f_name, cur->name) == 0)
           return true;
     }
   return false;
diff --git a/src/http.c b/src/http.c
index 0cd142c..1091121 100644
--- a/src/http.c
+++ b/src/http.c
@@ -423,7 +423,7 @@ maybe_send_basic_creds (const char *hostname, const char *user,
       do_challenge = true;
     }
   else if (basic_authed_hosts
-      && hash_table_contains(basic_authed_hosts, hostname))
+      && hash_table_contains (basic_authed_hosts, hostname))
     {
       DEBUGP (("Found %s in basic_authed_hosts.\n", quote (hostname)));
       do_challenge = true;
@@ -449,9 +449,9 @@ register_basic_auth_host (const char *hostname)
     {
       basic_authed_hosts = make_nocase_string_hash_table (1);
     }
-  if (!hash_table_contains(basic_authed_hosts, hostname))
+  if (!hash_table_contains (basic_authed_hosts, hostname))
     {
-      hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL);
+      hash_table_put (basic_authed_hosts, xstrdup (hostname), NULL);
       DEBUGP (("Inserted %s into basic_authed_hosts\n", quote (hostname)));
     }
 }
@@ -849,7 +849,7 @@ resp_free (struct response **resp_ref)
    caused crashes in UTF-8 locales.  */
 
 static void
-print_response_line(const char *prefix, const char *b, const char *e)
+print_response_line (const char *prefix, const char *b, const char *e)
 {
   char *copy;
   BOUNDED_TO_ALLOCA(b, e, copy);
@@ -875,7 +875,7 @@ print_server_response (const struct response *resp, const char *prefix)
         --e;
       if (b < e && e[-1] == '\r')
         --e;
-      print_response_line(prefix, b, e);
+      print_response_line (prefix, b, e);
     }
 }
 
@@ -1029,18 +1029,18 @@ skip_short_body (int fd, wgint contlen, bool chunked)
    or a fragment of a long parameter value
 */
 static int
-modify_param_name(param_token *name)
+modify_param_name (param_token *name)
 {
   const char *delim1 = memchr (name->b, '*', name->e - name->b);
   const char *delim2 = memrchr (name->b, '*', name->e - name->b);
 
   int result;
 
-  if(delim1 == NULL)
+  if (delim1 == NULL)
     {
       result = NOT_RFC2231;
     }
-  else if(delim1 == delim2)
+  else if (delim1 == delim2)
     {
       if ((name->e - 1) == delim1)
         {
@@ -1158,12 +1158,12 @@ extract_param (const char **source, param_token *name, param_token *value,
     }
   *source = p;
 
-  param_type = modify_param_name(name);
+  param_type = modify_param_name (name);
   if (param_type != NOT_RFC2231)
     {
       if (param_type == RFC2231_ENCODING && is_url_encoded)
         *is_url_encoded = true;
-      modify_param_value(value, param_type);
+      modify_param_value (value, param_type);
     }
   return true;
 }
@@ -1178,8 +1178,8 @@ static void
 append_value_to_filename (char **filename, param_token const * const value,
                           bool is_url_encoded)
 {
-  int original_length = strlen(*filename);
-  int new_length = strlen(*filename) + (value->e - value->b);
+  int original_length = strlen (*filename);
+  int new_length = strlen (*filename) + (value->e - value->b);
   *filename = xrealloc (*filename, new_length+1);
   memcpy (*filename + original_length, value->b, (value->e - value->b));
   (*filename)[new_length] = '\0';
@@ -1284,7 +1284,7 @@ parse_strict_transport_security (const char *header, time_t *max_age, bool *incl
         {
           if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "max-age"))
             {
-              xfree(c_max_age);
+              xfree (c_max_age);
               c_max_age = strdupdelim (value.b, value.e);
             }
           else if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "includeSubDomains"))
@@ -1886,7 +1886,7 @@ initialize_request (struct url *u, struct http_stat *hs, int *dt, struct url *pr
     {
       /* If this is a host for which we've already received a Basic
        * challenge, we'll go ahead and send Basic authentication creds. */
-      *basic_auth_finished = maybe_send_basic_creds(u->host, *user, *passwd, req);
+      *basic_auth_finished = maybe_send_basic_creds (u->host, *user, *passwd, req);
     }
 
   /* Generate the Host header, HOST:PORT.  Take into account that:
@@ -2122,7 +2122,7 @@ establish_connection (struct url *u, struct url **conn_ref,
               xfree (head);
               return HERR;
             }
-          xfree(hs->message);
+          xfree (hs->message);
           hs->message = xstrdup (message);
           resp_free (&resp);
           xfree (head);
@@ -3000,7 +3000,7 @@ gethttp (struct url *u, struct url *original_url, struct http_stat *hs,
   hs->res = -1;
   hs->rderrmsg = NULL;
   hs->newloc = NULL;
-  xfree(hs->remote_time);
+  xfree (hs->remote_time);
   hs->error = NULL;
   hs->message = NULL;
 
@@ -3139,7 +3139,7 @@ gethttp (struct url *u, struct url *original_url, struct http_stat *hs,
       bool warc_result;
 
       /* Generate a timestamp and uuid for this request. */
-      warc_timestamp (warc_timestamp_str, sizeof(warc_timestamp_str));
+      warc_timestamp (warc_timestamp_str, sizeof (warc_timestamp_str));
       warc_uuid_str (warc_request_uuid);
 
       /* Create a request record and store it in the WARC file. */
@@ -3185,7 +3185,7 @@ gethttp (struct url *u, struct url *original_url, struct http_stat *hs,
         resp = resp_new (head);
 
         /* Check for status line.  */
-        xfree(message);
+        xfree (message);
         statcode = resp_status (resp, &message);
         if (statcode < 0)
           {
@@ -3214,7 +3214,7 @@ gethttp (struct url *u, struct url *original_url, struct http_stat *hs,
     while (_repeat);
   }
 
-  xfree(hs->message);
+  xfree (hs->message);
   hs->message = xstrdup (message);
   if (!opt.server_response)
     logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
@@ -3443,7 +3443,7 @@ gethttp (struct url *u, struct url *original_url, struct http_stat *hs,
               tmp = parse_charset (tmp2);
               if (tmp)
                 set_content_encoding (iri, tmp);
-              xfree(tmp);
+              xfree (tmp);
             }
 #endif
         }
@@ -3761,13 +3761,9 @@ gethttp (struct url *u, struct url *original_url, struct http_stat *hs,
   if (opt.enable_xattr)
     {
       if (original_url != u)
-        {
-          set_file_metadata (u->url, original_url->url, fp);
-        }
+        set_file_metadata (u->url, original_url->url, fp);
       else
-        {
-          set_file_metadata (u->url, NULL, fp);
-        }
+        set_file_metadata (u->url, NULL, fp);
     }
 #endif
 
@@ -4633,7 +4629,7 @@ digest_authentication_encode (const char *au, const char *user,
           }
     }
 
-  if (qop != NULL && strcmp(qop,"auth"))
+  if (qop != NULL && strcmp (qop,"auth"))
     {
       logprintf (LOG_NOTQUIET, _("Unsupported quality of protection '%s'.\n"), qop);
       xfree (qop); /* force freeing mem and return */
@@ -4678,7 +4674,7 @@ digest_authentication_encode (const char *au, const char *user,
     if (algorithm && !strcmp (algorithm, "MD5-sess"))
       {
         /* A1BUF = H( H(user ":" realm ":" password) ":" nonce ":" cnonce ) */
-        snprintf (cnonce, sizeof (cnonce), "%08x", random_number(INT_MAX));
+        snprintf (cnonce, sizeof (cnonce), "%08x", random_number (INT_MAX));
 
         md5_init_ctx (&ctx);
         /* md5_process_bytes (hash, MD5_DIGEST_SIZE, &ctx); */
@@ -4700,12 +4696,12 @@ digest_authentication_encode (const char *au, const char *user,
     md5_finish_ctx (&ctx, hash);
     dump_hash (a2buf, hash);
 
-    if (qop && !strcmp(qop, "auth"))
+    if (qop && !strcmp (qop, "auth"))
       {
         /* RFC 2617 Digest Access Authentication */
         /* generate random hex string */
         if (!*cnonce)
-          snprintf(cnonce, sizeof(cnonce), "%08x", random_number(INT_MAX));
+          snprintf (cnonce, sizeof (cnonce), "%08x", random_number (INT_MAX));
 
         /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" noncecount ":" clientnonce ":" qop ": " A2BUF) */
         md5_init_ctx (&ctx);
@@ -4715,9 +4711,9 @@ digest_authentication_encode (const char *au, const char *user,
         md5_process_bytes ((unsigned char *)":", 1, &ctx);
         md5_process_bytes ((unsigned char *)"00000001", 8, &ctx); /* TODO: keep track of server nonce values */
         md5_process_bytes ((unsigned char *)":", 1, &ctx);
-        md5_process_bytes ((unsigned char *)cnonce, strlen(cnonce), &ctx);
+        md5_process_bytes ((unsigned char *)cnonce, strlen (cnonce), &ctx);
         md5_process_bytes ((unsigned char *)":", 1, &ctx);
-        md5_process_bytes ((unsigned char *)qop, strlen(qop), &ctx);
+        md5_process_bytes ((unsigned char *)qop, strlen (qop), &ctx);
         md5_process_bytes ((unsigned char *)":", 1, &ctx);
         md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
         md5_finish_ctx (&ctx, hash);
@@ -4767,12 +4763,12 @@ digest_authentication_encode (const char *au, const char *user,
 
     if (opaque)
       {
-        res_len += snprintf(res + res_len, res_size - res_len, ", opaque=\"%s\"", opaque);
+        res_len += snprintf (res + res_len, res_size - res_len, ", opaque=\"%s\"", opaque);
       }
 
     if (algorithm)
       {
-        snprintf(res + res_len, res_size - res_len, ", algorithm=\"%s\"", algorithm);
+        snprintf (res + res_len, res_size - res_len, ", algorithm=\"%s\"", algorithm);
       }
   }
 
@@ -4921,7 +4917,7 @@ ensure_extension (struct http_stat *hs, const char *ext, int *dt)
 #ifdef TESTING
 
 const char *
-test_parse_range_header(void)
+test_parse_range_header (void)
 {
   unsigned i;
   static const struct {
@@ -4966,7 +4962,7 @@ test_parse_range_header(void)
 }
 
 const char *
-test_parse_content_disposition(void)
+test_parse_content_disposition (void)
 {
   unsigned i;
   static const struct {
@@ -4988,7 +4984,7 @@ test_parse_content_disposition(void)
 filename*1=\"B\"", "AA.ext", true },
   };
 
-  for (i = 0; i < countof(test_array); ++i)
+  for (i = 0; i < countof (test_array); ++i)
     {
       char *filename;
       bool res;
diff --git a/src/xattr.c b/src/xattr.c
index 11247db..f2b208a 100644
--- a/src/xattr.c
+++ b/src/xattr.c
@@ -32,9 +32,11 @@ write_xattr_metadata (const char *name, const char *value, FILE *fp)
 
   if (name && value && fp)
     {
-      retval = fsetxattr (fileno(fp), name, value, strlen(value), 0);
+      retval = fsetxattr (fileno (fp), name, value, strlen (value), 0);
       /* FreeBSD's extattr_set_fd returns the length of the extended attribute. */
       retval = (retval < 0) ? retval : 0;
+      if (retval)
+        DEBUGP (("Failed to set xattr %s.\n", quote(name)));
     }
 
   return retval;
@@ -71,9 +73,7 @@ set_file_metadata (const char *origin_url, const char *referrer_url, FILE *fp)
 
   retval = write_xattr_metadata ("user.xdg.origin.url", escnonprint_uri (origin_url), fp);
   if ((!retval) && referrer_url)
-    {
-      retval = write_xattr_metadata ("user.xdg.referrer.url", escnonprint_uri (referrer_url), fp);
-    }
+    retval = write_xattr_metadata ("user.xdg.referrer.url", escnonprint_uri (referrer_url), fp);
 
   return retval;
 }
diff --git a/src/xattr.h b/src/xattr.h
index 375d34f..e7ebed8 100644
--- a/src/xattr.h
+++ b/src/xattr.h
@@ -30,15 +30,15 @@ int set_file_metadata (const char *origin_url, const char *referrer_url, FILE *f
 #elif defined(__APPLE__)
 /* libc on OS/X has fsetxattr (6 arguments). */
 #  include <sys/xattr.h>
-#  define fsetxattr(file, name, buffer, size, flags) \
-          fsetxattr((file), (name), (buffer), (size), 0, (flags))
+#  define fsetxattr (file, name, buffer, size, flags) \
+          fsetxattr ((file), (name), (buffer), (size), 0, (flags))
 #  define USE_XATTR
 #elif defined(__FreeBSD_version) && (__FreeBSD_version > 500000)
 /* FreeBSD */
 #  include <sys/types.h>
 #  include <sys/extattr.h>
-#  define fsetxattr(file, name, buffer, size, flags) \
-          extattr_set_fd((file), EXTATTR_NAMESPACE_USER, (name), (buffer), (size))
+#  define fsetxattr (file, name, buffer, size, flags) \
+          extattr_set_fd ((file), EXTATTR_NAMESPACE_USER, (name), (buffer), (size))
 #  define USE_XATTR
 #endif
 
-- 
2.8.0.rc3.226.g39d4020

Reply via email to