Hello!

I think it can sometimes be useful to keep downloaded Metalink files that
have a bad hash.

The default wget behaviour is to delete such files.

This patch provides a way to keep files that have a bad hash, through the
option --keep-badhash. It appends the suffix .badhash to the file name, but
never overwrites an existing file: if that name is already taken, a unique
suffix is appended after .badhash.

I wrote this patch against the following branch:
master (latest 20cac2c5ab3d63aacfba35fb10878a2d490e2377)
git://git.savannah.gnu.org/wget.git

What do you think?

-- 
Matthew White <[email protected]>
>From 94ceddf1019f32181a317cba6f8918d7d73100cf Mon Sep 17 00:00:00 2001
From: Matthew White <[email protected]>
Date: Thu, 28 Jul 2016 20:21:48 +0200
Subject: [PATCH] Implement --keep-badhash to keep Metalink's files with a
 bad hash

With --keep-badhash, append .badhash to the names of Metalink files that
have a checksum mismatch, without overwriting existing files.

Without --keep-badhash, remove downloaded files with checksum mismatch
(this conforms to the old behaviour).

* src/init.c: keepbadhash
* src/main.c: keep-badhash
* src/options.h: keep_badhash
* doc/wget.texi: --keep-badhash
* src/metalink.h: badhash_suffix(), badhash_or_remove()
* src/metalink.c: On error, append .badhash if opt.keep_badhash
---
 doc/wget.texi  |  6 ++++++
 src/init.c     |  1 +
 src/main.c     |  3 +++
 src/metalink.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++-------
 src/metalink.h |  3 +++
 src/options.h  |  1 +
 6 files changed, 60 insertions(+), 7 deletions(-)

diff --git a/doc/wget.texi b/doc/wget.texi
index f6f0fbc..909696c 100644
--- a/doc/wget.texi
+++ b/doc/wget.texi
@@ -512,6 +512,12 @@ href if none was specified.
 Downloads files covered in local Metalink @var{file}. Metalink version 3
 and 4 are supported.
 
+@cindex keep-badhash
+@item --keep-badhash
+Keeps downloaded Metalink files that have a bad hash. Appends @samp{.badhash}
+to the name of each file with a checksum mismatch, without overwriting
+existing files.
+
 @cindex metalink-over-http
 @item --metalink-over-http
 Issues HTTP HEAD request instead of GET and extracts Metalink metadata
diff --git a/src/init.c b/src/init.c
index 06d2e44..dc2d9a6 100644
--- a/src/init.c
+++ b/src/init.c
@@ -237,6 +237,7 @@ static const struct {
   { "input-metalink",   &opt.input_metalink,    cmd_file },
 #endif
   { "iri",              &opt.enable_iri,        cmd_boolean },
+  { "keepbadhash",      &opt.keep_badhash,      cmd_boolean },
   { "keepsessioncookies", &opt.keep_session_cookies, cmd_boolean },
   { "limitrate",        &opt.limit_rate,        cmd_bytes },
   { "loadcookies",      &opt.cookies_input,     cmd_file },
diff --git a/src/main.c b/src/main.c
index 4d69e03..57c7703 100644
--- a/src/main.c
+++ b/src/main.c
@@ -343,6 +343,7 @@ static struct cmdline_option option_data[] =
     { "input-metalink", 0, OPT_VALUE, "input-metalink", -1 },
 #endif
     { "iri", 0, OPT_BOOLEAN, "iri", -1 },
+    { "keep-badhash", 0, OPT_BOOLEAN, "keepbadhash", -1 },
     { "keep-session-cookies", 0, OPT_BOOLEAN, "keepsessioncookies", -1 },
     { "level", 'l', OPT_VALUE, "reclevel", -1 },
     { "limit-rate", 0, OPT_VALUE, "limitrate", -1 },
@@ -703,6 +704,8 @@ Download:\n"),
        --unlink                    remove file before clobber\n"),
 #ifdef HAVE_METALINK
     N_("\
+       --keep-badhash              keep files with checksum mismatch (append .badhash)\n"),
+    N_("\
        --metalink-over-http        use Metalink metadata from HTTP response headers\n"),
     N_("\
        --preferred-location        preferred location for Metalink resources\n"),
diff --git a/src/metalink.c b/src/metalink.c
index f21f3aa..bae8135 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -117,13 +117,13 @@ retrieve_from_metalink (const metalink_t* metalink)
           retr_err = METALINK_RETR_ERROR;
 
           /* If output_stream is not NULL, then we have failed on
-             previous resource and are retrying. Thus, remove the file.  */
+             previous resource and are retrying. Thus, rename/remove
+             the file.  */
           if (output_stream)
             {
               fclose (output_stream);
               output_stream = NULL;
-              if (unlink (filename))
-                logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
+              badhash_or_remove (filename);
               xfree (filename);
             }
 
@@ -489,15 +489,13 @@ gpg_skip_verification:
 #endif
       last_retr_err = retr_err == RETROK ? last_retr_err : retr_err;
 
-      /* Remove the file if error encountered or if option specified.
+      /* Rename the file if error encountered; remove if option specified.
          Note: the file has been downloaded using *_loop. Therefore, it
          is not necessary to keep the file for continuated download.  */
       if ((retr_err != RETROK || opt.delete_after)
            && filename != NULL && file_exists_p (filename))
         {
-          logprintf (LOG_VERBOSE, _("Removing %s.\n"), quote (filename));
-          if (unlink (filename))
-            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
+          badhash_or_remove (filename);
         }
       if (output_stream)
         {
@@ -515,6 +513,47 @@ gpg_skip_verification:
   return last_retr_err;
 }
 
+/* Rename the file NAME to NAME.badhash, or to whatever non-clashing name
+   unique_name() derives from it.  Failures are logged, not returned.  */
+void
+badhash_suffix (char *name)
+{
+  char *uname, *bhash = malloc (strlen (name) + sizeof ".badhash");
+  if (!bhash)
+    {
+      logprintf (LOG_NOTQUIET, "malloc: %s\n", strerror (errno));
+      return;
+    }
+  uname = unique_name (strcat (strcpy (bhash, name), ".badhash"), false);
+  logprintf (LOG_VERBOSE, _("Renaming ‘%s’ to ‘%s’.\n"), name, uname);
+  if (link (name, uname))       /* link() fails rather than clobber UNAME */
+    logprintf (LOG_NOTQUIET, "link: %s\n", strerror (errno));
+  else if (unlink (name))
+    logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
+  xfree (bhash);
+  xfree (uname);
+}
+
+/* Either rename or remove the file NAME, depending on the options.
+
+   When --keep-badhash is set and --delete-after is not, hand NAME to
+   badhash_suffix() so that it is kept under a ".badhash" name.  In
+   every other case unlink NAME, logging the error if that fails.  */
+void
+badhash_or_remove (char *name)
+{
+  if (opt.keep_badhash && !opt.delete_after)
+    {
+      badhash_suffix (name);
+      return;
+    }
+
+  /* Either --delete-after was given or --keep-badhash was not.  */
+  logprintf (LOG_VERBOSE, _("Removing %s.\n"), quote (name));
+  if (unlink (name))
+    logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
+}
+
 int metalink_res_cmp (const void* v1, const void* v2)
 {
   const metalink_resource_t *res1 = *(metalink_resource_t **) v1,
diff --git a/src/metalink.h b/src/metalink.h
index e98c210..020fdf5 100644
--- a/src/metalink.h
+++ b/src/metalink.h
@@ -47,6 +47,9 @@ uerr_t retrieve_from_metalink (const metalink_t *metalink);
 
 int metalink_res_cmp (const void *res1, const void *res2);
 
+void badhash_suffix (char *name);     /* rename NAME to NAME.badhash */
+void badhash_or_remove (char *name);  /* rename or unlink NAME per options */
+
 bool find_key_value (const char *start,
                      const char *end,
                      const char *key,
diff --git a/src/options.h b/src/options.h
index b2e31a8..63b9bba 100644
--- a/src/options.h
+++ b/src/options.h
@@ -260,6 +260,7 @@ struct options
   bool cookies;                 /* whether cookies are used. */
   char *cookies_input;          /* file we're loading the cookies from. */
   char *cookies_output;         /* file we're saving the cookies to. */
+  bool keep_badhash;            /* Keep files with checksum mismatch. */
   bool keep_session_cookies;    /* whether session cookies should be
                                    saved and loaded. */
 
-- 
2.7.3

Reply via email to