Hi Tim,

Thank you for the review comments. Please find attached a new version
of the patch prepared according to your guidance. I found it easier to
use plain old strchr instead of strchrnul, I hope that is OK.

Thanks,
Tom

On Thu, Feb 09, 2017 at 09:38:26PM +0100, Tim Rühsen wrote:
> Hi Tom,
> 
> thanks for your work and for signing the FSF copyright assignment !
> 
> Just some smallish things:
> 
> - Please amend the commit message to be in GNU style (see 'git log' for 
> examples). The easiest for us maintainers is when you create the patch with 
> 'git format-patch -1' and add it as attachment.
> 
> - Please add a space between function name and (
> 
> - Please use xfree instead of free
> 
> - Could you amend check_retry_on_http_error so that no memory allocation is 
> used ? E.g. using a strchrnul () loop to separate the status codes.
> 
> - There is a function 'cleanup' in src/init.c to free allocated stuff (if 
> DEBUG_MALLOC is defined). Please free opt.retry_on_http_error there.
> 
> Regards, Tim
> 
> 
> On Montag, 6. Februar 2017 20:42:29 CET Tom Szilagyi wrote:
> > Consider given HTTP response codes as non-fatal, transient errors.
> > Supply a comma-separated list of 3-digit HTTP response codes as
> > argument. Useful to work around special circumstances where retries
> > are required, but the server responds with an error code normally not
> > retried by Wget. Such errors might be 503 (Service Unavailable) and
> > 429 (Too Many Requests). Retries enabled by this option are performed
> > subject to the normal retry timing and retry count limitations of
> > Wget.
> > 
> > Using this option is intended to support special use cases only and is
> > generally not recommended, as it can force retries even in cases where
> > the server is actually trying to decrease its load. Please use it
> > wisely and only if you know what you are doing.
> > 
> > Example use and a starting point for manual testing:
> >   wget --retry-on-http-error=429,503 http://httpbin.org/status/503
> > ---
> >  doc/wget.texi | 15 +++++++++++++++
> >  src/http.c    | 29 +++++++++++++++++++++++++++++
> >  src/init.c    |  1 +
> >  src/main.c    |  1 +
> >  src/options.h |  1 +
> >  5 files changed, 47 insertions(+)
> > 
> > diff --git a/doc/wget.texi b/doc/wget.texi
> > index 8e57aaa..00a8d4b 100644
> > --- a/doc/wget.texi
> > +++ b/doc/wget.texi
> > @@ -1718,6 +1718,21 @@ some few obscure servers, which never send HTTP
> > authentication challenges, but accept unsolicited auth info, say, in
> > addition to form-based authentication.
> > 
> > +@item --retry-on-http-error=@var{code[,code,...]}
> > +Consider given HTTP response codes as non-fatal, transient errors.
> > +Supply a comma-separated list of 3-digit HTTP response codes as
> > +argument. Useful to work around special circumstances where retries
> > +are required, but the server responds with an error code normally not
> > +retried by Wget. Such errors might be 503 (Service Unavailable) and
> > +429 (Too Many Requests). Retries enabled by this option are performed
> > +subject to the normal retry timing and retry count limitations of
> > +Wget.
> > +
> > +Using this option is intended to support special use cases only and is
> > +generally not recommended, as it can force retries even in cases where
> > +the server is actually trying to decrease its load. Please use it
> > +wisely and only if you know what you are doing.
> > +
> >  @end table
> > 
> >  @node HTTPS (SSL/TLS) Options, FTP Options, HTTP Options, Invoking
> > diff --git a/src/http.c b/src/http.c
> > index 3c3c8b2..6822bee 100644
> > --- a/src/http.c
> > +++ b/src/http.c
> > @@ -3982,6 +3982,30 @@ gethttp (const struct url *u, struct url
> > *original_url, struct http_stat *hs, return retval;
> >  }
> > 
> > +/* Check whether the supplied HTTP status code is among those
> > +   listed for the --retry-on-http-error option. */
> > +static bool
> > +check_retry_on_http_error (const int statcode)
> > +{
> > +  if (!opt.retry_on_http_error)
> > +    return false;
> > +
> > +  bool ret = false;
> > +  char * retry_conf = strdup(opt.retry_on_http_error);
> > +  char * tok = strtok(retry_conf, ",");
> > +  while (tok)
> > +    {
> > +      if (atoi(tok) == statcode)
> > +        {
> > +          ret = true;
> > +          break;
> > +        }
> > +      tok = strtok(NULL, ",");
> > +    }
> > +  free(retry_conf);
> > +  return ret;
> > +}
> > +
> >  /* The genuine HTTP loop!  This is the part where the retrieval is
> >     retried, and retried, and retried, and...  */
> >  uerr_t
> > @@ -4319,6 +4343,11 @@ http_loop (const struct url *u, struct url
> > *original_url, char **newloc, logprintf (LOG_NOTQUIET, _("\
> >  Remote file does not exist -- broken link!!!\n"));
> >              }
> > +          else if (check_retry_on_http_error(hstat.statcode))
> > +            {
> > +              printwhat (count, opt.ntry);
> > +              continue;
> > +            }
> >            else
> >              {
> >                logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
> > diff --git a/src/init.c b/src/init.c
> > index 623ef9d..01dcb06 100644
> > --- a/src/init.c
> > +++ b/src/init.c
> > @@ -304,6 +304,7 @@ static const struct {
> >    { "restrictfilenames", NULL,                 
> > cmd_spec_restrict_file_names }, { "retrsymlinks",     &opt.retr_symlinks,  
> >   cmd_boolean },
> >    { "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
> > +  { "retryonhttperror", &opt.retry_on_http_error, cmd_string },
> >    { "robots",           &opt.use_robots,        cmd_boolean },
> >    { "savecookies",      &opt.cookies_output,    cmd_file },
> >    { "saveheaders",      &opt.save_headers,      cmd_boolean },
> > diff --git a/src/main.c b/src/main.c
> > index e393597..581a33d 100644
> > --- a/src/main.c
> > +++ b/src/main.c
> > @@ -404,6 +404,7 @@ static struct cmdline_option option_data[] =
> >      { "restrict-file-names", 0, OPT_BOOLEAN, "restrictfilenames", -1 },
> >      { "retr-symlinks", 0, OPT_BOOLEAN, "retrsymlinks", -1 },
> >      { "retry-connrefused", 0, OPT_BOOLEAN, "retryconnrefused", -1 },
> > +    { "retry-on-http-error", 0, OPT_VALUE, "retryonhttperror", -1 },
> >      { "save-cookies", 0, OPT_VALUE, "savecookies", -1 },
> >      { "save-headers", 0, OPT_BOOLEAN, "saveheaders", -1 },
> >      { IF_SSL ("secure-protocol"), 0, OPT_VALUE, "secureprotocol", -1 },
> > diff --git a/src/options.h b/src/options.h
> > index 8a818ca..3972945 100644
> > --- a/src/options.h
> > +++ b/src/options.h
> > @@ -43,6 +43,7 @@ struct options
> >    bool quiet;                   /* Are we quiet? */
> >    int ntry;                     /* Number of tries per URL */
> >    bool retry_connrefused;       /* Treat CONNREFUSED as non-fatal. */
> > +  char *retry_on_http_error;    /* Treat given HTTP errors as non-fatal. */
> > bool background;              /* Whether we should work in background. */
> > bool ignore_length;           /* Do we heed content-length at all?  */ bool
> > recursive;               /* Are we recursive? */
> 


>From 23251ebc73024e359ed65fac9f2c00920e49b1b5 Mon Sep 17 00:00:00 2001
From: Tom Szilagyi <tomszila...@gmail.com>
Date: Mon, 6 Feb 2017 19:40:02 +0100
Subject: [PATCH] Add support for --retry-on-http-error

* doc/wget.text: Add documentation
* src/http.c: Add function check_retry_on_http_error ()
* src/init.c: Add opt.retry_on_http_error
* src/main.c: Add struct for retry-on-http-error to option_data[]
* src/options.h: Add retry_on_http_error to struct options
---
 doc/wget.texi | 15 +++++++++++++++
 src/http.c    | 21 +++++++++++++++++++++
 src/init.c    |  2 ++
 src/main.c    |  1 +
 src/options.h |  1 +
 5 files changed, 40 insertions(+)

diff --git a/doc/wget.texi b/doc/wget.texi
index 8e57aaa..755f882 100644
--- a/doc/wget.texi
+++ b/doc/wget.texi
@@ -1718,6 +1718,21 @@ some few obscure servers, which never send HTTP authentication
 challenges, but accept unsolicited auth info, say, in addition to
 form-based authentication.
 
+@item --retry-on-http-error=@var{code[,code,...]}
+Consider given HTTP response codes as non-fatal, transient errors.
+Supply a comma-separated list of 3-digit HTTP response codes as
+argument. Useful to work around special circumstances where retries
+are required, but the server responds with an error code normally not
+retried by Wget. Such errors might be 503 (Service Unavailable) and
+429 (Too Many Requests). Retries enabled by this option are performed
+subject to the normal retry timing and retry count limitations of
+Wget.
+
+Using this option is intended to support special use cases only and is
+generally not recommended, as it can force retries even in cases where
+the server is actually trying to decrease its load. Please use wisely
+and only if you know what you are doing.
+
 @end table
 
 @node HTTPS (SSL/TLS) Options, FTP Options, HTTP Options, Invoking
diff --git a/src/http.c b/src/http.c
index 3c3c8b2..bf04cd6 100644
--- a/src/http.c
+++ b/src/http.c
@@ -3982,6 +3982,22 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
   return retval;
 }
 
+/* Check whether the supplied HTTP status code is among those
+   listed for the --retry-on-http-error option. */
+static bool
+check_retry_on_http_error (const int statcode)
+{
+  char * tok = opt.retry_on_http_error;
+  while (tok)
+    {
+      if (atoi (tok) == statcode)
+        return true;
+      if (tok = strchr (tok, ','))
+        ++tok;
+    }
+  return false;
+}
+
 /* The genuine HTTP loop!  This is the part where the retrieval is
    retried, and retried, and retried, and...  */
 uerr_t
@@ -4319,6 +4335,11 @@ http_loop (const struct url *u, struct url *original_url, char **newloc,
               logprintf (LOG_NOTQUIET, _("\
 Remote file does not exist -- broken link!!!\n"));
             }
+          else if (check_retry_on_http_error (hstat.statcode))
+            {
+              printwhat (count, opt.ntry);
+              continue;
+            }
           else
             {
               logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
diff --git a/src/init.c b/src/init.c
index 623ef9d..e6aa673 100644
--- a/src/init.c
+++ b/src/init.c
@@ -304,6 +304,7 @@ static const struct {
   { "restrictfilenames", NULL,                  cmd_spec_restrict_file_names },
   { "retrsymlinks",     &opt.retr_symlinks,     cmd_boolean },
   { "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
+  { "retryonhttperror", &opt.retry_on_http_error, cmd_string },
   { "robots",           &opt.use_robots,        cmd_boolean },
   { "savecookies",      &opt.cookies_output,    cmd_file },
   { "saveheaders",      &opt.save_headers,      cmd_boolean },
@@ -1979,6 +1980,7 @@ cleanup (void)
   xfree (opt.body_file);
   xfree (opt.rejected_log);
   xfree (opt.use_askpass);
+  xfree (opt.retry_on_http_error);
 
 #ifdef HAVE_LIBCARES
 #include <ares.h>
diff --git a/src/main.c b/src/main.c
index e393597..581a33d 100644
--- a/src/main.c
+++ b/src/main.c
@@ -404,6 +404,7 @@ static struct cmdline_option option_data[] =
     { "restrict-file-names", 0, OPT_BOOLEAN, "restrictfilenames", -1 },
     { "retr-symlinks", 0, OPT_BOOLEAN, "retrsymlinks", -1 },
     { "retry-connrefused", 0, OPT_BOOLEAN, "retryconnrefused", -1 },
+    { "retry-on-http-error", 0, OPT_VALUE, "retryonhttperror", -1 },
     { "save-cookies", 0, OPT_VALUE, "savecookies", -1 },
     { "save-headers", 0, OPT_BOOLEAN, "saveheaders", -1 },
     { IF_SSL ("secure-protocol"), 0, OPT_VALUE, "secureprotocol", -1 },
diff --git a/src/options.h b/src/options.h
index 8a818ca..3972945 100644
--- a/src/options.h
+++ b/src/options.h
@@ -43,6 +43,7 @@ struct options
   bool quiet;                   /* Are we quiet? */
   int ntry;                     /* Number of tries per URL */
   bool retry_connrefused;       /* Treat CONNREFUSED as non-fatal. */
+  char *retry_on_http_error;    /* Treat given HTTP errors as non-fatal. */
   bool background;              /* Whether we should work in background. */
   bool ignore_length;           /* Do we heed content-length at all?  */
   bool recursive;               /* Are we recursive? */
-- 
2.1.4

Reply via email to