From 7e60d975ce83ecb83eb775c393dbd0004a3d53d3 Mon Sep 17 00:00:00 2001
From: Darshit Shah <darnir@gmail.com>
Date: Mon, 17 Jun 2013 00:16:50 +0530
Subject: [PATCH] Follow RFC 2616 and httpbis specifications when handling
 redirects

---
 doc/ChangeLog |  6 ++++++
 doc/wget.texi | 35 +++++++++++++++++++----------------
 src/ChangeLog | 12 ++++++++++++
 src/http.c    | 33 ++++++++++++++++++++++++++++-----
 src/main.c    |  8 ++++++++
 src/retr.c    | 38 +++++++++++++++++++-------------------
 6 files changed, 92 insertions(+), 40 deletions(-)

diff --git a/doc/ChangeLog b/doc/ChangeLog
index 1b0173b..5e29c54 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,9 @@
+2013-06-17  Darshit Shah  <darnir@gmail.com>
+
+	* wget.texi (POST): Explain the new redirection rules.
+	* wget.texi (Other HTTP Methods): Same.
+	* wget.texi (body-data): Fix typo in description.
+
 2013-05-10  Darshit Shah <darnir@gmail.com>  (tiny change)
 
 	* wget.texi (No of tries): Fix typo to make it clear that --tries
diff --git a/doc/wget.texi b/doc/wget.texi
index c2230a9..23c3067 100644
--- a/doc/wget.texi
+++ b/doc/wget.texi
@@ -1466,23 +1466,24 @@ simply transmit whatever data is provided to it. Most servers however expect
 the POST data to be in the above format when processing HTML Forms.
 
 Please be aware that Wget needs to know the size of the POST data in
-advance.  Therefore the argument to @code{--post-file} must be a regular
+advance. Therefore the argument to @code{--post-file} must be a regular
 file; specifying a FIFO or something like @file{/dev/stdin} won't work.
 It's not quite clear how to work around this limitation inherent in
-HTTP/1.0.  Although HTTP/1.1 introduces @dfn{chunked} transfer that
+HTTP/1.0. Although HTTP/1.1 introduces @dfn{chunked} transfer that
 doesn't require knowing the request length in advance, a client can't
-use chunked unless it knows it's talking to an HTTP/1.1 server.  And it
+use chunked unless it knows it's talking to an HTTP/1.1 server. And it
 can't know that until it receives a response, which in turn requires the
 request to have been completed -- a chicken-and-egg problem.
 
-Note: if Wget is redirected after the POST request is completed, it
-will not send the POST data to the redirected URL.  This is because
-URLs that process POST often respond with a redirection to a regular
-page, which does not desire or accept POST.  It is not completely
-clear that this behavior is optimal; if it doesn't work out, it might
-be changed in the future.
+Note: As of version 1.15, if Wget is redirected after the POST request is
+completed, its behaviour will depend on the response code returned by the
+server. In case of a 301 Moved Permanently, 302 Moved Temporarily or
+307 Temporary Redirect, Wget will, in accordance with RFC2616, continue
+to send a POST request.
+In case a server wants the client to change the Request method upon
+redirection, it should send a 303 See Other response code.
 
-This example shows how to log to a server using POST and then proceed to
+This example shows how to log in to a server using POST and then proceed to
 download the desired pages, presumably only accessible to authorized
 users:
 
@@ -1515,8 +1516,8 @@ Method to the server.
 @item --body-data=@var{Data-String}
 @itemx --body-file=@var{Data-File}
 Must be set when additional data needs to be sent to the server along with the
-Method specified using @samp{--method}. @samp{--post-data} sends @var{string} as
-data, whereas @samp{--post-file} sends the contents of @var{file}. Other than that,
+Method specified using @samp{--method}. @samp{--body-data} sends @var{string} as
+data, whereas @samp{--body-file} sends the contents of @var{file}. Other than that,
 they work in exactly the same way.
 
 Currently, @samp{--body-file} is @emph{not} for transmitting files as a whole.
@@ -1528,10 +1529,12 @@ BODY Data in advance, and hence the argument to @samp{--body-file} should be a
 regular file. See @samp{--post-file} for a more detailed explanation.
 Only one of @samp{--body-data} and @samp{--body-file} should be specified.
 
-Wget handles these requests in the same way that it handles @samp{--post-data}
-and @samp{--post-file}. If you invoke Wget with @samp{--method=POST} and the server
-responds with a redirect request, then Wget will revert to a GET request during the
-redirection as is explained in @samp{--post-data}.
+If Wget is redirected after the request is completed, Wget will suspend the current
+method and send a GET request till the redirection is completed. This is true for
+all redirection response codes except 307 Temporary Redirect which is used to
+explicitly specify that the request method should @emph{not} change. Another exception
+is when the method is set to @code{POST}, in which case the redirection rules
+specified under @samp{--post-data} are followed.
 
 @cindex Content-Disposition
 @item --content-disposition
diff --git a/src/ChangeLog b/src/ChangeLog
index 32f3b82..f609698 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,15 @@
+2013-06-13  Darshit Shah  <darnir@gmail.com>
+
+	* http.c (gethttp): Follow RFC 2616 and httpbis specifications when
+	handling redirections. Do not suspend the method on 301/302 redirects.
+	(gethttp): If method is not GET, we do not intend to download
+	anything.
+	* main.c (main): Set spider mode when opt.method is HEAD. This will
+	prevent Wget from downloading any file.
+	* retr.c (SUSPEND_METHOD): Rename macro SUSPEND_POST_DATA to
+	SUSPEND_METHOD to more accurately reflect its use. Similarly rename
+	related variables.
+
 2013-05-14 Bykov Aleksey <gnfalex@rambler.ru>
 
 	* warc.c (warc_tempfile): For fix "Could not open temporary WARC manifest
diff --git a/src/http.c b/src/http.c
index 644b8f8..1fcf72b 100644
--- a/src/http.c
+++ b/src/http.c
@@ -2641,12 +2641,35 @@ read_header:
           /* From RFC2616: The status codes 303 and 307 have
              been added for servers that wish to make unambiguously
              clear which kind of reaction is expected of the client.
-             
+
              A 307 should be redirected using the same method,
              in other words, a POST should be preserved and not
-             converted to a GET in that case. */
-          if (statcode == HTTP_STATUS_TEMPORARY_REDIRECT)
-            return NEWLOCATION_KEEP_POST;
+             converted to a GET in that case.
+
+             With strict adherence to RFC2616, POST requests are not
+             converted to a GET request on 301 Permanent Redirect
+             or 302 Temporary Redirect.
+
+             A switch may be provided later based on the HTTPbis draft
+             that allows clients to convert POST requests to GET
+             requests on 301 and 302 response codes. */
+          switch (statcode)
+          {
+            case HTTP_STATUS_TEMPORARY_REDIRECT:
+              return NEWLOCATION_KEEP_POST;
+              break;
+            case HTTP_STATUS_MOVED_PERMANENTLY:
+              if (opt.method && strcasecmp (opt.method, "post") != 0)
+                return NEWLOCATION_KEEP_POST;
+              break;
+            case HTTP_STATUS_MOVED_TEMPORARILY:
+              if (opt.method && strcasecmp (opt.method, "post") != 0)
+                return NEWLOCATION_KEEP_POST;
+              break;
+            default:
+              return NEWLOCATION;
+              break;
+          }
           return NEWLOCATION;
         }
     }
@@ -2755,7 +2778,7 @@ read_header:
     }
 
   /* Return if we have no intention of further downloading.  */
-  if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only)
+  if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only || (opt.method && strcasecmp (opt.method, "GET") != 0))
     {
       /* In case the caller cares to look...  */
       hs->len = 0;
diff --git a/src/main.c b/src/main.c
index 2b42d2d..ac1b005 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1397,6 +1397,14 @@ for details.\n\n"));
         }
     }
 
+  /* Set various options as required for opt.method. */
+
+  /* When user specifies HEAD as the method, we do not wish to download any
+     files. Hence, set wget to run in spider mode.
+  */
+  if (opt.method && strcasecmp (opt.method, "HEAD") == 0)
+    setoptval ("spider", "1", "spider");
+
   /* Convert post_data to body-data and post_file_name to body-file options.
      This is required so as to remove redundant code later on in gethttp().
      The --post-data and --post-file options may also be removed in
diff --git a/src/retr.c b/src/retr.c
index 9002b0e..3d51ef9 100644
--- a/src/retr.c
+++ b/src/retr.c
@@ -677,23 +677,23 @@ calc_rate (wgint bytes, double secs, int *units)
 }
 
 
-#define SUSPEND_POST_DATA do {                  \
-  post_data_suspended = true;                   \
-  saved_post_data = opt.body_data;              \
-  saved_post_file_name = opt.body_file;         \
+#define SUSPEND_METHOD do {                     \
+  method_suspended = true;                      \
+  saved_body_data = opt.body_data;              \
+  saved_body_file_name = opt.body_file;         \
   saved_method = opt.method;                    \
   opt.body_data = NULL;                         \
   opt.body_file = NULL;                         \
   opt.method = NULL;                            \
 } while (0)
 
-#define RESTORE_POST_DATA do {                          \
-  if (post_data_suspended)                              \
+#define RESTORE_METHOD do {                             \
+  if (method_suspended)                                 \
     {                                                   \
-      opt.body_data = saved_post_data;                  \
-      opt.body_file = saved_post_file_name;             \
+      opt.body_data = saved_body_data;                  \
+      opt.body_file = saved_body_file_name;             \
       opt.method = saved_method;                        \
-      post_data_suspended = false;                      \
+      method_suspended = false;                         \
     }                                                   \
 } while (0)
 
@@ -721,10 +721,10 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
   char *local_file;
   int redirection_count = 0;
 
-  bool post_data_suspended = false;
-  char *saved_post_data = NULL;
+  bool method_suspended = false;
+  char *saved_body_data = NULL;
   char *saved_method = NULL;
-  char *saved_post_file_name = NULL;
+  char *saved_body_file_name = NULL;
 
   /* If dt is NULL, use local storage.  */
   if (!dt)
@@ -765,7 +765,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
                      proxy, error);
           xfree (url);
           xfree (error);
-          RESTORE_POST_DATA;
+          RESTORE_METHOD;
           result = PROXERR;
           goto bail;
         }
@@ -774,7 +774,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
           logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
           url_free (proxy_url);
           xfree (url);
-          RESTORE_POST_DATA;
+          RESTORE_METHOD;
           result = PROXERR;
           goto bail;
         }
@@ -858,7 +858,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
           xfree (url);
           xfree (mynewloc);
           xfree (error);
-          RESTORE_POST_DATA;
+          RESTORE_METHOD;
           goto bail;
         }
 
@@ -880,7 +880,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
             }
           xfree (url);
           xfree (mynewloc);
-          RESTORE_POST_DATA;
+          RESTORE_METHOD;
           result = WRONGCODE;
           goto bail;
         }
@@ -903,8 +903,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
 	 RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect
 	 specifically to preserve the method of the request.
 	 */
-      if (result != NEWLOCATION_KEEP_POST && !post_data_suspended)
-        SUSPEND_POST_DATA;
+      if (result != NEWLOCATION_KEEP_POST && !method_suspended)
+        SUSPEND_METHOD;
 
       goto redirected;
     }
@@ -967,7 +967,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
       xfree (url);
     }
 
-  RESTORE_POST_DATA;
+  RESTORE_METHOD;
 
 bail:
   if (register_status)
-- 
1.8.3.1

