From: Jake <[email protected]>

In GNU wget the --spider[1] first issues a HEAD request[2], then if HEAD fails, 
issues a GET request[3].
In BusyBox wget, only a GET request is sent. All web servers, including BB httpd 
and uhttpd, support HEAD.
The patch changes GET to HEAD, e.g. to get only the file size without 
downloading the file first.
This is still not totally compatible with GNU wget because it does not retry 
with GET if HEAD fails.
Potentially someone may use --spider to call a GET-only API, so they may be 
affected.
But this is incorrect usage, while others may expect that the spider uses HEAD 
and do not expect a download.

For testing use a CGI script /www/cgi-bin/echo.sh:

#!/bin/sh
CONTENT=$(cat -)
printf "Content-Length: ${#CONTENT}\r\n"
printf "Content-Type: text/html\r\n"
printf "REQUEST_METHOD: $REQUEST_METHOD\r\n"
printf "CONTENT_TYPE: $CONTENT_TYPE\r\n"
printf "CONTENT_LENGTH: $CONTENT_LENGTH\r\n"
printf "\r\n"
printf "$CONTENT"

Then call it:

$ busybox wget -O - -S -q --spider  http://localhost:8080/cgi-bin/echo.sh
HTTP/1.0 200 OK
Content-Length: 0
Content-Type: text/html
REQUEST_METHOD: HEAD
CONTENT_TYPE:
CONTENT_LENGTH:

When both the --post-data and --spider options are given, GNU wget behaves confusingly[4].
It sets Content-Type: application/x-www-form-urlencoded as for post-data but 
sends a HEAD request anyway:

$ wget -O - -S -q --post-data="trololo" --spider  http://localhost:8080/cgi-bin/echo.sh
HTTP/1.0 200 OK
Content-Length: 7
Content-Type: text/html
REQUEST_METHOD: HEAD
CONTENT_TYPE: application/x-www-form-urlencoded
CONTENT_LENGTH:

Instead, this version will send the request as POST but still skip its 
response body:

$ busybox wget -O - -S -q --post-data="trololo" --spider  http://localhost:8080/cgi-bin/echo.sh
HTTP/1.0 200 OK
Content-Length: 7
Content-Type: text/html
REQUEST_METHOD: POST
CONTENT_TYPE: application/x-www-form-urlencoded
CONTENT_LENGTH: 7

This would be useful for heavy API calls, but we have to wait to see what the 
GNU wget author says.
We may change this behaviour later.

[1] https://www.gnu.org/software/wget/manual/wget.html#index-spider
[2] https://httpwg.org/specs/rfc7231.html#HEAD
[3] https://git.savannah.gnu.org/cgit/wget.git/tree/src/http.c#n4304
[4] https://savannah.gnu.org/bugs/index.php?56808

function                                             old     new   delta
wget_main                                           2797    2824     +27
.rodata                                            10213   10217      +4
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 31/0)               Total: 31 bytes
text    data     bss     dec     hex filename
177416    3971    1688  183075   2cb23 busybox_old
177447    3971    1688  183106   2cb42 busybox_unstripped

Signed-off-by: Sergey Ponomarev <[email protected]>
---
 networking/wget.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/networking/wget.c b/networking/wget.c
index 9ec0e67b9..0006f8807 100644
--- a/networking/wget.c
+++ b/networking/wget.c
@@ -242,6 +242,7 @@ static const char wget_user_headers[] ALIGN1 =
 
 /* Globals */
 struct globals {
+       const char *method;
        off_t content_len;        /* Content-length of the file */
        off_t beg_range;          /* Range at which continue begins */
 #if ENABLE_FEATURE_WGET_STATUSBAR
@@ -1220,12 +1221,13 @@ static void download_one_url(const char *url)
 #endif
                /* Send HTTP request */
                if (use_proxy) {
-                       SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
+                       SENDFMT(sfp, "%s %s://%s/%s HTTP/1.1\r\n",
+                               G.method,
                                target.protocol, target.host,
                                target.path);
                } else {
                        SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
-                               (option_mask32 & WGET_OPT_POST) ? "POST" : "GET",
+                               G.method,
                                target.path);
                }
                if (!USR_HEADER_HOST)
@@ -1582,6 +1584,15 @@ IF_DESKTOP(      "no-parent\0"        No_argument       "\xf0")
 #endif
        argv += optind;
 
+       if (option_mask32 & WGET_OPT_POST) {
+               G.method = "POST";
+       } else if (option_mask32 & WGET_OPT_SPIDER) {
+               /* Note: GNU wget --spider sends a HEAD and if it failed repeats with a GET */
+               G.method = "HEAD";
+       } else {
+               G.method = "GET";
+       }
+
 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
        if (headers_llist) {
                int size = 0;
-- 
2.34.1

_______________________________________________
busybox mailing list
[email protected]
http://lists.busybox.net/mailman/listinfo/busybox

Reply via email to