Hi,

https://bugs.debian.org/744725

Busybox wget appends headers given with "--header" instead of replacing the corresponding built-in headers.

To quote the current GNU ("big") wget documentation:
http://www.gnu.org/software/wget/manual/html_node/HTTP-Options.html

<quote>
‘--header=header-line’
Send header-line along with the rest of the headers in each HTTP request. The 
supplied header is sent as-is, which means it must contain name and value 
separated by colon, and must not contain newlines.

You may define more than one additional header by specifying ‘--header’ more 
than once.

wget --header='Accept-Charset: iso-8859-2' \
     --header='Accept-Language: hr'        \
       http://fly.srk.fer.hr/
Specification of an empty string as the header value will clear all previous 
user-defined headers.

As of Wget 1.10, this option can be used to override headers otherwise 
generated automatically. This example instructs Wget to connect to localhost, 
but to specify ‘foo.bar’ in the Host header:

wget --header="Host: foo.bar" http://localhost/
In versions of Wget prior to 1.10 such use of ‘--header’ caused sending of 
duplicate headers.
</quote>

I'm attaching 2 variants that implement the "override headers" part, which was
introduced in the big wget in 1.10. I only just noticed the "clear all
previous user-defined headers" behaviour, so that part is not implemented; I'm
not sure how useful it is in practice (I would not blindly add it).
Neither variant makes me particularly happy, and I'm not sure how to phrase
it better right now.

Better ideas?
Denys, is one of these variants good enough to apply as is for now?

TIA and cheers,
Bernhard
function                                             old     new   delta
wget_main                                           2967    3108    +141
.rodata                                           155809  155872     +63
wget_user_headers                                      -      62     +62
retrieve_file_data                                   435     456     +21
progress_meter                                       120     126      +6
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 4/0 up/down: 293/0)             Total: 293 bytes
   text   data    bss    dec    hex filename
 822042   4123   9552 835717  cc085 busybox_old
 822273   4123   9552 835948  cc16c busybox_unstripped
diff --git a/networking/wget.c b/networking/wget.c
index 1013f66..3cfe706 100644
--- a/networking/wget.c
+++ b/networking/wget.c
@@ -55,6 +55,33 @@ static const char P_FTP[] = "ftp";
 static const char P_HTTP[] = "http";
 static const char P_HTTPS[] = "https";
 
+#if ENABLE_FEATURE_WGET_LONG_OPTIONS
+/* User-specified headers prevent using our corresponding built-in headers.  */
+enum { /* bit N stands for the N-th name in wget_user_headers below */
+	HDR_HOST = (1<<0),
+	HDR_USER_AGENT = (1<<1),
+	HDR_RANGE = (1<<2),
+	HDR_AUTHORIZATION = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION, /* 0 when the config macro is 0 */
+	HDR_PROXY_AUTHORIZATION = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
+};
+static const char wget_user_headers[] ALIGN1 = /* NUL-separated names; an empty string ends the list */
+	"Host:\0User-Agent:\0Range:\0"
+# if ENABLE_FEATURE_WGET_AUTHENTICATION
+	"Authorization:\0Proxy-Authorization:\0"
+# endif
+	;
+# define USR_HEADER_HOST (G.user_headers & HDR_HOST) /* non-zero: user supplied this header */
+# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
+# define USR_HEADER_RANGE (G.user_headers & HDR_RANGE)
+# define USR_HEADER_AUTHORIZATION (G.user_headers & HDR_AUTHORIZATION)
+# define USR_HEADER_PROXY_AUTHORIZATION (G.user_headers & HDR_PROXY_AUTHORIZATION)
+#else /* No long options, no user-headers :( */
+# define USR_HEADER_HOST 0
+# define USR_HEADER_USER_AGENT 0
+# define USR_HEADER_RANGE 0
+# define USR_HEADER_AUTHORIZATION 0
+# define USR_HEADER_PROXY_AUTHORIZATION 0
+#endif
 
 /* Globals */
 struct globals {
@@ -69,6 +96,7 @@ struct globals {
 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
 	char *post_data;
 	char *extra_headers;
+	unsigned char user_headers; /* Headers mentioned by the user */
 #endif
 	char *fname_out;        /* where to direct output (-O) */
 	const char *proxy_flag; /* Use proxies if env vars are set */
@@ -722,6 +750,9 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
 	progress_meter(PROGRESS_END);
 }
 
+#define httpcmd(fp, fmt, ...) /* log the line via log_io(), then write it to fp */ \
+	do { log_io("> " fmt, ##__VA_ARGS__); fprintf(fp, fmt, ##__VA_ARGS__); } while (0)
+
 static void download_one_url(const char *url)
 {
 	bool use_proxy;                 /* Use proxies if env vars are set  */
@@ -830,43 +861,46 @@ static void download_one_url(const char *url)
 #endif
 		/* Send HTTP request */
 		if (use_proxy) {
-			fprintf(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
+			httpcmd(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
 				target.protocol, target.host,
 				target.path);
 		} else {
-			fprintf(sfp, "%s /%s HTTP/1.1\r\n",
+			httpcmd(sfp, "%s /%s HTTP/1.1\r\n",
 				(option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
 				target.path);
 		}
-
-		fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
-			target.host, G.user_agent);
+		if (!USR_HEADER_HOST)
+			httpcmd(sfp, "Host: %s\r\n", target.host);
+		if (!USR_HEADER_USER_AGENT)
+			httpcmd(sfp, "User-Agent: %s\r\n", G.user_agent);
 
 		/* Ask server to close the connection as soon as we are done
 		 * (IOW: we do not intend to send more requests)
 		 */
-		fprintf(sfp, "Connection: close\r\n");
+		httpcmd(sfp, "Connection: close\r\n");
 
 #if ENABLE_FEATURE_WGET_AUTHENTICATION
-		if (target.user) {
-			fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
+		if (target.user && !USR_HEADER_AUTHORIZATION) {
+			httpcmd(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
 				base64enc(target.user));
 		}
-		if (use_proxy && server.user) {
-			fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
+		if (use_proxy && server.user && !USR_HEADER_PROXY_AUTHORIZATION) {
+			httpcmd(sfp, "Proxy-Authorization: Basic %s\r\n",
 				base64enc(server.user));
 		}
 #endif
 
-		if (G.beg_range != 0)
-			fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
+		if (G.beg_range != 0 && !USR_HEADER_RANGE)
+			httpcmd(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
 
 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
-		if (G.extra_headers)
+		if (G.extra_headers) {
+			log_io("%s", G.extra_headers); /* user text must never be the format string */
 			fputs(G.extra_headers, sfp);
+		}
 
 		if (option_mask32 & WGET_OPT_POST_DATA) {
-			fprintf(sfp,
+			httpcmd(sfp,
 				"Content-Type: application/x-www-form-urlencoded\r\n"
 				"Content-Length: %u\r\n"
 				"\r\n"
@@ -876,7 +910,7 @@ static void download_one_url(const char *url)
 		} else
 #endif
 		{
-			fprintf(sfp, "\r\n");
+			httpcmd(sfp, "\r\n");
 		}
 
 		fflush(sfp);
@@ -1115,7 +1149,21 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
 		}
 		G.extra_headers = cp = xmalloc(size);
 		while (headers_llist) {
-			cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
+			int idx = 0;
+			const char *words = wget_user_headers;
+			size = sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
+			/* Prefix-match the header name (names are case-insensitive) so
+			 * that e.g. "host: x" also suppresses the built-in Host header */
+			while (*words) {
+				size_t len = strlen(words);
+				if (strncasecmp(cp, words, len) == 0) {
+					G.user_headers |= (1 << idx);
+					break;
+				}
+				++idx;
+				words += len + 1;
+			}
+			cp += size;
 		}
 	}
 #endif
function                                             old     new   delta
httpcmd                                                -     257    +257
wget_main                                           2967    3129    +162
wget_user_headers                                      -      62     +62
.rodata                                           155809  155871     +62
retrieve_file_data                                   435     456     +21
progress_meter                                       120     126      +6
------------------------------------------------------------------------------
(add/remove: 2/0 grow/shrink: 4/0 up/down: 570/0)             Total: 570 bytes
diff --git a/networking/wget.c b/networking/wget.c
index 1013f66..de7f3e1 100644
--- a/networking/wget.c
+++ b/networking/wget.c
@@ -36,13 +36,14 @@
 
 #include "libbb.h"
 
-#if 0
+#undef DEBUG
+#define DEBUG 0
+#if DEBUG
 # define log_io(...) bb_error_msg(__VA_ARGS__)
 #else
 # define log_io(...) ((void)0)
 #endif
 
-
 struct host_info {
 	char *allocated;
 	const char *path;
@@ -55,6 +56,15 @@ static const char P_FTP[] = "ftp";
 static const char P_HTTP[] = "http";
 static const char P_HTTPS[] = "https";
 
+#if ENABLE_FEATURE_WGET_LONG_OPTIONS
+/* User-specified headers prevent using our corresponding built-in headers.  */
+static const char wget_user_headers[] ALIGN1 = /* NUL-separated; name N maps to bit N of G.user_headers */
+	"Host:\0User-Agent:\0Range:\0"
+# if ENABLE_FEATURE_WGET_AUTHENTICATION
+	"Authorization:\0Proxy-Authorization:\0"
+# endif
+	;
+#endif
 
 /* Globals */
 struct globals {
@@ -69,6 +79,7 @@ struct globals {
 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
 	char *post_data;
 	char *extra_headers;
+	unsigned char user_headers; /* Headers mentioned by the user */
 #endif
 	char *fname_out;        /* where to direct output (-O) */
 	const char *proxy_flag; /* Use proxies if env vars are set */
@@ -722,6 +733,41 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
 	progress_meter(PROGRESS_END);
 }
 
+/* Send FORMAT (printf-style) plus CRLF to FP, unless it is a built-in header
+ * that the user already supplied via --header (bit set in G.user_headers). */
+static void httpcmd(FILE *fp, const char *format, ...)
+{
+	va_list ap;
+#if ENABLE_FEATURE_WGET_LONG_OPTIONS
+	const char *words = wget_user_headers;
+	int idx = 0;
+
+	/* a bit like index_in_substrings but match only a prefix of format */
+	while (*words) {
+		size_t len = strlen(words);
+
+		if ((G.user_headers & (1 << idx)) && strncmp(format, words, len) == 0)
+			return; /* Prefer the user supplied header */
+		words += len + 1;
+		++idx;
+	}
+#endif
+	va_start(ap, format);
+	if (DEBUG) {
+		va_list ap_dbg;
+		char *fmt_dbg = xasprintf("> %s", format);
+
+		va_copy(ap_dbg, ap);
+		bb_verror_msg(fmt_dbg, ap_dbg, NULL);
+		va_end(ap_dbg);
+		free(fmt_dbg);
+	}
+	vfprintf(fp, format, ap);
+	va_end(ap);
+	/* Terminate the line separately; avoids a heap-allocated "fmt\r\n" copy */
+	fputs("\r\n", fp);
+}
+
 static void download_one_url(const char *url)
 {
 	bool use_proxy;                 /* Use proxies if env vars are set  */
@@ -830,53 +876,53 @@ static void download_one_url(const char *url)
 #endif
 		/* Send HTTP request */
 		if (use_proxy) {
-			fprintf(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
+			httpcmd(sfp, "GET %s://%s/%s HTTP/1.1",
 				target.protocol, target.host,
 				target.path);
 		} else {
-			fprintf(sfp, "%s /%s HTTP/1.1\r\n",
+			httpcmd(sfp, "%s /%s HTTP/1.1",
 				(option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
 				target.path);
 		}
-
-		fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
-			target.host, G.user_agent);
+		httpcmd(sfp, "Host: %s", target.host);
+		httpcmd(sfp, "User-Agent: %s", G.user_agent);
 
 		/* Ask server to close the connection as soon as we are done
 		 * (IOW: we do not intend to send more requests)
 		 */
-		fprintf(sfp, "Connection: close\r\n");
+		httpcmd(sfp, "Connection: close");
 
 #if ENABLE_FEATURE_WGET_AUTHENTICATION
 		if (target.user) {
-			fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
+			httpcmd(sfp, "Proxy-Authorization: Basic %s"+6,
 				base64enc(target.user));
 		}
 		if (use_proxy && server.user) {
-			fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
+			httpcmd(sfp, "Proxy-Authorization: Basic %s",
 				base64enc(server.user));
 		}
 #endif
 
 		if (G.beg_range != 0)
-			fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
+			httpcmd(sfp, "Range: bytes=%"OFF_FMT"u-", G.beg_range);
 
 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
-		if (G.extra_headers)
+		if (G.extra_headers) {
+			/* the debug output is not pretty because it's different to
+			 * all the other httpcmd; Not worth the bits though */
+			log_io("%s", G.extra_headers); /* user text must never be the format string */
 			fputs(G.extra_headers, sfp);
+		}
 
 		if (option_mask32 & WGET_OPT_POST_DATA) {
-			fprintf(sfp,
-				"Content-Type: application/x-www-form-urlencoded\r\n"
-				"Content-Length: %u\r\n"
-				"\r\n"
-				"%s",
-				(int) strlen(G.post_data), G.post_data
-			);
+			httpcmd(sfp, "Content-Type: application/x-www-form-urlencoded");
+			httpcmd(sfp, "Content-Length: %u", (unsigned) strlen(G.post_data));
+			httpcmd(sfp, "");
+			fputs(G.post_data, sfp); /* body only: a trailing CRLF would exceed Content-Length */
 		} else
 #endif
 		{
-			fprintf(sfp, "\r\n");
+			httpcmd(sfp, "");
 		}
 
 		fflush(sfp);
@@ -1115,7 +1161,21 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
 		}
 		G.extra_headers = cp = xmalloc(size);
 		while (headers_llist) {
-			cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
+			int idx = 0;
+			const char *words = wget_user_headers;
+			size = sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
+			/* Prefix-match the header name (names are case-insensitive) so
+			 * that e.g. "host: x" also suppresses the built-in Host header */
+			while (*words) {
+				size_t len = strlen(words);
+				if (strncasecmp(cp, words, len) == 0) {
+					G.user_headers |= (1 << idx);
+					break;
+				}
+				++idx;
+				words += len + 1;
+			}
+			cp += size;
 		}
 	}
 #endif
_______________________________________________
busybox mailing list
[email protected]
http://lists.busybox.net/mailman/listinfo/busybox

Reply via email to