Hi,
https://bugs.debian.org/744725
wget is appending instead of replacing with "--header"
To quote the current big wget docs:
http://www.gnu.org/software/wget/manual/html_node/HTTP-Options.html
<quote>
‘--header=header-line’
Send header-line along with the rest of the headers in each HTTP request. The
supplied header is sent as-is, which means it must contain name and value
separated by colon, and must not contain newlines.
You may define more than one additional header by specifying ‘--header’ more
than once.
wget --header='Accept-Charset: iso-8859-2' \
--header='Accept-Language: hr' \
http://fly.srk.fer.hr/
Specification of an empty string as the header value will clear all previous
user-defined headers.
As of Wget 1.10, this option can be used to override headers otherwise
generated automatically. This example instructs Wget to connect to localhost,
but to specify ‘foo.bar’ in the Host header:
wget --header="Host: foo.bar" http://localhost/
In versions of Wget prior to 1.10 such use of ‘--header’ caused sending of
duplicate headers.
</quote>
I'm attaching 2 variants that implement the "override headers" part that was
introduced in the big wget in 1.10. I only just noticed the "clear all
previous user-defined headers" part, so that is not implemented, and I'm not
sure how useful it is in practice (I would not blindly add it).
Neither variant makes me particularly happy, and I'm not sure how to phrase
it better right now.
Better ideas?
Denys, is one of these variants good enough to apply as-is for now?
TIA and cheers,
Bernhard
function old new delta
wget_main 2967 3108 +141
.rodata 155809 155872 +63
wget_user_headers - 62 +62
retrieve_file_data 435 456 +21
progress_meter 120 126 +6
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 4/0 up/down: 293/0) Total: 293 bytes
text data bss dec hex filename
822042 4123 9552 835717 cc085 busybox_old
822273 4123 9552 835948 cc16c busybox_unstripped
diff --git a/networking/wget.c b/networking/wget.c
index 1013f66..3cfe706 100644
--- a/networking/wget.c
+++ b/networking/wget.c
@@ -55,6 +55,33 @@ static const char P_FTP[] = "ftp";
static const char P_HTTP[] = "http";
static const char P_HTTPS[] = "https";
+#if ENABLE_FEATURE_WGET_LONG_OPTIONS
+/* User-specified headers prevent using our corresponding built-in headers. */
+enum {
+ HDR_HOST = (1<<0),
+ HDR_USER_AGENT = (1<<1),
+ HDR_RANGE = (1<<2),
+ HDR_AUTHORIZATION = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
+ HDR_PROXY_AUTHORIZATION = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
+};
+static const char wget_user_headers[] ALIGN1 =
+ "Host:\0User-Agent:\0Range:\0"
+# if ENABLE_FEATURE_WGET_AUTHENTICATION
+ "Authorization:\0Proxy-Authorization:\0"
+# endif
+ ;
+# define USR_HEADER_HOST (G.user_headers & HDR_HOST)
+# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
+# define USR_HEADER_RANGE (G.user_headers & HDR_RANGE)
+# define USR_HEADER_AUTHORIZATION (G.user_headers & HDR_AUTHORIZATION)
+# define USR_HEADER_PROXY_AUTHORIZATION (G.user_headers & HDR_PROXY_AUTHORIZATION)
+#else /* No long options, no user-headers :( */
+# define USR_HEADER_HOST 0
+# define USR_HEADER_USER_AGENT 0
+# define USR_HEADER_RANGE 0
+# define USR_HEADER_AUTHORIZATION 0
+# define USR_HEADER_PROXY_AUTHORIZATION 0
+#endif
/* Globals */
struct globals {
@@ -69,6 +96,7 @@ struct globals {
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
char *post_data;
char *extra_headers;
+ unsigned char user_headers; /* Headers mentioned by the user */
#endif
char *fname_out; /* where to direct output (-O) */
const char *proxy_flag; /* Use proxies if env vars are set */
@@ -722,6 +750,9 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
progress_meter(PROGRESS_END);
}
+#define httpcmd(fp, fmt, ...) \
+ { log_io("> " fmt, ##__VA_ARGS__); fprintf(fp, fmt, ##__VA_ARGS__); }
+
static void download_one_url(const char *url)
{
bool use_proxy; /* Use proxies if env vars are set */
@@ -830,43 +861,46 @@ static void download_one_url(const char *url)
#endif
/* Send HTTP request */
if (use_proxy) {
- fprintf(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
+ httpcmd(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
target.protocol, target.host,
target.path);
} else {
- fprintf(sfp, "%s /%s HTTP/1.1\r\n",
+ httpcmd(sfp, "%s /%s HTTP/1.1\r\n",
(option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
target.path);
}
-
- fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
- target.host, G.user_agent);
+ if (!USR_HEADER_HOST)
+ httpcmd(sfp, "Host: %s\r\n", target.host);
+ if (!USR_HEADER_USER_AGENT)
+ httpcmd(sfp, "User-Agent: %s\r\n", G.user_agent);
/* Ask server to close the connection as soon as we are done
* (IOW: we do not intend to send more requests)
*/
- fprintf(sfp, "Connection: close\r\n");
+ httpcmd(sfp, "Connection: close\r\n");
#if ENABLE_FEATURE_WGET_AUTHENTICATION
- if (target.user) {
- fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
+ if (target.user && !USR_HEADER_AUTHORIZATION) {
+ httpcmd(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
base64enc(target.user));
}
- if (use_proxy && server.user) {
- fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
+ if (use_proxy && server.user && !USR_HEADER_PROXY_AUTHORIZATION) {
+ httpcmd(sfp, "Proxy-Authorization: Basic %s\r\n",
base64enc(server.user));
}
#endif
- if (G.beg_range != 0)
- fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
+ if (G.beg_range != 0 && !USR_HEADER_RANGE)
+ httpcmd(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
- if (G.extra_headers)
+ if (G.extra_headers) {
+ log_io(G.extra_headers);
fputs(G.extra_headers, sfp);
+ }
if (option_mask32 & WGET_OPT_POST_DATA) {
- fprintf(sfp,
+ httpcmd(sfp,
"Content-Type: application/x-www-form-urlencoded\r\n"
"Content-Length: %u\r\n"
"\r\n"
@@ -876,7 +910,7 @@ static void download_one_url(const char *url)
} else
#endif
{
- fprintf(sfp, "\r\n");
+ httpcmd(sfp, "\r\n");
}
fflush(sfp);
@@ -1115,7 +1149,21 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
}
G.extra_headers = cp = xmalloc(size);
while (headers_llist) {
- cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
+ int usr = -1, idx = 0;
+ const char* words = wget_user_headers;
+ size = sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
+ /* a bit like index_in_substrings but don't match full key */
+ while (*words) {
+ if (strstr(cp, words) == cp) {
+ usr = idx;
+ break;
+ }
+ ++idx;
+ words += strlen(words) + 1;
+ }
+ if (usr != -1)
+ G.user_headers |= (1 << usr);
+ cp += size;
}
}
#endif
function old new delta
httpcmd - 257 +257
wget_main 2967 3129 +162
wget_user_headers - 62 +62
.rodata 155809 155871 +62
retrieve_file_data 435 456 +21
progress_meter 120 126 +6
------------------------------------------------------------------------------
(add/remove: 2/0 grow/shrink: 4/0 up/down: 570/0) Total: 570 bytes
diff --git a/networking/wget.c b/networking/wget.c
index 1013f66..de7f3e1 100644
--- a/networking/wget.c
+++ b/networking/wget.c
@@ -36,13 +36,14 @@
#include "libbb.h"
-#if 0
+#undef DEBUG
+#define DEBUG 0
+#if DEBUG
# define log_io(...) bb_error_msg(__VA_ARGS__)
#else
# define log_io(...) ((void)0)
#endif
-
struct host_info {
char *allocated;
const char *path;
@@ -55,6 +56,15 @@ static const char P_FTP[] = "ftp";
static const char P_HTTP[] = "http";
static const char P_HTTPS[] = "https";
+#if ENABLE_FEATURE_WGET_LONG_OPTIONS
+/* User-specified headers prevent using our corresponding built-in headers. */
+static const char wget_user_headers[] ALIGN1 =
+ "Host:\0User-Agent:\0Range:\0"
+# if ENABLE_FEATURE_WGET_AUTHENTICATION
+ "Authorization:\0Proxy-Authorization:\0"
+# endif
+ ;
+#endif
/* Globals */
struct globals {
@@ -69,6 +79,7 @@ struct globals {
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
char *post_data;
char *extra_headers;
+ unsigned char user_headers; /* Headers mentioned by the user */
#endif
char *fname_out; /* where to direct output (-O) */
const char *proxy_flag; /* Use proxies if env vars are set */
@@ -722,6 +733,41 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
progress_meter(PROGRESS_END);
}
+static void httpcmd(FILE *fp, const char *format, ...)
+{
+ va_list ap;
+ char *fmt;
+# if ENABLE_FEATURE_WGET_LONG_OPTIONS
+ const char* words = wget_user_headers;
+ int idx = 0;
+
+ /* a bit like index_in_substrings but don't match full key */
+ while (*words) {
+ if (strstr(format, words) == format
+ && (G.user_headers & (1 << idx))) {
+ /* Prefer the user supplied header */
+ return;
+ }
+ words += strlen(words) + 1;
+ ++idx;
+ }
+#endif
+ fmt = xasprintf("%s\r\n", format);
+ va_start(ap, format);
+ if (DEBUG) {
+ va_list ap_dbg;
+ char *fmt_dbg = xasprintf("> %s", format);
+
+ va_copy(ap_dbg, ap);
+ bb_verror_msg(fmt_dbg, ap_dbg, NULL);
+ va_end(ap_dbg);
+ free(fmt_dbg);
+ }
+ vfprintf(fp, fmt, ap);
+ va_end(ap);
+ free(fmt);
+}
+
static void download_one_url(const char *url)
{
bool use_proxy; /* Use proxies if env vars are set */
@@ -830,53 +876,53 @@ static void download_one_url(const char *url)
#endif
/* Send HTTP request */
if (use_proxy) {
- fprintf(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
+ httpcmd(sfp, "GET %s://%s/%s HTTP/1.1",
target.protocol, target.host,
target.path);
} else {
- fprintf(sfp, "%s /%s HTTP/1.1\r\n",
+ httpcmd(sfp, "%s /%s HTTP/1.1",
(option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
target.path);
}
-
- fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
- target.host, G.user_agent);
+ httpcmd(sfp, "Host: %s", target.host);
+ httpcmd(sfp, "User-Agent: %s", G.user_agent);
/* Ask server to close the connection as soon as we are done
* (IOW: we do not intend to send more requests)
*/
- fprintf(sfp, "Connection: close\r\n");
+ httpcmd(sfp, "Connection: close");
#if ENABLE_FEATURE_WGET_AUTHENTICATION
if (target.user) {
- fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
+ httpcmd(sfp, "Proxy-Authorization: Basic %s"+6,
base64enc(target.user));
}
if (use_proxy && server.user) {
- fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
+ httpcmd(sfp, "Proxy-Authorization: Basic %s",
base64enc(server.user));
}
#endif
if (G.beg_range != 0)
- fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
+ httpcmd(sfp, "Range: bytes=%"OFF_FMT"u-", G.beg_range);
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
- if (G.extra_headers)
+ if (G.extra_headers) {
+ /* the debug output is not pretty because it's different to
+ * all the other httpcmd; Not worth the bits though */
+ log_io(G.extra_headers);
fputs(G.extra_headers, sfp);
+ }
if (option_mask32 & WGET_OPT_POST_DATA) {
- fprintf(sfp,
- "Content-Type: application/x-www-form-urlencoded\r\n"
- "Content-Length: %u\r\n"
- "\r\n"
- "%s",
- (int) strlen(G.post_data), G.post_data
- );
+ httpcmd(sfp, "Content-Type: application/x-www-form-urlencoded");
+ httpcmd(sfp, "Content-Length: %u", (int) strlen(G.post_data));
+ httpcmd(sfp, "");
+ httpcmd(sfp, "%s", G.post_data);
} else
#endif
{
- fprintf(sfp, "\r\n");
+ httpcmd(sfp, "");
}
fflush(sfp);
@@ -1115,7 +1161,21 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
}
G.extra_headers = cp = xmalloc(size);
while (headers_llist) {
- cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
+ int usr = -1, idx = 0;
+ const char* words = wget_user_headers;
+ size = sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
+ /* a bit like index_in_substrings but don't match full key */
+ while (*words) {
+ if (strstr(cp, words) == cp) {
+ usr = idx;
+ break;
+ }
+ ++idx;
+ words += strlen(words) + 1;
+ }
+ if (usr != -1)
+ G.user_headers |= (1 << usr);
+ cp += size;
}
}
#endif
_______________________________________________
busybox mailing list
busybox@busybox.net
http://lists.busybox.net/mailman/listinfo/busybox