Thanks for the updated patch and for sticking around through my nitpicky reviews.
I'd like to spend some more time reviewing this patch and testing it out. So, a full review will likely have to wait till the coming weekend. * adham elkarn <[email protected]> [190506 11:26]: > From: sulfastor <[email protected]> > > Hi, Thank you again Darshit for your response. The RejectHeaderField > rule rejects ANY header > of the header field while RejectHeader rejects ONLY the specified full > header. > Since we wanted to be sure a header field is not sent to the server we > wrote this rule. > > * doc/wget.texi: Added --disable-header documentation. > * fuzz/wget_options_fuzzer.dict: Update with --disable-header inputs. > * src/http.c (disabled_header): Checks for disabled headers > (request_set_header): Doesn't let header to be set if disabled > (gethttp): frees disabled header to let overriding > * src/init.c (cmd_dis_header), (check_user_disabled_header) added new > option disabled_headers. > * src/main.c: added new option --disable-header, added help > description > * src/options.h: added new option --disable-header > * src/utils.h (vec_remove_header) > * src/utils.c (vec_remove_header) removes all header instances from > vector > * testenv/Makefile.am: Added new test files > * testenv/server/http/http_server.py: Added new rule RejectHeaderField > * testenv/conf/reject_header_field.py: Added new rule > RejectHeaderField > * testenv/README: Added help description for new rule > * testenv/Test-disable-default-headers.py: Test without using --header > * testenv/Test-disable-headers-after.py: Test using --header before > --disable-header > * testenv/Test-disable-headers-before.py: Test using --header after > --disable-header > > Signed-off-by: sulfastor <[email protected]>, adham elkarn > <[email protected]> > --- > doc/wget.texi | 15 +++++ > fuzz/wget_options_fuzzer.dict | 17 ++++++ > src/http.c | 28 ++++++++- > src/init.c | 48 ++++++++++++--- > src/main.c | 3 + > src/options.h | 1 + > src/utils.c | 43 +++++++++++++ > src/utils.h | 1 + > testenv/Makefile.am | 3 + > testenv/README | 4 ++ > testenv/Test-disable-default-headers.py | 74 +++++++++++++++++++++++ > testenv/Test-disable-headers-after.py | 80 +++++++++++++++++++++++++ > testenv/Test-disable-headers-before.py | 78 ++++++++++++++++++++++++ > testenv/conf/reject_header_field.py | 12 ++++ > testenv/server/http/http_server.py | 8 +++ > 15 files changed, 405 insertions(+), 10 deletions(-) > create mode 100644 testenv/Test-disable-default-headers.py > create mode 100644 testenv/Test-disable-headers-after.py > create mode 100644 testenv/Test-disable-headers-before.py > create mode 100644 testenv/conf/reject_header_field.py > > diff --git a/doc/wget.texi b/doc/wget.texi > index 7eada2dd..6301cbf6 100644 > --- a/doc/wget.texi > +++ b/doc/wget.texi > @@ -1542,6 +1542,21 @@ wget --header="Host: foo.bar" http://localhost/ > In versions of Wget prior to 1.10 such use of @samp{--header} caused > sending of duplicate headers. > > +@cindex disable header, choose > +@item --disable-header=@var{list} > +Specify comma-separated header fields to remove from the @sc{http} > +request. > + > +@example > +@group > +wget --disable-header='Accept,User-Agent,Authorization' \ > + https://example.com/ > +@end group > +@end example > + > +Specifying a header field with @samp{--header} after disabling it > +will override it and include it in the @sc{http} request headers. > + > @cindex Content-Encoding, choose > @item --compression=@var{type} > Choose the type of compression to be used. Legal values are > diff --git a/fuzz/wget_options_fuzzer.dict b/fuzz/wget_options_fuzzer.dict > index 9a2dbd8e..12d54d60 100644 > --- a/fuzz/wget_options_fuzzer.dict > +++ b/fuzz/wget_options_fuzzer.dict > @@ -30,6 +30,22 @@ > "human" > "csv" > "json" > +"Authorization" > +"User-Agent" > +"Referer" > +"Cache-Control" > +"Pragma" > +"If-Modified-Since" > +"Range" > +"Accept" > +"Accept-Encoding" > +"Host" > +"Connection" > +"Proxy-Connection" > +"Content-Type" > +"Content-Length" > +"Proxy-Authorization" > +"Cookie" > "accept=" > "accept-regex=" > "adjust-extension=" > @@ -66,6 +82,7 @@ > "delete-after=" > "directories=" > "directory-prefix=" > +"disable-header=" > "dns-caching=" > "dns-timeout=" > "domains=" > diff --git a/src/http.c b/src/http.c > index 289d1101..24b16cf1 100644 > --- a/src/http.c > +++ b/src/http.c > @@ -88,6 +88,7 @@ static char *basic_authentication_encode (const char *, > const char *); > static bool known_authentication_scheme_p (const char *, const char *); > static void ensure_extension (struct http_stat *, const char *, int *); > static void load_cookies (void); > +static bool disabled_header (const char*); > > static bool cookies_loaded_p; > static struct cookie_jar *wget_cookie_jar; > @@ -236,7 +237,7 @@ request_set_header (struct request *req, const char > *name, const char *value, > struct request_header *hdr; > int i; > > - if (!value) > + if (disabled_header (name) || !value) > { > /* A NULL value is a no-op; if freeing the name is requested, > free it now to avoid leaks. */ > @@ -1842,6 +1843,27 @@ time_to_rfc1123 (time_t time, char *buf, size_t > bufsize) > return RETROK; > } > > +static bool > +disabled_header (const char* header_name) > +{ > + char** p = opt.disabled_headers; > + char *s; > + size_t n; > + > + if (!p) > + return 0; > + > + for (; *p; ++p) > + { > + s = strchrnul (header_name, ':'); > + n = (size_t) (s - header_name); > + if (n == strlen (*p) && 0 == strncmp (header_name, *p, n)) > + return 1; > + } > + > + return 0; > +} > + > static struct request * > initialize_request (const struct url *u, struct http_stat *hs, int *dt, > struct url *proxy, > bool inhibit_keep_alive, bool *basic_auth_finished, > @@ -3263,6 +3285,10 @@ gethttp (const struct url *u, struct url > *original_url, struct http_stat *hs, > if (opt.user_headers) > { > int i; > + /* Empty the disabled headers as they are no longer used > + and this will let headers to be overriden by the user */ > + free_vec (opt.disabled_headers); > + opt.disabled_headers = NULL; > for (i = 0; opt.user_headers[i]; i++) > request_set_user_header (req, opt.user_headers[i]); > } > diff --git a/src/init.c b/src/init.c > index 9b6665a6..d1d8112d 100644 > --- a/src/init.c > +++ b/src/init.c > @@ -101,6 +101,7 @@ CMD_DECLARE (cmd_spec_compression); > #endif > CMD_DECLARE (cmd_spec_dirstruct); > CMD_DECLARE (cmd_spec_header); > +CMD_DECLARE (cmd_dis_header); > CMD_DECLARE (cmd_spec_warc_header); > CMD_DECLARE (cmd_spec_htmlify); > CMD_DECLARE (cmd_spec_mirror); > @@ -183,6 +184,7 @@ static const struct { > { "deleteafter", &opt.delete_after, cmd_boolean }, > { "dirprefix", &opt.dir_prefix, cmd_directory }, > { "dirstruct", NULL, cmd_spec_dirstruct }, > + { "disableheader", NULL, cmd_dis_header }, > { "dnscache", &opt.dns_cache, cmd_boolean }, > #ifdef HAVE_LIBCARES > { "dnsservers", &opt.dns_servers, cmd_string }, > @@ -398,6 +400,7 @@ defaults (void) > opt.metalink_index = -1; > #endif > > + opt.disabled_headers = NULL; > opt.cookies = true; > opt.verbose = -1; > opt.ntry = 20; > @@ -1458,7 +1461,7 @@ cmd_cert_type (const char *com, const char *val, void > *place) > /* Specialized helper functions, used by `commands' to handle some > options specially. */ > > -static bool check_user_specified_header (const char *); > +static bool check_user_specified_header (const char *, bool); > > #ifdef HAVE_LIBZ > static bool > @@ -1493,6 +1496,33 @@ cmd_spec_dirstruct (const char *com, const char *val, > void *place_ignored _GL_UN > return true; > } > > +static bool > +cmd_dis_header (const char *com, const char *val, void *place_ignored > _GL_UNUSED) > +{ > + /* Empty value means reset the list of headers. */ > + if (*val == '\0') > + { > + free_vec (opt.disabled_headers); > + opt.disabled_headers = NULL; > + return true; > + } > + cmd_vector (com, val, &opt.disabled_headers); > + char **p = opt.disabled_headers; > + for (; *p; ++p) > + { > + if (!check_user_specified_header (*p, false)) > + { > + fprintf (stderr, _("%s: %s: Invalid header field %s.\n"), > + exec_name, com, quote (*p)); > + free_vec (opt.disabled_headers); > + opt.disabled_headers = NULL; > + return false; > + } > + opt.user_headers = vec_remove_header (opt.user_headers, *p); > + } > + return true; > +} > + > static bool > cmd_spec_header (const char *com, const char *val, void *place_ignored > _GL_UNUSED) > { > @@ -1504,7 +1534,7 @@ cmd_spec_header (const char *com, const char *val, void > *place_ignored _GL_UNUSE > return true; > } > > - if (!check_user_specified_header (val)) > + if (!check_user_specified_header (val, true)) > { > fprintf (stderr, _("%s: %s: Invalid header %s.\n"), > exec_name, com, quote (val)); > @@ -1525,7 +1555,7 @@ cmd_spec_warc_header (const char *com, const char *val, > void *place_ignored _GL_ > return true; > } > > - if (!check_user_specified_header (val)) > + if (!check_user_specified_header (val, true)) > { > fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"), > exec_name, com, quote (val)); > @@ -1851,18 +1881,17 @@ simple_atof (const char *beg, const char *end, double > *dest) > contain newlines. */ > > static bool > -check_user_specified_header (const char *s) > +check_user_specified_header (const char *s, bool full_header) > { > const char *p; > > for (p = s; *p && *p != ':' && !c_isspace (*p); p++) > ; > - /* The header MUST contain `:' preceded by at least one > - non-whitespace character. */ > - if (*p != ':' || p == s) > - return false; > + if (*p != ':' && full_header) > + return false; /* The header MUST contain `:' preceded by at least one > + non-whitespace character. */ > /* The header MUST NOT contain newlines. */ > - if (strchr (s, '\n')) > + if (p == s || strchr (s, '\n')) > return false; > return true; > } > @@ -1977,6 +2006,7 @@ cleanup (void) > xfree (opt.http_passwd); > xfree (opt.dot_style); > free_vec (opt.user_headers); > + free_vec (opt.disabled_headers); > free_vec (opt.warc_user_headers); > # ifdef HAVE_SSL > xfree (opt.cert_file); > diff --git a/src/main.c b/src/main.c > index 65b7f3f3..5971e83d 100644 > --- a/src/main.c > +++ b/src/main.c > @@ -304,6 +304,7 @@ static struct cmdline_option option_data[] = > { "delete-after", 0, OPT_BOOLEAN, "deleteafter", -1 }, > { "directories", 0, OPT_BOOLEAN, "dirstruct", -1 }, > { "directory-prefix", 'P', OPT_VALUE, "dirprefix", -1 }, > + { "disable-header", 0, OPT_VALUE, "disableheader", -1 }, > { "dns-cache", 0, OPT_BOOLEAN, "dnscache", -1 }, > #ifdef HAVE_LIBCARES > { "dns-servers", 0, OPT_VALUE, "dnsservers", -1 }, > @@ -792,6 +793,8 @@ HTTP options:\n"), > --ignore-length ignore 'Content-Length' header field\n"), > N_("\ > --header=STRING insert STRING among the headers\n"), > + N_("\ > + --disable-header=LIST comma-separated list of disabled header > fields\n"), > #ifdef HAVE_LIBZ > N_("\ > --compression=TYPE choose compression, one of auto, gzip and > none. (default: none)\n"), > diff --git a/src/options.h b/src/options.h > index 881e2b2e..5559c6fa 100644 > --- a/src/options.h > +++ b/src/options.h > @@ -147,6 +147,7 @@ struct options > char *http_user; /* HTTP username. */ > char *http_passwd; /* HTTP password. */ > char **user_headers; /* User-defined header(s). */ > + char **disabled_headers; /* User-disabled header(s) */ > bool http_keep_alive; /* whether we use keep-alive */ > > bool use_proxy; /* Do we use proxy? */ > diff --git a/src/utils.c b/src/utils.c > index 89a87b07..6168a0ff 100644 > --- a/src/utils.c > +++ b/src/utils.c > @@ -1405,6 +1405,49 @@ wget_read_file_free (struct file_memory *fm) > xfree (fm); > } > > +/* Removes a header from a request headers vector */ > +char ** > +vec_remove_header (char **vec, const char *str) > +{ > + char* s; > + int i, cnt; /* count of vector elements */ > + size_t n; > + > + if (vec != NULL) > + { > + for (cnt = 0; vec[cnt]; cnt++) > + ; > + /* remove all duplicates */ > + i = 0; > + while (vec[i]) > + { > + s = strchrnul (vec[i], ':'); > + n = (size_t) (s - vec[i]); > + if (n == strlen (str) && 0 == strncmp (vec[i], str, n)) > + { > + if (cnt == 1) > + { > + xfree (vec[0]); > + vec = xrealloc (vec, sizeof (char *)); > + vec[0] = NULL; > + return vec; > + } > + else > + { > + vec[i] = xstrdup (vec[cnt-1]); > + xfree (vec[cnt-1]); > + vec = xrealloc (vec, cnt * sizeof (char *)); > + vec[cnt - 1] = NULL; > + --cnt; > + } > + } > + else > + ++i; > + } > + } > + return vec; > +} > + > /* Free the pointers in a NULL-terminated vector of pointers, then > free the pointer itself. */ > void > diff --git a/src/utils.h b/src/utils.h > index 3cffd2a5..3c856bb5 100644 > --- a/src/utils.h > +++ b/src/utils.h > @@ -110,6 +110,7 @@ bool has_html_suffix_p (const char *); > struct file_memory *wget_read_file (const char *); > void wget_read_file_free (struct file_memory *); > > +char ** vec_remove_header (char **, const char *); > void free_vec (char **); > char **merge_vecs (char **, char **); > char **vec_append (char **, const char *); > diff --git a/testenv/Makefile.am b/testenv/Makefile.am > index b5a39ad2..4b3e2d08 100644 > --- a/testenv/Makefile.am > +++ b/testenv/Makefile.am > @@ -95,6 +95,9 @@ if HAVE_PYTHON3 > Test-cookie-domain-mismatch.py \ > Test-cookie-expires.py \ > Test-cookie.py \ > + Test-disable-default-headers.py \ > + Test-disable-headers-after.py \ > + Test-disable-headers-before.py \ > Test-Head.py \ > Test-hsts.py \ > Test--https.py \ > diff --git a/testenv/README b/testenv/README > index 6580bc99..d2f38a67 100644 > --- a/testenv/README > +++ b/testenv/README > @@ -182,6 +182,10 @@ This section lists the currently supported File Rules > and their structure. > * RejectHeader : This list of Headers must NEVER occur in a request. It > uses the same value format as ExpectHeader. > > + * RejectHeaderField : This list of Headers Fields must NOT appear in a > request. > + The value for this key is a list of strings where each header field is > represented as: > + |-->Header Field: <Header Field Name> > + > * SendHeader : This list of Headers will be sent in EVERY response to > a > request for the respective file. It follows the same value format as > ExpectHeader. Additionally you can specify a list of strings as <Header > Data> > diff --git a/testenv/Test-disable-default-headers.py > b/testenv/Test-disable-default-headers.py > new file mode 100644 > index 00000000..4d8ed4db > --- /dev/null > +++ b/testenv/Test-disable-default-headers.py > @@ -0,0 +1,74 @@ > +#!/usr/bin/env python3 > +from sys import exit > +from test.http_test import HTTPTest > +from test.base_test import HTTP, HTTPS > +from misc.wget_file import WgetFile > + > +""" > + This is test ensures that the --disable-header option removes default > request > + headers. There aren't any user defined header. > +""" > +############# File Definitions > ############################################### > +file_content = """Les paroles de la bouche d'un homme sont des eaux > profondes; > +La source de la sagesse est un torrent qui jaillit.""" > + > +Headers = { > + 'Authorization', > + 'User-Agent', > + 'Referer', > + 'Cache-Control', > + 'Pragma', > + 'If-Modified-Since', > + 'Range', > + 'Accept', > + 'Accept-Encoding', > + 'Host', > + 'Connection', > + 'Proxy-Connection', > + 'Content-Type', > + 'Content-Length', > + 'Proxy-Authorization', > + 'Cookie', > + 'MyHeader', > +} > + > +WGET_OPTIONS = '--disable-header="' > +WGET_URLS = [[]] > +Files = [[]] > +headers_len = len(Headers) > + > +for index, header in enumerate(Headers, start=1): > + File_rules = { > + "RejectHeaderField" : { > + header > + } > + } > + file_name = "File" + str(index) > + Files[0].append (WgetFile(file_name, file_content, rules=File_rules)) > + WGET_OPTIONS += header + (',' if index < headers_len else '"') > + WGET_URLS[0].append (file_name) > + > +Servers = [HTTP] > + > +ExpectedReturnCode = 0 > + > +################ Pre and Post Test Hooks > ##################################### > +pre_test = { > + "ServerFiles" : Files > +} > +test_options = { > + "WgetCommands" : WGET_OPTIONS, > + "Urls" : WGET_URLS > +} > +post_test = { > + "ExpectedRetcode" : ExpectedReturnCode > +} > + > +err = HTTPTest ( > + pre_hook=pre_test, > + test_params=test_options, > + post_hook=post_test, > + protocols=Servers > +).begin () > + > +exit (err) > diff --git a/testenv/Test-disable-headers-after.py > b/testenv/Test-disable-headers-after.py > new file mode 100644 > index 00000000..344301a3 > --- /dev/null > +++ b/testenv/Test-disable-headers-after.py > @@ -0,0 +1,80 @@ > +#!/usr/bin/env python3 > +from sys import exit > +from test.http_test import HTTPTest > +from test.base_test import HTTP, HTTPS > +from misc.wget_file import WgetFile > + > +""" > + This is test ensures that the --disable-header option removes user > headers > + from the HTTP request when it's placed after --header="header: value". > +""" > +############# File Definitions > ############################################### > +file_content = """Les paroles de la bouche d'un homme sont des eaux > profondes; > +La source de la sagesse est un torrent qui jaillit.""" > + > +Headers = { > + 'Authorization', > + 'User-Agent', > + 'Referer', > + 'Cache-Control', > + 'Pragma', > + 'If-Modified-Since', > + 'Range', > + 'Accept', > + 'Accept-Encoding', > + 'Host', > + 'Connection', > + 'Proxy-Connection', > + 'Content-Type', > + 'Content-Length', > + 'Proxy-Authorization', > + 'Cookie', > + 'MyHeader', > +} > + > +WGET_OPTIONS = '' > +WGET_URLS = [[]] > +Files = [[]] > + > +# Define user defined headers > +for header in Headers: > + WGET_OPTIONS += ' --header="' + header + ': any"' > + > +WGET_OPTIONS += ' --disable-header="' > +headers_len = len(Headers) > + > +for index, header in enumerate(Headers, start=1): > + File_rules = { > + "RejectHeader" : { > + header : 'any' > + } > + } > + file_name = "File" + str(index) > + Files[0].append(WgetFile(file_name, file_content, rules=File_rules)) > + WGET_OPTIONS += header + (',' if index < headers_len else '"') > + WGET_URLS[0].append(file_name) > + > +Servers = [HTTP] > + > +ExpectedReturnCode = 0 > + > +################ Pre and Post Test Hooks > ##################################### > +pre_test = { > + "ServerFiles" : Files > +} > +test_options = { > + "WgetCommands" : WGET_OPTIONS, > + "Urls" : WGET_URLS > +} > +post_test = { > + "ExpectedRetcode" : ExpectedReturnCode > +} > + > +err = HTTPTest ( > + pre_hook=pre_test, > + test_params=test_options, > + post_hook=post_test, > + protocols=Servers > +).begin () > + > +exit (err) > diff --git a/testenv/Test-disable-headers-before.py > b/testenv/Test-disable-headers-before.py > new file mode 100644 > index 00000000..bc19fda9 > --- /dev/null > +++ b/testenv/Test-disable-headers-before.py > @@ -0,0 +1,78 @@ > +#!/usr/bin/env python3 > +from sys import exit > +from test.http_test import HTTPTest > +from test.base_test import HTTP, HTTPS > +from misc.wget_file import WgetFile > + > +""" > + This is test ensures that the --disable-header option doesn't remove > user headers > + from the HTTP request when it's placed before --header="header: value". > +""" > +############# File Definitions > ############################################### > +file_content = """Les paroles de la bouche d'un homme sont des eaux > profondes; > +La source de la sagesse est un torrent qui jaillit.""" > + > +Headers = { > + 'Authorization', > + 'User-Agent', > + 'Referer', > + 'Cache-Control', > + 'Pragma', > + 'If-Modified-Since', > + 'Range', > + 'Accept', > + 'Accept-Encoding', > + 'Host', > + 'Connection', > + 'Proxy-Connection', > + 'Content-Type', > + 'Content-Length', > + 'Proxy-Authorization', > + 'Cookie', > + 'MyHeader', > +} > + > +WGET_OPTIONS = '--disable-header="' > +WGET_URLS = [[]] > +Files = [[]] > +headers_len = len(Headers) > + > +for index, header in enumerate(Headers, start=1): > + File_rules = { > + "ExpectHeader" : { > + header : 'any' > + } > + } > + file_name = "File" + str(index) > + Files[0].append (WgetFile(file_name, file_content, rules=File_rules)) > + WGET_OPTIONS += header + (',' if index < headers_len else '"') > + WGET_URLS[0].append (file_name) > + > +# Define user defined headers > +for header in Headers: > + WGET_OPTIONS += ' --header="' + header + ': any"' > + > +Servers = [HTTP] > + > +ExpectedReturnCode = 0 > + > +################ Pre and Post Test Hooks > ##################################### > +pre_test = { > + "ServerFiles" : Files > +} > +test_options = { > + "WgetCommands" : WGET_OPTIONS, > + "Urls" : WGET_URLS > +} > +post_test = { > + "ExpectedRetcode" : ExpectedReturnCode > +} > + > +err = HTTPTest ( > + pre_hook=pre_test, > + test_params=test_options, > + post_hook=post_test, > + protocols=Servers > +).begin () > + > +exit (err) > diff --git a/testenv/conf/reject_header_field.py > b/testenv/conf/reject_header_field.py > new file mode 100644 > index 00000000..e1009cdd > --- /dev/null > +++ b/testenv/conf/reject_header_field.py > @@ -0,0 +1,12 @@ > +from conf import rule > + > +""" Rule: RejectHeaderField > +This is a server side rule which expects a string list of Header Fields > +which should be blacklisted by the server for a particular file's requests. > +""" > + > + > +@rule() > +class RejectHeaderField: > + def __init__(self, header_fields): > + self.header_fields = header_fields > diff --git a/testenv/server/http/http_server.py > b/testenv/server/http/http_server.py > index 2cc82fb9..6f358335 100644 > --- a/testenv/server/http/http_server.py > +++ b/testenv/server/http/http_server.py > @@ -370,6 +370,14 @@ class _Handler(BaseHTTPRequestHandler): > header_line) > raise ServerError("Header " + header_line + ' received') > > + def RejectHeaderField(self, header_fields_obj): > + rej_header_fields = header_fields_obj.header_fields > + for field in rej_header_fields: > + if field in self.headers: > + self.send_error(400, 'Blacklisted Header Field %s received' % > + field) > + raise ServerError('Header Field %s received' % field) > + > def __log_request(self, method): > req = method + " " + self.path > self.server.request_headers.append(req) > -- > 2.21.0 > -- Thanking You, Darshit Shah PGP Fingerprint: 7845 120B 07CB D8D6 ECE5 FF2B 2A17 43ED A91A 35B6
signature.asc
Description: PGP signature
