I have reworked my patches. Specifically: 1) The --if-modified-since option is enabled by default and only has an effect in timestamping mode. And yes, --no-if-modified-since is added automatically. 2) I added all legal date formats to my test. 3) I added another case to my test (local file is strictly newer than remote file). 4) If time_to_rfc1123 fails, there is a simple fallback behavior. 5) I added workaround behavior for servers ignoring If-Modified-Since (for example, our Perl test server).
Patches are attached here as well as on Github for easy viewing. https://github.com/jy987321/Wget/commits/master-hubert Thank you, Hubert W dniu 14.05.2015 o 22:35, Hubert Tarasiuk pisze: > W dniu 14.05.2015 o 21:12, Tim Rühsen pisze: >> Am Donnerstag, 14. Mai 2015, 15:43:54 schrieb Hubert Tarasiuk: >>> W dniu 13.05.2015 o 13:28, Ander Juaristi pisze: >>>> And second, I'm not really sure whether --condget is the best name for >>>> the switch. >>>> Requests that include any of If-Unmodified-Since, If-Match, >>>> If-None-Match, or If-Range >>>> header fields are also "conditional GETs" as well. >>>> We might want to implement one of those in the future and we'd be forced >>>> to choose a name which could easily be >>>> inconsistent/confusing with --condget. Or maybe we won't. But we don't >>>> know that now, so I think >>>> it's better to choose a switch more specific to the fact that an >>>> If-Modified-Since header will be sent >>>> so as to avoid confusion. >>> >>> Do you have an idea for a better switch name that would not be too long? >>> I have noticed that issue earlier, but could not think of a better name >>> that would not be too long. :D >>> >>> Thank you for the suggestions, >> >> Hi Hubert, >> >> why not --if-modified-since as a boolean option ? > Sounds good. >> >> I personally would set it to true by default, since it is a very >> common/basic >> HTTP 1.1 header. > Ok, I will name the option "--no-if-modified-since" and will enable that > by default. >> >> Regards, Tim >> >
From f4599cae4056cab7b34968b5d76700bd9ea2e547 Mon Sep 17 00:00:00 2001 From: Hubert Tarasiuk <[email protected]> Date: Thu, 7 May 2015 18:34:17 +0200 Subject: [PATCH 1/6] Implement timestamp support for local files in testenv * testenv/README: Change timestamp format definition * testenv/conf/local_files.py: Set proper timestamps --- testenv/README | 2 +- testenv/conf/local_files.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/testenv/README b/testenv/README index 081a957..cf22f89 100644 --- a/testenv/README +++ b/testenv/README @@ -124,7 +124,7 @@ WgetFile (str name, str contents, str timestamp, dict rules) None except name is a mandatory paramter, one may pass only those parameters that are required by the File object. -The timestamp string should be a valid Unix Timestamp as defined in RFC xxxx. +The timestamp string should be in a format: "YYYY-MM-DD HH:MM:SS" in UTC zone. The rules object is a dictionary element, with the key as the Rule Name and value as the Rule Data. In most cases, the Rule Data is another dictionary. diff --git a/testenv/conf/local_files.py b/testenv/conf/local_files.py index 5f9c8fa..908ced1 100644 --- a/testenv/conf/local_files.py +++ b/testenv/conf/local_files.py @@ -1,3 +1,6 @@ +from os import utime +from time import strptime +from calendar import timegm from conf import hook """ Pre-Test Hook: LocalFiles @@ -16,3 +19,8 @@ class LocalFiles: for f in self.local_files: with open(f.name, 'w') as fp: fp.write(f.content) + if f.timestamp is not None: + tstamp = timegm(strptime(f.timestamp, '%Y-%m-%d %H:%M:%S')) + atime = tstamp + mtime = tstamp + utime(f.name, (atime, mtime)) -- 2.4.0
From bcbf875fe9292ce122748b41397a29c50def2149 Mon Sep 17 00:00:00 2001 From: Hubert Tarasiuk <[email protected]> Date: Mon, 11 May 2015 12:34:05 +0200 Subject: [PATCH 2/6] Support conditional GET in testenv server. * src/exc/server_error.py: Add exception for GET to HEAD fallback. * src/server/http/http_server.py: Do not send body if 304 return code requested for a file. --- testenv/exc/server_error.py | 6 ++++++ testenv/server/http/http_server.py | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/testenv/exc/server_error.py b/testenv/exc/server_error.py index dc676a8..fe359f5 100644 --- a/testenv/exc/server_error.py +++ b/testenv/exc/server_error.py @@ -6,6 +6,12 @@ class ServerError (Exception): def __init__(self, err_message): self.err_message = err_message +class NoBodyServerError (Exception): + """ A custom exception which is raised by the test servers. + Used if no body should be sent in response. """ + + def __init__(self, err_message): + self.err_message = err_message class AuthError (ServerError): """ A custom exception raised byt he servers when authentication of the diff --git a/testenv/server/http/http_server.py b/testenv/server/http/http_server.py index 7c0b472..2356f1c 100644 --- a/testenv/server/http/http_server.py +++ b/testenv/server/http/http_server.py @@ -1,5 +1,5 @@ from http.server import HTTPServer, BaseHTTPRequestHandler -from exc.server_error import ServerError, AuthError +from exc.server_error import ServerError, AuthError, NoBodyServerError from socketserver import BaseServer from posixpath import basename, splitext from base64 import b64encode @@ -201,6 +201,8 @@ class _Handler(BaseHTTPRequestHandler): def Response(self, resp_obj): self.send_response(resp_obj.response_code) self.finish_headers() + if resp_obj.response_code == 304: + raise NoBodyServerError("Conditional get falling to head") raise ServerError("Custom Response code sent.") def custom_response(self): @@ -401,6 +403,9 @@ class 
_Handler(BaseHTTPRequestHandler): except AuthError as ae: print(ae.__str__()) return(None, None) + except NoBodyServerError as nbse: + print(nbse.__str__()) + return(None, None) except ServerError as se: print(se.__str__()) return(content, None) -- 2.4.0
From e42aad867381458fd6bb3e4cb59fd50a1fbaedc1 Mon Sep 17 00:00:00 2001 From: Hubert Tarasiuk <[email protected]> Date: Thu, 7 May 2015 18:45:10 +0200 Subject: [PATCH 3/6] Add test for condget requests. * testenv/Test-condget.py: the test * testenv/Makefile.am: add to tests list --- testenv/Makefile.am | 3 +- testenv/Test-condget.py | 140 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 1 deletion(-) create mode 100755 testenv/Test-condget.py diff --git a/testenv/Makefile.am b/testenv/Makefile.am index 9acf0f3..1058421 100644 --- a/testenv/Makefile.am +++ b/testenv/Makefile.am @@ -54,7 +54,8 @@ if HAVE_PYTHON3 Test-504.py \ Test--spider-r.py \ Test-redirect-crash.py \ - Test-reserved-chars.py + Test-reserved-chars.py \ + Test-condget.py # added test cases expected to fail here and under TESTS XFAIL_TESTS = diff --git a/testenv/Test-condget.py b/testenv/Test-condget.py new file mode 100755 index 0000000..c9e8b2a --- /dev/null +++ b/testenv/Test-condget.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python3 +from sys import exit +from test.http_test import HTTPTest +from misc.wget_file import WgetFile + +""" + Simple test for HTTP Conditional-GET Requests using the -N command +""" +TEST_NAME = "HTTP Conditional-GET Requests" +############# File Definitions ############################################### +# Keep same length ! 
+Cont1 = """THIS IS 1 FILE""" +Cont2 = """THIS IS 2 FILE""" +Cont3 = """THIS IS 3 FILE""" +Cont4 = """THIS IS 4 FILE""" + +# Local Wget files + +# These have same timestamp as remote files +UpToDate_Local_File1 = WgetFile ("UpToDateFile1", Cont1, timestamp="1995-01-01 00:00:00") +UpToDate_Local_File2 = WgetFile ("UpToDateFile2", Cont1, timestamp="1995-01-01 00:00:00") +UpToDate_Local_File3 = WgetFile ("UpToDateFile3", Cont1, timestamp="1995-01-01 00:00:00") + +# This is newer than remote (expected same behaviour as for above files) +Newer_Local_File = WgetFile ("NewerFile", Cont1, timestamp="1995-02-02 02:02:02") + +# This is older than remote - should be clobbered +Outdated_Local_File = WgetFile ("UpdatedFile", Cont2, timestamp="1990-01-01 00:00:00") + +UpToDate_Rules1 = { + "SendHeader" : { + # RFC1123 format + "Last-Modified" : "Sun, 01 Jan 1995 00:00:00 GMT", + }, + "Response": 304, + "ExpectHeader" : { + "If-Modified-Since" : "Sun, 01 Jan 1995 00:00:00 GMT" + }, +} + +UpToDate_Rules2 = { + "SendHeader" : { + # RFC850 format + "Last-Modified" : "Sunday, 01-Jan-95 00:00:00 GMT", + }, + "Response": 304, + "ExpectHeader" : { + "If-Modified-Since" : "Sun, 01 Jan 1995 00:00:00 GMT" + }, +} + +UpToDate_Rules3 = { + "SendHeader" : { + # Asctime format + "Last-Modified" : "Sun Jan 01 00:00:00 1995", + }, + "Response": 304, + "ExpectHeader" : { + "If-Modified-Since" : "Sun, 01 Jan 1995 00:00:00 GMT" + }, +} + +Newer_Rules = { + "SendHeader" : { + # Asctime format + "Last-Modified" : "Sun Jan 01 00:00:00 1995", + }, + "Response": 304, + "ExpectHeader" : { + "If-Modified-Since" : "Thu, 02 Feb 1995 02:02:02 GMT" + }, +} + +Outdated_Rules = { + "SendHeader" : { + # RFC850 format + "Last-Modified" : "Thursday, 01-Jan-15 00:00:00 GMT", + }, + "ExpectHeader" : { + "If-Modified-Since" : "Mon, 01 Jan 1990 00:00:00 GMT", + }, +} + +UpToDate_Server_File1 = WgetFile ("UpToDateFile1", Cont3, rules=UpToDate_Rules1) +UpToDate_Server_File2 = WgetFile ("UpToDateFile2", Cont3, 
rules=UpToDate_Rules2) +UpToDate_Server_File3 = WgetFile ("UpToDateFile3", Cont3, rules=UpToDate_Rules3) +Newer_Server_File = WgetFile ("NewerFile", Cont3, rules=Newer_Rules) +Updated_Server_File = WgetFile ("UpdatedFile", Cont4, rules=Outdated_Rules) + +WGET_OPTIONS = "-N" +WGET_URLS = [["UpToDateFile1", "UpToDateFile2", "UpToDateFile3", "NewerFile", + "UpdatedFile", ]] + +Files = [[UpToDate_Server_File1, UpToDate_Server_File2, UpToDate_Server_File3, + Newer_Server_File, Updated_Server_File, ]] + +Existing_Files = [UpToDate_Local_File1, UpToDate_Local_File2, + UpToDate_Local_File3, Newer_Local_File, Outdated_Local_File] + +ExpectedReturnCode = 0 + +# The uptodate file should not be downloaded +ExpectedDownloadedFiles = [UpToDate_Local_File1, UpToDate_Local_File2, + UpToDate_Local_File3, Newer_Local_File, + Updated_Server_File] + +# Kind of hack to ensure proper request types +Request_List = [ + [ + "GET /UpToDateFile1", + "GET /UpToDateFile2", + "GET /UpToDateFile3", + "GET /NewerFile", + "GET /UpdatedFile", + ] +] + +################ Pre and Post Test Hooks ##################################### +pre_test = { + "ServerFiles" : Files, + "LocalFiles" : Existing_Files +} +test_options = { + "WgetCommands" : WGET_OPTIONS, + "Urls" : WGET_URLS +} +post_test = { + "ExpectedFiles" : ExpectedDownloadedFiles, + "ExpectedRetcode" : ExpectedReturnCode, + "FilesCrawled" : Request_List, +} + +err = HTTPTest ( + name=TEST_NAME, + pre_hook=pre_test, + test_params=test_options, + post_hook=post_test +).begin () + +exit (err) -- 2.4.0
From 593e1ceff0501689d3d770f5d75ff2c2ffa8656e Mon Sep 17 00:00:00 2001 From: Hubert Tarasiuk <[email protected]> Date: Tue, 5 May 2015 19:33:02 +0200 Subject: [PATCH 4/6] Add --if-modified-since option * src/init.c: Add to commands array. * src/main.c: Add to cmdline_option. Add to help message. * src/options.h: Add to options struct. --- src/init.c | 2 ++ src/main.c | 4 ++++ src/options.h | 1 + 3 files changed, 7 insertions(+) diff --git a/src/init.c b/src/init.c index 33888de..cada4d6 100644 --- a/src/init.c +++ b/src/init.c @@ -205,6 +205,7 @@ static const struct { #endif { "httpsproxy", &opt.https_proxy, cmd_string }, { "httpuser", &opt.http_user, cmd_string }, + { "if-modified-since", &opt.if_modif_since, cmd_boolean }, { "ignorecase", &opt.ignore_case, cmd_boolean }, { "ignorelength", &opt.ignore_length, cmd_boolean }, { "ignoretags", &opt.ignore_tags, cmd_vector }, @@ -361,6 +362,7 @@ defaults (void) opt.no_proxy = sepstring (tmp); opt.prefer_family = prefer_none; opt.allow_cache = true; + opt.if_modif_since = true; opt.read_timeout = 900; opt.use_robots = true; diff --git a/src/main.c b/src/main.c index b59fcab..a0044d9 100644 --- a/src/main.c +++ b/src/main.c @@ -301,6 +301,7 @@ static struct cmdline_option option_data[] = { "strict-comments", 0, OPT_BOOLEAN, "strictcomments", -1 }, { "timeout", 'T', OPT_VALUE, "timeout", -1 }, { "timestamping", 'N', OPT_BOOLEAN, "timestamping", -1 }, + { "if-modified-since", 0, OPT_BOOLEAN, "if-modified-since", -1 }, { "tries", 't', OPT_VALUE, "tries", -1 }, { "unlink", 0, OPT_BOOLEAN, "unlink", -1 }, { "trust-server-names", 0, OPT_BOOLEAN, "trustservernames", -1 }, @@ -516,6 +517,9 @@ Download:\n"), -N, --timestamping don't re-retrieve files unless newer than\n\ local\n"), N_("\ + --no-if-modified-since don't use conditional if-modified-since get\n\ + requests in timestamping mode\n"), + N_("\ --no-use-server-timestamps don't set the local file's timestamp by\n\ the one on the server\n"), N_("\ diff --git a/src/options.h 
b/src/options.h index bded0c4..5e93751 100644 --- a/src/options.h +++ b/src/options.h @@ -165,6 +165,7 @@ struct options #endif bool timestamping; /* Whether to use time-stamping. */ + bool if_modif_since; /* Whether to use conditional get requests */ bool backup_converted; /* Do we save pre-converted files as *.orig? */ int backups; /* Are numeric backups made? */ -- 2.4.0
From af5f411a699484482bc951d6b4efb6a90575f653 Mon Sep 17 00:00:00 2001 From: Hubert Tarasiuk <[email protected]> Date: Sat, 9 May 2015 22:47:24 +0200 Subject: [PATCH 5/6] Prototype of If-Modified-Since. * src/wget.h: Add IF_MODIF_SINCE enum for dt. * src/http.c (time_to_rfc1123): Convert time_t do http time. * src/http.c (initialize_request): Include If-Modified-Since header if appropriate. * src/http.c (set_file_timestamp): Separate this code from check_file_output. * src/http.c (check_file_output): Use set_file_timestamp. * src/http.c (gethttp): Handle properly 304 return code and 200 if server ignores If-Modified-Since headers. * src/http.c (http_loop): Load filename to hstat if condget was requested, use IF_MODIF_SINCE if requested and current timestamp can be obtained. --- src/http.c | 246 ++++++++++++++++++++++++++++++++++++++++++++++--------------- src/wget.h | 6 +- 2 files changed, 190 insertions(+), 62 deletions(-) diff --git a/src/http.c b/src/http.c index 54eb106..836e3f9 100644 --- a/src/http.c +++ b/src/http.c @@ -1681,6 +1681,59 @@ read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen, } while (0) #endif /* def __VMS [else] */ +/* + Convert time_t to one of valid HTTP date formats + ie. rfc1123-date. 
+ + HTTP-date = rfc1123-date | rfc850-date | asctime-date + rfc1123-date = wkday "," SP date1 SP time SP "GMT" + rfc850-date = weekday "," SP date2 SP time SP "GMT" + asctime-date = wkday SP date3 SP time SP 4DIGIT + date1 = 2DIGIT SP month SP 4DIGIT + ; day month year (e.g., 02 Jun 1982) + date2 = 2DIGIT "-" month "-" 2DIGIT + ; day-month-year (e.g., 02-Jun-82) + date3 = month SP ( 2DIGIT | ( SP 1DIGIT )) + ; month day (e.g., Jun 2) + time = 2DIGIT ":" 2DIGIT ":" 2DIGIT + ; 00:00:00 - 23:59:59 + wkday = "Mon" | "Tue" | "Wed" + | "Thu" | "Fri" | "Sat" | "Sun" + weekday = "Monday" | "Tuesday" | "Wednesday" + | "Thursday" | "Friday" | "Saturday" | "Sunday" + month = "Jan" | "Feb" | "Mar" | "Apr" + | "May" | "Jun" | "Jul" | "Aug" + | "Sep" | "Oct" | "Nov" | "Dec" + + source: RFC2616 */ +static uerr_t +time_to_rfc1123 (time_t time, char *buf, size_t bufsize) +{ + struct tm *gtm; + static const char *wkday[] = { "Sun", "Mon", "Tue", "Wed", + "Thu", "Fri", "Sat" }; + static const char *month[] = { "Jan", "Feb", "Mar", "Apr", + "May", "Jun", "Jul", "Aug", + "Sep", "Oct", "Nov", "Dec" }; + /* rfc1123 example: Thu, 01 Jan 1998 22:12:57 GMT */ + static const char *time_format = "%s, %02d %s %04d %02d:%02d:%02d GMT"; + + gtm = gmtime (&time); + if (!gtm) + { + logprintf (LOG_NOTQUIET, + "gmtime failed. This is probably a bug.\n"); + return TIMECONV_ERR; + } + + snprintf (buf, bufsize, time_format, wkday[gtm->tm_wday], + gtm->tm_mday, month[gtm->tm_mon], + gtm->tm_year + 1900, gtm->tm_hour, + gtm->tm_min, gtm->tm_sec); + + return RETROK; +} + static struct request * initialize_request (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, bool inhibit_keep_alive, bool *basic_auth_finished, @@ -1723,6 +1776,21 @@ initialize_request (struct url *u, struct http_stat *hs, int *dt, struct url *pr /* ... but some HTTP/1.0 caches doesn't implement Cache-Control. 
*/ request_set_header (req, "Pragma", "no-cache", rel_none); } + if (*dt & IF_MODIF_SINCE) + { + char strtime[32]; + uerr_t err; + + err = time_to_rfc1123 (hs->orig_file_tstamp, strtime, countof (strtime)); + if (err != RETROK) + { + logputs (LOG_VERBOSE, _("Cannot convert timestamp to http format. " + "Falling back to time 0 as last modification " + "time.\n")); + strcpy (strtime, "Thu, 01 Jan 1970 00:00:00 GMT"); + } + request_set_header (req, "If-Modified-Since", xstrdup(strtime), rel_value); + } if (hs->restval) request_set_header (req, "Range", aprintf ("bytes=%s-", @@ -2025,6 +2093,69 @@ establish_connection (struct url *u, struct url **conn_ref, } static uerr_t +set_file_timestamp (struct http_stat *hs) +{ + size_t filename_len = strlen (hs->local_file); + char *filename_plus_orig_suffix = alloca (filename_len + sizeof (ORIG_SFX)); + bool local_dot_orig_file_exists = false; + char *local_filename = NULL; + struct_stat st; + + if (opt.backup_converted) + /* If -K is specified, we'll act on the assumption that it was specified + last time these files were downloaded as well, and instead of just + comparing local file X against server file X, we'll compare local + file X.orig (if extant, else X) against server file X. If -K + _wasn't_ specified last time, or the server contains files called + *.orig, -N will be back to not operating correctly with -k. */ + { + /* Would a single s[n]printf() call be faster? --dan + + Definitely not. sprintf() is horribly slow. It's a + different question whether the difference between the two + affects a program. Usually I'd say "no", but at one + point I profiled Wget, and found that a measurable and + non-negligible amount of time was lost calling sprintf() + in url.c. Replacing sprintf with inline calls to + strcpy() and number_to_string() made a difference. 
+ --hniksic */ + memcpy (filename_plus_orig_suffix, hs->local_file, filename_len); + memcpy (filename_plus_orig_suffix + filename_len, + ORIG_SFX, sizeof (ORIG_SFX)); + + /* Try to stat() the .orig file. */ + if (stat (filename_plus_orig_suffix, &st) == 0) + { + local_dot_orig_file_exists = true; + local_filename = filename_plus_orig_suffix; + } + } + + if (!local_dot_orig_file_exists) + /* Couldn't stat() <file>.orig, so try to stat() <file>. */ + if (stat (hs->local_file, &st) == 0) + local_filename = hs->local_file; + + if (local_filename != NULL) + /* There was a local file, so we'll check later to see if the version + the server has is the same version we already have, allowing us to + skip a download. */ + { + hs->orig_file_name = xstrdup (local_filename); + hs->orig_file_size = st.st_size; + hs->orig_file_tstamp = st.st_mtime; +#ifdef WINDOWS + /* Modification time granularity is 2 seconds for Windows, so + increase local time by 1 second for later comparison. */ + ++hs->orig_file_tstamp; +#endif + hs->timestamp_checked = true; + } + + return RETROK; +} + +static uerr_t check_file_output (struct url *u, struct http_stat *hs, struct response *resp, char *hdrval, size_t hdrsize) { @@ -2077,61 +2208,9 @@ check_file_output (struct url *u, struct http_stat *hs, /* Support timestamping */ if (opt.timestamping && !hs->timestamp_checked) { - size_t filename_len = strlen (hs->local_file); - char *filename_plus_orig_suffix = alloca (filename_len + sizeof (ORIG_SFX)); - bool local_dot_orig_file_exists = false; - char *local_filename = NULL; - struct_stat st; - - if (opt.backup_converted) - /* If -K is specified, we'll act on the assumption that it was specified - last time these files were downloaded as well, and instead of just - comparing local file X against server file X, we'll compare local - file X.orig (if extant, else X) against server file X. 
If -K - _wasn't_ specified last time, or the server contains files called - *.orig, -N will be back to not operating correctly with -k. */ - { - /* Would a single s[n]printf() call be faster? --dan - - Definitely not. sprintf() is horribly slow. It's a - different question whether the difference between the two - affects a program. Usually I'd say "no", but at one - point I profiled Wget, and found that a measurable and - non-negligible amount of time was lost calling sprintf() - in url.c. Replacing sprintf with inline calls to - strcpy() and number_to_string() made a difference. - --hniksic */ - memcpy (filename_plus_orig_suffix, hs->local_file, filename_len); - memcpy (filename_plus_orig_suffix + filename_len, - ORIG_SFX, sizeof (ORIG_SFX)); - - /* Try to stat() the .orig file. */ - if (stat (filename_plus_orig_suffix, &st) == 0) - { - local_dot_orig_file_exists = true; - local_filename = filename_plus_orig_suffix; - } - } - - if (!local_dot_orig_file_exists) - /* Couldn't stat() <file>.orig, so try to stat() <file>. */ - if (stat (hs->local_file, &st) == 0) - local_filename = hs->local_file; - - if (local_filename != NULL) - /* There was a local file, so we'll check later to see if the version - the server has is the same version we already have, allowing us to - skip a download. */ - { - hs->orig_file_name = xstrdup (local_filename); - hs->orig_file_size = st.st_size; - hs->orig_file_tstamp = st.st_mtime; -#ifdef WINDOWS - /* Modification time granularity is 2 seconds for Windows, so - increase local time by 1 second for later comparison. */ - ++hs->orig_file_tstamp; -#endif - } + uerr_t timestamp_err = set_file_timestamp (hs); + if (timestamp_err != RETROK) + return timestamp_err; } return RETROK; } @@ -2421,6 +2500,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, POST). 
*/ bool head_only = !!(*dt & HEAD_ONLY); + /* Whether conditional get request will be issued */ + bool cond_get = !!(*dt & IF_MODIF_SINCE); + char *head = NULL; struct response *resp = NULL; char hdrval[512]; @@ -3020,6 +3102,35 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, } } + if (cond_get && statcode == HTTP_STATUS_NOT_MODIFIED) + { + logprintf (LOG_VERBOSE, _("File %s not modified on server. " + "Omiting download.\n\n"), quote(hs->local_file)); + *dt |= RETROKF; + CLOSE_FINISH (sock); + retval = RETRUNNEEDED; + goto cleanup; + } + + /* Handle the case when server ignores If-Modified-Since header */ + if (cond_get && statcode == HTTP_STATUS_OK && hs->remote_time) + { + time_t tmr; + tmr = http_atotm (hs->remote_time); + if (tmr != (time_t) -1 && tmr <= hs->orig_file_tstamp + && (contlen == -1 || contlen == hs->orig_file_size)) + { + logprintf (LOG_VERBOSE, _("Server ignored If-Modified-Since header " + "for file %s. " + "You might want to add --no-if-modified-since " + "option.\n\n"), quote(hs->local_file)); + *dt |= RETROKF; + CLOSE_INVALIDATE (sock); + retval = RETRUNNEEDED; + goto cleanup; + } + } + if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE || (!opt.timestamping && hs->restval > 0 && statcode == HTTP_STATUS_OK && contrange == 0 && contlen >= 0 && hs->restval >= contlen)) @@ -3263,15 +3374,30 @@ http_loop (struct url *u, struct url *original_url, char **newloc, if (opt.content_disposition && opt.always_rest) send_head_first = true; - /* Send preliminary HEAD request if -N is given and we have an existing - * destination file. */ if (!opt.output_document) file_name = url_file_name (opt.trustservernames ? u : original_url, NULL); else file_name = xstrdup (opt.output_document); - if (opt.timestamping && (file_exists_p (file_name) - || opt.content_disposition)) - send_head_first = true; + + if (opt.timestamping) + { + /* Use conditional get request if requested + * and if timestamp is known at this moment. 
*/ + if (opt.if_modif_since && file_exists_p (file_name) && !send_head_first) + { + *dt |= IF_MODIF_SINCE; + { + uerr_t timestamp_err = set_file_timestamp (&hstat); + if (timestamp_err != RETROK) + return timestamp_err; + } + } + /* Send preliminary HEAD request if -N is given and we have an existing + * destination file. */ + else if (file_exists_p (file_name) || opt.content_disposition) + send_head_first = true; + } + xfree (file_name); /* THE loop */ diff --git a/src/wget.h b/src/wget.h index 8d2b0f1..c1c8f4c 100644 --- a/src/wget.h +++ b/src/wget.h @@ -331,7 +331,8 @@ enum SEND_NOCACHE = 0x0008, /* send Pragma: no-cache directive */ ACCEPTRANGES = 0x0010, /* Accept-ranges header was found */ ADDED_HTML_EXTENSION = 0x0020, /* added ".html" extension due to -E */ - TEXTCSS = 0x0040 /* document is of type text/css */ + TEXTCSS = 0x0040, /* document is of type text/css */ + IF_MODIF_SINCE = 0x0080, /* use if-modified-since header */ }; /* Universal error type -- used almost everywhere. Error reporting of @@ -351,7 +352,8 @@ typedef enum RETRBADPATTERN, PROXERR, AUTHFAILED, QUOTEXC, WRITEFAILED, SSLINITFAILED, VERIFCERTERR, UNLINKERR, NEWLOCATION_KEEP_POST, CLOSEFAILED, ATTRMISSING, UNKNOWNATTR, - WARC_ERR, WARC_TMP_FOPENERR, WARC_TMP_FWRITEERR + WARC_ERR, WARC_TMP_FOPENERR, WARC_TMP_FWRITEERR, + TIMECONV_ERR } uerr_t; /* 2005-02-19 SMS. -- 2.4.0
From ede69428ecee59ef26e3f2120683b289309abfcc Mon Sep 17 00:00:00 2001 From: Hubert Tarasiuk <[email protected]> Date: Tue, 12 May 2015 21:21:33 +0200 Subject: [PATCH 6/6] Include --if-modified-since option in user manual. * doc/wget.texi: Add --if-modified-since section. --- doc/wget.texi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/wget.texi b/doc/wget.texi index 689fb97..16cc5db 100644 --- a/doc/wget.texi +++ b/doc/wget.texi @@ -788,6 +788,10 @@ used alongside the @samp{--logfile} option. @itemx --timestamping Turn on time-stamping. @xref{Time-Stamping}, for details. +@item --no-if-modified-since +Do not send If-Modified-Since header in @samp{-N} mode. Send preliminary HEAD +request instead. This has only effect in @samp{-N} mode. + @item --no-use-server-timestamps Don't set the local file's timestamp by the one on the server. -- 2.4.0
signature.asc
Description: OpenPGP digital signature
