Hi,
I recently ran into the known bug #81 [0]. In certain use cases, such as
downloading from large download sites / one-click hosters, it happens
often.
The basic problem is that curl (the binary) opens the file very early
without any knowledge of the server response. This also leads to another
bug/inconsistency. The effective URL does not need to match the
effective filename since the filename is statically computed from the
input URL and not from the effective URL (which can be different due to
3xx HTTP redirections).
This patch then attempts to delay the opening of the output file until
it is actually needed (when writing to the file).
However this patch is still work-in-progress as it does not solve the
key problem of #81 which is to actually resume the download. The patch
however gets the "right" file, yet does not resume the download. This is
because when we do the initial first request we do not know the output
file name and thus cannot determine the filesize of our local file.
Later when we get to know the "dynamic" filename (result of either
redirect or Content-Disposition filename), we are already reading the
response (since we got the information from there).
Now to solve this problem one could check whether the file exists. If
not, one can happily continue. If it does exist however, one would need
to abort the connection and launch a new request with byte-range.
To avoid having to abort the connection like this, one could do a HEAD
request first just to get the filename.
Regarding the code in the patch, I am not too sure about my check for the
"Location" header response field, or whether one can perform this check
in a better way.
Since I developed this patch against curl-7.38 (the version in Debian
testing/unstable) it might not compile against git [I still attached an
applicable patch for the git version though].
Also while testing I just noticed there are problems when doing a HEAD
request (curl -I):
Warning: Remote filename has no length!
* Failed writing body (0 != 33)
* Closing connection 0
curl: (23) Failed writing body (0 != 33)
I am not sure why there is an attempt to write to the file when I am
just doing a HEAD request though.
My test case (for location redirect):
curl -v -LOC- http://goo.gl/WQcAFw
Current behavior:
Saves into WQcAFw
Patch behavior:
Saves into ubuntu-14.04.2-server-amd64.iso
Let me know what you think,
Leon
[0] http://sourceforge.net/p/curl/bugs/1169/
diff --git a/src/tool_cb_hdr.c b/src/tool_cb_hdr.c
index af9bb87..4cc51c9 100644
--- a/src/tool_cb_hdr.c
+++ b/src/tool_cb_hdr.c
@@ -126,6 +126,10 @@ size_t tool_header_cb(void *ptr, size_t size, size_t nmemb, void *userdata)
return failure;
}
}
+ else if (heads->config->followlocation && checkprefix("Location:", str)) {
+ outs->stream = NULL;
+ outs->is_opened = FALSE;
+ }
return cb;
}
diff --git a/src/tool_cb_wrt.c b/src/tool_cb_wrt.c
index 17d0409..2a5f5bc 100644
--- a/src/tool_cb_wrt.c
+++ b/src/tool_cb_wrt.c
@@ -28,9 +28,51 @@
#include "tool_cfgable.h"
#include "tool_msgs.h"
#include "tool_cb_wrt.h"
+#include "tool_operhlp.h"
#include "memdebug.h" /* keep this as LAST include */
+bool open_outfile(struct OperationConfig *const config, struct OutStruct *outs,
+ char *const outfile)
+{ if(config->resume_from_current) {
+ /* We're told to continue from where we are now. Get the size
+ of the file as it is now and open it for append instead */
+ struct_stat fileinfo;
+ /* VMS -- Danger, the filesize is only valid for stream files */
+ if(0 == stat(outfile, &fileinfo))
+ /* set offset to current file size: */
+ config->resume_from = fileinfo.st_size;
+ else
+ /* let offset be 0 */
+ config->resume_from = 0;
+ }
+
+ if(config->resume_from_current || config->resume_from) {
+#ifdef __VMS
+ /* open file for output, forcing VMS output format into stream
+ mode which is needed for stat() call above to always work. */
+ FILE *file = fopen(outfile, config->resume_from?"ab":"wb",
+ "ctx=stm", "rfm=stmlf", "rat=cr", "mrs=0");
+#else
+ /* open file for output: */
+ FILE *file = fopen(outfile, config->resume_from?"ab":"wb");
+#endif
+ if(!file) {
+ helpf(config->global->errors, "Can't open '%s'!\n", outfile);
+ return false;
+ }
+ outs->fopened = TRUE;
+ outs->stream = file;
+ outs->init = config->resume_from;
+ }
+ else {
+ outs->stream = NULL; /* open when needed */
+ }
+ outs->filename = outfile;
+ outs->s_isreg = TRUE;
+ return true;
+}
+
/*
** callback for CURLOPT_WRITEFUNCTION
*/
@@ -98,13 +140,26 @@ size_t tool_write_cb(void *buffer, size_t sz, size_t nmemb, void *userdata)
#endif
if(!outs->stream) {
- FILE *file;
+ // handle redirections
+ long redirects = 0;
+ curl_easy_getinfo(config->easy, CURLINFO_REDIRECT_COUNT, &redirects);
+ if(redirects > 0) {
+ char *location;
+ curl_easy_getinfo(config->easy, CURLINFO_EFFECTIVE_URL, &location);
+ if(get_url_file_name(&outs->filename, location)) return failure;
+ printf (stderr, "file name atfer redirections: %s\n", outs->filename);
+ }
if(!outs->filename || !*outs->filename) {
warnf(config->global, "Remote filename has no length!\n");
return failure;
}
+ // handle resumption
+ if(!open_outfile(config, outs, outs->filename)) return failure;
+ }
+ if(!outs->stream) {
+ FILE *file;
if(outs->is_cd_filename) {
/* don't overwrite existing files */
file = fopen(outs->filename, "rb");
diff --git a/src/tool_operate.c b/src/tool_operate.c
index a875f8d..1565e00 100644
--- a/src/tool_operate.c
+++ b/src/tool_operate.c
@@ -543,7 +543,9 @@ static CURLcode operate_do(struct GlobalConfig *global,
result = get_url_file_name(&outfile, this_url);
if(result)
goto show_error;
- if(!*outfile && !config->content_disposition) {
+ /* filename information might only be avialable on response */
+ if(!*outfile && !config->content_disposition &&
+ !config->followlocation) {
helpf(global->errors, "Remote file name has no length!\n");
result = CURLE_WRITE_ERROR;
goto quit_urls;
@@ -584,48 +586,17 @@ static CURLcode operate_do(struct GlobalConfig *global,
}
if((urlnode->flags & GETOUT_USEREMOTE)
- && config->content_disposition) {
+ && (config->content_disposition || config->followlocation)) {
/* Our header callback MIGHT set the filename */
DEBUGASSERT(!outs.filename);
}
- if(config->resume_from_current) {
- /* We're told to continue from where we are now. Get the size
- of the file as it is now and open it for append instead */
- struct_stat fileinfo;
- /* VMS -- Danger, the filesize is only valid for stream files */
- if(0 == stat(outfile, &fileinfo))
- /* set offset to current file size: */
- config->resume_from = fileinfo.st_size;
- else
- /* let offset be 0 */
- config->resume_from = 0;
- }
-
- if(config->resume_from) {
-#ifdef __VMS
- /* open file for output, forcing VMS output format into stream
- mode which is needed for stat() call above to always work. */
- FILE *file = fopen(outfile, config->resume_from?"ab":"wb",
- "ctx=stm", "rfm=stmlf", "rat=cr", "mrs=0");
-#else
- /* open file for output: */
- FILE *file = fopen(outfile, config->resume_from?"ab":"wb");
-#endif
- if(!file) {
- helpf(global->errors, "Can't open '%s'!\n", outfile);
- result = CURLE_WRITE_ERROR;
- goto quit_urls;
- }
- outs.fopened = TRUE;
- outs.stream = file;
- outs.init = config->resume_from;
- }
- else {
- outs.stream = NULL; /* open when needed */
+ // If output filename is fixed, open it already here
+ if(outfile && !config->content_disposition &&
+ !config->followlocation && !open_outfile(config, &outs, outfile))
+ { res = CURLE_WRITE_ERROR;
+ goto quit_urls;
}
- outs.filename = outfile;
- outs.s_isreg = TRUE;
}
if(uploadfile && !stdin_upload(uploadfile)) {
diff --git a/src/tool_cb_hdr.c b/src/tool_cb_hdr.c
index ef340f7..337b361 100644
--- a/src/tool_cb_hdr.c
+++ b/src/tool_cb_hdr.c
@@ -125,6 +125,10 @@ size_t tool_header_cb(void *ptr, size_t size, size_t nmemb, void *userdata)
return failure;
}
}
+ else if (heads->config->followlocation && checkprefix("Location:", str)) {
+ outs->stream = NULL;
+ outs->is_opened = FALSE;
+ }
return cb;
}
diff --git a/src/tool_cb_wrt.c b/src/tool_cb_wrt.c
index dfbf95c..2d7e9b0 100644
--- a/src/tool_cb_wrt.c
+++ b/src/tool_cb_wrt.c
@@ -28,9 +28,51 @@
#include "tool_cfgable.h"
#include "tool_msgs.h"
#include "tool_cb_wrt.h"
+#include "tool_operhlp.h"
#include "memdebug.h" /* keep this as LAST include */
+bool open_outfile(struct OperationConfig *const config, struct OutStruct *outs,
+ char *const outfile)
+{ if(config->resume_from_current) {
+ /* We're told to continue from where we are now. Get the size
+ of the file as it is now and open it for append instead */
+ struct_stat fileinfo;
+ /* VMS -- Danger, the filesize is only valid for stream files */
+ if(0 == stat(outfile, &fileinfo))
+ /* set offset to current file size: */
+ config->resume_from = fileinfo.st_size;
+ else
+ /* let offset be 0 */
+ config->resume_from = 0;
+ }
+
+ if(config->resume_from_current || config->resume_from) {
+#ifdef __VMS
+ /* open file for output, forcing VMS output format into stream
+ mode which is needed for stat() call above to always work. */
+ FILE *file = fopen(outfile, config->resume_from?"ab":"wb",
+ "ctx=stm", "rfm=stmlf", "rat=cr", "mrs=0");
+#else
+ /* open file for output: */
+ FILE *file = fopen(outfile, config->resume_from?"ab":"wb");
+#endif
+ if(!file) {
+ helpf(config->global->errors, "Can't open '%s'!\n", outfile);
+ return false;
+ }
+ outs->fopened = TRUE;
+ outs->stream = file;
+ outs->init = config->resume_from;
+ }
+ else {
+ outs->stream = NULL; /* open when needed */
+ }
+ outs->filename = outfile;
+ outs->s_isreg = TRUE;
+ return true;
+}
+
/*
** callback for CURLOPT_WRITEFUNCTION
*/
@@ -97,13 +139,26 @@ size_t tool_write_cb(void *buffer, size_t sz, size_t nmemb, void *userdata)
#endif
if(!outs->stream) {
- FILE *file;
+ // handle redirections
+ long redirects = 0;
+ curl_easy_getinfo(config->easy, CURLINFO_REDIRECT_COUNT, &redirects);
+ if(redirects > 0) {
+ char *location;
+ curl_easy_getinfo(config->easy, CURLINFO_EFFECTIVE_URL, &location);
+ if(get_url_file_name(&outs->filename, location)) return failure;
+ printf (stderr, "file name atfer redirections: %s\n", outs->filename);
+ }
if(!outs->filename || !*outs->filename) {
warnf(config, "Remote filename has no length!\n");
return failure;
}
+ // handle resumption
+ if(!open_outfile(config, outs, outs->filename)) return failure;
+ }
+ if(!outs->stream) {
+ FILE *file;
if(outs->is_cd_filename) {
/* don't overwrite existing files */
file = fopen(outs->filename, "rb");
diff --git a/src/tool_operate.c b/src/tool_operate.c
index fd2fd6d..ebe656d 100644
--- a/src/tool_operate.c
+++ b/src/tool_operate.c
@@ -543,7 +543,9 @@ static CURLcode operate_do(struct GlobalConfig *global,
res = get_url_file_name(&outfile, this_url);
if(res)
goto show_error;
- if((!outfile || !*outfile) && !config->content_disposition) {
+ /* filename information might only be avialable on response */
+ if(!*outfile && !config->content_disposition &&
+ !config->followlocation) {
helpf(global->errors, "Remote file name has no length!\n");
res = CURLE_WRITE_ERROR;
goto quit_urls;
@@ -584,48 +586,17 @@ static CURLcode operate_do(struct GlobalConfig *global,
}
if((urlnode->flags & GETOUT_USEREMOTE)
- && config->content_disposition) {
+ && (config->content_disposition || config->followlocation)) {
/* Our header callback MIGHT set the filename */
DEBUGASSERT(!outs.filename);
}
- if(config->resume_from_current) {
- /* We're told to continue from where we are now. Get the size
- of the file as it is now and open it for append instead */
- struct_stat fileinfo;
- /* VMS -- Danger, the filesize is only valid for stream files */
- if(0 == stat(outfile, &fileinfo))
- /* set offset to current file size: */
- config->resume_from = fileinfo.st_size;
- else
- /* let offset be 0 */
- config->resume_from = 0;
- }
-
- if(config->resume_from) {
-#ifdef __VMS
- /* open file for output, forcing VMS output format into stream
- mode which is needed for stat() call above to always work. */
- FILE *file = fopen(outfile, config->resume_from?"ab":"wb",
- "ctx=stm", "rfm=stmlf", "rat=cr", "mrs=0");
-#else
- /* open file for output: */
- FILE *file = fopen(outfile, config->resume_from?"ab":"wb");
-#endif
- if(!file) {
- helpf(global->errors, "Can't open '%s'!\n", outfile);
- res = CURLE_WRITE_ERROR;
- goto quit_urls;
- }
- outs.fopened = TRUE;
- outs.stream = file;
- outs.init = config->resume_from;
- }
- else {
- outs.stream = NULL; /* open when needed */
+ // If output filename is fixed, open it already here
+ if(outfile && !config->content_disposition &&
+ !config->followlocation && !open_outfile(config, &outs, outfile))
+ { res = CURLE_WRITE_ERROR;
+ goto quit_urls;
}
- outs.filename = outfile;
- outs.s_isreg = TRUE;
}
if(uploadfile && !stdin_upload(uploadfile)) {
-------------------------------------------------------------------
List admin: http://cool.haxx.se/list/listinfo/curl-library
Etiquette: http://curl.haxx.se/mail/etiquette.html