Hi,
Another small problem in the WARC section: wget crashes with a
segmentation fault if you have WARC output enabled and try to download a
file larger than 2 GB. I think this is caused by the use of size_t, ftell
and fseek in warc.c.
The attached patch replaces size_t with off_t, ftell with ftello, and
fseek with fseeko. On my 64-bit system this seemed to fix the problem
(but I'm not an expert in these matters, so this may not hold on 32-bit
systems).
Regards,
Gijs
=== modified file 'src/ChangeLog'
--- src/ChangeLog 2012-01-28 13:09:29 +
+++ src/ChangeLog 2012-01-31 23:16:33 +
@@ -1,3 +1,9 @@
+2012-02-01 Gijs van Tulder gvtul...@gmail.com
+
+ * warc.c: Fix large file support with ftello, fseeko.
+ * warc.h: Fix large file support.
+ * http.c: Fix large file support.
+
2012-01-27 Gijs van Tulder gvtul...@gmail.com
* retr.c (fd_read_body): If the response is chunked, the chunk
=== modified file 'src/http.c'
--- src/http.c 2012-01-28 13:08:52 +
+++ src/http.c 2012-01-31 22:34:45 +
@@ -1712,7 +1712,7 @@
char warc_timestamp_str [21];
char warc_request_uuid [48];
ip_address *warc_ip = NULL;
- long int warc_payload_offset = -1;
+ off_t warc_payload_offset = -1;
/* Whether this connection will be kept alive after the HTTP request
is done. */
@@ -2127,7 +2127,7 @@
if (write_error = 0 warc_tmp != NULL)
{
/* Remember end of headers / start of payload. */
- warc_payload_offset = ftell (warc_tmp);
+ warc_payload_offset = ftello (warc_tmp);
/* Write a copy of the data to the WARC record. */
int warc_tmp_written = fwrite (opt.post_data, 1, post_data_size, warc_tmp);
@@ -2139,7 +2139,7 @@
{
if (warc_tmp != NULL)
/* Remember end of headers / start of payload. */
-warc_payload_offset = ftell (warc_tmp);
+warc_payload_offset = ftello (warc_tmp);
write_error = post_file (sock, opt.post_file_name, post_data_size, warc_tmp);
}
=== modified file 'src/warc.c'
--- src/warc.c 2012-01-11 14:27:06 +
+++ src/warc.c 2012-01-31 22:35:00 +
@@ -50,10 +50,10 @@
static gzFile *warc_current_gzfile;
/* The offset of the current gzip record in the WARC file. */
-static size_t warc_current_gzfile_offset;
+static off_t warc_current_gzfile_offset;
/* The uncompressed size (so far) of the current record. */
-static size_t warc_current_gzfile_uncompressed_size;
+static off_t warc_current_gzfile_uncompressed_size;
# endif
/* This is true until a warc_write_* method fails. */
@@ -158,7 +158,7 @@
return false;
fflush (warc_current_file);
- if (opt.warc_maxsize 0 ftell (warc_current_file) = opt.warc_maxsize)
+ if (opt.warc_maxsize 0 ftello (warc_current_file) = opt.warc_maxsize)
warc_start_new_file (false);
#ifdef HAVE_LIBZ
@@ -166,7 +166,7 @@
if (opt.warc_compression_enabled)
{
/* Record the starting offset of the new record. */
- warc_current_gzfile_offset = ftell (warc_current_file);
+ warc_current_gzfile_offset = ftello (warc_current_file);
/* Reserve space for the extra GZIP header field.
In warc_write_end_record we will fill this space
@@ -217,8 +217,8 @@
{
/* Add the Content-Length header. */
char *content_length;
- fseek (data_in, 0L, SEEK_END);
- if (! asprintf (content_length, %ld, ftell (data_in)))
+ fseeko (data_in, 0L, SEEK_END);
+ if (! asprintf (content_length, %ld, ftello (data_in)))
{
warc_write_ok = false;
return false;
@@ -229,7 +229,7 @@
/* End of the WARC header section. */
warc_write_string (\r\n);
- if (fseek (data_in, 0L, SEEK_SET) != 0)
+ if (fseeko (data_in, 0L, SEEK_SET) != 0)
warc_write_ok = false;
/* Copy the data in the file to the WARC record. */
@@ -266,7 +266,7 @@
}
fflush (warc_current_file);
- fseek (warc_current_file, 0, SEEK_END);
+ fseeko (warc_current_file, 0, SEEK_END);
/* The WARC standard suggests that we add 'skip length' data in the
extra header field of the GZIP stream.
@@ -284,12 +284,12 @@
*/
/* Calculate the uncompressed and compressed sizes. */
- size_t current_offset = ftell (warc_current_file);
- size_t uncompressed_size = current_offset - warc_current_gzfile_offset;
- size_t compressed_size = warc_current_gzfile_uncompressed_size;
+ off_t current_offset = ftello (warc_current_file);
+ off_t uncompressed_size = current_offset - warc_current_gzfile_offset;
+ off_t compressed_size = warc_current_gzfile_uncompressed_size;
/* Go back to the static GZIP header. */
- fseek (warc_current_file, warc_current_gzfile_offset + EXTRA_GZIP_HEADER_SIZE, SEEK_SET);
+ fseeko (warc_current_file, warc_current_gzfile_offset + EXTRA_GZIP_HEADER_SIZE, SEEK_SET);
/* Read the header. */
char