Hi,
Here are two small patches. I hope they will be useful.
First, a patch that fixes a memory leak in fd_read_body (src/retr.c) and
skip_short_body (src/http.c) when it retrieves a response with
Transfer-Encoding: chunked. Both functions make calls to fd_read_line
but never free the result.
Second, a patch to the fd_read_body function that changes the way
chunked responses are saved in the WARC file. Until now, wget would
write a de-chunked response to the WARC file, which is wrong: the WARC
file is supposed to have an exact copy of the HTTP response, so it
should also include the chunk headers.
The first patch fixes the memory leaks. The second patch changes
fd_read_body to save the full, chunked response in the WARC file.
Regards,
Gijs
=== modified file 'src/ChangeLog'
--- src/ChangeLog 2012-01-11 14:27:06 +
+++ src/ChangeLog 2012-01-26 21:30:19 +
@@ -1,3 +1,8 @@
+2012-01-27 Gijs van Tulder gvtul...@gmail.com
+
+ * retr.c (fd_read_body): Fix a memory leak with chunked responses.
+ * http.c (skip_short_body): Fix the same memory leak.
+
2012-01-09 Gijs van Tulder gvtul...@gmail.com
* init.c: Disable WARC compression if zlib is disabled.
=== modified file 'src/http.c'
--- src/http.c 2012-01-08 23:03:23 +
+++ src/http.c 2012-01-26 21:30:19 +
@@ -951,9 +951,12 @@
break;
remaining_chunk_size = strtol (line, endl, 16);
+ xfree (line);
+
if (remaining_chunk_size == 0)
{
- fd_read_line (fd);
+ line = fd_read_line (fd);
+ xfree_null (line);
break;
}
}
@@ -978,8 +981,13 @@
{
remaining_chunk_size -= ret;
if (remaining_chunk_size == 0)
-if (fd_read_line (fd) == NULL)
- return false;
+{
+ char *line = fd_read_line (fd);
+ if (line == NULL)
+return false;
+ else
+xfree (line);
+}
}
/* Safe even if %.*s bogusly expects terminating \0 because
=== modified file 'src/retr.c'
--- src/retr.c 2011-11-04 21:25:00 +
+++ src/retr.c 2012-01-26 21:30:19 +
@@ -307,11 +307,16 @@
}
remaining_chunk_size = strtol (line, endl, 16);
+ xfree (line);
+
if (remaining_chunk_size == 0)
{
ret = 0;
- if (fd_read_line (fd) == NULL)
+ line = fd_read_line (fd);
+ if (line == NULL)
ret = -1;
+ else
+xfree (line);
break;
}
}
@@ -371,11 +376,16 @@
{
remaining_chunk_size -= ret;
if (remaining_chunk_size == 0)
-if (fd_read_line (fd) == NULL)
- {
-ret = -1;
-break;
- }
+{
+ char *line = fd_read_line (fd);
+ if (line == NULL)
+{
+ ret = -1;
+ break;
+}
+ else
+xfree (line);
+}
}
}
=== modified file 'src/ChangeLog'
--- src/ChangeLog 2012-01-26 21:30:19 +
+++ src/ChangeLog 2012-01-26 21:56:27 +
@@ -1,3 +1,9 @@
+2012-01-27 Gijs van Tulder gvtul...@gmail.com
+
+ * retr.c (fd_read_body): If the response is chunked, the chunk
+ headers are now written to the WARC file, making the WARC file
+ an exact copy of the HTTP response.
+
2012-01-27 Gijs van Tulder gvtul...@gmail.com
* retr.c (fd_read_body): Fix a memory leak with chunked responses.
* http.c (skip_short_body): Fix the same memory leak.
=== modified file 'src/retr.c'
--- src/retr.c 2012-01-26 21:30:19 +
+++ src/retr.c 2012-01-26 21:56:27 +
@@ -213,6 +213,9 @@
the data is stored to ELAPSED.
If OUT2 is non-NULL, the contents is also written to OUT2.
+ OUT2 will get an exact copy of the response: if this is a chunked
+ response, everything -- including the chunk headers -- is written
+ to OUT2. (OUT will only get the unchunked response.)
The function exits and returns the amount of data read. In case of
error while reading data, -1 is returned. In case of error while
@@ -305,6 +308,8 @@
ret = -1;
break;
}
+ else if (out2 != NULL)
+fwrite (line, 1, strlen (line), out2);
remaining_chunk_size = strtol (line, endl, 16);
xfree (line);
@@ -316,7 +321,11 @@
if (line == NULL)
ret = -1;
else
-xfree (line);
+{
+ if (out2 != NULL)
+