Hi,

Here are two small patches. I hope they will be useful.

First, a patch that fixes a memory leak in fd_read_body (src/retr.c) and skip_short_body (src/http.c) that occurs when wget retrieves a response with "Transfer-Encoding: chunked". Both functions call fd_read_line but never free the line it returns.

Second, a patch to the fd_read_body function that changes the way chunked responses are saved in the WARC file. Until now, wget would write a de-chunked response to the WARC file, which is wrong: the WARC file is supposed to have an exact copy of the HTTP response, so it should also include the chunk headers.

The first patch fixes the memory leaks. The second patch changes fd_read_body to save the full, chunked response in the WARC file. The second patch is made against the result of the first, so they should be applied in that order.

Regards,

Gijs

=== modified file 'src/ChangeLog'
--- src/ChangeLog	2012-01-11 14:27:06 +0000
+++ src/ChangeLog	2012-01-26 21:30:19 +0000
@@ -1,3 +1,8 @@
+2012-01-27  Gijs van Tulder  <gvtul...@gmail.com>
+
+	* retr.c (fd_read_body): Fix a memory leak with chunked responses.
+	* http.c (skip_short_body): Fix the same memory leak.
+
 2012-01-09  Gijs van Tulder  <gvtul...@gmail.com>
 
 	* init.c: Disable WARC compression if zlib is disabled.

=== modified file 'src/http.c'
--- src/http.c	2012-01-08 23:03:23 +0000
+++ src/http.c	2012-01-26 21:30:19 +0000
@@ -951,9 +951,12 @@
                 break;
 
               remaining_chunk_size = strtol (line, &endl, 16);
+              xfree (line);
+
               if (remaining_chunk_size == 0)
                 {
-                  fd_read_line (fd);
+                  line = fd_read_line (fd);
+                  xfree_null (line);
                   break;
                 }
             }
@@ -978,8 +981,13 @@
         {
           remaining_chunk_size -= ret;
           if (remaining_chunk_size == 0)
-            if (fd_read_line (fd) == NULL)
-              return false;
+            {
+              char *line = fd_read_line (fd);
+              if (line == NULL)
+                return false;
+              else
+                xfree (line);
+            }
         }
 
       /* Safe even if %.*s bogusly expects terminating \0 because

=== modified file 'src/retr.c'
--- src/retr.c	2011-11-04 21:25:00 +0000
+++ src/retr.c	2012-01-26 21:30:19 +0000
@@ -307,11 +307,16 @@
                 }
 
               remaining_chunk_size = strtol (line, &endl, 16);
+              xfree (line);
+
               if (remaining_chunk_size == 0)
                 {
                   ret = 0;
-                  if (fd_read_line (fd) == NULL)
+                  line = fd_read_line (fd);
+                  if (line == NULL)
                     ret = -1;
+                  else
+                    xfree (line);
                   break;
                 }
             }
@@ -371,11 +376,16 @@
             {
               remaining_chunk_size -= ret;
               if (remaining_chunk_size == 0)
-                if (fd_read_line (fd) == NULL)
-                  {
-                    ret = -1;
-                    break;
-                  }
+                {
+                  char *line = fd_read_line (fd);
+                  if (line == NULL)
+                    {
+                      ret = -1;
+                      break;
+                    }
+                  else
+                    xfree (line);
+                }
             }
         }
 


=== modified file 'src/ChangeLog'
--- src/ChangeLog	2012-01-26 21:30:19 +0000
+++ src/ChangeLog	2012-01-26 21:56:27 +0000
@@ -1,3 +1,9 @@
+2012-01-27  Gijs van Tulder  <gvtul...@gmail.com>
+
+	* retr.c (fd_read_body): If the response is chunked, the chunk
+	headers are now written to the WARC file, making the WARC file
+	an exact copy of the HTTP response.
+
 2012-01-27  Gijs van Tulder  <gvtul...@gmail.com>
 
 	* retr.c (fd_read_body): Fix a memory leak with chunked responses.
 	* http.c (skip_short_body): Fix the same memory leak.

=== modified file 'src/retr.c'
--- src/retr.c	2012-01-26 21:30:19 +0000
+++ src/retr.c	2012-01-26 21:56:27 +0000
@@ -213,6 +213,9 @@
    the data is stored to ELAPSED.
 
    If OUT2 is non-NULL, the contents is also written to OUT2.
+   OUT2 will get an exact copy of the response: if this is a chunked
+   response, everything -- including the chunk headers -- is written
+   to OUT2.  (OUT will only get the unchunked response.)
 
    The function exits and returns the amount of data read.  In case of
    error while reading data, -1 is returned.  In case of error while
@@ -305,6 +308,8 @@
                   ret = -1;
                   break;
                 }
+              else if (out2 != NULL)
+                fwrite (line, 1, strlen (line), out2);
 
               remaining_chunk_size = strtol (line, &endl, 16);
               xfree (line);
@@ -316,7 +321,11 @@
                   if (line == NULL)
                     ret = -1;
                   else
-                    xfree (line);
+                    {
+                      if (out2 != NULL)
+                        fwrite (line, 1, strlen (line), out2);
+                      xfree (line);
+                    }
                   break;
                 }
             }
@@ -384,7 +393,11 @@
                       break;
                     }
                   else
-                    xfree (line);
+                    {
+                      if (out2 != NULL)
+                        fwrite (line, 1, strlen (line), out2);
+                      xfree (line);
+                    }
                 }
             }
         }


Reply via email to