Author: kotkov
Date: Sat Sep 14 16:45:47 2019
New Revision: 1866950

URL: http://svn.apache.org/viewvc?rev=1866950&view=rev
Log:
Fix an issue with the readline implementation for file streams that could
cause excessive memory usage for inputs containing one or multiple \0 bytes.

This is the likely cause of the OOM reported in
  
https://lists.apache.org/thread.html/c96eb5618ac0bf6e083345e0fdcdcf834e30913f26eabe6ada7bab62@%3Cusers.subversion.apache.org%3E
(I think that the problem itself is a regression introduced by me in 1.10.)

Note: one thing I noticed while preparing the fix is that our `readline_fn`
functions for different streams have inconsistent behavior if the input data
contains \0 bytes.  More specifically, they may return different `line` values,
that may either be truncated at \0 or actually contain the whole data between
EOLs, including \0 bytes.  For now, this patch only fixes the excessive memory
usage problem, and I noted this related problem in the test and left it for
future work.

* subversion/libsvn_subr/stream.c
  (readline_apr_lf, readline_apr_generic): Reallocate the buffer based on its
   current size, instead of calculating the new size based on the already
   prealloc'd size.  There are no actual benefits in reallocating based on
   `blocksize`, and in the described case with \0 bytes doing so also backfires
   and may cause excessive allocations due to the actual size of the string
   being less than we expect it to.  A degenerate case of the erroneous
   behavior is ...

* subversion/tests/libsvn_subr/stream-test.c
  (test_stream_readline_file_nul): ...exploited in this new test.
  (test_funcs): Run new test.

* subversion/tests/libsvn_subr
  (): Adjust svn:ignore.

Modified:
    subversion/trunk/subversion/libsvn_subr/stream.c
    subversion/trunk/subversion/tests/libsvn_subr/   (props changed)
    subversion/trunk/subversion/tests/libsvn_subr/stream-test.c

Modified: subversion/trunk/subversion/libsvn_subr/stream.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/stream.c?rev=1866950&r1=1866949&r2=1866950&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/stream.c (original)
+++ subversion/trunk/subversion/libsvn_subr/stream.c Sat Sep 14 16:45:47 2019
@@ -922,7 +922,7 @@ readline_apr_lf(apr_file_t *file,
       }
 
     /* Otherwise, prepare to read the next chunk. */
-    svn_stringbuf_ensure(buf, buf->blocksize + SVN__LINE_CHUNK_SIZE);
+    svn_stringbuf_ensure(buf, buf->len + SVN__LINE_CHUNK_SIZE);
   }
 }
 
@@ -982,7 +982,7 @@ readline_apr_generic(apr_file_t *file,
         }
 
       /* Prepare to read the next chunk. */
-      svn_stringbuf_ensure(buf, buf->blocksize + SVN__LINE_CHUNK_SIZE);
+      svn_stringbuf_ensure(buf, buf->len + SVN__LINE_CHUNK_SIZE);
     }
 }
 

Propchange: subversion/trunk/subversion/tests/libsvn_subr/
------------------------------------------------------------------------------
--- svn:ignore (original)
+++ svn:ignore Sat Sep 14 16:45:47 2019
@@ -59,3 +59,4 @@ test_apr_trunc_workaround
 save-cleartext
 test_stream_readline_file_crlf
 test_stream_readline_file_lf
+test_stream_readline_file_nul

Modified: subversion/trunk/subversion/tests/libsvn_subr/stream-test.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_subr/stream-test.c?rev=1866950&r1=1866949&r2=1866950&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_subr/stream-test.c (original)
+++ subversion/trunk/subversion/tests/libsvn_subr/stream-test.c Sat Sep 14 
16:45:47 2019
@@ -1000,6 +1000,71 @@ test_stream_readline_file_crlf(apr_pool_
   return SVN_NO_ERROR;
 }
 
+static svn_error_t *
+test_stream_readline_file_nul(apr_pool_t *pool)
+{
+  /* Test is written based on the problem report in
+       
https://lists.apache.org/thread.html/c96eb5618ac0bf6e083345e0fdcdcf834e30913f26eabe6ada7bab62@%3Cusers.subversion.apache.org%3E
+     where the user had an OOM when calling `svndumpfilter` with
+     a (most likely) invalid dump containing nul bytes.
+   */
+  const char data[] =
+    {
+      'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n',
+      'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n',
+      'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n',
+      'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n',
+      'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n',
+      'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n',
+      'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n',
+      'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n',
+      'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n',
+      'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n',
+      'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n',
+      'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n',
+      'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n',
+      'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n', 'a', '\0', '\n'
+    };
+  const char *tmp_dir;
+  const char *tmp_file;
+  svn_stream_t *stream;
+
+  SVN_ERR(svn_dirent_get_absolute(&tmp_dir, "test_stream_readline_file_nul", 
pool));
+  SVN_ERR(svn_io_remove_dir2(tmp_dir, TRUE, NULL, NULL, pool));
+  SVN_ERR(svn_io_make_dir_recursively(tmp_dir, pool));
+  svn_test_add_dir_cleanup(tmp_dir);
+
+  tmp_file = svn_dirent_join(tmp_dir, "file", pool);
+  SVN_ERR(svn_io_file_create_bytes(tmp_file, data, sizeof(data), pool));
+  SVN_ERR(svn_stream_open_readonly(&stream, tmp_file, pool, pool));
+
+  while (1)
+    {
+      svn_boolean_t eof;
+      svn_stringbuf_t *line;
+
+      SVN_ERR(svn_stream_readline(stream, &line, "\n", &eof, pool));
+      if (eof)
+        break;
+
+      /* Don't check the contents of the `line` here, at least for now.
+
+         In other words, we just test that this case does not crash or cause
+         unexpected errors.  The reason is that currently our `readline_fn`
+         implementations have inconsistent behavior when dealing with \0 bytes,
+         handling those differently, either in strchr() or memchr() styles.
+
+         Once we make them consistent (or even decide to bail out with an error
+         for \0), this part of the test should start properly checking `data`;
+         maybe also for non-file streams.
+       */
+    }
+
+  SVN_ERR(svn_stream_close(stream));
+
+  return SVN_NO_ERROR;
+}
+
 /* The test table.  */
 
 static int max_threads = 1;
@@ -1037,6 +1102,8 @@ static struct svn_test_descriptor_t test
                    "test reading LF-terminated lines from file"),
     SVN_TEST_PASS2(test_stream_readline_file_crlf,
                    "test reading CRLF-terminated lines from file"),
+    SVN_TEST_PASS2(test_stream_readline_file_nul,
+                   "test reading line from file with nul bytes"),
     SVN_TEST_NULL
   };
 


Reply via email to