Author: stefan2
Date: Sun Apr 13 04:40:40 2014
New Revision: 1586922

URL: http://svn.apache.org/r1586922
Log:
Speed up file / stream comparison, i.e. minimize the processing overhead
for finding the first mismatch.

The approach is two-sided.  Instead of fetching SVN__STREAM_CHUNK_SIZE
from all sources before comparing data, we start with a much lower 4kB
and increase the chunk size until we reach SVN__STREAM_CHUNK_SIZE while
making sure that all reads are naturally aligned.  So, we quickly find
mismatches near the beginning of the file.

On the other end side, we bump the SVN__STREAM_CHUNK_SIZE to 64kB which
gives better throughput for longer distances to the first mismatch -
without causing ill effects in APR's memory management.

* subversion/include/svn_types.h
  (SVN__STREAM_CHUNK_SIZE): Bump to 64k and add some documentation on
                            the general restrictions for future changes.

* subversion/include/private/svn_io_private.h
  (svn_io__next_chunk_size): New utility function generating the new
                             read block size sequence.

* subversion/libsvn_subr/io.c
  (svn_io__next_chunk_size): Implement.
  (contents_identical_p,
   contents_three_identical_p): Let the new utility determine the read
                                block size.

* subversion/libsvn_subr/stream.c
  (svn_stream_contents_same2): Ditto.

Modified:
    subversion/trunk/subversion/include/private/svn_io_private.h
    subversion/trunk/subversion/include/svn_types.h
    subversion/trunk/subversion/libsvn_subr/io.c
    subversion/trunk/subversion/libsvn_subr/stream.c

Modified: subversion/trunk/subversion/include/private/svn_io_private.h
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/include/private/svn_io_private.h?rev=1586922&r1=1586921&r2=1586922&view=diff
==============================================================================
--- subversion/trunk/subversion/include/private/svn_io_private.h (original)
+++ subversion/trunk/subversion/include/private/svn_io_private.h Sun Apr 13 
04:40:40 2014
@@ -72,6 +72,14 @@ svn_io__is_finfo_read_only(svn_boolean_t
                            apr_finfo_t *file_info,
                            apr_pool_t *pool);
 
+/** Given that @a total_read bytes have already been read from a file or
+ * stream, return a suggestion for the size of the next block to process.
+ * This value will be <= #SVN__STREAM_CHUNK_SIZE.
+ *
+ * @since New in 1.9.
+ */
+apr_size_t
+svn_io__next_chunk_size(apr_off_t total_read);
 
 /** Buffer test handler function for a generic stream. @see svn_stream_t
  * and svn_stream__is_buffered().

Modified: subversion/trunk/subversion/include/svn_types.h
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/include/svn_types.h?rev=1586922&r1=1586921&r2=1586922&view=diff
==============================================================================
--- subversion/trunk/subversion/include/svn_types.h (original)
+++ subversion/trunk/subversion/include/svn_types.h Sun Apr 13 04:40:40 2014
@@ -1142,8 +1142,12 @@ typedef svn_error_t *(*svn_commit_callba
  *
  * NOTE: This is an internal macro, put here for convenience.
  * No public API may depend on the particular value of this macro.
+ *
+ * NOTE: The implementation assumes that this is a power of two >= 4k.
+ *       Moreover, it should be less than 80kB to prevent memory
+ *       fragmentation in the APR memory allocator.
  */
-#define SVN__STREAM_CHUNK_SIZE 16384
+#define SVN__STREAM_CHUNK_SIZE 0x10000
 #endif
 
 /** The maximum amount we can ever hold in memory. */

Modified: subversion/trunk/subversion/libsvn_subr/io.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/io.c?rev=1586922&r1=1586921&r2=1586922&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/io.c (original)
+++ subversion/trunk/subversion/libsvn_subr/io.c Sun Apr 13 04:40:40 2014
@@ -63,6 +63,7 @@
 #include "svn_config.h"
 #include "svn_private_config.h"
 #include "svn_ctype.h"
+#include "svn_sorts.h"
 
 #include "private/svn_atomic.h"
 #include "private/svn_io_private.h"
@@ -4501,6 +4502,17 @@ svn_io_read_version_file(int *version,
 }
 
 
+apr_size_t
+svn_io__next_chunk_size(apr_off_t total_read)
+{
+  /* Started with total_read===, this will generate a sequence ensuring
+     aligned access with increasing block size up to SVN__STREAM_CHUNK_SIZE:
+     4k@ offset 0, 4k@ offset 4k, 8k@ offset 8k, 16k@ offset 16k etc.
+     */
+  return total_read ? (apr_size_t)MIN(total_read, SVN__STREAM_CHUNK_SIZE)
+                    : (apr_size_t)4096;
+}
+
 
 /* Do a byte-for-byte comparison of FILE1 and FILE2. */
 static svn_error_t *
@@ -4517,6 +4529,7 @@ contents_identical_p(svn_boolean_t *iden
   apr_file_t *file2_h;
   svn_boolean_t eof1 = FALSE;
   svn_boolean_t eof2 = FALSE;
+  apr_off_t total_read = 0;
 
   SVN_ERR(svn_io_file_open(&file1_h, file1, APR_READ, APR_OS_DEFAULT,
                            pool));
@@ -4532,14 +4545,17 @@ contents_identical_p(svn_boolean_t *iden
   *identical_p = TRUE;  /* assume TRUE, until disproved below */
   while (!err && !eof1 && !eof2)
     {
+      apr_size_t to_read = svn_io__next_chunk_size(total_read);
+      total_read += to_read;
+
       err = svn_io_file_read_full2(file1_h, buf1,
-                                   SVN__STREAM_CHUNK_SIZE, &bytes_read1,
+                                   to_read, &bytes_read1,
                                    &eof1, pool);
       if (err)
           break;
 
       err = svn_io_file_read_full2(file2_h, buf2,
-                                   SVN__STREAM_CHUNK_SIZE, &bytes_read2,
+                                   to_read, &bytes_read2,
                                    &eof2, pool);
       if (err)
           break;
@@ -4585,6 +4601,7 @@ contents_three_identical_p(svn_boolean_t
   svn_boolean_t eof1 = FALSE;
   svn_boolean_t eof2 = FALSE;
   svn_boolean_t eof3 = FALSE;
+  apr_off_t total_read = 0;
 
   SVN_ERR(svn_io_file_open(&file1_h, file1, APR_READ, APR_OS_DEFAULT,
                            scratch_pool));
@@ -4621,6 +4638,9 @@ contents_three_identical_p(svn_boolean_t
       apr_size_t bytes_read1, bytes_read2, bytes_read3;
       svn_boolean_t read_1, read_2, read_3;
 
+      apr_size_t to_read = svn_io__next_chunk_size(total_read);
+      total_read += to_read;
+
       read_1 = read_2 = read_3 = FALSE;
 
       /* As long as a file is not at the end yet, and it is still
@@ -4628,8 +4648,8 @@ contents_three_identical_p(svn_boolean_t
       if (!eof1 && (*identical_p12 || *identical_p13))
         {
           err = svn_io_file_read_full2(file1_h, buf1,
-                                   SVN__STREAM_CHUNK_SIZE, &bytes_read1,
-                                   &eof1, scratch_pool);
+                                       to_read, &bytes_read1,
+                                       &eof1, scratch_pool);
           if (err)
               break;
           read_1 = TRUE;
@@ -4638,8 +4658,8 @@ contents_three_identical_p(svn_boolean_t
       if (!eof2 && (*identical_p12 || *identical_p23))
         {
           err = svn_io_file_read_full2(file2_h, buf2,
-                                   SVN__STREAM_CHUNK_SIZE, &bytes_read2,
-                                   &eof2, scratch_pool);
+                                       to_read, &bytes_read2,
+                                       &eof2, scratch_pool);
           if (err)
               break;
           read_2 = TRUE;
@@ -4648,8 +4668,8 @@ contents_three_identical_p(svn_boolean_t
       if (!eof3 && (*identical_p13 || *identical_p23))
         {
           err = svn_io_file_read_full2(file3_h, buf3,
-                                   SVN__STREAM_CHUNK_SIZE, &bytes_read3,
-                                   &eof3, scratch_pool);
+                                       to_read, &bytes_read3,
+                                       &eof3, scratch_pool);
           if (err)
               break;
           read_3 = TRUE;

Modified: subversion/trunk/subversion/libsvn_subr/stream.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/stream.c?rev=1586922&r1=1586921&r2=1586922&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/stream.c (original)
+++ subversion/trunk/subversion/libsvn_subr/stream.c Sun Apr 13 04:40:40 2014
@@ -585,14 +585,20 @@ svn_stream_contents_same2(svn_boolean_t 
 {
   char *buf1 = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
   char *buf2 = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
-  apr_size_t bytes_read1 = SVN__STREAM_CHUNK_SIZE;
-  apr_size_t bytes_read2 = SVN__STREAM_CHUNK_SIZE;
+  apr_size_t to_read = 0;
+  apr_size_t bytes_read1 = 0;
+  apr_size_t bytes_read2 = 0;
+  apr_off_t total_read = 0;
   svn_error_t *err = NULL;
 
   *same = TRUE;  /* assume TRUE, until disproved below */
-  while (bytes_read1 == SVN__STREAM_CHUNK_SIZE
-         && bytes_read2 == SVN__STREAM_CHUNK_SIZE)
+  while (bytes_read1 == to_read && bytes_read2 == to_read)
     {
+      to_read = svn_io__next_chunk_size(total_read);
+      bytes_read1 = to_read;
+      bytes_read2 = to_read;
+      total_read += to_read;
+
       err = svn_stream_read_full(stream1, buf1, &bytes_read1);
       if (err)
         break;


Reply via email to