Author: stefan2
Date: Sun Apr 13 04:40:40 2014
New Revision: 1586922
URL: http://svn.apache.org/r1586922
Log:
Speed up file / stream comparison, i.e. minimize the processing overhead
for finding the first mismatch.
The approach is two-sided. Instead of fetching SVN__STREAM_CHUNK_SIZE
from all sources before comparing data, we start with a much lower 4kB
and increase the chunk size until we reach SVN__STREAM_CHUNK_SIZE while
making sure that all reads are naturally aligned. So, we quickly find
mismatches near the beginning of the file.
On the other side, we bump the SVN__STREAM_CHUNK_SIZE to 64kB which
gives better throughput for longer distances to the first mismatch -
without causing ill effects in APR's memory management.
* subversion/include/svn_types.h
(SVN__STREAM_CHUNK_SIZE): Bump to 64k and add some documentation on
the general restrictions for future changes.
* subversion/include/private/svn_io_private.h
(svn_io__next_chunk_size): New utility function generating the new
read block size sequence.
* subversion/libsvn_subr/io.c
(svn_io__next_chunk_size): Implement.
(contents_identical_p,
contents_three_identical_p): Let the new utility determine the read
block size.
* subversion/libsvn_subr/stream.c
(svn_stream_contents_same2): Ditto.
Modified:
subversion/trunk/subversion/include/private/svn_io_private.h
subversion/trunk/subversion/include/svn_types.h
subversion/trunk/subversion/libsvn_subr/io.c
subversion/trunk/subversion/libsvn_subr/stream.c
Modified: subversion/trunk/subversion/include/private/svn_io_private.h
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/include/private/svn_io_private.h?rev=1586922&r1=1586921&r2=1586922&view=diff
==============================================================================
--- subversion/trunk/subversion/include/private/svn_io_private.h (original)
+++ subversion/trunk/subversion/include/private/svn_io_private.h Sun Apr 13 04:40:40 2014
@@ -72,6 +72,14 @@ svn_io__is_finfo_read_only(svn_boolean_t
apr_finfo_t *file_info,
apr_pool_t *pool);
+/** Given that @a total_read bytes have already been read from a file or
+ * stream, return a suggestion for the size of the next block to process.
+ * This value will be <= #SVN__STREAM_CHUNK_SIZE.
+ *
+ * @since New in 1.9.
+ */
+apr_size_t
+svn_io__next_chunk_size(apr_off_t total_read);
/** Buffer test handler function for a generic stream. @see svn_stream_t
* and svn_stream__is_buffered().
Modified: subversion/trunk/subversion/include/svn_types.h
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/include/svn_types.h?rev=1586922&r1=1586921&r2=1586922&view=diff
==============================================================================
--- subversion/trunk/subversion/include/svn_types.h (original)
+++ subversion/trunk/subversion/include/svn_types.h Sun Apr 13 04:40:40 2014
@@ -1142,8 +1142,12 @@ typedef svn_error_t *(*svn_commit_callba
*
* NOTE: This is an internal macro, put here for convenience.
* No public API may depend on the particular value of this macro.
+ *
+ * NOTE: The implementation assumes that this is a power of two >= 4k.
+ * Moreover, it should be less than 80kB to prevent memory
+ * fragmentation in the APR memory allocator.
*/
-#define SVN__STREAM_CHUNK_SIZE 16384
+#define SVN__STREAM_CHUNK_SIZE 0x10000
#endif
/** The maximum amount we can ever hold in memory. */
Modified: subversion/trunk/subversion/libsvn_subr/io.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/io.c?rev=1586922&r1=1586921&r2=1586922&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/io.c (original)
+++ subversion/trunk/subversion/libsvn_subr/io.c Sun Apr 13 04:40:40 2014
@@ -63,6 +63,7 @@
#include "svn_config.h"
#include "svn_private_config.h"
#include "svn_ctype.h"
+#include "svn_sorts.h"
#include "private/svn_atomic.h"
#include "private/svn_io_private.h"
@@ -4501,6 +4502,17 @@ svn_io_read_version_file(int *version,
}
+apr_size_t
+svn_io__next_chunk_size(apr_off_t total_read)
+{
+ /* Started with total_read==0, this will generate a sequence ensuring
+ aligned access with increasing block size up to SVN__STREAM_CHUNK_SIZE:
+ 4k@ offset 0, 4k@ offset 4k, 8k@ offset 8k, 16k@ offset 16k etc.
+ */
+ return total_read ? (apr_size_t)MIN(total_read, SVN__STREAM_CHUNK_SIZE)
+ : (apr_size_t)4096;
+}
+
/* Do a byte-for-byte comparison of FILE1 and FILE2. */
static svn_error_t *
@@ -4517,6 +4529,7 @@ contents_identical_p(svn_boolean_t *iden
apr_file_t *file2_h;
svn_boolean_t eof1 = FALSE;
svn_boolean_t eof2 = FALSE;
+ apr_off_t total_read = 0;
SVN_ERR(svn_io_file_open(&file1_h, file1, APR_READ, APR_OS_DEFAULT,
pool));
@@ -4532,14 +4545,17 @@ contents_identical_p(svn_boolean_t *iden
*identical_p = TRUE; /* assume TRUE, until disproved below */
while (!err && !eof1 && !eof2)
{
+ apr_size_t to_read = svn_io__next_chunk_size(total_read);
+ total_read += to_read;
+
err = svn_io_file_read_full2(file1_h, buf1,
- SVN__STREAM_CHUNK_SIZE, &bytes_read1,
+ to_read, &bytes_read1,
&eof1, pool);
if (err)
break;
err = svn_io_file_read_full2(file2_h, buf2,
- SVN__STREAM_CHUNK_SIZE, &bytes_read2,
+ to_read, &bytes_read2,
&eof2, pool);
if (err)
break;
@@ -4585,6 +4601,7 @@ contents_three_identical_p(svn_boolean_t
svn_boolean_t eof1 = FALSE;
svn_boolean_t eof2 = FALSE;
svn_boolean_t eof3 = FALSE;
+ apr_off_t total_read = 0;
SVN_ERR(svn_io_file_open(&file1_h, file1, APR_READ, APR_OS_DEFAULT,
scratch_pool));
@@ -4621,6 +4638,9 @@ contents_three_identical_p(svn_boolean_t
apr_size_t bytes_read1, bytes_read2, bytes_read3;
svn_boolean_t read_1, read_2, read_3;
+ apr_size_t to_read = svn_io__next_chunk_size(total_read);
+ total_read += to_read;
+
read_1 = read_2 = read_3 = FALSE;
/* As long as a file is not at the end yet, and it is still
@@ -4628,8 +4648,8 @@ contents_three_identical_p(svn_boolean_t
if (!eof1 && (*identical_p12 || *identical_p13))
{
err = svn_io_file_read_full2(file1_h, buf1,
- SVN__STREAM_CHUNK_SIZE, &bytes_read1,
- &eof1, scratch_pool);
+ to_read, &bytes_read1,
+ &eof1, scratch_pool);
if (err)
break;
read_1 = TRUE;
@@ -4638,8 +4658,8 @@ contents_three_identical_p(svn_boolean_t
if (!eof2 && (*identical_p12 || *identical_p23))
{
err = svn_io_file_read_full2(file2_h, buf2,
- SVN__STREAM_CHUNK_SIZE, &bytes_read2,
- &eof2, scratch_pool);
+ to_read, &bytes_read2,
+ &eof2, scratch_pool);
if (err)
break;
read_2 = TRUE;
@@ -4648,8 +4668,8 @@ contents_three_identical_p(svn_boolean_t
if (!eof3 && (*identical_p13 || *identical_p23))
{
err = svn_io_file_read_full2(file3_h, buf3,
- SVN__STREAM_CHUNK_SIZE, &bytes_read3,
- &eof3, scratch_pool);
+ to_read, &bytes_read3,
+ &eof3, scratch_pool);
if (err)
break;
read_3 = TRUE;
Modified: subversion/trunk/subversion/libsvn_subr/stream.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/stream.c?rev=1586922&r1=1586921&r2=1586922&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/stream.c (original)
+++ subversion/trunk/subversion/libsvn_subr/stream.c Sun Apr 13 04:40:40 2014
@@ -585,14 +585,20 @@ svn_stream_contents_same2(svn_boolean_t
{
char *buf1 = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
char *buf2 = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
- apr_size_t bytes_read1 = SVN__STREAM_CHUNK_SIZE;
- apr_size_t bytes_read2 = SVN__STREAM_CHUNK_SIZE;
+ apr_size_t to_read = 0;
+ apr_size_t bytes_read1 = 0;
+ apr_size_t bytes_read2 = 0;
+ apr_off_t total_read = 0;
svn_error_t *err = NULL;
*same = TRUE; /* assume TRUE, until disproved below */
- while (bytes_read1 == SVN__STREAM_CHUNK_SIZE
- && bytes_read2 == SVN__STREAM_CHUNK_SIZE)
+ while (bytes_read1 == to_read && bytes_read2 == to_read)
{
+ to_read = svn_io__next_chunk_size(total_read);
+ bytes_read1 = to_read;
+ bytes_read2 = to_read;
+ total_read += to_read;
+
err = svn_stream_read_full(stream1, buf1, &bytes_read1);
if (err)
break;