Hi devs,

this is part of the delta_files() speedup patch series:
http://svn.haxx.se/dev/archive-2010-03/0604.shtml

Under Windows, a significant part of the overall runtime
is spent in writing data to the (already buffered) APR
file implementation. It seems that the mutex serializing
the buffer access in apr_file_write is expensive. 

Also, >50% of all write requests are 2 bytes or smaller
(i.e. line endings and empty lines). For them, the deep
call hierarchy constitutes a large overhead on register-
lacking x86.

This patch eliminates far over 90% of all write requests
bringing the portion of time spent in _svn_io_file_write_full
down from about 7 to below 3 percent on 32 bit Windows.

Performance gain is ~1% under Linux but due to the
larger async I/O and mutex overhead it is about 4%
under Windows:

s~$ time ~/1.7-928181/svn export --ignore-externals -q $REPO/trunk /dev/shm/t
real    0m3.727s
user    0m3.189s
sys     0m0.542s

~$ time ~/1.7-patched/svn export --ignore-externals -q $REPO/trunk /dev/shm/t
real    0m3.697s
user    0m3.148s
sys     0m0.558s

-- Stefan^2.

[[[
Buffer small write requests to APR file streams. For details see
http:// ...

* subversion/libsvn_subr/stream.c
  (baton_apr): add write buffer members
  (flush_buffer_apr): new function
  (read_handler_apr): auto-allocate write buffer and re-direct
   small writes there.
  (read_handler_apr, close_handler_apr, mark_handler_apr,
   seek_handler_apr): flush write buffer before calling i/o functions.

patch by stefanfuhrmann < at > alice-dsl.de
]]]

Index: subversion/libsvn_subr/stream.c
===================================================================
--- subversion/libsvn_subr/stream.c	(revision 928181)
+++ subversion/libsvn_subr/stream.c	(working copy)
@@ -667,15 +667,45 @@
    * When either of these is negative, no range has been specified. */
   apr_off_t start;
   apr_off_t end;
+
+  /* Collect short write requests here. You must clear this
+   * buffer before any other APR i/o call for this file.
+   * Initially NULL, will be auto-allocated just before use. */
+  char *write_buffer;
+  apr_size_t buffered;
 };
 
+/* Ensure that no data is left in the write buffer so that
+   the file contents is actually up-to-date and following 
+   APR i/o calls will return the expected data.
+ */
+static svn_error_t *
+flush_buffer_apr(struct baton_apr *btn)
+{
+  if (btn->buffered > 0)
+    {
+      apr_size_t written = 0;
+      SVN_ERR(svn_io_file_write_full(btn->file, 
+                                     btn->write_buffer, 
+                                     btn->buffered, 
+                                     &written, 
+                                     btn->pool));
+      btn->buffered -= written;
+    }
 
+  if (btn->buffered > 0)
+    return svn_error_create(SVN_ERR_IO_WRITE_ERROR, NULL, NULL);
+
+  return SVN_NO_ERROR;
+}
+
 static svn_error_t *
 read_handler_apr(void *baton, char *buffer, apr_size_t *len)
 {
   struct baton_apr *btn = baton;
   svn_error_t *err;
 
+  SVN_ERR(flush_buffer_apr(btn));
   err = svn_io_file_read_full(btn->file, buffer, *len, len, btn->pool);
   if (err && APR_STATUS_IS_EOF(err->apr_err))
     {
@@ -686,19 +716,62 @@
   return err;
 }
 
+/* typical code lines should get into the buffer */
+#define BUFFER_THRESHOLD (128)
+#define BUFFER_SIZE (8*BUFFER_THRESHOLD)
+
 static svn_error_t *
 write_handler_apr(void *baton, const char *data, apr_size_t *len)
 {
   struct baton_apr *btn = baton;
+  apr_size_t temp = *len;
 
+  /* better safe than sorry ... */
+  if (temp == 0)
+    return SVN_NO_ERROR;
+
+  /* Buffer short requests here as the underlying APR implementation
+   * needs to employ mutexes etc. When processing source files, 
+   * virtually all write requests get buffered here. */
+  if (temp < BUFFER_THRESHOLD)
+    {
+      if (btn->write_buffer == NULL)
+        btn->write_buffer = apr_palloc(btn->pool,BUFFER_SIZE);
+
+      if (btn->buffered + temp > BUFFER_SIZE)
+        SVN_ERR(flush_buffer_apr(btn));
+
+      /* In some cases, line endings account for 50+% of all 
+         write requests. Make them as cheap as possible. */
+      if (temp > 2)
+        memcpy(btn->write_buffer + btn->buffered, data, temp);
+      else
+        {
+          char* dest = btn->write_buffer + btn->buffered;
+          dest[0] = data[0];
+
+          if (temp == 2)
+            dest[1] = data[1];
+        }
+
+      btn->buffered += temp;
+      return SVN_NO_ERROR;
+    }
+
+  SVN_ERR(flush_buffer_apr(btn));
   return svn_io_file_write_full(btn->file, data, *len, len, btn->pool);
 }
 
+/* pre-processor cleanup - not strictly necessary */
+#undef BUFFER_THRESHOLD
+#undef BUFFER_SIZE
+
 static svn_error_t *
 close_handler_apr(void *baton)
 {
   struct baton_apr *btn = baton;
 
+  SVN_ERR(flush_buffer_apr(btn));
   return svn_io_file_close(btn->file, btn->pool);
 }
 
@@ -721,6 +794,7 @@
   struct baton_apr *btn = baton;
   (*mark) = apr_palloc(pool, sizeof(**mark));
   (*mark)->m.apr = 0;
+  SVN_ERR(flush_buffer_apr(btn));
   SVN_ERR(svn_io_file_seek(btn->file, APR_CUR, &(*mark)->m.apr, btn->pool));
   return SVN_NO_ERROR;
 }
@@ -729,6 +803,7 @@
 seek_handler_apr(void *baton, svn_stream_mark_t *mark)
 {
   struct baton_apr *btn = baton;
+  SVN_ERR(flush_buffer_apr(btn));
   SVN_ERR(svn_io_file_seek(btn->file, APR_SET, &mark->m.apr, btn->pool));
   return SVN_NO_ERROR;
 }
@@ -804,6 +879,8 @@
   baton->pool = pool;
   baton->start = -1;
   baton->end = -1;
+  baton->write_buffer = NULL;
+  baton->buffered = 0;
   stream = svn_stream_create(baton, pool);
   svn_stream_set_read(stream, read_handler_apr);
   svn_stream_set_write(stream, write_handler_apr);

Reply via email to