Hi devs,

further reducing my backlog of patches sitting in my
working copy, this and the next patch optimize code
locally - shaving off cycles here and there. The net
effect is a speedup of somewhere between 3 and 10 percent
for repository access (ls, export, etc.).

In this patch, I eliminated calls to memcpy for small
copies as they are particularly expensive in the MS CRT.

-- Stefan^2.

[[[
Eliminate memcpy from critical paths when reading
data from the repository.

* subversion/libsvn_delta/text_delta.c
 (svn_txdelta_apply_instructions): replace memcpy
 for small amounts of data; optimize overlapping
 copies; optimize 'buffer full' detection

* subversion/libsvn_subr/svn_string.c
 (svn_stringbuf_appendbytes): replace memcpy
 with specialized code when adding single chars.
]]]

Index: subversion/libsvn_delta/text_delta.c
===================================================================
--- subversion/libsvn_delta/text_delta.c        (revision 937673)
+++ subversion/libsvn_delta/text_delta.c        (working copy)
@@ -32,6 +32,7 @@
 #include "svn_io.h"
 #include "svn_pools.h"
 #include "svn_checksum.h"
+#include "svn_private_config.h"
 
 #include "delta.h"
 
@@ -570,23 +597,38 @@
                                const char *sbuf, char *tbuf,
                                apr_size_t *tlen)
 {
-  const svn_txdelta_op_t *op;
-  apr_size_t i, j, tpos = 0;
+  const svn_txdelta_op_t *op, *last_op = window->ops + window->num_ops;
+  apr_size_t to_fill = *tlen > window->tview_len ? window->tview_len : *tlen;
+  apr_size_t left = to_fill;
+  const char* end, *source;
+  char *target = tbuf;
 
-  for (op = window->ops; op < window->ops + window->num_ops; op++)
+  for (op = window->ops; left > 0; op++)
     {
-      const apr_size_t buf_len = (op->length < *tlen - tpos
-                                  ? op->length : *tlen - tpos);
+      const apr_size_t buf_len = op->length > left ? left : op->length;
+      left -= buf_len;
 
       /* Check some invariants common to all instructions.  */
-      assert(tpos + op->length <= window->tview_len);
+      assert(target - tbuf + op->length <= window->tview_len);
 
       switch (op->action_code)
         {
         case svn_txdelta_source:
           /* Copy from source area.  */
           assert(op->offset + op->length <= window->sview_len);
-          memcpy(tbuf + tpos, sbuf + op->offset, buf_len);
+          if (buf_len > 7)
+            {
+              memcpy(target, sbuf + op->offset, buf_len);
+              target += buf_len;
+            }
+          else
+            {
+              /* memcpy is not exactly fast for small block sizes.
+                 Since they are common, let's run optimized code for them. */
+              end = sbuf + op->offset + buf_len;
+              for (source = sbuf + op->offset; source != end; source++)
+                *(target++) = *source;
+            }
           break;
 
         case svn_txdelta_target:
@@ -594,31 +636,46 @@
              semantics aren't guaranteed for overlapping memory areas,
              and target copies are allowed to overlap to generate
              repeated data.  */
-          assert(op->offset < tpos);
-          for (i = op->offset, j = tpos; i < op->offset + buf_len; i++)
-            tbuf[j++] = tbuf[i];
+          
+          assert(op->offset < target - *tbuf);
+          source = tbuf + op->offset;
+          end = tbuf + op->offset + buf_len;
+
+          if (end <= target)
+            for (; source + sizeof (unsigned) <= end; 
+                   source += sizeof (unsigned), target += sizeof (unsigned))
+              *(unsigned*)(target) = *(unsigned*)(source);
+
+          for (; source != end; source++)
+            *(target++) = *source;
           break;
 
         case svn_txdelta_new:
           /* Copy from window new area.  */
           assert(op->offset + op->length <= window->new_data->len);
-          memcpy(tbuf + tpos,
-                 window->new_data->data + op->offset,
-                 buf_len);
+          if (buf_len > 7)
+            {
+              memcpy(target,
+                     window->new_data->data + op->offset,
+                     buf_len);
+              target += buf_len;
+            }
+          else
+            {
+              /* memcpy is not exactly fast for small block sizes.
+                 Since they are common, let's run optimized code for them. */
+              end = window->new_data->data + op->offset + buf_len;
+              for (source = window->new_data->data + op->offset; source != end; source++)
+                *(target++) = *source;
+            }
           break;
 
         default:
           assert(!"Invalid delta instruction code");
         }
-
-      tpos += op->length;
-      if (tpos >= *tlen)
-        return;                 /* The buffer is full. */
     }
 
-  /* Check that we produced the right amount of data.  */
-  assert(tpos == window->tview_len);
-  *tlen = tpos;
+  *tlen = to_fill;
 }
 
 /* This is a private interlibrary compatibility wrapper. */
Index: subversion/libsvn_subr/svn_string.c
===================================================================
--- subversion/libsvn_subr/svn_string.c (revision 937673)
+++ subversion/libsvn_subr/svn_string.c (working copy)
@@ -391,20 +391,34 @@
   apr_size_t total_len;
   void *start_address;
 
-  total_len = str->len + count;  /* total size needed */
+  /* This function is frequently called by svn_stream_readline
+     adding one char at a time. Eliminate the 'evil' memcpy in
+     that case unless the buffer must be resized. */
 
-  /* +1 for null terminator. */
-  svn_stringbuf_ensure(str, (total_len + 1));
+  apr_size_t old_len = str->len;
+  if ((count == 1) && (str->blocksize > old_len + 1))
+    {
+      str->data[old_len] = *bytes;
+      str->data[old_len+1] = '\0';
+      str->len++;
+    }
+  else
+   {
+      total_len = old_len + count;  /* total size needed */
 
-  /* get address 1 byte beyond end of original bytestring */
-  start_address = (str->data + str->len);
+      /* +1 for null terminator. */
+      svn_stringbuf_ensure(str, (total_len + 1));
 
-  memcpy(start_address, bytes, count);
-  str->len = total_len;
+      /* get address 1 byte beyond end of original bytestring */
+      start_address = (str->data + old_len);
 
-  str->data[str->len] = '\0';  /* We don't know if this is binary
-                                  data or not, but convention is
-                                  to null-terminate. */
+      memcpy(start_address, bytes, count);
+      str->len = total_len;
+
+      str->data[str->len] = '\0';  /* We don't know if this is binary
+                                      data or not, but convention is
+                                      to null-terminate. */
+    }
 }
 
 

Reply via email to