Hi devs, I am further reducing the backlog of patches sitting in my working copy. This patch and the next one optimize code locally, shaving off cycles here and there. The net effect is an improvement of somewhere between 3 and 10 percent for repository access (ls, export, etc.).
In this patch, I eliminated calls to memcpy for small copies, as they are particularly expensive in the MS CRT.

-- Stefan^2.

[[[
Eliminate memcpy from critical paths while reading data from the repository.

* subversion/libsvn_delta/text_delta.c
  (svn_txdelta_apply_instructions): Replace memcpy for small amounts of data;
   optimize overlapping copies; optimize 'buffer full' detection.

* subversion/libsvn_subr/svn_string.c
  (svn_stringbuf_appendbytes): Replace memcpy with specialized code when
   adding single chars.
]]]
Index: subversion/libsvn_delta/text_delta.c =================================================================== --- subversion/libsvn_delta/text_delta.c (revision 937673) +++ subversion/libsvn_delta/text_delta.c (working copy) @@ -32,6 +32,7 @@ #include "svn_io.h" #include "svn_pools.h" #include "svn_checksum.h" +#include "svn_private_config.h" #include "delta.h" @@ -570,23 +597,38 @@ const char *sbuf, char *tbuf, apr_size_t *tlen) { - const svn_txdelta_op_t *op; - apr_size_t i, j, tpos = 0; + const svn_txdelta_op_t *op, *last_op = window->ops + window->num_ops; + apr_size_t to_fill = *tlen > window->tview_len ? window->tview_len : *tlen; + apr_size_t left = to_fill; + const char* end, *source; + char *target = tbuf; - for (op = window->ops; op < window->ops + window->num_ops; op++) + for (op = window->ops; left > 0; op++) { - const apr_size_t buf_len = (op->length < *tlen - tpos - ? op->length : *tlen - tpos); + const apr_size_t buf_len = op->length > left ? left : op->length; + left -= buf_len; /* Check some invariants common to all instructions. */ - assert(tpos + op->length <= window->tview_len); + assert(target - tbuf + op->length <= window->tview_len); switch (op->action_code) { case svn_txdelta_source: /* Copy from source area. */ assert(op->offset + op->length <= window->sview_len); - memcpy(tbuf + tpos, sbuf + op->offset, buf_len); + if (buf_len > 7) + { + memcpy(target, sbuf + op->offset, buf_len); + target += buf_len; + } + else + { + /* memcpy is not exactly fast for small block sizes. + Since they are common, let's run optimized code for them. */ + end = sbuf + op->offset + buf_len; + for (source = sbuf + op->offset; source != end; source++) + *(target++) = *source; + } break; case svn_txdelta_target: @@ -594,31 +636,46 @@ semantics aren't guaranteed for overlapping memory areas, and target copies are allowed to overlap to generate repeated data. 
*/ - assert(op->offset < tpos); - for (i = op->offset, j = tpos; i < op->offset + buf_len; i++) - tbuf[j++] = tbuf[i]; + + assert(op->offset < target - *tbuf); + source = tbuf + op->offset; + end = tbuf + op->offset + buf_len; + + if (end <= target) + for (; source + sizeof (unsigned) <= end; + source += sizeof (unsigned), target += sizeof (unsigned)) + *(unsigned*)(target) = *(unsigned*)(source); + + for (; source != end; source++) + *(target++) = *source; break; case svn_txdelta_new: /* Copy from window new area. */ assert(op->offset + op->length <= window->new_data->len); - memcpy(tbuf + tpos, - window->new_data->data + op->offset, - buf_len); + if (buf_len > 7) + { + memcpy(target, + window->new_data->data + op->offset, + buf_len); + target += buf_len; + } + else + { + /* memcpy is not exactly fast for small block sizes. + Since they are common, let's run optimized code for them. */ + end = window->new_data->data + op->offset + buf_len; + for (source = window->new_data->data + op->offset; source != end; source++) + *(target++) = *source; + } break; default: assert(!"Invalid delta instruction code"); } - - tpos += op->length; - if (tpos >= *tlen) - return; /* The buffer is full. */ } - /* Check that we produced the right amount of data. */ - assert(tpos == window->tview_len); - *tlen = tpos; + *tlen = to_fill; } /* This is a private interlibrary compatibility wrapper. */ Index: subversion/libsvn_subr/svn_string.c =================================================================== --- subversion/libsvn_subr/svn_string.c (revision 937673) +++ subversion/libsvn_subr/svn_string.c (working copy) @@ -391,20 +391,34 @@ apr_size_t total_len; void *start_address; - total_len = str->len + count; /* total size needed */ + /* This function is frequently called by svn_stream_readline + adding one char at a time. Eliminate the 'evil' memcpy in + that case unless the buffer must be resized. */ - /* +1 for null terminator. 
*/ - svn_stringbuf_ensure(str, (total_len + 1)); + apr_size_t old_len = str->len; + if ((count == 1) && (str->blocksize > old_len + 1)) + { + str->data[old_len] = *bytes; + str->data[old_len+1] = '\0'; + str->len++; + } + else + { + total_len = old_len + count; /* total size needed */ - /* get address 1 byte beyond end of original bytestring */ - start_address = (str->data + str->len); + /* +1 for null terminator. */ + svn_stringbuf_ensure(str, (total_len + 1)); - memcpy(start_address, bytes, count); - str->len = total_len; + /* get address 1 byte beyond end of original bytestring */ + start_address = (str->data + old_len); - str->data[str->len] = '\0'; /* We don't know if this is binary - data or not, but convention is - to null-terminate. */ + memcpy(start_address, bytes, count); + str->len = total_len; + + str->data[str->len] = '\0'; /* We don't know if this is binary + data or not, but convention is + to null-terminate. */ + } }