Julian Foad wrote on Fri, Jun 03, 2011 at 09:52:21 +0100:
> Hi Stefan2 and others.
> 
> This patch simplifies code in RA-svn/marshal.c by using a single code
> path to read both short and long strings efficiently.  Using a single
> code path is beneficial for test coverage.
> 
> It is an alternative to r1028352 which merged r985606 and r1028092 from
> the performance branch.
> 
> I have run the test suite over RA-svn (with BDB) but haven't done any
> more testing than that.

Unless you built with SUSPICIOUSLY_HUGE_STRING_SIZE_THRESHOLD set to
something less than 20, this may not have tested the "Large string" code
path (when the loop loops).

> It's just something I noticed while looking at
> that code, it's not solving a problem I've noticed or anything like
> that.  By inspection I believe it is more efficient than
> read_long_string() was, and unchanged for short strings, but if you're
> able to do any performance analysis on it, that would be great.
> 
> - Julian
> 

> Simplify and optimize RA-svn's string marshalling.
> 
> This largely reverts r1028352 which merged r985606 and r1028092 from the
> performance branch.  That change used separate functions to handle short and
> long strings.  This change instead improves the original code to handle both
> short and long strings efficiently.
> 
> Compared with the pre-r1028352 code, this version uses a 1-MB chunk size
> instead of 4-KB and eliminates one memcpy of the data.
> 
> * subversion/libsvn_ra_svn/marshal.c
>   (read_long_string): Remove.
>   (read_string): Handle long strings with the same code as short strings, by
>     reading directly into pre-allocated space, but pre-allocate no more than
>     a reasonable chunk size at a time.
> --This line, and those below, will be ignored--
> 
> Index: subversion/libsvn_ra_svn/marshal.c
> ===================================================================
> --- subversion/libsvn_ra_svn/marshal.c        (revision 1130931)
> +++ subversion/libsvn_ra_svn/marshal.c        (working copy)
> @@ -563,36 +563,6 @@ svn_error_t *svn_ra_svn_write_tuple(svn_
>  
>  /* --- READING DATA ITEMS --- */
>  
> -/* Read LEN bytes from CONN into a supposedly empty STRINGBUF.
> - * POOL will be used for temporary allocations. */
> -static svn_error_t *
> -read_long_string(svn_ra_svn_conn_t *conn, apr_pool_t *pool,
> -                 svn_stringbuf_t *stringbuf, apr_uint64_t len)
> -{
> -  char readbuf[4096];
> -  apr_size_t readbuf_len;
> -
> -  /* We can't store strings longer than the maximum size of apr_size_t,
> -   * so check for wrapping */
> -  if (((apr_size_t) len) < len)

Why did you remove this check?

> -    return svn_error_create(SVN_ERR_RA_SVN_MALFORMED_DATA, NULL,
> -                            _("String length larger than maximum"));
> -
> -  while (len)
> -    {
> -      readbuf_len = len > sizeof(readbuf) ? sizeof(readbuf) : 
> (apr_size_t)len;
> -
> -      SVN_ERR(readbuf_read(conn, pool, readbuf, readbuf_len));
> -      /* Read into a stringbuf_t to so we don't allow the sender to allocate
> -       * an arbitrary amount of memory without actually sending us that much
> -       * data */
> -      svn_stringbuf_appendbytes(stringbuf, readbuf, readbuf_len);
> -      len -= readbuf_len;
> -    }
> -
> -  return SVN_NO_ERROR;
> -}
> -
>  /* Read LEN bytes from CONN into already-allocated structure ITEM.
>   * Afterwards, *ITEM is of type 'SVN_RA_SVN_STRING', and its string
>   * data is allocated in POOL. */
> @@ -601,42 +571,34 @@ static svn_error_t *read_string(svn_ra_s
>  {
>    svn_stringbuf_t *stringbuf;
>  
> -  /* We should not use large strings in our protocol. However, we may
> -   * receive a claim that a very long string is going to follow. In that
> -   * case, we start small and wait for all that data to actually show up.
> -   * This does not fully prevent DOS attacs but makes them harder (you
> -   * have to actually send gigabytes of data).
> -   */
> -  if (len > SUSPICIOUSLY_HUGE_STRING_SIZE_THRESHOLD)
> -    {
> -      /* This string might take a large amount of memory. Don't allocate
> -       * the whole buffer at once, so to prevent OOM issues by corrupted
> -       * network data.
> -       */
> -      stringbuf = svn_stringbuf_create("", pool);
> -      SVN_ERR(read_long_string(conn, pool, stringbuf, len));
> -    }
> -  else
> +  /* Read the string in chunks.  The chunk size is large enough to avoid
> +   * re-allocation in typical cases, and small enough to ensure we do not
> +   * pre-allocate an unreasonable amount of memory if (perhaps due to
> +   * network data corruption or a DOS attack), we receive a bogus claim that
> +   * a very long string is going to follow.  In that case, we start small
> +   * and wait for all that data to actually show up.  This does not fully
> +   * prevent DOS attacks but makes them harder (you have to actually send
> +   * gigabytes of data). */
> +  stringbuf = svn_stringbuf_create_ensure(
> +                (apr_size_t)(len < SUSPICIOUSLY_HUGE_STRING_SIZE_THRESHOLD
> +                             ? len : 
> SUSPICIOUSLY_HUGE_STRING_SIZE_THRESHOLD),
> +                pool);
> +
> +  /* Read the string data directly into the string structure.
> +   * Do it iteratively, if necessary.  */
> +  while (len)
>      {
> -      /* This is a reasonably sized string. So, provide a buffer large
> -       * enough to prevent re-allocation as long as the data transmission
> -       * is not flawed.
> -       */
> -      stringbuf = svn_stringbuf_create_ensure((apr_size_t)len, pool);
> -
> -      /* Read the string data directly into the string structure.
> -       * Do it iteratively, if necessary.
> -       */
> -      while (len)
> -        {
> -          apr_size_t readbuf_len = (apr_size_t)len;
> -          char *dest = stringbuf->data + stringbuf->len;
> -          SVN_ERR(readbuf_read(conn, pool, dest, readbuf_len));
> -
> -          stringbuf->len += readbuf_len;
> -          stringbuf->data[stringbuf->len] = '\0';
> -          len -= readbuf_len;
> -        }
> +      apr_size_t readbuf_len
> +        = (apr_size_t)(len < SUSPICIOUSLY_HUGE_STRING_SIZE_THRESHOLD
> +                       ? len : SUSPICIOUSLY_HUGE_STRING_SIZE_THRESHOLD);
> +      char *dest = stringbuf->data + stringbuf->len;
> +
> +      svn_stringbuf_ensure(stringbuf, stringbuf->len + readbuf_len);

Need ->len + ->len + 1, for NUL.  (Oddly, this API requires this
off-by-one while _create_ensure() does it for you.)

> +      SVN_ERR(readbuf_read(conn, pool, dest, readbuf_len));
> +
> +      stringbuf->len += readbuf_len;
> +      stringbuf->data[stringbuf->len] = '\0';
> +      len -= readbuf_len;

What happens if, at this point, there are less than READBUF_LEN bytes
available?

(e.g., if the client terminates the connection in the middle of
a string literal.)

>      }
>  
>    /* Return the string properly wrapped into an RA_SVN item.

Reply via email to