On Mon, Jun 23, 2014 at 11:41 PM, Stefan Sperling <s...@elego.de> wrote:

> The FSFS changes cache returns an array representation of changed
> path data which needs to be converted back to a hash for API reasons
> by svn_fs_fs__paths_changed(). No other function accesses the
> changes cache so returning a temporary array seems odd.
> Wouldn't it make sense for the cache to return hashes directly?
>
> Isn't it better to convert from hash to array during serialization,
> which ideally happens only once, rather than constructing an array
> and then a hash from array elements every time the cache is accessed?
>

The rationale is as follows - which may or may not be correct:

* constructing an array is extremely cheap
  (even in the ruby example, it uses only ~2MB, half of it due to resize;
  adding entries is trivial) and having it as an intermediate step almost
  no extra costs.
* not every item that gets put into cache will ever get retrieved.
  Adding data to the cache is therefore the hot path (with notable
exceptions).

Looking at the (de-)serializer code, there is still room for improvement.
The array could be used directly instead of creating a temporary copy.

-- Stefan^2.


> I'm not sure how to measure the effects of this.
>
> Index: subversion/libsvn_fs_fs/cached_data.c
> ===================================================================
> --- subversion/libsvn_fs_fs/cached_data.c       (revision 1604933)
> +++ subversion/libsvn_fs_fs/cached_data.c       (working copy)
> @@ -2686,7 +2686,7 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
>  }
>
>  svn_error_t *
> -svn_fs_fs__get_changes(apr_array_header_t **changes,
> +svn_fs_fs__get_changes(apr_hash_t **changes,
>                         svn_fs_t *fs,
>                         svn_revnum_t rev,
>                         apr_pool_t *result_pool)
> @@ -3093,7 +3093,7 @@ auto_select_stream(svn_stream_t **stream,
>   * FILE and wrapping FILE_STREAM.  Use POOL for allocations.
>   */
>  static svn_error_t *
> -block_read_changes(apr_array_header_t **changes,
> +block_read_changes(apr_hash_t **changes,
>                     svn_fs_t *fs,
>                     svn_fs_fs__revision_file_t *rev_file,
>                     svn_fs_fs__p2l_entry_t *entry,
> @@ -3303,7 +3303,7 @@ block_read(void **result,
>                      break;
>
>                    case SVN_FS_FS__ITEM_TYPE_CHANGES:
> -                    SVN_ERR(block_read_changes((apr_array_header_t
> **)&item,
> +                    SVN_ERR(block_read_changes((apr_hash_t **)&item,
>                                                 fs, revision_file,
>                                                 entry, is_result,
>                                                 pool, iterpool));
> Index: subversion/libsvn_fs_fs/cached_data.h
> ===================================================================
> --- subversion/libsvn_fs_fs/cached_data.h       (revision 1604933)
> +++ subversion/libsvn_fs_fs/cached_data.h       (working copy)
> @@ -150,7 +150,7 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist,
>   * Allocate the result in POOL.
>   */
>  svn_error_t *
> -svn_fs_fs__get_changes(apr_array_header_t **changes,
> +svn_fs_fs__get_changes(apr_hash_t **changes,
>                         svn_fs_t *fs,
>                         svn_revnum_t rev,
>                         apr_pool_t *pool);
> Index: subversion/libsvn_fs_fs/low_level.c
> ===================================================================
> --- subversion/libsvn_fs_fs/low_level.c (revision 1604933)
> +++ subversion/libsvn_fs_fs/low_level.c (working copy)
> @@ -381,7 +381,7 @@ read_change(change_t **change_p,
>  }
>
>  svn_error_t *
> -svn_fs_fs__read_changes(apr_array_header_t **changes,
> +svn_fs_fs__read_changes(apr_hash_t **changes,
>                          svn_stream_t *stream,
>                          apr_pool_t *result_pool,
>                          apr_pool_t *scratch_pool)
> @@ -389,15 +389,14 @@ svn_error_t *
>    change_t *change;
>    apr_pool_t *iterpool;
>
> -  /* pre-allocate enough room for most change lists
> -     (will be auto-expanded as necessary) */
> -  *changes = apr_array_make(result_pool, 30, sizeof(change_t *));
> +  *changes = apr_hash_make(result_pool);
>
>    SVN_ERR(read_change(&change, stream, result_pool, scratch_pool));
>    iterpool = svn_pool_create(scratch_pool);
>    while (change)
>      {
> -      APR_ARRAY_PUSH(*changes, change_t*) = change;
> +      apr_hash_set(*changes, change->path.data, change->path.len,
> +                   &change->info);
>        SVN_ERR(read_change(&change, stream, result_pool, iterpool));
>        svn_pool_clear(iterpool);
>      }
> Index: subversion/libsvn_fs_fs/low_level.h
> ===================================================================
> --- subversion/libsvn_fs_fs/low_level.h (revision 1604933)
> +++ subversion/libsvn_fs_fs/low_level.h (working copy)
> @@ -86,7 +86,7 @@ svn_fs_fs__unparse_footer(apr_off_t l2p_offset,
>  /* Read all the changes from STREAM and store them in *CHANGES,
>     allocated in RESULT_POOL. Do temporary allocations in SCRATCH_POOL. */
>  svn_error_t *
> -svn_fs_fs__read_changes(apr_array_header_t **changes,
> +svn_fs_fs__read_changes(apr_hash_t **changes,
>                          svn_stream_t *stream,
>                          apr_pool_t *result_pool,
>                          apr_pool_t *scratch_pool);
> Index: subversion/libsvn_fs_fs/temp_serializer.c
> ===================================================================
> --- subversion/libsvn_fs_fs/temp_serializer.c   (revision 1604933)
> +++ subversion/libsvn_fs_fs/temp_serializer.c   (working copy)
> @@ -1125,7 +1125,7 @@ deserialize_change(void *buffer, change_t **change
>  }
>
>  /* Auxiliary structure representing the content of a change_t array.
> -   This structure is much easier to (de-)serialize than an APR array.
> +   This structure is much easier to (de-)serialize than an APR hash.
>   */
>  typedef struct changes_data_t
>  {
> @@ -1142,20 +1142,30 @@ svn_fs_fs__serialize_changes(void **data,
>                               void *in,
>                               apr_pool_t *pool)
>  {
> -  apr_array_header_t *array = in;
> +  apr_hash_t *hash = in;
>    changes_data_t changes;
>    svn_temp_serializer__context_t *context;
>    svn_stringbuf_t *serialized;
>    int i;
> +  apr_hash_index_t *hi;
>
>    /* initialize our auxiliary data structure */
> -  changes.count = array->nelts;
> +  changes.count = apr_hash_count(hash);
>    changes.changes = apr_palloc(pool, sizeof(change_t*) * changes.count);
>
> -  /* populate it with the array elements */
> -  for (i = 0; i < changes.count; ++i)
> -    changes.changes[i] = APR_ARRAY_IDX(array, i, change_t*);
> +  /* populate it with the hash elements */
> +  i = 0;
> +  for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi), ++i)
> +    {
> +      const char *path = svn__apr_hash_index_key(hi);
> +      svn_fs_path_change2_t *info = svn__apr_hash_index_val(hi);
>
> +      changes.changes[i] = apr_palloc(pool, sizeof (change_t));
> +      changes.changes[i]->path.data = path;
> +      changes.changes[i]->path.len = strlen(path);
> +      changes.changes[i]->info = *info;
> +    }
> +
>    /* serialize it and all its elements */
>    context = svn_temp_serializer__init(&changes,
>                                        sizeof(changes),
> @@ -1188,8 +1198,7 @@ svn_fs_fs__deserialize_changes(void **out,
>  {
>    int i;
>    changes_data_t *changes = (changes_data_t *)data;
> -  apr_array_header_t *array = apr_array_make(pool, changes->count,
> -                                             sizeof(change_t *));
> +  apr_hash_t *hash = apr_hash_make(pool);
>
>    /* de-serialize our auxiliary data structure */
>    svn_temp_deserializer__resolve(changes, (void**)&changes->changes);
> @@ -1197,13 +1206,17 @@ svn_fs_fs__deserialize_changes(void **out,
>    /* de-serialize each entry and add it to the array */
>    for (i = 0; i < changes->count; ++i)
>      {
> +      change_t *change;
> +
>        deserialize_change(changes->changes,
>                           (change_t **)&changes->changes[i]);
> -      APR_ARRAY_PUSH(array, change_t *) = changes->changes[i];
> +      change = changes->changes[i];
> +      apr_hash_set(hash, change->path.data, change->path.len,
> +                   &change->info);
>      }
>
>    /* done */
> -  *out = array;
> +  *out = hash;
>
>    return SVN_NO_ERROR;
>  }
> Index: subversion/libsvn_fs_fs/transaction.c
> ===================================================================
> --- subversion/libsvn_fs_fs/transaction.c       (revision 1604933)
> +++ subversion/libsvn_fs_fs/transaction.c       (working copy)
> @@ -881,23 +881,8 @@ svn_fs_fs__paths_changed(apr_hash_t **changed_path
>                           svn_revnum_t rev,
>                           apr_pool_t *pool)
>  {
> -  apr_hash_t *changed_paths;
> -  apr_array_header_t *changes;
> -  int i;
> -
> -  SVN_ERR(svn_fs_fs__get_changes(&changes, fs, rev, pool));
> -
> -  changed_paths = svn_hash__make(pool);
> -  for (i = 0; i < changes->nelts; ++i)
> -    {
> -      change_t *change = APR_ARRAY_IDX(changes, i, change_t *);
> -      apr_hash_set(changed_paths, change->path.data, change->path.len,
> -                   &change->info);
> -    }
> -
> -  *changed_paths_p = changed_paths;
> -
> -  return SVN_NO_ERROR;
> +  return svn_error_trace(svn_fs_fs__get_changes(changed_paths_p, fs, rev,
> +                                                pool));
>  }
>
>  /* Copy a revision node-rev SRC into the current transaction TXN_ID in
>

Reply via email to