On Mon, Jun 23, 2014 at 11:41 PM, Stefan Sperling <s...@elego.de> wrote:
> The FSFS changes cache returns an array representation of changed > path data which needs to be converted back to a hash for API reasons > by svn_fs_fs__paths_changed(). No other function accesses the > changes cache so returning a temporary array seems odd. > Wouldn't it make sense for the cache to return hashes directly? > > Isn't it better to convert from hash to array during serialization, > which ideally happens only once, rather than constructing an array > and then a hash from array elements every time the cache is accessed? > The rationale is as follows - which may or may not be correct: * constructing an array is extremely cheap (even in the ruby example, it uses only ~2MB, half of it due to resize; adding entries is trivial) and having it as an intermediate step almost no extra costs. * not every item that gets put into cache will ever get retrieved. Adding data to the cache is therefore the hot path (with notable exceptions). Looking at the (de-)serializer code, there is still room for improvement. The array could be used directly instead of creating a temporary copy. -- Stefan^2. > I'm not sure how to measure the effects of this. > > Index: subversion/libsvn_fs_fs/cached_data.c > =================================================================== > --- subversion/libsvn_fs_fs/cached_data.c (revision 1604933) > +++ subversion/libsvn_fs_fs/cached_data.c (working copy) > @@ -2686,7 +2686,7 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist_p, > } > > svn_error_t * > -svn_fs_fs__get_changes(apr_array_header_t **changes, > +svn_fs_fs__get_changes(apr_hash_t **changes, > svn_fs_t *fs, > svn_revnum_t rev, > apr_pool_t *result_pool) > @@ -3093,7 +3093,7 @@ auto_select_stream(svn_stream_t **stream, > * FILE and wrapping FILE_STREAM. Use POOL for allocations. > */ > static svn_error_t * > -block_read_changes(apr_array_header_t **changes, > +block_read_changes(apr_hash_t **changes, > svn_fs_t *fs, > svn_fs_fs__revision_file_t *rev_file, > svn_fs_fs__p2l_entry_t *entry, > @@ -3303,7 +3303,7 @@ block_read(void **result, > break; > > case SVN_FS_FS__ITEM_TYPE_CHANGES: > - SVN_ERR(block_read_changes((apr_array_header_t > **)&item, > + SVN_ERR(block_read_changes((apr_hash_t **)&item, > fs, revision_file, > entry, is_result, > pool, iterpool)); > Index: subversion/libsvn_fs_fs/cached_data.h > =================================================================== > --- subversion/libsvn_fs_fs/cached_data.h (revision 1604933) > +++ subversion/libsvn_fs_fs/cached_data.h (working copy) > @@ -150,7 +150,7 @@ svn_fs_fs__get_proplist(apr_hash_t **proplist, > * Allocate the result in POOL. > */ > svn_error_t * > -svn_fs_fs__get_changes(apr_array_header_t **changes, > +svn_fs_fs__get_changes(apr_hash_t **changes, > svn_fs_t *fs, > svn_revnum_t rev, > apr_pool_t *pool); > Index: subversion/libsvn_fs_fs/low_level.c > =================================================================== > --- subversion/libsvn_fs_fs/low_level.c (revision 1604933) > +++ subversion/libsvn_fs_fs/low_level.c (working copy) > @@ -381,7 +381,7 @@ read_change(change_t **change_p, > } > > svn_error_t * > -svn_fs_fs__read_changes(apr_array_header_t **changes, > +svn_fs_fs__read_changes(apr_hash_t **changes, > svn_stream_t *stream, > apr_pool_t *result_pool, > apr_pool_t *scratch_pool) > @@ -389,15 +389,14 @@ svn_error_t * > change_t *change; > apr_pool_t *iterpool; > > - /* pre-allocate enough room for most change lists > - (will be auto-expanded as necessary) */ > - *changes = apr_array_make(result_pool, 30, sizeof(change_t *)); > + *changes = apr_hash_make(result_pool); > > SVN_ERR(read_change(&change, stream, result_pool, scratch_pool)); > iterpool = svn_pool_create(scratch_pool); > while (change) > { > - APR_ARRAY_PUSH(*changes, change_t*) = change; > + apr_hash_set(*changes, change->path.data, change->path.len, > + &change->info); > SVN_ERR(read_change(&change, stream, result_pool, iterpool)); > svn_pool_clear(iterpool); > } > Index: subversion/libsvn_fs_fs/low_level.h > =================================================================== > --- subversion/libsvn_fs_fs/low_level.h (revision 1604933) > +++ subversion/libsvn_fs_fs/low_level.h (working copy) > @@ -86,7 +86,7 @@ svn_fs_fs__unparse_footer(apr_off_t l2p_offset, > /* Read all the changes from STREAM and store them in *CHANGES, > allocated in RESULT_POOL. Do temporary allocations in SCRATCH_POOL. */ > svn_error_t * > -svn_fs_fs__read_changes(apr_array_header_t **changes, > +svn_fs_fs__read_changes(apr_hash_t **changes, > svn_stream_t *stream, > apr_pool_t *result_pool, > apr_pool_t *scratch_pool); > Index: subversion/libsvn_fs_fs/temp_serializer.c > =================================================================== > --- subversion/libsvn_fs_fs/temp_serializer.c (revision 1604933) > +++ subversion/libsvn_fs_fs/temp_serializer.c (working copy) > @@ -1125,7 +1125,7 @@ deserialize_change(void *buffer, change_t **change > } > > /* Auxiliary structure representing the content of a change_t array. > - This structure is much easier to (de-)serialize than an APR array. > + This structure is much easier to (de-)serialize than an APR hash. > */ > typedef struct changes_data_t > { > @@ -1142,20 +1142,30 @@ svn_fs_fs__serialize_changes(void **data, > void *in, > apr_pool_t *pool) > { > - apr_array_header_t *array = in; > + apr_hash_t *hash = in; > changes_data_t changes; > svn_temp_serializer__context_t *context; > svn_stringbuf_t *serialized; > int i; > + apr_hash_index_t *hi; > > /* initialize our auxiliary data structure */ > - changes.count = array->nelts; > + changes.count = apr_hash_count(hash); > changes.changes = apr_palloc(pool, sizeof(change_t*) * changes.count); > > - /* populate it with the array elements */ > - for (i = 0; i < changes.count; ++i) > - changes.changes[i] = APR_ARRAY_IDX(array, i, change_t*); > + /* populate it with the hash elements */ > + i = 0; > + for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi), ++i) > + { > + const char *path = svn__apr_hash_index_key(hi); > + svn_fs_path_change2_t *info = svn__apr_hash_index_val(hi); > > + changes.changes[i] = apr_palloc(pool, sizeof (change_t)); > + changes.changes[i]->path.data = path; > + changes.changes[i]->path.len = strlen(path); > + changes.changes[i]->info = *info; > + } > + > /* serialize it and all its elements */ > context = svn_temp_serializer__init(&changes, > sizeof(changes), > @@ -1188,8 +1198,7 @@ svn_fs_fs__deserialize_changes(void **out, > { > int i; > changes_data_t *changes = (changes_data_t *)data; > - apr_array_header_t *array = apr_array_make(pool, changes->count, > - sizeof(change_t *)); > + apr_hash_t *hash = apr_hash_make(pool); > > /* de-serialize our auxiliary data structure */ > svn_temp_deserializer__resolve(changes, (void**)&changes->changes); > @@ -1197,13 +1206,17 @@ svn_fs_fs__deserialize_changes(void **out, > /* de-serialize each entry and add it to the array */ > for (i = 0; i < changes->count; ++i) > { > + change_t *change; > + > deserialize_change(changes->changes, > (change_t **)&changes->changes[i]); > - APR_ARRAY_PUSH(array, change_t *) = changes->changes[i]; > + change = changes->changes[i]; > + apr_hash_set(hash, change->path.data, change->path.len, > + &change->info); > } > > /* done */ > - *out = array; > + *out = hash; > > return SVN_NO_ERROR; > } > Index: subversion/libsvn_fs_fs/transaction.c > =================================================================== > --- subversion/libsvn_fs_fs/transaction.c (revision 1604933) > +++ subversion/libsvn_fs_fs/transaction.c (working copy) > @@ -881,23 +881,8 @@ svn_fs_fs__paths_changed(apr_hash_t **changed_path > svn_revnum_t rev, > apr_pool_t *pool) > { > - apr_hash_t *changed_paths; > - apr_array_header_t *changes; > - int i; > - > - SVN_ERR(svn_fs_fs__get_changes(&changes, fs, rev, pool)); > - > - changed_paths = svn_hash__make(pool); > - for (i = 0; i < changes->nelts; ++i) > - { > - change_t *change = APR_ARRAY_IDX(changes, i, change_t *); > - apr_hash_set(changed_paths, change->path.data, change->path.len, > - &change->info); > - } > - > - *changed_paths_p = changed_paths; > - > - return SVN_NO_ERROR; > + return svn_error_trace(svn_fs_fs__get_changes(changed_paths_p, fs, rev, > + pool)); > } > > /* Copy a revision node-rev SRC into the current transaction TXN_ID in >