Modified: subversion/branches/showing-merge-info/subversion/libsvn_delta/xdelta.c URL: http://svn.apache.org/viewvc/subversion/branches/showing-merge-info/subversion/libsvn_delta/xdelta.c?rev=1231975&r1=1231974&r2=1231975&view=diff ============================================================================== --- subversion/branches/showing-merge-info/subversion/libsvn_delta/xdelta.c (original) +++ subversion/branches/showing-merge-info/subversion/libsvn_delta/xdelta.c Mon Jan 16 12:32:43 2012 @@ -44,6 +44,10 @@ */ #define MATCH_BLOCKSIZE 64 +/* "no" / "invalid" / "unused" value for positions within the delta windows + */ +#define NO_POSITION ((apr_uint32_t)-1) + /* Feed C_IN into the adler32 checksum and remove C_OUT at the same time. * This function may (and will) only be called for characters that are * MATCH_BLOCKSIZE positions apart. @@ -96,17 +100,22 @@ init_adler32(const char *data) struct block { apr_uint32_t adlersum; - apr_size_t pos; + +/* Even in 64 bit systems, store only 32 bit offsets in our hash table + (our delta window size is much much smaller than 4GB). + That reduces the hash table size by 50% from 32 to 16KB + and makes it easier to fit into the CPU's L1 cache. */ + apr_uint32_t pos; /* NO_POSITION -> block is not used */ }; /* A hash table, using open addressing, of the blocks of the source. */ struct blocks { /* The largest valid index of slots. */ - apr_size_t max; + apr_uint32_t max; /* Source buffer that the positions in SLOTS refer to. */ const char* data; - /* The vector of blocks. A pos value of (apr_size_t)-1 represents an unused + /* The vector of blocks. A pos value of NO_POSITION represents an unused slot. */ struct block *slots; }; @@ -114,7 +123,7 @@ struct blocks /* Return a hash value calculated from the adler32 SUM, suitable for use with our hash table. 
*/ -static apr_size_t hash_func(apr_uint32_t sum) +static apr_uint32_t hash_func(apr_uint32_t sum) { /* Since the adl32 checksum have a bad distribution for the 11th to 16th bits when used for our small block size, we add some bits from the @@ -126,12 +135,12 @@ static apr_size_t hash_func(apr_uint32_t data into the table BLOCKS. Ignore true duplicates, i.e. blocks with actually the same content. */ static void -add_block(struct blocks *blocks, apr_uint32_t adlersum, apr_size_t pos) +add_block(struct blocks *blocks, apr_uint32_t adlersum, apr_uint32_t pos) { - apr_size_t h = hash_func(adlersum) & blocks->max; + apr_uint32_t h = hash_func(adlersum) & blocks->max; /* This will terminate, since we know that we will not fill the table. */ - for (; blocks->slots[h].pos != (apr_size_t)-1; h = (h + 1) & blocks->max) + for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max) if (blocks->slots[h].adlersum == adlersum) if (memcmp(blocks->data + blocks->slots[h].pos, blocks->data + pos, MATCH_BLOCKSIZE) == 0) @@ -143,21 +152,21 @@ add_block(struct blocks *blocks, apr_uin /* Find a block in BLOCKS with the checksum ADLERSUM and matching the content at DATA, returning its position in the source data. If there is no such - block, return (apr_size_t)-1. */ -static apr_size_t + block, return NO_POSITION. */ +static apr_uint32_t find_block(const struct blocks *blocks, apr_uint32_t adlersum, const char* data) { - apr_size_t h = hash_func(adlersum) & blocks->max; + apr_uint32_t h = hash_func(adlersum) & blocks->max; - for (; blocks->slots[h].pos != (apr_size_t)-1; h = (h + 1) & blocks->max) + for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max) if (blocks->slots[h].adlersum == adlersum) if (memcmp(blocks->data + blocks->slots[h].pos, data, MATCH_BLOCKSIZE) == 0) return blocks->slots[h].pos; - return (apr_size_t)-1; + return NO_POSITION; } /* Initialize the matches table from DATA of size DATALEN. 
This goes @@ -187,7 +196,7 @@ init_blocks_table(const char *data, { /* Avoid using an indeterminate value in the lookup. */ blocks->slots[i].adlersum = 0; - blocks->slots[i].pos = (apr_size_t)-1; + blocks->slots[i].pos = NO_POSITION; } /* If there is an odd block at the end of the buffer, we will @@ -226,6 +235,39 @@ match_length(const char *a, const char * return pos; } +/* Return the smallest byte index at which positions left of A and B differ + * (A[-result] != B[-result]). If no difference can be found in the first + * MAX_LEN characters, MAX_LEN will be returned. + */ +static apr_size_t +reverse_match_length(const char *a, const char *b, apr_size_t max_len) +{ + apr_size_t pos = 0; + +#if SVN_UNALIGNED_ACCESS_IS_OK + + /* Chunky processing is so much faster ... + * + * We can't make this work on architectures that require aligned access + * because A and B will probably have different alignment. So, skipping + * the first few chars until alignment is reached is not an option. + */ + for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t)) + if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos)) + break; + + pos -= sizeof(apr_size_t); + +#endif + + while (++pos <= max_len) + if (a[-pos] != b[-pos]) + break; + + return pos-1; +} + + /* Try to find a match for the target data B in BLOCKS, and then extend the match as long as data in A and B at the match position continues to match. We set the position in A we ended up in (in @@ -252,7 +294,7 @@ find_match(const struct blocks *blocks, apos = find_block(blocks, rolling, b + bpos); /* See if we have a match. */ - if (apos == (apr_size_t)-1) + if (apos == NO_POSITION) return 0; /* Extend the match forward as far as possible */ @@ -278,6 +320,38 @@ find_match(const struct blocks *blocks, return MATCH_BLOCKSIZE + delta; } +/* Utility for compute_delta() that compares the range B[START,BSIZE) with + * the range of similar size before A[ASIZE]. 
Create corresponding copy and + * insert operations. + * + * BUILD_BATON and POOL will be passed through from compute_delta(). + */ +static void +store_delta_trailer(svn_txdelta__ops_baton_t *build_baton, + const char *a, + apr_size_t asize, + const char *b, + apr_size_t bsize, + apr_size_t start, + apr_pool_t *pool) +{ + apr_size_t end_match; + apr_size_t max_len = asize > (bsize - start) ? bsize - start : asize; + if (max_len == 0) + return; + + end_match = reverse_match_length(a + asize, b + bsize, max_len); + if (end_match <= 4) + end_match = 0; + + if (bsize - start > end_match) + svn_txdelta__insert_op(build_baton, svn_txdelta_new, + start, bsize - start - end_match, b + start, pool); + if (end_match) + svn_txdelta__insert_op(build_baton, svn_txdelta_source, + asize - end_match, end_match, NULL, pool); +} + /* Compute a delta from A to B using xdelta. @@ -315,12 +389,24 @@ compute_delta(svn_txdelta__ops_baton_t * apr_uint32_t rolling; apr_size_t lo = 0, pending_insert_start = 0; + /* Optimization: directly compare window starts. If more than 4 + * bytes match, we can immediately create a matching windows. + * Shorter sequences result in a net data increase. */ + lo = match_length(a, b, asize > bsize ? bsize : asize); + if ((lo > 4) || (lo == bsize)) + { + svn_txdelta__insert_op(build_baton, svn_txdelta_source, + 0, lo, NULL, pool); + pending_insert_start = lo; + } + else + lo = 0; + /* If the size of the target is smaller than the match blocksize, just insert the entire target. */ - if (bsize < MATCH_BLOCKSIZE) + if ((bsize - lo < MATCH_BLOCKSIZE) || (asize < MATCH_BLOCKSIZE)) { - svn_txdelta__insert_op(build_baton, svn_txdelta_new, - 0, bsize, b, pool); + store_delta_trailer(build_baton, a, asize, b, bsize, lo, pool); return; } @@ -328,7 +414,7 @@ compute_delta(svn_txdelta__ops_baton_t * init_blocks_table(a, asize, &blocks, pool); /* Initialize our rolling checksum. 
*/ - rolling = init_adler32(b); + rolling = init_adler32(b + lo); while (lo < bsize) { apr_size_t matchlen = 0; @@ -356,6 +442,19 @@ compute_delta(svn_txdelta__ops_baton_t * svn_txdelta__insert_op(build_baton, svn_txdelta_new, 0, lo - pending_insert_start, b + pending_insert_start, pool); + else + { + /* the match borders on the previous op. Maybe, we found a + * match that is better than / overlapping the previous one. */ + apr_size_t len = reverse_match_length(a + apos, b + lo, apos < lo ? apos : lo); + if (len > 0) + { + len = svn_txdelta__remove_copy(build_baton, len); + apos -= len; + matchlen += len; + lo -= len; + } + } /* Reset the pending insert start to immediately after the match. */ @@ -373,12 +472,7 @@ compute_delta(svn_txdelta__ops_baton_t * } /* If we still have an insert pending at the end, throw it in. */ - if (lo - pending_insert_start > 0) - { - svn_txdelta__insert_op(build_baton, svn_txdelta_new, - 0, lo - pending_insert_start, - b + pending_insert_start, pool); - } + store_delta_trailer(build_baton, a, asize, b, bsize, pending_insert_start, pool); } void
Modified: subversion/branches/showing-merge-info/subversion/libsvn_fs/fs-loader.c URL: http://svn.apache.org/viewvc/subversion/branches/showing-merge-info/subversion/libsvn_fs/fs-loader.c?rev=1231975&r1=1231974&r2=1231975&view=diff ============================================================================== --- subversion/branches/showing-merge-info/subversion/libsvn_fs/fs-loader.c (original) +++ subversion/branches/showing-merge-info/subversion/libsvn_fs/fs-loader.c Mon Jan 16 12:32:43 2012 @@ -424,16 +424,72 @@ svn_fs_delete_fs(const char *path, apr_p } svn_error_t * -svn_fs_hotcopy(const char *src_path, const char *dest_path, - svn_boolean_t clean, apr_pool_t *pool) +svn_fs_hotcopy2(const char *src_path, const char *dst_path, + svn_boolean_t clean, svn_boolean_t incremental, + svn_cancel_func_t cancel_func, void *cancel_baton, + apr_pool_t *scratch_pool) { fs_library_vtable_t *vtable; - const char *fs_type; + const char *src_fs_type; + svn_fs_t *src_fs; + svn_fs_t *dst_fs; + const char *dst_fs_type; + svn_node_kind_t dst_kind; + + if (strcmp(src_path, dst_path) == 0) + return svn_error_create(SVN_ERR_INCORRECT_PARAMS, NULL, + _("Hotcopy source and destination are equal")); + + SVN_ERR(svn_fs_type(&src_fs_type, src_path, scratch_pool)); + SVN_ERR(get_library_vtable(&vtable, src_fs_type, scratch_pool)); + src_fs = fs_new(NULL, scratch_pool); + dst_fs = fs_new(NULL, scratch_pool); + + SVN_ERR(svn_io_check_path(dst_path, &dst_kind, scratch_pool)); + if (dst_kind == svn_node_file) + return svn_error_createf(SVN_ERR_NODE_UNEXPECTED_KIND, NULL, + _("'%s' already exists and is a file"), + svn_dirent_local_style(dst_path, + scratch_pool)); + if (dst_kind == svn_node_unknown) + return svn_error_createf(SVN_ERR_NODE_UNEXPECTED_KIND, NULL, + _("'%s' already exists and has an unknown " + "node kind"), + svn_dirent_local_style(dst_path, + scratch_pool)); + if (dst_kind == svn_node_dir) + { + svn_node_kind_t type_file_kind; + + 
SVN_ERR(svn_io_check_path(svn_dirent_join(dst_path, + FS_TYPE_FILENAME, + scratch_pool), + &type_file_kind, scratch_pool)); + if (type_file_kind != svn_node_none) + { + SVN_ERR(svn_fs_type(&dst_fs_type, dst_path, scratch_pool)); + if (strcmp(src_fs_type, dst_fs_type) != 0) + return svn_error_createf( + SVN_ERR_ILLEGAL_TARGET, NULL, + _("The filesystem type of the hotcopy source " + "('%s') does not match the filesystem " + "type of the hotcopy destination ('%s')"), + src_fs_type, dst_fs_type); + } + } - SVN_ERR(svn_fs_type(&fs_type, src_path, pool)); - SVN_ERR(get_library_vtable(&vtable, fs_type, pool)); - SVN_ERR(vtable->hotcopy(src_path, dest_path, clean, pool)); - return svn_error_trace(write_fs_type(dest_path, fs_type, pool)); + SVN_ERR(vtable->hotcopy(src_fs, dst_fs, src_path, dst_path, clean, + incremental, cancel_func, cancel_baton, + scratch_pool)); + return svn_error_trace(write_fs_type(dst_path, src_fs_type, scratch_pool)); +} + +svn_error_t * +svn_fs_hotcopy(const char *src_path, const char *dest_path, + svn_boolean_t clean, apr_pool_t *pool) +{ + return svn_error_trace(svn_fs_hotcopy2(src_path, dest_path, clean, + FALSE, NULL, NULL, pool)); } svn_error_t * Modified: subversion/branches/showing-merge-info/subversion/libsvn_fs/fs-loader.h URL: http://svn.apache.org/viewvc/subversion/branches/showing-merge-info/subversion/libsvn_fs/fs-loader.h?rev=1231975&r1=1231974&r2=1231975&view=diff ============================================================================== --- subversion/branches/showing-merge-info/subversion/libsvn_fs/fs-loader.h (original) +++ subversion/branches/showing-merge-info/subversion/libsvn_fs/fs-loader.h Mon Jan 16 12:32:43 2012 @@ -93,8 +93,11 @@ typedef struct fs_library_vtable_t apr_pool_t *pool, apr_pool_t *common_pool); svn_error_t *(*delete_fs)(const char *path, apr_pool_t *pool); - svn_error_t *(*hotcopy)(const char *src_path, const char *dest_path, - svn_boolean_t clean, apr_pool_t *pool); + svn_error_t *(*hotcopy)(svn_fs_t 
*src_fs, svn_fs_t *dst_fs, + const char *src_path, const char *dst_path, + svn_boolean_t clean, svn_boolean_t incremental, + svn_cancel_func_t cancel_func, void *cancel_baton, + apr_pool_t *pool); const char *(*get_description)(void); svn_error_t *(*recover)(svn_fs_t *fs, svn_cancel_func_t cancel_func, void *cancel_baton, Modified: subversion/branches/showing-merge-info/subversion/libsvn_fs_base/bdb/env.c URL: http://svn.apache.org/viewvc/subversion/branches/showing-merge-info/subversion/libsvn_fs_base/bdb/env.c?rev=1231975&r1=1231974&r2=1231975&view=diff ============================================================================== --- subversion/branches/showing-merge-info/subversion/libsvn_fs_base/bdb/env.c (original) +++ subversion/branches/showing-merge-info/subversion/libsvn_fs_base/bdb/env.c Mon Jan 16 12:32:43 2012 @@ -226,7 +226,7 @@ bdb_error_gatherer(const DB_ENV *dbenv, SVN_BDB_ERROR_GATHERER_IGNORE(dbenv); - new_err = svn_error_createf(SVN_NO_ERROR, NULL, "bdb: %s", msg); + new_err = svn_error_createf(APR_SUCCESS, NULL, "bdb: %s", msg); if (error_info->pending_errors) svn_error_compose(error_info->pending_errors, new_err); else Modified: subversion/branches/showing-merge-info/subversion/libsvn_fs_base/fs.c URL: http://svn.apache.org/viewvc/subversion/branches/showing-merge-info/subversion/libsvn_fs_base/fs.c?rev=1231975&r1=1231974&r2=1231975&view=diff ============================================================================== --- subversion/branches/showing-merge-info/subversion/libsvn_fs_base/fs.c (original) +++ subversion/branches/showing-merge-info/subversion/libsvn_fs_base/fs.c Mon Jan 16 12:32:43 2012 @@ -1165,9 +1165,14 @@ copy_db_file_safely(const char *src_dir, static svn_error_t * -base_hotcopy(const char *src_path, +base_hotcopy(svn_fs_t *src_fs, + svn_fs_t *dst_fs, + const char *src_path, const char *dest_path, svn_boolean_t clean_logs, + svn_boolean_t incremental, + svn_cancel_func_t cancel_func, + void *cancel_baton, apr_pool_t *pool) { 
svn_error_t *err; @@ -1175,6 +1180,11 @@ base_hotcopy(const char *src_path, svn_boolean_t log_autoremove = FALSE; int format; + if (incremental) + return svn_error_createf(SVN_ERR_UNSUPPORTED_FEATURE, NULL, + _("BDB repositories do not support incremental " + "hotcopy")); + /* Check the FS format number to be certain that we know how to hotcopy this FS. Pre-1.2 filesystems did not have a format file (you could say they were format "0"), so we will error here. This is not Modified: subversion/branches/showing-merge-info/subversion/libsvn_fs_base/id.c URL: http://svn.apache.org/viewvc/subversion/branches/showing-merge-info/subversion/libsvn_fs_base/id.c?rev=1231975&r1=1231974&r2=1231975&view=diff ============================================================================== --- subversion/branches/showing-merge-info/subversion/libsvn_fs_base/id.c (original) +++ subversion/branches/showing-merge-info/subversion/libsvn_fs_base/id.c Mon Jan 16 12:32:43 2012 @@ -168,7 +168,7 @@ svn_fs_base__id_parse(const char *data, { svn_fs_id_t *id; id_private_t *pvt; - char *data_copy, *str, *last_str; + char *data_copy, *str; /* Dup the ID data into POOL. Our returned ID will have references into this memory. */ @@ -181,24 +181,25 @@ svn_fs_base__id_parse(const char *data, id->fsap_data = pvt; /* Now, we basically just need to "split" this data on `.' - characters. We will use apr_strtok, which will put terminators - where each of the '.'s used to be. Then our new id field will - reference string locations inside our duplicate string.*/ + characters. We will use svn_cstring_tokenize, which will put + terminators where each of the '.'s used to be. 
Then our new + id field will reference string locations inside our duplicate + string.*/ /* Node Id */ - str = apr_strtok(data_copy, ".", &last_str); + str = svn_cstring_tokenize(".", &data_copy); if (str == NULL) return NULL; pvt->node_id = str; /* Copy Id */ - str = apr_strtok(NULL, ".", &last_str); + str = svn_cstring_tokenize(".", &data_copy); if (str == NULL) return NULL; pvt->copy_id = str; /* Txn Id */ - str = apr_strtok(NULL, ".", &last_str); + str = svn_cstring_tokenize(".", &data_copy); if (str == NULL) return NULL; pvt->txn_id = str; Modified: subversion/branches/showing-merge-info/subversion/libsvn_fs_fs/caching.c URL: http://svn.apache.org/viewvc/subversion/branches/showing-merge-info/subversion/libsvn_fs_fs/caching.c?rev=1231975&r1=1231974&r2=1231975&view=diff ============================================================================== --- subversion/branches/showing-merge-info/subversion/libsvn_fs_fs/caching.c (original) +++ subversion/branches/showing-merge-info/subversion/libsvn_fs_fs/caching.c Mon Jan 16 12:32:43 2012 @@ -219,7 +219,7 @@ create_cache(svn_cache__t **cache_p, { SVN_ERR(svn_cache__create_membuffer_cache( cache_p, membuffer, serializer, deserializer, - klen, prefix, pool)); + klen, prefix, FALSE, pool)); } else if (pages) { Modified: subversion/branches/showing-merge-info/subversion/libsvn_fs_fs/fs.c URL: http://svn.apache.org/viewvc/subversion/branches/showing-merge-info/subversion/libsvn_fs_fs/fs.c?rev=1231975&r1=1231974&r2=1231975&view=diff ============================================================================== --- subversion/branches/showing-merge-info/subversion/libsvn_fs_fs/fs.c (original) +++ subversion/branches/showing-merge-info/subversion/libsvn_fs_fs/fs.c Mon Jan 16 12:32:43 2012 @@ -276,16 +276,28 @@ fs_pack(svn_fs_t *fs, /* This implements the fs_library_vtable_t.hotcopy() API. Copy a - possibly live Subversion filesystem from SRC_PATH to DEST_PATH. 
+ possibly live Subversion filesystem SRC_FS from SRC_PATH to a + DST_FS at DEST_PATH. If INCREMENTAL is TRUE, make an effort not to + re-copy data which already exists in DST_FS. The CLEAN_LOGS argument is ignored and included for Subversion 1.0.x compatibility. Perform all temporary allocations in POOL. */ static svn_error_t * -fs_hotcopy(const char *src_path, - const char *dest_path, +fs_hotcopy(svn_fs_t *src_fs, + svn_fs_t *dst_fs, + const char *src_path, + const char *dst_path, svn_boolean_t clean_logs, + svn_boolean_t incremental, + svn_cancel_func_t cancel_func, + void *cancel_baton, apr_pool_t *pool) { - return svn_fs_fs__hotcopy(src_path, dest_path, pool); + SVN_ERR(initialize_fs_struct(src_fs)); + SVN_ERR(fs_serialized_init(src_fs, pool, pool)); + SVN_ERR(initialize_fs_struct(dst_fs)); + SVN_ERR(fs_serialized_init(dst_fs, pool, pool)); + return svn_fs_fs__hotcopy(src_fs, dst_fs, src_path, dst_path, + incremental, cancel_func, cancel_baton, pool); }
