Author: stefan2
Date: Sun Dec 2 22:59:13 2012
New Revision: 1416283
URL: http://svn.apache.org/viewvc?rev=1416283&view=rev
Log:
Finish documentation of fsfs-reorg tool.
Also, simplify code as we go.
* tools/server-side/fsfs-reorg.c
(get_fragment_content,
update_noderevs,
get_content_length,
move_fragment,
pack_revisions,
write_revisions,
pack_and_write_revisions,
get_updated_dir,
update_id,
update_text): add docstrings and comments
(diff_write_baton_t,
diff_write_handler): drop
(diff_stringbufs): update caller; add docstring and comments
Modified:
subversion/trunk/tools/server-side/fsfs-reorg.c
Modified: subversion/trunk/tools/server-side/fsfs-reorg.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/tools/server-side/fsfs-reorg.c?rev=1416283&r1=1416282&r2=1416283&view=diff
==============================================================================
--- subversion/trunk/tools/server-side/fsfs-reorg.c (original)
+++ subversion/trunk/tools/server-side/fsfs-reorg.c Sun Dec 2 22:59:13 2012
@@ -2282,6 +2282,13 @@ get_fragment_content(svn_string_t **cont
fragment_t *fragment,
apr_pool_t *pool);
+/* Directory content may change and with it, the deltified representations
+ * may significantly. This function causes all directory target reps in
+ * PACK of FS to be built and their new MD5 as well as rep sizes be updated.
+ * We must do that before attempting to write noderevs.
+ *
+ * Use POOL for allocations.
+ */
static svn_error_t *
update_noderevs(fs_fs_t *fs,
revision_pack_t *pack,
@@ -2297,6 +2304,8 @@ update_noderevs(fs_fs_t *fs,
{
svn_string_t *content;
+ /* request updated rep content but ignore the result.
+ * We are only interested in the MD5, content and rep size updates.
*/
SVN_ERR(get_fragment_content(&content, fs, fragment, itempool));
svn_pool_clear(itempool);
}
@@ -2307,6 +2316,11 @@ update_noderevs(fs_fs_t *fs,
return SVN_NO_ERROR;
}
+/* Determine the target size of the FRAGMENT in FS and return the value
+ * in *LENGTH. If ADD_PADDING has been set, slightly fudge the numbers
+ * to account for changes in offset lengths etc. Use POOL for temporary
+ * allocations.
+ */
static svn_error_t *
get_content_length(apr_size_t *length,
fs_fs_t *fs,
@@ -2336,6 +2350,9 @@ get_content_length(apr_size_t *length,
return SVN_NO_ERROR;
}
+/* Move the FRAGMENT to global file offset NEW_POSITION. Update the target
+ * location info of the underlying object as well.
+ */
static void
move_fragment(fragment_t *fragment,
apr_size_t new_position)
@@ -2343,9 +2360,11 @@ move_fragment(fragment_t *fragment,
revision_info_t *info;
representation_t *representation;
noderev_t *node;
-
+
+ /* move the fragment */
fragment->position = new_position;
+ /* move the underlying object */
switch (fragment->kind)
{
case header_fragment:
@@ -2372,6 +2391,10 @@ move_fragment(fragment_t *fragment,
}
}
+/* Move the fragments in PACK's target fragment list to their final offsets.
+ * This may require several iterations if the fudge factors turned out to
+ * be insufficient. Use POOL for allocations.
+ */
static svn_error_t *
pack_revisions(fs_fs_t *fs,
revision_pack_t *pack,
@@ -2385,8 +2408,13 @@ pack_revisions(fs_fs_t *fs,
apr_pool_t *itempool = svn_pool_create(pool);
+ /* update all directory reps. Chances are that most of the target rep
+ * sizes are now close to accurate. */
SVN_ERR(update_noderevs(fs, pack, pool));
+ /* compression phase: pack all fragments tightly with only a very small
+ * fudge factor. This should cause offsets to shrink, thus all the
+ * actual fragment rate should tend to be even smaller afterwards. */
current_pos = pack->info->nelts > 1 ? 64 : 0;
for (i = 0; i + 1 < pack->fragments->nelts; ++i)
{
@@ -2398,9 +2426,15 @@ pack_revisions(fs_fs_t *fs,
svn_pool_clear(itempool);
}
+ /* don't forget the final fragment (last revision's revision header) */
fragment = &APR_ARRAY_IDX(pack->fragments, pack->fragments->nelts-1,
fragment_t);
fragment->position = current_pos;
+ /* expansion phase: check whether all fragments fit into their allotted
+ * slots. Grow them geometrically if they don't fit. Retry until they
+ * all do fit.
+ * Note: there is an upper limit to which fragments can grow. So, this
+ * loop will terminate. Often, no expansion will be necessary at all. */
do
{
needed_to_expand = FALSE;
@@ -2437,6 +2471,8 @@ pack_revisions(fs_fs_t *fs,
fragment = &APR_ARRAY_IDX(pack->fragments, pack->fragments->nelts-1,
fragment_t);
fragment->position = current_pos;
+ /* update the revision
+ * sizes (they all end at the end of the pack file now) */
SVN_ERR(get_content_length(&len, fs, fragment, FALSE, itempool));
current_pos += len;
@@ -2453,6 +2489,8 @@ pack_revisions(fs_fs_t *fs,
return SVN_NO_ERROR;
}
+/* Write reorg'ed target content for PACK in FS. Use POOL for allocations.
+ */
static svn_error_t *
write_revisions(fs_fs_t *fs,
revision_pack_t *pack,
@@ -2469,6 +2507,7 @@ write_revisions(fs_fs_t *fs,
apr_size_t current_pos = 0;
svn_stringbuf_t *null_buffer = svn_stringbuf_create_empty(iterpool);
+ /* create the target file */
const char *dir = apr_psprintf(iterpool, "%s/new/%ld%s",
fs->path, pack->base / fs->max_files_per_dir,
pack->info->nelts > 1 ? ".pack" : "");
@@ -2481,38 +2520,46 @@ write_revisions(fs_fs_t *fs,
APR_OS_DEFAULT,
iterpool));
+ /* write all fragments */
for (i = 0; i < pack->fragments->nelts; ++i)
{
apr_size_t padding;
+
+ /* get fragment content to write */
fragment = &APR_ARRAY_IDX(pack->fragments, i, fragment_t);
SVN_ERR(get_fragment_content(&content, fs, fragment, itempool));
-
SVN_ERR_ASSERT(fragment->position >= current_pos);
+
+ /* number of bytes between this and the previous fragment */
if ( fragment->kind == header_fragment
&& i+1 < pack->fragments->nelts)
+ /* special case: header fragments are aligned to the slot end */
padding = APR_ARRAY_IDX(pack->fragments, i+1, fragment_t).position -
content->len - current_pos;
else
+ /* standard case: fragments are aligned to the slot start */
padding = fragment->position - current_pos;
+ /* write padding between fragments */
if (padding)
{
while (null_buffer->len < padding)
svn_stringbuf_appendbyte(null_buffer, 0);
SVN_ERR(svn_io_file_write_full(file,
- null_buffer->data,
- padding,
- NULL,
- itempool));
+ null_buffer->data,
+ padding,
+ NULL,
+ itempool));
current_pos += padding;
}
+ /* write fragment content */
SVN_ERR(svn_io_file_write_full(file,
- content->data,
- content->len,
- NULL,
- itempool));
+ content->data,
+ content->len,
+ NULL,
+ itempool));
current_pos += content->len;
svn_pool_clear(itempool);
@@ -2520,6 +2567,7 @@ write_revisions(fs_fs_t *fs,
apr_file_close(file);
+ /* write new manifest file */
if (pack->info->nelts > 1)
{
svn_stream_t *stream;
@@ -2541,12 +2589,17 @@ write_revisions(fs_fs_t *fs,
}
}
+ /* cleanup */
svn_pool_destroy(itempool);
svn_pool_destroy(iterpool);
return SVN_NO_ERROR;
}
+/* Write reorg'ed target content for all revisions in FS. To maximize
+ * data locality, pack and write in one go per pack file.
+ * Use POOL for allocations.
+ */
static svn_error_t *
pack_and_write_revisions(fs_fs_t *fs,
apr_pool_t *pool)
@@ -2570,6 +2623,10 @@ pack_and_write_revisions(fs_fs_t *fs,
return SVN_NO_ERROR;
}
+/* For the directory REPRESENTATION in FS, construct the new (target)
+ * serialized plaintext representation and return it in *CONTENT.
+ * Allocate the result in POOL and temporaries in SCRATCH_POOL.
+ */
static svn_error_t *
get_updated_dir(svn_string_t **content,
fs_fs_t *fs,
@@ -2583,14 +2640,19 @@ get_updated_dir(svn_string_t **content,
int i;
svn_stream_t *stream;
svn_stringbuf_t *result;
-
+
+ /* get the original content */
SVN_ERR(read_dir(&hash, fs, representation, scratch_pool));
hash = apr_hash_copy(hash_pool, hash);
+
+ /* update all entries */
for (i = 0; i < dir->nelts; ++i)
{
char buffer[256];
svn_string_t *new_val;
apr_size_t pos;
+
+ /* find the original entry for for the current name */
direntry_t *entry = APR_ARRAY_IDX(dir, i, direntry_t *);
svn_string_t *str_val = apr_hash_get(hash, entry->name, entry->name_len);
if (str_val == NULL)
@@ -2598,54 +2660,40 @@ get_updated_dir(svn_string_t **content,
_("Dir entry '%s' not found"), entry->name);
SVN_ERR_ASSERT(str_val->len < sizeof(buffer));
-
+
+ /* create and updated node ID */
memcpy(buffer, str_val->data, str_val->len+1);
pos = strchr(buffer, '/') - buffer + 1;
pos += svn__ui64toa(buffer + pos, entry->node->target.offset -
entry->node->revision->target.offset);
new_val = svn_string_ncreate(buffer, pos, hash_pool);
+ /* store it in the hash */
apr_hash_set(hash, entry->name, entry->name_len, new_val);
}
+ /* serialize the updated hash */
result = svn_stringbuf_create_ensure(representation->target.size, pool);
stream = svn_stream_from_stringbuf(result, hash_pool);
SVN_ERR(svn_hash_write2(hash, stream, SVN_HASH_TERMINATOR, hash_pool));
svn_pool_destroy(hash_pool);
+ /* done */
*content = svn_stringbuf__morph_into_string(result);
return SVN_NO_ERROR;
}
-struct diff_write_baton_t
-{
- svn_stream_t *stream;
- apr_size_t size;
-};
-
-static svn_error_t *
-diff_write_handler(void *baton,
- const char *data,
- apr_size_t *len)
-{
- struct diff_write_baton_t *whb = baton;
-
- SVN_ERR(svn_stream_write(whb->stream, data, len));
- whb->size += *len;
-
- return SVN_NO_ERROR;
-}
-
+/* Calculate the delta representation for the given CONTENT and BASE.
+ * Return the rep in *DIFF. Use POOL for allocations.
+ */
static svn_error_t *
diff_stringbufs(svn_stringbuf_t *diff,
- apr_size_t *inflated_size,
svn_string_t *base,
svn_string_t *content,
apr_pool_t *pool)
{
svn_txdelta_window_handler_t diff_wh;
void *diff_whb;
- struct diff_write_baton_t whb;
svn_stream_t *stream;
svn_stream_t *source = svn_stream_from_string(base, pool);
@@ -2659,20 +2707,20 @@ diff_stringbufs(svn_stringbuf_t *diff,
SVN_DELTA_COMPRESSION_LEVEL_DEFAULT,
pool);
- whb.stream = svn_txdelta_target_push(diff_wh, diff_whb, source, pool);
- whb.size = 0;
-
- stream = svn_stream_create(&whb, pool);
- svn_stream_set_write(stream, diff_write_handler);
+ /* create delta stream */
+ stream = svn_txdelta_target_push(diff_wh, diff_whb, source, pool);
+ /* run delta */
SVN_ERR(svn_stream_write(stream, content->data, &content->len));
- SVN_ERR(svn_stream_close(whb.stream));
SVN_ERR(svn_stream_close(stream));
- *inflated_size = whb.size;
return SVN_NO_ERROR;
}
+/* Update the noderev id value for KEY in the textual noderev representation
+ * in NODE_REV. Take the new id from NODE. This is a no-op if the KEY
+ * cannot be found.
+ */
static void
update_id(svn_stringbuf_t *node_rev,
const char *key,
@@ -2681,6 +2729,7 @@ update_id(svn_stringbuf_t *node_rev,
char *newline_pos = 0;
char *pos;
+ /* we need to update the offset only -> find its position */
pos = strstr(node_rev->data, key);
if (pos)
pos = strchr(pos, '/');
@@ -2689,6 +2738,7 @@ update_id(svn_stringbuf_t *node_rev,
if (pos && newline_pos)
{
+ /* offset data has been found -> replace it */
char temp[SVN_INT64_BUFFER_SIZE];
apr_size_t len = svn__i64toa(temp, node->target.offset -
node->revision->target.offset);
svn_stringbuf_replace(node_rev,
@@ -2697,6 +2747,11 @@ update_id(svn_stringbuf_t *node_rev,
}
}
+/* Update the representation id value for KEY in the textual noderev
+ * representation in NODE_REV. Take the offset, sizes and new MD5 from
+ * REPRESENTATION. Use SCRATCH_POOL for allocations.
+ * This is a no-op if the KEY cannot be found.
+ */
static void
update_text(svn_stringbuf_t *node_rev,
const char *key,
@@ -2713,6 +2768,7 @@ update_text(svn_stringbuf_t *node_rev,
val_pos = pos + key_len;
if (representation->dir)
{
+ /* for directories, we need to write all rep info anew */
char *newline_pos = strchr(val_pos, '\n');
svn_checksum_t checksum;
const char* temp = apr_psprintf(scratch_pool, "%ld %" APR_SIZE_T_FMT "
%"
@@ -2732,6 +2788,8 @@ update_text(svn_stringbuf_t *node_rev,
}
else
{
+ /* ordinary representation: replace offset and rep size only.
+ * Content size and checksums are unchanged. */
const char* temp;
char *end_pos = strchr(val_pos, ' ');
@@ -2747,6 +2805,13 @@ update_text(svn_stringbuf_t *node_rev,
}
}
+/* Get the target content (data block as to be written to the file) for
+ * the given FRAGMENT in FS. Return the content in *CONTENT. Use POOL
+ * for allocations.
+ *
+ * Note that, as a side-effect, this will update the target rep. info for
+ * directories.
+ */
static svn_error_t *
get_fragment_content(svn_string_t **content,
fs_fs_t *fs,
@@ -2763,6 +2828,7 @@ get_fragment_content(svn_string_t **cont
switch (fragment->kind)
{
+ /* revision headers can be constructed from target position info */
case header_fragment:
info = fragment->data;
*content = svn_string_createf(pool,
@@ -2771,6 +2837,7 @@ get_fragment_content(svn_string_t **cont
info->target.changes);
return SVN_NO_ERROR;
+ /* The changes list remains untouched */
case changes_fragment:
info = fragment->data;
SVN_ERR(get_content(&revision_content, fs, info->revision, pool));
@@ -2780,6 +2847,9 @@ get_fragment_content(svn_string_t **cont
(*content)->len = info->target.changes_len;
return SVN_NO_ERROR;
+ /* property and file reps get new headers any need to be rewritten,
+ * iff the base rep is a directory. The actual (deltified) content
+ * remains unchanged, though. MD5 etc. do not change. */
case property_fragment:
case file_fragment:
representation = fragment->data;
@@ -2789,6 +2859,8 @@ get_fragment_content(svn_string_t **cont
if (representation->delta_base)
if (representation->delta_base->dir)
{
+ /* if the base happens to be a directory, reconstruct the
+ * full text and represent it as PLAIN rep. */
SVN_ERR(get_combined_window(&text, fs, representation, pool));
representation->target.size = text->len;
@@ -2799,6 +2871,7 @@ get_fragment_content(svn_string_t **cont
return SVN_NO_ERROR;
}
else
+ /* construct a new rep header */
if (representation->delta_base == fs->null_base)
header = svn_stringbuf_create("DELTA\n", pool);
else
@@ -2811,6 +2884,8 @@ get_fragment_content(svn_string_t **cont
else
header = svn_stringbuf_create("PLAIN\n", pool);
+ /* if it exists, the actual delta base is unchanged. Hence, this
+ * rep is unchanged even if it has been deltified. */
header_size = strchr(revision_content->data +
representation->original.offset, '\n') -
revision_content->data -
@@ -2824,7 +2899,10 @@ get_fragment_content(svn_string_t **cont
*content = svn_stringbuf__morph_into_string(header);
return SVN_NO_ERROR;
+ /* directory reps need to be rewritten (and deltified) completely.
+ * As a side-effect, update the MD5 and target content size. */
case dir_fragment:
+ /* construct new content and update MD5 */
representation = fragment->data;
SVN_ERR(get_updated_dir(&revision_content, fs, representation,
pool, pool));
@@ -2835,15 +2913,18 @@ get_fragment_content(svn_string_t **cont
checksum->digest,
sizeof(representation->dir->target_md5));
+ /* deltify against the base rep if necessary */
if (representation->delta_base)
{
if (representation->delta_base->dir == NULL)
{
+ /* dummy or non-dir base rep -> self-compress only */
header = svn_stringbuf_create("DELTA\n", pool);
base_content = svn_string_create_empty(pool);
}
else
{
+ /* deltify against base rep (which is a directory, too)*/
representation_t *base_rep = representation->delta_base;
header = svn_stringbuf_createf(pool,
"DELTA %ld %" APR_SIZE_T_FMT "
%" APR_SIZE_T_FMT "\n",
@@ -2854,16 +2935,18 @@ get_fragment_content(svn_string_t **cont
pool, pool));
}
+ /* run deltification and update target content size */
header_size = header->len;
- SVN_ERR(diff_stringbufs(header, &representation->dir->size,
- base_content,
+ SVN_ERR(diff_stringbufs(header, base_content,
revision_content, pool));
+ representation->dir->size = revision_content->len;
representation->target.size = header->len - header_size;
svn_stringbuf_appendcstr(header, "ENDREP\n");
*content = svn_stringbuf__morph_into_string(header);
}
else
{
+ /* no delta base (not even a dummy) -> PLAIN rep */
representation->target.size = revision_content->len;
representation->dir->size = revision_content->len;
*content = svn_string_createf(pool, "PLAIN\n%sENDREP\n",
@@ -2872,7 +2955,9 @@ get_fragment_content(svn_string_t **cont
return SVN_NO_ERROR;
+ /* construct the new noderev content. No side-effects.*/
case noderev_fragment:
+ /* get the original noderev as string */
node = fragment->data;
SVN_ERR(get_content(&revision_content, fs,
node->revision->revision, pool));
@@ -2881,6 +2966,7 @@ get_fragment_content(svn_string_t **cont
node->original.size,
pool);
+ /* update the values that may have hanged for target */
update_id(node_rev, "id: ", node);
update_id(node_rev, "pred: ", node->predecessor);
update_text(node_rev, "text: ", node->text, pool);