Author: stefan2
Date: Sun Dec  2 22:59:13 2012
New Revision: 1416283

URL: http://svn.apache.org/viewvc?rev=1416283&view=rev
Log:
Finish documentation of fsfs-reorg tool.
Also, simplify code as we go.

* tools/server-side/fsfs-reorg.c
  (get_fragment_content,
   update_noderevs,
   get_content_length,
   move_fragment,
   pack_revisions,
   write_revisions,
   pack_and_write_revisions,
   get_updated_dir,
   update_id,
   update_text): add docstrings and comments
  (diff_write_baton_t,
   diff_write_handler): drop
  (diff_stringbufs): update caller; add docstring and comments

Modified:
    subversion/trunk/tools/server-side/fsfs-reorg.c

Modified: subversion/trunk/tools/server-side/fsfs-reorg.c
URL: 
http://svn.apache.org/viewvc/subversion/trunk/tools/server-side/fsfs-reorg.c?rev=1416283&r1=1416282&r2=1416283&view=diff
==============================================================================
--- subversion/trunk/tools/server-side/fsfs-reorg.c (original)
+++ subversion/trunk/tools/server-side/fsfs-reorg.c Sun Dec  2 22:59:13 2012
@@ -2282,6 +2282,13 @@ get_fragment_content(svn_string_t **cont
                      fragment_t *fragment,
                      apr_pool_t *pool);
 
+/* Directory content may change and with it, the deltified representations
+ * may change significantly.  This function causes all directory target
+ * reps in PACK of FS to be built and their new MD5 as well as rep sizes to
+ * be updated.  We must do that before attempting to write noderevs.
+ * 
+ * Use POOL for allocations.
+ */
 static svn_error_t *
 update_noderevs(fs_fs_t *fs,
                 revision_pack_t *pack,
@@ -2297,6 +2304,8 @@ update_noderevs(fs_fs_t *fs,
         {
           svn_string_t *content;
 
+          /* request updated rep content but ignore the result.
+           * We are only interested in the MD5, content and rep size updates. 
*/
           SVN_ERR(get_fragment_content(&content, fs, fragment, itempool));
           svn_pool_clear(itempool);
         }
@@ -2307,6 +2316,11 @@ update_noderevs(fs_fs_t *fs,
   return SVN_NO_ERROR;
 }
 
+/* Determine the target size of the FRAGMENT in FS and return the value
+ * in *LENGTH.  If ADD_PADDING has been set, slightly fudge the numbers
+ * to account for changes in offset lengths etc.  Use POOL for temporary
+ * allocations.
+ */
 static svn_error_t *
 get_content_length(apr_size_t *length,
                    fs_fs_t *fs,
@@ -2336,6 +2350,9 @@ get_content_length(apr_size_t *length,
   return SVN_NO_ERROR;
 }
 
+/* Move the FRAGMENT to global file offset NEW_POSITION.  Update the target
+ * location info of the underlying object as well.
+ */
 static void
 move_fragment(fragment_t *fragment,
               apr_size_t new_position)
@@ -2343,9 +2360,11 @@ move_fragment(fragment_t *fragment,
   revision_info_t *info;
   representation_t *representation;
   noderev_t *node;
-  
+
+  /* move the fragment */
   fragment->position = new_position; 
 
+  /* move the underlying object */
   switch (fragment->kind)
     {
       case header_fragment:
@@ -2372,6 +2391,10 @@ move_fragment(fragment_t *fragment,
     }
 }
 
+/* Move the fragments in PACK's target fragment list to their final offsets.
+ * This may require several iterations if the fudge factors turned out to
+ * be insufficient.  Use POOL for allocations.
+ */
 static svn_error_t *
 pack_revisions(fs_fs_t *fs,
                revision_pack_t *pack,
@@ -2385,8 +2408,13 @@ pack_revisions(fs_fs_t *fs,
 
   apr_pool_t *itempool = svn_pool_create(pool);
 
+  /* update all directory reps. Chances are that most of the target rep
+   * sizes are now close to accurate. */
   SVN_ERR(update_noderevs(fs, pack, pool));
 
+  /* compression phase: pack all fragments tightly with only a very small
+   * fudge factor.  This should cause offsets to shrink, thus the
+   * actual fragment sizes should tend to be even smaller afterwards. */
   current_pos = pack->info->nelts > 1 ? 64 : 0;
   for (i = 0; i + 1 < pack->fragments->nelts; ++i)
     {
@@ -2398,9 +2426,15 @@ pack_revisions(fs_fs_t *fs,
       svn_pool_clear(itempool);
     }
 
+  /* don't forget the final fragment (last revision's revision header) */
   fragment = &APR_ARRAY_IDX(pack->fragments, pack->fragments->nelts-1, 
fragment_t);
   fragment->position = current_pos;
 
+  /* expansion phase: check whether all fragments fit into their allotted
+   * slots.  Grow them geometrically if they don't fit.  Retry until they
+   * all do fit.
+   * Note: there is an upper limit to which fragments can grow.  So, this
+   * loop will terminate.  Often, no expansion will be necessary at all. */
   do
     {
       needed_to_expand = FALSE;
@@ -2437,6 +2471,8 @@ pack_revisions(fs_fs_t *fs,
       fragment = &APR_ARRAY_IDX(pack->fragments, pack->fragments->nelts-1, 
fragment_t);
       fragment->position = current_pos;
 
+      /* update the revision
+       * sizes (they all end at the end of the pack file now) */
       SVN_ERR(get_content_length(&len, fs, fragment, FALSE, itempool));
       current_pos += len;
 
@@ -2453,6 +2489,8 @@ pack_revisions(fs_fs_t *fs,
   return SVN_NO_ERROR;
 }
 
+/* Write reorg'ed target content for PACK in FS.  Use POOL for allocations.
+ */
 static svn_error_t *
 write_revisions(fs_fs_t *fs,
                 revision_pack_t *pack,
@@ -2469,6 +2507,7 @@ write_revisions(fs_fs_t *fs,
   apr_size_t current_pos = 0;
   svn_stringbuf_t *null_buffer = svn_stringbuf_create_empty(iterpool);
 
+  /* create the target file */
   const char *dir = apr_psprintf(iterpool, "%s/new/%ld%s",
                                   fs->path, pack->base / fs->max_files_per_dir,
                                   pack->info->nelts > 1 ? ".pack" : "");
@@ -2481,38 +2520,46 @@ write_revisions(fs_fs_t *fs,
                             APR_OS_DEFAULT,
                             iterpool));
 
+  /* write all fragments */
   for (i = 0; i < pack->fragments->nelts; ++i)
     {
       apr_size_t padding;
+
+      /* get fragment content to write */
       fragment = &APR_ARRAY_IDX(pack->fragments, i, fragment_t);
       SVN_ERR(get_fragment_content(&content, fs, fragment, itempool));
-
       SVN_ERR_ASSERT(fragment->position >= current_pos);
+
+      /* number of bytes between this and the previous fragment */
       if (   fragment->kind == header_fragment
           && i+1 < pack->fragments->nelts)
+        /* special case: header fragments are aligned to the slot end */
         padding = APR_ARRAY_IDX(pack->fragments, i+1, fragment_t).position -
                   content->len - current_pos;
       else
+        /* standard case: fragments are aligned to the slot start */
         padding = fragment->position - current_pos;
 
+      /* write padding between fragments */
       if (padding)
         {
           while (null_buffer->len < padding)
             svn_stringbuf_appendbyte(null_buffer, 0);
 
           SVN_ERR(svn_io_file_write_full(file,
-                                          null_buffer->data,
-                                          padding,
-                                          NULL,
-                                          itempool));
+                                         null_buffer->data,
+                                         padding,
+                                         NULL,
+                                         itempool));
           current_pos += padding;
         }
 
+      /* write fragment content */
       SVN_ERR(svn_io_file_write_full(file,
-                                      content->data,
-                                      content->len,
-                                      NULL,
-                                      itempool));
+                                     content->data,
+                                     content->len,
+                                     NULL,
+                                     itempool));
       current_pos += content->len;
 
       svn_pool_clear(itempool);
@@ -2520,6 +2567,7 @@ write_revisions(fs_fs_t *fs,
 
   apr_file_close(file);
 
+  /* write new manifest file */
   if (pack->info->nelts > 1)
     {
       svn_stream_t *stream;
@@ -2541,12 +2589,17 @@ write_revisions(fs_fs_t *fs,
         }
     }
 
+  /* cleanup */
   svn_pool_destroy(itempool);
   svn_pool_destroy(iterpool);
 
   return SVN_NO_ERROR;
 }
 
+/* Write reorg'ed target content for all revisions in FS.  To maximize
+ * data locality, pack and write in one go per pack file.
+ * Use POOL for allocations.
+ */
 static svn_error_t *
 pack_and_write_revisions(fs_fs_t *fs,
                          apr_pool_t *pool)
@@ -2570,6 +2623,10 @@ pack_and_write_revisions(fs_fs_t *fs,
   return SVN_NO_ERROR;
 }
 
+/* For the directory REPRESENTATION in FS, construct the new (target)
+ * serialized plaintext representation and return it in *CONTENT.
+ * Allocate the result in POOL and temporaries in SCRATCH_POOL.
+ */
 static svn_error_t *
 get_updated_dir(svn_string_t **content,
                 fs_fs_t *fs,
@@ -2583,14 +2640,19 @@ get_updated_dir(svn_string_t **content,
   int i;
   svn_stream_t *stream;
   svn_stringbuf_t *result;
-  
+
+  /* get the original content */
   SVN_ERR(read_dir(&hash, fs, representation, scratch_pool));
   hash = apr_hash_copy(hash_pool, hash);
+
+  /* update all entries */
   for (i = 0; i < dir->nelts; ++i)
     {
       char buffer[256];
       svn_string_t *new_val;
       apr_size_t pos;
+
+      /* find the original entry for the current name */
       direntry_t *entry = APR_ARRAY_IDX(dir, i, direntry_t *);
       svn_string_t *str_val = apr_hash_get(hash, entry->name, entry->name_len);
       if (str_val == NULL)
@@ -2598,54 +2660,40 @@ get_updated_dir(svn_string_t **content,
                                  _("Dir entry '%s' not found"), entry->name);
 
       SVN_ERR_ASSERT(str_val->len < sizeof(buffer));
-      
+
+      /* create an updated node ID */
       memcpy(buffer, str_val->data, str_val->len+1);
       pos = strchr(buffer, '/') - buffer + 1;
       pos += svn__ui64toa(buffer + pos, entry->node->target.offset - 
entry->node->revision->target.offset);
       new_val = svn_string_ncreate(buffer, pos, hash_pool);
 
+      /* store it in the hash */
       apr_hash_set(hash, entry->name, entry->name_len, new_val);
     }
 
+  /* serialize the updated hash */
   result = svn_stringbuf_create_ensure(representation->target.size, pool);
   stream = svn_stream_from_stringbuf(result, hash_pool);
   SVN_ERR(svn_hash_write2(hash, stream, SVN_HASH_TERMINATOR, hash_pool));
   svn_pool_destroy(hash_pool);
 
+  /* done */
   *content = svn_stringbuf__morph_into_string(result);
   
   return SVN_NO_ERROR;
 }
 
-struct diff_write_baton_t
-{
-  svn_stream_t *stream;
-  apr_size_t size;
-};
-
-static svn_error_t *
-diff_write_handler(void *baton,
-                   const char *data,
-                   apr_size_t *len)
-{
-  struct diff_write_baton_t *whb = baton;
-
-  SVN_ERR(svn_stream_write(whb->stream, data, len));
-  whb->size += *len;
-
-  return SVN_NO_ERROR;
-}
-
+/* Calculate the delta representation for the given CONTENT and BASE.
+ * Return the rep in *DIFF.  Use POOL for allocations.
+ */
 static svn_error_t *
 diff_stringbufs(svn_stringbuf_t *diff,
-                apr_size_t *inflated_size,
                 svn_string_t *base,
                 svn_string_t *content,
                 apr_pool_t *pool)
 {
   svn_txdelta_window_handler_t diff_wh;
   void *diff_whb;
-  struct diff_write_baton_t whb;
 
   svn_stream_t *stream;
   svn_stream_t *source = svn_stream_from_string(base, pool);
@@ -2659,20 +2707,20 @@ diff_stringbufs(svn_stringbuf_t *diff,
                           SVN_DELTA_COMPRESSION_LEVEL_DEFAULT,
                           pool);
 
-  whb.stream = svn_txdelta_target_push(diff_wh, diff_whb, source, pool);
-  whb.size = 0;
-
-  stream = svn_stream_create(&whb, pool);
-  svn_stream_set_write(stream, diff_write_handler);
+  /* create delta stream */
+  stream = svn_txdelta_target_push(diff_wh, diff_whb, source, pool);
 
+  /* run delta */
   SVN_ERR(svn_stream_write(stream, content->data, &content->len));
-  SVN_ERR(svn_stream_close(whb.stream));
   SVN_ERR(svn_stream_close(stream));
 
-  *inflated_size = whb.size;
   return SVN_NO_ERROR;
 }
 
+/* Update the noderev id value for KEY in the textual noderev representation
+ * in NODE_REV.  Take the new id from NODE.  This is a no-op if the KEY
+ * cannot be found.
+ */
 static void
 update_id(svn_stringbuf_t *node_rev,
           const char *key,
@@ -2681,6 +2729,7 @@ update_id(svn_stringbuf_t *node_rev,
   char *newline_pos = 0;
   char *pos;
 
+  /* we need to update the offset only -> find its position */
   pos = strstr(node_rev->data, key);
   if (pos)
     pos = strchr(pos, '/');
@@ -2689,6 +2738,7 @@ update_id(svn_stringbuf_t *node_rev,
 
   if (pos && newline_pos)
     {
+      /* offset data has been found -> replace it */
       char temp[SVN_INT64_BUFFER_SIZE];
       apr_size_t len = svn__i64toa(temp, node->target.offset - 
node->revision->target.offset);
       svn_stringbuf_replace(node_rev,
@@ -2697,6 +2747,11 @@ update_id(svn_stringbuf_t *node_rev,
     }
 }
 
+/* Update the representation id value for KEY in the textual noderev
+ * representation in NODE_REV.  Take the offset, sizes and new MD5 from
+ * REPRESENTATION.  Use SCRATCH_POOL for allocations.
+ * This is a no-op if the KEY cannot be found.
+ */
 static void
 update_text(svn_stringbuf_t *node_rev,
             const char *key,
@@ -2713,6 +2768,7 @@ update_text(svn_stringbuf_t *node_rev,
   val_pos = pos + key_len;
   if (representation->dir)
     {
+      /* for directories, we need to write all rep info anew */
       char *newline_pos = strchr(val_pos, '\n');
       svn_checksum_t checksum;
       const char* temp = apr_psprintf(scratch_pool, "%ld %" APR_SIZE_T_FMT " 
%" 
@@ -2732,6 +2788,8 @@ update_text(svn_stringbuf_t *node_rev,
     }
   else
     {
+      /* ordinary representation: replace offset and rep size only.
+       * Content size and checksums are unchanged. */
       const char* temp;
       char *end_pos = strchr(val_pos, ' ');
       
@@ -2747,6 +2805,13 @@ update_text(svn_stringbuf_t *node_rev,
     }
 }
 
+/* Get the target content (data block as it is to be written to the file) for
+ * the given FRAGMENT in FS.  Return the content in *CONTENT.  Use POOL
+ * for allocations.
+ *
+ * Note that, as a side-effect, this will update the target rep. info for
+ * directories.
+ */
 static svn_error_t *
 get_fragment_content(svn_string_t **content,
                      fs_fs_t *fs,
@@ -2763,6 +2828,7 @@ get_fragment_content(svn_string_t **cont
 
   switch (fragment->kind)
     {
+      /* revision headers can be constructed from target position info */
       case header_fragment:
         info = fragment->data;
         *content = svn_string_createf(pool,
@@ -2771,6 +2837,7 @@ get_fragment_content(svn_string_t **cont
                                       info->target.changes);
         return SVN_NO_ERROR;
 
+      /* The changes list remains untouched */
       case changes_fragment:
         info = fragment->data;
         SVN_ERR(get_content(&revision_content, fs, info->revision, pool));
@@ -2780,6 +2847,9 @@ get_fragment_content(svn_string_t **cont
         (*content)->len = info->target.changes_len;
         return SVN_NO_ERROR;
 
+      /* property and file reps get new headers and need to be rewritten,
+       * iff the base rep is a directory.  The actual (deltified) content
+       * remains unchanged, though.  MD5 etc. do not change. */
       case property_fragment:
       case file_fragment:
         representation = fragment->data;
@@ -2789,6 +2859,8 @@ get_fragment_content(svn_string_t **cont
         if (representation->delta_base)
           if (representation->delta_base->dir)
             {
+              /* if the base happens to be a directory, reconstruct the
+               * full text and represent it as PLAIN rep. */
               SVN_ERR(get_combined_window(&text, fs, representation, pool));
               representation->target.size = text->len;
 
@@ -2799,6 +2871,7 @@ get_fragment_content(svn_string_t **cont
               return SVN_NO_ERROR;
             }
           else
+            /* construct a new rep header */
             if (representation->delta_base == fs->null_base)
               header = svn_stringbuf_create("DELTA\n", pool);
             else
@@ -2811,6 +2884,8 @@ get_fragment_content(svn_string_t **cont
         else
           header = svn_stringbuf_create("PLAIN\n", pool);
 
+        /* if it exists, the actual delta base is unchanged. Hence, this
+         * rep is unchanged even if it has been deltified. */
         header_size = strchr(revision_content->data +
                              representation->original.offset, '\n') -
                       revision_content->data -
@@ -2824,7 +2899,10 @@ get_fragment_content(svn_string_t **cont
         *content = svn_stringbuf__morph_into_string(header);
         return SVN_NO_ERROR;
 
+      /* directory reps need to be rewritten (and deltified) completely.
+       * As a side-effect, update the MD5 and target content size. */
       case dir_fragment:
+        /* construct new content and update MD5 */
         representation = fragment->data;
         SVN_ERR(get_updated_dir(&revision_content, fs, representation,
                                 pool, pool));
@@ -2835,15 +2913,18 @@ get_fragment_content(svn_string_t **cont
                checksum->digest,
                sizeof(representation->dir->target_md5));
 
+        /* deltify against the base rep if necessary */
         if (representation->delta_base)
           {
             if (representation->delta_base->dir == NULL)
               {
+                /* dummy or non-dir base rep -> self-compress only */
                 header = svn_stringbuf_create("DELTA\n", pool);
                 base_content = svn_string_create_empty(pool);
               }
             else
               {
+                /* deltify against base rep (which is a directory, too)*/
                 representation_t *base_rep = representation->delta_base;
                 header = svn_stringbuf_createf(pool,
                                                "DELTA %ld %" APR_SIZE_T_FMT " 
%" APR_SIZE_T_FMT "\n",
@@ -2854,16 +2935,18 @@ get_fragment_content(svn_string_t **cont
                                         pool, pool));
               }
 
+            /* run deltification and update target content size */
             header_size = header->len;
-            SVN_ERR(diff_stringbufs(header, &representation->dir->size,
-                                    base_content,
+            SVN_ERR(diff_stringbufs(header, base_content,
                                     revision_content, pool));
+            representation->dir->size = revision_content->len;
             representation->target.size = header->len - header_size;
             svn_stringbuf_appendcstr(header, "ENDREP\n");
             *content = svn_stringbuf__morph_into_string(header);
           }
         else
           {
+            /* no delta base (not even a dummy) -> PLAIN rep */
             representation->target.size = revision_content->len;
             representation->dir->size = revision_content->len;
             *content = svn_string_createf(pool, "PLAIN\n%sENDREP\n",
@@ -2872,7 +2955,9 @@ get_fragment_content(svn_string_t **cont
 
         return SVN_NO_ERROR;
 
+      /* construct the new noderev content.  No side-effects.*/
       case noderev_fragment:
+        /* get the original noderev as string */
         node = fragment->data;
         SVN_ERR(get_content(&revision_content, fs,
                             node->revision->revision, pool));
@@ -2881,6 +2966,7 @@ get_fragment_content(svn_string_t **cont
                                          node->original.size,
                                          pool);
 
+        /* update the values that may have changed for target */
         update_id(node_rev, "id: ", node);
         update_id(node_rev, "pred: ", node->predecessor);
         update_text(node_rev, "text: ", node->text, pool);


Reply via email to