I am working on an API to report the size on disk of a given revision, in FSFS. 
A client wants this for (human) informational purposes -- so users or 
administrators can understand how much space their commits are using.

Before FSFS format 7 (f7), this information was easy to obtain from the
on-disk size of a revision file or, for a packed shard, by taking
(offset[REV] - offset[REV-1]) from the manifest file. Since f7, it is harder to
obtain when revisions are packed.

This seems a reasonable feature to me.

The attached patch starts to implement it, providing an API in this form which 
is like a cut-down version of svn_fs_fs__get_stats():

/* Set *REV_SIZE to the total size of objects belonging to revision REVISION
 * in FS. */
svn_error_t *
svn_fs_fs__get_rev_size(apr_off_t *rev_size,
                        svn_fs_t *fs,
                        svn_revnum_t revision,
                        apr_pool_t *scratch_pool);

and a CLI in this form:

    $ svnfsfs rev-size /path/to/repo -r1
    1238 bytes in revision 1

The definition of "size" I have chosen excludes the index entries, because
determining their on-disk size is tricky (they are stored in a
variable-length encoding) and their size is not particularly important for
this purpose.

TODO before commit:
  * support FSFS pre-f7 and physical addressing mode
  * simplify the implementation by using the existing svn_fs_fs__dump_index()
    API with a simple callback

Any comments?

- Julian
Add an API and CLI to report a revision's size on disk.

### So far, this patch only works with FSFS logical addressing mode.

It reports the total size in bytes of the representation on disk of
a revision, excluding its indexes.

    $ svnfsfs rev-size /path/to/repo -r1
    1238 bytes in revision 1


* subversion/include/private/svn_fs_fs_private.h,
  subversion/libsvn_fs_fs/stats.c
  (svn_fs_fs__get_rev_size,
   rev_size_log_rev_or_packfile): New.

* subversion/svnfsfs/svnfsfs.h,
  subversion/svnfsfs/stats-cmd.c
  (subcommand__rev_size): New.

* subversion/svnfsfs/svnfsfs.c
  (cmd_table): Add the command.
--This line, and those below, will be ignored--

Index: subversion/include/private/svn_fs_fs_private.h
===================================================================
--- subversion/include/private/svn_fs_fs_private.h	(revision 1855386)
+++ subversion/include/private/svn_fs_fs_private.h	(working copy)
@@ -268,12 +268,21 @@ svn_fs_fs__get_stats(svn_fs_fs__stats_t
                      void *progress_baton,
                      svn_cancel_func_t cancel_func,
                      void *cancel_baton,
                      apr_pool_t *result_pool,
                      apr_pool_t *scratch_pool);
 
+/* Set *REV_SIZE to the total size, in bytes, of the items belonging to
+ * revision REVISION in FS, excluding its indexes.  So far only logical
+ * addressing is supported.  Use SCRATCH_POOL for temporary allocations. */
+svn_error_t *
+svn_fs_fs__get_rev_size(apr_off_t *rev_size,
+                        svn_fs_t *fs,
+                        svn_revnum_t revision,
+                        apr_pool_t *scratch_pool);
+
 /* A node-revision ID in FSFS consists of 3 sub-IDs ("parts") that consist
  * of a creation REVISION number and some revision- / transaction-local
  * counter value (NUMBER).  Old-style ID parts use global counter values.
  *
  * The parts are: node_id, copy_id and txn_id for in-txn IDs as well as
  * node_id, copy_id and rev_item for in-revision IDs.  This struct is the
Index: subversion/libsvn_fs_fs/stats.c
===================================================================
--- subversion/libsvn_fs_fs/stats.c	(revision 1855386)
+++ subversion/libsvn_fs_fs/stats.c	(working copy)
@@ -1131,12 +1131,81 @@ read_log_rev_or_packfile(query_t *query,
   /* clean up and close file handles */
   svn_pool_destroy(iterpool);
 
   return SVN_NO_ERROR;
 }
 
+/* Set *REV_SIZE to the sum of the sizes of all used P2L entries that belong
+ * to revision REVISION in the FS of QUERY (its indexes are not included),
+ * using logical addressing mode.  Allocate from SCRATCH_POOL. */
+static svn_error_t *
+rev_size_log_rev_or_packfile(apr_off_t *rev_size,
+                             query_t *query,
+                             svn_revnum_t revision,
+                             apr_pool_t *scratch_pool)
+{
+  fs_fs_data_t *ffd = query->fs->fsap_data;
+  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
+  apr_off_t max_offset;
+  apr_off_t offset = 0;
+  int i;
+  svn_fs_fs__revision_file_t *rev_file;
+
+  /* open the pack / rev file that is covered by the p2l index */
+  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, revision,
+                                           scratch_pool, iterpool));
+  SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, query->fs, rev_file,
+                                        revision, scratch_pool));
+
+  *rev_size = 0;
+
+  /* walk the file from offset 0 up to MAX_OFFSET, looking up the P2L index
+     entries one block at a time and adding up those that belong to REVISION */
+  for (offset = 0; offset < max_offset; )
+    {
+      apr_array_header_t *entries;
+
+      svn_pool_clear(iterpool);
+
+      /* get all entries for the block of p2l_page_size bytes at OFFSET */
+      SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, query->fs, rev_file, revision,
+                                          offset, ffd->p2l_page_size,
+                                          iterpool, iterpool));
+
+      /* process all entries (and later continue with the next block) */
+      for (i = 0; i < entries->nelts; ++i)
+        {
+          svn_fs_fs__p2l_entry_t *entry
+            = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
+
+          /* the first entry may start before OFFSET: it was already handled */
+          if (i == 0 && entry->offset < offset)
+            continue;
+
+          /* skip zero-sized entries */
+          if (entry->size == 0)
+            continue;
+
+          /* count only used items that belong to REVISION */
+          if (entry->item.revision == revision
+              && entry->type != SVN_FS_FS__ITEM_TYPE_UNUSED)
+            {
+              *rev_size += entry->size;
+            }
+
+          /* advance past this entry, whether or not it was counted */
+          offset += entry->size;
+        }
+    }
+
+  /* clean up; note that REV_FILE is not explicitly closed here */
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
 /* Read the content of the pack file staring at revision BASE logical
  * addressing mode and store it in QUERY.
  *
  * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
  * temporaries.
  */
@@ -1394,6 +1463,22 @@ svn_fs_fs__get_stats(svn_fs_fs__stats_t
                        scratch_pool));
   SVN_ERR(read_revisions(query, scratch_pool, scratch_pool));
   aggregate_stats(query->revisions, *stats);
 
   return SVN_NO_ERROR;
 }
+
+svn_error_t *
+svn_fs_fs__get_rev_size(apr_off_t *rev_size,
+                        svn_fs_t *fs,
+                        svn_revnum_t revision,
+                        apr_pool_t *scratch_pool)
+{
+  query_t *query;
+  /* the helper only reads QUERY->fs; logical addressing only so far */
+  SVN_ERR(create_query(&query, fs, NULL, NULL, NULL, NULL, NULL,
+                       scratch_pool, scratch_pool));
+  SVN_ERR(rev_size_log_rev_or_packfile(rev_size, query, revision,
+                                       scratch_pool));
+
+  return SVN_NO_ERROR;
+}
Index: subversion/svnfsfs/stats-cmd.c
===================================================================
--- subversion/svnfsfs/stats-cmd.c	(revision 1855386)
+++ subversion/svnfsfs/stats-cmd.c	(working copy)
@@ -509,6 +509,30 @@ subcommand__stats(apr_getopt_t *os, void
                                check_cancel, NULL, pool, pool));
 
   print_stats(stats, pool);
 
   return SVN_NO_ERROR;
 }
+
+/* This implements `svn_opt_subcommand_t': print the size of one revision. */
+svn_error_t *
+subcommand__rev_size(apr_getopt_t *os, void *baton, apr_pool_t *pool)
+{
+  svnfsfs__opt_state *opt_state = baton;
+  svn_revnum_t revision;
+  apr_off_t rev_size;
+  svn_fs_t *fs;
+  /* accept only a single numeric revision: no end revision, no keyword */
+  if (opt_state->start_revision.kind != svn_opt_revision_number
+      || opt_state->end_revision.kind != svn_opt_revision_unspecified)
+    return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
+                            _("Invalid revision specifier"));
+  revision = opt_state->start_revision.value.number;
+
+  SVN_ERR(open_fs(&fs, opt_state->repository_path, pool));
+  SVN_ERR(svn_fs_fs__get_rev_size(&rev_size, fs, revision, pool));
+
+  printf("%"APR_OFF_T_FMT" bytes in revision %ld\n",
+         rev_size, revision);
+
+  return SVN_NO_ERROR;
+}
Index: subversion/svnfsfs/svnfsfs.c
===================================================================
--- subversion/svnfsfs/svnfsfs.c	(revision 1855386)
+++ subversion/svnfsfs/svnfsfs.c	(working copy)
@@ -166,12 +166,20 @@ static const svn_opt_subcommand_desc3_t
     "usage: svnfsfs stats REPOS_PATH\n"
     "\n"), N_(
     "Write object size statistics to console.\n"
    )},
    {'M'} },
 
+  {"rev-size", subcommand__rev_size, {0}, {N_(
+    "usage: svnfsfs rev-size REPOS_PATH -r REVISION\n"
+    "\n"), N_(
+    "Write to console the total size in bytes of the representation on disk of\n"
+    "revision REVISION, excluding its indexes.\n"
+   )},
+   {'r', 'M'} },
+
   { NULL, NULL, {0}, {NULL}, {0} }
 };
 
 
 svn_error_t *
 open_fs(svn_fs_t **fs,
Index: subversion/svnfsfs/svnfsfs.h
===================================================================
--- subversion/svnfsfs/svnfsfs.h	(revision 1855386)
+++ subversion/svnfsfs/svnfsfs.h	(working copy)
@@ -49,13 +49,14 @@ typedef struct svnfsfs__opt_state
 
 /* Declare all the command procedures */
 svn_opt_subcommand_t
   subcommand__help,
   subcommand__dump_index,
   subcommand__load_index,
-  subcommand__stats;
+  subcommand__stats,
+  subcommand__rev_size;
 
 
 /* Check that the filesystem at PATH is an FSFS repository and then open it.
  * Return the filesystem in *FS, allocated in POOL. */
 svn_error_t *
 open_fs(svn_fs_t **fs,

Reply via email to