I am working on an API to report the size on disk of a given revision, in FSFS.
A client wants this for (human) informational purposes -- so users or
administrators can understand how much space their commits are using.
Before FSFS f7, this information was easy to obtain: from the on-disk size of
the revision file or, for a packed shard, by taking (offset[REV] - offset[REV-1])
from the manifest file. Since f7 it is harder once revisions are packed, because
the items of a revision are no longer stored as one contiguous range of the
pack file.
This seems a reasonable feature to me.
The attached patch starts to implement it, providing an API in this form which
is like a cut-down version of svn_fs_fs__get_stats():
/* Set *REV_SIZE to the total size of objects belonging to revision REVISION
* in FS. */
svn_error_t *
svn_fs_fs__get_rev_size(apr_off_t *rev_size,
svn_fs_t *fs,
svn_revnum_t revision,
apr_pool_t *scratch_pool);
and a CLI in this form:
$ svnfsfs rev-size /path/to/repo -r1
1238 bytes in revision 1
The definition of "size" I have chosen excludes the index entries, because
determining their on-disk size is tricky (they are stored in a
variable-length encoding) and their size is not particularly important for
this purpose.
TODO before commit:
* support FSFS pre-f7 and physical addressing mode
* simplify implementation by using the existing svn_fs_fs__dump_index() API
with a simple callback
Any comments?
- Julian
Add an API and CLI to report a revision's size on disk.
### So far, this patch only works with FSFS logical addressing mode.
It reports the total size in bytes of the representation on disk of
a revision, excluding its indexes.
$ svnfsfs rev-size /path/to/repo -r1
1238 bytes in revision 1
* subversion/include/private/svn_fs_fs_private.h,
subversion/libsvn_fs_fs/stats.c
(svn_fs_fs__get_rev_size,
rev_size_log_rev_or_packfile): New.
* subversion/svnfsfs/svnfsfs.h,
subversion/svnfsfs/stats-cmd.c
(subcommand__rev_size): New.
* subversion/svnfsfs/svnfsfs.c
(cmd_table): Add the command.
--This line, and those below, will be ignored--
Index: subversion/include/private/svn_fs_fs_private.h
===================================================================
--- subversion/include/private/svn_fs_fs_private.h (revision 1855386)
+++ subversion/include/private/svn_fs_fs_private.h (working copy)
@@ -268,12 +268,21 @@ svn_fs_fs__get_stats(svn_fs_fs__stats_t
void *progress_baton,
svn_cancel_func_t cancel_func,
void *cancel_baton,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool);
+/* Set *REV_SIZE to the total size, in bytes, of the items belonging to
+ * revision REVISION in FS, not counting the revision's index data.
+ *
+ * So far this only works with FSFS logical addressing mode.
+ *
+ * Use SCRATCH_POOL for temporary allocations.
+ */
+svn_error_t *
+svn_fs_fs__get_rev_size(apr_off_t *rev_size,
+ svn_fs_t *fs,
+ svn_revnum_t revision,
+ apr_pool_t *scratch_pool);
+
/* A node-revision ID in FSFS consists of 3 sub-IDs ("parts") that consist
* of a creation REVISION number and some revision- / transaction-local
* counter value (NUMBER). Old-style ID parts use global counter values.
*
* The parts are: node_id, copy_id and txn_id for in-txn IDs as well as
* node_id, copy_id and rev_item for in-revision IDs. This struct is the
Index: subversion/libsvn_fs_fs/stats.c
===================================================================
--- subversion/libsvn_fs_fs/stats.c (revision 1855386)
+++ subversion/libsvn_fs_fs/stats.c (working copy)
@@ -1131,12 +1131,81 @@ read_log_rev_or_packfile(query_t *query,
/* clean up and close file handles */
svn_pool_destroy(iterpool);
return SVN_NO_ERROR;
}
+/* Set *REV_SIZE to the total size of objects belonging to revision REVISION,
+ * excluding its indexes, using logical addressing mode.
+ *
+ * The filesystem is taken from QUERY->FS. The pack or rev file containing
+ * REVISION is walked through its P2L index, and the sizes of all entries
+ * that are attributed to REVISION and are not of type "unused" are added up.
+ * No item contents are read.
+ *
+ * Use SCRATCH_POOL for temporary allocations.
+ */
+static svn_error_t *
+rev_size_log_rev_or_packfile(apr_off_t *rev_size,
+ query_t *query,
+ svn_revnum_t revision,
+ apr_pool_t *scratch_pool)
+{
+ fs_fs_data_t *ffd = query->fs->fsap_data;
+ apr_pool_t *iterpool = svn_pool_create(scratch_pool);
+ apr_off_t max_offset;
+ apr_off_t offset = 0;
+ int i;
+ svn_fs_fs__revision_file_t *rev_file;
+
+ /* open the pack / rev file that is covered by the p2l index;
+ the file object is allocated in SCRATCH_POOL */
+ SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, revision,
+ scratch_pool, iterpool));
+ SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, query->fs, rev_file,
+ revision, scratch_pool));
+
+ *rev_size = 0;
+
+ /* for all offsets in the file, get the P2L index entries and add up
+ the sizes of the items that belong to REVISION.
+ NOTE(review): OFFSET only advances inside the inner loop, so this
+ assumes every lookup yields at least one non-empty entry at or
+ beyond OFFSET -- confirm, otherwise the loop would not terminate. */
+ for (offset = 0; offset < max_offset; )
+ {
+ apr_array_header_t *entries;
+
+ svn_pool_clear(iterpool);
+
+ /* get all entries for the current block (one P2L page starting at
+ OFFSET); the result lives in ITERPOOL */
+ SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, query->fs, rev_file, revision,
+ offset, ffd->p2l_page_size,
+ iterpool, iterpool));
+
+ /* process all entries (and later continue with the next block) */
+ for (i = 0; i < entries->nelts; ++i)
+ {
+ svn_fs_fs__p2l_entry_t *entry
+ = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
+
+ /* skip bits we previously processed: the first entry of a block
+ may start before OFFSET */
+ if (i == 0 && entry->offset < offset)
+ continue;
+
+ /* skip zero-sized entries */
+ if (entry->size == 0)
+ continue;
+
+ /* count the entry only if it is attributed to REVISION and is
+ not marked as unused; entries of other revisions are ignored */
+ if (entry->item.revision == revision
+ && entry->type != SVN_FS_FS__ITEM_TYPE_UNUSED)
+ {
+ *rev_size += entry->size;
+ }
+
+ /* advance offset past this entry, whether or not it was counted */
+ offset += entry->size;
+ }
+ }
+
+ /* release the per-iteration pool.
+ NOTE(review): REV_FILE is not closed explicitly here; presumably it is
+ released together with SCRATCH_POOL -- confirm. */
+ svn_pool_destroy(iterpool);
+
+ return SVN_NO_ERROR;
+}
+
/* Read the content of the pack file staring at revision BASE logical
* addressing mode and store it in QUERY.
*
* Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
* temporaries.
*/
@@ -1394,6 +1463,22 @@ svn_fs_fs__get_stats(svn_fs_fs__stats_t
scratch_pool));
SVN_ERR(read_revisions(query, scratch_pool, scratch_pool));
aggregate_stats(query->revisions, *stats);
return SVN_NO_ERROR;
}
+
+/* Set *REV_SIZE to the size of revision REVISION in FS, as computed by
+ * rev_size_log_rev_or_packfile(). All allocations use SCRATCH_POOL.
+ */
+svn_error_t *
+svn_fs_fs__get_rev_size(apr_off_t *rev_size,
+ svn_fs_t *fs,
+ svn_revnum_t revision,
+ apr_pool_t *scratch_pool)
+{
+ query_t *query;
+
+ /* set up a query context for FS; SCRATCH_POOL serves as both result and
+ scratch pool. NOTE(review): the NULL arguments presumably stand for
+ the unused stats container and progress / cancellation callbacks --
+ confirm against create_query(). */
+ SVN_ERR(create_query(&query, fs, NULL, NULL, NULL, NULL, NULL,
+ scratch_pool, scratch_pool));
+
+ /* NOTE(review): this always takes the logical-addressing code path;
+ physical addressing (pre-f7) is not handled yet. */
+ SVN_ERR(rev_size_log_rev_or_packfile(rev_size, query, revision,
+ scratch_pool));
+
+ return SVN_NO_ERROR;
+}
Index: subversion/svnfsfs/stats-cmd.c
===================================================================
--- subversion/svnfsfs/stats-cmd.c (revision 1855386)
+++ subversion/svnfsfs/stats-cmd.c (working copy)
@@ -509,6 +509,30 @@ subcommand__stats(apr_getopt_t *os, void
check_cancel, NULL, pool, pool));
print_stats(stats, pool);
return SVN_NO_ERROR;
}
+
+/* This implements `svn_opt_subcommand_t'.
+ *
+ * Print the size in bytes of the single revision given with -r, for the
+ * repository at the path stored in the option state passed as BATON.
+ */
+svn_error_t *
+subcommand__rev_size(apr_getopt_t *os, void *baton, apr_pool_t *pool)
+{
+ svnfsfs__opt_state *opt_state = baton;
+ svn_revnum_t revision;
+ apr_off_t rev_size;
+ svn_fs_t *fs;
+
+ /* require exactly one revision, given as a number: reject a missing
+ or non-numeric start revision as well as any end revision (a range) */
+ if (opt_state->start_revision.kind != svn_opt_revision_number
+ || opt_state->end_revision.kind != svn_opt_revision_unspecified)
+ return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
+ _("Invalid revision specifier"));
+ revision = opt_state->start_revision.value.number;
+
+ /* open the filesystem and query the size of REVISION */
+ SVN_ERR(open_fs(&fs, opt_state->repository_path, pool));
+ SVN_ERR(svn_fs_fs__get_rev_size(&rev_size, fs, revision, pool));
+
+ /* NOTE(review): %ld assumes svn_revnum_t is a long -- confirm */
+ printf("%"APR_OFF_T_FMT" bytes in revision %ld\n",
+ rev_size, revision);
+
+ return SVN_NO_ERROR;
+}
Index: subversion/svnfsfs/svnfsfs.c
===================================================================
--- subversion/svnfsfs/svnfsfs.c (revision 1855386)
+++ subversion/svnfsfs/svnfsfs.c (working copy)
@@ -166,12 +166,20 @@ static const svn_opt_subcommand_desc3_t
"usage: svnfsfs stats REPOS_PATH\n"
"\n"), N_(
"Write object size statistics to console.\n"
)},
{'M'} },
+ {"rev-size", subcommand__rev_size, {0}, {N_(
+ "usage: svnfsfs rev-size REPOS_PATH -r REVISION\n"
+ "\n"), N_(
+ "Write to console the total size in bytes of the representation on disk of\n"
+ "revision REVISION, excluding its indexes.\n"
+ )},
+ {'r', 'M'} },
+
{ NULL, NULL, {0}, {NULL}, {0} }
};
svn_error_t *
open_fs(svn_fs_t **fs,
Index: subversion/svnfsfs/svnfsfs.h
===================================================================
--- subversion/svnfsfs/svnfsfs.h (revision 1855386)
+++ subversion/svnfsfs/svnfsfs.h (working copy)
@@ -49,13 +49,14 @@ typedef struct svnfsfs__opt_state
/* Declare all the command procedures */
svn_opt_subcommand_t
subcommand__help,
subcommand__dump_index,
subcommand__load_index,
- subcommand__stats;
+ subcommand__stats,
+ subcommand__rev_size;
/* Check that the filesystem at PATH is an FSFS repository and then open it.
* Return the filesystem in *FS, allocated in POOL. */
svn_error_t *
open_fs(svn_fs_t **fs,