Author: stefan2
Date: Fri Aug 9 10:29:28 2013
New Revision: 1512240
URL: http://svn.apache.org/r1512240
Log:
On the log-addressing branch: Extend verification code to check
for index consistency, i.e. log-to-phys must match phys-to-log 1:1.
To do this, we need two extra functions in the index API that
basically tell us the size of the index.
* subversion/libsvn_fs_fs/index.h
(svn_fs_fs__l2p_get_max_ids,
svn_fs_fs__p2l_get_max_offset): declare new private API
* subversion/libsvn_fs_fs/index.c
(get_l2p_header): caching wrapper around get_l2p_header_body as
the other functions won't return the full header
(svn_fs_fs__l2p_get_max_ids): implement
(p2l_get_max_offset_func): cache data extraction function
(svn_fs_fs__p2l_get_max_offset): implement
* subversion/libsvn_fs_fs/verify.c
(compare_l2p_to_p2l_index,
compare_p2l_to_l2p_index): implement two-way index comparison
(verify_index_consistency): new verification function
(svn_fs_fs__verify): call the new function for log. addressed range
Modified:
subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c
subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h
subversion/branches/log-addressing/subversion/libsvn_fs_fs/verify.c
Modified: subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c
URL:
http://svn.apache.org/viewvc/subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c?rev=1512240&r1=1512239&r2=1512240&view=diff
==============================================================================
--- subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c
(original)
+++ subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c Fri Aug
9 10:29:28 2013
@@ -1347,6 +1347,90 @@ l2p_proto_index_lookup(apr_off_t *offset
return SVN_NO_ERROR;
}
+/* Fetch the log-to-phys index header covering REVISION in FS and return
+ * it in *HEADER.  The L2P header cache is consulted first; on a hit,
+ * *STREAM is not touched.  On a miss, the request is forwarded to
+ * get_l2p_header_body() together with STREAM.  Use POOL for allocations.
+ */
+static svn_error_t *
+get_l2p_header(l2p_header_t **header,
+               packed_number_stream_t **stream,
+               svn_fs_t *fs,
+               svn_revnum_t revision,
+               apr_pool_t *pool)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+  svn_boolean_t found = FALSE;
+  pair_cache_key_t cache_key;
+
+  /* the cache key is built from base_revision() and the packed state */
+  cache_key.revision = base_revision(fs, revision);
+  cache_key.second = svn_fs_fs__is_packed_rev(fs, revision);
+
+  SVN_ERR(svn_cache__get((void**)header, &found, ffd->l2p_header_cache,
+                         &cache_key, pool));
+
+  /* cache miss: let the non-caching variant produce the header */
+  if (!found)
+    SVN_ERR(get_l2p_header_body(header, stream, fs, revision, pool));
+
+  return SVN_NO_ERROR;
+}
+
+/* For each of the COUNT revisions starting at START_REV in FS, derive the
+ * number of item index slots from the log-to-phys index page table and
+ * append it as an apr_uint64_t to *MAX_IDS, which is allocated in POOL.
+ * Index headers are kept in a private subpool of POOL that is cleared
+ * whenever the header of the next index has to be fetched and destroyed
+ * before a successful return.
+ */
+svn_error_t *
+svn_fs_fs__l2p_get_max_ids(apr_array_header_t **max_ids,
+ svn_fs_t *fs,
+ svn_revnum_t start_rev,
+ apr_size_t count,
+ apr_pool_t *pool)
+{
+ l2p_header_t *header = NULL;
+ svn_revnum_t revision;
+ svn_revnum_t last_rev = (svn_revnum_t)(start_rev + count);
+ packed_number_stream_t *stream = NULL;
+ apr_pool_t *header_pool = svn_pool_create(pool);
+
+ /* read index master data structure for the index covering START_REV */
+ SVN_ERR(get_l2p_header(&header, &stream, fs, start_rev, header_pool));
+ /* NOTE(review): when get_l2p_header() is served from the cache it does
+ * not set STREAM, so it is still NULL here; presumably
+ * packed_stream_close() accepts NULL -- confirm. */
+ SVN_ERR(packed_stream_close(stream));
+ stream = NULL;
+
+ /* Determine the length of the item index list for each rev.
+ * Read new index headers as required. LAST_REV is exclusive. */
+ *max_ids = apr_array_make(pool, (int)count, sizeof(apr_uint64_t));
+ for (revision = start_rev; revision < last_rev; ++revision)
+ {
+ apr_uint64_t full_page_count;
+ apr_uint64_t item_count;
+ apr_size_t first_page_index, last_page_index;
+
+ /* REVISION lies beyond the range covered by the current header */
+ if (revision >= header->first_revision + header->revision_count)
+ {
+ /* need to read the next index. Clear up memory used for the
+ * previous one. */
+ svn_pool_clear(header_pool);
+ SVN_ERR(get_l2p_header(&header, &stream, fs, revision,
+ header_pool));
+ SVN_ERR(packed_stream_close(stream));
+ stream = NULL;
+ }
+
+ /* in a revision with N index pages, the first N-1 index pages are
+ * "full", i.e. contain HEADER->PAGE_SIZE entries. The page table
+ * index of the following revision marks the end of this revision's
+ * pages, so the last page is the one just before it. */
+ first_page_index
+ = header->page_table_index[revision - header->first_revision];
+ last_page_index
+ = header->page_table_index[revision - header->first_revision + 1];
+ full_page_count = last_page_index - first_page_index - 1;
+ /* full pages plus the entries actually stored in the last page */
+ item_count = full_page_count * header->page_size
+ + header->page_table[last_page_index - 1].entry_count;
+
+ APR_ARRAY_PUSH(*max_ids, apr_uint64_t) = item_count;
+ }
+
+ svn_pool_destroy(header_pool);
+ return SVN_NO_ERROR;
+}
+
svn_error_t *
svn_fs_fs__item_offset(apr_off_t *absolute_position,
svn_fs_t *fs,
@@ -2252,6 +2336,58 @@ svn_fs_fs__p2l_entry_lookup(svn_fs_fs__p
return SVN_NO_ERROR;
}
+/* Implements svn_cache__partial_getter_func_t for P2L headers: DATA is
+ * read as a p2l_header_t and *OUT is set to a copy, allocated in
+ * RESULT_POOL, of the first offset not covered by this P2L index, i.e.
+ * page size times page count.  DATA_LEN and BATON are not used.
+ */
+static svn_error_t *
+p2l_get_max_offset_func(void **out,
+                        const void *data,
+                        apr_size_t data_len,
+                        void *baton,
+                        apr_pool_t *result_pool)
+{
+  const p2l_header_t *p2l_header = data;
+  apr_off_t end_offset = p2l_header->page_size * p2l_header->page_count;
+
+  /* hand out a copy that lives in RESULT_POOL */
+  *out = apr_pmemdup(result_pool, &end_offset, sizeof(end_offset));
+
+  return SVN_NO_ERROR;
+}
+
+/* Set *OFFSET to the first offset not covered by the phys-to-log index
+ * for REVISION in FS.  The value is taken from the cached P2L header if
+ * there is one; otherwise the header is fetched via get_p2l_header() and
+ * the data stream is closed again afterwards.  Use POOL for allocations.
+ */
+svn_error_t *
+svn_fs_fs__p2l_get_max_offset(apr_off_t *offset,
+                              svn_fs_t *fs,
+                              svn_revnum_t revision,
+                              apr_pool_t *pool)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+  packed_number_stream_t *stream = NULL;
+  p2l_header_t *p2l_header;
+  svn_boolean_t found = FALSE;
+  apr_off_t *cached_offset;
+  pair_cache_key_t cache_key;
+
+  /* ask the header cache to extract just the max. offset for us */
+  cache_key.revision = base_revision(fs, revision);
+  cache_key.second = svn_fs_fs__is_packed_rev(fs, revision);
+
+  SVN_ERR(svn_cache__get_partial((void **)&cached_offset, &found,
+                                 ffd->p2l_header_cache, &cache_key,
+                                 p2l_get_max_offset_func, NULL, pool));
+
+  if (found)
+    {
+      *offset = *cached_offset;
+    }
+  else
+    {
+      /* not cached: get the full header and derive the value from it */
+      SVN_ERR(get_p2l_header(&p2l_header, &stream, fs, revision, pool,
+                             pool));
+      *offset = p2l_header->page_count * p2l_header->page_size;
+
+      /* make sure we close files after usage */
+      SVN_ERR(packed_stream_close(stream));
+    }
+
+  return SVN_NO_ERROR;
+}
+
/*
* Standard (de-)serialization functions
*/
Modified: subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h
URL:
http://svn.apache.org/viewvc/subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h?rev=1512240&r1=1512239&r2=1512240&view=diff
==============================================================================
--- subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h
(original)
+++ subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h Fri Aug
9 10:29:28 2013
@@ -181,6 +181,28 @@ svn_fs_fs__item_offset(apr_off_t *absolu
apr_uint64_t item_index,
apr_pool_t *pool);
+/* Use the log-to-phys indexes in FS to determine the maximum item indexes
+ * assigned to revision START_REV to START_REV + COUNT - 1. That is a
+ * close upper limit to the actual number of items in the respective revs.
+ * Return the results in *MAX_IDS, an array of apr_uint64_t with one
+ * element per revision in ascending revision order, allocated in POOL.
+ */
+svn_error_t *
+svn_fs_fs__l2p_get_max_ids(apr_array_header_t **max_ids,
+ svn_fs_t *fs,
+ svn_revnum_t start_rev,
+ apr_size_t count,
+ apr_pool_t *pool);
+
+/* In *OFFSET, return the first offset in the pack / rev file containing
+ * REVISION in FS that is not covered by the phys-to-log index.
+ * Use POOL for allocations.
+ */
+svn_error_t *
+svn_fs_fs__p2l_get_max_offset(apr_off_t *offset,
+ svn_fs_t *fs,
+ svn_revnum_t revision,
+ apr_pool_t *pool);
+
/* Serialization and caching interface
*/
Modified: subversion/branches/log-addressing/subversion/libsvn_fs_fs/verify.c
URL:
http://svn.apache.org/viewvc/subversion/branches/log-addressing/subversion/libsvn_fs_fs/verify.c?rev=1512240&r1=1512239&r2=1512240&view=diff
==============================================================================
--- subversion/branches/log-addressing/subversion/libsvn_fs_fs/verify.c
(original)
+++ subversion/branches/log-addressing/subversion/libsvn_fs_fs/verify.c Fri Aug
9 10:29:28 2013
@@ -20,12 +20,15 @@
* ====================================================================
*/
+#include "svn_sorts.h"
+
#include "verify.h"
#include "fs_fs.h"
#include "cached_data.h"
#include "rep-cache.h"
#include "util.h"
+#include "index.h"
#include "../libsvn_fs/fs-loader.h"
@@ -154,6 +157,224 @@ verify_rep_cache(svn_fs_t *fs,
return SVN_NO_ERROR;
}
+/* Verify that for all log-to-phys index entries for revisions START to
+ * START + COUNT-1 in FS there is a consistent entry in the phys-to-log
+ * index.  If given, invoke CANCEL_FUNC with CANCEL_BATON once per
+ * revision.  Use POOL for allocations.
+ *
+ * Return SVN_ERR_FS_ITEM_INDEX_INCONSISTENT if an L2P entry has no P2L
+ * counterpart or if the P2L entry found names a different item.
+ */
+static svn_error_t *
+compare_l2p_to_p2l_index(svn_fs_t *fs,
+                         svn_revnum_t start,
+                         svn_revnum_t count,
+                         svn_cancel_func_t cancel_func,
+                         void *cancel_baton,
+                         apr_pool_t *pool)
+{
+  svn_revnum_t i;
+  apr_pool_t *iterpool = svn_pool_create(pool);
+  apr_array_header_t *max_ids;
+
+  /* determine the range of items to check for each revision */
+  SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, start, count, pool));
+
+  /* check all items in all revisions in the given range */
+  for (i = 0; i < max_ids->nelts; ++i)
+    {
+      apr_uint64_t k;
+      apr_uint64_t max_id = APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
+      svn_revnum_t revision = start + i;
+
+      for (k = 0; k < max_id; ++k)
+        {
+          apr_off_t offset;
+          svn_fs_fs__p2l_entry_t *p2l_entry;
+
+          /* Release the previous iteration's allocations up front, so
+           * the "unused entry" shortcut below cannot skip the cleanup
+           * and let memory pile up over long runs of unused items. */
+          svn_pool_clear(iterpool);
+
+          /* get L2P entry.  Ignore unused entries. */
+          SVN_ERR(svn_fs_fs__item_offset(&offset, fs,
+                                         revision, NULL, k, iterpool));
+          if (offset == -1)
+            continue;
+
+          /* find the corresponding P2L entry; the P2L index is addressed
+           * through START rather than REVISION here */
+          SVN_ERR(svn_fs_fs__p2l_entry_lookup(&p2l_entry, fs, start,
+                                              offset, iterpool));
+
+          /* error messages are built in POOL, not in ITERPOOL */
+          if (p2l_entry == NULL)
+            return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_INCONSISTENT,
+                                     NULL,
+                                     _("p2l index entry not found for "
+                                       "PHYS %s returned by "
+                                       "l2p index for LOG r%ld:i%ld"),
+                                     apr_off_t_toa(pool, offset),
+                                     revision, (long)k);
+
+          if (   p2l_entry->item.number != k
+              || p2l_entry->item.revision != revision)
+            return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_INCONSISTENT,
+                                     NULL,
+                                     _("p2l index info LOG r%ld:i%ld"
+                                       " does not match "
+                                       "l2p index for LOG r%ld:i%ld"),
+                                     p2l_entry->item.revision,
+                                     (long)p2l_entry->item.number,
+                                     revision, (long)k);
+        }
+
+      if (cancel_func)
+        SVN_ERR(cancel_func(cancel_baton));
+    }
+
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
+/* Verify that for all phys-to-log index entries for revisions START to
+ * START + COUNT-1 in FS there is a consistent entry in the log-to-phys
+ * index.  If given, invoke CANCEL_FUNC with CANCEL_BATON once per block
+ * of P2L entries.  Use POOL for allocations.
+ *
+ * Please note that we can only check on pack / rev file granularity and
+ * must only be called for a single rev / pack file.  COUNT is not
+ * evaluated; the range checked is the one covered by the P2L index
+ * that START belongs to.
+ *
+ * Return SVN_ERR_FS_ITEM_INDEX_CORRUPTION if the P2L index yields no
+ * entries for an offset below its max. offset, and
+ * SVN_ERR_FS_ITEM_INDEX_INCONSISTENT if the L2P index maps an item to a
+ * different offset than the P2L entry says.
+ */
+static svn_error_t *
+compare_p2l_to_l2p_index(svn_fs_t *fs,
+                         svn_revnum_t start,
+                         svn_revnum_t count,
+                         svn_cancel_func_t cancel_func,
+                         void *cancel_baton,
+                         apr_pool_t *pool)
+{
+  apr_pool_t *iterpool = svn_pool_create(pool);
+  apr_off_t max_offset;
+  apr_off_t offset = 0;
+
+  /* get the size of the rev / pack file as covered by the P2L index */
+  SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, fs, start, pool));
+
+  /* for all offsets in the file, get the P2L index entries and check
+     them against the L2P index */
+  for (offset = 0; offset < max_offset; )
+    {
+      apr_array_header_t *entries;
+      svn_fs_fs__p2l_entry_t *last_entry;
+      int i;
+
+      /* get all entries for the current block */
+      SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, start, offset,
+                                          iterpool));
+      if (entries->nelts == 0)
+        return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_CORRUPTION,
+                                 NULL,
+                                 _("p2l does not cover offset %s"
+                                   " for revision %ld"),
+                                 apr_off_t_toa(pool, offset), start);
+
+      /* process all entries (and later continue with the next block);
+         the next block starts right behind the last entry of this one */
+      last_entry
+        = &APR_ARRAY_IDX(entries, entries->nelts-1, svn_fs_fs__p2l_entry_t);
+      offset = last_entry->offset + last_entry->size;
+
+      for (i = 0; i < entries->nelts; ++i)
+        {
+          svn_fs_fs__p2l_entry_t *entry
+            = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
+
+          /* check all sub-items for consistent entries in the L2P index */
+          if (entry->type != SVN_FS_FS__ITEM_TYPE_UNUSED)
+            {
+              apr_off_t l2p_offset;
+              SVN_ERR(svn_fs_fs__item_offset(&l2p_offset, fs,
+                                             entry->item.revision, NULL,
+                                             entry->item.number, iterpool));
+
+              /* The message fragments are concatenated by the compiler,
+                 so each one must carry its own separating blank. */
+              if (l2p_offset != entry->offset)
+                return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_INCONSISTENT,
+                                         NULL,
+                                         _("l2p index entry PHYS %s"
+                                           " does not match p2l index value "
+                                           "LOG r%ld:i%ld for PHYS %s"),
+                                         apr_off_t_toa(pool, l2p_offset),
+                                         entry->item.revision,
+                                         (long)entry->item.number,
+                                         apr_off_t_toa(pool, entry->offset));
+            }
+        }
+
+      svn_pool_clear(iterpool);
+
+      if (cancel_func)
+        SVN_ERR(cancel_func(cancel_baton));
+    }
+
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
+/* For REV below FS' min_unpacked_rev, return REV rounded down to a
+ * multiple of FS' max_files_per_dir.  Otherwise, return REV unchanged.
+ */
+static svn_revnum_t
+packed_base_rev(svn_fs_t *fs, svn_revnum_t rev)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+
+  if (rev >= ffd->min_unpacked_rev)
+    return rev;
+
+  return rev - (rev % ffd->max_files_per_dir);
+}
+
+/* Return FS' max_files_per_dir if REV is below FS' min_unpacked_rev and
+ * 1 otherwise.
+ */
+static svn_revnum_t
+pack_size(svn_fs_t *fs, svn_revnum_t rev)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+
+  if (rev < ffd->min_unpacked_rev)
+    return ffd->max_files_per_dir;
+
+  return 1;
+}
+
+/* Check the log-to-phys and the phys-to-log indexes of FS against each
+ * other, in both directions, for revisions START through END.  The range
+ * is walked in steps given by packed_base_rev() and pack_size().  The
+ * function signature is similar to svn_fs_fs__verify.
+ *
+ * NOTIFY_FUNC, if given, is called with NOTIFY_BATON for every step whose
+ * first revision is a multiple of max_files_per_dir.  CANCEL_FUNC and
+ * CANCEL_BATON are passed on to the comparison functions.
+ *
+ * The values of START and END have already been auto-selected and
+ * verified.  You may call this for format7 or higher repos.
+ */
+static svn_error_t *
+verify_index_consistency(svn_fs_t *fs,
+                         svn_revnum_t start,
+                         svn_revnum_t end,
+                         svn_fs_progress_notify_func_t notify_func,
+                         void *notify_baton,
+                         svn_cancel_func_t cancel_func,
+                         void *cancel_baton,
+                         apr_pool_t *pool)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+  apr_pool_t *iterpool = svn_pool_create(pool);
+  svn_revnum_t revision = start;
+
+  while (revision <= end)
+    {
+      svn_revnum_t pack_start = packed_base_rev(fs, revision);
+      svn_revnum_t rev_count = pack_size(fs, revision);
+
+      if (notify_func && (pack_start % ffd->max_files_per_dir == 0))
+        notify_func(pack_start, notify_baton, iterpool);
+
+      /* two-way index check */
+      SVN_ERR(compare_l2p_to_p2l_index(fs, pack_start, rev_count,
+                                       cancel_func, cancel_baton,
+                                       iterpool));
+      SVN_ERR(compare_p2l_to_l2p_index(fs, pack_start, rev_count,
+                                       cancel_func, cancel_baton,
+                                       iterpool));
+
+      svn_pool_clear(iterpool);
+
+      /* continue with the first revision behind this step */
+      revision = pack_start + rev_count;
+    }
+
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
svn_error_t *
svn_fs_fs__verify(svn_fs_t *fs,
svn_revnum_t start,
@@ -175,6 +396,14 @@ svn_fs_fs__verify(svn_fs_t *fs,
SVN_ERR(svn_fs_fs__ensure_revision_exists(start, fs, pool));
SVN_ERR(svn_fs_fs__ensure_revision_exists(end, fs, pool));
+ /* log/phys index consistency. We need to check them first to make
+ sure we can access the rev / pack files in format7. */
+ if (svn_fs_fs__use_log_addressing(fs, end))
+ SVN_ERR(verify_index_consistency(fs,
+ MAX(start, ffd->min_log_addressing_rev),
+ end, notify_func, notify_baton,
+ cancel_func, cancel_baton, pool));
+
/* rep cache consistency */
if (ffd->format >= SVN_FS_FS__MIN_REP_SHARING_FORMAT)
SVN_ERR(verify_rep_cache(fs, start, end, notify_func, notify_baton,