Author: stefan2
Date: Fri Aug  9 10:29:28 2013
New Revision: 1512240

URL: http://svn.apache.org/r1512240
Log:
On the log-addressing branch:  Extend verification code to check
for index consistency, i.e. log-to-phys must match phys-to-log 1:1.

To do this, we need two extra functions in the index API that
basically tell us the size of the index.

* subversion/libsvn_fs_fs/index.h
  (svn_fs_fs__l2p_get_max_ids,
   svn_fs_fs__p2l_get_max_offset): declare new private API

* subversion/libsvn_fs_fs/index.c
  (get_l2p_header): caching wrapper around get_l2p_header_body as
                    the other functions won't return the full header
  (svn_fs_fs__l2p_get_max_ids): implement
  (p2l_get_max_offset_func): cache data extraction function
  (svn_fs_fs__p2l_get_max_offset): implement

* subversion/libsvn_fs_fs/verify.c
  (compare_l2p_to_p2l_index,
   compare_p2l_to_l2p_index): implement two-way index comparison
  (verify_index_consistency): new verification function
  (svn_fs_fs__verify): call the new function for the log-addressed range

Modified:
    subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c
    subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h
    subversion/branches/log-addressing/subversion/libsvn_fs_fs/verify.c

Modified: subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c
URL: 
http://svn.apache.org/viewvc/subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c?rev=1512240&r1=1512239&r2=1512240&view=diff
==============================================================================
--- subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c 
(original)
+++ subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c Fri Aug  
9 10:29:28 2013
@@ -1347,6 +1347,90 @@ l2p_proto_index_lookup(apr_off_t *offset
   return SVN_NO_ERROR;
 }
 
+/* Read the log-to-phys header info of the index covering REVISION from FS
+ * and return it in *HEADER.  To maximize efficiency, use or return the
+ * data stream in *STREAM.  Use POOL for allocations.
+ */
+static svn_error_t *
+get_l2p_header(l2p_header_t **header,
+               packed_number_stream_t **stream,
+               svn_fs_t *fs,
+               svn_revnum_t revision,
+               apr_pool_t *pool)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+  svn_boolean_t is_cached = FALSE;
+
+  /* first, try cache lookup */
+  pair_cache_key_t key;
+  key.revision = base_revision(fs, revision);
+  key.second = svn_fs_fs__is_packed_rev(fs, revision);
+  SVN_ERR(svn_cache__get((void**)header, &is_cached, ffd->l2p_header_cache,
+                         &key, pool));
+  if (is_cached)
+    return SVN_NO_ERROR;
+
+  /* read from disk and cache the result */
+  SVN_ERR(get_l2p_header_body(header, stream, fs, revision, pool));
+
+  return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_fs__l2p_get_max_ids(apr_array_header_t **max_ids,
+                           svn_fs_t *fs,
+                           svn_revnum_t start_rev,
+                           apr_size_t count,
+                           apr_pool_t *pool)
+{
+  l2p_header_t *header = NULL;
+  svn_revnum_t revision;
+  svn_revnum_t last_rev = (svn_revnum_t)(start_rev + count);
+  packed_number_stream_t *stream = NULL;
+  apr_pool_t *header_pool = svn_pool_create(pool);
+
+  /* read index master data structure for the index covering START_REV */
+  SVN_ERR(get_l2p_header(&header, &stream, fs, start_rev, header_pool));
+  SVN_ERR(packed_stream_close(stream));
+  stream = NULL;
+
+  /* Determine the length of the item index list for each rev.
+   * Read new index headers as required. */
+  *max_ids = apr_array_make(pool, (int)count, sizeof(apr_uint64_t));
+  for (revision = start_rev; revision < last_rev; ++revision)
+    {
+      apr_uint64_t full_page_count;
+      apr_uint64_t item_count;
+      apr_size_t first_page_index, last_page_index;
+
+      if (revision >= header->first_revision + header->revision_count)
+        {
+          /* need to read the next index. Clear up memory used for the
+           * previous one. */
+          svn_pool_clear(header_pool);
+          SVN_ERR(get_l2p_header(&header, &stream, fs, revision,
+                                 header_pool));
+          SVN_ERR(packed_stream_close(stream));
+          stream = NULL;
+        }
+
+      /* in a revision with N index pages, the first N-1 index pages are
+       * "full", i.e. contain HEADER->PAGE_SIZE entries */
+      first_page_index
+         = header->page_table_index[revision - header->first_revision];
+      last_page_index
+         = header->page_table_index[revision - header->first_revision + 1];
+      full_page_count = last_page_index - first_page_index - 1;
+      item_count = full_page_count * header->page_size
+                 + header->page_table[last_page_index - 1].entry_count;
+
+      APR_ARRAY_PUSH(*max_ids, apr_uint64_t) = item_count;
+    }
+
+  svn_pool_destroy(header_pool);
+  return SVN_NO_ERROR;
+}
+
 svn_error_t *
 svn_fs_fs__item_offset(apr_off_t *absolute_position,
                        svn_fs_t *fs,
@@ -2252,6 +2336,58 @@ svn_fs_fs__p2l_entry_lookup(svn_fs_fs__p
   return SVN_NO_ERROR;
 }
 
+/* Implements svn_cache__partial_getter_func_t for P2L headers, setting *OUT
+ * to the first offset not covered by this P2L index.
+ */
+static svn_error_t *
+p2l_get_max_offset_func(void **out,
+                        const void *data,
+                        apr_size_t data_len,
+                        void *baton,
+                        apr_pool_t *result_pool)
+{
+  const p2l_header_t *header = data;
+  apr_off_t max_offset = header->page_size * header->page_count;
+  *out = apr_pmemdup(result_pool, &max_offset, sizeof(max_offset));
+
+  return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_fs_fs__p2l_get_max_offset(apr_off_t *offset,
+                              svn_fs_t *fs,
+                              svn_revnum_t revision,
+                              apr_pool_t *pool)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+  packed_number_stream_t *stream = NULL;
+  p2l_header_t *header;
+  svn_boolean_t is_cached = FALSE;
+  apr_off_t *offset_p;
+
+  /* look for the header data in our cache */
+  pair_cache_key_t key;
+  key.revision = base_revision(fs, revision);
+  key.second = svn_fs_fs__is_packed_rev(fs, revision);
+
+  SVN_ERR(svn_cache__get_partial((void **)&offset_p, &is_cached,
+                                 ffd->p2l_header_cache, &key,
+                                 p2l_get_max_offset_func, NULL, pool));
+  if (is_cached)
+    {
+      *offset = *offset_p;
+      return SVN_NO_ERROR;
+    }
+
+  SVN_ERR(get_p2l_header(&header, &stream, fs, revision, pool, pool));
+  *offset = header->page_count * header->page_size;
+  
+  /* make sure we close files after usage */
+  SVN_ERR(packed_stream_close(stream));
+
+  return SVN_NO_ERROR;
+}
+
 /*
  * Standard (de-)serialization functions
  */

Modified: subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h
URL: 
http://svn.apache.org/viewvc/subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h?rev=1512240&r1=1512239&r2=1512240&view=diff
==============================================================================
--- subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h 
(original)
+++ subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h Fri Aug  
9 10:29:28 2013
@@ -181,6 +181,28 @@ svn_fs_fs__item_offset(apr_off_t *absolu
                        apr_uint64_t item_index,
                        apr_pool_t *pool);
 
+/* Use the log-to-phys indexes in FS to determine the maximum item indexes
+ * assigned to revisions START_REV to START_REV + COUNT - 1.  That is a
+ * close upper limit to the actual number of items in the respective revs.
+ * Return the results in *MAX_IDS, allocated in POOL.
+ */
+svn_error_t *
+svn_fs_fs__l2p_get_max_ids(apr_array_header_t **max_ids,
+                           svn_fs_t *fs,
+                           svn_revnum_t start_rev,
+                           apr_size_t count,
+                           apr_pool_t *pool);
+
+/* In *OFFSET, return the first offset in the pack / rev file containing
+ * REVISION in FS not covered by the phys-to-log index.
+ * Use POOL for allocations.
+ */
+svn_error_t *
+svn_fs_fs__p2l_get_max_offset(apr_off_t *offset,
+                              svn_fs_t *fs,
+                              svn_revnum_t revision,
+                              apr_pool_t *pool);
+
 /* Serialization and caching interface
  */
 

Modified: subversion/branches/log-addressing/subversion/libsvn_fs_fs/verify.c
URL: 
http://svn.apache.org/viewvc/subversion/branches/log-addressing/subversion/libsvn_fs_fs/verify.c?rev=1512240&r1=1512239&r2=1512240&view=diff
==============================================================================
--- subversion/branches/log-addressing/subversion/libsvn_fs_fs/verify.c 
(original)
+++ subversion/branches/log-addressing/subversion/libsvn_fs_fs/verify.c Fri Aug 
 9 10:29:28 2013
@@ -20,12 +20,15 @@
  * ====================================================================
  */
 
+#include "svn_sorts.h"
+
 #include "verify.h"
 #include "fs_fs.h"
 
 #include "cached_data.h"
 #include "rep-cache.h"
 #include "util.h"
+#include "index.h"
 
 #include "../libsvn_fs/fs-loader.h"
 
@@ -154,6 +157,224 @@ verify_rep_cache(svn_fs_t *fs,
   return SVN_NO_ERROR;
 }
 
+/* Verify that for all log-to-phys index entries for revisions START to
+ * START + COUNT-1 in FS there is a consistent entry in the phys-to-log
+ * index.  If given, invoke CANCEL_FUNC with CANCEL_BATON at regular
+ * intervals. Use POOL for allocations.
+ */
+static svn_error_t *
+compare_l2p_to_p2l_index(svn_fs_t *fs,
+                         svn_revnum_t start,
+                         svn_revnum_t count,
+                         svn_cancel_func_t cancel_func,
+                         void *cancel_baton,
+                         apr_pool_t *pool)
+{
+  svn_revnum_t i;
+  apr_pool_t *iterpool = svn_pool_create(pool);
+  apr_array_header_t *max_ids;
+
+  /* determine the range of items to check for each revision */
+  SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, start, count, pool));
+
+  /* check all items in all revisions of the given range */
+  for (i = 0; i < max_ids->nelts; ++i)
+    {
+      apr_uint64_t k;
+      apr_uint64_t max_id = APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
+      svn_revnum_t revision = start + i;
+
+      for (k = 0; k < max_id; ++k)
+        {
+          apr_off_t offset;
+          svn_fs_fs__p2l_entry_t *p2l_entry;
+
+          /* get L2P entry.  Ignore unused entries. */
+          SVN_ERR(svn_fs_fs__item_offset(&offset, fs,
+                                         revision, NULL, k, iterpool));
+          if (offset == -1)
+            continue;
+
+          /* find the corresponding P2L entry */
+          SVN_ERR(svn_fs_fs__p2l_entry_lookup(&p2l_entry, fs, start,
+                                              offset, iterpool));
+
+          if (p2l_entry == NULL)
+            return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_INCONSISTENT,
+                                     NULL,
+                                     _("p2l index entry not found for "
+                                       "PHYS %s returned by "
+                                       "l2p index for LOG r%ld:i%ld"),
+                                     apr_off_t_toa(pool, offset),
+                                     revision, (long)k);
+
+          if (   p2l_entry->item.number != k
+              || p2l_entry->item.revision != revision)
+            return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_INCONSISTENT,
+                                     NULL,
+                                     _("p2l index info LOG r%ld:i%ld"
+                                       " does not match "
+                                       "l2p index for LOG r%ld:i%ld"),
+                                     p2l_entry->item.revision,
+                                     (long)p2l_entry->item.number,
+                                     revision, (long)k);
+
+          svn_pool_clear(iterpool);
+        }
+
+      if (cancel_func)
+        SVN_ERR(cancel_func(cancel_baton));
+    }
+
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
+/* Verify that for all phys-to-log index entries for revisions START to
+ * START + COUNT-1 in FS there is a consistent entry in the log-to-phys
+ * index.  If given, invoke CANCEL_FUNC with CANCEL_BATON at regular
+ * intervals. Use POOL for allocations.
+ *
+ * Please note that we can only check on pack / rev file granularity, so
+ * this function must only be called for a single rev / pack file.
+ */
+static svn_error_t *
+compare_p2l_to_l2p_index(svn_fs_t *fs,
+                         svn_revnum_t start,
+                         svn_revnum_t count,
+                         svn_cancel_func_t cancel_func,
+                         void *cancel_baton,
+                         apr_pool_t *pool)
+{
+  apr_pool_t *iterpool = svn_pool_create(pool);
+  apr_off_t max_offset;
+  apr_off_t offset = 0;
+
+  /* get the size of the rev / pack file as covered by the P2L index */
+  SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, fs, start, pool));
+
+  /* for all offsets in the file, get the P2L index entries and check
+     them against the L2P index */
+  for (offset = 0; offset < max_offset; )
+    {
+      apr_array_header_t *entries;
+      svn_fs_fs__p2l_entry_t *last_entry;
+      int i;
+
+      /* get all entries for the current block */
+      SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, start, offset,
+                                          iterpool));
+      if (entries->nelts == 0)
+        return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_CORRUPTION,
+                                 NULL,
+                                 _("p2l does not cover offset %s"
+                                   " for revision %ld"),
+                                  apr_off_t_toa(pool, offset), start);
+
+      /* process all entries (and later continue with the next block) */
+      last_entry
+        = &APR_ARRAY_IDX(entries, entries->nelts-1, svn_fs_fs__p2l_entry_t);
+      offset = last_entry->offset + last_entry->size;
+      
+      for (i = 0; i < entries->nelts; ++i)
+        {
+          svn_fs_fs__p2l_entry_t *entry
+            = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
+
+          /* check all sub-items for consistent entries in the L2P index */
+          if (entry->type != SVN_FS_FS__ITEM_TYPE_UNUSED)
+            {
+              apr_off_t l2p_offset;
+              SVN_ERR(svn_fs_fs__item_offset(&l2p_offset, fs,
+                                             entry->item.revision, NULL,
+                                             entry->item.number, iterpool));
+
+              if (l2p_offset != entry->offset)
+                return svn_error_createf(SVN_ERR_FS_ITEM_INDEX_INCONSISTENT,
+                                         NULL,
+                                         _("l2p index entry PHYS %s"
+                                           "does not match p2l index value "
+                                           "LOG r%ld:i%ld for PHYS %s"),
+                                         apr_off_t_toa(pool, l2p_offset),
+                                         entry->item.revision,
+                                         (long)entry->item.number,
+                                         apr_off_t_toa(pool, entry->offset));
+            }
+        }
+
+      svn_pool_clear(iterpool);
+
+      if (cancel_func)
+        SVN_ERR(cancel_func(cancel_baton));
+    }
+
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
+static svn_revnum_t
+packed_base_rev(svn_fs_t *fs, svn_revnum_t rev)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+
+  return rev < ffd->min_unpacked_rev
+       ? rev - (rev % ffd->max_files_per_dir)
+       : rev;
+}
+
+static svn_revnum_t
+pack_size(svn_fs_t *fs, svn_revnum_t rev)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+
+  return rev < ffd->min_unpacked_rev ? ffd->max_files_per_dir : 1;
+}
+
+/* Verify that the log-to-phys indexes and phys-to-log indexes are
+ * consistent with each other.  The function signature is similar to
+ * svn_fs_fs__verify.
+ *
+ * The values of START and END have already been auto-selected and
+ * verified.  You may call this for format7 or higher repos.
+ */
+static svn_error_t *
+verify_index_consistency(svn_fs_t *fs,
+                         svn_revnum_t start,
+                         svn_revnum_t end,
+                         svn_fs_progress_notify_func_t notify_func,
+                         void *notify_baton,
+                         svn_cancel_func_t cancel_func,
+                         void *cancel_baton,
+                         apr_pool_t *pool)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+  svn_revnum_t revision, pack_start, pack_end;
+  apr_pool_t *iterpool = svn_pool_create(pool);
+
+  for (revision = start; revision <= end; revision = pack_end)
+    {
+      pack_start = packed_base_rev(fs, revision);
+      pack_end = pack_start + pack_size(fs, revision);
+
+      if (notify_func && (pack_start % ffd->max_files_per_dir == 0))
+        notify_func(pack_start, notify_baton, iterpool);
+
+      /* two-way index check */
+      SVN_ERR(compare_l2p_to_p2l_index(fs, pack_start, pack_end - pack_start,
+                                       cancel_func, cancel_baton, iterpool));
+      SVN_ERR(compare_p2l_to_l2p_index(fs, pack_start, pack_end - pack_start,
+                                       cancel_func, cancel_baton, iterpool));
+
+      svn_pool_clear(iterpool);
+    }
+
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
 svn_error_t *
 svn_fs_fs__verify(svn_fs_t *fs,
                   svn_revnum_t start,
@@ -175,6 +396,14 @@ svn_fs_fs__verify(svn_fs_t *fs,
   SVN_ERR(svn_fs_fs__ensure_revision_exists(start, fs, pool));
   SVN_ERR(svn_fs_fs__ensure_revision_exists(end, fs, pool));
 
+  /* log/phys index consistency.  We need to check them first to make
+     sure we can access the rev / pack files in format7. */
+  if (svn_fs_fs__use_log_addressing(fs, end))
+    SVN_ERR(verify_index_consistency(fs,
+                                     MAX(start, ffd->min_log_addressing_rev),
+                                     end, notify_func, notify_baton,
+                                     cancel_func, cancel_baton, pool));
+
   /* rep cache consistency */
   if (ffd->format >= SVN_FS_FS__MIN_REP_SHARING_FORMAT)
     SVN_ERR(verify_rep_cache(fs, start, end, notify_func, notify_baton,


Reply via email to