Author: stefan2
Date: Sun Aug 24 22:49:37 2014
New Revision: 1620224

URL: http://svn.apache.org/r1620224
Log:
On the revprop-caching-ng branch:  Detect and handle revprop generation
file read / write hazards.

At least in theory, even a small file write might get interrupted on some
OS allowing the read to "overtake" the write.  The reader will then see
only partially updated content.  We store a checksum of the serialized
generation number in the same file as the latter.  If both are consistent,
we can be reasonably sure that read contents is valid.

* subversion/include/svn_error_codes.h
  (SVN_ERR_FS_INVALID_GENERATION): Declare new error code.

* subversion/libsvn_fs_fs/revprops.c
  (GENERATION_READ_RETRY_COUNT,
   CHECKSUMMED_NUMBER_BUFFER_LEN): Define new constants.
  (checkedsummed_number): New code to serialize and checksum a number. 
  (verify_extract_number): New code to do the reverse.
  (read_revprop_generation_file): Read the new format and retry in
                                  case of an incomplete update.
  (write_revprop_generation_file,
   svn_fs_fs__reset_revprop_generation_file): Write the new format.

* subversion/tests/cmdline/svnadmin_tests.py
  (check_hotcopy_fsfs_fsx): Ignore the checksum when checking for revprop
                            generation 0.

Modified:
    subversion/branches/revprop-caching-ng/subversion/libsvn_fs_fs/revprops.c

Modified: 
subversion/branches/revprop-caching-ng/subversion/libsvn_fs_fs/revprops.c
URL: 
http://svn.apache.org/viewvc/subversion/branches/revprop-caching-ng/subversion/libsvn_fs_fs/revprops.c?rev=1620224&r1=1620223&r2=1620224&view=diff
==============================================================================
--- subversion/branches/revprop-caching-ng/subversion/libsvn_fs_fs/revprops.c 
(original)
+++ subversion/branches/revprop-caching-ng/subversion/libsvn_fs_fs/revprops.c 
Sun Aug 24 22:49:37 2014
@@ -21,6 +21,7 @@
  */
 
 #include <assert.h>
+#include <apr_md5.h>
 
 #include "svn_pools.h"
 #include "svn_hash.h"
@@ -41,6 +42,16 @@
    process got aborted and that we have re-read revprops. */
 #define REVPROP_CHANGE_TIMEOUT (10 * 1000000)
 
+/* In case of an inconsistent read, close the generation file, yield,
+   re-open and re-read.  This is the number of times we try this before
+   giving up. */
+#define GENERATION_READ_RETRY_COUNT 100
+
+/* Maximum size of the generation number file contents (including NUL). */
+#define CHECKSUMMED_NUMBER_BUFFER_LEN \
+           (SVN_INT64_BUFFER_SIZE + 3 + APR_MD5_DIGESTSIZE * 2)
+
+
 svn_error_t *
 svn_fs_fs__upgrade_pack_revprops(svn_fs_t *fs,
                                  svn_fs_upgrade_notify_t notify_func,
@@ -213,6 +224,62 @@ close_revprop_generation_file(svn_fs_t *
   return SVN_NO_ERROR;
 }
 
+/* Return the textual representation of NUMBER and its checksum in *BUFFER.
+ */
+static svn_error_t *
+checkedsummed_number(svn_stringbuf_t **buffer,
+                     apr_int64_t number,
+                     apr_pool_t *result_pool,
+                     apr_pool_t *scratch_pool)
+{
+  svn_checksum_t *checksum;
+  const char *digest;
+
+  char str[SVN_INT64_BUFFER_SIZE];
+  apr_size_t len = svn__i64toa(str, number);
+  str[len] = 0;
+
+  SVN_ERR(svn_checksum(&checksum, svn_checksum_md5, str, len, scratch_pool));
+  digest = svn_checksum_to_cstring_display(checksum, scratch_pool);
+
+  *buffer = svn_stringbuf_createf(result_pool, "%s %s\n", digest, str);
+
+  return SVN_NO_ERROR;
+}
+
+/* Extract the generation number from the text BUFFER of LEN bytes and
+ * verify it against the checksum in the same BUFFER.  If they match, return
+ * the generation in *NUMBER.  Otherwise, return an error.
+ * BUFFER does not need to be NUL-terminated.
+ */
+static svn_error_t *
+verify_extract_number(apr_int64_t *number,
+                      const char *buffer,
+                      apr_size_t len,
+                      apr_pool_t *scratch_pool)
+{
+  const char *digest_end = strchr(buffer, ' ');
+
+  /* Does the buffer even contain checksum _and_ number? */
+  if (digest_end != NULL)
+    {
+      svn_checksum_t *expected;
+      svn_checksum_t *actual;
+
+      SVN_ERR(svn_checksum_parse_hex(&expected, svn_checksum_md5, buffer,
+                                     scratch_pool));
+      SVN_ERR(svn_checksum(&actual, svn_checksum_md5, digest_end + 1,
+                           (buffer + len) - (digest_end + 1), scratch_pool));
+
+      if (svn_checksum_match(expected, actual))
+        return svn_error_trace(svn_cstring_atoi64(number, digest_end + 1));
+    }
+
+  /* Incomplete buffer or not a match. */
+  return svn_error_create(SVN_ERR_FS_INVALID_GENERATION, NULL,
+                          _("Invalid generation number data."));
+}
+
 /* Read revprop generation as stored on disk for repository FS. The result is
  * returned in *CURRENT.  Call only for repos that support revprop caching.
  */
@@ -222,25 +289,50 @@ read_revprop_generation_file(apr_int64_t
                              apr_pool_t *scratch_pool)
 {
   fs_fs_data_t *ffd = fs->fsap_data;
-  char buf[80];
+  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
+  char buf[CHECKSUMMED_NUMBER_BUFFER_LEN];
   apr_size_t len;
   apr_off_t offset = 0;
+  int i;
+  svn_error_t *err = SVN_NO_ERROR;
 
-  SVN_ERR(ensure_revprop_generation(fs, scratch_pool));
-  SVN_ERR(svn_io_file_seek(ffd->revprop_generation_file, APR_SET, &offset,
-                           scratch_pool));
+  /* Retry in case of incomplete file buffer updates. */
+  for (i = 0; i < GENERATION_READ_RETRY_COUNT; ++i)
+    {
+      svn_error_clear(err);
+      svn_pool_clear(iterpool);
 
-  len = sizeof(buf);
-  SVN_ERR(svn_io_read_length_line(ffd->revprop_generation_file, buf, &len,
-                                  scratch_pool));
+      /* If we can't even access the data, things are very wrong.
+       * Don't retry in that case.
+       */
+      SVN_ERR(ensure_revprop_generation(fs, iterpool));
+      SVN_ERR(svn_io_file_seek(ffd->revprop_generation_file, APR_SET, &offset,
+                              iterpool));
 
-  /* Check that the first line contains only digits. */
-/*  SVN_ERR(svn_fs_fs__check_file_buffer_numeric(buf, 0,
-                                               ffd->revprop_generation_file,
-                                               "Revprop Generation", pool));*/
-  SVN_ERR(svn_cstring_atoi64(current, buf));
+      len = sizeof(buf);
+      SVN_ERR(svn_io_read_length_line(ffd->revprop_generation_file, buf, &len,
+                                      iterpool));
+
+      /* Some data has been read.  It will most likely be complete and
+       * consistent.  Extract and verify anyway. */
+      err = verify_extract_number(current, buf, len, iterpool);
+      if (!err)
+        break;
+
+      /* Got unlucky and data was invalid.  Retry. */
+      SVN_ERR(close_revprop_generation_file(fs, iterpool));
+
+#if APR_HAS_THREADS
+      apr_thread_yield();
+#else
+      apr_sleep(0);
+#endif
+    }
 
-  return SVN_NO_ERROR;
+  svn_pool_destroy(iterpool);
+
+  /* If we had to give up, propagate the error. */
+  return svn_error_trace(err);
 }
 
 /* Write the CURRENT revprop generation to disk for repository FS.
@@ -252,18 +344,18 @@ write_revprop_generation_file(svn_fs_t *
                               apr_pool_t *scratch_pool)
 {
   fs_fs_data_t *ffd = fs->fsap_data;
-  char buf[SVN_INT64_BUFFER_SIZE];
+  svn_stringbuf_t *buffer;
   apr_off_t offset = 0;
 
-  apr_size_t len = svn__i64toa(buf, current);
-  buf[len] = '\n';
+  SVN_ERR(checkedsummed_number(&buffer, current, scratch_pool, scratch_pool));
 
   SVN_ERR(ensure_revprop_generation(fs, scratch_pool));
   SVN_ERR(svn_io_file_seek(ffd->revprop_generation_file, APR_SET, &offset,
                            scratch_pool));
-  SVN_ERR(svn_io_file_write_full(ffd->revprop_generation_file, buf, len + 1,
-                                 NULL, scratch_pool));
-  SVN_ERR(svn_io_file_flush_to_disk(ffd->revprop_generation_file, 
scratch_pool));
+  SVN_ERR(svn_io_file_write_full(ffd->revprop_generation_file, buffer->data,
+                                 buffer->len, NULL, scratch_pool));
+  SVN_ERR(svn_io_file_flush_to_disk(ffd->revprop_generation_file,
+                                    scratch_pool));
 
   return SVN_NO_ERROR;
 }
@@ -288,7 +380,13 @@ svn_fs_fs__reset_revprop_generation_file
    * the current format.  This ensures consistent on-disk state for new
    * format repositories. */
   if (ffd->format >= SVN_FS_FS__MIN_REVPROP_CACHING_FORMAT)
-    SVN_ERR(svn_io_write_atomic(path, "0\n", 2, NULL, scratch_pool));
+    {
+      svn_stringbuf_t *buffer;
+
+      SVN_ERR(checkedsummed_number(&buffer, 0, scratch_pool, scratch_pool));
+      SVN_ERR(svn_io_write_atomic(path, buffer->data, buffer->len, NULL,
+                                  scratch_pool));
+    }
 
   /* ffd->revprop_generation_file will be re-opened on demand. */
 


Reply via email to