Author: stefan2 Date: Tue Jul 16 15:00:49 2013 New Revision: 1503742 URL: http://svn.apache.org/r1503742 Log: On the fsfs-improvements branch: Move recovery code out of fs_fs.* into a new, separate recovery.* pair of files. This requires the creation of two new utility APIs, svn_fs_fs__write_current and svn_fs_fs__find_max_ids. The latter is a wrapper around the existing recover_find_max_ids code.
* subversion/libsvn_fs_fs/util.h (svn_fs_fs__write_current): declare new private API * subversion/libsvn_fs_fs/util.c (svn_fs_fs__write_current): code moved over from fs_fs.c * subversion/libsvn_fs_fs/recovery.h (svn_fs_fs__find_max_ids): declare new private API (svn_fs_fs__recover): declaration moved over from fs_fs.h * subversion/libsvn_fs_fs/recovery.c (recover_read_from_file_baton, recover_baton): types moved over from fs_fs.c (recover_get_largest_revision, read_handler_recover, recover_find_max_ids, recover_body): static functions moved over from fs_fs.c (svn_fs_fs__find_max_ids): implement new API function (svn_fs_fs__recover): code moved over from fs_fs.c * subversion/libsvn_fs_fs/fs.c (): #include new header * subversion/libsvn_fs_fs/fs_fs.c (): #include new header (write_current): moved to util.c (recover_read_from_file_baton, recover_baton, recover_get_largest_revision, read_handler_recover, recover_find_max_ids, recover_body, svn_fs_fs__recover): moved to recovery.c (write_final_current): update caller (hotcopy_update_current): simplify using the new API * subversion/libsvn_fs_fs/fs_fs.h (svn_fs_fs__recover): moved to recovery.h Added: subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/recovery.c subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/recovery.h Modified: subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs.c subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs_fs.c subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs_fs.h subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/util.c subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/util.h Modified: subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs.c URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs.c?rev=1503742&r1=1503741&r2=1503742&view=diff ============================================================================== --- 
subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs.c (original) +++ subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs.c Tue Jul 16 15:00:49 2013 @@ -39,6 +39,7 @@ #include "lock.h" #include "id.h" #include "pack.h" +#include "recovery.h" #include "rep-cache.h" #include "revprops.h" #include "verify.h" Modified: subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs_fs.c URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs_fs.c?rev=1503742&r1=1503741&r2=1503742&view=diff ============================================================================== --- subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs_fs.c (original) +++ subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs_fs.c Tue Jul 16 15:00:49 2013 @@ -60,6 +60,7 @@ #include "id.h" #include "low_level.h" #include "pack.h" +#include "recovery.h" #include "rep-cache.h" #include "revprops.h" #include "temp_serializer.h" @@ -3603,31 +3604,6 @@ write_final_changed_path_info(apr_off_t return SVN_NO_ERROR; } -/* Atomically update the 'current' file to hold the specifed REV, - NEXT_NODE_ID, and NEXT_COPY_ID. (The two next-ID parameters are - ignored and may be NULL if the FS format does not use them.) - Perform temporary allocations in POOL. */ -static svn_error_t * -write_current(svn_fs_t *fs, svn_revnum_t rev, const char *next_node_id, - const char *next_copy_id, apr_pool_t *pool) -{ - char *buf; - const char *name; - fs_fs_data_t *ffd = fs->fsap_data; - - /* Now we can just write out this line. 
*/ - if (ffd->format >= SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT) - buf = apr_psprintf(pool, "%ld\n", rev); - else - buf = apr_psprintf(pool, "%ld %s %s\n", rev, next_node_id, next_copy_id); - - name = svn_fs_fs__path_current(fs, pool); - SVN_ERR(svn_io_write_atomic(name, buf, strlen(buf), - name /* copy_perms_path */, pool)); - - return SVN_NO_ERROR; -} - /* Open a new svn_fs_t handle to FS, set that handle's concept of "current youngest revision" to NEW_REV, and call svn_fs_fs__verify_root() on NEW_REV's revision root. @@ -3690,7 +3666,7 @@ write_final_current(svn_fs_t *fs, fs_fs_data_t *ffd = fs->fsap_data; if (ffd->format >= SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT) - return write_current(fs, rev, NULL, NULL, pool); + return svn_fs_fs__write_current(fs, rev, NULL, NULL, pool); /* To find the next available ids, we add the id that used to be in the 'current' file, to the next ids from the transaction file. */ @@ -3699,7 +3675,7 @@ write_final_current(svn_fs_t *fs, svn_fs_fs__add_keys(start_node_id, txn_node_id, new_node_id); svn_fs_fs__add_keys(start_copy_id, txn_copy_id, new_copy_id); - return write_current(fs, rev, new_node_id, new_copy_id, pool); + return svn_fs_fs__write_current(fs, rev, new_node_id, new_copy_id, pool); } /* Verify that the user registed with FS has all the locks necessary to @@ -4189,424 +4165,6 @@ svn_fs_fs__create(svn_fs_t *fs, return SVN_NO_ERROR; } -/* Part of the recovery procedure. Return the largest revision *REV in - filesystem FS. Use POOL for temporary allocation. */ -static svn_error_t * -recover_get_largest_revision(svn_fs_t *fs, svn_revnum_t *rev, apr_pool_t *pool) -{ - /* Discovering the largest revision in the filesystem would be an - expensive operation if we did a readdir() or searched linearly, - so we'll do a form of binary search. left is a revision that we - know exists, right a revision that we know does not exist. 
*/ - apr_pool_t *iterpool; - svn_revnum_t left, right = 1; - - iterpool = svn_pool_create(pool); - /* Keep doubling right, until we find a revision that doesn't exist. */ - while (1) - { - svn_error_t *err; - apr_file_t *file; - - err = svn_fs_fs__open_pack_or_rev_file(&file, fs, right, iterpool); - svn_pool_clear(iterpool); - - if (err && err->apr_err == SVN_ERR_FS_NO_SUCH_REVISION) - { - svn_error_clear(err); - break; - } - else - SVN_ERR(err); - - right <<= 1; - } - - left = right >> 1; - - /* We know that left exists and right doesn't. Do a normal bsearch to find - the last revision. */ - while (left + 1 < right) - { - svn_revnum_t probe = left + ((right - left) / 2); - svn_error_t *err; - apr_file_t *file; - - err = svn_fs_fs__open_pack_or_rev_file(&file, fs, probe, iterpool); - svn_pool_clear(iterpool); - - if (err && err->apr_err == SVN_ERR_FS_NO_SUCH_REVISION) - { - svn_error_clear(err); - right = probe; - } - else - { - SVN_ERR(err); - left = probe; - } - } - - svn_pool_destroy(iterpool); - - /* left is now the largest revision that exists. */ - *rev = left; - return SVN_NO_ERROR; -} - -/* A baton for reading a fixed amount from an open file. For - recover_find_max_ids() below. */ -struct recover_read_from_file_baton -{ - apr_file_t *file; - apr_pool_t *pool; - apr_off_t remaining; -}; - -/* A stream read handler used by recover_find_max_ids() below. - Read and return at most BATON->REMAINING bytes from the stream, - returning nothing after that to indicate EOF. */ -static svn_error_t * -read_handler_recover(void *baton, char *buffer, apr_size_t *len) -{ - struct recover_read_from_file_baton *b = baton; - svn_filesize_t bytes_to_read = *len; - - if (b->remaining == 0) - { - /* Return a successful read of zero bytes to signal EOF. 
*/ - *len = 0; - return SVN_NO_ERROR; - } - - if (bytes_to_read > b->remaining) - bytes_to_read = b->remaining; - b->remaining -= bytes_to_read; - - return svn_io_file_read_full2(b->file, buffer, (apr_size_t) bytes_to_read, - len, NULL, b->pool); -} - -/* Part of the recovery procedure. Read the directory noderev at offset - OFFSET of file REV_FILE (the revision file of revision REV of - filesystem FS), and set MAX_NODE_ID and MAX_COPY_ID to be the node-id - and copy-id of that node, if greater than the current value stored - in either. Recurse into any child directories that were modified in - this revision. - - MAX_NODE_ID and MAX_COPY_ID must be arrays of at least MAX_KEY_SIZE. - - Perform temporary allocation in POOL. */ -static svn_error_t * -recover_find_max_ids(svn_fs_t *fs, svn_revnum_t rev, - apr_file_t *rev_file, apr_off_t offset, - char *max_node_id, char *max_copy_id, - apr_pool_t *pool) -{ - svn_fs_fs__rep_header_t *header; - struct recover_read_from_file_baton baton; - svn_stream_t *stream; - apr_hash_t *entries; - apr_hash_index_t *hi; - apr_pool_t *iterpool; - node_revision_t *noderev; - - stream = svn_stream_from_aprfile2(rev_file, TRUE, pool); - SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &offset, pool)); - SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, pool)); - - /* Check that this is a directory. It should be. */ - if (noderev->kind != svn_node_dir) - return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, - _("Recovery encountered a non-directory node")); - - /* Get the data location. No data location indicates an empty directory. */ - if (!noderev->data_rep) - return SVN_NO_ERROR; - - /* If the directory's data representation wasn't changed in this revision, - we've already scanned the directory's contents for noderevs, so we don't - need to again. This will occur if a property is changed on a directory - without changing the directory's contents. 
*/ - if (noderev->data_rep->revision != rev) - return SVN_NO_ERROR; - - /* We could use get_dir_contents(), but this is much cheaper. It does - rely on directory entries being stored as PLAIN reps, though. */ - offset = noderev->data_rep->offset; - SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &offset, pool)); - SVN_ERR(svn_fs_fs__read_rep_header(&header, stream, pool)); - if (header->type != svn_fs_fs__rep_plain) - return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, - _("Recovery encountered a deltified directory " - "representation")); - - /* Now create a stream that's allowed to read only as much data as is - stored in the representation. */ - baton.file = rev_file; - baton.pool = pool; - baton.remaining = (apr_size_t) noderev->data_rep->expanded_size; - stream = svn_stream_create(&baton, pool); - svn_stream_set_read(stream, read_handler_recover); - - /* Now read the entries from that stream. */ - entries = apr_hash_make(pool); - SVN_ERR(svn_hash_read2(entries, stream, SVN_HASH_TERMINATOR, pool)); - SVN_ERR(svn_stream_close(stream)); - - /* Now check each of the entries in our directory to find new node and - copy ids, and recurse into new subdirectories. 
*/ - iterpool = svn_pool_create(pool); - for (hi = apr_hash_first(pool, entries); hi; hi = apr_hash_next(hi)) - { - char *str_val; - char *str; - svn_node_kind_t kind; - svn_fs_id_t *id; - const char *node_id, *copy_id; - apr_off_t child_dir_offset; - const svn_string_t *path = svn__apr_hash_index_val(hi); - - svn_pool_clear(iterpool); - - str_val = apr_pstrdup(iterpool, path->data); - - str = svn_cstring_tokenize(" ", &str_val); - if (str == NULL) - return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, - _("Directory entry corrupt")); - - if (strcmp(str, SVN_FS_FS__KIND_FILE) == 0) - kind = svn_node_file; - else if (strcmp(str, SVN_FS_FS__KIND_DIR) == 0) - kind = svn_node_dir; - else - { - return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, - _("Directory entry corrupt")); - } - - str = svn_cstring_tokenize(" ", &str_val); - if (str == NULL) - return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, - _("Directory entry corrupt")); - - id = svn_fs_fs__id_parse(str, strlen(str), iterpool); - - if (svn_fs_fs__id_rev(id) != rev) - { - /* If the node wasn't modified in this revision, we've already - checked the node and copy id. */ - continue; - } - - node_id = svn_fs_fs__id_node_id(id); - copy_id = svn_fs_fs__id_copy_id(id); - - if (svn_fs_fs__key_compare(node_id, max_node_id) > 0) - { - SVN_ERR_ASSERT(strlen(node_id) < MAX_KEY_SIZE); - apr_cpystrn(max_node_id, node_id, MAX_KEY_SIZE); - } - if (svn_fs_fs__key_compare(copy_id, max_copy_id) > 0) - { - SVN_ERR_ASSERT(strlen(copy_id) < MAX_KEY_SIZE); - apr_cpystrn(max_copy_id, copy_id, MAX_KEY_SIZE); - } - - if (kind == svn_node_file) - continue; - - child_dir_offset = svn_fs_fs__id_offset(id); - SVN_ERR(recover_find_max_ids(fs, rev, rev_file, child_dir_offset, - max_node_id, max_copy_id, iterpool)); - } - svn_pool_destroy(iterpool); - - return SVN_NO_ERROR; -} - -/* Baton used for recover_body below. 
*/ -struct recover_baton { - svn_fs_t *fs; - svn_cancel_func_t cancel_func; - void *cancel_baton; -}; - -/* The work-horse for svn_fs_fs__recover, called with the FS - write lock. This implements the svn_fs_fs__with_write_lock() - 'body' callback type. BATON is a 'struct recover_baton *'. */ -static svn_error_t * -recover_body(void *baton, apr_pool_t *pool) -{ - struct recover_baton *b = baton; - svn_fs_t *fs = b->fs; - fs_fs_data_t *ffd = fs->fsap_data; - svn_revnum_t max_rev; - char next_node_id_buf[MAX_KEY_SIZE], next_copy_id_buf[MAX_KEY_SIZE]; - char *next_node_id = NULL, *next_copy_id = NULL; - svn_revnum_t youngest_rev; - svn_node_kind_t youngest_revprops_kind; - - /* Lose potentially corrupted data in temp files */ - SVN_ERR(svn_fs_fs__cleanup_revprop_namespace(fs)); - - /* We need to know the largest revision in the filesystem. */ - SVN_ERR(recover_get_largest_revision(fs, &max_rev, pool)); - - /* Get the expected youngest revision */ - SVN_ERR(get_youngest(&youngest_rev, fs->path, pool)); - - /* Policy note: - - Since the revprops file is written after the revs file, the true - maximum available revision is the youngest one for which both are - present. That's probably the same as the max_rev we just found, - but if it's not, we could, in theory, repeatedly decrement - max_rev until we find a revision that has both a revs and - revprops file, then write db/current with that. - - But we choose not to. If a repository is so corrupt that it's - missing at least one revprops file, we shouldn't assume that the - youngest revision for which both the revs and revprops files are - present is healthy. In other words, we're willing to recover - from a missing or out-of-date db/current file, because db/current - is truly redundant -- it's basically a cache so we don't have to - find max_rev each time, albeit a cache with unusual semantics, - since it also officially defines when a revision goes live. 
But - if we're missing more than the cache, it's time to back out and - let the admin reconstruct things by hand: correctness at that - point may depend on external things like checking a commit email - list, looking in particular working copies, etc. - - This policy matches well with a typical naive backup scenario. - Say you're rsyncing your FSFS repository nightly to the same - location. Once revs and revprops are written, you've got the - maximum rev; if the backup should bomb before db/current is - written, then db/current could stay arbitrarily out-of-date, but - we can still recover. It's a small window, but we might as well - do what we can. */ - - /* Even if db/current were missing, it would be created with 0 by - get_youngest(), so this conditional remains valid. */ - if (youngest_rev > max_rev) - return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, - _("Expected current rev to be <= %ld " - "but found %ld"), max_rev, youngest_rev); - - /* We only need to search for maximum IDs for old FS formats which - se global ID counters. */ - if (ffd->format < SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT) - { - /* Next we need to find the maximum node id and copy id in use across the - filesystem. Unfortunately, the only way we can get this information - is to scan all the noderevs of all the revisions and keep track as - we go along. 
*/ - svn_revnum_t rev; - apr_pool_t *iterpool = svn_pool_create(pool); - char max_node_id[MAX_KEY_SIZE] = "0", max_copy_id[MAX_KEY_SIZE] = "0"; - apr_size_t len; - - for (rev = 0; rev <= max_rev; rev++) - { - apr_file_t *rev_file; - apr_off_t root_offset; - svn_fs_id_t *root_id; - - svn_pool_clear(iterpool); - - if (b->cancel_func) - SVN_ERR(b->cancel_func(b->cancel_baton)); - - SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rev, - iterpool)); - SVN_ERR(svn_fs_fs__rev_get_root(&root_id, fs, rev, iterpool)); - - root_offset = svn_fs_fs__id_offset(root_id); - SVN_ERR(recover_find_max_ids(fs, rev, rev_file, root_offset, - max_node_id, max_copy_id, iterpool)); - SVN_ERR(svn_io_file_close(rev_file, iterpool)); - } - svn_pool_destroy(iterpool); - - /* Now that we finally have the maximum revision, node-id and copy-id, we - can bump the two ids to get the next of each. */ - len = strlen(max_node_id); - svn_fs_fs__next_key(max_node_id, &len, next_node_id_buf); - next_node_id = next_node_id_buf; - len = strlen(max_copy_id); - svn_fs_fs__next_key(max_copy_id, &len, next_copy_id_buf); - next_copy_id = next_copy_id_buf; - } - - /* Before setting current, verify that there is a revprops file - for the youngest revision. 
(Issue #2992) */ - SVN_ERR(svn_io_check_path(svn_fs_fs__path_revprops(fs, max_rev, pool), - &youngest_revprops_kind, pool)); - if (youngest_revprops_kind == svn_node_none) - { - svn_boolean_t missing = TRUE; - if (!svn_fs_fs__packed_revprop_available(&missing, fs, max_rev, pool)) - { - if (missing) - { - return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, - _("Revision %ld has a revs file but no " - "revprops file"), - max_rev); - } - else - { - return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, - _("Revision %ld has a revs file but the " - "revprops file is inaccessible"), - max_rev); - } - } - } - else if (youngest_revprops_kind != svn_node_file) - { - return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, - _("Revision %ld has a non-file where its " - "revprops file should be"), - max_rev); - } - - /* Prune younger-than-(newfound-youngest) revisions from the rep - cache if sharing is enabled taking care not to create the cache - if it does not exist. */ - if (ffd->rep_sharing_allowed) - { - svn_boolean_t rep_cache_exists; - - SVN_ERR(svn_fs_fs__exists_rep_cache(&rep_cache_exists, fs, pool)); - if (rep_cache_exists) - SVN_ERR(svn_fs_fs__del_rep_reference(fs, max_rev, pool)); - } - - /* Now store the discovered youngest revision, and the next IDs if - relevant, in a new 'current' file. */ - return write_current(fs, max_rev, next_node_id, next_copy_id, pool); -} - -/* This implements the fs_library_vtable_t.recover() API. */ -svn_error_t * -svn_fs_fs__recover(svn_fs_t *fs, - svn_cancel_func_t cancel_func, void *cancel_baton, - apr_pool_t *pool) -{ - struct recover_baton b; - - /* We have no way to take out an exclusive lock in FSFS, so we're - restricted as to the types of recovery we can do. Luckily, - we just want to recreate the 'current' file, and we can do that just - by blocking other writers. 
*/ - b.fs = fs; - b.cancel_func = cancel_func; - b.cancel_baton = cancel_baton; - return svn_fs_fs__with_write_lock(fs, recover_body, &b, pool); -} - svn_error_t * svn_fs_fs__set_uuid(svn_fs_t *fs, const char *uuid, @@ -5415,29 +4973,13 @@ hotcopy_update_current(svn_revnum_t *dst /* If necessary, get new current next_node and next_copy IDs. */ if (dst_ffd->format < SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT) - { - apr_off_t root_offset; - apr_file_t *rev_file; - svn_fs_id_t *root_id; - - if (dst_ffd->format >= SVN_FS_FS__MIN_PACKED_FORMAT) - SVN_ERR(svn_fs_fs__update_min_unpacked_rev(dst_fs, scratch_pool)); - - SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, dst_fs, - new_youngest, scratch_pool)); - SVN_ERR(svn_fs_fs__rev_get_root(&root_id, dst_fs, new_youngest, - scratch_pool)); - - root_offset = svn_fs_fs__id_offset(root_id); - SVN_ERR(recover_find_max_ids(dst_fs, new_youngest, rev_file, - root_offset, next_node_id, next_copy_id, - scratch_pool)); - SVN_ERR(svn_io_file_close(rev_file, scratch_pool)); - } + SVN_ERR(svn_fs_fs__find_max_ids(dst_fs, new_youngest, + next_node_id, next_copy_id, + scratch_pool)); /* Update 'current'. 
*/ - SVN_ERR(write_current(dst_fs, new_youngest, next_node_id, next_copy_id, - scratch_pool)); + SVN_ERR(svn_fs_fs__write_current(dst_fs, new_youngest, next_node_id, + next_copy_id, scratch_pool)); *dst_youngest = new_youngest; Modified: subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs_fs.h URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs_fs.h?rev=1503742&r1=1503741&r2=1503742&view=diff ============================================================================== --- subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs_fs.h (original) +++ subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/fs_fs.h Tue Jul 16 15:00:49 2013 @@ -57,14 +57,6 @@ svn_error_t * svn_fs_fs__hotcopy(svn_fs_ void *cancel_baton, apr_pool_t *pool); -/* Recover the fsfs associated with filesystem FS. - Use optional CANCEL_FUNC/CANCEL_BATON for cancellation support. - Use POOL for temporary allocations. */ -svn_error_t *svn_fs_fs__recover(svn_fs_t *fs, - svn_cancel_func_t cancel_func, - void *cancel_baton, - apr_pool_t *pool); - /* Store NODEREV as the node-revision for the node whose id is ID in FS, after setting its is_fresh_txn_root to FRESH_TXN_ROOT. Do any necessary temporary allocation in POOL. 
*/ Added: subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/recovery.c URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/recovery.c?rev=1503742&view=auto ============================================================================== --- subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/recovery.c (added) +++ subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/recovery.c Tue Jul 16 15:00:49 2013 @@ -0,0 +1,481 @@ +/* recovery.c --- FSFS recovery functionality +* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + */ + +#include "recovery.h" + +#include "svn_hash.h" +#include "svn_pools.h" +#include "private/svn_string_private.h" + +#include "key-gen.h" +#include "low_level.h" +#include "rep-cache.h" +#include "revprops.h" +#include "util.h" +#include "cached_data.h" + +#include "../libsvn_fs/fs-loader.h" + +#include "svn_private_config.h" + +/* Part of the recovery procedure. Return the largest revision *REV in + filesystem FS. Use POOL for temporary allocation. 
*/ +static svn_error_t * +recover_get_largest_revision(svn_fs_t *fs, svn_revnum_t *rev, apr_pool_t *pool) +{ + /* Discovering the largest revision in the filesystem would be an + expensive operation if we did a readdir() or searched linearly, + so we'll do a form of binary search. left is a revision that we + know exists, right a revision that we know does not exist. */ + apr_pool_t *iterpool; + svn_revnum_t left, right = 1; + + iterpool = svn_pool_create(pool); + /* Keep doubling right, until we find a revision that doesn't exist. */ + while (1) + { + svn_error_t *err; + apr_file_t *file; + + err = svn_fs_fs__open_pack_or_rev_file(&file, fs, right, iterpool); + svn_pool_clear(iterpool); + + if (err && err->apr_err == SVN_ERR_FS_NO_SUCH_REVISION) + { + svn_error_clear(err); + break; + } + else + SVN_ERR(err); + + right <<= 1; + } + + left = right >> 1; + + /* We know that left exists and right doesn't. Do a normal bsearch to find + the last revision. */ + while (left + 1 < right) + { + svn_revnum_t probe = left + ((right - left) / 2); + svn_error_t *err; + apr_file_t *file; + + err = svn_fs_fs__open_pack_or_rev_file(&file, fs, probe, iterpool); + svn_pool_clear(iterpool); + + if (err && err->apr_err == SVN_ERR_FS_NO_SUCH_REVISION) + { + svn_error_clear(err); + right = probe; + } + else + { + SVN_ERR(err); + left = probe; + } + } + + svn_pool_destroy(iterpool); + + /* left is now the largest revision that exists. */ + *rev = left; + return SVN_NO_ERROR; +} + +/* A baton for reading a fixed amount from an open file. For + recover_find_max_ids() below. */ +struct recover_read_from_file_baton +{ + apr_file_t *file; + apr_pool_t *pool; + apr_off_t remaining; +}; + +/* A stream read handler used by recover_find_max_ids() below. + Read and return at most BATON->REMAINING bytes from the stream, + returning nothing after that to indicate EOF. 
*/ +static svn_error_t * +read_handler_recover(void *baton, char *buffer, apr_size_t *len) +{ + struct recover_read_from_file_baton *b = baton; + svn_filesize_t bytes_to_read = *len; + + if (b->remaining == 0) + { + /* Return a successful read of zero bytes to signal EOF. */ + *len = 0; + return SVN_NO_ERROR; + } + + if (bytes_to_read > b->remaining) + bytes_to_read = b->remaining; + b->remaining -= bytes_to_read; + + return svn_io_file_read_full2(b->file, buffer, (apr_size_t) bytes_to_read, + len, NULL, b->pool); +} + +/* Part of the recovery procedure. Read the directory noderev at offset + OFFSET of file REV_FILE (the revision file of revision REV of + filesystem FS), and set MAX_NODE_ID and MAX_COPY_ID to be the node-id + and copy-id of that node, if greater than the current value stored + in either. Recurse into any child directories that were modified in + this revision. + + MAX_NODE_ID and MAX_COPY_ID must be arrays of at least MAX_KEY_SIZE. + + Perform temporary allocation in POOL. */ +static svn_error_t * +recover_find_max_ids(svn_fs_t *fs, svn_revnum_t rev, + apr_file_t *rev_file, apr_off_t offset, + char *max_node_id, char *max_copy_id, + apr_pool_t *pool) +{ + svn_fs_fs__rep_header_t *header; + struct recover_read_from_file_baton baton; + svn_stream_t *stream; + apr_hash_t *entries; + apr_hash_index_t *hi; + apr_pool_t *iterpool; + node_revision_t *noderev; + + stream = svn_stream_from_aprfile2(rev_file, TRUE, pool); + SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &offset, pool)); + SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, pool)); + + /* Check that this is a directory. It should be. */ + if (noderev->kind != svn_node_dir) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Recovery encountered a non-directory node")); + + /* Get the data location. No data location indicates an empty directory. 
*/ + if (!noderev->data_rep) + return SVN_NO_ERROR; + + /* If the directory's data representation wasn't changed in this revision, + we've already scanned the directory's contents for noderevs, so we don't + need to again. This will occur if a property is changed on a directory + without changing the directory's contents. */ + if (noderev->data_rep->revision != rev) + return SVN_NO_ERROR; + + /* We could use get_dir_contents(), but this is much cheaper. It does + rely on directory entries being stored as PLAIN reps, though. */ + offset = noderev->data_rep->offset; + SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &offset, pool)); + SVN_ERR(svn_fs_fs__read_rep_header(&header, stream, pool)); + if (header->type != svn_fs_fs__rep_plain) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Recovery encountered a deltified directory " + "representation")); + + /* Now create a stream that's allowed to read only as much data as is + stored in the representation. */ + baton.file = rev_file; + baton.pool = pool; + baton.remaining = (apr_size_t) noderev->data_rep->expanded_size; + stream = svn_stream_create(&baton, pool); + svn_stream_set_read(stream, read_handler_recover); + + /* Now read the entries from that stream. */ + entries = apr_hash_make(pool); + SVN_ERR(svn_hash_read2(entries, stream, SVN_HASH_TERMINATOR, pool)); + SVN_ERR(svn_stream_close(stream)); + + /* Now check each of the entries in our directory to find new node and + copy ids, and recurse into new subdirectories. 
*/ + iterpool = svn_pool_create(pool); + for (hi = apr_hash_first(pool, entries); hi; hi = apr_hash_next(hi)) + { + char *str_val; + char *str; + svn_node_kind_t kind; + svn_fs_id_t *id; + const char *node_id, *copy_id; + apr_off_t child_dir_offset; + const svn_string_t *path = svn__apr_hash_index_val(hi); + + svn_pool_clear(iterpool); + + str_val = apr_pstrdup(iterpool, path->data); + + str = svn_cstring_tokenize(" ", &str_val); + if (str == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Directory entry corrupt")); + + if (strcmp(str, SVN_FS_FS__KIND_FILE) == 0) + kind = svn_node_file; + else if (strcmp(str, SVN_FS_FS__KIND_DIR) == 0) + kind = svn_node_dir; + else + { + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Directory entry corrupt")); + } + + str = svn_cstring_tokenize(" ", &str_val); + if (str == NULL) + return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, + _("Directory entry corrupt")); + + id = svn_fs_fs__id_parse(str, strlen(str), iterpool); + + if (svn_fs_fs__id_rev(id) != rev) + { + /* If the node wasn't modified in this revision, we've already + checked the node and copy id. 
*/ + continue; + } + + node_id = svn_fs_fs__id_node_id(id); + copy_id = svn_fs_fs__id_copy_id(id); + + if (svn_fs_fs__key_compare(node_id, max_node_id) > 0) + { + SVN_ERR_ASSERT(strlen(node_id) < MAX_KEY_SIZE); + apr_cpystrn(max_node_id, node_id, MAX_KEY_SIZE); + } + if (svn_fs_fs__key_compare(copy_id, max_copy_id) > 0) + { + SVN_ERR_ASSERT(strlen(copy_id) < MAX_KEY_SIZE); + apr_cpystrn(max_copy_id, copy_id, MAX_KEY_SIZE); + } + + if (kind == svn_node_file) + continue; + + child_dir_offset = svn_fs_fs__id_offset(id); + SVN_ERR(recover_find_max_ids(fs, rev, rev_file, child_dir_offset, + max_node_id, max_copy_id, iterpool)); + } + svn_pool_destroy(iterpool); + + return SVN_NO_ERROR; +} + +svn_error_t * +svn_fs_fs__find_max_ids(svn_fs_t *fs, svn_revnum_t youngest, + char *max_node_id, char *max_copy_id, + apr_pool_t *pool) +{ + fs_fs_data_t *ffd = fs->fsap_data; + apr_off_t root_offset; + apr_file_t *rev_file; + svn_fs_id_t *root_id; + + /* call this function for old repo formats only */ + SVN_ERR_ASSERT(ffd->format < SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT); + + SVN_ERR(svn_fs_fs__rev_get_root(&root_id, fs, youngest, pool)); + root_offset = svn_fs_fs__id_offset(root_id); + + SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, youngest, pool)); + SVN_ERR(recover_find_max_ids(fs, youngest, rev_file, root_offset, + max_node_id, max_copy_id, pool)); + SVN_ERR(svn_io_file_close(rev_file, pool)); + + return SVN_NO_ERROR; +} + +/* Baton used for recover_body below. */ +struct recover_baton { + svn_fs_t *fs; + svn_cancel_func_t cancel_func; + void *cancel_baton; +}; + +/* The work-horse for svn_fs_fs__recover, called with the FS + write lock. This implements the svn_fs_fs__with_write_lock() + 'body' callback type. BATON is a 'struct recover_baton *'. 
*/ +static svn_error_t * +recover_body(void *baton, apr_pool_t *pool) +{ + struct recover_baton *b = baton; + svn_fs_t *fs = b->fs; + fs_fs_data_t *ffd = fs->fsap_data; + svn_revnum_t max_rev; + char next_node_id_buf[MAX_KEY_SIZE], next_copy_id_buf[MAX_KEY_SIZE]; + char *next_node_id = NULL, *next_copy_id = NULL; + svn_revnum_t youngest_rev; + svn_node_kind_t youngest_revprops_kind; + + /* Lose potentially corrupted data in temp files */ + SVN_ERR(svn_fs_fs__cleanup_revprop_namespace(fs)); + + /* We need to know the largest revision in the filesystem. */ + SVN_ERR(recover_get_largest_revision(fs, &max_rev, pool)); + + /* Get the expected youngest revision */ + SVN_ERR(svn_fs_fs__youngest_rev(&youngest_rev, fs, pool)); + + /* Policy note: + + Since the revprops file is written after the revs file, the true + maximum available revision is the youngest one for which both are + present. That's probably the same as the max_rev we just found, + but if it's not, we could, in theory, repeatedly decrement + max_rev until we find a revision that has both a revs and + revprops file, then write db/current with that. + + But we choose not to. If a repository is so corrupt that it's + missing at least one revprops file, we shouldn't assume that the + youngest revision for which both the revs and revprops files are + present is healthy. In other words, we're willing to recover + from a missing or out-of-date db/current file, because db/current + is truly redundant -- it's basically a cache so we don't have to + find max_rev each time, albeit a cache with unusual semantics, + since it also officially defines when a revision goes live. But + if we're missing more than the cache, it's time to back out and + let the admin reconstruct things by hand: correctness at that + point may depend on external things like checking a commit email + list, looking in particular working copies, etc. + + This policy matches well with a typical naive backup scenario. 
+ Say you're rsyncing your FSFS repository nightly to the same + location. Once revs and revprops are written, you've got the + maximum rev; if the backup should bomb before db/current is + written, then db/current could stay arbitrarily out-of-date, but + we can still recover. It's a small window, but we might as well + do what we can. */ + + /* Even if db/current were missing, it would be created with 0 by + get_youngest(), so this conditional remains valid. */ + if (youngest_rev > max_rev) + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Expected current rev to be <= %ld " + "but found %ld"), max_rev, youngest_rev); + + /* We only need to search for maximum IDs for old FS formats which + use global ID counters. */ + if (ffd->format < SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT) + { + /* Next we need to find the maximum node id and copy id in use across the + filesystem. Unfortunately, the only way we can get this information + is to scan all the noderevs of all the revisions and keep track as + we go along. */ + svn_revnum_t rev; + apr_pool_t *iterpool = svn_pool_create(pool); + char max_node_id[MAX_KEY_SIZE] = "0", max_copy_id[MAX_KEY_SIZE] = "0"; + apr_size_t len; + + for (rev = 0; rev <= max_rev; rev++) + { + apr_file_t *rev_file; + apr_off_t root_offset; + svn_fs_id_t *root_id; + + svn_pool_clear(iterpool); + + if (b->cancel_func) + SVN_ERR(b->cancel_func(b->cancel_baton)); + + SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rev, + iterpool)); + SVN_ERR(svn_fs_fs__rev_get_root(&root_id, fs, rev, iterpool)); + + root_offset = svn_fs_fs__id_offset(root_id); + SVN_ERR(recover_find_max_ids(fs, rev, rev_file, root_offset, + max_node_id, max_copy_id, iterpool)); + SVN_ERR(svn_io_file_close(rev_file, iterpool)); + } + svn_pool_destroy(iterpool); + + /* Now that we finally have the maximum revision, node-id and copy-id, we + can bump the two ids to get the next of each. 
*/ + len = strlen(max_node_id); + svn_fs_fs__next_key(max_node_id, &len, next_node_id_buf); + next_node_id = next_node_id_buf; + len = strlen(max_copy_id); + svn_fs_fs__next_key(max_copy_id, &len, next_copy_id_buf); + next_copy_id = next_copy_id_buf; + } + + /* Before setting current, verify that there is a revprops file + for the youngest revision. (Issue #2992) */ + SVN_ERR(svn_io_check_path(svn_fs_fs__path_revprops(fs, max_rev, pool), + &youngest_revprops_kind, pool)); + if (youngest_revprops_kind == svn_node_none) + { + svn_boolean_t missing = TRUE; + if (!svn_fs_fs__packed_revprop_available(&missing, fs, max_rev, pool)) + { + if (missing) + { + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Revision %ld has a revs file but no " + "revprops file"), + max_rev); + } + else + { + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Revision %ld has a revs file but the " + "revprops file is inaccessible"), + max_rev); + } + } + } + else if (youngest_revprops_kind != svn_node_file) + { + return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, + _("Revision %ld has a non-file where its " + "revprops file should be"), + max_rev); + } + + /* Prune younger-than-(newfound-youngest) revisions from the rep + cache if sharing is enabled taking care not to create the cache + if it does not exist. */ + if (ffd->rep_sharing_allowed) + { + svn_boolean_t rep_cache_exists; + + SVN_ERR(svn_fs_fs__exists_rep_cache(&rep_cache_exists, fs, pool)); + if (rep_cache_exists) + SVN_ERR(svn_fs_fs__del_rep_reference(fs, max_rev, pool)); + } + + /* Now store the discovered youngest revision, and the next IDs if + relevant, in a new 'current' file. */ + return svn_fs_fs__write_current(fs, max_rev, next_node_id, next_copy_id, + pool); +} + +/* This implements the fs_library_vtable_t.recover() API. 
*/ +svn_error_t * +svn_fs_fs__recover(svn_fs_t *fs, + svn_cancel_func_t cancel_func, void *cancel_baton, + apr_pool_t *pool) +{ + struct recover_baton b; + + /* We have no way to take out an exclusive lock in FSFS, so we're + restricted as to the types of recovery we can do. Luckily, + we just want to recreate the 'current' file, and we can do that just + by blocking other writers. */ + b.fs = fs; + b.cancel_func = cancel_func; + b.cancel_baton = cancel_baton; + return svn_fs_fs__with_write_lock(fs, recover_body, &b, pool); +} Added: subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/recovery.h URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/recovery.h?rev=1503742&view=auto ============================================================================== --- subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/recovery.h (added) +++ subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/recovery.h Tue Jul 16 15:00:49 2013 @@ -0,0 +1,46 @@ +/* recovery.h : interface to the FSFS recovery functionality + * + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * ==================================================================== + */ + +#ifndef SVN_LIBSVN_FS__RECOVERY_H +#define SVN_LIBSVN_FS__RECOVERY_H + +#include "fs.h" + +/* Find the largest ("max") node ID and copy ID in FS with the given + YOUNGEST revision. Return the results in the pre-allocated MAX_NODE_ID + and MAX_COPY_ID data buffers, respectively. Use POOL for allocations. */ +svn_error_t * +svn_fs_fs__find_max_ids(svn_fs_t *fs, + svn_revnum_t youngest, + char *max_node_id, + char *max_copy_id, + apr_pool_t *pool); + +/* Recover the fsfs associated with filesystem FS. + Use optional CANCEL_FUNC/CANCEL_BATON for cancellation support. + Use POOL for temporary allocations. */ +svn_error_t *svn_fs_fs__recover(svn_fs_t *fs, + svn_cancel_func_t cancel_func, + void *cancel_baton, + apr_pool_t *pool); + +#endif Modified: subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/util.c URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/util.c?rev=1503742&r1=1503741&r2=1503742&view=diff ============================================================================== --- subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/util.c (original) +++ subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/util.c Tue Jul 16 15:00:49 2013 @@ -306,6 +306,30 @@ svn_fs_fs__write_revnum_file(svn_fs_t *f } svn_error_t * +svn_fs_fs__write_current(svn_fs_t *fs, + svn_revnum_t rev, + const char *next_node_id, + const char *next_copy_id, + apr_pool_t *pool) +{ + char *buf; + const char *name; + fs_fs_data_t *ffd = fs->fsap_data; + + /* Now we can just write out this line. 
*/ + if (ffd->format >= SVN_FS_FS__MIN_NO_GLOBAL_IDS_FORMAT) + buf = apr_psprintf(pool, "%ld\n", rev); + else + buf = apr_psprintf(pool, "%ld %s %s\n", rev, next_node_id, next_copy_id); + + name = svn_fs_fs__path_current(fs, pool); + SVN_ERR(svn_io_write_atomic(name, buf, strlen(buf), + name /* copy_perms_path */, pool)); + + return SVN_NO_ERROR; +} + +svn_error_t * svn_fs_fs__try_stringbuf_from_file(svn_stringbuf_t **content, svn_boolean_t *missing, const char *path, Modified: subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/util.h URL: http://svn.apache.org/viewvc/subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/util.h?rev=1503742&r1=1503741&r2=1503742&view=diff ============================================================================== --- subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/util.h (original) +++ subversion/branches/fsfs-improvements/subversion/libsvn_fs_fs/util.h Tue Jul 16 15:00:49 2013 @@ -235,6 +235,17 @@ svn_fs_fs__write_revnum_file(svn_fs_t *f svn_revnum_t revnum, apr_pool_t *scratch_pool); +/* Atomically update the 'current' file to hold the specified REV, + NEXT_NODE_ID, and NEXT_COPY_ID. (The two next-ID parameters are + ignored and may be 0 if the FS format does not use them.) + Perform temporary allocations in POOL. */ +svn_error_t * +svn_fs_fs__write_current(svn_fs_t *fs, + svn_revnum_t rev, + const char *next_node_id, + const char *next_copy_id, + apr_pool_t *pool); + /* Read the file at PATH and return its content in *CONTENT. *CONTENT will * not be modified unless the whole file was read successfully. *