I'm working towards cleaning up the pristine store automatically.
Although not strictly necessary, I think it will be helpful if a
pristine text checksum in the DB shall be guaranteed always to reference
a pristine text that is currently in the store. In support of this
goal, the current patch enables SQLite checking for that constraint (no
effect in sqlite versions < 3.6.19) and adjusts the 1.6-to-1.7 upgrade
code to follow that rule.
I think I only made minor edits since it last passed all tests.
Any feedback before I commit it? Philip, you probably know the upgrade
code best. How does this look?
- Julian
Enforce in the WC DB schema that a pristine text checksum always references
a pristine text that is in the store. To accord with this, change the
1.6-to-1.7 upgrade code so that it populates the pristine store before the
NODES table, and so that it always writes the SHA-1 checksum (not the MD-5)
into the NODES table.
### The assertions in write_entry are a bit ugly and in one case incomplete.
* subversion/libsvn_wc/wc-metadata.sql
(ACTUAL_NODE): Declare 'older_checksum', 'left_checksum', 'right_checksum'
as referencing a pristine text.
(NODES): Declare 'checksum' as referencing a pristine text.
* subversion/libsvn_wc/entries.c
(write_entry): Receive the text-base checksums through a new 'text_base_info'
parameter and use them in the NODES table rows.
(entries_write_baton, entries_write_new_cb, svn_wc__write_upgraded_entries):
Pass the text base info down the call stack to write_entry().
* subversion/libsvn_wc/entries.h
(svn_wc__text_base_file_info_t, svn_wc__text_base_info_t): New structs.
(svn_wc__write_upgraded_entries): Take a 'text_bases_info' parameter.
* subversion/libsvn_wc/upgrade.c
(migrate_text_bases): Return the checksums of all the files. Remove the
'dir_relpath' parameter and the code that filled in checksum columns in
existing NODES rows.
(upgrade_to_wcng): Migrate the text bases before the 'entries', and pass
all the text-base checksums from the former to the latter.
--This line, and those below, will be ignored--
Index: subversion/libsvn_wc/entries.c
===================================================================
--- subversion/libsvn_wc/entries.c (revision 1051073)
+++ subversion/libsvn_wc/entries.c (working copy)
@@ -1578,6 +1578,7 @@ write_entry(struct write_baton **entry_n
apr_int64_t wc_id,
apr_int64_t repos_id,
const svn_wc_entry_t *entry,
+ const svn_wc__text_base_info_t *text_base_info,
const char *local_relpath,
const char *entry_abspath,
const svn_wc_entry_t *this_dir,
@@ -1851,14 +1852,42 @@ write_entry(struct write_baton **entry_n
}
}
- /* If there is a copied WORKING node file then we don't have the
- revert-base checksum for this file. It will get updated
- later when we transfer the pristine texts. */
- if (entry->kind == svn_node_dir || (working_node && entry->copied))
+ if (entry->kind == svn_node_dir)
base_node->checksum = NULL;
else
- SVN_ERR(svn_checksum_parse_hex(&base_node->checksum, svn_checksum_md5,
- entry->checksum, result_pool));
+ {
+ if (text_base_info && text_base_info->revert_base.sha1_checksum)
+ base_node->checksum = text_base_info->revert_base.sha1_checksum;
+ else if (text_base_info && text_base_info->normal_base.sha1_checksum)
+ base_node->checksum = text_base_info->normal_base.sha1_checksum;
+ else
+ base_node->checksum = NULL;
+
+ /* The base MD5 checksum is available in the entry, unless there
+ * is a copied WORKING node. If possible, verify that the entry
+ * checksum matches the base file that we found. */
+#ifdef SVN_DEBUG
+ if (! (working_node && entry->copied))
+ {
+ svn_checksum_t *entry_md5_checksum, *found_md5_checksum;
+ SVN_ERR(svn_checksum_parse_hex(&entry_md5_checksum, svn_checksum_md5,
+ entry->checksum, scratch_pool));
+ if (text_base_info && text_base_info->revert_base.md5_checksum)
+ found_md5_checksum = text_base_info->revert_base.md5_checksum;
+ else if (text_base_info && text_base_info->normal_base.md5_checksum)
+ found_md5_checksum = text_base_info->normal_base.md5_checksum;
+ else
+ found_md5_checksum = NULL;
+ if (entry_md5_checksum && found_md5_checksum)
+ SVN_ERR_ASSERT(svn_checksum_match(entry_md5_checksum,
+ found_md5_checksum));
+ else
+ {
+ /* SVN_ERR_ASSERT(entry->deleted || ...); */
+ }
+ }
+#endif
+ }
if (this_dir->repos)
{
@@ -1960,10 +1989,9 @@ write_entry(struct write_baton **entry_n
below_working_node->moved_here = FALSE;
below_working_node->moved_to = FALSE;
- /* We have a problem, the revert_base information isn't
- available in the entry structure. The checksum will get
- updated later when we transfer the pristines. */
- below_working_node->checksum = NULL;
+ /* The revert_base checksum isn't available in the entry structure,
+ so the caller provides it. */
+ below_working_node->checksum = text_base_info->revert_base.sha1_checksum;
below_working_node->translated_size = 0;
below_working_node->changed_rev = SVN_INVALID_REVNUM;
below_working_node->changed_date = 0;
@@ -1995,9 +2023,24 @@ write_entry(struct write_baton **entry_n
if (entry->kind == svn_node_dir)
working_node->checksum = NULL;
else
- SVN_ERR(svn_checksum_parse_hex(&working_node->checksum,
- svn_checksum_md5,
- entry->checksum, result_pool));
+ {
+ working_node->checksum = text_base_info->normal_base.sha1_checksum;
+
+ /* If an MD5 checksum is present in the entry, we can verify that
+ * it matches the MD5 of the base file we found earlier. */
+#ifdef SVN_DEBUG
+ if (entry->checksum)
+ {
+ svn_checksum_t *md5_checksum;
+ SVN_ERR(svn_checksum_parse_hex(&md5_checksum, svn_checksum_md5,
+ entry->checksum, result_pool));
+ SVN_ERR_ASSERT(
+ md5_checksum && text_base_info->normal_base.md5_checksum);
+ SVN_ERR_ASSERT(svn_checksum_match(
+ md5_checksum, text_base_info->normal_base.md5_checksum));
+ }
+#endif
+ }
/* All subdirs start of incomplete, and stop being incomplete
when the entries file in the subdir is upgraded. */
@@ -2098,6 +2141,7 @@ struct entries_write_baton
const char *dir_abspath;
const char *new_root_abspath;
apr_hash_t *entries;
+ apr_hash_t *text_bases_info;
struct write_baton *parent_node;
struct write_baton *dir_node;
apr_pool_t *result_pool;
@@ -2139,7 +2183,7 @@ entries_write_new_cb(void *baton,
/* Write out "this dir" */
SVN_ERR(write_entry(&ewb->dir_node, ewb->parent_node, db, sdb,
- ewb->wc_id, ewb->repos_id, this_dir, dir_relpath,
+ ewb->wc_id, ewb->repos_id, this_dir, NULL, dir_relpath,
svn_dirent_join(new_root_abspath, dir_relpath,
scratch_pool),
this_dir, FALSE, ewb->result_pool, iterpool));
@@ -2150,6 +2194,8 @@ entries_write_new_cb(void *baton,
const char *name = svn__apr_hash_index_key(hi);
const svn_wc_entry_t *this_entry = svn__apr_hash_index_val(hi);
const char *child_abspath, *child_relpath;
+ svn_wc__text_base_info_t *text_base_info
+ = apr_hash_get(ewb->text_bases_info, name, APR_HASH_KEY_STRING);
svn_pool_clear(iterpool);
@@ -2163,7 +2209,7 @@ entries_write_new_cb(void *baton,
child_relpath = svn_dirent_skip_ancestor(old_root_abspath, child_abspath);
SVN_ERR(write_entry(NULL, ewb->dir_node, db, sdb,
ewb->wc_id, ewb->repos_id,
- this_entry, child_relpath,
+ this_entry, text_base_info, child_relpath,
svn_dirent_join(new_root_abspath, child_relpath,
scratch_pool),
this_dir, TRUE, iterpool, iterpool));
@@ -2184,6 +2230,7 @@ svn_wc__write_upgraded_entries(void **di
const char *dir_abspath,
const char *new_root_abspath,
apr_hash_t *entries,
+ apr_hash_t *text_bases_info,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
@@ -2195,6 +2242,7 @@ svn_wc__write_upgraded_entries(void **di
ewb.dir_abspath = dir_abspath;
ewb.new_root_abspath = new_root_abspath;
ewb.entries = entries;
+ ewb.text_bases_info = text_bases_info;
ewb.parent_node = parent_baton;
ewb.result_pool = result_pool;
Index: subversion/libsvn_wc/entries.h
===================================================================
--- subversion/libsvn_wc/entries.h (revision 1051073)
+++ subversion/libsvn_wc/entries.h (working copy)
@@ -87,10 +87,30 @@ svn_wc__read_entries_old(apr_hash_t **en
apr_pool_t *result_pool,
apr_pool_t *scratch_pool);
+/* The checksums of one pre-1.7 text-base file. If the text-base file
+ * exists, both checksums are filled in, otherwise both fields are NULL. */
+typedef struct svn_wc__text_base_file_info_t
+{
+ svn_checksum_t *sha1_checksum;
+ svn_checksum_t *md5_checksum;
+} svn_wc__text_base_file_info_t;
+
+/* The text-base checksums of the normal base and/or the revert-base of one
+ * pre-1.7 versioned text file. */
+typedef struct svn_wc__text_base_info_t
+{
+ svn_wc__text_base_file_info_t normal_base;
+ svn_wc__text_base_file_info_t revert_base;
+} svn_wc__text_base_info_t;
+
/* For internal use by upgrade.c to write entries in the wc-ng format.
Return in DIR_BATON the baton to be passed as PARENT_BATON when
upgrading child directories. Pass a NULL PARENT_BATON when upgrading
- the root directory. */
+ the root directory.
+
+ TEXT_BASES_INFO is a hash of information about all the text bases found
+ in this directory's admin area, keyed on (const char *) name of the
+ versioned file, with (svn_wc__text_base_info_t *) values. */
svn_error_t *
svn_wc__write_upgraded_entries(void **dir_baton,
void *parent_baton,
@@ -101,6 +121,7 @@ svn_wc__write_upgraded_entries(void **di
const char *dir_abspath,
const char *new_root_abspath,
apr_hash_t *entries,
+ apr_hash_t *text_bases_info,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool);
Index: subversion/libsvn_wc/upgrade.c
===================================================================
--- subversion/libsvn_wc/upgrade.c (revision 1051073)
+++ subversion/libsvn_wc/upgrade.c (working copy)
@@ -958,14 +958,15 @@ remove_suffix(const char *str, const cha
DIR_ABSPATH into the pristine store of SDB which is located in directory
NEW_WCROOT_ABSPATH.
- If DIR_RELPATH is set then any .svn-revert files will trigger an
- attempt to update the checksum in a NODES row below the top WORKING
- node. */
+ Set *TEXT_BASES_INFO to a new hash, allocated in RESULT_POOL, that maps
+ (const char *) name of the versioned file to (svn_wc__text_base_info_t *)
+ information about the pristine text. */
static svn_error_t *
-migrate_text_bases(const char *dir_abspath,
+migrate_text_bases(apr_hash_t **text_bases_info,
+ const char *dir_abspath,
const char *new_wcroot_abspath,
- const char *dir_relpath,
svn_sqlite__db_t *sdb,
+ apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
{
apr_hash_t *dirents;
@@ -975,6 +976,8 @@ migrate_text_bases(const char *dir_abspa
TEXT_BASE_SUBDIR,
scratch_pool);
+ *text_bases_info = apr_hash_make(result_pool);
+
/* Iterate over the text-base files */
SVN_ERR(svn_io_get_dirents3(&dirents, text_base_dir, TRUE,
scratch_pool, scratch_pool));
@@ -982,11 +985,33 @@ migrate_text_bases(const char *dir_abspa
hi = apr_hash_next(hi))
{
const char *text_base_basename = svn__apr_hash_index_key(hi);
- svn_checksum_t *md5_checksum;
- svn_checksum_t *sha1_checksum;
+ const char *versioned_file_name;
+ svn_boolean_t is_revert_base;
+ svn_wc__text_base_info_t *info;
+ svn_wc__text_base_file_info_t *file_info;
svn_pool_clear(iterpool);
+ /* Determine the versioned file name and whether this is a normal base
+ * or a revert base. */
+ versioned_file_name
+ = remove_suffix(text_base_basename, SVN_WC__REVERT_EXT, result_pool);
+ if (versioned_file_name)
+ {
+ is_revert_base = TRUE;
+ }
+ else
+ {
+ versioned_file_name
+ = remove_suffix(text_base_basename, SVN_WC__BASE_EXT, result_pool);
+ is_revert_base = FALSE;
+ }
+ info = apr_hash_get(*text_bases_info, versioned_file_name,
+ APR_HASH_KEY_STRING);
+ if (info == NULL)
+ info = apr_pcalloc(result_pool, sizeof(*info));
+ file_info = (is_revert_base ? &info->revert_base : &info->normal_base);
+
/* Calculate its checksums and copy it to the pristine store */
{
const char *pristine_path;
@@ -1009,23 +1034,25 @@ migrate_text_bases(const char *dir_abspa
complexity. :) */
/* Gather the two checksums. */
- SVN_ERR(svn_io_file_checksum2(&md5_checksum, text_base_path,
- svn_checksum_md5, iterpool));
- SVN_ERR(svn_io_file_checksum2(&sha1_checksum, text_base_path,
- svn_checksum_sha1, iterpool));
+ SVN_ERR(svn_io_file_checksum2(&file_info->md5_checksum, text_base_path,
+ svn_checksum_md5, result_pool));
+ SVN_ERR(svn_io_file_checksum2(&file_info->sha1_checksum, text_base_path,
+ svn_checksum_sha1, result_pool));
SVN_ERR(svn_io_stat(&finfo, text_base_path, APR_FINFO_SIZE, iterpool));
/* Insert a row into the pristine table. */
SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, STMT_INSERT_PRISTINE));
- SVN_ERR(svn_sqlite__bind_checksum(stmt, 1, sha1_checksum, iterpool));
- SVN_ERR(svn_sqlite__bind_checksum(stmt, 2, md5_checksum, iterpool));
+ SVN_ERR(svn_sqlite__bind_checksum(stmt, 1, file_info->sha1_checksum,
+ iterpool));
+ SVN_ERR(svn_sqlite__bind_checksum(stmt, 2, file_info->md5_checksum,
+ iterpool));
SVN_ERR(svn_sqlite__bind_int64(stmt, 3, finfo.size));
SVN_ERR(svn_sqlite__insert(NULL, stmt));
SVN_ERR(svn_wc__db_pristine_get_future_path(&pristine_path,
new_wcroot_abspath,
- sha1_checksum,
+ file_info->sha1_checksum,
iterpool, iterpool));
/* Ensure any sharding directories exist. */
@@ -1040,51 +1067,8 @@ migrate_text_bases(const char *dir_abspa
iterpool));
}
- /* If DIR_RELPATH is set and this text base is a "revert base", then
- attempt to update the checksum in a NODES row below the top WORKING
- node. */
- if (dir_relpath)
- {
- const char *name
- = remove_suffix(text_base_basename, SVN_WC__REVERT_EXT, iterpool);
-
- if (name)
- {
- /* Assumming this revert-base is not an orphan, the
- upgrade process will have inserted a NODES row with a
- null checksum below the top-level working node.
- Update that checksum now. */
- apr_int64_t op_depth = -1, wc_id = 1;
- const char *local_relpath = svn_relpath_join(dir_relpath, name,
- iterpool);
- svn_boolean_t have_row;
- svn_sqlite__stmt_t *stmt;
-
- SVN_ERR(svn_sqlite__get_statement(&stmt, sdb,
- STMT_SELECT_NODE_INFO));
- SVN_ERR(svn_sqlite__bindf(stmt, "is", wc_id, local_relpath));
- SVN_ERR(svn_sqlite__step(&have_row, stmt));
- if (have_row)
- {
- SVN_ERR(svn_sqlite__step(&have_row, stmt));
- if (have_row && svn_sqlite__column_is_null(stmt, 6)
- && !strcmp(svn_sqlite__column_text(stmt, 4, NULL),
- "file"))
- op_depth = svn_sqlite__column_int64(stmt, 0);
- }
- SVN_ERR(svn_sqlite__reset(stmt));
- if (op_depth != -1)
- {
- SVN_ERR(svn_sqlite__get_statement(&stmt, sdb,
- STMT_UPDATE_CHECKSUM));
- SVN_ERR(svn_sqlite__bindf(stmt, "isi", wc_id, local_relpath,
- op_depth));
- SVN_ERR(svn_sqlite__bind_checksum(stmt, 4, sha1_checksum,
- iterpool));
- SVN_ERR(svn_sqlite__update(NULL, stmt));
- }
- }
- }
+ apr_hash_set(*text_bases_info, versioned_file_name, APR_HASH_KEY_STRING,
+ info);
}
svn_pool_destroy(iterpool);
@@ -1176,6 +1160,7 @@ upgrade_to_wcng(void **dir_baton,
apr_hash_t *entries;
svn_wc_entry_t *this_dir;
const char *old_wcroot_abspath, *dir_relpath;
+ apr_hash_t *text_bases_info;
/* Don't try to mess with the WC if there are old log files left. */
@@ -1197,18 +1182,23 @@ upgrade_to_wcng(void **dir_baton,
* The semantics and storage mechanisms between the two are vastly different,
* so it's going to be a bit painful. Here's a plan for the operation:
*
- * 1) The 'entries' file needs to be moved to the new format. We read it
- * using the old-format reader, and then use our compatibility code
- * for writing entries to fill out the (new) wc_db state.
+ * 1) Read the old 'entries' using the old-format reader.
+ *
+ * 2) Create the new DB if it hasn't already been created.
*
- * 2) Convert wcprop to the wc-ng format
+ * 3) Use our compatibility code for writing entries to fill out the (new)
+ * DB state. Use the remembered checksums, since an entry has only the
+ * MD5 not the SHA1 checksum, and in the case of a revert-base doesn't
+ * even have that.
*
- * 3) Trash old, unused files and subdirs
+ * 4) Convert wcprop to the wc-ng format
*
- * ### (fill in other bits as they are implemented)
+ * 5) Migrate regular properties to the WC-NG DB.
+ *
+ * 6) Trash old, unused files and subdirs.
*/
- /***** ENTRIES *****/
+ /***** ENTRIES - READ *****/
SVN_ERR(svn_wc__read_entries_old(&entries, dir_abspath,
scratch_pool, scratch_pool));
@@ -1229,6 +1219,7 @@ upgrade_to_wcng(void **dir_baton,
apr_pstrdup(hash_pool, this_dir->uuid));
}
+ /* Create the new DB if it hasn't already been created. */
if (!data->sdb)
{
const char *root_adm_abspath;
@@ -1262,20 +1253,25 @@ upgrade_to_wcng(void **dir_baton,
SVN_ERR(svn_wc__db_wclock_obtain(db, data->root_abspath, 0, FALSE,
scratch_pool));
}
-
+
+ old_wcroot_abspath = svn_dirent_get_longest_ancestor(dir_abspath,
+ data->root_abspath,
+ scratch_pool);
+ dir_relpath = svn_dirent_skip_ancestor(old_wcroot_abspath, dir_abspath);
+
+ /***** TEXT BASES *****/
+ SVN_ERR(migrate_text_bases(&text_bases_info, dir_abspath, data->root_abspath,
+ data->sdb, scratch_pool, scratch_pool));
+
+ /***** ENTRIES - WRITE *****/
SVN_ERR(svn_wc__write_upgraded_entries(dir_baton, parent_baton, db, data->sdb,
data->repos_id, data->wc_id,
dir_abspath, data->root_abspath,
- entries,
+ entries, text_bases_info,
result_pool, scratch_pool));
/***** WC PROPS *****/
-
- /* Ugh. We don't know precisely where the wcprops are. Ignore them. */
- old_wcroot_abspath = svn_dirent_get_longest_ancestor(dir_abspath,
- data->root_abspath,
- scratch_pool);
- dir_relpath = svn_dirent_skip_ancestor(old_wcroot_abspath, dir_abspath);
+ /* If we don't know precisely where the wcprops are, ignore them. */
if (old_format != SVN_WC__WCPROPS_LOST)
{
apr_hash_t *all_wcprops;
@@ -1291,9 +1287,6 @@ upgrade_to_wcng(void **dir_baton,
all_wcprops, scratch_pool));
}
- SVN_ERR(migrate_text_bases(dir_abspath, data->root_abspath, dir_relpath,
- data->sdb, scratch_pool));
-
/* Upgrade all the properties (including "this dir").
Note: this must come AFTER the entries have been migrated into the
Index: subversion/libsvn_wc/wc-metadata.sql
===================================================================
--- subversion/libsvn_wc/wc-metadata.sql (revision 1051073)
+++ subversion/libsvn_wc/wc-metadata.sql (working copy)
@@ -167,9 +167,9 @@ CREATE TABLE ACTUAL_NODE (
conflicts? Why do we need these in a column to refer to the
pristine store? Can't we just parse the checksums from
conflict_data as well? */
- older_checksum TEXT,
- left_checksum TEXT,
- right_checksum TEXT,
+ older_checksum TEXT REFERENCES PRISTINE (checksum),
+ left_checksum TEXT REFERENCES PRISTINE (checksum),
+ right_checksum TEXT REFERENCES PRISTINE (checksum),
PRIMARY KEY (wc_id, local_relpath)
);
@@ -415,7 +415,7 @@ CREATE TABLE NODES (
/* The SHA-1 checksum of the pristine text, if this node is a file and was
moved here or copied here, else NULL. */
- checksum TEXT,
+ checksum TEXT REFERENCES PRISTINE (checksum),
/* for kind==symlink, this specifies the target. */
symlink_target TEXT,