Author: stefan2
Date: Thu Jul 2 22:59:34 2015
New Revision: 1688924
URL: http://svn.apache.org/r1688924
Log:
On the svn-mergeinfo-normalizer branch:
Add a utility object that allows us to skip most repository lookups to
check whether a branch still exists.
This is an important performance improvement because each lookup requires
a full network roundtrip and there can be hundreds of paths to check per
working copy node - painful in a WAN. We now simply keep a list of all
paths known to exist and those known to not exist. Only if a lookup in
that list fails, will we actually contact the repository.
* tools/client-side/svn-mergeinfo-normalizer/mergeinfo-normalizer.h
(svn_min__branch_lookup_t,
svn_min__branch_lookup_create,
svn_min__branch_lookup): Declare new internal API.
* tools/client-side/svn-mergeinfo-normalizer/missing-branches.c
New file implementing the new internal API.
* tools/client-side/svn-mergeinfo-normalizer/logic.c
(remove_obsolete_lines): Take the new lookup structure instead of a plain
session and use it for efficient path checks.
(normalize): Update session / lookup pass-through.
(svn_min__run_normalize): Wrap the session into the new lookup structure.
Added:
subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/missing-branches.c
(with props)
Modified:
subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/logic.c
subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/mergeinfo-normalizer.h
Modified:
subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/logic.c
URL:
http://svn.apache.org/viewvc/subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/logic.c?rev=1688924&r1=1688923&r2=1688924&view=diff
==============================================================================
---
subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/logic.c
(original)
+++
subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/logic.c
Thu Jul 2 22:59:34 2015
@@ -129,7 +129,7 @@ typedef struct progress_t
} progress_t;
static svn_error_t *
-remove_obsolete_lines(svn_ra_session_t *session,
+remove_obsolete_lines(svn_min__branch_lookup_t *lookup,
svn_mergeinfo_t mergeinfo,
svn_min__opt_state_t *opt_state,
progress_t *progress,
@@ -151,12 +151,11 @@ remove_obsolete_lines(svn_ra_session_t *
hi = apr_hash_next(hi))
{
const char *path = apr_hash_this_key(hi);
- svn_node_kind_t kind;
+ svn_boolean_t deleted;
- SVN_ERR_ASSERT(*path == '/');
- SVN_ERR(svn_ra_check_path(session, path + 1, SVN_INVALID_REVNUM, &kind,
- scratch_pool));
- if (kind == svn_node_none)
+ SVN_ERR(svn_min__branch_lookup(&deleted, lookup, path, FALSE,
+ scratch_pool));
+ if (deleted)
APR_ARRAY_PUSH(to_remove, const char *) = path;
}
@@ -290,7 +289,7 @@ progress_string(const progress_t *progre
static svn_error_t *
normalize(apr_array_header_t *wc_mergeinfo,
svn_min__log_t *log,
- svn_ra_session_t *session,
+ svn_min__branch_lookup_t *lookup,
svn_min__opt_state_t *opt_state,
apr_pool_t *scratch_pool)
{
@@ -310,7 +309,7 @@ normalize(apr_array_header_t *wc_mergein
progress.nodes_todo = i;
/* Eliminate entries for deleted branches. */
- SVN_ERR(remove_obsolete_lines(session,
+ SVN_ERR(remove_obsolete_lines(lookup,
svn_min__get_mergeinfo(wc_mergeinfo, i),
opt_state, &progress, iterpool));
@@ -325,7 +324,7 @@ normalize(apr_array_header_t *wc_mergein
/* Eliminate entries for deleted branches such that parent and
sub-node mergeinfo align again. */
- SVN_ERR(remove_obsolete_lines(session, parent_mergeinfo,
+ SVN_ERR(remove_obsolete_lines(lookup, parent_mergeinfo,
opt_state, &progress, iterpool));
parent_mergeinfo_copy = svn_mergeinfo_dup(parent_mergeinfo,
@@ -419,7 +418,7 @@ svn_min__run_normalize(apr_getopt_t *os,
{
apr_array_header_t *wc_mergeinfo;
svn_min__log_t *log = NULL;
- svn_ra_session_t *session = NULL;
+ svn_min__branch_lookup_t *lookup = NULL;
const char *url;
const char *common_path;
@@ -450,11 +449,14 @@ svn_min__run_normalize(apr_getopt_t *os,
/* open RA session */
if (needs_session(cmd_baton->opt_state))
{
+ svn_ra_session_t *session;
+
svn_pool_clear(subpool);
SVN_ERR(svn_min__add_wc_info(baton, i, iterpool, subpool));
SVN_ERR(svn_client_open_ra_session2(&session, cmd_baton->repo_root,
NULL, cmd_baton->ctx, iterpool,
subpool));
+ lookup = svn_min__branch_lookup_create(session, iterpool);
}
/* actual normalization */
@@ -464,7 +466,7 @@ svn_min__run_normalize(apr_getopt_t *os,
subpool),
stdout, subpool));
- SVN_ERR(normalize(wc_mergeinfo, log, session, cmd_baton->opt_state,
+ SVN_ERR(normalize(wc_mergeinfo, log, lookup, cmd_baton->opt_state,
subpool));
/* write results to disk */
Modified:
subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/mergeinfo-normalizer.h
URL:
http://svn.apache.org/viewvc/subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/mergeinfo-normalizer.h?rev=1688924&r1=1688923&r2=1688924&view=diff
==============================================================================
---
subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/mergeinfo-normalizer.h
(original)
+++
subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/mergeinfo-normalizer.h
Thu Jul 2 22:59:34 2015
@@ -179,11 +179,25 @@ svn_error_t *
svn_min__print_log_stats(svn_min__log_t *log,
apr_pool_t *scratch_pool);
+typedef struct svn_min__branch_lookup_t svn_min__branch_lookup_t;
+
+svn_min__branch_lookup_t *
+svn_min__branch_lookup_create(svn_ra_session_t *session,
+ apr_pool_t *result_pool);
+
+svn_error_t *
+svn_min__branch_lookup(svn_boolean_t *deleted,
+ svn_min__branch_lookup_t *lookup,
+ const char *branch,
+ svn_boolean_t local_only,
+ apr_pool_t *scratch_pool);
+
svn_error_t *
svn_min__run_normalize(apr_getopt_t *os,
void *baton,
apr_pool_t *pool);
+
#ifdef __cplusplus
}
#endif /* __cplusplus */
Added:
subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/missing-branches.c
URL:
http://svn.apache.org/viewvc/subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/missing-branches.c?rev=1688924&view=auto
==============================================================================
---
subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/missing-branches.c
(added)
+++
subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/missing-branches.c
Thu Jul 2 22:59:34 2015
@@ -0,0 +1,266 @@
+/*
+ * missing-branches.c -- Efficiently scan for missing branches.
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ */
+
+/* ==================================================================== */
+
+
+
+/*** Includes. ***/
+
+#include <assert.h>
+
+#include "svn_hash.h"
+#include "svn_pools.h"
+#include "private/svn_subr_private.h"
+
+#include "mergeinfo-normalizer.h"
+
+
+/*** Code. ***/
+
+struct svn_min__branch_lookup_t
+{
+ /* RA session that is queried when the two caches below have no answer. */
+ svn_ra_session_t *session;
+
+ /* Set of FS paths (const char * keys) known not to exist at HEAD. Only
+ the topmost missing path gets stored; its sub-paths are implied. */
+ apr_hash_t *deleted;
+
+ /* Set of FS paths (const char * keys) known to exist at HEAD. */
+ apr_hash_t *existing;
+};
+
+/* Return the offset of the last '/' among the first LEN chars of PATH.
+ Return 0 if LEN <= 1 (root, empty path). PATH must be a canonical FS path. */
+static apr_size_t
+parent_segment(const char *path,
+ apr_size_t len)
+{
+ assert(path[0] == '/');
+
+ if (len <= 1)
+ return 0;
+
+ --len; /* index of the last char to consider */
+ while (path[len] != '/') /* stops at index 0 at the latest */
+ --len;
+
+ return len;
+}
+
+/* Look for BRANCH in LOOKUP's caches without connecting to the server.
+ * Return svn_tristate_true if it is known to exist, svn_tristate_false if it
+ * or a parent is known to not exist. Otherwise return svn_tristate_unknown. */
+static svn_tristate_t
+local_lookup(const svn_min__branch_lookup_t *lookup,
+ const char *branch)
+{
+ apr_size_t len;
+
+ /* Paths without a leading '/' are not canonical. We let the remote lookup
+ * take care of them; our hashes simply have no info on them. */
+ if (branch[0] != '/')
+ return svn_tristate_unknown;
+
+ /* Hard-coded: "/" always exists. */
+ if (branch[1] == '\0')
+ return svn_tristate_true;
+
+ /* For every existing path that we encountered, there is an entry in the
+ EXISTING hash. So, we can just use that. */
+ len = strlen(branch);
+ if (apr_hash_get(lookup->existing, branch, len))
+ return svn_tristate_true;
+
+ /* Not known to exist and might be known to not exist. We only record
+ the top level deleted directory for DELETED branches, so we need to
+ walk up the path until we either find that deletion or an existing
+ path. In the latter case, we don't know what happened to the levels
+ below that, including BRANCH. */
+ while (len > 0)
+ {
+ /* Known deleted? Note that we checked BRANCH for existence but not
+ for deletion, yet. */
+ if (apr_hash_get(lookup->deleted, branch, len))
+ return svn_tristate_false;
+
+ /* Parent known to exist?
+ Then, we don't know what happened to the BRANCH. */
+ len = parent_segment(branch, len);
+
+ if (apr_hash_get(lookup->existing, branch, len))
+ return svn_tristate_unknown;
+ }
+
+ /* No info on BRANCH or any of its parents. We don't know. */
+ return svn_tristate_unknown;
+}
+
+/* Set *DELETED to TRUE if PATH can't be found at HEAD in SESSION and to FALSE
+ otherwise. PATH must start with '/'. Use SCRATCH_POOL for temporaries. */
+static svn_error_t *
+path_deleted(svn_boolean_t *deleted,
+ svn_ra_session_t *session,
+ const char *path,
+ apr_pool_t *scratch_pool)
+{
+ svn_node_kind_t kind;
+
+ SVN_ERR_ASSERT(*path == '/'); /* the leading '/' gets skipped below */
+ SVN_ERR(svn_ra_check_path(session, path + 1, SVN_INVALID_REVNUM, &kind,
+ scratch_pool));
+ *deleted = kind == svn_node_none;
+
+ return SVN_NO_ERROR;
+}
+
+/* Chop the last segment off PATH in place. PATH must be a canonical FS
+ path. Top-level paths become "/"; this is a no-op for the root path. */
+static void
+to_parent(svn_stringbuf_t *path)
+{
+ path->len = parent_segment(path->data, path->len);
+ if (path->len == 0) /* keep the '/' of the root path */
+ path->len = 1;
+
+ path->data[path->len] = '\0';
+}
+
+/* Contact the repository used by LOOKUP and set *DELETED to TRUE if path
+ BRANCH does not exist at HEAD, to FALSE otherwise. Cache the results in
+ LOOKUP and use SCRATCH_POOL for temporary allocations. Call this only
+ if local_lookup returned svn_tristate_unknown for BRANCH. */
+static svn_error_t *
+remote_lookup(svn_boolean_t *deleted,
+ const svn_min__branch_lookup_t *lookup,
+ const char *branch,
+ apr_pool_t *scratch_pool)
+{
+ svn_stringbuf_t *path = svn_stringbuf_create(branch, scratch_pool);
+ apr_pool_t *iterpool = svn_pool_create(scratch_pool);
+
+ /* We shall call this function only after the local lookup failed. */
+ assert(local_lookup(lookup, branch) == svn_tristate_unknown);
+
+ /* Actual repository lookup for BRANCH itself. */
+ SVN_ERR(path_deleted(deleted, lookup->session, branch, scratch_pool));
+
+ /* If BRANCH does not exist, cache its topmost non-existent parent. */
+ if (*deleted)
+ {
+ svn_boolean_t parent_deleted;
+ const char *deleted_path;
+ apr_size_t len;
+
+ /* Find the closest parent that does exist.
+ "/" exists, hence, this will terminate. */
+ do
+ {
+ svn_pool_clear(iterpool);
+
+ len = path->len; /* length of the last path known to be missing */
+ to_parent(path);
+
+ /* We often know "/branches" etc. to exist. So, we can skip
+ the final lookup in that case. */
+ if (local_lookup(lookup, path->data) == svn_tristate_true)
+ break;
+
+ /* Get the info from the repository. */
+ SVN_ERR(path_deleted(&parent_deleted, lookup->session, path->data,
+ iterpool));
+ }
+ while (parent_deleted);
+
+ /* PATH exists; the prefix of BRANCH of length LEN does not. */
+ deleted_path = apr_pstrmemdup(apr_hash_pool_get(lookup->deleted),
+ branch, len);
+ apr_hash_set(lookup->deleted, deleted_path, len, deleted_path);
+ }
+
+ /* PATH and all its parents exist. Add them to the EXISTING hash.
+ Make sure to allocate only the longest path and then reference
+ sub-sequences of it to keep memory usage in check. */
+ if (!apr_hash_get(lookup->existing, path->data, path->len))
+ {
+ const char *hash_path
+ = apr_pstrmemdup(apr_hash_pool_get(lookup->existing), path->data,
+ path->len);
+
+ /* Note that we don't need to check for existing entries here because
+ the APR hash will reuse existing nodes and we are not allocating
+ anything else here. So, this does not allocate duplicate nodes. */
+ for (; path->len > 1; to_parent(path)) /* "/" itself is never stored */
+ apr_hash_set(lookup->existing, hash_path, path->len, hash_path);
+ }
+
+ svn_pool_destroy(iterpool);
+
+ return SVN_NO_ERROR;
+}
+
+svn_min__branch_lookup_t *
+svn_min__branch_lookup_create(svn_ra_session_t *session,
+ apr_pool_t *result_pool)
+{
+ svn_min__branch_lookup_t *result = apr_pcalloc(result_pool,
+ sizeof(*result));
+ result->session = session;
+ result->deleted = svn_hash__make(result_pool);
+ result->existing = svn_hash__make(result_pool);
+ /* The struct and both hashes are allocated in RESULT_POOL. */
+ return result;
+}
+
+svn_error_t *
+svn_min__branch_lookup(svn_boolean_t *deleted,
+ svn_min__branch_lookup_t *lookup,
+ const char *branch,
+ svn_boolean_t local_only,
+ apr_pool_t *scratch_pool)
+{
+ switch (local_lookup(lookup, branch)) /* try the cached info first */
+ {
+ case svn_tristate_false:
+ *deleted = TRUE;
+ return SVN_NO_ERROR;
+
+ case svn_tristate_true:
+ *deleted = FALSE;
+ return SVN_NO_ERROR;
+
+ default:
+ /* If the state is unknown and we are only allowed to do a local
+ lookup, report "not deleted", which may be a false negative. */
+ if (local_only)
+ {
+ *deleted = FALSE;
+ return SVN_NO_ERROR;
+ }
+ }
+ /* State unknown and remote access allowed: ask the repository. */
+ return svn_error_trace(remote_lookup(deleted, lookup, branch,
+ scratch_pool));
+}
+
Propchange:
subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/missing-branches.c
------------------------------------------------------------------------------
svn:eol-style = native