Author: philip
Date: Fri Jul 29 15:50:09 2011
New Revision: 1152282

URL: http://svn.apache.org/viewvc?rev=1152282&view=rev
Log:
Improve the performance of "log -g" by avoiding scanning the same
set of paths/ranges more than once.

* subversion/libsvn_repos/log.c
  (get_combined_mergeinfo_changes): Don't bother combining two NULL
   mergeinfos.
  (reduce_search, store_search): New.
  (handle_merged_revisions): Add processed parameter.
  (do_logs): Add processed parameter, allocate hash, store processed
   paths/revisions and use to avoid duplicate history scans.
  (svn_repos_get_logs4): Pass NULL.
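
The gist of the change: do_logs() now carries a "processed" mergeinfo hash
mapping each path to the revision ranges whose history has already been
walked.  reduce_search() drops paths (or narrows the revision range) when
the requested search is already covered, and store_search() records each
new search before it is performed.  As a rough illustration of the idea
only -- not the committed code, and deliberately avoiding the SVN/APR
APIs -- here is a minimal standalone C sketch with hypothetical names:

#include <stdio.h>

#define MAX_RANGES 64

/* Hypothetical, simplified stand-in for the "processed" hash: a flat
   table of [start, end] revision ranges already searched for one path. */
struct covered {
  long start[MAX_RANGES];
  long end[MAX_RANGES];
  int count;
};

/* Return 1 if [start, end] lies entirely inside a recorded range. */
static int already_processed(const struct covered *c, long start, long end)
{
  int i;
  for (i = 0; i < c->count; i++)
    if (c->start[i] <= start && c->end[i] >= end)
      return 1;
  return 0;
}

/* Record [start, end] as processed (no coalescing of ranges here). */
static void record_processed(struct covered *c, long start, long end)
{
  if (c->count < MAX_RANGES)
    {
      c->start[c->count] = start;
      c->end[c->count] = end;
      c->count++;
    }
}

int main(void)
{
  struct covered c = { {0}, {0}, 0 };

  if (!already_processed(&c, 100, 200))
    {
      /* ... walk the history of the path over r100:200 ... */
      record_processed(&c, 100, 200);
    }

  /* A later request for a sub-range can now be skipped entirely. */
  printf("skip r120:180? %s\n",
         already_processed(&c, 120, 180) ? "yes" : "no");
  return 0;
}

In the real code the coverage table is an svn_mergeinfo_t keyed by path, so
svn_mergeinfo_merge() keeps the stored rangelists sorted and combined, and
reduce_search()/store_search() use the same end+1 convention so that
single-revision searches remain representable as ranges.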

Modified:
    subversion/trunk/subversion/libsvn_repos/log.c

Modified: subversion/trunk/subversion/libsvn_repos/log.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_repos/log.c?rev=1152282&r1=1152281&r2=1152282&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_repos/log.c (original)
+++ subversion/trunk/subversion/libsvn_repos/log.c Fri Jul 29 15:50:09 2011
@@ -857,6 +857,9 @@ get_combined_mergeinfo_changes(svn_merge
                                     iterpool));
       mergeinfo = apr_hash_get(catalog, path, APR_HASH_KEY_STRING);
 
+      if (!prev_mergeinfo && !mergeinfo)
+        continue;
+
       /* Compare, contrast, and combine the results. */
       SVN_ERR(svn_mergeinfo_diff(&deleted, &added, prev_mergeinfo,
                                  mergeinfo, FALSE, iterpool));
@@ -1514,6 +1517,7 @@ static svn_error_t *
 do_logs(svn_fs_t *fs,
         const apr_array_header_t *paths,
         svn_mergeinfo_t log_target_history_as_mergeinfo,
+        svn_mergeinfo_t processed,
         apr_hash_t *nested_merges,
         svn_revnum_t hist_start,
         svn_revnum_t hist_end,
@@ -1568,6 +1572,7 @@ handle_merged_revisions(svn_revnum_t rev
                         svn_fs_t *fs,
                         svn_mergeinfo_t log_target_history_as_mergeinfo,
                         apr_hash_t *nested_merges,
+                        svn_mergeinfo_t processed,
                         svn_mergeinfo_t added_mergeinfo,
                         svn_mergeinfo_t deleted_mergeinfo,
                         svn_boolean_t discover_changed_paths,
@@ -1610,7 +1615,7 @@ handle_merged_revisions(svn_revnum_t rev
 
       svn_pool_clear(iterpool);
       SVN_ERR(do_logs(fs, pl_range->paths, log_target_history_as_mergeinfo,
-                      nested_merges,
+                      processed, nested_merges,
                       pl_range->range.start, pl_range->range.end, 0,
                       discover_changed_paths, strict_node_history,
                       TRUE, pl_range->reverse_merge, TRUE, TRUE,
@@ -1633,6 +1638,115 @@ struct added_deleted_mergeinfo
   svn_mergeinfo_t deleted_mergeinfo;
 };
 
+/* Reduce the search range PATHS, *HIST_START, *HIST_END by removing
+   parts already covered by PROCESSED.  If reduction is possible,
+   elements may be removed from PATHS and *HIST_START and *HIST_END
+   may be set to a narrower range. */
+static svn_error_t *
+reduce_search(apr_array_header_t *paths,
+              svn_revnum_t *hist_start,
+              svn_revnum_t *hist_end,
+              svn_mergeinfo_t processed,
+              apr_pool_t *scratch_pool)
+{
+  /* We add 1 to end to compensate for store_search */
+  svn_revnum_t start = *hist_start <= *hist_end ? *hist_start : *hist_end;
+  svn_revnum_t end = *hist_start <= *hist_end ? *hist_end + 1 : *hist_start + 1;
+  int i;
+
+  for (i = 0; i < paths->nelts; ++i)
+    {
+      const char *path = APR_ARRAY_IDX(paths, i, const char *);
+      apr_array_header_t *ranges = apr_hash_get(processed, path,
+                                                APR_HASH_KEY_STRING);
+      int j;
+
+      if (!ranges)
+        continue;
+
+      /* ranges is ordered; could we use some sort of binary search
+         rather than iterating? */
+      for (j = 0; j < ranges->nelts; ++j)
+        {
+          svn_merge_range_t *range = APR_ARRAY_IDX(ranges, j,
+                                                   svn_merge_range_t *);
+          if (range->start <= start && range->end >= end)
+            {
+              for (j = i; j < paths->nelts - 1; ++j)
+                APR_ARRAY_IDX(paths, j, const char *)
+                  = APR_ARRAY_IDX(paths, j + 1, const char *);
+
+              --paths->nelts;
+              --i;
+              break;
+            }
+
+          /* If there is only one path then we also check for a
+             partial overlap rather than the full overlap above, and
+             reduce the [hist_start, hist_end] range rather than
+             dropping the path. */
+          if (paths->nelts == 1)
+            {
+              if (range->start <= start && range->end > start)
+                {
+                  if (start == *hist_start)
+                    *hist_start = range->end - 1;
+                  else
+                    *hist_end = range->end - 1;
+                  break;
+                }
+              if (range->start < end && range->end >= end)
+                {
+                  if (start == *hist_start)
+                    *hist_end = range->start;
+                  else
+                    *hist_start = range->start;
+                  break;
+                }
+            }
+        }
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Extend PROCESSED to cover PATHS from HIST_START to HIST_END */
+static svn_error_t *
+store_search(svn_mergeinfo_t processed,
+             const apr_array_header_t *paths,
+             svn_revnum_t hist_start,
+             svn_revnum_t hist_end,
+             apr_pool_t *scratch_pool)
+{
+  /* We add 1 to end so that we can use the mergeinfo API to handle
+     single revisions where HIST_START is equal to HIST_END. */
+  svn_revnum_t start = hist_start <= hist_end ? hist_start : hist_end;
+  svn_revnum_t end = hist_start <= hist_end ? hist_end + 1 : hist_start + 1;
+  svn_mergeinfo_t mergeinfo = apr_hash_make(scratch_pool);
+  apr_pool_t *processed_pool = apr_hash_pool_get(processed);
+  int i;
+
+  for (i = 0; i < paths->nelts; ++i)
+    {
+      const char *path = APR_ARRAY_IDX(paths, i, const char *);
+      apr_array_header_t *ranges = apr_array_make(processed_pool, 1,
+                                                  sizeof(svn_merge_range_t*));
+      svn_merge_range_t *range = apr_palloc(processed_pool,
+                                            sizeof(svn_merge_range_t));
+      
+      range->start = start;
+      range->end = end;
+      range->inheritable = TRUE;
+      APR_ARRAY_PUSH(ranges, svn_merge_range_t *) = range;
+      apr_hash_set(mergeinfo, apr_pstrdup(processed_pool, path),
+                   APR_HASH_KEY_STRING, ranges);
+    }
+  SVN_ERR(svn_mergeinfo_merge(processed, mergeinfo,
+                              apr_hash_pool_get(processed)));
+
+  return SVN_NO_ERROR;
+}
+
 /* Find logs for PATHS from HIST_START to HIST_END in FS, and invoke
    RECEIVER with RECEIVER_BATON on them.  If DESCENDING_ORDER is TRUE, send
    the logs back as we find them, else buffer the logs and send them back
@@ -1657,12 +1771,17 @@ struct added_deleted_mergeinfo
    do_logs()/send_logs()/handle_merge_revisions() recursions, see also the
    argument of the same name in send_logs().
 
+   If not NULL, PROCESSED is a mergeinfo hash that represents the
+   paths and revisions that have already been searched.  Allocated
+   like NESTED_MERGES above.
+
    All other parameters are the same as svn_repos_get_logs4().
  */
 static svn_error_t *
 do_logs(svn_fs_t *fs,
         const apr_array_header_t *paths,
         svn_mergeinfo_t log_target_history_as_mergeinfo,
+        svn_mergeinfo_t processed,
         apr_hash_t *nested_merges,
         svn_revnum_t hist_start,
         svn_revnum_t hist_end,
@@ -1691,6 +1810,20 @@ do_logs(svn_fs_t *fs,
   int send_count = 0;
   int i;
 
+  if (processed)
+    {
+      /* Casting away const. This only happens on recursive calls when
+         it is known to be safe because we allocated paths. */
+      SVN_ERR(reduce_search((apr_array_header_t *)paths, &hist_start, &hist_end,
+                            processed, pool));
+    }
+
+  if (!paths->nelts)
+    return SVN_NO_ERROR;
+
+  if (processed)
+    SVN_ERR(store_search(processed, paths, hist_start, hist_end, pool));
+
   /* We have a list of paths and a revision range.  But we don't care
      about all the revisions in the range -- only the ones in which
      one of our paths was changed.  So let's go figure out which
@@ -1779,11 +1912,13 @@ do_logs(svn_fs_t *fs,
                          recursions so we can track and squelch duplicates. */
                       subpool = svn_pool_create(pool);
                       nested_merges = apr_hash_make(subpool);
+                      processed = apr_hash_make(subpool);
                     }
 
                   SVN_ERR(handle_merged_revisions(
                     current, fs,
                     log_target_history_as_mergeinfo, nested_merges,
+                    processed,
                     added_mergeinfo, deleted_mergeinfo,
                     discover_changed_paths,
                     strict_node_history,
@@ -1882,6 +2017,7 @@ do_logs(svn_fs_t *fs,
               SVN_ERR(handle_merged_revisions(current, fs,
                                               log_target_history_as_mergeinfo,
                                               nested_merges,
+                                              processed,
                                               added_mergeinfo,
                                               deleted_mergeinfo,
                                               discover_changed_paths,
@@ -2092,7 +2228,7 @@ svn_repos_get_logs4(svn_repos_t *repos,
       svn_pool_destroy(subpool);
     }
 
-  return do_logs(repos->fs, paths, paths_history_mergeinfo, NULL, start, end,
+  return do_logs(repos->fs, paths, paths_history_mergeinfo, NULL, NULL, start, end,
                  limit, discover_changed_paths, strict_node_history,
                  include_merged_revisions, FALSE, FALSE, FALSE, revprops,
                  descending_order, receiver, receiver_baton,

