[PATCH v2] improve git svn performance

2014-01-22 Thread manjian2006
From: manjian2006 manjian2...@gmail.com


* perl/Git/SVN.pm
  Modified according to Eric Wong normalper...@yhbt.net

> Hi, I'm interested in this.  How much did performance improve by
> (and how many revisions is the repository)?
Our SVN server is on a LAN, and the repository has 15152 revisions. The
unoptimized git-svn took 10 hours or more to complete the import,
while the optimized one takes only 3-4 hours.


According to some profiling data, the _rev_list and rebuild subroutines
consume a large proportion of the time.
So I improved _rev_list's performance by memoizing its results, and avoided
subprocess invocations by memoizing the rebuild subroutine's key data.

Signed-off-by: manjian2006 manjian2...@gmail.com
---
 perl/Git/SVN.pm | 41 ++---
 1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm
index 5273ee8..dc7942b 100644
--- a/perl/Git/SVN.pm
+++ b/perl/Git/SVN.pm
@@ -1599,6 +1599,7 @@ sub tie_for_persistent_memoization {
my %lookup_svn_merge_cache;
my %check_cherry_pick_cache;
my %has_no_changes_cache;
+   my %_rev_list_cache;
 
tie_for_persistent_memoization(\%lookup_svn_merge_cache,
$cache_path/lookup_svn_merge);
@@ -1620,6 +1621,14 @@ sub tie_for_persistent_memoization {
SCALAR_CACHE = ['HASH' = \%has_no_changes_cache],
LIST_CACHE = 'FAULT',
;
+
+   tie_for_persistent_memoization(\%_rev_list_cache,
+   $cache_path/_rev_list);
+   memoize '_rev_list',
+   SCALAR_CACHE = 'FAULT',
+   LIST_CACHE = ['HASH' = \%_rev_list_cache],
+   ;
+
}
 
sub unmemoize_svn_mergeinfo_functions {
@@ -1629,6 +1638,7 @@ sub tie_for_persistent_memoization {
Memoize::unmemoize 'lookup_svn_merge';
Memoize::unmemoize 'check_cherry_pick';
Memoize::unmemoize 'has_no_changes';
+   Memoize::unmemoize '_rev_list';
}
 
sub clear_memoized_mergeinfo_caches {
@@ -1959,11 +1969,25 @@ sub rebuild_from_rev_db {
unlink $path or croak unlink: $!;
 }
 
+#define a global associate map to record rebuild status
+my %rebuild_status;
+#define a global associate map to record rebuild verify status
+my %rebuild_verify_status;
+
 sub rebuild {
my ($self) = @_;
my $map_path = $self-map_path;
my $partial = (-e $map_path  ! -z $map_path);
-   return unless ::verify_ref($self-refname.'^0');
+   my $verify_key = $self-refname.'^0';
+   if (! exists $rebuild_verify_status{$verify_key} || ! defined 
$rebuild_verify_status{$verify_key} ) {
+   my $verify_result = ::verify_ref($verify_key);
+   if ($verify_result) {
+   $rebuild_verify_status{$verify_key} = 1;
+   }
+   }
+   if (! exists $rebuild_verify_status{$verify_key}) {
+   return;
+   }
if (!$partial  ($self-use_svm_props || $self-no_metadata)) {
my $rev_db = $self-rev_db_path;
$self-rebuild_from_rev_db($rev_db);
@@ -1977,10 +2001,21 @@ sub rebuild {
print Rebuilding $map_path ...\n if (!$partial);
my ($base_rev, $head) = ($partial ? $self-rev_map_max_norebuild(1) :
(undef, undef));
+   my $key_value = ($head ? $head.. : ) . $self-refname;
+   if (exists $rebuild_status{$key_value}) {
+   print Done rebuilding $map_path\n if (!$partial || !$head);
+   my $rev_db_path = $self-rev_db_path;
+   if (-f $self-rev_db_path) {
+   unlink $self-rev_db_path or croak unlink: $!;
+   }
+   $self-unlink_rev_db_symlink;
+   return;
+   }
my ($log, $ctx) =
-   command_output_pipe(qw/rev-list --pretty=raw --reverse/,
-   ($head ? $head.. : ) . $self-refname,
+   command_output_pipe(qw/rev-list --pretty=raw --reverse/,
+   $key_value, 
'--');
+   $rebuild_status{$key_value} = 1;
my $metadata_url = $self-metadata_url;
remove_username($metadata_url);
my $svn_uuid = $self-rewrite_uuid || $self-ra_uuid;
-- 
1.8.3.2

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3] git-svn: memoize _rev_list and rebuild

2014-01-22 Thread manjian2006
From: lin zuojian manjian2...@gmail.com

According to profile data, _rev_list and rebuild consume a large
portion of time.  Memoize the results of _rev_list and memoize
rebuild internals to avoid subprocess invocation.

When importing 15152 revisions on a LAN, time improved from 10
hours to 3-4 hours.

Signed-off-by: lin zuojian manjian2...@gmail.com
---
 perl/Git/SVN.pm | 41 ++---
 1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm
index 5273ee8..6e804a2 100644
--- a/perl/Git/SVN.pm
+++ b/perl/Git/SVN.pm
@@ -1599,6 +1599,7 @@ sub tie_for_persistent_memoization {
my %lookup_svn_merge_cache;
my %check_cherry_pick_cache;
my %has_no_changes_cache;
+   my %_rev_list_cache;
 
tie_for_persistent_memoization(\%lookup_svn_merge_cache,
$cache_path/lookup_svn_merge);
@@ -1620,6 +1621,14 @@ sub tie_for_persistent_memoization {
SCALAR_CACHE = ['HASH' = \%has_no_changes_cache],
LIST_CACHE = 'FAULT',
;
+
+   tie_for_persistent_memoization(\%_rev_list_cache,
+   $cache_path/_rev_list);
+   memoize '_rev_list',
+   SCALAR_CACHE = 'FAULT',
+   LIST_CACHE = ['HASH' = \%_rev_list_cache],
+   ;
+
}
 
sub unmemoize_svn_mergeinfo_functions {
@@ -1629,6 +1638,7 @@ sub tie_for_persistent_memoization {
Memoize::unmemoize 'lookup_svn_merge';
Memoize::unmemoize 'check_cherry_pick';
Memoize::unmemoize 'has_no_changes';
+   Memoize::unmemoize '_rev_list';
}
 
sub clear_memoized_mergeinfo_caches {
@@ -1959,11 +1969,25 @@ sub rebuild_from_rev_db {
unlink $path or croak unlink: $!;
 }
 
+#define a global associate map to record rebuild status
+my %rebuild_status;
+#define a global associate map to record rebuild verify status
+my %rebuild_verify_status;
+
 sub rebuild {
my ($self) = @_;
my $map_path = $self-map_path;
my $partial = (-e $map_path  ! -z $map_path);
-   return unless ::verify_ref($self-refname.'^0');
+   my $verify_key = $self-refname.'^0';
+   if (!$rebuild_verify_status{$verify_key}) {
+   my $verify_result = ::verify_ref($verify_key);
+   if ($verify_result) {
+   $rebuild_verify_status{$verify_key} = 1;
+   }
+   }
+   if (!$rebuild_verify_status{$verify_key}) {
+   return;
+   }
if (!$partial  ($self-use_svm_props || $self-no_metadata)) {
my $rev_db = $self-rev_db_path;
$self-rebuild_from_rev_db($rev_db);
@@ -1977,10 +2001,21 @@ sub rebuild {
print Rebuilding $map_path ...\n if (!$partial);
my ($base_rev, $head) = ($partial ? $self-rev_map_max_norebuild(1) :
(undef, undef));
+   my $key_value = ($head ? $head.. : ) . $self-refname;
+   if (exists $rebuild_status{$key_value}) {
+   print Done rebuilding $map_path\n if (!$partial || !$head);
+   my $rev_db_path = $self-rev_db_path;
+   if (-f $self-rev_db_path) {
+   unlink $self-rev_db_path or croak unlink: $!;
+   }
+   $self-unlink_rev_db_symlink;
+   return;
+   }
my ($log, $ctx) =
-   command_output_pipe(qw/rev-list --pretty=raw --reverse/,
-   ($head ? $head.. : ) . $self-refname,
+   command_output_pipe(qw/rev-list --pretty=raw --reverse/,
+   $key_value,
'--');
+   $rebuild_status{$key_value} = 1;
my $metadata_url = $self-metadata_url;
remove_username($metadata_url);
my $svn_uuid = $self-rewrite_uuid || $self-ra_uuid;
-- 
1.8.3.2

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] improve git svn performance Hi, I am trying to improve git svn's performance according to some profiling data.As the data showed,_rev_list subroutine and rebuild subroutine are consuming a l

2014-01-19 Thread manjian2006
From: linzj li...@ucweb.com

---
 perl/Git/SVN.pm | 63 -
 1 file changed, 49 insertions(+), 14 deletions(-)

diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm
index 5273ee8..3cd1c8f 100644
--- a/perl/Git/SVN.pm
+++ b/perl/Git/SVN.pm
@@ -1599,27 +1599,36 @@ sub tie_for_persistent_memoization {
my %lookup_svn_merge_cache;
my %check_cherry_pick_cache;
my %has_no_changes_cache;
+   my %_rev_list_cache;
 
tie_for_persistent_memoization(\%lookup_svn_merge_cache,
-   $cache_path/lookup_svn_merge);
+   $cache_path/lookup_svn_merge);
memoize 'lookup_svn_merge',
-   SCALAR_CACHE = 'FAULT',
-   LIST_CACHE = ['HASH' = \%lookup_svn_merge_cache],
-   ;
+   SCALAR_CACHE = 'FAULT',
+   LIST_CACHE = ['HASH' = 
\%lookup_svn_merge_cache],
+   ;
 
tie_for_persistent_memoization(\%check_cherry_pick_cache,
-   $cache_path/check_cherry_pick);
+   $cache_path/check_cherry_pick);
memoize 'check_cherry_pick',
-   SCALAR_CACHE = 'FAULT',
-   LIST_CACHE = ['HASH' = \%check_cherry_pick_cache],
-   ;
+   SCALAR_CACHE = 'FAULT',
+   LIST_CACHE = ['HASH' = 
\%check_cherry_pick_cache],
+   ;
 
tie_for_persistent_memoization(\%has_no_changes_cache,
-   $cache_path/has_no_changes);
+   $cache_path/has_no_changes);
memoize 'has_no_changes',
-   SCALAR_CACHE = ['HASH' = \%has_no_changes_cache],
-   LIST_CACHE = 'FAULT',
-   ;
+   SCALAR_CACHE = ['HASH' = 
\%has_no_changes_cache],
+   LIST_CACHE = 'FAULT',
+   ;
+
+   tie_for_persistent_memoization(\%_rev_list_cache,
+   $cache_path/_rev_list);
+   memoize '_rev_list',
+   SCALAR_CACHE = 'FAULT',
+   LIST_CACHE = ['HASH' = \%_rev_list_cache],
+   ;
+
}
 
sub unmemoize_svn_mergeinfo_functions {
@@ -1629,6 +1638,7 @@ sub tie_for_persistent_memoization {
Memoize::unmemoize 'lookup_svn_merge';
Memoize::unmemoize 'check_cherry_pick';
Memoize::unmemoize 'has_no_changes';
+   Memoize::unmemoize '_rev_list';
}
 
sub clear_memoized_mergeinfo_caches {
@@ -1959,11 +1969,25 @@ sub rebuild_from_rev_db {
unlink $path or croak unlink: $!;
 }
 
+#define a global associate map to record rebuild status
+my %rebuildStatus;
+#define a global associate map to record rebuild verify status
+my %rebuildVerifyStatus;
+
 sub rebuild {
my ($self) = @_;
my $map_path = $self-map_path;
my $partial = (-e $map_path  ! -z $map_path);
-   return unless ::verify_ref($self-refname.'^0');
+my $verifyKey = $self-refname.'^0';
+if (! exists $rebuildVerifyStatus{$verifyKey} || ! defined 
$rebuildVerifyStatus{$verifyKey} ) {
+my $verifyResult = ::verify_ref($verifyKey);
+if ($verifyResult) {
+$rebuildVerifyStatus{$verifyKey} = 1;
+}
+}
+if (! exists $rebuildVerifyStatus{$verifyKey}) {
+return;
+}
if (!$partial  ($self-use_svm_props || $self-no_metadata)) {
my $rev_db = $self-rev_db_path;
$self-rebuild_from_rev_db($rev_db);
@@ -1977,10 +2001,21 @@ sub rebuild {
print Rebuilding $map_path ...\n if (!$partial);
my ($base_rev, $head) = ($partial ? $self-rev_map_max_norebuild(1) :
(undef, undef));
+my $keyValue = ($head ? $head.. : ) . $self-refname;
+if (exists $rebuildStatus{$keyValue}) {
+print Done rebuilding $map_path\n if (!$partial || !$head);
+my $rev_db_path = $self-rev_db_path;
+if (-f $self-rev_db_path) {
+unlink $self-rev_db_path or croak unlink: $!;
+}
+$self-unlink_rev_db_symlink;
+   return;
+}
my ($log, $ctx) =
command_output_pipe(qw/rev-list --pretty=raw --reverse/,
-   ($head ? $head.. : ) . $self-refname,
+   $keyValue,  
'--');
+$rebuildStatus{$keyValue} = 1;
my $metadata_url = $self-metadata_url;
remove_username($metadata_url);
my $svn_uuid = $self-rewrite_uuid || $self-ra_uuid;
-- 
1.8.3.2

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to 

[PATCH] improve git svn performance

2014-01-19 Thread manjian2006
From: linzj li...@ucweb.com

Hi,
  I am trying to improve git svn's performance according to some profiling
data. As the data showed, the _rev_list and rebuild subroutines are
consuming a large proportion of the time. So I improved _rev_list's
performance by memoizing its results, and avoided subprocess invocations by
memoizing the rebuild subroutine's key data. Here's my patch:
---
 perl/Git/SVN.pm | 63 -
 1 file changed, 49 insertions(+), 14 deletions(-)

diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm
index 5273ee8..3cd1c8f 100644
--- a/perl/Git/SVN.pm
+++ b/perl/Git/SVN.pm
@@ -1599,27 +1599,36 @@ sub tie_for_persistent_memoization {
my %lookup_svn_merge_cache;
my %check_cherry_pick_cache;
my %has_no_changes_cache;
+   my %_rev_list_cache;
 
tie_for_persistent_memoization(\%lookup_svn_merge_cache,
-   $cache_path/lookup_svn_merge);
+   $cache_path/lookup_svn_merge);
memoize 'lookup_svn_merge',
-   SCALAR_CACHE = 'FAULT',
-   LIST_CACHE = ['HASH' = \%lookup_svn_merge_cache],
-   ;
+   SCALAR_CACHE = 'FAULT',
+   LIST_CACHE = ['HASH' = 
\%lookup_svn_merge_cache],
+   ;
 
tie_for_persistent_memoization(\%check_cherry_pick_cache,
-   $cache_path/check_cherry_pick);
+   $cache_path/check_cherry_pick);
memoize 'check_cherry_pick',
-   SCALAR_CACHE = 'FAULT',
-   LIST_CACHE = ['HASH' = \%check_cherry_pick_cache],
-   ;
+   SCALAR_CACHE = 'FAULT',
+   LIST_CACHE = ['HASH' = 
\%check_cherry_pick_cache],
+   ;
 
tie_for_persistent_memoization(\%has_no_changes_cache,
-   $cache_path/has_no_changes);
+   $cache_path/has_no_changes);
memoize 'has_no_changes',
-   SCALAR_CACHE = ['HASH' = \%has_no_changes_cache],
-   LIST_CACHE = 'FAULT',
-   ;
+   SCALAR_CACHE = ['HASH' = 
\%has_no_changes_cache],
+   LIST_CACHE = 'FAULT',
+   ;
+
+   tie_for_persistent_memoization(\%_rev_list_cache,
+   $cache_path/_rev_list);
+   memoize '_rev_list',
+   SCALAR_CACHE = 'FAULT',
+   LIST_CACHE = ['HASH' = \%_rev_list_cache],
+   ;
+
}
 
sub unmemoize_svn_mergeinfo_functions {
@@ -1629,6 +1638,7 @@ sub tie_for_persistent_memoization {
Memoize::unmemoize 'lookup_svn_merge';
Memoize::unmemoize 'check_cherry_pick';
Memoize::unmemoize 'has_no_changes';
+   Memoize::unmemoize '_rev_list';
}
 
sub clear_memoized_mergeinfo_caches {
@@ -1959,11 +1969,25 @@ sub rebuild_from_rev_db {
unlink $path or croak unlink: $!;
 }
 
+#define a global associate map to record rebuild status
+my %rebuildStatus;
+#define a global associate map to record rebuild verify status
+my %rebuildVerifyStatus;
+
 sub rebuild {
my ($self) = @_;
my $map_path = $self-map_path;
my $partial = (-e $map_path  ! -z $map_path);
-   return unless ::verify_ref($self-refname.'^0');
+my $verifyKey = $self-refname.'^0';
+if (! exists $rebuildVerifyStatus{$verifyKey} || ! defined 
$rebuildVerifyStatus{$verifyKey} ) {
+my $verifyResult = ::verify_ref($verifyKey);
+if ($verifyResult) {
+$rebuildVerifyStatus{$verifyKey} = 1;
+}
+}
+if (! exists $rebuildVerifyStatus{$verifyKey}) {
+return;
+}
if (!$partial  ($self-use_svm_props || $self-no_metadata)) {
my $rev_db = $self-rev_db_path;
$self-rebuild_from_rev_db($rev_db);
@@ -1977,10 +2001,21 @@ sub rebuild {
print Rebuilding $map_path ...\n if (!$partial);
my ($base_rev, $head) = ($partial ? $self-rev_map_max_norebuild(1) :
(undef, undef));
+my $keyValue = ($head ? $head.. : ) . $self-refname;
+if (exists $rebuildStatus{$keyValue}) {
+print Done rebuilding $map_path\n if (!$partial || !$head);
+my $rev_db_path = $self-rev_db_path;
+if (-f $self-rev_db_path) {
+unlink $self-rev_db_path or croak unlink: $!;
+}
+$self-unlink_rev_db_symlink;
+   return;
+}
my ($log, $ctx) =
command_output_pipe(qw/rev-list --pretty=raw --reverse/,
-   ($head ? $head.. : ) . $self-refname,
+