[PATCH v2] improve git svn performance
From: manjian2006 manjian2...@gmail.com * perl/Git/SVN.pm Modified according to Eric Wong normalper...@yhbt.net Hi, I'm interested in this. How much did performance improve by (and how many revisions is the repository)? Our svn server is on a LAN, and the repository has 15152 revisions. Unoptimized git-svn took 10 hours or more to finish, while the optimized one takes only 3-4 hours. According to some profiling data, the _rev_list and rebuild subroutines consume a large proportion of the time. So I improved _rev_list's performance by memoizing its results, and avoided subprocess invocation by memoizing the rebuild subroutine's key data. Signed-off-by: manjian2006 manjian2...@gmail.com --- perl/Git/SVN.pm | 41 ++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index 5273ee8..dc7942b 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -1599,6 +1599,7 @@ sub tie_for_persistent_memoization { my %lookup_svn_merge_cache; my %check_cherry_pick_cache; my %has_no_changes_cache; + my %_rev_list_cache; tie_for_persistent_memoization(\%lookup_svn_merge_cache, $cache_path/lookup_svn_merge); @@ -1620,6 +1621,14 @@ sub tie_for_persistent_memoization { SCALAR_CACHE = ['HASH' = \%has_no_changes_cache], LIST_CACHE = 'FAULT', ; + + tie_for_persistent_memoization(\%_rev_list_cache, + $cache_path/_rev_list); + memoize '_rev_list', + SCALAR_CACHE = 'FAULT', + LIST_CACHE = ['HASH' = \%_rev_list_cache], + ; + } sub unmemoize_svn_mergeinfo_functions { @@ -1629,6 +1638,7 @@ sub tie_for_persistent_memoization { Memoize::unmemoize 'lookup_svn_merge'; Memoize::unmemoize 'check_cherry_pick'; Memoize::unmemoize 'has_no_changes'; + Memoize::unmemoize '_rev_list'; } sub clear_memoized_mergeinfo_caches { @@ -1959,11 +1969,25 @@ sub rebuild_from_rev_db { unlink $path or croak unlink: $!; } +#define a global associate map to record rebuild status +my %rebuild_status; +#define a global associate map to record rebuild verify status +my %rebuild_verify_status; + sub 
rebuild { my ($self) = @_; my $map_path = $self-map_path; my $partial = (-e $map_path ! -z $map_path); - return unless ::verify_ref($self-refname.'^0'); + my $verify_key = $self-refname.'^0'; + if (! exists $rebuild_verify_status{$verify_key} || ! defined $rebuild_verify_status{$verify_key} ) { + my $verify_result = ::verify_ref($verify_key); + if ($verify_result) { + $rebuild_verify_status{$verify_key} = 1; + } + } + if (! exists $rebuild_verify_status{$verify_key}) { + return; + } if (!$partial ($self-use_svm_props || $self-no_metadata)) { my $rev_db = $self-rev_db_path; $self-rebuild_from_rev_db($rev_db); @@ -1977,10 +2001,21 @@ sub rebuild { print Rebuilding $map_path ...\n if (!$partial); my ($base_rev, $head) = ($partial ? $self-rev_map_max_norebuild(1) : (undef, undef)); + my $key_value = ($head ? $head.. : ) . $self-refname; + if (exists $rebuild_status{$key_value}) { + print Done rebuilding $map_path\n if (!$partial || !$head); + my $rev_db_path = $self-rev_db_path; + if (-f $self-rev_db_path) { + unlink $self-rev_db_path or croak unlink: $!; + } + $self-unlink_rev_db_symlink; + return; + } my ($log, $ctx) = - command_output_pipe(qw/rev-list --pretty=raw --reverse/, - ($head ? $head.. : ) . $self-refname, + command_output_pipe(qw/rev-list --pretty=raw --reverse/, + $key_value, '--'); + $rebuild_status{$key_value} = 1; my $metadata_url = $self-metadata_url; remove_username($metadata_url); my $svn_uuid = $self-rewrite_uuid || $self-ra_uuid; -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3] git-svn: memoize _rev_list and rebuild
From: lin zuojian manjian2...@gmail.com According to profile data, _rev_list and rebuild consume a large portion of time. Memoize the results of _rev_list and memoize rebuild internals to avoid subprocess invocation. When importing 15152 revisions on a LAN, time improved from 10 hours to 3-4 hours. Signed-off-by: lin zuojian manjian2...@gmail.com --- perl/Git/SVN.pm | 41 ++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index 5273ee8..6e804a2 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -1599,6 +1599,7 @@ sub tie_for_persistent_memoization { my %lookup_svn_merge_cache; my %check_cherry_pick_cache; my %has_no_changes_cache; + my %_rev_list_cache; tie_for_persistent_memoization(\%lookup_svn_merge_cache, $cache_path/lookup_svn_merge); @@ -1620,6 +1621,14 @@ sub tie_for_persistent_memoization { SCALAR_CACHE = ['HASH' = \%has_no_changes_cache], LIST_CACHE = 'FAULT', ; + + tie_for_persistent_memoization(\%_rev_list_cache, + $cache_path/_rev_list); + memoize '_rev_list', + SCALAR_CACHE = 'FAULT', + LIST_CACHE = ['HASH' = \%_rev_list_cache], + ; + } sub unmemoize_svn_mergeinfo_functions { @@ -1629,6 +1638,7 @@ sub tie_for_persistent_memoization { Memoize::unmemoize 'lookup_svn_merge'; Memoize::unmemoize 'check_cherry_pick'; Memoize::unmemoize 'has_no_changes'; + Memoize::unmemoize '_rev_list'; } sub clear_memoized_mergeinfo_caches { @@ -1959,11 +1969,25 @@ sub rebuild_from_rev_db { unlink $path or croak unlink: $!; } +#define a global associate map to record rebuild status +my %rebuild_status; +#define a global associate map to record rebuild verify status +my %rebuild_verify_status; + sub rebuild { my ($self) = @_; my $map_path = $self-map_path; my $partial = (-e $map_path ! 
-z $map_path); - return unless ::verify_ref($self-refname.'^0'); + my $verify_key = $self-refname.'^0'; + if (!$rebuild_verify_status{$verify_key}) { + my $verify_result = ::verify_ref($verify_key); + if ($verify_result) { + $rebuild_verify_status{$verify_key} = 1; + } + } + if (!$rebuild_verify_status{$verify_key}) { + return; + } if (!$partial ($self-use_svm_props || $self-no_metadata)) { my $rev_db = $self-rev_db_path; $self-rebuild_from_rev_db($rev_db); @@ -1977,10 +2001,21 @@ sub rebuild { print Rebuilding $map_path ...\n if (!$partial); my ($base_rev, $head) = ($partial ? $self-rev_map_max_norebuild(1) : (undef, undef)); + my $key_value = ($head ? $head.. : ) . $self-refname; + if (exists $rebuild_status{$key_value}) { + print Done rebuilding $map_path\n if (!$partial || !$head); + my $rev_db_path = $self-rev_db_path; + if (-f $self-rev_db_path) { + unlink $self-rev_db_path or croak unlink: $!; + } + $self-unlink_rev_db_symlink; + return; + } my ($log, $ctx) = - command_output_pipe(qw/rev-list --pretty=raw --reverse/, - ($head ? $head.. : ) . $self-refname, + command_output_pipe(qw/rev-list --pretty=raw --reverse/, + $key_value, '--'); + $rebuild_status{$key_value} = 1; my $metadata_url = $self-metadata_url; remove_username($metadata_url); my $svn_uuid = $self-rewrite_uuid || $self-ra_uuid; -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] improve git svn performance Hi, I am trying to improve git svn's performance based on some profiling data. As the data showed, the _rev_list and rebuild subroutines are consuming a l...
From: linzj li...@ucweb.com --- perl/Git/SVN.pm | 63 - 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index 5273ee8..3cd1c8f 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -1599,27 +1599,36 @@ sub tie_for_persistent_memoization { my %lookup_svn_merge_cache; my %check_cherry_pick_cache; my %has_no_changes_cache; + my %_rev_list_cache; tie_for_persistent_memoization(\%lookup_svn_merge_cache, - $cache_path/lookup_svn_merge); + $cache_path/lookup_svn_merge); memoize 'lookup_svn_merge', - SCALAR_CACHE = 'FAULT', - LIST_CACHE = ['HASH' = \%lookup_svn_merge_cache], - ; + SCALAR_CACHE = 'FAULT', + LIST_CACHE = ['HASH' = \%lookup_svn_merge_cache], + ; tie_for_persistent_memoization(\%check_cherry_pick_cache, - $cache_path/check_cherry_pick); + $cache_path/check_cherry_pick); memoize 'check_cherry_pick', - SCALAR_CACHE = 'FAULT', - LIST_CACHE = ['HASH' = \%check_cherry_pick_cache], - ; + SCALAR_CACHE = 'FAULT', + LIST_CACHE = ['HASH' = \%check_cherry_pick_cache], + ; tie_for_persistent_memoization(\%has_no_changes_cache, - $cache_path/has_no_changes); + $cache_path/has_no_changes); memoize 'has_no_changes', - SCALAR_CACHE = ['HASH' = \%has_no_changes_cache], - LIST_CACHE = 'FAULT', - ; + SCALAR_CACHE = ['HASH' = \%has_no_changes_cache], + LIST_CACHE = 'FAULT', + ; + + tie_for_persistent_memoization(\%_rev_list_cache, + $cache_path/_rev_list); + memoize '_rev_list', + SCALAR_CACHE = 'FAULT', + LIST_CACHE = ['HASH' = \%_rev_list_cache], + ; + } sub unmemoize_svn_mergeinfo_functions { @@ -1629,6 +1638,7 @@ sub tie_for_persistent_memoization { Memoize::unmemoize 'lookup_svn_merge'; Memoize::unmemoize 'check_cherry_pick'; Memoize::unmemoize 'has_no_changes'; + Memoize::unmemoize '_rev_list'; } sub clear_memoized_mergeinfo_caches { @@ -1959,11 +1969,25 @@ sub rebuild_from_rev_db { unlink $path or croak unlink: $!; } +#define a global associate map to record rebuild status +my %rebuildStatus; +#define a global associate map 
to record rebuild verify status +my %rebuildVerifyStatus; + sub rebuild { my ($self) = @_; my $map_path = $self-map_path; my $partial = (-e $map_path ! -z $map_path); - return unless ::verify_ref($self-refname.'^0'); +my $verifyKey = $self-refname.'^0'; +if (! exists $rebuildVerifyStatus{$verifyKey} || ! defined $rebuildVerifyStatus{$verifyKey} ) { +my $verifyResult = ::verify_ref($verifyKey); +if ($verifyResult) { +$rebuildVerifyStatus{$verifyKey} = 1; +} +} +if (! exists $rebuildVerifyStatus{$verifyKey}) { +return; +} if (!$partial ($self-use_svm_props || $self-no_metadata)) { my $rev_db = $self-rev_db_path; $self-rebuild_from_rev_db($rev_db); @@ -1977,10 +2001,21 @@ sub rebuild { print Rebuilding $map_path ...\n if (!$partial); my ($base_rev, $head) = ($partial ? $self-rev_map_max_norebuild(1) : (undef, undef)); +my $keyValue = ($head ? $head.. : ) . $self-refname; +if (exists $rebuildStatus{$keyValue}) { +print Done rebuilding $map_path\n if (!$partial || !$head); +my $rev_db_path = $self-rev_db_path; +if (-f $self-rev_db_path) { +unlink $self-rev_db_path or croak unlink: $!; +} +$self-unlink_rev_db_symlink; + return; +} my ($log, $ctx) = command_output_pipe(qw/rev-list --pretty=raw --reverse/, - ($head ? $head.. : ) . $self-refname, + $keyValue, '--'); +$rebuildStatus{$keyValue} = 1; my $metadata_url = $self-metadata_url; remove_username($metadata_url); my $svn_uuid = $self-rewrite_uuid || $self-ra_uuid; -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to
[PATCH] improve git svn performance
From: linzj li...@ucweb.com Hi, I am trying to improve git svn's performance based on some profiling data. As the data showed, the _rev_list and rebuild subroutines are consuming a large proportion of the time. So I improved _rev_list's performance by memoizing its results, and avoided subprocess invocation by memoizing the rebuild subroutine's key data. Here's my patch: --- perl/Git/SVN.pm | 63 - 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index 5273ee8..3cd1c8f 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -1599,27 +1599,36 @@ sub tie_for_persistent_memoization { my %lookup_svn_merge_cache; my %check_cherry_pick_cache; my %has_no_changes_cache; + my %_rev_list_cache; tie_for_persistent_memoization(\%lookup_svn_merge_cache, - $cache_path/lookup_svn_merge); + $cache_path/lookup_svn_merge); memoize 'lookup_svn_merge', - SCALAR_CACHE = 'FAULT', - LIST_CACHE = ['HASH' = \%lookup_svn_merge_cache], - ; + SCALAR_CACHE = 'FAULT', + LIST_CACHE = ['HASH' = \%lookup_svn_merge_cache], + ; tie_for_persistent_memoization(\%check_cherry_pick_cache, - $cache_path/check_cherry_pick); + $cache_path/check_cherry_pick); memoize 'check_cherry_pick', - SCALAR_CACHE = 'FAULT', - LIST_CACHE = ['HASH' = \%check_cherry_pick_cache], - ; + SCALAR_CACHE = 'FAULT', + LIST_CACHE = ['HASH' = \%check_cherry_pick_cache], + ; tie_for_persistent_memoization(\%has_no_changes_cache, - $cache_path/has_no_changes); + $cache_path/has_no_changes); memoize 'has_no_changes', - SCALAR_CACHE = ['HASH' = \%has_no_changes_cache], - LIST_CACHE = 'FAULT', - ; + SCALAR_CACHE = ['HASH' = \%has_no_changes_cache], + LIST_CACHE = 'FAULT', + ; + + tie_for_persistent_memoization(\%_rev_list_cache, + $cache_path/_rev_list); + memoize '_rev_list', + SCALAR_CACHE = 'FAULT', + LIST_CACHE = ['HASH' = \%_rev_list_cache], + ; + } sub unmemoize_svn_mergeinfo_functions { @@ -1629,6 +1638,7 @@ sub tie_for_persistent_memoization { Memoize::unmemoize 'lookup_svn_merge'; 
Memoize::unmemoize 'check_cherry_pick'; Memoize::unmemoize 'has_no_changes'; + Memoize::unmemoize '_rev_list'; } sub clear_memoized_mergeinfo_caches { @@ -1959,11 +1969,25 @@ sub rebuild_from_rev_db { unlink $path or croak unlink: $!; } +#define a global associate map to record rebuild status +my %rebuildStatus; +#define a global associate map to record rebuild verify status +my %rebuildVerifyStatus; + sub rebuild { my ($self) = @_; my $map_path = $self-map_path; my $partial = (-e $map_path ! -z $map_path); - return unless ::verify_ref($self-refname.'^0'); +my $verifyKey = $self-refname.'^0'; +if (! exists $rebuildVerifyStatus{$verifyKey} || ! defined $rebuildVerifyStatus{$verifyKey} ) { +my $verifyResult = ::verify_ref($verifyKey); +if ($verifyResult) { +$rebuildVerifyStatus{$verifyKey} = 1; +} +} +if (! exists $rebuildVerifyStatus{$verifyKey}) { +return; +} if (!$partial ($self-use_svm_props || $self-no_metadata)) { my $rev_db = $self-rev_db_path; $self-rebuild_from_rev_db($rev_db); @@ -1977,10 +2001,21 @@ sub rebuild { print Rebuilding $map_path ...\n if (!$partial); my ($base_rev, $head) = ($partial ? $self-rev_map_max_norebuild(1) : (undef, undef)); +my $keyValue = ($head ? $head.. : ) . $self-refname; +if (exists $rebuildStatus{$keyValue}) { +print Done rebuilding $map_path\n if (!$partial || !$head); +my $rev_db_path = $self-rev_db_path; +if (-f $self-rev_db_path) { +unlink $self-rev_db_path or croak unlink: $!; +} +$self-unlink_rev_db_symlink; + return; +} my ($log, $ctx) = command_output_pipe(qw/rev-list --pretty=raw --reverse/, - ($head ? $head.. : ) . $self-refname, +