While uncommon, some git repos have hundreds of thousands of
refs, and slurping the `git show-ref' output for such a repo into
memory can bloat the heap.  Introduce a sha_all sub in
PublicInbox::SHA which feeds a digest in 64K chunks until EOF
and relies on autodie for checking sysread errors.
---
lib/PublicInbox/CodeSearchIdx.pm | 7 ++-----
lib/PublicInbox/Fetch.pm | 4 ++--
lib/PublicInbox/Git.pm | 6 ++----
lib/PublicInbox/LeiMirror.pm | 14 +++++++-------
lib/PublicInbox/SHA.pm | 11 ++++++++++-
5 files changed, 23 insertions(+), 19 deletions(-)
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index e31432b9..aeee37c0 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -45,7 +45,7 @@ use POSIX qw(WNOHANG SEEK_SET);
use File::Path ();
use File::Spec ();
use List::Util qw(max);
-use PublicInbox::SHA qw(sha256_hex);
+use PublicInbox::SHA qw(sha256_hex sha_all);
use PublicInbox::Search qw(xap_terms);
use PublicInbox::SearchIdx qw(add_val);
use PublicInbox::Config qw(glob2re rel2abs_collapsed);
@@ -386,10 +386,7 @@ sub fp_fini { # run_git cb
my (undef, $self, $git, $prep_repo) = @_;
my $refs = $git->{-repo}->{refs} // die 'BUG: no {-repo}->{refs}';
sysseek($refs, 0, SEEK_SET);
- my $buf;
- my $dig = PublicInbox::SHA->new(256);
- while (sysread($refs, $buf, 65536)) { $dig->add($buf) }
- $git->{-repo}->{fp} = $dig->hexdigest;
+ $git->{-repo}->{fp} = sha_all(256, $refs)->hexdigest;
}
sub ct_start ($$$) {
diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm
index 6e9b1e94..e41dd448 100644
--- a/lib/PublicInbox/Fetch.pm
+++ b/lib/PublicInbox/Fetch.pm
@@ -10,6 +10,7 @@ use PublicInbox::Admin;
use PublicInbox::LEI;
use PublicInbox::LeiCurl;
use PublicInbox::LeiMirror;
+use PublicInbox::SHA qw(sha_all);
use File::Temp ();
sub new { bless {}, __PACKAGE__ }
@@ -92,9 +93,8 @@ sub do_manifest ($$$) {
sub get_fingerprint2 {
my ($git_dir) = @_;
- require PublicInbox::SHA;
my $rd = popen_rd([qw(git show-ref)], undef, { -C => $git_dir });
- PublicInbox::SHA::sha256(do { local $/; <$rd> });
+ sha_all(256, $rd)->digest; # ignore show-ref errors
}
sub writable_dir ($) {
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 476dcf30..9c26d8bf 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -23,7 +23,7 @@ use PublicInbox::ProcessIONBF;
use PublicInbox::Tmpfile;
use IO::Poll qw(POLLIN);
use Carp qw(croak carp);
-use PublicInbox::SHA ();
+use PublicInbox::SHA qw(sha_all);
our %HEXLEN2SHA = (40 => 1, 64 => 256);
our %OFMT2HEXLEN = (sha1 => 40, sha256 => 64);
our @EXPORT_OK = qw(git_unquote git_quote %HEXLEN2SHA %OFMT2HEXLEN read_all);
@@ -620,10 +620,8 @@ sub manifest_entry {
$ent->{reference} = $buf;
}
}
- my $dig = PublicInbox::SHA->new(1);
- while (CORE::read($sr, $buf, 65536)) { $dig->add($buf) }
+ $ent->{fingerprint} = sha_all(1, $sr)->hexdigest;
CORE::close $sr or return; # empty, uninitialized git repo
- $ent->{fingerprint} = $dig->hexdigest;
$ent->{modified} = modified(undef, $mod);
chomp($buf = <$own> // '');
utf8::decode($buf);
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index 47fb767b..43e59e6c 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -19,10 +19,10 @@ use PublicInbox::Inbox;
use PublicInbox::Git qw(read_all);
use PublicInbox::LeiCurl;
use PublicInbox::OnDestroy;
-use PublicInbox::SHA qw(sha256_hex sha1_hex);
+use PublicInbox::SHA qw(sha256_hex sha_all);
use POSIX qw(strftime);
-use autodie qw(chdir chmod close open pipe readlink seek symlink sysopen
- truncate unlink);
+use autodie qw(chdir chmod close open pipe readlink
+ seek symlink sysopen sysseek truncate unlink);
our $LIVE; # pid => callback
our $FGRP_TODO; # objstore -> [[ to resume ], [ to clone ]]
@@ -533,10 +533,10 @@ sub fp_done {
}
return if !keep_going($self);
my $fh = delete $self->{-show_ref} // die 'BUG: no show-ref output';
- seek($fh, SEEK_SET, 0);
+ sysseek($fh, 0, SEEK_SET); # rewind: sha_all uses sysread
$self->{-ent} // die 'BUG: no -ent';
my $A = $self->{-ent}->{fingerprint} // die 'BUG: no fingerprint';
- my $B = sha1_hex(read_all($fh));
+ my $B = sha_all(1, $fh)->hexdigest;
return $cb->($self, @arg) if $A ne $B;
$self->{lei}->qerr("# $self->{-key} up-to-date");
}
@@ -730,10 +730,10 @@ sub up_fp_done {
my ($self) = @_;
return if !keep_going($self);
my $fh = delete $self->{-show_ref_up} // die 'BUG: no show-ref output';
- seek($fh, SEEK_SET, 0);
+ sysseek($fh, 0, SEEK_SET); # rewind: sha_all uses sysread
$self->{-ent} // die 'BUG: no -ent';
my $A = $self->{-ent}->{fingerprint} // die 'BUG: no fingerprint';
- my $B = sha1_hex(read_all($fh));
+ my $B = sha_all(1, $fh)->hexdigest;
return if $A eq $B;
$self->{-ent}->{fingerprint} = $B;
push @{$self->{chg}->{fp_mismatch}}, $self->{-key};
diff --git a/lib/PublicInbox/SHA.pm b/lib/PublicInbox/SHA.pm
index 81f62618..3fa8530e 100644
--- a/lib/PublicInbox/SHA.pm
+++ b/lib/PublicInbox/SHA.pm
@@ -12,7 +12,8 @@
package PublicInbox::SHA;
use v5.12;
require Exporter;
-our @EXPORT_OK = qw(sha1_hex sha256_hex sha256);
+our @EXPORT_OK = qw(sha1_hex sha256_hex sha256 sha_all);
+use autodie qw(sysread);
our @ISA;
BEGIN {
@@ -55,4 +56,12 @@ EOM
}
} # /BEGIN
+
+sub sha_all ($$) { # digest all of $fh; $n is passed to ->new (1, 256)
+ my ($n, $fh) = @_;
+ my ($dig, $buf) = (PublicInbox::SHA->new($n));
+ while (sysread($fh, $buf, 65536)) { $dig->add($buf) } # dies on error
+ $dig # returned as an object; callers call ->digest or ->hexdigest
+}
+
1;