This will allow us to quickly check fingerprints against
remotes with a single HTTP(S) request, saving us numerous
`git show-ref' invocations.
---
 Documentation/public-inbox-clone.pod | 10 ++++++++
 lib/PublicInbox/LeiMirror.pm         | 37 ++++++++++++++++++++++++----
 script/public-inbox-clone            |  2 +-
 3 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/Documentation/public-inbox-clone.pod b/Documentation/public-inbox-clone.pod
index 257967d9..9288b175 100644
--- a/Documentation/public-inbox-clone.pod
+++ b/Documentation/public-inbox-clone.pod
@@ -94,6 +94,16 @@ C<DESTINATION> directory.  If only C<--objstore=> is specified
 where C<DIR> is an empty string (C<"">), then C<objstore>
 (C<$DESTINATION/objstore>) is the implied value of C<DIR>.
 
+=item --manifest=FILE
+
+When incrementally updating an existing mirror, load the given
+manifest (typically C<manifest.js.gz>) to speed up updates.
+
+If C<FILE> is not an absolute path, it is relative to the
+C<DESTINATION> directory.  If only C<--manifest=> is specified
+where C<FILE> is an empty string (C<"">), then C<manifest.js.gz>
+(C<$DESTINATION/manifest.js.gz>) is the implied value of C<FILE>.
+
 =item -n
 
 =item --dry-run
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index e744f06a..51cc6d05 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -497,6 +497,13 @@ sub fp_done {
 
 sub cmp_fp_fetch {
        my ($self, $go_fetch) = @_;
+       # $go_fetch is either resume_fetch or fgrp_enqueue
+       my $new = $self->{-ent}->{fingerprint} // die 'BUG: no fingerprint';
+       my $key = $self->{-key} // die 'BUG: no -key';
+       if (my $cur_ent = $self->{-local_manifest}->{$key}) {
+               # runs go_fetch->DESTROY run if eq
+               return $go_fetch->cancel if $cur_ent->{fingerprint} eq $new;
+       }
        my $dst = $self->{cur_dst} // $self->{dst};
        my $cmd = ['git', "--git-dir=$dst", 'show-ref'];
        my $opt = { 2 => $self->{lei}->{2} };
@@ -677,7 +684,10 @@ sub v1_done { # called via OnDestroy
        _write_inbox_config($self);
        my $dst = $self->{cur_dst} // $self->{dst};
        if (defined(my $o = $self->{-ent} ? $self->{-ent}->{owner} : undef)) {
-               run_die([qw(git config -f), "$dst/config", 'gitweb.owner', $o]);
+               my $key = $self->{-key} // die 'BUG: no -key';
+               my $cur = $self->{-local_manifest}->{$key}->{owner} // "\0";
+               $cur eq $o or run_die([qw(git config -f),
+                                       "$dst/config", 'gitweb.owner', $o]);
        }
        my $o = "$dst/objects";
        if (open(my $fh, '<', my $fn = "$o/info/alternates")) {;
@@ -796,6 +806,19 @@ sub decode_manifest ($$$) {
        $m;
 }
 
+sub load_current_manifest ($) {
+       my ($self) = @_;
+       my $fn = $self->{-manifest} // return;
+       if (open(my $fh, '<', $fn)) {
+               decode_manifest($fh, $fn, $fn);
+       } elsif ($!{ENOENT}) { # non-fatal, we can just do it slowly
+               warn "open($fn): $!\n";
+               undef;
+       } else {
+               die "open($fn): $!\n";
+       }
+}
+
 sub multi_inbox ($$$) {
        my ($self, $path, $m) = @_;
        my $incl = $self->{lei}->{opt}->{include};
@@ -932,6 +955,7 @@ sub try_manifest {
                warn $@;
                return try_scrape($self);
        }
+       local $self->{-local_manifest} = load_current_manifest($self);
        my ($path_pfx, $n, $multi) = multi_inbox($self, \$path, $m);
        return $lei->child_error(1, $multi) if !ref($multi);
        my $v2 = delete $multi->{v2};
@@ -1012,10 +1036,13 @@ sub do_mirror { # via wq_io_do or public-inbox-clone
                $ic =~ /\A(?:v1|v2|always|never)\z/s or die <<"";
 --inbox-config must be one of `always', `v2', `v1', or `never'
 
-               if (defined(my $os = $lei->{opt}->{objstore})) {
-                       $os = 'objstore' if $os eq ''; # --objstore w/o args
-                       $os = "$self->{dst}/$os" if $os !~ m!\A/!;
-                       $self->{-objstore} = $os;
+               # we support --objstore= and --manifest= with '' (empty string)
+               for my $default (qw(objstore manifest.js.gz)) {
+                       my ($k) = (split(/\./, $default))[0];
+                       my $v = $lei->{opt}->{$k} // next;
+                       $v = $default if $v eq '';
+                       $v = "$self->{dst}/$v" if $v !~ m!\A/!;
+                       $self->{"-$k"} = $v;
                }
                local $LIVE;
                my $iv = $lei->{opt}->{'inbox-version'} //
diff --git a/script/public-inbox-clone b/script/public-inbox-clone
index e38d7b0d..a11c6874 100755
--- a/script/public-inbox-clone
+++ b/script/public-inbox-clone
@@ -23,7 +23,7 @@ options:
     -C DIR            chdir to specified directory
 EOF
 GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ include|I=s@ exclude=s@
-       inbox-config=s inbox-version=i objstore=s
+       inbox-config=s inbox-version=i objstore=s manifest=s
        dry-run|n jobs|j=i no-torsocks torsocks=s epoch=s)) or die $help;
 if ($opt->{help}) { print $help; exit };
 require PublicInbox::Admin; # loads Config

Reply via email to