Merely pruning mail synchronization information was
insufficient for Maildir: renames are common in Maildir
and we need to detect, after the fact, any renames which
happened while lei-daemon wasn't running.

Running this command could make "lei index" far more
useful...
---
 MANIFEST                                      |  2 +
 lib/PublicInbox/LEI.pm                        |  3 +-
 ...PruneMailSync.pm => LeiRefreshMailSync.pm} | 36 +++++++---
 lib/PublicInbox/LeiStore.pm                   |  5 ++
 t/lei-export-kw.t                             |  1 -
 t/lei-refresh-mail-sync.t                     | 67 +++++++++++++++++++
 6 files changed, 103 insertions(+), 11 deletions(-)
 rename lib/PublicInbox/{LeiPruneMailSync.pm => LeiRefreshMailSync.pm} (70%)
 create mode 100644 t/lei-refresh-mail-sync.t

diff --git a/MANIFEST b/MANIFEST
index 640eabd1..221cb992 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -238,6 +238,7 @@ lib/PublicInbox/LeiPmdir.pm
 lib/PublicInbox/LeiPruneMailSync.pm
 lib/PublicInbox/LeiQuery.pm
 lib/PublicInbox/LeiRediff.pm
+lib/PublicInbox/LeiRefreshMailSync.pm
 lib/PublicInbox/LeiRemote.pm
 lib/PublicInbox/LeiRm.pm
 lib/PublicInbox/LeiRmWatch.pm
@@ -450,6 +451,7 @@ t/lei-q-kw.t
 t/lei-q-remote-import.t
 t/lei-q-save.t
 t/lei-q-thread.t
+t/lei-refresh-mail-sync.t
 t/lei-sigpipe.t
 t/lei-tag.t
 t/lei-up.t
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index ec103231..9794497b 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -263,7 +263,7 @@ our %CMD = ( # sorted in order of importance/use:
        @net_opt, @c_opt ],
 'forget-mail-sync' => [ 'LOCATION...',
        'forget sync information for a mail folder', @c_opt ],
-'prune-mail-sync' => [ 'LOCATION...|--all',
+'refresh-mail-sync' => [ 'LOCATION...|--all',
        'prune dangling sync data for a mail folder', 'all:s', @c_opt ],
 'export-kw' => [ 'LOCATION...|--all',
        'one-time export of keywords of sync sources',
@@ -616,6 +616,7 @@ sub pkt_ops {
        $ops->{x_it} = [ \&x_it, $lei ];
        $ops->{child_error} = [ \&child_error, $lei ];
        $ops->{incr} = [ \&incr, $lei ];
+       $ops->{sto_done_request} = [ \&sto_done_request, $lei, $lei->{sock} ];
        $ops;
 }
 
diff --git a/lib/PublicInbox/LeiPruneMailSync.pm b/lib/PublicInbox/LeiRefreshMailSync.pm
similarity index 70%
rename from lib/PublicInbox/LeiPruneMailSync.pm
rename to lib/PublicInbox/LeiRefreshMailSync.pm
index 3678bd04..07b0aa52 100644
--- a/lib/PublicInbox/LeiPruneMailSync.pm
+++ b/lib/PublicInbox/LeiRefreshMailSync.pm
@@ -1,16 +1,20 @@
 # Copyright (C) 2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# "lei prune-mail-sync" drops dangling sync information
-package PublicInbox::LeiPruneMailSync;
+# "lei refresh-mail-sync" drops dangling sync information
+# and attempts to detect moved files
+package PublicInbox::LeiRefreshMailSync;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
 use PublicInbox::LeiExportKw;
 use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::ContentHash qw(git_sha);
+use PublicInbox::Import;
 
 sub eml_match ($$) {
        my ($eml, $oidbin) = @_;
+       $eml->header_set($_) for @PublicInbox::Import::UNWANTED_HEADERS;
        $oidbin eq git_sha(length($oidbin) == 20 ? 1 : 256, $eml)->digest;
 }
 
@@ -20,7 +24,7 @@ sub prune_mdir { # lms->each_src callback
        for my $d (@try) {
                my $src = "$mdir/$d/$$id";
                if ($self->{verify}) {
-                       my $eml = eml_from_path($src) or next;
+                       my $eml = eml_from_path($src) // next;
                        return if eml_match($eml, $oidbin);
                } elsif (-f $src) {
                        return;
@@ -38,12 +42,27 @@ sub prune_imap { # lms->each_src callback
        $self->{lei}->{sto}->ipc_do('lms_clear_src', $url, $uid);
 }
 
+# detects missed file moves
+sub pmdir_cb { # called via LeiPmdir->each_mdir_fn
+       my ($self, $f, $fl) = @_;
+       my ($folder, $bn) = ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) or
+               die "BUG: $f was not from a Maildir?";
+       substr($folder, 0, 0) = 'maildir:'; # add prefix
+       my $lms = $self->{-lms_ro} //= $self->{lei}->lms;
+       return if defined($lms->name_oidbin($folder, $bn));
+       my $eml = eml_from_path($f) // return;
+       my $oidbin = $self->{lei}->git_oid($eml)->digest;
+       $self->{lei}->{sto}->ipc_do('lms_set_src', $oidbin, $folder, \$bn);
+}
+
 sub input_path_url { # overrides PublicInbox::LeiInput::input_path_url
        my ($self, $input, @args) = @_;
        my $lms = $self->{-lms_ro} //= $self->{lei}->lms;
        if ($input =~ /\Amaildir:(.+)/i) {
-               my $mdir = $1;
-               $lms->each_src($input, \&prune_mdir, $self, $mdir);
+               $lms->each_src($input, \&prune_mdir, $self, my $mdir = $1);
+               $self->{lse} //= $self->{lei}->{sto}->search;
+               # call pmdir_cb (via maildir_each_file -> each_mdir_fn)
+               PublicInbox::LeiInput::input_path_url($self, $input);
        } elsif ($input =~ m!\Aimaps?://!i) {
                my $uri = PublicInbox::URIimap->new($input);
                my $mic = $self->{lei}->{net}->mic_for_folder($uri);
@@ -51,10 +70,10 @@ sub input_path_url { # overrides PublicInbox::LeiInput::input_path_url
                $uids = +{ map { $_ => undef } @$uids };
                $lms->each_src($$uri, \&prune_imap, $self, $uids, $$uri);
        } else { die "BUG: $input not supported" }
-       my $wait = $self->{lei}->{sto}->ipc_do('done');
+       $self->{lei}->{pkt_op_p}->pkt_do('sto_done_request');
 }
 
-sub lei_prune_mail_sync {
+sub lei_refresh_mail_sync {
        my ($lei, @folders) = @_;
        my $sto = $lei->_lei_store or return $lei->fail(<<EOM);
 lei/store uninitialized, see lei-import(1)
@@ -78,7 +97,6 @@ EOM
        $self->prepare_inputs($lei, \@folders) or return;
        my $j = $lei->{opt}->{jobs} || scalar(@{$self->{inputs}}) || 1;
        my $ops = {};
-       $sto->write_prepare($lei);
        $lei->{auth}->op_merge($ops, $self) if $lei->{auth};
        $self->{-wq_nr_workers} = $j // 1; # locked
        (my $op_c, $ops) = $lei->workers_start($self, $j, $ops);
@@ -89,7 +107,7 @@ EOM
 }
 
 no warnings 'once';
-*_complete_prune_mail_sync = \&PublicInbox::LeiExportKw::_complete_export_kw;
+*_complete_refresh_mail_sync = \&PublicInbox::LeiExportKw::_complete_export_kw;
 *ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
 *net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
 
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index e8bcb04e..32f55abd 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -293,6 +293,11 @@ sub set_sync_info {
        _lms_rw($self)->set_src(pack('H*', $oidhex), $folder, $id);
 }
 
+sub lms_set_src {
+       my ($self, $oidbin, $folder, $id) = @_;
+       _lms_rw($self)->set_src($oidbin, $folder, $id);
+}
+
 sub _remove_if_local { # git->cat_async arg
        my ($bref, $oidhex, $type, $size, $self) = @_;
        $self->{im}->remove($bref) if $bref;
diff --git a/t/lei-export-kw.t b/t/lei-export-kw.t
index 9531949a..1fe940bb 100644
--- a/t/lei-export-kw.t
+++ b/t/lei-export-kw.t
@@ -6,7 +6,6 @@ use File::Copy qw(cp);
 use File::Path qw(make_path);
 require_mods(qw(lei -imapd Mail::IMAPClient));
 my ($tmpdir, $for_destroy) = tmpdir;
-my ($ro_home, $cfg_path) = setup_public_inboxes;
 my $expect = eml_load('t/data/0001.patch');
 test_lei({ tmpdir => $tmpdir }, sub {
        my $home = $ENV{HOME};
diff --git a/t/lei-refresh-mail-sync.t b/t/lei-refresh-mail-sync.t
new file mode 100644
index 00000000..ff558277
--- /dev/null
+++ b/t/lei-refresh-mail-sync.t
@@ -0,0 +1,67 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict; use v5.10.1; use PublicInbox::TestCommon;
+require_mods(qw(lei));
+
+my $stop_daemon = sub { # needed since we don't have inotify
+       lei_ok qw(daemon-pid);
+       chomp(my $pid = $lei_out);
+       $pid > 0 or xbail "bad pid: $pid";
+       kill('TERM', $pid) or xbail "kill: $!";
+       for (0..10) {
+               tick;
+               kill(0, $pid) or last;
+       }
+       kill(0, $pid) and xbail "daemon still running (PID:$pid)";
+};
+
+test_lei({ daemon_only => 1 }, sub {
+       my $d = "$ENV{HOME}/d";
+       my ($ro_home, $cfg_path) = setup_public_inboxes;
+       lei_ok qw(daemon-pid);
+       lei_ok qw(add-external), "$ro_home/t2";
+       lei_ok qw(q mid:testmessage@example.com -o), "Maildir:$d";
+       my (@o) = glob("$d/*/*");
+       scalar(@o) == 1 or xbail('multiple results', \@o);
+       my ($bn0) = ($o[0] =~ m!/([^/]+)\z!);
+
+       my $oid = '9bf1002c49eb075df47247b74d69bcd555e23422';
+       lei_ok 'inspect', "blob:$oid";
+       my $before = json_utf8->decode($lei_out);
+       my $exp0 = { 'mail-sync' => { "maildir:$d" => [ $bn0 ] } };
+       is_deeply($before, $exp0, 'inspect shows expected');
+
+       $stop_daemon->();
+       my $dst = $o[0];
+       $dst =~ s/:2,.*\z// and $dst =~ s!/cur/!/new/! and
+               rename($o[0], $dst) or xbail "rename($o[0] => $dst): $!";
+
+       lei_ok 'inspect', "blob:$oid";
+       is_deeply(json_utf8->decode($lei_out),
+               $before, 'inspect unchanged immediately after restart');
+       lei_ok 'refresh-mail-sync', '--all';
+       lei_ok 'inspect', "blob:$oid";
+       my ($bn1) = ($dst =~ m!/([^/]+)\z!);
+       my $exp1 = { 'mail-sync' => { "maildir:$d" => [ $bn1 ] } };
+       is_deeply(json_utf8->decode($lei_out), $exp1,
+               'refresh-mail-sync updated location');
+
+       $stop_daemon->();
+       rename($dst, "$d/unwatched") or xbail "rename $dst out-of-the-way $!";
+
+       lei_ok 'refresh-mail-sync', $d;
+       lei_ok 'inspect', "blob:$oid";
+       is($lei_out, '{}', 'no known locations after "removal"');
+       lei_ok 'refresh-mail-sync', "Maildir:$d";
+
+       $stop_daemon->();
+       rename("$d/unwatched", $dst) or xbail "rename $dst back";
+
+       lei_ok 'refresh-mail-sync', "Maildir:$d";
+       lei_ok 'inspect', "blob:$oid";
+       is_deeply(json_utf8->decode($lei_out), $exp1,
+               'replaced file noted again');
+});
+
+done_testing;
--
unsubscribe: one-click, see List-Unsubscribe header
archive: https://public-inbox.org/meta/

Reply via email to