Since we store all watched directory names as keys in %mdmap,
there is no need to keep a separate array (@mdir) and its
de-duplication hash (%uniq) of those directories around.
t/watch_maildir*.t required changes to unlink trained spam
from the spam Maildir after it has been processed: once we've
trained something as spam, there shouldn't be a need to rescan it.
---
lib/PublicInbox/WatchMaildir.pm | 22 ++++++++--------------
t/watch_maildir.t | 2 ++
t/watch_maildir_v2.t | 2 ++
3 files changed, 12 insertions(+), 14 deletions(-)
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index 621d41bd81d..8d2dc432684 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -40,8 +40,7 @@ sub compile_watchheaders ($) {
sub new {
my ($class, $config) = @_;
- my (%mdmap, @mdir, $spamc);
- my %uniq; # directory => count
+ my (%mdmap, $spamc);
my %imap; # url => [inbox objects] or 'watchspam'
# "publicinboxwatch" is the documented namespace
@@ -54,10 +53,7 @@ sub new {
for my $dir (@$dirs) {
if (is_maildir($dir)) {
# skip "new", no MUA has seen it, yet.
- my $cur = "$dir/cur";
- push @mdir, $cur;
- $uniq{$cur}++;
- $mdmap{$cur} = 'watchspam';
+ $mdmap{"$dir/cur"} = 'watchspam';
} elsif (my $url = imap_url($dir)) {
$imap{$url} = 'watchspam';
} else {
@@ -83,8 +79,6 @@ sub new {
my ($new, $cur) = ("$watch/new", "$watch/cur");
my $cur_dst = $mdmap{$cur} //= [];
return if is_watchspam($cur, $cur_dst, $ibx);
- push @mdir, $new unless $uniq{$new}++;
- push @mdir, $cur unless $uniq{$cur}++;
push @{$mdmap{$new} //= []}, $ibx;
push @$cur_dst, $ibx;
} elsif (my $url = imap_url($watch)) {
@@ -96,17 +90,16 @@ sub new {
}
}
});
- return unless scalar(@mdir) || scalar(keys %imap);
my $mdre;
- if (@mdir) {
- $mdre = join('|', map { quotemeta($_) } @mdir);
+ if (scalar keys %mdmap) {
+ $mdre = join('|', map { quotemeta($_) } keys %mdmap);
$mdre = qr!\A($mdre)/!;
}
+ return unless $mdre || scalar(keys %imap);
bless {
spamcheck => $spamcheck,
mdmap => \%mdmap,
- mdir => \@mdir,
mdre => $mdre,
config => $config,
imap => scalar keys %imap ? \%imap : undef,
@@ -231,7 +224,8 @@ sub watch_fs_init ($) {
$self->{done_timer} //= PublicInbox::DS::requeue($done);
};
require PublicInbox::DirIdle;
- PublicInbox::DirIdle->new($self->{mdir}, $cb); # EPOLL_CTL_ADD
+ # inotify_create + EPOLL_CTL_ADD
+ PublicInbox::DirIdle->new([keys %{$self->{mdmap}}], $cb);
}
# returns the git config section name, e.g [imap "imaps://[email protected]"]
@@ -688,7 +682,7 @@ sub fs_scan_step {
$opendirs->{$dir} = $dh if $n < 0;
}
if ($op && $op eq 'full') {
- foreach my $dir (@{$self->{mdir}}) {
+ foreach my $dir (keys %{$self->{mdmap}}) {
next if $opendirs->{$dir}; # already in progress
my $ok = opendir(my $dh, $dir);
unless ($ok) {
diff --git a/t/watch_maildir.t b/t/watch_maildir.t
index c8658140cf2..c44273f0519 100644
--- a/t/watch_maildir.t
+++ b/t/watch_maildir.t
@@ -84,6 +84,7 @@ PublicInbox::WatchMaildir->new($config)->scan('full');
is(scalar @list, 2, 'two revisions in rev-list');
@list = $git->qx(qw(ls-tree -r --name-only refs/heads/master));
is(scalar @list, 0, 'tree is empty');
+is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam');
# check with scrubbing
{
@@ -105,6 +106,7 @@ More majordomo info at
http://vger.kernel.org/majordomo-info.html\n);
is(scalar @list, 0, 'tree is empty');
@list = $git->qx(qw(rev-list refs/heads/master));
is(scalar @list, 4, 'four revisions in rev-list');
+ is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam');
}
{
diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t
index 6cc8b6ff0e9..f5b8e932985 100644
--- a/t/watch_maildir_v2.t
+++ b/t/watch_maildir_v2.t
@@ -71,6 +71,7 @@ $write_spam->();
is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam');
PublicInbox::WatchMaildir->new($config)->scan('full');
is(($srch->reopen->query(''))[0], 0, 'deleted file');
+is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam');
# check with scrubbing
{
@@ -90,6 +91,7 @@ More majordomo info at
http://vger.kernel.org/majordomo-info.html\n);
PublicInbox::WatchMaildir->new($config)->scan('full');
($nr, $msgs) = $srch->reopen->query('');
is($nr, 0, 'inbox is empty again');
+ is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam');
}
{
--
unsubscribe: one-click, see List-Unsubscribe header
archive: https://public-inbox.org/meta/