> for what it's worth, i stopped trying to do large-scale searching of
> my mail with the mh commands long ago, and instead use mairix:
+1 for mairix: it's fast and encoding-aware.
although, like Paul Fox says, the indexing is a bit slow: 15 seconds for
2.3GB of msgs on a 2.4 GHz Pentium4.
also, the syntax is CRAP so i wrote a wrapper around it.
steve
--
$ grep-mail -h
usage: grep-mail [args] [STR]
STR match STR
-STR do not match STR (must be two characters or more)
+FOLDER match messages in FOLDER
-u summary output
-w match whole strings
-N match STR with up to N errors
-t STR match STR in To: lines only
-c STR match STR in cc: lines only
-f STR match STR in From: lines only
-s STR match STR in Subject: lines only
-b STR match STR in message bodies only
-n STR match STR in attachment names only
-d BEGIN-END match messages with Date: between BEGIN to END
-x update the index database
-v print debug messages
$ time grep-mail paul vixie
matches+ has 37 messages (1-37); cur=1.
1 Mon 03/23/1998 10:07 To:NetSurfer Re: Criminal Activity on Your Net
2 Thu 06/04/1998 13:57 To:Michael Dorl Re: fyi | >> From: IN%"[email protected]
3 Wed 11/11/1998 18:28 Paul A Vixie gotta love those internic boyz |
4 Wed 11/11/1998 19:39 Paul A Vixie was i asleep when the gtld server
5 Wed 11/11/1998 20:07 Marc Slemko Re: was i asleep when the gtld se
6 Thu 11/12/1998 00:02 "Steven J. Sobol" Re: gotta love those internic boy
7 Sat 11/21/1998 07:01 Paul Vixie Re: NSI policy on lame delagation
8 Sat 11/21/1998 18:39 Paul A Vixie Re: NSI policy on lame delagation
9 Sun 11/22/1998 22:38 Paul Vixie Re: NSI policy on lame delagation
10 Sun 11/22/1998 23:53 Paul A Vixie Re: NSI policy on lame delagation
11 Mon 11/23/1998 10:28 To:Paul Vixie eta re way-cool 8.1.2 (was Re: NS
12 Mon 11/23/1998 12:09 Wayne Re: WebTV.Net: Censorship Central
13 Mon 11/23/1998 13:13 Paul A Vixie Re: eta re way-cool 8.1.2 (was Re
14 Mon 11/23/1998 22:22 Paul Vixie Re: NSI policy on lame delagation
15 Tue 11/24/1998 10:42 To:Paul A Vixie ideas for sending some $ twords I
16 Tue 11/24/1998 10:42 To:Paul A Vixie ideas for sending some $ twords I
17 Fri 01/15/1999 10:47 To:"Jonathan A. Z Re: nocolr | > > Wanna test drive
18 Fri 01/15/1999 10:47 To:"Jonathan A. Z Re: nocolr | > > Wanna test drive
19 Thu 02/11/1999 12:11 To:Paul A Vixie Re: eta re way-cool 8.1.2 (was Re
20 Fri 02/12/1999 11:00 To:registry-proje What now? [was Re: Templates, etc
21 Mon 03/01/1999 14:57 To:"Steven J. Sob Re: status | > > On Mon, Mar 01,
22 Tue 05/11/1999 11:04 "Susan R. Harris" Draft NANOG agenda | Greetings -
23 Thu 06/17/1999 13:15 Levon Esibov RE: SRV records in BIND? | >Micro
24 Thu 06/17/1999 14:13 Paul Vixie Re: SRV records in BIND? | This i
25 Tue 06/22/1999 11:32 To:Paul A Vixie Re: [BIND-BUGS #142] [ISC SUPPORT
26 Tue 06/22/1999 14:13 To:Paul A Vixie Re: [BIND-BUGS #142] [ISC SUPPORT
27 Fri 07/02/1999 13:17 To:Paul A Vixie Re: followup, re: dns cache corru
28 Tue 07/20/1999 09:03 To:Paul Vixie Re: $GENERATE or $EXEC? | > From:
29 Fri 08/27/1999 14:09 "Susan R. Harris" NANOG17 agenda topics | Greetings
30 Wed 10/13/1999 16:11 Sean Donelan Re: MCI Worldcom fiber cut in Whi
31 Sat 04/13/2002 12:49 Tobias Oetiker [rrd-users] rrdtool-1.0.36 is rel
32 Fri 04/16/2010 10:42 To:UW-Physics Hel Re: [issue6436] Mar Proposals [st
33 Sun 02/06/2011 01:51 bergman@merctech. Re: [Nmh-workers] indexing | The
34 Tue 02/08/2011 11:59 Joel Uckelman Re: [Nmh-workers] some indexing r
35 Tue 02/08/2011 19:26 Paul Vixie Re: [Nmh-workers] some indexing r
36 Tue 02/08/2011 15:28 Earl Hood Re: [Nmh-workers] some indexing r
37 Tue 02/08/2011 22:29 Paul Vixie Re: [Nmh-workers] some indexing r
real 0m0.294s
user 0m0.240s
sys 0m0.050s
#!/usr/bin/perl -w
use strict;
# Directory behind the MH "+matches" folder: its .mh_sequences/.xmhcache
# files are removed after each search, and its numbered symlinks are
# inspected when results are pruned by folder.
# NOTE(review): presumably this has to agree with the match folder that
# mairix itself is configured to write -- confirm against the mairix rc file.
my $matchdir = '/home/rader/Mail/matches';
# Set by -v: print ARG/MARGS/ARGS/EXEC trace lines.
my $debug = 0;
# Set by -w: search terms are passed without the trailing "=" suffix.
my $matchwholewords = 0;
# Set by -x: run the index update and exit instead of searching.
my $index = 0;
# Set by -u: suppress sortm/folder/scan output and print only the match count.
my $summary = 0;
# Argument string appended verbatim to the "mairix" command line; every
# piece added to it carries its own leading space.
my $args = '';
# Field prefix (e.g. "t:") placed in front of search terms; it grows as
# further field flags are seen.
my $margs = '';
# Digits taken from a -N argument; appended after the "=" of a search term.
my $fuzz = '';
# Folder name taken from +FOLDER; when set, results outside it are pruned.
my $folder = '';
#------------------------------
# Translate the grep-mail command line into a mairix argument string.
#
# Every search term is appended to $args as one shell-quoted word.  $args is
# later interpolated into a backtick command, so an unquoted term such as
# "~root" (from "-w -root") would be tilde-expanded by the shell, and terms
# containing quotes or spaces would break the command line.
my $shell_quote = sub {
    my ($word) = @_;
    $word =~ s/'/'\\''/g;    # close quote, escaped quote, reopen quote
    return "'$word'";
};

# Build one mairix term from the current field prefix and a search string.
# Substring mode appends "=" plus the optional error count; whole-word mode
# has no slot for an error count, so $fuzz is left out rather than glued
# onto the word itself.
my $make_term = sub {
    my ($str) = @_;
    return $matchwholewords ? "$margs$str" : "$margs$str=$fuzz";
};

while (@ARGV) {
    my $arg = shift @ARGV;
    print "ARG $arg\n" if $debug;

    # Simple switches.
    if ( $arg eq '-h' ) { usage(); exit; }
    if ( $arg eq '-v' ) { $debug           = 1;     next; }
    if ( $arg eq '-x' ) { $index           = 1;     next; }
    if ( $arg eq '-a' ) { $args           .= " -a"; next; }
    if ( $arg eq '-u' ) { $summary         = 1;     next; }
    if ( $arg eq '-w' ) { $matchwholewords = 1;     next; }

    # -N: error count.  Anchored so that "-2abc" is treated as a negated
    # search string instead of silently losing "abc".
    if ( $arg =~ /^-(\d+)$/ ) { $fuzz = $1; next; }

    # +FOLDER: restrict results to one folder.
    if ( $arg =~ /^\+(.*)/ ) { $folder = $1; next; }

    # -X STR: search for STR in a specific field.
    if ( $arg =~ /^-(.)$/ ) {
        my $field = $1;
        if ( $field !~ /^[tcfsbndx]{1}$/ ) { usage(); exit; }

        # Test for presence, not truth: "0" is a legitimate search string.
        if ( !defined $ARGV[0] || $ARGV[0] eq '' ) { usage(); exit; }
        my $str = shift @ARGV;
        print "ARG $str\n" if $debug;

        # NOTE(review): field prefixes accumulate ("-t a -c b" searches b
        # with the combined prefix "ct:", and later bare STRs inherit it).
        # The usage text says "... lines only" -- confirm this is intended.
        $margs = $margs eq '' ? "$field:" : "$field$margs";
        print "MARGS $margs\n" if $debug;

        # NOTE(review): in substring mode a -d date range also receives the
        # "=" suffix -- confirm mairix accepts that form.
        $args .= ' ' . $shell_quote->( $make_term->($str) );
        next;
    }

    # Bare STR, or -STR which becomes the negated term "~STR".
    $arg =~ s/^-/~/;
    $args .= ' ' . $shell_quote->( $make_term->($arg) );
}
# Main flow: validate the request, run mairix, tidy the +matches folder and
# show either a one-line summary or a scan listing.
print "ARGS$args\n" if $debug;

# No search terms and no index request: show the help text.
if ( $args eq '' && !$index ) {
    usage();
    exit;
}

# -x: update the index and stop.
if ($index) {
    mairix_index();
    exit;
}

# Trace a shell command when -v is active, run it, and hand back its
# combined stdout/stderr as a single string.
my $run = sub {
    my ($command) = @_;
    print "EXEC $command\n" if $debug;
    my $captured = `$command 2>&1`;
    return $captured;
};

# Anything other than a "Matched ..." reply is shown as-is and ends the run.
# $mout keeps its name because prune_search_results assigns to it.
my $mout = $run->("mairix$args");
unless ( $mout =~ /^Matched/ ) {
    print $mout;
    exit;
}

# +FOLDER: drop results that live outside the requested folder.
prune_search_results() if $folder;

# Sort by date and renumber; the output of both steps is hidden in summary
# mode.
my $sort_report = $run->('sortm +matches -datefield date');
print $sort_report unless $summary;
my $pack_report = $run->('folder +matches -pack');
print $pack_report unless $summary;

# Remove the sequence and cache files left in the matches directory.
$run->("rm -f $matchdir/.mh_sequences");
$run->("rm -f $matchdir/.xmhcache");

# -u: print only the match count, with a singular form for exactly one.
if ($summary) {
    my $report = $mout;
    $report = "Matched 1 message\n" if $mout =~ /Matched 1 messages/;
    print $report;
    exit;
}

print $run->('scan -form filter.scan.grep-mail +matches');
exit;
#---------------------------------------------------------------------------
# Update the mairix index by running "mairix -v -p" and print a short
# summary built from its verbose output.
#
# Takes no arguments and reads the file-level $debug flag.  Ends the program
# early when mairix reports a locked database, dies when the mairix pipe
# cannot be opened, and warns on STDERR when mairix exits non-zero.
sub mairix_index {
    my $loaded      = 0;    # N from "Loaded N existing"
    my $written     = 0;    # N from "Wrote N messages"
    my $purged      = 0;    # N from "N newly dead messages"
    my $nothing_new = 0;    # true once "No new messages found" is seen

    # Render a count with a correctly pluralised noun.
    my $count_of = sub {
        my ( $count, $noun ) = @_;
        return $count == 1 ? "1 $noun" : "$count ${noun}s";
    };

    print "EXEC mairix -v -p\n" if $debug;

    # The fixed command string goes through the shell only so that stderr
    # can be merged into the stream being parsed.
    open( my $mairix, '-|', 'mairix -v -p 2>&1' )
        or die "grep-mail: cannot run mairix: $!\n";
    while ( my $line = <$mairix> ) {
        if ( $line =~ /Loaded (\d+) existing/ )       { $loaded      = $1; }
        if ( $line =~ /Wrote (\d+) messages/ )        { $written     = $1; }
        if ( $line =~ /No new messages found/ )       { $nothing_new = 1; }
        if ( $line =~ /(\d+) newly dead messages/ )   { $purged      = $1; }
        if ( $line =~ /^Database .* appears to be locked/ ) {
            print "Database is locked\n";
            exit;
        }
    }

    # Closing a pipe returns false when the child exited non-zero; report it
    # but still print whatever counts were collected.
    close($mairix)
        or warn 'grep-mail: mairix exited with status ', ( $? >> 8 ), "\n";

    if ( $purged > 0 ) {
        print 'Removed ', $count_of->( $purged, 'old message' ), "\n";
    }

    if ($nothing_new) {
        print "No new messages found\n";
    }
    else {
        # Assumes written = loaded - purged + added, so solve for added.
        my $added = $written - $loaded + $purged;
        print 'Added ', $count_of->( $added, 'new message' ), "\n";
    }

    # When nothing was written, fall back to the count that was loaded.
    my $indexed = $written > 0 ? $written : $loaded;
    print 'Indexed ', $count_of->( $indexed, 'message' ), "\n";
}
#---------------------------------------------------------------------------
# Restrict the current search results to the folder named by $folder.
#
# Walks the all-digit entries of $matchdir, keeps those whose symlink target
# ends in "/$folder/<number>" and deletes every other one.  Afterwards the
# file-level $mout is replaced by a "Matched N messages" line giving the
# number of entries kept.  Takes no arguments; dies when $matchdir cannot be
# read.
sub prune_search_results {
    print "EXEC prune...\n" if $debug;

    my $kept = 0;
    opendir( my $dh, $matchdir )
        or die "grep-mail: cannot read $matchdir: $!\n";

    for my $entry ( readdir $dh ) {
        next if $entry !~ /^\d+$/;

        my $path   = "$matchdir/$entry";
        my $target = readlink $path;    # undef when $path is not a symlink

        # \Q...\E: the folder name is literal text, not a pattern, so names
        # such as "c++" or "lists.nmh" neither die nor over-match.
        if ( defined $target && $target =~ m{/\Q$folder\E/\d+$} ) {
            $kept++;
            next;
        }

        if ($debug) {
            my $shown = defined $target ? $target : '';
            print("REMOVE $path -> $shown\n");
        }
        unlink $path
            or warn "grep-mail: cannot remove $path: $!\n";
    }
    closedir $dh;

    $mout = "Matched $kept messages\n";
}
#---------------------------------------------------------------------------
# Print the command-line help text to STDOUT.
#
# Takes no arguments and returns nothing useful; callers exit afterwards.
# The heredoc is non-interpolating so the help text is emitted literally.
# Lists -a as well, which the argument parser accepts and forwards to mairix.
sub usage {
    print <<'EOT';
usage: grep-mail [args] [STR]
STR match STR
-STR do not match STR (must be two characters or more)
+FOLDER match messages in FOLDER
-a add matches to the previous results instead of replacing them
-u summary output
-w match whole strings
-N match STR with up to N errors
-t STR match STR in To: lines only
-c STR match STR in cc: lines only
-f STR match STR in From: lines only
-s STR match STR in Subject: lines only
-b STR match STR in message bodies only
-n STR match STR in attachment names only
-d BEGIN-END match messages with Date: between BEGIN to END
-x update the index database
-v print debug messages
EOT
}
_______________________________________________
Nmh-workers mailing list
[email protected]
http://lists.nongnu.org/mailman/listinfo/nmh-workers