A member of this list suggested on clpm that I might post a script to this list to see if it were suitable for public consumption. I have appended the script to this message and attached a copy. While I am aware that, at minimum, the script will need a little work to be CPAN worthy, any feedback will be received with great interest. Ronald Schmidt [EMAIL PROTECTED] #!/usr/bin/perl -w ###################################################################### # # Program: fetch_unanswered.pl # # Retrieve articles from one newsgroup to which no reply # has yet been posted. Articles are all printed to the # standard output. # # options: # -g <news group name> override default news group # -j Turn off threading of articles by subject. # -n <limit> Fetch at most <limit> NOV records # with one request to server. # -s <news server name> override default news server # # Current version of this program will be accessible from. # http://www.software-path.com/scripts.html # # Please send any comments to: [EMAIL PROTECTED] # # A version with a reply feature exists. The reply feature is not included # here since it requires about 200 lines of additional unrelated code and # belongs in a separate script. CPAN script submission currently requires # that "It must be a single file ...". Contact the author if interested in # the reply feature. # ###################################################################### use strict; my $VERSION = 0.15; use vars qw($opt_g $opt_j $opt_n $opt_s $VERSION); # server will be set to (in order of decreasing priority) # -s command line parameter # NNTPSERVER environment variable # /etc/nntpserver # default set here my $server; my $default_server = 'news.compuserve.com'; my $group = 'comp.lang.perl.misc'; my $xover_batch_size = 500; my $default_email = '[EMAIL PROTECTED]'; ###################################################################### # "Nice to have" enhancements: # support for newnews # time estimation # FAQ filtering option/kill file. ###################################################################### ###################################################################### # A compatible News::NNTPClient module may be retrieved from URL: # http://www.perl.com/CPAN/authors/id/RVA/NNTPClient-0.36.tar.gz ###################################################################### use News::NNTPClient; use Getopt::Std; my $news_client; my %unanswered = (); my %record_dup_subj = (); my ($first_num, $last_num); ###################################################################### # Print a status message to STDERR. If caller does not provide # line termination then terminate line with time stamp and LF("\n"). ###################################################################### sub post_console_message { print STDERR @_; print STDERR " (", scalar(localtime()), ")\n" unless ( $_[$#_] =~ /\n/ # Last parm has LF. ); } ###################################################################### # Here we remove messages with subjects that look like replies and # begin to track groups of messages with the same subject. # User may request no filter by subject. ###################################################################### sub FilterSubject { my $msg_id = shift; my $subj = lc(shift); my $has_ref = shift; $subj =~ s/^\s*//; $subj =~ s/\s*$//; # if subject filtering remove msgs with subject that looks like reply delete $unanswered{$msg_id} if ( ($subj =~ s/^re(\:?)\s+//) && (! $has_ref) ); # List of message id's by subject. Advanced technique - sorry! push @{$record_dup_subj{$subj}}, $msg_id; } ###################################################################### # Look through duplicate subject hash for cases where multiple messages # had the same subject and remove their message id's from the unanswered # list. ###################################################################### sub RemoveDuplicateSubject { foreach my $msg_id_lh (values %record_dup_subj) { if (scalar(@$msg_id_lh) > 1) { foreach my $dup_msg_id (@$msg_id_lh) { delete $unanswered{$dup_msg_id}; } } } } ###################################################################### # Use NNTP XOVER request to fetch header information needed to # determine which articles have not yet received a response. # This is one of the more efficient approaches. ###################################################################### sub SetUnansweredXover { my ($news_client, $first_num, $last_num, $batch_size) = @_; my ($batch_first, $batch_last); my $overview_fmt; my ($i, %overview_fields, $id_field, $ref_field, $subject_field); my @all_ref; $overview_fmt = $news_client->list('overview.fmt'); die $news_client->message() unless ($news_client->ok()); %overview_fields = map((uc($_), $i++), grep(s/\s*$//, @$overview_fmt)); $id_field = $overview_fields{'MESSAGE-ID:'}; $ref_field = $overview_fields{'REFERENCES:'}; $subject_field = $overview_fields{'SUBJECT:'}; for ( $batch_first = $first_num, $batch_last = $first_num + $batch_size -1; $batch_first < $last_num; $batch_first = $batch_last + 1, $batch_last = $batch_first + $batch_size -1 ) { $batch_last = $last_num if ($batch_last > $last_num); foreach my $xover_line ($news_client->xover("${batch_first}-${batch_last}")) { my ($msg_num, $msg_id, $ref, $subject) = (split /\t/, $xover_line) [0, $id_field +1, $ref_field +1, $subject_field +1]; my $has_ref = (defined($ref) && $ref); if ($has_ref) { foreach my $ref_id (split(' ', $ref)) { delete $unanswered{$ref_id}; } } else { $unanswered{$msg_id} = $msg_num; } FilterSubject($msg_id, $subject, $has_ref) unless ($opt_j); } post_console_message 'Processed requests for ', $batch_last - $first_num +1, " NOV records of ", $last_num - $first_num +1, '.'; } } ###################################################################### # Fetch each article header, one at a time, to determine which # articles have not yet received any response. # This is a very inefficient approach but does not require any # NNTP extension services. ###################################################################### sub SetUnansweredHead { my ($news_client, $first_num, $last_num) = @_; my ($article_num, $err_count); my $i = 0; for ( $article_num = $first_num; $article_num <= $last_num; $article_num++) { my $head; my ($msg_id, $ref_id); post_console_message("counting heads: $i") if ((++$i % 100)==0); $head = $news_client->head($article_num); unless ($news_client->ok()) { $err_count++ if ( $news_client->message() !~ /bad article number/i ); next; } ($msg_id) = grep(/Message\-ID\:/i, @$head); ($msg_id) = ($msg_id =~ /Message\-ID\: (\<.*?\>)/i); ($ref_id) = grep(/References\:/i, @$head); if (defined $ref_id) { ($ref_id) = ($ref_id =~ /References\: (\<.*?\>)/i); delete $unanswered{$ref_id}; } else { $unanswered{$msg_id} = $article_num; } unless ($opt_j) { my ($subject) = grep(/Subject\:/i, @$head); ($subject) = ($subject =~ /Subject: (.*)/i); FilterSubject($msg_id, $subject, defined($ref_id)); } } post_console_message("counting heads: $i") unless (($i % 100)==0); post_console_message("*Warning* errors: $err_count.") if ($err_count); } ###################################################################### # Here we expend too much effort to be platform independent. # We really should `cat ...` ###################################################################### sub read_etc_nntpserver { my $rc; open(FH, '</etc/nntpserver') || return undef; $rc = scalar(<FH>); close(FH); $rc =~ s/\s*$//; return $rc || undef; } ###################################################################### # Start of program. ###################################################################### # process command line options getopts("g:jn:rs:"); $server = $opt_s if (defined($opt_s)); $group = $opt_g if (defined($opt_g)); $xover_batch_size = $opt_n if (defined($opt_n)); $server = $ENV{'NNTPSERVER'} if ( (! defined($server)) && $ENV{'NNTPSERVER'} ); $server = read_etc_nntpserver() if ( (! defined($server)) && (-r '/etc/nntpserver') ); $server = $default_server unless(defined $server); # connect to news server $news_client = new News::NNTPClient($server); unless ($news_client->ok()) { $news_client->quit(); die $news_client->message(); } $news_client->debug(0); $news_client->mode_reader(); # get news article number range ($first_num, $last_num) = $news_client->group($group); die $news_client->message() unless ($news_client->ok()); # Test scaffolding. Under Linux this forces overview analysis to fail. # $news_client->quit(); # $news_client = new News::NNTPClient($server); # $news_client->debug(0); post_console_message('Finding unanswered articles.'); ###################################################################### # The actual work of deciding which articles for the group are # unanswered is done here. ###################################################################### eval { SetUnansweredXover( $news_client, $first_num, $last_num, $xover_batch_size ); }; if ($@) { post_console_message 'Xover failed; trying one message at a time. ', 'This may take a while.', "\n"; # more test scaffolding # $news_client->mode_reader(); # $news_client->group($group); SetUnansweredHead($news_client, $first_num, $last_num); } unless ($opt_j) { RemoveDuplicateSubject(); %record_dup_subj = (); # free what may be substantial memory } ###################################################################### # End of "find unanswered" code block. ###################################################################### post_console_message('Done finding unanswered articles.'); post_console_message('Fetching ', scalar(keys %unanswered), ' unanswered articles.'); # Fetch each unanswered article from the news server # and print it to the standard output. foreach my $article_id ( sort {$unanswered{$b} <=> $unanswered{$a}} keys(%unanswered) ) { my $msg = $news_client->article($article_id); print @$msg; } post_console_message('Done.'); $news_client->quit(); =head1 NAME fetch_unanswered.pl - Retrieve news articles that do not have a reply. =head1 DESCRIPTION Retrieve articles from one newsgroup to which no reply has yet been posted. Articles are all printed to the standard output and status messages are printed to STDERR. =head1 COMMAND LINE OPTIONS =over 4 =item -g <news group name> Override default news group. Default is comp.lang.perl.misc. =item -j Turn off threading of articles by subject. Turning this off also saves (some) time and memory. Article threading eliminates articles starting with 'Re:' and groups of articles with the same subject. =item -n <NOV record batch size> Limit number of NOV records we read from server with one request. A small number will result in more frequent feedback to the user. =item -s <news server name> Override default news server. Default is: (in order of decreasing priority) value of NNTPSERVER environment variable value from /etc/nntpserver file value set at start of fetch_unanswered.pl source code. =back 4 =head1 PREREQUISITES This script requires the C<strict> module. It also requires C<Getopt::Std> and C<News::NNTPClient>. =pod OSNAMES any =pod SCRIPT CATEGORIES Networking =cut
#!/usr/bin/perl -w ###################################################################### # # Program: fetch_unanswered.pl # # Retrieve articles from one newsgroup to which no reply # has yet been posted. Articles are all printed to the # standard output. # # options: # -g <news group name> override default news group # -j Turn off threading of articles by subject. # -n <limit> Fetch at most <limit> NOV records # with one request to server. # -s <news server name> override default news server # # Current version of this program will be accessible from. # http://www.software-path.com/scripts.html # # Please send any comments to: [EMAIL PROTECTED] # # A version with a reply feature exists. The reply feature is not included # here since it requires about 200 lines of additional unrelated code and # belongs in a separate script. CPAN script submission currently requires # that "It must be a single file ...". Contact the author if interested in # the reply feature. # ###################################################################### use strict; my $VERSION = 0.15; use vars qw($opt_g $opt_j $opt_n $opt_s $VERSION); # server will be set to (in order of decreasing priority) # -s command line parameter # NNTPSERVER environment variable # /etc/nntpserver # default set here my $server; my $default_server = 'news.compuserve.com'; my $group = 'comp.lang.perl.misc'; my $xover_batch_size = 500; my $default_email = '[EMAIL PROTECTED]'; ###################################################################### # "Nice to have" enhancements: # support for newnews # time estimation # FAQ filtering option/kill file. ###################################################################### ###################################################################### # A compatible News::NNTPClient module may be retrieved from URL: # http://www.perl.com/CPAN/authors/id/RVA/NNTPClient-0.36.tar.gz ###################################################################### use News::NNTPClient; use Getopt::Std; my $news_client; my %unanswered = (); my %record_dup_subj = (); my ($first_num, $last_num); ###################################################################### # Print a status message to STDERR. If caller does not provide # line termination then terminate line with time stamp and LF("\n"). ###################################################################### sub post_console_message { print STDERR @_; print STDERR " (", scalar(localtime()), ")\n" unless ( $_[$#_] =~ /\n/ # Last parm has LF. ); } ###################################################################### # Here we remove messages with subjects that look like replies and # begin to track groups of messages with the same subject. # User may request no filter by subject. ###################################################################### sub FilterSubject { my $msg_id = shift; my $subj = lc(shift); my $has_ref = shift; $subj =~ s/^\s*//; $subj =~ s/\s*$//; # if subject filtering remove msgs with subject that looks like reply delete $unanswered{$msg_id} if ( ($subj =~ s/^re(\:?)\s+//) && (! $has_ref) ); # List of message id's by subject. Advanced technique - sorry! push @{$record_dup_subj{$subj}}, $msg_id; } ###################################################################### # Look through duplicate subject hash for cases where multiple messages # had the same subject and remove their message id's from the unanswered # list. ###################################################################### sub RemoveDuplicateSubject { foreach my $msg_id_lh (values %record_dup_subj) { if (scalar(@$msg_id_lh) > 1) { foreach my $dup_msg_id (@$msg_id_lh) { delete $unanswered{$dup_msg_id}; } } } } ###################################################################### # Use NNTP XOVER request to fetch header information needed to # determine which articles have not yet received a response. # This is one of the more efficient approaches. ###################################################################### sub SetUnansweredXover { my ($news_client, $first_num, $last_num, $batch_size) = @_; my ($batch_first, $batch_last); my $overview_fmt; my ($i, %overview_fields, $id_field, $ref_field, $subject_field); my @all_ref; $overview_fmt = $news_client->list('overview.fmt'); die $news_client->message() unless ($news_client->ok()); %overview_fields = map((uc($_), $i++), grep(s/\s*$//, @$overview_fmt)); $id_field = $overview_fields{'MESSAGE-ID:'}; $ref_field = $overview_fields{'REFERENCES:'}; $subject_field = $overview_fields{'SUBJECT:'}; for ( $batch_first = $first_num, $batch_last = $first_num + $batch_size -1; $batch_first < $last_num; $batch_first = $batch_last + 1, $batch_last = $batch_first + $batch_size -1 ) { $batch_last = $last_num if ($batch_last > $last_num); foreach my $xover_line ($news_client->xover("${batch_first}-${batch_last}")) { my ($msg_num, $msg_id, $ref, $subject) = (split /\t/, $xover_line) [0, $id_field +1, $ref_field +1, $subject_field +1]; my $has_ref = (defined($ref) && $ref); if ($has_ref) { foreach my $ref_id (split(' ', $ref)) { delete $unanswered{$ref_id}; } } else { $unanswered{$msg_id} = $msg_num; } FilterSubject($msg_id, $subject, $has_ref) unless ($opt_j); } post_console_message 'Processed requests for ', $batch_last - $first_num +1, " NOV records of ", $last_num - $first_num +1, '.'; } } ###################################################################### # Fetch each article header, one at a time, to determine which # articles have not yet received any response. # This is a very inefficient approach but does not require any # NNTP extension services. ###################################################################### sub SetUnansweredHead { my ($news_client, $first_num, $last_num) = @_; my ($article_num, $err_count); my $i = 0; for ( $article_num = $first_num; $article_num <= $last_num; $article_num++) { my $head; my ($msg_id, $ref_id); post_console_message("counting heads: $i") if ((++$i % 100)==0); $head = $news_client->head($article_num); unless ($news_client->ok()) { $err_count++ if ( $news_client->message() !~ /bad article number/i ); next; } ($msg_id) = grep(/Message\-ID\:/i, @$head); ($msg_id) = ($msg_id =~ /Message\-ID\: (\<.*?\>)/i); ($ref_id) = grep(/References\:/i, @$head); if (defined $ref_id) { ($ref_id) = ($ref_id =~ /References\: (\<.*?\>)/i); delete $unanswered{$ref_id}; } else { $unanswered{$msg_id} = $article_num; } unless ($opt_j) { my ($subject) = grep(/Subject\:/i, @$head); ($subject) = ($subject =~ /Subject: (.*)/i); FilterSubject($msg_id, $subject, defined($ref_id)); } } post_console_message("counting heads: $i") unless (($i % 100)==0); post_console_message("*Warning* errors: $err_count.") if ($err_count); } ###################################################################### # Here we expend too much effort to be platform independent. # We really should `cat ...` ###################################################################### sub read_etc_nntpserver { my $rc; open(FH, '</etc/nntpserver') || return undef; $rc = scalar(<FH>); close(FH); $rc =~ s/\s*$//; return $rc || undef; } ###################################################################### # Start of program. ###################################################################### # process command line options getopts("g:jn:rs:"); $server = $opt_s if (defined($opt_s)); $group = $opt_g if (defined($opt_g)); $xover_batch_size = $opt_n if (defined($opt_n)); $server = $ENV{'NNTPSERVER'} if ( (! defined($server)) && $ENV{'NNTPSERVER'} ); $server = read_etc_nntpserver() if ( (! defined($server)) && (-r '/etc/nntpserver') ); $server = $default_server unless(defined $server); # connect to news server $news_client = new News::NNTPClient($server); unless ($news_client->ok()) { $news_client->quit(); die $news_client->message(); } $news_client->debug(0); $news_client->mode_reader(); # get news article number range ($first_num, $last_num) = $news_client->group($group); die $news_client->message() unless ($news_client->ok()); # Test scaffolding. Under Linux this forces overview analysis to fail. # $news_client->quit(); # $news_client = new News::NNTPClient($server); # $news_client->debug(0); post_console_message('Finding unanswered articles.'); ###################################################################### # The actual work of deciding which articles for the group are # unanswered is done here. ###################################################################### eval { SetUnansweredXover( $news_client, $first_num, $last_num, $xover_batch_size ); }; if ($@) { post_console_message 'Xover failed; trying one message at a time. ', 'This may take a while.', "\n"; # more test scaffolding # $news_client->mode_reader(); # $news_client->group($group); SetUnansweredHead($news_client, $first_num, $last_num); } unless ($opt_j) { RemoveDuplicateSubject(); %record_dup_subj = (); # free what may be substantial memory } ###################################################################### # End of "find unanswered" code block. ###################################################################### post_console_message('Done finding unanswered articles.'); post_console_message('Fetching ', scalar(keys %unanswered), ' unanswered articles.'); # Fetch each unanswered article from the news server # and print it to the standard output. foreach my $article_id ( sort {$unanswered{$b} <=> $unanswered{$a}} keys(%unanswered) ) { my $msg = $news_client->article($article_id); print @$msg; } post_console_message('Done.'); $news_client->quit(); =head1 NAME fetch_unanswered.pl - Retrieve news articles that do not have a reply. =head1 DESCRIPTION Retrieve articles from one newsgroup to which no reply has yet been posted. Articles are all printed to the standard output and status messages are printed to STDERR. =head1 COMMAND LINE OPTIONS =over 4 =item -g <news group name> Override default news group. Default is comp.lang.perl.misc. =item -j Turn off threading of articles by subject. Turning this off also saves (some) time and memory. Article threading eliminates articles starting with 'Re:' and groups of articles with the same subject. =item -n <NOV record batch size> Limit number of NOV records we read from server with one request. A small number will result in more frequent feedback to the user. =item -s <news server name> Override default news server. Default is: (in order of decreasing priority) value of NNTPSERVER environment variable value from /etc/nntpserver file value set at start of fetch_unanswered.pl source code. =back 4 =head1 PREREQUISITES This script requires the C<strict> module. It also requires C<Getopt::Std> and C<News::NNTPClient>. =pod OSNAMES any =pod SCRIPT CATEGORIES Networking =cut
