asdasd
<[EMAIL PROTECTED]> wrote in message news:[EMAIL PROTECTED]
> (This time hopefully with attachment.  Sorry!)
> A member of this list suggested on clpm that I might post a script to this
> list to see if it were suitable for public consumption.  I have appended 
> the
> script to this message and attached a copy.  While I am aware that, at
> minimum, the script will need a little work to be CPAN worthy, any 
> feedback
> will be received with great interest.
>
> Ronald Schmidt
> [EMAIL PROTECTED]
>
> #!/usr/bin/perl -w
>
> ######################################################################
> #
> # Program: fetch_unanswered.pl
> #
> #       Retrieve articles from one newsgroup to which no reply
> #       has yet been posted.  Articles are all printed to the
> #       standard output.
> #
> #       options:
> #               -g <news group name>    override default news group
> #               -j Turn off threading of articles by subject.
> #               -n <limit>              Fetch at most <limit> NOV records
> #                                       with one request to server.
> #               -s <news server name>   override default news server
> #
> # Current version of this program will be accessible from.
> #       http://www.software-path.com/scripts.html
> #
> # Please send any comments to: [EMAIL PROTECTED]
> #
> # A version with a reply feature exists.  The reply feature is not 
> included
> # here since it requires about 200 lines of additional unrelated code and
> # belongs in a separate script. CPAN script submission currently requires
> # that "It must be a single file ...". Contact the author if interested in
> # the reply feature.
> #
> ######################################################################
>
> use strict;
>
> my $VERSION = 0.15;
>
> use vars qw($opt_g $opt_j $opt_n $opt_s $VERSION);
>
> # server will be set to (in order of decreasing priority)
> #       -s command line parameter
> #       NNTPSERVER environment variable
> #       /etc/nntpserver
> #       default set here
> my $server;
> my $default_server = 'news.compuserve.com';
>
> my $group = 'comp.lang.perl.misc';
> my $xover_batch_size = 500;
> my $default_email = '[EMAIL PROTECTED]';
>
> ######################################################################
> # "Nice to have" enhancements:
> #       support for newnews
> #       time estimation
> #       FAQ filtering option/kill file.
> ######################################################################
>
> ######################################################################
> # A compatible News::NNTPClient module may be retrieved from URL:
> #       http://www.perl.com/CPAN/authors/id/RVA/NNTPClient-0.36.tar.gz
> ######################################################################
> use News::NNTPClient;
> use Getopt::Std;
>
> my $news_client;
> my %unanswered = ();
> my %record_dup_subj = ();
> my ($first_num, $last_num);
>
> ######################################################################
> # Print a status message to STDERR.  If caller does not provide
> # line termination then terminate line with time stamp and LF("\n").
> ######################################################################
> sub post_console_message {
>        print STDERR @_;
>        print STDERR " (", scalar(localtime()), ")\n" unless (
>                $_[$#_] =~ /\n/         # Last parm has LF.
>        );
> }
>
> ######################################################################
> # Here we remove messages with subjects that look like replies and
> # begin to track groups of messages with the same subject.
> # User may request no filter by subject.
> ######################################################################
> sub FilterSubject {
>        my $msg_id = shift;
>        my $subj = lc(shift);
>        my $has_ref = shift;
>
>        $subj =~ s/^\s*//;
>        $subj =~ s/\s*$//;
>
>        # if subject filtering remove msgs with subject that looks like 
> reply
>        delete $unanswered{$msg_id} if (
>                ($subj =~ s/^re(\:?)\s+//) && (! $has_ref)
>        );
>
>        # List of message id's by subject.  Advanced technique - sorry!
>        push @{$record_dup_subj{$subj}}, $msg_id;
> }
>
> ######################################################################
> # Look through duplicate subject hash for cases where multiple messages
> # had the same subject and remove their message id's from the unanswered
> # list.
> ######################################################################
> sub RemoveDuplicateSubject {
>        foreach my $msg_id_lh (values %record_dup_subj) {
>                if (scalar(@$msg_id_lh) > 1) {
>                        foreach my $dup_msg_id (@$msg_id_lh) {
>                                delete $unanswered{$dup_msg_id};
>                        }
>                }
>        }
> }
>
> ######################################################################
> # Use NNTP XOVER request to fetch header information needed to
> # determine which articles have not yet received a response.
> # This is one of the more efficient approaches.
> ######################################################################
> sub SetUnansweredXover {
>        my ($news_client, $first_num, $last_num, $batch_size) = @_;
>
>        my ($batch_first, $batch_last);
>        my $overview_fmt;
>        my ($i, %overview_fields, $id_field, $ref_field, $subject_field);
>        my @all_ref;
>
>        $overview_fmt = $news_client->list('overview.fmt');
>        die $news_client->message() unless ($news_client->ok());
>
>        %overview_fields = map((uc($_), $i++),
>                grep(s/\s*$//, @$overview_fmt));
>        $id_field = $overview_fields{'MESSAGE-ID:'};
>        $ref_field = $overview_fields{'REFERENCES:'};
>        $subject_field = $overview_fields{'SUBJECT:'};
>
>        for (   $batch_first = $first_num,
>                $batch_last = $first_num + $batch_size -1;
>                $batch_first < $last_num;
>                $batch_first = $batch_last + 1,
>                $batch_last = $batch_first + $batch_size -1
>        ) {
>                $batch_last = $last_num if ($batch_last > $last_num);
>                foreach my $xover_line
> 
> ($news_client->xover("${batch_first}-${batch_last}"))
> {
>                        my ($msg_num, $msg_id, $ref, $subject) =
>                                (split /\t/, $xover_line)
>                                 [0, $id_field +1,
>                                        $ref_field +1, $subject_field +1];
>                        my $has_ref = (defined($ref) && $ref);
>                        if ($has_ref) {
>                                foreach my $ref_id (split(' ', $ref)) {
>                                        delete $unanswered{$ref_id};
>                                }
>                        }
>                        else {
>                                $unanswered{$msg_id} = $msg_num;
>                        }
>                        FilterSubject($msg_id, $subject, $has_ref)
>                                unless ($opt_j);
>                }
>                post_console_message 'Processed requests for ',
>                        $batch_last - $first_num +1,
>                        " NOV records of ", $last_num - $first_num +1, '.';
>        }
> }
>
> ######################################################################
> # Fetch each article header, one at a time, to determine which
> # articles have not yet received any response.
> # This is a very inefficient approach but does not require any
> # NNTP extension services.
> ######################################################################
> sub SetUnansweredHead {
>        my ($news_client, $first_num, $last_num) = @_;
>
>        my ($article_num, $err_count);
>        my $i = 0;
>
>        for (   $article_num = $first_num;
>                $article_num <= $last_num;
>                $article_num++) {
>                my $head;
>                my ($msg_id, $ref_id);
>
>                post_console_message("counting heads: $i") if ((++$i %
> 100)==0);
>                $head = $news_client->head($article_num);
>                unless ($news_client->ok()) {
>                        $err_count++ if (
>                                $news_client->message() !~
>                                        /bad article number/i
>                        );
>                        next;
>                }
>
>                ($msg_id) = grep(/Message\-ID\:/i, @$head);
>                ($msg_id) = ($msg_id =~ /Message\-ID\: (\<.*?\>)/i);
>                ($ref_id) = grep(/References\:/i, @$head);
>                if (defined $ref_id) {
>                        ($ref_id) = ($ref_id =~ /References\: (\<.*?\>)/i);
>                        delete $unanswered{$ref_id};
>                }
>                else {
>                        $unanswered{$msg_id} = $article_num;
>                }
>                unless ($opt_j) {
>                        my ($subject) = grep(/Subject\:/i, @$head);
>                        ($subject) = ($subject =~ /Subject: (.*)/i);
>                        FilterSubject($msg_id, $subject, defined($ref_id));
>                }
>        }
>        post_console_message("counting heads: $i") unless (($i % 100)==0);
>        post_console_message("*Warning* errors: $err_count.") if 
> ($err_count);
> }
>
> ######################################################################
> # Here we expend too much effort to be platform independent.
> # We really should `cat ...`
> ######################################################################
> sub read_etc_nntpserver {
>        my $rc;
>
>        open(FH, '</etc/nntpserver') || return undef;
>        $rc = scalar(<FH>);
>        close(FH);
>        $rc =~ s/\s*$//;
>        return $rc || undef;
> }
>
> ######################################################################
> # Start of program.
> ######################################################################
>
> # process command line options
> getopts("g:jn:rs:");
> $server = $opt_s if (defined($opt_s));
> $group = $opt_g if (defined($opt_g));
> $xover_batch_size = $opt_n if (defined($opt_n));
>
> $server = $ENV{'NNTPSERVER'} if (
>        (! defined($server))            &&
>        $ENV{'NNTPSERVER'}
> );
> $server = read_etc_nntpserver() if (
>        (! defined($server))            &&
>        (-r '/etc/nntpserver')
> );
> $server = $default_server unless(defined $server);
>
> # connect to news server
> $news_client = new News::NNTPClient($server);
> unless ($news_client->ok()) {
>        $news_client->quit();
>        die $news_client->message();
> }
>
> $news_client->debug(0);
> $news_client->mode_reader();
>
> # get news article number range
> ($first_num, $last_num) = $news_client->group($group);
> die $news_client->message() unless ($news_client->ok());
>
> # Test scaffolding.  Under Linux this forces overview analysis to fail.
> # $news_client->quit();
> # $news_client = new News::NNTPClient($server);
> # $news_client->debug(0);
>
> post_console_message('Finding unanswered articles.');
>
> ######################################################################
> # The actual work of deciding which articles for the group are
> # unanswered is done here.
> ######################################################################
> eval {
>        SetUnansweredXover(
>                $news_client, $first_num, $last_num, $xover_batch_size
>        );
> };
> if ($@) {
>        post_console_message 'Xover failed; trying one message at a time. 
> ',
>                'This may take a while.', "\n";
>
> # more test scaffolding
> #        $news_client->mode_reader();
> #        $news_client->group($group);
>
>       SetUnansweredHead($news_client, $first_num, $last_num);
> }
>
> unless ($opt_j) {
>        RemoveDuplicateSubject();
>        %record_dup_subj = ();  # free what may be substantial memory
> }
>
> ######################################################################
> # End of "find unanswered" code block.
> ######################################################################
> post_console_message('Done finding unanswered articles.');
> post_console_message('Fetching ', scalar(keys %unanswered),
>        ' unanswered articles.');
>
> # Fetch each unanswered article from the news server
> # and print it to the standard output.
> foreach my $article_id (
>                sort {$unanswered{$b} <=> $unanswered{$a}} 
> keys(%unanswered)
>        ) {
>        my $msg = $news_client->article($article_id);
>        print @$msg;
> }
> post_console_message('Done.');
>
> $news_client->quit();
>
>
> =head1 NAME
>
> fetch_unanswered.pl - Retrieve news articles that do not have a reply.
>
> =head1 DESCRIPTION
>
>
> Retrieve articles from one newsgroup to which no reply has yet been 
> posted.
> Articles are all printed to the standard output and status messages are
> printed to STDERR.
>
> =head1 COMMAND LINE OPTIONS
>
> =over 4
>
> =item -g <news group name>
>
> Override default news group.  Default is comp.lang.perl.misc.
>
> =item -j
>
> Turn off threading of articles by subject.  Turning this off also
> saves (some) time and memory.  Article threading eliminates
> articles starting with 'Re:' and groups of articles with the
> same subject.
>
> =item -n <NOV record batch size>
>
> Limit number of NOV records we read from server with one
> request.  A small number will result in more frequent
> feedback to the user.
>
> =item  -s <news server name>
>
> Override default news server.
>
>        Default is: (in order of decreasing priority)
>        value of NNTPSERVER environment variable
>        value from /etc/nntpserver file
>        value set at start of fetch_unanswered.pl source code.
>
> =back 4
>
> =head1 PREREQUISITES
>
> This script requires the C<strict> module.  It also requires
> C<Getopt::Std> and C<News::NNTPClient>.
>
> =pod OSNAMES
>
> any
>
> =pod SCRIPT CATEGORIES
>
> Networking
>
> =cut
>
> --------------------
> #!/usr/bin/perl -w
>
> ######################################################################
> #
> # Program: fetch_unanswered.pl
> #
> #       Retrieve articles from one newsgroup to which no reply
> #       has yet been posted.  Articles are all printed to the
> #       standard output.
> #
> #       options:
> #               -g <news group name>    override default news group
> #               -j Turn off threading of articles by subject.
> #               -n <limit>              Fetch at most <limit> NOV records
> #                                       with one request to server.
> #               -s <news server name>   override default news server
> #
> # Current version of this program will be accessible from.
> #       http://www.software-path.com/scripts.html
> #
> # Please send any comments to: [EMAIL PROTECTED]
> #
> # A version with a reply feature exists.  The reply feature is not 
> included
> # here since it requires about 200 lines of additional unrelated code and
> # belongs in a separate script. CPAN script submission currently requires
> # that "It must be a single file ...". Contact the author if interested in
> # the reply feature.
> #
> ######################################################################
>
> use strict;
>
> my $VERSION = 0.15;
>
> use vars qw($opt_g $opt_j $opt_n $opt_s $VERSION);
>
> # server will be set to (in order of decreasing priority)
> #       -s command line parameter
> #       NNTPSERVER environment variable
> #       /etc/nntpserver
> #       default set here
> my $server;
> my $default_server = 'news.compuserve.com';
>
> my $group = 'comp.lang.perl.misc';
> my $xover_batch_size = 500;
> my $default_email = '[EMAIL PROTECTED]';
>
> ######################################################################
> # "Nice to have" enhancements:
> #       support for newnews
> #       time estimation
> #       FAQ filtering option/kill file.
> ######################################################################
>
> ######################################################################
> # A compatible News::NNTPClient module may be retrieved from URL:
> #       http://www.perl.com/CPAN/authors/id/RVA/NNTPClient-0.36.tar.gz
> ######################################################################
> use News::NNTPClient;
> use Getopt::Std;
>
> my $news_client;
> my %unanswered = ();
> my %record_dup_subj = ();
> my ($first_num, $last_num);
>
> ######################################################################
> # Print a status message to STDERR.  If caller does not provide
> # line termination then terminate line with time stamp and LF("\n").
> ######################################################################
> sub post_console_message {
>        print STDERR @_;
>        print STDERR " (", scalar(localtime()), ")\n" unless (
>                $_[$#_] =~ /\n/         # Last parm has LF.
>        );
> }
>
> ######################################################################
> # Here we remove messages with subjects that look like replies and
> # begin to track groups of messages with the same subject.
> # User may request no filter by subject.
> ######################################################################
> sub FilterSubject {
>        my $msg_id = shift;
>        my $subj = lc(shift);
>        my $has_ref = shift;
>
>        $subj =~ s/^\s*//;
>        $subj =~ s/\s*$//;
>
>        # if subject filtering remove msgs with subject that looks like 
> reply
>        delete $unanswered{$msg_id} if (
>                ($subj =~ s/^re(\:?)\s+//) && (! $has_ref)
>        );
>
>        # List of message id's by subject.  Advanced technique - sorry!
>        push @{$record_dup_subj{$subj}}, $msg_id;
> }
>
> ######################################################################
> # Look through duplicate subject hash for cases where multiple messages
> # had the same subject and remove their message id's from the unanswered
> # list.
> ######################################################################
> sub RemoveDuplicateSubject {
>        foreach my $msg_id_lh (values %record_dup_subj) {
>                if (scalar(@$msg_id_lh) > 1) {
>                        foreach my $dup_msg_id (@$msg_id_lh) {
>                                delete $unanswered{$dup_msg_id};
>                        }
>                }
>        }
> }
>
> ######################################################################
> # Use NNTP XOVER request to fetch header information needed to
> # determine which articles have not yet received a response.
> # This is one of the more efficient approaches.
> ######################################################################
> sub SetUnansweredXover {
>        my ($news_client, $first_num, $last_num, $batch_size) = @_;
>
>        my ($batch_first, $batch_last);
>        my $overview_fmt;
>        my ($i, %overview_fields, $id_field, $ref_field, $subject_field);
>        my @all_ref;
>
>        $overview_fmt = $news_client->list('overview.fmt');
>        die $news_client->message() unless ($news_client->ok());
>
>        %overview_fields = map((uc($_), $i++),
>                grep(s/\s*$//, @$overview_fmt));
>        $id_field = $overview_fields{'MESSAGE-ID:'};
>        $ref_field = $overview_fields{'REFERENCES:'};
>        $subject_field = $overview_fields{'SUBJECT:'};
>
>        for (   $batch_first = $first_num,
>                $batch_last = $first_num + $batch_size -1;
>                $batch_first < $last_num;
>                $batch_first = $batch_last + 1,
>                $batch_last = $batch_first + $batch_size -1
>        ) {
>                $batch_last = $last_num if ($batch_last > $last_num);
>                foreach my $xover_line
> 
> ($news_client->xover("${batch_first}-${batch_last}"))
> {
>                        my ($msg_num, $msg_id, $ref, $subject) =
>                                (split /\t/, $xover_line)
>                                 [0, $id_field +1,
>                                        $ref_field +1, $subject_field +1];
>                        my $has_ref = (defined($ref) && $ref);
>                        if ($has_ref) {
>                                foreach my $ref_id (split(' ', $ref)) {
>                                        delete $unanswered{$ref_id};
>                                }
>                        }
>                        else {
>                                $unanswered{$msg_id} = $msg_num;
>                        }
>                        FilterSubject($msg_id, $subject, $has_ref)
>                                unless ($opt_j);
>                }
>                post_console_message 'Processed requests for ',
>                        $batch_last - $first_num +1,
>                        " NOV records of ", $last_num - $first_num +1, '.';
>        }
> }
>
> ######################################################################
> # Fetch each article header, one at a time, to determine which
> # articles have not yet received any response.
> # This is a very inefficient approach but does not require any
> # NNTP extension services.
> ######################################################################
> sub SetUnansweredHead {
>        my ($news_client, $first_num, $last_num) = @_;
>
>        my ($article_num, $err_count);
>        my $i = 0;
>
>        for (   $article_num = $first_num;
>                $article_num <= $last_num;
>                $article_num++) {
>                my $head;
>                my ($msg_id, $ref_id);
>
>                post_console_message("counting heads: $i") if ((++$i %
> 100)==0);
>                $head = $news_client->head($article_num);
>                unless ($news_client->ok()) {
>                        $err_count++ if (
>                                $news_client->message() !~
>                                        /bad article number/i
>                        );
>                        next;
>                }
>
>                ($msg_id) = grep(/Message\-ID\:/i, @$head);
>                ($msg_id) = ($msg_id =~ /Message\-ID\: (\<.*?\>)/i);
>                ($ref_id) = grep(/References\:/i, @$head);
>                if (defined $ref_id) {
>                        ($ref_id) = ($ref_id =~ /References\: (\<.*?\>)/i);
>                        delete $unanswered{$ref_id};
>                }
>                else {
>                        $unanswered{$msg_id} = $article_num;
>                }
>                unless ($opt_j) {
>                        my ($subject) = grep(/Subject\:/i, @$head);
>                        ($subject) = ($subject =~ /Subject: (.*)/i);
>                        FilterSubject($msg_id, $subject, defined($ref_id));
>                }
>        }
>        post_console_message("counting heads: $i") unless (($i % 100)==0);
>        post_console_message("*Warning* errors: $err_count.") if 
> ($err_count);
> }
>
> ######################################################################
> # Here we expend too much effort to be platform independent.
> # We really should `cat ...`
> ######################################################################
> sub read_etc_nntpserver {
>        my $rc;
>
>        open(FH, '</etc/nntpserver') || return undef;
>        $rc = scalar(<FH>);
>        close(FH);
>        $rc =~ s/\s*$//;
>        return $rc || undef;
> }
>
> ######################################################################
> # Start of program.
> ######################################################################
>
> # process command line options
> getopts("g:jn:rs:");
> $server = $opt_s if (defined($opt_s));
> $group = $opt_g if (defined($opt_g));
> $xover_batch_size = $opt_n if (defined($opt_n));
>
> $server = $ENV{'NNTPSERVER'} if (
>        (! defined($server))            &&
>        $ENV{'NNTPSERVER'}
> );
> $server = read_etc_nntpserver() if (
>        (! defined($server))            &&
>        (-r '/etc/nntpserver')
> );
> $server = $default_server unless(defined $server);
>
> # connect to news server
> $news_client = new News::NNTPClient($server);
> unless ($news_client->ok()) {
>        $news_client->quit();
>        die $news_client->message();
> }
>
> $news_client->debug(0);
> $news_client->mode_reader();
>
> # get news article number range
> ($first_num, $last_num) = $news_client->group($group);
> die $news_client->message() unless ($news_client->ok());
>
> # Test scaffolding.  Under Linux this forces overview analysis to fail.
> # $news_client->quit();
> # $news_client = new News::NNTPClient($server);
> # $news_client->debug(0);
>
> post_console_message('Finding unanswered articles.');
>
> ######################################################################
> # The actual work of deciding which articles for the group are
> # unanswered is done here.
> ######################################################################
> eval {
>        SetUnansweredXover(
>                $news_client, $first_num, $last_num, $xover_batch_size
>        );
> };
> if ($@) {
>        post_console_message 'Xover failed; trying one message at a time. 
> ',
>                'This may take a while.', "\n";
>
> # more test scaffolding
> #        $news_client->mode_reader();
> #        $news_client->group($group);
>
>       SetUnansweredHead($news_client, $first_num, $last_num);
> }
>
> unless ($opt_j) {
>        RemoveDuplicateSubject();
>        %record_dup_subj = ();  # free what may be substantial memory
> }
>
> ######################################################################
> # End of "find unanswered" code block.
> ######################################################################
> post_console_message('Done finding unanswered articles.');
> post_console_message('Fetching ', scalar(keys %unanswered),
>        ' unanswered articles.');
>
> # Fetch each unanswered article from the news server
> # and print it to the standard output.
> foreach my $article_id (
>                sort {$unanswered{$b} <=> $unanswered{$a}} 
> keys(%unanswered)
>        ) {
>        my $msg = $news_client->article($article_id);
>        print @$msg;
> }
> post_console_message('Done.');
>
> $news_client->quit();
>
>
> =head1 NAME
>
> fetch_unanswered.pl - Retrieve news articles that do not have a reply.
>
> =head1 DESCRIPTION
>
>
> Retrieve articles from one newsgroup to which no reply has yet been 
> posted.
> Articles are all printed to the standard output and status messages are
> printed to STDERR.
>
> =head1 COMMAND LINE OPTIONS
>
> =over 4
>
> =item -g <news group name>
>
> Override default news group.  Default is comp.lang.perl.misc.
>
> =item -j
>
> Turn off threading of articles by subject.  Turning this off also
> saves (some) time and memory.  Article threading eliminates
> articles starting with 'Re:' and groups of articles with the
> same subject.
>
> =item -n <NOV record batch size>
>
> Limit number of NOV records we read from server with one
> request.  A small number will result in more frequent
> feedback to the user.
>
> =item  -s <news server name>
>
> Override default news server.
>
>        Default is: (in order of decreasing priority)
>        value of NNTPSERVER environment variable
>        value from /etc/nntpserver file
>        value set at start of fetch_unanswered.pl source code.
>
> =back 4
>
> =head1 PREREQUISITES
>
> This script requires the C<strict> module.  It also requires
> C<Getopt::Std> and C<News::NNTPClient>.
>
> =pod OSNAMES
>
> any
>
> =pod SCRIPT CATEGORIES
>
> Networking
>
> =cut
>
>


--------------------------------------------------------------------------------


#!/usr/bin/perl -w

######################################################################
#
# Program: fetch_unanswered.pl
#
#       Retrieve articles from one newsgroup to which no reply
#       has yet been posted.  Articles are all printed to the
#       standard output.
#
#       options:
#               -g <news group name>    override default news group
#               -j Turn off threading of articles by subject.
#               -n <limit>              Fetch at most <limit> NOV records
#                                       with one request to server.
#               -s <news server name>   override default news server
#
# Current version of this program will be accessible from.
#       http://www.software-path.com/scripts.html
#
# Please send any comments to: [EMAIL PROTECTED]
#
# A version with a reply feature exists.  The reply feature is not included
# here since it requires about 200 lines of additional unrelated code and
# belongs in a separate script. CPAN script submission currently requires
# that "It must be a single file ...". Contact the author if interested in
# the reply feature.
#
######################################################################

use strict;

my $VERSION = 0.15;

use vars qw($opt_g $opt_j $opt_n $opt_s $VERSION);

# server will be set to (in order of decreasing priority)
#       -s command line parameter
#       NNTPSERVER environment variable
#       /etc/nntpserver
#       default set here
my $server;
my $default_server = 'news.compuserve.com';

my $group = 'comp.lang.perl.misc';
my $xover_batch_size = 500;
my $default_email = '[EMAIL PROTECTED]';

######################################################################
# "Nice to have" enhancements:
#       support for newnews
#       time estimation
#       FAQ filtering option/kill file.
######################################################################

######################################################################
# A compatible News::NNTPClient module may be retrieved from URL:
#       http://www.perl.com/CPAN/authors/id/RVA/NNTPClient-0.36.tar.gz
######################################################################
use News::NNTPClient;
use Getopt::Std;

my $news_client;
my %unanswered = ();
my %record_dup_subj = ();
my ($first_num, $last_num);

######################################################################
# Print a status message to STDERR.  If caller does not provide
# line termination then terminate line with time stamp and LF("\n").
######################################################################
sub post_console_message {
        print STDERR @_;
        print STDERR " (", scalar(localtime()), ")\n" unless (
                $_[$#_] =~ /\n/         # Last parm has LF.
        );
}

######################################################################
# Here we remove messages with subjects that look like replies and
# begin to track groups of messages with the same subject.
# User may request no filter by subject.
######################################################################
sub FilterSubject {
        my $msg_id = shift;
        my $subj = lc(shift);
        my $has_ref = shift;

        $subj =~ s/^\s*//;
        $subj =~ s/\s*$//;

        # if subject filtering remove msgs with subject that looks like 
reply
        delete $unanswered{$msg_id} if (
                ($subj =~ s/^re(\:?)\s+//) && (! $has_ref)
        );

        # List of message id's by subject.  Advanced technique - sorry!
        push @{$record_dup_subj{$subj}}, $msg_id;
}

######################################################################
# Look through duplicate subject hash for cases where multiple messages
# had the same subject and remove their message id's from the unanswered
# list.
######################################################################
sub RemoveDuplicateSubject {
        foreach my $msg_id_lh (values %record_dup_subj) {
                if (scalar(@$msg_id_lh) > 1) {
                        foreach my $dup_msg_id (@$msg_id_lh) {
                                delete $unanswered{$dup_msg_id};
                        }
                }
        }
}

######################################################################
# Use NNTP XOVER request to fetch header information needed to
# determine which articles have not yet received a response.
# This is one of the more efficient approaches.
######################################################################
sub SetUnansweredXover {
        my ($news_client, $first_num, $last_num, $batch_size) = @_;

        my ($batch_first, $batch_last);
        my $overview_fmt;
        my ($i, %overview_fields, $id_field, $ref_field, $subject_field);
        my @all_ref;

        $overview_fmt = $news_client->list('overview.fmt');
        die $news_client->message() unless ($news_client->ok());

        %overview_fields = map((uc($_), $i++),
                grep(s/\s*$//, @$overview_fmt));
        $id_field = $overview_fields{'MESSAGE-ID:'};
        $ref_field = $overview_fields{'REFERENCES:'};
        $subject_field = $overview_fields{'SUBJECT:'};

        for (   $batch_first = $first_num,
                $batch_last = $first_num + $batch_size -1;
                $batch_first < $last_num;
                $batch_first = $batch_last + 1,
                $batch_last = $batch_first + $batch_size -1
        ) {
                $batch_last = $last_num if ($batch_last > $last_num);
                foreach my $xover_line
                        ($news_client->xover("${batch_first}-${batch_last}")) 
{
                        my ($msg_num, $msg_id, $ref, $subject) =
                                (split /\t/, $xover_line)
                                 [0, $id_field +1,
                                        $ref_field +1, $subject_field +1];
                        my $has_ref = (defined($ref) && $ref);
                        if ($has_ref) {
                                foreach my $ref_id (split(' ', $ref)) {
                                        delete $unanswered{$ref_id};
                                }
                        }
                        else {
                                $unanswered{$msg_id} = $msg_num;
                        }
                        FilterSubject($msg_id, $subject, $has_ref)
                                unless ($opt_j);
                }
                post_console_message 'Processed requests for ',
                        $batch_last - $first_num +1,
                        " NOV records of ", $last_num - $first_num +1, '.';
        }
}

######################################################################
# Fetch each article header, one at a time, to determine which
# articles have not yet received any response.
# This is a very inefficient approach but does not require any
# NNTP extension services.
######################################################################
sub SetUnansweredHead {
        my ($news_client, $first_num, $last_num) = @_;

        my ($article_num, $err_count);
        my $i = 0;

        for (   $article_num = $first_num;
                $article_num <= $last_num;
                $article_num++) {
                my $head;
                my ($msg_id, $ref_id);

                post_console_message("counting heads: $i") if ((++$i % 
100)==0);
                $head = $news_client->head($article_num);
                unless ($news_client->ok()) {
                        $err_count++ if (
                                $news_client->message() !~
                                        /bad article number/i
                        );
                        next;
                }

                ($msg_id) = grep(/Message\-ID\:/i, @$head);
                ($msg_id) = ($msg_id =~ /Message\-ID\: (\<.*?\>)/i);
                ($ref_id) = grep(/References\:/i, @$head);
                if (defined $ref_id) {
                        ($ref_id) = ($ref_id =~ /References\: (\<.*?\>)/i);
                        delete $unanswered{$ref_id};
                }
                else {
                        $unanswered{$msg_id} = $article_num;
                }
                unless ($opt_j) {
                        my ($subject) = grep(/Subject\:/i, @$head);
                        ($subject) = ($subject =~ /Subject: (.*)/i);
                        FilterSubject($msg_id, $subject, defined($ref_id));
                }
        }
        post_console_message("counting heads: $i") unless (($i % 100)==0);
        post_console_message("*Warning* errors: $err_count.") if 
($err_count);
}

######################################################################
# Here we expend too much effort to be platform independent.
# We really should `cat ...`
######################################################################
sub read_etc_nntpserver {
        my $rc;

        open(FH, '</etc/nntpserver') || return undef;
        $rc = scalar(<FH>);
        close(FH);
        $rc =~ s/\s*$//;
        return $rc || undef;
}

######################################################################
# Start of program.
######################################################################

# process command line options
getopts("g:jn:rs:");
$server = $opt_s if (defined($opt_s));
$group = $opt_g if (defined($opt_g));
$xover_batch_size = $opt_n if (defined($opt_n));

$server = $ENV{'NNTPSERVER'} if (
        (! defined($server))            &&
        $ENV{'NNTPSERVER'}
);
$server = read_etc_nntpserver() if (
        (! defined($server))            &&
        (-r '/etc/nntpserver')
);
$server = $default_server unless(defined $server);

# connect to news server
$news_client = new News::NNTPClient($server);
unless ($news_client->ok()) {
        $news_client->quit();
        die $news_client->message();
}

$news_client->debug(0);
$news_client->mode_reader();

# get news article number range
($first_num, $last_num) = $news_client->group($group);
die $news_client->message() unless ($news_client->ok());

# Test scaffolding.  Under Linux this forces overview analysis to fail.
# $news_client->quit();
# $news_client = new News::NNTPClient($server);
# $news_client->debug(0);

post_console_message('Finding unanswered articles.');

######################################################################
# The actual work of deciding which articles for the group are
# unanswered is done here.
######################################################################
eval {
        SetUnansweredXover(
                $news_client, $first_num, $last_num, $xover_batch_size
        );
};
if ($@) {
        post_console_message 'Xover failed; trying one message at a time. 
',
                'This may take a while.', "\n";

# more test scaffolding
#        $news_client->mode_reader();
#        $news_client->group($group);

       SetUnansweredHead($news_client, $first_num, $last_num);
}

unless ($opt_j) {
        RemoveDuplicateSubject();
        %record_dup_subj = ();  # free what may be substantial memory
}

######################################################################
# End of "find unanswered" code block.
######################################################################
post_console_message('Done finding unanswered articles.');
post_console_message('Fetching ', scalar(keys %unanswered),
        ' unanswered articles.');

# Fetch each unanswered article from the news server
# and print it to the standard output.
foreach my $article_id (
                sort {$unanswered{$b} <=> $unanswered{$a}} keys(%unanswered)
        ) {
        my $msg = $news_client->article($article_id);
        print @$msg;
}
post_console_message('Done.');

$news_client->quit();


=head1 NAME

fetch_unanswered.pl - Retrieve news articles that do not have a reply.

=head1 DESCRIPTION


Retrieve articles from one newsgroup to which no reply has yet been posted.
Articles are all printed to the standard output and status messages are
printed to STDERR.

=head1 COMMAND LINE OPTIONS

=over 4

=item -g <news group name>

Override default news group.  Default is comp.lang.perl.misc.

=item -j

Turn off threading of articles by subject.  Turning this off also
saves (some) time and memory.  Article threading eliminates
articles starting with 'Re:' and groups of articles with the
same subject.

=item -n <NOV record batch size>

Limit number of NOV records we read from server with one
request.  A small number will result in more frequent
feedback to the user.

=item  -s <news server name>

Override default news server.

        Default is: (in order of decreasing priority)
        value of NNTPSERVER environment variable
        value from /etc/nntpserver file
        value set at start of fetch_unanswered.pl source code.

=back 4

=head1 PREREQUISITES

This script requires the C<strict> module.  It also requires
C<Getopt::Std> and C<News::NNTPClient>.

=pod OSNAMES

any

=pod SCRIPT CATEGORIES

Networking

=cut


Reply via email to