I had a lot of problems with the existing pipermail.pl filter, so I took the mhonarc.pl filter, which seemed to be similar, and created what is, in my opinion at least, a better pipermail.pl to be found below. (Watch out for wrapped regular expressions...)
Best regards, Tom Morrison ======================= # # -*- Perl -*- # # This is free software with ABSOLUTELY NO WARRANTY. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either versions 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA # 02111-1307, USA # package pipermail; use strict; require 'util.pl'; require 'gfilter.pl'; require 'html.pl'; require 'mailnews.pl'; # # This pattern specifies pipermail's file names. # my $PIPERMAIL_MESSAGE_FILE = '\d{6}\.html'; sub mediatype() { return ('text/html; x-type=pipermail'); } sub status() { return 'yes'; } sub recursive() { return 0; } sub pre_codeconv() { return 1; } sub post_codeconv () { return 0; } sub add_magic ($) { return; } sub filter ($$$$$) { my ($orig_cfile, $contref, $weighted_str, $headings, $fields) = @_; my $cfile = defined $orig_cfile ? $$orig_cfile : ''; util::vprint("Processing pipermail file ...\n"); unless ($cfile =~ /($PIPERMAIL_MESSAGE_FILE)$/o) { return "is Pipermail's index file! skipped."; # error } pipermail_filter($contref, $weighted_str, $fields); html::html_filter($contref, $weighted_str, $fields, $headings); $$contref =~ s/^\s+//; mailnews::uuencode_filter($contref); mailnews::mailnews_filter($contref, $weighted_str, $fields); mailnews::mailnews_citation_filter($contref, $weighted_str); gfilter::line_adjust_filter($contref); gfilter::line_adjust_filter($weighted_str); gfilter::white_space_adjust_filter($contref); gfilter::show_filter_debug_info($contref, $weighted_str, $fields, $headings); return undef; } # Assume a normal message files by pipermail (mailman edition) sub pipermail_filter ($$) { my ($contref, $weighted_str, $fields) = @_; my $pipermail_fields = { }; my $pos = index($$contref, '<!--beginarticle-->'); if ($pos > 0) { load_pipermail_fields($pipermail_fields, $weighted_str, substr($$contref, 0, $pos)); } # Strip off front-matter if ( $pipermail_fields->{'title'} ) { my $title = $pipermail_fields->{'title'}; $$contref =~ s/(<TITLE>).*(<\/TITLE>)/$1$title$2/si; } $$contref =~ s/(<\/TITLE>).*(<\/HEAD>)/$1$2/si; $$contref =~ s/<H1>.*<!--beginarticle-->//si; # Strip off end-matter $$contref =~ s/<!--endarticle-->.*//s; # Move pipermail fields to the filter fields my($fld_name, $fld_value); while (($fld_name, $fld_value) = each %$pipermail_fields) { $fields->{$fld_name} = $fld_value; } # Return extract MHonArc fields #$pipermail_fields; } sub load_pipermail_fields { my $fields = shift; my $weighted_str = shift; my $pipermail_head = shift; if ($pipermail_head =~ m{<h1>(.*?)</h1>\s*<b>(.*?)\s*</b>\s*<a href=.*?>(.*?)\s*</a>\s*<br>\s*<i>(.*?)</i>}is) { if ($1) { my $subject = uncommentize($1); 1 while ($subject =~ s/\A\s*(re|sv|fwd|fw)[\[\]\d]*[:>-]+\s*//i); $subject =~ s/\A\s*\[[^\]]+\]\s*//; $fields->{'title'} = $subject; } if ($2) { my $from; if ($3) { $from = uncommentize("$2 ($3)"); } else { $from = uncommentize($2); } $fields->{'from'} = $from; } if ($4) { $fields->{'date'} = uncommentize($4); } } } sub uncommentize { my($txt) = $_[0]; $txt =~ s/&#(\d+);/pack("C",$1)/ge; $txt; } 1; ------------------------------------------------------ Mailman-Users mailing list [EMAIL PROTECTED] http://mail.python.org/mailman/listinfo/mailman-users Mailman FAQ: http://www.python.org/cgi-bin/faqw-mm.py Searchable Archives: http://www.mail-archive.com/mailman-users%40python.org/