On Mon, Nov 26, 2012 at 09:57:33AM +0000, Jamie Paul Griffin wrote:
> 
> Does anyone have or know of a perl or python script, or even a shell
> script, that removes the multipart/(mixed|alternative| ... ) parts of
> incoming mail and leaves or converts the message into plain text?

I have two scripts for that. Both are not perfect and sometimes
eat mails or attachments, esp. from MUAs with certain bugs
(specifically Apple mail, html, and attachments are a known
culprit)

The effect comes from inherently from the way is works, it parses
the mime-structures and recreates it, thereby not necessarily
groking all unknowns.

Bye,

Joerg

a) remove text/html part from maultipart/alternativ
# Clean MIME mails
:0 fhbw
* ^Content-Type:.*multipart/
# Apple mail attachment bug
* ! ^X-Mailer: Apple Mail
| fixmail.pl


#!/usr/bin/perl -w
#
# This is a mail filter. Takes multipart from STDIN, deletes
# superfluous MIME-parts and reduces multipart/alternative to
# singlepart, finally writes cleaned MIME mail to STDOUT.
#
# By Boris 'pi' Piwinger <3...@piology.org>. Please let me know if you
# improve it or fix bugs.
#
# Based on tinnef.pl (there is not much left, though;-) by Gerd Knorr
# <gkn...@berlinonline.de>. Get it there: http://www.ch-open.ch/~cho13093/
#
# This code is public domain. It comes with absolutely no warranty.
# If it eats your mails for lunch, that's your problem. If you don't
# like this, don't use it.
#
# Best with Procmail, e.g.:
#
#     # Clean MIME mails
#     :0
#     * ^Content-Type:.*multipart/
#     {
#       :0c:
#       tmp/fixmail
#       :0fhbw
#       | fixmail.pl
#     }

# Save the From line
my$from = <STDIN>;
# Create parser, we are being daring here (huge mails might cause problems)
use MIME::Parser;
my$done="";
my$parser=MIME::Parser->new;
$parser->output_to_core(1);
my$top=$parser->read(\*STDIN) or die "Couldn't parse MIME stream.\n";
$top=&analyze($top);
$top->head->add('X-pi-MIME-Parts-removed',$done) if $done;
$top->sync_headers(Length=>'COMPUTE');
# Print From line
print $from;
$top->print(\*STDOUT);
exit 0;

sub analyze {
  my($body,$i)=(@_,0);
  my($parts)=$body->{ME_Parts};
  if ($body->mime_type eq "multipart/alternative") {
    # Reduce multipart/alternative
    $i=-1;
    $i++ until ($$parts[$i]->mime_type eq "text/plain" || $i==$#{$parts});
    if ($$parts[$i]->mime_type eq "text/plain") {
      @$parts=@$parts[$i];
      $done.=" multipart/alternative";
    }
  } else {
    # Kill superfluous junk:
    # - text/x-vcard
    # - application/x-pkcs7-signature
    # - application/ms-tnef
    # Recursion on multipart
    while ($i<=$#{$parts}) {
# try pkcs7 for a while (mutt supports it now)
#      if ($$parts[$i]->mime_type =~ 
/(text\/x-vcard|application\/(?:x-pkcs7-signature|ms-tnef))/) {
      if ($$parts[$i]->mime_type =~ /(text\/x-vcard|application\/(?:ms-tnef))/) 
{
        $done.=" $1";
        splice(@$parts,$i,1);
      } elsif ($$parts[$i]->mime_type =~ /^multipart\//) {
        $$parts[$i]=&analyze($$parts[$i]);
        $i++;
      } else {$i++}
    }
  }
  $body->{ME_Parts}=$parts;
  if ($body->mime_type =~ /^multipart\/related/ && 
$body->head->mime_attr("content-type.type") eq "multipart/alternative")
    {$body->head->mime_attr("content-type.type" => "text/plain")}
  $body->make_singlepart if $body->parts==1;
  return $body;
}

b) adds a plain/text part to html-only mails
# Add text/plain
:0 fhbw
* ^Content-Type:.*text/html
| addtext.pl


#!/usr/bin/perl -w

# This is a mail filter. Takes a mail from STDIN,
# adds a text/plain section to every text/html not in
# multipart/alternative, finally writes cleaned MIME mail to
# STDOUT.
#
# text/plain is generated by lynx -dump
#
# TODO: fix charset
#
# Inspired by fixmail.pl by Boris 'pi' Piwinger
# <3...@piology.org>
# 
# (c) Joerg Dorchain <jo...@dorchain.net>
# This code is public domain. It comes with absolutely no warranty.
# If it eats your mails for lunch, that's your problem. If you don't
# like this, don't use it.
#
# Best with Procmail, e.g.:
#
#     # Add text/plain
#     :0
#     * ^Content-Type:.*text/html
#     {
#       :0c:
#       tmp/addtext
#       :0fhbw
#       | addtext.pl
#     }



use IPC::Run qw(run);

# Save the From line
my $from = <STDIN>;

# Create parser, we are being daring here (huge mails might cause
# problems)
use MIME::Parser;
my $done="";

my $parser=MIME::Parser->new;
$parser->output_to_core(1);
my $top=$parser->read(\*STDIN) or die "Couldn't parse MIME stream.\n";

$top = &analyse($top);
$top->head->add('X-JD-Mime-Part-added',$done) if $done;
$top->sync_headers(Length=>'COMPUTE');

# Print From line
print $from;
$top->print(\*STDOUT);
exit 0;

sub analyse {
my ($top) = (@_);

if ($top->is_multipart) {
  my ($i,$ti,$hi);
  my ($parts) = $top->{ME_Parts};
  for($i = 0; $i < $#{$parts}; $i++) {
    if ($top->effective_type ne "multipart/alternative") {
      $$parts[$i] = &analyse($$parts[$i]);
    } else {
      if ($$parts[$i]->effective_type eq "text/plain") { $ti = $i; }
      if ($$parts[$i]->effective_type eq "text/html") { $hi = $i; }
    }
  }
  if ($top->effective_type eq "multipart/alternative") {
    if ($hi && !$ti) { # html found, but not text
      $$parts[$hi] = &analyse($$parts[$hi]);
    }
  }
  $top->{ME_Parts} = $parts;
}

if ($top->effective_type ne "text/html") { return $top; }

# Now do the dirty job
my ($html, $plain, $err);
$top->make_multipart("alternative");
$html = $top->parts(0)->bodyhandle->as_string;
run ["lynx", "-dump", "-stdin",  "-stderr"], '<', \$html, '>', \$plain, '2>', 
\$err;
$top->attach(Data => $err.$plain, Type => "text/plain", Encoding => "8bit",
    Charset => "utf-8");
$done.=" text/plain";
return $top;
}

Attachment: signature.asc
Description: Digital signature

Reply via email to