Okay, I wrote this script because I've been trying to find a way to
search my attachments in Thunderbird.
I figured I'm not the only one but it's a quick hack and not a module
so I don't know  where to post it, so here it is:

You can do anything you want with it. It's not very polished because I
thought I'd give myself more time before polishing it up and put
options into it. Right now it's just a pipe - cos I can't guarantee
that it won't kill your mbox!

After running your mbox through this code you should be able to search
your attachment names under the tage "X-META-01".

#!/bin/perl
# process mbox and add X-META-01 tag with filenames of attachments
# code by zlel
# put into the public domain 2010.01.03
# filename: mboxtagattachments
# usage   : cat Inbox | mboxtagattachments > newInbox

$OUTPUTENCODING = "UTF-8";

use MIME::Base64 qw( encode_base64 decode_base64 );
use Text::Iconv;
use Data::Dumper;

$STATE = "INIT";
$LASTLINE = 1;
while (($_=<STDIN>) || $LASTLINE) {
        if (!$_) {
                $LASTLINE = 0;
                $_ = "From -";
        }
        s/[\n\r]*$//g;
        $_ .= "\r\n";
        if (/^From -/) {
                if ($RAW_PROLOG) {
                        ## -------------------------------------------
                        ## HANDLE MAIL THAT ENDED
                        ## -------------------------------------------

                        %subvalues = getSubValues($MAIL_HEADER
{"Content-Type"});
                        $CHARSET = $subvalues{"charset"};

                        # print STDERR "$CHARSET\n";
                        # if ($CHARSET) {
                        #        $MAIL_BODY = Text::Iconv->new
($CHARSET, $OUTPUTENCODING)->convert($MAIL_BODY);
                        #}
                        #print STDERR Dumper(\%MAIL_HEADER);

                        print $RAW_PROLOG;
                        for (keys %MAIL_ATTACHMENTS) {
                                $fn = $MAIL_ATTACHMENTS{$_};
                                if ($fn =~ /[^[:alnum:][:punct:]
[:space:]]/) {
                                        if ($CHARSET) {
                                                $fn = Text::Iconv->new
($OUTPUTENCODING, $CHARSET)->convert($fn);
                                        }
                                }
                                print "X-META-01: $fn\r\n";
                        }
                        print $RAW_HEADER;
                        print "\r\n";
                        print $RAW_BODY;
                }
                $RAW_PROLOG = $_;
                $RAW_HEADER = "";
                $RAW_BODY = "";
                %MAIL_HEADER = ();
                $MAIL_BODY = "";
                %MAIL_ATTACHMENTS = ();
                $STATE = "HEADER";
                $KEY = "";
                next;
        }
        if ($STATE eq "HEADER") {
                if (/^\s*$/ && ($MAIL_HEADER{"From"} ne "")) {
                        $STATE = "BODY";
                        if ($MAIL_HEADER{"Content-Type"} =~ /
multipart/) {

                                %subvalues = getSubValues($MAIL_HEADER
{"Content-Type"});
                                $partbody_boundary = $subvalues
{"boundary"};

                                %partbody_headers = ();
                                $partbody_count = 0;
                                $partbody_filename = "";
                                $KEY = "";
                                $STATE = "PART-HEADER";
                        }
                } else {
                        if (/^X-META-01:/) {
                                next;
                        } else {
                                $RAW_HEADER .= $_;
                        }
                }
                if (/^\s/) {
                        chomp;
                        $KEY = $PREVKEY;
                        $VALUE = $_;
                } else {
                        ($KEY, $VALUE) = /^(\S[^:]*):(.*)/;
                        $PREVKEY = $KEY;
                }
                if ($KEY eq "Subject") {
                        $VALUE =~ s/^\s*//;
                        $VALUE = decode($VALUE);
                }
                if ($KEY) {
                        $MAIL_HEADER{$KEY} .= $VALUE;
                }
                next;
        }
        if ($STATE eq "PART-BODY") {
                $RAW_BODY .= $_;
                if (/$partbody_boundary/) {
                        {
                                ## -----------------------------------
                                ## HANDLE COMPLETED PART
                                ## -----------------------------------

                                %subvalues = getSubValues
($partbody_headers{"Content-Type"});
                                $partbody_filename = decode($subvalues
{"name"});

                                if ($partbody_filename eq "") {
                                        %subvalues = getSubValues
($partbody_headers{"Content-Disposition"});
                                        $partbody_filename = $subvalues
{"filename"};
                                }

                                # if ($partbody_filename eq "") {
                                #         for (my $c=0 ; $subvalues
{"filename*$c*"}; $c++) {
                                #
$partbody_filename .= $subvalues{"filename*$c*"};
                                #         }
                                #        $partbody_filename = decode
($partbody_filename, "url");
                                # }

                                if ($partbody_filename) {
                                        $MAIL_ATTACHMENTS
{$partbody_count} = $partbody_filename;
                                }

                                # print "PART $partbody_count\n";
                                # print Dumper(\%partbody_headers);
                                # print "FILENAME $partbody_filename
\n";
                                # print $partbody;
                        }
                        $STATE = "PART-HEADER";
                        $partbody = "";
                        $partbody_count++;
                        $partbody_filename = "";
                } else {
                        $partbody .= $_;
                }
                next;
        }
        if ($STATE eq "PART-HEADER") {
                $RAW_BODY .= $_;
                if (/^\s*$/) {
                        $STATE = "PART-BODY";
                }
                if (/^\s/) {
                        chomp;
                        if ($_) {
                                $partbody_headers{$PREVKEY} .= "\n".
$_;
                        }
                        next;
                } else {
                        ($KEY, $VALUE) = /^(\S[^:]*):(.*)/;
                        if ($KEY && $VALUE) {
                                $partbody_headers{$KEY} = $VALUE;
                        }
                        $PREVKEY = $KEY;
                }
                next;
        }
        if ($STATE eq "BODY") {
                $RAW_BODY .= $_;
                $MAIL_BODY .= $_;
                next;
        }
}

sub getSubValues {
        my $value = shift @_;
        my %subvalues = ();
        my $initial;
        my $oldvalue = "";

        ($initial, $value) = $value =~ m/^([^;\n\r]*)[;[:space:]\n\r]*
(.*)/sg;
        $subvalues{""} = $initial;
        while ($value) {
                ($key, $value) = $value =~ /([^=]*)=(.*)/s;
                if ($value =~ /^"/) {
                        ($keyvalue, $value) = $value =~ /"([^"]
*)"[[:space:]\n\r]*(.*)/s;
                } else {
                        ($keyvalue, $value) = $value =~ /\s*([^;
[:space:]]*)[;[:space:]\n\r]*(.*)/s;
                }
                $subvalues{$key} = $keyvalue;
                if ($oldvalue eq $value) {
                        break;
                }
                $oldvalue = $value;
        }
        return %subvalues;
}

sub encode {
        my $fn = shift @_;
        my $encoding = shift @_;
        my $charset = shift @_;
        if ($charset eq "") {
                $charset = $OUTPUTENCODING;
        }
        if ($encoding eq "url") {
                $fn = "$charset''".URLEncode(Text::Iconv->new
($OUTPUTENCODING, $charset)->convert($fn));
        } else {
                $fn = encode_base64(Text::Iconv->new($OUTPUTENCODING,
$charset)->convert($fn));
                chomp($fn);
                $fn = "=?$charset?$fn?=";
        }
        return $fn;
}
sub decode {
        my $fn = shift @_;
        my $encoding = shift;
        if ($encoding eq "url") {
                $fn =~ s/([^']*?)''([^;]*?);/Text::Iconv->new($1,
$OUTPUTENCODING)->convert(URLDecode($2))/eg;
        } else {
                $fn =~ s/=\?([^\?]*?)\?([^\?]*?)\?([^\?]*?)\?=/
Text::Iconv->new($1, $OUTPUTENCODING)->convert(decode_base64($3))/eg;
        }
        return $fn;
}
sub URLDecode {
        my $theURL = $_[0];
        $theURL =~ tr/+/ /;
        $theURL =~ s/%([a-fA-F0-9]{2,2})/chr(hex($1))/eg;
        $theURL =~ s/<!?(.|\n)*?>//g;
        return $theURL;
}
sub URLEncode {
        my $theURL = $_[0];
        $theURL =~ s/([\W])/%".uc(sprintf("%2.2x",ord($1)))/eg;
        return $theURL;
}
exit;

Reply via email to