Script-ul atasat mai jos este dedicat tuturor celor care folosesc
sisteme UTF-8-impaired, si isi doresc sa citeasca in mesajele de pe RLUG
litere cu diacritice in loc de carcalaci. Bug reports / comentarii /
injuraturi / etc. pe personala.
Note:
-----
(1) Din motive bine intemeiate, script-ul are nevoie de MIME-tools
>= 6.2; versiunile 5.x pe care le instaleaza in mod normal
diferitele package managers sub Linux nu sunt bune;
http://search.cpan.org/CPAN/authors/id/E/ER/ERYQ/MIME-tools-6.200_02.tar.gz
(2) Semnaturile PGP nu pot supravietui re-incodarii, motiv pentru care
script-ul le taie;
(3) Incantatie pentru Procmail:
:0
* ^List-Id:.*\.lists\.lug\.ro>
{
:0 fw
* ^Subject:.*=\?UTF-?8\?
| utf8-recode.pl iso-8859-2
:0 Efw
* HB ?? ^(Content-Type:.*|[ ]+)charset="?utf-?8"?\>
| utf8-recode.pl iso-8859-2
}
([ ] de mai sus contine un spatiu si un tab).
Salutari,
Liviu Daia
--
Dr. Liviu Daia http://www.imar.ro/~daia
#! /usr/bin/perl
#
# Copyright (c) 2005 Liviu Daia <[EMAIL PROTECTED]>
# All rights reserved.
#
# $Revision: 1.5 $
# $Id: utf8-recode.pl,v 1.5 2006/01/05 06:33:31 daia Exp $
# $Source: /usr/share/CVS/Scripts/scripts/utf8-recode.pl,v $
#
use File::Basename;
use MIME::Parser 6.108;
use MIME::Words qw(:all);
use IO::File;
use Locale::Recode;
# $^W = 1;
# use strict;
# use Carp ();
# local $SIG{__WARN__} = \&Carp::cluck;
my ($chr_to, $recode, $tmp, $parser, $parsedir, $f, $e);
my ($idx, $h, @chunks, $data, $charset, $io);
sub recode_recursive ($);
sub recode_recursive ($)
{
my $e = shift;
# embedded messages can have subjects
$e->head->modify (0);
$idx = 0;
for ($e->head->get ('Subject'))
{
$h = '';
@chunks = decode_mimewords $_;
for (@chunks)
{
($data, $charset) = @$_;
if (defined $charset and $charset =~ m/^utf-?8$/io)
{
$recode->recode ($data)
or die $recode->getError;
$charset = $chr_to;
}
$h .= $charset ? (encode_mimeword $data, 'B', $charset) : $data;
}
$e->head->replace ('Subject', $h, $idx++);
}
# Lines and Content-Length are probably wrong
$e->head->delete ('Lines');
$e->head->delete ('Content-Length');
if ($e->is_multipart)
{
for ($e->parts)
{
recode_recursive ($_);
}
}
elsif (lc $e->effective_type eq 'text/plain' and
# elsif ($e->effective_type =~ m!^text/!io and
defined $e->head->mime_attr ('content-type.charset') and
$e->head->mime_attr ('content-type.charset') =~ /^utf-?8$/io)
{
# avoids messing with internal pathnames
# avoids reading unbound data in memory
$tmp->seek (0, 0);
$tmp->truncate (0);
$io = $e->bodyhandle->open ('r');
$tmp->print ($_)
while (defined ($_ = $io->getline));
$io->close;
$tmp->flush;
$tmp->seek (0, 0);
$io = $e->bodyhandle->open ('w');
while (defined ($_ = $tmp->getline))
{
$recode->recode ($_)
or die $recode->getError;
$io->print ($_);
}
$io->close;
$e->head->mime_attr ('Content-Type.charset' => $chr_to);
}
}
sub chop_signatures ($);
sub chop_signatures ($)
{
my $e = shift;
if ($e->is_multipart)
{
$e->parts ([grep { lc $_->effective_type ne 'application/pgp-signature' }
$e->parts]);
$e->make_singlepart;
chop_signatures ($_)
for ($e->parts);
}
}
#
# main
#
die ("usage: " . (basename $0) . " [ -l | <encoding> ]\n")
unless (@ARGV == 1 and ($ARGV[0] eq '-l' or $chr_to =
Locale::Recode->resolveAlias ($ARGV[0])));
if ($ARGV[0] eq '-l')
{
print "$_\n"
for (sort @{Locale::Recode->getCharsets});
exit;
}
$parser = MIME::Parser->new ();
$parser->extract_encoded_messages (1);
$parser->extract_nested_messages ('NEST');
$parser->extract_uuencode (1);
$parser->ignore_errors (1);
$parser->output_to_core (0);
$parser->tmp_recycling (1);
$parser->tmp_to_core (0);
$parser->use_inner_files (0);
if (defined $ENV{'TMPDIR'}) { $parsedir = $ENV{'TMPDIR'}; }
elsif (defined $ENV{'TMP'}) { $parsedir = $ENV{'TMP'}; }
else { $parsedir = '/tmp'; }
$f = MIME::Parser::FlatFiler->new ($parsedir);
$f->ignore_filename (1);
$parser->filer ($f);
# XXX can't tell the parser to keep From_
my $from = <STDIN>;
$e = eval { $parser->parse (\*STDIN); };
print STDERR $_
for ($parser->results->msgs);
die qq{can't parse message: [EMAIL PROTECTED]
unless defined $e;
$recode = Locale::Recode->new (from => 'UTF-8', to => $chr_to);
die $recode->getError
if $recode->getError;
$tmp = IO::File->new_tmpfile
or die qq{can't create temp file: $!};
# PGP signatures are useless by now
chop_signatures $e;
# $e->dump_skeleton (\*STDERR);
eval { recode_recursive ($e); };
die qq{can't recode message: [EMAIL PROTECTED]
if ($@);
$e->sync_headers (Length => 'DELETE');
print $from;
$e->print;
undef $tmp;
$parser->filer->purge;
_______________________________________________
RLUG mailing list
[email protected]
http://lists.lug.ro/mailman/listinfo/rlug