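The attached patch fixes a UTF-8 encoding error in amavis: the original q_encode function breaks multibyte characters when Q-encoding header fields, so the patch embeds Email::MIME::RFC2047::Encoder and routes Q encoding through it instead. p@rick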
-- [*] sys4 AG https://sys4.de, +49 (89) 30 90 46 64 Schleißheimer Straße 26/MG,80333 München Sitz der Gesellschaft: München, Amtsgericht München: HRB 199263 Vorstand: Patrick Ben Koetter, Marc Schiffbauer, Wolfgang Stief Aufsichtsratsvorsitzender: Florian Kirstein
--- amavisd-new-2.11.0/amavisd 2016-04-26 21:24:33.000000000 +0200 +++ amavis-patched/amavisd 2018-07-11 16:38:46.631642227 +0200 @@ -5154,6 +5154,203 @@ 1; +#^L +package Email::MIME::RFC2047::Encoder; +$Email::MIME::RFC2047::Encoder::VERSION = '0.97'; +use strict; +use utf8; + +# ABSTRACT: Encoding of non-ASCII MIME headers + +use Encode (); +use MIME::Base64 (); # Not present, but not needed because we use this module only for Q encoding. + +my $rfc_specials = '()<>\[\]:;\@\\,."'; + +sub new { + my $package = shift; + my $options = ref($_[0]) ? $_[0] : { @_ }; + + my ($encoding, $method) = ($options->{encoding}, $options->{method}); + + if (!defined($encoding)) { + $encoding = 'utf-8'; + $method = 'Q' if !defined($method); + } + else { + $method = 'B' if !defined($method); + } + + my $encoder = Encode::find_encoding($encoding) + or die("encoding '$encoding' not found"); + + my $self = { + encoding => $encoding, + encoder => $encoder, + method => uc($method), + }; + + return bless($self, $package); +} + +sub encode_text { + my ($self, $string) = @_; + + return $self->_encode('text', $string); +} + +sub encode_phrase { + my ($self, $string) = @_; + + return $self->_encode('phrase', $string); +} + +sub _encode { + my ($self, $mode, $string) = @_; + + my $encoder = $self->{encoder}; + my $result = ''; + + # $string is split on whitespace. Each $word is categorized into + # 'mime', 'quoted' or 'text'. The intermediate result of the conversion of + # consecutive words of the same types is accumulated in $buffer. + # The type of the buffer is tracked in $buffer_type. + # The method _finish_buffer is called to finish the encoding of the + # buffered content and append to the result. 
+ my $buffer = ''; + my $buffer_type; + + for my $word (split(/\s+/, $string)) { + next if $word eq ''; # ignore leading white space + + $word =~ s/[\x00-\x1f\x7f]//g; # better remove control chars + + my $word_type; + + if ($word =~ /[\x80-\x{10ffff}]|(^=\?.*\?=\z)/s) { + # also encode any word that starts with '=?' and ends with '?=' + $word_type = 'mime'; + } + elsif ($mode eq 'phrase') { + $word_type = 'quoted'; + } + else { + $word_type = 'text'; + } + + $self->_finish_buffer(\$result, $buffer_type, \$buffer) + if $buffer ne '' && $buffer_type ne $word_type; + $buffer_type = $word_type; + + if ($word_type eq 'text') { + $result .= ' ' if $result ne ''; + $result .= $word; + } + elsif ($word_type eq 'quoted') { + $buffer .= ' ' if $buffer ne ''; + $buffer .= $word; + } + else { + my $max_len = 75 - 7 - length($self->{encoding}); + $max_len = 3 * ($max_len >> 2) if $self->{method} eq 'B'; + + my @chars; + push(@chars, ' ') if $buffer ne ''; + push(@chars, split(//, $word)); + + for my $char (@chars) { + my $chunk; + + if ($self->{method} eq 'B') { + $chunk = $encoder->encode($char); + } + elsif ($char =~ /[()<>@,;:\\".\[\]=?_]/) { + # special character + $chunk = sprintf('=%02x', ord($char)); + } + elsif ($char =~ /[\x80-\x{10ffff}]/) { + # non-ASCII character + + my $enc_char = $encoder->encode($char); + $chunk = ''; + + for my $byte (unpack('C*', $enc_char)) { + $chunk .= sprintf('=%02x', $byte); + } + } + elsif ($char eq ' ') { + $chunk = '_'; + } + else { + $chunk = $char; + } + + if (length($buffer) + length($chunk) <= $max_len) { + $buffer .= $chunk; + } + else { + $self->_finish_buffer(\$result, 'mime', \$buffer); + $buffer = $chunk; + } + } + } + } + + $self->_finish_buffer(\$result, $buffer_type, \$buffer) + if $buffer ne ''; + + return $result; +} + +sub _finish_buffer { + my ($self, $result, $buffer_type, $buffer) = @_; + + $$result .= ' ' if $$result ne ''; + + if ($buffer_type eq 'quoted') { + if ($$buffer =~ /[$rfc_specials]/) { + # use quoted 
string if buffer contains special chars + $$buffer =~ s/[\\"]/\\$&/g; + + $$result .= qq("$$buffer"); + } + else { + $$result .= $$buffer; + } + } + elsif ($buffer_type eq 'mime') { + $$result .= "=?$self->{encoding}?$self->{method}?"; + + if ($self->{method} eq 'B') { + $$result .= MIME::Base64::encode_base64($$buffer, ''); + } + else { + $$result .= $$buffer; + } + + $$result .= '?='; + } + + $$buffer = ''; + + return; +} + +1; + +#^L +package Amavis::Custom::rfc2047_Tools; +use strict; + +# replace buggy q_encode function, original amavis code breaks multibyte characters +sub q_encode($$$) { + my($octets,$encoding,$charset) = @_; + my $encoder = Email::MIME::RFC2047::Encoder->new; + $encoder->encode_text(Encode::decode_utf8($octets)); +} + +1; + # package Amavis::rfc2821_2822_Tools; use strict; @@ -9375,7 +9572,7 @@ $field_body_is_utf8?'Y':'N', $chset, $field_name, $field_body, $field_body_octets); my $qb = c('hdr_encoding_qb'); - my $encoder_func = uc $qb eq 'Q' ? \&q_encode + my $encoder_func = uc $qb eq 'Q' ? \&Amavis::Custom::rfc2047_Tools::q_encode : \&MIME::Words::encode_mimeword; $field_body = join("\n", map { /^[\001-\011\013\014\016-\177]*\z/ ? $_ : &$encoder_func($_,$qb,$chset) }