On Mon, Jun 25, 2018 at 09:54:27AM -0400, Kevin A. McGrail wrote: > Just to be clear, others concur that with 3 uris or less, it works, 4 or > more it fails. It's inconsistent and exists in trunk as well. > > It's inconsistent depending on the platform as well. > > I am not sure if it is a Perl bug or an SA bug or something we are doing > wrong but it is a blocker. > reverting r1823205 makes the regression test work on 3.4 and trunk for me. tested on CentOS6 (perl 5.10.1), can anybody confirm ?
Regards Giovanni > -- > Kevin A. McGrail > VP Fundraising, Apache Software Foundation > Chair Emeritus Apache SpamAssassin Project > https://www.linkedin.com/in/kmcgrail - 703.798.0171 > > On Sat, Jun 23, 2018 at 10:15 PM, Bill Cole < > [email protected]> wrote: > > > On 22 Jun 2018, at 14:29 (-0400), Kevin A. McGrail wrote: > > > > Hi All, > >> > >> 3.4 is not passing tests for me with the idn_dots.t and it appears to > >> point > >> to a problem in P:M:S::get_uri_list. I'm bleary from looking at this for > >> three days. Can someone take a look at this? > >> > >> If you modify the t/idn_dots to print the uri list from the generated > >> message in the test, it fails in 3.4 but passes in Trunk and in the 3.4.1 > >> release. See below for output but basically there is a missing URI which > >> is why the test correctly fails. > >> > > > > I have made the test work by adding "use utf8" to the test script. This is > > just avoiding the underlying subtle bug. > > > > The breakage is only seen (so far) on the RedHat perl 5.16.3 packaged for > > EL7 and derivatives. I believe that 5.16.x was the last major release to > > NOT work in UTF-8 by default without "use utf8" explicitly used. I have > > replicated the incorrect parse with the spamassassin script and a message > > with all-ascii URLs, so the problem is somewhere in the spectacularly > > complicated RE that extracts URIs from the cooked text array inside > > PerMsgStatus->get_uri_detail_list. Making matters worse, if I run either > > t/idn_dots.t or spamassassin with the Perl debugger (-d) the parse works. > > > > Anyone who is still using an even older Perl could assist simply by > > confirming that the 3.4 branch from SVN fails subtest 4 of t/idn_dots.t if > > you remove or comment out the "use utf8" line I added to that file today. > > > > It would be interesting to see it the problem would be solved by adding > > "use utf8" to every .pm that had a "use bytes" declaration before 2017. > > This is a bit of a shotgun approach but simpler than hunting for the > > specific issue. I'd try it myself, but that I'm basically on my last > > stealable minute for the weekend already. > > > > > > -- > > Bill Cole > > [email protected] or [email protected] > > (AKA @grumpybozo and many *@billmail.scconsult.com addresses) > > Currently Seeking Steadier Work: https://linkedin.com/in/billcole > >
Index: lib/Mail/SpamAssassin/Conf/Parser.pm
===================================================================
--- lib/Mail/SpamAssassin/Conf/Parser.pm (revisione 1834377)
+++ lib/Mail/SpamAssassin/Conf/Parser.pm (copia locale)
@@ -259,7 +259,6 @@
while (defined ($line = shift @conf_lines)) {
local ($1); # bug 3838: prevent random taint flagging of $1
- if (index($line,'#') > -1) {
# bug 5545: used to support testing rules in the ruleqa system
if ($keepmetadata && $line =~ /^\#testrules/) {
$self->{file_scoped_attrs}->{testrules}++;
@@ -275,12 +274,8 @@
$line =~ s/(?<!\\)#.*$//; # remove comments
$line =~ s/\\#/#/g; # hash chars are escaped, so unescape them
- }
-
- if ($line =~ tr{ \t\r\n\f}{}) {
$line =~ s/^\s+//; # remove leading whitespace
$line =~ s/\s+$//; # remove tailing whitespace
- }
next unless($line); # skip empty lines
# handle i18n
@@ -289,7 +284,7 @@
my($key, $value) = split(/\s+/, $line, 2);
$key = lc $key;
# convert all dashes in setting name to underscores.
- $key =~ tr/-/_/;
+ $key =~ s/-/_/g;
$value = '' unless defined($value);
# # Do a better job untainting this info ...
@@ -339,26 +334,26 @@
}
# now handle the commands.
- elsif ($key eq 'include') {
+ if ($key eq 'include') {
$value = $self->fix_path_relative_to_current_file($value);
my $text = $conf->{main}->read_cf($value, 'included file');
unshift (@conf_lines, split (/\n/, $text));
next;
}
- elsif ($key eq 'ifplugin') {
+ if ($key eq 'ifplugin') {
$self->handle_conditional ($key, "plugin ($value)",
\@if_stack, \$skip_parsing);
next;
}
- elsif ($key eq 'if') {
+ if ($key eq 'if') {
$self->handle_conditional ($key, $value,
\@if_stack, \$skip_parsing);
next;
}
- elsif ($key eq 'else') {
+ if ($key eq 'else') {
# TODO: if/else/else won't get flagged here :(
if (!@if_stack) {
$parse_error = "config: found else without matching conditional";
@@ -370,7 +365,7 @@
}
# and the endif statement:
- elsif ($key eq 'endif') {
+ if ($key eq 'endif') {
my $lastcond = pop @if_stack;
if (!defined $lastcond) {
$parse_error = "config: found endif without matching conditional";
@@ -509,7 +504,7 @@
my $conf = $self->{conf};
my $lexer = ARITH_EXPRESSION_LEXER;
- my @tokens = ($value =~ m/($lexer)/og);
+ my @tokens = ($value =~ m/($lexer)/g);
my $eval = '';
my $bad = 0;
@@ -989,14 +984,14 @@
# Lex the rule into tokens using a rather simple RE method ...
my $lexer = ARITH_EXPRESSION_LEXER;
- my @tokens = ($rule =~ m/$lexer/og);
+ my @tokens = ($rule =~ m/$lexer/g);
# Go through each token in the meta rule
my $conf_tests = $conf->{tests};
foreach my $token (@tokens) {
# has to be an alpha+numeric token
- next if $token =~ tr{A-Za-z0-9_}{}c || substr($token,0,1) =~
tr{A-Za-z_}{}c; # even faster
-
+ # next if $token =~ /^(?:\W+|[+-]?\d+(?:\.\d+)?)$/;
+ next if $token !~ /^[A-Za-z_][A-Za-z0-9_]*\z/s; # faster
# and has to be a rule name
next unless exists $conf_tests->{$token};
@@ -1302,7 +1297,7 @@
# Lex the rule into tokens using a rather simple RE method ...
my $lexer = ARITH_EXPRESSION_LEXER;
- my @tokens = ($rule =~ m/$lexer/og);
+ my @tokens = ($rule =~ m/$lexer/g);
if (length($name) == 1) {
for (@tokens) {
print "$name $_\n " or die "Error writing token: $!";
Index: lib/Mail/SpamAssassin/Util.pm
===================================================================
--- lib/Mail/SpamAssassin/Util.pm (revisione 1834377)
+++ lib/Mail/SpamAssassin/Util.pm (copia locale)
@@ -280,7 +280,11 @@
sub untaint_var {
# my $arg = $_[0]; # avoid copying unnecessarily
if (!ref $_[0]) { # optimized by-far-the-most-common case
- return defined $_[0] ? scalar each %{ { $_[0] => undef } } : undef; ## no
critic (ProhibitExplicitReturnUndef) - See Bug 7120 - fast untaint (hash keys
cannot be tainted)
+ no re 'taint'; # override a "use re 'taint'" from outer scope
+ return undef if !defined $_[0]; ## no critic (ProhibitExplicitReturnUndef)
- See Bug 7120
+ local($1); # avoid Perl taint bug: tainted global $1 propagates taintedness
+ $_[0] =~ /^(.*)\z/s;
+ return $1;
} else {
my $r = ref $_[0];
if ($r eq 'ARRAY') {
Index: lib/Mail/SpamAssassin/Plugin/Check.pm
===================================================================
--- lib/Mail/SpamAssassin/Plugin/Check.pm (revisione 1834377)
+++ lib/Mail/SpamAssassin/Plugin/Check.pm (copia locale)
@@ -533,7 +533,7 @@
# Lex the rule into tokens using a rather simple RE method ...
my $lexer = ARITH_EXPRESSION_LEXER;
- my @tokens = ($rule =~ m/$lexer/og);
+ my @tokens = ($rule =~ m/$lexer/g);
# Set the rule blank to start
$meta{$rulename} = "";
@@ -545,7 +545,8 @@
foreach my $token (@tokens) {
# Numbers can't be rule names
- if ($token =~ tr{A-Za-z0-9_}{}c || substr($token,0,1) =~ tr{A-Za-z_}{}c)
{
+ # if ($token =~ /^(?:\W+|[+-]?\d+(?:\.\d+)?)$/) {
+ if ($token !~ /^[A-Za-z_][A-Za-z0-9_]*\z/s) { # faster
$meta{$rulename} .= "$token ";
}
else { # token is a rule name
@@ -611,7 +612,7 @@
warn "no meta_dependencies defined for $metas[$i]";
}
my $alldeps = join ' ', grep {
- index( ($tflags->{$_}||''),'net') > -1 && ($tflags->{$_}||'')
=~ /\bnet\b/
+ ($tflags->{$_}||'') =~ /\bnet\b/
} split (' ', $conf->{meta_dependencies}->{ $metas[$i] } );
if ($alldeps ne '') {
signature.asc
Description: PGP signature
