# New Ticket Created by Dave Rolsky # Please include the string: [perl #36928] # in the subject line of all future correspondence about this issue. # <URL: https://rt.perl.org/rt3/Ticket/Display.html?id=36928 >
This is a bug report for perl from [EMAIL PROTECTED], generated with the help of perlbug 1.35 running under perl v5.8.7. ----------------------------------------------------------------- [Please enter your report here] Check out the following test: use strict; use warnings; use Test::More tests => 6; use Encode; binmode STDOUT, ':utf8'; { open my $fh, '<:utf8', 'invalid-8bit.txt' or die $!; my $data = do { local $/; <$fh> }; like( $data, qr/\x{FFFD}/, 'bad UTF8 was converted to 0xFFFD' ); ok( Encode::is_utf8($data), 'data is marked as utf8' ); { use bytes; unlike( $data, qr/\x{96}/, 'does not contain 0x96' ) } $data =~ /(?:^\.([\w\-]+)\ *\n)((?:.*\n)*?)(?:^\.\1\ *\n|\z)/m; } { open my $fh, 'invalid-8bit.txt' or die $!; my $data = Encode::decode('utf8', join '', <$fh>); like( $data, qr/\x{FFFD}/, 'bad UTF8 was converted to 0xFFFD' ); ok( Encode::is_utf8($data), 'data is marked as utf8' ); { use bytes; unlike( $data, qr/\x{96}/, 'does not contain 0x96' ) } $data =~ /(?:^\.([\w\-]+)\ *\n)((?:.*\n)*?)(?:^\.\1\ *\n|\z)/m; } Given a file with data that is _not_ valid utf8 (attached), the layer version gives a bunch of warnings, and then you have a string that is marked as utf8 and appears to have code point 0 instead of the bad data. Then it hangs in the regex after warning: Malformed UTF-8 character (unexpected non-continuation byte 0x0a, immediately after start byte 0xd8) in pattern match (m//) at layer.t line 23 The second set of tests gives no warnings, and gives you a string with 0xFFFD instead of the bad data. The regex does not hang. So two things: 1. The hang is clearly a bug. 2. 
The fact that the layer and Encode act differently is really not well documented, and I'm not sure whether the differences are intentional, whether there is a bug in the layer code, or what. [Please do not change anything below this line] ----------------------------------------------------------------- --- Flags: category=core severity=low --- Site configuration information for perl v5.8.7: Configured by Debian Project at Sat Jul 9 12:13:16 EST 2005. Summary of my perl5 (revision 5 version 8 subversion 7) configuration: Platform: osname=linux, osvers=2.4.27-ti1211, archname=i486-linux-gnu-thread-multi uname='linux kosh 2.4.27-ti1211 #1 sun sep 19 18:17:45 est 2004 i686 gnulinux ' config_args='-Dusethreads -Duselargefiles -Dccflags=-DDEBIAN -Dcccdlflags=-fPIC -Darchname=i486-linux-gnu -Dprefix=/usr -Dprivlib=/usr/share/perl/5.8 -Darchlib=/usr/lib/perl/5.8 -Dvendorprefix=/usr -Dvendorlib=/usr/share/perl5 -Dvendorarch=/usr/lib/perl5 -Dsiteprefix=/usr/local -Dsitelib=/usr/local/share/perl/5.8.7 -Dsitearch=/usr/local/lib/perl/5.8.7 -Dman1dir=/usr/share/man/man1 -Dman3dir=/usr/share/man/man3 -Dsiteman1dir=/usr/local/man/man1 -Dsiteman3dir=/usr/local/man/man3 -Dman1ext=1 -Dman3ext=3perl -Dpager=/usr/bin/sensible-pager -Uafs -Ud_csh -Uusesfio -Uusenm -Duseshrplib -Dlibperl=libperl.so.5.8.7 -Dd_dosuid -des' hint=recommended, useposix=true, d_sigaction=define usethreads=define use5005threads=undef useithreads=define usemultiplicity=define useperlio=define d_sfio=undef uselargefiles=define usesocks=undef use64bitint=undef use64bitall=undef uselongdouble=undef usemymalloc=n, bincompat5005=undef Compiler: cc='cc', ccflags ='-D_REENTRANT -D_GNU_SOURCE -DTHREADS_HAVE_PIDS -DDEBIAN -fno-strict-aliasing -pipe -I/usr/local/include -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64', optimize='-O2', cppflags='-D_REENTRANT -D_GNU_SOURCE -DTHREADS_HAVE_PIDS -DDEBIAN -fno-strict-aliasing -pipe -I/usr/local/include' ccversion='', gccversion='4.0.1 20050701 (prerelease) (Debian 4.0.0-12)', 
gccosandvers='' intsize=4, longsize=4, ptrsize=4, doublesize=8, byteorder=1234 d_longlong=define, longlongsize=8, d_longdbl=define, longdblsize=12 ivtype='long', ivsize=4, nvtype='double', nvsize=8, Off_t='off_t', lseeksize=8 alignbytes=4, prototype=define Linker and Libraries: ld='cc', ldflags =' -L/usr/local/lib' libpth=/usr/local/lib /lib /usr/lib libs=-lgdbm -lgdbm_compat -ldb -ldl -lm -lpthread -lc -lcrypt perllibs=-ldl -lm -lpthread -lc -lcrypt libc=/lib/libc-2.3.2.so, so=so, useshrplib=true, libperl=libperl.so.5.8.7 gnulibc_version='2.3.2' Dynamic Linking: dlsrc=dl_dlopen.xs, dlext=so, d_dlsymun=undef, ccdlflags='-Wl,-E' cccdlflags='-fPIC', lddlflags='-shared -L/usr/local/lib' Locally applied patches: --- @INC for perl v5.8.7: /etc/perl /usr/local/lib/perl/5.8.7 /usr/local/share/perl/5.8.7 /usr/lib/perl5 /usr/share/perl5 /usr/lib/perl/5.8 /usr/share/perl/5.8 /usr/local/lib/site_perl /usr/local/lib/perl/5.8.4 /usr/local/share/perl/5.8.4 . --- Environment for perl v5.8.7: HOME=/home/autarch LANG (unset) LANGUAGE=en_US:en_GB:en LD_LIBRARY_PATH (unset) LOGDIR (unset) PATH=/usr/local/bin:/usr/bin:/bin:/usr/bin/X11:/usr/games:/home/autarch/bin:/home/autarch/bin PERL_BADLANG (unset) SHELL=/bin/bash