# New Ticket Created by  Dave Rolsky 
# Please include the string:  [perl #36928]
# in the subject line of all future correspondence about this issue. 
# <URL: https://rt.perl.org/rt3/Ticket/Display.html?id=36928 >



This is a bug report for perl from [EMAIL PROTECTED],
generated with the help of perlbug 1.35 running under perl v5.8.7.


-----------------------------------------------------------------
[Please enter your report here]

Check out the following test:

  use strict;
  use warnings;

  use Test::More tests => 6;

  use Encode;

  binmode STDOUT, ':utf8';

  {
      open my $fh, '<:utf8', 'invalid-8bit.txt'
          or die $!;

      my $data = do { local $/; <$fh> };

      like( $data, qr/\x{FFFD}/, 'bad UTF8 was converted to 0xFFFD' );
      ok( Encode::is_utf8($data), 'data is marked as utf8' );
      {
          use bytes;
          unlike( $data, qr/\x{96}/, 'does not contain 0x96' )
      }

      $data =~ /(?:^\.([\w\-]+)\ *\n)((?:.*\n)*?)(?:^\.\1\ *\n|\z)/m;
  }

  {
      open my $fh, 'invalid-8bit.txt'
          or die $!;

      my $data = Encode::decode('utf8', join '', <$fh>);

      like( $data, qr/\x{FFFD}/, 'bad UTF8 was converted to 0xFFFD' );
      ok( Encode::is_utf8($data), 'data is marked as utf8' );
      {
          use bytes;
          unlike( $data, qr/\x{96}/, 'does not contain 0x96' )
      }

      $data =~ /(?:^\.([\w\-]+)\ *\n)((?:.*\n)*?)(?:^\.\1\ *\n|\z)/m;
  }

Given a file with data that is _not_ valid UTF-8 (attached), the layer
version (the first block, which reads the file through the ':utf8'
layer) gives a bunch of warnings, and then you have a string that is
marked as utf8 and appears to have code point 0 instead of the bad
data.

Then it hangs in the regex after warning:

 Malformed UTF-8 character (unexpected non-continuation byte 0x0a,
 immediately after start byte 0xd8) in pattern match (m//) at layer.t
 line 23

The second set of tests (the block that uses Encode::decode) gives no
warnings, and gives you a string with 0xFFFD instead of the bad data.
The regex does not hang.

So two things:

1. The hang is clearly a bug.

2. The fact that the layer and Encode act differently is really not
well documented, and I'm not sure whether the differences are
intentional, whether there is a bug in the layer code, or something
else.


[Please do not change anything below this line]
-----------------------------------------------------------------
---
Flags:
    category=core
    severity=low
---
Site configuration information for perl v5.8.7:

Configured by Debian Project at Sat Jul  9 12:13:16 EST 2005.

Summary of my perl5 (revision 5 version 8 subversion 7) configuration:
  Platform:
    osname=linux, osvers=2.4.27-ti1211, archname=i486-linux-gnu-thread-multi
    uname='linux kosh 2.4.27-ti1211 #1 sun sep 19 18:17:45 est 2004 i686 
gnulinux '
    config_args='-Dusethreads -Duselargefiles -Dccflags=-DDEBIAN 
-Dcccdlflags=-fPIC -Darchname=i486-linux-gnu -Dprefix=/usr 
-Dprivlib=/usr/share/perl/5.8 -Darchlib=/usr/lib/perl/5.8 -Dvendorprefix=/usr 
-Dvendorlib=/usr/share/perl5 -Dvendorarch=/usr/lib/perl5 
-Dsiteprefix=/usr/local -Dsitelib=/usr/local/share/perl/5.8.7 
-Dsitearch=/usr/local/lib/perl/5.8.7 -Dman1dir=/usr/share/man/man1 
-Dman3dir=/usr/share/man/man3 -Dsiteman1dir=/usr/local/man/man1 
-Dsiteman3dir=/usr/local/man/man3 -Dman1ext=1 -Dman3ext=3perl 
-Dpager=/usr/bin/sensible-pager -Uafs -Ud_csh -Uusesfio -Uusenm -Duseshrplib 
-Dlibperl=libperl.so.5.8.7 -Dd_dosuid -des'
    hint=recommended, useposix=true, d_sigaction=define
    usethreads=define use5005threads=undef useithreads=define 
usemultiplicity=define
    useperlio=define d_sfio=undef uselargefiles=define usesocks=undef
    use64bitint=undef use64bitall=undef uselongdouble=undef
    usemymalloc=n, bincompat5005=undef
  Compiler:
    cc='cc', ccflags ='-D_REENTRANT -D_GNU_SOURCE -DTHREADS_HAVE_PIDS -DDEBIAN 
-fno-strict-aliasing -pipe -I/usr/local/include -D_LARGEFILE_SOURCE 
-D_FILE_OFFSET_BITS=64',
    optimize='-O2',
    cppflags='-D_REENTRANT -D_GNU_SOURCE -DTHREADS_HAVE_PIDS -DDEBIAN 
-fno-strict-aliasing -pipe -I/usr/local/include'
    ccversion='', gccversion='4.0.1 20050701 (prerelease) (Debian 4.0.0-12)', 
gccosandvers=''
    intsize=4, longsize=4, ptrsize=4, doublesize=8, byteorder=1234
    d_longlong=define, longlongsize=8, d_longdbl=define, longdblsize=12
    ivtype='long', ivsize=4, nvtype='double', nvsize=8, Off_t='off_t', 
lseeksize=8
    alignbytes=4, prototype=define
  Linker and Libraries:
    ld='cc', ldflags =' -L/usr/local/lib'
    libpth=/usr/local/lib /lib /usr/lib
    libs=-lgdbm -lgdbm_compat -ldb -ldl -lm -lpthread -lc -lcrypt
    perllibs=-ldl -lm -lpthread -lc -lcrypt
    libc=/lib/libc-2.3.2.so, so=so, useshrplib=true, libperl=libperl.so.5.8.7
    gnulibc_version='2.3.2'
  Dynamic Linking:
    dlsrc=dl_dlopen.xs, dlext=so, d_dlsymun=undef, ccdlflags='-Wl,-E'
    cccdlflags='-fPIC', lddlflags='-shared -L/usr/local/lib'

Locally applied patches:
    

---
@INC for perl v5.8.7:
    /etc/perl
    /usr/local/lib/perl/5.8.7
    /usr/local/share/perl/5.8.7
    /usr/lib/perl5
    /usr/share/perl5
    /usr/lib/perl/5.8
    /usr/share/perl/5.8
    /usr/local/lib/site_perl
    /usr/local/lib/perl/5.8.4
    /usr/local/share/perl/5.8.4
    .

---
Environment for perl v5.8.7:
    HOME=/home/autarch
    LANG (unset)
    LANGUAGE=en_US:en_GB:en
    LD_LIBRARY_PATH (unset)
    LOGDIR (unset)
    
PATH=/usr/local/bin:/usr/bin:/bin:/usr/bin/X11:/usr/games:/home/autarch/bin:/home/autarch/bin
    PERL_BADLANG (unset)
    SHELL=/bin/bash


Reply via email to