In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/3f60a9307162888df8e8e13b2361a3b8380c8744?hp=e1d354993802640ccfdfb4949cc8dc3ae14db4f1>
- Log ----------------------------------------------------------------- commit 3f60a9307162888df8e8e13b2361a3b8380c8744 Author: Aaron Crane <a...@cpan.org> Date: Tue Jul 18 18:06:46 2017 +0100 Import Encode-2.92 from CPAN This also permits removing the local customisation for the previous version. M MANIFEST M Porting/Maintainers.pl M cpan/Encode/Encode.pm M cpan/Encode/Encode.xs M cpan/Encode/Makefile.PL M cpan/Encode/Unicode/Unicode.pm M cpan/Encode/Unicode/Unicode.xs M cpan/Encode/bin/enc2xs M cpan/Encode/bin/ucmlint M cpan/Encode/encoding.pm M cpan/Encode/lib/Encode/Alias.pm M cpan/Encode/lib/Encode/CN/HZ.pm M cpan/Encode/lib/Encode/Encoding.pm M cpan/Encode/lib/Encode/GSM0338.pm M cpan/Encode/lib/Encode/Guess.pm M cpan/Encode/lib/Encode/JP/JIS7.pm M cpan/Encode/lib/Encode/KR/2022_KR.pm M cpan/Encode/lib/Encode/MIME/Header.pm M cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm M cpan/Encode/lib/Encode/Unicode/UTF7.pm M cpan/Encode/t/CJKT.t M cpan/Encode/t/enc_data.t M cpan/Encode/t/enc_eucjp.t M cpan/Encode/t/enc_module.t M cpan/Encode/t/enc_utf8.t M cpan/Encode/t/fallback.t M cpan/Encode/t/guess.t M cpan/Encode/t/jperl.t M cpan/Encode/t/mime-header.t A cpan/Encode/t/truncated_utf8.t A cpan/Encode/t/undef.t A cpan/Encode/t/use-Encode-Alias.t A cpan/Encode/t/utf8messages.t A cpan/Encode/t/whatwg-aliases.json A cpan/Encode/t/whatwg-aliases.t M t/porting/customized.dat commit 589c97f41d373f2e7205a4ffbcb7a639635b7bda Author: Aaron Crane <a...@cpan.org> Date: Tue Jul 18 12:14:09 2017 +0100 Porting/perldelta_template.pod: tiny grammar tweak M Porting/perldelta_template.pod ----------------------------------------------------------------------- Summary of changes: MANIFEST | 6 + Porting/Maintainers.pl | 3 +- Porting/perldelta_template.pod | 16 +- cpan/Encode/Encode.pm | 277 ++++++------- cpan/Encode/Encode.xs | 324 +++++++-------- cpan/Encode/Makefile.PL | 100 ++++- cpan/Encode/Unicode/Unicode.pm | 14 +- cpan/Encode/Unicode/Unicode.xs | 6 +- cpan/Encode/bin/enc2xs | 14 +- cpan/Encode/bin/ucmlint | 7 +- cpan/Encode/encoding.pm | 21 +- cpan/Encode/lib/Encode/Alias.pm | 8 +- cpan/Encode/lib/Encode/CN/HZ.pm | 4 +- cpan/Encode/lib/Encode/Encoding.pm | 24 +- cpan/Encode/lib/Encode/GSM0338.pm | 11 +- cpan/Encode/lib/Encode/Guess.pm | 5 +- cpan/Encode/lib/Encode/JP/JIS7.pm | 7 +- cpan/Encode/lib/Encode/KR/2022_KR.pm | 4 +- cpan/Encode/lib/Encode/MIME/Header.pm | 23 +- cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm | 6 +- cpan/Encode/lib/Encode/Unicode/UTF7.pm | 9 +- cpan/Encode/t/CJKT.t | 6 +- cpan/Encode/t/enc_data.t | 2 +- cpan/Encode/t/enc_eucjp.t | 23 +- cpan/Encode/t/enc_module.t | 2 +- cpan/Encode/t/enc_utf8.t | 23 +- cpan/Encode/t/fallback.t | 44 ++- cpan/Encode/t/guess.t | 6 +- cpan/Encode/t/jperl.t | 2 +- cpan/Encode/t/mime-header.t | 8 +- cpan/Encode/t/truncated_utf8.t | 55 +++ cpan/Encode/t/undef.t | 25 ++ cpan/Encode/t/use-Encode-Alias.t | 8 + cpan/Encode/t/utf8messages.t | 33 ++ cpan/Encode/t/whatwg-aliases.json | 455 ++++++++++++++++++++++ cpan/Encode/t/whatwg-aliases.t | 66 ++++ t/porting/customized.dat | 1 - 37 files changed, 1218 insertions(+), 430 deletions(-) create mode 100644 cpan/Encode/t/truncated_utf8.t create mode 100644 cpan/Encode/t/undef.t create mode 100644 cpan/Encode/t/use-Encode-Alias.t create mode 100644 cpan/Encode/t/utf8messages.t create mode 100644 cpan/Encode/t/whatwg-aliases.json create mode 100644 cpan/Encode/t/whatwg-aliases.t diff --git a/MANIFEST b/MANIFEST index 01cb8b36a1..1cefc5c6b5 100644 --- a/MANIFEST +++ b/MANIFEST @@ -984,11 +984,17 @@ cpan/Encode/t/rt76824.t test script cpan/Encode/t/rt85489.t test script cpan/Encode/t/rt86327.t test script cpan/Encode/t/taint.t +cpan/Encode/t/truncated_utf8.t +cpan/Encode/t/undef.t cpan/Encode/t/unibench.pl benchmark script cpan/Encode/t/Unicode.t test script +cpan/Encode/t/use-Encode-Alias.t +cpan/Encode/t/utf8messages.t cpan/Encode/t/utf8ref.t test script cpan/Encode/t/utf8strict.t test script cpan/Encode/t/utf8warnings.t +cpan/Encode/t/whatwg-aliases.json +cpan/Encode/t/whatwg-aliases.t cpan/Encode/TW/Makefile.PL Encode extension cpan/Encode/TW/TW.pm Encode extension cpan/Encode/ucm/8859-1.ucm Unicode Character Map diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl index 7a703b7697..ccde06feab 100755 --- a/Porting/Maintainers.pl +++ b/Porting/Maintainers.pl @@ -386,9 +386,8 @@ use File::Glob qw(:case); }, 'Encode' => { - 'DISTRIBUTION' => 'DANKOGAI/Encode-2.88.tar.gz', + 'DISTRIBUTION' => 'DANKOGAI/Encode-2.92.tar.gz', 'FILES' => q[cpan/Encode], - 'CUSTOMIZED' => [ qw(Unicode/Unicode.pm) ], }, 'encoding::warnings' => { diff --git a/Porting/perldelta_template.pod b/Porting/perldelta_template.pod index de166467b5..c0b6e6f8a1 100644 --- a/Porting/perldelta_template.pod +++ b/Porting/perldelta_template.pod @@ -83,7 +83,7 @@ as an updated module in the L</Modules and Pragmata> section. XXX Changes which enhance performance without changing behaviour go here. There may well be none in a stable release. -[ List each enhancement as a =item entry ] +[ List each enhancement as an =item entry ] =over 4 @@ -101,7 +101,7 @@ following sections using F<Porting/corelist-perldelta.pl>. A paragraph summary for important changes should then be added by hand. In an ideal world, dual-life modules would have a F<Changes> file that could be cribbed. -[ Within each section, list entries as a =item entry ] +[ Within each section, list entries as an =item entry ] =head2 New Modules and Pragmata @@ -240,7 +240,7 @@ go here. Any other changes to the Perl build process should be listed here. However, any platform-specific changes should be listed in the L</Platform Support> section, instead. -[ List changes as a =item entry ]. +[ List changes as an =item entry ]. =over 4 @@ -269,7 +269,7 @@ Tests were added and changed to reflect the other additions and changes in this release. Furthermore, these significant changes were made: -[ List each test improvement as a =item entry ] +[ List each test improvement as an =item entry ] =over 4 @@ -283,7 +283,7 @@ XXX XXX Any changes to platform support should be listed in the sections below. -[ Within the sections, list each platform as a =item entry with specific +[ Within the sections, list each platform as an =item entry with specific changes as paragraphs below it. ] =head2 New Platforms @@ -334,7 +334,7 @@ XXX Changes which affect the interface available to C<XS> code go here. Other significant internal changes for future core maintainers should be noted as well. -[ List each change as a =item entry ] +[ List each change as an =item entry ] =over 4 @@ -349,7 +349,7 @@ XXX XXX Important bug fixes in the core language are summarized here. Bug fixes in files in F<ext/> and F<lib/> are best summarized in L</Modules and Pragmata>. -[ List each fix as a =item entry ] +[ List each fix as an =item entry ] =over 4 @@ -365,7 +365,7 @@ XXX Descriptions of platform agnostic bugs we know we can't fix go here. Any tests that had to be C<TODO>ed for the release would be noted here. Unfixed platform specific bugs also go here. -[ List each fix as a =item entry ] +[ List each fix as an =item entry ] =over 4 diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm index 57b4292279..5a27c5990c 100644 --- a/cpan/Encode/Encode.pm +++ b/cpan/Encode/Encode.pm @@ -1,16 +1,21 @@ # -# $Id: Encode.pm,v 2.88 2016/11/29 23:30:30 dankogai Exp dankogai $ +# $Id: Encode.pm,v 2.92 2017/07/18 07:15:29 dankogai Exp dankogai $ # package Encode; use strict; use warnings; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.88 $ =~ /(\d+)/g; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; -use XSLoader (); -XSLoader::load( __PACKAGE__, $VERSION ); +our $VERSION; +BEGIN { + $VERSION = sprintf "%d.%02d", q$Revision: 2.92 $ =~ /(\d+)/g; + require XSLoader; + XSLoader::load( __PACKAGE__, $VERSION ); +} use Exporter 5.57 'import'; +our @CARP_NOT = qw(Encode::Encoder); + # Public, encouraged API is exported by default our @EXPORT = qw( @@ -44,7 +49,10 @@ our %EXPORT_TAGS = ( our $ON_EBCDIC = ( ord("A") == 193 ); -use Encode::Alias; +use Encode::Alias (); +use Encode::MIME::Name; + +use Storable; # Make a %Encoding package variable to allow a certain amount of cheating our %Encoding; @@ -96,6 +104,9 @@ sub define_encoding { my $alias = shift; define_alias( $alias, $obj ); } + my $class = ref($obj); + push @Encode::CARP_NOT, $class unless grep { $_ eq $class } @Encode::CARP_NOT; + push @Encode::Encoding::CARP_NOT, $class unless grep { $_ eq $class } @Encode::Encoding::CARP_NOT; return $obj; } @@ -127,6 +138,15 @@ sub getEncoding { return; } +# HACK: These two functions must be defined in Encode and because of +# cyclic dependency between Encode and Encode::Alias, Exporter does not work +sub find_alias { + goto &Encode::Alias::find_alias; +} +sub define_alias { + goto &Encode::Alias::define_alias; +} + sub find_encoding($;$) { my ( $name, $skip_external ) = @_; return __PACKAGE__->getEncoding( $name, $skip_external ); @@ -134,8 +154,6 @@ sub find_encoding($;$) { sub find_mime_encoding($;$) { my ( $mime_name, $skip_external ) = @_; - eval { require Encode::MIME::Name; }; - $@ and return; my $name = Encode::MIME::Name::get_encode_name( $mime_name ); return find_encoding( $name, $skip_external ); } @@ -149,8 +167,6 @@ sub resolve_alias($) { sub clone_encoding($) { my $obj = find_encoding(shift); ref $obj or return; - eval { require Storable }; - $@ and return; return Storable::dclone($obj); } @@ -182,7 +198,7 @@ sub encode($$;$) { else { $octets = $enc->encode( $string, $check ); } - $_[1] = $string if $check and !ref $check and !( $check & LEAVE_SRC() ); + $_[1] = $string if $check and !ref $check and !( $check & LEAVE_SRC ); return $octets; } *str2bytes = \&encode; @@ -211,7 +227,7 @@ sub decode($$;$) { else { $string = $enc->decode( $octets, $check ); } - $_[1] = $octets if $check and !ref $check and !( $check & LEAVE_SRC() ); + $_[1] = $octets if $check and !ref $check and !( $check & LEAVE_SRC ); return $string; } *bytes2str = \&decode; @@ -278,133 +294,87 @@ sub decode_utf8($;$) { $check ||= 0; $utf8enc ||= find_encoding('utf8'); my $string = $utf8enc->decode( $octets, $check ); - $_[0] = $octets if $check and !ref $check and !( $check & LEAVE_SRC() ); + $_[0] = $octets if $check and !ref $check and !( $check & LEAVE_SRC ); return $string; } -# sub decode_utf8($;$) { -# my ( $str, $check ) = @_; -# return $str if is_utf8($str); -# if ($check) { -# return decode( "utf8", $str, $check ); -# } -# else { -# return decode( "utf8", $str ); -# return $str; -# } -# } - -predefine_encodings(1); - -# -# This is to restore %Encoding if really needed; -# - -sub predefine_encodings { - require Encode::Encoding; - no warnings 'redefine'; - my $use_xs = shift; - if ($ON_EBCDIC) { - - # was in Encode::UTF_EBCDIC - package Encode::UTF_EBCDIC; - push @Encode::UTF_EBCDIC::ISA, 'Encode::Encoding'; - *decode = sub { - my ( undef, $str, $chk ) = @_; - my $res = ''; - for ( my $i = 0 ; $i < length($str) ; $i++ ) { - $res .= - chr( - utf8::unicode_to_native( ord( substr( $str, $i, 1 ) ) ) - ); - } - $_[1] = '' if $chk; - return $res; - }; - *encode = sub { - my ( undef, $str, $chk ) = @_; - my $res = ''; - for ( my $i = 0 ; $i < length($str) ; $i++ ) { - $res .= - chr( - utf8::native_to_unicode( ord( substr( $str, $i, 1 ) ) ) - ); - } - $_[1] = '' if $chk; - return $res; - }; - $Encode::Encoding{Unicode} = - bless { Name => "UTF_EBCDIC" } => "Encode::UTF_EBCDIC"; +onBOOT; + +if ($ON_EBCDIC) { + package Encode::UTF_EBCDIC; + use parent 'Encode::Encoding'; + my $obj = bless { Name => "UTF_EBCDIC" } => "Encode::UTF_EBCDIC"; + Encode::define_encoding($obj, 'Unicode'); + sub decode { + my ( undef, $str, $chk ) = @_; + my $res = ''; + for ( my $i = 0 ; $i < length($str) ; $i++ ) { + $res .= + chr( + utf8::unicode_to_native( ord( substr( $str, $i, 1 ) ) ) + ); + } + $_[1] = '' if $chk; + return $res; } - else { - - package Encode::Internal; - push @Encode::Internal::ISA, 'Encode::Encoding'; - *decode = sub { - my ( undef, $str, $chk ) = @_; - utf8::upgrade($str); - $_[1] = '' if $chk; - return $str; - }; - *encode = \&decode; - $Encode::Encoding{Unicode} = - bless { Name => "Internal" } => "Encode::Internal"; + sub encode { + my ( undef, $str, $chk ) = @_; + my $res = ''; + for ( my $i = 0 ; $i < length($str) ; $i++ ) { + $res .= + chr( + utf8::native_to_unicode( ord( substr( $str, $i, 1 ) ) ) + ); + } + $_[1] = '' if $chk; + return $res; } - { - # https://rt.cpan.org/Public/Bug/Display.html?id=103253 - package Encode::XS; - push @Encode::XS::ISA, 'Encode::Encoding'; +} else { + package Encode::Internal; + use parent 'Encode::Encoding'; + my $obj = bless { Name => "Internal" } => "Encode::Internal"; + Encode::define_encoding($obj, 'Unicode'); + sub decode { + my ( undef, $str, $chk ) = @_; + utf8::upgrade($str); + $_[1] = '' if $chk; + return $str; } - { + *encode = \&decode; +} - # was in Encode::utf8 - package Encode::utf8; - push @Encode::utf8::ISA, 'Encode::Encoding'; +{ + # https://rt.cpan.org/Public/Bug/Display.html?id=103253 + package Encode::XS; + use parent 'Encode::Encoding'; +} - # - if ($use_xs) { - Encode::DEBUG and warn __PACKAGE__, " XS on"; - *decode = \&decode_xs; - *encode = \&encode_xs; - } - else { - Encode::DEBUG and warn __PACKAGE__, " XS off"; - *decode = sub { - my ( undef, $octets, $chk ) = @_; - my $str = Encode::decode_utf8($octets); - if ( defined $str ) { - $_[1] = '' if $chk; - return $str; - } - return undef; - }; - *encode = sub { - my ( undef, $string, $chk ) = @_; - my $octets = Encode::encode_utf8($string); - $_[1] = '' if $chk; - return $octets; - }; +{ + package Encode::utf8; + use parent 'Encode::Encoding'; + my %obj = ( + 'utf8' => { Name => 'utf8' }, + 'utf-8-strict' => { Name => 'utf-8-strict', strict_utf8 => 1 } + ); + for ( keys %obj ) { + bless $obj{$_} => __PACKAGE__; + Encode::define_encoding( $obj{$_} => $_ ); + } + sub cat_decode { + # ($obj, $dst, $src, $pos, $trm, $chk) + # currently ignores $chk + my ( undef, undef, undef, $pos, $trm ) = @_; + my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ]; + use bytes; + if ( ( my $npos = index( $$rsrc, $trm, $pos ) ) >= 0 ) { + $$rdst .= + substr( $$rsrc, $pos, $npos - $pos + length($trm) ); + $$rpos = $npos + length($trm); + return 1; } - *cat_decode = sub { # ($obj, $dst, $src, $pos, $trm, $chk) - # currently ignores $chk - my ( undef, undef, undef, $pos, $trm ) = @_; - my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ]; - use bytes; - if ( ( my $npos = index( $$rsrc, $trm, $pos ) ) >= 0 ) { - $$rdst .= - substr( $$rsrc, $pos, $npos - $pos + length($trm) ); - $$rpos = $npos + length($trm); - return 1; - } - $$rdst .= substr( $$rsrc, $pos ); - $$rpos = length($$rsrc); - return ''; - }; - $Encode::Encoding{utf8} = - bless { Name => "utf8" } => "Encode::utf8"; - $Encode::Encoding{"utf-8-strict"} = - bless { Name => "utf-8-strict", strict_utf8 => 1 } - => "Encode::utf8"; + $$rdst .= substr( $$rsrc, $pos ); + $$rpos = length($$rsrc); + return ''; } } @@ -516,14 +486,16 @@ ISO-8859-1, also known as Latin1: $octets = encode("iso-8859-1", $string); -B<CAVEAT>: When you run C<$octets = encode("utf8", $string)>, then +B<CAVEAT>: When you run C<$octets = encode("UTF-8", $string)>, then $octets I<might not be equal to> $string. Though both contain the same data, the UTF8 flag for $octets is I<always> off. When you encode anything, the UTF8 flag on the result is always off, even when it -contains a completely valid utf8 string. See L</"The UTF8 flag"> below. +contains a completely valid UTF-8 string. See L</"The UTF8 flag"> below. If the $string is C<undef>, then C<undef> is returned. +C<str2bytes> may be used as an alias for C<encode>. + =head3 decode $string = decode(ENCODING, OCTETS[, CHECK]) @@ -544,13 +516,15 @@ internal format: $string = decode("iso-8859-1", $octets); -B<CAVEAT>: When you run C<$string = decode("utf8", $octets)>, then $string +B<CAVEAT>: When you run C<$string = decode("UTF-8", $octets)>, then $string I<might not be equal to> $octets. Though both contain the same data, the UTF8 flag for $string is on. See L</"The UTF8 flag"> below. If the $string is C<undef>, then C<undef> is returned. +C<bytes2str> may be used as an alias for C<decode>. + =head3 find_encoding [$obj =] find_encoding(ENCODING) @@ -559,11 +533,11 @@ Returns the I<encoding object> corresponding to I<ENCODING>. Returns C<undef> if no matching I<ENCODING> is find. The returned object is what does the actual encoding or decoding. - $utf8 = decode($name, $bytes); + $string = decode($name, $bytes); is in fact - $utf8 = do { + $string = do { $obj = find_encoding($name); croak qq(encoding "$name" not found) unless ref $obj; $obj->decode($bytes); @@ -575,8 +549,8 @@ You can therefore save time by reusing this object as follows; my $enc = find_encoding("iso-8859-1"); while(<>) { - my $utf8 = $enc->decode($_); - ... # now do something with $utf8; + my $string = $enc->decode($_); + ... # now do something with $string; } Besides L</decode> and L</encode>, other methods are @@ -624,13 +598,13 @@ and C<undef> on error. B<CAVEAT>: The following operations may look the same, but are not: - from_to($data, "iso-8859-1", "utf8"); #1 + from_to($data, "iso-8859-1", "UTF-8"); #1 $data = decode("iso-8859-1", $data); #2 Both #1 and #2 make $data consist of a completely valid UTF-8 string, but only #2 turns the UTF8 flag on. #1 is equivalent to: - $data = encode("utf8", decode("iso-8859-1", $data)); + $data = encode("UTF-8", decode("iso-8859-1", $data)); See L</"The UTF8 flag"> below. @@ -655,7 +629,11 @@ followed by C<encode> as follows: Equivalent to C<$octets = encode("utf8", $string)>. The characters in $string are encoded in Perl's internal format, and the result is returned as a sequence of octets. Because all possible characters in Perl have a -(loose, not strict) UTF-8 representation, this function cannot fail. +(loose, not strict) utf8 representation, this function cannot fail. + +B<WARNING>: do not use this function for data exchange as it can produce +not strict utf8 $octets! For strictly valid UTF-8 output use +C<$octets = encode("UTF-8", $string)>. =head3 decode_utf8 @@ -663,11 +641,15 @@ as a sequence of octets. Because all possible characters in Perl have a Equivalent to C<$string = decode("utf8", $octets [, CHECK])>. The sequence of octets represented by $octets is decoded -from UTF-8 into a sequence of logical characters. -Because not all sequences of octets are valid UTF-8, +from (loose, not strict) utf8 into a sequence of logical characters. +Because not all sequences of octets are valid not strict utf8, it is quite possible for this function to fail. For CHECK, see L</"Handling Malformed Data">. +B<WARNING>: do not use this function for data exchange as it can produce +$string with not strict utf8 representation! For strictly valid UTF-8 +$string representation use C<$string = decode("UTF-8", $octets [, CHECK])>. + B<CAVEAT>: the input I<$octets> might be modified in-place depending on what is set in CHECK. See L</LEAVE_SRC> if you want your inputs to be left unchanged. @@ -903,15 +885,14 @@ octets that represent the fallback character. For instance: Acts like C<FB_PERLQQ> but U+I<XXXX> is used instead of C<\x{I<XXXX>}>. -Even the fallback for C<decode> must return octets, which are -then decoded with the character encoding that C<decode> accepts. So for +Fallback for C<decode> must return decoded string (sequence of characters) +and takes a list of ordinal values as its arguments. So for example if you wish to decode octets as UTF-8, and use ISO-8859-15 as a fallback for bytes that are not valid UTF-8, you could write $str = decode 'UTF-8', $octets, sub { - my $tmp = chr shift; - from_to $tmp, 'ISO-8859-15', 'UTF-8'; - return $tmp; + my $tmp = join '', map chr, @_; + return decode 'ISO-8859-15', $tmp; }; =head1 Defining Encodings @@ -980,9 +961,9 @@ When you I<encode>, the resulting UTF8 flag is always B<off>. When you I<decode>, the resulting UTF8 flag is B<on>--I<unless> you can unambiguously represent data. Here is what we mean by "unambiguously". -After C<$utf8 = decode("foo", $octet)>, +After C<$str = decode("foo", $octet)>, - When $octet is... The UTF8 flag in $utf8 is + When $octet is... The UTF8 flag in $str is --------------------------------------------- In ASCII only (or EBCDIC only) OFF In ISO-8859-1 ON diff --git a/cpan/Encode/Encode.xs b/cpan/Encode/Encode.xs index b5160d2516..6c077bec3a 100644 --- a/cpan/Encode/Encode.xs +++ b/cpan/Encode/Encode.xs @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 2.39 2016/11/29 23:29:23 dankogai Exp dankogai $ + $Id: Encode.xs,v 2.41 2017/06/10 17:23:50 dankogai Exp $ */ #define PERL_NO_GET_CONTEXT @@ -35,17 +35,6 @@ UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) #define SvIV_nomg SvIV #endif -#ifdef UTF8_DISALLOW_ILLEGAL_INTERCHANGE -# define UTF8_ALLOW_STRICT UTF8_DISALLOW_ILLEGAL_INTERCHANGE -#else -# define UTF8_ALLOW_STRICT 0 -#endif - -#define UTF8_ALLOW_NONSTRICT (UTF8_ALLOW_ANY & \ - ~(UTF8_ALLOW_CONTINUATION | \ - UTF8_ALLOW_NON_CONTINUATION | \ - UTF8_ALLOW_LONG)) - static void Encode_XSEncoding(pTHX_ encode_t * enc) { @@ -114,24 +103,52 @@ utf8_safe_upgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify) #define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" +#define ERR_DECODE_STR_NOMAP "%s \"%s\" does not map to Unicode" static SV * do_fallback_cb(pTHX_ UV ch, SV *fallback_cb) { dSP; int argc; - SV *retval = newSVpv("",0); + SV *retval; ENTER; SAVETMPS; PUSHMARK(sp); - XPUSHs(sv_2mortal(newSVnv((UV)ch))); + XPUSHs(sv_2mortal(newSVuv(ch))); PUTBACK; argc = call_sv(fallback_cb, G_SCALAR); SPAGAIN; if (argc != 1){ croak("fallback sub must return scalar!"); } - sv_catsv(retval, POPs); + retval = POPs; + SvREFCNT_inc(retval); + PUTBACK; + FREETMPS; + LEAVE; + return retval; +} + +static SV * +do_bytes_fallback_cb(pTHX_ U8 *s, STRLEN slen, SV *fallback_cb) +{ + dSP; + int argc; + STRLEN i; + SV *retval; + ENTER; + SAVETMPS; + PUSHMARK(sp); + for (i=0; i<slen; ++i) + XPUSHs(sv_2mortal(newSVuv(s[i]))); + PUTBACK; + argc = call_sv(fallback_cb, G_SCALAR); + SPAGAIN; + if (argc != 1){ + croak("fallback sub must return scalar!"); + } + retval = POPs; + SvREFCNT_inc(retval); PUTBACK; FREETMPS; LEAVE; @@ -241,16 +258,22 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 * goto ENCODE_SET_SRC; } if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ + STRLEN sublen; + char *substr; SV* subchar = (fallback_cb != &PL_sv_undef) ? do_fallback_cb(aTHX_ ch, fallback_cb) : newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04" UVxf "}" : check & ENCODE_HTMLCREF ? "&#%" UVuf ";" : "&#x%" UVxf ";", (UV)ch); - SvUTF8_off(subchar); /* make sure no decoded string gets in */ + substr = SvPV(subchar, sublen); + if (SvUTF8(subchar) && sublen && !utf8_to_bytes((U8 *)substr, &sublen)) { /* make sure no decoded string gets in */ + SvREFCNT_dec(subchar); + croak("Wide character"); + } sdone += slen + clen; - ddone += dlen + SvCUR(subchar); - sv_catsv(dst, subchar); + ddone += dlen + sublen; + sv_catpvn(dst, substr, sublen); SvREFCNT_dec(subchar); } else { /* fallback char */ @@ -277,18 +300,21 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 * } if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ + STRLEN sublen; + char *substr; SV* subchar = (fallback_cb != &PL_sv_undef) ? do_fallback_cb(aTHX_ (UV)s[slen], fallback_cb) : newSVpvf("\\x%02" UVXf, (UV)s[slen]); + substr = SvPVutf8(subchar, sublen); sdone += slen + 1; - ddone += dlen + SvCUR(subchar); - sv_catsv(dst, subchar); + ddone += dlen + sublen; + sv_catpvn(dst, substr, sublen); SvREFCNT_dec(subchar); } else { sdone += slen + 1; ddone += dlen + strlen(FBCHAR_UTF8); - sv_catpv(dst, FBCHAR_UTF8); + sv_catpvn(dst, FBCHAR_UTF8, strlen(FBCHAR_UTF8)); } } /* settle variables when fallback */ @@ -382,7 +408,7 @@ convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen) U8 *ptr = s; bool overflowed = 0; - uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(len); + uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(UTF8SKIP(s)); len--; s++; @@ -401,7 +427,6 @@ convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen) *rlen = s-ptr; if (overflowed || *rlen > (STRLEN)UNISKIP(uv)) { - *rlen = 1; return 0; } @@ -418,6 +443,8 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, int check; U8 *d; STRLEN dlen; + char esc[UTF8_MAXLEN * 6 + 1]; + STRLEN i; if (SvROK(check_sv)) { /* croak("UTF-8 decoder doesn't support callback CHECK"); */ @@ -441,22 +468,24 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, continue; } + uv = 0; ulen = 1; - if (UTF8_IS_START(*s)) { + if (! UTF8_IS_CONTINUATION(*s)) { + /* Not an invariant nor a continuation; must be a start byte. (We + * can't test for UTF8_IS_START as that excludes things like \xC0 + * which are start bytes, but always lead to overlongs */ + U8 skip = UTF8SKIP(s); if ((s + skip) > e) { - if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) { - const U8 *p = s + 1; - for (; p < e; p++) { - if (!UTF8_IS_CONTINUATION(*p)) { - ulen = p-s; - goto malformed_byte; - } - } + /* just calculate ulen, in pathological cases can be smaller then e-s */ + if (e-s >= 2) + convert_utf8_multi_seq(s, e-s, &ulen); + else + ulen = 1; + + if ((stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) && ulen == (STRLEN)(e-s)) break; - } - ulen = e-s; goto malformed_byte; } @@ -475,44 +504,67 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, } /* If we get here there is something wrong with alleged UTF-8 */ + /* uv is used only when encoding */ malformed_byte: - uv = (UV)*s; - if (ulen == 0) + if (uv == 0) + uv = (UV)*s; + if (encode || ulen == 0) ulen = 1; malformed: + if (!encode && (check & (ENCODE_DIE_ON_ERR|ENCODE_WARN_ON_ERR|ENCODE_PERLQQ))) + for (i=0; i<ulen; ++i) sprintf(esc+4*i, "\\x%02X", s[i]); if (check & ENCODE_DIE_ON_ERR){ if (encode) - Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, "utf8"); + Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, (strict ? "UTF-8" : "utf8")); else - Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", uv); + Perl_croak(aTHX_ ERR_DECODE_STR_NOMAP, (strict ? "UTF-8" : "utf8"), esc); } if (check & ENCODE_WARN_ON_ERR){ if (encode) Perl_warner(aTHX_ packWARN(WARN_UTF8), - ERR_ENCODE_NOMAP, uv, "utf8"); + ERR_ENCODE_NOMAP, uv, (strict ? "UTF-8" : "utf8")); else Perl_warner(aTHX_ packWARN(WARN_UTF8), - ERR_DECODE_NOMAP, "utf8", uv); + ERR_DECODE_STR_NOMAP, (strict ? "UTF-8" : "utf8"), esc); } if (check & ENCODE_RETURN_ON_ERR) { break; } if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ - SV* subchar = - (fallback_cb != &PL_sv_undef) - ? do_fallback_cb(aTHX_ uv, fallback_cb) - : newSVpvf(check & ENCODE_PERLQQ - ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}") - : check & ENCODE_HTMLCREF ? "&#%" UVuf ";" - : "&#x%" UVxf ";", uv); - if (encode){ - SvUTF8_off(subchar); /* make sure no decoded string gets in */ - } - dlen += SvCUR(subchar) - ulen; + STRLEN sublen; + char *substr; + SV* subchar; + if (encode) { + subchar = + (fallback_cb != &PL_sv_undef) + ? do_fallback_cb(aTHX_ uv, fallback_cb) + : newSVpvf(check & ENCODE_PERLQQ + ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}") + : check & ENCODE_HTMLCREF ? "&#%" UVuf ";" + : "&#x%" UVxf ";", uv); + substr = SvPV(subchar, sublen); + if (SvUTF8(subchar) && sublen && !utf8_to_bytes((U8 *)substr, &sublen)) { /* make sure no decoded string gets in */ + SvREFCNT_dec(subchar); + croak("Wide character"); + } + } else { + if (fallback_cb != &PL_sv_undef) { + /* in decode mode we have sequence of wrong bytes */ + subchar = do_bytes_fallback_cb(aTHX_ s, ulen, fallback_cb); + } else { + char *ptr = esc; + /* ENCODE_PERLQQ is already stored in esc */ + if (check & (ENCODE_HTMLCREF|ENCODE_XMLCREF)) + for (i=0; i<ulen; ++i) ptr += sprintf(ptr, ((check & ENCODE_HTMLCREF) ? "&#%u;" : "&#x%02X;"), s[i]); + subchar = newSVpvn(esc, strlen(esc)); + } + substr = SvPVutf8(subchar, sublen); + } + dlen += sublen - ulen; SvCUR_set(dst, d-(U8 *)SvPVX(dst)); *SvEND(dst) = '\0'; - sv_catsv(dst, subchar); + sv_catpvn(dst, substr, sublen); SvREFCNT_dec(subchar); d = (U8 *) SvGROW(dst, dlen) + SvCUR(dst); } else { @@ -539,7 +591,7 @@ MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ PROTOTYPES: DISABLE void -Method_decode_xs(obj,src,check_sv = &PL_sv_no) +Method_decode(obj,src,check_sv = &PL_sv_no) SV * obj SV * src SV * check_sv @@ -551,14 +603,13 @@ PREINIT: bool renewed = 0; int check; bool modify; + dSP; INIT: SvGETMAGIC(src); SvGETMAGIC(check_sv); check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); modify = (check && !(check & ENCODE_LEAVE_SRC)); -CODE: -{ - dSP; +PPCODE: if (!SvOK(src)) XSRETURN_UNDEF; s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); @@ -600,10 +651,9 @@ CODE: if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ ST(0) = dst; XSRETURN(1); -} void -Method_encode_xs(obj,src,check_sv = &PL_sv_no) +Method_encode(obj,src,check_sv = &PL_sv_no) SV * obj SV * src SV * check_sv @@ -619,8 +669,7 @@ INIT: SvGETMAGIC(check_sv); check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); modify = (check && !(check & ENCODE_LEAVE_SRC)); -CODE: -{ +PPCODE: if (!SvOK(src)) XSRETURN_UNDEF; s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); @@ -673,20 +722,19 @@ CODE: if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ ST(0) = dst; XSRETURN(1); -} MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ -PROTOTYPES: ENABLE +PROTOTYPES: DISABLE -void +SV * Method_renew(obj) SV * obj CODE: -{ PERL_UNUSED_VAR(obj); - XSRETURN(1); -} + RETVAL = newSVsv(obj); +OUTPUT: + RETVAL int Method_renewed(obj) @@ -697,17 +745,19 @@ CODE: OUTPUT: RETVAL -void +SV * Method_name(obj) SV * obj +PREINIT: + encode_t *enc; +INIT: + enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); CODE: -{ - encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); - ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0]))); - XSRETURN(1); -} + RETVAL = newSVpvn(enc->name[0], strlen(enc->name[0])); +OUTPUT: + RETVAL -void +bool Method_cat_decode(obj, dst, src, off, term, check_sv = &PL_sv_no) SV * obj SV * dst @@ -734,7 +784,6 @@ INIT: enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); offset = (STRLEN)SvIV(off); CODE: -{ if (!SvOK(src)) XSRETURN_NO; s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); @@ -745,13 +794,9 @@ CODE: sv_catsv(dst, tmp); SvREFCNT_dec(tmp); SvIV_set(off, (IV)offset); - if (code == ENCODE_FOUND_TERM) { - ST(0) = &PL_sv_yes; - }else{ - ST(0) = &PL_sv_no; - } - XSRETURN(1); -} + RETVAL = (code == ENCODE_FOUND_TERM); +OUTPUT: + RETVAL SV * Method_decode(obj,src,check_sv = &PL_sv_no) @@ -773,7 +818,6 @@ INIT: modify = (check && !(check & ENCODE_LEAVE_SRC)); enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); CODE: -{ if (!SvOK(src)) XSRETURN_UNDEF; s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); @@ -782,7 +826,6 @@ CODE: RETVAL = encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check, NULL, Nullsv, NULL, fallback_cb); SvUTF8_on(RETVAL); -} OUTPUT: RETVAL @@ -806,7 +849,6 @@ INIT: modify = (check && !(check & ENCODE_LEAVE_SRC)); enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); CODE: -{ if (!SvOK(src)) XSRETURN_UNDEF; s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); @@ -814,76 +856,51 @@ CODE: utf8_safe_upgrade(aTHX_ &src, &s, &slen, modify); RETVAL = encode_method(aTHX_ enc, enc->f_utf8, src, s, slen, check, NULL, Nullsv, NULL, fallback_cb); -} OUTPUT: RETVAL -void +bool Method_needs_lines(obj) SV * obj CODE: -{ - /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ PERL_UNUSED_VAR(obj); - ST(0) = &PL_sv_no; - XSRETURN(1); -} + RETVAL = FALSE; +OUTPUT: + RETVAL -void +bool Method_perlio_ok(obj) SV * obj PREINIT: SV *sv; CODE: -{ - /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ - /* require_pv(PERLIO_FILENAME); */ - PERL_UNUSED_VAR(obj); - eval_pv("require PerlIO::encoding", 0); - SPAGAIN; - - sv = get_sv("@", 0); - if (SvTRUE(sv)) { - ST(0) = &PL_sv_no; - }else{ - ST(0) = &PL_sv_yes; - } - XSRETURN(1); -} + sv = eval_pv("require PerlIO::encoding", 0); + RETVAL = SvTRUE(sv); +OUTPUT: + RETVAL -void +SV * Method_mime_name(obj) SV * obj PREINIT: - SV *sv; + encode_t *enc; +INIT: + enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); CODE: -{ - encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); - SV *retval; - eval_pv("require Encode::MIME::Name", 0); + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0])))); + PUTBACK; + call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR); SPAGAIN; - - sv = get_sv("@", 0); - if (SvTRUE(sv)) { - ST(0) = &PL_sv_undef; - }else{ - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0])))); - PUTBACK; - call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR); - SPAGAIN; - retval = newSVsv(POPs); - PUTBACK; - FREETMPS; - LEAVE; - /* enc->name[0] */ - ST(0) = retval; - } - XSRETURN(1); -} + RETVAL = newSVsv(POPs); + PUTBACK; + FREETMPS; + LEAVE; +OUTPUT: + RETVAL MODULE = Encode PACKAGE = Encode @@ -892,10 +909,11 @@ PROTOTYPES: ENABLE I32 _bytes_to_utf8(sv, ...) SV * sv +PREINIT: + SV * encoding; +INIT: + encoding = items == 2 ? ST(1) : Nullsv; CODE: -{ - SV * encoding = items == 2 ? ST(1) : Nullsv; - if (encoding) RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); else { @@ -909,18 +927,19 @@ CODE: Safefree(converted); /* ... so free it */ RETVAL = len; } -} OUTPUT: RETVAL I32 _utf8_to_bytes(sv, ...) SV * sv +PREINIT: + SV * to; + SV * check; +INIT: + to = items > 1 ? ST(1) : Nullsv; + check = items > 2 ? ST(2) : Nullsv; CODE: -{ - SV * to = items > 1 ? ST(1) : Nullsv; - SV * check = items > 2 ? ST(2) : Nullsv; - if (to) { RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); } else { @@ -980,7 +999,6 @@ CODE: RETVAL = (utf8_to_bytes(s, &len) ? len : 0); } } -} OUTPUT: RETVAL @@ -992,13 +1010,11 @@ PREINIT: char *str; STRLEN len; CODE: -{ SvGETMAGIC(sv); /* SvGETMAGIC() can modify SvOK flag */ str = SvOK(sv) ? SvPV_nomg(sv, len) : NULL; /* SvPV() can modify SvUTF8 flag */ RETVAL = SvUTF8(sv) ? TRUE : FALSE; if (RETVAL && check && (!str || !is_utf8_string((U8 *)str, len))) RETVAL = FALSE; -} OUTPUT: RETVAL @@ -1006,17 +1022,15 @@ SV * _utf8_on(sv) SV * sv CODE: -{ SvGETMAGIC(sv); if (!SvTAINTED(sv) && SvPOKp(sv)) { if (SvTHINKFIRST(sv)) sv_force_normal(sv); - RETVAL = newSViv(SvUTF8(sv)); + RETVAL = boolSV(SvUTF8(sv)); SvUTF8_on(sv); SvSETMAGIC(sv); } else { RETVAL = &PL_sv_undef; } -} OUTPUT: RETVAL @@ -1024,20 +1038,25 @@ SV * _utf8_off(sv) SV * sv CODE: -{ SvGETMAGIC(sv); if (!SvTAINTED(sv) && SvPOKp(sv)) { if (SvTHINKFIRST(sv)) sv_force_normal(sv); - RETVAL = newSViv(SvUTF8(sv)); + RETVAL = boolSV(SvUTF8(sv)); SvUTF8_off(sv); SvSETMAGIC(sv); } else { RETVAL = &PL_sv_undef; } -} OUTPUT: RETVAL +void +onBOOT() +CODE: +{ +#include "def_t.exh" +} + BOOT: { HV *stash = gv_stashpvn("Encode", strlen("Encode"), GV_ADD); @@ -1057,6 +1076,3 @@ BOOT: newCONSTSUB(stash, "FB_HTMLCREF", newSViv(ENCODE_FB_HTMLCREF)); newCONSTSUB(stash, "FB_XMLCREF", newSViv(ENCODE_FB_XMLCREF)); } -{ -#include "def_t.exh" -} diff --git a/cpan/Encode/Makefile.PL b/cpan/Encode/Makefile.PL index 8203105247..8b801443d8 100644 --- a/cpan/Encode/Makefile.PL +++ b/cpan/Encode/Makefile.PL @@ -1,9 +1,10 @@ # -# $Id: Makefile.PL,v 2.18 2016/11/29 23:29:23 dankogai Exp dankogai $ +# $Id: Makefile.PL,v 2.21 2017/07/18 07:15:29 dankogai Exp dankogai $ # use 5.007003; use strict; use warnings; +use utf8; use ExtUtils::MakeMaker; use File::Spec; use Config; @@ -15,9 +16,12 @@ $ENV{PERL_CORE} ||= $ARGV{PERL_CORE} if $ARGV{PERL_CORE}; # similar strictness as in core my $ccflags = $Config{ccflags}; if (!$ENV{PERL_CORE}) { - if ($Config{gccversion}) { - $ccflags .= ' -Werror=declaration-after-statement'; - $ccflags .= ' -Wpointer-sign' unless $Config{d_cplusplus}; + if (my $gccver = $Config{gccversion}) { + $gccver =~ s/\.//g; $gccver =~ s/ .*//; + $gccver .= "0" while length $gccver < 3; + $gccver = 0+$gccver; + $ccflags .= ' -Werror=declaration-after-statement' if $gccver > 400; + $ccflags .= ' -Wpointer-sign' if !$Config{d_cplusplus} and $gccver > 400; $ccflags .= ' -fpermissive' if $Config{d_cplusplus}; } } @@ -49,6 +53,8 @@ WriteMakefile( NAME => "Encode", EXE_FILES => \@exe_files, VERSION_FROM => 'Encode.pm', + ABSTRACT_FROM=> 'Encode.pm', + AUTHOR => 'Dan Kogai <danko...@dan.co.jp>', OBJECT => '$(O_FILES)', 'dist' => { COMPRESS => 'gzip -9f', @@ -61,6 +67,7 @@ WriteMakefile( PREREQ_PM => { Exporter => '5.57', # use Exporter 'import'; parent => '0.221', # version bundled with 5.10.1 + Storable => '0', # bundled with Perl 5.7.3 }, TEST_REQUIRES => { 'Test::More' => '0.81_01', @@ -71,6 +78,91 @@ WriteMakefile( resources => { repository => 'https://github.com/dankogai/p5-encode', }, + x_contributors => [ + 'Alex Davies <alex.dav...@talktalk.net>', + 'Alex Kapranoff <a...@kapranoff.ru>', + 'Alex Vandiver <a...@chmrr.net>', + 'Andreas J. Koenig <andreas.koe...@anima.de>', + 'Andrew Pennebaker <andrew.penneba...@networkedinsights.com>', + 'Andy Grundman <an...@activestate.com>', + 'Anton Tagunov <tagu...@motor.ru>', + 'Autrijus Tang <autri...@autrijus.org>', + 'Benjamin Goldberg <gold...@earthlink.net>', + 'Bjoern Hoehrmann <derhoe...@gmx.net>', + 'Bjoern Jacke <debianb...@j3e.de>', + 'bulk88 <bul...@hotmail.com>', + 'Craig A. Berry <craigbe...@mac.com>', + 'Curtis Jewell <csjew...@cpan.org>', + 'Dan Kogai <danko...@dan.co.jp>', + 'Dave Evans <d...@rudolf.org.uk>', + 'David Golden <dagol...@cpan.org>', + 'David Steinbrunner <dsteinbrun...@pobox.com>', + 'Deng Liu <deng...@ntu.edu.tw>', + 'Dominic Dunlop <d...@computer.org>', + 'drry', + 'Elizabeth Mattijsen <l...@dijkmat.nl>', + 'Flavio Poletti <fla...@polettix.it>', + 'Gerrit P. Haase <g...@familiehaase.de>', + 'Gisle Aas <gi...@activestate.com>', + 'Graham Barr <gb...@pobox.com>', + 'Graham Knop <ha...@haarg.org>', + 'Graham Ollis <p...@wdlabs.com>', + 'Gurusamy Sarathy <g...@activestate.com>', + 'H.Merijn Brand <h.m.br...@xs4all.nl>', + 'Hugo van der Sanden <h...@crypt.org>', + 'chansen <chan...@cpan.org>', + 'Chris Nandor <pu...@pobox.com>', + 'Inaba Hiroto <in...@st.rim.or.jp>', + 'Jarkko Hietaniemi <j...@iki.fi>', + 'Jesse Vincent <je...@fsck.com>', + 'Jungshik Shin <js...@mailaps.org>', + 'Karen Etheridge <et...@cpan.org>', + 'Karl Williamson <k...@cpan.org>', + 'Kenichi Ishigaki <ishig...@cpan.org>', + 'KONNO Hiroharu <hiroharu.ko...@bowneglobal.co.jp>', + 'Laszlo Molnar <ml1...@freemail.hu>', + 'Makamaka <makam...@donzoko.net>', + 'Mark-Jason Dominus <m...@plover.com>', + 'Masahiro Iuchi <masahiro.iu...@gmail.com>', + 'MATSUNO Tokuhiro <tokuhirom+c...@gmail.com>', + 'Mattia Barbon <mbar...@dsi.unive.it>', + 'Michael G Schwern <schw...@pobox.com>', + 'Michael LaGrasta <mich...@lagrasta.com>', + 'Miron Cuperman <mi...@hyper.to>', + 'Moritz Lenz <mor...@faui2k3.org>', + 'MORIYAMA Masayuki <m...@mtg.biglobe.ne.jp>', + 'Nick Ing-Simmons <n...@ing-simmons.net>', + 'Nicholas Clark <n...@ccl4.org>', + 'Olivier Mengué <dol...@cpan.org>', + 'otsune', + 'Pali <p...@cpan.org>', + 'Paul Marquess <paul_marqu...@yahoo.co.uk>', + 'Peter Prymmer <p...@best.com>', + 'Peter Rabbitson <ribasu...@cpan.org>', + 'Philip Newton <p...@cpan.org>', + 'Piotr Fusik <pfu...@op.pl>', + 'Rafael Garcia-Suarez <rgarciasua...@mandriva.com>', + 'Randy Stauner <ra...@magnificent-tears.com>', + 'Reini Urban <rur...@cpan.org>', + 'Robin Barker <r...@cise.npl.co.uk>', + 'SADAHIRO Tomoyuki <sadah...@cpan.org>', + 'Simon Cozens <si...@netthink.co.uk>', + 'Slaven Rezic <sre...@cpan.org>', + 'Spider Boardman <spi...@web.zk3.dec.com>', + 'Steve Hay <steve.m....@googlemail.com>', + 'Steve Peters <st...@fisharerojo.org>', + 'SUGAWARA Hajime <sugaw...@hdt.co.jp>', + 'SUZUKI Norio <zap00...@nifty.com>', + 'szr8 <blz.mar...@gmail.com>', + 'Tatsuhiko Miyagawa <miyag...@bulknews.net>', + 'Tels <perl_du...@bloodgate.com>', + 'Tony Cook <t...@develop-help.com>', + 'Vadim Konovalov <vkonova...@peterstar.ru>', + 'Victor <vic...@vsespb.ru>', + 'Ville Skyttä <ville.sky...@iki.fi>', + 'Vincent van Dam <vvan...@sandvine.com>', + 'Yitzchak Scott-Thoennes <sthoe...@efn.org>', + ], }, ); diff --git a/cpan/Encode/Unicode/Unicode.pm b/cpan/Encode/Unicode/Unicode.pm index fc1d3d1382..c56745d7b1 100644 --- a/cpan/Encode/Unicode/Unicode.pm +++ b/cpan/Encode/Unicode/Unicode.pm @@ -2,9 +2,8 @@ package Encode::Unicode; use strict; use warnings; -no warnings 'redefine'; -our $VERSION = do { my @r = ( q$Revision: 2.15_01 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.16 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use XSLoader; XSLoader::load( __PACKAGE__, $VERSION ); @@ -13,7 +12,7 @@ XSLoader::load( __PACKAGE__, $VERSION ); # Object Generator 8 transcoders all at once! # -require Encode; +use Encode (); our %BOM_Unknown = map { $_ => 1 } qw(UTF-16 UTF-32); @@ -34,12 +33,13 @@ for my $name ( $endian = ( $3 eq 'BE' ) ? 'n' : ( $3 eq 'LE' ) ? 'v' : ''; $size == 4 and $endian = uc($endian); - $Encode::Encoding{$name} = bless { + my $obj = bless { Name => $name, size => $size, endian => $endian, ucs2 => $ucs2, } => __PACKAGE__; + Encode::define_encoding($obj, $name); } use parent qw(Encode::Encoding); @@ -52,12 +52,6 @@ sub renew { return $clone; } -# There used to be a perl implementation of (en|de)code but with -# XS version is ripe, perl version is zapped for optimal speed - -*decode = \&decode_xs; -*encode = \&encode_xs; - 1; __END__ diff --git a/cpan/Encode/Unicode/Unicode.xs b/cpan/Encode/Unicode/Unicode.xs index 117e14d83f..b3b1d2fea8 100644 --- a/cpan/Encode/Unicode/Unicode.xs +++ b/cpan/Encode/Unicode/Unicode.xs @@ -1,5 +1,5 @@ /* - $Id: Unicode.xs,v 2.15 2016/11/29 23:29:23 dankogai Exp dankogai $ + $Id: Unicode.xs,v 2.16 2017/06/10 17:23:50 dankogai Exp $ */ #define PERL_NO_GET_CONTEXT @@ -127,7 +127,7 @@ PROTOTYPES: DISABLE *hv_fetch((HV *)SvRV(obj),k,l,0) : &PL_sv_undef) void -decode_xs(obj, str, check = 0) +decode(obj, str, check = 0) SV * obj SV * str IV check @@ -345,7 +345,7 @@ CODE: } void -encode_xs(obj, utf8, check = 0) +encode(obj, utf8, check = 0) SV * obj SV * utf8 IV check diff --git a/cpan/Encode/bin/enc2xs b/cpan/Encode/bin/enc2xs index bd39639ae8..619b64b757 100644 --- a/cpan/Encode/bin/enc2xs +++ b/cpan/Encode/bin/enc2xs @@ -11,7 +11,7 @@ use warnings; use Getopt::Std; use Config; my @orig_ARGV = @ARGV; -our $VERSION = do { my @r = (q$Revision: 2.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # These may get re-ordered. # RAW is a do_now as inserted by &enter @@ -1038,8 +1038,7 @@ sub find_e2x{ sub make_makefile_pl { - eval { require Encode; }; - $@ and die "You need to install Encode to use enc2xs -M\nerror: $@\n"; + eval { require Encode } or die "You need to install Encode to use enc2xs -M\nerror: $@\n"; # our used for variable expansion $_Enc2xs = $0; $_Version = $VERSION; @@ -1063,8 +1062,7 @@ use vars qw( ); sub make_configlocal_pm { - eval { require Encode; }; - $@ and die "Unable to require Encode: $@\n"; + eval { require Encode } or die "Unable to require Encode: $@\n"; eval { require File::Spec; }; # our used for variable expantion @@ -1084,8 +1082,7 @@ sub make_configlocal_pm { $mod =~ s/.*\bEncode\b/Encode/o; $mod =~ s/\.pm\z//o; $mod =~ s,/,::,og; - eval qq{ require $mod; }; - return if $@; + eval qq{ require $mod; } or return; warn qq{ require $mod;\n}; for my $enc ( Encode->encodings() ) { no warnings; @@ -1119,8 +1116,7 @@ sub _mkversion{ } sub _print_expand{ - eval { require File::Basename; }; - $@ and die "File::Basename needed. Are you on miniperl?;\nerror: $@\n"; + eval { require File::Basename } or die "File::Basename needed. Are you on miniperl?;\nerror: $@\n"; File::Basename->import(); my ($src, $dst, $clobber) = @_; if (!$clobber and -e $dst){ diff --git a/cpan/Encode/bin/ucmlint b/cpan/Encode/bin/ucmlint index a240f2c75e..a31a7a28f6 100644 --- a/cpan/Encode/bin/ucmlint +++ b/cpan/Encode/bin/ucmlint @@ -1,19 +1,18 @@ #!/usr/local/bin/perl # -# $Id: ucmlint,v 2.3 2016/08/04 03:15:58 dankogai Exp $ +# $Id: ucmlint,v 2.4 2017/06/10 17:23:50 dankogai Exp $ # BEGIN { pop @INC if $INC[-1] eq '.' } use strict; -our $VERSION = do { my @r = (q$Revision: 2.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Getopt::Std; our %Opt; getopts("Dehfv", \%Opt); if ($Opt{e}){ - eval{ require Encode; }; - $@ and die "can't load Encode : $@"; + eval { require Encode } or die "can't load Encode : $@"; } $Opt{h} and help(); diff --git a/cpan/Encode/encoding.pm b/cpan/Encode/encoding.pm index dc342683ee..7cd9eb2949 100644 --- a/cpan/Encode/encoding.pm +++ b/cpan/Encode/encoding.pm @@ -1,15 +1,16 @@ -# $Id: encoding.pm,v 2.19 2016/11/01 13:30:38 dankogai Exp $ +# $Id: encoding.pm,v 2.20 2017/06/10 17:23:50 dankogai Exp $ package encoding; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.19 $ =~ /(\d+)/g; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.20 $ =~ /(\d+)/g; use Encode; use strict; use warnings; +use Config; use constant { DEBUG => !!$ENV{PERL_ENCODE_DEBUG}, HAS_PERLIO => eval { require PerlIO::encoding; PerlIO::encoding->VERSION(0.02) }, - PERL_5_21_7 => $^V && $^V ge v5.21.7, + PERL_5_21_7 => $^V && $^V ge v5.21.7, # lexically scoped }; sub _exception { @@ -115,7 +116,8 @@ sub import { } my $deprecate = - $] >= 5.017 ? "Use of the encoding pragma is deprecated" : 0; + ($] >= 5.017 and !$Config{usecperl}) + ? "Use of the encoding pragma is deprecated" : 0; my $class = shift; my $name = shift; @@ -132,6 +134,7 @@ sub import { return; } $name = _get_locale_encoding() if $name eq ':locale'; + BEGIN { strict->unimport('hashpairs') if $] >= 5.027 and $^V =~ /c$/; } my %arg = @_; $name = $ENV{PERL_ENCODING} unless defined $name; my $enc = find_encoding($name); @@ -141,9 +144,9 @@ sub import { } $name = $enc->name; # canonize unless ( $arg{Filter} ) { - if ($] >= 5.025003) { + if ($] >= 5.025003 and !$Config{usecperl}) { require Carp; - Carp::croak("The encoding pragma is no longer supported"); + Carp::croak("The encoding pragma is no longer supported. Check cperl"); } warnings::warnif("deprecated",$deprecate) if $deprecate; @@ -186,8 +189,8 @@ sub import { $status; } ); - }; - $@ eq '' and DEBUG and warn "Filter installed"; + 1; + } and DEBUG and warn "Filter installed"; } defined ${^UNICODE} and ${^UNICODE} != 0 and return 1; for my $h (qw(STDIN STDOUT)) { @@ -368,7 +371,7 @@ Note that C<STDERR> WILL NOT be changed, regardless. Also note that non-STD file handles remain unaffected. Use C<use open> or C<binmode> to change the layers of those. -=item C<use encoding I<ENCNAME> Filter=E<gt>1;> +=item C<use encoding I<ENCNAME>, Filter=E<gt>1;> This operates as above, but the C<Filter> argument with a non-zero value causes the entire script, and not just literals, to be translated from diff --git a/cpan/Encode/lib/Encode/Alias.pm b/cpan/Encode/lib/Encode/Alias.pm index 0a252560f5..6dcd112a40 100644 --- a/cpan/Encode/lib/Encode/Alias.pm +++ b/cpan/Encode/lib/Encode/Alias.pm @@ -1,8 +1,7 @@ package Encode::Alias; use strict; use warnings; -no warnings 'redefine'; -our $VERSION = do { my @r = ( q$Revision: 2.21 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.23 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; use Exporter 'import'; @@ -19,7 +18,6 @@ our @Alias; # ordered matching list our %Alias; # cached known aliases sub find_alias { - require Encode; my $class = shift; my $find = shift; unless ( exists $Alias{$find} ) { @@ -109,6 +107,9 @@ sub define_alias { } } +# HACK: Encode must be used after define_alias is declarated as Encode calls define_alias +use Encode (); + # Allow latin-1 style names as well # 0 1 2 3 4 5 6 7 8 9 10 our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 ); @@ -134,7 +135,6 @@ sub undef_aliases { } sub init_aliases { - require Encode; undef_aliases(); # Try all-lower-case version should all else fails diff --git a/cpan/Encode/lib/Encode/CN/HZ.pm b/cpan/Encode/lib/Encode/CN/HZ.pm index 4510b0b400..a0dc59d153 100644 --- a/cpan/Encode/lib/Encode/CN/HZ.pm +++ b/cpan/Encode/lib/Encode/CN/HZ.pm @@ -5,7 +5,7 @@ use warnings; use utf8 (); use vars qw($VERSION); -$VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +$VERSION = do { my @r = ( q$Revision: 2.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode qw(:fallbacks); @@ -21,6 +21,7 @@ sub needs_lines { 1 } sub decode ($$;$) { my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; my $GB = Encode::find_encoding('gb2312-raw'); my $ret = substr($str, 0, 0); # to propagate taintedness @@ -135,6 +136,7 @@ sub cat_decode { sub encode($$;$) { my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; my $GB = Encode::find_encoding('gb2312-raw'); my $ret = substr($str, 0, 0); # to propagate taintedness; diff --git a/cpan/Encode/lib/Encode/Encoding.pm b/cpan/Encode/lib/Encode/Encoding.pm index 39d2e0ab64..815937f455 100644 --- a/cpan/Encode/lib/Encode/Encoding.pm +++ b/cpan/Encode/lib/Encode/Encoding.pm @@ -3,11 +3,15 @@ package Encode::Encoding; # Base class for classes which implement encodings use strict; use warnings; -our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; -require Encode; +our @CARP_NOT = qw(Encode Encode::Encoder); -sub DEBUG { 0 } +use Carp (); +use Encode (); +use Encode::MIME::Name; + +use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; sub Define { my $obj = shift; @@ -20,13 +24,10 @@ sub Define { sub name { return shift->{'Name'} } -sub mime_name{ - require Encode::MIME::Name; +sub mime_name { return Encode::MIME::Name::get_mime_name(shift->name); } -# sub renew { return $_[0] } - sub renew { my $self = shift; my $clone = bless {%$self} => ref($self); @@ -42,8 +43,7 @@ sub renewed { return $_[0]->{renewed} || 0 } sub needs_lines { 0 } sub perlio_ok { - eval { require PerlIO::encoding }; - return $@ ? 0 : 1; + return eval { require PerlIO::encoding } ? 1 : 0; } # (Temporary|legacy) methods @@ -56,14 +56,12 @@ sub fromUnicode { shift->encode(@_) } # sub encode { - require Carp; my $obj = shift; my $class = ref($obj) ? ref($obj) : $obj; Carp::croak( $class . "->encode() not defined!" ); } sub decode { - require Carp; my $obj = shift; my $class = ref($obj) ? ref($obj) : $obj; Carp::croak( $class . "->encode() not defined!" ); @@ -188,7 +186,6 @@ MUST return the string representing the canonical name of the encoding. Predefined As: sub mime_name{ - require Encode::MIME::Name; return Encode::MIME::Name::get_mime_name(shift->name); } @@ -226,8 +223,7 @@ unless the value is numeric so return 0 for false. Predefined As: sub perlio_ok { - eval{ require PerlIO::encoding }; - return $@ ? 0 : 1; + return eval { require PerlIO::encoding } ? 1 : 0; } If your encoding does not support PerlIO for some reasons, just; diff --git a/cpan/Encode/lib/Encode/GSM0338.pm b/cpan/Encode/lib/Encode/GSM0338.pm index 20257a1cbd..e87141ebc4 100644 --- a/cpan/Encode/lib/Encode/GSM0338.pm +++ b/cpan/Encode/lib/Encode/GSM0338.pm @@ -1,5 +1,5 @@ # -# $Id: GSM0338.pm,v 2.5 2013/09/14 07:51:59 dankogai Exp $ +# $Id: GSM0338.pm,v 2.7 2017/06/10 17:23:50 dankogai Exp $ # package Encode::GSM0338; @@ -8,7 +8,7 @@ use warnings; use Carp; use vars qw($VERSION); -$VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +$VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode qw(:fallbacks); @@ -171,6 +171,7 @@ our $NBSP = "\x{00A0}"; sub decode ($$;$) { my ( $obj, $bytes, $chk ) = @_; + return undef unless defined $bytes; my $str = substr($bytes, 0, 0); # to propagate taintedness; while ( length $bytes ) { my $c = substr( $bytes, 0, 1, '' ); @@ -216,6 +217,7 @@ sub decode ($$;$) { sub encode($$;$) { my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; my $bytes = substr($str, 0, 0); # to propagate taintedness while ( length $str ) { my $u = substr( $str, 0, 1, '' ); @@ -270,10 +272,9 @@ expression with C<eval {}> block as follows; eval { $utf8 = decode("gsm0338", $gsm0338, $chk); - }; - if ($@){ + } or do { # handle exception here - } + }; =head1 BUGS diff --git a/cpan/Encode/lib/Encode/Guess.pm b/cpan/Encode/lib/Encode/Guess.pm index b44daf59eb..41fc19b799 100644 --- a/cpan/Encode/lib/Encode/Guess.pm +++ b/cpan/Encode/lib/Encode/Guess.pm @@ -2,15 +2,16 @@ package Encode::Guess; use strict; use warnings; use Encode qw(:fallbacks find_encoding); -our $VERSION = do { my @r = ( q$Revision: 2.6 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; my $Canon = 'Guess'; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; our %DEF_SUSPECTS = map { $_ => find_encoding($_) } qw(ascii utf8); -$Encode::Encoding{$Canon} = bless { +my $obj = bless { Name => $Canon, Suspects => {%DEF_SUSPECTS}, } => __PACKAGE__; +Encode::define_encoding($obj, $Canon); use parent qw(Encode::Encoding); sub needs_lines { 1 } diff --git a/cpan/Encode/lib/Encode/JP/JIS7.pm b/cpan/Encode/lib/Encode/JP/JIS7.pm index 588389a034..a0629a3690 100644 --- a/cpan/Encode/lib/Encode/JP/JIS7.pm +++ b/cpan/Encode/lib/Encode/JP/JIS7.pm @@ -1,7 +1,7 @@ package Encode::JP::JIS7; use strict; use warnings; -our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode qw(:fallbacks); @@ -9,11 +9,12 @@ for my $name ( '7bit-jis', 'iso-2022-jp', 'iso-2022-jp-1' ) { my $h2z = ( $name eq '7bit-jis' ) ? 0 : 1; my $jis0212 = ( $name eq 'iso-2022-jp' ) ? 0 : 1; - $Encode::Encoding{$name} = bless { + my $obj = bless { Name => $name, h2z => $h2z, jis0212 => $jis0212, } => __PACKAGE__; + Encode::define_encoding($obj, $name); } use parent qw(Encode::Encoding); @@ -29,6 +30,7 @@ use Encode::CJKConstants qw(:all); sub decode($$;$) { my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; my $residue = ''; if ($chk) { $str =~ s/([^\x00-\x7f].*)$//so and $residue = $1; @@ -45,6 +47,7 @@ sub decode($$;$) { sub encode($$;$) { require Encode::JP::H2Z; my ( $obj, $utf8, $chk ) = @_; + return undef unless defined $utf8; # empty the input string in the stack so perlio is ok $_[1] = '' if $chk; diff --git a/cpan/Encode/lib/Encode/KR/2022_KR.pm b/cpan/Encode/lib/Encode/KR/2022_KR.pm index 44373e5d58..122326403b 100644 --- a/cpan/Encode/lib/Encode/KR/2022_KR.pm +++ b/cpan/Encode/lib/Encode/KR/2022_KR.pm @@ -1,7 +1,7 @@ package Encode::KR::2022_KR; use strict; use warnings; -our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode qw(:fallbacks); @@ -16,6 +16,7 @@ sub perlio_ok { sub decode { my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; my $res = $str; my $residue = iso_euc( \$res ); @@ -26,6 +27,7 @@ sub decode { sub encode { my ( $obj, $utf8, $chk ) = @_; + return undef unless defined $utf8; # empty the input string in the stack so perlio is ok $_[1] = '' if $chk; diff --git a/cpan/Encode/lib/Encode/MIME/Header.pm b/cpan/Encode/lib/Encode/MIME/Header.pm index ad14dba374..e23abffe37 100644 --- a/cpan/Encode/lib/Encode/MIME/Header.pm +++ b/cpan/Encode/lib/Encode/MIME/Header.pm @@ -2,7 +2,7 @@ package Encode::MIME::Header; use strict; use warnings; -our $VERSION = do { my @r = ( q$Revision: 2.24 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.27 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Carp (); use Encode (); @@ -16,24 +16,28 @@ my %seed = ( bpl => 75, # bytes per line ); -$Encode::Encoding{'MIME-Header'} = bless { +my @objs; + +push @objs, bless { %seed, Name => 'MIME-Header', } => __PACKAGE__; -$Encode::Encoding{'MIME-B'} = bless { +push @objs, bless { %seed, decode_q => 0, Name => 'MIME-B', } => __PACKAGE__; -$Encode::Encoding{'MIME-Q'} = bless { +push @objs, bless { %seed, decode_b => 0, encode => 'Q', Name => 'MIME-Q', } => __PACKAGE__; +Encode::define_encoding($_, $_->{Name}) foreach @objs; + use parent qw(Encode::Encoding); sub needs_lines { 1 } @@ -52,7 +56,7 @@ my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($ my $re_encoding_strict_b = qr/[Bb]/; my $re_encoding_strict_q = qr/[Qq]/; my $re_encoded_text_strict_b = qr/[0-9A-Za-z\+\/]*={0,2}/; -my $re_encoded_text_strict_q = qr/(?:[^\?\s=]|=[0-9A-Fa-f]{2})*/; +my $re_encoded_text_strict_q = qr/(?:[\x21-\x3C\x3E\x40-\x7E]|=[0-9A-Fa-f]{2})*/; # NOTE: first part are printable US-ASCII except ?, =, SPACE and TAB my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/; my $re_capture_encoded_word_strict = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/; @@ -74,6 +78,7 @@ our $STRICT_DECODE = 0; sub decode($$;$) { my ($obj, $str, $chk) = @_; + return undef unless defined $str; my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match; my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture; @@ -194,7 +199,6 @@ sub _decode_q { sub _decode_octets { my ($enc, $octets, $chk) = @_; $chk &= ~Encode::LEAVE_SRC if not ref $chk and $chk; - local $Carp::CarpLevel = $Carp::CarpLevel + 1; # propagate Carp messages back to caller my $output = $enc->decode($octets, $chk); return undef if not ref $chk and $chk and $octets ne ''; return $output; @@ -202,6 +206,7 @@ sub _decode_octets { sub encode($$;$) { my ($obj, $str, $chk) = @_; + return undef unless defined $str; my $output = $obj->_fold_line($obj->_encode_string($str, $chk)); $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC); return $output . substr($str, 0, 0); # to propagate taintedness @@ -237,11 +242,7 @@ sub _encode_string { my @result = (); my $octets = ''; while ( length( my $chr = substr($str, 0, 1, '') ) ) { - my $seq; - { - local $Carp::CarpLevel = $Carp::CarpLevel + 1; # propagate Carp messages back to caller - $seq = $enc->encode($chr, $enc_chk); - } + my $seq = $enc->encode($chr, $enc_chk); if ( not length($seq) ) { substr($str, 0, 0, $chr); last; diff --git a/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm b/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm index 86e66c371c..dc1e4275f0 100644 --- a/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm +++ b/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm @@ -5,16 +5,17 @@ use warnings; use parent qw(Encode::MIME::Header); -$Encode::Encoding{'MIME-Header-ISO_2022_JP'} = +my $obj = bless { decode_b => '1', decode_q => '1', encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } => __PACKAGE__; +Encode::define_encoding($obj, 'MIME-Header-ISO_2022_JP'); use constant HEAD => '=?ISO-2022-JP?B?'; use constant TAIL => '?='; use Encode::CJKConstants qw(%RE); -our $VERSION = do { my @r = ( q$Revision: 1.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 1.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; # I owe the below codes totally to **** PATCH TRUNCATED AT 2000 LINES -- 1047 NOT SHOWN **** -- Perl5 Master Repository