In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/d6c970c7e66d6f55dba7f13549143a2f4ba641c7?hp=fb387a96f609c00a517aa640d941f26f1e52f15a>

- Log -----------------------------------------------------------------
commit d6c970c7e66d6f55dba7f13549143a2f4ba641c7
Author: Aaron Crane <a...@cpan.org>
Date:   Sun Nov 20 14:18:22 2016 +0000

    Fix the Unicode Bug in the range operator
-----------------------------------------------------------------------

Summary of changes:
 lib/feature.pm       |  6 ++++--
 pod/perldelta.pod    |  7 +++++++
 pod/perlop.pod       |  6 ++++++
 pod/perlunicode.pod  | 10 ++++++++++
 pod/perluniintro.pod | 10 +++++++---
 pp_ctl.c             |  2 ++
 pp_hot.c             |  2 ++
 regen/feature.pl     |  6 ++++--
 t/op/range.t         | 16 +++++++++++++++-
 9 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/lib/feature.pm b/lib/feature.pm
index e97ffff152..fe5c513e59 100644
--- a/lib/feature.pm
+++ b/lib/feature.pm
@@ -5,7 +5,7 @@
 
 package feature;
 
-our $VERSION = '1.45';
+our $VERSION = '1.46';
 
 our %feature = (
     fc              => 'feature_fc',
@@ -174,7 +174,9 @@ potentially using Unicode in your program, the
 C<use feature 'unicode_strings'> subpragma is B<strongly> recommended.
 
 This feature is available starting with Perl 5.12; was almost fully
-implemented in Perl 5.14; and extended in Perl 5.16 to cover C<quotemeta>.
+implemented in Perl 5.14; and extended in Perl 5.16 to cover C<quotemeta>;
+and extended further in Perl 5.26 to cover L<the range
+operator|perlop/Range Operators>.
 
 =head2 The 'unicode_eval' and 'evalbytes' features
 
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 6a312bebf9..168e961ece 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -507,6 +507,13 @@ files in F<ext/> and F<lib/> are best summarized in L</Modules and Pragmata>.
 Under C<use utf8>, the entire Perl program is now checked that the UTF-8
 is wellformed.  This resolves [perl #126310].
 
+=item *
+
+The range operator C<..> on strings now handles its arguments correctly when in
+the scope of the L<< C<unicode_strings>|feature/"The 'unicode_strings' feature" >>
+feature. The previous behaviour was sufficiently unexpected that we believe no
+correct program could have made use of it.
+
 =back
 
 =head1 Known Problems
diff --git a/pod/perlop.pod b/pod/perlop.pod
index 2640b5173d..82dca55d52 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -1074,6 +1074,12 @@ If the final value specified is not in the sequence that the magical
 increment would produce, the sequence goes until the next value would
 be longer than the final value specified.
 
+As of Perl 5.26, the list-context range operator on strings works as expected
+in the scope of L<< S<C<"use feature 'unicode_strings'">>|feature/The
+'unicode_strings' feature >>. In previous versions, and outside the scope of
+that feature, it exhibits L<perlunicode/The "Unicode Bug">: its behavior
+depends on the internal encoding of the range endpoint.
+
 If the initial value specified isn't part of a magical increment
 sequence (that is, a non-empty string matching C</^[a-zA-Z]*[0-9]*\z/>),
 only the initial value will be returned.  So the following will only
diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod
index 152c34bbe2..33e52b31b3 100644
--- a/pod/perlunicode.pod
+++ b/pod/perlunicode.pod
@@ -1814,6 +1814,16 @@ Prior to that, or outside its scope, no code points above 127 are quoted
 in UTF-8 encoded strings, but in byte encoded strings, code points
 between 128-255 are always quoted.
 
+=item *
+
+In the C<..> or L<range|perlop/Range Operators> operator.
+
+Starting in Perl 5.26.0, the range operator on strings treats their lengths
+consistently within the scope of C<unicode_strings>. Prior to that, or
+outside its scope, it could produce strings whose length in characters
+exceeded that of the right-hand side, where the right-hand side took up more
+bytes than the correct range endpoint.
+
 =back
 
 You can see from the above that the effect of C<unicode_strings>
diff --git a/pod/perluniintro.pod b/pod/perluniintro.pod
index beccd3c6a4..5b571fbbc1 100644
--- a/pod/perluniintro.pod
+++ b/pod/perluniintro.pod
@@ -151,9 +151,13 @@ serious Unicode work.  The maintenance release 5.6.1 fixed many of the
 problems of the initial Unicode implementation, but for example
 regular expressions still do not work with Unicode in 5.6.1.
 Perl v5.14.0 is the first release where Unicode support is
-(almost) seamlessly integrable without some gotchas (the exception being
-some differences in L<quotemeta|perlfunc/quotemeta>, and that is fixed
-starting in Perl 5.16.0).   To enable this
+(almost) seamlessly integrable without some gotchas. (There are two
+exceptions. Firstly, some differences in L<quotemeta|perlfunc/quotemeta>
+were fixed starting in Perl 5.16.0. Secondly, some differences in
+L<the range operator|perlop/Range Operators> were fixed starting in
+Perl 5.26.0.)
+
+To enable this
 seamless support, you should C<use feature 'unicode_strings'> (which is
 automatically selected if you C<use 5.012> or higher).  See L<feature>.
 (5.14 also fixes a number of bugs and departures from the Unicode
diff --git a/pp_ctl.c b/pp_ctl.c
index ec0ad7d317..c65733675d 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -1222,6 +1222,8 @@ PP(pp_flop)
            const char * const tmps = SvPV_nomg_const(right, len);
 
            SV *sv = newSVpvn_flags(lpv, llen, SvUTF8(left)|SVs_TEMP);
+            if (DO_UTF8(right) && IN_UNI_8_BIT)
+                len = sv_len_utf8_nomg(right);
            while (!SvNIOKp(sv) && SvCUR(sv) <= len) {
                XPUSHs(sv);
                if (strEQ(SvPVX_const(sv),tmps))
diff --git a/pp_hot.c b/pp_hot.c
index 97d46f6511..ec3afe4dd9 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -2883,6 +2883,8 @@ PP(pp_iter)
            It has SvPVX of "" and SvCUR of 0, which is what we want.  */
         STRLEN maxlen = 0;
         const char *max = SvPV_const(end, maxlen);
+        if (DO_UTF8(end) && IN_UNI_8_BIT)
+            maxlen = sv_len_utf8_nomg(end);
         if (UNLIKELY(SvNIOK(cur) || SvCUR(cur) > maxlen))
             goto retno;
 
diff --git a/regen/feature.pl b/regen/feature.pl
index 7a5671276e..66fc017da6 100755
--- a/regen/feature.pl
+++ b/regen/feature.pl
@@ -367,7 +367,7 @@ read_only_bottom_close_and_rename($h);
 __END__
 package feature;
 
-our $VERSION = '1.45';
+our $VERSION = '1.46';
 
 FEATURES
 
@@ -484,7 +484,9 @@ potentially using Unicode in your program, the
 C<use feature 'unicode_strings'> subpragma is B<strongly> recommended.
 
 This feature is available starting with Perl 5.12; was almost fully
-implemented in Perl 5.14; and extended in Perl 5.16 to cover C<quotemeta>.
+implemented in Perl 5.14; and extended in Perl 5.16 to cover C<quotemeta>;
+and extended further in Perl 5.26 to cover L<the range
+operator|perlop/Range Operators>.
 
 =head2 The 'unicode_eval' and 'evalbytes' features
 
diff --git a/t/op/range.t b/t/op/range.t
index f30fa8d7b7..e58a39c8e8 100644
--- a/t/op/range.t
+++ b/t/op/range.t
@@ -9,7 +9,7 @@ BEGIN {
 
 use Config;
 
-plan (141);
+plan (145);
 
 is(join(':',1..5), '1:2:3:4:5');
 
@@ -42,6 +42,20 @@ is($x, 'abcdefghijklmnopqrstuvwxyz');
 @x = 'A'..'ZZ';
 is (scalar @x, 27 * 26);
 
+foreach (0, 1) {
+    use feature 'unicode_strings';
+    $s = "a";
+    $e = "\xFF";
+    utf8::upgrade($e) if $_;
+    @x = $s .. $e;
+    is (scalar @x, 26, "list-context range with rhs 0xFF, utf8=$_");
+    @y = ();
+    foreach ($s .. $e) {
+        push @y, $_;
+    }
+    is(join(",", @y), join(",", @x), "foreach range with rhs 0xFF, utf8=$_");
+}
+
 @x = '09' .. '08';  # should produce '09', '10',... '99' (strange but true)
 is(join(",", @x), join(",", map {sprintf "%02d",$_} 9..99));
 

--
Perl5 Master Repository

Reply via email to