In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/bfdf22ecfe7cf92546a7f989d23ce76679360abd?hp=ba207afd4f7e36d6017bca62c51c750ddc6beb7a>

- Log -----------------------------------------------------------------
commit bfdf22ecfe7cf92546a7f989d23ce76679360abd
Author: Karl Williamson <[email protected]>
Date:   Wed Jun 6 15:56:09 2012 -0600

    utf8.c: White-space only

M       utf8.c

commit cbe07460d443564aea40e4397ab55080aab2d0b9
Author: Karl Williamson <[email protected]>
Date:   Wed Jun 6 15:50:53 2012 -0600

    utf8.c: Refactor a portion of to_utf8_case()
    
    This routine can never return 0, as if there is no case mapping, the
    input is used instead.  The code point for that input has already been
    derived earlier in the function, so it doesn't have to be recalculated.
    And, rearrange the order of things slightly.

M       utf8.c

commit dd9267d7ed4287d573abd56d1aadf2152c628baf
Author: Karl Williamson <[email protected]>
Date:   Wed Jun 6 15:40:38 2012 -0600

    utf8.c: Avoid some extra work
    
    In the case changed, the output is the input, so can just Copy it
    instead of re-deriving it.

M       utf8.c

commit 2114036ce68685bad99e2d43873afa0ff512b0a6
Author: Karl Williamson <[email protected]>
Date:   Wed Jun 6 15:40:02 2012 -0600

    utf8.c: Add, revise comments

M       utf8.c

commit 2269d15c887e7326906ea6195d5970ac188c3411
Author: Karl Williamson <[email protected]>
Date:   Wed Jun 6 11:12:24 2012 -0600

    docs patch: 'unicode_strings' doesn't change utf8ness
    
    We had a field report of lack of clarity around this, so add some
    text.

M       lib/feature.pm
M       pod/perlunicode.pod
M       regen/feature.pl

commit 3c8317961e30b9ee408493f68b38d723f6748319
Author: Karl Williamson <[email protected]>
Date:   Wed Jun 6 11:11:34 2012 -0600

    perlfunc: Add comma

M       pod/perlfunc.pod
-----------------------------------------------------------------------

Summary of changes:
 lib/feature.pm      |    5 +++--
 pod/perlfunc.pod    |    2 +-
 pod/perlunicode.pod |    6 ++++--
 regen/feature.pl    |    5 +++--
 utf8.c              |   21 +++++++++++++++------
 5 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/lib/feature.pm b/lib/feature.pm
index 37e571f..cc89bde 100644
--- a/lib/feature.pm
+++ b/lib/feature.pm
@@ -5,7 +5,7 @@
 
 package feature;
 
-our $VERSION = '1.28';
+our $VERSION = '1.29';
 
 our %feature = (
     fc              => 'feature_fc',
@@ -139,7 +139,8 @@ C<use feature 'unicode_strings'> tells the compiler to use Unicode semantics
 in all string operations executed within its scope (unless they are also
 within the scope of either C<use locale> or C<use bytes>).  The same applies
 to all regular expressions compiled within the scope, even if executed outside
-it.
+it.  It does not change the internal representation of strings, but only how
+they are interpreted.
 
 C<no feature 'unicode_strings'> tells the compiler to use the traditional
 Perl semantics wherein the native character set semantics is used unless it is
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod
index 3482f36..99cdeec 100644
--- a/pod/perlfunc.pod
+++ b/pod/perlfunc.pod
@@ -7252,7 +7252,7 @@ Perl versions, it should call C<srand>; otherwise most programs won't call
 C<srand()> at all.
 
 But there are a few situations in recent Perls where programs are likely to
-want to call C<srand>.  One is for generating predictable results generally for
+want to call C<srand>.  One is for generating predictable results, generally for
 testing or debugging.  There, you use C<srand($seed)>, with the same C<$seed>
 each time.  Another case is that you may want to call C<srand()>
 after a C<fork()> to avoid child processes sharing the same seed value as the
diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod
index 77daca3..e893571 100644
--- a/pod/perlunicode.pod
+++ b/pod/perlunicode.pod
@@ -28,8 +28,10 @@ C<use feature 'unicode_strings'> is specified.  (This is automatically
 selected if you use C<use 5.012> or higher.)  Failure to do this can
 trigger unexpected surprises.  See L</The "Unicode Bug"> below.
 
-This pragma doesn't affect I/O, and there are still several places
-where Unicode isn't fully supported, such as in filenames.
+This pragma doesn't affect I/O.  Nor does it change the internal
+representation of strings, only their interpretation.  There are still
+several places where Unicode isn't fully supported, such as in
+filenames.
 
 =item Input and Output Layers
 
diff --git a/regen/feature.pl b/regen/feature.pl
index 15315c7..1a85d0a 100755
--- a/regen/feature.pl
+++ b/regen/feature.pl
@@ -338,7 +338,7 @@ read_only_bottom_close_and_rename($h);
 __END__
 package feature;
 
-our $VERSION = '1.28';
+our $VERSION = '1.29';
 
 FEATURES
 
@@ -437,7 +437,8 @@ C<use feature 'unicode_strings'> tells the compiler to use Unicode semantics
 in all string operations executed within its scope (unless they are also
 within the scope of either C<use locale> or C<use bytes>).  The same applies
 to all regular expressions compiled within the scope, even if executed outside
-it.
+it.  It does not change the internal representation of strings, but only how
+they are interpreted.
 
 C<no feature 'unicode_strings'> tells the compiler to use the traditional
 Perl semantics wherein the native character set semantics is used unless it is
diff --git a/utf8.c b/utf8.c
index 018c85a..0a6f9ed 100644
--- a/utf8.c
+++ b/utf8.c
@@ -998,7 +998,7 @@ Perl_utf8_to_uvuni_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
 }
 
 /* Like L</utf8_to_uvuni_buf>(), but should only be called when it is known that
- * there are no malformations in the input UTF-8 string C<s>.  surrogates,
+ * there are no malformations in the input UTF-8 string C<s>.  Surrogates,
  * non-character code points, and non-Unicode code points are allowed */
 
 UV
@@ -2410,7 +2410,7 @@ Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp,
     }
 
     if (!len && *swashp) {
-       const UV uv2 = swash_fetch(*swashp, tmpbuf, TRUE);
+       const UV uv2 = swash_fetch(*swashp, tmpbuf, TRUE /* => is utf8 */);
 
         if (uv2) {
              /* It was "normal" (a single character mapping). */
@@ -2419,14 +2419,23 @@ Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp,
         }
     }
 
-    if (!len) /* Neither: just copy.  In other words, there was no mapping
-                defined, which means that the code point maps to itself */
-        len = uvchr_to_utf8(ustrp, uv0) - ustrp;
+    if (len) {
+        if (lenp) {
+            *lenp = len;
+        }
+        return valid_utf8_to_uvchr(ustrp, 0);
+    }
+
+    /* Here, there was no mapping defined, which means that the code point maps
+     * to itself.  Return the inputs */
+    len = UTF8SKIP(p);
+    Copy(p, ustrp, len, U8);
 
     if (lenp)
         *lenp = len;
 
-    return len ? valid_utf8_to_uvchr(ustrp, 0) : 0;
+    return uv0;
+
 }
 
 STATIC UV

--
Perl5 Master Repository

Reply via email to