In perl.git, the branch blead has been updated

<https://perl5.git.perl.org/perl.git/commitdiff/ae315a0a3c51e68887704d4907bb6a502a6d4e3f?hp=0b13e5291ebd9c786dea21905e17886c5a310454>

- Log -----------------------------------------------------------------
commit ae315a0a3c51e68887704d4907bb6a502a6d4e3f
Author: Karl Williamson <k...@cpan.org>
Date:   Sun Feb 4 21:47:09 2018 -0700

    APItest: Add tests for utf8_to_bytes()

commit 8132136a878b27b9619d552278dd329a2f289bd4
Author: Karl Williamson <k...@cpan.org>
Date:   Sun Feb 4 21:44:17 2018 -0700

    APItest:t/utf8_setup.pl: Display printables as themselves
    
    Instead of the harder to read \xXX

-----------------------------------------------------------------------

Summary of changes:
 MANIFEST                         |  1 +
 ext/XS-APItest/APItest.xs        | 18 +++++++++++
 ext/XS-APItest/t/utf8_setup.pl   |  6 +++-
 ext/XS-APItest/t/utf8_to_bytes.t | 68 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 ext/XS-APItest/t/utf8_to_bytes.t

diff --git a/MANIFEST b/MANIFEST
index 96c8da5b5e..4a5c649e45 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -4394,6 +4394,7 @@ ext/XS-APItest/t/underscore_length.t      Test 
find_rundefsv()
 ext/XS-APItest/t/utf16_to_utf8.t       Test behaviour of 
utf16_to_utf8{,reversed}
 ext/XS-APItest/t/utf8.t                Tests for code in utf8.c
 ext/XS-APItest/t/utf8_setup.pl Tests for code in utf8.c
+ext/XS-APItest/t/utf8_to_bytes.t       Tests for code in utf8.c
 ext/XS-APItest/t/utf8_warn00.t Tests for code in utf8.c
 ext/XS-APItest/t/utf8_warn01.t Tests for code in utf8.c
 ext/XS-APItest/t/utf8_warn02.t Tests for code in utf8.c
diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs
index 0be5d95310..5e67e7fa40 100644
--- a/ext/XS-APItest/APItest.xs
+++ b/ext/XS-APItest/APItest.xs
@@ -1379,6 +1379,24 @@ bytes_cmp_utf8(bytes, utf8)
     OUTPUT:
        RETVAL
 
+AV *
+test_utf8_to_bytes(bytes, lenp)
+        unsigned char * bytes
+        STRLEN lenp
+    PREINIT:
+        char * ret;
+    CODE:
+        RETVAL = newAV();
+        sv_2mortal((SV*)RETVAL);
+
+        ret = (char *) utf8_to_bytes(bytes, &lenp);
+        av_push(RETVAL, newSVpv(ret, 0));
+        av_push(RETVAL, newSViv(lenp));
+        av_push(RETVAL, newSVpv((const char *) bytes, 0));
+
+    OUTPUT:
+        RETVAL
+
 AV *
 test_utf8n_to_uvchr_msgs(s, len, flags)
         char *s
diff --git a/ext/XS-APItest/t/utf8_setup.pl b/ext/XS-APItest/t/utf8_setup.pl
index ec7a5ce3d1..231b4d9494 100644
--- a/ext/XS-APItest/t/utf8_setup.pl
+++ b/ext/XS-APItest/t/utf8_setup.pl
@@ -11,7 +11,11 @@ sub isASCII { ord "A" == 65 }
 sub display_bytes_no_quotes {
     use bytes;
     my $string = shift;
-    return join("", map { sprintf("\\x%02x", ord $_) } split "", $string)
+    return join("", map {
+                          ($_ =~ /[[:print:]]/)
+                          ? $_
+                          : sprintf("\\x%02x", ord $_)
+                        } split "", $string)
 }
 
 sub display_bytes {
diff --git a/ext/XS-APItest/t/utf8_to_bytes.t b/ext/XS-APItest/t/utf8_to_bytes.t
new file mode 100644
index 0000000000..4c03f842f5
--- /dev/null
+++ b/ext/XS-APItest/t/utf8_to_bytes.t
@@ -0,0 +1,68 @@
+#!perl -w
+
+# This is a base file to be used by various .t's in its directory
+# It tests various malformed UTF-8 sequences and some code points that are
+# "problematic", and verifies that the correct warnings/flags etc are
+# generated when using them.  For the code points, it also takes the UTF-8 and
+# perturbs it to be malformed in various ways, and tests that this gets
+# appropriately detected.
+
+use strict;
+use Test::More;
+
+BEGIN {
+    require './t/utf8_setup.pl';
+    use_ok('XS::APItest');
+};
+
+$|=1;
+
+use Data::Dumper;
+
+my @well_formed = (
+            "\xE1",
+            "The quick brown fox jumped over the lazy dog",
+            "Ces systèmes de codage sont souvent incompatibles entre eux.  
Ainsi, deux systèmes peuvent utiliser le même nombre pour deux caractères 
différents ou utiliser différents nombres pour le même caractère.",
+            "Kelimelerin m\xC3\xAAme caract\xC3\xA8re ve yaz\xC3\xB1abc",
+);
+
+my @malformed = (
+            "Kelimelerin m\xC3\xAAme caract\xC3\xA8re ve yaz\xC4\xB1abc",
+            "Kelimelerin m\xC3\xAAme caract\xC3\xA8re ve 
yaz\xC4\xB1\xC3\xA8abc",
+            "Kelimelerin m\xC3\xAAme caract\xC3re ve yazi\xC3\xA8abc",
+            "Kelimelerin m\xC3\xAAme caract\xA8 ve yazi\xC3\xA8abc",
+            "Kelimelerin m\xC3\xAAme caract\xC3\xA8\xC3re ve yazi\xC3\xA8abc",
+);
+
+for my $test (@well_formed) {
+    my $utf8 = $test;
+    utf8::upgrade($utf8);
+    my $utf8_length;
+    my $byte_length = length $test;
+
+    {
+        use bytes;
+        $utf8_length = length $utf8;
+    }
+
+    my $ret_ref = test_utf8_to_bytes($utf8, $utf8_length);
+
+    is ($ret_ref->[0], $test, "Successfully downgraded "
+                            . display_bytes($utf8));
+    is ($ret_ref->[1], $byte_length, "... And returned correct length("
+                                     . $byte_length . ")");
+}
+
+for my $test (@malformed) {
+    my $utf8 = $test;
+    my $utf8_length = length $test;
+
+    my $ret_ref = test_utf8_to_bytes($utf8, $utf8_length);
+
+    ok (! defined $ret_ref->[0], "Returned undef for malformed "
+                                . display_bytes($utf8));
+    is ($ret_ref->[1], -1, "... And returned length -1");
+    is ($ret_ref->[2], $utf8, "... And left the input unchanged");
+}
+
+done_testing();

-- 
Perl5 Master Repository

Reply via email to