Change 11990 by jhi@alpha on 2001/09/10 23:59:25

        Subject: [PATCH] Re: the remaining bugs in \x escapes (was Re: [PATCH] oct and 
hex in glorious 64 bit (with less bugs) (was Re: hex and oct again (was Re: FreeBSD 
MD5 crypt? Re: crypt/hex/oct and Unicode?)))                  
        From: Nicholas Clark <[EMAIL PROTECTED]>
        Date: Tue, 11 Sep 2001 00:00:31 +0100
        Message-ID: <[EMAIL PROTECTED]>

Affected files ...

... //depot/perl/MANIFEST#555 edit
... //depot/perl/numeric.c#14 edit
... //depot/perl/perl.h#391 edit
... //depot/perl/regcomp.c#237 edit
... //depot/perl/t/op/pat.t#118 edit
... //depot/perl/t/op/qq.t#1 add
... //depot/perl/toke.c#384 edit

Differences ...

==== //depot/perl/MANIFEST#555 (text) ====
Index: perl/MANIFEST
--- perl/MANIFEST.~1~   Mon Sep 10 18:15:05 2001
+++ perl/MANIFEST       Mon Sep 10 18:15:05 2001
@@ -2096,6 +2096,7 @@
 t/op/pos.t                     See if pos works
 t/op/push.t                    See if push and pop work
 t/op/pwent.t                   See if getpw*() functions work
+t/op/qq.t                      See if qq works
 t/op/quotemeta.t               See if quotemeta works
 t/op/rand.t                    See if rand works
 t/op/range.t                   See if .. works

==== //depot/perl/numeric.c#14 (text) ====
Index: perl/numeric.c
--- perl/numeric.c.~1~  Mon Sep 10 18:15:05 2001
+++ perl/numeric.c      Mon Sep 10 18:15:05 2001
@@ -122,8 +122,9 @@
 and writes the value to I<*result> (or the value is discarded if I<result>
 is NULL).
 
-The hex number may optinally be prefixed with "0b" or "b". If
-C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> on entry then the binary
+The hex number may optinally be prefixed with "0b" or "b" unless
+C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
+C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
 number may use '_' characters to separate digits.
 
 =cut
@@ -140,18 +141,20 @@
     bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
     bool overflowed = FALSE;
 
-    /* strip off leading b or 0b.
-       for compatibility silently suffer "b" and "0b" as valid binary numbers.
-    */
-    if (len >= 1) {
-       if (s[0] == 'b') {
-           s++;
-           len--;
-       }
-       else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
-           s+=2;
-           len-=2;
-       }
+    if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
+        /* strip off leading b or 0b.
+           for compatibility silently suffer "b" and "0b" as valid binary
+           numbers. */
+        if (len >= 1) {
+            if (s[0] == 'b') {
+                s++;
+                len--;
+            }
+            else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
+                s+=2;
+                len-=2;
+            }
+        }
     }
 
     for (; len-- && *s; s++) {
@@ -233,8 +236,9 @@
 and writes the value to I<*result> (or the value is discarded if I<result>
 is NULL).
 
-The hex number may optinally be prefixed with "0x" or "x". If
-C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> on entry then the hex
+The hex number may optinally be prefixed with "0x" or "x" unless
+C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
+C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
 number may use '_' characters to separate digits.
 
 =cut
@@ -252,17 +256,20 @@
     bool overflowed = FALSE;
     const char *hexdigit;
 
-    /* strip off leading x or 0x.
-       for compatibility silently suffer "x" and "0x" as valid hex numbers.  */
-    if (len >= 1) {
-       if (s[0] == 'x') {
-           s++;
-           len--;
-       }
-       else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
-           s+=2;
-           len-=2;
-       }
+    if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
+        /* strip off leading x or 0x.
+           for compatibility silently suffer "x" and "0x" as valid hex numbers.
+        */
+        if (len >= 1) {
+            if (s[0] == 'x') {
+                s++;
+                len--;
+            }
+            else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
+                s+=2;
+                len-=2;
+            }
+        }
     }
 
     for (; len-- && *s; s++) {

==== //depot/perl/perl.h#391 (text) ====
Index: perl/perl.h
--- perl/perl.h.~1~     Mon Sep 10 18:15:05 2001
+++ perl/perl.h Mon Sep 10 18:15:05 2001
@@ -3860,6 +3860,7 @@
 
 /* Input flags: */
 #define PERL_SCAN_ALLOW_UNDERSCORES   0x01 /* grok_??? accept _ in numbers */
+#define PERL_SCAN_DISALLOW_PREFIX     0x02 /* grok_??? reject 0x in hex etc */
 /* Output flags: */
 #define PERL_SCAN_GREATER_THAN_UV_MAX 0x02 /* should this merge with above? */
 

==== //depot/perl/regcomp.c#237 (text) ====
Index: perl/regcomp.c
--- perl/regcomp.c.~1~  Mon Sep 10 18:15:05 2001
+++ perl/regcomp.c      Mon Sep 10 18:15:05 2001
@@ -3039,7 +3039,8 @@
                                vFAIL("Missing right brace on \\x{}");
                            }
                            else {
-                                I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
+                                I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+                                    | PERL_SCAN_DISALLOW_PREFIX;
                                 numlen = e - p - 1;
                                ender = grok_hex(p + 1, &numlen, &flags, NULL);
                                if (ender > 0xff)
@@ -3053,7 +3054,7 @@
                            }
                        }
                        else {
-                            I32 flags = 0;
+                            I32 flags = PERL_SCAN_DISALLOW_PREFIX;
                            numlen = 2;
                            ender = grok_hex(p, &numlen, &flags, NULL);
                            p += numlen;
@@ -3449,7 +3450,8 @@
            case 'a':   value = ASCII_TO_NATIVE('\007');break;
            case 'x':
                if (*RExC_parse == '{') {
-                    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
+                    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+                        | PERL_SCAN_DISALLOW_PREFIX;
                    e = strchr(RExC_parse++, '}');
                     if (!e)
                         vFAIL("Missing right brace on \\x{}");
@@ -3459,7 +3461,7 @@
                    RExC_parse = e + 1;
                }
                else {
-                    I32 flags = 0;
+                    I32 flags = PERL_SCAN_DISALLOW_PREFIX;
                    numlen = 2;
                    value = grok_hex(RExC_parse, &numlen, &flags, NULL);
                    RExC_parse += numlen;

==== //depot/perl/t/op/pat.t#118 (xtext) ====
Index: perl/t/op/pat.t
--- perl/t/op/pat.t.~1~ Mon Sep 10 18:15:05 2001
+++ perl/t/op/pat.t     Mon Sep 10 18:15:05 2001
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..686\n";
+print "1..714\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -2008,3 +2008,113 @@
     print "not " unless length($y) == 2 && $y eq $x;
     print "ok 686\n";
 }
+
+my $test = 687;
+
+# Force scalar context on the patern match
+sub ok ($$) {
+    my($ok, $name) = @_;
+
+    printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name;
+
+    printf "# Failed test at line %d\n", (caller)[2] unless $ok;
+
+    $test++;
+    return $ok;
+}
+
+{
+    # Check that \x## works. 5.6.1 and 5.005_03 fail some of these.
+    $x = "\x4e" . "E";
+    ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched.");
+
+    $x = "\x4e" . "i";
+    ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)");
+
+    $x = "\x4" . "j";
+    ok ($x =~ /^\x4j$/,  "Check that invalid hex digit stops it (1)");
+
+    $x = "\x0" . "k";
+    ok ($x =~ /^\xk$/,   "Check that invalid hex digit stops it (0)");
+
+    $x = "\x0" . "x";
+    ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0");
+
+    $x = "\x0" . "xa";
+    ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa");
+
+    $x = "\x9" . "_b";
+    ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b");
+
+    print "# and now again in [] ranges\n";
+
+    $x = "\x4e" . "E";
+    ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched.");
+
+    $x = "\x4e" . "i";
+    ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)");
+
+    $x = "\x4" . "j";
+    ok ($x =~ /^[\x4j]{2}$/,  "Check that invalid hex digit stops it (1)");
+
+    $x = "\x0" . "k";
+    ok ($x =~ /^[\xk]{2}$/,   "Check that invalid hex digit stops it (0)");
+
+    $x = "\x0" . "x";
+    ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0");
+
+    $x = "\x0" . "xa";
+    ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa");
+
+    $x = "\x9" . "_b";
+    ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b");
+
+}
+
+{
+    # Check that \x{##} works. 5.6.1 fails quite a few of these.
+
+    $x = "\x9b";
+    ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b");
+
+    $x = "\x0" . "y";
+    ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0");
+
+    $x = "\x0" . "y";
+    ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b");
+
+    print "# and now again in [] ranges\n";
+
+    $x = "\x9b";
+    ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^[\x{9_b}y]{2}$/, "\\x{9_b} is to be treated as \\x9b (again)");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b");
+
+    $x = "\x0" . "y";
+    ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0");
+
+    $x = "\x0" . "y";
+    ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b");
+}

==== //depot/perl/toke.c#384 (text) ====
Index: perl/toke.c
--- perl/toke.c.~1~     Mon Sep 10 18:15:05 2001
+++ perl/toke.c Mon Sep 10 18:15:05 2001
@@ -1452,7 +1452,8 @@
                ++s;
                if (*s == '{') {
                    char* e = strchr(s, '}');
-                    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
+                    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
+                      PERL_SCAN_DISALLOW_PREFIX;
                    STRLEN len;
 
                     ++s;
@@ -1467,7 +1468,7 @@
                else {
                    {
                        STRLEN len = 2;
-                        I32 flags = 0;
+                        I32 flags = PERL_SCAN_DISALLOW_PREFIX;
                        uv = grok_hex(s, &len, &flags, NULL);
                        s += len;
                    }

==== //depot/perl/t/op/qq.t#1 (text) ====
Index: perl/t/op/qq.t
--- perl/t/op/qq.t.~1~  Mon Sep 10 18:15:05 2001
+++ perl/t/op/qq.t      Mon Sep 10 18:15:05 2001
@@ -0,0 +1,63 @@
+#!./perl
+
+BEGIN {
+    chdir 't' if -d 't';
+    @INC = '../lib';
+}
+
+print q(1..21
+);
+
+# This is() function is written to avoid ""
+my $test = 1;
+sub is {
+    my($left, $right) = @_;
+
+    if ($left eq $right) {
+      printf 'ok %d
+', $test++;
+      return 1;
+    }
+    foreach ($left, $right) {
+      # Comment out these regexps to map non-printables to ord if the perl under
+      # test is so broken that it's not helping
+      s/([^-+A-Za-z_0-9])/sprintf q{'.chr(%d).'}, ord $1/ge;
+      $_ = sprintf q('%s'), $_;
+      s/^''\.//;
+      s/\.''$//;
+    }
+    printf q(not ok %d - got %s expected %s
+), $test++, $left, $right;
+
+    printf q(# Failed test at line %d
+), (caller)[2];
+
+    return 0;
+}
+
+is ("\x53", chr 83);
+is ("\x4EE", chr (78) . 'E');
+is ("\x4i", chr (4) . 'i');    # This will warn
+is ("\xh", chr (0) . 'h');     # This will warn
+is ("\xx", chr (0) . 'x');     # This will warn
+is ("\xx9", chr (0) . 'x9');   # This will warn. \x9 is tab in EBCDIC too?
+is ("\x9_E", chr (9) . '_E');  # This will warn
+
+is ("\x{4E}", chr 78);
+is ("\x{6_9}", chr 105);
+is ("\x{_6_3}", chr 99);
+is ("\x{_6B}", chr 107);
+
+is ("\x{9__0}", chr 9);                # multiple underscores not allowed.
+is ("\x{77_}", chr 119);       # trailing underscore warns.
+is ("\x{6FQ}z", chr (111) . 'z');
+
+is ("\x{0x4E}", chr 0);
+is ("\x{x4E}", chr 0);
+
+is ("\x{0065}", chr 101);
+is ("\x{000000000000000000000000000000000000000000000000000000000000000072}",
+    chr 114);
+is ("\x{0_06_5}", chr 101);
+is ("\x{1234}", chr 4660);
+is ("\x{98765432}", chr 2557891634);
End of Patch.

Reply via email to