Change 11990 by jhi@alpha on 2001/09/10 23:59:25
Subject: [PATCH] Re: the remaining bugs in \x escapes (was Re: [PATCH] oct and
hex in glorious 64 bit (with less bugs) (was Re: hex and oct again (was Re: FreeBSD
MD5 crypt? Re: crypt/hex/oct and Unicode?)))
From: Nicholas Clark <[EMAIL PROTECTED]>
Date: Tue, 11 Sep 2001 00:00:31 +0100
Message-ID: <[EMAIL PROTECTED]>
Affected files ...
... //depot/perl/MANIFEST#555 edit
... //depot/perl/numeric.c#14 edit
... //depot/perl/perl.h#391 edit
... //depot/perl/regcomp.c#237 edit
... //depot/perl/t/op/pat.t#118 edit
... //depot/perl/t/op/qq.t#1 add
... //depot/perl/toke.c#384 edit
Differences ...
==== //depot/perl/MANIFEST#555 (text) ====
Index: perl/MANIFEST
--- perl/MANIFEST.~1~ Mon Sep 10 18:15:05 2001
+++ perl/MANIFEST Mon Sep 10 18:15:05 2001
@@ -2096,6 +2096,7 @@
t/op/pos.t See if pos works
t/op/push.t See if push and pop work
t/op/pwent.t See if getpw*() functions work
+t/op/qq.t See if qq works
t/op/quotemeta.t See if quotemeta works
t/op/rand.t See if rand works
t/op/range.t See if .. works
==== //depot/perl/numeric.c#14 (text) ====
Index: perl/numeric.c
--- perl/numeric.c.~1~ Mon Sep 10 18:15:05 2001
+++ perl/numeric.c Mon Sep 10 18:15:05 2001
@@ -122,8 +122,9 @@
and writes the value to I<*result> (or the value is discarded if I<result>
is NULL).
-The hex number may optinally be prefixed with "0b" or "b". If
-C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> on entry then the binary
+The hex number may optinally be prefixed with "0b" or "b" unless
+C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
+C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
number may use '_' characters to separate digits.
=cut
@@ -140,18 +141,20 @@
bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
bool overflowed = FALSE;
- /* strip off leading b or 0b.
- for compatibility silently suffer "b" and "0b" as valid binary numbers.
- */
- if (len >= 1) {
- if (s[0] == 'b') {
- s++;
- len--;
- }
- else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
- s+=2;
- len-=2;
- }
+ if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
+ /* strip off leading b or 0b.
+ for compatibility silently suffer "b" and "0b" as valid binary
+ numbers. */
+ if (len >= 1) {
+ if (s[0] == 'b') {
+ s++;
+ len--;
+ }
+ else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
+ s+=2;
+ len-=2;
+ }
+ }
}
for (; len-- && *s; s++) {
@@ -233,8 +236,9 @@
and writes the value to I<*result> (or the value is discarded if I<result>
is NULL).
-The hex number may optinally be prefixed with "0x" or "x". If
-C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> on entry then the hex
+The hex number may optinally be prefixed with "0x" or "x" unless
+C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
+C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
number may use '_' characters to separate digits.
=cut
@@ -252,17 +256,20 @@
bool overflowed = FALSE;
const char *hexdigit;
- /* strip off leading x or 0x.
- for compatibility silently suffer "x" and "0x" as valid hex numbers. */
- if (len >= 1) {
- if (s[0] == 'x') {
- s++;
- len--;
- }
- else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
- s+=2;
- len-=2;
- }
+ if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
+ /* strip off leading x or 0x.
+ for compatibility silently suffer "x" and "0x" as valid hex numbers.
+ */
+ if (len >= 1) {
+ if (s[0] == 'x') {
+ s++;
+ len--;
+ }
+ else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
+ s+=2;
+ len-=2;
+ }
+ }
}
for (; len-- && *s; s++) {
==== //depot/perl/perl.h#391 (text) ====
Index: perl/perl.h
--- perl/perl.h.~1~ Mon Sep 10 18:15:05 2001
+++ perl/perl.h Mon Sep 10 18:15:05 2001
@@ -3860,6 +3860,7 @@
/* Input flags: */
#define PERL_SCAN_ALLOW_UNDERSCORES 0x01 /* grok_??? accept _ in numbers */
+#define PERL_SCAN_DISALLOW_PREFIX 0x02 /* grok_??? reject 0x in hex etc */
/* Output flags: */
#define PERL_SCAN_GREATER_THAN_UV_MAX 0x02 /* should this merge with above? */
==== //depot/perl/regcomp.c#237 (text) ====
Index: perl/regcomp.c
--- perl/regcomp.c.~1~ Mon Sep 10 18:15:05 2001
+++ perl/regcomp.c Mon Sep 10 18:15:05 2001
@@ -3039,7 +3039,8 @@
vFAIL("Missing right brace on \\x{}");
}
else {
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
+ I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+ | PERL_SCAN_DISALLOW_PREFIX;
numlen = e - p - 1;
ender = grok_hex(p + 1, &numlen, &flags, NULL);
if (ender > 0xff)
@@ -3053,7 +3054,7 @@
}
}
else {
- I32 flags = 0;
+ I32 flags = PERL_SCAN_DISALLOW_PREFIX;
numlen = 2;
ender = grok_hex(p, &numlen, &flags, NULL);
p += numlen;
@@ -3449,7 +3450,8 @@
case 'a': value = ASCII_TO_NATIVE('\007');break;
case 'x':
if (*RExC_parse == '{') {
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
+ I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+ | PERL_SCAN_DISALLOW_PREFIX;
e = strchr(RExC_parse++, '}');
if (!e)
vFAIL("Missing right brace on \\x{}");
@@ -3459,7 +3461,7 @@
RExC_parse = e + 1;
}
else {
- I32 flags = 0;
+ I32 flags = PERL_SCAN_DISALLOW_PREFIX;
numlen = 2;
value = grok_hex(RExC_parse, &numlen, &flags, NULL);
RExC_parse += numlen;
==== //depot/perl/t/op/pat.t#118 (xtext) ====
Index: perl/t/op/pat.t
--- perl/t/op/pat.t.~1~ Mon Sep 10 18:15:05 2001
+++ perl/t/op/pat.t Mon Sep 10 18:15:05 2001
@@ -6,7 +6,7 @@
$| = 1;
-print "1..686\n";
+print "1..714\n";
BEGIN {
chdir 't' if -d 't';
@@ -2008,3 +2008,113 @@
print "not " unless length($y) == 2 && $y eq $x;
print "ok 686\n";
}
+
+my $test = 687;
+
+# Force scalar context on the patern match
+sub ok ($$) {
+ my($ok, $name) = @_;
+
+ printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name;
+
+ printf "# Failed test at line %d\n", (caller)[2] unless $ok;
+
+ $test++;
+ return $ok;
+}
+
+{
+ # Check that \x## works. 5.6.1 and 5.005_03 fail some of these.
+ $x = "\x4e" . "E";
+ ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched.");
+
+ $x = "\x4e" . "i";
+ ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)");
+
+ $x = "\x4" . "j";
+ ok ($x =~ /^\x4j$/, "Check that invalid hex digit stops it (1)");
+
+ $x = "\x0" . "k";
+ ok ($x =~ /^\xk$/, "Check that invalid hex digit stops it (0)");
+
+ $x = "\x0" . "x";
+ ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0");
+
+ $x = "\x0" . "xa";
+ ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa");
+
+ $x = "\x9" . "_b";
+ ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b");
+
+ print "# and now again in [] ranges\n";
+
+ $x = "\x4e" . "E";
+ ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched.");
+
+ $x = "\x4e" . "i";
+ ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)");
+
+ $x = "\x4" . "j";
+ ok ($x =~ /^[\x4j]{2}$/, "Check that invalid hex digit stops it (1)");
+
+ $x = "\x0" . "k";
+ ok ($x =~ /^[\xk]{2}$/, "Check that invalid hex digit stops it (0)");
+
+ $x = "\x0" . "x";
+ ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0");
+
+ $x = "\x0" . "xa";
+ ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa");
+
+ $x = "\x9" . "_b";
+ ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b");
+
+}
+
+{
+ # Check that \x{##} works. 5.6.1 fails quite a few of these.
+
+ $x = "\x9b";
+ ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b");
+
+ $x = "\x9b" . "y";
+ ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)");
+
+ $x = "\x9b" . "y";
+ ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b");
+
+ $x = "\x9b" . "y";
+ ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b");
+
+ $x = "\x0" . "y";
+ ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0");
+
+ $x = "\x0" . "y";
+ ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0");
+
+ $x = "\x9b" . "y";
+ ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b");
+
+ print "# and now again in [] ranges\n";
+
+ $x = "\x9b";
+ ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b");
+
+ $x = "\x9b" . "y";
+ ok ($x =~ /^[\x{9_b}y]{2}$/, "\\x{9_b} is to be treated as \\x9b (again)");
+
+ $x = "\x9b" . "y";
+ ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b");
+
+ $x = "\x9b" . "y";
+ ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b");
+
+ $x = "\x0" . "y";
+ ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0");
+
+ $x = "\x0" . "y";
+ ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0");
+
+ $x = "\x9b" . "y";
+ ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b");
+}
==== //depot/perl/toke.c#384 (text) ====
Index: perl/toke.c
--- perl/toke.c.~1~ Mon Sep 10 18:15:05 2001
+++ perl/toke.c Mon Sep 10 18:15:05 2001
@@ -1452,7 +1452,8 @@
++s;
if (*s == '{') {
char* e = strchr(s, '}');
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
+ I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
+ PERL_SCAN_DISALLOW_PREFIX;
STRLEN len;
++s;
@@ -1467,7 +1468,7 @@
else {
{
STRLEN len = 2;
- I32 flags = 0;
+ I32 flags = PERL_SCAN_DISALLOW_PREFIX;
uv = grok_hex(s, &len, &flags, NULL);
s += len;
}
==== //depot/perl/t/op/qq.t#1 (text) ====
Index: perl/t/op/qq.t
--- perl/t/op/qq.t.~1~ Mon Sep 10 18:15:05 2001
+++ perl/t/op/qq.t Mon Sep 10 18:15:05 2001
@@ -0,0 +1,63 @@
+#!./perl
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+}
+
+print q(1..21
+);
+
+# This is() function is written to avoid ""
+my $test = 1;
+sub is {
+ my($left, $right) = @_;
+
+ if ($left eq $right) {
+ printf 'ok %d
+', $test++;
+ return 1;
+ }
+ foreach ($left, $right) {
+ # Comment out these regexps to map non-printables to ord if the perl under
+ # test is so broken that it's not helping
+ s/([^-+A-Za-z_0-9])/sprintf q{'.chr(%d).'}, ord $1/ge;
+ $_ = sprintf q('%s'), $_;
+ s/^''\.//;
+ s/\.''$//;
+ }
+ printf q(not ok %d - got %s expected %s
+), $test++, $left, $right;
+
+ printf q(# Failed test at line %d
+), (caller)[2];
+
+ return 0;
+}
+
+is ("\x53", chr 83);
+is ("\x4EE", chr (78) . 'E');
+is ("\x4i", chr (4) . 'i'); # This will warn
+is ("\xh", chr (0) . 'h'); # This will warn
+is ("\xx", chr (0) . 'x'); # This will warn
+is ("\xx9", chr (0) . 'x9'); # This will warn. \x9 is tab in EBCDIC too?
+is ("\x9_E", chr (9) . '_E'); # This will warn
+
+is ("\x{4E}", chr 78);
+is ("\x{6_9}", chr 105);
+is ("\x{_6_3}", chr 99);
+is ("\x{_6B}", chr 107);
+
+is ("\x{9__0}", chr 9); # multiple underscores not allowed.
+is ("\x{77_}", chr 119); # trailing underscore warns.
+is ("\x{6FQ}z", chr (111) . 'z');
+
+is ("\x{0x4E}", chr 0);
+is ("\x{x4E}", chr 0);
+
+is ("\x{0065}", chr 101);
+is ("\x{000000000000000000000000000000000000000000000000000000000000000072}",
+ chr 114);
+is ("\x{0_06_5}", chr 101);
+is ("\x{1234}", chr 4660);
+is ("\x{98765432}", chr 2557891634);
End of Patch.