Change 18290 by jhi@lyta on 2002/12/11 18:56:01
Fix [perl #15763].
Affected files ...
... //depot/maint-5.8/perl/regexec.c#4 edit
... //depot/maint-5.8/perl/t/op/pat.t#6 edit
Differences ...
==== //depot/maint-5.8/perl/regexec.c#4 (text) ====
Index: perl/regexec.c
--- perl/regexec.c#3~18095~ Mon Nov 4 09:45:47 2002
+++ perl/regexec.c Wed Dec 11 10:56:01 2002
@@ -1882,9 +1882,12 @@
goto phooey;
}
else if ((c = prog->regstclass)) {
- if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
+ if (minlen) {
+ I32 op = (U8)OP(prog->regstclass);
/* don't bother with what can't match */
- strend = HOPc(strend, -(minlen - 1));
+ if (PL_regkind[op] != EXACT && op != CANY)
+ strend = HOPc(strend, -(minlen - 1));
+ }
DEBUG_r({
SV *prop = sv_newmortal();
char *s0;
@@ -2269,17 +2272,17 @@
regprop(prop, scan);
{
char *s0 =
- do_utf8 ?
+ do_utf8 && OP(scan) != CANY ?
pv_uni_display(dsv0, (U8*)(locinput - pref_len),
pref0_len, 60, UNI_DISPLAY_REGEX) :
locinput - pref_len;
int len0 = do_utf8 ? strlen(s0) : pref0_len;
- char *s1 = do_utf8 ?
+ char *s1 = do_utf8 && OP(scan) != CANY ?
pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
locinput - pref_len + pref0_len;
int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
- char *s2 = do_utf8 ?
+ char *s2 = do_utf8 && OP(scan) != CANY ?
pv_uni_display(dsv2, (U8*)locinput,
PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
locinput;
==== //depot/maint-5.8/perl/t/op/pat.t#6 (xtext) ====
Index: perl/t/op/pat.t
--- perl/t/op/pat.t#5~18276~ Mon Dec 9 15:49:14 2002
+++ perl/t/op/pat.t Wed Dec 11 10:56:01 2002
@@ -6,7 +6,7 @@
$| = 1;
-print "1..942\n";
+print "1..968\n";
BEGIN {
chdir 't' if -d 't';
@@ -3006,4 +3006,53 @@
++$test;
}
-# last test 942
+{
+ print "# [perl #15763]\n";
+
+ $a = "x\x{100}";
+ chop $a; # but leaves the UTF-8 flag
+ $a .= "y"; # 1 byte before "y"
+
+ ok($a =~ /^\C/, 'match one \C on 1-byte UTF-8');
+ ok($a =~ /^\C{1}/, 'match \C{1}');
+
+ ok($a =~ /^\Cy/, 'match \Cy');
+ ok($a =~ /^\C{1}y/, 'match \C{1}y');
+
+ $a = "\x{100}y"; # 2 bytes before "y"
+
+ ok($a =~ /^\C/, 'match one \C on 2-byte UTF-8');
+ ok($a =~ /^\C{1}/, 'match \C{1}');
+ ok($a =~ /^\C\C/, 'match two \C');
+ ok($a =~ /^\C{2}/, 'match \C{2}');
+
+ ok($a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte');
+ ok($a =~ /^\C{3}/, 'match \C{3}');
+
+ ok($a =~ /^\C\Cy/, 'match two \C');
+ ok($a =~ /^\C{2}y/, 'match \C{2}');
+
+ ok($a !~ /^\C\C\Cy/, 'not match three \Cy');
+ ok($a !~ /^\C{2}\Cy/, 'not match \C{3}y');
+
+ $a = "\x{1000}y"; # 3 bytes before "y"
+
+ ok($a =~ /^\C/, 'match one \C on three-byte UTF-8');
+ ok($a =~ /^\C{1}/, 'match \C{1}');
+ ok($a =~ /^\C\C/, 'match two \C');
+ ok($a =~ /^\C{2}/, 'match \C{2}');
+ ok($a =~ /^\C\C\C/, 'match three \C');
+ ok($a =~ /^\C{3}/, 'match \C{3}');
+
+ ok($a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte');
+ ok($a =~ /^\C{4}/, 'match \C{4}');
+
+ ok($a =~ /^\C\C\Cy/, 'match three \Cy');
+ ok($a =~ /^\C{3}y/, 'match \C{3}y');
+
+ ok($a !~ /^\C\C\C\C\y/, 'not match four \Cy');
+ ok($a !~ /^\C{4}y/, 'not match \C{4}y');
+}
+
+# last test 968
+
End of Patch.