Change 34292 by [EMAIL PROTECTED] on 2008/09/06 07:17:39
Integrate:
[ 34268]
Integrate:
[ 34125]
Simplify code in pp_formline, removing a goto and localising and
consting a variable.
[ 34129]
Regression tests for, and fix quite a lot of bugs in, cases FF_LINESNGL
and FF_LINEGLOB in pp_formline. (While investigating RT #55668)
Sometimes I wonder if this is actually fun.
[ 34135]
Format tests for @* too.
[ 34270]
Integrate:
[ 34139]
For cases FF_LINESNGL and FF_LINEGLOB in pp_formline, take great care
to call get magic exactly once. This doesn't just avoid logical errors
with tied variables, it actually avoids panics (or worse) because a
pointer is retained to the string returned by the (first) call to
SvPV_const() for a future sv_chop(), and any future call to get magic
can invalidate the buffer that that pointer points to.
Also this removes the original crazy code that would set then reset the
length of a scalar, so as to only copy an initial portion of it, and
also copy the entire scalar including trailing newline (which might
require allocating more memory), only to immediately remove the newline
from the copy by reducing the length by one.
[ 34140]
Make format items @* and ^* work with references (safely). Note no-one
said anything about sanely.
Affected files ...
... //depot/maint-5.8/perl/pp_ctl.c#190 integrate
... //depot/maint-5.8/perl/t/op/write.t#15 integrate
Differences ...
==== //depot/maint-5.8/perl/pp_ctl.c#190 (text) ====
Index: perl/pp_ctl.c
--- perl/pp_ctl.c#189~33926~ 2008-05-25 14:12:26.000000000 -0700
+++ perl/pp_ctl.c 2008-09-06 00:17:39.000000000 -0700
@@ -356,7 +356,6 @@
SV * nsv = NULL;
OP * parseres = NULL;
const char *fmt;
- bool oneline;
if (!SvMAGICAL(tmpForm) || !SvCOMPILED(tmpForm)) {
if (SvREADONLY(tmpForm)) {
@@ -422,6 +421,7 @@
*t = '\0';
sv_catpvn_utf8_upgrade(PL_formtarget, f, arg, nsv);
t = SvEND(PL_formtarget);
+ f += arg;
break;
}
if (!targ_is_utf8 && DO_UTF8(tmpForm)) {
@@ -682,51 +682,76 @@
case FF_LINESNGL:
chopspace = 0;
- oneline = TRUE;
- goto ff_line;
case FF_LINEGLOB:
- oneline = FALSE;
- ff_line:
{
+ const bool oneline = fpc[-1] == FF_LINESNGL;
const char *s = item = SvPV_const(sv, len);
+ item_is_utf8 = DO_UTF8(sv);
itemsize = len;
- if ((item_is_utf8 = DO_UTF8(sv)))
- itemsize = sv_len_utf8(sv);
if (itemsize) {
- bool chopped = FALSE;
+ STRLEN to_copy = itemsize;
const char *const send = s + len;
+ const U8 *source = (const U8 *) s;
+ U8 *tmp = NULL;
+
gotsome = TRUE;
chophere = s + itemsize;
while (s < send) {
if (*s++ == '\n') {
if (oneline) {
- chopped = TRUE;
+ to_copy = s - SvPVX_const(sv) - 1;
chophere = s;
break;
} else {
if (s == send) {
itemsize--;
- chopped = TRUE;
+ to_copy--;
} else
lines++;
}
}
}
- SvCUR_set(PL_formtarget, t - SvPVX_const(PL_formtarget));
- if (targ_is_utf8)
- SvUTF8_on(PL_formtarget);
- if (oneline) {
- SvCUR_set(sv, chophere - item);
- sv_catsv(PL_formtarget, sv);
- SvCUR_set(sv, itemsize);
- } else
- sv_catsv(PL_formtarget, sv);
- if (chopped)
- SvCUR_set(PL_formtarget, SvCUR(PL_formtarget) - 1);
- SvGROW(PL_formtarget, SvCUR(PL_formtarget) + fudge + 1);
+ if (targ_is_utf8 && !item_is_utf8) {
+ source = tmp = bytes_to_utf8(source, &to_copy);
+ SvCUR_set(PL_formtarget,
+ t - SvPVX_const(PL_formtarget));
+ } else {
+ if (item_is_utf8 && !targ_is_utf8) {
+ /* Upgrade targ to UTF8, and then we reduce it to
+ a problem we have a simple solution for. */
+ SvCUR_set(PL_formtarget,
+ t - SvPVX_const(PL_formtarget));
+ targ_is_utf8 = TRUE;
+ /* Don't need get magic. */
+ sv_utf8_upgrade_flags(PL_formtarget, 0);
+ } else {
+ SvCUR_set(PL_formtarget,
+ t - SvPVX_const(PL_formtarget));
+ }
+
+ /* Easy. They agree. */
+ assert (item_is_utf8 == targ_is_utf8);
+ }
+ SvGROW(PL_formtarget,
+ SvCUR(PL_formtarget) + to_copy + fudge + 1);
t = SvPVX(PL_formtarget) + SvCUR(PL_formtarget);
- if (item_is_utf8)
- targ_is_utf8 = TRUE;
+
+ Copy(source, t, to_copy, char);
+ t += to_copy;
+ SvCUR_set(PL_formtarget, SvCUR(PL_formtarget) + to_copy);
+ if (item_is_utf8) {
+ if (SvGMAGICAL(sv)) {
+ /* Mustn't call sv_pos_b2u() as it does a second
+ mg_get(). Is this a bug? Do we need a _flags()
+ variant? */
+ itemsize = utf8_length(source, source + itemsize);
+ } else {
+ sv_pos_b2u(sv, &itemsize);
+ }
+ assert(!tmp);
+ } else if (tmp) {
+ Safefree(tmp);
+ }
}
break;
}
==== //depot/maint-5.8/perl/t/op/write.t#15 (xtext) ====
Index: perl/t/op/write.t
--- perl/t/op/write.t#14~34291~ 2008-09-05 23:54:40.000000000 -0700
+++ perl/t/op/write.t 2008-09-06 00:17:39.000000000 -0700
@@ -61,7 +61,7 @@
my $bas_tests = 20;
# number of tests in section 3
-my $bug_tests = 4;
+my $bug_tests = 4 + 3 * 3 * 5 * 2 * 3 + 2;
# number of tests in section 4
my $hmb_tests = 35;
@@ -511,6 +511,72 @@
like $@, qr/Undefined format/, 'no such format';
}
+{
+ package Count;
+
+ sub TIESCALAR {
+ my $class = shift;
+ bless [shift, 0, 0], $class;
+ }
+
+ sub FETCH {
+ my $self = shift;
+ ++$self->[1];
+ $self->[0];
+ }
+
+ sub STORE {
+ my $self = shift;
+ ++$self->[2];
+ $self->[0] = shift;
+ }
+}
+
+{
+ my ($pound_utf8, $pm_utf8) = map { my $a = "$_\x{100}"; chop $a; $a}
+ my ($pound, $pm) = ("\xA3", "\xB1");
+
+ foreach my $first ('N', $pound, $pound_utf8) {
+ foreach my $base ('N', $pm, $pm_utf8) {
+ foreach my $second ($base, "$base\n", "$base\nMoo!", "$base\nMoo!\n",
+ "$base\nMoo!\n",) {
+ foreach (['^*', qr/(.+)/], ['@*', qr/(.*?)$/s]) {
+ my ($format, $re) = @$_;
+ foreach my $class ('', 'Count') {
+ my $name = "$first, $second $format $class";
+ $name =~ s/\n/\\n/g;
+
+ $first =~ /(.+)/ or die $first;
+ my $expect = "1${1}2";
+ $second =~ $re or die $second;
+ $expect .= " 3${1}4";
+
+ if ($class) {
+ my $copy1 = $first;
+ my $copy2;
+ tie $copy2, $class, $second;
+ is swrite("1^*2 3${format}4", $copy1, $copy2), $expect, $name;
+ my $obj = tied $copy2;
+ is $obj->[1], 1, 'value read exactly once';
+ } else {
+ my ($copy1, $copy2) = ($first, $second);
+ is swrite("1^*2 3${format}4", $copy1, $copy2), $expect, $name;
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+{
+ # This will fail an assertion in 5.10.0 built with -DDEBUGGING (because
+ # pp_formline attempts to set SvCUR() on an SVt_RV). I suspect that it will
+ # be doing something similarly out of bounds on everything from 5.000
+ my $ref = [];
+ is swrite('>^*<', $ref), ">$ref<";
+ is swrite('>@*<', $ref), ">$ref<";
+}
format EMPTY =
.
End of Patch.