In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/2780a6ede39cb2f190256ad43e1c61535103caf9?hp=49fb45ddc8d9f3f37c5080633e16ae291297ddc2>
- Log ----------------------------------------------------------------- commit 2780a6ede39cb2f190256ad43e1c61535103caf9 Merge: 49fb45d dbf00f6 Author: Tony Cook <[email protected]> Date: Wed Jul 17 11:09:21 2013 +1000 [perl #74798] useqq implementation for xs commit dbf00f6932923fb26983322f5049702fc937a399 Author: Tony Cook <[email protected]> Date: Wed Jul 10 14:54:20 2013 +1000 handle xs Useqq dumping of strings with an escape followed by a digit The original patch didn't handle a string like "\x001" correctly, encoding it as "\01" rather than "\0001". Added tests for this case and some possible corner cases M dist/Data-Dumper/Dumper.pm M dist/Data-Dumper/Dumper.xs M dist/Data-Dumper/t/dumper.t commit f1c459431a0f7a1dabba8b8a2b063a110f2f6284 Author: Tony Cook <[email protected]> Date: Wed Jul 10 14:19:17 2013 +1000 adjust indentation to match other DD code M dist/Data-Dumper/Dumper.xs commit 9baac1a3613bd641a847683d7877b3cfab3244bc Author: Slaven Rezic <[email protected]> Date: Wed Jul 10 14:18:18 2013 +1000 Data::Dumper: useqq implementation for xs Tests are mainly unchanged, just a "cheat" and a couple of TODOs were removed. M dist/Data-Dumper/Dumper.pm M dist/Data-Dumper/Dumper.xs M dist/Data-Dumper/t/dumper.t ----------------------------------------------------------------------- Summary of changes: dist/Data-Dumper/Dumper.pm | 3 +- dist/Data-Dumper/Dumper.xs | 106 +++++++++++++++++++++++++++++++++----------- dist/Data-Dumper/t/dumper.t | 51 ++++++++++++++------- 3 files changed, 116 insertions(+), 44 deletions(-) diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm index 7c778dc..fca6ab1 100644 --- a/dist/Data-Dumper/Dumper.pm +++ b/dist/Data-Dumper/Dumper.pm @@ -10,7 +10,7 @@ package Data::Dumper; BEGIN { - $VERSION = '2.146'; # Don't forget to set version and release + $VERSION = '2.147'; # Don't forget to set version and release } # date in POD below! #$| = 1; @@ -221,7 +221,6 @@ sub DESTROY {} sub Dump { return &Dumpxs unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) || - $Data::Dumper::Useqq || (ref($_[0]) && $_[0]->{useqq}) || $Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse}); return &Dumpperl; } diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs index b74650a..0194a2c 100644 --- a/dist/Data-Dumper/Dumper.xs +++ b/dist/Data-Dumper/Dumper.xs @@ -18,7 +18,7 @@ static I32 num_q (const char *s, STRLEN slen); static I32 esc_q (char *dest, const char *src, STRLEN slen); -static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen); +static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq); static I32 needs_quote(const char *s, STRLEN len); static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n); static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, @@ -26,7 +26,7 @@ static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity, I32 deepcopy, I32 quotekeys, SV *bless, - I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash); + I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash, I32 useqq); #ifndef HvNAME_get #define HvNAME_get HvNAME @@ -158,8 +158,9 @@ esc_q(char *d, const char *s, STRLEN slen) return ret; } +/* this function is also misused for implementing $Useqq */ static I32 -esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen) +esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq) { char *r, *rstart; const char *s = src; @@ -174,14 +175,21 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen) STRLEN qq_escapables = 0; /* " $ @ will need a \ in "" strings. */ STRLEN normal = 0; int increment; + UV next; /* this will need EBCDICification */ - for (s = src; s < send; s += increment) { - const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL); + for (s = src; s < send; do_utf8 ? s += increment : s++) { + const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s; /* check for invalid utf8 */ increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s); + /* this is only used to check if the next character is an + * ASCII digit, which are invariant, so if the following collects + * a UTF-8 start byte it does no harm + */ + next = (s + increment >= send ) ? 0 : *(U8*)(s+increment); + #ifdef EBCDIC if (!isprint(k) || k > 256) { #else @@ -195,6 +203,17 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen) k <= 0xFFFFFFFF ? 8 : UVSIZE * 4 #endif ); +#ifndef EBCDIC + } else if (useqq && + /* we can't use the short form like '\0' if followed by a digit */ + ((k >= 7 && k <= 10 || k == 12 || k == 13 || k == 27) + || (k < 8 && (next < '0' || next > '9')))) { + grow += 2; + } else if (useqq && k <= 31 && (next < '0' || next > '9')) { + grow += 3; + } else if (useqq && (k <= 31 || k >= 127)) { + grow += 4; +#endif } else if (k == '\\') { backslashes++; } else if (k == '\'') { @@ -205,7 +224,7 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen) normal++; } } - if (grow) { + if (grow || useqq) { /* We have something needing hex. 3 is ""\0 */ sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes + 2*qq_escapables + normal); @@ -213,8 +232,8 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen) *r++ = '"'; - for (s = src; s < send; s += UTF8SKIP(s)) { - const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL); + for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) { + const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s; if (k == '"' || k == '\\' || k == '$' || k == '@') { *r++ = '\\'; @@ -224,7 +243,44 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen) #ifdef EBCDIC if (isprint(k) && k < 256) #else - if (k < 0x80) + if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) { + bool next_is_digit; + + *r++ = '\\'; + switch (k) { + case 7: *r++ = 'a'; break; + case 8: *r++ = 'b'; break; + case 9: *r++ = 't'; break; + case 10: *r++ = 'n'; break; + case 12: *r++ = 'f'; break; + case 13: *r++ = 'r'; break; + case 27: *r++ = 'e'; break; + default: + increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s); + + /* only ASCII digits matter here, which are invariant, + * since we only encode characters \377 and under, or + * \x177 and under for a unicode string + */ + next = (s+increment < send) ? *(U8*)(s+increment) : 0; + next_is_digit = next >= '0' && next <= '9'; + + /* faster than + * r = r + my_sprintf(r, "%o", k); + */ + if (k <= 7 && !next_is_digit) { + *r++ = (char)k + '0'; + } else if (k <= 63 && !next_is_digit) { + *r++ = (char)(k>>3) + '0'; + *r++ = (char)(k&7) + '0'; + } else { + *r++ = (char)(k>>6) + '0'; + *r++ = (char)((k&63)>>3) + '0'; + *r++ = (char)(k&7) + '0'; + } + } + } + else if (k < 0x80) #endif *r++ = (char)k; else { @@ -298,7 +354,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity, I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys, - int use_sparse_seen_hash) + int use_sparse_seen_hash, I32 useqq) { char tmpbuf[128]; U32 i; @@ -524,7 +580,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); + maxdepth, sortkeys, use_sparse_seen_hash, useqq); sv_catpvn(retval, ")}", 2); } /* plain */ else { @@ -532,7 +588,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); + maxdepth, sortkeys, use_sparse_seen_hash, useqq); } SvREFCNT_dec(namesv); } @@ -544,7 +600,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); + maxdepth, sortkeys, use_sparse_seen_hash, useqq); SvREFCNT_dec(namesv); } else if (realtype == SVt_PVAV) { @@ -617,7 +673,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav, levelp, indent, pad, xpad, apad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); + maxdepth, sortkeys, use_sparse_seen_hash, useqq); if (ix < ixmax) sv_catpvn(retval, ",", 1); } @@ -777,9 +833,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, The code is also smaller (22044 vs 22260) because I've been able to pull the common logic out to both sides. */ if (quotekeys || needs_quote(key,keylen)) { - if (do_utf8) { + if (do_utf8 || useqq) { STRLEN ocur = SvCUR(retval); - nlen = esc_q_utf8(aTHX_ retval, key, klen); + nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, useqq); nkey = SvPVX(retval) + ocur; } else { @@ -824,7 +880,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv, postav, levelp, indent, pad, xpad, newapad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, - maxdepth, sortkeys, use_sparse_seen_hash); + maxdepth, sortkeys, use_sparse_seen_hash, useqq); SvREFCNT_dec(sname); Safefree(nkey_buffer); if (indent >= 2) @@ -973,7 +1029,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, r = SvPVX(retval)+SvCUR(retval); r[0] = '*'; r[1] = '{'; SvCUR_set(retval, SvCUR(retval)+2); - esc_q_utf8(aTHX_ retval, c, i); + esc_q_utf8(aTHX_ retval, c, i, 1, useqq); sv_grow(retval, SvCUR(retval)+2); r = SvPVX(retval)+SvCUR(retval); r[0] = '}'; r[1] = '\0'; @@ -1033,7 +1089,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, seenhv, postav, &nlevel, indent, pad, xpad, newapad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, bless, maxdepth, - sortkeys, use_sparse_seen_hash); + sortkeys, use_sparse_seen_hash, useqq); SvREFCNT_dec(e); } } @@ -1062,8 +1118,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, else { integer_came_from_string: c = SvPV(val, i); - if (DO_UTF8(val)) - i += esc_q_utf8(aTHX_ retval, c, i); + if (DO_UTF8(val) || useqq) + i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), useqq); else { sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */ r = SvPVX(retval) + SvCUR(retval); @@ -1108,7 +1164,7 @@ Data_Dumper_Dumpxs(href, ...) HV *seenhv = NULL; AV *postav, *todumpav, *namesav; I32 level = 0; - I32 indent, terse, i, imax, postlen; + I32 indent, terse, useqq, i, imax, postlen; SV **svp; SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname; SV *freezer, *toaster, *bless, *sortkeys; @@ -1149,7 +1205,7 @@ Data_Dumper_Dumpxs(href, ...) = freezer = toaster = bless = sortkeys = &PL_sv_undef; name = sv_newmortal(); indent = 2; - terse = purity = deepcopy = 0; + terse = purity = deepcopy = useqq = 0; quotekeys = 1; retval = newSVpvn("", 0); @@ -1173,10 +1229,8 @@ Data_Dumper_Dumpxs(href, ...) purity = SvIV(*svp); if ((svp = hv_fetch(hv, "terse", 5, FALSE))) terse = SvTRUE(*svp); -#if 0 /* useqq currently unused */ if ((svp = hv_fetch(hv, "useqq", 5, FALSE))) useqq = SvTRUE(*svp); -#endif if ((svp = hv_fetch(hv, "pad", 3, FALSE))) pad = *svp; if ((svp = hv_fetch(hv, "xpad", 4, FALSE))) @@ -1280,7 +1334,7 @@ Data_Dumper_Dumpxs(href, ...) DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv, postav, &level, indent, pad, xpad, newapad, sep, pair, freezer, toaster, purity, deepcopy, quotekeys, - bless, maxdepth, sortkeys, use_sparse_seen_hash); + bless, maxdepth, sortkeys, use_sparse_seen_hash, useqq); SPAGAIN; if (indent >= 2 && !terse) diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t index c1e5fe6..0a3c28c 100644 --- a/dist/Data-Dumper/t/dumper.t +++ b/dist/Data-Dumper/t/dumper.t @@ -83,11 +83,11 @@ sub SKIP_TEST { $Data::Dumper::Useperl = 1; if (defined &Data::Dumper::Dumpxs) { print "### XS extension loaded, will run XS tests\n"; - $TMAX = 402; $XS = 1; + $TMAX = 420; $XS = 1; } else { print "### XS extensions not loaded, will NOT run XS tests\n"; - $TMAX = 201; $XS = 0; + $TMAX = 210; $XS = 0; } print "1..$TMAX\n"; @@ -307,20 +307,9 @@ $foo = { "abc\000\'\efg" => "mno\000", { local $Data::Dumper::Useqq = 1; TEST q(Dumper($foo)); + TEST q(Data::Dumper::DumperX($foo)) if $XS; } - $WANT = <<"EOT"; -#\$VAR1 = { -# 'abc\0\\'\efg' => 'mno\0', -# 'reftest' => \\\\1 -#}; -EOT - - { - local $Data::Dumper::Useqq = 1; - TEST q(Data::Dumper::DumperX($foo)) if $XS; # cheat - } - ############# @@ -1461,7 +1450,7 @@ EOT $foo = [ join "", map chr, 0..255 ]; local $Data::Dumper::Useqq = 1; TEST q(Dumper($foo)), 'All latin1 characters'; - for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS; + TEST q(Data::Dumper::DumperX($foo)) if $XS; } ############# 372 @@ -1481,7 +1470,7 @@ EOT TEST q(Dumper($foo)), 'All latin1 characters with utf8 flag including a wide character'; } - for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS; + TEST q(Data::Dumper::DumperX($foo)) if $XS; } ############# 378 @@ -1537,3 +1526,33 @@ EOW TEST q(Data::Dumper->Dumpxs([\*finkle])), 'blessed overloaded globs (xs)' if $XS; } +############# 390 +{ + # [perl #74798] uncovered behaviour + $WANT = <<'EOW'; +#$VAR1 = "\0000"; +EOW + local $Data::Dumper::Useqq = 1; + TEST q(Data::Dumper->Dump(["\x000"])), + "\\ octal followed by digit"; + TEST q(Data::Dumper->Dumpxs(["\x000"])), '\\ octal followed by digit (xs)' + if $XS; + + $WANT = <<'EOW'; +#$VAR1 = "\x{100}\0000"; +EOW + local $Data::Dumper::Useqq = 1; + TEST q(Data::Dumper->Dump(["\x{100}\x000"])), + "\\ octal followed by digit unicode"; + TEST q(Data::Dumper->Dumpxs(["\x{100}\x000"])), '\\ octal followed by digit unicode (xs)' + if $XS; + + + $WANT = <<'EOW'; +#$VAR1 = "\0\x{660}"; +EOW + TEST q(Data::Dumper->Dump(["\\x00\\x{0660}"])), + "\\ octal followed by unicode digit"; + TEST q(Data::Dumper->Dumpxs(["\\x00\\x{0660}"])), '\\ octal followed by unicode digit (xs)' + if $XS; +} -- Perl5 Master Repository
