In perl.git, the branch smoke-me/remove-regcomp-setjmp has been updated <http://perl5.git.perl.org/perl.git/commitdiff/281ebc7936c3263bdbd589f5d2fe217e39676b43?hp=7eb35c035978bf0530591a57244b6922682a3344>
- Log ----------------------------------------------------------------- commit 281ebc7936c3263bdbd589f5d2fe217e39676b43 Author: Nicholas Clark <n...@ccl4.org> Date: Wed Jan 16 11:48:04 2013 +0100 Perl_sv_uni_display() needs to be aware of RX_WRAPPED() Commit 8d919b0a35f2b57a changed the storage location of the string in SVt_REGEXP. It updated most code to deal with this, but missed the use of SvPVX_const() in Perl_sv_uni_display(). This breaks dumping regular expressions which have the UTF-8 flag set. M ext/Devel-Peek/t/Peek.t M utf8.c commit e6e55f028ab8d809a04f11386765deae2c594b55 Author: Nicholas Clark <n...@ccl4.org> Date: Mon Jan 14 09:46:48 2013 +0100 Remove unreachable duplicate (?#...) parsing code from S_reg() I believe that this code was rendered unreachable when perl 5.001 added code to S_nextchar() to skip over embedded comments. Adrian Enache noted this in March 2003, and proposed a patch which removed it. See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2003-03/msg00840.html The patch wasn't applied at that time, and when he sent it again August, he omitted that hunk. See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2003-08/msg01820.html That version was applied as commit e994fd663a4d8acc. M regcomp.c ----------------------------------------------------------------------- Summary of changes: ext/Devel-Peek/t/Peek.t | 35 +++++++++++++++++++++++++++++++++++ regcomp.c | 8 -------- utf8.c | 5 ++++- 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/ext/Devel-Peek/t/Peek.t b/ext/Devel-Peek/t/Peek.t index b3dbc9b..a8b0d22 100644 --- a/ext/Devel-Peek/t/Peek.t +++ b/ext/Devel-Peek/t/Peek.t @@ -938,4 +938,39 @@ unless ($Config{useithreads}) { close OUT; } +do_test('UTF-8 in a regular expression', + qr/\x{100}/, +'SV = IV\($ADDR\) at $ADDR + REFCNT = 1 + FLAGS = \(ROK\) + RV = $ADDR + SV = REGEXP\($ADDR\) at $ADDR + REFCNT = 1 + FLAGS = \(OBJECT,FAKE,UTF8\) + PV = $ADDR "\(\?\^u:\\\\\\\\x\{100\}\)" \[UTF8 "\(\?\^u:\\\\\\\\x\{100\}\)"\] + CUR = 13 + STASH = $ADDR "Regexp" + EXTFLAGS = 0x680040 \(CHECK_ALL,USE_INTUIT_NOML,USE_INTUIT_ML\) + INTFLAGS = 0x0 + NPARENS = 0 + LASTPAREN = 0 + LASTCLOSEPAREN = 0 + MINLEN = 1 + MINLENRET = 1 + GOFS = 0 + PRE_PREFIX = 5 + SUBLEN = 0 + SUBOFFSET = 0 + SUBCOFFSET = 0 + SUBBEG = 0x0 + ENGINE = $ADDR + MOTHER_RE = $ADDR + PAREN_NAMES = 0x0 + SUBSTRS = $ADDR + PPRIVATE = $ADDR + OFFS = $ADDR + QR_ANONCV = 0x0 + SAVED_COPY = 0x0 +'); + done_testing(); diff --git a/regcomp.c b/regcomp.c index 05e9fe5..22a3191 100644 --- a/regcomp.c +++ b/regcomp.c @@ -8630,14 +8630,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) case '@': /* (?@...) */ vFAIL2("Sequence (?%c...) not implemented", (int)paren); break; - case '#': /* (?#...) */ - while (*RExC_parse && *RExC_parse != ')') - RExC_parse++; - if (*RExC_parse != ')') - FAIL("Sequence (?#... not terminated"); - nextchar(pRExC_state); - *flagp = TRYAGAIN; - return NULL; case '0' : /* (?0) */ case 'R' : /* (?R) */ if (*RExC_parse != ')') diff --git a/utf8.c b/utf8.c index dfb303f..7818ec6 100644 --- a/utf8.c +++ b/utf8.c @@ -4423,9 +4423,12 @@ The pointer to the PV of the C<dsv> is returned. char * Perl_sv_uni_display(pTHX_ SV *dsv, SV *ssv, STRLEN pvlim, UV flags) { + const char * const ptr = + isREGEXP(ssv) ? RX_WRAPPED((REGEXP*)ssv) : SvPVX_const(ssv); + PERL_ARGS_ASSERT_SV_UNI_DISPLAY; - return Perl_pv_uni_display(aTHX_ dsv, (const U8*)SvPVX_const(ssv), + return Perl_pv_uni_display(aTHX_ dsv, (const U8*)ptr, SvCUR(ssv), pvlim, flags); } -- Perl5 Master Repository