In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/d981ef2485de59a9de4d1fe1ddfbd76889abe0b9?hp=b57e41186b2ceb48bef4f0588dcd19e105cc8a38>
- Log -----------------------------------------------------------------
commit d981ef2485de59a9de4d1fe1ddfbd76889abe0b9
Author: Karl Williamson <pub...@khwilliamson.com>
Date: Tue Jan 18 16:08:08 2011 -0700

    regexec.c: Remove break statements from macros

    This is so future coders won't be tempted to rely on them.

M regexec.c

commit 6895a8aa39da30e5d27a918f8082897ce5f21779
Author: Karl Williamson <pub...@khwilliamson.com>
Date: Tue Jan 18 16:06:47 2011 -0700

    regexec.c: Don't rely on break stmts in macros

    It is safer and clearer to have the break statement in each case
    statement at the source level

M regexec.c

commit 8e9da4d422fd1fb3711b88105d4e58f6b8f88877
Author: Karl Williamson <pub...@khwilliamson.com>
Date: Tue Jan 18 16:01:11 2011 -0700

    regexec.c: Fix /a complements

    This showed up only on some systems in the current test suite, but
    processing, e.g., \D has to care about the target string being utf8.

M regexec.c

-----------------------------------------------------------------------

Summary of changes:
 regexec.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/regexec.c b/regexec.c
index 748e047..b59b8bf 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1333,8 +1333,7 @@ if ((!reginfo || regtry(reginfo, &s))) \
     } \
     else { \
         REXEC_FBC_CLASS_SCAN(CoNd); \
-    } \
-    break
+    }
 
 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
     if (utf8_target) { \
@@ -1343,8 +1342,7 @@ if ((!reginfo || regtry(reginfo, &s))) \
     } \
     else { \
         REXEC_FBC_CLASS_SCAN(CoNd); \
-    } \
-    break
+    }
 
 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
     PL_reg_flags |= RF_tainted; \
@@ -1353,8 +1351,7 @@ if ((!reginfo || regtry(reginfo, &s))) \
     } \
     else { \
         REXEC_FBC_CLASS_SCAN(CoNd); \
-    } \
-    break
+    }
 
 #define DUMP_EXEC_POS(li,s,doutf8) \
     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
@@ -1676,128 +1673,170 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                 isALNUM_LC_utf8((U8*)s),
                 isALNUM_LC(*s)
             );
+            break;
         case ALNUMU:
             REXEC_FBC_CSCAN_PRELOAD(
                 LOAD_UTF8_CHARCLASS_PERL_WORD(),
                 swash_fetch(RE_utf8_perl_word,(U8*)s, utf8_target),
                 isWORDCHAR_L1((U8) *s)
             );
+            break;
         case ALNUM:
             REXEC_FBC_CSCAN_PRELOAD(
                 LOAD_UTF8_CHARCLASS_PERL_WORD(),
                 swash_fetch(RE_utf8_perl_word,(U8*)s, utf8_target),
                 isWORDCHAR((U8) *s)
             );
+            break;
         case ALNUMA:
+            /* Don't need to worry about utf8, as it can match only a single
+             * byte invariant character */
             REXEC_FBC_CLASS_SCAN( isWORDCHAR_A(*s));
+            break;
         case NALNUMU:
             REXEC_FBC_CSCAN_PRELOAD(
                 LOAD_UTF8_CHARCLASS_PERL_WORD(),
                 swash_fetch(RE_utf8_perl_word,(U8*)s, utf8_target),
                 ! isWORDCHAR_L1((U8) *s)
             );
+            break;
         case NALNUM:
             REXEC_FBC_CSCAN_PRELOAD(
                 LOAD_UTF8_CHARCLASS_PERL_WORD(),
                 !swash_fetch(RE_utf8_perl_word, (U8*)s, utf8_target),
                 ! isALNUM(*s)
             );
+            break;
         case NALNUMA:
-            REXEC_FBC_UTF8_CLASS_SCAN( !isWORDCHAR_A(*s));
+            REXEC_FBC_CSCAN(
+                !isWORDCHAR_A(*s),
+                !isWORDCHAR_A(*s)
+            );
+            break;
         case NALNUML:
             REXEC_FBC_CSCAN_TAINT(
                 !isALNUM_LC_utf8((U8*)s),
                 !isALNUM_LC(*s)
             );
+            break;
         case SPACEU:
             REXEC_FBC_CSCAN_PRELOAD(
                 LOAD_UTF8_CHARCLASS_PERL_SPACE(),
                 *s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, utf8_target),
                 isSPACE_L1((U8) *s)
             );
+            break;
         case SPACE:
             REXEC_FBC_CSCAN_PRELOAD(
                 LOAD_UTF8_CHARCLASS_PERL_SPACE(),
                 *s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, utf8_target),
                 isSPACE((U8) *s)
             );
+            break;
         case SPACEA:
+            /* Don't need to worry about utf8, as it can match only a single
+             * byte invariant character */
             REXEC_FBC_CLASS_SCAN( isSPACE_A(*s));
+            break;
         case SPACEL:
             REXEC_FBC_CSCAN_TAINT(
                 isSPACE_LC_utf8((U8*)s),
                 isSPACE_LC(*s)
             );
+            break;
         case NSPACEU:
             REXEC_FBC_CSCAN_PRELOAD(
                 LOAD_UTF8_CHARCLASS_PERL_SPACE(),
                 !( *s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, utf8_target)),
                 ! isSPACE_L1((U8) *s)
             );
+            break;
         case NSPACE:
             REXEC_FBC_CSCAN_PRELOAD(
                 LOAD_UTF8_CHARCLASS_PERL_SPACE(),
                 !(*s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, utf8_target)),
                 ! isSPACE((U8) *s)
             );
+            break;
         case NSPACEA:
-            REXEC_FBC_UTF8_CLASS_SCAN( !isSPACE_A(*s));
+            REXEC_FBC_CSCAN(
+                !isSPACE_A(*s),
+                !isSPACE_A(*s)
+            );
+            break;
         case NSPACEL:
             REXEC_FBC_CSCAN_TAINT(
                 !isSPACE_LC_utf8((U8*)s),
                 !isSPACE_LC(*s)
             );
+            break;
         case DIGIT:
             REXEC_FBC_CSCAN_PRELOAD(
                 LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
                 swash_fetch(RE_utf8_posix_digit,(U8*)s, utf8_target),
                 isDIGIT(*s)
             );
+            break;
         case DIGITA:
+            /* Don't need to worry about utf8, as it can match only a single
+             * byte invariant character */
             REXEC_FBC_CLASS_SCAN( isDIGIT_A(*s));
+            break;
         case DIGITL:
             REXEC_FBC_CSCAN_TAINT(
                 isDIGIT_LC_utf8((U8*)s),
                 isDIGIT_LC(*s)
             );
+            break;
         case NDIGIT:
             REXEC_FBC_CSCAN_PRELOAD(
                 LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
                 !swash_fetch(RE_utf8_posix_digit,(U8*)s, utf8_target),
                 !isDIGIT(*s)
             );
+            break;
         case NDIGITA:
-            REXEC_FBC_UTF8_CLASS_SCAN( !isDIGIT_A(*s));
+            REXEC_FBC_CSCAN(
+                !isDIGIT_A(*s),
+                !isDIGIT_A(*s)
+            );
+            break;
         case NDIGITL:
             REXEC_FBC_CSCAN_TAINT(
                 !isDIGIT_LC_utf8((U8*)s),
                 !isDIGIT_LC(*s)
             );
+            break;
         case LNBREAK:
             REXEC_FBC_CSCAN(
                 is_LNBREAK_utf8(s),
                 is_LNBREAK_latin1(s)
             );
+            break;
         case VERTWS:
             REXEC_FBC_CSCAN(
                 is_VERTWS_utf8(s),
                 is_VERTWS_latin1(s)
             );
+            break;
         case NVERTWS:
             REXEC_FBC_CSCAN(
                 !is_VERTWS_utf8(s),
                 !is_VERTWS_latin1(s)
             );
+            break;
         case HORIZWS:
             REXEC_FBC_CSCAN(
                 is_HORIZWS_utf8(s),
                 is_HORIZWS_latin1(s)
             );
+            break;
         case NHORIZWS:
             REXEC_FBC_CSCAN(
                 !is_HORIZWS_utf8(s),
                 !is_HORIZWS_latin1(s)
             );
+            break;
         case AHOCORASICKC:
         case AHOCORASICK:
             {

-- 
Perl5 Master Repository