In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/6dd6e9f9592f7349aa8ad652821e3faf61292a83?hp=989c251f8fe733cc99daad2f2b7e558ee5938b48>
- Log -----------------------------------------------------------------
commit 6dd6e9f9592f7349aa8ad652821e3faf61292a83
Author: Karl Williamson <[email protected]>
Date: Thu Dec 25 13:16:19 2014 -0700

    regcomp.c: Fix [_A-Z] for EBCDIC

    Special handling is required on EBCDIC for ranges that are subsets of
    either a-z or A-Z. This is triggered when both ends are literals. It
    is implemented by keeping a count of the literal endpoints, and when
    that is two, do the handling. But the count was not getting reset, so
    it could go to 3, 4, ..., so the special handling would only get
    triggered if the range was the first thing in the brackets, like
    [A-Z], but not if there was something before it, like [_A-Z].

    The solution is to reset the counter appropriately each time through
    the loop.

    For the A-Z range, the ASCII-equivalent characters wrongly matched
    were backslash and '}'. For a-z, it was '~'.

M regcomp.c
M t/re/re_tests

commit 17491e96699ffe025decd7cd43635402fb5b58e2
Author: Karl Williamson <[email protected]>
Date: Thu Dec 25 13:15:58 2014 -0700

    regcomp.c: Replace dead code with NOT_REACHED

M regcomp.c
-----------------------------------------------------------------------

Summary of changes:
 regcomp.c     | 5 ++++-
 t/re/re_tests | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/regcomp.c b/regcomp.c
index b19d5e7..30a94dc 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -13911,6 +13911,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         if (!range) {
             rangebegin = RExC_parse;
             element_count++;
+#ifdef EBCDIC
+            literal_endpoint = 0;
+#endif
         }
         if (UTF) {
             value = utf8n_to_uvchr((U8*)RExC_parse,
@@ -14497,7 +14500,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     vFAIL2utf8f(
                         "Invalid [] range \"%"UTF8f"\"",
                         UTF8fARG(UTF, w, rangebegin));
-                    range = 0; /* not a valid range */
+                    NOT_REACHED; /* NOT REACHED */
                 }
             }
             else {
diff --git a/t/re/re_tests b/t/re/re_tests
index c208eba..0341f77 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -256,6 +256,7 @@ a[bcd]+dcdcde adcdcde n - -
 ((a)(b)c)(d) abcd y $-[4] 3
 ((a)(b)c)(d) abcd y $+[4] 4
 [a-zA-Z_][a-zA-Z0-9_]* alpha y $& alpha
+[_A-Z] } n - - # This could match on EBCDIC if A-Z not excluding things in middle
 ^a(bc+|b[eh])g|.h$ abh y $&-$1 bh-
 (bc+d$|ef*g.|h?i(j|k)) effgz y $&-$1-$2 effgz-effgz-
 (bc+d$|ef*g.|h?i(j|k)) ij y $&-$1-$2 ij-ij-j

--
Perl5 Master Repository
