In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/85c8e3067c12431ded3d5289a8ec16cf2e33ebaf?hp=f7866a8bf5b54e475ca2ed917f5767b7573c6c8e>
- Log ----------------------------------------------------------------- commit 85c8e3067c12431ded3d5289a8ec16cf2e33ebaf Author: Karl Williamson <[email protected]> Date: Wed Mar 12 13:15:44 2014 -0600 regcomp.c: Make SSC node clone safe This just sets to NULL the ptr field in the Synthetic Start Class that will be passed to regexec.c, and clarifies the comments in regcomp.h. See the thread starting at http://markmail.org/message/2txwaqnjco6zodeo M regcomp.c M regcomp.h commit 507ce328d735b95134ee5c4a262cd72eda563fbe Author: Karl Williamson <[email protected]> Date: Tue Mar 11 15:58:54 2014 -0600 regen/regcharclass.pl: Don't generate unused macros Having these unused macros around just clutters up the header file M regcharclass.h M regen/regcharclass.pl commit 8120045400790e80eeca2ed84ec4decbaa9c27a6 Author: Karl Williamson <[email protected]> Date: Tue Mar 11 15:47:53 2014 -0600 regen/regcharclass.pl: Generate correct macro instead of skipping It makes no sense to check for length safeness for the macros generated by this program which take a single UV code point as a parameter. Prior to this patch, it would skip trying to generate them if asked. But, because of the way things are structured, that means that if you need just this and the safe versions, you can't do it so easily. What this commit does is generate the cp macro if requested even if the 'safe' versions of other macros are also requested. 
M regen/regcharclass.pl ----------------------------------------------------------------------- Summary of changes: regcharclass.h | 186 +++----------------------------------------------- regcomp.c | 3 + regcomp.h | 22 +++--- regen/regcharclass.pl | 11 ++- 4 files changed, 30 insertions(+), 192 deletions(-) diff --git a/regcharclass.h b/regcharclass.h index 1412800..b0f635d 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -84,80 +84,6 @@ \p{HorizSpace} */ /*** GENERATED CODE ***/ -#define is_HORIZWS(s,is_utf8) \ -( ( 0x09 == NATIVE_TO_LATIN1(((U8*)s)[0]) || 0x20 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? 1\ -: ( is_utf8 ) ? \ - ( ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0xA0 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \ - : ( 0xE1 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( ( 0x9A == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ - : ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ - ( ( ( 0x80 <= NATIVE_TO_LATIN1(((U8*)s)[2]) && NATIVE_TO_LATIN1(((U8*)s)[2]) <= 0x8A ) || 0xAF == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? 3 : 0 )\ - : ( ( 0x81 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x9F == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ - : ( ( ( 0xE3 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ -: ( 0xA0 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ) - -/*** GENERATED CODE ***/ -#define is_HORIZWS_safe(s,e,is_utf8) \ -( ((e) > (s)) ? \ - ( ( 0x09 == NATIVE_TO_LATIN1(((U8*)s)[0]) || 0x20 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? 1\ - : (! is_utf8 ) ? \ - ( 0xA0 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) \ - : (((e) - (s)) >= UTF8SKIP(s)) ? \ - ( ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0xA0 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \ - : ( 0xE1 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( ( 0x9A == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 
3 : 0 )\ - : ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ - ( ( ( 0x80 <= NATIVE_TO_LATIN1(((U8*)s)[2]) && NATIVE_TO_LATIN1(((U8*)s)[2]) <= 0x8A ) || 0xAF == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? 3 : 0 )\ - : ( ( 0x81 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x9F == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ - : ( ( ( 0xE3 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ - : 0 ) \ -: 0 ) - -/*** GENERATED CODE ***/ -#define is_HORIZWS_utf8(s) \ -( ( 0x09 == NATIVE_TO_LATIN1(((U8*)s)[0]) || 0x20 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? 1\ -: ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0xA0 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \ -: ( 0xE1 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( ( 0x9A == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ -: ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ - ( ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) <= 0x8A ) || 0xAF == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? 3 : 0 )\ - : ( ( 0x81 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x9F == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ -: ( ( ( 0xE3 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 ) - -/*** GENERATED CODE ***/ -#define is_HORIZWS_utf8_safe(s,e) \ -( ((e) > (s)) ? \ - ( ( 0x09 == NATIVE_TO_LATIN1(((U8*)s)[0]) || 0x20 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? 1\ - : (((e) - (s)) >= UTF8SKIP(s)) ? \ - ( ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0xA0 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \ - : ( 0xE1 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( ( 0x9A == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ - : ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 
\ - ( ( ( 0x80 <= NATIVE_TO_LATIN1(((U8*)s)[2]) && NATIVE_TO_LATIN1(((U8*)s)[2]) <= 0x8A ) || 0xAF == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? 3 : 0 )\ - : ( ( 0x81 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x9F == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ - : ( ( ( 0xE3 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ - : 0 ) \ -: 0 ) - -/*** GENERATED CODE ***/ -#define is_HORIZWS_latin1(s) \ -( NATIVE_TO_LATIN1(((U8*)s)[0]) == 0x09 || ( ( NATIVE_TO_LATIN1(((U8*)s)[0]) & 0x7F ) == 0x20 ) ) - -/*** GENERATED CODE ***/ -#define is_HORIZWS_latin1_safe(s,e) \ -( ( ((e) - (s)) >= 1 ) ? \ - ( NATIVE_TO_LATIN1(((U8*)s)[0]) == 0x09 || ( ( NATIVE_TO_LATIN1(((U8*)s)[0]) & 0x7F ) == 0x20 ) )\ -: 0 ) - -/*** GENERATED CODE ***/ #define is_HORIZWS_high(s) \ ( ( 0xE1 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ ( ( ( 0x9A == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ @@ -168,18 +94,6 @@ : ( ( ( 0xE3 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 ) /*** GENERATED CODE ***/ -#define is_HORIZWS_high_safe(s,e) \ -( ( ((e) - (s)) >= 3 ) ? \ - ( ( 0xE1 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( ( 0x9A == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ - : ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ - ( ( ( 0x80 <= NATIVE_TO_LATIN1(((U8*)s)[2]) && NATIVE_TO_LATIN1(((U8*)s)[2]) <= 0x8A ) || 0xAF == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? 3 : 0 )\ - : ( ( 0x81 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x9F == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ - : ( ( ( 0xE3 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 
3 : 0 )\ -: 0 ) - -/*** GENERATED CODE ***/ #define is_HORIZWS_cp_high(cp) \ ( 0x1680 == NATIVE_TO_UNI(cp) || ( 0x1680 < NATIVE_TO_UNI(cp) && \ ( ( 0x2000 <= NATIVE_TO_UNI(cp) && NATIVE_TO_UNI(cp) <= 0x200A ) || ( 0x200A < NATIVE_TO_UNI(cp) &&\ @@ -192,64 +106,10 @@ \p{VertSpace} */ /*** GENERATED CODE ***/ -#define is_VERTWS(s,is_utf8) \ -( ( 0x0A <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x0D ) ? 1\ -: ( is_utf8 ) ? \ - ( ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \ - : ( ( ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0xA8 ) ) ? 3 : 0 )\ -: ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ) - -/*** GENERATED CODE ***/ -#define is_VERTWS_safe(s,e,is_utf8) \ -( ((e) > (s)) ? \ - ( ( 0x0A <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x0D ) ? 1\ - : (! is_utf8 ) ? \ - ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) \ - : (((e) - (s)) >= UTF8SKIP(s)) ? \ - ( ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \ - : ( ( ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0xA8 ) ) ? 3 : 0 )\ - : 0 ) \ -: 0 ) - -/*** GENERATED CODE ***/ -#define is_VERTWS_utf8(s) \ -( ( 0x0A <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x0D ) ? 1\ -: ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \ -: ( ( ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0xA8 ) ) ? 3 : 0 ) - -/*** GENERATED CODE ***/ -#define is_VERTWS_utf8_safe(s,e) \ -( ((e) > (s)) ? \ - ( ( 0x0A <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x0D ) ? 1\ - : (((e) - (s)) >= UTF8SKIP(s)) ? 
\ - ( ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \ - : ( ( ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0xA8 ) ) ? 3 : 0 )\ - : 0 ) \ -: 0 ) - -/*** GENERATED CODE ***/ #define is_VERTWS_high(s) \ ( ( ( ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0xA8 ) ) ? 3 : 0 ) /*** GENERATED CODE ***/ -#define is_VERTWS_high_safe(s,e) \ -( ( ( ( ( ((e) - (s)) >= 3 ) && ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0xA8 ) ) ? 3 : 0 ) - -/*** GENERATED CODE ***/ -#define is_VERTWS_latin1(s) \ -( ( 0x0A <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x0D ) || 0x85 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) - -/*** GENERATED CODE ***/ -#define is_VERTWS_latin1_safe(s,e) \ -( ( ((e) - (s)) >= 1 ) ? \ - ( ( 0x0A <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x0D ) || 0x85 == NATIVE_TO_LATIN1(((U8*)s)[0]) )\ -: 0 ) - -/*** GENERATED CODE ***/ #define is_VERTWS_cp_high(cp) \ ( 0x2028 == NATIVE_TO_UNI(cp) || 0x2029 == NATIVE_TO_UNI(cp) ) @@ -259,15 +119,6 @@ \p{XDigit} */ /*** GENERATED CODE ***/ -#define is_XDIGIT_utf8(s) \ -( ( ( 0x30 <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x39 ) || ( 0x41 <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x46 ) || ( 0x61 <= NATIVE_TO_LAT ... [66 chars truncated] -: ( 0xEF == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0xBC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ - ( ( ( 0x90 <= NATIVE_TO_LATIN1(((U8*)s)[2]) && NATIVE_TO_LATIN1(((U8*)s)[2]) <= 0x99 ) || ( 0xA1 <= NATIVE_TO_LATIN1(((U8*)s)[2]) && NATIVE_TO_LATIN1(((U8*)s)[2]) <= 0xA6 ) ) ? 
3 : 0 )\ - : ( ( 0xBD == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x81 <= NATIVE_TO_LATIN1(((U8*)s)[2]) && NATIVE_TO_LATIN1(((U8*)s)[2]) <= 0x86 ) ) ? 3 : 0 )\ -: 0 ) - -/*** GENERATED CODE ***/ #define is_XDIGIT_high(s) \ ( ( 0xEF == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ ( ( 0xBC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ @@ -286,34 +137,6 @@ \p{XPerlSpace} */ /*** GENERATED CODE ***/ -#define is_XPERLSPACE(s,is_utf8) \ -( ( ( 0x09 <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x0D ) || 0x20 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? 1\ -: ( is_utf8 ) ? \ - ( ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0xA0 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 )\ - : ( 0xE1 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( ( 0x9A == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ - : ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ - ( ( ( 0x80 <= NATIVE_TO_LATIN1(((U8*)s)[2]) && NATIVE_TO_LATIN1(((U8*)s)[2]) <= 0x8A ) || ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0xA8 || 0xAF == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? 3 : 0 )\ - : ( ( 0x81 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x9F == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ - : ( ( ( 0xE3 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ -: ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[0]) || 0xA0 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ) - -/*** GENERATED CODE ***/ -#define is_XPERLSPACE_utf8(s) \ -( ( ( 0x09 <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x0D ) || 0x20 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? 1\ -: ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0xA0 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 )\ -: ( 0xE1 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? 
\ - ( ( ( 0x9A == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ -: ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ - ( ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) <= 0x8A ) || ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0xA8 || 0xAF == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? 3 : 0 )\ - : ( ( 0x81 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x9F == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ -: ( ( ( 0xE3 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 ) - -/*** GENERATED CODE ***/ #define is_XPERLSPACE_high(s) \ ( ( 0xE1 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ ( ( ( 0x9A == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ @@ -950,6 +773,15 @@ ( 0x200F == NATIVE_TO_UNI(cp) || ( 0x200F < NATIVE_TO_UNI(cp) && \ ( 0x2028 == NATIVE_TO_UNI(cp) || 0x2029 == NATIVE_TO_UNI(cp) ) ) ) ) ) ) ) ) ) ) ) +/*** GENERATED CODE ***/ +#define is_PATWS_cp(cp) \ +( ( 0x09 <= NATIVE_TO_UNI(cp) && NATIVE_TO_UNI(cp) <= 0x0D ) || ( 0x0D < NATIVE_TO_UNI(cp) &&\ +( 0x20 == NATIVE_TO_UNI(cp) || ( 0x20 < NATIVE_TO_UNI(cp) && \ +( 0x85 == NATIVE_TO_UNI(cp) || ( 0x85 < NATIVE_TO_UNI(cp) && \ +( 0x200E == NATIVE_TO_UNI(cp) || ( 0x200E < NATIVE_TO_UNI(cp) && \ +( 0x200F == NATIVE_TO_UNI(cp) || ( 0x200F < NATIVE_TO_UNI(cp) && \ +( 0x2028 == NATIVE_TO_UNI(cp) || 0x2029 == NATIVE_TO_UNI(cp) ) ) ) ) ) ) ) ) ) ) ) + #endif /* H_REGCHARCLASS */ diff --git a/regcomp.c b/regcomp.c index c5dbe13..d72d344 100644 --- a/regcomp.c +++ b/regcomp.c @@ -1445,6 +1445,9 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc) set_ANYOF_arg(pRExC_state, (regnode *) ssc, invlist, NULL, NULL, NULL, FALSE); + /* Make sure is clone-safe */ + ssc->invlist = NULL; + if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) { ANYOF_FLAGS(ssc) |= ANYOF_POSIXL; } diff --git a/regcomp.h b/regcomp.h index 700d6c1..3bb1a53 100644 --- 
a/regcomp.h +++ b/regcomp.h @@ -217,14 +217,15 @@ struct regnode_charclass_class { U32 classflags; /* and run-time */ }; -/* A synthetic start class; is a regnode_charclass_posixl_fold, plus an extra - * SV*, used only during its construction and which is not used by regexec.c. - * Note that the 'next_off' field is unused, as the SSC stands alone, so there - * is never a next node. Also, there is no alignment issue, becase these are - * declared or allocated as a complete unit so the compiler takes care of - * alignment. This is unlike ithe other regnodes which are allocated in terms - * of multiples of a single-argument regnode. Because there is no alignment - * issue, these can have a pointer field */ +/* A synthetic start class (SSC); is a regnode_charclass_posixl_fold, plus an + * extra SV*, used only during its construction and which is not used by + * regexec.c. Note that the 'next_off' field is unused, as the SSC stands + * alone, so there is never a next node. Also, there is no alignment issue, + * becase these are declared or allocated as a complete unit so the compiler + * takes care of alignment. This is unlike the other regnodes which are + * allocated in terms of multiples of a single-argument regnode. 
SSC nodes can + * have a pointer field because there is no alignment issue, and because it is + * set to NULL after construction, before any cloning of the pattern */ struct regnode_ssc { U8 flags; /* ANYOF_POSIXL bit must go here */ U8 type; @@ -232,7 +233,10 @@ struct regnode_ssc { U32 arg1; char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time */ U32 classflags; /* and run-time */ - SV* invlist; /* list of code points matched */ + + /* Auxiliary, only used during construction; NULL afterwards: list of code + * points matched */ + SV* invlist; }; /* We take advantage of 'next_off' not otherwise being used in the SSC by diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl index 5b794ea..fa947a3 100755 --- a/regen/regcharclass.pl +++ b/regen/regcharclass.pl @@ -1389,12 +1389,11 @@ EOF my ( $type, $ret )= split /-/, $type_spec; $ret ||= 'len'; foreach my $mod ( @mods ) { - next if $mod eq 'safe' and $type =~ /^cp/; delete $mods{$mod}; my $macro= $obj->make_macro( type => $type, ret_type => $ret, - safe => $mod eq 'safe' + safe => $mod eq 'safe' && $type !~ /^cp/, ); print $out_fh $macro, "\n"; } @@ -1552,19 +1551,19 @@ LNBREAK: Line Break: \R \p{VertSpace} HORIZWS: Horizontal Whitespace: \h \H -=> generic UTF8 LATIN1 high cp_high :fast safe +=> high cp_high : fast \p{HorizSpace} VERTWS: Vertical Whitespace: \v \V -=> generic UTF8 high LATIN1 cp_high :fast safe +=> high cp_high : fast \p{VertSpace} XDIGIT: Hexadecimal digits -=> UTF8 high cp_high :fast +=> high cp_high : fast \p{XDigit} XPERLSPACE: \p{XPerlSpace} -=> generic UTF8 high cp_high :fast +=> high cp_high : fast \p{XPerlSpace} REPLACEMENT: Unicode REPLACEMENT CHARACTER -- Perl5 Master Repository
