In perl.git, the branch blead has been updated <https://perl5.git.perl.org/perl.git/commitdiff/62286d5869e7680741523d2a11be77cb79b83c99?hp=b877c1ff7374e89bad1d494c2a847af552d40782>
- Log ----------------------------------------------------------------- commit 62286d5869e7680741523d2a11be77cb79b83c99 Author: Karl Williamson <[email protected]> Date: Fri Mar 22 11:32:32 2019 -0600 PATCH: [perl #133889] Assertion failure I did not bisect this, but this is a regression. This code is using a user-defined property that isn't defined. It should catch that and, since this is within regex sets, quit, but instead continues and ends up using an undefined value. ----------------------------------------------------------------------- Summary of changes: embed.fnc | 2 ++ embed.h | 4 ++-- proto.h | 4 ++-- regcomp.c | 17 ++++++++++++++++- t/re/reg_mesg.t | 2 +- t/re/regex_sets.t | 5 +++++ 6 files changed, 28 insertions(+), 6 deletions(-) diff --git a/embed.fnc b/embed.fnc index 68d1872401..e2ca5c5808 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2507,6 +2507,7 @@ EpX |SV * |parse_uniprop_string|NN const char * const name \ |const bool is_utf8 \ |const bool to_fold \ |const bool runtime \ + |const bool deferrable \ |NN bool * user_defined_ptr \ |NN SV * msg \ |const STRLEN level @@ -2515,6 +2516,7 @@ EXp |SV * |handle_user_defined_property|NN const char * name \ |const bool is_utf8 \ |const bool to_fold \ |const bool runtime \ + |const bool deferrable \ |NN SV* contents \ |NN bool *user_defined_ptr \ |NN SV * msg \ diff --git a/embed.h b/embed.h index 722d1e375c..94acff2444 100644 --- a/embed.h +++ b/embed.h @@ -1188,7 +1188,7 @@ #define handle_named_backref(a,b,c,d) S_handle_named_backref(aTHX_ a,b,c,d) #define handle_possible_posix(a,b,c,d,e) S_handle_possible_posix(aTHX_ a,b,c,d,e) #define handle_regex_sets(a,b,c,d,e) S_handle_regex_sets(aTHX_ a,b,c,d,e) -#define handle_user_defined_property(a,b,c,d,e,f,g,h,i) Perl_handle_user_defined_property(aTHX_ a,b,c,d,e,f,g,h,i) +#define handle_user_defined_property(a,b,c,d,e,f,g,h,i,j) Perl_handle_user_defined_property(aTHX_ a,b,c,d,e,f,g,h,i,j) #define invlist_contents(a,b) S_invlist_contents(aTHX_ a,b) #define invlist_highest S_invlist_highest #define invlist_is_iterating S_invlist_is_iterating @@ -1202,7 +1202,7 @@ #define nextchar(a) S_nextchar(aTHX_ a) #define output_posix_warnings(a,b) S_output_posix_warnings(aTHX_ a,b) #define parse_lparen_question_flags(a) S_parse_lparen_question_flags(aTHX_ a) -#define parse_uniprop_string(a,b,c,d,e,f,g,h) Perl_parse_uniprop_string(aTHX_ a,b,c,d,e,f,g,h) +#define parse_uniprop_string(a,b,c,d,e,f,g,h,i) Perl_parse_uniprop_string(aTHX_ a,b,c,d,e,f,g,h,i) #define populate_ANYOF_from_invlist(a,b) S_populate_ANYOF_from_invlist(aTHX_ a,b) #define reg(a,b,c,d) S_reg(aTHX_ a,b,c,d) #define reg2Lanode(a,b,c,d) S_reg2Lanode(aTHX_ a,b,c,d) diff --git a/proto.h b/proto.h index c02b034675..b9662c6a33 100644 --- a/proto.h +++ b/proto.h @@ -5457,7 +5457,7 @@ STATIC int S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state, const char* STATIC regnode_offset S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth, char * const oregcomp_parse); #define PERL_ARGS_ASSERT_HANDLE_REGEX_SETS \ assert(pRExC_state); assert(flagp); assert(oregcomp_parse) -PERL_CALLCONV SV * Perl_handle_user_defined_property(pTHX_ const char * name, const STRLEN name_len, const bool is_utf8, const bool to_fold, const bool runtime, SV* contents, bool *user_defined_ptr, SV * msg, const STRLEN level); +PERL_CALLCONV SV * Perl_handle_user_defined_property(pTHX_ const char * name, const STRLEN name_len, const bool is_utf8, const bool to_fold, const bool runtime, const bool deferrable, SV* contents, bool *user_defined_ptr, SV * msg, const STRLEN level); #define PERL_ARGS_ASSERT_HANDLE_USER_DEFINED_PROPERTY \ assert(name); assert(contents); assert(user_defined_ptr); assert(msg) STATIC SV* S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style) @@ -5517,7 +5517,7 @@ STATIC void S_output_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_w STATIC void S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state); #define PERL_ARGS_ASSERT_PARSE_LPAREN_QUESTION_FLAGS \ assert(pRExC_state) -PERL_CALLCONV SV * Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len, const bool is_utf8, const bool to_fold, const bool runtime, bool * user_defined_ptr, SV * msg, const STRLEN level); +PERL_CALLCONV SV * Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t name_len, const bool is_utf8, const bool to_fold, const bool runtime, const bool deferrable, bool * user_defined_ptr, SV * msg, const STRLEN level); #define PERL_ARGS_ASSERT_PARSE_UNIPROP_STRING \ assert(name); assert(user_defined_ptr); assert(msg) STATIC void S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr); diff --git a/regcomp.c b/regcomp.c index 864f9a02f2..a56e75bb9a 100644 --- a/regcomp.c +++ b/regcomp.c @@ -17094,6 +17094,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, SV * prop_definition = parse_uniprop_string( name, n, UTF, FOLD, FALSE, /* This is compile-time */ + + /* We can't defer this defn when + * the full result is required in + * this call */ + ! cBOOL(ret_invlist), + &user_defined, msg, 0 /* Base level */ @@ -19202,6 +19208,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog, stored here for just this occasion */ TRUE, /* run time */ + FALSE, /* This call must find the defn */ si, /* The property definition */ &user_defined, msg, @@ -22103,6 +22110,8 @@ Perl_handle_user_defined_property(pTHX_ const bool is_utf8, /* ? Is 'name' encoded in UTF-8 */ const bool to_fold, /* ? Is this under /i */ const bool runtime, /* ? Are we in compile- or run-time */ + const bool deferrable, /* Is it ok for this property's full definition + to be deferred until later? */ SV* contents, /* The property's definition */ bool *user_defined_ptr, /* This will be set TRUE as we wouldn't be getting called unless this is thought to be @@ -22290,6 +22299,7 @@ Perl_handle_user_defined_property(pTHX_ this_definition = parse_uniprop_string(s0, s - s0, is_utf8, to_fold, runtime, + deferrable, user_defined_ptr, msg, (name_len == 0) ? level /* Don't increase level @@ -22441,6 +22451,8 @@ Perl_parse_uniprop_string(pTHX_ const bool is_utf8, /* ? Is 'name' encoded in UTF-8 */ const bool to_fold, /* ? Is this under /i */ const bool runtime, /* TRUE if this is being called at run time */ + const bool deferrable, /* TRUE if it's ok for the definition to not be + known at this call */ bool *user_defined_ptr, /* Upon return from this function it will be set to TRUE if any component is a user-defined property */ @@ -22681,6 +22693,7 @@ Perl_parse_uniprop_string(pTHX_ is_utf8, to_fold, runtime, + deferrable, user_defined_ptr, msg, level + 1); @@ -23169,6 +23182,7 @@ Perl_parse_uniprop_string(pTHX_ * handle it */ prop_definition = handle_user_defined_property(name, name_len, is_utf8, to_fold, runtime, + deferrable, POPs, user_defined_ptr, msg, level); @@ -23258,7 +23272,7 @@ Perl_parse_uniprop_string(pTHX_ * compile time, it might just be that the subroutine for that * property hasn't been encountered yet, but at runtime, it's * an error to try to use an undefined one */ - if (runtime) { + if (! deferrable) { if (SvCUR(msg) > 0) sv_catpvs(msg, "; "); sv_catpvs(msg, "Unknown user-defined property name"); goto append_name_to_msg; @@ -23491,6 +23505,7 @@ Perl_parse_uniprop_string(pTHX_ 0, /* Not UTF-8 */ 0, /* Not folded */ runtime, + deferrable, pu_definition, &dummy, msg, diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t index 8634866f2d..3d60c4a5dc 100644 --- a/t/re/reg_mesg.t +++ b/t/re/reg_mesg.t @@ -315,7 +315,7 @@ my @death = "/$bug133423/" => "Operand with no preceding operator {#} m/(?[(?^:(?[\\ '/[^/' => 'Unmatched [ {#} m/[{#}^/', # [perl #133767] '/\p{Is_Other_Alphabetic=F}/ ' => 'Can\'t find Unicode property definition "Is_Other_Alphabetic=F" {#} m/\p{Is_Other_Alphabetic=F}{#}/', - + '/\p{Is_Other_Alphabetic=F}/ ' => 'Can\'t find Unicode property definition "Is_Other_Alphabetic=F" {#} m/\p{Is_Other_Alphabetic=F}{#}/', ); # These are messages that are death under 'use re "strict"', and may or may diff --git a/t/re/regex_sets.t b/t/re/regex_sets.t index e70df81254..fc089a90b6 100644 --- a/t/re/regex_sets.t +++ b/t/re/regex_sets.t @@ -215,6 +215,11 @@ for my $char ("٠", "٥", "٩") { 'qr/(?[ ( \p{Uppercase} ) + (\p{Lowercase} - ([a] + [b])) ]) compiles and properly matches'); } +{ # [perl #133889] Caused assertion failure + fresh_perl_like('no warnings "experimental::regex_sets"; + qr/(?[\P{Is0}])/', qr/\QUnknown user-defined property name "Is0"/, {}, "[perl #133889]"); +} + done_testing(); 1; -- Perl5 Master Repository
