In perl.git, the branch blead has been updated <https://perl5.git.perl.org/perl.git/commitdiff/0b97d15690d19a1fee0c903da7f5898764d8f15e?hp=bc508755449a899a1f962877248064475fb91770>
- Log ----------------------------------------------------------------- commit 0b97d15690d19a1fee0c903da7f5898764d8f15e Author: Karl Williamson <[email protected]> Date: Tue Mar 19 12:37:04 2019 -0600 PATCH: [perl #133937] Assertion failure This recently added assertion actually caught an error, which is a potential read beyond the end of the buffer. This doesn't actually happen in this case because this is a regular expression pattern, and the toker makes sure there is a trailing NUL (that isn't counted). The solution is to check the bounds before reading. commit 83a1b28e808bee1a60973291307e3ce0cb77aad5 Author: Karl Williamson <[email protected]> Date: Tue Mar 19 12:21:49 2019 -0600 dquote.c: Prevent possible out-of-bounds read This code read a byte that was potentially out-of-bounds. I don't know how it could get this far, but maybe some fuzzing code could get it. commit e750debb242ae6fdd7b786405ba167620fb77a2d Author: Karl Williamson <[email protected]> Date: Tue Mar 19 12:14:26 2019 -0600 Change error wording for empty \x{} An empty \x{} is unfortunately legal (returning a NUL) except in the scope of "use re 'strict'". Since this is an experimental feature, things like wording changes are allowed. It is unlikely anyone is relying on the precise wording of this fatal error under 'strict', and now all the messages for similar errors are similarly worded. commit ccad88426c3827a98f048facac304bace92604e7 Author: Karl Williamson <[email protected]> Date: Tue Mar 19 12:08:06 2019 -0600 Change error wording for \o{} An empty \o{} now raises "Empty \o{}" instead of "Number with no digits", which is more consistent with errors raised for things like \b{}, \P{}. There is a small risk of breakage with this change, as with any diagnostic wording change. However, this construct is relatively new and rarely used, and this is a fatal error, not a warning you might want to trap on. There are no empty \o{} instances in CPAN. 
----------------------------------------------------------------------- Summary of changes: dquote.c | 19 ++++++++++++++++--- pod/perldelta.pod | 15 ++++++++++++++- pod/perldiag.pod | 10 +++++++--- t/lib/warnings/toke | 2 +- t/re/reg_mesg.t | 16 +++++++++------- 5 files changed, 47 insertions(+), 15 deletions(-) diff --git a/dquote.c b/dquote.c index 10fb2b5df0..bf5cf902a2 100644 --- a/dquote.c +++ b/dquote.c @@ -106,7 +106,7 @@ Perl_grok_bslash_o(pTHX_ char **s, const char * const send, UV *uv, assert(* *s == 'o'); (*s)++; - if (**s != '{') { + if (send <= *s || **s != '{') { *error_msg = "Missing braces on \\o{}"; return FALSE; } @@ -126,7 +126,7 @@ Perl_grok_bslash_o(pTHX_ char **s, const char * const send, UV *uv, numbers_len = e - *s; if (numbers_len == 0) { (*s)++; /* Move past the } */ - *error_msg = "Number with no digits"; + *error_msg = "Empty \\o{}"; return FALSE; } @@ -210,8 +210,21 @@ Perl_grok_bslash_x(pTHX_ char **s, const char * const send, UV *uv, assert(*(*s - 1) == '\\'); assert(* *s == 'x'); + (*s)++; + if (send <= *s) { + if (strict) { + *error_msg = "Empty \\x"; + return FALSE; + } + + /* Sadly, to preserve backcompat, an empty \x at the end of string is + * interpreted as a NUL */ + *uv = 0; + return TRUE; + } + if (strict || ! output_warning) { flags |= PERL_SCAN_SILENT_ILLDIGIT; } @@ -253,7 +266,7 @@ Perl_grok_bslash_x(pTHX_ char **s, const char * const send, UV *uv, if (numbers_len == 0) { if (strict) { (*s)++; /* Move past the } */ - *error_msg = "Number with no digits"; + *error_msg = "Empty \\x{}"; return FALSE; } *s = e + 1; diff --git a/pod/perldelta.pod b/pod/perldelta.pod index ece46e6757..4d94cceb69 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -306,7 +306,20 @@ XXX Changes (i.e. rewording) of diagnostic messages go here =item * -XXX Describe change here +Specifying C<\o{}> without anything between the braces now yields the +fatal error message "Empty \o{}". Previously it was "Number with no +digits". 
This means the same wording is used for this kind of error as +with similar constructs such as C<\p{}>. + +=item * + +Within the scope of the experimental feature C<use re 'strict'>, +specifying C<\x{}> without anything between the braces now yields the +fatal error message "Empty \x{}". Previously it was "Number with no +digits". This means the same wording is used for this kind of error as +with similar constructs such as C<\p{}>. It is legal, though not wise +to have an empty C<\x> outside of C<re 'strict'>; it silently generates +a NUL character. =back diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 4b618cf7ac..8163dde583 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -2159,11 +2159,15 @@ unlikely to be what you want. =item Empty \%c in regex; marked by S<<-- HERE> in m/%s/ +=item Empty \%c{} + =item Empty \%c{} in regex; marked by S<<-- HERE> in m/%s/ -(F) C<\p> and C<\P> are used to introduce a named Unicode property, as -described in L<perlunicode> and L<perlre>. You used C<\p> or C<\P> in -a regular expression without specifying the property name. +(F) You used something like C<\b{}>, C<\B{}>, C<\o{}>, C<\p>, C<\P>, or +C<\x> without specifying anything for it to operate on. + +Unfortunately, for backwards compatibility reasons, an empty C<\x> is +legal outside S<C<use re 'strict'>> and expands to a NUL character. =item ${^ENCODING} is no longer supported diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke index 1c85d7bc44..faaf5bc6b8 100644 --- a/t/lib/warnings/toke +++ b/t/lib/warnings/toke @@ -1372,7 +1372,7 @@ my $a = "\o{}"; EXPECT Missing braces on \o{} at - line 3, within string Missing right brace on \o{ at - line 4, within string -Number with no digits at - line 5, within string +Empty \o{} at - line 5, within string BEGIN not safe after errors--compilation aborted at - line 6. 
######## # toke.c diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t index e7251a9571..8634866f2d 100644 --- a/t/re/reg_mesg.t +++ b/t/re/reg_mesg.t @@ -238,10 +238,10 @@ my @death = '/(?[ \0004 ])/' => 'Need exactly 3 octal digits {#} m/(?[ \0004 {#}])/', '/(?[ \05 ])/' => 'Need exactly 3 octal digits {#} m/(?[ \05 {#}])/', '/(?[ \o{1038} ])/' => 'Non-octal character {#} m/(?[ \o{1038{#}} ])/', - '/(?[ \o{} ])/' => 'Number with no digits {#} m/(?[ \o{}{#} ])/', + '/(?[ \o{} ])/' => 'Empty \o{} {#} m/(?[ \o{}{#} ])/', '/(?[ \x{defg} ])/' => 'Non-hex character {#} m/(?[ \x{defg{#}} ])/', '/(?[ \xabcdef ])/' => 'Use \\x{...} for more than two hex characters {#} m/(?[ \xabc{#}def ])/', - '/(?[ \x{} ])/' => 'Number with no digits {#} m/(?[ \x{}{#} ])/', + '/(?[ \x{} ])/' => 'Empty \x{} {#} m/(?[ \x{}{#} ])/', '/(?[ \cK + ) ])/' => 'Unexpected \')\' {#} m/(?[ \cK + ){#} ])/', '/(?[ \cK + ])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[ \cK + {#}])/', '/(?[ ( ) ])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[ ( ){#} ])/', @@ -265,10 +265,10 @@ my @death = 'm/(?[ \p{Digit} & (?(?[ \p{Thai} | \p{Lao} ]))])/' => 'Unexpected character {#} m/(?[ \p{Digit} & (?{#}(?[ \p{Thai} | \p{Lao} ]))])/', 'm/\o{/' => 'Missing right brace on \o{ {#} m/\o{{#}/', 'm/\o/' => 'Missing braces on \o{} {#} m/\o{#}/', - 'm/\o{}/' => 'Number with no digits {#} m/\o{}{#}/', + 'm/\o{}/' => 'Empty \o{} {#} m/\o{}{#}/', 'm/[\o{]/' => 'Missing right brace on \o{ {#} m/[\o{{#}]/', 'm/[\o]/' => 'Missing braces on \o{} {#} m/[\o{#}]/', - 'm/[\o{}]/' => 'Number with no digits {#} m/[\o{}{#}]/', + 'm/[\o{}]/' => 'Empty \o{} {#} m/[\o{}{#}]/', 'm/(?^-i:foo)/' => 'Sequence (?^-...) not recognized {#} m/(?^-{#}i:foo)/', 'm/\87/' => 'Reference to nonexistent group {#} m/\87{#}/', 'm/a\87/' => 'Reference to nonexistent group {#} m/a\87{#}/', @@ -342,9 +342,9 @@ my @death_only_under_strict = ( 'm/[\o{789}]/' => 'Non-octal character \'8\'. 
Resolved as "\o{7}"', => 'Non-octal character {#} m/[\o{78{#}9}]/', 'm/\x{}/' => "", - => 'Number with no digits {#} m/\x{}{#}/', + => 'Empty \x{} {#} m/\x{}{#}/', 'm/[\x{}]/' => "", - => 'Number with no digits {#} m/[\x{}{#}]/', + => 'Empty \x{} {#} m/[\x{}{#}]/', 'm/\x{ABCDEFG}/' => 'Illegal hexadecimal digit \'G\' ignored', => 'Non-hex character {#} m/\x{ABCDEFG{#}}/', 'm/[\x{ABCDEFG}]/' => 'Illegal hexadecimal digit \'G\' ignored', @@ -411,6 +411,8 @@ my @death_only_under_strict = ( => 'Unescaped left brace in regex is illegal here {#} m/[x]{{#}/', '/\p{Latin}{/' => 'Unescaped left brace in regex is passed through {#} m/\p{Latin}{{#}/', => 'Unescaped left brace in regex is illegal here {#} m/\p{Latin}{{#}/', + '/\x{100}\x/' => "", + => "Empty \\x {#} m/\\x{100}\\x{#}/", ); # These need the character 'ネ' as a marker for mark_as_utf8() @@ -477,7 +479,7 @@ my @death_utf8 = mark_as_utf8( '/ネ(?[ \cK [ネ] ])ネ/' => 'Operand with no preceding operator {#} m/ネ(?[ \cK [ネ{#}] ])ネ/', '/ネ(?[ \0004 ])ネ/' => 'Need exactly 3 octal digits {#} m/ネ(?[ \0004 {#}])ネ/', '/(?[ \o{ネ} ])ネ/' => 'Non-octal character {#} m/(?[ \o{ネ{#}} ])ネ/', - '/ネ(?[ \o{} ])ネ/' => 'Number with no digits {#} m/ネ(?[ \o{}{#} ])ネ/', + '/ネ(?[ \o{} ])ネ/' => 'Empty \o{} {#} m/ネ(?[ \o{}{#} ])ネ/', '/(?[ \x{ネ} ])ネ/' => 'Non-hex character {#} m/(?[ \x{ネ{#}} ])ネ/', '/(?[ \p{ネ} ])/' => 'Can\'t find Unicode property definition "ネ" {#} m/(?[ \p{ネ}{#} ])/', '/(?[ \p{ ネ = bar } ])/' => 'Can\'t find Unicode property definition "ネ = bar" {#} m/(?[ \p{ ネ = bar }{#} ])/', -- Perl5 Master Repository
