In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/fb2eed93790ce9b6d4c7cfd4bca70e7e1769d10d?hp=c3890f9c66df165fa44ec6d4da220ab976c4d31d>

- Log -----------------------------------------------------------------
commit fb2eed93790ce9b6d4c7cfd4bca70e7e1769d10d
Author: Father Chrysostomos <[email protected]>
Date: Sun Feb 1 22:38:00 2015 -0800

toke.c: Simplify \N{U+...} code

If we are parsing a \N{U+XXX.YYY} construct in a regexp literal, we do not need to pass it to grok_hex, because we do not need the numeric value at this point. The regexp engine will be calling grok_hex again, after all.

A simple scan for hex digits should be faster, and makes the code a little simpler, too.
-----------------------------------------------------------------------

Summary of changes:
toke.c | 55 +++++++++++++++++++++++++++----------------------------
1 file changed, 27 insertions(+), 28 deletions(-)

diff --git a/toke.c b/toke.c
index 67b6096..559c74c 100644
--- a/toke.c
+++ b/toke.c
@@ -3288,44 +3288,43 @@ S_scan_const(pTHX_ char *start)
  /* Here it looks like a named character */
  if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
- | PERL_SCAN_SILENT_ILLDIGIT
- | PERL_SCAN_DISALLOW_PREFIX;
- STRLEN len;
-
  s += 2; /* Skip to next char after the 'U+' */
- len = e - s;
- uv = grok_hex(s, &len, &flags, NULL);
- if (len == 0
- || ( len != (STRLEN)(e - s) && s[len] != '.'
- && PL_lex_inpat))
- {
- bad_NU:
- yyerror("Invalid hexadecimal number in \\N{U+...}");
- s = e + 1;
- continue;
- }
-
  if (PL_lex_inpat) {
  /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
- const char * const orig_s = s - 5;
- while (*s == '.') {
- s++;
- len = e - s;
- uv = grok_hex(s, &len, &flags, NULL);
- if (!len
- || (len != (STRLEN)(e - s) && s[len] != '.'))
- goto bad_NU;
+ /* Check the syntax. */
+ const char *orig_s;
+ orig_s = s - 5;
+ if (!isXDIGIT(*s)) {
+ bad_NU:
+ yyerror(
+ "Invalid hexadecimal number in \\N{U+...}"
+ );
+ s = e + 1;
+ continue;
+ }
+ while (++s < e) {
+ if (isXDIGIT(*s))
+ continue;
+ else if ((*s == '.' || *s == '_')
+ && isXDIGIT(s[1]))
+ continue;
+ goto bad_NU;
  }
- /* Pass everything through unchanged. The reason we
- * evaluate the numbers is to make sure there wasn't a
- * syntax error. +1 is for the '}' */
+ /* Pass everything through unchanged.
+ * +1 is for the '}' */
  Copy(orig_s, d, e - orig_s + 1, char);
  d += e - orig_s + 1;
  }
  else { /* Not a pattern: convert the hex to string */
+ I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+ | PERL_SCAN_SILENT_ILLDIGIT
+ | PERL_SCAN_DISALLOW_PREFIX;
+ STRLEN len = e - s;
+ uv = grok_hex(s, &len, &flags, NULL);
+ if (len == 0 || (len != (STRLEN)(e - s)))
+ goto bad_NU;
  /* If the destination is not in utf8, unconditionally
  * recode it to be so. This is because \N{} implies

--
Perl5 Master Repository
