details: https://hg.nginx.org/njs/rev/3ec3e7d2ce5f branches: changeset: 2124:3ec3e7d2ce5f user: Dmitry Volyntsev <xei...@nginx.com> date: Fri May 19 20:22:14 2023 -0700 description: Added support of regular expressions not supported directly by PCRE2.
The following patterns were fixed: `[]` - matches nothing; previously it was rejected as an invalid expression. `[^]` - matches any character; unlike `.`, this syntax also matches a newline; previously it was rejected as an invalid expression. `++`, `*+`, `?+` - are rejected now, whereas in PCRE2 they are considered valid possessive quantifiers. diffstat: external/njs_regex.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ src/njs_regexp.c | 45 +++++++++++++++++++++++++++++++- src/test/njs_unit_test.c | 41 +++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 1 deletions(-) diffs (206 lines): diff -r 4d26300ddc64 -r 3ec3e7d2ce5f external/njs_regex.c --- a/external/njs_regex.c Thu May 18 18:33:36 2023 -0700 +++ b/external/njs_regex.c Fri May 19 20:22:14 2023 -0700 @@ -94,6 +94,73 @@ njs_int_t njs_regex_escape(njs_mp_t *mp, njs_str_t *text) { #ifdef NJS_HAVE_PCRE2 + size_t anychars, nomatches; + u_char *p, *dst, *start, *end; + + /* + * 1) [^] is a valid regexp expression in JavaScript, but PCRE2 + * rejects it as invalid, replacing it with equivalent PCRE2 [\s\S] + * expression. + * 2) [] is a valid regexp expression in JavaScript, but PCRE2 + * rejects it as invalid, replacing it with equivalent PCRE2 (?!) + * expression which matches nothing. 
+ */ + + start = text->start; + end = text->start + text->length; + + anychars = 0; + nomatches = 0; + + for (p = start; p < end; p++) { + switch (*p) { + case '[': + if (p + 1 < end && p[1] == ']') { + p += 1; + nomatches += 1; + + } else if (p + 2 < end && p[1] == '^' && p[2] == ']') { + p += 2; + anychars += 1; + } + + break; + } + } + + if (!anychars && !nomatches) { + return NJS_OK; + } + + text->length = text->length + + anychars * (njs_length("\\s\\S") - njs_length("^")) + + nomatches * (njs_length("?!")); + + text->start = njs_mp_alloc(mp, text->length); + if (njs_slow_path(text->start == NULL)) { + return NJS_ERROR; + } + + dst = text->start; + + for (p = start; p < end; p++) { + + switch (*p) { + case '[': + if (p + 1 < end && p[1] == ']') { + p += 1; + dst = njs_cpymem(dst, "(?!)", 4); + continue; + + } else if (p + 2 < end && p[1] == '^' && p[2] == ']') { + p += 2; + dst = njs_cpymem(dst, "[\\s\\S]", 6); + continue; + } + } + + *dst++ = *p; + } return NJS_OK; diff -r 4d26300ddc64 -r 3ec3e7d2ce5f src/njs_regexp.c --- a/src/njs_regexp.c Thu May 18 18:33:36 2023 -0700 +++ b/src/njs_regexp.c Fri May 19 20:22:14 2023 -0700 @@ -263,9 +263,10 @@ njs_regexp_pattern_create(njs_vm_t *vm, njs_regex_flags_t flags) { int ret; - u_char *p; + u_char *p, *end; size_t size; njs_str_t text; + njs_bool_t in; njs_uint_t n; njs_regex_t *regex; njs_regexp_group_t *group; @@ -274,6 +275,42 @@ njs_regexp_pattern_create(njs_vm_t *vm, text.start = start; text.length = length; + in = 0; + end = start + length; + + for (p = start; p < end; p++) { + + switch (*p) { + case '[': + in = 1; + break; + + case ']': + in = 0; + break; + + case '\\': + p++; + break; + + case '+': + if (njs_slow_path(!in + && (p - 1 > start) + && (p[-1] == '+'|| p[-1] == '*' || p[-1] == '?')) + && (p - 2 >= start && p[-2] != '\\')) + { + /** + * PCRE possessive quantifiers `++`, `*+`, `?+` + * are not allowed in JavaScript. Whereas `[++]` or `\?+` are + * allowed. 
+ */ + goto nothing_to_repeat; + } + + break; + } + } + ret = njs_regex_escape(vm->mem_pool, &text); if (njs_slow_path(ret != NJS_OK)) { njs_memory_error(vm); @@ -370,6 +407,12 @@ fail: njs_mp_free(vm->mem_pool, pattern); return NULL; + +nothing_to_repeat: + + njs_syntax_error(vm, "Invalid regular expression \"%V\" nothing to repeat", + &text); + return NULL; } diff -r 4d26300ddc64 -r 3ec3e7d2ce5f src/test/njs_unit_test.c --- a/src/test/njs_unit_test.c Thu May 18 18:33:36 2023 -0700 +++ b/src/test/njs_unit_test.c Fri May 19 20:22:14 2023 -0700 @@ -11810,6 +11810,38 @@ static njs_unit_test_t njs_test[] = { njs_str("var r = /./; r"), njs_str("/./") }, + { njs_str("/[^]+|[^]+/.test('\\n| ')"), + njs_str("true") }, + + { njs_str("/[^]+|[^][^]/.test('|aa')"), + njs_str("true") }, + + { njs_str("/a[]/.test('a')"), + njs_str("false") }, + + { njs_str("/[]a/.test('a')"), + njs_str("false") }, + +#ifdef NJS_HAVE_PCRE2 + { njs_str("/[]*a/.test('a')"), + njs_str("true") }, +#endif + + { njs_str("/Ca++BB/"), + njs_str("SyntaxError: Invalid regular expression \"Ca++BB\" nothing to repeat in 1") }, + + { njs_str("/a*+/"), + njs_str("SyntaxError: Invalid regular expression \"a*+\" nothing to repeat in 1") }, + + { njs_str("/a?+/"), + njs_str("SyntaxError: Invalid regular expression \"a?+\" nothing to repeat in 1") }, + + { njs_str(" /\\[[]++\\]/"), + njs_str("SyntaxError: Invalid regular expression \"\\[[]++\\]\" nothing to repeat in 1") }, + + { njs_str("/\\?+/"), + njs_str("/\\?+/") }, + { njs_str("var r = new RegExp(); r"), njs_str("/(?:)/") }, @@ -11870,6 +11902,15 @@ static njs_unit_test_t njs_test[] = { njs_str("RegExp(new RegExp('expr'))"), njs_str("/expr/") }, + { njs_str("RegExp(RegExp('[^]+|[^][^]')).test('| \\na')"), + njs_str("true") }, + + { njs_str("RegExp('a++')"), + njs_str("SyntaxError: Invalid regular expression \"a++\" nothing to repeat") }, + + { njs_str("RegExp('[a++]')"), + njs_str("/[a++]/") }, + { njs_str("RegExp(new RegExp('expr')).multiline"), 
njs_str("false") }, _______________________________________________ nginx-devel mailing list nginx-devel@nginx.org https://mailman.nginx.org/mailman/listinfo/nginx-devel