details:   http://hg.nginx.org/njs/rev/da63e61ecef4
branches:
changeset: 223:da63e61ecef4
user:      Igor Sysoev <i...@sysoev.ru>
date:      Thu Oct 27 10:56:54 2016 +0300
description:
String.prototype.lastIndexOf() method fixes and optimizations.
In collaboration with Valentin Bartenev. diffstat: njs/njs_string.c | 129 ++++++++++++++++++++++------------------------ njs/test/njs_unit_test.c | 41 ++++++++++++++- nxt/nxt_utf8.h | 27 ++++++++- 3 files changed, 123 insertions(+), 74 deletions(-) diffs (274 lines): diff -r 74785cebd8df -r da63e61ecef4 njs/njs_string.c --- a/njs/njs_string.c Thu Oct 27 10:47:48 2016 +0300 +++ b/njs/njs_string.c Thu Oct 27 10:56:54 2016 +0300 @@ -83,8 +83,6 @@ static nxt_noinline void njs_string_slic njs_value_t *args, nxt_uint_t nargs); static njs_ret_t njs_string_from_char_code(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs, njs_index_t unused); -static nxt_noinline ssize_t njs_string_index_of(njs_vm_t *vm, - njs_value_t *src, njs_value_t *search_string, size_t index); static njs_ret_t njs_string_match_multiple(njs_vm_t *vm, njs_value_t *args, njs_regexp_pattern_t *pattern); static njs_ret_t njs_string_split_part_add(njs_vm_t *vm, njs_array_t *array, @@ -1288,91 +1286,86 @@ static njs_ret_t njs_string_prototype_last_index_of(njs_vm_t *vm, njs_value_t *args, nxt_uint_t nargs, njs_index_t unused) { - ssize_t ret, index, last; - - index = -1; - - if (nargs > 1) { - last = NJS_STRING_MAX_LENGTH; - - if (nargs > 2) { - last = args[2].data.u.number; - - if (last < 0) { - last = 0; - } - } - - ret = 0; - - for ( ;; ) { - ret = njs_string_index_of(vm, &args[0], &args[1], ret); - - if (ret < 0 || ret >= last) { - break; - } - - index = ret++; - } - } - - njs_number_set(&vm->retval, index); - - return NXT_OK; -} - - -static nxt_noinline ssize_t -njs_string_index_of(njs_vm_t *vm, njs_value_t *src, njs_value_t *search_string, - size_t index) -{ - size_t length; + ssize_t index, start, length, search_length; const u_char *p, *end; njs_string_prop_t string, search; - (void) njs_string_prop(&search, search_string); - - length = njs_string_prop(&string, src); - - if (index < length) { - - if (string.size == length) { + if (nargs > 1) { + length = njs_string_prop(&string, &args[0]); + 
search_length = njs_string_prop(&search, &args[1]); + + if (length < search_length) { + goto small; + } + + index = NJS_STRING_MAX_LENGTH; + + if (nargs > 2) { + index = args[2].data.u.number; + + if (index < 0) { + index = 0; + } + } + + if (index > length) { + index = length; + } + + if (string.size == (size_t) length) { /* Byte or ASCII string. */ + + start = length - search.size; + + if (index > start) { + index = start; + } + p = string.start + index; - end = (string.start + string.size) - (search.size - 1); - - while (p < end) { + + do { if (memcmp(p, search.start, search.size) == 0) { - return index; + goto done; } - index++; - p++; - } + p--; + index--; + + } while (index >= 0); } else { /* UTF-8 string. */ + end = string.start + string.size; - p = njs_string_offset(string.start, end, index); - - end -= search.size - 1; - - while (p < end) { + end -= search.size; + + while (p > end) { + index--; + p = nxt_utf8_prev(p); + } + + do { if (memcmp(p, search.start, search.size) == 0) { - return index; + goto done; } - index++; - p = nxt_utf8_next(p, end); - } + p = nxt_utf8_prev(p); + index--; + + } while (index >= 0); } - - } else if (search.size == 0) { - return length; } - return -1; +small: + + index = -1; + +done: + + njs_number_set(&vm->retval, index); + + return NXT_OK; } diff -r 74785cebd8df -r da63e61ecef4 njs/test/njs_unit_test.c --- a/njs/test/njs_unit_test.c Thu Oct 27 10:47:48 2016 +0300 +++ b/njs/test/njs_unit_test.c Thu Oct 27 10:56:54 2016 +0300 @@ -3224,6 +3224,9 @@ static njs_unit_test_t njs_test[] = { nxt_string("''.indexOf.call(12345, 45, '0')"), nxt_string("3") }, + { nxt_string("'abc'.lastIndexOf('abcdef')"), + nxt_string("-1") }, + { nxt_string("'abc abc abc abc'.lastIndexOf('abc')"), nxt_string("12") }, @@ -3231,7 +3234,43 @@ static njs_unit_test_t njs_test[] = nxt_string("8") }, { nxt_string("'abc abc abc abc'.lastIndexOf('abc', 0)"), - nxt_string("-1") }, + nxt_string("0") }, + + { nxt_string("'abc abc abc abc'.lastIndexOf('', 0)"), + 
nxt_string("0") }, + + { nxt_string("'abc abc abc abc'.lastIndexOf('', 5)"), + nxt_string("5") }, + + { nxt_string("'abc abc абвгд abc'.lastIndexOf('абвгд')"), + nxt_string("8") }, + + { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд')"), + nxt_string("8") }, + + { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 11)"), + nxt_string("8") }, + + { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 12)"), + nxt_string("8") }, + + { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 13)"), + nxt_string("8") }, + + { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 14)"), + nxt_string("8") }, + + { nxt_string("'abc abc абвгдежз'.lastIndexOf('абвгд', 15)"), + nxt_string("8") }, + + { nxt_string("'abc abc абвгдежз'.lastIndexOf('')"), + nxt_string("16") }, + + { nxt_string("'abc abc абвгдежз'.lastIndexOf('', 12)"), + nxt_string("12") }, + + { nxt_string("''.lastIndexOf('')"), + nxt_string("0") }, { nxt_string("'ABC'.toLowerCase()"), nxt_string("abc") }, diff -r 74785cebd8df -r da63e61ecef4 nxt/nxt_utf8.h --- a/nxt/nxt_utf8.h Thu Oct 27 10:47:48 2016 +0300 +++ b/nxt/nxt_utf8.h Thu Oct 27 10:56:54 2016 +0300 @@ -29,7 +29,12 @@ NXT_EXPORT ssize_t nxt_utf8_length(const NXT_EXPORT nxt_bool_t nxt_utf8_is_valid(const u_char *p, size_t len); -/* nxt_utf8_next() expects a valid UTF-8 string. */ +/* + * nxt_utf8_next() and nxt_utf8_prev() expect a valid UTF-8 string. + * + * The leading UTF-8 byte is either 0xxxxxxx or 11xxxxxx. + * The continuation UTF-8 bytes are 10xxxxxx. + */ nxt_inline const u_char * nxt_utf8_next(const u_char *p, const u_char *end) @@ -41,10 +46,6 @@ nxt_utf8_next(const u_char *p, const u_c if ((c & 0x80) != 0) { do { - /* - * The first UTF-8 byte is either 0xxxxxxx or 11xxxxxx. - * The next UTF-8 bytes are 10xxxxxx. 
- */ c = *p; if ((c & 0xC0) != 0x80) { @@ -60,6 +61,22 @@ nxt_utf8_next(const u_char *p, const u_c } +nxt_inline const u_char * +nxt_utf8_prev(const u_char *p) +{ + u_char c; + + do { + p--; + c = *p; + + } while ((c & 0xC0) == 0x80); + + return p; +} + + + #define nxt_utf8_size(u) \ ((u < 0x80) ? 1 : ((u < 0x0800) ? 2 : ((u < 0x10000) ? 3 : 4))) _______________________________________________ nginx-devel mailing list nginx-devel@nginx.org http://mailman.nginx.org/mailman/listinfo/nginx-devel