details:   http://hg.nginx.org/njs/rev/adf61ca4267b
branches:
changeset: 250:adf61ca4267b
user:      Valentin Bartenev <vb...@nginx.com>
date:      Thu Nov 10 18:54:28 2016 +0300
description:
On-demand initialization of UTF-8 strings offset map.
diffstat: njs/njs_string.c | 70 ++++++++++++++++++++++--------------------------------- njs/njs_string.h | 6 ++-- njs/njs_vm.c | 4 --- 3 files changed, 31 insertions(+), 49 deletions(-) diffs (229 lines): diff -r 187882f1895a -r adf61ca4267b njs/njs_string.c --- a/njs/njs_string.c Thu Nov 10 18:45:10 2016 +0300 +++ b/njs/njs_string.c Thu Nov 10 18:54:28 2016 +0300 @@ -178,11 +178,6 @@ njs_string_new(njs_vm_t *vm, njs_value_t if (nxt_fast_path(p != NULL)) { memcpy(p, start, size); - - if (size != length && length >= NJS_STRING_MAP_STRIDE) { - njs_string_offset_map_init(p, size); - } - return NXT_OK; } @@ -194,7 +189,7 @@ nxt_noinline u_char * njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint32_t size, uint32_t length) { - uint32_t total; + uint32_t total, map_offset, *map; njs_string_t *string; value->type = NJS_STRING; @@ -217,9 +212,11 @@ njs_string_alloc(njs_vm_t *vm, njs_value value->data.string_size = size; if (size != length && length > NJS_STRING_MAP_STRIDE) { - total = njs_string_map_offset(size) + njs_string_map_size(length); + map_offset = njs_string_map_offset(size); + total = map_offset + njs_string_map_size(length); } else { + map_offset = 0; total = size; } @@ -233,6 +230,11 @@ njs_string_alloc(njs_vm_t *vm, njs_value string->length = length; string->retain = 1; + if (map_offset != 0) { + map = (uint32_t *) (string->start + map_offset); + map[0] = 0; + } + return string->start; } @@ -251,15 +253,16 @@ njs_string_copy(njs_value_t *dst, njs_va /* * njs_string_validate() validates an UTF-8 string, evaluates its length, - * sets njs_string_prop_t struct, and initializes offset map if it is required. + * sets njs_string_prop_t struct. 
*/ nxt_noinline njs_ret_t njs_string_validate(njs_vm_t *vm, njs_string_prop_t *string, njs_value_t *value) { - u_char *start; - size_t new_size; - ssize_t size, length; + u_char *start; + size_t new_size, map_offset; + ssize_t size, length; + uint32_t *map; size = value->short_string.size; @@ -297,8 +300,8 @@ njs_string_validate(njs_vm_t *vm, njs_st * Reallocate the long string with offset map * after the string. */ - new_size = njs_string_map_offset(size) - + njs_string_map_size(length); + map_offset = njs_string_map_offset(size); + new_size = map_offset + njs_string_map_size(length); start = nxt_mem_cache_alloc(vm->mem_cache_pool, new_size); if (nxt_slow_path(start == NULL)) { @@ -309,7 +312,8 @@ njs_string_validate(njs_vm_t *vm, njs_st string->start = start; value->data.u.string->start = start; - njs_string_offset_map_init(start, size); + map = (uint32_t *) (start + map_offset); + map[0] = 0; } } @@ -649,10 +653,6 @@ njs_string_prototype_concat(njs_vm_t *vm p += string.size; } - if (length >= NJS_STRING_MAP_STRIDE && size != length) { - njs_string_offset_map_init(start, size); - } - return NXT_OK; } @@ -766,10 +766,6 @@ njs_string_prototype_from_bytes(njs_vm_t for (p = string.start; p < end; p++) { s = nxt_utf8_encode(s, *p); } - - if (slice.length >= NJS_STRING_MAP_STRIDE || size != slice.length) { - njs_string_offset_map_init(start, size); - } } return NXT_OK; @@ -1518,8 +1514,7 @@ done: /* - * njs_string_offset() assumes that index is correct - * and the optional offset map has been initialized. + * njs_string_offset() assumes that index is correct. 
*/ nxt_noinline const u_char * @@ -1531,6 +1526,10 @@ njs_string_offset(const u_char *start, c if (index >= NJS_STRING_MAP_STRIDE) { map = njs_string_map_start(end); + if (map[0] == 0) { + njs_string_offset_map_init(start, end - start); + } + start += map[index / NJS_STRING_MAP_STRIDE - 1]; } @@ -1543,8 +1542,7 @@ njs_string_offset(const u_char *start, c /* - * njs_string_index() assumes that offset is correct - * and the optional offset map has been initialized. + * njs_string_index() assumes that offset is correct. */ nxt_noinline uint32_t @@ -1565,6 +1563,10 @@ njs_string_index(njs_string_prop_t *stri end = string->start + string->size; map = njs_string_map_start(end); + if (map[0] == 0) { + njs_string_offset_map_init(string->start, string->size); + } + while (index + NJS_STRING_MAP_STRIDE < string->length && *map <= offset) { @@ -1628,10 +1630,6 @@ njs_string_prototype_to_lower_case(njs_v p = nxt_utf8_encode(p, nxt_utf8_lower_case(&s, end)); size--; } - - if (string.length >= NJS_STRING_MAP_STRIDE) { - njs_string_offset_map_init(start, string.size); - } } return NXT_OK; @@ -1680,10 +1678,6 @@ njs_string_prototype_to_upper_case(njs_v p = nxt_utf8_encode(p, nxt_utf8_upper_case(&s, end)); size--; } - - if (string.length >= NJS_STRING_MAP_STRIDE) { - njs_string_offset_map_init(start, string.size); - } } return NXT_OK; @@ -1865,10 +1859,6 @@ njs_string_prototype_repeat(njs_vm_t *vm n--; } - if (length >= NJS_STRING_MAP_STRIDE && size != length) { - njs_string_offset_map_init(start, size); - } - return NXT_OK; } @@ -2882,10 +2872,6 @@ njs_string_replace_join(njs_vm_t *vm, nj /* GC: release valid values. 
*/ } - if (length >= NJS_STRING_MAP_STRIDE && size != length) { - njs_string_offset_map_init(string, size); - } - nxt_array_destroy(&r->parts, &njs_array_mem_proto, vm->mem_cache_pool); return NXT_OK; diff -r 187882f1895a -r adf61ca4267b njs/njs_string.h --- a/njs/njs_string.h Thu Nov 10 18:45:10 2016 +0300 +++ b/njs/njs_string.h Thu Nov 10 18:54:28 2016 +0300 @@ -53,9 +53,9 @@ * To speed up this search a map of offsets is stored after the UTF-8 string. * The map is aligned to uint32_t and contains byte positions of each * NJS_STRING_MAP_STRIDE UTF-8 character except zero position. The map - * can be allocated and updated on demand. If a string come outside - * JavaScript as byte sequnece just to be concatenated or to be used in - * regular expressions the offset map is not required. + * can be initialized on demand. If a string come outside JavaScript as + * byte sequnece just to be concatenated or to be used in regular expressions + * the offset map is not required. * * The map is not allocated: * 1) if the length is zero hence it is a byte string; diff -r 187882f1895a -r adf61ca4267b njs/njs_vm.c --- a/njs/njs_vm.c Thu Nov 10 18:45:10 2016 +0300 +++ b/njs/njs_vm.c Thu Nov 10 18:54:28 2016 +0300 @@ -1556,10 +1556,6 @@ njs_vmcode_addition(njs_vm_t *vm, njs_va (void) memcpy(start, string1.start, string1.size); (void) memcpy(start + string1.size, string2.start, string2.size); - if (length >= NJS_STRING_MAP_STRIDE && size != length) { - njs_string_offset_map_init(start, size); - } - return sizeof(njs_vmcode_3addr_t); } _______________________________________________ nginx-devel mailing list nginx-devel@nginx.org http://mailman.nginx.org/mailman/listinfo/nginx-devel