details: http://hg.nginx.org/njs/rev/187882f1895a branches: changeset: 249:187882f1895a user: Valentin Bartenev <vb...@nginx.com> date: Thu Nov 10 18:45:10 2016 +0300 description: Improved UTF-8 offset map related macros (no functional changes).
diffstat: njs/njs_parser.c | 4 +- njs/njs_string.c | 61 ++++++++++++++++++++++++++----------------------------- njs/njs_string.h | 14 ++++++++++-- njs/njs_vm.c | 2 +- 4 files changed, 43 insertions(+), 38 deletions(-) diffs (250 lines): diff -r 60c2930eb951 -r 187882f1895a njs/njs_parser.c --- a/njs/njs_parser.c Thu Nov 10 16:47:52 2016 +0300 +++ b/njs/njs_parser.c Thu Nov 10 18:45:10 2016 +0300 @@ -1984,7 +1984,7 @@ njs_parser_string_create(njs_vm_t *vm, n if (nxt_fast_path(p != NULL)) { memcpy(p, src->start, src->length); - if (length > NJS_STRING_MAP_OFFSET && (size_t) length != src->length) { + if (length > NJS_STRING_MAP_STRIDE && (size_t) length != src->length) { njs_string_offset_map_init(p, src->length); } @@ -2144,7 +2144,7 @@ njs_parser_escape_string_create(njs_vm_t } if (start != NULL) { - if (length > NJS_STRING_MAP_OFFSET && length != size) { + if (length > NJS_STRING_MAP_STRIDE && length != size) { njs_string_offset_map_init(start, size); } diff -r 60c2930eb951 -r 187882f1895a njs/njs_string.c --- a/njs/njs_string.c Thu Nov 10 16:47:52 2016 +0300 +++ b/njs/njs_string.c Thu Nov 10 18:45:10 2016 +0300 @@ -179,7 +179,7 @@ njs_string_new(njs_vm_t *vm, njs_value_t if (nxt_fast_path(p != NULL)) { memcpy(p, start, size); - if (size != length && length >= NJS_STRING_MAP_OFFSET) { + if (size != length && length >= NJS_STRING_MAP_STRIDE) { njs_string_offset_map_init(p, size); } @@ -216,9 +216,8 @@ njs_string_alloc(njs_vm_t *vm, njs_value value->data.external0 = 0; value->data.string_size = size; - if (size != length && length > NJS_STRING_MAP_OFFSET) { - total = nxt_align_size(size, sizeof(uint32_t)); - total += ((length - 1) / NJS_STRING_MAP_OFFSET) * sizeof(uint32_t); + if (size != length && length > NJS_STRING_MAP_STRIDE) { + total = njs_string_map_offset(size) + njs_string_map_size(length); } else { total = size; @@ -293,14 +292,13 @@ njs_string_validate(njs_vm_t *vm, njs_st return length; } - if (length > NJS_STRING_MAP_OFFSET) { + if (length > NJS_STRING_MAP_STRIDE) { /* * Reallocate the long string with offset map * after the string. */ - new_size = nxt_align_size(size, sizeof(uint32_t)); - new_size += ((length - 1) / NJS_STRING_MAP_OFFSET) - * sizeof(uint32_t); + new_size = njs_string_map_offset(size) + + njs_string_map_size(length); start = nxt_mem_cache_alloc(vm->mem_cache_pool, new_size); if (nxt_slow_path(start == NULL)) { @@ -473,15 +471,15 @@ njs_string_offset_map_init(const u_char const u_char *p, *end; end = start + size; - map = (uint32_t *) nxt_align_ptr(end, sizeof(uint32_t)); + map = njs_string_map_start(end); p = start; n = 0; - offset = NJS_STRING_MAP_OFFSET; + offset = NJS_STRING_MAP_STRIDE; do { if (offset == 0) { map[n++] = p - start; - offset = NJS_STRING_MAP_OFFSET; + offset = NJS_STRING_MAP_STRIDE; } /* The UTF-8 string should be valid since its length is known. */ @@ -651,7 +649,7 @@ njs_string_prototype_concat(njs_vm_t *vm p += string.size; } - if (length >= NJS_STRING_MAP_OFFSET && size != length) { + if (length >= NJS_STRING_MAP_STRIDE && size != length) { njs_string_offset_map_init(start, size); } @@ -685,7 +683,7 @@ njs_string_prototype_from_utf8(njs_vm_t if (length >= 0) { - if (length < NJS_STRING_MAP_OFFSET || (size_t) length == slice.length) { + if (length < NJS_STRING_MAP_STRIDE || (size_t) length == slice.length) { /* ASCII or short UTF-8 string. */ return njs_string_create(vm, &vm->retval, string.start, slice.length, length); @@ -769,7 +767,7 @@ njs_string_prototype_from_bytes(njs_vm_t s = nxt_utf8_encode(s, *p); } - if (slice.length >= NJS_STRING_MAP_OFFSET || size != slice.length) { + if (slice.length >= NJS_STRING_MAP_STRIDE || size != slice.length) { njs_string_offset_map_init(start, size); } } @@ -1530,13 +1528,13 @@ njs_string_offset(const u_char *start, c uint32_t *map; nxt_uint_t skip; - if (index >= NJS_STRING_MAP_OFFSET) { - map = (uint32_t *) nxt_align_ptr(end, sizeof(uint32_t)); - - start += map[index / NJS_STRING_MAP_OFFSET - 1]; + if (index >= NJS_STRING_MAP_STRIDE) { + map = njs_string_map_start(end); + + start += map[index / NJS_STRING_MAP_STRIDE - 1]; } - for (skip = index % NJS_STRING_MAP_OFFSET; skip != 0; skip--) { + for (skip = index % NJS_STRING_MAP_STRIDE; skip != 0; skip--) { start = nxt_utf8_next(start, end); } @@ -1562,16 +1560,16 @@ njs_string_index(njs_string_prop_t *stri last = 0; index = 0; - if (string->length >= NJS_STRING_MAP_OFFSET) { + if (string->length >= NJS_STRING_MAP_STRIDE) { end = string->start + string->size; - map = (uint32_t *) nxt_align_ptr(end, sizeof(uint32_t)); - - while (index + NJS_STRING_MAP_OFFSET < string->length + map = njs_string_map_start(end); + + while (index + NJS_STRING_MAP_STRIDE < string->length && *map <= offset) { last = *map++; - index += NJS_STRING_MAP_OFFSET; + index += NJS_STRING_MAP_STRIDE; } } @@ -1631,7 +1629,7 @@ njs_string_prototype_to_lower_case(njs_v size--; } - if (string.length >= NJS_STRING_MAP_OFFSET) { + if (string.length >= NJS_STRING_MAP_STRIDE) { njs_string_offset_map_init(start, string.size); } } @@ -1683,7 +1681,7 @@ njs_string_prototype_to_upper_case(njs_v size--; } - if (string.length >= NJS_STRING_MAP_OFFSET) { + if (string.length >= NJS_STRING_MAP_STRIDE) { njs_string_offset_map_init(start, string.size); } } @@ -1867,7 +1865,7 @@ njs_string_prototype_repeat(njs_vm_t *vm n--; } - if (length >= NJS_STRING_MAP_OFFSET && size != length) { + if (length >= NJS_STRING_MAP_STRIDE && size != length) { njs_string_offset_map_init(start, size); } @@ -2884,7 +2882,7 @@ njs_string_replace_join(njs_vm_t *vm, nj /* GC: release valid values. */ } - if (length >= NJS_STRING_MAP_OFFSET && size != length) { + if (length >= NJS_STRING_MAP_STRIDE && size != length) { njs_string_offset_map_init(string, size); } @@ -3655,10 +3653,9 @@ njs_value_index(njs_vm_t *vm, njs_parser length = src->data.u.string->length; - if (size != length && length > NJS_STRING_MAP_OFFSET) { - size = nxt_align_size(size, sizeof(uint32_t)); - size += ((length - 1) / NJS_STRING_MAP_OFFSET) - * sizeof(uint32_t); + if (size != length && length > NJS_STRING_MAP_STRIDE) { + size = njs_string_map_offset(size) + + njs_string_map_size(length); } } diff -r 60c2930eb951 -r 187882f1895a njs/njs_string.h --- a/njs/njs_string.h Thu Nov 10 16:47:52 2016 +0300 +++ b/njs/njs_string.h Thu Nov 10 18:45:10 2016 +0300 @@ -31,7 +31,15 @@ * division and remainder operations but no less than 16 because the maximum * length of short string inlined in njs_value_t is less than 16 bytes. */ -#define NJS_STRING_MAP_OFFSET 32 +#define NJS_STRING_MAP_STRIDE 32 + +#define njs_string_map_offset(size) nxt_align_size((size), sizeof(uint32_t)) + +#define njs_string_map_start(p) \ + ((uint32_t *) nxt_align_ptr((p), sizeof(uint32_t))) + +#define njs_string_map_size(length) \ + (((length - 1) / NJS_STRING_MAP_STRIDE) * sizeof(uint32_t)) /* * The JavaScript standard states that strings are stored in UTF-16. @@ -44,7 +52,7 @@ * encoding does not allow to get quickly a character at specified position. * To speed up this search a map of offsets is stored after the UTF-8 string. * The map is aligned to uint32_t and contains byte positions of each - * NJS_STRING_MAP_OFFSET UTF-8 character except zero position. The map + * NJS_STRING_MAP_STRIDE UTF-8 character except zero position. The map * can be allocated and updated on demand. If a string come outside * JavaScript as byte sequnece just to be concatenated or to be used in * regular expressions the offset map is not required. @@ -53,7 +61,7 @@ * 1) if the length is zero hence it is a byte string; * 2) if the size and length are equal so the string contains only ASCII * characters map is not required; - * 3) if the length is less than NJS_STRING_MAP_OFFSET. + * 3) if the length is less than NJS_STRING_MAP_STRIDE. * * The current implementation does not support Unicode surrogate pairs. * If offset in map points to surrogate pair then the previous offset diff -r 60c2930eb951 -r 187882f1895a njs/njs_vm.c --- a/njs/njs_vm.c Thu Nov 10 16:47:52 2016 +0300 +++ b/njs/njs_vm.c Thu Nov 10 18:45:10 2016 +0300 @@ -1556,7 +1556,7 @@ njs_vmcode_addition(njs_vm_t *vm, njs_va (void) memcpy(start, string1.start, string1.size); (void) memcpy(start + string1.size, string2.start, string2.size); - if (length >= NJS_STRING_MAP_OFFSET && size != length) { + if (length >= NJS_STRING_MAP_STRIDE && size != length) { njs_string_offset_map_init(start, size); } _______________________________________________ nginx-devel mailing list nginx-devel@nginx.org http://mailman.nginx.org/mailman/listinfo/nginx-devel