This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch quickjs-update in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit c124536a738c37d915307c353b3b6e97ad605c1b Author: Nick Vatamaniuc <[email protected]> AuthorDate: Sat Nov 22 12:48:23 2025 -0500 QuickJS: regex/string optimizations & workers uaf fix * Optimize String replace, args object creation, regex exec: https://github.com/bellard/quickjs/commit/9f11034a5aa2c007515385c2ab7cf37103d65ac5 * Optimize regex backtracking: https://github.com/bellard/quickjs/commit/7ab23413b849692350fadf0578d31498cb0043a3 * Fix use-after-free error in worker cleanup (we don't use this): https://github.com/bellard/quickjs/commit/728ed948893731923126d1c69612f04e09598448 --- .../patches/01-spidermonkey-185-mode.patch | 6 +- src/couch_quickjs/patches/02-test262-errors.patch | 4 +- src/couch_quickjs/quickjs/Changelog | 2 +- src/couch_quickjs/quickjs/libregexp-opcode.h | 14 +- src/couch_quickjs/quickjs/libregexp.c | 645 ++++++++-------- src/couch_quickjs/quickjs/quickjs-libc.c | 24 +- src/couch_quickjs/quickjs/quickjs.c | 830 +++++++++++++-------- 7 files changed, 865 insertions(+), 660 deletions(-) diff --git a/src/couch_quickjs/patches/01-spidermonkey-185-mode.patch b/src/couch_quickjs/patches/01-spidermonkey-185-mode.patch index 082001359..049abc944 100644 --- a/src/couch_quickjs/patches/01-spidermonkey-185-mode.patch +++ b/src/couch_quickjs/patches/01-spidermonkey-185-mode.patch @@ -1,6 +1,6 @@ ---- quickjs-master/quickjs.c 2025-11-15 08:52:50 -+++ quickjs/quickjs.c 2025-11-17 17:35:22 -@@ -31337,10 +31337,24 @@ +--- quickjs-master/quickjs.c 2025-11-22 06:10:55.000000000 -0500 ++++ quickjs/quickjs.c 2025-11-22 12:45:37.890107480 -0500 +@@ -31420,10 +31420,24 @@ if (s->token.val == TOK_FUNCTION || (token_is_pseudo_keyword(s, JS_ATOM_async) && peek_token(s, TRUE) == TOK_FUNCTION)) { diff --git a/src/couch_quickjs/patches/02-test262-errors.patch b/src/couch_quickjs/patches/02-test262-errors.patch index b667d834b..581806578 100644 --- a/src/couch_quickjs/patches/02-test262-errors.patch +++ b/src/couch_quickjs/patches/02-test262-errors.patch @@ -1,5 
+1,5 @@ ---- quickjs-master/test262_errors.txt 2025-11-15 08:52:50 -+++ quickjs/test262_errors.txt 2025-11-17 17:35:22 +--- quickjs-master/test262_errors.txt 2025-11-22 06:10:55.000000000 -0500 ++++ quickjs/test262_errors.txt 2025-11-22 12:45:37.894107458 -0500 @@ -19,6 +19,8 @@ test262/test/language/expressions/compound-assignment/S11.13.2_A6.10_T1.js:24: Test262Error: #1: innerX === 2. Actual: 5 test262/test/language/expressions/compound-assignment/S11.13.2_A6.11_T1.js:24: Test262Error: #1: innerX === 2. Actual: 5 diff --git a/src/couch_quickjs/quickjs/Changelog b/src/couch_quickjs/quickjs/Changelog index 7d6afd6da..070b0a77a 100644 --- a/src/couch_quickjs/quickjs/Changelog +++ b/src/couch_quickjs/quickjs/Changelog @@ -1,4 +1,4 @@ -- micro optimizations (15% faster on bench-v8) +- micro optimizations (30% faster on bench-v8) - added resizable array buffers - added ArrayBuffer.prototype.transfer - added the Iterator object and methods diff --git a/src/couch_quickjs/quickjs/libregexp-opcode.h b/src/couch_quickjs/quickjs/libregexp-opcode.h index ebab751df..9908cf373 100644 --- a/src/couch_quickjs/quickjs/libregexp-opcode.h +++ b/src/couch_quickjs/quickjs/libregexp-opcode.h @@ -39,12 +39,13 @@ DEF(goto, 5) DEF(split_goto_first, 5) DEF(split_next_first, 5) DEF(match, 1) +DEF(lookahead_match, 1) +DEF(negative_lookahead_match, 1) /* must come after */ DEF(save_start, 2) /* save start position */ DEF(save_end, 2) /* save end position, must come after saved_start */ DEF(save_reset, 3) /* reset save positions */ -DEF(loop, 5) /* decrement the top the stack and goto if != 0 */ -DEF(push_i32, 5) /* push integer on the stack */ -DEF(drop, 1) +DEF(loop, 6) /* decrement the top the stack and goto if != 0 */ +DEF(push_i32, 6) /* push integer on the stack */ DEF(word_boundary, 1) DEF(word_boundary_i, 1) DEF(not_word_boundary, 1) @@ -58,10 +59,9 @@ DEF(range_i, 3) /* variable length */ DEF(range32, 3) /* variable length */ DEF(range32_i, 3) /* variable length */ DEF(lookahead, 5) 
-DEF(negative_lookahead, 5) -DEF(push_char_pos, 1) /* push the character position on the stack */ -DEF(check_advance, 1) /* pop one stack element and check that it is different from the character position */ +DEF(negative_lookahead, 5) /* must come after */ +DEF(push_char_pos, 2) /* push the character position on the stack */ +DEF(check_advance, 2) /* pop one stack element and check that it is different from the character position */ DEF(prev, 1) /* go to the previous char */ -DEF(simple_greedy_quant, 17) #endif /* DEF */ diff --git a/src/couch_quickjs/quickjs/libregexp.c b/src/couch_quickjs/quickjs/libregexp.c index 118d950eb..28f407b72 100644 --- a/src/couch_quickjs/quickjs/libregexp.c +++ b/src/couch_quickjs/quickjs/libregexp.c @@ -41,9 +41,11 @@ model. */ -#if defined(TEST) +#if defined(TEST) #define DUMP_REOP #endif +//#define DUMP_REOP +//#define DUMP_EXEC typedef enum { #define DEF(id, size) REOP_ ## id, @@ -459,14 +461,14 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf, int buf_len) { int pos, len, opcode, bc_len, re_flags, i; - uint32_t val; + uint32_t val, val2; assert(buf_len >= RE_HEADER_LEN); re_flags = lre_get_flags(buf); bc_len = get_u32(buf + RE_HEADER_BYTECODE_LEN); assert(bc_len + RE_HEADER_LEN <= buf_len); - printf("flags: 0x%x capture_count=%d stack_size=%d\n", + printf("flags: 0x%x capture_count=%d aux_stack_size=%d\n", re_flags, buf[RE_HEADER_CAPTURE_COUNT], buf[RE_HEADER_STACK_SIZE]); if (re_flags & LRE_FLAG_NAMED_GROUPS) { const char *p; @@ -518,19 +520,17 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf, case REOP_goto: case REOP_split_goto_first: case REOP_split_next_first: - case REOP_loop: case REOP_lookahead: case REOP_negative_lookahead: val = get_u32(buf + pos + 1); val += (pos + 5); printf(" %u", val); break; - case REOP_simple_greedy_quant: - printf(" %u %u %u %u", - get_u32(buf + pos + 1) + (pos + 17), - get_u32(buf + pos + 1 + 4), - get_u32(buf + pos + 1 + 8), - get_u32(buf + pos + 1 + 12)); + 
case REOP_loop: + val2 = buf[pos + 1]; + val = get_u32(buf + pos + 2); + val += (pos + 6); + printf(" %u, %u", val2, val); break; case REOP_save_start: case REOP_save_end: @@ -544,8 +544,14 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf, printf(" %u %u", buf[pos + 1], buf[pos + 2]); break; case REOP_push_i32: - val = get_u32(buf + pos + 1); - printf(" %d", val); + val = buf[pos + 1]; + val2 = get_u32(buf + pos + 2); + printf(" %u, %d", val, val2); + break; + case REOP_push_char_pos: + case REOP_check_advance: + val = buf[pos + 1]; + printf(" %u", val); break; case REOP_range: case REOP_range_i: @@ -604,6 +610,16 @@ static int re_emit_goto(REParseState *s, int op, uint32_t val) return pos; } +static int re_emit_goto_u8(REParseState *s, int op, uint32_t arg, uint32_t val) +{ + int pos; + dbuf_putc(&s->byte_code, op); + dbuf_putc(&s->byte_code, arg); + pos = s->byte_code.size; + dbuf_put_u32(&s->byte_code, val - (pos + 4)); + return pos; +} + static void re_emit_op_u8(REParseState *s, int op, uint32_t val) { dbuf_putc(&s->byte_code, op); @@ -1532,7 +1548,6 @@ static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len) case REOP_line_end_m: case REOP_push_i32: case REOP_push_char_pos: - case REOP_drop: case REOP_word_boundary: case REOP_word_boundary_i: case REOP_not_word_boundary: @@ -1557,55 +1572,6 @@ static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len) return ret; } -/* return -1 if a simple quantifier cannot be used. Otherwise return - the number of characters in the atom. 
*/ -static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len) -{ - int pos, opcode, len, count; - uint32_t val; - - count = 0; - pos = 0; - while (pos < bc_buf_len) { - opcode = bc_buf[pos]; - len = reopcode_info[opcode].size; - switch(opcode) { - case REOP_range: - case REOP_range_i: - val = get_u16(bc_buf + pos + 1); - len += val * 4; - goto simple_char; - case REOP_range32: - case REOP_range32_i: - val = get_u16(bc_buf + pos + 1); - len += val * 8; - goto simple_char; - case REOP_char: - case REOP_char_i: - case REOP_char32: - case REOP_char32_i: - case REOP_dot: - case REOP_any: - simple_char: - count++; - break; - case REOP_line_start: - case REOP_line_start_m: - case REOP_line_end: - case REOP_line_end_m: - case REOP_word_boundary: - case REOP_word_boundary_i: - case REOP_not_word_boundary: - case REOP_not_word_boundary_i: - break; - default: - return -1; - } - pos += len; - } - return count; -} - /* '*pp' is the first char after '<' */ static int re_parse_group_name(char *buf, int buf_size, const uint8_t **pp) { @@ -1921,7 +1887,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) p = s->buf_ptr; if (re_parse_expect(s, &p, ')')) return -1; - re_emit_op(s, REOP_match); + re_emit_op(s, REOP_lookahead_match + is_neg); /* jump after the 'match' after the lookahead is successful */ if (dbuf_error(&s->byte_code)) return -1; @@ -2176,38 +2142,6 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) if (last_atom_start < 0) { return re_parse_error(s, "nothing to repeat"); } - if (greedy) { - int len, pos; - - if (quant_max > 0) { - /* specific optimization for simple quantifiers */ - if (dbuf_error(&s->byte_code)) - goto out_of_memory; - len = re_is_simple_quantifier(s->byte_code.buf + last_atom_start, - s->byte_code.size - last_atom_start); - if (len > 0) { - re_emit_op(s, REOP_match); - - if (dbuf_insert(&s->byte_code, last_atom_start, 17)) - goto out_of_memory; - pos = last_atom_start; - s->byte_code.buf[pos++] = 
REOP_simple_greedy_quant; - put_u32(&s->byte_code.buf[pos], - s->byte_code.size - last_atom_start - 17); - pos += 4; - put_u32(&s->byte_code.buf[pos], quant_min); - pos += 4; - put_u32(&s->byte_code.buf[pos], quant_max); - pos += 4; - put_u32(&s->byte_code.buf[pos], len); - pos += 4; - goto done; - } - } - - if (dbuf_error(&s->byte_code)) - goto out_of_memory; - } /* the spec tells that if there is no advance when running the atom after the first quant_min times, then there is no match. We remove this test when we @@ -2232,34 +2166,37 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) s->byte_code.size = last_atom_start; } else if (quant_max == 1 || quant_max == INT32_MAX) { BOOL has_goto = (quant_max == INT32_MAX); - if (dbuf_insert(&s->byte_code, last_atom_start, 5 + add_zero_advance_check)) + if (dbuf_insert(&s->byte_code, last_atom_start, 5 + add_zero_advance_check * 2)) goto out_of_memory; s->byte_code.buf[last_atom_start] = REOP_split_goto_first + greedy; put_u32(s->byte_code.buf + last_atom_start + 1, - len + 5 * has_goto + add_zero_advance_check * 2); + len + 5 * has_goto + add_zero_advance_check * 2 * 2); if (add_zero_advance_check) { s->byte_code.buf[last_atom_start + 1 + 4] = REOP_push_char_pos; - re_emit_op(s, REOP_check_advance); + s->byte_code.buf[last_atom_start + 1 + 4 + 1] = 0; + re_emit_op_u8(s, REOP_check_advance, 0); } if (has_goto) re_emit_goto(s, REOP_goto, last_atom_start); } else { - if (dbuf_insert(&s->byte_code, last_atom_start, 10 + add_zero_advance_check)) + if (dbuf_insert(&s->byte_code, last_atom_start, 11 + add_zero_advance_check * 2)) goto out_of_memory; pos = last_atom_start; s->byte_code.buf[pos++] = REOP_push_i32; + s->byte_code.buf[pos++] = 0; put_u32(s->byte_code.buf + pos, quant_max); pos += 4; + s->byte_code.buf[pos++] = REOP_split_goto_first + greedy; - put_u32(s->byte_code.buf + pos, len + 5 + add_zero_advance_check * 2); + put_u32(s->byte_code.buf + pos, len + 6 + add_zero_advance_check * 2 * 2); pos += 4; 
if (add_zero_advance_check) { s->byte_code.buf[pos++] = REOP_push_char_pos; - re_emit_op(s, REOP_check_advance); + s->byte_code.buf[pos++] = 0; + re_emit_op_u8(s, REOP_check_advance, 0); } - re_emit_goto(s, REOP_loop, last_atom_start + 5); - re_emit_op(s, REOP_drop); + re_emit_goto_u8(s, REOP_loop, 0, last_atom_start + 6); } } else if (quant_min == 1 && quant_max == INT32_MAX && !add_zero_advance_check) { @@ -2269,39 +2206,39 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) if (quant_min == 1) { /* nothing to add */ } else { - if (dbuf_insert(&s->byte_code, last_atom_start, 5)) + if (dbuf_insert(&s->byte_code, last_atom_start, 6)) goto out_of_memory; - s->byte_code.buf[last_atom_start] = REOP_push_i32; - put_u32(s->byte_code.buf + last_atom_start + 1, - quant_min); - last_atom_start += 5; - re_emit_goto(s, REOP_loop, last_atom_start); - re_emit_op(s, REOP_drop); + s->byte_code.buf[last_atom_start++] = REOP_push_i32; + s->byte_code.buf[last_atom_start++] = 0; + put_u32(s->byte_code.buf + last_atom_start, quant_min); + last_atom_start += 4; + re_emit_goto_u8(s, REOP_loop, 0, last_atom_start); } if (quant_max == INT32_MAX) { pos = s->byte_code.size; re_emit_op_u32(s, REOP_split_goto_first + greedy, - len + 5 + add_zero_advance_check * 2); + len + 5 + add_zero_advance_check * 2 * 2); if (add_zero_advance_check) - re_emit_op(s, REOP_push_char_pos); + re_emit_op_u8(s, REOP_push_char_pos, 0); /* copy the atom */ dbuf_put_self(&s->byte_code, last_atom_start, len); if (add_zero_advance_check) - re_emit_op(s, REOP_check_advance); + re_emit_op_u8(s, REOP_check_advance, 0); re_emit_goto(s, REOP_goto, pos); } else if (quant_max > quant_min) { - re_emit_op_u32(s, REOP_push_i32, quant_max - quant_min); + re_emit_op_u8(s, REOP_push_i32, 0); + dbuf_put_u32(&s->byte_code, quant_max - quant_min); + pos = s->byte_code.size; re_emit_op_u32(s, REOP_split_goto_first + greedy, - len + 5 + add_zero_advance_check * 2); + len + 6 + add_zero_advance_check * 2 * 2); if 
(add_zero_advance_check) - re_emit_op(s, REOP_push_char_pos); + re_emit_op_u8(s, REOP_push_char_pos, 0); /* copy the atom */ dbuf_put_self(&s->byte_code, last_atom_start, len); if (add_zero_advance_check) - re_emit_op(s, REOP_check_advance); - re_emit_goto(s, REOP_loop, pos); - re_emit_op(s, REOP_drop); + re_emit_op_u8(s, REOP_check_advance, 0); + re_emit_goto_u8(s, REOP_loop, 0, pos); } } last_atom_start = -1; @@ -2311,7 +2248,6 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) break; } } - done: s->buf_ptr = p; return 0; out_of_memory: @@ -2386,8 +2322,9 @@ static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir) return 0; } -/* the control flow is recursive so the analysis can be linear */ -static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len) +/* the control flow is recursive so the analysis can be linear. As a + side effect, the auxiliary stack addresses are computed. */ +static int compute_stack_size(uint8_t *bc_buf, int bc_buf_len) { int stack_size, stack_size_max, pos, opcode, len; uint32_t val; @@ -2405,6 +2342,7 @@ static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len) switch(opcode) { case REOP_push_i32: case REOP_push_char_pos: + bc_buf[pos + 1] = stack_size; stack_size++; if (stack_size > stack_size_max) { if (stack_size > STACK_SIZE_MAX) @@ -2412,10 +2350,11 @@ static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len) stack_size_max = stack_size; } break; - case REOP_drop: case REOP_check_advance: + case REOP_loop: assert(stack_size > 0); stack_size--; + bc_buf[pos + 1] = stack_size; break; case REOP_range: case REOP_range_i: @@ -2638,23 +2577,26 @@ static BOOL is_word_char(uint32_t c) } \ } while (0) -typedef uintptr_t StackInt; - typedef enum { RE_EXEC_STATE_SPLIT, RE_EXEC_STATE_LOOKAHEAD, RE_EXEC_STATE_NEGATIVE_LOOKAHEAD, - RE_EXEC_STATE_GREEDY_QUANT, } REExecStateEnum; -typedef struct REExecState { - REExecStateEnum type : 8; - uint8_t stack_len; - size_t count; /* only used 
for RE_EXEC_STATE_GREEDY_QUANT */ - const uint8_t *cptr; - const uint8_t *pc; - void *buf[0]; -} REExecState; +#if INTPTR_MAX >= INT64_MAX +#define BP_TYPE_BITS 3 +#else +#define BP_TYPE_BITS 2 +#endif + +typedef union { + uint8_t *ptr; + intptr_t val; /* for bp, the low BP_SHIFT bits store REExecStateEnum */ + struct { + uintptr_t val : sizeof(uintptr_t) * 8 - BP_TYPE_BITS; + uintptr_t type : BP_TYPE_BITS; + } bp; +} StackElem; typedef struct { const uint8_t *cbuf; @@ -2667,50 +2609,11 @@ typedef struct { int interrupt_counter; void *opaque; /* used for stack overflow check */ - size_t state_size; - uint8_t *state_stack; - size_t state_stack_size; - size_t state_stack_len; + StackElem *stack_buf; + size_t stack_size; + StackElem static_stack_buf[32]; /* static stack to avoid allocation in most cases */ } REExecContext; -static int push_state(REExecContext *s, - uint8_t **capture, - StackInt *stack, size_t stack_len, - const uint8_t *pc, const uint8_t *cptr, - REExecStateEnum type, size_t count) -{ - REExecState *rs; - uint8_t *new_stack; - size_t new_size, i, n; - StackInt *stack_buf; - - if (unlikely((s->state_stack_len + 1) > s->state_stack_size)) { - /* reallocate the stack */ - new_size = s->state_stack_size * 3 / 2; - if (new_size < 8) - new_size = 8; - new_stack = lre_realloc(s->opaque, s->state_stack, new_size * s->state_size); - if (!new_stack) - return -1; - s->state_stack_size = new_size; - s->state_stack = new_stack; - } - rs = (REExecState *)(s->state_stack + s->state_stack_len * s->state_size); - s->state_stack_len++; - rs->type = type; - rs->count = count; - rs->stack_len = stack_len; - rs->cptr = cptr; - rs->pc = pc; - n = 2 * s->capture_count; - for(i = 0; i < n; i++) - rs->buf[i] = capture[i]; - stack_buf = (StackInt *)(rs->buf + n); - for(i = 0; i < stack_len; i++) - stack_buf[i] = stack[i]; - return 0; -} - static int lre_poll_timeout(REExecContext *s) { if (unlikely(--s->interrupt_counter <= 0)) { @@ -2721,95 +2624,188 @@ static int 
lre_poll_timeout(REExecContext *s) return 0; } +static no_inline int stack_realloc(REExecContext *s, size_t n) +{ + StackElem *new_stack; + size_t new_size; + new_size = s->stack_size * 3 / 2; + if (new_size < n) + new_size = n; + if (s->stack_buf == s->static_stack_buf) { + new_stack = lre_realloc(s->opaque, NULL, new_size * sizeof(StackElem)); + if (!new_stack) + return -1; + /* XXX: could use correct size */ + memcpy(new_stack, s->stack_buf, s->stack_size * sizeof(StackElem)); + } else { + new_stack = lre_realloc(s->opaque, s->stack_buf, new_size * sizeof(StackElem)); + if (!new_stack) + return -1; + } + s->stack_size = new_size; + s->stack_buf = new_stack; + return 0; +} + /* return 1 if match, 0 if not match or < 0 if error. */ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, - StackInt *stack, int stack_len, - const uint8_t *pc, const uint8_t *cptr, - BOOL no_recurse) + uint8_t **aux_stack, const uint8_t *pc, const uint8_t *cptr) { - int opcode, ret; + int opcode; int cbuf_type; - uint32_t val, c; + uint32_t val, c, idx; const uint8_t *cbuf_end; - + StackElem *sp, *bp, *stack_end; +#ifdef DUMP_EXEC + const uint8_t *pc_start = pc; /* TEST */ +#endif cbuf_type = s->cbuf_type; cbuf_end = s->cbuf_end; + sp = s->stack_buf; + bp = s->stack_buf; + stack_end = s->stack_buf + s->stack_size; + +#define CHECK_STACK_SPACE(n) \ + if (unlikely((stack_end - sp) < (n))) { \ + size_t saved_sp = sp - s->stack_buf; \ + size_t saved_bp = bp - s->stack_buf; \ + if (stack_realloc(s, sp - s->stack_buf + (n))) \ + return LRE_RET_MEMORY_ERROR; \ + stack_end = s->stack_buf + s->stack_size; \ + sp = s->stack_buf + saved_sp; \ + bp = s->stack_buf + saved_bp; \ + } + + /* XXX: could test if the value was saved to reduce the stack size + but slower */ +#define SAVE_CAPTURE(idx, value) \ + { \ + CHECK_STACK_SPACE(2); \ + sp[0].val = idx; \ + sp[1].ptr = capture[idx]; \ + sp += 2; \ + capture[idx] = (value); \ + } + + /* avoid saving the previous value if already 
saved */ +#define SAVE_AUX_STACK(idx, value) \ + { \ + StackElem *sp1; \ + sp1 = sp; \ + for(;;) { \ + if (sp1 > bp) { \ + if (sp1[-2].val == -(int)(idx + 1)) \ + break; \ + sp1 -= 2; \ + } else { \ + CHECK_STACK_SPACE(2); \ + sp[0].val = -(int)(idx + 1); \ + sp[1].ptr = aux_stack[idx]; \ + sp += 2; \ + break; \ + } \ + } \ + aux_stack[idx] = (value); \ + } + + +#ifdef DUMP_EXEC + printf("%5s %5s %5s %5s %s\n", "PC", "CP", "BP", "SP", "OPCODE"); +#endif for(;;) { - // printf("top=%p: pc=%d\n", th_list.top, (int)(pc - (bc_buf + RE_HEADER_LEN))); opcode = *pc++; +#ifdef DUMP_EXEC + printf("%5ld %5ld %5ld %5ld %s\n", + pc - 1 - pc_start, + cbuf_type == 0 ? cptr - s->cbuf : (cptr - s->cbuf) / 2, + bp - s->stack_buf, + sp - s->stack_buf, + reopcode_info[opcode].name); +#endif switch(opcode) { case REOP_match: - { - REExecState *rs; - if (no_recurse) - return (intptr_t)cptr; - ret = 1; - goto recurse; - no_match: - if (no_recurse) + return 1; + no_match: + for(;;) { + REExecStateEnum type; + if (bp == s->stack_buf) return 0; - ret = 0; - recurse: + /* undo the modifications to capture[] and aux_stack[] */ + while (sp > bp) { + intptr_t idx2 = sp[-2].val; + if (idx2 >= 0) + capture[idx2] = sp[-1].ptr; + else + aux_stack[-idx2 - 1] = sp[-1].ptr; + sp -= 2; + } + + pc = sp[-3].ptr; + cptr = sp[-2].ptr; + type = sp[-1].bp.type; + bp = s->stack_buf + sp[-1].bp.val; + sp -= 3; + if (type != RE_EXEC_STATE_LOOKAHEAD) + break; + } + break; + case REOP_lookahead_match: + /* pop all the saved states until reaching the start of + the lookahead and keep the updated captures and + variables and the corresponding undo info. 
*/ + { + StackElem *sp1, *sp_top, *next_sp; + REExecStateEnum type; + + sp_top = sp; for(;;) { - if (lre_poll_timeout(s)) - return LRE_RET_TIMEOUT; - if (s->state_stack_len == 0) - return ret; - rs = (REExecState *)(s->state_stack + - (s->state_stack_len - 1) * s->state_size); - if (rs->type == RE_EXEC_STATE_SPLIT) { - if (!ret) { - pop_state: - memcpy(capture, rs->buf, - sizeof(capture[0]) * 2 * s->capture_count); - pop_state1: - pc = rs->pc; - cptr = rs->cptr; - stack_len = rs->stack_len; - memcpy(stack, rs->buf + 2 * s->capture_count, - stack_len * sizeof(stack[0])); - s->state_stack_len--; - break; - } - } else if (rs->type == RE_EXEC_STATE_GREEDY_QUANT) { - if (!ret) { - uint32_t char_count, i; - memcpy(capture, rs->buf, - sizeof(capture[0]) * 2 * s->capture_count); - stack_len = rs->stack_len; - memcpy(stack, rs->buf + 2 * s->capture_count, - stack_len * sizeof(stack[0])); - pc = rs->pc; - cptr = rs->cptr; - /* go backward */ - char_count = get_u32(pc + 12); - for(i = 0; i < char_count; i++) { - PREV_CHAR(cptr, s->cbuf, cbuf_type); - } - pc = (pc + 16) + (int)get_u32(pc); - rs->cptr = cptr; - rs->count--; - if (rs->count == 0) { - s->state_stack_len--; - } - break; - } - } else { - ret = ((rs->type == RE_EXEC_STATE_LOOKAHEAD && ret) || - (rs->type == RE_EXEC_STATE_NEGATIVE_LOOKAHEAD && !ret)); - if (ret) { - /* keep the capture in case of positive lookahead */ - if (rs->type == RE_EXEC_STATE_LOOKAHEAD) - goto pop_state1; - else - goto pop_state; - } + sp1 = sp; + sp = bp; + pc = sp[-3].ptr; + cptr = sp[-2].ptr; + type = sp[-1].bp.type; + bp = s->stack_buf + sp[-1].bp.val; + sp[-1].ptr = (void *)sp1; /* save the next value for the copy step */ + sp -= 3; + if (type == RE_EXEC_STATE_LOOKAHEAD) + break; + } + if (sp != s->stack_buf) { + /* keep the undo info if there is a saved state */ + sp1 = sp; + while (sp1 < sp_top) { + next_sp = (void *)sp1[2].ptr; + sp1 += 3; + while (sp1 < next_sp) + *sp++ = *sp1++; } - s->state_stack_len--; } } break; + case 
REOP_negative_lookahead_match: + /* pop all the saved states until reaching start of the negative lookahead */ + for(;;) { + REExecStateEnum type; + type = bp[-1].bp.type; + /* undo the modifications to capture[] and aux_stack[] */ + while (sp > bp) { + intptr_t idx2 = sp[-2].val; + if (idx2 >= 0) + capture[idx2] = sp[-1].ptr; + else + aux_stack[-idx2 - 1] = sp[-1].ptr; + sp -= 2; + } + pc = sp[-3].ptr; + cptr = sp[-2].ptr; + type = sp[-1].bp.type; + bp = s->stack_buf + sp[-1].bp.val; + sp -= 3; + if (type == RE_EXEC_STATE_NEGATIVE_LOOKAHEAD) + break; + } + goto no_match; case REOP_char32: case REOP_char32_i: val = get_u32(pc); @@ -2842,24 +2838,40 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, pc1 = pc; pc = pc + (int)val; } - ret = push_state(s, capture, stack, stack_len, - pc1, cptr, RE_EXEC_STATE_SPLIT, 0); - if (ret < 0) - return LRE_RET_MEMORY_ERROR; - break; + CHECK_STACK_SPACE(3); + sp[0].ptr = (uint8_t *)pc1; + sp[1].ptr = (uint8_t *)cptr; + sp[2].bp.val = bp - s->stack_buf; + sp[2].bp.type = RE_EXEC_STATE_SPLIT; + sp += 3; + bp = sp; } + break; case REOP_lookahead: case REOP_negative_lookahead: val = get_u32(pc); pc += 4; - ret = push_state(s, capture, stack, stack_len, - pc + (int)val, cptr, - RE_EXEC_STATE_LOOKAHEAD + opcode - REOP_lookahead, - 0); - if (ret < 0) - return LRE_RET_MEMORY_ERROR; + if (opcode == REOP_lookahead && bp != s->stack_buf && 0) { + int i; + /* save all the capture state so that they can be + restored in case of failure after the lookahead + matches */ + idx = 4 * s->capture_count; + CHECK_STACK_SPACE(idx); + for(i = 0; i < 2 * s->capture_count; i++) { + sp[0].val = i; + sp[1].ptr = capture[i]; + sp += 2; + } + } + CHECK_STACK_SPACE(3); + sp[0].ptr = (uint8_t *)(pc + (int)val); + sp[1].ptr = (uint8_t *)cptr; + sp[2].bp.val = bp - s->stack_buf; + sp[2].bp.type = RE_EXEC_STATE_LOOKAHEAD + opcode - REOP_lookahead; + sp += 3; + bp = sp; break; - case REOP_goto: val = get_u32(pc); pc += 4 + (int)val; @@ 
-2902,7 +2914,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, case REOP_save_end: val = *pc++; assert(val < s->capture_count); - capture[2 * val + opcode - REOP_save_start] = (uint8_t *)cptr; + idx = 2 * val + opcode - REOP_save_start; + SAVE_CAPTURE(idx, (uint8_t *)cptr); break; case REOP_save_reset: { @@ -2911,35 +2924,47 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, val2 = pc[1]; pc += 2; assert(val2 < s->capture_count); + CHECK_STACK_SPACE(2 * (val2 - val + 1)); while (val <= val2) { - capture[2 * val] = NULL; - capture[2 * val + 1] = NULL; + idx = 2 * val; + SAVE_CAPTURE(idx, NULL); + idx = 2 * val + 1; + SAVE_CAPTURE(idx, NULL); val++; } } break; case REOP_push_i32: - val = get_u32(pc); - pc += 4; - stack[stack_len++] = val; - break; - case REOP_drop: - stack_len--; + idx = pc[0]; + val = get_u32(pc + 1); + pc += 5; + SAVE_AUX_STACK(idx, (void *)(uintptr_t)val); break; case REOP_loop: - val = get_u32(pc); - pc += 4; - if (--stack[stack_len - 1] != 0) { - pc += (int)val; - if (lre_poll_timeout(s)) - return LRE_RET_TIMEOUT; + { + uint32_t val2; + idx = pc[0]; + val = get_u32(pc + 1); + pc += 5; + + val2 = (uintptr_t)aux_stack[idx] - 1; + SAVE_AUX_STACK(idx, (void *)(uintptr_t)val2); + if (val2 != 0) { + pc += (int)val; + if (lre_poll_timeout(s)) + return LRE_RET_TIMEOUT; + } } break; case REOP_push_char_pos: - stack[stack_len++] = (uintptr_t)cptr; + idx = pc[0]; + pc++; + SAVE_AUX_STACK(idx, (uint8_t *)cptr); break; case REOP_check_advance: - if (stack[--stack_len] == (uintptr_t)cptr) + idx = pc[0]; + pc++; + if (aux_stack[idx] == cptr) goto no_match; break; case REOP_word_boundary: @@ -3104,50 +3129,10 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, goto no_match; PREV_CHAR(cptr, s->cbuf, cbuf_type); break; - case REOP_simple_greedy_quant: - { - uint32_t next_pos, quant_min, quant_max; - size_t q; - intptr_t res; - const uint8_t *pc1; - - next_pos = get_u32(pc); - quant_min = 
get_u32(pc + 4); - quant_max = get_u32(pc + 8); - pc += 16; - pc1 = pc; - pc += (int)next_pos; - - q = 0; - for(;;) { - if (lre_poll_timeout(s)) - return LRE_RET_TIMEOUT; - res = lre_exec_backtrack(s, capture, stack, stack_len, - pc1, cptr, TRUE); - if (res == LRE_RET_MEMORY_ERROR || - res == LRE_RET_TIMEOUT) - return res; - if (!res) - break; - cptr = (uint8_t *)res; - q++; - if (q >= quant_max && quant_max != INT32_MAX) - break; - } - if (q < quant_min) - goto no_match; - if (q > quant_min) { - /* will examine all matches down to quant_min */ - ret = push_state(s, capture, stack, stack_len, - pc1 - 16, cptr, - RE_EXEC_STATE_GREEDY_QUANT, - q - quant_min); - if (ret < 0) - return LRE_RET_MEMORY_ERROR; - } - } - break; default: +#ifdef DUMP_EXEC + printf("unknown opcode pc=%ld\n", pc - 1 - pc_start); +#endif abort(); } } @@ -3161,8 +3146,8 @@ int lre_exec(uint8_t **capture, int cbuf_type, void *opaque) { REExecContext s_s, *s = &s_s; - int re_flags, i, alloca_size, ret; - StackInt *stack_buf; + int re_flags, i, ret; + uint8_t **aux_stack; const uint8_t *cptr; re_flags = lre_get_flags(bc_buf); @@ -3177,17 +3162,12 @@ int lre_exec(uint8_t **capture, s->interrupt_counter = INTERRUPT_COUNTER_INIT; s->opaque = opaque; - s->state_size = sizeof(REExecState) + - s->capture_count * sizeof(capture[0]) * 2 + - s->stack_size_max * sizeof(stack_buf[0]); - s->state_stack = NULL; - s->state_stack_len = 0; - s->state_stack_size = 0; + s->stack_buf = s->static_stack_buf; + s->stack_size = countof(s->static_stack_buf); for(i = 0; i < s->capture_count * 2; i++) capture[i] = NULL; - alloca_size = s->stack_size_max * sizeof(stack_buf[0]); - stack_buf = alloca(alloca_size); + aux_stack = alloca(s->stack_size_max * sizeof(aux_stack[0])); cptr = cbuf + (cindex << cbuf_type); if (0 < cindex && cindex < clen && s->cbuf_type == 2) { @@ -3197,9 +3177,10 @@ int lre_exec(uint8_t **capture, } } - ret = lre_exec_backtrack(s, capture, stack_buf, 0, bc_buf + RE_HEADER_LEN, - cptr, FALSE); - 
lre_realloc(s->opaque, s->state_stack, 0); + ret = lre_exec_backtrack(s, capture, aux_stack, bc_buf + RE_HEADER_LEN, + cptr); + if (s->stack_buf != s->static_stack_buf) + lre_realloc(s->opaque, s->stack_buf, 0); return ret; } diff --git a/src/couch_quickjs/quickjs/quickjs-libc.c b/src/couch_quickjs/quickjs/quickjs-libc.c index 54a7a15bd..c24b6d53e 100644 --- a/src/couch_quickjs/quickjs/quickjs-libc.c +++ b/src/couch_quickjs/quickjs/quickjs-libc.c @@ -3543,7 +3543,8 @@ static void js_free_port(JSRuntime *rt, JSWorkerMessageHandler *port) if (port) { js_free_message_pipe(port->recv_pipe); JS_FreeValueRT(rt, port->on_message_func); - list_del(&port->link); + if (port->link.prev) + list_del(&port->link); js_free_rt(rt, port); } } @@ -3559,9 +3560,22 @@ static void js_worker_finalizer(JSRuntime *rt, JSValue val) } } +static void js_worker_mark(JSRuntime *rt, JSValueConst val, + JS_MarkFunc *mark_func) +{ + JSWorkerData *worker = JS_GetOpaque(val, js_worker_class_id); + if (worker) { + JSWorkerMessageHandler *port = worker->msg_handler; + if (port) { + JS_MarkValue(rt, port->on_message_func, mark_func); + } + } +} + static JSClassDef js_worker_class = { "Worker", .finalizer = js_worker_finalizer, + .gc_mark = js_worker_mark, }; static void *worker_func(void *opaque) @@ -4139,9 +4153,15 @@ void js_std_free_handlers(JSRuntime *rt) } #ifdef USE_WORKER - /* XXX: free port_list ? */ js_free_message_pipe(ts->recv_pipe); js_free_message_pipe(ts->send_pipe); + + list_for_each_safe(el, el1, &ts->port_list) { + JSWorkerMessageHandler *port = list_entry(el, JSWorkerMessageHandler, link); + /* unlink the message ports. 
They are freed by the Worker object */ + port->link.prev = NULL; + port->link.next = NULL; + } #endif free(ts); diff --git a/src/couch_quickjs/quickjs/quickjs.c b/src/couch_quickjs/quickjs/quickjs.c index daf06b6e1..b84af4a27 100644 --- a/src/couch_quickjs/quickjs/quickjs.c +++ b/src/couch_quickjs/quickjs/quickjs.c @@ -127,7 +127,7 @@ enum { JS_CLASS_BOOLEAN, /* u.object_data */ JS_CLASS_SYMBOL, /* u.object_data */ JS_CLASS_ARGUMENTS, /* u.array | length */ - JS_CLASS_MAPPED_ARGUMENTS, /* | length */ + JS_CLASS_MAPPED_ARGUMENTS, /* u.array | length */ JS_CLASS_DATE, /* u.object_data */ JS_CLASS_MODULE_NS, JS_CLASS_C_FUNCTION, /* u.cfunc */ @@ -460,6 +460,10 @@ struct JSContext { uint8_t std_array_prototype; JSShape *array_shape; /* initial shape for Array objects */ + JSShape *arguments_shape; /* shape for arguments objects */ + JSShape *mapped_arguments_shape; /* shape for mapped arguments objects */ + JSShape *regexp_shape; /* shape for regexp objects */ + JSShape *regexp_result_shape; /* shape for regexp result objects */ JSValue *class_proto; JSValue function_proto; @@ -937,7 +941,7 @@ struct JSObject { uint8_t extensible : 1; uint8_t free_mark : 1; /* only used when freeing objects with cycles */ uint8_t is_exotic : 1; /* TRUE if object has exotic property handlers */ - uint8_t fast_array : 1; /* TRUE if u.array is used for get/put (for JS_CLASS_ARRAY, JS_CLASS_ARGUMENTS and typed arrays) */ + uint8_t fast_array : 1; /* TRUE if u.array is used for get/put (for JS_CLASS_ARRAY, JS_CLASS_ARGUMENTS, JS_CLASS_MAPPED_ARGUMENTS and typed arrays) */ uint8_t is_constructor : 1; /* TRUE if object is a constructor function */ uint8_t has_immutable_prototype : 1; /* cannot modify the prototype */ uint8_t tmp_mark : 1; /* used in JS_WriteObjectRec() */ @@ -986,13 +990,14 @@ struct JSObject { int16_t magic; } cfunc; /* array part for fast arrays and typed arrays */ - struct { /* JS_CLASS_ARRAY, JS_CLASS_ARGUMENTS, JS_CLASS_UINT8C_ARRAY..JS_CLASS_FLOAT64_ARRAY */ + struct { 
/* JS_CLASS_ARRAY, JS_CLASS_ARGUMENTS, JS_CLASS_MAPPED_ARGUMENTS, JS_CLASS_UINT8C_ARRAY..JS_CLASS_FLOAT64_ARRAY */ union { - uint32_t size; /* JS_CLASS_ARRAY, JS_CLASS_ARGUMENTS */ + uint32_t size; /* JS_CLASS_ARRAY */ struct JSTypedArray *typed_array; /* JS_CLASS_UINT8C_ARRAY..JS_CLASS_FLOAT64_ARRAY */ } u1; union { JSValue *values; /* JS_CLASS_ARRAY, JS_CLASS_ARGUMENTS */ + JSVarRef **var_refs; /* JS_CLASS_MAPPED_ARGUMENTS */ void *ptr; /* JS_CLASS_UINT8C_ARRAY..JS_CLASS_FLOAT64_ARRAY */ int8_t *int8_ptr; /* JS_CLASS_INT8_ARRAY */ uint8_t *uint8_ptr; /* JS_CLASS_UINT8_ARRAY, JS_CLASS_UINT8C_ARRAY */ @@ -1122,6 +1127,8 @@ static JSValue js_function_apply(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv, int magic); static void js_array_finalizer(JSRuntime *rt, JSValue val); static void js_array_mark(JSRuntime *rt, JSValueConst val, JS_MarkFunc *mark_func); +static void js_mapped_arguments_finalizer(JSRuntime *rt, JSValue val); +static void js_mapped_arguments_mark(JSRuntime *rt, JSValueConst val, JS_MarkFunc *mark_func); static void js_object_data_finalizer(JSRuntime *rt, JSValue val); static void js_object_data_mark(JSRuntime *rt, JSValueConst val, JS_MarkFunc *mark_func); static void js_c_function_finalizer(JSRuntime *rt, JSValue val); @@ -1190,8 +1197,7 @@ static int JS_ToUint8ClampFree(JSContext *ctx, int32_t *pres, JSValue val); static JSValue js_new_string8_len(JSContext *ctx, const char *buf, int len); static JSValue js_compile_regexp(JSContext *ctx, JSValueConst pattern, JSValueConst flags); -static JSValue js_regexp_set_internal(JSContext *ctx, JSValue obj, - JSValue pattern, JSValue bc); +static JSValue JS_NewRegexp(JSContext *ctx, JSValue pattern, JSValue bc); static void gc_decref(JSRuntime *rt); static int JS_NewClass1(JSRuntime *rt, JSClassID class_id, const JSClassDef *class_def, JSAtom name); @@ -1211,6 +1217,7 @@ static JSValue JS_ToObject(JSContext *ctx, JSValueConst val); static JSValue JS_ToObjectFree(JSContext *ctx, JSValue 
val); static JSProperty *add_property(JSContext *ctx, JSObject *p, JSAtom prop, int prop_flags); +static void free_property(JSRuntime *rt, JSProperty *pr, int prop_flags); static int JS_ToBigInt64Free(JSContext *ctx, int64_t *pres, JSValue val); JSValue JS_ThrowOutOfMemory(JSContext *ctx); static JSValue JS_ThrowTypeErrorRevokedProxy(JSContext *ctx); @@ -1550,7 +1557,7 @@ static JSClassShortDef const js_std_class_def[] = { { JS_ATOM_Boolean, js_object_data_finalizer, js_object_data_mark }, /* JS_CLASS_BOOLEAN */ { JS_ATOM_Symbol, js_object_data_finalizer, js_object_data_mark }, /* JS_CLASS_SYMBOL */ { JS_ATOM_Arguments, js_array_finalizer, js_array_mark }, /* JS_CLASS_ARGUMENTS */ - { JS_ATOM_Arguments, NULL, NULL }, /* JS_CLASS_MAPPED_ARGUMENTS */ + { JS_ATOM_Arguments, js_mapped_arguments_finalizer, js_mapped_arguments_mark }, /* JS_CLASS_MAPPED_ARGUMENTS */ { JS_ATOM_Date, js_object_data_finalizer, js_object_data_mark }, /* JS_CLASS_DATE */ { JS_ATOM_Object, NULL, NULL }, /* JS_CLASS_MODULE_NS */ { JS_ATOM_Function, js_c_function_finalizer, js_c_function_mark }, /* JS_CLASS_C_FUNCTION */ @@ -1676,6 +1683,7 @@ JSRuntime *JS_NewRuntime2(const JSMallocFunctions *mf, void *opaque) countof(js_std_class_def)) < 0) goto fail; rt->class_array[JS_CLASS_ARGUMENTS].exotic = &js_arguments_exotic_methods; + rt->class_array[JS_CLASS_MAPPED_ARGUMENTS].exotic = &js_arguments_exotic_methods; rt->class_array[JS_CLASS_STRING].exotic = &js_string_exotic_methods; rt->class_array[JS_CLASS_MODULE_NS].exotic = &js_module_ns_exotic_methods; @@ -2313,6 +2321,18 @@ static void JS_MarkContext(JSRuntime *rt, JSContext *ctx, if (ctx->array_shape) mark_func(rt, &ctx->array_shape->header); + + if (ctx->arguments_shape) + mark_func(rt, &ctx->arguments_shape->header); + + if (ctx->mapped_arguments_shape) + mark_func(rt, &ctx->mapped_arguments_shape->header); + + if (ctx->regexp_shape) + mark_func(rt, &ctx->regexp_shape->header); + + if (ctx->regexp_result_shape) + mark_func(rt, 
&ctx->regexp_result_shape->header); } void JS_FreeContext(JSContext *ctx) @@ -2376,6 +2396,10 @@ void JS_FreeContext(JSContext *ctx) JS_FreeValue(ctx, ctx->function_proto); js_free_shape_null(ctx->rt, ctx->array_shape); + js_free_shape_null(ctx->rt, ctx->arguments_shape); + js_free_shape_null(ctx->rt, ctx->mapped_arguments_shape); + js_free_shape_null(ctx->rt, ctx->regexp_shape); + js_free_shape_null(ctx->rt, ctx->regexp_result_shape); list_del(&ctx->link); remove_gc_object(&ctx->header); @@ -5169,10 +5193,14 @@ static __maybe_unused void JS_DumpShapes(JSRuntime *rt) printf("}\n"); } -static JSValue JS_NewObjectFromShape(JSContext *ctx, JSShape *sh, JSClassID class_id) +/* 'props[]' is used to initialized the object properties. The number + of elements depends on the shape. */ +static JSValue JS_NewObjectFromShape(JSContext *ctx, JSShape *sh, JSClassID class_id, + JSProperty *props) { JSObject *p; - + int i; + js_trigger_gc(ctx->rt, sizeof(JSObject)); p = js_malloc(ctx, sizeof(JSObject)); if (unlikely(!p)) @@ -5194,6 +5222,13 @@ static JSValue JS_NewObjectFromShape(JSContext *ctx, JSShape *sh, JSClassID clas if (unlikely(!p->prop)) { js_free(ctx, p); fail: + if (props) { + JSShapeProperty *prs = get_shape_prop(sh); + for(i = 0; i < sh->prop_count; i++) { + free_property(ctx->rt, &props[i], prs->flags); + prs++; + } + } js_free_shape(ctx->rt, sh); return JS_EXCEPTION; } @@ -5209,22 +5244,26 @@ static JSValue JS_NewObjectFromShape(JSContext *ctx, JSShape *sh, JSClassID clas p->u.array.u.values = NULL; p->u.array.count = 0; p->u.array.u1.size = 0; - /* the length property is always the first one */ - if (likely(sh == ctx->array_shape)) { - pr = &p->prop[0]; - } else { - /* only used for the first array */ - /* cannot fail */ - pr = add_property(ctx, p, JS_ATOM_length, - JS_PROP_WRITABLE | JS_PROP_LENGTH); + if (!props) { + /* XXX: remove */ + /* the length property is always the first one */ + if (likely(sh == ctx->array_shape)) { + pr = &p->prop[0]; + } else { + /* 
only used for the first array */ + /* cannot fail */ + pr = add_property(ctx, p, JS_ATOM_length, + JS_PROP_WRITABLE | JS_PROP_LENGTH); + } + pr->u.value = JS_NewInt32(ctx, 0); } - pr->u.value = JS_NewInt32(ctx, 0); } break; case JS_CLASS_C_FUNCTION: p->prop[0].u.value = JS_UNDEFINED; break; case JS_CLASS_ARGUMENTS: + case JS_CLASS_MAPPED_ARGUMENTS: case JS_CLASS_UINT8C_ARRAY: case JS_CLASS_INT8_ARRAY: case JS_CLASS_UINT8_ARRAY: @@ -5270,6 +5309,10 @@ static JSValue JS_NewObjectFromShape(JSContext *ctx, JSShape *sh, JSClassID clas } p->header.ref_count = 1; add_gc_object(ctx->rt, &p->header, JS_GC_OBJ_TYPE_JS_OBJECT); + if (props) { + for(i = 0; i < sh->prop_count; i++) + p->prop[i] = props[i]; + } return JS_MKPTR(JS_TAG_OBJECT, p); } @@ -5297,7 +5340,7 @@ JSValue JS_NewObjectProtoClass(JSContext *ctx, JSValueConst proto_val, if (!sh) return JS_EXCEPTION; } - return JS_NewObjectFromShape(ctx, sh, class_id); + return JS_NewObjectFromShape(ctx, sh, class_id, NULL); } /* WARNING: the shape is not hashed. 
It is used for objects where @@ -5320,7 +5363,7 @@ static JSValue JS_NewObjectProtoClassAlloc(JSContext *ctx, JSValueConst proto_va sh = js_new_shape_nohash(ctx, proto, hash_size, n_alloc_props); if (!sh) return JS_EXCEPTION; - return JS_NewObjectFromShape(ctx, sh, class_id); + return JS_NewObjectFromShape(ctx, sh, class_id, NULL); } #if 0 @@ -5383,7 +5426,7 @@ JSValue JS_NewObjectProto(JSContext *ctx, JSValueConst proto) JSValue JS_NewArray(JSContext *ctx) { return JS_NewObjectFromShape(ctx, js_dup_shape(ctx->array_shape), - JS_CLASS_ARRAY); + JS_CLASS_ARRAY, NULL); } JSValue JS_NewObject(JSContext *ctx) @@ -6582,6 +6625,20 @@ void JS_ComputeMemoryUsage(JSRuntime *rt, JSMemoryUsage *s) } } break; + case JS_CLASS_MAPPED_ARGUMENTS: /* u.array | length */ + if (p->fast_array) { + s->fast_array_count++; + if (p->u.array.u.values) { + s->memory_used_count++; + s->memory_used_size += p->u.array.count * + sizeof(*p->u.array.u.var_refs); + s->fast_array_elements += p->u.array.count; + for (i = 0; i < p->u.array.count; i++) { + compute_value_size(*p->u.array.u.var_refs[i]->pvalue, hp); + } + } + } + break; case JS_CLASS_NUMBER: /* u.object_data */ case JS_CLASS_STRING: /* u.object_data */ case JS_CLASS_BOOLEAN: /* u.object_data */ @@ -8579,6 +8636,9 @@ static JSValue JS_GetPropertyValue(JSContext *ctx, JSValueConst this_obj, case JS_CLASS_ARGUMENTS: if (unlikely(idx >= p->u.array.count)) goto slow_path; return JS_DupValue(ctx, p->u.array.u.values[idx]); + case JS_CLASS_MAPPED_ARGUMENTS: + if (unlikely(idx >= p->u.array.count)) goto slow_path; + return JS_DupValue(ctx, *p->u.array.u.var_refs[idx]->pvalue); case JS_CLASS_INT8_ARRAY: if (unlikely(idx >= p->u.array.count)) goto slow_path; return JS_NewInt32(ctx, p->u.array.u.int8_ptr[idx]); @@ -8757,14 +8817,14 @@ static JSProperty *add_property(JSContext *ctx, return &p->prop[p->shape->prop_count - 1]; } -/* can be called on Array or Arguments objects. return < 0 if - memory alloc error. 
*/ +/* can be called on JS_CLASS_ARRAY, JS_CLASS_ARGUMENTS or + JS_CLASS_MAPPED_ARGUMENTS objects. return < 0 if memory alloc + error. */ static no_inline __exception int convert_fast_array_to_array(JSContext *ctx, JSObject *p) { JSProperty *pr; JSShape *sh; - JSValue *tab; uint32_t i, len, new_count; if (js_shape_prepare_update(ctx, p, NULL)) @@ -8778,12 +8838,22 @@ static no_inline __exception int convert_fast_array_to_array(JSContext *ctx, return -1; } - tab = p->u.array.u.values; - for(i = 0; i < len; i++) { - /* add_property cannot fail here but - __JS_AtomFromUInt32(i) fails for i > INT32_MAX */ - pr = add_property(ctx, p, __JS_AtomFromUInt32(i), JS_PROP_C_W_E); - pr->u.value = *tab++; + if (p->class_id == JS_CLASS_MAPPED_ARGUMENTS) { + JSVarRef **tab = p->u.array.u.var_refs; + for(i = 0; i < len; i++) { + /* add_property cannot fail here but + __JS_AtomFromUInt32(i) fails for i > INT32_MAX */ + pr = add_property(ctx, p, __JS_AtomFromUInt32(i), JS_PROP_C_W_E | JS_PROP_VARREF); + pr->u.var_ref = *tab++; + } + } else { + JSValue *tab = p->u.array.u.values; + for(i = 0; i < len; i++) { + /* add_property cannot fail here but + __JS_AtomFromUInt32(i) fails for i > INT32_MAX */ + pr = add_property(ctx, p, __JS_AtomFromUInt32(i), JS_PROP_C_W_E); + pr->u.value = *tab++; + } } js_free(ctx, p->u.array.u.values); p->u.array.count = 0; @@ -8887,10 +8957,15 @@ static int delete_property(JSContext *ctx, JSObject *p, JSAtom atom) if (JS_AtomIsArrayIndex(ctx, &idx, atom) && idx < p->u.array.count) { if (p->class_id == JS_CLASS_ARRAY || - p->class_id == JS_CLASS_ARGUMENTS) { + p->class_id == JS_CLASS_ARGUMENTS || + p->class_id == JS_CLASS_MAPPED_ARGUMENTS) { /* Special case deleting the last element of a fast Array */ if (idx == p->u.array.count - 1) { - JS_FreeValue(ctx, p->u.array.u.values[idx]); + if (p->class_id == JS_CLASS_MAPPED_ARGUMENTS) { + free_var_ref(ctx->rt, p->u.array.u.var_refs[idx]); + } else { + JS_FreeValue(ctx, p->u.array.u.values[idx]); + } 
p->u.array.count = idx; return TRUE; } @@ -9469,6 +9544,11 @@ static int JS_SetPropertyValue(JSContext *ctx, JSValueConst this_obj, goto slow_path; set_value(ctx, &p->u.array.u.values[idx], val); break; + case JS_CLASS_MAPPED_ARGUMENTS: + if (unlikely(idx >= (uint32_t)p->u.array.count)) + goto slow_path; + set_value(ctx, p->u.array.u.var_refs[idx]->pvalue, val); + break; case JS_CLASS_UINT8C_ARRAY: if (JS_ToUint8ClampFree(ctx, &v, val)) return -1; @@ -15666,23 +15746,21 @@ static const JSClassExoticMethods js_arguments_exotic_methods = { static JSValue js_build_arguments(JSContext *ctx, int argc, JSValueConst *argv) { JSValue val, *tab; - JSProperty *pr; + JSProperty props[3]; JSObject *p; int i; - val = JS_NewObjectProtoClass(ctx, ctx->class_proto[JS_CLASS_OBJECT], - JS_CLASS_ARGUMENTS); + props[0].u.value = JS_NewInt32(ctx, argc); /* length */ + props[1].u.value = JS_DupValue(ctx, ctx->array_proto_values); /* Symbol.iterator */ + props[2].u.getset.getter = JS_VALUE_GET_OBJ(JS_DupValue(ctx, ctx->throw_type_error)); /* callee */ + props[2].u.getset.setter = JS_VALUE_GET_OBJ(JS_DupValue(ctx, ctx->throw_type_error)); /* callee */ + + val = JS_NewObjectFromShape(ctx, js_dup_shape(ctx->arguments_shape), + JS_CLASS_ARGUMENTS, props); if (JS_IsException(val)) return val; p = JS_VALUE_GET_OBJ(val); - /* add the length field (cannot fail) */ - pr = add_property(ctx, p, JS_ATOM_length, - JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE); - if (unlikely(!pr)) - goto fail; - pr->u.value = JS_NewInt32(ctx, argc); - /* initialize the fast array part */ tab = NULL; if (argc > 0) { @@ -15695,14 +15773,6 @@ static JSValue js_build_arguments(JSContext *ctx, int argc, JSValueConst *argv) } p->u.array.u.values = tab; p->u.array.count = argc; - - JS_DefinePropertyValue(ctx, val, JS_ATOM_Symbol_iterator, - JS_DupValue(ctx, ctx->array_proto_values), - JS_PROP_CONFIGURABLE | JS_PROP_WRITABLE); - /* add callee property to throw a TypeError in strict mode */ - JS_DefineProperty(ctx, val, 
JS_ATOM_callee, JS_UNDEFINED, - ctx->throw_type_error, ctx->throw_type_error, - JS_PROP_HAS_GET | JS_PROP_HAS_SET); return val; fail: JS_FreeValue(ctx, val); @@ -15712,58 +15782,75 @@ static JSValue js_build_arguments(JSContext *ctx, int argc, JSValueConst *argv) #define GLOBAL_VAR_OFFSET 0x40000000 #define ARGUMENT_VAR_OFFSET 0x20000000 +static void js_mapped_arguments_finalizer(JSRuntime *rt, JSValue val) +{ + JSObject *p = JS_VALUE_GET_OBJ(val); + JSVarRef **var_refs = p->u.array.u.var_refs; + int i; + for(i = 0; i < p->u.array.count; i++) + free_var_ref(rt, var_refs[i]); + js_free_rt(rt, var_refs); +} + +static void js_mapped_arguments_mark(JSRuntime *rt, JSValueConst val, + JS_MarkFunc *mark_func) +{ + JSObject *p = JS_VALUE_GET_OBJ(val); + JSVarRef **var_refs = p->u.array.u.var_refs; + int i; + + for(i = 0; i < p->u.array.count; i++) + mark_func(rt, &var_refs[i]->header); +} + /* legacy arguments object: add references to the function arguments */ static JSValue js_build_mapped_arguments(JSContext *ctx, int argc, JSValueConst *argv, JSStackFrame *sf, int arg_count) { JSValue val; - JSProperty *pr; + JSProperty props[3]; + JSVarRef **tab, *var_ref; JSObject *p; - int i; + int i, j; - val = JS_NewObjectProtoClass(ctx, ctx->class_proto[JS_CLASS_OBJECT], - JS_CLASS_MAPPED_ARGUMENTS); + props[0].u.value = JS_NewInt32(ctx, argc); /* length */ + props[1].u.value = JS_DupValue(ctx, ctx->array_proto_values); /* Symbol.iterator */ + props[2].u.value = JS_DupValue(ctx, ctx->rt->current_stack_frame->cur_func); /* callee */ + + val = JS_NewObjectFromShape(ctx, js_dup_shape(ctx->mapped_arguments_shape), + JS_CLASS_MAPPED_ARGUMENTS, props); if (JS_IsException(val)) return val; p = JS_VALUE_GET_OBJ(val); - /* add the length field (cannot fail) */ - pr = add_property(ctx, p, JS_ATOM_length, - JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE); - if (unlikely(!pr)) - goto fail; - pr->u.value = JS_NewInt32(ctx, argc); - - for(i = 0; i < arg_count; i++) { - JSVarRef *var_ref; - var_ref = 
get_var_ref(ctx, sf, i, TRUE); - if (!var_ref) - goto fail; - pr = add_property(ctx, p, __JS_AtomFromUInt32(i), JS_PROP_C_W_E | JS_PROP_VARREF); - if (!pr) { - free_var_ref(ctx->rt, var_ref); + /* initialize the fast array part */ + tab = NULL; + if (argc > 0) { + tab = js_malloc(ctx, sizeof(tab[0]) * argc); + if (!tab) goto fail; + for(i = 0; i < arg_count; i++) { + var_ref = get_var_ref(ctx, sf, i, TRUE); + if (!var_ref) + goto fail1; + tab[i] = var_ref; + } + for(i = arg_count; i < argc; i++) { + var_ref = js_create_var_ref(ctx, FALSE); + if (!var_ref) { + fail1: + for(j = 0; j < i; j++) + free_var_ref(ctx->rt, tab[j]); + js_free(ctx, tab); + goto fail; + } + var_ref->value = JS_DupValue(ctx, argv[i]); + tab[i] = var_ref; } - pr->u.var_ref = var_ref; - } - - /* the arguments not mapped to the arguments of the function can - be normal properties */ - for(i = arg_count; i < argc; i++) { - if (JS_DefinePropertyValueUint32(ctx, val, i, - JS_DupValue(ctx, argv[i]), - JS_PROP_C_W_E) < 0) - goto fail; } - - JS_DefinePropertyValue(ctx, val, JS_ATOM_Symbol_iterator, - JS_DupValue(ctx, ctx->array_proto_values), - JS_PROP_CONFIGURABLE | JS_PROP_WRITABLE); - /* callee returns this function in non strict mode */ - JS_DefinePropertyValue(ctx, val, JS_ATOM_callee, - JS_DupValue(ctx, ctx->rt->current_stack_frame->cur_func), - JS_PROP_CONFIGURABLE | JS_PROP_WRITABLE); + p->u.array.u.var_refs = tab; + p->u.array.count = argc; return val; fail: JS_FreeValue(ctx, val); @@ -17933,14 +18020,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, CASE(OP_regexp): { - JSValue obj; - obj = JS_NewObjectClass(ctx, JS_CLASS_REGEXP); - if (JS_IsException(obj)) - goto exception; - sp[-2] = js_regexp_set_internal(ctx, obj, sp[-2], sp[-1]); - if (JS_IsException(sp[-2])) - goto exception; + sp[-2] = JS_NewRegexp(ctx, sp[-2], sp[-1]); sp--; + if (JS_IsException(sp[-1])) + goto exception; } BREAK; @@ -40484,11 +40567,17 @@ static JSValue *build_arg_list(JSContext *ctx, 
uint32_t *plen, if (!tab) return NULL; p = JS_VALUE_GET_OBJ(array_arg); - if ((p->class_id == JS_CLASS_ARRAY || p->class_id == JS_CLASS_ARGUMENTS) && + if ((p->class_id == JS_CLASS_ARRAY || p->class_id == JS_CLASS_ARGUMENTS || p->class_id == JS_CLASS_MAPPED_ARGUMENTS) && p->fast_array && len == p->u.array.count) { - for(i = 0; i < len; i++) { - tab[i] = JS_DupValue(ctx, p->u.array.u.values[i]); + if (p->class_id == JS_CLASS_MAPPED_ARGUMENTS) { + for(i = 0; i < len; i++) { + tab[i] = JS_DupValue(ctx, *p->u.array.u.var_refs[i]->pvalue); + } + } else { + for(i = 0; i < len; i++) { + tab[i] = JS_DupValue(ctx, p->u.array.u.values[i]); + } } } else { for(i = 0; i < len; i++) { @@ -45096,41 +45185,43 @@ static JSValue js_string_match(JSContext *ctx, JSValueConst this_val, return result; } -static JSValue js_string___GetSubstitution(JSContext *ctx, JSValueConst this_val, - int argc, JSValueConst *argv) +/* if captures != NULL, captures_val and matched are ignored. Otherwise, + captures_len is ignored */ +static int js_string_GetSubstitution(JSContext *ctx, + StringBuffer *b, + JSValueConst matched, + JSString *sp, + uint32_t position, + JSValueConst captures_val, + JSValueConst namedCaptures, + JSValueConst rep, + uint8_t **captures, + uint32_t captures_len) { - // GetSubstitution(matched, str, position, captures, namedCaptures, rep) - JSValueConst matched, str, captures, namedCaptures, rep; JSValue capture, name, s; - uint32_t position, len, matched_len, captures_len; - int i, j, j0, k, k1; + uint32_t len, matched_len; + int i, j, j0, k, k1, shift; int c, c1; - StringBuffer b_s, *b = &b_s; - JSString *sp, *rp; - - matched = argv[0]; - str = argv[1]; - captures = argv[3]; - namedCaptures = argv[4]; - rep = argv[5]; - - if (!JS_IsString(rep) || !JS_IsString(str)) - return JS_ThrowTypeError(ctx, "not a string"); + JSString *rp; - sp = JS_VALUE_GET_STRING(str); + if (JS_VALUE_GET_TAG(rep) != JS_TAG_STRING) { + JS_ThrowTypeError(ctx, "not a string"); + goto exception; + } + 
shift = sp->is_wide_char; rp = JS_VALUE_GET_STRING(rep); - string_buffer_init(ctx, b, 0); - - captures_len = 0; - if (!JS_IsUndefined(captures)) { - if (js_get_length32(ctx, &captures_len, captures)) + if (captures) { + matched_len = (captures[1] - captures[0]) >> shift; + } else { + captures_len = 0; + if (!JS_IsUndefined(captures_val)) { + if (js_get_length32(ctx, &captures_len, captures_val)) + goto exception; + } + if (js_get_length32(ctx, &matched_len, matched)) goto exception; } - if (js_get_length32(ctx, &matched_len, matched)) - goto exception; - if (JS_ToUint32(ctx, &position, argv[2]) < 0) - goto exception; len = rp->len; i = 0; @@ -45144,8 +45235,12 @@ static JSValue js_string___GetSubstitution(JSContext *ctx, JSValueConst this_val if (c == '$') { string_buffer_putc8(b, '$'); } else if (c == '&') { - if (string_buffer_concat_value(b, matched)) - goto exception; + if (captures) { + string_buffer_concat(b, sp, position, position + matched_len); + } else { + if (string_buffer_concat_value(b, matched)) + goto exception; + } } else if (c == '`') { string_buffer_concat(b, sp, 0, position); } else if (c == '\'') { @@ -45166,12 +45261,21 @@ static JSValue js_string___GetSubstitution(JSContext *ctx, JSValueConst this_val } } if (k >= 1 && k < captures_len) { - s = JS_GetPropertyInt64(ctx, captures, k); - if (JS_IsException(s)) - goto exception; - if (!JS_IsUndefined(s)) { - if (string_buffer_concat_value_free(b, s)) + if (captures) { + int start, end; + if (captures[2 * k] && captures[2 * k + 1]) { + start = (captures[2 * k] - sp->u.str8) >> shift; + end = (captures[2 * k + 1] - sp->u.str8) >> shift; + string_buffer_concat(b, sp, start, end); + } + } else { + s = JS_GetPropertyInt64(ctx, captures_val, k); + if (JS_IsException(s)) goto exception; + if (!JS_IsUndefined(s)) { + if (string_buffer_concat_value_free(b, s)) + goto exception; + } } } else { goto norep; @@ -45198,10 +45302,9 @@ static JSValue js_string___GetSubstitution(JSContext *ctx, JSValueConst 
this_val i = j; } string_buffer_concat(b, rp, i, rp->len); - return string_buffer_end(b); + return 0; exception: - string_buffer_free(b); - return JS_EXCEPTION; + return -1; } static JSValue js_string_replace(JSContext *ctx, JSValueConst this_val, @@ -45210,7 +45313,7 @@ static JSValue js_string_replace(JSContext *ctx, JSValueConst this_val, { // replace(rx, rep) JSValueConst O = this_val, searchValue = argv[0], replaceValue = argv[1]; - JSValueConst args[6]; + JSValueConst args[3]; JSValue str, search_str, replaceValue_str, repl_str; JSString *sp, *searchp; StringBuffer b_s, *b = &b_s; @@ -45279,25 +45382,25 @@ static JSValue js_string_replace(JSContext *ctx, JSValueConst this_val, break; } } + + string_buffer_concat(b, sp, endOfLastMatch, pos); + if (functionalReplace) { args[0] = search_str; args[1] = JS_NewInt32(ctx, pos); args[2] = str; repl_str = JS_ToStringFree(ctx, JS_Call(ctx, replaceValue, JS_UNDEFINED, 3, args)); + if (JS_IsException(repl_str)) + goto exception; + string_buffer_concat_value_free(b, repl_str); } else { - args[0] = search_str; - args[1] = str; - args[2] = JS_NewInt32(ctx, pos); - args[3] = JS_UNDEFINED; - args[4] = JS_UNDEFINED; - args[5] = replaceValue_str; - repl_str = js_string___GetSubstitution(ctx, JS_UNDEFINED, 6, args); - } - if (JS_IsException(repl_str)) - goto exception; + if (js_string_GetSubstitution(ctx, b, search_str, sp, pos, + JS_UNDEFINED, JS_UNDEFINED, replaceValue_str, + NULL, 0)) { + goto exception; + } + } - string_buffer_concat(b, sp, endOfLastMatch, pos); - string_buffer_concat_value_free(b, repl_str); endOfLastMatch = pos + searchp->len; is_first = FALSE; if (!is_replaceAll) @@ -45932,47 +46035,6 @@ static JSValue js_string_toString(JSContext *ctx, JSValueConst this_val, return js_thisStringValue(ctx, this_val); } -#if 0 -static JSValue js_string___toStringCheckObject(JSContext *ctx, JSValueConst this_val, - int argc, JSValueConst *argv) -{ - return JS_ToStringCheckObject(ctx, argv[0]); -} - -static JSValue 
js_string___toString(JSContext *ctx, JSValueConst this_val, - int argc, JSValueConst *argv) -{ - return JS_ToString(ctx, argv[0]); -} - -static JSValue js_string___advanceStringIndex(JSContext *ctx, JSValueConst - this_val, - int argc, JSValueConst *argv) -{ - JSValue str; - int idx; - BOOL is_unicode; - JSString *p; - - str = JS_ToString(ctx, argv[0]); - if (JS_IsException(str)) - return str; - if (JS_ToInt32Sat(ctx, &idx, argv[1])) { - JS_FreeValue(ctx, str); - return JS_EXCEPTION; - } - is_unicode = JS_ToBool(ctx, argv[2]); - p = JS_VALUE_GET_STRING(str); - if (!is_unicode || (unsigned)idx >= p->len || !p->is_wide_char) { - idx++; - } else { - string_getc(p, &idx); - } - JS_FreeValue(ctx, str); - return JS_NewInt32(ctx, idx); -} -#endif - /* String Iterator */ static JSValue js_string_iterator_next(JSContext *ctx, JSValueConst this_val, @@ -46086,11 +46148,6 @@ static const JSCFunctionListEntry js_string_funcs[] = { JS_CFUNC_DEF("fromCharCode", 1, js_string_fromCharCode ), JS_CFUNC_DEF("fromCodePoint", 1, js_string_fromCodePoint ), JS_CFUNC_DEF("raw", 1, js_string_raw ), - //JS_CFUNC_DEF("__toString", 1, js_string___toString ), - //JS_CFUNC_DEF("__isSpace", 1, js_string___isSpace ), - //JS_CFUNC_DEF("__toStringCheckObject", 1, js_string___toStringCheckObject ), - //JS_CFUNC_DEF("__advanceStringIndex", 3, js_string___advanceStringIndex ), - //JS_CFUNC_DEF("__GetSubstitution", 6, js_string___GetSubstitution ), }; static const JSCFunctionListEntry js_string_proto_funcs[] = { @@ -46886,6 +46943,35 @@ static JSValue js_compile_regexp(JSContext *ctx, JSValueConst pattern, return ret; } +/* fast regexp creation */ +static JSValue JS_NewRegexp(JSContext *ctx, JSValue pattern, JSValue bc) +{ + JSValue obj; + JSProperty props[1]; + JSObject *p; + JSRegExp *re; + + /* sanity check */ + if (unlikely(JS_VALUE_GET_TAG(bc) != JS_TAG_STRING || + JS_VALUE_GET_TAG(pattern) != JS_TAG_STRING)) { + JS_ThrowTypeError(ctx, "string expected"); + goto fail; + } + props[0].u.value = 
JS_NewInt32(ctx, 0); /* lastIndex */ + obj = JS_NewObjectFromShape(ctx, js_dup_shape(ctx->regexp_shape), JS_CLASS_REGEXP, props); + if (JS_IsException(obj)) + goto fail; + p = JS_VALUE_GET_OBJ(obj); + re = &p->u.regexp; + re->pattern = JS_VALUE_GET_STRING(pattern); + re->bytecode = JS_VALUE_GET_STRING(bc); + return obj; + fail: + JS_FreeValue(ctx, bc); + JS_FreeValue(ctx, pattern); + return JS_EXCEPTION; +} + /* set the RegExp fields */ static JSValue js_regexp_set_internal(JSContext *ctx, JSValue obj, @@ -47069,27 +47155,6 @@ static JSValue js_regexp_compile(JSContext *ctx, JSValueConst this_val, return JS_EXCEPTION; } -#if 0 -static JSValue js_regexp_get___source(JSContext *ctx, JSValueConst this_val) -{ - JSRegExp *re = js_get_regexp(ctx, this_val, TRUE); - if (!re) - return JS_EXCEPTION; - return JS_DupValue(ctx, JS_MKPTR(JS_TAG_STRING, re->pattern)); -} - -static JSValue js_regexp_get___flags(JSContext *ctx, JSValueConst this_val) -{ - JSRegExp *re = js_get_regexp(ctx, this_val, TRUE); - int flags; - - if (!re) - return JS_EXCEPTION; - flags = lre_get_flags(re->bytecode->u.str8); - return JS_NewInt32(ctx, flags); -} -#endif - static JSValue js_regexp_get_source(JSContext *ctx, JSValueConst this_val) { JSRegExp *re; @@ -47308,18 +47373,52 @@ static JSValue js_regexp_escape(JSContext *ctx, JSValueConst this_val, return string_buffer_end(b); } +/* this_val must be of JS_CLASS_REGEXP */ +static force_inline int js_regexp_get_lastIndex(JSContext *ctx, int64_t *plast_index, + JSValueConst this_val) +{ + JSObject *p = JS_VALUE_GET_OBJ(this_val); + + /* lastIndex is always the first property (it is not configurable) */ + if (likely(JS_VALUE_GET_TAG(p->prop[0].u.value) == JS_TAG_INT)) { + *plast_index = max_int(JS_VALUE_GET_INT(p->prop[0].u.value), 0); + return 0; + } else { + return JS_ToLengthFree(ctx, plast_index, JS_DupValue(ctx, p->prop[0].u.value)); + } +} + +/* this_val must be of JS_CLASS_REGEXP */ +static force_inline int js_regexp_set_lastIndex(JSContext 
*ctx, JSValueConst this_val, + int last_index) +{ + JSObject *p = JS_VALUE_GET_OBJ(this_val); + + /* lastIndex is always the first property (it is not configurable) */ + if (likely(JS_VALUE_GET_TAG(p->prop[0].u.value) == JS_TAG_INT && + (get_shape_prop(p->shape)->flags & JS_PROP_WRITABLE))) { + set_value(ctx, &p->prop[0].u.value, JS_NewInt32(ctx, last_index)); + } else { + if (JS_SetProperty(ctx, this_val, JS_ATOM_lastIndex, + JS_NewInt32(ctx, last_index)) < 0) + return -1; + } + return 0; +} + static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { JSRegExp *re = js_get_regexp(ctx, this_val, TRUE); JSString *str; - JSValue t, ret, str_val, obj, val, groups; + JSValue t, ret, str_val, obj, groups; JSValue indices, indices_groups; uint8_t *re_bytecode; uint8_t **capture, *str_buf; int rc, capture_count, shift, i, re_flags; int64_t last_index; const char *group_name_ptr; + JSObject *p_obj; if (!re) return JS_EXCEPTION; @@ -47335,8 +47434,7 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, indices_groups = JS_UNDEFINED; capture = NULL; - val = JS_GetProperty(ctx, this_val, JS_ATOM_lastIndex); - if (JS_IsException(val) || JS_ToLengthFree(ctx, &last_index, val)) + if (js_regexp_get_lastIndex(ctx, &last_index, this_val)) goto fail; re_bytecode = re->bytecode->u.str8; @@ -47363,8 +47461,7 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, if (rc != 1) { if (rc >= 0) { if (rc == 2 || (re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY))) { - if (JS_SetProperty(ctx, this_val, JS_ATOM_lastIndex, - JS_NewInt32(ctx, 0)) < 0) + if (js_regexp_set_lastIndex(ctx, this_val, 0) < 0) goto fail; } } else { @@ -47377,14 +47474,13 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, } } else { int prop_flags; + JSProperty props[4]; + if (re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY)) { - if (JS_SetProperty(ctx, this_val, JS_ATOM_lastIndex, - JS_NewInt32(ctx, (capture[1] - str_buf) >> 
shift)) < 0) + if (js_regexp_set_lastIndex(ctx, this_val, + (capture[1] - str_buf) >> shift) < 0) goto fail; } - obj = JS_NewArray(ctx); - if (JS_IsException(obj)) - goto fail; prop_flags = JS_PROP_C_W_E | JS_PROP_THROW; group_name_ptr = lre_get_groupnames(re_bytecode); if (group_name_ptr) { @@ -47403,6 +47499,21 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, } } + props[0].u.value = JS_NewInt32(ctx, capture_count); /* length */ + props[1].u.value = JS_NewInt32(ctx, (capture[0] - str_buf) >> shift); /* index */ + props[2].u.value = str_val; /* input */ + props[3].u.value = JS_DupValue(ctx, groups); /* groups */ + + str_val = JS_UNDEFINED; + obj = JS_NewObjectFromShape(ctx, js_dup_shape(ctx->regexp_result_shape), + JS_CLASS_ARRAY, props); + if (JS_IsException(obj)) + goto fail; + + p_obj = JS_VALUE_GET_OBJ(obj); + if (expand_fast_array(ctx, p_obj, capture_count)) + goto fail; + for(i = 0; i < capture_count; i++) { const char *name = NULL; uint8_t **match = &capture[2 * i]; @@ -47468,23 +47579,7 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, goto fail; } } - - if (JS_DefinePropertyValueUint32(ctx, obj, i, val, prop_flags) < 0) - goto fail; - } - - t = JS_NewInt32(ctx, (capture[0] - str_buf) >> shift); - if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_index, t, prop_flags) < 0) - goto fail; - - t = str_val, str_val = JS_UNDEFINED; - if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_input, t, prop_flags) < 0) - goto fail; - - t = groups, groups = JS_UNDEFINED; - if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_groups, - t, prop_flags) < 0) { - goto fail; + p_obj->u.array.u.values[p_obj->u.array.count++] = val; } if (!JS_IsUndefined(indices)) { @@ -47512,12 +47607,13 @@ fail: return ret; } -/* delete portions of a string that match a given regex */ -static JSValue JS_RegExpDelete(JSContext *ctx, JSValueConst this_val, JSValueConst arg) +/* XXX: add group names support */ +static JSValue js_regexp_replace(JSContext *ctx, JSValueConst 
this_val, JSValueConst arg, + JSValueConst rep_val) { JSRegExp *re = js_get_regexp(ctx, this_val, TRUE); JSString *str; - JSValue str_val, val; + JSValue str_val; uint8_t *re_bytecode; int ret; uint8_t **capture, *str_buf; @@ -47525,10 +47621,17 @@ static JSValue JS_RegExpDelete(JSContext *ctx, JSValueConst this_val, JSValueCon int next_src_pos, start, end; int64_t last_index; StringBuffer b_s, *b = &b_s; - + JSString *rp = JS_VALUE_GET_STRING(rep_val); + const char *group_name_ptr; + BOOL fullUnicode; + if (!re) return JS_EXCEPTION; - + re_bytecode = re->bytecode->u.str8; + group_name_ptr = lre_get_groupnames(re_bytecode); + if (group_name_ptr) + return JS_UNDEFINED; /* group names are not supported yet */ + string_buffer_init(ctx, b, 0); capture = NULL; @@ -47536,13 +47639,16 @@ static JSValue JS_RegExpDelete(JSContext *ctx, JSValueConst this_val, JSValueCon if (JS_IsException(str_val)) goto fail; str = JS_VALUE_GET_STRING(str_val); - re_bytecode = re->bytecode->u.str8; re_flags = lre_get_flags(re_bytecode); + + if (re_flags & LRE_FLAG_GLOBAL) { + if (js_regexp_set_lastIndex(ctx, this_val, 0)) + goto fail; + } if ((re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY)) == 0) { last_index = 0; } else { - val = JS_GetProperty(ctx, this_val, JS_ATOM_lastIndex); - if (JS_IsException(val) || JS_ToLengthFree(ctx, &last_index, val)) + if (js_regexp_get_lastIndex(ctx, &last_index, this_val)) goto fail; } capture_count = lre_get_capture_count(re_bytecode); @@ -47551,20 +47657,21 @@ static JSValue JS_RegExpDelete(JSContext *ctx, JSValueConst this_val, JSValueCon if (!capture) goto fail; } + fullUnicode = ((re_flags & (LRE_FLAG_UNICODE | LRE_FLAG_UNICODE_SETS)) != 0); shift = str->is_wide_char; str_buf = str->u.str8; next_src_pos = 0; for (;;) { - if (last_index > str->len) - break; - - ret = lre_exec(capture, re_bytecode, - str_buf, last_index, str->len, shift, ctx); + if (last_index > str->len) { + ret = 0; + } else { + ret = lre_exec(capture, re_bytecode, + str_buf, last_index, 
str->len, shift, ctx); + } if (ret != 1) { if (ret >= 0) { if (ret == 2 || (re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY))) { - if (JS_SetProperty(ctx, this_val, JS_ATOM_lastIndex, - JS_NewInt32(ctx, 0)) < 0) + if (js_regexp_set_lastIndex(ctx, this_val, 0) < 0) goto fail; } } else { @@ -47584,19 +47691,23 @@ static JSValue JS_RegExpDelete(JSContext *ctx, JSValueConst this_val, JSValueCon if (string_buffer_concat(b, str, next_src_pos, start)) goto fail; } + if (rp->len != 0) { + if (js_string_GetSubstitution(ctx, b, JS_UNDEFINED, str, start, + JS_UNDEFINED, JS_UNDEFINED, rep_val, + capture, capture_count)) { + goto fail; + } + } next_src_pos = end; if (!(re_flags & LRE_FLAG_GLOBAL)) { - if (JS_SetProperty(ctx, this_val, JS_ATOM_lastIndex, - JS_NewInt32(ctx, end)) < 0) - goto fail; + if (re_flags & LRE_FLAG_STICKY) { + if (js_regexp_set_lastIndex(ctx, this_val, end) < 0) + goto fail; + } break; } if (end == start) { - if (!(re_flags & LRE_FLAG_UNICODE) || (unsigned)end >= str->len || !str->is_wide_char) { - end++; - } else { - string_getc(str, &end); - } + end = string_advance_index(str, end, fullUnicode); } last_index = end; } @@ -47633,19 +47744,6 @@ static JSValue JS_RegExpExec(JSContext *ctx, JSValueConst r, JSValueConst s) return js_regexp_exec(ctx, r, 1, &s); } -#if 0 -static JSValue js_regexp___RegExpExec(JSContext *ctx, JSValueConst this_val, - int argc, JSValueConst *argv) -{ - return JS_RegExpExec(ctx, argv[0], argv[1]); -} -static JSValue js_regexp___RegExpDelete(JSContext *ctx, JSValueConst this_val, - int argc, JSValueConst *argv) -{ - return JS_RegExpDelete(ctx, argv[0], argv[1]); -} -#endif - static JSValue js_regexp_test(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { @@ -47954,26 +48052,78 @@ static int value_buffer_append(ValueBuffer *b, JSValue val) return 0; } -static int js_is_standard_regexp(JSContext *ctx, JSValueConst rx) +/* find in 'p' or its prototypes */ +static JSShapeProperty *find_property_regexp(JSProperty 
**ppr, + JSObject *p, JSAtom atom) { - JSValue val; - int res; + JSShapeProperty *prs; - val = JS_GetProperty(ctx, rx, JS_ATOM_constructor); - if (JS_IsException(val)) - return -1; - // rx.constructor === RegExp - res = js_same_value(ctx, val, ctx->regexp_ctor); - JS_FreeValue(ctx, val); - if (res) { - val = JS_GetProperty(ctx, rx, JS_ATOM_exec); - if (JS_IsException(val)) - return -1; - // rx.exec === RE_exec - res = JS_IsCFunction(ctx, val, js_regexp_exec, 0); - JS_FreeValue(ctx, val); + for(;;) { + prs = find_own_property(ppr, p, atom); + if (prs) + return prs; + p = p->shape->proto; + if (!p) + return NULL; + if (p->is_exotic) + return NULL; } - return res; +} + +static BOOL check_regexp_getter(JSContext *ctx, + JSObject *p, JSAtom atom, + JSCFunction *func, int magic) +{ + JSProperty *pr; + JSShapeProperty *prs; + + prs = find_property_regexp(&pr, p, atom); + if (!prs) + return FALSE; + if ((prs->flags & JS_PROP_TMASK) != JS_PROP_GETSET) + return FALSE; + return JS_IsCFunction(ctx, JS_MKPTR(JS_TAG_OBJECT, pr->u.getset.getter), + func, magic); +} + +static BOOL js_is_standard_regexp(JSContext *ctx, JSValueConst obj) +{ + JSObject *p; + JSProperty *pr; + JSShapeProperty *prs; + JSCFunctionType ft; + + if (JS_VALUE_GET_TAG(obj) != JS_TAG_OBJECT) + return FALSE; + p = JS_VALUE_GET_OBJ(obj); + if (p->class_id != JS_CLASS_REGEXP) + return FALSE; + /* check that the lastIndex is a number (no side effect while getting it) */ + prs = find_own_property(&pr, p, JS_ATOM_lastIndex); + if (!prs) + return FALSE; + if (!JS_IsNumber(pr->u.value)) + return FALSE; + + /* check the 'exec' method. 
*/ + prs = find_property_regexp(&pr, p, JS_ATOM_exec); + if (!prs) + return FALSE; + if ((prs->flags & JS_PROP_TMASK) != JS_PROP_NORMAL) + return FALSE; + if (!JS_IsCFunction(ctx, pr->u.value, js_regexp_exec, 0)) + return FALSE; + /* check the flag getters */ + ft.getter = js_regexp_get_flags; + if (!check_regexp_getter(ctx, p, JS_ATOM_flags, ft.generic, 0)) + return FALSE; + ft.getter_magic = js_regexp_get_flag; + if (!check_regexp_getter(ctx, p, JS_ATOM_global, ft.generic, LRE_FLAG_GLOBAL)) + return FALSE; + if (!check_regexp_getter(ctx, p, JS_ATOM_unicode, ft.generic, LRE_FLAG_UNICODE)) + return FALSE; + /* XXX: need to check all accessors, need a faster way. */ + return TRUE; } static JSValue js_regexp_Symbol_replace(JSContext *ctx, JSValueConst this_val, @@ -47983,7 +48133,7 @@ static JSValue js_regexp_Symbol_replace(JSContext *ctx, JSValueConst this_val, JSValueConst rx = this_val, rep = argv[1]; JSValueConst args[6]; JSValue flags, str, rep_val, matched, tab, rep_str, namedCaptures, res; - JSString *p, *sp, *rp; + JSString *p, *sp; StringBuffer b_s, *b = &b_s; ValueBuffer v_b, *results = &v_b; int nextSourcePosition, n, j, functionalReplace, is_global, fullUnicode; @@ -48008,15 +48158,20 @@ static JSValue js_regexp_Symbol_replace(JSContext *ctx, JSValueConst this_val, goto exception; sp = JS_VALUE_GET_STRING(str); - rp = NULL; functionalReplace = JS_IsFunction(ctx, rep); if (!functionalReplace) { rep_val = JS_ToString(ctx, rep); if (JS_IsException(rep_val)) goto exception; - rp = JS_VALUE_GET_STRING(rep_val); } + if (!functionalReplace && js_is_standard_regexp(ctx, rx)) { + /* use faster version for simple cases */ + res = js_regexp_replace(ctx, rx, str, rep_val); + if (!JS_IsUndefined(res)) + goto done; + } + flags = JS_GetProperty(ctx, rx, JS_ATOM_flags); if (JS_IsException(flags)) goto exception; @@ -48034,11 +48189,6 @@ static JSValue js_regexp_Symbol_replace(JSContext *ctx, JSValueConst this_val, goto exception; } - if (rp && rp->len == 0 && is_global 
&& js_is_standard_regexp(ctx, rx)) { - /* use faster version for simple cases */ - res = JS_RegExpDelete(ctx, rx, str); - goto done; - } for(;;) { JSValue result; result = JS_RegExpExec(ctx, rx, str); @@ -48122,6 +48272,9 @@ static JSValue js_regexp_Symbol_replace(JSContext *ctx, JSValueConst this_val, rep_str = JS_ToStringFree(ctx, js_function_apply(ctx, rep, 2, args, 0)); } else { JSValue namedCaptures1; + StringBuffer b1_s, *b1 = &b1_s; + int ret; + if (!JS_IsUndefined(namedCaptures)) { namedCaptures1 = JS_ToObject(ctx, namedCaptures); if (JS_IsException(namedCaptures1)) @@ -48129,15 +48282,16 @@ static JSValue js_regexp_Symbol_replace(JSContext *ctx, JSValueConst this_val, } else { namedCaptures1 = JS_UNDEFINED; } - args[0] = matched; - args[1] = str; - args[2] = JS_NewInt32(ctx, position); - args[3] = tab; - args[4] = namedCaptures1; - args[5] = rep_val; JS_FreeValue(ctx, rep_str); - rep_str = js_string___GetSubstitution(ctx, JS_UNDEFINED, 6, args); + + string_buffer_init(ctx, b1, 0); + ret = js_string_GetSubstitution(ctx, b1, matched, sp, position, + tab, namedCaptures1, rep_val, + NULL, 0); + rep_str = string_buffer_end(b1); JS_FreeValue(ctx, namedCaptures1); + if (ret) + goto exception; } if (JS_IsException(rep_str)) goto exception; @@ -48355,8 +48509,6 @@ done: static const JSCFunctionListEntry js_regexp_funcs[] = { JS_CFUNC_DEF("escape", 1, js_regexp_escape ), JS_CGETSET_DEF("[Symbol.species]", js_get_this, NULL ), - //JS_CFUNC_DEF("__RegExpExec", 2, js_regexp___RegExpExec ), - //JS_CFUNC_DEF("__RegExpDelete", 2, js_regexp___RegExpDelete ), }; static const JSCFunctionListEntry js_regexp_proto_funcs[] = { @@ -48379,8 +48531,6 @@ static const JSCFunctionListEntry js_regexp_proto_funcs[] = { JS_CFUNC_DEF("[Symbol.matchAll]", 1, js_regexp_Symbol_matchAll ), JS_CFUNC_DEF("[Symbol.search]", 1, js_regexp_Symbol_search ), JS_CFUNC_DEF("[Symbol.split]", 2, js_regexp_Symbol_split ), - //JS_CGETSET_DEF("__source", js_regexp_get___source, NULL ), - 
//JS_CGETSET_DEF("__flags", js_regexp_get___flags, NULL ), }; static const JSCFunctionListEntry js_regexp_string_iterator_proto_funcs[] = { @@ -48415,6 +48565,32 @@ int JS_AddIntrinsicRegExp(JSContext *ctx) countof(js_regexp_string_iterator_proto_funcs)); if (JS_IsException(ctx->class_proto[JS_CLASS_REGEXP_STRING_ITERATOR])) return -1; + + ctx->regexp_shape = js_new_shape2(ctx, get_proto_obj(ctx->class_proto[JS_CLASS_REGEXP]), + JS_PROP_INITIAL_HASH_SIZE, 1); + if (!ctx->regexp_shape) + return -1; + if (add_shape_property(ctx, &ctx->regexp_shape, NULL, + JS_ATOM_lastIndex, JS_PROP_WRITABLE)) + return -1; + + ctx->regexp_result_shape = js_new_shape2(ctx, get_proto_obj(ctx->class_proto[JS_CLASS_ARRAY]), + JS_PROP_INITIAL_HASH_SIZE, 4); + if (!ctx->regexp_result_shape) + return -1; + if (add_shape_property(ctx, &ctx->regexp_result_shape, NULL, + JS_ATOM_length, JS_PROP_WRITABLE | JS_PROP_LENGTH)) + return -1; + if (add_shape_property(ctx, &ctx->regexp_result_shape, NULL, + JS_ATOM_index, JS_PROP_C_W_E)) + return -1; + if (add_shape_property(ctx, &ctx->regexp_result_shape, NULL, + JS_ATOM_input, JS_PROP_C_W_E)) + return -1; + if (add_shape_property(ctx, &ctx->regexp_result_shape, NULL, + JS_ATOM_groups, JS_PROP_C_W_E)) + return -1; + return 0; } @@ -55237,6 +55413,34 @@ static int JS_AddIntrinsicBasicObjects(JSContext *ctx) JS_ATOM_length, JS_PROP_WRITABLE | JS_PROP_LENGTH)) return -1; ctx->std_array_prototype = TRUE; + + ctx->arguments_shape = js_new_shape2(ctx, get_proto_obj(ctx->class_proto[JS_CLASS_OBJECT]), + JS_PROP_INITIAL_HASH_SIZE, 3); + if (!ctx->arguments_shape) + return -1; + if (add_shape_property(ctx, &ctx->arguments_shape, NULL, + JS_ATOM_length, JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE)) + return -1; + if (add_shape_property(ctx, &ctx->arguments_shape, NULL, + JS_ATOM_Symbol_iterator, JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE)) + return -1; + if (add_shape_property(ctx, &ctx->arguments_shape, NULL, + JS_ATOM_callee, JS_PROP_GETSET)) + return -1; + + 
ctx->mapped_arguments_shape = js_new_shape2(ctx, get_proto_obj(ctx->class_proto[JS_CLASS_OBJECT]), + JS_PROP_INITIAL_HASH_SIZE, 3); + if (!ctx->mapped_arguments_shape) + return -1; + if (add_shape_property(ctx, &ctx->mapped_arguments_shape, NULL, + JS_ATOM_length, JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE)) + return -1; + if (add_shape_property(ctx, &ctx->mapped_arguments_shape, NULL, + JS_ATOM_Symbol_iterator, JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE)) + return -1; + if (add_shape_property(ctx, &ctx->mapped_arguments_shape, NULL, + JS_ATOM_callee, JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE)) + return -1; return 0; }
