Emscripten's Fiber API implements coroutine switching using the stack unwinding and rewinding capabilities of Asyncify [1]. When a coroutine yields (i.e. switches out), Asyncify unwinds the stack, returning control to Emscripten's JS code (Fiber.trampoline()), which then rewinds the stack to resume execution in the target coroutine. Stack unwinding is implemented as a sequence of immediate function returns, while rewinding works by re-entering each function on the call stack and skipping any code between the top of the function and the original call position [2].
This commit modifies the Wasm TB modules to support Fiber coroutines. Assuming the TCG CPU loop is executed by only one coroutine per thread, a TB module must allow helper functions to unwind and to be resumed via rewinding. Specifically:

- When a helper returns due to an unwind, the module must immediately return to its caller, allowing the unwinding to propagate.
- When the module is called again for a rewind, it must skip any code between the top of the function and the call position that triggered the unwind, and directly enter the helper.

To support this:

- TBs now check the Asyncify.state JS object after each helper call. If unwinding is in progress, the TB immediately returns control to the caller.
- Each function call is preceded by a block boundary and an update of the BLOCK_PTR_IDX variable. This enables the TB to re-enter execution at the correct point during a rewind, skipping earlier blocks.

Additionally, this commit introduces wasmContext.do_init, a flag indicating whether the TB should reset the BLOCK_PTR_IDX variable to 0 (i.e. start from the beginning). call_wasm_tb always sets this flag (ctx->do_init = 1) to ensure that normal TB execution begins at the first block. Once the TB has reset the BLOCK_PTR_IDX variable, it also clears do_init. During a rewind, the C code does not set ctx->do_init to 1, so the TB preserves the BLOCK_PTR_IDX value from the previous unwind and correctly resumes execution from the last unwound block.
[1] https://emscripten.org/docs/api_reference/fiber.h.html [2] https://kripken.github.io/blog/wasm/2019/07/16/asyncify.html#new-asyncify Signed-off-by: Kohei Tokunaga <ktokunaga.m...@gmail.com> --- tcg/wasm32.c | 3 ++ tcg/wasm32.h | 11 ++++++++ tcg/wasm32/tcg-target.c.inc | 56 +++++++++++++++++++++++++++++++++++-- 3 files changed, 68 insertions(+), 2 deletions(-) diff --git a/tcg/wasm32.c b/tcg/wasm32.c index f2269c0a22..e6a3dbf750 100644 --- a/tcg/wasm32.c +++ b/tcg/wasm32.c @@ -53,6 +53,9 @@ EM_JS(int, instantiate_wasm, (int wasm_begin, const memory_v = new DataView(HEAP8.buffer); const wasm = HEAP8.subarray(wasm_begin, wasm_begin + wasm_size); var helper = {}; + helper.u = () => { + return (Asyncify.state != Asyncify.State.Unwinding) ? 1 : 0; + }; for (var i = 0; i < import_vec_size / 4; i++) { helper[i] = wasmTable.get( memory_v.getInt32(import_vec_begin + i * 4, true)); diff --git a/tcg/wasm32.h b/tcg/wasm32.h index f8651af4ee..f2749f1e0e 100644 --- a/tcg/wasm32.h +++ b/tcg/wasm32.h @@ -32,11 +32,22 @@ struct wasmContext { * Pointer to a stack array. */ uint64_t *stack; + + /* + * Flag indicates whether to initialize basic registers(1) or not(0). + */ + uint32_t do_init; }; /* Instantiated Wasm function of a TB */ typedef int32_t (*wasm_tb_func)(struct wasmContext *); +static inline int32_t call_wasm_tb(wasm_tb_func f, struct wasmContext *ctx) +{ + ctx->do_init = 1; /* reset block index (rewinding will skip this) */ + return f(ctx); +} + /* * TB of wasm backend starts from a header which stores pointers for each data * stored in the following region in the TB. 
diff --git a/tcg/wasm32/tcg-target.c.inc b/tcg/wasm32/tcg-target.c.inc index d9a3abae70..04cd9b6e4a 100644 --- a/tcg/wasm32/tcg-target.c.inc +++ b/tcg/wasm32/tcg-target.c.inc @@ -135,7 +135,8 @@ static const uint8_t tcg_target_reg_index[TCG_TARGET_NB_REGS] = { #define TMP64_LOCAL_0_IDX 3 /* function index */ -#define HELPER_IDX_START 0 /* helper funcitons */ +#define CHECK_UNWINDING_IDX 0 /* a funtion of checking Asyncify status */ +#define HELPER_IDX_START 1 /* helper funcitons */ #define BUF_SIZE 1024 typedef struct LinkedBuf { @@ -1895,6 +1896,17 @@ static int get_helper_idx(TCGContext *s, int helper_idx_on_qemu) return -1; } +static void tcg_wasm_out_handle_unwinding(TCGContext *s) +{ + tcg_wasm_out_op_call(s, CHECK_UNWINDING_IDX); + tcg_wasm_out_op_i32_eqz(s); + tcg_wasm_out_op_if_noret(s); + tcg_wasm_out_op_i32_const(s, 0); + /* returns if unwinding */ + tcg_wasm_out_op_return(s); + tcg_wasm_out_op_end(s); +} + static void tcg_wasm_out_call(TCGContext *s, int func, const TCGHelperInfo *info) { @@ -1908,7 +1920,16 @@ static void tcg_wasm_out_call(TCGContext *s, int func, tcg_wasm_out_op_i32_const(s, (int32_t)s->code_ptr); tcg_wasm_out_op_i32_store(s, 0, 0); + /* + * update the block index so that the possible rewinding will + * skip this block + */ + tcg_wasm_out_op_i64_const(s, block_idx + 1); + tcg_wasm_out_op_global_set(s, BLOCK_PTR_IDX); + tcg_wasm_out_new_block(s); + gen_call(s, info, func_idx); + tcg_wasm_out_handle_unwinding(s); } static void gen_func_type_qemu_ld(TCGContext *s, uint32_t oi) @@ -1986,6 +2007,14 @@ static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg, addr_reg = TCG_REG_TMP; } + /* + * update the block index so that the possible rewinding will + * skip this block + */ + tcg_wasm_out_op_i64_const(s, block_idx + 1); + tcg_wasm_out_op_global_set(s, BLOCK_PTR_IDX); + tcg_wasm_out_new_block(s); + /* call helper */ tcg_wasm_out_op_global_get_r(s, TCG_AREG0); tcg_wasm_out_op_i32_wrap_i64(s); @@ -1995,6 +2024,7 @@ static void 
tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg, tcg_wasm_out_op_call(s, func_idx); tcg_wasm_out_op_global_set_r(s, data_reg); + tcg_wasm_out_handle_unwinding(s); } static void *qemu_st_helper_ptr(uint32_t oi) @@ -2034,6 +2064,14 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg, addr_reg = TCG_REG_TMP; } + /* + * update the block index so that the possible rewinding will + * skip this block + */ + tcg_wasm_out_op_i64_const(s, block_idx + 1); + tcg_wasm_out_op_global_set(s, BLOCK_PTR_IDX); + tcg_wasm_out_new_block(s); + /* call helper */ tcg_wasm_out_op_global_get_r(s, TCG_AREG0); tcg_wasm_out_op_i32_wrap_i64(s); @@ -2051,6 +2089,7 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg, tcg_wasm_out_op_i32_const(s, (int32_t)s->code_ptr); tcg_wasm_out_op_call(s, func_idx); + tcg_wasm_out_handle_unwinding(s); } static bool patch_reloc(tcg_insn_unit *code_ptr_i, int type, @@ -3420,6 +3459,9 @@ static const uint8_t mod_1[] = { 0x60, /* 0: Type of "start" function */ 0x01, 0x7f, /* arg: ctx pointer (i32) */ 0x01, 0x7f, /* return: res (i32) */ + 0x60, /* 1: Type of the asyncify helper */ + 0x0, /* no argument */ + 0x01, 0x7f, /* return: res (i32) */ }; static const uint8_t mod_2[] = { @@ -3430,6 +3472,9 @@ static const uint8_t mod_2[] = { 0x06, 0x62, 0x75, 0x66, 0x66, 0x65, 0x72, /* name: "buffer" */ 0x02, 0x03, /* shared mem */ 0x00, 0xff, 0xff, 0x03, /* min: 0, max: 0xffff pages */ + 0x06, 0x68, 0x65, 0x6c, 0x70, 0x65, 0x72, /* module: "helper" */ + 0x01, 0x75, /* name: "u" */ + 0x00, 0x01, /* func type 1 */ }; static const uint8_t mod_3[] = { @@ -3488,7 +3533,7 @@ static int write_mod_1(TCGContext *s) linked_buf_write(types_buf_root, s->code_ptr); s->code_ptr += types_buf_len(); - uint32_t type_section_size = types_buf_len() + 10; + uint32_t type_section_size = types_buf_len() + 14; fill_uint32_leb128(base + 9, type_section_size); fill_uint32_leb128(base + 14, HELPER_IDX_START + helpers_num + 1); @@ -3613,6 +3658,11 @@ static void 
tcg_out_tb_start(TCGContext *s) h->tci_ptr = s->code_ptr; /* generate wasm code to initialize fundamental registers */ + tcg_wasm_out_ctx_i32_load(s, do_init); + tcg_wasm_out_op_i32_const(s, 0); + tcg_wasm_out_op_i32_ne(s); + tcg_wasm_out_op_if_noret(s); + tcg_wasm_out_op_global_get_r(s, TCG_AREG0); tcg_wasm_out_op_i64_eqz(s); tcg_wasm_out_op_if_noret(s); @@ -3626,8 +3676,10 @@ static void tcg_out_tb_start(TCGContext *s) tcg_wasm_out_op_global_set_r(s, TCG_REG_CALL_STACK); tcg_wasm_out_op_end(s); + tcg_wasm_out_ctx_i32_store_const(s, do_init, 0); tcg_wasm_out_op_i64_const(s, 0); tcg_wasm_out_op_global_set(s, BLOCK_PTR_IDX); + tcg_wasm_out_op_end(s); tcg_wasm_out_op_loop_noret(s); tcg_wasm_out_op_global_get(s, BLOCK_PTR_IDX); -- 2.43.0