Emscripten's Fiber coroutine implements coroutine switching using the stack
unwinding and rewinding capabilities of Asyncify [1]. When a coroutine
yields (i.e. switches out), Asyncify unwinds the stack, returning control to
Emscripten's JS code (Fiber.trampoline()), which then performs stack
rewinding to resume execution in the target coroutine. Stack unwinding is
implemented by a sequence of immediate function returns, while rewinding
works by re-entering the functions in the call stack, skipping any code
between the top of the function and the original call position [2].

This commit modifies the Wasm TB modules to support Fiber
coroutines. Assuming the TCG CPU loop is executed by only one coroutine per
thread, a TB module must allow helper functions to unwind and be resumed via
rewinding.

Specifically:

- When a helper returns due to an unwind, the module must immediately return
  to its caller, allowing unwinding to propagate.
- When being called again for a rewind, the module must skip any code
  between the top of the function and the call position that triggered the
  unwind, and directly enter the helper.

To support this:

- TBs now check the Asyncify.state JS object after each helper call. If
  unwinding is in progress, the TB immediately returns control to the
  caller.
- Each function call is preceded by a block boundary and an update of the
  BLOCK_PTR_IDX variable. This enables the TB to re-enter execution at the
  correct point during a rewind, skipping earlier blocks.

Additionally, this commit introduces wasmContext.do_init, a flag
indicating whether the TB should reset the BLOCK_PTR_IDX variable to 0
(i.e. start from the beginning). In call_wasm_tb, this is always set
(ctx.do_init = 1) to ensure normal TB execution begins at the first
block. Once the TB resets the BLOCK_PTR_IDX variable, it also clears
do_init. During a rewind, the C code does not set ctx.do_init to 1, allowing
the TB to preserve the BLOCK_PTR_IDX value from the previous unwind and
correctly resume execution from the last unwound block.

[1] https://emscripten.org/docs/api_reference/fiber.h.html
[2] https://kripken.github.io/blog/wasm/2019/07/16/asyncify.html#new-asyncify

Signed-off-by: Kohei Tokunaga <ktokunaga.m...@gmail.com>
---
 tcg/wasm32.c                |  3 ++
 tcg/wasm32.h                | 11 ++++++++
 tcg/wasm32/tcg-target.c.inc | 56 +++++++++++++++++++++++++++++++++++--
 3 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/tcg/wasm32.c b/tcg/wasm32.c
index f2269c0a22..e6a3dbf750 100644
--- a/tcg/wasm32.c
+++ b/tcg/wasm32.c
@@ -53,6 +53,9 @@ EM_JS(int, instantiate_wasm, (int wasm_begin,
     const memory_v = new DataView(HEAP8.buffer);
     const wasm = HEAP8.subarray(wasm_begin, wasm_begin + wasm_size);
     var helper = {};
+    helper.u = () => {
+        return (Asyncify.state != Asyncify.State.Unwinding) ? 1 : 0;
+    };
     for (var i = 0; i < import_vec_size / 4; i++) {
         helper[i] = wasmTable.get(
             memory_v.getInt32(import_vec_begin + i * 4, true));
diff --git a/tcg/wasm32.h b/tcg/wasm32.h
index f8651af4ee..f2749f1e0e 100644
--- a/tcg/wasm32.h
+++ b/tcg/wasm32.h
@@ -32,11 +32,22 @@ struct wasmContext {
      * Pointer to a stack array.
      */
     uint64_t *stack;
+
+    /*
+     * Flag indicating whether to initialize basic registers (1) or not (0).
+     */
+    uint32_t do_init;
 };
 
 /* Instantiated Wasm function of a TB */
 typedef int32_t (*wasm_tb_func)(struct wasmContext *);
 
+static inline int32_t call_wasm_tb(wasm_tb_func f, struct wasmContext *ctx)
+{
+    ctx->do_init = 1; /* reset block index (rewinding will skip this) */
+    return f(ctx);
+}
+
 /*
  * TB of wasm backend starts from a header which stores pointers for each data
  * stored in the following region in the TB.
diff --git a/tcg/wasm32/tcg-target.c.inc b/tcg/wasm32/tcg-target.c.inc
index d9a3abae70..04cd9b6e4a 100644
--- a/tcg/wasm32/tcg-target.c.inc
+++ b/tcg/wasm32/tcg-target.c.inc
@@ -135,7 +135,8 @@ static const uint8_t tcg_target_reg_index[TCG_TARGET_NB_REGS] = {
 #define TMP64_LOCAL_0_IDX 3
 
 /* function index */
-#define HELPER_IDX_START 0 /* helper funcitons */
+#define CHECK_UNWINDING_IDX 0 /* a function checking the Asyncify state */
+#define HELPER_IDX_START 1 /* helper functions */
 
 #define BUF_SIZE 1024
 typedef struct LinkedBuf {
@@ -1895,6 +1896,17 @@ static int get_helper_idx(TCGContext *s, int helper_idx_on_qemu)
     return -1;
 }
 
+static void tcg_wasm_out_handle_unwinding(TCGContext *s)
+{
+    tcg_wasm_out_op_call(s, CHECK_UNWINDING_IDX);
+    tcg_wasm_out_op_i32_eqz(s);
+    tcg_wasm_out_op_if_noret(s);
+    tcg_wasm_out_op_i32_const(s, 0);
+    /* returns if unwinding */
+    tcg_wasm_out_op_return(s);
+    tcg_wasm_out_op_end(s);
+}
+
 static void tcg_wasm_out_call(TCGContext *s, int func,
                               const TCGHelperInfo *info)
 {
@@ -1908,7 +1920,16 @@ static void tcg_wasm_out_call(TCGContext *s, int func,
     tcg_wasm_out_op_i32_const(s, (int32_t)s->code_ptr);
     tcg_wasm_out_op_i32_store(s, 0, 0);
 
+    /*
+     * update the block index so that the possible rewinding will
+     * skip this block
+     */
+    tcg_wasm_out_op_i64_const(s, block_idx + 1);
+    tcg_wasm_out_op_global_set(s, BLOCK_PTR_IDX);
+    tcg_wasm_out_new_block(s);
+
     gen_call(s, info, func_idx);
+    tcg_wasm_out_handle_unwinding(s);
 }
 
 static void gen_func_type_qemu_ld(TCGContext *s, uint32_t oi)
@@ -1986,6 +2007,14 @@ static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg,
         addr_reg = TCG_REG_TMP;
     }
 
+    /*
+     * update the block index so that the possible rewinding will
+     * skip this block
+     */
+    tcg_wasm_out_op_i64_const(s, block_idx + 1);
+    tcg_wasm_out_op_global_set(s, BLOCK_PTR_IDX);
+    tcg_wasm_out_new_block(s);
+
     /* call helper */
     tcg_wasm_out_op_global_get_r(s, TCG_AREG0);
     tcg_wasm_out_op_i32_wrap_i64(s);
@@ -1995,6 +2024,7 @@ static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg,
 
     tcg_wasm_out_op_call(s, func_idx);
     tcg_wasm_out_op_global_set_r(s, data_reg);
+    tcg_wasm_out_handle_unwinding(s);
 }
 
 static void *qemu_st_helper_ptr(uint32_t oi)
@@ -2034,6 +2064,14 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg,
         addr_reg = TCG_REG_TMP;
     }
 
+    /*
+     * update the block index so that the possible rewinding will
+     * skip this block
+     */
+    tcg_wasm_out_op_i64_const(s, block_idx + 1);
+    tcg_wasm_out_op_global_set(s, BLOCK_PTR_IDX);
+    tcg_wasm_out_new_block(s);
+
     /* call helper */
     tcg_wasm_out_op_global_get_r(s, TCG_AREG0);
     tcg_wasm_out_op_i32_wrap_i64(s);
@@ -2051,6 +2089,7 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg,
     tcg_wasm_out_op_i32_const(s, (int32_t)s->code_ptr);
 
     tcg_wasm_out_op_call(s, func_idx);
+    tcg_wasm_out_handle_unwinding(s);
 }
 
 static bool patch_reloc(tcg_insn_unit *code_ptr_i, int type,
@@ -3420,6 +3459,9 @@ static const uint8_t mod_1[] = {
     0x60,                         /* 0: Type of "start" function */
     0x01, 0x7f,                   /* arg: ctx pointer (i32) */
     0x01, 0x7f,                   /* return: res (i32) */
+    0x60,                         /* 1: Type of the asyncify helper */
+    0x0,                          /* no argument */
+    0x01, 0x7f,                   /* return: res (i32) */
 };
 
 static const uint8_t mod_2[] = {
@@ -3430,6 +3472,9 @@ static const uint8_t mod_2[] = {
     0x06, 0x62, 0x75, 0x66, 0x66, 0x65, 0x72, /* name: "buffer" */
     0x02, 0x03,                               /* shared mem */
     0x00, 0xff, 0xff, 0x03,                   /* min: 0, max: 0xffff pages */
+    0x06, 0x68, 0x65, 0x6c, 0x70, 0x65, 0x72, /* module: "helper" */
+    0x01, 0x75,                               /* name: "u" */
+    0x00, 0x01,                               /* func type 1 */
 };
 
 static const uint8_t mod_3[] = {
@@ -3488,7 +3533,7 @@ static int write_mod_1(TCGContext *s)
     linked_buf_write(types_buf_root, s->code_ptr);
     s->code_ptr += types_buf_len();
 
-    uint32_t type_section_size = types_buf_len() + 10;
+    uint32_t type_section_size = types_buf_len() + 14;
     fill_uint32_leb128(base + 9, type_section_size);
     fill_uint32_leb128(base + 14, HELPER_IDX_START + helpers_num + 1);
 
@@ -3613,6 +3658,11 @@ static void tcg_out_tb_start(TCGContext *s)
     h->tci_ptr = s->code_ptr;
 
     /* generate wasm code to initialize fundamental registers */
+    tcg_wasm_out_ctx_i32_load(s, do_init);
+    tcg_wasm_out_op_i32_const(s, 0);
+    tcg_wasm_out_op_i32_ne(s);
+    tcg_wasm_out_op_if_noret(s);
+
     tcg_wasm_out_op_global_get_r(s, TCG_AREG0);
     tcg_wasm_out_op_i64_eqz(s);
     tcg_wasm_out_op_if_noret(s);
@@ -3626,8 +3676,10 @@ static void tcg_out_tb_start(TCGContext *s)
     tcg_wasm_out_op_global_set_r(s, TCG_REG_CALL_STACK);
     tcg_wasm_out_op_end(s);
 
+    tcg_wasm_out_ctx_i32_store_const(s, do_init, 0);
     tcg_wasm_out_op_i64_const(s, 0);
     tcg_wasm_out_op_global_set(s, BLOCK_PTR_IDX);
+    tcg_wasm_out_op_end(s);
 
     tcg_wasm_out_op_loop_noret(s);
     tcg_wasm_out_op_global_get(s, BLOCK_PTR_IDX);
-- 
2.43.0


Reply via email to