This commit enables qemu_ld and qemu_st to perform TLB lookups, following
the approach used by other backends such as RISC-V. Unlike those backends,
the Wasm backend cannot use ldst labels, as jumping to a specific code
address (e.g. raddr) is not possible in Wasm. Instead, each TLB lookup is
followed by an if branch: if the lookup succeeds, memory is accessed
directly; otherwise, a fallback helper function is invoked. Support for
MO_BSWAP is not yet implemented, so tcg_target_has_memory_bswap() now
returns false.
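
Roughly, the emitted code implements the following control flow (an
illustrative C-level sketch, not literal output of this patch; names
are made up and the unaligned-access adjustment is omitted):

    hit = 0;
    entry = &fast[(addr >> (page_bits - CPU_TLB_ENTRY_BITS)) & mask];
    cmp = is_ld ? entry->addr_read : entry->addr_write;
    if ((addr & compare_mask) == cmp) {
        /* fast path: translate the address and access memory directly */
        host = entry->addend + (uint32_t)addr;
        hit = 1;
    }
    if (!hit) {
        /* slow path: call the fallback helper */
    }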

Signed-off-by: Kohei Tokunaga <ktokunaga.m...@gmail.com>
---
 tcg/wasm32/tcg-target.c.inc | 223 +++++++++++++++++++++++++++++++++++-
 1 file changed, 221 insertions(+), 2 deletions(-)

diff --git a/tcg/wasm32/tcg-target.c.inc b/tcg/wasm32/tcg-target.c.inc
index f0c51a5d3d..a2815db6b5 100644
--- a/tcg/wasm32/tcg-target.c.inc
+++ b/tcg/wasm32/tcg-target.c.inc
@@ -3,8 +3,12 @@
  * Tiny Code Generator for QEMU
  *
  * Copyright (c) 2009, 2011 Stefan Weil
+ * Copyright (c) 2018 SiFive, Inc
+ * Copyright (c) 2008-2009 Arnaud Patard <arnaud.pat...@rtp-net.org>
+ * Copyright (c) 2009 Aurelien Jarno <aurel...@aurel32.net>
+ * Copyright (c) 2008 Fabrice Bellard
  *
- * Based on tci/tcg-target.c.inc
+ * Based on tci/tcg-target.c.inc and riscv/tcg-target.c.inc
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -364,6 +368,10 @@ static void tcg_wasm_out_op_i64_eqz(TCGContext *s)
 {
     tcg_wasm_out8(s, 0x50);
 }
+static void tcg_wasm_out_op_i64_eq(TCGContext *s)
+{
+    tcg_wasm_out8(s, 0x51);
+}
 static void tcg_wasm_out_op_br(TCGContext *s, int i)
 {
     tcg_wasm_out8(s, 0x0c);
@@ -436,6 +444,10 @@ static void tcg_wasm_out_op_local_set(TCGContext *s, uint8_t i)
 {
     tcg_wasm_out_op_var(s, 0x21, i);
 }
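+/*
+ * local.tee: like local.set, but also leaves the assigned value on
+ * the stack.
+ */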
+static void tcg_wasm_out_op_local_tee(TCGContext *s, uint8_t i)
+{
+    tcg_wasm_out_op_var(s, 0x22, i);
+}
 
 #define tcg_wasm_out_i64_calc(op)                                       \
     static void tcg_wasm_out_i64_calc_##op(                             \
@@ -1993,12 +2005,161 @@ static void *qemu_ld_helper_ptr(uint32_t oi)
     }
 }
 
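+/*
+ * Emit an i32 load from the i32 base address on the stack plus "off".
+ * Wasm memory-access offsets must be non-negative, so a negative
+ * offset is folded into the address operand first.  The i64 variant
+ * below does the same for 64-bit loads.
+ */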
+static void tcg_wasm_out_i32_load_s(TCGContext *s, int off)
+{
+    if (off < 0) {
+        tcg_wasm_out_op_i32_const(s, off);
+        tcg_wasm_out_op_i32_add(s);
+        off = 0;
+    }
+    tcg_wasm_out_op_i32_load(s, 0, off);
+}
+
+static void tcg_wasm_out_i64_load_s(TCGContext *s, int off)
+{
+    if (off < 0) {
+        tcg_wasm_out_op_i32_const(s, off);
+        tcg_wasm_out_op_i32_add(s);
+        off = 0;
+    }
+    tcg_wasm_out_op_i64_load(s, 0, off);
+}
+
+#define MIN_TLB_MASK_TABLE_OFS INT_MIN
+
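+/*
+ * Emit a softmmu TLB lookup.  On a TLB hit, the translated host
+ * address is stored in the returned local variable and *hit_var is
+ * set to 1; on a miss, *hit_var remains 0 and the caller is expected
+ * to emit the slow-path helper call.
+ */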
+static uint8_t prepare_host_addr_wasm(TCGContext *s, uint8_t *hit_var,
+                                      TCGReg addr_reg, MemOpIdx oi,
+                                      bool is_ld)
+{
+    MemOp opc = get_memop(oi);
+    TCGAtomAlign aa;
+    unsigned a_mask;
+    unsigned s_bits = opc & MO_SIZE;
+    unsigned s_mask = (1u << s_bits) - 1;
+    int mem_index = get_mmuidx(oi);
+    int fast_ofs = tlb_mask_table_ofs(s, mem_index);
+    int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
+    int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
+    int add_off = offsetof(CPUTLBEntry, addend);
+    tcg_target_long compare_mask;
+
+    if (!tcg_use_softmmu) {
+        g_assert_not_reached();
+    }
+
+    *hit_var = TMP64_LOCAL_0_IDX;
+    tcg_wasm_out_op_i64_const(s, 0);
+    tcg_wasm_out_op_local_set(s, *hit_var);
+
+    aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
+    a_mask = (1u << aa.align) - 1;
+
+    /* Get the CPUTLBEntry offset */
+    tcg_wasm_out_op_global_get_r(s, addr_reg);
+    tcg_wasm_out_op_i64_const(s, s->page_bits - CPU_TLB_ENTRY_BITS);
+    tcg_wasm_out_op_i64_shr_u(s);
+    tcg_wasm_out_op_i32_wrap_i64(s);
+    tcg_wasm_out_op_global_get_r_i32(s, TCG_AREG0);
+    tcg_wasm_out_i32_load_s(s, mask_ofs);
+    tcg_wasm_out_op_i32_and(s);
+
+    /* Get the pointer to the target CPUTLBEntry */
+    tcg_wasm_out_op_global_get_r_i32(s, TCG_AREG0);
+    tcg_wasm_out_i32_load_s(s, table_ofs);
+    tcg_wasm_out_op_i32_add(s);
+    tcg_wasm_out_op_local_tee(s, TMP32_LOCAL_0_IDX);
+
+    /* Load the TLB comparator */
+    tcg_wasm_out_i64_load_s(
+        s, is_ld ? offsetof(CPUTLBEntry, addr_read)
+        : offsetof(CPUTLBEntry, addr_write));
+
+    /*
+     * For aligned accesses, we check the first byte and include the
+     * alignment bits within the address.  For unaligned access, we
+     * check that we don't cross pages using the address of the last
+     * byte of the access.
+     */
+    tcg_wasm_out_op_global_get_r(s, addr_reg);
+    if (a_mask < s_mask) {
+        tcg_wasm_out_op_i64_const(s, s_mask - a_mask);
+        tcg_wasm_out_op_i64_add(s);
+    }
+    compare_mask = (uint64_t)s->page_mask | a_mask;
+    tcg_wasm_out_op_i64_const(s, compare_mask);
+    tcg_wasm_out_op_i64_and(s);
+
+    /* Compare masked address with the TLB entry. */
+    tcg_wasm_out_op_i64_eq(s);
+    tcg_wasm_out_op_if_noret(s);
+
+    /* TLB Hit - translate address using addend.  */
+    tcg_wasm_out_op_local_get(s, TMP32_LOCAL_0_IDX);
+    tcg_wasm_out_i32_load_s(s, add_off);
+    tcg_wasm_out_op_global_get_r(s, addr_reg);
+    tcg_wasm_out_op_i32_wrap_i64(s);
+    tcg_wasm_out_op_i32_add(s);
+    tcg_wasm_out_op_local_set(s, TMP32_LOCAL_1_IDX);
+    tcg_wasm_out_op_i64_const(s, 1);
+    tcg_wasm_out_op_local_set(s, *hit_var);
+
+    tcg_wasm_out_op_end(s);
+
+    return TMP32_LOCAL_1_IDX;
+}
+
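+/*
+ * Emit the fast-path load: dereference the host address held in the
+ * "base" local with the size and signedness given by opc, and store
+ * the extended result in the register r.
+ */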
+static void tcg_wasm_out_qemu_ld_direct(
+    TCGContext *s, TCGReg r, uint8_t base, MemOp opc)
+{
+    switch (opc & (MO_SSIZE)) {
+    case MO_UB:
+        tcg_wasm_out_op_local_get(s, base);
+        tcg_wasm_out_op_i64_load8_u(s, 0, 0);
+        tcg_wasm_out_op_global_set_r(s, r);
+        break;
+    case MO_SB:
+        tcg_wasm_out_op_local_get(s, base);
+        tcg_wasm_out_op_i64_load8_s(s, 0, 0);
+        tcg_wasm_out_op_global_set_r(s, r);
+        break;
+    case MO_UW:
+        tcg_wasm_out_op_local_get(s, base);
+        tcg_wasm_out_op_i64_load16_u(s, 0, 0);
+        tcg_wasm_out_op_global_set_r(s, r);
+        break;
+    case MO_SW:
+        tcg_wasm_out_op_local_get(s, base);
+        tcg_wasm_out_op_i64_load16_s(s, 0, 0);
+        tcg_wasm_out_op_global_set_r(s, r);
+        break;
+    case MO_UL:
+        tcg_wasm_out_op_local_get(s, base);
+        tcg_wasm_out_op_i64_load32_u(s, 0, 0);
+        tcg_wasm_out_op_global_set_r(s, r);
+        break;
+    case MO_SL:
+        tcg_wasm_out_op_local_get(s, base);
+        tcg_wasm_out_op_i64_load32_s(s, 0, 0);
+        tcg_wasm_out_op_global_set_r(s, r);
+        break;
+    case MO_UQ:
+        tcg_wasm_out_op_local_get(s, base);
+        tcg_wasm_out_op_i64_load(s, 0, 0);
+        tcg_wasm_out_op_global_set_r(s, r);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg,
                                  TCGReg addr_reg, MemOpIdx oi)
 {
     int helper_idx;
     int func_idx;
     bool addr64 = s->addr_type == TCG_TYPE_I64;
+    MemOp mop = get_memop(oi);
+    uint8_t base_var, hit_var;
 
     helper_idx = (uint32_t)qemu_ld_helper_ptr(oi);
     func_idx = get_helper_idx(s, helper_idx);
@@ -2012,6 +2173,14 @@ static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg,
         addr_reg = TCG_REG_TMP;
     }
 
+    base_var = prepare_host_addr_wasm(s, &hit_var, addr_reg, oi, true);
+    tcg_wasm_out_op_local_get(s, hit_var);
+    tcg_wasm_out_op_i64_const(s, 1);
+    tcg_wasm_out_op_i64_eq(s);
+    tcg_wasm_out_op_if_noret(s);
+    tcg_wasm_out_qemu_ld_direct(s, data_reg, base_var, mop); /* fast path */
+    tcg_wasm_out_op_end(s);
+
     /*
      * update the block index so that the possible rewinding will
      * skip this block
@@ -2020,6 +2189,10 @@ static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg,
     tcg_wasm_out_op_global_set(s, BLOCK_PTR_IDX);
     tcg_wasm_out_new_block(s);
 
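+    /* Slow path: hit_var is still 0 if the TLB lookup missed. */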
+    tcg_wasm_out_op_local_get(s, hit_var);
+    tcg_wasm_out_op_i64_eqz(s);
+    tcg_wasm_out_op_if_noret(s);
+
     /* call helper */
     tcg_wasm_out_op_global_get_r(s, TCG_AREG0);
     tcg_wasm_out_op_i32_wrap_i64(s);
@@ -2030,6 +2203,8 @@ static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg,
     tcg_wasm_out_op_call(s, func_idx);
     tcg_wasm_out_op_global_set_r(s, data_reg);
     tcg_wasm_out_handle_unwinding(s);
+
+    tcg_wasm_out_op_end(s);
 }
 
 static void *qemu_st_helper_ptr(uint32_t oi)
@@ -2049,6 +2224,35 @@ static void *qemu_st_helper_ptr(uint32_t oi)
     }
 }
 
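+/*
+ * Emit the fast-path store: write the register lo, truncated to the
+ * size given by opc, to the host address held in the "base" local.
+ */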
+static void tcg_wasm_out_qemu_st_direct(
+    TCGContext *s, TCGReg lo, uint8_t base, MemOp opc)
+{
+    switch (opc & (MO_SSIZE)) {
+    case MO_8:
+        tcg_wasm_out_op_local_get(s, base);
+        tcg_wasm_out_op_global_get_r(s, lo);
+        tcg_wasm_out_op_i64_store8(s, 0, 0);
+        break;
+    case MO_16:
+        tcg_wasm_out_op_local_get(s, base);
+        tcg_wasm_out_op_global_get_r(s, lo);
+        tcg_wasm_out_op_i64_store16(s, 0, 0);
+        break;
+    case MO_32:
+        tcg_wasm_out_op_local_get(s, base);
+        tcg_wasm_out_op_global_get_r(s, lo);
+        tcg_wasm_out_op_i64_store32(s, 0, 0);
+        break;
+    case MO_64:
+        tcg_wasm_out_op_local_get(s, base);
+        tcg_wasm_out_op_global_get_r(s, lo);
+        tcg_wasm_out_op_i64_store(s, 0, 0);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg,
                                  TCGReg addr_reg, MemOpIdx oi)
 {
@@ -2056,6 +2260,7 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg,
     int func_idx;
     bool addr64 = s->addr_type == TCG_TYPE_I64;
     MemOp mop = get_memop(oi);
+    uint8_t base_var, hit_var;
 
     helper_idx = (uint32_t)qemu_st_helper_ptr(oi);
     func_idx = get_helper_idx(s, helper_idx);
@@ -2069,6 +2274,14 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg,
         addr_reg = TCG_REG_TMP;
     }
 
+    base_var = prepare_host_addr_wasm(s, &hit_var, addr_reg, oi, false);
+    tcg_wasm_out_op_local_get(s, hit_var);
+    tcg_wasm_out_op_i64_const(s, 1);
+    tcg_wasm_out_op_i64_eq(s);
+    tcg_wasm_out_op_if_noret(s);
+    tcg_wasm_out_qemu_st_direct(s, data_reg, base_var, mop); /* fast path */
+    tcg_wasm_out_op_end(s);
+
     /*
      * update the block index so that the possible rewinding will
      * skip this block
@@ -2077,6 +2290,10 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg,
     tcg_wasm_out_op_global_set(s, BLOCK_PTR_IDX);
     tcg_wasm_out_new_block(s);
 
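+    /* Slow path: hit_var is still 0 if the TLB lookup missed. */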
+    tcg_wasm_out_op_local_get(s, hit_var);
+    tcg_wasm_out_op_i64_eqz(s);
+    tcg_wasm_out_op_if_noret(s);
+
     /* call helper */
     tcg_wasm_out_op_global_get_r(s, TCG_AREG0);
     tcg_wasm_out_op_i32_wrap_i64(s);
@@ -2095,6 +2312,8 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg,
 
     tcg_wasm_out_op_call(s, func_idx);
     tcg_wasm_out_handle_unwinding(s);
+
+    tcg_wasm_out_op_end(s);
 }
 
 static bool patch_reloc(tcg_insn_unit *code_ptr_i, int type,
@@ -3752,7 +3971,7 @@ static int tcg_out_tb_end(TCGContext *s)
 
 bool tcg_target_has_memory_bswap(MemOp memop)
 {
-    return true;
+    return false;
 }
 
 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
-- 
2.43.0

