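From: Jani Kokkonen <jani.kokko...@huawei.com> Implement the softmmu fast path for tcg_out_qemu_ld/st: look up the TLB entry inline and perform the access directly, falling back to the existing helper call on a TLB miss or a misaligned address.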
Signed-off-by: Jani Kokkonen <jani.kokko...@huawei.com> --- tcg/aarch64/tcg-target.c | 161 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 157 insertions(+), 4 deletions(-) diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c index 24b2862..47ec4a7 100644 --- a/tcg/aarch64/tcg-target.c +++ b/tcg/aarch64/tcg-target.c @@ -700,6 +700,36 @@ static inline void tcg_out_uxt(TCGContext *s, int s_bits, #ifdef CONFIG_SOFTMMU #include "exec/softmmu_defs.h" +/* Load and compare a TLB entry, leaving the flags set. Leaves X2 pointing + to the tlb entry. Clobbers X0,X1,X2,X3 and TMP. */ + +static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, + int s_bits, uint8_t **label_ptr, int tlb_offset) +{ + TCGReg base = TCG_AREG0; + + tcg_out_shr(s, 1, TCG_REG_TMP, addr_reg, TARGET_PAGE_BITS); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X1, tlb_offset); + tcg_out_arith(s, ARITH_ADD, 1, TCG_REG_X2, base, TCG_REG_X1, 0); + tcg_out_andi(s, 1, TCG_REG_X0, TCG_REG_TMP, CPU_TLB_BITS, 0); + tcg_out_arith(s, ARITH_ADD, 1, TCG_REG_X2, TCG_REG_X2, + TCG_REG_X0, -CPU_TLB_ENTRY_BITS); +#if TARGET_LONG_BITS == 64 + tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X3, TCG_REG_X2, 0); +#else + tcg_out_ldst(s, LDST_32, LDST_LD, TCG_REG_X3, TCG_REG_X2, 0); +#endif + /* check alignment */ + if (s_bits) { + tcg_out_tst(s, 1, addr_reg, s_bits, 0); + label_ptr[0] = s->code_ptr; + tcg_out_goto_cond_noaddr(s, TCG_COND_NE); + } + tcg_out_cmp(s, 1, TCG_REG_X3, TCG_REG_TMP, -TARGET_PAGE_BITS); + label_ptr[1] = s->code_ptr; + tcg_out_goto_cond_noaddr(s, TCG_COND_NE); +} + /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ static const void * const qemu_ld_helpers[4] = { @@ -723,18 +753,85 @@ static const void * const qemu_st_helpers[4] = { static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) { TCGReg addr_reg, data_reg; + bool bswap; #ifdef CONFIG_SOFTMMU int mem_index, s_bits; + int i; + uint8_t *label_ptr[2] = { NULL }; + uint8_t 
*label_ptr2; #endif data_reg = args[0]; addr_reg = args[1]; +#ifdef TARGET_WORDS_BIGENDIAN + bswap = 1; +#else + bswap = 0; +#endif #ifdef CONFIG_SOFTMMU mem_index = args[2]; s_bits = opc & 3; - /* TODO: insert TLB lookup here */ + tcg_out_tlb_read(s, addr_reg, s_bits, label_ptr, + offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)); + tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2, + offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_read)); + switch (opc) { + case 0: + tcg_out_ldst_r(s, LDST_8, LDST_LD, data_reg, addr_reg, TCG_REG_X1); + break; + case 0 | 4: + tcg_out_ldst_r(s, LDST_8, LDST_LD_S_X, data_reg, addr_reg, TCG_REG_X1); + break; + case 1: + tcg_out_ldst_r(s, LDST_16, LDST_LD, data_reg, addr_reg, TCG_REG_X1); + if (bswap) { + tcg_out_rev16(s, 1, data_reg, data_reg); + } + break; + case 1 | 4: + if (bswap) { + tcg_out_ldst_r(s, LDST_16, LDST_LD, data_reg, addr_reg, TCG_REG_X1); + tcg_out_rev16(s, 1, data_reg, data_reg); + tcg_out_sxt(s, 1, s_bits, data_reg, data_reg); + } else { + tcg_out_ldst_r(s, LDST_16, LDST_LD_S_X, + data_reg, addr_reg, TCG_REG_X1); + } + break; + case 2: + tcg_out_ldst_r(s, LDST_32, LDST_LD, data_reg, addr_reg, TCG_REG_X1); + if (bswap) { + tcg_out_rev32(s, data_reg, data_reg); + } + break; + case 2 | 4: + if (bswap) { + tcg_out_ldst_r(s, LDST_32, LDST_LD, data_reg, addr_reg, TCG_REG_X1); + tcg_out_rev32(s, data_reg, data_reg); + tcg_out_sxt(s, 1, s_bits, data_reg, data_reg); + } else { + tcg_out_ldst_r(s, LDST_32, LDST_LD_S_X, + data_reg, addr_reg, TCG_REG_X1); + } + break; + case 3: + tcg_out_ldst_r(s, LDST_64, LDST_LD, data_reg, addr_reg, TCG_REG_X1); + if (bswap) { + tcg_out_rev(s, 1, data_reg, data_reg); + } + break; + default: + tcg_abort(); + } + label_ptr2 = s->code_ptr; + tcg_out_goto_noaddr(s); + for (i = 0; i < 2; i++) { + if (label_ptr[i]) { + reloc_pc19(label_ptr[i], (tcg_target_long)s->code_ptr); + } + } /* all arguments passed via registers */ tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0); 
tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, addr_reg); @@ -748,7 +845,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) } else { tcg_out_movr(s, 1, data_reg, TCG_REG_X0); } - + reloc_pc26(label_ptr2, (tcg_target_long)s->code_ptr); #else /* !CONFIG_SOFTMMU */ tcg_abort(); /* TODO */ #endif @@ -757,8 +854,17 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) { TCGReg addr_reg, data_reg; + bool bswap; #ifdef CONFIG_SOFTMMU int mem_index, s_bits; + int i; + uint8_t *label_ptr[2] = { NULL }; + uint8_t *label_ptr2; +#endif +#ifdef TARGET_WORDS_BIGENDIAN + bswap = 1; +#else + bswap = 0; #endif data_reg = args[0]; addr_reg = args[1]; @@ -767,8 +873,55 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) mem_index = args[2]; s_bits = opc & 3; - /* TODO: insert TLB lookup here */ + tcg_out_tlb_read(s, addr_reg, s_bits, label_ptr, + offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); + tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2, + offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_write)); + switch (opc) { + case 0: + tcg_out_ldst_r(s, LDST_8, LDST_ST, data_reg, addr_reg, TCG_REG_X1); + break; + case 1: + if (bswap) { + tcg_out_rev16(s, 1, TCG_REG_X0, data_reg); + tcg_out_ldst_r(s, LDST_16, LDST_ST, TCG_REG_X0, + addr_reg, TCG_REG_X1); + } else { + tcg_out_ldst_r(s, LDST_16, LDST_ST, data_reg, + addr_reg, TCG_REG_X1); + } + break; + case 2: + if (bswap) { + tcg_out_rev32(s, TCG_REG_X0, data_reg); + tcg_out_ldst_r(s, LDST_32, LDST_ST, TCG_REG_X0, + addr_reg, TCG_REG_X1); + } else { + tcg_out_ldst_r(s, LDST_32, LDST_ST, data_reg, + addr_reg, TCG_REG_X1); + } + break; + case 3: + if (bswap) { + tcg_out_rev(s, 1, TCG_REG_X0, data_reg); + tcg_out_ldst_r(s, LDST_64, LDST_ST, TCG_REG_X0, + addr_reg, TCG_REG_X1); + } else { + tcg_out_ldst_r(s, LDST_64, LDST_ST, data_reg, + addr_reg, TCG_REG_X1); + } + 
break; + default: + tcg_abort(); + } + label_ptr2 = s->code_ptr; + tcg_out_goto_noaddr(s); + for (i = 0; i < 2; i++) { + if (label_ptr[i]) { + reloc_pc19(label_ptr[i], (tcg_target_long)s->code_ptr); + } + } /* all arguments passed via registers */ tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0); tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, addr_reg); @@ -777,7 +930,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (tcg_target_long)qemu_st_helpers[s_bits]); tcg_out_callr(s, TCG_REG_TMP); - + reloc_pc26(label_ptr2, (tcg_target_long)s->code_ptr); #else /* !CONFIG_SOFTMMU */ tcg_abort(); /* TODO */ #endif -- 1.8.1