Re: [Qemu-devel] [PATCH 07/18] tcg: add vector addition operations

2017-01-17 Thread Richard Henderson

On 01/17/2017 01:07 AM, Kirill Batuzov wrote:

+/***/
+/* 64-bit and 128-bit vector arithmetic.  */
+
+static inline void *tcg_v128_swap_slot(int n)
+{
+return &tcg_ctx.v128_swap[n * 16];
+}
+
+/* Find a memory location for 128-bit TCG variable. */
+static inline void tcg_v128_to_ptr(TCGv_v128 tmp, TCGv_ptr base, int slot,
+   TCGv_ptr *real_base, intptr_t *real_offset,
+   int is_read)


None of this needs to be inline in tcg-op.h.  All of it should be out-of-line 
in tcg-op.c.




@@ -750,6 +778,7 @@ struct TCGContext {
 void *code_gen_buffer;
 size_t code_gen_buffer_size;
 void *code_gen_ptr;
+uint8_t v128_swap[16 * 3];


This is not thread-safe.
Shouldn't use space in TCGContext; should use space on stack.

Since there is no function call that is live, you can re-use the space for 
on-stack arguments.  There is TCG_STATIC_CALL_ARGS_SIZE (128) bytes allocated 
for that.  Which should be more than enough.



r~



[Qemu-devel] [PATCH 07/18] tcg: add vector addition operations

2017-01-17 Thread Kirill Batuzov
Signed-off-by: Kirill Batuzov 
---
 tcg/tcg-op.h  | 169 ++
 tcg/tcg-opc.h |  12 +
 tcg/tcg.h |  29 ++
 3 files changed, 210 insertions(+)

diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index c469ea3..5de74d3 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -1153,6 +1153,8 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, 
TCGv_i64, TCGArg, TCGMemOp);
 tcg_gen_add_i32(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), TCGV_PTR_TO_NAT(B))
 # define tcg_gen_addi_ptr(R, A, B) \
 tcg_gen_addi_i32(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), (B))
+# define tcg_gen_movi_ptr(R, B) \
+tcg_gen_movi_i32(TCGV_PTR_TO_NAT(R), (B))
 # define tcg_gen_ext_i32_ptr(R, A) \
 tcg_gen_mov_i32(TCGV_PTR_TO_NAT(R), (A))
 #else
@@ -1164,6 +1166,173 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, 
TCGv_i64, TCGArg, TCGMemOp);
 tcg_gen_add_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), TCGV_PTR_TO_NAT(B))
 # define tcg_gen_addi_ptr(R, A, B) \
 tcg_gen_addi_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), (B))
+# define tcg_gen_movi_ptr(R, B) \
+tcg_gen_movi_i64(TCGV_PTR_TO_NAT(R), (B))
 # define tcg_gen_ext_i32_ptr(R, A) \
 tcg_gen_ext_i32_i64(TCGV_PTR_TO_NAT(R), (A))
 #endif /* UINTPTR_MAX == UINT32_MAX */
+
+/***/
+/* 64-bit and 128-bit vector arithmetic.  */
+
+static inline void *tcg_v128_swap_slot(int n)
+{
+return &tcg_ctx.v128_swap[n * 16];
+}
+
+/* Find a memory location for 128-bit TCG variable. */
+static inline void tcg_v128_to_ptr(TCGv_v128 tmp, TCGv_ptr base, int slot,
+   TCGv_ptr *real_base, intptr_t *real_offset,
+   int is_read)
+{
+int idx = GET_TCGV_V128(tmp);
+assert(idx >= 0 && idx < tcg_ctx.nb_temps);
+if (idx < tcg_ctx.nb_globals) {
+/* Globals use their locations within CPUArchState. */
+int env = GET_TCGV_PTR(tcg_ctx.tcg_env);
+TCGTemp *ts_env = &tcg_ctx.temps[env];
+TCGTemp *ts_arg = &tcg_ctx.temps[idx];
+
+/* Sanity checks: global's memory locations must be addressed
+   relative to ENV. */
+assert(ts_env->val_type == TEMP_VAL_REG &&
+   ts_env == ts_arg->mem_base &&
+   ts_arg->mem_allocated);
+
+*real_base = tcg_ctx.tcg_env;
+*real_offset = ts_arg->mem_offset;
+} else {
+/* Temporaries use swap space in TCGContext. Since we already have
+   a 128-bit temporary we'll assume that the target supports 128-bit
+   loads and stores. */
+*real_base = base;
+*real_offset = slot * 16;
+if (is_read) {
+tcg_gen_st_v128(tmp, base, slot * 16);
+}
+}
+}
+
+/* Find a memory location for 64-bit vector TCG variable. */
+static inline void tcg_v64_to_ptr(TCGv_v64 tmp, TCGv_ptr base, int slot,
+  TCGv_ptr *real_base, intptr_t *real_offset,
+  int is_read)
+{
+int idx = GET_TCGV_V64(tmp);
+assert(idx >= 0 && idx < tcg_ctx.nb_temps);
+if (idx < tcg_ctx.nb_globals) {
+/* Globals use their locations within CPUArchState. */
+int env = GET_TCGV_PTR(tcg_ctx.tcg_env);
+TCGTemp *ts_env = &tcg_ctx.temps[env];
+TCGTemp *ts_arg = &tcg_ctx.temps[idx];
+
+/* Sanity checks: global's memory locations must be addressed
+   relative to ENV. */
+assert(ts_env->val_type == TEMP_VAL_REG &&
+   ts_env == ts_arg->mem_base &&
+   ts_arg->mem_allocated);
+
+*real_base = tcg_ctx.tcg_env;
+*real_offset = ts_arg->mem_offset;
+} else {
+/* Temporaries use swap space in TCGContext. Since we already have
+   a 128-bit temporary we'll assume that the target supports 128-bit
+   loads and stores. */
+*real_base = base;
+*real_offset = slot * 16;
+if (is_read) {
+tcg_gen_st_v64(tmp, base, slot * 16);
+}
+}
+}
+
+#define GEN_VECT_WRAPPER(name, type, func)   \
+static inline void glue(tcg_gen_, name)(glue(TCGv_, type) res,   \
+glue(TCGv_, type) arg1,  \
+glue(TCGv_, type) arg2)  \
+{\
+if (glue(TCG_TARGET_HAS_, name)) {   \
+glue(tcg_gen_op3_, type)(glue(INDEX_op_, name), res, arg1,   \
+ arg2);  \
+} else { \
+TCGv_ptr base = tcg_temp_new_ptr();  \
+TCGv_ptr t1 = tcg_temp_new_ptr();\
+TCGv_ptr t2 = tcg_temp_new_ptr();