Re: [Qemu-devel] [PATCH v3 1/6] tcg: Add types and operations for host vectors

2017-09-27 Thread Richard Henderson
On 09/26/2017 12:28 PM, Alex Bennée wrote:
>>  * TCGv_ptr : a host pointer type
>> +* TCGv_vec : a host vector type; the exact size is not exposed
>> + to the CPU front-end code.
> 
> Isn't this a guest vector type (which is pointed to by a host pointer)?

No, it's a host vector, which we have created in response to expanding a guest
vector operation.

> A one line comment wouldn't go amiss here. This looks like we are
> allocating a new temp of the same type as an existing temp?
> 
>> +TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)

Yes.
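
Concretely, the intended use is something like this (the operand names and
the surrounding translator code are made up for illustration; only the
tcg_temp_new_vec_matching and tcg_gen_and_vec declarations come from the
patch):

    /* Allocate a scratch temp with the same vector type as an existing
       operand "src", then operate on it.  "src" and "mask" are
       hypothetical temps from the surrounding translator.  */
    TCGv_vec tmp = tcg_temp_new_vec_matching(src);
    tcg_gen_and_vec(tmp, src, mask);    /* tmp = src & mask, bitwise */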

>> +All of the vector ops have a final constant argument that specifies the
>> +length of the vector operation LEN as 64 << LEN bits.
> 
> That doesn't scan well. So would a 4 lane operation be encoded as 64 <<
> 4? Is this because we are using the bottom bits for something?

64 << 0 = 64 bits
64 << 1 = 128 bits
64 << 2 = 256 bits

I've fixed up the wording a bit.
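
To make the encoding concrete, a throwaway helper (not part of the patch)
that maps the LEN operand to the vector width in bits would look like:

    /* Illustration only: LEN encodes the host vector width as 64 << LEN
       bits, so 0, 1 and 2 select 64-, 128- and 256-bit vectors.  */
    static inline unsigned vec_len_bits(unsigned len)
    {
        return 64u << len;
    }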

>> +  Copy C across the entire vector.
>> +  At present the only supported values for C are 0 and -1.
> 
> I guess this is why the size is unimportant? This is for clearing or
> setting the whole of the vector? What does len mean in this case?

Yes.  Len still means the length of the whole vector.

Elsewhere there's a comment about maybe using dupi{8,16,32,64}_vec instead.
However, I wanted to put that off until we do some more conversions and see
exactly what's going to be needed.
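
For illustration, clearing or setting a whole vector with the helpers this
patch declares (the allocator name and the V128 type choice are my
assumptions about the surrounding code, not something the hunks above show):

    TCGv_vec t = tcg_temp_new_vec(TCG_TYPE_V128);   /* allocator assumed */
    tcg_gen_movi_vec(t, 0);     /* every bit of the vector cleared */
    tcg_gen_movi_vec(t, -1);    /* every bit of the vector set */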


>> +* and_vec v0, v1, v2, len
>> +* or_vec  v0, v1, v2, len
>> +* xor_vec v0, v1, v2, len
>> +* andc_vecv0, v1, v2, len
>> +* orc_vec v0, v1, v2, len
>> +* not_vec v0, v1, len
>> +
>> +  Similarly, logical operations.
> 
> Similarly, logical operations with and without complement?

Sure.
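
As an aside, a plausible generic fallback for a backend without not_vec,
using only ops every vector backend must provide -- this is just a sketch,
not necessarily what tcg-op.c ends up doing:

    /* r = a ^ all-ones == ~a.  -1 is one of the two supported movi_vec
       constants, so no extra backend support is needed.  Assumes r and a
       are distinct temps, since r is clobbered first.  */
    static void gen_not_vec_fallback(TCGv_vec r, TCGv_vec a)
    {
        tcg_gen_movi_vec(r, -1);
        tcg_gen_xor_vec(r, a, r);
    }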


r~



Re: [Qemu-devel] [PATCH v3 1/6] tcg: Add types and operations for host vectors

2017-09-26 Thread Alex Bennée

Richard Henderson  writes:

> Nothing uses or enables them yet.
>
> Signed-off-by: Richard Henderson 
> ---
>  tcg/tcg-op.h  |  26 +++
>  tcg/tcg-opc.h |  37 ++
>  tcg/tcg.h |  34 +
>  tcg/tcg-op.c  | 234 ++
>  tcg/tcg.c |  77 ++-
>  tcg/README|  46 
>  6 files changed, 453 insertions(+), 1 deletion(-)
>
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index 5d3278f243..b9b0b9f46f 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -915,6 +915,32 @@ void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
>  void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
>  void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
>
> +void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
> +void tcg_gen_movi_vec(TCGv_vec, tcg_target_long);
> +void tcg_gen_add8_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_add16_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_add32_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_add64_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_sub8_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_sub16_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_sub32_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_sub64_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_and_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_or_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_xor_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_andc_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_orc_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
> +void tcg_gen_not_vec(TCGv_vec r, TCGv_vec a);
> +void tcg_gen_neg8_vec(TCGv_vec r, TCGv_vec a);
> +void tcg_gen_neg16_vec(TCGv_vec r, TCGv_vec a);
> +void tcg_gen_neg32_vec(TCGv_vec r, TCGv_vec a);
> +void tcg_gen_neg64_vec(TCGv_vec r, TCGv_vec a);
> +
> +void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset);
> +void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset);
> +void tcg_gen_ldz_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType sz);
> +void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType sz);
> +
>  #if TARGET_LONG_BITS == 64
>  #define tcg_gen_movi_tl tcg_gen_movi_i64
>  #define tcg_gen_mov_tl tcg_gen_mov_i64
> diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
> index 956fb1e9f3..8200184fa9 100644
> --- a/tcg/tcg-opc.h
> +++ b/tcg/tcg-opc.h
> @@ -204,8 +204,45 @@ DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1,
>  DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
>  TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
>
> +/* Host vector support.  */
> +
> +#define IMPLVEC  \
> +IMPL(TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256)
> +
> +DEF(mov_vec, 1, 1, 1, TCG_OPF_NOT_PRESENT)
> +
> +/* ??? Simple, but perhaps dupiN would be more descriptive.  */
> +DEF(movi_vec, 1, 0, 2, TCG_OPF_NOT_PRESENT)
> +
> +DEF(ld_vec, 1, 1, 2, IMPLVEC)
> +DEF(ldz_vec, 1, 1, 3, IMPLVEC)
> +DEF(st_vec, 0, 2, 2, IMPLVEC)
> +
> +DEF(add8_vec, 1, 2, 1, IMPLVEC)
> +DEF(add16_vec, 1, 2, 1, IMPLVEC)
> +DEF(add32_vec, 1, 2, 1, IMPLVEC)
> +DEF(add64_vec, 1, 2, 1, IMPLVEC)
> +
> +DEF(sub8_vec, 1, 2, 1, IMPLVEC)
> +DEF(sub16_vec, 1, 2, 1, IMPLVEC)
> +DEF(sub32_vec, 1, 2, 1, IMPLVEC)
> +DEF(sub64_vec, 1, 2, 1, IMPLVEC)
> +
> +DEF(neg8_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
> +DEF(neg16_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
> +DEF(neg32_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
> +DEF(neg64_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
> +
> +DEF(and_vec, 1, 2, 1, IMPLVEC)
> +DEF(or_vec, 1, 2, 1, IMPLVEC)
> +DEF(xor_vec, 1, 2, 1, IMPLVEC)
> +DEF(andc_vec, 1, 2, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec))
> +DEF(orc_vec, 1, 2, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec))
> +DEF(not_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
> +
>  #undef TLADDR_ARGS
>  #undef DATA64_ARGS
>  #undef IMPL
>  #undef IMPL64
> +#undef IMPLVEC
>  #undef DEF
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 25662c36d4..7cd356e87f 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -173,6 +173,16 @@ typedef uint64_t TCGRegSet;
>  # error "Missing unsigned widening multiply"
>  #endif
>
> +#ifndef TCG_TARGET_HAS_v64
> +#define TCG_TARGET_HAS_v64  0
> +#define TCG_TARGET_HAS_v128 0
> +#define TCG_TARGET_HAS_v256 0
> +#define TCG_TARGET_HAS_neg_vec  0
> +#define TCG_TARGET_HAS_not_vec  0
> +#define TCG_TARGET_HAS_andc_vec 0
> +#define TCG_TARGET_HAS_orc_vec  0
> +#endif
> +
>  #ifndef TARGET_INSN_START_EXTRA_WORDS
>  # define TARGET_INSN_START_WORDS 1
>  #else
> @@ -249,6 +259,11 @@ typedef struct TCGPool {
>  typedef enum TCGType {
>  TCG_TYPE_I32,
>  TCG_TYPE_I64,
> +
> +TCG_TYPE_V64,
> +TCG_TYPE_V128,
> +

[Qemu-devel] [PATCH v3 1/6] tcg: Add types and operations for host vectors

2017-09-15 Thread Richard Henderson
Nothing uses or enables them yet.

Signed-off-by: Richard Henderson 
---
 tcg/tcg-op.h  |  26 +++
 tcg/tcg-opc.h |  37 ++
 tcg/tcg.h |  34 +
 tcg/tcg-op.c  | 234 ++
 tcg/tcg.c |  77 ++-
 tcg/README|  46 
 6 files changed, 453 insertions(+), 1 deletion(-)

diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 5d3278f243..b9b0b9f46f 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -915,6 +915,32 @@ void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
 void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
 void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
 
+void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
+void tcg_gen_movi_vec(TCGv_vec, tcg_target_long);
+void tcg_gen_add8_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_add16_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_add32_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_add64_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_sub8_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_sub16_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_sub32_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_sub64_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_and_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_or_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_xor_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_andc_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_orc_vec(TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_not_vec(TCGv_vec r, TCGv_vec a);
+void tcg_gen_neg8_vec(TCGv_vec r, TCGv_vec a);
+void tcg_gen_neg16_vec(TCGv_vec r, TCGv_vec a);
+void tcg_gen_neg32_vec(TCGv_vec r, TCGv_vec a);
+void tcg_gen_neg64_vec(TCGv_vec r, TCGv_vec a);
+
+void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset);
+void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset);
+void tcg_gen_ldz_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType sz);
+void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType sz);
+
 #if TARGET_LONG_BITS == 64
 #define tcg_gen_movi_tl tcg_gen_movi_i64
 #define tcg_gen_mov_tl tcg_gen_mov_i64
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 956fb1e9f3..8200184fa9 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -204,8 +204,45 @@ DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1,
 DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
 TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
 
+/* Host vector support.  */
+
+#define IMPLVEC  \
+IMPL(TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256)
+
+DEF(mov_vec, 1, 1, 1, TCG_OPF_NOT_PRESENT)
+
+/* ??? Simple, but perhaps dupiN would be more descriptive.  */
+DEF(movi_vec, 1, 0, 2, TCG_OPF_NOT_PRESENT)
+
+DEF(ld_vec, 1, 1, 2, IMPLVEC)
+DEF(ldz_vec, 1, 1, 3, IMPLVEC)
+DEF(st_vec, 0, 2, 2, IMPLVEC)
+
+DEF(add8_vec, 1, 2, 1, IMPLVEC)
+DEF(add16_vec, 1, 2, 1, IMPLVEC)
+DEF(add32_vec, 1, 2, 1, IMPLVEC)
+DEF(add64_vec, 1, 2, 1, IMPLVEC)
+
+DEF(sub8_vec, 1, 2, 1, IMPLVEC)
+DEF(sub16_vec, 1, 2, 1, IMPLVEC)
+DEF(sub32_vec, 1, 2, 1, IMPLVEC)
+DEF(sub64_vec, 1, 2, 1, IMPLVEC)
+
+DEF(neg8_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
+DEF(neg16_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
+DEF(neg32_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
+DEF(neg64_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
+
+DEF(and_vec, 1, 2, 1, IMPLVEC)
+DEF(or_vec, 1, 2, 1, IMPLVEC)
+DEF(xor_vec, 1, 2, 1, IMPLVEC)
+DEF(andc_vec, 1, 2, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec))
+DEF(orc_vec, 1, 2, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec))
+DEF(not_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
+
 #undef TLADDR_ARGS
 #undef DATA64_ARGS
 #undef IMPL
 #undef IMPL64
+#undef IMPLVEC
 #undef DEF
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 25662c36d4..7cd356e87f 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -173,6 +173,16 @@ typedef uint64_t TCGRegSet;
 # error "Missing unsigned widening multiply"
 #endif
 
+#ifndef TCG_TARGET_HAS_v64
+#define TCG_TARGET_HAS_v64  0
+#define TCG_TARGET_HAS_v128 0
+#define TCG_TARGET_HAS_v256 0
+#define TCG_TARGET_HAS_neg_vec  0
+#define TCG_TARGET_HAS_not_vec  0
+#define TCG_TARGET_HAS_andc_vec 0
+#define TCG_TARGET_HAS_orc_vec  0
+#endif
+
 #ifndef TARGET_INSN_START_EXTRA_WORDS
 # define TARGET_INSN_START_WORDS 1
 #else
@@ -249,6 +259,11 @@ typedef struct TCGPool {
 typedef enum TCGType {
 TCG_TYPE_I32,
 TCG_TYPE_I64,
+
+TCG_TYPE_V64,
+TCG_TYPE_V128,
+TCG_TYPE_V256,
+
 TCG_TYPE_COUNT, /* number of different types */
 
 /* An alias for the size of the host register.  */
@@ -399,6 +414,8 @@ typedef tcg_target_ulong TCGArg;
 * TCGv_i32 : 32 bit integer type
 * TCGv_i64 : 64 bit integer type
 * TCGv_ptr : a host pointer type
+* TCGv_vec : a host vector type; the exact size is not exposed
+ to the CPU front-end code.