Re: [RFC 05/15] target/riscv: rvb: pack two words into one register

2020-11-19 Thread Richard Henderson
On 11/18/20 12:29 AM, frank.ch...@sifive.com wrote:
> +static void gen_pack(TCGv ret, TCGv arg1, TCGv arg2)
> +{
> +TCGv lower, higher;
> +lower = tcg_temp_new();
> +higher = tcg_temp_new();
> +
> +#ifdef TARGET_RISCV64
> +tcg_gen_ext32u_tl(lower, arg1);
> +tcg_gen_shli_tl(higher, arg2, 32);
> +#else
> +tcg_gen_ext16u_tl(lower, arg1);
> +tcg_gen_shli_tl(higher, arg2, 16);
> +#endif
> +

tcg_gen_deposit_tl(ret, arg1, arg2,
TARGET_LONG_BITS / 2,
TARGET_LONG_BITS / 2);

> +static void gen_packu(TCGv ret, TCGv arg1, TCGv arg2)
> +{
> +TCGv lower, higher;
> +lower = tcg_temp_new();
> +higher = tcg_temp_new();
> +
> +#ifdef TARGET_RISCV64
> +tcg_gen_shri_tl(lower, arg1, 32);
> +tcg_gen_shri_tl(higher, arg2, 32);
> +tcg_gen_shli_tl(higher, higher, 32);
> +#else
> +tcg_gen_shri_tl(lower, arg1, 16);
> +tcg_gen_shri_tl(higher, arg2, 16);
> +tcg_gen_shli_tl(higher, higher, 16);
> +#endif
> +
> +tcg_gen_or_tl(ret, higher, lower);

tcg_gen_shri_tl(t, arg1, TARGET_LONG_BITS / 2);
tcg_gen_deposit_tl(ret, arg2, t, 0, TARGET_LONG_BITS / 2);

> +static void gen_packh(TCGv ret, TCGv arg1, TCGv arg2)
> +{
> +TCGv lower, higher;
> +lower = tcg_temp_new();
> +higher = tcg_temp_new();
> +
> +tcg_gen_ext8u_tl(lower, arg1);
> +tcg_gen_ext8u_tl(higher, arg2);
> +tcg_gen_shli_tl(higher, higher, 8);
> +
> +tcg_gen_or_tl(ret, higher, lower);

tcg_gen_ext8u_tl(t, arg2);
tcg_gen_deposit_tl(ret, arg1, t, 8, TARGET_LONG_BITS - 8);

> +static void gen_packw(TCGv ret, TCGv arg1, TCGv arg2)
> +{
> +TCGv lower, higher;
> +lower = tcg_temp_new();
> +higher = tcg_temp_new();
> +
> +tcg_gen_ext16u_tl(lower, arg1);
> +tcg_gen_shli_tl(higher, arg2, 16);
> +tcg_gen_or_tl(ret, higher, lower);
> +
> +tcg_gen_ext32s_tl(ret, ret);
> +
> +tcg_temp_free(lower);
> +tcg_temp_free(higher);
> +}

tcg_gen_ext16s_i64(t, arg2);
tcg_gen_deposit_i64(ret, arg1, t, 16, 48);

> +static void gen_packuw(TCGv ret, TCGv arg1, TCGv arg2)
> +{
> +TCGv lower, higher;
> +lower = tcg_temp_new();
> +higher = tcg_temp_new();
> +
> +tcg_gen_shri_tl(lower, arg1, 16);
> +tcg_gen_shri_tl(higher, arg2, 16);
> +tcg_gen_shli_tl(higher, higher, 16);
> +tcg_gen_or_tl(ret, higher, lower);
> +
> +tcg_gen_ext32s_tl(ret, ret);

tcg_gen_shri_i64(t, arg1, 16);
tcg_gen_deposit_i64(ret, arg2, t, 0, 16);
tcg_gen_ext32s_i64(ret, ret);


r~



[RFC 05/15] target/riscv: rvb: pack two words into one register

2020-11-18 Thread frank . chang
From: Kito Cheng 

Signed-off-by: Kito Cheng 
---
 target/riscv/insn32-64.decode   |  3 +
 target/riscv/insn32.decode  |  3 +
 target/riscv/insn_trans/trans_rvb.c.inc | 30 
 target/riscv/translate.c| 92 +
 4 files changed, 128 insertions(+)

diff --git a/target/riscv/insn32-64.decode b/target/riscv/insn32-64.decode
index d5bea5af273..2f00f96e36b 100644
--- a/target/riscv/insn32-64.decode
+++ b/target/riscv/insn32-64.decode
@@ -91,3 +91,6 @@ hsv_d     0110111  .....   ..... 100 00000 1110011 @r2_s
 clzw       011000000000 ..... 001 ..... 0011011 @r2
 ctzw       011000000001 ..... 001 ..... 0011011 @r2
 pcntw      011000000010 ..... 001 ..... 0011011 @r2
+
+packw      0000100 .......... 100 ..... 0111011 @r
+packuw     0100100 .......... 100 ..... 0111011 @r
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 29a3d4c6ebc..79aa40f03dd 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -602,3 +602,6 @@ pcnt       011000000010 ..... 001 ..... 0010011 @r2
 andn       0100000 .......... 111 ..... 0110011 @r
 orn        0100000 .......... 110 ..... 0110011 @r
 xnor       0100000 .......... 100 ..... 0110011 @r
+pack       0000100 .......... 100 ..... 0110011 @r
+packu      0100100 .......... 100 ..... 0110011 @r
+packh      0000100 .......... 111 ..... 0110011 @r
diff --git a/target/riscv/insn_trans/trans_rvb.c.inc b/target/riscv/insn_trans/trans_rvb.c.inc
index be25431e990..9257373ae0b 100644
--- a/target/riscv/insn_trans/trans_rvb.c.inc
+++ b/target/riscv/insn_trans/trans_rvb.c.inc
@@ -53,6 +53,24 @@ static bool trans_xnor(DisasContext *ctx, arg_xnor *a)
 return gen_arith(ctx, a, &gen_xnor);
 }
 
+static bool trans_pack(DisasContext *ctx, arg_pack *a)
+{
+REQUIRE_EXT(ctx, RVB);
+return gen_arith(ctx, a, &gen_pack);
+}
+
+static bool trans_packu(DisasContext *ctx, arg_packu *a)
+{
+REQUIRE_EXT(ctx, RVB);
+return gen_arith(ctx, a, &gen_packu);
+}
+
+static bool trans_packh(DisasContext *ctx, arg_packh *a)
+{
+REQUIRE_EXT(ctx, RVB);
+return gen_arith(ctx, a, &gen_packh);
+}
+
 /* RV64-only instructions */
 #ifdef TARGET_RISCV64
 
@@ -74,4 +92,16 @@ static bool trans_pcntw(DisasContext *ctx, arg_pcntw *a)
 return gen_unary(ctx, a, &gen_pcntw);
 }
 
+static bool trans_packw(DisasContext *ctx, arg_packw *a)
+{
+REQUIRE_EXT(ctx, RVB);
+return gen_arith(ctx, a, &gen_packw);
+}
+
+static bool trans_packuw(DisasContext *ctx, arg_packuw *a)
+{
+REQUIRE_EXT(ctx, RVB);
+return gen_arith(ctx, a, &gen_packuw);
+}
+
 #endif
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 254a9dca8c2..fb30ee83aa8 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -743,6 +743,65 @@ static void gen_xnor(TCGv ret, TCGv arg1, TCGv arg2)
 tcg_temp_free(t);
 }
 
+static void gen_pack(TCGv ret, TCGv arg1, TCGv arg2)
+{
+TCGv lower, higher;
+lower = tcg_temp_new();
+higher = tcg_temp_new();
+
+#ifdef TARGET_RISCV64
+tcg_gen_ext32u_tl(lower, arg1);
+tcg_gen_shli_tl(higher, arg2, 32);
+#else
+tcg_gen_ext16u_tl(lower, arg1);
+tcg_gen_shli_tl(higher, arg2, 16);
+#endif
+
+tcg_gen_or_tl(ret, higher, lower);
+
+tcg_temp_free(lower);
+tcg_temp_free(higher);
+}
+
+static void gen_packu(TCGv ret, TCGv arg1, TCGv arg2)
+{
+TCGv lower, higher;
+lower = tcg_temp_new();
+higher = tcg_temp_new();
+
+#ifdef TARGET_RISCV64
+tcg_gen_shri_tl(lower, arg1, 32);
+tcg_gen_shri_tl(higher, arg2, 32);
+tcg_gen_shli_tl(higher, higher, 32);
+#else
+tcg_gen_shri_tl(lower, arg1, 16);
+tcg_gen_shri_tl(higher, arg2, 16);
+tcg_gen_shli_tl(higher, higher, 16);
+#endif
+
+tcg_gen_or_tl(ret, higher, lower);
+
+tcg_temp_free(lower);
+tcg_temp_free(higher);
+}
+
+static void gen_packh(TCGv ret, TCGv arg1, TCGv arg2)
+{
+TCGv lower, higher;
+lower = tcg_temp_new();
+higher = tcg_temp_new();
+
+tcg_gen_ext8u_tl(lower, arg1);
+tcg_gen_ext8u_tl(higher, arg2);
+tcg_gen_shli_tl(higher, higher, 8);
+
+tcg_gen_or_tl(ret, higher, lower);
+
+tcg_temp_free(lower);
+tcg_temp_free(higher);
+}
+
+
 #ifdef TARGET_RISCV64
 
 static bool gen_cxzw(DisasContext *ctx, arg_r2 *a,
@@ -775,6 +834,39 @@ static void gen_pcntw(TCGv ret, TCGv arg1)
 tcg_gen_ctpop_tl(ret, arg1);
 }
 
+static void gen_packw(TCGv ret, TCGv arg1, TCGv arg2)
+{
+TCGv lower, higher;
+lower = tcg_temp_new();
+higher = tcg_temp_new();
+
+tcg_gen_ext16u_tl(lower, arg1);
+tcg_gen_shli_tl(higher, arg2, 16);
+tcg_gen_or_tl(ret, higher, lower);
+
+tcg_gen_ext32s_tl(ret, ret);
+
+tcg_temp_free(lower);
+tcg_temp_free(higher);
+}
+
+static void gen_packuw(TCGv ret, TCGv arg1, TCGv arg2)
+{
+TCGv lower, higher;
+lower = tcg_temp_new();
+higher = tcg_temp_new();
+
+tcg_gen_shri_tl(lower, arg1, 16);
+tcg_gen_shri_tl(higher, arg2, 16);
+tcg_gen_shli_tl(higher, higher, 16);
+