Re: [Qemu-devel] [PATCH 8/8] target-arm: A64: Add SIMD shift by immediate

2014-01-21 Thread Richard Henderson
On 01/17/2014 10:44 AM, Peter Maydell wrote:
 +/* Common SHL/SLI - Shift left with an optional insert */
 +static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
 +                                 bool insert, int shift)
 +{
 +    tcg_gen_shli_i64(tcg_src, tcg_src, shift);
 +    if (insert) {
 +        /* SLI */
 +        uint64_t mask = (1ULL << shift) - 1;
 +        tcg_gen_andi_i64(tcg_res, tcg_res, mask);
 +        tcg_gen_or_i64(tcg_res, tcg_res, tcg_src);

This is

  tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);

We already special-case such remaining-width deposits for hosts that don't
implement deposit, so we should get the exact same insn sequence for x86.

 +        tcg_gen_mov_i64(tcg_res, tcg_src);

Which means for the else you can elide the move and just shift directly into
the result.
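
Concretely, the two suggestions combined would make the helper look
something like this (untested sketch, keeping the calling convention
from the patch):

  /* Common SHL/SLI - Shift left with an optional insert */
  static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                   bool insert, int shift)
  {
      if (insert) {
          /* SLI: keep the low 'shift' bits of the destination and
           * deposit the shifted source into the remaining high bits.
           */
          tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
      } else {
          /* SHL: shift straight into the result, no temporary move */
          tcg_gen_shli_i64(tcg_res, tcg_src, shift);
      }
  }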


r~




[Qemu-devel] [PATCH 8/8] target-arm: A64: Add SIMD shift by immediate

2014-01-17 Thread Peter Maydell
From: Alex Bennée alex.ben...@linaro.org

This implements a subset of the AdvSIMD shift operations (namely all the
non-saturating, non-narrowing ones). The actual shift generation code
is shared between the scalar and vector cases, wrapped with either
vector element iteration or the fp reg access.

The rounding operations need to take special care to correctly reflect
the effect of adding the rounding constant on the high bits, as the
intermediate results do not truncate.

Signed-off-by: Alex Bennée alex.ben...@linaro.org
Signed-off-by: Peter Maydell peter.mayd...@linaro.org
---
 target-arm/translate-a64.c | 385 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 383 insertions(+), 2 deletions(-)
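
A note on the rounding remark above: the operation being modelled is
"add 1 << (shift - 1), then shift right", evaluated at full precision,
so for 64-bit elements the rounding add can carry out of bit 63. A
rough C model of the unsigned, non-accumulating case (illustration
only, not part of the patch; the helper name is made up):

  #include <stdint.h>

  /* Rounded unsigned shift right of a 64-bit value.  The rounding
   * add can carry out of bit 63, so the intermediate needs 65 bits;
   * handle_shri_with_rndacc models this with tcg_gen_add2_i64 and a
   * separate high word.
   */
  static uint64_t urshr64(uint64_t src, int shift)  /* 1 <= shift <= 64 */
  {
      uint64_t lo = src + (1ULL << (shift - 1));
      uint64_t hi = (lo < src);             /* carry out of the addition */

      if (shift == 64) {
          return hi;                        /* only the carry survives */
      }
      return (lo >> shift) | (hi << (64 - shift));
  }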

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 9980759..bfcce09 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -5479,15 +5479,224 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
     unsupported_encoding(s, insn);
 }
 
+/*
+ * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
+ *
+ * This handles the common shift logic and is used by both
+ * the vector and scalar code.
+ */
+static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
+                                    TCGv_i64 tcg_rnd, bool accumulate,
+                                    bool is_u, int size, int shift)
+{
+    bool extended_result = false;
+    bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
+    int ext_lshift = 0;
+    TCGv_i64 tcg_src_hi;
+
+    if (round && size == 3) {
+        extended_result = true;
+        ext_lshift = 64 - shift;
+        tcg_src_hi = tcg_temp_new_i64();
+    } else if (shift == 64) {
+        if (!accumulate && is_u) {
+            /* result is zero */
+            tcg_gen_movi_i64(tcg_res, 0);
+            return;
+        }
+    }
+
+    /* Deal with the rounding step */
+    if (round) {
+        if (extended_result) {
+            TCGv_i64 tcg_zero = tcg_const_i64(0);
+            if (!is_u) {
+                /* take care of sign extending tcg_res */
+                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
+                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
+                                 tcg_src, tcg_src_hi,
+                                 tcg_rnd, tcg_zero);
+            } else {
+                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
+                                 tcg_src, tcg_zero,
+                                 tcg_rnd, tcg_zero);
+            }
+            tcg_temp_free_i64(tcg_zero);
+        } else {
+            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
+        }
+    }
+
+    /* Now do the shift right */
+    if (round && extended_result) {
+        /* extended case, 64 bit precision required */
+        if (ext_lshift == 0) {
+            /* special case, only high bits matter */
+            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
+        } else {
+            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
+            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
+            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
+        }
+    } else {
+        if (is_u) {
+            if (shift == 64) {
+                /* essentially shifting in 64 zeros */
+                tcg_gen_movi_i64(tcg_src, 0);
+            } else {
+                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
+            }
+        } else {
+            if (shift == 64) {
+                /* effectively extending the sign-bit */
+                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
+            } else {
+                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
+            }
+        }
+    }
+
+    if (accumulate) {
+        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
+    } else {
+        tcg_gen_mov_i64(tcg_res, tcg_src);
+    }
+
+    if (extended_result) {
+        tcg_temp_free_i64(tcg_src_hi);
+    }
+}
+
+/* Common SHL/SLI - Shift left with an optional insert */
+static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
+                                 bool insert, int shift)
+{
+    tcg_gen_shli_i64(tcg_src, tcg_src, shift);
+    if (insert) {
+        /* SLI */
+        uint64_t mask = (1ULL << shift) - 1;
+        tcg_gen_andi_i64(tcg_res, tcg_res, mask);
+        tcg_gen_or_i64(tcg_res, tcg_res, tcg_src);
+    } else {
+        tcg_gen_mov_i64(tcg_res, tcg_src);
+    }
+}
+
+/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
+static void handle_scalar_simd_shri(DisasContext *s,
+                                    bool is_u, int immh, int immb,
+                                    int opcode, int rn, int rd)
+{
+    const int size = 3;
+    int immhb = immh << 3 | immb;
+    int shift = 2 * (8 << size) - immhb;
+    bool accumulate = false;
+    bool round = false;
+    TCGv_i64 tcg_rn;
+    TCGv_i64 tcg_rd;
+    TCGv_i64 tcg_round;
+
+    if (!extract32(immh, 3, 1)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+switch