On 5/12/24 11:38, Chinmay Rath wrote:
@@ -2934,6 +2870,184 @@ static bool do_vx_vaddsubcuw(DisasContext *ctx, arg_VX *a, int add)
      return true;
  }
+static inline void do_vadd_vsub_sat
+(
+    unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b,
+    void (*norm_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec),
+    void (*sat_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
+{
+    TCGv_vec x = tcg_temp_new_vec_matching(t);
+    norm_op(vece, x, a, b);
+    sat_op(vece, t, a, b);
+    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+    tcg_gen_or_vec(vece, sat, sat, x);
+}

As a separate change, before or after this one, the cmp_vec may be simplified to xor_vec, which means that INDEX_op_cmp_vec need not be probed in the vecop_lists. See

https://lore.kernel.org/qemu-devel/20240506010403.6204-31-richard.hender...@linaro.org/

which is performing the same operation on AArch64.
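
For concreteness, a sketch of that simplification applied to the helper
above (untested; it relies on vscr_sat only ever being tested for
non-zero, so any set bit in the accumulator is enough):

static inline void do_vadd_vsub_sat(
    unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b,
    void (*norm_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec),
    void (*sat_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);

    norm_op(vece, x, a, b);
    sat_op(vece, t, a, b);
    /* A lane saturated iff the two results differ, so the xor is
       non-zero exactly in the lanes that saturated. */
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, sat, sat, x);
}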


+static bool do_vx_vadd_vsub_sat(DisasContext *ctx, arg_VX *a,
+                                int sign, int vece, int add)
+{
+    static const TCGOpcode vecop_list_sub_u[] = {
+        INDEX_op_sub_vec, INDEX_op_ussub_vec, INDEX_op_cmp_vec, 0
+    };
+    static const TCGOpcode vecop_list_sub_s[] = {
+        INDEX_op_sub_vec, INDEX_op_sssub_vec, INDEX_op_cmp_vec, 0
+    };
+    static const TCGOpcode vecop_list_add_u[] = {
+        INDEX_op_add_vec, INDEX_op_usadd_vec, INDEX_op_cmp_vec, 0
+    };
+    static const TCGOpcode vecop_list_add_s[] = {
+        INDEX_op_add_vec, INDEX_op_ssadd_vec, INDEX_op_cmp_vec, 0
+    };
+
+    static const GVecGen4 op[2][3][2] = {
+        {
+            {
+                {
+                    .fniv = gen_vsub_sat_u,
+                    .fno = gen_helper_VSUBUBS,
+                    .opt_opc = vecop_list_sub_u,
+                    .write_aofs = true,
+                    .vece = MO_8
+                },
+                {
+                    .fniv = gen_vadd_sat_u,
+                    .fno = gen_helper_VADDUBS,
+                    .opt_opc = vecop_list_add_u,
+                    .write_aofs = true,
+                    .vece = MO_8
+                },
+            },
+            {
+                {
+                    .fniv = gen_vsub_sat_u,
+                    .fno = gen_helper_VSUBUHS,
+                    .opt_opc = vecop_list_sub_u,
+                    .write_aofs = true,
+                    .vece = MO_16
+                },
+                {
+                    .fniv = gen_vadd_sat_u,
+                    .fno = gen_helper_VADDUHS,
+                    .opt_opc = vecop_list_add_u,
+                    .write_aofs = true,
+                    .vece = MO_16
+                },
+            },
+            {
+                {
+                    .fniv = gen_vsub_sat_u,
+                    .fno = gen_helper_VSUBUWS,
+                    .opt_opc = vecop_list_sub_u,
+                    .write_aofs = true,
+                    .vece = MO_32
+                },
+                {
+                    .fniv = gen_vadd_sat_u,
+                    .fno = gen_helper_VADDUWS,
+                    .opt_opc = vecop_list_add_u,
+                    .write_aofs = true,
+                    .vece = MO_32
+                },
+            },
+        },
+        {
+            {
+                {
+                    .fniv = gen_vsub_sat_s,
+                    .fno = gen_helper_VSUBSBS,
+                    .opt_opc = vecop_list_sub_s,
+                    .write_aofs = true,
+                    .vece = MO_8
+                },
+                {
+                    .fniv = gen_vadd_sat_s,
+                    .fno = gen_helper_VADDSBS,
+                    .opt_opc = vecop_list_add_s,
+                    .write_aofs = true,
+                    .vece = MO_8
+                },
+            },
+            {
+                {
+                    .fniv = gen_vsub_sat_s,
+                    .fno = gen_helper_VSUBSHS,
+                    .opt_opc = vecop_list_sub_s,
+                    .write_aofs = true,
+                    .vece = MO_16
+                },
+                {
+                    .fniv = gen_vadd_sat_s,
+                    .fno = gen_helper_VADDSHS,
+                    .opt_opc = vecop_list_add_s,
+                    .write_aofs = true,
+                    .vece = MO_16
+                },
+            },
+            {
+                {
+                    .fniv = gen_vsub_sat_s,
+                    .fno = gen_helper_VSUBSWS,
+                    .opt_opc = vecop_list_sub_s,
+                    .write_aofs = true,
+                    .vece = MO_32
+                },
+                {
+                    .fniv = gen_vadd_sat_s,
+                    .fno = gen_helper_VADDSWS,
+                    .opt_opc = vecop_list_add_s,
+                    .write_aofs = true,
+                    .vece = MO_32
+                },
+            },
+        },
+    };

While this table is not wrong, I think it is clearer to have separate tables, one per operation, which are then passed to a common expander (see the TRANS_FLAGS example below).

+
+    REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);
+    REQUIRE_VECTOR(ctx);
+
+    tcg_gen_gvec_4(avr_full_offset(a->vrt), offsetof(CPUPPCState, vscr_sat),
+                   avr_full_offset(a->vra), avr_full_offset(a->vrb), 16, 16,
+                   &op[sign][vece][add]);
+
+    return true;
+}
+
+TRANS(VSUBUBS, do_vx_vadd_vsub_sat, 0, MO_8, 0)

I think it is clearer to use TRANS_FLAGS than to sink the ISA check into the helper. In general, I find that such a helper later gets reused for something else with a different ISA check.

Thus

static const TCGOpcode vecop_list_vsub_sat_u[] = {
    INDEX_op_sub_vec, INDEX_op_ussub_vec, 0
};
static const GVecGen4 op_vsububs = {
    .fno = gen_helper_VSUBUBS,
    .fniv = gen_vsub_sat_u,
    .opt_opc = vecop_list_vsub_sat_u,
    .write_aofs = true,
    .vece = MO_8
};
TRANS_FLAGS(ALTIVEC, VSUBUBS, do_vx_vadd_vsub_sat, &op_vsububs)

static const GVecGen4 op_vsubuhs = {
    .fno = gen_helper_VSUBUHS,
    .fniv = gen_vsub_sat_u,
    .opt_opc = vecop_list_vsub_sat_u,
    .write_aofs = true,
    .vece = MO_16
};
TRANS_FLAGS(ALTIVEC, VSUBUHS, do_vx_vadd_vsub_sat, &op_vsubuhs)

etc.
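
The common expander then reduces to something like this (a sketch; the
ALTIVEC check now lives in TRANS_FLAGS rather than the helper):

static bool do_vx_vadd_vsub_sat(DisasContext *ctx, arg_VX *a,
                                const GVecGen4 *op)
{
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_4(avr_full_offset(a->vrt), offsetof(CPUPPCState, vscr_sat),
                   avr_full_offset(a->vra), avr_full_offset(a->vrb),
                   16, 16, op);
    return true;
}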

-GEN_VXFORM_DUAL(vaddubs, vmul10uq, 0, 8, PPC_ALTIVEC, PPC_NONE),

You are correct in your cover letter that this is not right.
We should have been testing ISA300 for vmul10uq here.

+GEN_VXFORM(vmul10uq, 0, 8),

And thus need GEN_VXFORM_300 here.
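
That is:

+GEN_VXFORM_300(vmul10uq, 0, 8),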

+GEN_VXFORM(vmul10euq, 0, 9),
+GEN_VXFORM(bcdcpsgn, 0, 13),
+GEN_VXFORM(bcdadd, 0, 24),
+GEN_VXFORM(bcdsub, 0, 25),
...
+GEN_VXFORM(xpnd04_2, 0, 30),

None of these are in the base ISA, so all need a flag check.
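
For example (assuming ISA300 is the right gate for these two, as it is
for vmul10uq above; bcdadd/bcdsub date from ISA 2.07 and would need that
gate instead):

+GEN_VXFORM_300(vmul10euq, 0, 9),
+GEN_VXFORM_300(bcdcpsgn, 0, 13),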



r~
