On 3/27/23 20:05, Song Gao wrote:
+static void gen_vaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t1, t2;
+
+    t1 = tcg_temp_new_i32();
+    t2 = tcg_temp_new_i32();
+    tcg_gen_shli_i32(t1, a, 16);
+    tcg_gen_sari_i32(t1, t1, 16);
+    tcg_gen_shli_i32(t2, b, 16);
+    tcg_gen_sari_i32(t2, t2, 16);
+    tcg_gen_add_i32(t, t1, t2);
+}
+
+static void gen_vaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+    TCGv_i64 t1, t2;
+
+    t1 = tcg_temp_new_i64();
+    t2 = tcg_temp_new_i64();
+    tcg_gen_shli_i64(t1, a, 32);
+    tcg_gen_sari_i64(t1, t1, 32);
+    tcg_gen_shli_i64(t2, b, 32);
+    tcg_gen_sari_i64(t2, t2, 32);
+    tcg_gen_add_i64(t, t1, t2);
+}
For integer code like this, use tcg_gen_ext16s_i32 and tcg_gen_ext32s_i64; each sign-extension is then a single operation instead of a shift pair.
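E.g., for the w_h case (untested sketch; the d_w variant is the same with tcg_gen_ext32s_i64):

    static void gen_vaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
    {
        TCGv_i32 t1 = tcg_temp_new_i32();
        TCGv_i32 t2 = tcg_temp_new_i32();

        /* Sign-extend the even (low) 16-bit elements in one op each. */
        tcg_gen_ext16s_i32(t1, a);
        tcg_gen_ext16s_i32(t2, b);
        tcg_gen_add_i32(t, t1, t2);
    }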
+static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+    TCGv_vec t1, t2;
+
+    int halfbits = 4 << vece;
+
+    t1 = tcg_temp_new_vec_matching(a);
+    t2 = tcg_temp_new_vec_matching(b);
+
+    /* Zero-extend the even elements from a */
+    tcg_gen_shli_vec(vece, t1, a, halfbits);
+    tcg_gen_shri_vec(vece, t1, t1, halfbits);
+
+    /* Zero-extend the even elements from b */
+    tcg_gen_shli_vec(vece, t2, b, halfbits);
+    tcg_gen_shri_vec(vece, t2, t2, halfbits);
+
+    tcg_gen_add_vec(vece, t, t1, t2);
+}
The zero-extensions can each be done with a single and instead of a shift pair:

    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
    tcg_gen_andi_vec(vece, t1, a, mask);
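Folding that in, the whole vector helper reduces to something like (untested sketch):

    static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
    {
        int halfbits = 4 << vece;
        uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
        TCGv_vec t1 = tcg_temp_new_vec_matching(a);
        TCGv_vec t2 = tcg_temp_new_vec_matching(b);

        /* Zero-extend the even elements with one and each. */
        tcg_gen_andi_vec(vece, t1, a, mask);
        tcg_gen_andi_vec(vece, t2, b, mask);
        tcg_gen_add_vec(vece, t, t1, t2);
    }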
+static void gen_vaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i32 t1, t2;
+
+    t1 = tcg_temp_new_i32();
+    t2 = tcg_temp_new_i32();
+    tcg_gen_shli_i32(t1, a, 16);
+    tcg_gen_shri_i32(t1, t1, 16);
+    tcg_gen_shli_i32(t2, b, 16);
+    tcg_gen_shri_i32(t2, t2, 16);
+    tcg_gen_add_i32(t, t1, t2);
+}
Similarly, tcg_gen_ext16u_i32 for the unsigned case.
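Untested sketch of that change:

    static void gen_vaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
    {
        TCGv_i32 t1 = tcg_temp_new_i32();
        TCGv_i32 t2 = tcg_temp_new_i32();

        /* Zero-extend the even (low) 16-bit elements in one op each. */
        tcg_gen_ext16u_i32(t1, a);
        tcg_gen_ext16u_i32(t2, b);
        tcg_gen_add_i32(t, t1, t2);
    }

r~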