Re: [Qemu-devel] [PATCH v3 14/31] arm/translate-a64: add FP16 FMULX/MLS/FMLA to simd_indexed

2018-02-23 Thread Richard Henderson
On 02/23/2018 07:36 AM, Alex Bennée wrote:
>      case 0x9: /* FMUL, FMULX */
> -        if (!extract32(size, 1, 1)) {
> +        if (size == 1 ||
> +            (size < 2 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
>              unallocated_encoding(s);

You get to drop the check here...

> +        case 0: /* half precision */
> +            size = MO_16;
> +            index = h << 2 | l << 1 | m;
> +            is_fp16 = true;
> +            if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
> +                break;
> +            }

... because you added it here instead.
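In other words, the half-precision case now gates itself and falls
through to the unallocated default when FP16 is absent. Roughly
(other cases elided):

case 0: /* half precision */
    ...
    if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
        break;                    /* FP16 present: continue decoding */
    }
    /* fallthru */
default: /* unallocated */
    unallocated_encoding(s);      /* FP16 absent: UNDEF as before */
    return;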


r~



[Qemu-devel] [PATCH v3 14/31] arm/translate-a64: add FP16 FMULX/MLS/FMLA to simd_indexed

2018-02-23 Thread Alex Bennée
The helpers use the newly re-factored muladd support in SoftFloat for
the float16 work.
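
For reference, the half-precision fused multiply-add helper ends up as
a thin wrapper around that support, along these lines (a sketch; the
exact definition and its location, e.g. target/arm/helper-a64.c, may
differ):

/* Sketch: fused a * b + c on float16 with a single rounding step,
 * built on the re-factored softfloat float16_muladd(). */
float16 HELPER(advsimd_muladdh)(float16 a, float16 b, float16 c,
                                void *fpstp)
{
    float_status *fpst = fpstp;
    return float16_muladd(a, b, c, 0, fpst);
}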

Signed-off-by: Alex Bennée 

---
v3
  - re-jigged switch statement to fall-through for unalloc
  - added is_fp16 bool for fpst (see the get_fpstatus_ptr() sketch below)
  - fixed up some long lines
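
For context on the is_fp16 flag: get_fpstatus_ptr() hands back a
pointer to the float_status the operation should use, so FP16 ops get
their own rounding mode and cumulative flags. A sketch of that existing
helper, with the vfp.fp_status_f16 field name assumed from earlier
patches in this series:

static TCGv_ptr get_fpstatus_ptr(bool is_f16)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset = is_f16 ? offsetof(CPUARMState, vfp.fp_status_f16)
                        : offsetof(CPUARMState, vfp.fp_status);

    /* point the TCG pointer at env->vfp.fp_status{,_f16} */
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}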
---
 target/arm/translate-a64.c | 83 +-
 1 file changed, 67 insertions(+), 16 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index e96e6cdd15..43bff5cd09 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -11198,6 +11198,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     int rd = extract32(insn, 0, 5);
     bool is_long = false;
     bool is_fp = false;
+    bool is_fp16 = false;
     int index;
     TCGv_ptr fpst;
 
@@ -11244,7 +11245,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
         }
         /* fall through */
     case 0x9: /* FMUL, FMULX */
-        if (!extract32(size, 1, 1)) {
+        if (size == 1 ||
+            (size < 2 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
             unallocated_encoding(s);
             return;
         }
@@ -11256,18 +11258,34 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     }

     if (is_fp) {
-        /* low bit of size indicates single/double */
-        size = extract32(size, 0, 1) ? 3 : 2;
-        if (size == 2) {
+        /* convert insn encoded size to TCGMemOp size */
+        switch (size) {
+        case 2: /* single precision */
+            size = MO_32;
             index = h << 1 | l;
-        } else {
+            rm |= (m << 4);
+            break;
+        case 3: /* double precision */
+            size = MO_64;
             if (l || !is_q) {
                 unallocated_encoding(s);
                 return;
             }
             index = h;
+            rm |= (m << 4);
+            break;
+        case 0: /* half precision */
+            size = MO_16;
+            index = h << 2 | l << 1 | m;
+            is_fp16 = true;
+            if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+                break;
+            }
+            /* fallthru */
+        default: /* unallocated */
+            unallocated_encoding(s);
+            return;
         }
-        rm |= (m << 4);
     } else {
         switch (size) {
         case 1:
@@ -11288,7 +11306,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     }

     if (is_fp) {
-        fpst = get_fpstatus_ptr(false);
+        fpst = get_fpstatus_ptr(is_fp16);
     } else {
         fpst = NULL;
     }
@@ -11390,18 +11408,51 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
                 break;
             }
             case 0x5: /* FMLS */
-                /* As usual for ARM, separate negation for fused multiply-add */
-                gen_helper_vfp_negs(tcg_op, tcg_op);
-                /* fall through */
             case 0x1: /* FMLA */
-                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
-                gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+                read_vec_element_i32(s, tcg_res, rd, pass,
+                                     is_scalar ? size : MO_32);
+                switch (size) {
+                case 1:
+                    if (opcode == 0x5) {
+                        /* As usual for ARM, separate negation for fused
+                         * multiply-add */
+                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
+                    }
+                    gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
+                                               tcg_res, fpst);
+                    break;
+                case 2:
+                    if (opcode == 0x5) {
+                        /* As usual for ARM, separate negation for
+                         * fused multiply-add */
+                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
+                    }
+                    gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
+                                           tcg_res, fpst);
+                    break;
+                default:
+                    g_assert_not_reached();
+                }
                 break;
             case 0x9: /* FMUL, FMULX */
-                if (u) {
-                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
-                } else {
-                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
+                switch (size) {
+                case 1:
+                    if (u) {
+                        gen_helper_advsimd_mulxh(tcg_res, tcg_op, tcg_idx,
+                                                 fpst);
+                    } else {
+                        g_assert_not_reached();
+                    }
+                    break;
+                case 2:
+                    if (u) {
+                        gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);