On 6/7/21 9:58 AM, Peter Maydell wrote:
#define DO_VADC(OP, INV)                                                    \
    uint32_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vd,             \
                                    void *vn, void *vm, uint32_t nzcv)      \
    {                                                                       \
        /* Beat-wise add-with-carry over four 32-bit elements; returns   */ \
        /* the updated NZCV value (carry in C, NZV cleared).             */ \
        uint32_t *d = vd, *n = vn, *m = vm;                                 \
        uint16_t mask = mve_element_mask(env);                              \
        unsigned elt;                                                       \
        int carry = (nzcv & FPCR_C) ? 1 : 0;                                \
        /* If every beat is predicated out, the flags are preserved. */     \
        bool update_flags = (mask & 0x1111) != 0;                           \
        for (elt = 0; elt < 16 / 4; elt++, mask >>= 4) {                    \
            uint64_t sum = (uint64_t)n[H4(elt)] + INV(m[H4(elt)]) + carry;  \
            uint64_t bytemask = mask_to_bytemask4(mask);                    \
            if (mask & 1) {                                                 \
                /* Carry chains only through beats that execute. */         \
                carry = sum >> 32;                                          \
            }                                                               \
            /* Merge only the unpredicated bytes into the destination. */   \
            d[H4(elt)] = (d[H4(elt)] & ~bytemask) | (sum & bytemask);       \
        }                                                                   \
        mve_advance_vpt(env);                                               \
        if (update_flags) {                                                 \
            nzcv = carry ? FPCR_C : 0;                                      \
        }                                                                   \
        return nzcv;                                                        \
    }
...
+ /* + * This insn is subject to beat-wise execution. Partial execution + * of an I=1 (initial carry input fixed) insn which does not + * execute the first beat must start with the current FPSCR.NZCV + * value, not the fixed constant input. + */ + if (a->i && !mve_skip_first_beat(s)) { + /* Carry input is 0 (VADCI) or 1 (VSBCI), NZV zeroed */ + nzcv = tcg_const_i32(fixed_carry); + } else { + /* Carry input from existing NZCV flag values */ + nzcv = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]); + tcg_gen_andi_i32(nzcv, nzcv, FPCR_NZCV_MASK); + } + qd = mve_qreg_ptr(a->qd); + qn = mve_qreg_ptr(a->qn); + qm = mve_qreg_ptr(a->qm); + fn(nzcv, cpu_env, qd, qn, qm, nzcv); + fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]); + tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK); + tcg_gen_or_i32(fpscr, fpscr, nzcv); + store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
Hmm. It seems like you're having to work extra hard in TCG to extract and store NZCV.
/* How about four helper functions instead of 2.  E.g. */

static void do_vadc(CPUARMState *env, uint32_t *d, uint32_t *n,
                    uint32_t *m, uint32_t inv, uint32_t carry_in,
                    bool update_flags)
{
    /*
     * Common beat-wise add/subtract-with-carry body.  @inv is 0 for
     * addition or -1 for subtraction (m is XORed with it, giving ~m);
     * @carry_in seeds the carry chain; @update_flags forces the FPSCR
     * carry write-back even when no beat executes (the I-variants).
     */
    uint16_t mask = mve_element_mask(env);
    unsigned i;

    /* If any additions trigger, we will update flags. */
    if (mask & 0x1111) {
        update_flags = true;
    }

    for (i = 0; i < 16 / 4; i++, mask >>= 4) {
        uint32_t bytemask = mask_to_bytemask4(mask);
        uint64_t sum = carry_in;

        sum += n[H4(i)];
        sum += m[H4(i)] ^ inv;
        if (mask & 1) {
            /* Carry chains only through beats that execute. */
            carry_in = sum >> 32;
        }
        /* Merge only the unpredicated bytes into the destination. */
        d[H4(i)] = (d[H4(i)] & ~bytemask) | ((uint32_t)sum & bytemask);
    }

    if (update_flags) {
        /* Store C, clear NZV. */
        env->vfp.xregs[ARM_VFP_FPSCR] &= ~FPCR_NZCV_MASK;
        env->vfp.xregs[ARM_VFP_FPSCR] |= carry_in * FPCR_C;
    }
    mve_advance_vpt(env);
}

void HELPER(mve_vadc)(CPUARMState *env, void *vd, void *vn, void *vm)
{
    /* VADC: carry in from FPSCR.C; flags untouched if fully predicated. */
    bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C;
    do_vadc(env, vd, vn, vm, 0, carry_in, false);
}

void HELPER(mve_vsbc)(CPUARMState *env, void *vd, void *vn, void *vm)
{
    /* VSBC: subtract via n + ~m + FPSCR.C. */
    bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C;
    do_vadc(env, vd, vn, vm, -1, carry_in, false);
}

void HELPER(mve_vadci)(CPUARMState *env, void *vd, void *vn, void *vm)
{
    /* VADCI: fixed carry input 0; always writes the flags. */
    do_vadc(env, vd, vn, vm, 0, 0, true);
}

void HELPER(mve_vsbci)(CPUARMState *env, void *vd, void *vn, void *vm)
{
    /* VSBCI: fixed carry input 1 (n + ~m + 1 == n - m); always writes flags. */
    do_vadc(env, vd, vn, vm, -1, 1, true);
}

/* r~ */