On 18 December 2017 at 17:30, Richard Henderson <richard.hender...@linaro.org> wrote: > Change vfp.regs as a uint64_t to vfp.zregs as an ARMVectorReg. > The previous patches have made the change in representation > relatively painless. > > Add vfp.pregs as an ARMPredicateReg. Let FFR be P16 to make > it easier to treat it as for any other predicate. > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > target/arm/cpu.h | 64 > ++++++++++++++++++++++++++++++++-------------- > target/arm/machine.c | 37 ++++++++++++++++++++++++++- > target/arm/translate-a64.c | 8 +++--- > target/arm/translate.c | 12 ++++----- > 4 files changed, 90 insertions(+), 31 deletions(-) > > diff --git a/target/arm/cpu.h b/target/arm/cpu.h > index e1a8e2880d..150b0d9d84 100644 > --- a/target/arm/cpu.h > +++ b/target/arm/cpu.h > @@ -153,6 +153,47 @@ typedef struct { > uint32_t base_mask; > } TCR; > > +/* Define a maximum sized vector register. > + * For 32-bit, this is a 128-bit NEON/AdvSIMD register. > + * For 64-bit, this is a 2048-bit SVE register. > + * > + * Note that the mapping between S, D, and Q views of the register bank > + * differs between AArch64 and AArch32. > + * In AArch32: > + * Qn = regs[n].d[1]:regs[n].d[0] > + * Dn = regs[n / 2].d[n & 1] > + * Sn = regs[n / 4].d[n % 4 / 2], > + * bits 31..0 for even n, and bits 63..32 for odd n > + * (and regs[16] to regs[31] are inaccessible) > + * In AArch64: > + * Zn = regs[n].d[*] > + * Qn = regs[n].d[1]:regs[n].d[0] > + * Dn = regs[n].d[0] > + * Sn = regs[n].d[0] bits 31..0 > + * > + * This corresponds to the architecturally defined mapping between > + * the two execution states, and means we do not need to explicitly > + * map these registers when changing states. > + */
The transformations on the data structures (i.e. the meat of this patch) look good. > + > +#ifdef TARGET_AARCH64 > +# define ARM_MAX_VQ 16 > +#else > +# define ARM_MAX_VQ 1 > +#endif > + > +typedef struct ARMVectorReg { > + uint64_t d[2 * ARM_MAX_VQ] QEMU_ALIGNED(16); > +} ARMVectorReg; > + > +typedef struct ARMPredicateReg { > +#ifdef TARGET_AARCH64 > + uint64_t p[2 * ARM_MAX_VQ / 8] QEMU_ALIGNED(16); > +#else > + uint64_t p[0]; > +#endif > +} ARMPredicateReg; I think introducing the predicate registers should go in a separate patch. > --- a/target/arm/translate.c > +++ b/target/arm/translate.c > @@ -1513,19 +1513,17 @@ static inline void gen_vfp_st(DisasContext *s, int > dp, TCGv_i32 addr) > } > } > > -static inline long > -vfp_reg_offset (int dp, int reg) > +static inline long vfp_reg_offset(bool dp, unsigned reg) > { > if (dp) { > - return offsetof(CPUARMState, vfp.regs[reg]); > + return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]); > } else { > - long ofs = offsetof(CPUARMState, vfp.regs[reg >> 1]); > + long r = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & > 1]); > if (reg & 1) { > - ofs += offsetof(CPU_DoubleU, l.upper); > + return r + offsetof(CPU_DoubleU, l.upper); > } else { > - ofs += offsetof(CPU_DoubleU, l.lower); > + return r + offsetof(CPU_DoubleU, l.lower); > } > - return ofs; ... I see we're tweaking the logic on this code again. I was expecting that the changes in the previous patch would have turned out to be in support of just having to do a one-line change in this one, but apparently not? > } > } thanks -- PMM