> That is incorrect. The Arm ARM states: > > "VFPv3 can be implemented with either thirty-two or sixteen doubleword > registers" > > "VFPv4 can be implemented with either thirty-two or sixteen doubleword > registers" > > The baseline for OpenBSD/armv7 assumes neon (which SAMA5D3 lacks) which > implies d32. SAMA5D2 and SAMA5D4 are still Cortex A5 but have neon.
Understood. My interpretation of http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0472h/CJADDCIF.html is that any armv7 processor that has VFP support but not NEON support is a D16 variant. With that in mind, please see the below modified patch. I've tested it on my Beaglebone Black and my SAMA5D3. Index: sys/arch/arm/arm/vfp.c =================================================================== RCS file: /cvs/src/sys/arch/arm/arm/vfp.c,v retrieving revision 1.3 diff -u -p -r1.3 vfp.c --- sys/arch/arm/arm/vfp.c 24 Jan 2019 13:19:19 -0000 1.3 +++ sys/arch/arm/arm/vfp.c 9 Mar 2019 07:14:59 -0000 @@ -43,9 +43,32 @@ get_vfp_fpexc(void) return val; } +static inline uint32_t +get_vfp_fpsid(void) +{ + uint32_t val; + __asm __volatile( + ".fpu vfpv3\n" + "vmrs %0, fpsid" : "=r" (val)); + return val; +} + +static inline uint32_t +get_vfp_mvfr1(void) +{ + uint32_t val; + __asm __volatile( + ".fpu vfpv3\n" + "vmrs %0, mvfr1" : "=r" (val)); + return val; +} + int vfp_fault(unsigned int, unsigned int, trapframe_t *, int); void vfp_load(struct proc *p); void vfp_store(struct fpreg *vfpsave); +static void vfp_check_for_d16(void); + +uint8_t vfp_is_d16 = 0; void vfp_init(void) @@ -59,6 +82,26 @@ vfp_init(void) val |= COPROC10 | COPROC11; __asm volatile("mcr p15, 0, %0, c1, c0, 2" :: "r" (val)); __asm volatile("isb"); + + vfp_check_for_d16(); +} + +static void +vfp_check_for_d16(void) +{ + uint32_t fpsid = get_vfp_fpsid(); + uint32_t mvfr1 = get_vfp_mvfr1(); + uint8_t subarch = (fpsid & VFPSID_SUBVERSION3_MASK) >> + VFPSID_SUBVERSION_OFF; + uint8_t neon_load_store_support = (mvfr1 & VMVFR1_LS_MASK) >> + VMVFR1_LS_OFF; + + /* + * If there is VFPv3 or v4 support but no NEON, then + * it is a D16 variant that only supports 16 registers. + */ + if (subarch == 2 && neon_load_store_support == 0) + vfp_is_d16 = 1; } void @@ -67,13 +110,22 @@ vfp_store(struct fpreg *vfpsave) uint32_t scratch; if (get_vfp_fpexc() & VFPEXC_EN) { - __asm __volatile( - ".fpu vfpv3\n" - "vstmia %1!, {d0-d15}\n" /* d0-d15 */ - "vstmia %1!, {d16-d31}\n" /* d16-d31 */ - "vmrs %0, fpscr\n" - "str %0, [%1]\n" /* save vfpscr */ - : "=&r" (scratch) : "r" (vfpsave)); + if (vfp_is_d16) { + __asm __volatile( + ".fpu vfpv3\n" + "vstmia %1!, {d0-d15}\n" /* d0-d15 */ + "vmrs %0, fpscr\n" + "str %0, [%1]\n" /* save vfpscr */ + : "=&r" (scratch) : "r" (vfpsave)); + } else { + __asm __volatile( + ".fpu vfpv3\n" + "vstmia %1!, {d0-d15}\n" /* d0-d15 */ + "vstmia %1!, {d16-d31}\n" /* d16-d31 */ + "vmrs %0, fpscr\n" + "str %0, [%1]\n" /* save vfpscr */ + : "=&r" (scratch) : "r" (vfpsave)); + } } /* disable FPU */ @@ -150,13 +202,22 @@ vfp_load(struct proc *p) /* enable to be able to load ctx */ set_vfp_fpexc(VFPEXC_EN); - __asm __volatile( - ".fpu vfpv3\n" - "vldmia %1!, {d0-d15}\n" /* d0-d15 */ - "vldmia %1!, {d16-d31}\n" /* d16-d31 */ - "ldr %0, [%1]\n" /* set old vfpscr */ - "vmsr fpscr, %0\n" - : "=&r" (scratch) : "r" (&pcb->pcb_fpstate)); + if (vfp_is_d16) { + __asm __volatile( + ".fpu vfpv3\n" + "vldmia %1!, {d0-d15}\n" /* d0-d15 */ + "ldr %0, [%1]\n" /* set old vfpscr */ + "vmsr fpscr, %0\n" + : "=&r" (scratch) : "r" (&pcb->pcb_fpstate)); + } else { + __asm __volatile( + ".fpu vfpv3\n" + "vldmia %1!, {d0-d15}\n" /* d0-d15 */ + "vldmia %1!, {d16-d31}\n" /* d16-d31 */ + "ldr %0, [%1]\n" /* set old vfpscr */ + "vmsr fpscr, %0\n" + : "=&r" (scratch) : "r" (&pcb->pcb_fpstate)); + } ci->ci_fpuproc = p; pcb->pcb_fpcpu = ci;
