On Fri, Mar 08, 2019 at 10:48:11PM -0500, Greg Czerniak wrote:
> This patch adds support for floating point operations on the ARM
> Cortex-A5. The Cortex-A5 uses VFPv4, which differs from the (mostly)
> standard VFPv3 in that VFPv4 has 16 FPU registers instead of 32 on
> VFPv3.
That is incorrect. The Arm ARM states:
"VFPv3 can be implemented with either thirty-two or sixteen doubleword
registers"
"VFPv4 can be implemented with either thirty-two or sixteen doubleword
registers"
The baseline for OpenBSD/armv7 assumes NEON (which the SAMA5D3 lacks), which
implies d32. The SAMA5D2 and SAMA5D4 are also Cortex-A5 but do have NEON, so
a check on the Cortex-A5 CPU ID alone would wrongly skip d16-d31 on those.
>
> I've tested this on a BeagleBone Black (uses VFPv3) and the Atmel
> SAMA5D3 Xplained (uses VFPv4 on Cortex-A5), which I'm currently
> porting to OpenBSD.
>
>
> Index: sys/arch/arm/arm/vfp.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm/arm/vfp.c,v
> retrieving revision 1.3
> diff -u -p -r1.3 vfp.c
> --- sys/arch/arm/arm/vfp.c 24 Jan 2019 13:19:19 -0000 1.3
> +++ sys/arch/arm/arm/vfp.c 9 Mar 2019 03:39:13 -0000
> @@ -64,16 +64,27 @@ vfp_init(void)
> void
> vfp_store(struct fpreg *vfpsave)
> {
> + struct cpu_info *ci = curcpu();
> uint32_t scratch;
> + u_int cpuid = ci->ci_arm_cpuid;
>
> if (get_vfp_fpexc() & VFPEXC_EN) {
> - __asm __volatile(
> - ".fpu vfpv3\n"
> - "vstmia %1!, {d0-d15}\n" /* d0-d15 */
> - "vstmia %1!, {d16-d31}\n" /* d16-d31 */
> - "vmrs %0, fpscr\n"
> - "str %0, [%1]\n" /* save vfpscr */
> - : "=&r" (scratch) : "r" (vfpsave));
> + if ((cpuid & CPU_ID_CORTEX_MASK) == CPU_ID_CORTEX_A5) {
> + __asm __volatile(
> + ".fpu vfpv3\n"
> + "vstmia %1!, {d0-d15}\n" /* d0-d15 */
> + "vmrs %0, fpscr\n"
> + "str %0, [%1]\n" /* save vfpscr */
> + : "=&r" (scratch) : "r" (vfpsave));
> + } else {
> + __asm __volatile(
> + ".fpu vfpv3\n"
> + "vstmia %1!, {d0-d15}\n" /* d0-d15 */
> + "vstmia %1!, {d16-d31}\n" /* d16-d31 */
> + "vmrs %0, fpscr\n"
> + "str %0, [%1]\n" /* save vfpscr */
> + : "=&r" (scratch) : "r" (vfpsave));
> + }
> }
>
> /* disable FPU */
> @@ -137,6 +148,7 @@ vfp_load(struct proc *p)
> struct pcb *pcb = &p->p_addr->u_pcb;
> uint32_t scratch = 0;
> int psw;
> + u_int cpuid = ci->ci_arm_cpuid;
>
> /* do not allow a partially synced state here */
> psw = disable_interrupts(PSR_I|PSR_F);
> @@ -150,13 +162,22 @@ vfp_load(struct proc *p)
> /* enable to be able to load ctx */
> set_vfp_fpexc(VFPEXC_EN);
>
> - __asm __volatile(
> - ".fpu vfpv3\n"
> - "vldmia %1!, {d0-d15}\n" /* d0-d15 */
> - "vldmia %1!, {d16-d31}\n" /* d16-d31 */
> - "ldr %0, [%1]\n" /* set old vfpscr */
> - "vmsr fpscr, %0\n"
> - : "=&r" (scratch) : "r" (&pcb->pcb_fpstate));
> + if ((cpuid & CPU_ID_CORTEX_MASK) == CPU_ID_CORTEX_A5) {
> + __asm __volatile(
> + ".fpu vfpv3\n"
> + "vldmia %1!, {d0-d15}\n" /* d0-d15 */
> + "ldr %0, [%1]\n" /* set old vfpscr */
> + "vmsr fpscr, %0\n"
> + : "=&r" (scratch) : "r" (&pcb->pcb_fpstate));
> + } else {
> + __asm __volatile(
> + ".fpu vfpv3\n"
> + "vldmia %1!, {d0-d15}\n" /* d0-d15 */
> + "vldmia %1!, {d16-d31}\n" /* d16-d31 */
> + "ldr %0, [%1]\n" /* set old vfpscr */
> + "vmsr fpscr, %0\n"
> + : "=&r" (scratch) : "r" (&pcb->pcb_fpstate));
> + }
>
> ci->ci_fpuproc = p;
> pcb->pcb_fpcpu = ci;
>