> Date: Sat, 9 Mar 2019 02:22:35 -0500
> From: Greg Czerniak <[email protected]>
> 
> > That is incorrect.  The Arm ARM states:
> > 
> > "VFPv3 can be implemented with either thirty-two or sixteen doubleword
> > registers"
> > 
> > "VFPv4 can be implemented with either thirty-two or sixteen doubleword
> > registers"
> > 
> > The baseline for OpenBSD/armv7 assumes neon (which SAMA5D3 lacks) which
> > implies d32.  SAMA5D2 and SAMA5D4 are still Cortex A5 but have neon.
> 
> Understood.
> 
> My interpretation of 
> http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0472h/CJADDCIF.html
> is that any armv7 processor that has VFP support but not NEON
> support is a D16 variant.  With that in mind, please see the below
> modified patch.  I've tested it on my Beaglebone Black and my
> SAMA5D3.

I think it's bits 0:3 in MVFR0 that you're after if you want to
distinguish between D16 and D32 implementations.

I'm not necessarily opposed to adding something like this to the
OpenBSD kernel.  However we quite delibrately chose VFPv3-D32 + NEON
as a baseline for OpenBSD on armv7 and compile userland with those
assumptions.  That means that userland (base and ports) will not run
on your board, and we don't really have the resources to support both
NEON and non-NEON armv7.

> Index: sys/arch/arm/arm/vfp.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm/arm/vfp.c,v
> retrieving revision 1.3
> diff -u -p -r1.3 vfp.c
> --- sys/arch/arm/arm/vfp.c    24 Jan 2019 13:19:19 -0000      1.3
> +++ sys/arch/arm/arm/vfp.c    9 Mar 2019 07:14:59 -0000
> @@ -43,9 +43,32 @@ get_vfp_fpexc(void)
>       return val;
>  }
>  
> +static inline uint32_t
> +get_vfp_fpsid(void)
> +{
> +     uint32_t val;
> +     __asm __volatile(
> +         ".fpu vfpv3\n"
> +         "vmrs %0, fpsid" : "=r" (val));
> +     return val;
> +}
> +
> +static inline uint32_t
> +get_vfp_mvfr1(void)
> +{
> +     uint32_t val;
> +     __asm __volatile(
> +         ".fpu vfpv3\n"
> +         "vmrs %0, mvfr1" : "=r" (val));
> +     return val;
> +}
> +
>  int vfp_fault(unsigned int, unsigned int, trapframe_t *, int);
>  void vfp_load(struct proc *p);
>  void vfp_store(struct fpreg *vfpsave);
> +static void vfp_check_for_d16(void);
> +
> +uint8_t vfp_is_d16 = 0;
>  
>  void
>  vfp_init(void)
> @@ -59,6 +82,26 @@ vfp_init(void)
>       val |= COPROC10 | COPROC11;
>       __asm volatile("mcr p15, 0, %0, c1, c0, 2" :: "r" (val));
>       __asm volatile("isb");
> +
> +     vfp_check_for_d16();
> +}
> +
> +static void
> +vfp_check_for_d16(void)
> +{
> +     uint32_t fpsid = get_vfp_fpsid();
> +     uint32_t mvfr1 = get_vfp_mvfr1();
> +     uint8_t subarch = (fpsid & VFPSID_SUBVERSION3_MASK) >>
> +                       VFPSID_SUBVERSION_OFF;
> +     uint8_t neon_load_store_support = (mvfr1 & VMVFR1_LS_MASK) >>
> +                                       VMVFR1_LS_OFF;
> +
> +     /*
> +      * If there is VFPv3 or v4 support but no NEON, then
> +      * it is a D16 variant that only supports 16 registers.
> +      */
> +     if (subarch == 2 && neon_load_store_support == 0)
> +             vfp_is_d16 = 1;
>  }
>  
>  void
> @@ -67,13 +110,22 @@ vfp_store(struct fpreg *vfpsave)
>       uint32_t scratch;
>  
>       if (get_vfp_fpexc() & VFPEXC_EN) {
> -             __asm __volatile(
> -                 ".fpu vfpv3\n"
> -                 "vstmia     %1!, {d0-d15}\n"        /* d0-d15 */
> -                 "vstmia     %1!, {d16-d31}\n"       /* d16-d31 */
> -                 "vmrs       %0, fpscr\n"
> -                 "str        %0, [%1]\n"             /* save vfpscr */
> -             : "=&r" (scratch) : "r" (vfpsave));
> +             if (vfp_is_d16) {
> +                     __asm __volatile(
> +                         ".fpu vfpv3\n"
> +                         "vstmia     %1!, {d0-d15}\n"        /* d0-d15 */
> +                         "vmrs       %0, fpscr\n"
> +                         "str        %0, [%1]\n"     /* save vfpscr */
> +                     : "=&r" (scratch) : "r" (vfpsave));
> +             } else {
> +                     __asm __volatile(
> +                         ".fpu vfpv3\n"
> +                         "vstmia     %1!, {d0-d15}\n"        /* d0-d15 */
> +                         "vstmia     %1!, {d16-d31}\n"       /* d16-d31 */
> +                         "vmrs       %0, fpscr\n"
> +                         "str        %0, [%1]\n"     /* save vfpscr */
> +                     : "=&r" (scratch) : "r" (vfpsave));
> +             }
>       }
>  
>       /* disable FPU */
> @@ -150,13 +202,22 @@ vfp_load(struct proc *p)
>       /* enable to be able to load ctx */
>       set_vfp_fpexc(VFPEXC_EN);
>  
> -     __asm __volatile(
> -         ".fpu vfpv3\n"
> -         "vldmia     %1!, {d0-d15}\n"                /* d0-d15 */
> -         "vldmia     %1!, {d16-d31}\n"               /* d16-d31 */
> -         "ldr        %0, [%1]\n"                     /* set old vfpscr */
> -         "vmsr       fpscr, %0\n"
> -         : "=&r" (scratch) : "r" (&pcb->pcb_fpstate));
> +     if (vfp_is_d16) {
> +             __asm __volatile(
> +                 ".fpu vfpv3\n"
> +                 "vldmia     %1!, {d0-d15}\n"        /* d0-d15 */
> +                 "ldr        %0, [%1]\n"             /* set old vfpscr */
> +                 "vmsr       fpscr, %0\n"
> +                 : "=&r" (scratch) : "r" (&pcb->pcb_fpstate));
> +     } else {
> +             __asm __volatile(
> +                 ".fpu vfpv3\n"
> +                 "vldmia     %1!, {d0-d15}\n"        /* d0-d15 */
> +                 "vldmia     %1!, {d16-d31}\n"       /* d16-d31 */
> +                 "ldr        %0, [%1]\n"             /* set old vfpscr */
> +                 "vmsr       fpscr, %0\n"
> +                 : "=&r" (scratch) : "r" (&pcb->pcb_fpstate));
> +     }
>  
>       ci->ci_fpuproc = p;
>       pcb->pcb_fpcpu = ci;
> 
> 

Reply via email to