> That is incorrect.  The Arm ARM states:
> 
> "VFPv3 can be implemented with either thirty-two or sixteen doubleword
> registers"
> 
> "VFPv4 can be implemented with either thirty-two or sixteen doubleword
> registers"
> 
> The baseline for OpenBSD/armv7 assumes neon (which SAMA5D3 lacks) which
> implies d32.  SAMA5D2 and SAMA5D4 are still Cortex A5 but have neon.

Understood.

My interpretation of 
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0472h/CJADDCIF.html
is that any armv7 processor that has VFP support but not NEON
support is a D16 variant.  With that in mind, please see the modified
patch below.  I've tested it on my BeagleBone Black and my
SAMA5D3.

Index: sys/arch/arm/arm/vfp.c
===================================================================
RCS file: /cvs/src/sys/arch/arm/arm/vfp.c,v
retrieving revision 1.3
diff -u -p -r1.3 vfp.c
--- sys/arch/arm/arm/vfp.c      24 Jan 2019 13:19:19 -0000      1.3
+++ sys/arch/arm/arm/vfp.c      9 Mar 2019 07:14:59 -0000
@@ -43,9 +43,32 @@ get_vfp_fpexc(void)
        return val;
 }
 
+/* Return the current contents of the VFP FPSID register. */
+static inline uint32_t
+get_vfp_fpsid(void)
+{
+       uint32_t fpsid;
+
+       __asm __volatile(".fpu vfpv3\n" "vmrs %0, fpsid" : "=r" (fpsid));
+       return fpsid;
+}
+
+/* Return the current contents of the VFP MVFR1 register. */
+static inline uint32_t
+get_vfp_mvfr1(void)
+{
+       uint32_t mvfr1;
+
+       __asm __volatile(".fpu vfpv3\n" "vmrs %0, mvfr1" : "=r" (mvfr1));
+       return mvfr1;
+}
+
 int vfp_fault(unsigned int, unsigned int, trapframe_t *, int);
 void vfp_load(struct proc *p);
 void vfp_store(struct fpreg *vfpsave);
+static void vfp_check_for_d16(void);
+
+uint8_t vfp_is_d16 = 0;        /* nonzero: only d0-d15 are saved and restored */
 
 void
 vfp_init(void)
@@ -59,6 +82,26 @@ vfp_init(void)
        val |= COPROC10 | COPROC11;
        __asm volatile("mcr p15, 0, %0, c1, c0, 2" :: "r" (val));
        __asm volatile("isb");
+
+       vfp_check_for_d16();
+}
+
+static void
+vfp_check_for_d16(void)
+{
+       uint32_t fpsid = get_vfp_fpsid();
+       uint32_t mvfr1 = get_vfp_mvfr1();
+       uint8_t subarch = (fpsid & VFPSID_SUBVERSION3_MASK) >>
+                         VFPSID_SUBVERSION_OFF;
+       uint8_t neon_load_store_support = (mvfr1 & VMVFR1_LS_MASK) >>
+                                         VMVFR1_LS_OFF;
+
+       /*
+        * FPSID subarchitecture 2-4 all denote VFPv3 or later; with no
+        * NEON load/store support, treat the FPU as D16 (d0-d15 only).
+        */
+       if (subarch >= 2 && neon_load_store_support == 0)
+               vfp_is_d16 = 1;
 }
 
 void
@@ -67,13 +110,22 @@ vfp_store(struct fpreg *vfpsave)
        uint32_t scratch;
 
        if (get_vfp_fpexc() & VFPEXC_EN) {
-               __asm __volatile(
-                   ".fpu vfpv3\n"
-                   "vstmia     %1!, {d0-d15}\n"        /* d0-d15 */
-                   "vstmia     %1!, {d16-d31}\n"       /* d16-d31 */
-                   "vmrs       %0, fpscr\n"
-                   "str        %0, [%1]\n"             /* save vfpscr */
-               : "=&r" (scratch) : "r" (vfpsave));
+               if (vfp_is_d16) {
+                       __asm __volatile(
+                           ".fpu vfpv3\n"
+                           "vstmia     %1!, {d0-d15}\n"        /* d0-d15 */
+                           "vmrs       %0, fpscr\n"
+                           "str        %0, [%1]\n"     /* save vfpscr */
+                       : "=&r" (scratch) : "r" (vfpsave));
+               } else {
+                       __asm __volatile(
+                           ".fpu vfpv3\n"
+                           "vstmia     %1!, {d0-d15}\n"        /* d0-d15 */
+                           "vstmia     %1!, {d16-d31}\n"       /* d16-d31 */
+                           "vmrs       %0, fpscr\n"
+                           "str        %0, [%1]\n"     /* save vfpscr */
+                       : "=&r" (scratch) : "r" (vfpsave));
+               }
        }
 
        /* disable FPU */
@@ -150,13 +202,22 @@ vfp_load(struct proc *p)
        /* enable to be able to load ctx */
        set_vfp_fpexc(VFPEXC_EN);
 
-       __asm __volatile(
-           ".fpu vfpv3\n"
-           "vldmia     %1!, {d0-d15}\n"                /* d0-d15 */
-           "vldmia     %1!, {d16-d31}\n"               /* d16-d31 */
-           "ldr        %0, [%1]\n"                     /* set old vfpscr */
-           "vmsr       fpscr, %0\n"
-           : "=&r" (scratch) : "r" (&pcb->pcb_fpstate));
+       if (vfp_is_d16) {
+               __asm __volatile(
+                   ".fpu vfpv3\n"
+                   "vldmia     %1!, {d0-d15}\n"        /* d0-d15 */
+                   "ldr        %0, [%1]\n"             /* set old vfpscr */
+                   "vmsr       fpscr, %0\n"
+                   : "=&r" (scratch) : "r" (&pcb->pcb_fpstate));
+       } else {
+               __asm __volatile(
+                   ".fpu vfpv3\n"
+                   "vldmia     %1!, {d0-d15}\n"        /* d0-d15 */
+                   "vldmia     %1!, {d16-d31}\n"       /* d16-d31 */
+                   "ldr        %0, [%1]\n"             /* set old vfpscr */
+                   "vmsr       fpscr, %0\n"
+                   : "=&r" (scratch) : "r" (&pcb->pcb_fpstate));
+       }
 
        ci->ci_fpuproc = p;
        pcb->pcb_fpcpu = ci;

Reply via email to