Module: xenomai-2.6
Branch: master
Commit: 192597326a0becd1980cb6c5cc9395af18a19c60
URL:    http://git.xenomai.org/?p=xenomai-2.6.git;a=commit;h=192597326a0becd1980cb6c5cc9395af18a19c60

Author: Jan Kiszka <jan.kis...@siemens.com>
Date:   Tue Jan 29 18:46:13 2013 +0100

switchtest: Add SSE and AVX check

Add a test for switching the lower SSE registers xmm0..7 or AVX
registers ymm0..7, provided the CPU supports the corresponding
feature. As xmm and ymm share their storage, we only need to check
one of the features.

Signed-off-by: Jan Kiszka <jan.kis...@siemens.com>

---

 include/asm-arm/fptest.h              |    4 +
 include/asm-blackfin/fptest.h         |    4 +
 include/asm-nios2/fptest.h            |    4 +
 include/asm-powerpc/fptest.h          |    4 +
 include/asm-sh/fptest.h               |    4 +
 include/asm-x86/fptest.h              |  110 ++++++++++++++++++++++++++++++++-
 ksrc/drivers/testing/switchtest.c     |    2 +
 src/testsuite/switchtest/switchtest.c |    2 +
 8 files changed, 133 insertions(+), 1 deletions(-)

diff --git a/include/asm-arm/fptest.h b/include/asm-arm/fptest.h
index 924c226..be2bd15 100644
--- a/include/asm-arm/fptest.h
+++ b/include/asm-arm/fptest.h
@@ -54,6 +54,10 @@ static void __attribute__((constructor)) fp_init(void)
 
 #endif /* !__KERNEL__ */
 
+/*
+ * Intentionally empty: no FP feature detection is done here on this
+ * architecture. The function exists so that callers can invoke
+ * fp_features_init() regardless of the target.
+ */
+static inline void fp_features_init(void)
+{
+}
+
 static inline void fp_regs_set(unsigned val)
 {
        if (have_vfp) {
diff --git a/include/asm-blackfin/fptest.h b/include/asm-blackfin/fptest.h
index d14181d..afb11e1 100644
--- a/include/asm-blackfin/fptest.h
+++ b/include/asm-blackfin/fptest.h
@@ -23,6 +23,10 @@ static inline void fp_linux_end(void)
 #define printk printf
 #endif /* !__KERNEL__ */
 
+/*
+ * Intentionally empty: no FP feature detection is done here on this
+ * architecture. The function exists so that callers can invoke
+ * fp_features_init() regardless of the target.
+ */
+static inline void fp_features_init(void)
+{
+}
+
 static inline void fp_regs_set(unsigned val)
 {
 }
diff --git a/include/asm-nios2/fptest.h b/include/asm-nios2/fptest.h
index c3a2fe8..9e0d14c 100644
--- a/include/asm-nios2/fptest.h
+++ b/include/asm-nios2/fptest.h
@@ -23,6 +23,10 @@ static inline void fp_linux_end(void)
 #define printk printf
 #endif /* !__KERNEL__ */
 
+/*
+ * Intentionally empty: no FP feature detection is done here on this
+ * architecture. The function exists so that callers can invoke
+ * fp_features_init() regardless of the target.
+ */
+static inline void fp_features_init(void)
+{
+}
+
 static inline void fp_regs_set(unsigned val)
 {
 }
diff --git a/include/asm-powerpc/fptest.h b/include/asm-powerpc/fptest.h
index 46b97e0..ab56a8d 100644
--- a/include/asm-powerpc/fptest.h
+++ b/include/asm-powerpc/fptest.h
@@ -36,6 +36,10 @@ static inline void fp_linux_end(void)
 #define printk printf
 #endif /* !__KERNEL__ */
 
+/*
+ * Intentionally empty: no FP feature detection is done here on this
+ * architecture. The function exists so that callers can invoke
+ * fp_features_init() regardless of the target.
+ */
+static inline void fp_features_init(void)
+{
+}
+
 static inline void fp_regs_set(unsigned val)
 {
        uint64_t fpval = val;
diff --git a/include/asm-sh/fptest.h b/include/asm-sh/fptest.h
index b6200fd..99547be 100644
--- a/include/asm-sh/fptest.h
+++ b/include/asm-sh/fptest.h
@@ -23,6 +23,10 @@ static inline void fp_linux_end(void)
 #define printk printf
 #endif /* !__KERNEL__ */
 
+/*
+ * Intentionally empty: no FP feature detection is done here on this
+ * architecture. The function exists so that callers can invoke
+ * fp_features_init() regardless of the target.
+ */
+static inline void fp_features_init(void)
+{
+}
+
 static inline void fp_regs_set(unsigned val)
 {
 }
diff --git a/include/asm-x86/fptest.h b/include/asm-x86/fptest.h
index 88baa34..054844c 100644
--- a/include/asm-x86/fptest.h
+++ b/include/asm-x86/fptest.h
@@ -43,21 +43,104 @@ static inline void fp_linux_end(void)
 #define printk printf
 #endif /* !__KERNEL__ */
 
+/* Bits of fp_features, filled in by fp_features_init(). */
+#define FP_FEATURE_SSE                 0x01
+#define FP_FEATURE_AVX                 0x02
+
+/*
+ * CPUID leaf 1 feature bits.
+ *
+ * The xmm test moves data with movupd, which is an SSE2 instruction, so
+ * the EDX mask has to test SSE2 (bit 26) rather than SSE (bit 25): a CPU
+ * with SSE but without SSE2 would fault on movupd.
+ */
+#define FP_CPUID_SSE_MASK              (1 << 26)
+/* ECX: OSXSAVE (bit 27) and AVX (bit 28); both are required. */
+#define FP_CPUID_AVX_MASK              ((1 << 27) | (1 << 28))
+
+/* FP_FEATURE_* flags detected on the running CPU; 0 until initialized. */
+static unsigned long fp_features;
+
+/*
+ * Probe the CPU for the vector register sets the test can exercise and
+ * record them in fp_features.
+ *
+ * Runs CPUID leaf 1 and sets FP_FEATURE_SSE when EDX matches
+ * FP_CPUID_SSE_MASK. FP_FEATURE_AVX is set only when ECX reports both
+ * OSXSAVE and AVX and XCR0 shows that the OS enabled xmm and ymm state;
+ * without the latter, AVX instructions fault even though CPUID
+ * advertises them.
+ */
+static inline void fp_features_init(void)
+{
+       unsigned int eax, ebx, ecx, edx;
+       unsigned int xcr0_lo, xcr0_hi;
+
+       eax = 1;
+       __asm__ __volatile__("cpuid"
+               : "=a" (eax),
+                 "=b" (ebx),
+                 "=c" (ecx),
+                 "=d" (edx)
+               : "0" (eax)
+               : "memory");
+
+       if (edx & FP_CPUID_SSE_MASK)
+               fp_features |= FP_FEATURE_SSE;
+
+       /* ECX bit 27 is OSXSAVE, bit 28 is AVX; both must be set. */
+       if ((ecx & FP_CPUID_AVX_MASK) != FP_CPUID_AVX_MASK)
+               return;
+
+       /*
+        * OSXSAVE guarantees that xgetbv is usable. Read XCR0 (ecx = 0) and
+        * require bits 1 (SSE state) and 2 (AVX state). The opcode is
+        * spelled out so that assemblers lacking the mnemonic still work.
+        */
+       __asm__ __volatile__(".byte 0x0f, 0x01, 0xd0"
+               : "=a" (xcr0_lo), "=d" (xcr0_hi)
+               : "c" (0));
+       (void)xcr0_hi;
+
+       if ((xcr0_lo & 0x6) == 0x6)
+               fp_features |= FP_FEATURE_AVX;
+}
+
 static inline void fp_regs_set(unsigned val)
 {
+       uint64_t vec[4] = { val, 0, val, 0 };
        unsigned i;
 
        for (i = 0; i < 8; i++)
                __asm__ __volatile__("fildl %0": /* no output */ :"m"(val));
+       if (fp_features & FP_FEATURE_AVX)
+               __asm__ __volatile__(
+                       "vmovupd %0,%%ymm0;"
+                       "vmovupd %0,%%ymm1;"
+                       "vmovupd %0,%%ymm2;"
+                       "vmovupd %0,%%ymm3;"
+                       "vmovupd %0,%%ymm4;"
+                       "vmovupd %0,%%ymm5;"
+                       "vmovupd %0,%%ymm6;"
+                       "vmovupd %0,%%ymm7;"
+                       : : "m" (vec[0]));
+       else if (fp_features & FP_FEATURE_SSE)
+               __asm__ __volatile__(
+                       "movupd %0,%%xmm0;"
+                       "movupd %0,%%xmm1;"
+                       "movupd %0,%%xmm2;"
+                       "movupd %0,%%xmm3;"
+                       "movupd %0,%%xmm4;"
+                       "movupd %0,%%xmm5;"
+                       "movupd %0,%%xmm6;"
+                       "movupd %0,%%xmm7;"
+                       : : "m" (vec[0]));
 }
 
 static inline unsigned fp_regs_check(unsigned val)
 {
        unsigned i, result = val;
+       uint64_t vec[8][4];
        unsigned e[8];
 
        for (i = 0; i < 8; i++)
                __asm__ __volatile__("fistpl %0":"=m"(e[7 - i]));
+       if (fp_features & FP_FEATURE_AVX) {
+               __asm__ __volatile__(
+                       "vmovupd %%ymm0,%0;"
+                       "vmovupd %%ymm1,%1;"
+                       "vmovupd %%ymm2,%2;"
+                       "vmovupd %%ymm3,%3;"
+                       "vmovupd %%ymm4,%4;"
+                       "vmovupd %%ymm5,%5;"
+                       "vmovupd %%ymm6,%6;"
+                       "vmovupd %%ymm7,%7;"
+                       :
+                       : "m" (vec[0][0]), "m" (vec[1][0]),
+                         "m" (vec[2][0]), "m" (vec[3][0]),
+                         "m" (vec[4][0]), "m" (vec[5][0]),
+                         "m" (vec[6][0]), "m" (vec[7][0]));
+       } else if (fp_features & FP_FEATURE_SSE) {
+               __asm__ __volatile__(
+                       "movupd %%xmm0,%0;"
+                       "movupd %%xmm1,%1;"
+                       "movupd %%xmm2,%2;"
+                       "movupd %%xmm3,%3;"
+                       "movupd %%xmm4,%4;"
+                       "movupd %%xmm5,%5;"
+                       "movupd %%xmm6,%6;"
+                       "movupd %%xmm7,%7;"
+                       :
+                       : "m" (vec[0][0]), "m" (vec[1][0]),
+                         "m" (vec[2][0]), "m" (vec[3][0]),
+                         "m" (vec[4][0]), "m" (vec[5][0]),
+                         "m" (vec[6][0]), "m" (vec[7][0]));
+       }
 
        for (i = 0; i < 8; i++)
                if (e[i] != val) {
@@ -65,8 +148,33 @@ static inline unsigned fp_regs_check(unsigned val)
                        result = e[i];
                }
 
+       if (fp_features & FP_FEATURE_AVX) {
+               for (i = 0; i < 8; i++) {
+                       int error = 0;
+                       if (vec[i][0] != val) {
+                               result = vec[i][0];
+                               error = 1;
+                       }
+                       if (vec[i][2] != val) {
+                               result = vec[i][2];
+                               error = 1;
+                       }
+                       if (error)
+                               printk("ymm%d: %llu/%llu != %u/%u\n",
+                                      i, (unsigned long long)vec[i][0],
+                                      (unsigned long long)vec[i][2],
+                                      val, val);
+               }
+       } else if (fp_features & FP_FEATURE_SSE) {
+               for (i = 0; i < 8; i++)
+                       if (vec[i][0] != val) {
+                               printk("xmm%d: %llu != %u\n",
+                                      i, (unsigned long long)vec[i][0], val);
+                               result = vec[i][0];
+                       }
+       }
+
        return result;
 }
 
-
 #endif /* _XENO_ASM_X86_FPTEST_H */
diff --git a/ksrc/drivers/testing/switchtest.c b/ksrc/drivers/testing/switchtest.c
index 308aaa5..addf66a 100644
--- a/ksrc/drivers/testing/switchtest.c
+++ b/ksrc/drivers/testing/switchtest.c
@@ -749,6 +749,8 @@ int __init __switchtest_init(void)
 {
        int err;
 
+       fp_features_init();
+
        do {
                snprintf(device.device_name, RTDM_MAX_DEVNAME_LEN,
                         "rttest-switchtest%d",
diff --git a/src/testsuite/switchtest/switchtest.c b/src/testsuite/switchtest/switchtest.c
index 4f61ee6..3b3c24a 100644
--- a/src/testsuite/switchtest/switchtest.c
+++ b/src/testsuite/switchtest/switchtest.c
@@ -1147,6 +1147,8 @@ int main(int argc, const char *argv[])
                exit(EXIT_FAILURE);
        }
 
+       fp_features_init();
+
        /* Parse command line options. */
        opterr = 0;
        for (;;) {


_______________________________________________
Xenomai-git mailing list
Xenomai-git@xenomai.org
http://www.xenomai.org/mailman/listinfo/xenomai-git

Reply via email to