fma4, sse4a, xop

Signed-off-by: Jiří Župka <[email protected]>
---
 client/virt/deps/test_cpu_flags/Makefile        |   83 +++++++++++------------
 client/virt/deps/test_cpu_flags/aes.c           |   13 +++-
 client/virt/deps/test_cpu_flags/avx.c           |   30 +++++----
 client/virt/deps/test_cpu_flags/cpuflags-test.c |   57 +++++++++++-----
 client/virt/deps/test_cpu_flags/fma4.c          |   31 +++++++++
 client/virt/deps/test_cpu_flags/pclmul.c        |   10 ++-
 client/virt/deps/test_cpu_flags/rdrand.c        |    6 +-
 client/virt/deps/test_cpu_flags/sse3.c          |   12 +++-
 client/virt/deps/test_cpu_flags/sse4.c          |   25 ++++++--
 client/virt/deps/test_cpu_flags/sse4a.c         |   37 ++++++++++
 client/virt/deps/test_cpu_flags/ssse3.c         |   14 +++-
 client/virt/deps/test_cpu_flags/stress.c        |    6 ++
 client/virt/deps/test_cpu_flags/tests.h         |   38 ++++++++--
 client/virt/deps/test_cpu_flags/xop.c           |   48 +++++++++++++
 14 files changed, 309 insertions(+), 101 deletions(-)
 create mode 100644 client/virt/deps/test_cpu_flags/fma4.c
 create mode 100644 client/virt/deps/test_cpu_flags/sse4a.c
 create mode 100644 client/virt/deps/test_cpu_flags/xop.c

diff --git a/client/virt/deps/test_cpu_flags/Makefile b/client/virt/deps/test_cpu_flags/Makefile
index 5b77740..b95c36e 100644
--- a/client/virt/deps/test_cpu_flags/Makefile
+++ b/client/virt/deps/test_cpu_flags/Makefile
@@ -3,57 +3,42 @@ MKDIR = mkdir -p
 OPTFLAGS=-O3
 
 CFLAGS= -m64 ${OPTFLAGS} -std=c99 -pipe \
-       -ftree-vectorize -ftree-vectorizer-verbose=0 \
+       -ftree-vectorize \
        -ffast-math \
        -fopenmp \
 
-CFLAGSAVX= -m64 ${OPTFLAGS} -std=c99 -pipe \
-       -ftree-vectorize -ftree-vectorizer-verbose=0 \
-       -ffast-math \
-       -mavx \
-       -fopenmp \
+CFLAGSAVX= ${CFLAGS} \
+               -mavx \
 
-CFLAGSSSE4= -m64 ${OPTFLAGS} -std=c99 -pipe \
-       -ftree-vectorize -ftree-vectorizer-verbose=0 \
-       -ffast-math \
+CFLAGSFMA4= ${CFLAGS} \
+       -mfma4 \
+
+CFLAGSSSE4A= ${CFLAGS} \
+       -msse4a \
+
+CFLAGSSSE4= ${CFLAGS} \
        -msse4 -msse4.1 -msse4.2 \
-       -fopenmp \
 
-CFLAGSSSSE3= -m64 ${OPTFLAGS} -std=c99 -pipe \
-       -ftree-vectorize -ftree-vectorizer-verbose=0 \
-       -ffast-math \
+CFLAGSSSSE3= ${CFLAGS} \
        -mssse3 \
-       -fopenmp \
 
-CFLAGSSSE3= -m64 ${OPTFLAGS} -std=c99 -pipe \
-       -ftree-vectorize -ftree-vectorizer-verbose=0 \
-       -ffast-math \
+CFLAGSSSE3= ${CFLAGS} \
        -msse3 \
-       -fopenmp \
 
-CFLAGSAES= -m64 ${OPTFLAGS} -std=c99 -pipe \
-       -ftree-vectorize -ftree-vectorizer-verbose=0 \
-       -ffast-math \
+CFLAGSAES= ${CFLAGS} \
        -maes \
-       -fopenmp \
 
-CFLAGSPCLMUL= -m64 ${OPTFLAGS} -std=c99 -pipe \
-       -ftree-vectorize -ftree-vectorizer-verbose=0 \
-       -ffast-math \
+CFLAGSPCLMUL= ${CFLAGS} \
        -mpclmul \
-       -fopenmp \
 
-CFLAGSRDRAND= -m64 ${OPTFLAGS} -std=c99 -pipe \
-       -ftree-vectorize -ftree-vectorizer-verbose=0 \
-       -ffast-math \
+CFLAGSRDRAND= ${CFLAGS} \
        -mrdrnd \
-       -fopenmp \
 
-CFLAGSSTRESS= -m64 ${OPTFLAGS} -std=c99 -pipe \
-       -ftree-vectorize -ftree-vectorizer-verbose=0 \
-       -ffast-math \
+CFLAGSXOP= ${CFLAGS} \
+       -mxop \
+
+CFLAGSSTRESS= ${CFLAGS} \
        $(EXTRA_FLAGS) \
-       -fopenmp \
 
 CXX=g++
 CC=gcc
@@ -66,39 +51,51 @@ default:cpuflags-test
 
 all:cpuflags-test
 
-cpuflags-test: avx.o sse4.o ssse3.o sse3.o aes.o pclmul.o rdrand.o stress.o
+cpuflags-test: avx.o fma4.o xop.o sse4a.o sse4.o ssse3.o sse3.o aes.o pclmul.o rdrand.o stress.o
        $(CC) $(CFLAGS) $(LIBS) cpuflags-test.c -o cpuflags-test \
                aes.o \
                pclmul.o \
                rdrand.o \
                avx.o \
+               fma4.o \
+               xop.o \
+               sse4a.o \
                sse4.o \
                ssse3.o \
                sse3.o \
                stress.o \
 
-aes.o: aes.c
+aes.o: aes.c tests.h
        $(CC) $(CFLAGSAES) $(LIBS) -c aes.c
 
-pclmul.o: pclmul.c
+pclmul.o: pclmul.c tests.h
        $(CC) $(CFLAGSPCLMUL) $(LIBS) -c pclmul.c
 
-rdrand.o: rdrand.c
+rdrand.o: rdrand.c tests.h
        $(CC) $(CFLAGSRDRAND) $(LIBS) -c rdrand.c
 
-avx.o: avx.c
+fma4.o: fma4.c tests.h
+       $(CC) $(CFLAGSFMA4) $(LIBS) -c fma4.c
+
+xop.o: xop.c tests.h
+       $(CC) $(CFLAGSXOP) $(LIBS) -c xop.c
+
+avx.o: avx.c tests.h
        $(CC) $(CFLAGSAVX) $(LIBS) -c avx.c
 
-sse4.o: sse4.c
+sse4a.o: sse4a.c tests.h
+       $(CC) $(CFLAGSSSE4A) $(LIBS) -c sse4a.c
+
+sse4.o: sse4.c tests.h
        $(CC) $(CFLAGSSSE4) $(LIBS) -c sse4.c
 
-ssse3.o: ssse3.c
+ssse3.o: ssse3.c tests.h
        $(CC) $(CFLAGSSSSE3) $(LIBS) -c ssse3.c
 
-sse3.o: sse3.c
+sse3.o: sse3.c tests.h
        $(CC) $(CFLAGSSSE3) $(LIBS) -c sse3.c
 
-stress.o: stress.c
+stress.o: stress.c tests.h
        $(CC) $(CFLAGSSTRESS) $(LIBS) -c stress.c
 
 ARCHIVE= cpuflags-test
diff --git a/client/virt/deps/test_cpu_flags/aes.c b/client/virt/deps/test_cpu_flags/aes.c
index b8dc5cc..7132ec7 100644
--- a/client/virt/deps/test_cpu_flags/aes.c
+++ b/client/virt/deps/test_cpu_flags/aes.c
@@ -7,8 +7,10 @@
 
 #include "tests.h"
 
+#define result (5931894172722287318L)
+
 #ifdef __AES__
-void aes(){
+int aes(){
        __ma128i v1;
        __ma128i v2;
        for (int i = 1;i >= 0; i--){
@@ -17,10 +19,15 @@ void aes(){
        }
        __ma128i v3;
        v3.i = _mm_aesdeclast_si128(v1.i, v2.i);
-       printf("[%d %d %d]\n",v1.ui64[0],v2.ui64[0],v3.ui64[0]);
+       if (v3.ui64[0] != result){
+               printf("Correct: %ld result: %ld\n", result, v3.ui64[0]);
+               return -1;
+       }
+       return 0;
 }
 #else
-void aes(){
+int aes(){
        printf("AES is not supported.");
+       return 0;
 }
 #endif
diff --git a/client/virt/deps/test_cpu_flags/avx.c b/client/virt/deps/test_cpu_flags/avx.c
index bf06929..179c51b 100644
--- a/client/virt/deps/test_cpu_flags/avx.c
+++ b/client/virt/deps/test_cpu_flags/avx.c
@@ -7,15 +7,8 @@
 #include "tests.h"
 
 #ifdef __AVX__
-
-typedef union __attribute__ ((aligned(32))){
-       __m256 v;
-       float f32[8];
-} __mar256;
-
-
-void avx(){
-       __mar256 a,b;
+int avx(){
+       __ma256 a,b,c;
 
        __m256 ymm0;
        __m256 ymm1;
@@ -27,17 +20,26 @@ void avx(){
 
        ymm0 = _mm256_load_ps(a.f32);
        ymm1 = _mm256_load_ps(b.f32);
-       __mar256 ymm3;
-       ymm3.v = _mm256_sub_ps(ymm0,ymm1);
-       _mm256_store_ps(b.f32, ymm3.v );
+       __ma256 ymm3;
+       ymm3.f = _mm256_sub_ps(ymm0,ymm1);
+       _mm256_store_ps(c.f32, ymm3.f);
        for (int i = 0;i < 8; i++){
-               printf("[%f]\n", b.f32[i]);
+               if (fabsf((a.f32[i] - b.f32[i]) - c.f32[i]) > FLT_EPSILON){ /* |error|, not signed error */
+                       printf("Wrong result:\n");
+                       for (int j = 0;j < 8; j++){
+                               printf("Correct: %f result: %f\n", a.f32[j] - b.f32[j],
+                                               c.f32[j]);
+                       }
+                       return -1;
+               }
        }
+       return 0;
 }
 
 #endif
 #ifndef __AVX__
-void avx(){
+int avx(){
        printf("AVX is not supported.");
+       return 0;
 }
 #endif
diff --git a/client/virt/deps/test_cpu_flags/cpuflags-test.c b/client/virt/deps/test_cpu_flags/cpuflags-test.c
index 483561c..0d7200e 100644
--- a/client/virt/deps/test_cpu_flags/cpuflags-test.c
+++ b/client/virt/deps/test_cpu_flags/cpuflags-test.c
@@ -5,12 +5,16 @@
 
 void print_help(){
        printf(
-                       "  --sse4                     test sse4 instruction.\n"
+                       "  --sse3                     test sse3 instruction.\n"
                        "  --ssse3                    test ssse3 instruction.\n"
+                       "  --sse4                     test sse4 instruction.\n"
+                       "  --sse4a                    test sse4a instruction.\n"
                        "  --avx                      test avx instruction.\n"
                        "  --aes                      test aes instruction.\n"
                        "  --pclmul                   test carry less multiplication.\n"
                        "  --rdrand                   test rdrand instruction.\n"
+                       "  --fma4                     test fma4 instruction.\n"
+                       "  --xop                      test xop instruction.\n"
                        "  --stress n_cpus,avx,aes    start stress on n_cpus.and cpuflags\n");
 }
 
@@ -35,6 +39,9 @@ inst parse_Inst(char * optarg){
                else if(strcmp(pch,"sse4") == 0){
                        i.sse4 = 1;
                }
+               else if(strcmp(pch,"sse4a") == 0){
+                       i.sse4a = 1;
+               }
                else if(strcmp(pch,"avx") == 0){
                        i.avx = 1;
                }
@@ -47,6 +54,12 @@ inst parse_Inst(char * optarg){
                else if(strcmp(pch,"rdrand") == 0){
                        i.rdrand = 1;
                }
+               else if(strcmp(pch,"fma4") == 0){
+                       i.fma4 = 1;
+               }
+               else if(strcmp(pch,"xop") == 0){
+                       i.xop = 1;
+               }
                pch = strtok (NULL, ",");
        }
        return i;
@@ -57,18 +70,22 @@ int main(int argc, char **argv) {
        int digit_optind = 0;
        int opt_count = 0;
 
+       int ret = 0;
        while (1) {
                int this_option_optind = optind ? optind : 1;
                int option_index = 0;
                static struct option long_options[] =
-                               {{ "sse3",  no_argument, 0, 0 },
+                               {{ "stress",required_argument, 0, 0 },
+                               { "sse3",   no_argument, 0, 0 },
                                { "ssse3",  no_argument, 0, 0 },
                                { "sse4",   no_argument, 0, 0 },
+                               { "sse4a",  no_argument, 0, 0 },
                                { "avx",    no_argument, 0, 0 },
                                { "aes",    no_argument, 0, 0 },
                                { "pclmul", no_argument, 0, 0 },
                                { "rdrand", no_argument, 0, 0 },
-                               { "stress", required_argument, 0, 0 },
+                               { "fma4",   no_argument, 0, 0 },
+                               { "xop",    no_argument, 0, 0 },
                                { 0, 0, 0, 0}};
 
                c = getopt_long(argc, argv, "", long_options, &option_index);
@@ -80,37 +97,41 @@ int main(int argc, char **argv) {
 
                switch (c) {
                case 0:
-                       printf("option %s", long_options[option_index].name);
-                       if (optarg)
-                               printf(" with arg %s", optarg);
-                       printf("\n");
                        switch (option_index) {
                        case 0:
-                               sse3();
+                               stress(parse_Inst(optarg));
                                break;
                        case 1:
-                               ssse3();
+                               ret += (sse3() != 0);   /* count failures: tests return -1 or 1 on error */
                                break;
                        case 2:
-                               sse4();
+                               ret += (ssse3() != 0);
                                break;
                        case 3:
-                               avx();
+                               ret += (sse4() != 0);
                                break;
                        case 4:
-                               aes();
+                               ret += (sse4a() != 0);
                                break;
                        case 5:
-                               pclmul();
+                               ret += (avx() != 0);
                                break;
                        case 6:
-                               rdrand();
+                               ret += (aes() != 0);
                                break;
                        case 7:
-                               stress(parse_Inst(optarg));
+                               ret += (pclmul() != 0);
+                               break;
+                       case 8:
+                               ret += (rdrand() != 0);
+                               break;
+                       case 9:
+                               ret += (fma4() != 0);
+                               break;
+                       case 10:
+                               ret += (xop() != 0);
                                break;
                        }
-                       printf("\n");
                        break;
 
                case '?':
@@ -123,5 +144,9 @@ int main(int argc, char **argv) {
                }
                opt_count += 1;
        }
+       if (ret > 0) {
+               printf("%d test fail.\n", ret);
+               exit(-1);
+       }
        exit(0);
 }
diff --git a/client/virt/deps/test_cpu_flags/fma4.c b/client/virt/deps/test_cpu_flags/fma4.c
new file mode 100644
index 0000000..48739e1
--- /dev/null
+++ b/client/virt/deps/test_cpu_flags/fma4.c
@@ -0,0 +1,31 @@
+/*
+ * fma4.c
+ *
+ *  Created on: Nov 29, 2011
+ *      Author: jzupka
+ */
+#include "tests.h"
+
+#ifdef __FMA4__
+
+int fma4(){
+       __ma256 a, b, c, d;
+       int i;
+       for (i = 0; i < 4; i++) {
+               a.d64[i] = i;
+               b.d64[i] = 2.;
+               c.d64[i] = 3.;
+       }
+       d.d = _mm256_macc_pd(a.d, b.d, c.d);
+       for (i = 0; i < 4; i++) printf(" %.3lf", d.d64[i]);
+       printf("\n");
+       return 0;
+}
+
+#endif
+#ifndef __FMA4__
+int fma4(){
+       printf("FMA4 is not supported.");
+       return 0;
+}
+#endif
diff --git a/client/virt/deps/test_cpu_flags/pclmul.c b/client/virt/deps/test_cpu_flags/pclmul.c
index 3387a17..1877e8b 100644
--- a/client/virt/deps/test_cpu_flags/pclmul.c
+++ b/client/virt/deps/test_cpu_flags/pclmul.c
@@ -8,7 +8,7 @@
 #include "tests.h"
 
 #ifdef __PCLMUL__
-void pclmul(){
+int pclmul(){
        __ma128i v1;
        __ma128i v2;
        for (int i = 1;i >= 0; i--){
@@ -17,10 +17,14 @@ void pclmul(){
        }
        __ma128i v3;
        v3.i = _mm_clmulepi64_si128(v1.i, v2.i, 0);
-       printf("[%d %d %d]\n",v1.ui64[0],v2.ui64[0],v3.ui64[0]);
+       if (v3.ui64[0] == 5)    /* value the test expects: pass before reporting anything */
+               return 0;
+       printf("Correct: %d result: %llu\n", 5, (unsigned long long)v3.ui64[0]);
+       return -1;
 }
 #else
-void pclmul(){
+int pclmul(){
        printf("PCMUL is not supported.");
+       return 0;
 }
 #endif
diff --git a/client/virt/deps/test_cpu_flags/rdrand.c b/client/virt/deps/test_cpu_flags/rdrand.c
index f9d1b76..8a6cb58 100644
--- a/client/virt/deps/test_cpu_flags/rdrand.c
+++ b/client/virt/deps/test_cpu_flags/rdrand.c
@@ -8,7 +8,7 @@
 #include "tests.h"
 
 #ifdef __RDRND__
-void rdrand()
+int rdrand()
 {
        int val, num=1;
        while (num--) {
@@ -19,9 +19,11 @@ void rdrand()
                __asm volatile("movl %%eax,%0" : "=m"(val));
                printf("Random is %d\n",val);
        }
+       return 0;
 }
 #else
-void rdrand(){
+int rdrand(){
        printf("RDRAND is not supported.");
+       return 0;
 }
 #endif
diff --git a/client/virt/deps/test_cpu_flags/sse3.c b/client/virt/deps/test_cpu_flags/sse3.c
index 18d2643..fd38821 100644
--- a/client/virt/deps/test_cpu_flags/sse3.c
+++ b/client/virt/deps/test_cpu_flags/sse3.c
@@ -9,7 +9,7 @@
 #include "tests.h"
 
 #ifdef __SSE3__
-void sse3(){
+int sse3(){
        __ma128f v1;
        __ma128f v2;
        for (int i = 4;i >= 0; i--){
@@ -18,11 +18,17 @@ void sse3(){
        }
        __ma128f vo;
        vo.f = _mm_addsub_ps(v1.f,v2.f);
-       printf("[%f]\n", vo.f32[3]);
+       if (abs(vo.f32[3] - (v1.f32[3]+v2.f32[3])) < FLT_EPSILON){
+               return 0;
+       }else{
+               printf("Correct: %f result: %f\n",v1.f32[3]+v2.f32[3], 
vo.f32[3]);
+               return -1;
+       }
 }
 #else
-void sse3(){
+int sse3(){
        printf("SSE3 is not supported.");
+       return 0;
 }
 #endif
 
diff --git a/client/virt/deps/test_cpu_flags/sse4.c b/client/virt/deps/test_cpu_flags/sse4.c
index f9b60fb..0f0a5fb 100644
--- a/client/virt/deps/test_cpu_flags/sse4.c
+++ b/client/virt/deps/test_cpu_flags/sse4.c
@@ -8,21 +8,36 @@
 #include "tests.h"
 
 #if (defined __SSE4_1__ || defined __SSE4_2__)
-void sse4(){
+int sse4(){
        __ma128i v1;
        __ma128i v2;
-       for (int i = 16;i >= 0; i--){
+       for (int i = 15;i >= 0; i--){
                v1.ui8[i] = i;
                v2.ui8[i] = 16-i;
        }
        __ma128i v3;
        v3.i = _mm_max_epi8(v1.i,v2.i);
-       for (int i = 15;i >= 0; i--){
-               printf("max[%d]\n",v3.ui8[i]);
+       int ret = 0;
+       for (int i = 0;i < 16; i++){
+               if (v1.ui8[i] < v2.ui8[i]){
+                       if (v3.ui8[i] != v2.ui8[i])
+                               ret = 1;
+               }else{
+                       if (v3.ui8[i] != v1.ui8[i])
+                               ret = 1;
+               }
+       }
+       if (ret){
+               printf("Wrong result:\n");
+               for (int i = 15;i >= 0; i--){
+                       printf("max[%d]\n",v3.ui8[i]);
+               }
        }
+       return ret;
 }
 #else
-void sse4(){
+int sse4(){
        printf("SSE4 is not supported.");
+       return 0;
 }
 #endif
diff --git a/client/virt/deps/test_cpu_flags/sse4a.c b/client/virt/deps/test_cpu_flags/sse4a.c
new file mode 100644
index 0000000..a5fbcd9
--- /dev/null
+++ b/client/virt/deps/test_cpu_flags/sse4a.c
@@ -0,0 +1,37 @@
+/*
+ * sse4a.c
+ *
+ *  Created on: Nov 29, 2011
+ *      Author: jzupka
+ */
+#include "tests.h"
+
+#ifdef __SSE4A__
+
+/* Check SSE4A: _mm_stream_sd stores only the low double of v into d[0],
+ * so d[0] must equal v.d64[0] and d[1] must keep its initial value. */
+int sse4a(){
+       __ma128f v;
+       double d[2];
+       d[0] = -1.;
+       d[1] = -2.;
+       v.d64[0] = 0.;
+       v.d64[1] = 1.;
+       _mm_stream_sd(&d[0], v.d);
+       if (d[0] != v.d64[0] || d[1] != -2.){
+               printf("Wrong result:\n");
+               printf("Correct: %f result: %f\n", v.d64[0], d[0]);
+               printf("Correct: %f result: %f\n", -2., d[1]);
+               return -1;
+       }
+       return 0;
+}
+
+
+#endif
+#ifndef __SSE4A__
+int sse4a(){
+       printf("SSE4A is not supported.");
+       return 0;
+}
+#endif
diff --git a/client/virt/deps/test_cpu_flags/ssse3.c b/client/virt/deps/test_cpu_flags/ssse3.c
index 8372f43..6604764 100644
--- a/client/virt/deps/test_cpu_flags/ssse3.c
+++ b/client/virt/deps/test_cpu_flags/ssse3.c
@@ -8,17 +8,23 @@
 #include "tests.h"
 
 #ifdef __SSSE3__
-void ssse3(){
+int ssse3(){
        __ma128i v1;
-       for (int i = 16;i >= 0; i--){
-               v1.ui8[i] = -i;
+       for (int i = 15;i >= 0; i--){   /* i8 has 16 elements; index 16 is out of bounds */
+               v1.i8[i] = -i;
        }
        __ma128i vo;
        vo.i = _mm_abs_epi8(v1.i);
-       printf("[%d]\n", vo.ui8[4]);
+       if (abs(v1.i8[4]) == vo.i8[4]){
+               return 0;
+       }else{
+               printf("Correct: %d result: %d\n", abs(v1.i8[4]), vo.i8[4]);
+               return -1;
+       }
 }
 #else
-void ssse3(){
+int ssse3(){
        printf("SSSE3 is not supported.");
+       return 0;
 }
 #endif
diff --git a/client/virt/deps/test_cpu_flags/stress.c b/client/virt/deps/test_cpu_flags/stress.c
index cad505b..5b0a9ed 100644
--- a/client/virt/deps/test_cpu_flags/stress.c
+++ b/client/virt/deps/test_cpu_flags/stress.c
@@ -63,6 +63,12 @@ void stress(inst in) {
                        pclmul();
                if (in.rdrand)
                        rdrand();
+               if (in.fma4)
+                       fma4();
+               if (in.xop)
+                       xop();
+               if (in.sse4a)
+                       sse4a();
        }
 
        int r = rand()%size;
diff --git a/client/virt/deps/test_cpu_flags/tests.h b/client/virt/deps/test_cpu_flags/tests.h
index a009923..b581864 100644
--- a/client/virt/deps/test_cpu_flags/tests.h
+++ b/client/virt/deps/test_cpu_flags/tests.h
@@ -10,19 +10,26 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include <immintrin.h>
+//#include <immintrin.h>
+#include <x86intrin.h>
 #include <stdint.h>
 #include <omp.h>
+#include <float.h>
+#include <math.h>
+
 
 typedef struct{
        int  num_threads;
        char sse3;
        char ssse3;
        char sse4;
+       char sse4a;
        char avx;
        char aes;
        char pclmul;
        char rdrand;
+       char fma4;
+       char xop;
 } inst;
 
 typedef uint16_t auint16_t __attribute__ ((aligned(16)));
@@ -30,7 +37,10 @@ typedef uint16_t auint16_t __attribute__ ((aligned(16)));
 typedef union __attribute__ ((aligned(16))){
        __m128i i;
        uint64_t ui64[2];
+       uint32_t ui32[4];
+       uint16_t ui16[8];
        uint8_t ui8[16];
+       int8_t i8[16];
 } __ma128i;
 
 typedef union __attribute__ ((aligned(32))){
@@ -40,14 +50,26 @@ typedef union __attribute__ ((aligned(32))){
        double d64[2];
 } __ma128f;
 
-void aes();
-void pclmul();
-void rdrand();
+#ifdef __AVX__
+typedef union __attribute__ ((aligned(32))){
+       __m256 f;
+       __m256d d;
+       float f32[8];
+       double d64[4];
+} __ma256;
+#endif
+
 
-void avx();
-void sse4();
-void sse3();
-void ssse3();
+int aes();
+int pclmul();
+int rdrand();
+int avx();
+int sse4();
+int sse4a();
+int sse3();
+int ssse3();
+int fma4();
+int xop();
 void stress(inst in);
 
 
diff --git a/client/virt/deps/test_cpu_flags/xop.c b/client/virt/deps/test_cpu_flags/xop.c
new file mode 100644
index 0000000..ef01dde
--- /dev/null
+++ b/client/virt/deps/test_cpu_flags/xop.c
@@ -0,0 +1,48 @@
+/*
+ * xop.c
+ *
+ *  Created on: Nov 29, 2011
+ *      Author: jzupka
+ */
+#include "tests.h"
+
+#ifdef __XOP__
+
+int xop(){
+       __ma128i a, b, selector, d;
+       /* Print with %llx and explicit casts: "%I64x" is an MSVC-only length modifier. */
+       a.ui64[1] = 0xccccccccccccccccll;
+       a.ui64[0] = 0x8888888888888888ll;
+       b.ui64[1] = 0x3333333333333333ll;
+       b.ui64[0] = 0x7777777777777777ll;
+       selector.ui64[1] = 0xfedcba9876543210ll;
+       selector.ui64[0] = 0x0123456789abcdefll;
+       d.i = _mm_cmov_si128(a.i, b.i, selector.i);
+       printf("a:        %016llx %016llx\n",
+              (unsigned long long)a.ui64[1], (unsigned long long)a.ui64[0]);
+       printf("b:        %016llx %016llx\n",
+              (unsigned long long)b.ui64[1], (unsigned long long)b.ui64[0]);
+       printf("selector  %016llx %016llx\n",
+              (unsigned long long)selector.ui64[1], (unsigned long long)selector.ui64[0]);
+       printf("result:   %016llx %016llx\n",
+              (unsigned long long)d.ui64[1], (unsigned long long)d.ui64[0]);
+
+       for (int i = 0; i < 4; i++) {
+               a.ui8[i] = -128;
+               a.ui8[i+4] = i-128;
+               a.ui8[i+8] = 10*i;
+               a.ui8[i+12] = 127;
+       }
+       d.i = _mm_haddd_epi8(a.i);
+       for (int i = 0; i < 4; i++) printf(" %d", d.ui32[i]);
+       printf("\n");
+       return 0;
+}
+
+#endif
+#ifndef __XOP__
+int xop(){
+       printf("XOP is not supported.");
+       return 0;
+}
+#endif
-- 
1.7.7.4

_______________________________________________
Autotest mailing list
[email protected]
http://test.kernel.org/cgi-bin/mailman/listinfo/autotest

Reply via email to