fma4, sse4a, xop
Signed-off-by: Jiří Župka <[email protected]>
---
client/virt/deps/test_cpu_flags/Makefile | 83 +++++++++++------------
client/virt/deps/test_cpu_flags/aes.c | 13 +++-
client/virt/deps/test_cpu_flags/avx.c | 30 +++++----
client/virt/deps/test_cpu_flags/cpuflags-test.c | 57 +++++++++++-----
client/virt/deps/test_cpu_flags/fma4.c | 31 +++++++++
client/virt/deps/test_cpu_flags/pclmul.c | 10 ++-
client/virt/deps/test_cpu_flags/rdrand.c | 6 +-
client/virt/deps/test_cpu_flags/sse3.c | 12 +++-
client/virt/deps/test_cpu_flags/sse4.c | 25 ++++++--
client/virt/deps/test_cpu_flags/sse4a.c | 37 ++++++++++
client/virt/deps/test_cpu_flags/ssse3.c | 14 +++-
client/virt/deps/test_cpu_flags/stress.c | 6 ++
client/virt/deps/test_cpu_flags/tests.h | 38 ++++++++--
client/virt/deps/test_cpu_flags/xop.c | 48 +++++++++++++
14 files changed, 309 insertions(+), 101 deletions(-)
create mode 100644 client/virt/deps/test_cpu_flags/fma4.c
create mode 100644 client/virt/deps/test_cpu_flags/sse4a.c
create mode 100644 client/virt/deps/test_cpu_flags/xop.c
diff --git a/client/virt/deps/test_cpu_flags/Makefile
b/client/virt/deps/test_cpu_flags/Makefile
index 5b77740..b95c36e 100644
--- a/client/virt/deps/test_cpu_flags/Makefile
+++ b/client/virt/deps/test_cpu_flags/Makefile
@@ -3,57 +3,42 @@ MKDIR = mkdir -p
OPTFLAGS=-O3
CFLAGS= -m64 ${OPTFLAGS} -std=c99 -pipe \
- -ftree-vectorize -ftree-vectorizer-verbose=0 \
+ -ftree-vectorize \
-ffast-math \
-fopenmp \
-CFLAGSAVX= -m64 ${OPTFLAGS} -std=c99 -pipe \
- -ftree-vectorize -ftree-vectorizer-verbose=0 \
- -ffast-math \
- -mavx \
- -fopenmp \
+CFLAGSAVX= ${CFLAGS} \
+ -mavx \
-CFLAGSSSE4= -m64 ${OPTFLAGS} -std=c99 -pipe \
- -ftree-vectorize -ftree-vectorizer-verbose=0 \
- -ffast-math \
+CFLAGSFMA4= ${CFLAGS} \
+ -mfma4 \
+
+CFLAGSSSE4A= ${CFLAGS} \
+ -msse4a \
+
+CFLAGSSSE4= ${CFLAGS} \
-msse4 -msse4.1 -msse4.2 \
- -fopenmp \
-CFLAGSSSSE3= -m64 ${OPTFLAGS} -std=c99 -pipe \
- -ftree-vectorize -ftree-vectorizer-verbose=0 \
- -ffast-math \
+CFLAGSSSSE3= ${CFLAGS} \
-mssse3 \
- -fopenmp \
-CFLAGSSSE3= -m64 ${OPTFLAGS} -std=c99 -pipe \
- -ftree-vectorize -ftree-vectorizer-verbose=0 \
- -ffast-math \
+CFLAGSSSE3= ${CFLAGS} \
-msse3 \
- -fopenmp \
-CFLAGSAES= -m64 ${OPTFLAGS} -std=c99 -pipe \
- -ftree-vectorize -ftree-vectorizer-verbose=0 \
- -ffast-math \
+CFLAGSAES= ${CFLAGS} \
-maes \
- -fopenmp \
-CFLAGSPCLMUL= -m64 ${OPTFLAGS} -std=c99 -pipe \
- -ftree-vectorize -ftree-vectorizer-verbose=0 \
- -ffast-math \
+CFLAGSPCLMUL= ${CFLAGS} \
-mpclmul \
- -fopenmp \
-CFLAGSRDRAND= -m64 ${OPTFLAGS} -std=c99 -pipe \
- -ftree-vectorize -ftree-vectorizer-verbose=0 \
- -ffast-math \
+CFLAGSRDRAND= ${CFLAGS} \
-mrdrnd \
- -fopenmp \
-CFLAGSSTRESS= -m64 ${OPTFLAGS} -std=c99 -pipe \
- -ftree-vectorize -ftree-vectorizer-verbose=0 \
- -ffast-math \
+CFLAGSXOP= ${CFLAGS} \
+ -mxop \
+
+CFLAGSSTRESS= ${CFLAGS} \
$(EXTRA_FLAGS) \
- -fopenmp \
CXX=g++
CC=gcc
@@ -66,39 +51,51 @@ default:cpuflags-test
all:cpuflags-test
-cpuflags-test: avx.o sse4.o ssse3.o sse3.o aes.o pclmul.o rdrand.o stress.o
+cpuflags-test: avx.o fma4.o xop.o sse4a.o sse4.o ssse3.o sse3.o aes.o pclmul.o
rdrand.o stress.o
$(CC) $(CFLAGS) $(LIBS) cpuflags-test.c -o cpuflags-test \
aes.o \
pclmul.o \
rdrand.o \
avx.o \
+ fma4.o \
+ xop.o \
+ sse4a.o \
sse4.o \
ssse3.o \
sse3.o \
stress.o \
-aes.o: aes.c
+aes.o: aes.c tests.h
$(CC) $(CFLAGSAES) $(LIBS) -c aes.c
-pclmul.o: pclmul.c
+pclmul.o: pclmul.c tests.h
$(CC) $(CFLAGSPCLMUL) $(LIBS) -c pclmul.c
-rdrand.o: rdrand.c
+rdrand.o: rdrand.c tests.h
$(CC) $(CFLAGSRDRAND) $(LIBS) -c rdrand.c
-avx.o: avx.c
+fma4.o: fma4.c tests.h
+ $(CC) $(CFLAGSFMA4) $(LIBS) -c fma4.c
+
+xop.o: xop.c tests.h
+ $(CC) $(CFLAGSXOP) $(LIBS) -c xop.c
+
+avx.o: avx.c tests.h
$(CC) $(CFLAGSAVX) $(LIBS) -c avx.c
-sse4.o: sse4.c
+sse4a.o: sse4a.c tests.h
+ $(CC) $(CFLAGSSSE4A) $(LIBS) -c sse4a.c
+
+sse4.o: sse4.c tests.h
$(CC) $(CFLAGSSSE4) $(LIBS) -c sse4.c
-ssse3.o: ssse3.c
+ssse3.o: ssse3.c tests.h
$(CC) $(CFLAGSSSSE3) $(LIBS) -c ssse3.c
-sse3.o: sse3.c
+sse3.o: sse3.c tests.h
$(CC) $(CFLAGSSSE3) $(LIBS) -c sse3.c
-stress.o: stress.c
+stress.o: stress.c tests.h
$(CC) $(CFLAGSSTRESS) $(LIBS) -c stress.c
ARCHIVE= cpuflags-test
diff --git a/client/virt/deps/test_cpu_flags/aes.c
b/client/virt/deps/test_cpu_flags/aes.c
index b8dc5cc..7132ec7 100644
--- a/client/virt/deps/test_cpu_flags/aes.c
+++ b/client/virt/deps/test_cpu_flags/aes.c
@@ -7,8 +7,10 @@
#include "tests.h"
+#define result (5931894172722287318L)
+
#ifdef __AES__
-void aes(){
+int aes(){
__ma128i v1;
__ma128i v2;
for (int i = 1;i >= 0; i--){
@@ -17,10 +19,15 @@ void aes(){
}
__ma128i v3;
v3.i = _mm_aesdeclast_si128(v1.i, v2.i);
- printf("[%d %d %d]\n",v1.ui64[0],v2.ui64[0],v3.ui64[0]);
+ if (v3.ui64[0] != result){
+ printf("Correct: %ld result: %ld\n", result, v3.ui64[0]);
+ return -1;
+ }
+ return 0;
}
#else
-void aes(){
+int aes(){
printf("AES is not supported.");
+ return 0;
}
#endif
diff --git a/client/virt/deps/test_cpu_flags/avx.c
b/client/virt/deps/test_cpu_flags/avx.c
index bf06929..179c51b 100644
--- a/client/virt/deps/test_cpu_flags/avx.c
+++ b/client/virt/deps/test_cpu_flags/avx.c
@@ -7,15 +7,8 @@
#include "tests.h"
#ifdef __AVX__
-
-typedef union __attribute__ ((aligned(32))){
- __m256 v;
- float f32[8];
-} __mar256;
-
-
-void avx(){
- __mar256 a,b;
+int avx(){
+ __ma256 a,b,c;
__m256 ymm0;
__m256 ymm1;
@@ -27,17 +20,26 @@ void avx(){
ymm0 = _mm256_load_ps(a.f32);
ymm1 = _mm256_load_ps(b.f32);
- __mar256 ymm3;
- ymm3.v = _mm256_sub_ps(ymm0,ymm1);
- _mm256_store_ps(b.f32, ymm3.v );
+ __ma256 ymm3;
+ ymm3.f = _mm256_sub_ps(ymm0,ymm1);
+ _mm256_store_ps(c.f32, ymm3.f);
for (int i = 0;i < 8; i++){
- printf("[%f]\n", b.f32[i]);
+        if (fabsf((a.f32[i] - b.f32[i]) - c.f32[i]) > FLT_EPSILON){ /* compare the magnitude: the error may be negative */
+            printf("Wrong result:\n");
+            for (int j = 0;j < 8; j++){
+                printf("Correct: %f result: %f\n", a.f32[j] - b.f32[j], c.f32[j]);
+            }
+            return -1;
+        }
}
+ return 0;
}
#endif
#ifndef __AVX__
-void avx(){
+int avx(){
printf("AVX is not supported.");
+ return 0;
}
#endif
diff --git a/client/virt/deps/test_cpu_flags/cpuflags-test.c
b/client/virt/deps/test_cpu_flags/cpuflags-test.c
index 483561c..0d7200e 100644
--- a/client/virt/deps/test_cpu_flags/cpuflags-test.c
+++ b/client/virt/deps/test_cpu_flags/cpuflags-test.c
@@ -5,12 +5,16 @@
void print_help(){
printf(
- " --sse4 test sse4 instruction.\n"
+ " --sse3 test sse3 instruction.\n"
" --ssse3 test ssse3 instruction.\n"
+ " --sse4 test sse4 instruction.\n"
+ " --sse4a test sse4a instruction.\n"
" --avx test avx instruction.\n"
" --aes test aes instruction.\n"
" --pclmul test carry less multiplication.\n"
" --rdrand test rdrand instruction.\n"
+ " --fma4 test fma4 instruction.\n"
+ " --xop test xop instruction.\n"
" --stress n_cpus,avx,aes start stress on n_cpus.and cpuflags\n");
}
@@ -35,6 +39,9 @@ inst parse_Inst(char * optarg){
else if(strcmp(pch,"sse4") == 0){
i.sse4 = 1;
}
+ else if(strcmp(pch,"sse4a") == 0){
+ i.sse4a = 1;
+ }
else if(strcmp(pch,"avx") == 0){
i.avx = 1;
}
@@ -47,6 +54,12 @@ inst parse_Inst(char * optarg){
else if(strcmp(pch,"rdrand") == 0){
i.rdrand = 1;
}
+ else if(strcmp(pch,"fma4") == 0){
+ i.fma4 = 1;
+ }
+ else if(strcmp(pch,"xop") == 0){
+ i.xop = 1;
+ }
pch = strtok (NULL, ",");
}
return i;
@@ -57,18 +70,22 @@ int main(int argc, char **argv) {
int digit_optind = 0;
int opt_count = 0;
+ int ret = 0;
while (1) {
int this_option_optind = optind ? optind : 1;
int option_index = 0;
static struct option long_options[] =
- {{ "sse3", no_argument, 0, 0 },
+ {{ "stress",required_argument, 0, 0 },
+ { "sse3", no_argument, 0, 0 },
{ "ssse3", no_argument, 0, 0 },
{ "sse4", no_argument, 0, 0 },
+ { "sse4a", no_argument, 0, 0 },
{ "avx", no_argument, 0, 0 },
{ "aes", no_argument, 0, 0 },
{ "pclmul", no_argument, 0, 0 },
{ "rdrand", no_argument, 0, 0 },
- { "stress", required_argument, 0, 0 },
+ { "fma4", no_argument, 0, 0 },
+ { "xop", no_argument, 0, 0 },
{ 0, 0, 0, 0}};
c = getopt_long(argc, argv, "", long_options, &option_index);
@@ -80,37 +97,41 @@ int main(int argc, char **argv) {
switch (c) {
case 0:
- printf("option %s", long_options[option_index].name);
- if (optarg)
- printf(" with arg %s", optarg);
- printf("\n");
switch (option_index) {
case 0:
- sse3();
+ stress(parse_Inst(optarg));
break;
case 1:
- ssse3();
+ ret += (sse3() != 0); /* tests return -1 or 1 on failure: count each failure as exactly one */
break;
case 2:
- sse4();
+ ret += (ssse3() != 0);
break;
case 3:
- avx();
+ ret += (sse4() != 0);
break;
case 4:
- aes();
+ ret += (sse4a() != 0);
break;
case 5:
- pclmul();
+ ret += (avx() != 0);
break;
case 6:
- rdrand();
+ ret += (aes() != 0);
break;
case 7:
- stress(parse_Inst(optarg));
+ ret += (pclmul() != 0);
+ break;
+ case 8:
+ ret += (rdrand() != 0);
+ break;
+ case 9:
+ ret += (fma4() != 0);
+ break;
+ case 10:
+ ret += (xop() != 0);
break;
}
- printf("\n");
break;
case '?':
@@ -123,5 +144,9 @@ int main(int argc, char **argv) {
}
opt_count += 1;
}
+ if (ret > 0) {
+ printf("%d test fail.\n", ret);
+ exit(-1);
+ }
exit(0);
}
diff --git a/client/virt/deps/test_cpu_flags/fma4.c
b/client/virt/deps/test_cpu_flags/fma4.c
new file mode 100644
index 0000000..48739e1
--- /dev/null
+++ b/client/virt/deps/test_cpu_flags/fma4.c
@@ -0,0 +1,31 @@
+/*
+ * fma4.c
+ *
+ * Created on: Nov 29, 2011
+ * Author: jzupka
+ */
+#include "tests.h"
+
+#ifdef __FMA4__
+
+int fma4(){
+ __ma256 a, b, c, d;
+ int i;
+ for (i = 0; i < 4; i++) {
+ a.d64[i] = i;
+ b.d64[i] = 2.;
+ c.d64[i] = 3.;
+ }
+ d.d = _mm256_macc_pd(a.d, b.d, c.d);
+ for (i = 0; i < 4; i++) printf(" %.3lf", d.d64[i]);
+ printf("\n");
+ return 0;
+}
+
+#endif
+#ifndef __FMA4__
+int fma4(){
+ printf("FMA4 is not supported.");
+ return 0;
+}
+#endif
diff --git a/client/virt/deps/test_cpu_flags/pclmul.c
b/client/virt/deps/test_cpu_flags/pclmul.c
index 3387a17..1877e8b 100644
--- a/client/virt/deps/test_cpu_flags/pclmul.c
+++ b/client/virt/deps/test_cpu_flags/pclmul.c
@@ -8,7 +8,7 @@
#include "tests.h"
#ifdef __PCLMUL__
-void pclmul(){
+int pclmul(){
__ma128i v1;
__ma128i v2;
for (int i = 1;i >= 0; i--){
@@ -17,10 +17,14 @@ void pclmul(){
}
__ma128i v3;
v3.i = _mm_clmulepi64_si128(v1.i, v2.i, 0);
- printf("[%d %d %d]\n",v1.ui64[0],v2.ui64[0],v3.ui64[0]);
+    if (v3.ui64[0] == 5) /* success first, so the report and the -1 below run only on a mismatch */
+        return 0;
+    printf("Correct: %d result: %lu\n", 5, (unsigned long)v3.ui64[0]);
+    return -1;
}
#else
-void pclmul(){
+int pclmul(){
printf("PCMUL is not supported.");
+ return 0;
}
#endif
diff --git a/client/virt/deps/test_cpu_flags/rdrand.c
b/client/virt/deps/test_cpu_flags/rdrand.c
index f9d1b76..8a6cb58 100644
--- a/client/virt/deps/test_cpu_flags/rdrand.c
+++ b/client/virt/deps/test_cpu_flags/rdrand.c
@@ -8,7 +8,7 @@
#include "tests.h"
#ifdef __RDRND__
-void rdrand()
+int rdrand()
{
int val, num=1;
while (num--) {
@@ -19,9 +19,11 @@ void rdrand()
__asm volatile("movl %%eax,%0" : "=m"(val));
printf("Random is %d\n",val);
}
+ return 0;
}
#else
-void rdrand(){
+int rdrand(){
printf("RDRAND is not supported.");
+ return 0;
}
#endif
diff --git a/client/virt/deps/test_cpu_flags/sse3.c
b/client/virt/deps/test_cpu_flags/sse3.c
index 18d2643..fd38821 100644
--- a/client/virt/deps/test_cpu_flags/sse3.c
+++ b/client/virt/deps/test_cpu_flags/sse3.c
@@ -9,7 +9,7 @@
#include "tests.h"
#ifdef __SSE3__
-void sse3(){
+int sse3(){
__ma128f v1;
__ma128f v2;
for (int i = 4;i >= 0; i--){
@@ -18,11 +18,17 @@ void sse3(){
}
__ma128f vo;
vo.f = _mm_addsub_ps(v1.f,v2.f);
- printf("[%f]\n", vo.f32[3]);
+    if (fabsf(vo.f32[3] - (v1.f32[3]+v2.f32[3])) < FLT_EPSILON){ /* fabsf: integer abs() would truncate a sub-1.0 error to 0 */
+        return 0;
+    }else{
+        printf("Correct: %f result: %f\n",v1.f32[3]+v2.f32[3], vo.f32[3]);
+        return -1;
+    }
}
#else
-void sse3(){
+int sse3(){
printf("SSE3 is not supported.");
+ return 0;
}
#endif
diff --git a/client/virt/deps/test_cpu_flags/sse4.c
b/client/virt/deps/test_cpu_flags/sse4.c
index f9b60fb..0f0a5fb 100644
--- a/client/virt/deps/test_cpu_flags/sse4.c
+++ b/client/virt/deps/test_cpu_flags/sse4.c
@@ -8,21 +8,36 @@
#include "tests.h"
#if (defined __SSE4_1__ || defined __SSE4_2__)
-void sse4(){
+int sse4(){
__ma128i v1;
__ma128i v2;
- for (int i = 16;i >= 0; i--){
+ for (int i = 15;i >= 0; i--){
v1.ui8[i] = i;
v2.ui8[i] = 16-i;
}
__ma128i v3;
v3.i = _mm_max_epi8(v1.i,v2.i);
- for (int i = 15;i >= 0; i--){
- printf("max[%d]\n",v3.ui8[i]);
+ int ret = 0;
+ for (int i = 0;i < 16; i++){
+ if (v1.ui8[i] < v2.ui8[i]){
+ if (v3.ui8[i] != v2.ui8[i])
+ ret = 1;
+ }else{
+ if (v3.ui8[i] != v1.ui8[i])
+ ret = 1;
+ }
+ }
+ if (ret){
+ printf("Wrong result:\n");
+ for (int i = 15;i >= 0; i--){
+ printf("max[%d]\n",v3.ui8[i]);
+ }
}
+ return ret;
}
#else
-void sse4(){
+int sse4(){
printf("SSE4 is not supported.");
+ return 0;
}
#endif
diff --git a/client/virt/deps/test_cpu_flags/sse4a.c
b/client/virt/deps/test_cpu_flags/sse4a.c
new file mode 100644
index 0000000..a5fbcd9
--- /dev/null
+++ b/client/virt/deps/test_cpu_flags/sse4a.c
@@ -0,0 +1,37 @@
+/*
+ * sse4a.c
+ *
+ * Created on: Nov 29, 2011
+ * Author: jzupka
+ */
+#include "tests.h"
+
+#ifdef __SSE4A__
+
+int sse4a(){
+ __ma128f v;
+ double d[2];
+ d[0] = -1.;
+ d[1] = -2.;
+ v.d64[0] = 0.;
+ v.d64[1] = 1.;
+ _mm_stream_sd(&d[0], v.d);
+    const double expected[2] = {v.d64[0], -2.}; /* _mm_stream_sd stores only the low double; d[1] keeps its initial -2 */
+    for (int i = 0;i < 2; i++){
+        if (d[i] != expected[i]){
+            printf("Wrong result:\n");
+            for (int j = 0;j < 2; j++)
+                printf("Correct: %f result: %f\n", expected[j], d[j]);
+            return -1;
+        }
+    }
+ return 0;
+}
+
+#endif
+#ifndef __SSE4A__
+int sse4a(){
+ printf("SSE4A is not supported.");
+ return 0;
+}
+#endif
diff --git a/client/virt/deps/test_cpu_flags/ssse3.c
b/client/virt/deps/test_cpu_flags/ssse3.c
index 8372f43..6604764 100644
--- a/client/virt/deps/test_cpu_flags/ssse3.c
+++ b/client/virt/deps/test_cpu_flags/ssse3.c
@@ -8,17 +8,23 @@
#include "tests.h"
#ifdef __SSSE3__
-void ssse3(){
+int ssse3(){
__ma128i v1;
- for (int i = 16;i >= 0; i--){
- v1.ui8[i] = -i;
+ for (int i = 15;i >= 0; i--){
+ v1.i8[i] = -i;
}
__ma128i vo;
vo.i = _mm_abs_epi8(v1.i);
- printf("[%d]\n", vo.ui8[4]);
+ if (abs(v1.i8[4]) == vo.i8[4]){
+ return 0;
+ }else{
+ printf("Correct: %d result: %d\n", abs(v1.i8[4]), vo.i8[4]);
+ return -1;
+ }
}
#else
-void ssse3(){
+int ssse3(){
printf("SSSE3 is not supported.");
+ return 0;
}
#endif
diff --git a/client/virt/deps/test_cpu_flags/stress.c
b/client/virt/deps/test_cpu_flags/stress.c
index cad505b..5b0a9ed 100644
--- a/client/virt/deps/test_cpu_flags/stress.c
+++ b/client/virt/deps/test_cpu_flags/stress.c
@@ -63,6 +63,12 @@ void stress(inst in) {
pclmul();
if (in.rdrand)
rdrand();
+ if (in.fma4)
+ fma4();
+ if (in.xop)
+ xop();
+ if (in.sse4a)
+ sse4a();
}
int r = rand()%size;
diff --git a/client/virt/deps/test_cpu_flags/tests.h
b/client/virt/deps/test_cpu_flags/tests.h
index a009923..b581864 100644
--- a/client/virt/deps/test_cpu_flags/tests.h
+++ b/client/virt/deps/test_cpu_flags/tests.h
@@ -10,19 +10,26 @@
#include <stdio.h>
#include <stdlib.h>
-#include <immintrin.h>
+//#include <immintrin.h>
+#include <x86intrin.h>
#include <stdint.h>
#include <omp.h>
+#include <float.h>
+#include <math.h>
+
typedef struct{
int num_threads;
char sse3;
char ssse3;
char sse4;
+ char sse4a;
char avx;
char aes;
char pclmul;
char rdrand;
+ char fma4;
+ char xop;
} inst;
typedef uint16_t auint16_t __attribute__ ((aligned(16)));
@@ -30,7 +37,10 @@ typedef uint16_t auint16_t __attribute__ ((aligned(16)));
typedef union __attribute__ ((aligned(16))){
__m128i i;
uint64_t ui64[2];
+ uint32_t ui32[4];
+ uint16_t ui16[8];
uint8_t ui8[16];
+ int8_t i8[16];
} __ma128i;
typedef union __attribute__ ((aligned(32))){
@@ -40,14 +50,26 @@ typedef union __attribute__ ((aligned(32))){
double d64[2];
} __ma128f;
-void aes();
-void pclmul();
-void rdrand();
+#ifdef __AVX__
+typedef union __attribute__ ((aligned(32))){
+ __m256 f;
+ __m256d d;
+ float f32[8];
+ double d64[4];
+} __ma256;
+#endif
+
-void avx();
-void sse4();
-void sse3();
-void ssse3();
+int aes();
+int pclmul();
+int rdrand();
+int avx();
+int sse4();
+int sse4a();
+int sse3();
+int ssse3();
+int fma4();
+int xop();
void stress(inst in);
diff --git a/client/virt/deps/test_cpu_flags/xop.c
b/client/virt/deps/test_cpu_flags/xop.c
new file mode 100644
index 0000000..ef01dde
--- /dev/null
+++ b/client/virt/deps/test_cpu_flags/xop.c
@@ -0,0 +1,48 @@
+/*
+ * xop.c
+ *
+ * Created on: Nov 29, 2011
+ * Author: jzupka
+ */
+#include "tests.h"
+
+#ifdef __XOP__
+
+int xop(){
+ __ma128i a, b, selector, d;
+ int i;
+ a.ui64[1] = 0xccccccccccccccccll;
+ a.ui64[0] = 0x8888888888888888ll;
+ b.ui64[1] = 0x3333333333333333ll;
+ b.ui64[0] = 0x7777777777777777ll;
+ selector.ui64[1] = 0xfedcba9876543210ll;
+ selector.ui64[0] = 0x0123456789abcdefll;
+ d.i = _mm_cmov_si128(a.i, b.i, selector.i);
+    printf("a: %016llx %016llx\n", /* %llx with a cast is portable; %I64x is MSVC-only */
+           (unsigned long long)a.ui64[1], (unsigned long long)a.ui64[0]);
+    printf("b: %016llx %016llx\n",
+           (unsigned long long)b.ui64[1], (unsigned long long)b.ui64[0]);
+    printf("selector %016llx %016llx\n",
+           (unsigned long long)selector.ui64[1], (unsigned long long)selector.ui64[0]);
+    printf("result: %016llx %016llx\n",
+           (unsigned long long)d.ui64[1], (unsigned long long)d.ui64[0]);
+
+ for (int i = 0; i < 4; i++) {
+ a.ui8[i] = -128;
+ a.ui8[i+4] = i-128;
+ a.ui8[i+8] = 10*i;
+ a.ui8[i+12] = 127;
+ }
+ d.i = _mm_haddd_epi8(a.i);
+ for (int i = 0; i < 4; i++) printf(" %d", d.ui32[i]);
+ printf("\n");
+ return 0;
+}
+
+#endif
+#ifndef __XOP__
+int xop(){
+ printf("XOP is not supported.");
+ return 0;
+}
+#endif
--
1.7.7.4
_______________________________________________
Autotest mailing list
[email protected]
http://test.kernel.org/cgi-bin/mailman/listinfo/autotest