These 3 patches add an SSSE3 fast path skeleton.
We want to get the framework in first and add more work on top of it later.

Great thanks to Siarhei, Soren and other contributors. :)


Regards,
Xinyun
>From be178d0b0fe837d9b1027be8e3797daa6351c2c3 Mon Sep 17 00:00:00 2001
From: Liu Xinyun <[email protected]>
Date: Wed, 8 Dec 2010 18:38:31 +0800
Subject: [PATCH 1/3] Check SSSE3 in the build system.

Add --disable-ssse3 option.

Signed-off-by: Liu Xinyun <[email protected]>
Signed-off-by: Xu Samuel <[email protected]>
Signed-off-by: Ma Ling <[email protected]>
Signed-off-by: Zhao Yakui <[email protected]>
---
 configure.ac |   46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 46 insertions(+), 0 deletions(-)

diff --git a/configure.ac b/configure.ac
index 147e1bf..6b92651 100644
--- a/configure.ac
+++ b/configure.ac
@@ -372,6 +372,47 @@ fi
 AM_CONDITIONAL(USE_SSE2, test $have_sse2_intrinsics = yes)
 
 dnl ===========================================================================
+dnl Check for SSSE3
+
+if test "x$SSSE3_CFLAGS" = "x" ; then
+   SSSE3_CFLAGS="-mmmx -mssse3 -Winline"
+fi
+dnl Probe whether a test program using an SSSE3 instruction compiles with SSSE3_CFLAGS.
+have_ssse3_assembler=no
+AC_MSG_CHECKING(whether to use SSSE3 assembler)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$SSSE3_CFLAGS $CFLAGS"
+
+AC_COMPILE_IFELSE([
+#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3))
+#      error "Need GCC >= 4.3 for SSSE3 assembler on x86"
+#endif
+int main () {
+    __asm__ __volatile__("PSHUFB %xmm1, %xmm0;");
+    return 0;
+}], have_ssse3_assembler=yes)
+CFLAGS=$xserver_save_CFLAGS
+dnl enable_ssse3 defaults to auto. SSSE3 is marked disabled when the user says no or when SUNCC is yes.
+AC_ARG_ENABLE(ssse3,
+   [AC_HELP_STRING([--disable-ssse3],
+                   [disable SSSE3 fast paths])],
+   [enable_ssse3=$enableval], [enable_ssse3=auto])
+
+if test $enable_ssse3 = no || test $SUNCC = yes ; then
+   have_ssse3_assembler=disabled
+fi
+
+if test $have_ssse3_assembler = yes ; then
+   AC_DEFINE(USE_SSSE3, 1, [use SSSE3 compiler assembler])
+fi
+dnl Fail only when SSSE3 was explicitly requested but the assembler probe did not succeed.
+AC_MSG_RESULT($have_ssse3_assembler)
+if test $enable_ssse3 = yes && test $have_ssse3_assembler = no ; then
+   AC_MSG_ERROR([SSSE3 assembler not detected])
+fi
+
+AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_assembler = yes)
+dnl ===========================================================================
 dnl Other special flags needed when building code using MMX or SSE instructions
 case $host_os in
    solaris*)
@@ -397,6 +438,9 @@ case $host_os in
       if test "x$SSE2_LDFLAGS" = "x" ; then
 	 SSE2_LDFLAGS="$HWCAP_LDFLAGS"
       fi
+      if test "x$SSSE3_LDFLAGS" = "x" ; then
+	 SSSE3_LDFLAGS="$HWCAP_LDFLAGS"
+      fi
       ;;
 esac
 
@@ -404,6 +448,8 @@ AC_SUBST(MMX_CFLAGS)
 AC_SUBST(MMX_LDFLAGS)
 AC_SUBST(SSE2_CFLAGS)
 AC_SUBST(SSE2_LDFLAGS)
+AC_SUBST(SSSE3_CFLAGS)
+AC_SUBST(SSSE3_LDFLAGS)
 
 dnl ===========================================================================
 dnl Check for VMX/Altivec
-- 
1.7.0.4

>From fb4c9843a5a45e10acaab994d98e1c443c7b37fe Mon Sep 17 00:00:00 2001
From: Liu Xinyun <[email protected]>
Date: Wed, 8 Dec 2010 18:46:43 +0800
Subject: [PATCH 2/3] Improve CPU feature detection.

Add SSSE3 dynamic detection.

Signed-off-by: Liu Xinyun <[email protected]>
Signed-off-by: Xu Samuel <[email protected]>
Signed-off-by: Ma Ling <[email protected]>
Signed-off-by: Zhao Yakui <[email protected]>
---
 pixman/pixman-cpu.c |  114 ++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 100 insertions(+), 14 deletions(-)

diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index e4fb1e4..389c6e5 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -25,10 +25,13 @@
 
 #include <string.h>
 
-#if defined(USE_ARM_SIMD) && defined(_MSC_VER)
+#if defined(_MSC_VER)
+#include <intrin.h>
+#if defined(USE_ARM_SIMD)
 /* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */
 #include <windows.h>
-#endif
+#endif /* USE_ARM_SIMD */
+#endif /* _MSC_VER */
 
 #include "pixman-private.h"
 
@@ -332,14 +335,15 @@ pixman_have_arm_neon (void)
 
 #endif /* USE_ARM_SIMD || USE_ARM_NEON */
 
-#if defined(USE_MMX) || defined(USE_SSE2)
+#if defined(USE_MMX) || defined(USE_SSE2) || defined(USE_SSSE3)
 /* The CPU detection code needs to be in a file not compiled with
  * "-mmmx -msse", as gcc would generate CMOV instructions otherwise
  * that would lead to SIGILL instructions on old CPUs that don't have
  * it.
  */
-#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)
-
+#if (!defined(__amd64__) && !defined(__x86_64__) && \
+     !defined(_M_AMD64)) || defined(USE_SSSE3)
+/*32 bit or (64 bit with USE_SSSE3 defined)*/
 #ifdef HAVE_GETISAX
 #include <sys/auxv.h>
 #endif
@@ -351,15 +355,19 @@ typedef enum
     MMX_EXTENSIONS = 0x2,
     SSE = 0x6,
     SSE2 = 0x8,
-    CMOV = 0x10
+    CMOV = 0x10,
+    SSSE3 = 0x20
 } cpu_features_t;
 
+#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)
 
+/* 32 bits implementation */
 static unsigned int
 detect_cpu_features (void)
 {
     unsigned int features = 0;
     unsigned int result = 0;
+    unsigned int result_c = 0;
 
 #ifdef HAVE_GETISAX
     if (getisax (&result, 1))
@@ -374,6 +382,8 @@ detect_cpu_features (void)
 	    features |= SSE;
 	if (result & AV_386_SSE2)
 	    features |= SSE2;
+	if (result & AV_386_SSSE3)
+	    features |= SSSE3;
     }
 #else
     char vendor[13];
@@ -419,10 +429,12 @@ detect_cpu_features (void)
         "pop %%ebx\n"
         "1:\n"
         "mov %%edx, %0\n"
+	"mov %%ecx, %4\n"
 	: "=r" (result),
-        "=m" (vendor[0]),
-        "=m" (vendor[4]),
-        "=m" (vendor[8])
+	"=m" (vendor[0]),
+	"=m" (vendor[4]),
+	"=m" (vendor[8]),
+	"=r" (result_c)
 	:
 	: "%eax", "%ecx", "%edx"
         );
@@ -456,6 +468,7 @@ detect_cpu_features (void)
 	pop ebx
     nocpuid:
 	mov result, edx
+	mov result_c, ecx
     }
     memmove (vendor + 0, &vendor0, 4);
     memmove (vendor + 4, &vendor1, 4);
@@ -466,7 +479,7 @@ detect_cpu_features (void)
 #endif
 
     features = 0;
-    if (result)
+    if (result || result_c)
     {
 	/* result now contains the standard feature bits */
 	if (result & (1 << 15))
@@ -477,6 +490,8 @@ detect_cpu_features (void)
 	    features |= SSE;
 	if (result & (1 << 26))
 	    features |= SSE2;
+	if (result_c & (1 << 9))
+	    features |= SSSE3;
 	if ((features & MMX) && !(features & SSE) &&
 	    (strcmp (vendor, "AuthenticAMD") == 0 ||
 	     strcmp (vendor, "Geode by NSC") == 0))
@@ -498,7 +513,7 @@ detect_cpu_features (void)
 		: "=r" (result)
 		:
 		: "%eax", "%ecx", "%edx"
-	        );
+		);
 #elif defined _MSC_VER
 	    _asm {
 		push ebx
@@ -523,6 +538,48 @@ detect_cpu_features (void)
     return features;
 }
 
+#else         /* end dt_cpu32() */
+/* 64-bit implementation.  Only SSSE3 is probed (CPUID leaf 1, ECX bit 9, or
+ * AV_386_SSSE3 from getisax); when it is present the MMX, SSE and SSE2 flags
+ * are reported alongside it.  Returns 0 when SSSE3 is not found. */
+static unsigned int detect_cpu_features(void)
+{
+    unsigned int features = 0;
+    unsigned int result_c = 0;
+
+#ifdef HAVE_GETISAX
+    if (getisax (&result_c, 1) && (result_c & AV_386_SSSE3))
+	features |= (SSSE3|MMX|MMX_EXTENSIONS|SSE|SSE2);
+#elif defined(_MSC_VER)
+    int CPUInfo[4] = {-1};
+
+    __cpuid(CPUInfo, 1);
+    if ((CPUInfo[2] & 0x200)>>9)
+	features |= (SSSE3|MMX|MMX_EXTENSIONS|SSE|SSE2);
+#elif defined(__GNUC__)
+    __asm__ (
+	"mov $1, %%eax\n"
+	"cpuid\n"
+	"mov %%ecx, %0\n"
+	: "=r" (result_c)
+	:
+	: "%rax", "%rbx", "%rcx", "%rdx"
+    );
+    if (result_c & (1 << 9))
+	features |= (SSSE3|MMX|MMX_EXTENSIONS|SSE|SSE2);
+#else
+#   error unsupported compiler
+#endif
+
+    return features;
+}
+
+#endif /* end dt_cpu64() */
+
+#ifdef USE_MMX
+#if (!defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64))
+/*32 bit MMX*/
+
 static pixman_bool_t
 pixman_have_mmx (void)
 {
@@ -539,7 +596,15 @@ pixman_have_mmx (void)
     return mmx_present;
 }
 
+#else
+/*64 bit MMX*/
+#define pixman_have_mmx() TRUE
+#endif
+#endif
+
 #ifdef USE_SSE2
+#if (!defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64))
+/*32 bit SSE2*/
 static pixman_bool_t
 pixman_have_sse2 (void)
 {
@@ -555,18 +620,39 @@ pixman_have_sse2 (void)
 
     return sse2_present;
 }
+#else
+/*64 bit SSE2*/
+#define pixman_have_sse2() TRUE
+#endif
+#endif
+
+#ifdef USE_SSSE3
+static pixman_bool_t
+pixman_have_ssse3 (void)
+{
+    /* Probe the CPU on the first call only and cache the answer. */
+    static pixman_bool_t probed = FALSE, have_ssse3;
+    const unsigned int required = MMX | MMX_EXTENSIONS | SSE | SSE2 | SSSE3;
+
+    if (probed)
+	return have_ssse3;
+
+    have_ssse3 = (detect_cpu_features() & required) == required;
+    probed = TRUE;
+    return have_ssse3;
+}
 
 #endif
 
-#else /* __amd64__ */
+#else  /* (amd_64 && (MMX||SSE2))*/
 #ifdef USE_MMX
 #define pixman_have_mmx() TRUE
 #endif
 #ifdef USE_SSE2
 #define pixman_have_sse2() TRUE
 #endif
-#endif /* __amd64__ */
-#endif
+#endif /* end (amd_64 && (MMX || SSE2)) */
+#endif /* end (MMX || SSE2 || SSSE3) */
 
 pixman_implementation_t *
 _pixman_choose_implementation (void)
-- 
1.7.0.4

>From f379aaf3124948729d41cad279143b5a10148811 Mon Sep 17 00:00:00 2001
From: Liu Xinyun <[email protected]>
Date: Wed, 8 Dec 2010 18:51:27 +0800
Subject: [PATCH 3/3] Add SSSE3 fast path skeleton

Signed-off-by: Liu Xinyun <[email protected]>
Signed-off-by: Xu Samuel <[email protected]>
Signed-off-by: Ma Ling <[email protected]>
Signed-off-by: Zhao Yakui <[email protected]>
---
 pixman/Makefile.am      |   12 +++++++++
 pixman/pixman-cpu.c     |    5 ++++
 pixman/pixman-private.h |    5 ++++
 pixman/pixman-ssse3.c   |   58 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 80 insertions(+), 0 deletions(-)
 create mode 100644 pixman/pixman-ssse3.c

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index ca31301..ba6810c 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -94,6 +94,18 @@ libpixman_1_la_LIBADD += libpixman-sse2.la
 ASM_CFLAGS_sse2=$(SSE2_CFLAGS)
 endif
 
+# ssse3 code
+if USE_SSSE3
+noinst_LTLIBRARIES += libpixman-ssse3.la
+libpixman_ssse3_la_SOURCES = \
+	pixman-ssse3.c
+libpixman_ssse3_la_CFLAGS = $(DEP_CFLAGS) $(SSSE3_CFLAGS)
+libpixman_ssse3_la_LIBADD = $(DEP_LIBS)
+libpixman_1_la_LDFLAGS += $(SSSE3_LDFLAGS)
+libpixman_1_la_LIBADD += libpixman-ssse3.la
+
+ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS)
+endif
 # arm simd code
 if USE_ARM_SIMD
 noinst_LTLIBRARIES += libpixman-arm-simd.la
diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index 389c6e5..9be0d06 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -657,6 +657,11 @@ pixman_have_ssse3 (void)
 pixman_implementation_t *
 _pixman_choose_implementation (void)
 {
+#if defined(USE_SSSE3) && defined(__GNUC__)
+/* not for MSVC and SUN Studio (Macro expanding issue) */
+    if (pixman_have_ssse3())
+	return _pixman_implementation_create_ssse3();
+#endif
 #ifdef USE_SSE2
     if (pixman_have_sse2 ())
 	return _pixman_implementation_create_sse2 ();
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 383748a..026c57f 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -503,6 +503,11 @@ pixman_implementation_t *
 _pixman_implementation_create_sse2 (void);
 #endif
 
+#ifdef USE_SSSE3
+pixman_implementation_t *
+_pixman_implementation_create_ssse3(void);
+#endif
+
 #ifdef USE_ARM_SIMD
 pixman_implementation_t *
 _pixman_implementation_create_arm_simd (void);
diff --git a/pixman/pixman-ssse3.c b/pixman/pixman-ssse3.c
new file mode 100644
index 0000000..8025ced
--- /dev/null
+++ b/pixman/pixman-ssse3.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author:  Xu Samuel ([email protected])
+ *
+ * Based on work by Rodrigo Kumpera and André Tupinambá
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-private.h"
+
+#ifdef USE_SSSE3
+static const pixman_fast_path_t ssse3_fast_paths[] = {
+    { PIXMAN_OP_NONE }, /* only entry so far; presumably the table terminator -- TODO confirm */
+};
+
+#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+__attribute__((__force_align_arg_pointer__))
+#endif
+/* Create the SSSE3 implementation; its fallback is SSE2 if compiled in, else MMX, else fast_path. */
+pixman_implementation_t *
+_pixman_implementation_create_ssse3 (void)
+{
+#ifdef USE_SSE2
+    pixman_implementation_t *fallback = _pixman_implementation_create_sse2();
+#elif defined(USE_MMX)
+    pixman_implementation_t *fallback = _pixman_implementation_create_mmx();
+#else
+    pixman_implementation_t *fallback = _pixman_implementation_create_fast_path();
+#endif
+    pixman_implementation_t *imp = _pixman_implementation_create(fallback, ssse3_fast_paths);
+
+    return imp;
+}
+
+#endif /* USE_SSSE3 */
-- 
1.7.0.4

_______________________________________________
Pixman mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/pixman

Reply via email to