These 3 patches add an SSSE3 fast path skeleton. We hope to add the framework first and add more work later.
Many thanks to Siarhei, Soren and the other contributors. :) Regards, Xinyun
>From be178d0b0fe837d9b1027be8e3797daa6351c2c3 Mon Sep 17 00:00:00 2001 From: Liu Xinyun <[email protected]> Date: Wed, 8 Dec 2010 18:38:31 +0800 Subject: [PATCH 1/3] Check SSSE3 in the build system. Add --disable-ssse3 option. Signed-off-by: Liu Xinyun <[email protected]> Signed-off-by: Xu Samuel <[email protected]> Signed-off-by: Ma Ling <[email protected]> Signed-off-by: Zhao Yakui <[email protected]> --- configure.ac | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 46 insertions(+), 0 deletions(-) diff --git a/configure.ac b/configure.ac index 147e1bf..6b92651 100644 --- a/configure.ac +++ b/configure.ac @@ -372,6 +372,47 @@ fi AM_CONDITIONAL(USE_SSE2, test $have_sse2_intrinsics = yes) dnl =========================================================================== +dnl Check for SSSE3 + +if test "x$SSSE3_CFLAGS" = "x" ; then + SSSE3_CFLAGS="-mmmx -mssse3 -Winline" +fi + +have_ssse3_assembler=no +AC_MSG_CHECKING(whether to use SSSE3 assembler) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="$SSSE3_CFLAGS $CFLAGS" + +AC_COMPILE_IFELSE([ +#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3)) +# error "Need GCC >= 4.3 for SSSE3 assembler on x86" +#endif +int main () { + __asm__ __volatile__("PSHUFB %xmm1, %xmm0;"); + return 0; +}], have_ssse3_assembler=yes) +CFLAGS=$xserver_save_CFLAGS + +AC_ARG_ENABLE(ssse3, + [AC_HELP_STRING([--disable-ssse3], + [disable SSSE3 fast paths])], + [enable_ssse3=$enableval], [enable_ssse3=auto]) + +if test $enable_ssse3 = no || test $SUNCC = yes ; then + have_ssse3_assembler=disabled +fi + +if test $have_ssse3_assembler = yes ; then + AC_DEFINE(USE_SSSE3, 1, [use SSSE3 compiler assembler]) +fi + +AC_MSG_RESULT($have_ssse3_assembler) +if test $enable_ssse3 = yes && test $have_assembler = no ; then + AC_MSG_ERROR([SSSE3 assembler not detected]) +fi + +AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_assembler = yes) +dnl =========================================================================== dnl Other 
special flags needed when building code using MMX or SSE instructions case $host_os in solaris*) @@ -397,6 +438,9 @@ case $host_os in if test "x$SSE2_LDFLAGS" = "x" ; then SSE2_LDFLAGS="$HWCAP_LDFLAGS" fi + if test "x$SSSE3_LDFLAGS" = "x" ; then + SSSE3_LDFLAGS="$HWCAP_LDFLAGS" + fi ;; esac @@ -404,6 +448,8 @@ AC_SUBST(MMX_CFLAGS) AC_SUBST(MMX_LDFLAGS) AC_SUBST(SSE2_CFLAGS) AC_SUBST(SSE2_LDFLAGS) +AC_SUBST(SSSE3_CFLAGS) +AC_SUBST(SSSE3_LDFLAGS) dnl =========================================================================== dnl Check for VMX/Altivec -- 1.7.0.4
>From fb4c9843a5a45e10acaab994d98e1c443c7b37fe Mon Sep 17 00:00:00 2001 From: Liu Xinyun <[email protected]> Date: Wed, 8 Dec 2010 18:46:43 +0800 Subject: [PATCH 2/3] Improve CPU feature detection. Add SSSE3 dynamic detection. Signed-off-by: Liu Xinyun <[email protected]> Signed-off-by: Xu Samuel <[email protected]> Signed-off-by: Ma Ling <[email protected]> Signed-off-by: Zhao Yakui <[email protected]> --- pixman/pixman-cpu.c | 114 ++++++++++++++++++++++++++++++++++++++++++++------ 1 files changed, 100 insertions(+), 14 deletions(-) diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c index e4fb1e4..389c6e5 100644 --- a/pixman/pixman-cpu.c +++ b/pixman/pixman-cpu.c @@ -25,10 +25,13 @@ #include <string.h> -#if defined(USE_ARM_SIMD) && defined(_MSC_VER) +#if defined(_MSC_VER) +#include <intrin.h> +#if defined(USE_ARM_SIMD) /* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */ #include <windows.h> -#endif +#endif /* USE_ARM_SIMD */ +#endif /* _MSC_VER */ #include "pixman-private.h" @@ -332,14 +335,15 @@ pixman_have_arm_neon (void) #endif /* USE_ARM_SIMD || USE_ARM_NEON */ -#if defined(USE_MMX) || defined(USE_SSE2) +#if defined(USE_MMX) || defined(USE_SSE2) || defined(USE_SSSE3) /* The CPU detection code needs to be in a file not compiled with * "-mmmx -msse", as gcc would generate CMOV instructions otherwise * that would lead to SIGILL instructions on old CPUs that don't have * it. 
*/ -#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64) - +#if (!defined(__amd64__) && !defined(__x86_64__) && \ + !defined(_M_AMD64)) || defined(USE_SSSE3) +/*32 bit or (64 bit with USE_SSSE3 defined)*/ #ifdef HAVE_GETISAX #include <sys/auxv.h> #endif @@ -351,15 +355,19 @@ typedef enum MMX_EXTENSIONS = 0x2, SSE = 0x6, SSE2 = 0x8, - CMOV = 0x10 + CMOV = 0x10, + SSSE3 = 0x20 } cpu_features_t; +#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64) +/* 32 bits implementation */ static unsigned int detect_cpu_features (void) { unsigned int features = 0; unsigned int result = 0; + unsigned int result_c = 0; #ifdef HAVE_GETISAX if (getisax (&result, 1)) @@ -374,6 +382,8 @@ detect_cpu_features (void) features |= SSE; if (result & AV_386_SSE2) features |= SSE2; + if (result & AV_386_SSSE3) + features |= SSSE3; } #else char vendor[13]; @@ -419,10 +429,12 @@ detect_cpu_features (void) "pop %%ebx\n" "1:\n" "mov %%edx, %0\n" + "mov %%ecx, %4\n" : "=r" (result), - "=m" (vendor[0]), - "=m" (vendor[4]), - "=m" (vendor[8]) + "=m" (vendor[0]), + "=m" (vendor[4]), + "=m" (vendor[8]), + "=r" (result_c) : : "%eax", "%ecx", "%edx" ); @@ -456,6 +468,7 @@ detect_cpu_features (void) pop ebx nocpuid: mov result, edx + mov result_c, ecx } memmove (vendor + 0, &vendor0, 4); memmove (vendor + 4, &vendor1, 4); @@ -466,7 +479,7 @@ detect_cpu_features (void) #endif features = 0; - if (result) + if (result || result_c) { /* result now contains the standard feature bits */ if (result & (1 << 15)) @@ -477,6 +490,8 @@ detect_cpu_features (void) features |= SSE; if (result & (1 << 26)) features |= SSE2; + if (result_c & (1 << 9)) + features |= SSSE3; if ((features & MMX) && !(features & SSE) && (strcmp (vendor, "AuthenticAMD") == 0 || strcmp (vendor, "Geode by NSC") == 0)) @@ -498,7 +513,7 @@ detect_cpu_features (void) : "=r" (result) : : "%eax", "%ecx", "%edx" - ); + ); #elif defined _MSC_VER _asm { push ebx @@ -523,6 +538,48 @@ detect_cpu_features (void) return 
features; } +#else /* end dt_cpu32() */ +/* start dt_cpu64() */ +static unsigned int detect_cpu_features(void) +{ + unsigned int features = 0; + unsigned int result_c = 0; + +#ifdef HAVE_GETISAX + if (getisax (&result, 1)) { + if (result & AV_386_SSSE3) + features |= (SSSE3|MMX|MMX_EXTENSIONS|SSE|SSE2); + } +#elif defined(_MSC_VER) + int CPUInfo[4] = {-1}; + + __cpuid(CPUInfo, 1); + if ((CPUInfo[2] & 0x200)>>9) + features |= (SSSE3|MMX|MMX_EXTENSIONS|SSE|SSE2); +#elif defined(__GNUC__) + __asm__ ( + "mov $1, %%eax\n" + "cpuid\n" + "mov %%ecx, %0\n" + : "=r" (result_c) + : + : "%rax", "%rbx", "%rcx", "%rdx" + ); + if (result_c & (1 << 9)) + features |= (SSSE3|MMX|MMX_EXTENSIONS|SSE|SSE2); +#else +# error unsupported compiler +#endif + + return features; +} + +#endif /* end dt_cpu64() */ + +#ifdef USE_MMX +#if (!defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)) +/*32 bit MMX*/ + static pixman_bool_t pixman_have_mmx (void) { @@ -539,7 +596,15 @@ pixman_have_mmx (void) return mmx_present; } +#else +/*64 bit MMX*/ +#define pixman_have_mmx() TRUE +#endif +#endif + #ifdef USE_SSE2 +#if (!defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)) +/*32 bit SSE2*/ static pixman_bool_t pixman_have_sse2 (void) { @@ -555,18 +620,39 @@ pixman_have_sse2 (void) return sse2_present; } +#else +/*64 bit SSE2*/ +#define pixman_have_sse2() TRUE +#endif +#endif + +#ifdef USE_SSSE3 +static pixman_bool_t +pixman_have_ssse3 (void) +{ + static pixman_bool_t initialized = FALSE; + static pixman_bool_t ssse3_present; + + if (!initialized) { + unsigned int features = detect_cpu_features(); + ssse3_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2 | + SSSE3)) == (MMX | MMX_EXTENSIONS | SSE | SSE2 | SSSE3); + initialized = TRUE; + } + return ssse3_present; +} #endif -#else /* __amd64__ */ +#else /* (amd_64 && (MMX||SSE2))*/ #ifdef USE_MMX #define pixman_have_mmx() TRUE #endif #ifdef USE_SSE2 #define pixman_have_sse2() TRUE #endif -#endif /* __amd64__ */ -#endif 
+#endif /* end (amd_64 && (MMX || SSE2)) */ +#endif /* end (MMX || SSE2 || SSSE3 */ pixman_implementation_t * _pixman_choose_implementation (void) -- 1.7.0.4
>From f379aaf3124948729d41cad279143b5a10148811 Mon Sep 17 00:00:00 2001 From: Liu Xinyun <[email protected]> Date: Wed, 8 Dec 2010 18:51:27 +0800 Subject: [PATCH 3/3] Add SSSE3 fast path skeleton Signed-off-by: Liu Xinyun <[email protected]> Signed-off-by: Xu Samuel <[email protected]> Signed-off-by: Ma Ling <[email protected]> Signed-off-by: Zhao Yakui <[email protected]> --- pixman/Makefile.am | 12 +++++++++ pixman/pixman-cpu.c | 5 ++++ pixman/pixman-private.h | 5 ++++ pixman/pixman-ssse3.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 0 deletions(-) create mode 100644 pixman/pixman-ssse3.c diff --git a/pixman/Makefile.am b/pixman/Makefile.am index ca31301..ba6810c 100644 --- a/pixman/Makefile.am +++ b/pixman/Makefile.am @@ -94,6 +94,18 @@ libpixman_1_la_LIBADD += libpixman-sse2.la ASM_CFLAGS_sse2=$(SSE2_CFLAGS) endif +# ssse3 code +if USE_SSSE3 +noinst_LTLIBRARIES += libpixman-ssse3.la +libpixman_ssse3_la_SOURCES = \ + pixman-ssse3.c +libpixman_ssse3_la_CFLAGS = $(DEP_CFLAGS) $(SSSE3_CFLAGS) +libpixman_ssse3_la_LIBADD = $(DEP_LIBS) +libpixman_1_la_LDFLAGS += $(SSSE3_LDFLAGS) +libpixman_1_la_LIBADD += libpixman-ssse3.la + +ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS) +endif # arm simd code if USE_ARM_SIMD noinst_LTLIBRARIES += libpixman-arm-simd.la diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c index 389c6e5..9be0d06 100644 --- a/pixman/pixman-cpu.c +++ b/pixman/pixman-cpu.c @@ -657,6 +657,11 @@ pixman_have_ssse3 (void) pixman_implementation_t * _pixman_choose_implementation (void) { +#if defined(USE_SSSE3) && defined(__GNUC__) +/* not for MSVC and SUN Studio (Macro expanding issue) */ + if (pixman_have_ssse3()) + return _pixman_implementation_create_ssse3(); +#endif #ifdef USE_SSE2 if (pixman_have_sse2 ()) return _pixman_implementation_create_sse2 (); diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index 383748a..026c57f 100644 --- a/pixman/pixman-private.h +++ b/pixman/pixman-private.h @@ -503,6 +503,11 @@ 
pixman_implementation_t * _pixman_implementation_create_sse2 (void); #endif +#ifdef USE_SSSE3 +pixman_implementation_t * +_pixman_implementation_create_ssse3(void); +#endif + #ifdef USE_ARM_SIMD pixman_implementation_t * _pixman_implementation_create_arm_simd (void); diff --git a/pixman/pixman-ssse3.c b/pixman/pixman-ssse3.c new file mode 100644 index 0000000..8025ced --- /dev/null +++ b/pixman/pixman-ssse3.c @@ -0,0 +1,58 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Author: Xu Samuel ([email protected]) + * + * Based on work by Rodrigo Kumpera and André Tupinambá + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-private.h" + +#ifdef USE_SSSE3 +static const pixman_fast_path_t ssse3_fast_paths[] = { + { PIXMAN_OP_NONE }, +}; + +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) +__attribute__((__force_align_arg_pointer__)) +#endif + +pixman_implementation_t * +_pixman_implementation_create_ssse3 (void) +{ +#ifdef USE_SSE2 + pixman_implementation_t *fallback = _pixman_implementation_create_sse2(); +#elif defined(USE_MMX) + pixman_implementation_t *fallback = _pixman_implementation_create_mmx(); +#else + pixman_implementation_t *fallback = _pixman_implementation_create_fast_path(); +#endif + pixman_implementation_t *imp = _pixman_implementation_create(fallback, ssse3_fast_paths); + + return imp; +} + +#endif /* USE_SSSE3 */ -- 1.7.0.4
_______________________________________________ Pixman mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/pixman
