Thank you for the patches! Some comments inline. On Wed, Aug 22, 2018 at 10:03 AM raghuveer devulapalli <raghuveer.devulapa...@intel.com> wrote: > > --- > configure.ac | 44 ++++++++++++++++++++++++++++++++++++++++++++ > pixman/Makefile.am | 12 ++++++++++++ > pixman/pixman-avx2.c | 32 ++++++++++++++++++++++++++++++++ > pixman/pixman-private.h | 5 +++++ > pixman/pixman-x86.c | 15 +++++++++++++-- > 5 files changed, 106 insertions(+), 2 deletions(-) > create mode 100644 pixman/pixman-avx2.c > > diff --git a/configure.ac b/configure.ac > index e833e45..27f4305 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -503,6 +503,48 @@ fi > AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes) > > dnl > =========================================================================== > +dnl Check for AVX2
Trailing whitespace > + > +if test "x$AVX2_CFLAGS" = "x" ; then > + AVX2_CFLAGS="-mavx2 -Winline" > +fi > + > +have_avx2_intrinsics=no > +AC_MSG_CHECKING(whether to use AVX2 intrinsics) > +xserver_save_CFLAGS=$CFLAGS > +CFLAGS="$AVX2_CFLAGS $CFLAGS" > + > +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ > +#include <immintrin.h> > +int param; > +int main () { > + __m256i a = _mm256_set1_epi32 (param), b = _mm256_set1_epi32 (param + > 1), c; > + c = _mm256_maddubs_epi16 (a, b); > + return _mm256_cvtsi256_si32(c); > +}]])], have_avx2_intrinsics=yes) > +CFLAGS=$xserver_save_CFLAGS > + > +AC_ARG_ENABLE(avx2, > + [AC_HELP_STRING([--disable-avx2], > + [disable AVX2 fast paths])], > + [enable_avx2=$enableval], [enable_avx2=auto]) > + > +if test $enable_avx2 = no ; then > + have_avx2_intrinsics=disabled > +fi > + > +if test $have_avx2_intrinsics = yes ; then > + AC_DEFINE(USE_AVX2, 1, [use AVX2 compiler intrinsics]) > +fi > + > +AC_MSG_RESULT($have_avx2_intrinsics) > +if test $enable_avx2 = yes && test $have_avx2_intrinsics = no ; then > + AC_MSG_ERROR([AVX2 intrinsics not detected]) > +fi > + > +AM_CONDITIONAL(USE_AVX2, test $have_avx2_intrinsics = yes) > + > +dnl > =========================================================================== > dnl Other special flags needed when building code using MMX or SSE > instructions > case $host_os in > solaris*) > @@ -538,6 +580,8 @@ AC_SUBST(MMX_LDFLAGS) > AC_SUBST(SSE2_CFLAGS) > AC_SUBST(SSE2_LDFLAGS) > AC_SUBST(SSSE3_CFLAGS) > +AC_SUBST(AVX2_CFLAGS) > +AC_SUBST(AVX2_LDFLAGS) > > dnl > =========================================================================== > dnl Check for VMX/Altivec > diff --git a/pixman/Makefile.am b/pixman/Makefile.am > index 581b6f6..7204621 100644 > --- a/pixman/Makefile.am > +++ b/pixman/Makefile.am > @@ -64,6 +64,18 @@ libpixman_1_la_LIBADD += libpixman-ssse3.la > ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS) > endif > > +# avx2 code > +if USE_AVX2 > +noinst_LTLIBRARIES += libpixman-avx2.la > +libpixman_avx2_la_SOURCES = \ > + pixman-avx2.c > +libpixman_avx2_la_CFLAGS = $(AVX2_CFLAGS) > +libpixman_1_la_LDFLAGS += $(AVX2_LDFLAGS) > +libpixman_1_la_LIBADD += libpixman-avx2.la > + > +ASM_CFLAGS_avx2=$(AVX2_CFLAGS) > +endif > + > # arm simd code > if USE_ARM_SIMD > noinst_LTLIBRARIES += libpixman-arm-simd.la > diff --git a/pixman/pixman-avx2.c b/pixman/pixman-avx2.c > new file mode 100644 > index 0000000..d860d67 > --- /dev/null > +++ b/pixman/pixman-avx2.c > @@ -0,0 +1,32 @@ > +#ifdef HAVE_CONFIG_H > +#include <config.h> > +#endif > + > +#include <immintrin.h> /* for AVX2 intrinsics */ > +#include "pixman-private.h" > +#include "pixman-combine32.h" > +#include "pixman-inlines.h" > + > +static const pixman_fast_path_t avx2_fast_paths[] = > +{ > + { PIXMAN_OP_NONE }, > +}; > + > +static const pixman_iter_info_t avx2_iters[] = Trailing whitespace > +{ > + { PIXMAN_null }, > +}; > + > +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) > +__attribute__((__force_align_arg_pointer__)) > +#endif > +pixman_implementation_t * > +_pixman_implementation_create_avx2 (pixman_implementation_t *fallback) > +{ > + pixman_implementation_t *imp = _pixman_implementation_create (fallback, > avx2_fast_paths); > + > + /* Set up function pointers */ > + imp->iter_info = avx2_iters; > + > + return imp; > +} > diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h > index 73a5414..b6b15df 100644 > --- a/pixman/pixman-private.h > +++ b/pixman/pixman-private.h > @@ -597,6 +597,11 @@ pixman_implementation_t * > _pixman_implementation_create_ssse3 (pixman_implementation_t *fallback); > #endif > > +#ifdef USE_AVX2 > +pixman_implementation_t * > +_pixman_implementation_create_avx2 (pixman_implementation_t *fallback); > +#endif > + > #ifdef USE_ARM_SIMD > pixman_implementation_t * > _pixman_implementation_create_arm_simd (pixman_implementation_t *fallback); > diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c > index 05297c4..687c83b 100644 > --- a/pixman/pixman-x86.c > +++ b/pixman/pixman-x86.c At the top of this file there is a preprocessor check: #if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3) I think || defined (USE_AVX2) should be added here. > @@ -40,7 +40,8 @@ typedef enum > X86_SSE = (1 << 2) | X86_MMX_EXTENSIONS, > X86_SSE2 = (1 << 3), > X86_CMOV = (1 << 4), > - X86_SSSE3 = (1 << 5) > + X86_SSSE3 = (1 << 5), > + X86_AVX2 = (1 << 6), I'm not 100% we can use trailing commas in pixman due to MSVC. Probably safer just to leave it off. > } cpu_features_t; > > #ifdef HAVE_GETISAX > @@ -119,7 +120,7 @@ pixman_cpuid (uint32_t feature, > __asm__ volatile ( > "cpuid" "\n\t" > : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) > - : "a" (feature)); > + : "a" (feature), "c" (0)); Just to make sure I'm understanding: cpuid returns AVX2 presence in bit 5 of ebx when it is executed with eax=7 and ecx=0, so we need to ensure ecx is set to 0? I think that's fine. It seems like ecx isn't required to be any particular value for the other cases. Perhaps a comment would help future readers understand. > #else > /* On x86-32 we need to be careful about the handling of %ebx > * and %esp. We can't declare either one as clobbered > @@ -172,6 +173,10 @@ detect_cpu_features (void) > features |= X86_SSE2; > if (c & (1 << 9)) > features |= X86_SSSE3; > + Spurious whitespace > + pixman_cpuid (0x07, &a, &b, &c, &d); > + if (b & (1 << 5)) > + features |= X86_AVX2; > > /* Check for AMD specific features */ > if ((features & X86_MMX) && !(features & X86_SSE)) > @@ -228,6 +233,7 @@ _pixman_x86_get_implementations (pixman_implementation_t > *imp) > #define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS) > #define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2) > #define SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3) > +#define AVX2_BITS (X86_AVX2) > > #ifdef USE_X86_MMX > if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS)) > @@ -244,5 +250,10 @@ _pixman_x86_get_implementations (pixman_implementation_t > *imp) > imp = _pixman_implementation_create_ssse3 (imp); > #endif > > +#if (defined USE_AVX2 && defined USE_SSE2) > + if (!_pixman_disabled ("avx2") && have_feature (AVX2_BITS)) > + imp = _pixman_implementation_create_avx2(imp); > +#endif > + > return imp; > } > -- > 2.7.4 _______________________________________________ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman