Module: Mesa
Branch: llvmpipe-rast-64
Commit: 410266f5477ebe683f15682efdcaaa0e9e1d9a8f
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=410266f5477ebe683f15682efdcaaa0e9e1d9a8f
Author: José Fonseca <[email protected]> Date: Tue Oct 29 12:39:10 2013 +0000 llvmpipe: Take back AVX2 intrinsic support. The percentage of AVX2 systems out there is so small that it's hard to justify hand-coding AVX2 intrinsic code paths at this moment. (Of course, we can support AVX2 in LLVM IR though, but at the moment rasterization code does not use LLVM JIT.) --- scons/gallium.py | 2 +- src/gallium/auxiliary/util/u_sse.h | 25 --------- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 71 +-------------------------- src/gallium/include/pipe/p_config.h | 14 +----- 4 files changed, 4 insertions(+), 108 deletions(-) diff --git a/scons/gallium.py b/scons/gallium.py index 21cd7c0..be3c3e7 100755 --- a/scons/gallium.py +++ b/scons/gallium.py @@ -367,7 +367,7 @@ def generate(env): '-mtune=i686' # use i686 where we can ] if env['machine'] == 'x86_64': - ccflags += ['-m64', '-mavx', '-mavx2'] + ccflags += ['-m64'] if platform == 'darwin': ccflags += ['-fno-common'] if env['platform'] not in ('cygwin', 'haiku', 'windows'): diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index 4ba8b61..d100c47 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -254,31 +254,6 @@ transpose4_epi32(const __m128i * restrict a, #define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i)) -#if defined(PIPE_ARCH_AVX2) - -#include <immintrin.h> - -static INLINE void -transpose4_epi64(const __m256i * restrict a, - const __m256i * restrict b, - const __m256i * restrict c, - const __m256i * restrict d, - __m256i * restrict o, - __m256i * restrict p, - __m256i * restrict q, - __m256i * restrict r) -{ - __m256i t0 = _mm256_unpacklo_epi64(*a, *b); - __m256i t1 = _mm256_unpacklo_epi64(*c, *d); - __m256i t2 = _mm256_unpackhi_epi64(*a, *b); - __m256i t3 = _mm256_unpackhi_epi64(*c, *d); - - *o = _mm256_unpacklo_epi64(t0, t1); - *p = _mm256_unpackhi_epi64(t0, t1); - *q = _mm256_unpacklo_epi64(t2, t3); - *r = 
_mm256_unpackhi_epi64(t2, t3); -} -#endif /* PIPE_ARCH_AVX2 */ #endif /* PIPE_ARCH_SSE */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index aae48e7..1942c21 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -42,10 +42,6 @@ #define NUM_CHANNELS 4 -#if defined(PIPE_ARCH_AVX2) -#include <immintrin.h> -#endif - #if defined(PIPE_ARCH_SSE) #include <emmintrin.h> #endif @@ -366,72 +362,7 @@ do_triangle_ccw(struct lp_setup_context *setup, plane = GET_PLANES(tri); -#if defined(PIPE_ARCH_AVX2) - { - __m256i vertx, verty; - __m256i shufx, shufy; - __m256i dcdx, dcdy, c; - __m256i unused; - __m256i dcdx_neg_mask; - __m256i dcdy_neg_mask; - __m256i dcdx_zero_mask; - __m256i top_left_flag; - __m256i c_inc_mask, c_inc; - __m256i eo, p0, p1, p2; - __m256i zero = _mm256_setzero_si256(); - - vertx = _mm256_loadu_si256((__m256i *)position->x); /* vertex x coords */ - verty = _mm256_loadu_si256((__m256i *)position->y); /* vertex y coords */ - - shufx = _mm256_shuffle_epi64(vertx, _MM_SHUFFLE(3,0,2,1)); - shufy = _mm256_shuffle_epi64(verty, _MM_SHUFFLE(3,0,2,1)); - - dcdx = _mm256_sub_epi64(verty, shufy); - dcdy = _mm256_sub_epi64(vertx, shufx); - - dcdx_neg_mask = _mm256_srai_epi64(dcdx, FIXED_SHIFT); - dcdx_zero_mask = _mm256_cmpeq_epi64(dcdx, zero); - dcdy_neg_mask = _mm256_srai_epi64(dcdy, FIXED_SHIFT); - - top_left_flag = _mm256_set1_epi64x( - (setup->bottom_edge_rule == 0) ? 
~0 : 0); - - c_inc_mask = _mm256_or_si256( - dcdx_neg_mask, - _mm256_and_si256(dcdx_zero_mask, - _mm256_xor_si256(dcdy_neg_mask, - top_left_flag))); - - c_inc = _mm256_srli_epi64(c_inc_mask, FIXED_SHIFT); - - c = _mm256_sub_epi64(mm256_mullo_epi64(dcdx, vertx), - mm256_mullo_epi64(dcdy, verty)); - - c = _mm256_add_epi64(c, c_inc); - - /* Scale up to match c: - */ - dcdx = _mm256_slli_epi64(dcdx, FIXED_ORDER); - dcdy = _mm256_slli_epi64(dcdy, FIXED_ORDER); - - /* Calculate trivial reject values: - */ - eo = _mm256_sub_epi64(_mm256_andnot_si256(dcdy_neg_mask, dcdy), - _mm256_and_si256(dcdx_neg_mask, dcdx)); - - /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */ - - /* Pointless transpose which gets undone immediately in - * rasterization: - */ - transpose4_epi64(&c, &dcdx, &dcdy, &eo, - &p0, &p1, &p2, &unused); - - _mm256_store_si256((__m256i *)&plane[0], p0); - _mm256_store_si256((__m256i *)&plane[1], p1); - _mm256_store_si256((__m256i *)&plane[2], p2); - } -#elif defined(PIPE_ARCH_SSE) +#if defined(PIPE_ARCH_SSE) { __m128i vertx, verty; __m128i shufx, shufy; diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index 374d77a..9bccf32 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -98,22 +98,12 @@ #if defined(PIPE_CC_GCC) && !defined(__SSE2__) /* #warning SSE2 support requires -msse -msse2 compiler options */ #else -//#define PIPE_ARCH_SSE +#define PIPE_ARCH_SSE #endif #if defined(PIPE_CC_GCC) && !defined(__SSSE3__) /* #warning SSE3 support requires -msse3 compiler options */ #else -//#define PIPE_ARCH_SSSE3 -#endif -#if defined(PIPE_CC_GCC) && !defined(__AVX__) -/* #warning AVX support requires -mavx compiler options */ -#else -//#define PIPE_ARCH_AVX -#endif -#if defined(PIPE_CC_GCC) && !defined(__AVX2__) -/* #warning AVX2 support requires -mavx2 compiler options */ -#else -//#define PIPE_ARCH_AVX2 +#define PIPE_ARCH_SSSE3 #endif #endif 
_______________________________________________
mesa-commit mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/mesa-commit
