Siarhei, can you measure any performance improvement with this patch?
I can't measure one myself... :(
---
 pixman/pixman-sse2.c |    8 +++-----
 1 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index efed310..4fbc045 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -32,6 +32,7 @@
 
 #include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
 #include <emmintrin.h> /* for SSE2 intrinsics */
+#include <tmmintrin.h> /* for SSSE3 intrinsics */
 #include "pixman-private.h"
 #include "pixman-combine32.h"
 #include "pixman-inlines.h"
@@ -5414,7 +5415,7 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
 
 #define BILINEAR_INTERPOLATE_ONE_PIXEL(pix)                                    \
 do {                                                                           \
-    __m128i xmm_wh, xmm_lo, xmm_hi, a;                                         \
+    __m128i xmm_wh, a;                                                         \
     /* fetch 2x2 pixel block into sse2 registers */                            \
     __m128i tltr = _mm_loadl_epi64 (                                           \
                            (__m128i *)&src_top[pixman_fixed_to_int (vx)]);     \
@@ -5443,10 +5444,7 @@ do {                                                                           \
                _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS)));     \
        xmm_x = _mm_add_epi16 (xmm_x, xmm_ux);                                  \
        /* horizontal interpolation */                                          \
-       xmm_lo = _mm_mullo_epi16 (a, xmm_wh);                                   \
-       xmm_hi = _mm_mulhi_epu16 (a, xmm_wh);                                   \
-       a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi),                 \
-                          _mm_unpackhi_epi16 (xmm_lo, xmm_hi));                \
+       a = _mm_maddubs_epi16 (a, xmm_wh);                                      \
     }                                                                          \
     /* shift and pack the result */                                            \
     a = _mm_srli_epi32 (a, BILINEAR_INTERPOLATION_BITS * 2);                   \
-- 
1.7.8.6

_______________________________________________
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman

Reply via email to