Re: [Pixman] [PATCH 1/5] mmx: add scaled bilinear src_8888_8888

2012-07-01 Thread Matt Turner
On Sun, Jul 1, 2012 at 12:56 PM, Søren Sandmann <sandm...@cs.au.dk> wrote:
> Matt Turner <matts...@gmail.com> writes:

>> +SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8,  a8r8g8b8, 
>> mmx_8888_8888 ),
>> +SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8,  x8r8g8b8, 
>> mmx_8888_8888 ),
>> +SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8,  x8r8g8b8, 
>> mmx_8888_8888 ),
>> +

> Looks like the abgr entries are missing.


> Soren

Indeed. They're missing from SSE2 as well. I'll fix that up when I push it.
___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 1/5] mmx: add scaled bilinear src_8888_8888

2012-06-27 Thread Matt Turner
---
 pixman/loongson-mmintrin.h |   73 ++
 pixman/pixman-mmx.c|   93 
 2 files changed, 166 insertions(+), 0 deletions(-)

diff --git a/pixman/loongson-mmintrin.h b/pixman/loongson-mmintrin.h
index 1a114fe..f0931ac 100644
--- a/pixman/loongson-mmintrin.h
+++ b/pixman/loongson-mmintrin.h
@@ -45,6 +45,28 @@ _mm_setzero_si64 (void)
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
+_mm_add_pi16 (__m64 __m1, __m64 __m2)
+{
+   __m64 ret;
+   asm("paddh %0, %1, %2\n\t"
+  : "=f" (ret)
+  : "f" (__m1), "f" (__m2)
+   );
+   return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
+_mm_add_pi32 (__m64 __m1, __m64 __m2)
+{
+   __m64 ret;
+   asm("paddw %0, %1, %2\n\t"
+  : "=f" (ret)
+  : "f" (__m1), "f" (__m2)
+   );
+   return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_adds_pu16 (__m64 __m1, __m64 __m2)
 {
__m64 ret;
@@ -150,6 +172,35 @@ _mm_packs_pu16 (__m64 __m1, __m64 __m2)
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
+_mm_packs_pi32 (__m64 __m1, __m64 __m2)
+{
+   __m64 ret;
+   asm("packsswh %0, %1, %2\n\t"
+  : "=f" (ret)
+  : "f" (__m1), "f" (__m2)
+   );
+   return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
+_mm_set_pi16 (uint16_t __w3, uint16_t __w2, uint16_t __w1, uint16_t __w0)
+{
+   uint64_t val = ((uint64_t)__w3 << 48)
+| ((uint64_t)__w2 << 32)
+| ((uint64_t)__w1 << 16)
+| ((uint64_t)__w0 <<  0);
+   return *(__m64 *)&val;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
+_mm_set_pi32 (unsigned __i1, unsigned __i0)
+{
+   uint64_t val = ((uint64_t)__i1 << 32)
+| ((uint64_t)__i0 <<  0);
+   return *(__m64 *)&val;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_shuffle_pi16 (__m64 __m, int64_t __n)
 {
__m64 ret;
@@ -193,6 +244,17 @@ _mm_srli_pi16 (__m64 __m, int64_t __count)
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
+_mm_srli_pi32 (__m64 __m, int64_t __count)
+{
+   __m64 ret;
+   asm("psrlw %0, %1, %2\n\t"
+  : "=f" (ret)
+  : "f" (__m), "f" (*(__m64 *)&__count)
+   );
+   return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_srli_si64 (__m64 __m, int64_t __count)
 {
__m64 ret;
@@ -204,6 +266,17 @@ _mm_srli_si64 (__m64 __m, int64_t __count)
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
+_mm_sub_pi16 (__m64 __m1, __m64 __m2)
+{
+   __m64 ret;
+   asm("psubh %0, %1, %2\n\t"
+  : "=f" (ret)
+  : "f" (__m1), "f" (__m2)
+   );
+   return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
 {
__m64 ret;
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index d869c04..904529f 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -42,6 +42,7 @@
 #endif
 #include "pixman-private.h"
 #include "pixman-combine32.h"
+#include "pixman-inlines.h"
 
 #define no_vERBOSE
 
@@ -3506,6 +3507,94 @@ mmx_composite_over_reverse_n_8888 
(pixman_implementation_t *imp,
 _mm_empty ();
 }
 
+#define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS))
+
+#define BILINEAR_DECLARE_VARIABLES 
\
+const __m64 mm_wt = _mm_set_pi16 (wt, wt, wt, wt); 
\
+const __m64 mm_wb = _mm_set_pi16 (wb, wb, wb, wb); 
\
+const __m64 mm_BSHIFT = _mm_set_pi16 (BSHIFT, BSHIFT, BSHIFT, BSHIFT); 
\
+const __m64 mm_ux = _mm_set_pi16 (unit_x, unit_x, unit_x, unit_x); 
\
+const __m64 mm_zero = _mm_setzero_si64 (); 
\
+__m64 mm_x = _mm_set_pi16 (vx, vx, vx, vx)
+
+#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix)
\
+do {   
\
+/* fetch 2x2 pixel block into 2 mmx registers */   
\
+__m64 t = ldq_u ((__m64 *)&src_top [pixman_fixed_to_int (vx)]);
\
+__m64 b = ldq_u ((__m64 *)&src_bottom [pixman_fixed_to_int (vx)]); 
\
+vx += unit_x;  
\
+/* vertical interpolation */   
\
+__m64 t_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (t, mm_zero), mm_wt);
\
+__m64 t_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (t,