---
pixman/loongson-mmintrin.h | 73 ++
pixman/pixman-mmx.c        | 93 ++
2 files changed, 166 insertions(+), 0 deletions(-)
diff --git a/pixman/loongson-mmintrin.h b/pixman/loongson-mmintrin.h
index 1a114fe..f0931ac 100644
--- a/pixman/loongson-mmintrin.h
+++ b/pixman/loongson-mmintrin.h
@@ -45,6 +45,28 @@ _mm_setzero_si64 (void)
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+_mm_add_pi16 (__m64 __m1, __m64 __m2)
+{
+    __m64 ret;
+    /* Loongson MMI: parallel add of 4 x 16-bit lanes (wrapping). */
+    asm("paddh %0, %1, %2\n\t"
+       : "=f" (ret)
+       : "f" (__m1), "f" (__m2)
+    );
+    return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pi32 (__m64 __m1, __m64 __m2)
+{
+    __m64 ret;
+    /* Loongson MMI: parallel add of 2 x 32-bit lanes (wrapping). */
+    asm("paddw %0, %1, %2\n\t"
+       : "=f" (ret)
+       : "f" (__m1), "f" (__m2)
+    );
+    return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_adds_pu16 (__m64 __m1, __m64 __m2)
{
__m64 ret;
@@ -150,6 +172,35 @@ _mm_packs_pu16 (__m64 __m1, __m64 __m2)
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+_mm_packs_pi32 (__m64 __m1, __m64 __m2)
+{
+    __m64 ret;
+    /* Loongson MMI: pack 2+2 signed 32-bit lanes into 4 signed
+     * 16-bit lanes with saturation. */
+    asm("packsswh %0, %1, %2\n\t"
+       : "=f" (ret)
+       : "f" (__m1), "f" (__m2)
+    );
+    return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi16 (uint16_t __w3, uint16_t __w2, uint16_t __w1, uint16_t __w0)
+{
+    /* Assemble four 16-bit lanes into one 64-bit value, __w3 in the
+     * most significant position, then reinterpret it as __m64.  Note
+     * the address-of: we reinterpret the *storage* of val, not its
+     * numeric value as a pointer. */
+    uint64_t val = ((uint64_t)__w3 << 48)
+                 | ((uint64_t)__w2 << 32)
+                 | ((uint64_t)__w1 << 16)
+                 | ((uint64_t)__w0 <<  0);
+    return *(__m64 *)&val;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi32 (unsigned __i1, unsigned __i0)
+{
+    /* Assemble two 32-bit lanes (high, low) into one 64-bit value and
+     * reinterpret its storage as __m64. */
+    uint64_t val = ((uint64_t)__i1 << 32)
+                 | ((uint64_t)__i0 <<  0);
+    return *(__m64 *)&val;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_shuffle_pi16 (__m64 __m, int64_t __n)
{
__m64 ret;
@@ -193,6 +244,17 @@ _mm_srli_pi16 (__m64 __m, int64_t __count)
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+_mm_srli_pi32 (__m64 __m, int64_t __count)
+{
+    __m64 ret;
+    /* Loongson MMI: logical right shift of 2 x 32-bit lanes.  The
+     * shift count must sit in a floating-point (MMI) register, so
+     * reinterpret __count's storage as __m64 ("&" is essential). */
+    asm("psrlw %0, %1, %2\n\t"
+       : "=f" (ret)
+       : "f" (__m), "f" (*(__m64 *)&__count)
+    );
+    return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_srli_si64 (__m64 __m, int64_t __count)
{
__m64 ret;
@@ -204,6 +266,17 @@ _mm_srli_si64 (__m64 __m, int64_t __count)
}
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+_mm_sub_pi16 (__m64 __m1, __m64 __m2)
+{
+    __m64 ret;
+    /* Loongson MMI: parallel subtract of 4 x 16-bit lanes (wrapping). */
+    asm("psubh %0, %1, %2\n\t"
+       : "=f" (ret)
+       : "f" (__m1), "f" (__m2)
+    );
+    return ret;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
{
__m64 ret;
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index d869c04..904529f 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -42,6 +42,7 @@
#endif
#include "pixman-private.h"
#include "pixman-combine32.h"
+#include "pixman-inlines.h"
#define noVERBOSE
@@ -3506,6 +3507,94 @@ mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
_mm_empty ();
}
+/* 1 << BILINEAR_INTERPOLATION_BITS: the fixed-point weight scale. */
+#define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS))
+
+/* Broadcast the bilinear weights/coordinates into MMX registers once
+ * per scanline: top/bottom weights, the weight scale, the per-pixel
+ * x increment, zero, and the running x coordinate. */
+#define BILINEAR_DECLARE_VARIABLES                                        \
+    const __m64 mm_wt = _mm_set_pi16 (wt, wt, wt, wt);                    \
+    const __m64 mm_wb = _mm_set_pi16 (wb, wb, wb, wb);                    \
+    const __m64 mm_BSHIFT = _mm_set_pi16 (BSHIFT, BSHIFT, BSHIFT, BSHIFT); \
+    const __m64 mm_ux = _mm_set_pi16 (unit_x, unit_x, unit_x, unit_x);    \
+    const __m64 mm_zero = _mm_setzero_si64 ();                            \
+    __m64 mm_x = _mm_set_pi16 (vx, vx, vx, vx)
+
+#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix)
\
+do {
\
+/* fetch 2x2 pixel block into 2 mmx registers */
\
+__m64 t = ldq_u ((__m64 *)src_top [pixman_fixed_to_int (vx)]);
\
+__m64 b = ldq_u ((__m64 *)src_bottom [pixman_fixed_to_int (vx)]);
\
+vx += unit_x;
\
+/* vertical interpolation */
\
+__m64 t_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (t, mm_zero), mm_wt);
\
+__m64 t_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (t,