Please note -- the following Pixman patch has been updated to reflect comments 
received after an earlier submission. It was tested successfully on real 
hardware with Pixman 0.21.2.


From 9ecf61f2072d8f51f845b086686380ff3c34f861 Mon Sep 17 00:00:00 2001
From: Georgi Beloev <[email protected]>
Date: Wed, 1 Dec 2010 14:34:06 -0800
Subject: [PATCH] Added MIPS32R2 and MIPS DSP ASE optimized functions.

The following functions were implemented for MIPS32R2:
  - pixman_fill32()
  - fast_composite_over_n_8_8888()

The following functions were implemented for MIPS DSP ASE:
  - combine_over_u()
  - fast_composite_over_n_8_8888()

Additionally, MIPS DSP ASE uses the MIPS32R2 pixman_fill32() function.

Use configure commands similar to the ones below to select the target
processor and, correspondingly, the target instruction set:

  - MIPS32R2: configure CFLAGS='-march=24kc -O2'
  - MIPS DSP ASE: configure CFLAGS='-march=24kec -O2'
---
 configure.ac                     |   63 +++++++++++
 pixman/Makefile.am               |   22 ++++
 pixman/pixman-cpu.c              |   37 ++++++-
 pixman/pixman-mips-dspase1-asm.S |  221 ++++++++++++++++++++++++++++++++++++++
 pixman/pixman-mips-dspase1.c     |  128 ++++++++++++++++++++++
 pixman/pixman-mips32r2-asm.S     |  209 +++++++++++++++++++++++++++++++++++
 pixman/pixman-mips32r2.c         |  138 ++++++++++++++++++++++++
 pixman/pixman-private.h          |   11 ++
 8 files changed, 824 insertions(+), 5 deletions(-)
 create mode 100644 pixman/pixman-mips-dspase1-asm.S
 create mode 100644 pixman/pixman-mips-dspase1.c
 create mode 100644 pixman/pixman-mips32r2-asm.S
 create mode 100644 pixman/pixman-mips32r2.c

diff --git a/configure.ac b/configure.ac
index 77cd3f8..058d48b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -565,6 +565,69 @@ fi
 
 AM_CONDITIONAL(USE_GCC_INLINE_ASM, test $have_gcc_inline_asm = yes)
 
+dnl ==========================================================================
+dnl Check if the compiler supports MIPS32R2 instructions
+dnl (probe: try to compile a function containing an inline-asm "ext")
+AC_MSG_CHECKING(whether to use MIPS32R2 instructions)
+AC_COMPILE_IFELSE([[
+void test()
+{
+        asm("ext \$v0,\$a0,8,8");
+}
+]], have_mips32r2=yes, have_mips32r2=no)
+
+AC_ARG_ENABLE(mips32r2,
+   [AC_HELP_STRING([--disable-mips32r2],
+                   [disable MIPS32R2 fast paths])],
+   [enable_mips32r2=$enableval], [enable_mips32r2=auto])
+
+if test $enable_mips32r2 = no ; then
+   have_mips32r2=disabled
+fi
+
+if test $have_mips32r2 = yes ; then
+   AC_DEFINE(USE_MIPS32R2, 1, [use MIPS32R2 optimizations])
+fi
+
+AM_CONDITIONAL(USE_MIPS32R2, test $have_mips32r2 = yes)
+
+AC_MSG_RESULT($have_mips32r2)
+if test $enable_mips32r2 = yes && test $have_mips32r2 = no ; then
+   AC_MSG_ERROR([MIPS32R2 not detected])
+fi
+
+
+dnl ==========================================================================
+dnl Check if the compiler supports MIPS DSP ASE Rev 1 instructions
+dnl (probe: try to compile a function containing an inline-asm "addu.qb")
+AC_MSG_CHECKING(whether to use MIPS DSP ASE Rev 1 instructions)
+AC_COMPILE_IFELSE([[
+void test()
+{
+        asm("addu.qb \$v0,\$a0,\$a1");
+}
+]], have_mips_dspase1=yes, have_mips_dspase1=no)
+
+AC_ARG_ENABLE(mips-dspase1,
+   [AC_HELP_STRING([--disable-mips-dspase1],
+                   [disable MIPS DSP ASE Rev 1 fast paths])],
+   [enable_mips_dspase1=$enableval], [enable_mips_dspase1=auto])
+
+if test $enable_mips_dspase1 = no ; then
+   have_mips_dspase1=disabled
+fi
+
+if test $have_mips_dspase1 = yes ; then
+   AC_DEFINE(USE_MIPS_DSPASE1, 1, [use MIPS DSP ASE Rev 1 optimizations])
+fi
+
+AM_CONDITIONAL(USE_MIPS_DSPASE1, test $have_mips_dspase1 = yes)
+
+AC_MSG_RESULT($have_mips_dspase1)
+if test $enable_mips_dspase1 = yes && test $have_mips_dspase1 = no ; then
+   AC_MSG_ERROR([MIPS DSP ASE Rev 1 not detected])
+fi
+
 dnl ==============================================
 dnl Timers
 
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index ca31301..d832db1 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -123,5 +123,27 @@ libpixman_1_la_LIBADD += libpixman-arm-neon.la
 ASM_CFLAGS_arm_neon=
 endif
 
+# MIPS32R2 fast paths: built as a convenience library and added to libpixman-1
+if USE_MIPS32R2
+noinst_LTLIBRARIES += libpixman-mips32r2.la
+libpixman_mips32r2_la_SOURCES = \
+       pixman-mips32r2.c \
+       pixman-mips32r2-asm.S
+libpixman_mips32r2_la_CFLAGS = $(DEP_CFLAGS)
+libpixman_mips32r2_la_LIBADD = $(DEP_LIBS)
+libpixman_1_la_LIBADD += libpixman-mips32r2.la
+endif
+
+# MIPS DSP ASE Rev 1 fast paths: convenience library added to libpixman-1
+if USE_MIPS_DSPASE1
+noinst_LTLIBRARIES += libpixman-mips-dspase1.la
+libpixman_mips_dspase1_la_SOURCES = \
+       pixman-mips-dspase1.c \
+       pixman-mips-dspase1-asm.S
+libpixman_mips_dspase1_la_CFLAGS = $(DEP_CFLAGS)
+libpixman_mips_dspase1_la_LIBADD = $(DEP_LIBS)
+libpixman_1_la_LIBADD += libpixman-mips-dspase1.la
+endif
+
 .c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES)
        $(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $<
diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index e4fb1e4..9d2f3f3 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -568,29 +568,56 @@ pixman_have_sse2 (void)
 #endif /* __amd64__ */
 #endif
 
+#ifdef USE_MIPS32R2
+/* note: no runtime check for MIPS32R2 support; always TRUE when built in */
+#define pixman_have_mips32r2() TRUE
+#endif
+
+#ifdef USE_MIPS_DSPASE1
+/* note: no runtime check for MIPS DSP ASE Rev 1 support; always TRUE when built in */
+#define pixman_have_mips_dspase1() TRUE
+#endif
+
+
 pixman_implementation_t *
 _pixman_choose_implementation (void)
 {
 #ifdef USE_SSE2
     if (pixman_have_sse2 ())
-       return _pixman_implementation_create_sse2 ();
+        return _pixman_implementation_create_sse2 ();
 #endif
 #ifdef USE_MMX
     if (pixman_have_mmx ())
-       return _pixman_implementation_create_mmx ();
+        return _pixman_implementation_create_mmx ();
 #endif
 
 #ifdef USE_ARM_NEON
     if (pixman_have_arm_neon ())
-       return _pixman_implementation_create_arm_neon ();
+        return _pixman_implementation_create_arm_neon ();
 #endif
 #ifdef USE_ARM_SIMD
     if (pixman_have_arm_simd ())
-       return _pixman_implementation_create_arm_simd ();
+        return _pixman_implementation_create_arm_simd ();
 #endif
 #ifdef USE_VMX
     if (pixman_have_vmx ())
-       return _pixman_implementation_create_vmx ();
+        return _pixman_implementation_create_vmx ();
+#endif
+
+#if defined (USE_MIPS32R2) && defined (USE_MIPS_DSPASE1)
+    /* DSPASE1 version uses some MIPS32R2 functions; both need to be enabled */
+    if (pixman_have_mips32r2 () && pixman_have_mips_dspase1 ())
+    {
+        pixman_implementation_t *mips32r2 =
+            _pixman_implementation_create_mips32r2 (NULL);
+
+        return _pixman_implementation_create_mips_dspase1 (mips32r2);
+    }
+#endif
+
+#ifdef USE_MIPS32R2
+    if (pixman_have_mips32r2 ())
+        return _pixman_implementation_create_mips32r2 (NULL);
 #endif
 
     return _pixman_implementation_create_fast_path ();
diff --git a/pixman/pixman-mips-dspase1-asm.S b/pixman/pixman-mips-dspase1-asm.S
new file mode 100644
index 0000000..53d777d
--- /dev/null
+++ b/pixman/pixman-mips-dspase1-asm.S
@@ -0,0 +1,221 @@
+/*
+ * Copyright 2010, MIPS Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _ABIO32
+#error This code works with the MIPS o32 ABI!
+#endif
+
+
+        .text
+        .set            noreorder
+        .set            nomacro
+
+
+/***************************************************************************
+void
+mips_dspase1_combine_over_u_nomask (uint32_t *dest, const uint32_t *src,
+                                    const uint32_t *mask, int width)
+***************************************************************************/
+
+        .global         mips_dspase1_combine_over_u_nomask
+        .ent            mips_dspase1_combine_over_u_nomask
+
+/* note: this version to be used only when mask = NULL */
+
+mips_dspase1_combine_over_u_nomask:
+        beqz            $a3, 1f         /* nothing to do when width == 0 */
+        subu            $v0, $a1, $a0   /* diff = src - dest (for LWX) */
+
+        sll             $a3, $a3, 2     /* width <<= 2 */
+        addu            $a3, $a0, $a3   /* dest_end = dest + width */
+
+        li              $t9, 0x00800080 /* rounding bias, 0x80 per halfword */
+
+0:
+        /* Load src and dest at the top of each pass.  Preloading them for
+         * the next pass would read one word beyond both buffers on the
+         * final iteration. */
+        lwx             $t1, $v0($a0)   /* src (dest + diff) */
+        lw              $t0, 0($a0)     /* dest */
+
+        not             $t2, $t1        /* ~src */
+        srl             $t2, $t2, 24    /* ALPHA_8(~src) */
+        ins             $t2, $t2, 16, 8 /* 0:a:0:a; equivalent to replv.ph */
+
+        muleu_s.ph.qbl  $t3, $t0, $t2   /* upper two bytes of dest * a */
+        muleu_s.ph.qbr  $t4, $t0, $t2   /* lower two bytes of dest * a */
+
+        addiu           $a0, $a0, 4     /* dest++ */
+
+        addu            $t3, $t3, $t9   /* can't overflow; rev2: addu_s.ph */
+        addu            $t4, $t4, $t9   /* can't overflow; rev2: addu_s.ph */
+        preceu.ph.qbla  $t5, $t3        /* rev2: shrl.ph */
+        preceu.ph.qbla  $t6, $t4        /* rev2: shrl.ph */
+        addu            $t3, $t3, $t5   /* can't overflow; rev2: addu_s.ph */
+        addu            $t4, $t4, $t6   /* can't overflow; rev2: addu_s.ph */
+
+        precrq.qb.ph    $t3, $t3, $t4   /* keep high byte of each halfword */
+        addu_s.qb       $t3, $t3, $t1   /* + src, saturating per byte */
+
+        bne             $a0, $a3, 0b
+        sw              $t3, -4($a0)    /* dest */
+
+1:
+        jr              $ra
+        nop
+
+        .end            mips_dspase1_combine_over_u_nomask
+
+
+/***************************************************************************
+void
+mips_dspase1_combine_over_u_mask (uint32_t *dest, const uint32_t *src,
+                                  const uint32_t *mask, int width)
+***************************************************************************/
+
+        .global         mips_dspase1_combine_over_u_mask
+        .ent            mips_dspase1_combine_over_u_mask
+
+/* note: this version to be used only when mask != NULL */
+
+mips_dspase1_combine_over_u_mask:
+        beqz            $a3, 1f         /* nothing to do when width == 0 */
+        subu            $v0, $a1, $a0   /* sdiff = src - dest (for LWX) */
+
+        subu            $v1, $a2, $a0   /* mdiff = mask - dest (for LWX) */
+
+        sll             $a3, $a3, 2     /* width <<= 2 */
+        addu            $a3, $a0, $a3   /* dest_end = dest + width */
+
+        li              $t9, 0x00800080 /* rounding bias, 0x80 per halfword */
+
+0:
+        lwx             $t8, $v1($a0)   /* mask (dest + mdiff) */
+        lwx             $t1, $v0($a0)   /* src (dest + sdiff) */
+
+        srl             $t8, $t8, 24    /* mask >>= A_SHIFT */
+        ins             $t8, $t8, 16, 8 /* 0:m:0:m; equivalent to replv.ph */
+
+        muleu_s.ph.qbl  $t3, $t1, $t8   /* upper two bytes of src * m */
+        muleu_s.ph.qbr  $t4, $t1, $t8   /* lower two bytes of src * m */
+
+        lw              $t0, 0($a0)     /* dest */
+
+        addu            $t3, $t3, $t9   /* can't overflow; rev2: addu_s.ph */
+        addu            $t4, $t4, $t9   /* can't overflow; rev2: addu_s.ph */
+        preceu.ph.qbla  $t5, $t3        /* rev2: shrl.ph */
+        preceu.ph.qbla  $t6, $t4        /* rev2: shrl.ph */
+        addu            $t3, $t3, $t5   /* can't overflow; rev2: addu_s.ph */
+        addu            $t4, $t4, $t6   /* can't overflow; rev2: addu_s.ph */
+        precrq.qb.ph    $t1, $t3, $t4   /* src = high bytes: masked source */
+
+        not             $t2, $t1        /* ~src */
+        srl             $t2, $t2, 24    /* ALPHA_8(~src) */
+        ins             $t2, $t2, 16, 8 /* 0:a:0:a; equivalent to replv.ph */
+
+        muleu_s.ph.qbl  $t3, $t0, $t2   /* upper two bytes of dest * a */
+        muleu_s.ph.qbr  $t4, $t0, $t2   /* lower two bytes of dest * a */
+
+        addiu           $a0, $a0, 4     /* dest++ */
+
+        addu            $t3, $t3, $t9   /* can't overflow; rev2: addu_s.ph */
+        addu            $t4, $t4, $t9   /* can't overflow; rev2: addu_s.ph */
+        preceu.ph.qbla  $t5, $t3        /* rev2: shrl.ph */
+        preceu.ph.qbla  $t6, $t4        /* rev2: shrl.ph */
+        addu            $t3, $t3, $t5   /* can't overflow; rev2: addu_s.ph */
+        addu            $t4, $t4, $t6   /* can't overflow; rev2: addu_s.ph */
+        precrq.qb.ph    $t3, $t3, $t4   /* keep high byte of each halfword */
+        addu_s.qb       $t3, $t3, $t1   /* + masked src, saturating per byte */
+
+        bne             $a0, $a3, 0b    /* loop until dest reaches dest_end */
+        sw              $t3, -4($a0)    /* dest */
+
+1:
+        jr              $ra
+        nop
+
+        .end            mips_dspase1_combine_over_u_mask
+
+
+/***************************************************************************
+void
+mips_dspase1_composite_over_n_8_8888_inner (uint32_t *dest, const uint32_t src,
+                                            const uint8_t *mask, int width)
+***************************************************************************/
+
+        .global         mips_dspase1_composite_over_n_8_8888_inner
+        .ent            mips_dspase1_composite_over_n_8_8888_inner
+
+mips_dspase1_composite_over_n_8_8888_inner:
+        beqz            $a3, 1f         /* nothing to do when width == 0 */
+        sll             $a3, $a3, 2     /* width <<= 2 */
+
+        addu            $a3, $a0, $a3   /* dest_end = dest + width */
+
+        li              $t9, 0x00800080 /* rounding bias, 0x80 per halfword */
+
+0:
+        lbu             $t8, 0($a2)     /* mask */
+        lw              $t0, 0($a0)     /* dest */
+        ins             $t8, $t8, 16, 8 /* 0:m:0:m; equivalent to replv.ph */
+
+        muleu_s.ph.qbl  $t3, $a1, $t8   /* upper two bytes of src * m */
+        muleu_s.ph.qbr  $t4, $a1, $t8   /* lower two bytes of src * m */
+
+        addiu           $a0, $a0, 4     /* dest++ */
+        addiu           $a2, $a2, 1     /* mask++ */
+
+        addu            $t3, $t3, $t9   /* can't overflow; rev2: addu_s.ph */
+        addu            $t4, $t4, $t9   /* can't overflow; rev2: addu_s.ph */
+        preceu.ph.qbla  $t5, $t3        /* rev2: shrl.ph */
+        preceu.ph.qbla  $t6, $t4        /* rev2: shrl.ph */
+        addu            $t3, $t3, $t5   /* can't overflow; rev2: addu_s.ph */
+        addu            $t4, $t4, $t6   /* can't overflow; rev2: addu_s.ph */
+        precrq.qb.ph    $t1, $t3, $t4   /* in(src,m) */
+
+        not             $t2, $t1        /* ~in(src,m) */
+        srl             $t2, $t2, 24    /* top byte of ~in(src,m) */
+        ins             $t2, $t2, 16, 8 /* 0:a:0:a; equivalent to replv.ph */
+
+        muleu_s.ph.qbl  $t3, $t0, $t2   /* upper two bytes of dest * a */
+        muleu_s.ph.qbr  $t4, $t0, $t2   /* lower two bytes of dest * a */
+
+        addu            $t3, $t3, $t9   /* can't overflow; rev2: addu_s.ph */
+        addu            $t4, $t4, $t9   /* can't overflow; rev2: addu_s.ph */
+        preceu.ph.qbla  $t5, $t3        /* rev2: shrl.ph */
+        preceu.ph.qbla  $t6, $t4        /* rev2: shrl.ph */
+        addu            $t3, $t3, $t5   /* can't overflow; rev2: addu_s.ph */
+        addu            $t4, $t4, $t6   /* can't overflow; rev2: addu_s.ph */
+        precrq.qb.ph    $t3, $t3, $t4   /* keep high byte of each halfword */
+        addu_s.qb       $t3, $t3, $t1   /* over(in(src,m),dest) */
+
+        bne             $a0, $a3, 0b    /* loop until dest reaches dest_end */
+        sw              $t3, -4($a0)    /* dest */
+
+1:
+        jr              $ra
+        nop
+
+        .end            mips_dspase1_composite_over_n_8_8888_inner
+
diff --git a/pixman/pixman-mips-dspase1.c b/pixman/pixman-mips-dspase1.c
new file mode 100644
index 0000000..7de4052
--- /dev/null
+++ b/pixman/pixman-mips-dspase1.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2010, MIPS Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-private.h"
+
+
+/* assembly-language functions (defined in pixman-mips-dspase1-asm.S) */
+
+void
+mips_dspase1_combine_over_u_nomask (uint32_t *dest, const uint32_t *src,
+                                    const uint32_t *mask, int width);
+
+void
+mips_dspase1_combine_over_u_mask (uint32_t *dest, const uint32_t *src,
+                                  const uint32_t *mask, int width);
+
+void
+mips_dspase1_composite_over_n_8_8888_inner (uint32_t *dest, uint32_t src,
+                                            const uint8_t *mask, int width);
+
+
+/***************************************************************************/
+
+/* OVER combiner: picks the assembly loop that matches the presence of a mask */
+static void
+mips_dspase1_combine_over_u (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             uint32_t *               dest,
+                             const uint32_t *         src,
+                             const uint32_t *         mask,
+                             int                      width)
+{
+    if (mask == NULL)
+        mips_dspase1_combine_over_u_nomask (dest, src, NULL, width);
+    else
+        mips_dspase1_combine_over_u_mask (dest, src, mask, width);
+}
+
+/* Fast path for OVER with a solid source and an 8-bit mask onto a 32-bit
+ * destination: each scanline is handed to the DSP ASE assembly loop. */
+static void
+mips_dspase1_fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
+                                           pixman_op_t              op,
+                                           pixman_image_t *         src_image,
+                                           pixman_image_t *         mask_image,
+                                           pixman_image_t *         dst_image,
+                                           int32_t                  src_x,
+                                           int32_t                  src_y,
+                                           int32_t                  mask_x,
+                                           int32_t                  mask_y,
+                                           int32_t                  dest_x,
+                                           int32_t                  dest_y,
+                                           int32_t                  width,
+                                           int32_t                  height)
+{
+    uint32_t src;
+    uint32_t *dst_line, *dst;
+    uint8_t  *mask_line, *mask;
+    int dst_stride, mask_stride;
+
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+
+    if (src == 0)
+        return; /* nothing to composite for an all-zero source pixel */
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+    while (height--)
+    {
+        dst = dst_line;
+        dst_line += dst_stride;
+        mask = mask_line;
+        mask_line += mask_stride;
+
+        mips_dspase1_composite_over_n_8_8888_inner (dst, src, mask, width);
+    }
+}
+
+
+/***************************************************************************/
+
+/* fast paths offered by this implementation; the last entry is PIXMAN_OP_NONE */
+static const pixman_fast_path_t mips_dspase1_fast_paths[] =
+{
+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mips_dspase1_fast_composite_over_n_8_8888),
+    { PIXMAN_OP_NONE }
+};
+
+/* Create the MIPS DSP ASE implementation layered on top of 'delegate'. */
+pixman_implementation_t *
+_pixman_implementation_create_mips_dspase1 (pixman_implementation_t *delegate)
+{
+    pixman_implementation_t *dspase1;
+
+    /* no delegate supplied: layer on the generic fast-path implementation */
+    if (!delegate)
+        delegate = _pixman_implementation_create_fast_path ();
+
+    dspase1 = _pixman_implementation_create (delegate, mips_dspase1_fast_paths);
+    dspase1->combine_32[PIXMAN_OP_OVER] = mips_dspase1_combine_over_u;
+    return dspase1;
+}
diff --git a/pixman/pixman-mips32r2-asm.S b/pixman/pixman-mips32r2-asm.S
new file mode 100644
index 0000000..02428a4
--- /dev/null
+++ b/pixman/pixman-mips32r2-asm.S
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2010, MIPS Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _ABIO32
+#error This code works with the MIPS o32 ABI!
+#endif
+
+
+        .text
+        .set            noreorder
+        .set            nomacro
+
+/***************************************************************************
+pixman_bool_t
+mips32r2_pixman_fill32 (uint32_t *bits, int stride, int x, int y,
+                        int width, int height, uint32_t  xor)
+***************************************************************************/
+
+        .global         mips32r2_pixman_fill32
+        .ent            mips32r2_pixman_fill32
+
+mips32r2_pixman_fill32:
+        mul             $a3, $a1, $a3   /* y * stride */
+        addu            $a3, $a3, $a2   /* + x */
+        sll             $a3, $a3, 2     /* scale to a byte offset */
+        addu            $a0, $a0, $a3   /* bits = bits + y * stride + x */
+
+        lw              $a2, 16($sp)    /* width */
+        lw              $a3, 20($sp)    /* height */
+        lw              $v0, 24($sp)    /* xor */
+
+        li              $t0, ~7
+        beqz            $a3, 5f         /* exit if height = 0 */
+        and             $t0, $a2, $t0   /* width8 = width & ~7 */
+
+        sll             $a1, $a1, 2     /* stride <<= 2 */
+        sll             $t0, $t0, 2     /* width8 <<= 2 */
+        sll             $a2, $a2, 2     /* width <<= 2 */
+
+0:
+        move            $t1, $a0        /* b = bits */
+        addu            $t2, $t1, $t0   /* b + width8 */
+
+        beq             $t1, $t2, 2f    /* skip unrolled loop if not enough samples */
+        addu            $t3, $t1, $a2   /* b + width */
+
+1:
+        sw              $v0, 0($t1)
+        sw              $v0, 4($t1)
+        sw              $v0, 8($t1)
+        sw              $v0,12($t1)
+        sw              $v0,16($t1)
+        sw              $v0,20($t1)
+        sw              $v0,24($t1)
+
+        addiu           $t1, $t1, 32    /* b += 8 */
+        bne             $t1, $t2, 1b    /* b = (bits + width8)? */
+        sw              $v0, -4($t1)    /* eighth store, in the branch delay slot */
+
+2:
+        beq             $t1, $t3, 4f    /* skip single-sample loop if all work done */
+        addiu           $a3, $a3, -1    /* height-- */
+
+3:
+        addiu           $t1, $t1, 4     /* b++ */
+        bne             $t1, $t3, 3b    /* b = (bits + width)? */
+        sw              $v0, -4($t1)    /* store to the sample just stepped over */
+
+4:
+        bnez            $a3, 0b         /* next row while height != 0 */
+        addu            $a0, $a0, $a1   /* bits += stride */
+
+5:
+        jr              $ra
+        li              $v0, 1          /* return value 1, set in the delay slot */
+
+        .end            mips32r2_pixman_fill32
+
+
+/***************************************************************************
+void
+mips32r2_composite_over_n_8_8888_inner (uint32_t *dest, const uint32_t src,
+                                        const uint8_t *mask, int width)
+***************************************************************************/
+
+        .global         mips32r2_composite_over_n_8_8888_inner
+        .ent            mips32r2_composite_over_n_8_8888_inner
+
+mips32r2_composite_over_n_8_8888_inner:
+        beqz            $a3, 1f         /* nothing to do when width == 0 */
+        sll             $a3, $a3, 2     /* width <<= 2 */
+
+        addu            $a3, $a0, $a3   /* dest_end = dest + width */
+
+        li              $t7, 0x01000100 /* used below to saturate channel sums */
+        li              $t8, 0x00FF00FF /* RB_MASK */
+        li              $t9, 0x00800080 /* rounding bias, 0x80 per channel */
+
+0:
+        lbu             $t2, 0($a2)     /* mask */
+
+        /* in() */
+
+        and             $t5, $a1, $t8   /* src bytes 0 and 2 */
+        mul             $t3, $t5, $t2   /* ... times mask */
+
+        lw              $t0, 0($a0)     /* dest */
+        addiu           $a2, $a2, 1     /* mask++ */
+
+        srl             $t6, $a1, 8
+        and             $t6, $t6, $t8   /* src bytes 1 and 3 */
+        mul             $t4, $t6, $t2   /* ... times mask */
+
+        addu            $t3, $t3, $t9   /* t += 0x80 per channel */
+        srl             $t5, $t3, 8
+        and             $t5, $t5, $t8
+        addu            $t3, $t3, $t5   /* t += (t >> 8) */
+        srl             $t3, $t3, 8
+        and             $t3, $t3, $t8   /* (t >> 8) & RB_MASK */
+
+        addu            $t4, $t4, $t9   /* same steps for bytes 1 and 3 */
+        srl             $t6, $t4, 8
+        and             $t6, $t6, $t8
+        addu            $t4, $t4, $t6
+        srl             $t4, $t4, 8
+        and             $t4, $t4, $t8
+
+        sll             $t4, $t4, 8
+        or              $t1, $t3, $t4   /* in(src,mask), all four bytes */
+
+
+        not             $t2, $t1        /* ~in() */
+        srl             $t2, $t2, 24    /* top byte of ~in() */
+
+        /* over(): UN8_rb_MUL_UN8() and UN8_rb_ADD_UN8_rb() */
+
+        and             $t5, $t0, $t8   /* dest bytes 0 and 2 */
+        mul             $t3, $t5, $t2
+
+        addiu           $a0, $a0, 4     /* dest++ */
+
+        srl             $t6, $t0, 8
+        and             $t6, $t6, $t8   /* dest bytes 1 and 3 */
+        mul             $t4, $t6, $t2
+
+        addu            $t3, $t3, $t9   /* t += 0x80 per channel */
+        srl             $t5, $t3, 8
+        and             $t5, $t5, $t8
+        addu            $t3, $t3, $t5   /* t += (t >> 8) */
+        srl             $t3, $t3, 8
+        and             $t3, $t3, $t8   /* (t >> 8) & RB_MASK */
+
+        and             $t5, $t1, $t8   /* in() bytes 0 and 2 */
+        addu            $t3, $t3, $t5   /* add them to the scaled dest */
+        srl             $t5, $t3, 8
+        and             $t5, $t5, $t8   /* carry out of each channel */
+        subu            $t5, $t7, $t5   /* 0xFF where it carried, else 0x100 */
+        or              $t3, $t3, $t5
+        and             $t3, $t3, $t8   /* channels that carried become 0xFF */
+
+        addu            $t4, $t4, $t9   /* same steps for bytes 1 and 3 */
+        srl             $t6, $t4, 8
+        and             $t6, $t6, $t8
+        addu            $t4, $t4, $t6
+        srl             $t4, $t4, 8
+        and             $t4, $t4, $t8
+
+        srl             $t6, $t1, 8
+        and             $t6, $t6, $t8   /* in() bytes 1 and 3 */
+        addu            $t4, $t4, $t6
+        srl             $t6, $t4, 8
+        and             $t6, $t6, $t8
+        subu            $t6, $t7, $t6
+        or              $t4, $t4, $t6
+        and             $t4, $t4, $t8
+
+        sll             $t4, $t4, 8
+        or              $t3, $t3, $t4   /* recombine into one 32-bit pixel */
+
+        bne             $a0, $a3, 0b    /* loop until dest reaches dest_end */
+        sw              $t3, -4($a0)    /* dest */
+
+1:
+        jr              $ra
+        nop
+
+        .end            mips32r2_composite_over_n_8_8888_inner
+
diff --git a/pixman/pixman-mips32r2.c b/pixman/pixman-mips32r2.c
new file mode 100644
index 0000000..4db293e
--- /dev/null
+++ b/pixman/pixman-mips32r2.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2010, MIPS Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-private.h"
+
+
+/* assembly-language functions (defined in pixman-mips32r2-asm.S) */
+
+pixman_bool_t
+mips32r2_pixman_fill32 (uint32_t *bits, int stride, int x, int y,
+                        int width, int height, uint32_t  xor);
+
+void
+mips32r2_composite_over_n_8_8888_inner (uint32_t *dest, uint32_t src,
+                                        const uint8_t *mask, int width);
+
+
+/***************************************************************************/
+
+/* Fill hook: 32 bpp is filled in assembly, other depths go to the delegate. */
+static pixman_bool_t
+mips32r2_fill (pixman_implementation_t *imp,
+               uint32_t *               bits,
+               int                      stride,
+               int                      bpp,
+               int                      x,
+               int                      y,
+               int                      width,
+               int                      height,
+               uint32_t                 xor)
+{
+    pixman_bool_t ok;
+
+    if (bpp == 32)
+    {
+        /* 32 bpp: use the hand-written assembly fill */
+        ok = mips32r2_pixman_fill32 (bits, stride, x, y, width, height, xor);
+    }
+    else
+    {
+        /* no assembly routine for this depth: defer to the delegate */
+        ok = _pixman_implementation_fill (imp->delegate, bits, stride, bpp,
+                                          x, y, width, height, xor);
+    }
+
+    return ok;
+}
+
+/* Fast path for OVER with a solid source and an 8-bit mask onto a 32-bit
+ * destination: each scanline is handed to the MIPS32R2 assembly loop. */
+static void
+mips32r2_fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
+                                       pixman_op_t              op,
+                                       pixman_image_t *         src_image,
+                                       pixman_image_t *         mask_image,
+                                       pixman_image_t *         dst_image,
+                                       int32_t                  src_x,
+                                       int32_t                  src_y,
+                                       int32_t                  mask_x,
+                                       int32_t                  mask_y,
+                                       int32_t                  dest_x,
+                                       int32_t                  dest_y,
+                                       int32_t                  width,
+                                       int32_t                  height)
+{
+    uint32_t src;
+    uint32_t *dst_line, *dst;
+    uint8_t  *mask_line, *mask;
+    int dst_stride, mask_stride;
+
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+
+    if (src == 0)
+        return; /* nothing to composite for an all-zero source pixel */
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+    while (height--)
+    {
+        dst = dst_line;
+        dst_line += dst_stride;
+        mask = mask_line;
+        mask_line += mask_stride;
+
+        mips32r2_composite_over_n_8_8888_inner (dst, src, mask, width);
+    }
+}
+
+
+/***************************************************************************/
+
+/* fast paths offered by this implementation; the last entry is PIXMAN_OP_NONE */
+static const pixman_fast_path_t mips32r2_fast_paths[] =
+{
+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mips32r2_fast_composite_over_n_8_8888),
+    { PIXMAN_OP_NONE }
+};
+
+/* Create the MIPS32R2 implementation layered on top of 'delegate'. */
+pixman_implementation_t *
+_pixman_implementation_create_mips32r2 (pixman_implementation_t *delegate)
+{
+    pixman_implementation_t *mips32r2;
+
+    /* no delegate supplied: layer on the generic fast-path implementation */
+    if (!delegate)
+        delegate = _pixman_implementation_create_fast_path ();
+
+    mips32r2 = _pixman_implementation_create (delegate, mips32r2_fast_paths);
+    mips32r2->fill = mips32r2_fill;
+    return mips32r2;
+}
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 383748a..1895f1d 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -518,6 +518,17 @@ pixman_implementation_t *
 _pixman_implementation_create_vmx (void);
 #endif
 
+#ifdef USE_MIPS32R2
+pixman_implementation_t *
+_pixman_implementation_create_mips32r2 (pixman_implementation_t *delegate);
+#endif /* USE_MIPS32R2 */
+
+#ifdef USE_MIPS_DSPASE1
+pixman_implementation_t *
+_pixman_implementation_create_mips_dspase1 (pixman_implementation_t *delegate);
+#endif /* USE_MIPS_DSPASE1 */
+
+
 pixman_implementation_t *
 _pixman_choose_implementation (void);
 
-- 
1.7.1


_______________________________________________
Pixman mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/pixman

Reply via email to