From 8e8b8b754d1d222e074138ba2d8c2bf617dd1a21 Mon Sep 17 00:00:00 2001
From: kui zheng <kui.zh...@arm.com>
Date: Wed, 16 Nov 2011 17:18:42 +0800
Subject: [PATCH] generic: Add NEON version of Dacc_modulate_rgb


Signed-off-by: kui zheng <kui.zh...@arm.com>
---
src/gfx/generic/generic.c      |    4 +-
src/gfx/generic/generic_neon.h |   81 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 84 insertions(+), 1 deletions(-)

diff --git a/src/gfx/generic/generic.c b/src/gfx/generic/generic.c
index a697155..cb35672 100644
--- a/src/gfx/generic/generic.c
+++ b/src/gfx/generic/generic.c
@@ -9672,7 +9672,9 @@ static void gInit_NEON( void )
      Dacc_modulation[DSBLIT_BLEND_ALPHACHANNEL |
                      DSBLIT_BLEND_COLORALPHA |
                      DSBLIT_COLORIZE] = Dacc_modulate_argb_NEON;
-
+     Dacc_modulation[DSBLIT_COLORIZE] = Dacc_modulate_rgb_NEON;
+     Dacc_modulation[DSBLIT_COLORIZE |
+                     DSBLIT_BLEND_ALPHACHANNEL] = Dacc_modulate_rgb_NEON;
}
 #endif
diff --git a/src/gfx/generic/generic_neon.h b/src/gfx/generic/generic_neon.h
index 79088e6..066d32a 100644
--- a/src/gfx/generic/generic_neon.h
+++ b/src/gfx/generic/generic_neon.h
@@ -977,3 +977,98 @@ static void Dacc_modulate_argb_NEON( GenefxState *gfxs )
      }
 }
+
+/*
+ * NEON version of Dacc_modulate_rgb.
+ *
+ * For each of the gfxs->length accumulators in gfxs->Dacc whose alpha has none
+ * of the 0xF000 bits set, r, g and b are replaced by (channel * Cacc channel)
+ * >> 8, with Cacc taken from gfxs->Cacc. Other accumulators and every alpha
+ * value are left as they were.
+ *
+ * The first (length & 7) accumulators are handled in C, the rest in blocks of
+ * eight by the inline assembly.
+ */
+static void Dacc_modulate_rgb_NEON( GenefxState *gfxs )
+{
+     int                w = gfxs->length;
+     GenefxAccumulator *D = gfxs->Dacc;
+     GenefxAccumulator  Cacc = gfxs->Cacc;
+     unsigned int       loop = w >> 3;
+     unsigned int       single = w & 0x7;
+     u16                maska = 0xF000;
+
+     /* Scalar part: the (w & 7) accumulators that do not fill a block of 8. */
+     while (single){
+          if (!(D->RGB.a & maska)) {
+               D->RGB.r = (Cacc.RGB.r * D->RGB.r) >> 8;
+               D->RGB.g = (Cacc.RGB.g * D->RGB.g) >> 8;
+               D->RGB.b = (Cacc.RGB.b * D->RGB.b) >> 8;
+          }
+          D++;
+          single--;
+     }
+
+     if (loop) {
+          /*
+           * %[loop] is decremented by the assembly, so it is a read-write
+           * ("+r") operand, and "subs" sets the condition flags, hence "cc".
+           */
+          __asm__ __volatile__ (
+               "mov             r4, %[D]        \n\t"
+               "mov             r5, %[D]        \n\t"
+               "vdup.16         q2, %[Cacc_r]   \n\t"
+               "vdup.16         q1, %[Cacc_g]   \n\t"
+               "vdup.16         q0, %[Cacc_b]   \n\t"
+               "vdup.16         q8, %[maska]    \n\t"
+               "1:                              \n\t"
+               "pld             [r4, #0xC0]     \n\t"
+               "pld             [r4, #0x100]     \n\t"
+               /* vload q4:b, q5:g, q6:r, q7:a */
+               "vld4.16         {d8, d10, d12, d14}, [r4]! \n\t"
+               "vld4.16         {d9, d11, d13, d15}, [r4]! \n\t"
+               /* q9: all ones in lanes where (a & maska) == 0, else zero */
+               "vand            q9, q7, q8      \n\t"
+               "vceq.i16        q9, q9, #0      \n\t"
+               /* b:q10  q0, q4 */
+               "vmull.u16       q3, d8, d0      \n\t"
+               "vshrn.i32       d20, q3, #8             \n\t"
+               "vmull.u16       q3, d9, d1      \n\t"
+               "vshrn.i32       d21, q3, #8             \n\t"
+               /* g:q11, q1, q5 */
+               "vmull.u16       q3, d10, d2        \n\t"
+               "vshrn.i32       d22, q3, #8             \n\t"
+               "vmull.u16       q3, d11, d3            \n\t"
+               "vshrn.i32       d23, q3, #8             \n\t"
+               /* r:q12, q2, q6 */
+               "vmull.u16       q3, d12, d4            \n\t"
+               "vshrn.i32       d24, q3, #8             \n\t"
+               "vmull.u16       q3, d13, d5            \n\t"
+               "vshrn.i32       d25, q3, #8             \n\t"
+               /* if (!(D->RGB.a & 0xF000)): keep the modulated values */
+               "vand            q10, q9, q10    \n\t"
+               "vand            q11, q9, q11    \n\t"
+               "vand            q12, q9, q12    \n\t"
+               /* if ((D->RGB.a & 0xF000)): invert q9, keep the loaded values */
+               "vceq.i16        q9, q9, #0      \n\t"
+               "vand            q4, q9, q4      \n\t"
+               "vand            q5, q9, q5      \n\t"
+               "vand            q6, q9, q6      \n\t"
+               /* Dacc: q4(b), q5(g), q6(r), q7(a) */
+               "vorr            q4, q4, q10     \n\t"
+               "vorr            q5, q5, q11     \n\t"
+               "vorr            q6, q6, q12     \n\t"
+               "vst4.16         {d8, d10, d12, d14}, [r5]! \n\t"
+               "vst4.16         {d9, d11, d13, d15}, [r5]! \n\t"
+               "subs            %[loop], %[loop], #1       \n\t"
+               "bne             1b                  "
+               : [loop] "+r" (loop)
+               : [Cacc_r] "r" (Cacc.RGB.r), [Cacc_g] "r" (Cacc.RGB.g),
+                 [Cacc_b] "r" (Cacc.RGB.b), [D] "r" (D), [maska] "r" (maska)
+               : "cc", "memory", "r4", "r5",
+                 "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9",
+                 "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17",
+                 "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25"
+          );
+     }
+}
--
1.7.1


-- IMPORTANT NOTICE: The contents of this email and any attachments are 
confidential and may also be privileged. If you are not the intended recipient, 
please notify the sender immediately and do not disclose the contents to any 
other person, use it for any purpose, or store or copy the information in any 
medium. Thank you.

Attachment: 0011-generic-Add-NEON-version-of-Dacc_modulate_rgb.patch
Description: 0011-generic-Add-NEON-version-of-Dacc_modulate_rgb.patch

_______________________________________________
directfb-dev mailing list
directfb-dev@directfb.org
http://mail.directfb.org/cgi-bin/mailman/listinfo/directfb-dev

Reply via email to