From: Søren Sandmann Pedersen <s...@redhat.com>

New output of lowlevel-blt-bench over_x888_8_0565:

over_x888_8_0565 =  L1:  55.68  L2:  55.11  M: 52.83 ( 19.04%)  HT: 39.62  VT: 37.70  R: 30.88  RT: 14.62 ( 174Kops/s)

The fetcher is looked up in a table, so that other fetchers can easily
be added.
---
 pixman/pixman-private.h |   18 ++++++---
 pixman/pixman-sse2.c    |   90 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+), 6 deletions(-)

diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 664260b..f5d0ba1 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -183,6 +183,9 @@ union pixman_image
 };
 
 typedef struct pixman_iter_t pixman_iter_t;
+typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask);
+typedef void      (* pixman_iter_write_back_t)   (pixman_iter_t *iter);
+
 typedef enum
 {
     ITER_NARROW =              (1 << 0),
@@ -209,13 +212,16 @@ typedef enum
 
 struct pixman_iter_t
 {
-    uint32_t *(* get_scanline) (pixman_iter_t *iter, const uint32_t *mask);
-    void      (* write_back)   (pixman_iter_t *iter);
+    pixman_iter_get_scanline_t get_scanline;
+    pixman_iter_write_back_t   write_back;
+
+    pixman_image_t *           image;
+    uint32_t *                 buffer;
+    int                                x, y;
+    int                                width;
 
-    pixman_image_t *    image;
-    uint32_t *          buffer;
-    int                 x, y;
-    int                 width;
+    uint8_t *                  bits;
+    int                                stride;
 };
 
 void
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index ae55456..10a3dd0 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5953,6 +5953,94 @@ sse2_fill (pixman_implementation_t *imp,
     return TRUE;
 }
 
+static uint32_t *
+sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
+{
+    int w = iter->width;
+    __m128i ff000000 = mask_ff000000;
+    uint32_t *dst = iter->buffer;
+    uint32_t *src = (uint32_t *)iter->bits;
+
+    iter->bits += iter->stride;
+
+    while (w && ((unsigned long)dst) & 0x0f)
+    {
+       *dst++ = (*src++) | 0xff000000;
+       w--;
+    }
+
+    while (w >= 4)
+    {
+       save_128_aligned (
+           (__m128i *)dst, _mm_or_si128 (
+               load_128_unaligned ((__m128i *)src), ff000000));
+
+       dst += 4;
+       src += 4;
+       w -= 4;
+    }
+
+    while (w)
+    {
+       *dst++ = (*src++) | 0xff000000;
+       w--;
+    }
+
+    return iter->buffer;
+}
+
+typedef struct
+{
+    pixman_format_code_t       format;
+    pixman_iter_get_scanline_t get_scanline;
+} fetcher_info_t;
+
+static const fetcher_info_t fetchers[] =
+{
+    { PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 },
+    { PIXMAN_null }
+};
+
+static void
+sse2_src_iter_init (pixman_implementation_t *imp,
+                   pixman_iter_t *iter,
+                   pixman_image_t *image,
+                   int x, int y, int width, int height,
+                   uint8_t *buffer, iter_flags_t flags)
+{
+#define FLAGS                                                          \
+    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
+
+    if ((flags & ITER_NARROW)                          &&
+       (image->common.flags & FLAGS) == FLAGS          &&
+       x >= 0 && y >= 0                                &&
+       x + width <= image->bits.width                  &&
+       y + height <= image->bits.height)
+    {
+       const fetcher_info_t *f;
+
+       for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
+       {
+           if (image->common.extended_format_code == f->format)
+           {
+               uint8_t *b = (uint8_t *)image->bits.bits;
+               int s = image->bits.rowstride * 4;
+
+               iter->bits = b + s * y + x * PIXMAN_FORMAT_BPP (f->format) / 8;
+               iter->stride = s;
+               iter->width = width;
+               iter->buffer = (uint32_t *)buffer;
+
+               iter->get_scanline = f->get_scanline;
+               return;
+           }
+       }
+    }
+
+    _pixman_implementation_src_iter_init (
+       imp->delegate, iter, image, x, y, width, height, buffer, flags);
+}
+
 #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
 __attribute__((__force_align_arg_pointer__))
 #endif
@@ -6020,6 +6108,8 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
     imp->blt = sse2_blt;
     imp->fill = sse2_fill;
 
+    imp->src_iter_init = sse2_src_iter_init;
+
     return imp;
 }
 
-- 
1.7.3.1

_______________________________________________
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman

Reply via email to