pixman: Changes to 'upstream-experimental'

Julien Cristau Tue, 21 Jul 2009 07:58:24 -0700

 configure.ac                      |   30 
 pixman/Makefile.am                |   10 
 pixman/Makefile.win32             |   57 
 pixman/make-combine.pl            |    5 
 pixman/pixman-access.c            | 2604 ++++++++-----
 pixman/pixman-accessor.h          |    4 
 pixman/pixman-arm-neon.c          | 4256 ++++++++++++---------
 pixman/pixman-arm-neon.h          |  179 
 pixman/pixman-arm-simd.c          |  564 +-
 pixman/pixman-arm-simd.h          |  106 
 pixman/pixman-bits-image.c        |  473 +-
 pixman/pixman-combine.c.template  | 2661 +++++++------
 pixman/pixman-combine.h.template  |  345 -
 pixman/pixman-compiler.h          |   12 
 pixman/pixman-conical-gradient.c  |  144 
 pixman/pixman-cpu.c               |  443 +-
 pixman/pixman-edge-imp.h          |   58 
 pixman/pixman-edge.c              |  369 -
 pixman/pixman-fast-path.c         | 1390 +++----
 pixman/pixman-general.c           |  273 -
 pixman/pixman-gradient-walker.c   |  184 
 pixman/pixman-image.c             |  226 -
 pixman/pixman-implementation.c    |  295 -
 pixman/pixman-linear-gradient.c   |  189 
 pixman/pixman-matrix.c            | 1010 ++---
 pixman/pixman-mmx.c               | 2915 +++++++-------
 pixman/pixman-private.h           |  822 ++--
 pixman/pixman-radial-gradient.c   |  171 
 pixman/pixman-region.c            | 2499 +++++++-----
 pixman/pixman-region16.c          |   10 
 pixman/pixman-solid-fill.c        |   38 
 pixman/pixman-sse2.c              | 7497 ++++++++++++++++++++------------------
 pixman/pixman-timer.c             |   12 
 pixman/pixman-trap.c              |  224 -
 pixman/pixman-utils.c             |  714 +--
 pixman/pixman-vmx.c               | 2361 ++++++-----
 pixman/pixman-x64-mmx-emulation.h |  263 +
 pixman/pixman.c                   |  289 -
 pixman/pixman.h                   |  483 +-
 test/Makefile.am                  |    6 
 test/composite-test.c             |  113 
 test/oob-test.c                   |   35 
 test/region-test.c                |   63 
 test/scaling-test.c               |  599 +--
 test/window-test.c                |  173 
 45 files changed, 19661 insertions(+), 15513 deletions(-)


New commits:
commit f3ac1368775542e09f3741d2ad7b72af20bd9663
Author: Søren Sandmann Pedersen <[email protected]>
Date:   Tue Jul 21 07:20:57 2009 -0400

    Pre-release version bump

diff --git a/configure.ac b/configure.ac
index 5a49a6c..7d825b7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,7 +54,7 @@ AC_PREREQ([2.57])
 
 m4_define([pixman_major], 0)
 m4_define([pixman_minor], 15)
-m4_define([pixman_micro], 17)
+m4_define([pixman_micro], 18)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 

commit 7c56911e3b5b97b26dceff9b68d9fed32693d57b
Author: Søren Sandmann Pedersen <[email protected]>
Date:   Tue Jul 21 07:01:10 2009 -0400

    Don't assert when malformed regions are detected.
    
    Instead print a message to stderr so that it will end up in the X log
    file.

diff --git a/pixman/pixman-region.c b/pixman/pixman-region.c
index 7820b94..387dbba 100644
--- a/pixman/pixman-region.c
+++ b/pixman/pixman-region.c
@@ -64,7 +64,43 @@
 #define PIXREGION_END(reg) PIXREGION_BOX (reg, (reg)->data->numRects - 1)
 
 #define GOOD_RECT(rect) ((rect)->x1 < (rect)->x2 && (rect)->y1 < (rect)->y2)
-#define GOOD(reg) assert (PREFIX (_selfcheck) (reg))
+
+#define PIXMAN_REGION_LOG_FAILURES
+
+#if defined PIXMAN_REGION_DEBUG
+
+#    define GOOD(reg) assert (PREFIX (_selfcheck) (reg))
+
+#elif defined PIXMAN_REGION_LOG_FAILURES
+
+static void
+log_region_error (void)
+{
+    static int n_messages = 0;
+
+    if (n_messages < 50)
+    {
+       fprintf (stderr,
+                "*** BUG ***\n"
+                "Malformed region detected\n"
+                "Set a breakpoint on 'log_region_error' to debug\n\n");
+
+       n_messages++;
+    }
+}
+
+#define GOOD(reg)                                                      \
+    do                                                                 \
+    {                                                                  \
+       if (!PREFIX (_selfcheck (reg)))                                 \
+           log_region_error ();                                        \
+    } while (0)
+
+#else
+
+#define GOOD(reg)
+
+#endif
 
 static const box_type_t PREFIX (_empty_box_) = { 0, 0, 0, 0 };
 static const region_data_type_t PREFIX (_empty_data_) = { 0, 0 };
@@ -467,7 +503,7 @@ PREFIX (_copy) (region_type_t *dst, region_type_t *src)
 {
     GOOD (dst);
     GOOD (src);
-    
+
     if (dst == src)
        return TRUE;
     

commit f9660ce29ed072c6cbaec711c5d18b9f0ba113ae
Author: Søren Sandmann Pedersen <[email protected]>
Date:   Tue Jul 21 04:23:56 2009 -0400

    Fix another search and replace issue

diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 0f78584..14f7482 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -1898,8 +1898,8 @@ pixman_fill_neon (uint32_t *bits,
 #define NEON_SCANLINE_BUFFER_PIXELS (1024)
 
 static inline void
-neon_quadword_copy ((void *)    dst,
-                   (void *)    src,
+neon_quadword_copy (void *   dst,
+                   void *   src,
                    uint32_t count,         /* of quadwords */
                    uint32_t trailer_count  /* of bytes */)
 {

commit b3196b63274134a594fc091ec2f8be3b44734411
Author: Søren Sandmann Pedersen <[email protected]>
Date:   Tue Jul 21 04:18:35 2009 -0400

    Fix search-and-replace issue pointed out by Koen Kooi.

diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 2f7b8a0..0f78584 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -169,8 +169,8 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
            w = width;
 
 #ifndef USE_GCC_INLINE_ASM
-           sval = vld1_u8 (((void *))src);
-           dval = vld1_u8 (((void *))dst);
+           sval = vld1_u8 ((void *)src);
+           dval = vld1_u8 ((void *)dst);
            keep_dst = dst;
 
            temp = vqadd_u8 (dval, sval);
@@ -181,10 +181,10 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
 
            while (w)
            {
-               sval = vld1_u8 (((void *))src);
-               dval = vld1_u8 (((void *))dst);
+               sval = vld1_u8 ((void *)src);
+               dval = vld1_u8 ((void *)dst);
 
-               vst1_u8 (((void *))keep_dst, temp);
+               vst1_u8 ((void *)keep_dst, temp);
                keep_dst = dst;
 
                temp = vqadd_u8 (dval, sval);
@@ -194,7 +194,7 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
                w -= 8;
            }
 
-           vst1_u8 (((void *))keep_dst, temp);
+           vst1_u8 ((void *)keep_dst, temp);
 #else
            asm volatile (
 /* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
@@ -249,9 +249,9 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
            if (w & 4)
            {
                sval = vreinterpret_u8_u32 (
-                   vld1_lane_u32 (((void *))src, vreinterpret_u32_u8 (sval), 1));
+                   vld1_lane_u32 ((void *)src, vreinterpret_u32_u8 (sval), 1));
                dval = vreinterpret_u8_u32 (
-                   vld1_lane_u32 (((void *))dst, vreinterpret_u32_u8 (dval), 1));
+                   vld1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (dval), 1));
 
                dst4 = dst;
                src += 4;
@@ -261,9 +261,9 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
            if (w & 2)
            {
                sval = vreinterpret_u8_u16 (
-                   vld1_lane_u16 (((void *))src, vreinterpret_u16_u8 (sval), 1));
+                   vld1_lane_u16 ((void *)src, vreinterpret_u16_u8 (sval), 1));
                dval = vreinterpret_u8_u16 (
-                   vld1_lane_u16 (((void *))dst, vreinterpret_u16_u8 (dval), 1));
+                   vld1_lane_u16 ((void *)dst, vreinterpret_u16_u8 (dval), 1));
 
                dst2 = dst;
                src += 2;
@@ -282,10 +282,10 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl,
                vst1_lane_u8 (dst, dval, 1);
 
            if (w & 2)
-               vst1_lane_u16 (((void *))dst2, vreinterpret_u16_u8 (dval), 1);
+               vst1_lane_u16 ((void *)dst2, vreinterpret_u16_u8 (dval), 1);
 
            if (w & 4)
-               vst1_lane_u32 (((void *))dst4, vreinterpret_u32_u8 (dval), 1);
+               vst1_lane_u32 ((void *)dst4, vreinterpret_u32_u8 (dval), 1);
        }
     }
 }
@@ -328,8 +328,8 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
            w = width;
 
 #ifndef USE_GCC_INLINE_ASM
-           sval = vld4_u8 (((void *))src);
-           dval = vld4_u8 (((void *))dst);
+           sval = vld4_u8 ((void *)src);
+           dval = vld4_u8 ((void *)dst);
            keep_dst = dst;
 
            temp = neon8mul (dval, vmvn_u8 (sval.val[3]));
@@ -341,10 +341,10 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
 
            while (w)
            {
-               sval = vld4_u8 (((void *))src);
-               dval = vld4_u8 (((void *))dst);
+               sval = vld4_u8 ((void *)src);
+               dval = vld4_u8 ((void *)dst);
 
-               vst4_u8 (((void *))keep_dst, temp);
+               vst4_u8 ((void *)keep_dst, temp);
                keep_dst = dst;
 
                temp = neon8mul (dval, vmvn_u8 (sval.val[3]));
@@ -355,7 +355,7 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
                w -= 8;
            }
 
-           vst4_u8 (((void *))keep_dst, temp);
+           vst4_u8 ((void *)keep_dst, temp);
 #else
            asm volatile (
 /* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
@@ -427,10 +427,10 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
                uint8x8_t sval, dval;
 
                /* two 32-bit pixels packed into D-reg; ad-hoc vectorization */
-               sval = vreinterpret_u8_u32 (vld1_u32 (((void *))src));
-               dval = vreinterpret_u8_u32 (vld1_u32 (((void *))dst));
+               sval = vreinterpret_u8_u32 (vld1_u32 ((void *)src));
+               dval = vreinterpret_u8_u32 (vld1_u32 ((void *)dst));
                dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
-               vst1_u8 (((void *))dst, vqadd_u8 (sval, dval));
+               vst1_u8 ((void *)dst, vqadd_u8 (sval, dval));
 
                src += 2;
                dst += 2;
@@ -442,10 +442,10 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl,
                uint8x8_t sval, dval;
 
                /* single 32-bit pixel in lane 0 */
-               sval = vreinterpret_u8_u32 (vld1_dup_u32 (((void *))src));  /* only interested in lane 0 */
-               dval = vreinterpret_u8_u32 (vld1_dup_u32 (((void *))dst));  /* only interested in lane 0 */
+               sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)src));  /* only interested in lane 0 */
+               dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst));  /* only interested in lane 0 */
                dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
-               vst1_lane_u32 (((void *))dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
+               vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
            }
        }
     }
@@ -495,8 +495,8 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
 #ifndef USE_GCC_INLINE_ASM
            uint8x8x4_t sval, dval, temp;
 
-           sval = vld4_u8 (((void *))src);
-           dval = vld4_u8 (((void *))dst);
+           sval = vld4_u8 ((void *)src);
+           dval = vld4_u8 ((void *)dst);
            keep_dst = dst;
 
            sval = neon8mul (sval, mask_alpha);
@@ -509,10 +509,10 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
 
            while (w)
            {
-               sval = vld4_u8 (((void *))src);
-               dval = vld4_u8 (((void *))dst);
+               sval = vld4_u8 ((void *)src);
+               dval = vld4_u8 ((void *)dst);
 
-               vst4_u8 (((void *))keep_dst, temp);
+               vst4_u8 ((void *)keep_dst, temp);
                keep_dst = dst;
 
                sval = neon8mul (sval, mask_alpha);
@@ -523,7 +523,7 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
                dst += 8;
                w -= 8;
            }
-           vst4_u8 (((void *))keep_dst, temp);
+           vst4_u8 ((void *)keep_dst, temp);
 #else
            asm volatile (
 /* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
@@ -612,8 +612,8 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
            {
                uint8x8_t sval, dval;
 
-               sval = vreinterpret_u8_u32 (vld1_u32 (((void *))src));
-               dval = vreinterpret_u8_u32 (vld1_u32 (((void *))dst));
+               sval = vreinterpret_u8_u32 (vld1_u32 ((void *)src));
+               dval = vreinterpret_u8_u32 (vld1_u32 ((void *)dst));
 
                /* sval * const alpha_mul */
                sval = neon2mul (sval, mask_alpha);
@@ -621,7 +621,7 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
                /* dval * 255-(src alpha) */
                dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
 
-               vst1_u8 (((void *))dst, vqadd_u8 (sval, dval));
+               vst1_u8 ((void *)dst, vqadd_u8 (sval, dval));
 
                src += 2;
                dst += 2;
@@ -632,8 +632,8 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
            {
                uint8x8_t sval, dval;
 
-               sval = vreinterpret_u8_u32 (vld1_dup_u32 (((void *))src));
-               dval = vreinterpret_u8_u32 (vld1_dup_u32 (((void *))dst));
+               sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)src));
+               dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst));
 
                /* sval * const alpha_mul */
                sval = neon2mul (sval, mask_alpha);
@@ -641,7 +641,7 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
                /* dval * 255-(src alpha) */
                dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
 
-               vst1_lane_u32 (((void *))dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
+               vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
            }
        }
     }
@@ -703,8 +703,8 @@ neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
            uint16x8_t dval, temp;
            uint8x8x4_t sval8temp;
 
-           alpha = vld1_u8 (((void *))mask);
-           dval = vld1q_u16 (((void *))dst);
+           alpha = vld1_u8 ((void *)mask);
+           dval = vld1q_u16 ((void *)dst);
            keep_dst = dst;
 
            sval8temp = neon8mul (sval8, alpha);
@@ -716,10 +716,10 @@ neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
 
            while (w)
            {
-               dval = vld1q_u16 (((void *))dst);
-               alpha = vld1_u8 (((void *))mask);
+               dval = vld1q_u16 ((void *)dst);
+               alpha = vld1_u8 ((void *)mask);
 
-               vst1q_u16 (((void *))keep_dst, temp);
+               vst1q_u16 ((void *)keep_dst, temp);
                keep_dst = dst;
 
                sval8temp = neon8mul (sval8, alpha);
@@ -729,7 +729,7 @@ neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
                dst+=8;
                w-=8;
            }
-           vst1q_u16 (((void *))keep_dst, temp);
+           vst1q_u16 ((void *)keep_dst, temp);
 #else
            asm volatile (
                "vdup.32      d0, %[src]\n\t"
@@ -842,35 +842,35 @@ neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
 
            if (w&4)
            {
-               alpha = vreinterpret_u8_u32 (vld1_lane_u32 (((void *))mask, vreinterpret_u32_u8 (alpha),1));
-               dval = vreinterpretq_u16_u64 (vld1q_lane_u64 (((void *))dst, vreinterpretq_u64_u16 (dval),1));
+               alpha = vreinterpret_u8_u32 (vld1_lane_u32 ((void *)mask, vreinterpret_u32_u8 (alpha),1));
+               dval = vreinterpretq_u16_u64 (vld1q_lane_u64 ((void *)dst, vreinterpretq_u64_u16 (dval),1));
                dst4=dst;
                mask+=4;
                dst+=4;
            }
            if (w&2)
            {
-               alpha = vreinterpret_u8_u16 (vld1_lane_u16 (((void *))mask, vreinterpret_u16_u8 (alpha),1));
-               dval = vreinterpretq_u16_u32 (vld1q_lane_u32 (((void *))dst, vreinterpretq_u32_u16 (dval),1));
+               alpha = vreinterpret_u8_u16 (vld1_lane_u16 ((void *)mask, vreinterpret_u16_u8 (alpha),1));
+               dval = vreinterpretq_u16_u32 (vld1q_lane_u32 ((void *)dst, vreinterpretq_u32_u16 (dval),1));
                dst2=dst;
                mask+=2;
                dst+=2;
            }
            if (w&1)
            {
-               alpha = vld1_lane_u8 (((void *))mask, alpha,1);
-               dval = vld1q_lane_u16 (((void *))dst, dval,1);
+               alpha = vld1_lane_u8 ((void *)mask, alpha,1);
+               dval = vld1q_lane_u16 ((void *)dst, dval,1);
            }
 
            sval8temp = neon8mul (sval8, alpha);
            temp = pack0565 (neon8qadd (sval8temp, neon8mul (unpack0565 (dval), vmvn_u8 (sval8temp.val[3]))));
 
            if (w&1)
-               vst1q_lane_u16 (((void *))dst, temp,1);
+               vst1q_lane_u16 ((void *)dst, temp,1);
            if (w&2)
-               vst1q_lane_u32 (((void *))dst2, vreinterpretq_u32_u16 (temp),1);
+               vst1q_lane_u32 ((void *)dst2, vreinterpretq_u32_u16 (temp),1);
            if (w&4)
-               vst1q_lane_u64 (((void *))dst4, vreinterpretq_u64_u16 (temp),1);
+               vst1q_lane_u64 ((void *)dst4, vreinterpretq_u64_u16 (temp),1);
 #else
            asm volatile (
                "vdup.32      d0, %[src]\n\t"
@@ -1040,8 +1040,8 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
            uint8x8_t alpha;
            uint8x8x4_t dval, temp;
 
-           alpha = vld1_u8 (((void *))mask);
-           dval = vld4_u8 (((void *))dst);
+           alpha = vld1_u8 ((void *)mask);
+           dval = vld4_u8 ((void *)dst);
            keep_dst = dst;
 
            temp = neon8mul (sval8, alpha);
@@ -1054,10 +1054,10 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
 
            while (w)
            {
-               alpha = vld1_u8 (((void *))mask);
-               dval = vld4_u8 (((void *))dst);
+               alpha = vld1_u8 ((void *)mask);
+               dval = vld4_u8 ((void *)dst);
 
-               vst4_u8 (((void *))keep_dst, temp);
+               vst4_u8 ((void *)keep_dst, temp);
                keep_dst = dst;
 
                temp = neon8mul (sval8, alpha);
@@ -1068,7 +1068,7 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
                dst += 8;
                w -= 8;
            }
-           vst4_u8 (((void *))keep_dst, temp);
+           vst4_u8 ((void *)keep_dst, temp);
 #else
            asm volatile (
                "vdup.32      d0, %[src]\n\t"
@@ -1160,14 +1160,14 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
                uint8x8_t dval, temp, res;
 
                alpha = vtbl1_u8 (
-                   vreinterpret_u8_u16 (vld1_dup_u16 (((void *))mask)), mask_selector);
-               dval = vld1_u8 (((void *))dst);
+                   vreinterpret_u8_u16 (vld1_dup_u16 ((void *)mask)), mask_selector);
+               dval = vld1_u8 ((void *)dst);
 
                temp = neon2mul (sval2, alpha);
                res = vqadd_u8 (
                    temp, neon2mul (dval, vtbl1_u8 (vmvn_u8 (temp), alpha_selector)));
 
-               vst1_u8 (((void *))dst, res);
+               vst1_u8 ((void *)dst, res);
 
                mask += 2;
                dst += 2;
@@ -1178,14 +1178,14 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
            {
                uint8x8_t dval, temp, res;
 
-               alpha = vtbl1_u8 (vld1_dup_u8 (((void *))mask), mask_selector);
-               dval = vreinterpret_u8_u32 (vld1_dup_u32 (((void *))dst));
+               alpha = vtbl1_u8 (vld1_dup_u8 ((void *)mask), mask_selector);
+               dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst));
 
                temp = neon2mul (sval2, alpha);
                res = vqadd_u8 (
                    temp, neon2mul (dval, vtbl1_u8 (vmvn_u8 (temp), alpha_selector)));
 
-               vst1_lane_u32 (((void *))dst, vreinterpret_u32_u8 (res), 0);
+               vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (res), 0);
            }
        }
     }

commit 0ff5733c16804d5b10782556eeeade7061924846
Author: George Yohng <[email protected]>
Date:   Tue Jul 21 03:43:42 2009 -0400

    Add implementation of MMX __m64 functions for MSVC x64.
    
    Microsoft C++ does not define __m64 and all related MMX functions in
    x64.  However, it succeeds in generating object files for SSE2 code
    inside pixman.
    
    The real problem happens during linking, when it cannot find MMX functions
    (which are not defined as intrinsics for AMD64 platform).
    
    I have implemented those missing functions using general programming.
    
    MMX __m64 is used relatively scarcely within SSE2 implementation, and the
    performance impact probably is negligible.
    
    Bug 22390.

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 0c528d8..e19fa6e 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -52,7 +52,7 @@ pixman-combine64.h : pixman-combine.h.template make-combine.pl
        $(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1)
 
 EXTRA_DIST = Makefile.win32 pixman-combine.c.template make-combine.pl pixman-region.c \
-       pixman-combine.h.template solaris-hwcap.mapfile
+       pixman-combine.h.template solaris-hwcap.mapfile pixman-x64-mmx-emulation.h
 CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine32.h pixman-combine64.h
 
 # mmx code
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 3ed8165..2fa956e 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -36,6 +36,15 @@
 #include "pixman-private.h"
 #include "pixman-combine32.h"
 
+#if defined(_MSC_VER) && defined(_M_AMD64)
+/* Windows 64 doesn't allow MMX to be used, so
+ * the pixman-x64-mmx-emulation.h file contains
+ * implementations of those MMX intrinsics that
+ * are used in the SSE2 implementation.
+ */
+#   include "pixman-x64-mmx-emulation.h"
+#endif
+
 #ifdef USE_SSE2
 
 /* --------------------------------------------------------------------
diff --git a/pixman/pixman-x64-mmx-emulation.h b/pixman/pixman-x64-mmx-emulation.h
new file mode 100644
index 0000000..231030f
--- /dev/null
+++ b/pixman/pixman-x64-mmx-emulation.h
@@ -0,0 +1,263 @@
+#ifndef MMX_X64_H_INCLUDED
+#define MMX_X64_H_INCLUDED
+
+/* Implementation of x64 MMX substitition functions, before
+ * pixman is reimplemented not to use __m64 type on Visual C++
+ *
+ * Copyright (C)2009 by George Yohng
+ * Released in public domain.
+ */
+
+#include <intrin.h>
+
+#define M64C(a) (*(const __m64 *)(&a))
+#define M64U(a) (*(const unsigned long long *)(&a))
+
+__inline __m64
+_m_from_int (int a)
+{
+    long long i64 = a;
+
+    return M64C (i64);
+}
+
+__inline __m64
+_mm_setzero_si64 ()
+{
+    long long i64 = 0;
+
+    return M64C (i64);
+}
+
+__inline __m64
+_mm_set_pi32 (int i1,   int i0)
+{
+    unsigned long long i64 = ((unsigned)i0) + (((unsigned long long)(unsigned)i1) << 32);
+
+    return M64C (i64);
+}
+
+__inline void
+_m_empty ()
+{
+}
+
+__inline __m64
+_mm_set1_pi16 (short w)
+{
+    unsigned long long i64 = ((unsigned long long)(unsigned short)(w)) * 0x0001000100010001ULL;
+
+    return M64C (i64);
+}
+
+__inline int
+_m_to_int (__m64 m)
+{
+    return m.m64_i32[0];
+}
+
+__inline __m64
+_mm_movepi64_pi64 (__m128i a)
+{
+    return M64C (a.m128i_i64[0]);
+}
+
+__inline __m64
+_m_pand (__m64 a, __m64 b)
+{
+    unsigned long long i64 = M64U (a) & M64U (b);
+
+    return M64C (i64);
+}
+
+__inline __m64
+_m_por (__m64 a, __m64 b)
+{
+    unsigned long long i64 = M64U (a) | M64U (b);
+
+    return M64C (i64);
+}
+
+__inline __m64
+_m_pxor (__m64 a, __m64 b)
+{
+    unsigned long long i64 = M64U (a) ^ M64U (b);
+
+    return M64C (i64);
+}
+
+__inline __m64
+_m_pmulhuw (__m64 a, __m64 b)        /* unoptimized */
+{
+    unsigned short d[4] =
+    {
+       (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]) >> 16),
+       (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]) >> 16),
+       (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]) >> 16),
+       (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]) >> 16)
+    };
+
+    return M64C (d[0]);
+}
+
+__inline __m64
+_m_pmullw2 (__m64 a, __m64 b)        /* unoptimized */
+{
+    unsigned short d[4] =
+    {
+       (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])),
+       (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1])),
+       (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2])),
+       (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))
+    };
+
+    return M64C (d[0]);
+}
+
+__inline __m64
+_m_pmullw (__m64 a, __m64 b)        /* unoptimized */
+{
+    unsigned long long x =
+       ((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])))  +
+       (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]))) << 16)  +
+       (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]))) << 32)  +
+       (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))) << 48);
+
+    return M64C (x);
+}
+
+__inline __m64
+_m_paddusb (__m64 a, __m64 b)        /* unoptimized */
+{
+    unsigned long long x = (M64U (a) & 0x00FF00FF00FF00FFULL) +
+                           (M64U (b) & 0x00FF00FF00FF00FFULL);
+
+    unsigned long long y = ((M64U (a) >> 8) & 0x00FF00FF00FF00FFULL) +
+                           ((M64U (b) >> 8) & 0x00FF00FF00FF00FFULL);
+
+    x | = ((x & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;
+    y | = ((y & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;
+
+    x = (x & 0x00FF00FF00FF00FFULL) | ((y & 0x00FF00FF00FF00FFULL) << 8);
+
+    return M64C (x);
+}
+
+__inline __m64
+_m_paddusw (__m64 a, __m64 b)        /* unoptimized */
+{
+    unsigned long long x = (M64U (a) & 0x0000FFFF0000FFFFULL) +
+                           (M64U (b) & 0x0000FFFF0000FFFFULL);
+
+    unsigned long long y = ((M64U (a) >> 16) & 0x0000FFFF0000FFFFULL) +
+                           ((M64U (b) >> 16) & 0x0000FFFF0000FFFFULL);
+
+    x | = ((x & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF;
+    y | = ((y & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF;
+
+    x = (x & 0x0000FFFF0000FFFFULL) | ((y & 0x0000FFFF0000FFFFULL) << 16);
+
+    return M64C (x);
+}
+
+__inline __m64
+_m_pshufw (__m64 a, int n)         /* unoptimized */
+{
+    unsigned short d[4] =
+    {
+       a.m64_u16[n & 3],
+       a.m64_u16[(n >> 2) & 3],
+       a.m64_u16[(n >> 4) & 3],
+       a.m64_u16[(n >> 6) & 3]
+    };
+
+    return M64C (d[0]);
+}
+
+__inline unsigned char
+sat16 (unsigned short d)
+{
+    if (d > 0xFF) return 0xFF;
+    else return d & 0xFF;
+}
+
+__inline __m64
+_m_packuswb (__m64 m1, __m64 m2)          /* unoptimized */
+{
+    unsigned char d[8] =
+    {
+       sat16 (m1.m64_u16[0]),
+       sat16 (m1.m64_u16[1]),
+       sat16 (m1.m64_u16[2]),
+       sat16 (m1.m64_u16[3]),
+       sat16 (m2.m64_u16[0]),
+       sat16 (m2.m64_u16[1]),
+       sat16 (m2.m64_u16[2]),
+       sat16 (m2.m64_u16[3])
+    };
+
+    return M64C (d[0]);
+}
+
+__inline __m64 _m_punpcklbw (__m64 m1, __m64 m2)          /* unoptimized */
+{
+    unsigned char d[8] =
+    {
+       m1.m64_u8[0],
+       m2.m64_u8[0],
+       m1.m64_u8[1],
+       m2.m64_u8[1],
+       m1.m64_u8[2],
+       m2.m64_u8[2],
+       m1.m64_u8[3],
+       m2.m64_u8[3],
+    };
+
+    return M64C (d[0]);
+}
+
+__inline __m64 _m_punpckhbw (__m64 m1, __m64 m2)          /* unoptimized */
+{
+    unsigned char d[8] =
+    {
+       m1.m64_u8[4],
+       m2.m64_u8[4],
+       m1.m64_u8[5],
+       m2.m64_u8[5],
+       m1.m64_u8[6],
+       m2.m64_u8[6],
+       m1.m64_u8[7],
+       m2.m64_u8[7],
+    };
+
+    return M64C (d[0]);
+}
+
+__inline __m64 _m_psrlwi (__m64 a, int n)       /* unoptimized */
+{
+    unsigned short d[4] =
+    {
+       a.m64_u16[0] >> n,
+       a.m64_u16[1] >> n,
+       a.m64_u16[2] >> n,
+       a.m64_u16[3] >> n
+    };
+
+    return M64C (d[0]);
+}
+
+__inline __m64 _m_psrlqi (__m64 m, int n)
+{
+    unsigned long long x = M64U (m) >> n;
+
+    return M64C (x);
+}
+
+__inline __m64 _m_psllqi (__m64 m, int n)
+{
+    unsigned long long x = M64U (m) << n;
+
+    return M64C (x);
+}
+
+#endif /* MMX_X64_H_INCLUDED */

commit 0b95afd259bb839a026955e7fda15b44fa22a805
Author: Chris Wilson <[email protected]>
Date:   Mon Jul 20 14:07:18 2009 +0100

    Fix read of BITS members from a solid image.
    
    During the fast-path query, the read_func and write_func from the bits
    structure are queried for the solid image.
    
    ==32723== Conditional jump or move depends on uninitialised value(s)
    ==32723==    at 0x412AF20: _pixman_run_fast_path (pixman-utils.c:681)
    ==32723==    by 0x4136319: sse2_composite (pixman-sse2.c:5554)
    ==32723==    by 0x4100CD2: _pixman_implementation_composite
    (pixman-implementation.c:227)
    ==32723==    by 0x412396E: pixman_image_composite (pixman.c:140)
    ==32723==    by 0x4123D64: pixman_image_fill_rectangles (pixman.c:322)
    ==32723==    by 0x40482B7: _cairo_image_surface_fill_rectangles
    (cairo-image-surface.c:1180)
    ==32723==    by 0x4063BE7: _cairo_surface_fill_rectangles
    (cairo-surface.c:1883)
    ==32723==    by 0x4063E38: _cairo_surface_fill_region
    (cairo-surface.c:1840)
    ==32723==    by 0x4067FDC: _clip_and_composite_trapezoids
    (cairo-surface-fallback.c:625)
    ==32723==    by 0x40689C5: _cairo_surface_fallback_paint
    (cairo-surface-fallback.c:835)
    ==32723==    by 0x4065731: _cairo_surface_paint (cairo-surface.c:1923)
    ==32723==    by 0x4044098: _cairo_gstate_paint (cairo-gstate.c:900)
    ==32723==  Uninitialised value was created by a heap allocation
    ==32723==    at 0x402732D: malloc (vg_replace_malloc.c:180)
    ==32723==    by 0x410099F: _pixman_image_allocate (pixman-image.c:100)
    ==32723==    by 0x41265B8: pixman_image_create_solid_fill
    (pixman-solid-fill.c:75)
    ==32723==    by 0x4123CE1: pixman_image_fill_rectangles (pixman.c:314)
    ==32723==    by 0x40482B7: _cairo_image_surface_fill_rectangles
    (cairo-image-surface.c:1180)
    ==32723==    by 0x4063BE7: _cairo_surface_fill_rectangles
    (cairo-surface.c:1883)
    ==32723==    by 0x4063E38: _cairo_surface_fill_region
    (cairo-surface.c:1840)
    ==32723==    by 0x4067FDC: _clip_and_composite_trapezoids
    (cairo-surface-fallback.c:625)
    ==32723==    by 0x40689C5: _cairo_surface_fallback_paint
    (cairo-surface-fallback.c:835)
    ==32723==    by 0x4065731: _cairo_surface_paint (cairo-surface.c:1923)
    ==32723==    by 0x4044098: _cairo_gstate_paint (cairo-gstate.c:900)
    ==32723==    by 0x403C10B: cairo_paint (cairo.c:2052)

diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c
index 2c34d02..a981418 100644
--- a/pixman/pixman-utils.c
+++ b/pixman/pixman-utils.c
@@ -646,23 +646,40 @@ _pixman_run_fast_path (const pixman_fast_path_t *paths,
     pixman_bool_t mask_repeat =
        mask && mask->common.repeat == PIXMAN_REPEAT_NORMAL;
     pixman_bool_t result;
+    pixman_bool_t has_fast_path;
 
-    if ((src->type == BITS || _pixman_image_is_solid (src)) &&
-        (!mask || mask->type == BITS)
-        && !src->common.transform && !(mask && mask->common.transform)
-       && !src->common.alpha_map && !dest->common.alpha_map
-        && !(mask && mask->common.alpha_map)
-        && (src->common.filter != PIXMAN_FILTER_CONVOLUTION)
-        && (src->common.repeat != PIXMAN_REPEAT_PAD)
-        && (src->common.repeat != PIXMAN_REPEAT_REFLECT)
-        && (!mask || (mask->common.filter != PIXMAN_FILTER_CONVOLUTION &&
-                      mask->common.repeat != PIXMAN_REPEAT_PAD &&
-                      mask->common.repeat != PIXMAN_REPEAT_REFLECT))
-        && !src->bits.read_func && !src->bits.write_func
-        && !(mask && mask->bits.read_func)
-        && !(mask && mask->bits.write_func)
-        && !dest->bits.read_func
-        && !dest->bits.write_func)
+    has_fast_path = !dest->common.alpha_map &&
+                   !dest->bits.read_func &&
+                   !dest->bits.write_func;
+
+    if (has_fast_path)
+    {
+       has_fast_path = (src->type == BITS || _pixman_image_is_solid (src)) &&
+                       !src->common.transform &&
+                       !src->common.alpha_map &&
+                       src->common.filter != PIXMAN_FILTER_CONVOLUTION &&
+                       src->common.repeat != PIXMAN_REPEAT_PAD &&
+                       src->common.repeat != PIXMAN_REPEAT_REFLECT;
+       if (has_fast_path && src->type == BITS)
+       {
+           has_fast_path = !src->bits.read_func &&
+                           !src->bits.write_func;
+       }
+    }
+
+    if (mask && has_fast_path)
+    {
+       has_fast_path = mask->type == BITS &&
+                       !mask->common.transform &&
+                       !mask->common.alpha_map &&
+                       !mask->bits.read_func &&
+                       !mask->bits.write_func &&
+                       mask->common.filter != PIXMAN_FILTER_CONVOLUTION &&
+                       mask->common.repeat != PIXMAN_REPEAT_PAD &&
+                       mask->common.repeat != PIXMAN_REPEAT_REFLECT;
+    }
+
+    if (has_fast_path)
     {
        const pixman_fast_path_t *info;
        pixman_bool_t pixbuf;

commit c7b84f8b043018368fade4ad13730cfcaaf5c8cc
Author: Søren Sandmann Pedersen <[email protected]>
Date:   Tue Jul 21 00:17:15 2009 -0400

    Only apply the workaround if the clip region extends beyond the drawable.
    
    This works because the X server always attempts to set a clip region
    within the bounds of the drawable, and it only fails at it when it is
    computing the wrong translation and therefore needs the workaround.

diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
index be28ebc..ff29620 100644
--- a/pixman/pixman-bits-image.c
+++ b/pixman/pixman-bits-image.c
@@ -809,25 +809,18 @@ source_image_needs_out_of_bounds_workaround (bits_image_t *image)
        image->common.have_clip_region                  &&
         out_of_bounds_workaround)
     {
-       const pixman_box32_t *boxes;
-       int n;
-
        if (!image->common.client_clip)
        {
            /* There is no client clip, so the drawable in question
-            * is a window if the clip region is different from the
-            * full drawable
+            * is a window if the clip region extends beyond the
+            * drawable geometry.
             */
-           boxes = pixman_region32_rectangles (&image->common.clip_region, &n);
-           if (n == 1)
+           const pixman_box32_t *extents = pixman_region32_extents (&image->common.clip_region);
+
+           if (extents->x1 >= 0 && extents->x2 < image->width &&
+               extents->y1 >= 0 && extents->y2 < image->height)
            {
-               if (boxes[0].x1 == 0 && boxes[0].y1 == 0 &&
-                   boxes[0].x2 == image->width &&
-                   boxes[0].y2 == image->height)
-               {
-                   /* pixmap */
-                   return FALSE;


-- 
To UNSUBSCRIBE, email to [email protected]
with a subject of "unsubscribe". Trouble? Contact [email protected]

pixman: Changes to 'upstream-experimental'

Reply via email to