configure.ac | 30 pixman/Makefile.am | 10 pixman/Makefile.win32 | 57 pixman/make-combine.pl | 5 pixman/pixman-access.c | 2604 ++++++++----- pixman/pixman-accessor.h | 4 pixman/pixman-arm-neon.c | 4256 ++++++++++++--------- pixman/pixman-arm-neon.h | 179 pixman/pixman-arm-simd.c | 564 +- pixman/pixman-arm-simd.h | 106 pixman/pixman-bits-image.c | 473 +- pixman/pixman-combine.c.template | 2661 +++++++------ pixman/pixman-combine.h.template | 345 - pixman/pixman-compiler.h | 12 pixman/pixman-conical-gradient.c | 144 pixman/pixman-cpu.c | 443 +- pixman/pixman-edge-imp.h | 58 pixman/pixman-edge.c | 369 - pixman/pixman-fast-path.c | 1390 +++---- pixman/pixman-general.c | 273 - pixman/pixman-gradient-walker.c | 184 pixman/pixman-image.c | 226 - pixman/pixman-implementation.c | 295 - pixman/pixman-linear-gradient.c | 189 pixman/pixman-matrix.c | 1010 ++--- pixman/pixman-mmx.c | 2915 +++++++------- pixman/pixman-private.h | 822 ++-- pixman/pixman-radial-gradient.c | 171 pixman/pixman-region.c | 2499 +++++++----- pixman/pixman-region16.c | 10 pixman/pixman-solid-fill.c | 38 pixman/pixman-sse2.c | 7497 ++++++++++++++++++++------------------ pixman/pixman-timer.c | 12 pixman/pixman-trap.c | 224 - pixman/pixman-utils.c | 714 +-- pixman/pixman-vmx.c | 2361 ++++++----- pixman/pixman-x64-mmx-emulation.h | 263 + pixman/pixman.c | 289 - pixman/pixman.h | 483 +- test/Makefile.am | 6 test/composite-test.c | 113 test/oob-test.c | 35 test/region-test.c | 63 test/scaling-test.c | 599 +-- test/window-test.c | 173 45 files changed, 19661 insertions(+), 15513 deletions(-)
New commits: commit f3ac1368775542e09f3741d2ad7b72af20bd9663 Author: Søren Sandmann Pedersen <[email protected]> Date: Tue Jul 21 07:20:57 2009 -0400 Pre-release version bump diff --git a/configure.ac b/configure.ac index 5a49a6c..7d825b7 100644 --- a/configure.ac +++ b/configure.ac @@ -54,7 +54,7 @@ AC_PREREQ([2.57]) m4_define([pixman_major], 0) m4_define([pixman_minor], 15) -m4_define([pixman_micro], 17) +m4_define([pixman_micro], 18) m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro]) commit 7c56911e3b5b97b26dceff9b68d9fed32693d57b Author: Søren Sandmann Pedersen <[email protected]> Date: Tue Jul 21 07:01:10 2009 -0400 Don't assert when malformed regions are detected. Instead print a message to stderr so that it will end up in the X log file. diff --git a/pixman/pixman-region.c b/pixman/pixman-region.c index 7820b94..387dbba 100644 --- a/pixman/pixman-region.c +++ b/pixman/pixman-region.c @@ -64,7 +64,43 @@ #define PIXREGION_END(reg) PIXREGION_BOX (reg, (reg)->data->numRects - 1) #define GOOD_RECT(rect) ((rect)->x1 < (rect)->x2 && (rect)->y1 < (rect)->y2) -#define GOOD(reg) assert (PREFIX (_selfcheck) (reg)) + +#define PIXMAN_REGION_LOG_FAILURES + +#if defined PIXMAN_REGION_DEBUG + +# define GOOD(reg) assert (PREFIX (_selfcheck) (reg)) + +#elif defined PIXMAN_REGION_LOG_FAILURES + +static void +log_region_error (void) +{ + static int n_messages = 0; + + if (n_messages < 50) + { + fprintf (stderr, + "*** BUG ***\n" + "Malformed region detected\n" + "Set a breakpoint on 'log_region_error' to debug\n\n"); + + n_messages++; + } +} + +#define GOOD(reg) \ + do \ + { \ + if (!PREFIX (_selfcheck (reg))) \ + log_region_error (); \ + } while (0) + +#else + +#define GOOD(reg) + +#endif static const box_type_t PREFIX (_empty_box_) = { 0, 0, 0, 0 }; static const region_data_type_t PREFIX (_empty_data_) = { 0, 0 }; @@ -467,7 +503,7 @@ PREFIX (_copy) (region_type_t *dst, region_type_t *src) { GOOD (dst); GOOD (src); - + if (dst == src) return TRUE; commit f9660ce29ed072c6cbaec711c5d18b9f0ba113ae Author: Søren Sandmann Pedersen <[email protected]> Date: Tue Jul 21 04:23:56 2009 -0400 Fix another search and replace issue diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c index 0f78584..14f7482 100644 --- a/pixman/pixman-arm-neon.c +++ b/pixman/pixman-arm-neon.c @@ -1898,8 +1898,8 @@ pixman_fill_neon (uint32_t *bits, #define NEON_SCANLINE_BUFFER_PIXELS (1024) static inline void -neon_quadword_copy ((void *) dst, - (void *) src, +neon_quadword_copy (void * dst, + void * src, uint32_t count, /* of quadwords */ uint32_t trailer_count /* of bytes */) { commit b3196b63274134a594fc091ec2f8be3b44734411 Author: Søren Sandmann Pedersen <[email protected]> Date: Tue Jul 21 04:18:35 2009 -0400 Fix search-and-replace issue pointed out by Koen Kooi. diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c index 2f7b8a0..0f78584 100644 --- a/pixman/pixman-arm-neon.c +++ b/pixman/pixman-arm-neon.c @@ -169,8 +169,8 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl, w = width; #ifndef USE_GCC_INLINE_ASM - sval = vld1_u8 (((void *))src); - dval = vld1_u8 (((void *))dst); + sval = vld1_u8 ((void *)src); + dval = vld1_u8 ((void *)dst); keep_dst = dst; temp = vqadd_u8 (dval, sval); @@ -181,10 +181,10 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl, while (w) { - sval = vld1_u8 (((void *))src); - dval = vld1_u8 (((void *))dst); + sval = vld1_u8 ((void *)src); + dval = vld1_u8 ((void *)dst); - vst1_u8 (((void *))keep_dst, temp); + vst1_u8 ((void *)keep_dst, temp); keep_dst = dst; temp = vqadd_u8 (dval, sval); @@ -194,7 +194,7 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl, w -= 8; } - vst1_u8 (((void *))keep_dst, temp); + vst1_u8 ((void *)keep_dst, temp); #else asm volatile ( /* avoid using d8-d15 (q4-q7) aapcs callee-save registers */ @@ -249,9 +249,9 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl, if (w & 4) { sval = vreinterpret_u8_u32 ( - vld1_lane_u32 (((void *))src, vreinterpret_u32_u8 (sval), 1)); + vld1_lane_u32 ((void *)src, vreinterpret_u32_u8 (sval), 1)); dval = vreinterpret_u8_u32 ( - vld1_lane_u32 (((void *))dst, vreinterpret_u32_u8 (dval), 1)); + vld1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (dval), 1)); dst4 = dst; src += 4; @@ -261,9 +261,9 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl, if (w & 2) { sval = vreinterpret_u8_u16 ( - vld1_lane_u16 (((void *))src, vreinterpret_u16_u8 (sval), 1)); + vld1_lane_u16 ((void *)src, vreinterpret_u16_u8 (sval), 1)); dval = vreinterpret_u8_u16 ( - vld1_lane_u16 (((void *))dst, vreinterpret_u16_u8 (dval), 1)); + vld1_lane_u16 ((void *)dst, vreinterpret_u16_u8 (dval), 1)); dst2 = dst; src += 2; @@ -282,10 +282,10 @@ neon_composite_add_8000_8000 (pixman_implementation_t * impl, vst1_lane_u8 (dst, dval, 1); if (w & 2) - vst1_lane_u16 (((void *))dst2, vreinterpret_u16_u8 (dval), 1); + vst1_lane_u16 ((void *)dst2, vreinterpret_u16_u8 (dval), 1); if (w & 4) - vst1_lane_u32 (((void *))dst4, vreinterpret_u32_u8 (dval), 1); + vst1_lane_u32 ((void *)dst4, vreinterpret_u32_u8 (dval), 1); } } } @@ -328,8 +328,8 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl, w = width; #ifndef USE_GCC_INLINE_ASM - sval = vld4_u8 (((void *))src); - dval = vld4_u8 (((void *))dst); + sval = vld4_u8 ((void *)src); + dval = vld4_u8 ((void *)dst); keep_dst = dst; temp = neon8mul (dval, vmvn_u8 (sval.val[3])); @@ -341,10 +341,10 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl, while (w) { - sval = vld4_u8 (((void *))src); - dval = vld4_u8 (((void *))dst); + sval = vld4_u8 ((void *)src); + dval = vld4_u8 ((void *)dst); - vst4_u8 (((void *))keep_dst, temp); + vst4_u8 ((void *)keep_dst, temp); keep_dst = dst; temp = neon8mul (dval, vmvn_u8 (sval.val[3])); @@ -355,7 +355,7 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl, w -= 8; } - vst4_u8 (((void *))keep_dst, temp); + vst4_u8 ((void *)keep_dst, temp); #else asm volatile ( /* avoid using d8-d15 (q4-q7) aapcs callee-save registers */ @@ -427,10 +427,10 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl, uint8x8_t sval, dval; /* two 32-bit pixels packed into D-reg; ad-hoc vectorization */ - sval = vreinterpret_u8_u32 (vld1_u32 (((void *))src)); - dval = vreinterpret_u8_u32 (vld1_u32 (((void *))dst)); + sval = vreinterpret_u8_u32 (vld1_u32 ((void *)src)); + dval = vreinterpret_u8_u32 (vld1_u32 ((void *)dst)); dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector)); - vst1_u8 (((void *))dst, vqadd_u8 (sval, dval)); + vst1_u8 ((void *)dst, vqadd_u8 (sval, dval)); src += 2; dst += 2; @@ -442,10 +442,10 @@ neon_composite_over_8888_8888 (pixman_implementation_t * impl, uint8x8_t sval, dval; /* single 32-bit pixel in lane 0 */ - sval = vreinterpret_u8_u32 (vld1_dup_u32 (((void *))src)); /* only interested in lane 0 */ - dval = vreinterpret_u8_u32 (vld1_dup_u32 (((void *))dst)); /* only interested in lane 0 */ + sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)src)); /* only interested in lane 0 */ + dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst)); /* only interested in lane 0 */ dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector)); - vst1_lane_u32 (((void *))dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0); + vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0); } } } @@ -495,8 +495,8 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl, #ifndef USE_GCC_INLINE_ASM uint8x8x4_t sval, dval, temp; - sval = vld4_u8 (((void *))src); - dval = vld4_u8 (((void *))dst); + sval = vld4_u8 ((void *)src); + dval = vld4_u8 ((void *)dst); keep_dst = dst; sval = neon8mul (sval, mask_alpha); @@ -509,10 +509,10 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl, while (w) { - sval = vld4_u8 (((void *))src); - dval = vld4_u8 (((void *))dst); + sval = vld4_u8 ((void *)src); + dval = vld4_u8 ((void *)dst); - vst4_u8 (((void *))keep_dst, temp); + vst4_u8 ((void *)keep_dst, temp); keep_dst = dst; sval = neon8mul (sval, mask_alpha); @@ -523,7 +523,7 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl, dst += 8; w -= 8; } - vst4_u8 (((void *))keep_dst, temp); + vst4_u8 ((void *)keep_dst, temp); #else asm volatile ( /* avoid using d8-d15 (q4-q7) aapcs callee-save registers */ @@ -612,8 +612,8 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl, { uint8x8_t sval, dval; - sval = vreinterpret_u8_u32 (vld1_u32 (((void *))src)); - dval = vreinterpret_u8_u32 (vld1_u32 (((void *))dst)); + sval = vreinterpret_u8_u32 (vld1_u32 ((void *)src)); + dval = vreinterpret_u8_u32 (vld1_u32 ((void *)dst)); /* sval * const alpha_mul */ sval = neon2mul (sval, mask_alpha); @@ -621,7 +621,7 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl, /* dval * 255-(src alpha) */ dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector)); - vst1_u8 (((void *))dst, vqadd_u8 (sval, dval)); + vst1_u8 ((void *)dst, vqadd_u8 (sval, dval)); src += 2; dst += 2; @@ -632,8 +632,8 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl, { uint8x8_t sval, dval; - sval = vreinterpret_u8_u32 (vld1_dup_u32 (((void *))src)); - dval = vreinterpret_u8_u32 (vld1_dup_u32 (((void *))dst)); + sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)src)); + dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst)); /* sval * const alpha_mul */ sval = neon2mul (sval, mask_alpha); @@ -641,7 +641,7 @@ neon_composite_over_8888_n_8888 (pixman_implementation_t * impl, /* dval * 255-(src alpha) */ dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector)); - vst1_lane_u32 (((void *))dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0); + vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0); } } } @@ -703,8 +703,8 @@ neon_composite_over_n_8_0565 (pixman_implementation_t * impl, uint16x8_t dval, temp; uint8x8x4_t sval8temp; - alpha = vld1_u8 (((void *))mask); - dval = vld1q_u16 (((void *))dst); + alpha = vld1_u8 ((void *)mask); + dval = vld1q_u16 ((void *)dst); keep_dst = dst; sval8temp = neon8mul (sval8, alpha); @@ -716,10 +716,10 @@ neon_composite_over_n_8_0565 (pixman_implementation_t * impl, while (w) { - dval = vld1q_u16 (((void *))dst); - alpha = vld1_u8 (((void *))mask); + dval = vld1q_u16 ((void *)dst); + alpha = vld1_u8 ((void *)mask); - vst1q_u16 (((void *))keep_dst, temp); + vst1q_u16 ((void *)keep_dst, temp); keep_dst = dst; sval8temp = neon8mul (sval8, alpha); @@ -729,7 +729,7 @@ neon_composite_over_n_8_0565 (pixman_implementation_t * impl, dst+=8; w-=8; } - vst1q_u16 (((void *))keep_dst, temp); + vst1q_u16 ((void *)keep_dst, temp); #else asm volatile ( "vdup.32 d0, %[src]\n\t" @@ -842,35 +842,35 @@ neon_composite_over_n_8_0565 (pixman_implementation_t * impl, if (w&4) { - alpha = vreinterpret_u8_u32 (vld1_lane_u32 (((void *))mask, vreinterpret_u32_u8 (alpha),1)); - dval = vreinterpretq_u16_u64 (vld1q_lane_u64 (((void *))dst, vreinterpretq_u64_u16 (dval),1)); + alpha = vreinterpret_u8_u32 (vld1_lane_u32 ((void *)mask, vreinterpret_u32_u8 (alpha),1)); + dval = vreinterpretq_u16_u64 (vld1q_lane_u64 ((void *)dst, vreinterpretq_u64_u16 (dval),1)); dst4=dst; mask+=4; dst+=4; } if (w&2) { - alpha = vreinterpret_u8_u16 (vld1_lane_u16 (((void *))mask, vreinterpret_u16_u8 (alpha),1)); - dval = vreinterpretq_u16_u32 (vld1q_lane_u32 (((void *))dst, vreinterpretq_u32_u16 (dval),1)); + alpha = vreinterpret_u8_u16 (vld1_lane_u16 ((void *)mask, vreinterpret_u16_u8 (alpha),1)); + dval = vreinterpretq_u16_u32 (vld1q_lane_u32 ((void *)dst, vreinterpretq_u32_u16 (dval),1)); dst2=dst; mask+=2; dst+=2; } if (w&1) { - alpha = vld1_lane_u8 (((void *))mask, alpha,1); - dval = vld1q_lane_u16 (((void *))dst, dval,1); + alpha = vld1_lane_u8 ((void *)mask, alpha,1); + dval = vld1q_lane_u16 ((void *)dst, dval,1); } sval8temp = neon8mul (sval8, alpha); temp = pack0565 (neon8qadd (sval8temp, neon8mul (unpack0565 (dval), vmvn_u8 (sval8temp.val[3])))); if (w&1) - vst1q_lane_u16 (((void *))dst, temp,1); + vst1q_lane_u16 ((void *)dst, temp,1); if (w&2) - vst1q_lane_u32 (((void *))dst2, vreinterpretq_u32_u16 (temp),1); + vst1q_lane_u32 ((void *)dst2, vreinterpretq_u32_u16 (temp),1); if (w&4) - vst1q_lane_u64 (((void *))dst4, vreinterpretq_u64_u16 (temp),1); + vst1q_lane_u64 ((void *)dst4, vreinterpretq_u64_u16 (temp),1); #else asm volatile ( "vdup.32 d0, %[src]\n\t" @@ -1040,8 +1040,8 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl, uint8x8_t alpha; uint8x8x4_t dval, temp; - alpha = vld1_u8 (((void *))mask); - dval = vld4_u8 (((void *))dst); + alpha = vld1_u8 ((void *)mask); + dval = vld4_u8 ((void *)dst); keep_dst = dst; temp = neon8mul (sval8, alpha); @@ -1054,10 +1054,10 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl, while (w) { - alpha = vld1_u8 (((void *))mask); - dval = vld4_u8 (((void *))dst); + alpha = vld1_u8 ((void *)mask); + dval = vld4_u8 ((void *)dst); - vst4_u8 (((void *))keep_dst, temp); + vst4_u8 ((void *)keep_dst, temp); keep_dst = dst; temp = neon8mul (sval8, alpha); @@ -1068,7 +1068,7 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl, dst += 8; w -= 8; } - vst4_u8 (((void *))keep_dst, temp); + vst4_u8 ((void *)keep_dst, temp); #else asm volatile ( "vdup.32 d0, %[src]\n\t" @@ -1160,14 +1160,14 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl, uint8x8_t dval, temp, res; alpha = vtbl1_u8 ( - vreinterpret_u8_u16 (vld1_dup_u16 (((void *))mask)), mask_selector); - dval = vld1_u8 (((void *))dst); + vreinterpret_u8_u16 (vld1_dup_u16 ((void *)mask)), mask_selector); + dval = vld1_u8 ((void *)dst); temp = neon2mul (sval2, alpha); res = vqadd_u8 ( temp, neon2mul (dval, vtbl1_u8 (vmvn_u8 (temp), alpha_selector))); - vst1_u8 (((void *))dst, res); + vst1_u8 ((void *)dst, res); mask += 2; dst += 2; @@ -1178,14 +1178,14 @@ neon_composite_over_n_8_8888 (pixman_implementation_t * impl, { uint8x8_t dval, temp, res; - alpha = vtbl1_u8 (vld1_dup_u8 (((void *))mask), mask_selector); - dval = vreinterpret_u8_u32 (vld1_dup_u32 (((void *))dst)); + alpha = vtbl1_u8 (vld1_dup_u8 ((void *)mask), mask_selector); + dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst)); temp = neon2mul (sval2, alpha); res = vqadd_u8 ( temp, neon2mul (dval, vtbl1_u8 (vmvn_u8 (temp), alpha_selector))); - vst1_lane_u32 (((void *))dst, vreinterpret_u32_u8 (res), 0); + vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (res), 0); } } } commit 0ff5733c16804d5b10782556eeeade7061924846 Author: George Yohng <[email protected]> Date: Tue Jul 21 03:43:42 2009 -0400 Add implementation of MMX __m64 functions for MSVC x64. Microsoft C++ does not define __m64 and all related MMX functions in x64. However, it succeeds in generating object files for SSE2 code inside pixman. The real problem happens during linking, when it cannot find MMX functions (which are not defined as intrinsics for AMD64 platform). I have implemented those missing functions using general programming. MMX __m64 is used relatively scarcely within SSE2 implementation, and the performance impact probably is negligible. Bug 22390. diff --git a/pixman/Makefile.am b/pixman/Makefile.am index 0c528d8..e19fa6e 100644 --- a/pixman/Makefile.am +++ b/pixman/Makefile.am @@ -52,7 +52,7 @@ pixman-combine64.h : pixman-combine.h.template make-combine.pl $(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1) EXTRA_DIST = Makefile.win32 pixman-combine.c.template make-combine.pl pixman-region.c \ - pixman-combine.h.template solaris-hwcap.mapfile + pixman-combine.h.template solaris-hwcap.mapfile pixman-x64-mmx-emulation.h CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine32.h pixman-combine64.h # mmx code diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c index 3ed8165..2fa956e 100644 --- a/pixman/pixman-sse2.c +++ b/pixman/pixman-sse2.c @@ -36,6 +36,15 @@ #include "pixman-private.h" #include "pixman-combine32.h" +#if defined(_MSC_VER) && defined(_M_AMD64) +/* Windows 64 doesn't allow MMX to be used, so + * the pixman-x64-mmx-emulation.h file contains + * implementations of those MMX intrinsics that + * are used in the SSE2 implementation. + */ +# include "pixman-x64-mmx-emulation.h" +#endif + #ifdef USE_SSE2 /* -------------------------------------------------------------------- diff --git a/pixman/pixman-x64-mmx-emulation.h b/pixman/pixman-x64-mmx-emulation.h new file mode 100644 index 0000000..231030f --- /dev/null +++ b/pixman/pixman-x64-mmx-emulation.h @@ -0,0 +1,263 @@ +#ifndef MMX_X64_H_INCLUDED +#define MMX_X64_H_INCLUDED + +/* Implementation of x64 MMX substitition functions, before + * pixman is reimplemented not to use __m64 type on Visual C++ + * + * Copyright (C)2009 by George Yohng + * Released in public domain. + */ + +#include <intrin.h> + +#define M64C(a) (*(const __m64 *)(&a)) +#define M64U(a) (*(const unsigned long long *)(&a)) + +__inline __m64 +_m_from_int (int a) +{ + long long i64 = a; + + return M64C (i64); +} + +__inline __m64 +_mm_setzero_si64 () +{ + long long i64 = 0; + + return M64C (i64); +} + +__inline __m64 +_mm_set_pi32 (int i1, int i0) +{ + unsigned long long i64 = ((unsigned)i0) + (((unsigned long long)(unsigned)i1) << 32); + + return M64C (i64); +} + +__inline void +_m_empty () +{ +} + +__inline __m64 +_mm_set1_pi16 (short w) +{ + unsigned long long i64 = ((unsigned long long)(unsigned short)(w)) * 0x0001000100010001ULL; + + return M64C (i64); +} + +__inline int +_m_to_int (__m64 m) +{ + return m.m64_i32[0]; +} + +__inline __m64 +_mm_movepi64_pi64 (__m128i a) +{ + return M64C (a.m128i_i64[0]); +} + +__inline __m64 +_m_pand (__m64 a, __m64 b) +{ + unsigned long long i64 = M64U (a) & M64U (b); + + return M64C (i64); +} + +__inline __m64 +_m_por (__m64 a, __m64 b) +{ + unsigned long long i64 = M64U (a) | M64U (b); + + return M64C (i64); +} + +__inline __m64 +_m_pxor (__m64 a, __m64 b) +{ + unsigned long long i64 = M64U (a) ^ M64U (b); + + return M64C (i64); +} + +__inline __m64 +_m_pmulhuw (__m64 a, __m64 b) /* unoptimized */ +{ + unsigned short d[4] = + { + (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]) >> 16), + (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]) >> 16), + (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]) >> 16), + (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]) >> 16) + }; + + return M64C (d[0]); +} + +__inline __m64 +_m_pmullw2 (__m64 a, __m64 b) /* unoptimized */ +{ + unsigned short d[4] = + { + (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])), + (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1])), + (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2])), + (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3])) + }; + + return M64C (d[0]); +} + +__inline __m64 +_m_pmullw (__m64 a, __m64 b) /* unoptimized */ +{ + unsigned long long x = + ((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]))) + + (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]))) << 16) + + (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]))) << 32) + + (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))) << 48); + + return M64C (x); +} + +__inline __m64 +_m_paddusb (__m64 a, __m64 b) /* unoptimized */ +{ + unsigned long long x = (M64U (a) & 0x00FF00FF00FF00FFULL) + + (M64U (b) & 0x00FF00FF00FF00FFULL); + + unsigned long long y = ((M64U (a) >> 8) & 0x00FF00FF00FF00FFULL) + + ((M64U (b) >> 8) & 0x00FF00FF00FF00FFULL); + + x | = ((x & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF; + y | = ((y & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF; + + x = (x & 0x00FF00FF00FF00FFULL) | ((y & 0x00FF00FF00FF00FFULL) << 8); + + return M64C (x); +} + +__inline __m64 +_m_paddusw (__m64 a, __m64 b) /* unoptimized */ +{ + unsigned long long x = (M64U (a) & 0x0000FFFF0000FFFFULL) + + (M64U (b) & 0x0000FFFF0000FFFFULL); + + unsigned long long y = ((M64U (a) >> 16) & 0x0000FFFF0000FFFFULL) + + ((M64U (b) >> 16) & 0x0000FFFF0000FFFFULL); + + x | = ((x & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF; + y | = ((y & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF; + + x = (x & 0x0000FFFF0000FFFFULL) | ((y & 0x0000FFFF0000FFFFULL) << 16); + + return M64C (x); +} + +__inline __m64 +_m_pshufw (__m64 a, int n) /* unoptimized */ +{ + unsigned short d[4] = + { + a.m64_u16[n & 3], + a.m64_u16[(n >> 2) & 3], + a.m64_u16[(n >> 4) & 3], + a.m64_u16[(n >> 6) & 3] + }; + + return M64C (d[0]); +} + +__inline unsigned char +sat16 (unsigned short d) +{ + if (d > 0xFF) return 0xFF; + else return d & 0xFF; +} + +__inline __m64 +_m_packuswb (__m64 m1, __m64 m2) /* unoptimized */ +{ + unsigned char d[8] = + { + sat16 (m1.m64_u16[0]), + sat16 (m1.m64_u16[1]), + sat16 (m1.m64_u16[2]), + sat16 (m1.m64_u16[3]), + sat16 (m2.m64_u16[0]), + sat16 (m2.m64_u16[1]), + sat16 (m2.m64_u16[2]), + sat16 (m2.m64_u16[3]) + }; + + return M64C (d[0]); +} + +__inline __m64 _m_punpcklbw (__m64 m1, __m64 m2) /* unoptimized */ +{ + unsigned char d[8] = + { + m1.m64_u8[0], + m2.m64_u8[0], + m1.m64_u8[1], + m2.m64_u8[1], + m1.m64_u8[2], + m2.m64_u8[2], + m1.m64_u8[3], + m2.m64_u8[3], + }; + + return M64C (d[0]); +} + +__inline __m64 _m_punpckhbw (__m64 m1, __m64 m2) /* unoptimized */ +{ + unsigned char d[8] = + { + m1.m64_u8[4], + m2.m64_u8[4], + m1.m64_u8[5], + m2.m64_u8[5], + m1.m64_u8[6], + m2.m64_u8[6], + m1.m64_u8[7], + m2.m64_u8[7], + }; + + return M64C (d[0]); +} + +__inline __m64 _m_psrlwi (__m64 a, int n) /* unoptimized */ +{ + unsigned short d[4] = + { + a.m64_u16[0] >> n, + a.m64_u16[1] >> n, + a.m64_u16[2] >> n, + a.m64_u16[3] >> n + }; + + return M64C (d[0]); +} + +__inline __m64 _m_psrlqi (__m64 m, int n) +{ + unsigned long long x = M64U (m) >> n; + + return M64C (x); +} + +__inline __m64 _m_psllqi (__m64 m, int n) +{ + unsigned long long x = M64U (m) << n; + + return M64C (x); +} + +#endif /* MMX_X64_H_INCLUDED */ commit 0b95afd259bb839a026955e7fda15b44fa22a805 Author: Chris Wilson <[email protected]> Date: Mon Jul 20 14:07:18 2009 +0100 Fix read of BITS members from a solid image. During the fast-path query, the read_func and write_func from the bits structure are queried for the solid image. ==32723== Conditional jump or move depends on uninitialised value(s) ==32723== at 0x412AF20: _pixman_run_fast_path (pixman-utils.c:681) ==32723== by 0x4136319: sse2_composite (pixman-sse2.c:5554) ==32723== by 0x4100CD2: _pixman_implementation_composite (pixman-implementation.c:227) ==32723== by 0x412396E: pixman_image_composite (pixman.c:140) ==32723== by 0x4123D64: pixman_image_fill_rectangles (pixman.c:322) ==32723== by 0x40482B7: _cairo_image_surface_fill_rectangles (cairo-image-surface.c:1180) ==32723== by 0x4063BE7: _cairo_surface_fill_rectangles (cairo-surface.c:1883) ==32723== by 0x4063E38: _cairo_surface_fill_region (cairo-surface.c:1840) ==32723== by 0x4067FDC: _clip_and_composite_trapezoids (cairo-surface-fallback.c:625) ==32723== by 0x40689C5: _cairo_surface_fallback_paint (cairo-surface-fallback.c:835) ==32723== by 0x4065731: _cairo_surface_paint (cairo-surface.c:1923) ==32723== by 0x4044098: _cairo_gstate_paint (cairo-gstate.c:900) ==32723== Uninitialised value was created by a heap allocation ==32723== at 0x402732D: malloc (vg_replace_malloc.c:180) ==32723== by 0x410099F: _pixman_image_allocate (pixman-image.c:100) ==32723== by 0x41265B8: pixman_image_create_solid_fill (pixman-solid-fill.c:75) ==32723== by 0x4123CE1: pixman_image_fill_rectangles (pixman.c:314) ==32723== by 0x40482B7: _cairo_image_surface_fill_rectangles (cairo-image-surface.c:1180) ==32723== by 0x4063BE7: _cairo_surface_fill_rectangles (cairo-surface.c:1883) ==32723== by 0x4063E38: _cairo_surface_fill_region (cairo-surface.c:1840) ==32723== by 0x4067FDC: _clip_and_composite_trapezoids (cairo-surface-fallback.c:625) ==32723== by 0x40689C5: _cairo_surface_fallback_paint (cairo-surface-fallback.c:835) ==32723== by 0x4065731: _cairo_surface_paint (cairo-surface.c:1923) ==32723== by 0x4044098: _cairo_gstate_paint (cairo-gstate.c:900) ==32723== by 0x403C10B: cairo_paint (cairo.c:2052) diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c index 2c34d02..a981418 100644 --- a/pixman/pixman-utils.c +++ b/pixman/pixman-utils.c @@ -646,23 +646,40 @@ _pixman_run_fast_path (const pixman_fast_path_t *paths, pixman_bool_t mask_repeat = mask && mask->common.repeat == PIXMAN_REPEAT_NORMAL; pixman_bool_t result; + pixman_bool_t has_fast_path; - if ((src->type == BITS || _pixman_image_is_solid (src)) && - (!mask || mask->type == BITS) - && !src->common.transform && !(mask && mask->common.transform) - && !src->common.alpha_map && !dest->common.alpha_map - && !(mask && mask->common.alpha_map) - && (src->common.filter != PIXMAN_FILTER_CONVOLUTION) - && (src->common.repeat != PIXMAN_REPEAT_PAD) - && (src->common.repeat != PIXMAN_REPEAT_REFLECT) - && (!mask || (mask->common.filter != PIXMAN_FILTER_CONVOLUTION && - mask->common.repeat != PIXMAN_REPEAT_PAD && - mask->common.repeat != PIXMAN_REPEAT_REFLECT)) - && !src->bits.read_func && !src->bits.write_func - && !(mask && mask->bits.read_func) - && !(mask && mask->bits.write_func) - && !dest->bits.read_func - && !dest->bits.write_func) + has_fast_path = !dest->common.alpha_map && + !dest->bits.read_func && + !dest->bits.write_func; + + if (has_fast_path) + { + has_fast_path = (src->type == BITS || _pixman_image_is_solid (src)) && + !src->common.transform && + !src->common.alpha_map && + src->common.filter != PIXMAN_FILTER_CONVOLUTION && + src->common.repeat != PIXMAN_REPEAT_PAD && + src->common.repeat != PIXMAN_REPEAT_REFLECT; + if (has_fast_path && src->type == BITS) + { + has_fast_path = !src->bits.read_func && + !src->bits.write_func; + } + } + + if (mask && has_fast_path) + { + has_fast_path = mask->type == BITS && + !mask->common.transform && + !mask->common.alpha_map && + !mask->bits.read_func && + !mask->bits.write_func && + mask->common.filter != PIXMAN_FILTER_CONVOLUTION && + mask->common.repeat != PIXMAN_REPEAT_PAD && + mask->common.repeat != PIXMAN_REPEAT_REFLECT; + } + + if (has_fast_path) { const pixman_fast_path_t *info; pixman_bool_t pixbuf; commit c7b84f8b043018368fade4ad13730cfcaaf5c8cc Author: Søren Sandmann Pedersen <[email protected]> Date: Tue Jul 21 00:17:15 2009 -0400 Only apply the workaround if the clip region extends beyond the drawable. This works because the X server always attempts to set a clip region within the bounds of the drawable, and it only fails at it when it is computing the wrong translation and therefore needs the workaround. diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c index be28ebc..ff29620 100644 --- a/pixman/pixman-bits-image.c +++ b/pixman/pixman-bits-image.c @@ -809,25 +809,18 @@ source_image_needs_out_of_bounds_workaround (bits_image_t *image) image->common.have_clip_region && out_of_bounds_workaround) { - const pixman_box32_t *boxes; - int n; - if (!image->common.client_clip) { /* There is no client clip, so the drawable in question - * is a window if the clip region is different from the - * full drawable + * is a window if the clip region extends beyond the + * drawable geometry. */ - boxes = pixman_region32_rectangles (&image->common.clip_region, &n); - if (n == 1) + const pixman_box32_t *extents = pixman_region32_extents (&image->common.clip_region); + + if (extents->x1 >= 0 && extents->x2 < image->width && + extents->y1 >= 0 && extents->y2 < image->height) { - if (boxes[0].x1 == 0 && boxes[0].y1 == 0 && - boxes[0].x2 == image->width && - boxes[0].y2 == image->height) - { - /* pixmap */ - return FALSE; -- To UNSUBSCRIBE, email to [email protected] with a subject of "unsubscribe". Trouble? Contact [email protected]

