Does this actually make sense? I remember somebody strongly opposing
the idea of spawning threads in pixman in the past, but I can't find
that e-mail right now.
Even if multithreaded rendering is acceptable, the next question is
whether to rely on OpenMP for it. Currently OpenMP is disabled in the
Android toolchain by default:
https://groups.google.com/forum/#!topic/android-ndk/pUfqxURgNbQ
Clang/LLVM does not support OpenMP either.
Some benchmarks with cairo-perf-trace (gcc 4.7.1, CFLAGS="-O2 -fopenmp"):
=== Core i7 860 @2.8GHz ===
before patch:
[ 0] image firefox-fishtank 66.912 66.931 0.13% 3/3
export OMP_NUM_THREADS=1
[ 0] image firefox-fishtank 67.285 67.393 0.12% 3/3
export OMP_NUM_THREADS=2
[ 0] image firefox-fishtank 40.156 40.192 0.07% 3/3
export OMP_NUM_THREADS=3
[ 0] image firefox-fishtank 31.152 31.241 0.21% 3/3
export OMP_NUM_THREADS=4
[ 0] image firefox-fishtank 26.507 26.540 0.15% 3/3
=== Radeon HD 6770 (xf86-video-ati-6.14.4, Mesa 8.1-devel (git-6e7756d)) ====
[ 0] xlib firefox-fishtank 34.135 34.156 0.23% 3/3
[ 0] gl firefox-fishtank 5.671 5.755 0.89% 3/3
---
pixman/pixman-inlines.h | 24 +++++++++++++++---------
1 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/pixman/pixman-inlines.h b/pixman/pixman-inlines.h
index 3532867..7ba0d09 100644
--- a/pixman/pixman-inlines.h
+++ b/pixman/pixman-inlines.h
@@ -765,6 +765,14 @@ bilinear_pad_repeat_get_scanline_bounds (int32_t
source_image_width,
* range and can fit into unsigned byte or be used with 8-bit SIMD
* multiplication instructions.
*/
+
+#define OMP_BILINEAR_PARALLEL_FOR _Pragma("omp parallel for default(none)
\
+ firstprivate(height,dst_line,dst_stride,unit_y,unit_x,src_first_line,
\
+ src_stride,max_vx,right_pad,left_pad,left_tz,right_tz,src_width,
\
+ src_width_fixed,src_image,need_src_extension,mask_line,
\
+ mask_stride,v,vy,width)
\
+ private(vx,y1,y2,mask) schedule(static) if(height > 1)")
+
#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t,
mask_type_t, \
dst_type_t, repeat_mode, flags)
\
static void
\
@@ -782,7 +790,7 @@ fast_composite_scaled_bilinear ## scale_func_name
(pixman_implementation_t *imp,
pixman_fixed_t unit_x, unit_y;
\
int32_t left_pad, left_tz, right_tz, right_pad;
\
\
- dst_type_t *dst;
\
+ int i;
\
mask_type_t solid_mask;
\
const mask_type_t *mask = &solid_mask;
\
int src_stride, mask_stride, dst_stride;
\
@@ -864,20 +872,19 @@ fast_composite_scaled_bilinear ## scale_func_name
(pixman_implementation_t *imp,
src_width_fixed = pixman_int_to_fixed (src_width);
\
}
\
\
- while (--height >= 0)
\
+ OMP_BILINEAR_PARALLEL_FOR
\
+ for (i = 0; i < height; i++)
\
{
\
int weight1, weight2;
\
- dst = dst_line;
\
- dst_line += dst_stride;
\
+ dst_type_t *dst = dst_line + (uintptr_t)dst_stride * i;
\
vx = v.vector[0];
\
if (flags & FLAG_HAVE_NON_SOLID_MASK)
\
{
\
- mask = mask_line;
\
- mask_line += mask_stride;
\
+ mask = mask_line + (uintptr_t)mask_stride * i;
\
}
\
\
- y1 = pixman_fixed_to_int (vy);
\
- weight2 = (vy >> 8) & 0xff;
\
+ y1 = pixman_fixed_to_int (vy + unit_y * i);
\
+ weight2 = ((vy + unit_y * i) >> 8) & 0xff;
\
if (weight2)
\
{
\
/* normal case, both row weights are in 0-255 range and fit
unsigned byte */ \
@@ -890,7 +897,6 @@ fast_composite_scaled_bilinear ## scale_func_name
(pixman_implementation_t *imp,
y2 = y1;
\
weight1 = weight2 = 128;
\
}
\
- vy += unit_y;
\
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)
\
{
\
src_type_t *src1, *src2;
\
--
1.7.3.4
_______________________________________________
Pixman mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/pixman