Does it actually make sense? I remember somebody strongly opposing
the idea of spawning threads in pixman in the past, but I can't find
that e-mail right now.

Even if using multithreaded rendering is acceptable, the next question is
whether to rely on OpenMP for it. Currently OpenMP is disabled in the Android
toolchain by default:
    https://groups.google.com/forum/#!topic/android-ndk/pUfqxURgNbQ
Clang/LLVM does not support OpenMP either.

Some benchmarks with cairo-perf-trace (gcc 4.7.1, CFLAGS="-O2 -fopenmp"):

=== Core i7 860 @2.8GHz ===

before patch:
[  0]    image             firefox-fishtank   66.912   66.931   0.13%    3/3

export OMP_NUM_THREADS=1
[  0]    image             firefox-fishtank   67.285   67.393   0.12%    3/3

export OMP_NUM_THREADS=2
[  0]    image             firefox-fishtank   40.156   40.192   0.07%    3/3

export OMP_NUM_THREADS=3
[  0]    image             firefox-fishtank   31.152   31.241   0.21%    3/3

export OMP_NUM_THREADS=4
[  0]    image             firefox-fishtank   26.507   26.540   0.15%    3/3

=== Radeon HD 6770 (xf86-video-ati-6.14.4, Mesa 8.1-devel (git-6e7756d)) ===

[  0]     xlib             firefox-fishtank   34.135   34.156   0.23%    3/3
[  0]       gl             firefox-fishtank    5.671    5.755   0.89%    3/3

---
 pixman/pixman-inlines.h |   24 +++++++++++++++---------
 1 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/pixman/pixman-inlines.h b/pixman/pixman-inlines.h
index 3532867..7ba0d09 100644
--- a/pixman/pixman-inlines.h
+++ b/pixman/pixman-inlines.h
@@ -765,6 +765,14 @@ bilinear_pad_repeat_get_scanline_bounds (int32_t         
source_image_width,
  *       range and can fit into unsigned byte or be used with 8-bit SIMD
  *       multiplication instructions.
  */
+
+#define OMP_BILINEAR_PARALLEL_FOR _Pragma("omp parallel for default(none)      
\
+     firstprivate(height,dst_line,dst_stride,unit_y,unit_x,src_first_line,     
\
+        src_stride,max_vx,right_pad,left_pad,left_tz,right_tz,src_width,       
\
+        src_width_fixed,src_image,need_src_extension,mask_line,                
        \
+        mask_stride,v,vy,width)                                                
        \
+     private(vx,y1,y2,mask) schedule(static) if(height > 1)")
+
 #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, 
mask_type_t,    \
                                  dst_type_t, repeat_mode, flags)               
                \
 static void                                                                    
                \
@@ -782,7 +790,7 @@ fast_composite_scaled_bilinear ## scale_func_name 
(pixman_implementation_t *imp,
     pixman_fixed_t unit_x, unit_y;                                             
                \
     int32_t left_pad, left_tz, right_tz, right_pad;                            
                \
                                                                                
                \
-    dst_type_t *dst;                                                           
                \
+    int i;                                                                     
                \
     mask_type_t solid_mask;                                                    
                \
     const mask_type_t *mask = &solid_mask;                                     
                \
     int src_stride, mask_stride, dst_stride;                                   
                \
@@ -864,20 +872,19 @@ fast_composite_scaled_bilinear ## scale_func_name 
(pixman_implementation_t *imp,
        src_width_fixed = pixman_int_to_fixed (src_width);                      
                \
     }                                                                          
                \
                                                                                
                \
-    while (--height >= 0)                                                      
                \
+    OMP_BILINEAR_PARALLEL_FOR                                                  
                \
+    for (i = 0; i < height; i++)                                               
                \
     {                                                                          
                \
        int weight1, weight2;                                                   
                \
-       dst = dst_line;                                                         
                \
-       dst_line += dst_stride;                                                 
                \
+       dst_type_t *dst = dst_line + (uintptr_t)dst_stride * i;                 
                \
        vx = v.vector[0];                                                       
                \
        if (flags & FLAG_HAVE_NON_SOLID_MASK)                                   
                \
        {                                                                       
                \
-           mask = mask_line;                                                   
                \
-           mask_line += mask_stride;                                           
                \
+           mask = mask_line + (uintptr_t)mask_stride * i;                      
                \
        }                                                                       
                \
                                                                                
                \
-       y1 = pixman_fixed_to_int (vy);                                          
                \
-       weight2 = (vy >> 8) & 0xff;                                             
                \
+       y1 = pixman_fixed_to_int (vy + unit_y * i);                             
                \
+       weight2 = ((vy + unit_y * i) >> 8) & 0xff;                              
                \
        if (weight2)                                                            
                \
        {                                                                       
                \
            /* normal case, both row weights are in 0-255 range and fit 
unsigned byte */        \
@@ -890,7 +897,6 @@ fast_composite_scaled_bilinear ## scale_func_name 
(pixman_implementation_t *imp,
            y2 = y1;                                                            
                \
            weight1 = weight2 = 128;                                            
                \
        }                                                                       
                \
-       vy += unit_y;                                                           
                \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                 
                \
        {                                                                       
                \
            src_type_t *src1, *src2;                                            
                \
-- 
1.7.3.4

_______________________________________________
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman

Reply via email to