Commit: 2e77c413ab3cdc5880c99ba14436d56f8280d3fb
Author: Bastien Montagne
Date:   Sun Jan 3 23:54:44 2016 +0100
Branches: master
https://developer.blender.org/rB2e77c413ab3cdc5880c99ba14436d56f8280d3fb

Sculpt: OMP -> BLI_task, step II.

This time, all tools' code itself.

Not much to say, except that we can also get rid of that ugly OMP caching
pre-process stuff for multires smoothing.

Together with the previous commit, we have about 5% average speedup on stroke
execution (though this varies a lot: up to 30% speedup in rare cases, and in
even rarer cases some odd massive slowdowns...).

Tech note: we may want to add a 'guided'-like feature to our BLI_task threaded
loop; I suspect the lack of it could explain the random massive slowdowns of
the new code (very rare, but annoying...).

===================================================================

M       source/blender/editors/sculpt_paint/sculpt.c

===================================================================

diff --git a/source/blender/editors/sculpt_paint/sculpt.c 
b/source/blender/editors/sculpt_paint/sculpt.c
index a78a79a..d4afc6c 100644
--- a/source/blender/editors/sculpt_paint/sculpt.c
+++ b/source/blender/editors/sculpt_paint/sculpt.c
@@ -200,11 +200,6 @@ typedef struct StrokeCache {
        float clip_tolerance[3];
        float initial_mouse[2];
 
-       /* Pre-allocated temporary storage used during smoothing */
-       int num_threads, init_num_threads;
-       float (**tmpgrid_co)[3], (**tmprow_co)[3];
-       float **tmpgrid_mask, **tmprow_mask;
-
        /* Variants */
        float radius;
        float radius_squared;
@@ -1528,36 +1523,71 @@ static float bmesh_neighbor_average_mask(BMVert *v, 
const int cd_vert_mask_offse
        }
 }
 
-static void do_mesh_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode 
*node, float bstrength, int smooth_mask)
+typedef struct SculptDoBrushData {
+       Sculpt *sd;
+       Object *ob;
+       Brush *brush;
+    PBVHNode **nodes;
+
+       /* Data specific to some brushes. */
+       /* Note: even only one or two of those are used at a time, keeping them 
separated, names help figuring out
+        *       what it is, and memory overhead is ridiculous anyway... */
+       SculptProjectVector *spvc;
+       float flippedbstrength;
+       float angle;
+       float *offset;
+       float *grab_delta;
+       float *cono;
+       float *area_no;
+       float *area_no_sp;
+       float *area_co;
+       float (*mat)[4];
+       float strength;
+       bool smooth_mask;
+
+       ThreadMutex mutex;
+} SculptDoBrushData;
+
+/* Note: uses after-struct allocated mem to store actual cache... */
+typedef struct SculptDoBrushSmoothGridDataChunk {
+       size_t tmpgrid_size;
+} SculptDoBrushSmoothGridDataChunk;
+
+static void do_smooth_brush_mesh_task_cb(void *userdata, void 
*UNUSED(userdata_chunk), int n)
 {
-       Brush *brush = BKE_paint_brush(&sd->paint);
+       SculptDoBrushData *data = userdata;
+       SculptSession *ss = data->ob->sculpt;
+       Sculpt *sd = data->sd;
+       Brush *brush = data->brush;
+       const bool smooth_mask = data->smooth_mask;
+       float bstrength = data->strength;
+
        PBVHVertexIter vd;
        SculptBrushTest test;
-       
+
        CLAMP(bstrength, 0.0f, 1.0f);
 
        sculpt_brush_test_init(ss, &test);
 
-       BKE_pbvh_vertex_iter_begin(ss->pbvh, node, vd, PBVH_ITER_UNIQUE)
+       BKE_pbvh_vertex_iter_begin(ss->pbvh, data->nodes[n], vd, 
PBVH_ITER_UNIQUE)
        {
                if (sculpt_brush_test(&test, vd.co)) {
-                       const float fade = bstrength * tex_strength(ss, brush, 
vd.co, test.dist,
-                                                                   vd.no, 
vd.fno,
-                                                                   smooth_mask 
? 0 : (vd.mask ? *vd.mask : 0.0f));
+                       const float fade = bstrength * tex_strength(
+                                              ss, brush, vd.co, test.dist, 
vd.no, vd.fno,
+                                              smooth_mask ? 0.0f : (vd.mask ? 
*vd.mask : 0.0f));
                        if (smooth_mask) {
                                float val = neighbor_average_mask(ss, 
vd.vert_indices[vd.i]) - *vd.mask;
                                val *= fade * bstrength;
                                *vd.mask += val;
-                               CLAMP(*vd.mask, 0, 1);
+                               CLAMP(*vd.mask, 0.0f, 1.0f);
                        }
                        else {
                                float avg[3], val[3];
 
                                neighbor_average(ss, avg, 
vd.vert_indices[vd.i]);
                                sub_v3_v3v3(val, avg, vd.co);
-                               mul_v3_fl(val, fade);
 
-                               add_v3_v3(val, vd.co);
+                               madd_v3_v3v3fl(val, vd.co, val, fade);
 
                                sculpt_clip(sd, ss, vd.co, val);
                        }
@@ -1569,36 +1599,40 @@ static void do_mesh_smooth_brush(Sculpt *sd, 
SculptSession *ss, PBVHNode *node,
        BKE_pbvh_vertex_iter_end;
 }
 
-static void do_bmesh_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode 
*node, float bstrength, int smooth_mask)
+static void do_smooth_brush_bmesh_task_cb(void *userdata, void 
*UNUSED(userdata_chunk), int n)
 {
-       Brush *brush = BKE_paint_brush(&sd->paint);
+       SculptDoBrushData *data = userdata;
+       SculptSession *ss = data->ob->sculpt;
+       Sculpt *sd = data->sd;
+       Brush *brush = data->brush;
+       const bool smooth_mask = data->smooth_mask;
+       float bstrength = data->strength;
+
        PBVHVertexIter vd;
        SculptBrushTest test;
-       
+
        CLAMP(bstrength, 0.0f, 1.0f);
 
        sculpt_brush_test_init(ss, &test);
 
-       BKE_pbvh_vertex_iter_begin(ss->pbvh, node, vd, PBVH_ITER_UNIQUE)
+       BKE_pbvh_vertex_iter_begin(ss->pbvh, data->nodes[n], vd, 
PBVH_ITER_UNIQUE)
        {
                if (sculpt_brush_test(&test, vd.co)) {
-                       const float fade = bstrength * tex_strength(ss, brush, 
vd.co, test.dist,
-                                                                   vd.no, 
vd.fno,
-                                                                   smooth_mask 
? 0 : *vd.mask);
+                       const float fade = bstrength * tex_strength(
+                                              ss, brush, vd.co, test.dist, 
vd.no, vd.fno, smooth_mask ? 0.0f : *vd.mask);
                        if (smooth_mask) {
                                float val = 
bmesh_neighbor_average_mask(vd.bm_vert, vd.cd_vert_mask_offset) - *vd.mask;
                                val *= fade * bstrength;
                                *vd.mask += val;
-                               CLAMP(*vd.mask, 0, 1);
+                               CLAMP(*vd.mask, 0.0f, 1.0f);
                        }
                        else {
                                float avg[3], val[3];
 
                                bmesh_neighbor_average(avg, vd.bm_vert);
                                sub_v3_v3v3(val, avg, vd.co);
-                               mul_v3_fl(val, fade);
 
-                               add_v3_v3(val, vd.co);
+                               madd_v3_v3v3fl(val, vd.co, val, fade);
 
                                sculpt_clip(sd, ss, vd.co, val);
                        }
@@ -1610,74 +1644,77 @@ static void do_bmesh_smooth_brush(Sculpt *sd, 
SculptSession *ss, PBVHNode *node,
        BKE_pbvh_vertex_iter_end;
 }
 
-static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode 
*node,
-                                     float bstrength, int smooth_mask)
+static void do_smooth_brush_multires_task_cb(void *userdata, void 
*userdata_chunk, int n)
 {
-       Brush *brush = BKE_paint_brush(&sd->paint);
+       SculptDoBrushData *data = userdata;
+       SculptDoBrushSmoothGridDataChunk *data_chunk = userdata_chunk;
+       SculptSession *ss = data->ob->sculpt;
+       Sculpt *sd = data->sd;
+       Brush *brush = data->brush;
+       const bool smooth_mask = data->smooth_mask;
+       float bstrength = data->strength;
+
        SculptBrushTest test;
-       CCGElem **griddata, *data;
+       CCGElem **griddata, *gddata;
        CCGKey key;
-       float (*tmpgrid_co)[3], (*tmprow_co)[3];
-       float *tmpgrid_mask, *tmprow_mask;
-       int v1, v2, v3, v4;
-       int thread_num;
+
+       float (*tmpgrid_co)[3] = NULL;
+       float tmprow_co[2][3];
+       float *tmpgrid_mask = NULL;
+       float tmprow_mask[2];
+
        BLI_bitmap * const *grid_hidden;
-       int *grid_indices, totgrid, gridsize, i, x, y;
+       int *grid_indices, totgrid, gridsize;
+       int i, x, y;
 
        sculpt_brush_test_init(ss, &test);
 
        CLAMP(bstrength, 0.0f, 1.0f);
 
-       BKE_pbvh_node_get_grids(ss->pbvh, node, &grid_indices, &totgrid,
-                               NULL, &gridsize, &griddata);
+       BKE_pbvh_node_get_grids(ss->pbvh, data->nodes[n], &grid_indices, 
&totgrid, NULL, &gridsize, &griddata);
        BKE_pbvh_get_grid_key(ss->pbvh, &key);
 
        grid_hidden = BKE_pbvh_grid_hidden(ss->pbvh);
 
-#ifdef _OPENMP
-       thread_num = omp_get_thread_num();
-#else
-       thread_num = 0;
-#endif
-       tmpgrid_co = ss->cache->tmpgrid_co[thread_num];
-       tmprow_co = ss->cache->tmprow_co[thread_num];
-       tmpgrid_mask = ss->cache->tmpgrid_mask[thread_num];
-       tmprow_mask = ss->cache->tmprow_mask[thread_num];
+       if (smooth_mask)
+               tmpgrid_mask = (void *)(data_chunk + 1);
+       else
+               tmpgrid_co = (void *)(data_chunk + 1);
 
-       for (i = 0; i < totgrid; ++i) {
+       for (i = 0; i < totgrid; i++) {
                int gi = grid_indices[i];
                const BLI_bitmap *gh = grid_hidden[gi];
-               data = griddata[gi];
+               gddata = griddata[gi];
 
                if (smooth_mask)
-                       memset(tmpgrid_mask, 0, sizeof(float) * gridsize * 
gridsize);
+                       memset(tmpgrid_mask, 0, data_chunk->tmpgrid_size);
                else
-                       memset(tmpgrid_co, 0, sizeof(float) * 3 * gridsize * 
gridsize);
+                       memset(tmpgrid_co, 0, data_chunk->tmpgrid_size);
 
                for (y = 0; y < gridsize - 1; y++) {
-                       v1 = y * gridsize;
+                       const int v = y * gridsize;
                        if (smooth_mask) {
-                               tmprow_mask[0] = (*CCG_elem_offset_mask(&key, 
data, v1) +
-                                                 *CCG_elem_offset_mask(&key, 
data, v1 + gridsize));
+                               tmprow_mask[0] = (*CCG_elem_offset_mask(&key, 
gddata, v) +
+                                                 *CCG_elem_offset_mask(&key, 
gddata, v + gridsize));
                        }
                        else {
                                add_v3_v3v3(tmprow_co[0],
-                                           CCG_elem_offset_co(&key, data, v1),
-                                           CCG_elem_offset_co(&key, data, v1 + 
gridsize));
+                                           CCG_elem_offset_co(&key, gddata, v),
+                                           CCG_elem_offset_co(&key, gddata, v 
+ gridsize));
                        }
 
                        for (x = 0; x < gridsize - 1; x++) {
-                               v1 = x + y * gridsize;
-                               v2 = v1 + 1;
-                               v3 = v1 + gridsize;
-                               v4 = v3 + 1;
+                               const int v1 = x + y * gridsize;
+                               const int v2 = v1 + 1;
+                               const int v3 = v1 + gridsize;
+                               const int v4 = v3 + 1;
 
                                if (smooth_mask) {
                                        float tmp;
 
-                                       tmprow_mask[x + 1] = 
(*CCG_elem_offset_mask(&key, data, v2) +
-                                                             
*CCG_elem_offset_mask(&key, data, v4));
-                                       tmp = tmprow_mask[x + 1] + 
tmprow_mask[x];
+                                       tmprow_mask[(x + 1) % 2] = 
(*CCG_elem_offset_mask(&key, gddata, v2) +
+                                                                   
*CCG_elem_offset_mask(&key, gddata, v4));
+                                       tmp = tmprow_mask[(x + 1) % 2] + 
tmprow_mask[x % 2];
 
                                        tmpgrid_mask[v1] += tmp;
                                        tmpgrid_mask[v2] += tmp;
@@ -1687,10 +1724,10 @@ static void do_multires_smooth_brush(Sculpt *sd, 
SculptSession *ss, PBVHNode *no
                                else {
                                        float tmp[3];
 
-                                       add_v3_v3v3(tmprow_co[x + 1],
-                                                   CCG_elem_offset_co(&key, 
data, v2),
-                                                   CCG_elem_offset_co(&key, 
data, v4));
-                                       add_v3_v3v3(tmp, tmprow_co[x + 1], 
tmprow_co[x]);
+                                       add_v3_v3v3(tmprow_co[(x + 1) % 2],
+                                                   CCG_elem_offset_co(&key, 
gddata, v2),
+                                                   CCG_elem_offset_co(&key, 
gddata, v4));
+                                       add_v3_v3v3(tmp, tmprow_co[(x + 1) % 
2], tmprow_co[x % 2]);
 
                                        add_v3_v3(tmpgrid_co[v1], tmp);
                                        add_v3_v3(tmpgrid_co[v2], tmp);
@@ -1701,49 +1738,43 @@ static void do_multires_smooth_brush(Sculpt *sd, 
SculptSession *ss, PBVHNode *no
                }
 
                /* blend with existing coordinates */
-               for (y = 0; y < gridsize; ++y) {
-                       for (x = 0; x < gridsize; ++x) {
+               for (y = 0; y < gridsize; y++) {
+                       for (x = 0; x < gridsize; x++) {
                                float *co;
                                const float *fno;
                                float *mask;
-                               int index;
+                               const int index = y * gridsize + x;
 
                                if (gh) {
-                                       if (BLI_BITMAP_TEST(gh, y * gridsize + 
x))
+                                       if (BLI_BITMAP_TEST(gh, index))
                                                continue;
                                }
 
-                               index = x + y * gridsize;
-                               co = CCG_elem_offset_co(&key, data, index);
-                               fno = CCG_elem_offset_no(&key, data, index);
-                               mask = 

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to