[Bf-blender-cvs] [ef25a1e] compositor-2016: DynamicPaint: massive refactor, replace all OpenMP usage by BLI_task and other optimizations.

Bastien Montagne Wed, 08 Jun 2016 12:49:34 -0700

Commit: ef25a1ed871e96397ea8e4c856ed1236d5051404
Author: Bastien Montagne
Date:   Sat May 21 15:22:04 2016 +0200
Branches: compositor-2016
https://developer.blender.org/rBef25a1ed871e96397ea8e4c856ed1236d5051404


DynamicPaint: massive refactor, replace all OpenMP usage by BLI_task and other 
optimizations.

This commit makes DynamicPaint modifier evaluation (during playback) a few
percent quicker.
More significantly, it makes DynamicPaint's 'image sequence' baking about 33%
quicker (from 119 to 77 seconds in my own heavy test), partly due to the switch
to BLI_task itself (about 20%), and partly due to other optimizations (the
remaining ~13%).

As usual, I did a lot of tests here to ensure nothing is broken, but a lot more
testing by users would definitely be welcome too! ;)

Note that some rather pointless OpenMP for-loops have been removed
(parallelizing thousands of vector copies does make them two or three times
quicker, but the few hundred microseconds gained make no difference in a
process that takes hundreds of milliseconds).

Also, this code could still use a lot more cleanup (naming etc.); the way it
(tries to) handle malloc failures is also totally flaky and makes the code
horribly verbose and convoluted in some places - without actually catching all
possible failures (a memarena could make this easier to handle here), etc.

===================================================================

M       source/blender/blenkernel/intern/dynamicpaint.c

===================================================================

diff --git a/source/blender/blenkernel/intern/dynamicpaint.c 
b/source/blender/blenkernel/intern/dynamicpaint.c
index 8af9750..2dc0388 100644
--- a/source/blender/blenkernel/intern/dynamicpaint.c
+++ b/source/blender/blenkernel/intern/dynamicpaint.c
@@ -65,6 +65,7 @@
 #include "BKE_image.h"
 #include "BKE_main.h"
 #include "BKE_material.h"
+#include "BKE_mesh_mapping.h"
 #include "BKE_modifier.h"
 #include "BKE_object.h"
 #include "BKE_particle.h"
@@ -141,8 +142,8 @@ typedef struct Bounds2D {
 } Bounds2D;
 
 typedef struct Bounds3D {
-       int valid;
        float min[3], max[3];
+       bool valid;
 } Bounds3D;
 
 typedef struct VolumeGrid {
@@ -153,6 +154,8 @@ typedef struct VolumeGrid {
        int *s_pos;  /* (x*y*z) t_index begin id */
        int *s_num;  /* (x*y*z) number of t_index points */
        int *t_index;  /* actual surface point index, access: (s_pos + s_num) */
+
+       int *temp_t_index;
 } VolumeGrid;
 
 typedef struct Vec3f {
@@ -178,6 +181,7 @@ typedef struct PaintBakeData {
        int *s_num;  /* num of realCoord samples */
        Vec3f *realCoord;  /* current pixel center world-space coordinates for 
each sample ordered as (s_pos + s_num) */
        Bounds3D mesh_bounds;
+       float dim[3];
 
        /* adjacency info */
        BakeAdjPoint *bNeighs;  /* current global neighbor distances and 
directions, if required */
@@ -193,7 +197,6 @@ typedef struct PaintBakeData {
        MVert *prev_verts;      /* copy of previous frame vertices. used to 
observe surface movement */
        float prev_obmat[4][4]; /* previous frame object matrix */
        int clear;              /* flag to check if surface was cleared/reset 
-> have to redo velocity etc. */
-
 } PaintBakeData;
 
 /* UV Image sequence format point      */
@@ -422,7 +425,8 @@ static int surface_totalSamples(DynamicPaintSurface 
*surface)
        return surface->data->total_points;
 }
 
-static void blendColors(const float t_color[3], float t_alpha, const float 
s_color[3], float s_alpha, float result[4])
+static void blendColors(
+        const float t_color[3], const float t_alpha, const float s_color[3], 
const float s_alpha, float result[4])
 {
        /* Same thing as BLI's blend_color_mix_float(), but for 
non-premultiplied alpha. */
        int i;
@@ -576,7 +580,7 @@ static void boundInsert(Bounds3D *b, float point[3])
        if (!b->valid) {
                copy_v3_v3(b->min, point);
                copy_v3_v3(b->max, point);
-               b->valid = 1;
+               b->valid = true;
                return;
        }
 
@@ -603,27 +607,92 @@ static void freeGrid(PaintSurfaceData *data)
        bData->grid = NULL;
 }
 
+static void grid_bound_insert_cb_ex(void *userdata, void *userdata_chunk, 
const int i, const int UNUSED(thread_id))
+{
+       PaintBakeData *bData = userdata;
+
+       Bounds3D *grid_bound = userdata_chunk;
+
+       boundInsert(grid_bound, bData->realCoord[bData->s_pos[i]].v);
+}
+
+static void grid_bound_insert_finalize(void *userdata, void *userdata_chunk)
+{
+       PaintBakeData *bData = userdata;
+       VolumeGrid *grid = bData->grid;
+
+       Bounds3D *grid_bound = userdata_chunk;
+
+       boundInsert(&grid->grid_bounds, grid_bound->min);
+       boundInsert(&grid->grid_bounds, grid_bound->max);
+}
+
+static void grid_cell_points_cb_ex(void *userdata, void *userdata_chunk, const 
int i, const int UNUSED(thread_id))
+{
+       PaintBakeData *bData = userdata;
+       VolumeGrid *grid = bData->grid;
+       int *temp_t_index = grid->temp_t_index;
+       int *s_num = userdata_chunk;
+
+       int co[3];
+
+       for (int j = 3; j--;) {
+               co[j] = (int)floorf((bData->realCoord[bData->s_pos[i]].v[j] - 
grid->grid_bounds.min[j]) /
+                                   bData->dim[j] * grid->dim[j]);
+               CLAMP(co[j], 0, grid->dim[j] - 1);
+       }
+
+       temp_t_index[i] = co[0] + co[1] * grid->dim[0] + co[2] * grid->dim[0] * 
grid->dim[1];
+       s_num[temp_t_index[i]]++;
+}
+
+static void grid_cell_points_finalize(void *userdata, void *userdata_chunk)
+{
+       PaintBakeData *bData = userdata;
+       VolumeGrid *grid = bData->grid;
+       const int grid_cells = grid->dim[0] * grid->dim[1] * grid->dim[2];
+
+       int *s_num = userdata_chunk;
+
+       /* calculate grid indexes */
+       for (int i = 0; i < grid_cells; i++) {
+               grid->s_num[i] += s_num[i];
+       }
+}
+
+static void grid_cell_bounds_cb(void *userdata, const int x)
+{
+       PaintBakeData *bData = userdata;
+       VolumeGrid *grid = bData->grid;
+       float *dim = bData->dim;
+       int *grid_dim = grid->dim;
+
+       for (int y = 0; y < grid_dim[1]; y++) {
+               for (int z = 0; z < grid_dim[2]; z++) {
+                       const int b_index = x + y * grid_dim[0] + z * 
grid_dim[0] * grid_dim[1];
+                       /* set bounds */
+                       for (int j = 3; j--;) {
+                               const int s = (j == 0) ? x : ((j == 1) ? y : z);
+                               grid->bounds[b_index].min[j] = 
grid->grid_bounds.min[j] + dim[j] / grid_dim[j] * s;
+                               grid->bounds[b_index].max[j] = 
grid->grid_bounds.min[j] + dim[j] / grid_dim[j] * (s + 1);
+                       }
+                       grid->bounds[b_index].valid = true;
+               }
+       }
+}
+
 static void surfaceGenerateGrid(struct DynamicPaintSurface *surface)
 {
        PaintSurfaceData *sData = surface->data;
        PaintBakeData *bData = sData->bData;
-       Bounds3D *grid_bounds;
        VolumeGrid *grid;
        int grid_cells, axis = 3;
        int *temp_t_index = NULL;
        int *temp_s_num = NULL;
 
-#ifdef _OPENMP
-       int num_of_threads = omp_get_max_threads();
-#else
-       int num_of_threads = 1;
-#endif
-
        if (bData->grid)
                freeGrid(sData);
 
-       /* allocate separate bounds for each thread */
-       grid_bounds = MEM_callocN(sizeof(Bounds3D) * num_of_threads, "Grid 
Bounds");
        bData->grid = MEM_callocN(sizeof(VolumeGrid), "Surface Grid");
        grid = bData->grid;
 
@@ -634,27 +703,16 @@ static void surfaceGenerateGrid(struct 
DynamicPaintSurface *surface)
                float min_dim;
 
                /* calculate canvas dimensions */
-#pragma omp parallel for schedule(static)
-               for (i = 0; i < sData->total_points; i++) {
-#ifdef _OPENMP
-                       int id = omp_get_thread_num();
-                       boundInsert(&grid_bounds[id], 
(bData->realCoord[bData->s_pos[i]].v));
-#else
-                       boundInsert(&grid_bounds[0], 
(bData->realCoord[bData->s_pos[i]].v));
-#endif
-               }
-
-               /* get final dimensions */
-               for (i = 0; i < num_of_threads; i++) {
-                       boundInsert(&grid->grid_bounds, grid_bounds[i].min);
-                       boundInsert(&grid->grid_bounds, grid_bounds[i].max);
-               }
-
-               MEM_freeN(grid_bounds);
+               /* Important to init correctly our ref grid_bound... */
+               boundInsert(&grid->grid_bounds, 
bData->realCoord[bData->s_pos[0]].v);
+               BLI_task_parallel_range_finalize(
+                           0, sData->total_points, bData, &grid->grid_bounds, 
sizeof(grid->grid_bounds),
+                           grid_bound_insert_cb_ex, 
grid_bound_insert_finalize, sData->total_points > 1000, false);
 
                /* get dimensions */
                sub_v3_v3v3(dim, grid->grid_bounds.max, grid->grid_bounds.min);
                copy_v3_v3(td, dim);
+               copy_v3_v3(bData->dim, dim);
                min_dim = max_fff(td[0], td[1], td[2]) / 1000.f;
 
                /* deactivate zero axises */
@@ -687,10 +745,11 @@ static void surfaceGenerateGrid(struct 
DynamicPaintSurface *surface)
                /* allocate memory for grids */
                grid->bounds = MEM_callocN(sizeof(Bounds3D) * grid_cells, 
"Surface Grid Bounds");
                grid->s_pos = MEM_callocN(sizeof(int) * grid_cells, "Surface 
Grid Position");
-               grid->s_num = MEM_callocN(sizeof(int) * grid_cells * 
num_of_threads, "Surface Grid Points");
+
+               grid->s_num = MEM_callocN(sizeof(int) * grid_cells, "Surface 
Grid Points");
                temp_s_num = MEM_callocN(sizeof(int) * grid_cells, "Temp 
Surface Grid Points");
                grid->t_index = MEM_callocN(sizeof(int) * sData->total_points, 
"Surface Grid Target Ids");
-               temp_t_index = MEM_callocN(sizeof(int) * sData->total_points, 
"Temp Surface Grid Target Ids");
+               grid->temp_t_index = temp_t_index = MEM_callocN(sizeof(int) * 
sData->total_points, "Temp Surface Grid Target Ids");
 
                /* in case of an allocation failure abort here */
                if (!grid->bounds || !grid->s_pos || !grid->s_num || 
!grid->t_index || !temp_s_num || !temp_t_index)
@@ -698,33 +757,12 @@ static void surfaceGenerateGrid(struct 
DynamicPaintSurface *surface)
 
                if (!error) {
                        /* calculate number of points withing each cell */
-#pragma omp parallel for schedule(static)
-                       for (i = 0; i < sData->total_points; i++) {
-                               int co[3], j;
-                               for (j = 0; j < 3; j++) {
-                                       co[j] = 
(int)floor((bData->realCoord[bData->s_pos[i]].v[j] - grid->grid_bounds.min[j]) 
/ dim[j] * grid->dim[j]);
-                                       CLAMP(co[j], 0, grid->dim[j] - 1);
-                               }
-
-                               temp_t_index[i] = co[0] + co[1] * grid->dim[0] 
+ co[2] * grid->dim[0] * grid->dim[1];
-#ifdef _OPENMP
-                               grid->s_num[temp_t_index[i] + 
omp_get_thread_num() * grid_cells]++;
-#else
-                               grid->s_num[temp_t_index[i]]++;
-#endif
-                       }
-
-                       /* for first cell only calc s_num */
-                       for (i = 1; i < num_of_threads; i++) {
-                               grid->s_num[0] += grid->s_num[i * grid_cells];
-                       }
+                       BLI_task_parallel_range_finalize(
+                                   0, sData->total_points, bData, grid->s_num, 
sizeof(*grid->s_num) * grid_cells,
+                                   grid_cell_points_cb_ex, 
grid_cell_points_finalize, sData->total_points > 1000, false);
 
-                       /* calculate grid indexes */
+                       /* calculate grid indexes (not needed for first cell, 
which is zero). */
                        for (i = 1; i < grid_cells; i++) {
-                               int id;
-                               for (id = 1; id < num_of_threads; id++) {
-                                       grid->s_num[i] += grid->s_num[i + id * 
grid_cells];
-                               }
                                grid->s_pos[i] = grid->s_pos[i - 1] + 
grid->s_num[i - 1];
                        }
 
@@ -737,35 +775,14 @@ static void surfaceGenerateGrid(struct 
DynamicPaintSurface *surface)
                        }
 
                        /* calculate cell bounds */
-                       {
-                               int x;
-#pragma omp parallel for schedule(static)
-                               for (x = 0; x < grid->dim[0]; x++) {
-                                       int y;
-                                       for (y = 0; y < grid->dim[1]; y++) {
-                                               int z;
-                                               for (z = 0; z < grid->dim[2]; 
z++) {
-                                                       int j, b_index = x + y 
* grid->dim[0] + z * grid->dim[0] * grid->dim[1];
-                                                       /* set bounds */
-                                                       for (j = 0; j < 3; j++) 
{
-                                                               int s = (j == 
0) ? x : ((j == 1) ? y : z);
-                                                               
grid->bounds[b_index].min[j] = grid->grid_bounds.min[j] + dim[j] / grid->dim[j] 
* s;
-                                                               
grid->bounds[b_index].max[j] = grid->grid_bounds.min[j] + dim[j] / grid->dim[j] 
* (s + 1);
-                                                       }
-                                                       
grid->bounds[b_index].valid = 1;
-                                               }
-                                       }
-                               }
-                       }
+                       BLI_task_parallel_range(0, grid->dim[0], bData, 
grid_cell_bounds_cb, grid_cells > 1000);
                }
 
                if (temp_s_num)
                        MEM_freeN(temp_s_num);
                if (temp_t_index)
                        MEM_freeN(temp_t_index);
-
-               /* free per thread s_num values */
-               grid->s_num = MEM_reallocN(grid->s_num, sizeof(int) * 
grid_cells);
+               grid->temp_t_index = 

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [ef25a1e] compositor-2016: DynamicPaint: massive refactor, replace all OpenMP usage by BLI_task and other optimizations.

Reply via email to