Commit: ef25a1ed871e96397ea8e4c856ed1236d5051404
Author: Bastien Montagne
Date: Sat May 21 15:22:04 2016 +0200
Branches: compositor-2016
https://developer.blender.org/rBef25a1ed871e96397ea8e4c856ed1236d5051404
DynamicPaint: massive refactor, replace all OpenMP usage by BLI_task and other
optimizations.
This commit makes DynamicPaint modifier evaluation (during playback) a few
percent quicker.
However, it makes DynamicPaint's 'image sequence' baking about 33% quicker (from
119 to 77 seconds in my own heavy test), partly due to the switch to BLI_task
itself (about 20%), and partly due to optimizations (remaining ~13%).
As usual, I did a lot of tests here to ensure nothing is broken, but a lot more
user testing would definitely be welcome too! ;)
Note that some quite meaningless OpenMP for-loops have been removed (parallelizing
thousands of vector copies does make them two or three times quicker, but the few
hundred microseconds gained do not make any difference in a process that takes
hundreds of milliseconds).
Also, this code could still use a lot more cleanup (naming etc.); the way it
(tries to) handle malloc failures is also totally flaky and makes the code
horribly verbose and convoluted in some places - without actually catching all
possible failures (a memarena could make this easier to handle here), etc.
===================================================================
M source/blender/blenkernel/intern/dynamicpaint.c
===================================================================
diff --git a/source/blender/blenkernel/intern/dynamicpaint.c
b/source/blender/blenkernel/intern/dynamicpaint.c
index 8af9750..2dc0388 100644
--- a/source/blender/blenkernel/intern/dynamicpaint.c
+++ b/source/blender/blenkernel/intern/dynamicpaint.c
@@ -65,6 +65,7 @@
#include "BKE_image.h"
#include "BKE_main.h"
#include "BKE_material.h"
+#include "BKE_mesh_mapping.h"
#include "BKE_modifier.h"
#include "BKE_object.h"
#include "BKE_particle.h"
@@ -141,8 +142,8 @@ typedef struct Bounds2D {
} Bounds2D;
typedef struct Bounds3D {
- int valid;
float min[3], max[3];
+ bool valid;
} Bounds3D;
typedef struct VolumeGrid {
@@ -153,6 +154,8 @@ typedef struct VolumeGrid {
int *s_pos; /* (x*y*z) t_index begin id */
int *s_num; /* (x*y*z) number of t_index points */
int *t_index; /* actual surface point index, access: (s_pos + s_num) */
+
+ int *temp_t_index;
} VolumeGrid;
typedef struct Vec3f {
@@ -178,6 +181,7 @@ typedef struct PaintBakeData {
int *s_num; /* num of realCoord samples */
Vec3f *realCoord; /* current pixel center world-space coordinates for
each sample ordered as (s_pos + s_num) */
Bounds3D mesh_bounds;
+ float dim[3];
/* adjacency info */
BakeAdjPoint *bNeighs; /* current global neighbor distances and
directions, if required */
@@ -193,7 +197,6 @@ typedef struct PaintBakeData {
MVert *prev_verts; /* copy of previous frame vertices. used to
observe surface movement */
float prev_obmat[4][4]; /* previous frame object matrix */
int clear; /* flag to check if surface was cleared/reset
-> have to redo velocity etc. */
-
} PaintBakeData;
/* UV Image sequence format point */
@@ -422,7 +425,8 @@ static int surface_totalSamples(DynamicPaintSurface
*surface)
return surface->data->total_points;
}
-static void blendColors(const float t_color[3], float t_alpha, const float
s_color[3], float s_alpha, float result[4])
+static void blendColors(
+ const float t_color[3], const float t_alpha, const float s_color[3],
const float s_alpha, float result[4])
{
/* Same thing as BLI's blend_color_mix_float(), but for
non-premultiplied alpha. */
int i;
@@ -576,7 +580,7 @@ static void boundInsert(Bounds3D *b, float point[3])
if (!b->valid) {
copy_v3_v3(b->min, point);
copy_v3_v3(b->max, point);
- b->valid = 1;
+ b->valid = true;
return;
}
@@ -603,27 +607,92 @@ static void freeGrid(PaintSurfaceData *data)
bData->grid = NULL;
}
+static void grid_bound_insert_cb_ex(void *userdata, void *userdata_chunk,
const int i, const int UNUSED(thread_id))
+{
+ PaintBakeData *bData = userdata;
+
+ Bounds3D *grid_bound = userdata_chunk;
+
+ boundInsert(grid_bound, bData->realCoord[bData->s_pos[i]].v);
+}
+
+static void grid_bound_insert_finalize(void *userdata, void *userdata_chunk)
+{
+ PaintBakeData *bData = userdata;
+ VolumeGrid *grid = bData->grid;
+
+ Bounds3D *grid_bound = userdata_chunk;
+
+ boundInsert(&grid->grid_bounds, grid_bound->min);
+ boundInsert(&grid->grid_bounds, grid_bound->max);
+}
+
+static void grid_cell_points_cb_ex(void *userdata, void *userdata_chunk, const
int i, const int UNUSED(thread_id))
+{
+ PaintBakeData *bData = userdata;
+ VolumeGrid *grid = bData->grid;
+ int *temp_t_index = grid->temp_t_index;
+ int *s_num = userdata_chunk;
+
+ int co[3];
+
+ for (int j = 3; j--;) {
+ co[j] = (int)floorf((bData->realCoord[bData->s_pos[i]].v[j] -
grid->grid_bounds.min[j]) /
+ bData->dim[j] * grid->dim[j]);
+ CLAMP(co[j], 0, grid->dim[j] - 1);
+ }
+
+ temp_t_index[i] = co[0] + co[1] * grid->dim[0] + co[2] * grid->dim[0] *
grid->dim[1];
+ s_num[temp_t_index[i]]++;
+}
+
+static void grid_cell_points_finalize(void *userdata, void *userdata_chunk)
+{
+ PaintBakeData *bData = userdata;
+ VolumeGrid *grid = bData->grid;
+ const int grid_cells = grid->dim[0] * grid->dim[1] * grid->dim[2];
+
+ int *s_num = userdata_chunk;
+
+ /* calculate grid indexes */
+ for (int i = 0; i < grid_cells; i++) {
+ grid->s_num[i] += s_num[i];
+ }
+}
+
+static void grid_cell_bounds_cb(void *userdata, const int x)
+{
+ PaintBakeData *bData = userdata;
+ VolumeGrid *grid = bData->grid;
+ float *dim = bData->dim;
+ int *grid_dim = grid->dim;
+
+ for (int y = 0; y < grid_dim[1]; y++) {
+ for (int z = 0; z < grid_dim[2]; z++) {
+ const int b_index = x + y * grid_dim[0] + z *
grid_dim[0] * grid_dim[1];
+ /* set bounds */
+ for (int j = 3; j--;) {
+ const int s = (j == 0) ? x : ((j == 1) ? y : z);
+ grid->bounds[b_index].min[j] =
grid->grid_bounds.min[j] + dim[j] / grid_dim[j] * s;
+ grid->bounds[b_index].max[j] =
grid->grid_bounds.min[j] + dim[j] / grid_dim[j] * (s + 1);
+ }
+ grid->bounds[b_index].valid = true;
+ }
+ }
+}
+
static void surfaceGenerateGrid(struct DynamicPaintSurface *surface)
{
PaintSurfaceData *sData = surface->data;
PaintBakeData *bData = sData->bData;
- Bounds3D *grid_bounds;
VolumeGrid *grid;
int grid_cells, axis = 3;
int *temp_t_index = NULL;
int *temp_s_num = NULL;
-#ifdef _OPENMP
- int num_of_threads = omp_get_max_threads();
-#else
- int num_of_threads = 1;
-#endif
-
if (bData->grid)
freeGrid(sData);
- /* allocate separate bounds for each thread */
- grid_bounds = MEM_callocN(sizeof(Bounds3D) * num_of_threads, "Grid
Bounds");
bData->grid = MEM_callocN(sizeof(VolumeGrid), "Surface Grid");
grid = bData->grid;
@@ -634,27 +703,16 @@ static void surfaceGenerateGrid(struct
DynamicPaintSurface *surface)
float min_dim;
/* calculate canvas dimensions */
-#pragma omp parallel for schedule(static)
- for (i = 0; i < sData->total_points; i++) {
-#ifdef _OPENMP
- int id = omp_get_thread_num();
- boundInsert(&grid_bounds[id],
(bData->realCoord[bData->s_pos[i]].v));
-#else
- boundInsert(&grid_bounds[0],
(bData->realCoord[bData->s_pos[i]].v));
-#endif
- }
-
- /* get final dimensions */
- for (i = 0; i < num_of_threads; i++) {
- boundInsert(&grid->grid_bounds, grid_bounds[i].min);
- boundInsert(&grid->grid_bounds, grid_bounds[i].max);
- }
-
- MEM_freeN(grid_bounds);
+ /* Important to init correctly our ref grid_bound... */
+ boundInsert(&grid->grid_bounds,
bData->realCoord[bData->s_pos[0]].v);
+ BLI_task_parallel_range_finalize(
+ 0, sData->total_points, bData, &grid->grid_bounds,
sizeof(grid->grid_bounds),
+ grid_bound_insert_cb_ex,
grid_bound_insert_finalize, sData->total_points > 1000, false);
/* get dimensions */
sub_v3_v3v3(dim, grid->grid_bounds.max, grid->grid_bounds.min);
copy_v3_v3(td, dim);
+ copy_v3_v3(bData->dim, dim);
min_dim = max_fff(td[0], td[1], td[2]) / 1000.f;
/* deactivate zero axises */
@@ -687,10 +745,11 @@ static void surfaceGenerateGrid(struct
DynamicPaintSurface *surface)
/* allocate memory for grids */
grid->bounds = MEM_callocN(sizeof(Bounds3D) * grid_cells,
"Surface Grid Bounds");
grid->s_pos = MEM_callocN(sizeof(int) * grid_cells, "Surface
Grid Position");
- grid->s_num = MEM_callocN(sizeof(int) * grid_cells *
num_of_threads, "Surface Grid Points");
+
+ grid->s_num = MEM_callocN(sizeof(int) * grid_cells, "Surface
Grid Points");
temp_s_num = MEM_callocN(sizeof(int) * grid_cells, "Temp
Surface Grid Points");
grid->t_index = MEM_callocN(sizeof(int) * sData->total_points,
"Surface Grid Target Ids");
- temp_t_index = MEM_callocN(sizeof(int) * sData->total_points,
"Temp Surface Grid Target Ids");
+ grid->temp_t_index = temp_t_index = MEM_callocN(sizeof(int) *
sData->total_points, "Temp Surface Grid Target Ids");
/* in case of an allocation failure abort here */
if (!grid->bounds || !grid->s_pos || !grid->s_num ||
!grid->t_index || !temp_s_num || !temp_t_index)
@@ -698,33 +757,12 @@ static void surfaceGenerateGrid(struct
DynamicPaintSurface *surface)
if (!error) {
/* calculate number of points withing each cell */
-#pragma omp parallel for schedule(static)
- for (i = 0; i < sData->total_points; i++) {
- int co[3], j;
- for (j = 0; j < 3; j++) {
- co[j] =
(int)floor((bData->realCoord[bData->s_pos[i]].v[j] - grid->grid_bounds.min[j])
/ dim[j] * grid->dim[j]);
- CLAMP(co[j], 0, grid->dim[j] - 1);
- }
-
- temp_t_index[i] = co[0] + co[1] * grid->dim[0]
+ co[2] * grid->dim[0] * grid->dim[1];
-#ifdef _OPENMP
- grid->s_num[temp_t_index[i] +
omp_get_thread_num() * grid_cells]++;
-#else
- grid->s_num[temp_t_index[i]]++;
-#endif
- }
-
- /* for first cell only calc s_num */
- for (i = 1; i < num_of_threads; i++) {
- grid->s_num[0] += grid->s_num[i * grid_cells];
- }
+ BLI_task_parallel_range_finalize(
+ 0, sData->total_points, bData, grid->s_num,
sizeof(*grid->s_num) * grid_cells,
+ grid_cell_points_cb_ex,
grid_cell_points_finalize, sData->total_points > 1000, false);
- /* calculate grid indexes */
+ /* calculate grid indexes (not needed for first cell,
which is zero). */
for (i = 1; i < grid_cells; i++) {
- int id;
- for (id = 1; id < num_of_threads; id++) {
- grid->s_num[i] += grid->s_num[i + id *
grid_cells];
- }
grid->s_pos[i] = grid->s_pos[i - 1] +
grid->s_num[i - 1];
}
@@ -737,35 +775,14 @@ static void surfaceGenerateGrid(struct
DynamicPaintSurface *surface)
}
/* calculate cell bounds */
- {
- int x;
-#pragma omp parallel for schedule(static)
- for (x = 0; x < grid->dim[0]; x++) {
- int y;
- for (y = 0; y < grid->dim[1]; y++) {
- int z;
- for (z = 0; z < grid->dim[2];
z++) {
- int j, b_index = x + y
* grid->dim[0] + z * grid->dim[0] * grid->dim[1];
- /* set bounds */
- for (j = 0; j < 3; j++)
{
- int s = (j ==
0) ? x : ((j == 1) ? y : z);
-
grid->bounds[b_index].min[j] = grid->grid_bounds.min[j] + dim[j] / grid->dim[j]
* s;
-
grid->bounds[b_index].max[j] = grid->grid_bounds.min[j] + dim[j] / grid->dim[j]
* (s + 1);
- }
-
grid->bounds[b_index].valid = 1;
- }
- }
- }
- }
+ BLI_task_parallel_range(0, grid->dim[0], bData,
grid_cell_bounds_cb, grid_cells > 1000);
}
if (temp_s_num)
MEM_freeN(temp_s_num);
if (temp_t_index)
MEM_freeN(temp_t_index);
-
- /* free per thread s_num values */
- grid->s_num = MEM_reallocN(grid->s_num, sizeof(int) *
grid_cells);
+ grid->temp_t_index =
@@ Diff output truncated at 10240 characters. @@
_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs