[Bf-blender-cvs] [b871160] master: OMP -> BLI_task: BKE's pbvh.c

Bastien Montagne Wed, 27 Jan 2016 03:27:28 -0800

Commit: b871160dd962ac05b31456bc2f4c8f8fd45a839d
Author: Bastien Montagne
Date:   Wed Jan 27 12:14:00 2016 +0100
Branches: master
https://developer.blender.org/rBb871160dd962ac05b31456bc2f4c8f8fd45a839d


OMP -> BLI_task: BKE's pbvh.c

Should be the last bit of sculpt/paint code, now this is fully using BLI_task.

Note that PBVH normals update is now about 20% quicker than with OMP code
(from 27ms to 21ms with a big stroke over a 500k vertices monkey), even though
a missing thread-protection was added... atomic primitives ftw!

For the records, with the missing `#pragma omp critical` section added,
previous code was like four times slower (above 100ms).

===================================================================

M       source/blender/blenkernel/intern/pbvh.c

===================================================================

diff --git a/source/blender/blenkernel/intern/pbvh.c 
b/source/blender/blenkernel/intern/pbvh.c
index ba56af8..a13daf2 100644
--- a/source/blender/blenkernel/intern/pbvh.c
+++ b/source/blender/blenkernel/intern/pbvh.c
@@ -30,6 +30,7 @@
 #include "BLI_math.h"
 #include "BLI_utildefines.h"
 #include "BLI_ghash.h"
+#include "BLI_task.h"
 
 #include "BKE_pbvh.h"
 #include "BKE_ccg.h"
@@ -42,6 +43,8 @@
 
 #include "bmesh.h"
 
+#include "atomic_ops.h"
+
 #include "pbvh_intern.h"
 
 #include <limits.h>
@@ -52,14 +55,7 @@
 
 #define STACK_FIXED_DEPTH   100
 
-/* Setting zero so we can catch bugs in OpenMP/PBVH. */
-#ifdef _OPENMP
-#  ifdef DEBUG
-#    define PBVH_OMP_LIMIT 0
-#  else
-#    define PBVH_OMP_LIMIT 8
-#  endif
-#endif
+#define PBVH_THREADED_LIMIT 4
 
 typedef struct PBVHStack {
        PBVHNode *node;
@@ -931,13 +927,112 @@ static bool update_search_cb(PBVHNode *node, void 
*data_v)
        return true;
 }
 
+typedef struct PBVHUpdateData {
+       PBVH *bvh;
+       PBVHNode **nodes;
+       int totnode;
+
+       float (*fnors)[3];
+       float (*vnors)[3];
+       int flag;
+} PBVHUpdateData;
+
+static void pbvh_update_normals_accum_task_cb(void *userdata, const int n)
+{
+       PBVHUpdateData *data = userdata;
+
+       PBVH *bvh = data->bvh;
+       PBVHNode *node = data->nodes[n];
+       float (*fnors)[3] = data->fnors;
+       float (*vnors)[3] = data->vnors;
+
+       if ((node->flag & PBVH_UpdateNormals)) {
+               unsigned int mpoly_prev = UINT_MAX;
+               float fn[3];
+
+               const int *faces = node->prim_indices;
+               const int totface = node->totprim;
+
+               for (int i = 0; i < totface; ++i) {
+                       const MLoopTri *lt = &bvh->looptri[faces[i]];
+                       const unsigned int vtri[3] = {
+                               bvh->mloop[lt->tri[0]].v,
+                               bvh->mloop[lt->tri[1]].v,
+                               bvh->mloop[lt->tri[2]].v,
+                       };
+                       const int sides = 3;
+
+                       /* Face normal and mask */
+                       if (lt->poly != mpoly_prev) {
+                               const MPoly *mp = &bvh->mpoly[lt->poly];
+                               BKE_mesh_calc_poly_normal(mp, 
&bvh->mloop[mp->loopstart], bvh->verts, fn);
+                               mpoly_prev = lt->poly;
+
+                               if (fnors) {
+                                       /* We can assume a face is only present 
in one node ever. */
+                                       copy_v3_v3(fnors[lt->poly], fn);
+                               }
+                       }
+
+                       for (int j = sides; j--; ) {
+                               const int v = vtri[j];
+
+                               if (bvh->verts[v].flag & ME_VERT_PBVH_UPDATE) {
+                                       /* Note: This avoids `lock, add_v3_v3, 
unlock` and is five to ten times quicker than a spinlock.
+                                        *       Not exact equivalent though, 
since atomicity is only ensured for one component
+                                        *       of the vector at a time, but 
here it shall not make any sensible difference. */
+                                       for (int k = 3; k--; ) {
+                                               /* Atomic float addition.
+                                                * Note that since collision 
are unlikely, loop will nearly always run once. */
+                                               float oldval, newval;
+                                               uint32_t prevval;
+                                               do {
+                                                       oldval = vnors[v][k];
+                                                       newval = oldval + fn[k];
+                                                       prevval = 
atomic_cas_uint32(
+                                                                     (uint32_t 
*)&vnors[v][k], *(uint32_t *)(&oldval), *(uint32_t *)(&newval));
+                                               } while (UNLIKELY(prevval != 
*(uint32_t *)(&oldval)));
+                                       }
+                               }
+                       }
+               }
+       }
+}
+
+static void pbvh_update_normals_store_task_cb(void *userdata, const int n)
+{
+       PBVHUpdateData *data = userdata;
+       PBVH *bvh = data->bvh;
+       PBVHNode *node = data->nodes[n];
+       float (*vnors)[3] = data->vnors;
+
+       if (node->flag & PBVH_UpdateNormals) {
+               const int *verts = node->vert_indices;
+               const int totvert = node->uniq_verts;
+
+               for (int i = 0; i < totvert; ++i) {
+                       const int v = verts[i];
+                       MVert *mvert = &bvh->verts[v];
+
+                       /* mvert is shared between nodes, hence between 
threads. */
+                       if (atomic_fetch_and_and_uint8((uint8_t *)&mvert->flag, 
(uint8_t)~ME_VERT_PBVH_UPDATE) & ME_VERT_PBVH_UPDATE)
+                       {
+                               normalize_v3(vnors[v]);
+                               normal_float_to_short_v3(mvert->no, vnors[v]);
+                       }
+               }
+
+               node->flag &= ~PBVH_UpdateNormals;
+       }
+}
+
 static void pbvh_update_normals(PBVH *bvh, PBVHNode **nodes,
-                                int totnode, float (*face_nors)[3])
+                                int totnode, float (*fnors)[3])
 {
-       float (*vnor)[3];
+       float (*vnors)[3];
 
        if (bvh->type == PBVH_BMESH) {
-               BLI_assert(face_nors == NULL);
+               BLI_assert(fnors == NULL);
                pbvh_bmesh_normals_update(nodes, totnode);
                return;
        }
@@ -947,7 +1042,7 @@ static void pbvh_update_normals(PBVH *bvh, PBVHNode 
**nodes,
 
        /* could be per node to save some memory, but also means
         * we have to store for each vertex which node it is in */
-       vnor = MEM_callocN(sizeof(float) * 3 * bvh->totvert, "bvh temp vnors");
+       vnors = MEM_callocN(sizeof(*vnors) * bvh->totvert, __func__);
 
        /* subtle assumptions:
         * - We know that for all edited vertices, the nodes with faces
@@ -959,104 +1054,46 @@ static void pbvh_update_normals(PBVH *bvh, PBVHNode 
**nodes,
         *   can only update vertices marked with ME_VERT_PBVH_UPDATE.
         */
 
-       int n;
-#pragma omp parallel for private(n) schedule(static) if (totnode > 
PBVH_OMP_LIMIT)
-       for (n = 0; n < totnode; n++) {
-               PBVHNode *node = nodes[n];
-
-               if ((node->flag & PBVH_UpdateNormals)) {
-                       unsigned int mpoly_prev = UINT_MAX;
-                       float fn[3];
-
-                       const int *faces = node->prim_indices;
-                       const int totface = node->totprim;
-
-                       for (int i = 0; i < totface; ++i) {
-                               const MLoopTri *lt = &bvh->looptri[faces[i]];
-                               const unsigned int vtri[3] = {
-                                   bvh->mloop[lt->tri[0]].v,
-                                   bvh->mloop[lt->tri[1]].v,
-                                   bvh->mloop[lt->tri[2]].v,
-                               };
-                               const int sides = 3;
-
-                               /* Face normal and mask */
-                               if (lt->poly != mpoly_prev) {
-                                       const MPoly *mp = &bvh->mpoly[lt->poly];
-                                       BKE_mesh_calc_poly_normal(mp, 
&bvh->mloop[mp->loopstart], bvh->verts, fn);
-                                       mpoly_prev = lt->poly;
-
-                                       if (face_nors) {
-                                               copy_v3_v3(face_nors[lt->poly], 
fn);
-                                       }
-                               }
+       PBVHUpdateData data = {
+           .bvh = bvh, .nodes = nodes,
+               .fnors = fnors, .vnors = vnors,
+       };
 
-                               for (int j = 0; j < sides; ++j) {
-                                       int v = vtri[j];
-
-                                       if (bvh->verts[v].flag & 
ME_VERT_PBVH_UPDATE) {
-                                               /* this seems like it could be 
very slow but profile
-                                                * does not show this, so just 
leave it for now? */
-#pragma omp atomic
-                                               vnor[v][0] += fn[0];
-#pragma omp atomic
-                                               vnor[v][1] += fn[1];
-#pragma omp atomic
-                                               vnor[v][2] += fn[2];
-                                       }
-                               }
-                       }
-               }
-       }
-
-#pragma omp parallel for private(n) schedule(static) if (totnode > 
PBVH_OMP_LIMIT)
-       for (n = 0; n < totnode; n++) {
-               PBVHNode *node = nodes[n];
-
-               if (node->flag & PBVH_UpdateNormals) {
-                       const int *verts = node->vert_indices;
-                       const int totvert = node->uniq_verts;
+       BLI_task_parallel_range(0, totnode, &data, 
pbvh_update_normals_accum_task_cb, totnode > PBVH_THREADED_LIMIT);
 
-                       for (int i = 0; i < totvert; ++i) {
-                               const int v = verts[i];
-                               MVert *mvert = &bvh->verts[v];
+       BLI_task_parallel_range(0, totnode, &data, 
pbvh_update_normals_store_task_cb, totnode > PBVH_THREADED_LIMIT);
 
-                               if (mvert->flag & ME_VERT_PBVH_UPDATE) {
-                                       float no[3];
+       MEM_freeN(vnors);
+}
 
-                                       copy_v3_v3(no, vnor[v]);
-                                       normalize_v3(no);
-                                       normal_float_to_short_v3(mvert->no, no);
+static void pbvh_update_BB_redraw_task_cb(void *userdata, const int n)
+{
+       PBVHUpdateData *data = userdata;
+       PBVH *bvh = data->bvh;
+       PBVHNode *node = data->nodes[n];
+       const int flag = data->flag;
 
-                                       mvert->flag &= ~ME_VERT_PBVH_UPDATE;
-                               }
-                       }
+       if ((flag & PBVH_UpdateBB) && (node->flag & PBVH_UpdateBB))
+               /* don't clear flag yet, leave it for flushing later */
+               /* Note that bvh usage is read-only here, so no need to 
thread-protect it. */
+               update_node_vb(bvh, node);
 
-                       node->flag &= ~PBVH_UpdateNormals;
-               }
-       }
+       if ((flag & PBVH_UpdateOriginalBB) && (node->flag & 
PBVH_UpdateOriginalBB))
+               node->orig_vb = node->vb;
 
-       MEM_freeN(vnor);
+       if ((flag & PBVH_UpdateRedraw) && (node->flag & PBVH_UpdateRedraw))
+               node->flag &= ~PBVH_UpdateRedraw;
 }
 
 void pbvh_update_BB_redraw(PBVH *bvh, PBVHNode **nodes, int totnode, int flag)
 {
        /* update BB, redraw flag */
-       int n;
-#pragma omp parallel for private(n) schedule(static) if (totnode > 
PBVH_OMP_LIMIT)
-       for (n = 0; n < totnode; n++) {
-               PBVHNode *node = nodes[n];
-
-               if ((flag & PBVH_UpdateBB) && (node->flag & PBVH_UpdateBB))
-                       /* don't clear flag yet, leave it for flushing later */
-                       update_node_vb(bvh, node);
+       PBVHUpdateData data = {
+           .bvh = bvh, .nodes = nodes,
+               .flag = flag,
+       };
 
-               if ((flag & PBVH_UpdateOriginalBB) && (node->flag & 
PBVH_UpdateOriginalBB))
-                       node->orig_vb = node->vb;
-
-               if ((flag & PBVH_UpdateRedraw) && (node->flag & 
PBVH_UpdateRedraw))
-                       node->flag &= ~PBVH_UpdateRedraw;
-       }
+       BLI_task_parallel_range(0, totnode, &data, 
pbvh_update_BB_redraw_task_cb, totnode > PBVH_THREADED_LIMIT);
 }
 
 static void pbvh_update_draw_buffers(PBVH *bvh, PBVHNode **nodes, int totnode)
@@ -1174,7 +1211,7 @@ static int pbvh_flush_bb(PBVH *bvh, PBVHNode *node, int 
flag)
        return update;
 }
 
-void BKE_pbvh_update(PBVH *bvh, int flag, float (*face_nors)[3])
+void BKE_pbvh_update(PBVH *bvh, int flag, float (*fnors)[3])
 {
        if (!bvh->nodes)
                return;
@@ -1186,7 +1223,7 @@ void BKE_pbvh_update(PBVH *bvh, int flag, float 
(*face_nors)[3])
                               &nodes, &totnode);
 
        if (flag & PBVH_UpdateNormals)
-               pbvh_update_normals(bvh, nodes, totnode, face_nors);
+               pbvh_update_normals(bvh, nodes, totnode, fnors);
 
        if (flag & (PBVH_UpdateBB | PBVH_UpdateOriginalBB | PBVH_UpdateRedraw))
                pbvh_update_BB_redraw(bvh, nodes, totnode, flag);
@@ -1774,7 +1811,7 @@ static void pbvh_node_check_diffuse_changed(PBVH *bvh, 
PBVHNode *node)
                node->flag |= PBVH_UpdateDrawBuffers;
 }
 
-void BKE_pbvh_draw(PBVH *bvh, float (*planes)[4], float (*face_nors)[3],
+void BKE_pbvh_draw(PBVH *bvh, float (*planes)[4], float (*fnors)[3],
                    DMSetMaterial setMaterial, bool wireframe, bool fast)
 {
        PBVHNodeDrawData draw_data = {setMaterial, wireframe, fast};
@@ -1787,7 +1824,7 @@ void BKE_pbvh_draw(PBVH *bvh, float (*planes)[4], float 
(*face_nors)[3],
        BKE_pbvh_search_gather(bvh, update_search_cb, 
SET_INT_IN_POINTER(PBVH_UpdateNormals | PBVH_UpdateDrawBuffers),
                               &nodes, &totnode);
 
-       pbvh_update_normals(bvh, nodes, totnode, face_nors);
+       pbvh_update_normals(bvh, nodes, totnode, fnors);
        pbvh_update_draw_buffers(bvh, nodes, totnode);


@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [b871160] master: OMP -> BLI_task: BKE's pbvh.c

Reply via email to