Commit: f12040e088b66748340b0d7472d1155596e0f9e2
Author: Clément Foucault
Date:   Sat Apr 6 01:55:21 2019 +0200
Branches: master
https://developer.blender.org/rBf12040e088b66748340b0d7472d1155596e0f9e2

DRW: Opti: Fix hotspot in DRW_mesh_batch_cache_get_surface_shaded

The hotspot was caused by mesh_cd_layers_type_overlap, which was testing
far more data than it needed to.

Here we reduce the whole CD layer mask to a single 32-bit bitflag that is
cheap to OR together and test.

Bonus point: we use an atomic operation to merge the mask, in order to allow
future multi-threading (this was a TODO).

In the scene attached to T58188 this removes 5% of CPU time.

===================================================================

M       source/blender/draw/CMakeLists.txt
M       source/blender/draw/intern/draw_cache_impl_mesh.c

===================================================================

diff --git a/source/blender/draw/CMakeLists.txt 
b/source/blender/draw/CMakeLists.txt
index aea08f87a84..3ac606fc48f 100644
--- a/source/blender/draw/CMakeLists.txt
+++ b/source/blender/draw/CMakeLists.txt
@@ -41,6 +41,7 @@ set(INC
 
        ../../../intern/glew-mx
        ../../../intern/guardedalloc
+       ../../../intern/atomic
 )
 
 set(INC_SYS
diff --git a/source/blender/draw/intern/draw_cache_impl_mesh.c 
b/source/blender/draw/intern/draw_cache_impl_mesh.c
index e47d46aca09..5643d23c7cd 100644
--- a/source/blender/draw/intern/draw_cache_impl_mesh.c
+++ b/source/blender/draw/intern/draw_cache_impl_mesh.c
@@ -48,6 +48,7 @@
 #include "BKE_mesh_runtime.h"
 #include "BKE_object_deform.h"
 
+#include "atomic_ops.h"
 
 #include "bmesh.h"
 
@@ -78,6 +79,14 @@ typedef struct DRW_MeshWeightState {
        int   defgroup_sel_count;
 } DRW_MeshWeightState;
 
+typedef struct DRW_MeshCDMask {
+       uint32_t uv : 8;
+       uint32_t tan : 8;
+       uint32_t vcol : 8;
+       uint32_t orco : 1;
+       uint32_t tan_orco : 1;
+} DRW_MeshCDMask;
+
 /* DRW_MeshWeightState.flags */
 enum {
        DRW_MESH_WEIGHT_STATE_MULTIPAINT          = (1 << 0),
@@ -354,72 +363,63 @@ BLI_INLINE bool bm_edge_is_loose_and_visible(const BMEdge 
*e)
 }
 
 /* Return true is all layers in _b_ are inside _a_. */
-static bool mesh_cd_layers_type_overlap(
-        const uchar av[CD_NUMTYPES], const ushort al[CD_NUMTYPES],
-        const uchar bv[CD_NUMTYPES], const ushort bl[CD_NUMTYPES])
+BLI_INLINE bool mesh_cd_layers_type_overlap(DRW_MeshCDMask a, DRW_MeshCDMask b)
 {
-       for (int i = 0; i < CD_NUMTYPES; ++i) {
-               if ((av[i] & bv[i]) != bv[i]) {
-                       return false;
-               }
-               if ((al[i] & bl[i]) != bl[i]) {
-                       return false;
-               }
-       }
-       return true;
+       return (*((uint32_t *)&a) & *((uint32_t *)&b)) == *((uint32_t *)&b);
 }
 
-static void mesh_cd_layers_type_merge(
-        uchar av[CD_NUMTYPES], ushort al[CD_NUMTYPES],
-        uchar bv[CD_NUMTYPES], ushort bl[CD_NUMTYPES])
+BLI_INLINE void mesh_cd_layers_type_merge(DRW_MeshCDMask *a, DRW_MeshCDMask b)
 {
-       for (int i = 0; i < CD_NUMTYPES; ++i) {
-               av[i] |= bv[i];
-               al[i] |= bl[i];
-       }
+       atomic_fetch_and_or_uint32((uint32_t *)a, *(uint32_t *)&b);
+}
+
+BLI_INLINE void mesh_cd_layers_type_clear(DRW_MeshCDMask *a)
+{
+       *((uint32_t *)a) = 0;
 }
 
 static void mesh_cd_calc_active_uv_layer(
-        const Mesh *me, ushort cd_lused[CD_NUMTYPES])
+        const Mesh *me, DRW_MeshCDMask *cd_used)
 {
        const CustomData *cd_ldata = (me->edit_mesh) ? 
&me->edit_mesh->bm->ldata : &me->ldata;
 
        int layer = CustomData_get_active_layer(cd_ldata, CD_MLOOPUV);
        if (layer != -1) {
-               cd_lused[CD_MLOOPUV] |= (1 << layer);
+               cd_used->uv |= (1 << layer);
        }
 }
 
 static void mesh_cd_calc_active_mask_uv_layer(
-        const Mesh *me, ushort cd_lused[CD_NUMTYPES])
+        const Mesh *me, DRW_MeshCDMask *cd_used)
 {
        const CustomData *cd_ldata = (me->edit_mesh) ? 
&me->edit_mesh->bm->ldata : &me->ldata;
 
        int layer = CustomData_get_stencil_layer(cd_ldata, CD_MLOOPUV);
        if (layer != -1) {
-               cd_lused[CD_MLOOPUV] |= (1 << layer);
+               cd_used->uv |= (1 << layer);
        }
 }
 
 static void mesh_cd_calc_active_vcol_layer(
-        const Mesh *me, ushort cd_lused[CD_NUMTYPES])
+        const Mesh *me, DRW_MeshCDMask *cd_used)
 {
        const CustomData *cd_ldata = (me->edit_mesh) ? 
&me->edit_mesh->bm->ldata : &me->ldata;
 
        int layer = CustomData_get_active_layer(cd_ldata, CD_MLOOPCOL);
        if (layer != -1) {
-               cd_lused[CD_MLOOPCOL] |= (1 << layer);
+               cd_used->vcol |= (1 << layer);
        }
 }
 
-static void mesh_cd_calc_used_gpu_layers(
-        const Mesh *me, uchar cd_vused[CD_NUMTYPES], ushort 
cd_lused[CD_NUMTYPES],
-        struct GPUMaterial **gpumat_array, int gpumat_array_len)
+static DRW_MeshCDMask mesh_cd_calc_used_gpu_layers(
+        const Mesh *me, struct GPUMaterial **gpumat_array, int 
gpumat_array_len)
 {
        const CustomData *cd_ldata = (me->edit_mesh) ? 
&me->edit_mesh->bm->ldata : &me->ldata;
 
        /* See: DM_vertex_attributes_from_gpu for similar logic */
        GPUVertAttrLayers gpu_attrs = {{{0}}};
+       DRW_MeshCDMask cd_used;
+       mesh_cd_layers_type_clear(&cd_used);
 
        for (int i = 0; i < gpumat_array_len; i++) {
                GPUMaterial *gpumat = gpumat_array[i];
@@ -468,7 +468,7 @@ static void mesh_cd_calc_used_gpu_layers(
                                                                
CustomData_get_active_layer(cd_ldata, CD_MLOOPUV);
                                                }
                                                if (layer != -1) {
-                                                       cd_lused[CD_MLOOPUV] |= 
(1 << layer);
+                                                       cd_used.uv |= (1 << 
layer);
                                                }
                                                break;
                                        }
@@ -485,12 +485,12 @@ static void mesh_cd_calc_used_gpu_layers(
                                                        }
                                                }
                                                if (layer != -1) {
-                                                       cd_lused[CD_TANGENT] |= 
(1 << layer);
+                                                       cd_used.tan |= (1 << 
layer);
                                                }
                                                else {
                                                        /* no UV layers at all 
=> requesting orco */
-                                                       cd_lused[CD_TANGENT] |= 
DM_TANGENT_MASK_ORCO;
-                                                       cd_vused[CD_ORCO] |= 1;
+                                                       cd_used.tan_orco = 1;
+                                                       cd_used.orco = 1;
                                                }
                                                break;
                                        }
@@ -502,19 +502,20 @@ static void mesh_cd_calc_used_gpu_layers(
                                                                
CustomData_get_active_layer(cd_ldata, CD_MLOOPCOL);
                                                }
                                                if (layer != -1) {
-                                                       cd_lused[CD_MLOOPCOL] 
|= (1 << layer);
+                                                       cd_used.vcol |= (1 << 
layer);
                                                }
                                                break;
                                        }
                                        case CD_ORCO:
                                        {
-                                               cd_vused[CD_ORCO] |= 1;
+                                               cd_used.orco = 1;
                                                break;
                                        }
                                }
                        }
                }
        }
+       return cd_used;
 }
 
 
@@ -544,13 +545,13 @@ static void mesh_render_calc_normals_loop_and_poly(const 
Mesh *me, const float s
 }
 
 static void mesh_cd_extract_auto_layers_names_and_srgb(
-        Mesh *me, const ushort cd_lused[CD_NUMTYPES],
+        Mesh *me, DRW_MeshCDMask cd_used,
         char **r_auto_layers_names, int **r_auto_layers_srgb, int 
*r_auto_layers_len)
 {
        const CustomData *cd_ldata = (me->edit_mesh) ? 
&me->edit_mesh->bm->ldata : &me->ldata;
 
-       int uv_len_used = count_bits_i(cd_lused[CD_MLOOPUV]);
-       int vcol_len_used = count_bits_i(cd_lused[CD_MLOOPCOL]);
+       int uv_len_used = count_bits_i(cd_used.uv);
+       int vcol_len_used = count_bits_i(cd_used.vcol);
        int uv_len = CustomData_number_of_layers(cd_ldata, CD_MLOOPUV);
        int vcol_len = CustomData_number_of_layers(cd_ldata, CD_MLOOPCOL);
 
@@ -561,7 +562,7 @@ static void mesh_cd_extract_auto_layers_names_and_srgb(
        int *auto_is_srgb = MEM_callocN(sizeof(int) * (uv_len_used + 
vcol_len_used), __func__);
 
        for (int i = 0; i < uv_len; i++) {
-               if ((cd_lused[CD_MLOOPUV] & (1 << i)) != 0) {
+               if ((cd_used.uv & (1 << i)) != 0) {
                        const char *name = CustomData_get_layer_name(cd_ldata, 
CD_MLOOPUV, i);
                        uint hash = BLI_ghashutil_strhash_p(name);
                        /* +1 to include '\0' terminator. */
@@ -571,7 +572,7 @@ static void mesh_cd_extract_auto_layers_names_and_srgb(
 
        uint auto_is_srgb_ofs = uv_len_used;
        for (int i = 0; i < vcol_len; i++) {
-               if ((cd_lused[CD_MLOOPCOL] & (1 << i)) != 0) {
+               if ((cd_used.vcol & (1 << i)) != 0) {
                        const char *name = CustomData_get_layer_name(cd_ldata, 
CD_MLOOPCOL, i);
                        /* We only do vcols that are not overridden by a uv 
layer with same name. */
                        if (CustomData_get_named_layer_index(cd_ldata, 
CD_MLOOPUV, name) == -1) {
@@ -602,7 +603,7 @@ static void mesh_cd_extract_auto_layers_names_and_srgb(
  * Although this only impacts the data that's generated, not the materials 
that display.
  */
 static MeshRenderData *mesh_render_data_create_ex(
-        Mesh *me, const int types, const uchar cd_vused[CD_NUMTYPES], const 
ushort cd_lused[CD_NUMTYPES],
+        Mesh *me, const int types, const DRW_MeshCDMask *cd_used,
         const ToolSettings *ts)
 {
        MeshRenderData *rdata = MEM_callocN(sizeof(*rdata), __func__);
@@ -842,7 +843,7 @@ static MeshRenderData *mesh_render_data_create_ex(
        if (types & MR_DATATYPE_SHADING) {
                CustomData *cd_vdata, *cd_ldata;
 
-               BLI_assert(cd_vused != NULL && cd_lused != NULL);
+               BLI_assert(cd_used != NULL);
 
                if (me->edit_mesh) {
                        BMesh *bm = me->edit_mesh->bm;
@@ -864,15 +865,15 @@ static MeshRenderData *mesh_render_data_create_ex(
                        active_index = -1; \
                } ((void)0)
 
-               CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.uv_active, 
cd_lused[CD_MLOOPUV]);
-               CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.uv_mask_active, 
cd_lused[CD_MLOOPUV]);
-               CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.tangent_active, 
cd_lused[CD_TANGENT]);
-               CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.vcol_active, 
cd_lused[CD_MLOOPCOL]);
+               CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.uv_active, 
cd_used->uv);
+               CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.uv_mask_active, 
cd_used->uv);
+               CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.tangent_active, 
cd_used->tan);
+               CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.vcol_active, 
cd_used->vcol);
 
 #undef CD_VALIDATE_ACTIVE_LAYER
 
                rdata->is_orco_allocated = false;
-               if (cd_vused[CD_ORCO] & 1) {
+               if (cd_used->orco != 0) {
                        rdata->orco = CustomData_get_layer(cd_vdata, CD_ORCO);
                        /* If orco is not available compute it ourselves */
                        if (!rdata->orco) {
@@ -912,9 +913,9 @@ static MeshRenderData *mesh_render_data_create_ex(
                        .vcol_len = CustomData_number_of_layers(cd_ldata, 
CD_MLOOPCOL),
                };
 
-               rdata->cd.layers.uv_len = min_ii(cd_layers_src.uv_len, 
count_bits_i(cd_lused[CD_MLOOPUV]));
-               rdata->cd.layers.tangent_len = 
count_bits_i(cd_lused[CD_TANGENT]);
-               rdata->cd.layers.vcol_len = min_ii(cd_layers_src.vcol_len, 
count_bits_i(cd_lused[CD_MLOOPCOL]));
+               rdata->cd.layers.uv_len = min_ii(cd_layers_src.uv_len, 
count_bits_i(cd_used->uv));
+               rdata->cd.layers.tangent_len = count_bits_i(cd_used->tan) + 
cd_used->tan_orco;
+               rdata->cd.layers.vcol_len = min_ii(cd_layers_src.vcol_len, 
count_bits_i(cd_used->vcol));
 
                rdata->cd.layers.uv = MEM_mallocN(sizeof(*rdata->cd.layers.uv) 
* rdata->cd.layers.uv_len, __func__);
                rdata->cd.layers.vcol = 
MEM_mallocN(sizeof(*rdata->cd.layers.vcol) * rdata->cd.layers.vcol_len, 
__func__);
@@ -943,7 +944,7 @@ static MeshRenderData *mesh_render_data_create_ex(
                if (rdata->cd.layers.vcol_len != 0) {
                        int act_vcol = rdata->cd.layers.vcol_active;
                        for (int i_src = 0, i_dst = 0; i_src < 
cd_layers_src.vcol_len; i_src++, i_dst++) {
-                               if ((cd_lused[CD_MLOOPCOL] & (1 << i_src)) == 
0) {
+                               if ((cd_used->vcol & (1 << i_src)) == 0) {
                                        /* This is a non-used VCol slot. Skip. 
*/
                                        i_dst--;
                                        if (rdata->cd.layers.vcol_active >= 
i_src) {
@@ -982,7 +983,7 @@ static MeshRenderDat

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to