Commit: f12040e088b66748340b0d7472d1155596e0f9e2 Author: Clément Foucault Date: Sat Apr 6 01:55:21 2019 +0200 Branches: master https://developer.blender.org/rBf12040e088b66748340b0d7472d1155596e0f9e2
DRW: Opti: Fix hotspot in DRW_mesh_batch_cache_get_surface_shaded The hotspot was generated by mesh_cd_layers_type_overlap, which was testing way more data than it should have. Here we reduce the whole CD layer mask to a 32-bit bitflag that is easily OR'ed and tested. Bonus point: we use an atomic operation to merge the mask in order to allow future multi-threading (this was a TODO). In the scene attached to T58188 this removes 5% of CPU time. =================================================================== M source/blender/draw/CMakeLists.txt M source/blender/draw/intern/draw_cache_impl_mesh.c =================================================================== diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt index aea08f87a84..3ac606fc48f 100644 --- a/source/blender/draw/CMakeLists.txt +++ b/source/blender/draw/CMakeLists.txt @@ -41,6 +41,7 @@ set(INC ../../../intern/glew-mx ../../../intern/guardedalloc + ../../../intern/atomic ) set(INC_SYS diff --git a/source/blender/draw/intern/draw_cache_impl_mesh.c b/source/blender/draw/intern/draw_cache_impl_mesh.c index e47d46aca09..5643d23c7cd 100644 --- a/source/blender/draw/intern/draw_cache_impl_mesh.c +++ b/source/blender/draw/intern/draw_cache_impl_mesh.c @@ -48,6 +48,7 @@ #include "BKE_mesh_runtime.h" #include "BKE_object_deform.h" +#include "atomic_ops.h" #include "bmesh.h" @@ -78,6 +79,14 @@ typedef struct DRW_MeshWeightState { int defgroup_sel_count; } DRW_MeshWeightState; +typedef struct DRW_MeshCDMask { + uint32_t uv : 8; + uint32_t tan : 8; + uint32_t vcol : 8; + uint32_t orco : 1; + uint32_t tan_orco : 1; +} DRW_MeshCDMask; + /* DRW_MeshWeightState.flags */ enum { DRW_MESH_WEIGHT_STATE_MULTIPAINT = (1 << 0), @@ -354,72 +363,63 @@ BLI_INLINE bool bm_edge_is_loose_and_visible(const BMEdge *e) } /* Return true is all layers in _b_ are inside _a_. 
*/ -static bool mesh_cd_layers_type_overlap( - const uchar av[CD_NUMTYPES], const ushort al[CD_NUMTYPES], - const uchar bv[CD_NUMTYPES], const ushort bl[CD_NUMTYPES]) +BLI_INLINE bool mesh_cd_layers_type_overlap(DRW_MeshCDMask a, DRW_MeshCDMask b) { - for (int i = 0; i < CD_NUMTYPES; ++i) { - if ((av[i] & bv[i]) != bv[i]) { - return false; - } - if ((al[i] & bl[i]) != bl[i]) { - return false; - } - } - return true; + return (*((uint32_t *)&a) & *((uint32_t *)&b)) == *((uint32_t *)&b); } -static void mesh_cd_layers_type_merge( - uchar av[CD_NUMTYPES], ushort al[CD_NUMTYPES], - uchar bv[CD_NUMTYPES], ushort bl[CD_NUMTYPES]) +BLI_INLINE void mesh_cd_layers_type_merge(DRW_MeshCDMask *a, DRW_MeshCDMask b) { - for (int i = 0; i < CD_NUMTYPES; ++i) { - av[i] |= bv[i]; - al[i] |= bl[i]; - } + atomic_fetch_and_or_uint32((uint32_t *)a, *(uint32_t *)&b); +} + +BLI_INLINE void mesh_cd_layers_type_clear(DRW_MeshCDMask *a) +{ + *((uint32_t *)a) = 0; } static void mesh_cd_calc_active_uv_layer( - const Mesh *me, ushort cd_lused[CD_NUMTYPES]) + const Mesh *me, DRW_MeshCDMask *cd_used) { const CustomData *cd_ldata = (me->edit_mesh) ? &me->edit_mesh->bm->ldata : &me->ldata; int layer = CustomData_get_active_layer(cd_ldata, CD_MLOOPUV); if (layer != -1) { - cd_lused[CD_MLOOPUV] |= (1 << layer); + cd_used->uv |= (1 << layer); } } static void mesh_cd_calc_active_mask_uv_layer( - const Mesh *me, ushort cd_lused[CD_NUMTYPES]) + const Mesh *me, DRW_MeshCDMask *cd_used) { const CustomData *cd_ldata = (me->edit_mesh) ? &me->edit_mesh->bm->ldata : &me->ldata; int layer = CustomData_get_stencil_layer(cd_ldata, CD_MLOOPUV); if (layer != -1) { - cd_lused[CD_MLOOPUV] |= (1 << layer); + cd_used->uv |= (1 << layer); } } static void mesh_cd_calc_active_vcol_layer( - const Mesh *me, ushort cd_lused[CD_NUMTYPES]) + const Mesh *me, DRW_MeshCDMask *cd_used) { const CustomData *cd_ldata = (me->edit_mesh) ? 
&me->edit_mesh->bm->ldata : &me->ldata; int layer = CustomData_get_active_layer(cd_ldata, CD_MLOOPCOL); if (layer != -1) { - cd_lused[CD_MLOOPCOL] |= (1 << layer); + cd_used->vcol |= (1 << layer); } } -static void mesh_cd_calc_used_gpu_layers( - const Mesh *me, uchar cd_vused[CD_NUMTYPES], ushort cd_lused[CD_NUMTYPES], - struct GPUMaterial **gpumat_array, int gpumat_array_len) +static DRW_MeshCDMask mesh_cd_calc_used_gpu_layers( + const Mesh *me, struct GPUMaterial **gpumat_array, int gpumat_array_len) { const CustomData *cd_ldata = (me->edit_mesh) ? &me->edit_mesh->bm->ldata : &me->ldata; /* See: DM_vertex_attributes_from_gpu for similar logic */ GPUVertAttrLayers gpu_attrs = {{{0}}}; + DRW_MeshCDMask cd_used; + mesh_cd_layers_type_clear(&cd_used); for (int i = 0; i < gpumat_array_len; i++) { GPUMaterial *gpumat = gpumat_array[i]; @@ -468,7 +468,7 @@ static void mesh_cd_calc_used_gpu_layers( CustomData_get_active_layer(cd_ldata, CD_MLOOPUV); } if (layer != -1) { - cd_lused[CD_MLOOPUV] |= (1 << layer); + cd_used.uv |= (1 << layer); } break; } @@ -485,12 +485,12 @@ static void mesh_cd_calc_used_gpu_layers( } } if (layer != -1) { - cd_lused[CD_TANGENT] |= (1 << layer); + cd_used.tan |= (1 << layer); } else { /* no UV layers at all => requesting orco */ - cd_lused[CD_TANGENT] |= DM_TANGENT_MASK_ORCO; - cd_vused[CD_ORCO] |= 1; + cd_used.tan_orco = 1; + cd_used.orco = 1; } break; } @@ -502,19 +502,20 @@ static void mesh_cd_calc_used_gpu_layers( CustomData_get_active_layer(cd_ldata, CD_MLOOPCOL); } if (layer != -1) { - cd_lused[CD_MLOOPCOL] |= (1 << layer); + cd_used.vcol |= (1 << layer); } break; } case CD_ORCO: { - cd_vused[CD_ORCO] |= 1; + cd_used.orco = 1; break; } } } } } + return cd_used; } @@ -544,13 +545,13 @@ static void mesh_render_calc_normals_loop_and_poly(const Mesh *me, const float s } static void mesh_cd_extract_auto_layers_names_and_srgb( - Mesh *me, const ushort cd_lused[CD_NUMTYPES], + Mesh *me, DRW_MeshCDMask cd_used, char **r_auto_layers_names, int 
**r_auto_layers_srgb, int *r_auto_layers_len) { const CustomData *cd_ldata = (me->edit_mesh) ? &me->edit_mesh->bm->ldata : &me->ldata; - int uv_len_used = count_bits_i(cd_lused[CD_MLOOPUV]); - int vcol_len_used = count_bits_i(cd_lused[CD_MLOOPCOL]); + int uv_len_used = count_bits_i(cd_used.uv); + int vcol_len_used = count_bits_i(cd_used.vcol); int uv_len = CustomData_number_of_layers(cd_ldata, CD_MLOOPUV); int vcol_len = CustomData_number_of_layers(cd_ldata, CD_MLOOPCOL); @@ -561,7 +562,7 @@ static void mesh_cd_extract_auto_layers_names_and_srgb( int *auto_is_srgb = MEM_callocN(sizeof(int) * (uv_len_used + vcol_len_used), __func__); for (int i = 0; i < uv_len; i++) { - if ((cd_lused[CD_MLOOPUV] & (1 << i)) != 0) { + if ((cd_used.uv & (1 << i)) != 0) { const char *name = CustomData_get_layer_name(cd_ldata, CD_MLOOPUV, i); uint hash = BLI_ghashutil_strhash_p(name); /* +1 to include '\0' terminator. */ @@ -571,7 +572,7 @@ static void mesh_cd_extract_auto_layers_names_and_srgb( uint auto_is_srgb_ofs = uv_len_used; for (int i = 0; i < vcol_len; i++) { - if ((cd_lused[CD_MLOOPCOL] & (1 << i)) != 0) { + if ((cd_used.vcol & (1 << i)) != 0) { const char *name = CustomData_get_layer_name(cd_ldata, CD_MLOOPCOL, i); /* We only do vcols that are not overridden by a uv layer with same name. */ if (CustomData_get_named_layer_index(cd_ldata, CD_MLOOPUV, name) == -1) { @@ -602,7 +603,7 @@ static void mesh_cd_extract_auto_layers_names_and_srgb( * Although this only impacts the data that's generated, not the materials that display. 
*/ static MeshRenderData *mesh_render_data_create_ex( - Mesh *me, const int types, const uchar cd_vused[CD_NUMTYPES], const ushort cd_lused[CD_NUMTYPES], + Mesh *me, const int types, const DRW_MeshCDMask *cd_used, const ToolSettings *ts) { MeshRenderData *rdata = MEM_callocN(sizeof(*rdata), __func__); @@ -842,7 +843,7 @@ static MeshRenderData *mesh_render_data_create_ex( if (types & MR_DATATYPE_SHADING) { CustomData *cd_vdata, *cd_ldata; - BLI_assert(cd_vused != NULL && cd_lused != NULL); + BLI_assert(cd_used != NULL); if (me->edit_mesh) { BMesh *bm = me->edit_mesh->bm; @@ -864,15 +865,15 @@ static MeshRenderData *mesh_render_data_create_ex( active_index = -1; \ } ((void)0) - CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.uv_active, cd_lused[CD_MLOOPUV]); - CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.uv_mask_active, cd_lused[CD_MLOOPUV]); - CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.tangent_active, cd_lused[CD_TANGENT]); - CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.vcol_active, cd_lused[CD_MLOOPCOL]); + CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.uv_active, cd_used->uv); + CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.uv_mask_active, cd_used->uv); + CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.tangent_active, cd_used->tan); + CD_VALIDATE_ACTIVE_LAYER(rdata->cd.layers.vcol_active, cd_used->vcol); #undef CD_VALIDATE_ACTIVE_LAYER rdata->is_orco_allocated = false; - if (cd_vused[CD_ORCO] & 1) { + if (cd_used->orco != 0) { rdata->orco = CustomData_get_layer(cd_vdata, CD_ORCO); /* If orco is not available compute it ourselves */ if (!rdata->orco) { @@ -912,9 +913,9 @@ static MeshRenderData *mesh_render_data_create_ex( .vcol_len = CustomData_number_of_layers(cd_ldata, CD_MLOOPCOL), }; - rdata->cd.layers.uv_len = min_ii(cd_layers_src.uv_len, count_bits_i(cd_lused[CD_MLOOPUV])); - rdata->cd.layers.tangent_len = count_bits_i(cd_lused[CD_TANGENT]); - rdata->cd.layers.vcol_len = min_ii(cd_layers_src.vcol_len, count_bits_i(cd_lused[CD_MLOOPCOL])); + rdata->cd.layers.uv_len = 
min_ii(cd_layers_src.uv_len, count_bits_i(cd_used->uv)); + rdata->cd.layers.tangent_len = count_bits_i(cd_used->tan) + cd_used->tan_orco; + rdata->cd.layers.vcol_len = min_ii(cd_layers_src.vcol_len, count_bits_i(cd_used->vcol)); rdata->cd.layers.uv = MEM_mallocN(sizeof(*rdata->cd.layers.uv) * rdata->cd.layers.uv_len, __func__); rdata->cd.layers.vcol = MEM_mallocN(sizeof(*rdata->cd.layers.vcol) * rdata->cd.layers.vcol_len, __func__); @@ -943,7 +944,7 @@ static MeshRenderData *mesh_render_data_create_ex( if (rdata->cd.layers.vcol_len != 0) { int act_vcol = rdata->cd.layers.vcol_active; for (int i_src = 0, i_dst = 0; i_src < cd_layers_src.vcol_len; i_src++, i_dst++) { - if ((cd_lused[CD_MLOOPCOL] & (1 << i_src)) == 0) { + if ((cd_used->vcol & (1 << i_src)) == 0) { /* This is a non-used VCol slot. Skip. */ i_dst--; if (rdata->cd.layers.vcol_active >= i_src) { @@ -982,7 +983,7 @@ static MeshRenderDat @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs