Commit: 0eb9351296dbed5e7ac10ca56132d5e51e5f388d Author: Germano Cavalcante Date: Thu Jun 10 11:13:01 2021 -0300 Branches: master https://developer.blender.org/rB0eb9351296dbed5e7ac10ca56132d5e51e5f388d
Refactor: use 'BLI_task_parallel_range' in Draw Cache One drawback to trying to predict the number of threads that will be used in the `task_graph` is that we are only sure of the number when the threads are running. Using `BLI_task_parallel_range` allows the driver to choose the best thread distribution through `parallel_reduce`. The benefit is most evident on hardware with fewer cores. This is the result on a 4-core laptop: ||before:|after: |---|---|---| |large_mesh_editing:|Average: 5.203638 FPS|Average: 5.398925 FPS ||rdata 15ms iter 43ms (frame 193ms)|rdata 14ms iter 36ms (frame 187ms) Differential Revision: https://developer.blender.org/D11558 =================================================================== M source/blender/draw/intern/draw_cache_extract_mesh.cc M source/blender/draw/intern/draw_cache_extract_mesh_private.h M source/blender/draw/intern/draw_manager_profiling.c M source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc M source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc M source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc M source/blender/gpu/GPU_index_buffer.h M source/blender/gpu/intern/gpu_index_buffer.cc M source/blender/gpu/tests/gpu_index_buffer_test.cc =================================================================== diff --git a/source/blender/draw/intern/draw_cache_extract_mesh.cc b/source/blender/draw/intern/draw_cache_extract_mesh.cc index c6303d541b3..c6b749fe11a 100644 --- a/source/blender/draw/intern/draw_cache_extract_mesh.cc +++ b/source/blender/draw/intern/draw_cache_extract_mesh.cc @@ -50,7 +50,7 @@ # include "PIL_time_utildefines.h" #endif -#define CHUNK_SIZE 1024 +#define MIM_RANGE_LEN 1024 namespace blender::draw { @@ -439,18 +439,18 @@ static void extract_task_range_run(void *__restrict taskdata) const eMRIterType iter_type = data->iter_type; const bool is_mesh = data->mr->extract_type != MR_EXTRACT_BMESH; + size_t userdata_chunk_size = 
data->extractors->data_size_total(); + char *userdata_chunk = new char[userdata_chunk_size]; + TaskParallelSettings settings; BLI_parallel_range_settings_defaults(&settings); - settings.func_reduce = extract_task_reduce; - settings.min_iter_per_thread = CHUNK_SIZE; settings.use_threading = data->use_threading; + settings.userdata_chunk = userdata_chunk; + settings.userdata_chunk_size = userdata_chunk_size; + settings.func_reduce = extract_task_reduce; + settings.min_iter_per_thread = MIM_RANGE_LEN; - size_t chunk_size = data->extractors->data_size_total(); - char *chunk = new char[chunk_size]; - extract_init(data->mr, data->cache, *data->extractors, data->mbc, (void *)chunk); - - settings.userdata_chunk = chunk; - settings.userdata_chunk_size = chunk_size; + extract_init(data->mr, data->cache, *data->extractors, data->mbc, (void *)userdata_chunk); if (iter_type & MR_ITER_LOOPTRI) { extract_task_range_run_iter(data->mr, data->extractors, MR_ITER_LOOPTRI, is_mesh, &settings); @@ -465,14 +465,14 @@ static void extract_task_range_run(void *__restrict taskdata) extract_task_range_run_iter(data->mr, data->extractors, MR_ITER_LVERT, is_mesh, &settings); } - extract_finish(data->mr, data->cache, *data->extractors, (void *)chunk); - delete[] chunk; + extract_finish(data->mr, data->cache, *data->extractors, (void *)userdata_chunk); + delete[] userdata_chunk; } /** \} */ /* ---------------------------------------------------------------------- */ -/** \name Extract Single Thread +/** \name Extract In Parallel Ranges * \{ */ static struct TaskNode *extract_task_node_create(struct TaskGraph *task_graph, @@ -705,7 +705,7 @@ static void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph, task_graph, mr, iter_type, data_flag); /* Simple heuristic. 
*/ - const bool use_thread = (mr->loop_len + mr->loop_loose_len) > CHUNK_SIZE; + const bool use_thread = (mr->loop_len + mr->loop_loose_len) > MIM_RANGE_LEN; if (use_thread) { /* First run the requested extractors that do not support asynchronous ranges. */ diff --git a/source/blender/draw/intern/draw_cache_extract_mesh_private.h b/source/blender/draw/intern/draw_cache_extract_mesh_private.h index 26849eca08c..2ece0b4f1db 100644 --- a/source/blender/draw/intern/draw_cache_extract_mesh_private.h +++ b/source/blender/draw/intern/draw_cache_extract_mesh_private.h @@ -194,8 +194,7 @@ typedef void(ExtractFinishFn)(const MeshRenderData *mr, struct MeshBatchCache *cache, void *buffer, void *data); -typedef void(ExtractTaskInitFn)(void *userdata, void *r_task_userdata); -typedef void(ExtractTaskFinishFn)(void *userdata, void *task_userdata); +typedef void(ExtractTaskReduceFn)(void *userdata, void *task_userdata); typedef struct MeshExtract { /** Executed on main thread and return user data for iteration functions. */ @@ -210,7 +209,7 @@ typedef struct MeshExtract { ExtractLVertBMeshFn *iter_lvert_bm; ExtractLVertMeshFn *iter_lvert_mesh; /** Executed on one worker thread after all elements iterations. */ - ExtractTaskFinishFn *task_reduce; + ExtractTaskReduceFn *task_reduce; ExtractFinishFn *finish; /** Used to request common data. 
*/ eMRDataType data_type; diff --git a/source/blender/draw/intern/draw_manager_profiling.c b/source/blender/draw/intern/draw_manager_profiling.c index 9bfc8d98fe4..783ec1b1d7d 100644 --- a/source/blender/draw/intern/draw_manager_profiling.c +++ b/source/blender/draw/intern/draw_manager_profiling.c @@ -41,7 +41,7 @@ #define MAX_TIMER_NAME 32 #define MAX_NESTED_TIMER 8 -#define CHUNK_SIZE 8 +#define MIM_RANGE_LEN 8 #define GPU_TIMER_FALLOFF 0.1 typedef struct DRWTimer { @@ -82,7 +82,7 @@ void DRW_stats_begin(void) if (DTP.is_recording && DTP.timers == NULL) { DTP.chunk_count = 1; - DTP.timer_count = DTP.chunk_count * CHUNK_SIZE; + DTP.timer_count = DTP.chunk_count * MIM_RANGE_LEN; DTP.timers = MEM_callocN(sizeof(DRWTimer) * DTP.timer_count, "DRWTimer stack"); } else if (!DTP.is_recording && DTP.timers != NULL) { @@ -99,7 +99,7 @@ static DRWTimer *drw_stats_timer_get(void) if (UNLIKELY(DTP.timer_increment >= DTP.timer_count)) { /* Resize the stack. */ DTP.chunk_count++; - DTP.timer_count = DTP.chunk_count * CHUNK_SIZE; + DTP.timer_count = DTP.chunk_count * MIM_RANGE_LEN; DTP.timers = MEM_recallocN(DTP.timers, sizeof(DRWTimer) * DTP.timer_count); } diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc index 683794d4d66..64aaed6600f 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc @@ -138,11 +138,11 @@ static void extract_lines_iter_ledge_mesh(const MeshRenderData *mr, GPU_indexbuf_set_line_restart(elb, e_index); } -static void extract_lines_task_finish(void *_userdata_to, void *_userdata_from) +static void extract_lines_task_reduce(void *_userdata_to, void *_userdata_from) { GPUIndexBufBuilder *elb_to = static_cast<GPUIndexBufBuilder *>(_userdata_to); GPUIndexBufBuilder *elb_from = static_cast<GPUIndexBufBuilder *>(_userdata_from); - GPU_indexbuf_join_copies(elb_to, 
elb_from); + GPU_indexbuf_join(elb_to, elb_from); } static void extract_lines_finish(const MeshRenderData *UNUSED(mr), @@ -163,7 +163,7 @@ constexpr MeshExtract create_extractor_lines() extractor.iter_poly_mesh = extract_lines_iter_poly_mesh; extractor.iter_ledge_bm = extract_lines_iter_ledge_bm; extractor.iter_ledge_mesh = extract_lines_iter_ledge_mesh; - extractor.task_reduce = extract_lines_task_finish; + extractor.task_reduce = extract_lines_task_reduce; extractor.finish = extract_lines_finish; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(GPUIndexBufBuilder); @@ -208,7 +208,7 @@ constexpr MeshExtract create_extractor_lines_with_lines_loose() extractor.iter_poly_mesh = extract_lines_iter_poly_mesh; extractor.iter_ledge_bm = extract_lines_iter_ledge_bm; extractor.iter_ledge_mesh = extract_lines_iter_ledge_mesh; - extractor.task_reduce = extract_lines_task_finish; + extractor.task_reduce = extract_lines_task_reduce; extractor.finish = extract_lines_with_lines_loose_finish; extractor.data_type = MR_DATA_NONE; extractor.data_size = sizeof(GPUIndexBufBuilder); diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc index 25d1a159f60..1e4e76ba7c5 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc @@ -137,11 +137,11 @@ static void extract_points_iter_lvert_mesh(const MeshRenderData *mr, vert_set_mesh(elb, mr, mr->lverts[lvert_index], offset + lvert_index); } -static void extract_points_task_finish(void *_userdata_to, void *_userdata_from) +static void extract_points_task_reduce(void *_userdata_to, void *_userdata_from) { GPUIndexBufBuilder *elb_to = static_cast<GPUIndexBufBuilder *>(_userdata_to); GPUIndexBufBuilder *elb_from = static_cast<GPUIndexBufBuilder *>(_userdata_from); - GPU_indexbuf_join_copies(elb_to, elb_from); + 
GPU_indexbuf_join(elb_to, elb_from); } static void extract_points_finish(const MeshRenderData *UNUSED(mr), @@ -164,7 +164,7 @@ constexpr MeshExtract create_extractor_points() extractor.iter_ledge_mesh = extract_points_iter_ledge_mesh; extractor.iter_lvert_bm = extract_points_iter_lvert_bm; extractor.iter_lvert_mesh = extract_points_iter_lvert_mesh; - extractor.task_reduce = extract_points_task_finish; + extractor.task_reduce = extract_points_task_reduce; extractor.finish = extract_points_finish; extractor.use_threading = true; extractor.data_type = MR_DATA_NONE; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc index 27929fa8ba3..70b46481b51 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc @@ -211,11 +211,11 @@ static void extract_tris_single_mat_iter_looptri_mesh(const MeshRenderData *mr, } } -static void extract_tris_single_mat_task_finish(void *_userdata_to, void *_userdata_from) +static void extract_tris_single_mat_task_reduce(void *_userdata_to, void *_userdata_from) { GPUIndexBufBuilder *elb_to = static_cast<GPUIndexBufBuilder *>(_userdata_to); GPUIndexBufBuilder *elb_from = static_cast<GPUIndexBufBuilder *>(_userdata_from); - GPU_indexbuf_join_copies(elb_to, elb_from); + GPU_indexbuf_join(elb_to, elb_from); } static void extract_tris_single_mat_finish(const MeshRenderData *mr, @@ -250,7 +250,7 @@ constexpr MeshExtract create_extractor_tris_single_mat() extractor.init = extract_tris_single_mat_init; extractor.iter_looptri_bm = extract_tris_single_mat_iter_looptri_bm; extractor.iter_looptri_mesh = extract_tris_single_mat_iter_looptri_mesh; - extractor.task_reduce = extract_tris_single_mat_task_finish; + extractor.task_reduce = extract_tris_single_mat_task_reduce; extractor.finish = extract_tris_single_mat_finish; extractor.data_type = MR_DATA_NONE; 
extractor.data_size = sizeof(GPUIndexBufBuilder); diff --git a/source/blender/gpu @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org https://lists.blender.org/mailman/listinfo/bf-blender-cvs