Commit: 1ad2ad6d70b23c7f472cc187328d6ce1eeb80692
Author: Germano Cavalcante
Date:   Thu Jun 10 11:13:01 2021 -0300
Branches: asset-browser-poselib
https://developer.blender.org/rB1ad2ad6d70b23c7f472cc187328d6ce1eeb80692

Refactor: use 'BLI_task_parallel_range' in Draw Cache

One drawback to trying to predict the number of threads that will be
used in the `task_graph` is that we are only sure of the number when the
threads are running.

Using `BLI_task_parallel_range` allows the driver to
choose the best thread distribution through `parallel_reduce`.

The benefit is most evident on hardware with fewer cores.

This is the result on a 4-core laptop:
||before:|after:
|---|---|---|
|large_mesh_editing:|Average: 5.203638 FPS|Average: 5.398925 FPS
||rdata 15ms iter 43ms (frame 193ms)|rdata 14ms iter 36ms (frame 187ms)

Differential Revision: https://developer.blender.org/D11558

===================================================================

M       source/blender/draw/intern/draw_cache_extract_mesh.cc
M       source/blender/draw/intern/draw_cache_extract_mesh_private.h
M       source/blender/draw/intern/draw_manager_profiling.c
M       source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc
M       source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc
M       source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc
M       source/blender/gpu/GPU_index_buffer.h
M       source/blender/gpu/intern/gpu_index_buffer.cc
M       source/blender/gpu/tests/gpu_index_buffer_test.cc

===================================================================

diff --git a/source/blender/draw/intern/draw_cache_extract_mesh.cc 
b/source/blender/draw/intern/draw_cache_extract_mesh.cc
index c6303d541b3..c6b749fe11a 100644
--- a/source/blender/draw/intern/draw_cache_extract_mesh.cc
+++ b/source/blender/draw/intern/draw_cache_extract_mesh.cc
@@ -50,7 +50,7 @@
 #  include "PIL_time_utildefines.h"
 #endif
 
-#define CHUNK_SIZE 1024
+#define MIM_RANGE_LEN 1024
 
 namespace blender::draw {
 
@@ -439,18 +439,18 @@ static void extract_task_range_run(void *__restrict 
taskdata)
   const eMRIterType iter_type = data->iter_type;
   const bool is_mesh = data->mr->extract_type != MR_EXTRACT_BMESH;
 
+  size_t userdata_chunk_size = data->extractors->data_size_total();
+  char *userdata_chunk = new char[userdata_chunk_size];
+
   TaskParallelSettings settings;
   BLI_parallel_range_settings_defaults(&settings);
-  settings.func_reduce = extract_task_reduce;
-  settings.min_iter_per_thread = CHUNK_SIZE;
   settings.use_threading = data->use_threading;
+  settings.userdata_chunk = userdata_chunk;
+  settings.userdata_chunk_size = userdata_chunk_size;
+  settings.func_reduce = extract_task_reduce;
+  settings.min_iter_per_thread = MIM_RANGE_LEN;
 
-  size_t chunk_size = data->extractors->data_size_total();
-  char *chunk = new char[chunk_size];
-  extract_init(data->mr, data->cache, *data->extractors, data->mbc, (void 
*)chunk);
-
-  settings.userdata_chunk = chunk;
-  settings.userdata_chunk_size = chunk_size;
+  extract_init(data->mr, data->cache, *data->extractors, data->mbc, (void 
*)userdata_chunk);
 
   if (iter_type & MR_ITER_LOOPTRI) {
     extract_task_range_run_iter(data->mr, data->extractors, MR_ITER_LOOPTRI, 
is_mesh, &settings);
@@ -465,14 +465,14 @@ static void extract_task_range_run(void *__restrict 
taskdata)
     extract_task_range_run_iter(data->mr, data->extractors, MR_ITER_LVERT, 
is_mesh, &settings);
   }
 
-  extract_finish(data->mr, data->cache, *data->extractors, (void *)chunk);
-  delete[] chunk;
+  extract_finish(data->mr, data->cache, *data->extractors, (void 
*)userdata_chunk);
+  delete[] userdata_chunk;
 }
 
 /** \} */
 
 /* ---------------------------------------------------------------------- */
-/** \name Extract Single Thread
+/** \name Extract In Parallel Ranges
  * \{ */
 
 static struct TaskNode *extract_task_node_create(struct TaskGraph *task_graph,
@@ -705,7 +705,7 @@ static void mesh_buffer_cache_create_requested(struct 
TaskGraph *task_graph,
       task_graph, mr, iter_type, data_flag);
 
   /* Simple heuristic. */
-  const bool use_thread = (mr->loop_len + mr->loop_loose_len) > CHUNK_SIZE;
+  const bool use_thread = (mr->loop_len + mr->loop_loose_len) > MIM_RANGE_LEN;
 
   if (use_thread) {
     /* First run the requested extractors that do not support asynchronous 
ranges. */
diff --git a/source/blender/draw/intern/draw_cache_extract_mesh_private.h 
b/source/blender/draw/intern/draw_cache_extract_mesh_private.h
index 26849eca08c..2ece0b4f1db 100644
--- a/source/blender/draw/intern/draw_cache_extract_mesh_private.h
+++ b/source/blender/draw/intern/draw_cache_extract_mesh_private.h
@@ -194,8 +194,7 @@ typedef void(ExtractFinishFn)(const MeshRenderData *mr,
                               struct MeshBatchCache *cache,
                               void *buffer,
                               void *data);
-typedef void(ExtractTaskInitFn)(void *userdata, void *r_task_userdata);
-typedef void(ExtractTaskFinishFn)(void *userdata, void *task_userdata);
+typedef void(ExtractTaskReduceFn)(void *userdata, void *task_userdata);
 
 typedef struct MeshExtract {
   /** Executed on main thread and return user data for iteration functions. */
@@ -210,7 +209,7 @@ typedef struct MeshExtract {
   ExtractLVertBMeshFn *iter_lvert_bm;
   ExtractLVertMeshFn *iter_lvert_mesh;
   /** Executed on one worker thread after all elements iterations. */
-  ExtractTaskFinishFn *task_reduce;
+  ExtractTaskReduceFn *task_reduce;
   ExtractFinishFn *finish;
   /** Used to request common data. */
   eMRDataType data_type;
diff --git a/source/blender/draw/intern/draw_manager_profiling.c 
b/source/blender/draw/intern/draw_manager_profiling.c
index 9bfc8d98fe4..783ec1b1d7d 100644
--- a/source/blender/draw/intern/draw_manager_profiling.c
+++ b/source/blender/draw/intern/draw_manager_profiling.c
@@ -41,7 +41,7 @@
 
 #define MAX_TIMER_NAME 32
 #define MAX_NESTED_TIMER 8
-#define CHUNK_SIZE 8
+#define MIM_RANGE_LEN 8
 #define GPU_TIMER_FALLOFF 0.1
 
 typedef struct DRWTimer {
@@ -82,7 +82,7 @@ void DRW_stats_begin(void)
 
   if (DTP.is_recording && DTP.timers == NULL) {
     DTP.chunk_count = 1;
-    DTP.timer_count = DTP.chunk_count * CHUNK_SIZE;
+    DTP.timer_count = DTP.chunk_count * MIM_RANGE_LEN;
     DTP.timers = MEM_callocN(sizeof(DRWTimer) * DTP.timer_count, "DRWTimer 
stack");
   }
   else if (!DTP.is_recording && DTP.timers != NULL) {
@@ -99,7 +99,7 @@ static DRWTimer *drw_stats_timer_get(void)
   if (UNLIKELY(DTP.timer_increment >= DTP.timer_count)) {
     /* Resize the stack. */
     DTP.chunk_count++;
-    DTP.timer_count = DTP.chunk_count * CHUNK_SIZE;
+    DTP.timer_count = DTP.chunk_count * MIM_RANGE_LEN;
     DTP.timers = MEM_recallocN(DTP.timers, sizeof(DRWTimer) * DTP.timer_count);
   }
 
diff --git 
a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc 
b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc
index 683794d4d66..64aaed6600f 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc
@@ -138,11 +138,11 @@ static void extract_lines_iter_ledge_mesh(const 
MeshRenderData *mr,
   GPU_indexbuf_set_line_restart(elb, e_index);
 }
 
-static void extract_lines_task_finish(void *_userdata_to, void *_userdata_from)
+static void extract_lines_task_reduce(void *_userdata_to, void *_userdata_from)
 {
   GPUIndexBufBuilder *elb_to = static_cast<GPUIndexBufBuilder *>(_userdata_to);
   GPUIndexBufBuilder *elb_from = static_cast<GPUIndexBufBuilder 
*>(_userdata_from);
-  GPU_indexbuf_join_copies(elb_to, elb_from);
+  GPU_indexbuf_join(elb_to, elb_from);
 }
 
 static void extract_lines_finish(const MeshRenderData *UNUSED(mr),
@@ -163,7 +163,7 @@ constexpr MeshExtract create_extractor_lines()
   extractor.iter_poly_mesh = extract_lines_iter_poly_mesh;
   extractor.iter_ledge_bm = extract_lines_iter_ledge_bm;
   extractor.iter_ledge_mesh = extract_lines_iter_ledge_mesh;
-  extractor.task_reduce = extract_lines_task_finish;
+  extractor.task_reduce = extract_lines_task_reduce;
   extractor.finish = extract_lines_finish;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(GPUIndexBufBuilder);
@@ -208,7 +208,7 @@ constexpr MeshExtract 
create_extractor_lines_with_lines_loose()
   extractor.iter_poly_mesh = extract_lines_iter_poly_mesh;
   extractor.iter_ledge_bm = extract_lines_iter_ledge_bm;
   extractor.iter_ledge_mesh = extract_lines_iter_ledge_mesh;
-  extractor.task_reduce = extract_lines_task_finish;
+  extractor.task_reduce = extract_lines_task_reduce;
   extractor.finish = extract_lines_with_lines_loose_finish;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(GPUIndexBufBuilder);
diff --git 
a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc 
b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc
index 25d1a159f60..1e4e76ba7c5 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc
@@ -137,11 +137,11 @@ static void extract_points_iter_lvert_mesh(const 
MeshRenderData *mr,
   vert_set_mesh(elb, mr, mr->lverts[lvert_index], offset + lvert_index);
 }
 
-static void extract_points_task_finish(void *_userdata_to, void 
*_userdata_from)
+static void extract_points_task_reduce(void *_userdata_to, void 
*_userdata_from)
 {
   GPUIndexBufBuilder *elb_to = static_cast<GPUIndexBufBuilder *>(_userdata_to);
   GPUIndexBufBuilder *elb_from = static_cast<GPUIndexBufBuilder 
*>(_userdata_from);
-  GPU_indexbuf_join_copies(elb_to, elb_from);
+  GPU_indexbuf_join(elb_to, elb_from);
 }
 
 static void extract_points_finish(const MeshRenderData *UNUSED(mr),
@@ -164,7 +164,7 @@ constexpr MeshExtract create_extractor_points()
   extractor.iter_ledge_mesh = extract_points_iter_ledge_mesh;
   extractor.iter_lvert_bm = extract_points_iter_lvert_bm;
   extractor.iter_lvert_mesh = extract_points_iter_lvert_mesh;
-  extractor.task_reduce = extract_points_task_finish;
+  extractor.task_reduce = extract_points_task_reduce;
   extractor.finish = extract_points_finish;
   extractor.use_threading = true;
   extractor.data_type = MR_DATA_NONE;
diff --git 
a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc 
b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc
index 27929fa8ba3..70b46481b51 100644
--- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc
+++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc
@@ -211,11 +211,11 @@ static void 
extract_tris_single_mat_iter_looptri_mesh(const MeshRenderData *mr,
   }
 }
 
-static void extract_tris_single_mat_task_finish(void *_userdata_to, void 
*_userdata_from)
+static void extract_tris_single_mat_task_reduce(void *_userdata_to, void 
*_userdata_from)
 {
   GPUIndexBufBuilder *elb_to = static_cast<GPUIndexBufBuilder *>(_userdata_to);
   GPUIndexBufBuilder *elb_from = static_cast<GPUIndexBufBuilder 
*>(_userdata_from);
-  GPU_indexbuf_join_copies(elb_to, elb_from);
+  GPU_indexbuf_join(elb_to, elb_from);
 }
 
 static void extract_tris_single_mat_finish(const MeshRenderData *mr,
@@ -250,7 +250,7 @@ constexpr MeshExtract create_extractor_tris_single_mat()
   extractor.init = extract_tris_single_mat_init;
   extractor.iter_looptri_bm = extract_tris_single_mat_iter_looptri_bm;
   extractor.iter_looptri_mesh = extract_tris_single_mat_iter_looptri_mesh;
-  extractor.task_reduce = extract_tris_single_mat_task_finish;
+  extractor.task_reduce = extract_tris_single_mat_task_reduce;
   extractor.finish = extract_tris_single_mat_finish;
   extractor.data_type = MR_DATA_NONE;
   extractor.data_size = sizeof(GPUIndexBufBuilder);
diff --git a/source/blender/gpu

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to