Commit: 4b33667b93a3c3cf28478ce06e5200cb2ac2493e
Author: Martijn Berger
Date: Thu Oct 30 10:17:42 2014 +0100
Branches: master
https://developer.blender.org/rB4b33667b93a3c3cf28478ce06e5200cb2ac2493e
Deduplicate some code by using a function pointer to the real kernel
This has no performance impact whatsoever and is already used in the adaptive
sampling patch.
===================================================================
M intern/cycles/device/device_cpu.cpp
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp
b/intern/cycles/device/device_cpu.cpp
index c9b8a5b..242cc65 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -170,124 +170,42 @@ public:
#endif
RenderTile tile;
-
- while(task.acquire_tile(this, tile)) {
- float *render_buffer = (float*)tile.buffer;
- uint *rng_state = (uint*)tile.rng_state;
- int start_sample = tile.start_sample;
- int end_sample = tile.start_sample + tile.num_samples;
-
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
- if(system_cpu_support_avx2()) {
- for(int sample = start_sample; sample <
end_sample; sample++) {
- if (task.get_cancel() ||
task_pool.canceled()) {
- if(task.need_finish_queue ==
false)
- break;
- }
- for(int y = tile.y; y < tile.y +
tile.h; y++) {
- for(int x = tile.x; x < tile.x
+ tile.w; x++) {
-
kernel_cpu_avx2_path_trace(&kg, render_buffer, rng_state,
-
sample, x, y, tile.offset, tile.stride);
- }
- }
+ void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*,
int, int, int, int, int);
- tile.sample = sample + 1;
-
- task.update_progress(&tile);
- }
- }
- else
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
+ if(system_cpu_support_avx2())
+ path_trace_kernel = kernel_cpu_avx2_path_trace;
+ else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
- if(system_cpu_support_avx()) {
- for(int sample = start_sample; sample <
end_sample; sample++) {
- if (task.get_cancel() ||
task_pool.canceled()) {
- if(task.need_finish_queue ==
false)
- break;
- }
-
- for(int y = tile.y; y < tile.y +
tile.h; y++) {
- for(int x = tile.x; x < tile.x
+ tile.w; x++) {
-
kernel_cpu_avx_path_trace(&kg, render_buffer, rng_state,
- sample, x, y,
tile.offset, tile.stride);
- }
- }
-
- tile.sample = sample + 1;
-
- task.update_progress(&tile);
- }
- }
- else
+ if(system_cpu_support_avx())
+ path_trace_kernel = kernel_cpu_avx_path_trace;
+ else
#endif
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
- if(system_cpu_support_sse41()) {
- for(int sample = start_sample; sample <
end_sample; sample++) {
- if (task.get_cancel() ||
task_pool.canceled()) {
- if(task.need_finish_queue ==
false)
- break;
- }
-
- for(int y = tile.y; y < tile.y +
tile.h; y++) {
- for(int x = tile.x; x < tile.x
+ tile.w; x++) {
-
kernel_cpu_sse41_path_trace(&kg, render_buffer, rng_state,
- sample, x, y,
tile.offset, tile.stride);
- }
- }
-
- tile.sample = sample + 1;
-
- task.update_progress(&tile);
- }
- }
- else
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
+ if(system_cpu_support_sse41())
+ path_trace_kernel = kernel_cpu_sse41_path_trace;
+ else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
- if(system_cpu_support_sse3()) {
- for(int sample = start_sample; sample <
end_sample; sample++) {
- if (task.get_cancel() ||
task_pool.canceled()) {
- if(task.need_finish_queue ==
false)
- break;
- }
-
- for(int y = tile.y; y < tile.y +
tile.h; y++) {
- for(int x = tile.x; x < tile.x
+ tile.w; x++) {
-
kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state,
- sample, x, y,
tile.offset, tile.stride);
- }
- }
-
- tile.sample = sample + 1;
-
- task.update_progress(&tile);
- }
- }
- else
+ if(system_cpu_support_sse3())
+ path_trace_kernel = kernel_cpu_sse3_path_trace;
+ else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
- if(system_cpu_support_sse2()) {
- for(int sample = start_sample; sample <
end_sample; sample++) {
- if (task.get_cancel() ||
task_pool.canceled()) {
- if(task.need_finish_queue ==
false)
- break;
- }
-
- for(int y = tile.y; y < tile.y +
tile.h; y++) {
- for(int x = tile.x; x < tile.x
+ tile.w; x++) {
-
kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state,
- sample, x, y,
tile.offset, tile.stride);
- }
- }
-
- tile.sample = sample + 1;
-
- task.update_progress(&tile);
- }
- }
- else
+ if(system_cpu_support_sse2())
+ path_trace_kernel = kernel_cpu_sse2_path_trace;
+ else
#endif
- {
+ path_trace_kernel = kernel_cpu_path_trace;
+
+ while(task.acquire_tile(this, tile)) {
+ float *render_buffer = (float*)tile.buffer;
+ uint *rng_state = (uint*)tile.rng_state;
+ int start_sample = tile.start_sample;
+ int end_sample = tile.start_sample + tile.num_samples;
+
for(int sample = start_sample; sample <
end_sample; sample++) {
if (task.get_cancel() ||
task_pool.canceled()) {
if(task.need_finish_queue ==
false)
@@ -296,7 +214,7 @@ public:
for(int y = tile.y; y < tile.y +
tile.h; y++) {
for(int x = tile.x; x < tile.x
+ tile.w; x++) {
-
kernel_cpu_path_trace(&kg, render_buffer, rng_state,
+ path_trace_kernel(&kg,
render_buffer, rng_state,
sample, x, y,
tile.offset, tile.stride);
}
}
@@ -305,7 +223,7 @@ public:
task.update_progress(&tile);
}
- }
+
task.release_tile(tile);
@@ -325,110 +243,74 @@ public:
float sample_scale = 1.0f/(task.sample + 1);
if(task.rgba_half) {
+ void(*convert_to_half_float_kernel)(KernelGlobals *,
uchar4 *, float *, float, int, int, int, int);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
- if(system_cpu_support_avx2()) {
- for(int y = task.y; y < task.y + task.h; y++)
- for(int x = task.x; x < task.x +
task.w; x++)
-
kernel_cpu_avx2_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half,
(float*)task.buffer,
-
sample_scale, x, y, task.offset,
task.stride);
- }
+ if(system_cpu_support_avx2())
+ convert_to_half_float_kernel =
kernel_cpu_avx2_convert_to_half_float;
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
- if(system_cpu_support_avx()) {
+ if(system_cpu_support_avx())
for(int y = task.y; y < task.y + task.h; y++)
- for(int x = task.x; x < task.x +
task.w; x++)
-
kernel_cpu_avx_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half,
(float*)task.buffer,
- sample_scale, x, y,
task.offset, task.stride);
- }
+ convert_to_half_float_kernel =
kernel_cpu_avx_convert_to_half_float;
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
- if(system_cpu_support_sse41()) {
- for(int y = task.y; y < task.y + task.h; y++)
- for(int x = task.x; x < task.x +
task.w; x++)
-
kernel_cpu_sse41_convert_to_half_float(&kernel_globals,
(uchar4*)task.rgba_half, (float*)task.buffer,
- sample_scale, x, y,
task.offset, task.stride);
- }
+ if(system_cpu_support_sse41())
+ convert_to_half_float_kernel =
kernel_cpu_sse41_convert_to_half_float;
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
- if(system_cpu_support_sse3()) {
- for(int y = task.y; y < task.y + task.h; y++)
- for(int x = task.x; x < task.x +
task.w; x++)
-
kernel_cpu_sse3_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half,
(float*)task.buffer,
- sample_scale, x, y,
task.offset, task.stride);
- }
+ if(system_cpu_support_sse3())
+ convert_to_half_float_kernel =
kernel_cpu_sse3_convert_to_half_float;
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if(system_cpu_support_sse2()) {
- for(int y = task.y; y < task.y + task.h; y++)
- for(int x = task.x; x < task.x +
task.w; x++)
-
kernel_cpu_sse2_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half,
(float*)task.buffer,
- sample_scale, x, y,
task.offset, task.stride);
- }
+ convert_to_half_float_kernel =
kernel_cpu_sse2_convert_to_half_float;
else
#endif
- {
- for(int y = task.y; y < task.y + task.h; y++)
- for(int x = task.x; x < task.x +
task.w; x++)
-
kernel_cpu_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half,
(float*)task.buffer,
- sample_scale, x, y,
task.offset, task.stride);
- }
+ convert_to_half_float_kernel =
kernel_cpu_convert_to_half_float;
+
+ for(int y = task.y; y < task.y + task.h; y++)
+ for(int x = task.x; x < task.x + task.w; x++)
+
convert_to_half_float_kernel(&kernel_globals, (uchar4*)task.rgba_half,
(float*)task.buffer,
+ sample_scale, x, y,
task.offset, task.stride);
}
else {
+ void(*convert_to_byte_kernel)(KernelGlobals *, uchar4
*, float *, float, int, int, int, int);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
- if(system_cpu_support_avx2()) {
- for(int y = task.y; y < task.y + task.h; y++)
- for(int x = task.x; x < task.x +
task.w; x++)
-
kernel_cpu_avx2_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte,
(float*)task.buffer,
-
sample_scale, x, y, task.offset, task.stride);
- }
+ if(system_cpu_support_avx2())
+ convert_to_byte_kernel =
kernel_cpu_avx2_convert_to_byte;
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
- if(system_cpu_support_avx()) {
- for(int y = task.y; y < task.y + task.h; y++)
- for(int x = task.x; x < task.x +
task.w; x++)
-
kernel_cpu_avx_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte,
(float*)task.buffer,
- sample_scale, x, y,
task.offset, task.stride);
- }
+ if(system_cpu_support_avx())
+ convert_to_byte_kernel =
kernel_cpu_avx_convert_to_byte;
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
- if(system_cpu_support_sse41()) {
- for(int y = task.y; y < task.y + task.h; y++)
- for(int x = task.x; x < task.x +
task.w; x++)
-
kernel_cpu_sse41_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte,
(float*)task.buffer,
- sample_scale, x, y,
task.offset, task.stride);
- }
+ if(system_cpu_support_sse41())
+ convert_to_byte_kernel =
kernel_cpu_sse41_convert_to_byte;
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
- if(system_cpu_support_sse3()) {
- for(int y = task.y; y < task.y + task.h; y++)
- for(int x = task.x; x < task.x +
task.w; x++)
-
kernel_cpu_sse3_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte,
(float*)task.buffer,
- sample_scale, x, y,
task.offset, task.stride);
- }
+ if(system_cpu_support_sse3())
+ convert_to_byte_kernel =
kernel_cpu_sse3_convert_to_byte;
@@ Diff output truncated at 10240 characters. @@
_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs