Commit: 2b5d60eb2deed64f234a9bafc70ed87d817bc8a9 Author: Sergey Sharybin Date: Wed Dec 30 17:54:02 2015 +0500 Branches: master https://developer.blender.org/rB2b5d60eb2deed64f234a9bafc70ed87d817bc8a9
Cycles: Deduplicate CPU kernel declaration and definition code The main goal is to make editing kernel signatures easier and less prone to errors caused by a missed function signature update or similar. This will also make it easier to add new CPU architectures. Reviewers: juicyfruit, dingto, lukasstockner97, brecht Reviewed By: dingto, lukasstockner97, brecht Differential Revision: https://developer.blender.org/D1703 =================================================================== M intern/cycles/kernel/CMakeLists.txt M intern/cycles/kernel/kernel.h M intern/cycles/kernel/kernels/cpu/kernel.cpp M intern/cycles/kernel/kernels/cpu/kernel_avx.cpp M intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp A intern/cycles/kernel/kernels/cpu/kernel_cpu.h A intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h M intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp M intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp M intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp =================================================================== diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 5a7d245..20f2878 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -63,6 +63,9 @@ set(SRC_HEADERS kernel_types.h kernel_volume.h kernel_work_stealing.h + + kernels/cpu/kernel_cpu.h + kernels/cpu/kernel_cpu_impl.h ) set(SRC_CLOSURE_HEADERS diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index b2596d1..9279a94 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -23,6 +23,10 @@ CCL_NAMESPACE_BEGIN +#define KERNEL_NAME_JOIN(x, y, z) x ## _ ## y ## _ ## z +#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name) +#define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name) + struct KernelGlobals; KernelGlobals *kernel_globals_create(); @@ -41,69 +45,33 @@ void kernel_tex_copy(KernelGlobals *kg, InterpolationType 
interpolation=INTERPOLATION_LINEAR, ExtensionType extension = EXTENSION_REPEAT); -void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, - int sample, int x, int y, int offset, int stride); -void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, - float sample_scale, int x, int y, int offset, int stride); -void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, - float sample_scale, int x, int y, int offset, int stride); -void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int offset, int sample); +#define KERNEL_ARCH cpu +#include "kernels/cpu/kernel_cpu.h" #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 -void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, - int sample, int x, int y, int offset, int stride); -void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, - float sample_scale, int x, int y, int offset, int stride); -void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, - float sample_scale, int x, int y, int offset, int stride); -void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int offset, int sample); -#endif +# define KERNEL_ARCH cpu_sse2 +# include "kernels/cpu/kernel_cpu.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 -void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, - int sample, int x, int y, int offset, int stride); -void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, - float sample_scale, int x, int y, int offset, int stride); -void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, - float sample_scale, int x, int y, int offset, int stride); -void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, 
int i, int offset, int sample); -#endif +# define KERNEL_ARCH cpu_sse3 +# include "kernels/cpu/kernel_cpu.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 -void kernel_cpu_sse41_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, - int sample, int x, int y, int offset, int stride); -void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, - float sample_scale, int x, int y, int offset, int stride); -void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, - float sample_scale, int x, int y, int offset, int stride); -void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int offset, int sample); -#endif +# define KERNEL_ARCH cpu_sse41 +# include "kernels/cpu/kernel_cpu.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX -void kernel_cpu_avx_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, - int sample, int x, int y, int offset, int stride); -void kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, - float sample_scale, int x, int y, int offset, int stride); -void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, - float sample_scale, int x, int y, int offset, int stride); -void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int offset, int sample); -#endif +# define KERNEL_ARCH cpu_avx +# include "kernels/cpu/kernel_cpu.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 -void kernel_cpu_avx2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, - int sample, int x, int y, int offset, int stride); -void kernel_cpu_avx2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, - float sample_scale, int x, int y, int offset, int stride); -void 
kernel_cpu_avx2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, - float sample_scale, int x, int y, int offset, int stride); -void kernel_cpu_avx2_shader(KernelGlobals *kg, uint4 *input, float4 *output, - int type, int i, int offset, int sample); -#endif +# define KERNEL_ARCH cpu_avx2 +# include "kernels/cpu/kernel_cpu.h" +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp index 2c8d350..5c6dc31 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp @@ -16,15 +16,19 @@ /* CPU kernel entry points */ -#include "kernel_compat_cpu.h" +/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this one with SSE2 intrinsics */ +#if defined(__x86_64__) || defined(_M_X64) +#define __KERNEL_SSE2__ +#endif + +/* quiet unused define warnings */ +#if defined(__KERNEL_SSE2__) + /* do nothing */ +#endif + #include "kernel.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_path_branched.h" -#include "kernel_bake.h" +#define KERNEL_ARCH cpu +#include "kernel_cpu_impl.h" CCL_NAMESPACE_BEGIN @@ -94,49 +98,4 @@ void kernel_tex_copy(KernelGlobals *kg, assert(0); } -/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this one with SSE2 intrinsics */ -#if defined(__x86_64__) || defined(_M_X64) -#define __KERNEL_SSE2__ -#endif - -/* quiet unused define warnings */ -#if defined(__KERNEL_SSE2__) - /* do nothing */ -#endif - -/* Path Tracing */ - -void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) -{ -#ifdef __BRANCHED_PATH__ - if(kernel_data.integrator.branched) - kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); - else -#endif - kernel_path_trace(kg, 
buffer, rng_state, sample, x, y, offset, stride); -} - -/* Film */ - -void kernel_cpu_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -/* Shader Evaluation */ - -void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) -{ - if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); - else - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); -} - CCL_NAMESPACE_END - diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp index df77bed..bc754f6 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp @@ -30,58 +30,13 @@ #include "util_optimization.h" #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX - -#include "kernel_compat_cpu.h" -#include "kernel.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_path_branched.h" -#include "kernel_bake.h" - -CCL_NAMESPACE_BEGIN - -/* Path Tracing */ - -void kernel_cpu_avx_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) -{ -#ifdef __BRANCHED_PATH__ - if(kernel_data.integrator.branched) - kernel_branched_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); - else -#endif - kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); -} - -/* Film */ - -void 
kernel_cpu_avx_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, int x, int y, int offset, int stride) -{ - kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); -} - -/* Shader Evaluate */ - -void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i, int offset, int sample) -{ - if(type >= SHADER_EVAL_BAKE) - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i, offset, sample); - else - kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i, sample); -} - -CCL_NAMESPACE_END -#else +# include "kernel.h" +# define KERNEL_ARCH cpu_avx +# include "kernel_cpu_impl.h" +#else /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ /* needed for some linkers in combination with scons making empty compilation unit in a library */ void __dummy_function_cycles_avx(void); void __dummy_function_cycles_avx(void) {} -#endif +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ diff --git a/int @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list [email protected] http://lists.blender.org/mailman/listinfo/bf-blender-cvs
