Revision: 41882
          
http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=41882
Author:   blendix
Date:     2011-11-15 15:13:38 +0000 (Tue, 15 Nov 2011)
Log Message:
-----------
Fix #29259: Cycles issues on certain processors. Two versions of the kernel are
now compiled, one SSE-optimized and one not, and the CPU device chooses between
them at runtime.

Modified Paths:
--------------
    trunk/blender/intern/cycles/CMakeLists.txt
    trunk/blender/intern/cycles/SConscript
    trunk/blender/intern/cycles/device/device_cpu.cpp
    trunk/blender/intern/cycles/kernel/CMakeLists.txt
    trunk/blender/intern/cycles/kernel/kernel.h
    trunk/blender/intern/cycles/util/util_system.cpp
    trunk/blender/intern/cycles/util/util_system.h

Added Paths:
-----------
    trunk/blender/intern/cycles/kernel/kernel_optimized.cpp

Modified: trunk/blender/intern/cycles/CMakeLists.txt
===================================================================
--- trunk/blender/intern/cycles/CMakeLists.txt  2011-11-15 14:58:14 UTC (rev 
41881)
+++ trunk/blender/intern/cycles/CMakeLists.txt  2011-11-15 15:13:38 UTC (rev 
41882)
@@ -9,32 +9,19 @@
 # Build Flags
 
 if(WITH_RAYOPTIMIZATION AND SUPPORT_SSE_BUILD)
-       set(GCC_OPTIM_FLAGS "-ffast-math -msse -msse2 -msse3")
-endif()
+       set(WITH_CYCLES_OPTIMIZED_KERNEL ON)
 
-if(APPLE)
-       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_OPTIM_FLAGS}")
-       set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
-endif()
-
-if(WIN32)
-       if(MSVC)
-               set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Ox /Ot /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /EHsc /fp:fast")
-               set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
+       if(WIN32 AND MSVC)
+               set(CYCLES_OPTIMIZED_KERNEL_FLAGS "/Ox /Ot /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /EHsc /fp:fast")
        elseif(CMAKE_COMPILER_IS_GNUCC)
-               set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_OPTIM_FLAGS}")
-               set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
+               set(CYCLES_OPTIMIZED_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -DGOGOGO")
        endif()
 endif()
 
-if(UNIX AND NOT APPLE)
-       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_OPTIM_FLAGS}")
-       set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
-endif()
+# for OSL, not needed yet
+# set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
+# set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
 
-# not needed yet, is for open shading language
-set(RTTI_DISABLE_FLAGS "")
-
 # Definitions and Includes
 
 add_definitions(${BOOST_DEFINITIONS} ${OPENIMAGEIO_DEFINITIONS})
@@ -42,6 +29,10 @@
 add_definitions(-DCCL_NAMESPACE_BEGIN=namespace\ ccl\ {)
 add_definitions(-DCCL_NAMESPACE_END=})
 
+if(WITH_CYCLES_OPTIMIZED_KERNEL)
+       add_definitions(-DWITH_OPTIMIZED_KERNEL)
+endif()
+
 if(WITH_CYCLES_NETWORK)
   add_definitions(-DWITH_NETWORK)
 endif()

Modified: trunk/blender/intern/cycles/SConscript
===================================================================
--- trunk/blender/intern/cycles/SConscript      2011-11-15 14:58:14 UTC (rev 
41881)
+++ trunk/blender/intern/cycles/SConscript      2011-11-15 15:13:38 UTC (rev 
41882)
@@ -10,11 +10,10 @@
 
 sources.remove(path.join('util', 'util_view.cpp'))
 sources.remove(path.join('render', 'film_response.cpp'))
+sources.remove(path.join('kernel', 'kernel_optimized.cpp'))
 
 incs = [] 
 defs = []
-ccflags = []
-cxxflags = []
 
 defs.append('CCL_NAMESPACE_BEGIN=namespace ccl {')
 defs.append('CCL_NAMESPACE_END=}')
@@ -23,14 +22,6 @@
 defs.append('WITH_MULTI')
 defs.append('WITH_CUDA')
 
-if env['OURPLATFORM'] in ('win32-mingw'):
-    if env['WITH_BF_RAYOPTIMIZATION']:
-        cxxflags.append('-ffast-math -msse -msse2 -msse3'.split())
-        ccflags.append('-ffast-math -msse -msse2 -msse3'.split())
-    # not needed yet, is for open shading language
-    # cxxflags.append('-fno-rtti'.split())
-    # defs.append('BOOST_NO_RTTI BOOST_NO_TYPEID'.split())
-
 incs.extend('. bvh render device kernel kernel/osl kernel/svm util 
subd'.split())
 incs.extend('#intern/guardedalloc #source/blender/makesrna 
#source/blender/makesdna'.split())
 incs.extend('#source/blender/blenloader 
../../source/blender/makesrna/intern'.split())
@@ -39,5 +30,20 @@
 incs.append(cycles['BF_BOOST_INC'])
 incs.append(cycles['BF_PYTHON_INC'])
 
-cycles.BlenderLib('bf_intern_cycles', sources, incs, defs, libtype=['intern'], 
priority=[0], compileflags=[None], cc_compileflags=ccflags, 
cxx_compileflags=cxxflags)
+# optimized kernel
+if env['WITH_BF_RAYOPTIMIZATION']:
+    optim_cxxflags = []
 
+    if env['OURPLATFORM'] in ('win32-vc', 'win64-vc'):
+        optim_cxxflags.append('/Ox /Ot /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS 
/EHsc /fp:fast'.split())
+    else:
+        optim_cxxflags.append('-ffast-math -msse -msse2 -msse3'.split())
+    
+    optim_defs = defs + ['WITH_OPTIMIZED_KERNEL']
+    optim_sources = [path.join('kernel', 'kernel_optimized.cpp')]
+
+    cycles_optim = cycles.Clone()
+    cycles_optim.BlenderLib('bf_intern_cycles_optimized', optim_sources, incs, 
optim_defs, libtype=['intern'], priority=[0], compileflags=[None], 
cxx_compileflags=optim_cxxflags)
+
+cycles.BlenderLib('bf_intern_cycles', sources, incs, defs, libtype=['intern'], 
priority=[0], compileflags=[None])
+

Modified: trunk/blender/intern/cycles/device/device_cpu.cpp
===================================================================
--- trunk/blender/intern/cycles/device/device_cpu.cpp   2011-11-15 14:58:14 UTC 
(rev 41881)
+++ trunk/blender/intern/cycles/device/device_cpu.cpp   2011-11-15 15:13:38 UTC 
(rev 41882)
@@ -48,6 +48,9 @@
        {
                kg = kernel_globals_create();
 
+               /* do now to avoid thread issues */
+               system_cpu_support_optimized();
+
                if(threads_num == 0)
                        threads_num = system_cpu_thread_count();
 
@@ -155,14 +158,28 @@
                        OSLShader::thread_init(kg);
 #endif
 
-               for(int y = task.y; y < task.y + task.h; y++) {
-                       for(int x = task.x; x < task.x + task.w; x++)
-                               kernel_cpu_path_trace(kg, (float4*)task.buffer, 
(unsigned int*)task.rng_state, task.sample, x, y);
+#ifdef WITH_OPTIMIZED_KERNEL
+               if(system_cpu_support_optimized()) {
+                       for(int y = task.y; y < task.y + task.h; y++) {
+                               for(int x = task.x; x < task.x + task.w; x++)
+                                       kernel_cpu_optimized_path_trace(kg, 
(float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y);
 
-                       if(tasks.worker_cancel())
-                               break;
+                               if(tasks.worker_cancel())
+                                       break;
+                       }
                }
+               else
+#endif
+               {
+                       for(int y = task.y; y < task.y + task.h; y++) {
+                               for(int x = task.x; x < task.x + task.w; x++)
+                                       kernel_cpu_path_trace(kg, 
(float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y);
 
+                               if(tasks.worker_cancel())
+                                       break;
+                       }
+               }
+
 #ifdef WITH_OSL
                if(kernel_osl_use(kg))
                        OSLShader::thread_free(kg);
@@ -171,10 +188,19 @@
 
        void thread_tonemap(DeviceTask& task)
        {
-               for(int y = task.y; y < task.y + task.h; y++) {
-                       for(int x = task.x; x < task.x + task.w; x++)
-                               kernel_cpu_tonemap(kg, (uchar4*)task.rgba, 
(float4*)task.buffer, task.sample, task.resolution, x, y);
+#ifdef WITH_OPTIMIZED_KERNEL
+               if(system_cpu_support_optimized()) {
+                       for(int y = task.y; y < task.y + task.h; y++)
+                               for(int x = task.x; x < task.x + task.w; x++)
+                                       kernel_cpu_optimized_tonemap(kg, 
(uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y);
                }
+               else
+#endif
+               {
+                       for(int y = task.y; y < task.y + task.h; y++)
+                               for(int x = task.x; x < task.x + task.w; x++)
+                                       kernel_cpu_tonemap(kg, 
(uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y);
+               }
        }
 
        void thread_displace(DeviceTask& task)
@@ -184,13 +210,26 @@
                        OSLShader::thread_init(kg);
 #endif
 
-               for(int x = task.displace_x; x < task.displace_x + 
task.displace_w; x++) {
-                       kernel_cpu_displace(kg, (uint4*)task.displace_input, 
(float3*)task.displace_offset, x);
+#ifdef WITH_OPTIMIZED_KERNEL
+               if(system_cpu_support_optimized()) {
+                       for(int x = task.displace_x; x < task.displace_x + 
task.displace_w; x++) {
+                               kernel_cpu_optimized_displace(kg, 
(uint4*)task.displace_input, (float3*)task.displace_offset, x);
 
-                       if(tasks.worker_cancel())
-                               break;
+                               if(tasks.worker_cancel())
+                                       break;
+                       }
                }
+               else
+#endif
+               {
+                       for(int x = task.displace_x; x < task.displace_x + 
task.displace_w; x++) {
+                               kernel_cpu_displace(kg, 
(uint4*)task.displace_input, (float3*)task.displace_offset, x);
 
+                               if(tasks.worker_cancel())
+                                       break;
+                       }
+               }
+
 #ifdef WITH_OSL
                if(kernel_osl_use(kg))
                        OSLShader::thread_free(kg);

Modified: trunk/blender/intern/cycles/kernel/CMakeLists.txt
===================================================================
--- trunk/blender/intern/cycles/kernel/CMakeLists.txt   2011-11-15 14:58:14 UTC 
(rev 41881)
+++ trunk/blender/intern/cycles/kernel/CMakeLists.txt   2011-11-15 15:13:38 UTC 
(rev 41882)
@@ -8,6 +8,7 @@
 
 set(SRC
        kernel.cpp
+       kernel_optimized.cpp
        kernel.cl
        kernel.cu
 )
@@ -123,11 +124,15 @@
 
 add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_SVM_HEADERS})
 
+if(WITH_CYCLES_OPTIMIZED_KERNEL)
+       SET_SOURCE_FILES_PROPERTIES(kernel_optimized.cpp PROPERTIES 
COMPILE_FLAGS ${CYCLES_OPTIMIZED_KERNEL_FLAGS})
+endif()
+
 if(WITH_CYCLES_CUDA)
        add_dependencies(cycles_kernel cycles_kernel_cuda)
 endif()
 
-# OPENCL kernel
+# OpenCL kernel
 
 #set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
 #add_custom_command(
@@ -142,3 +147,4 @@
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" 
${CYCLES_INSTALL_PATH}/kernel)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" 
${CYCLES_INSTALL_PATH}/kernel/svm)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" 
${CYCLES_INSTALL_PATH}/kernel)
+

Modified: trunk/blender/intern/cycles/kernel/kernel.h
===================================================================
--- trunk/blender/intern/cycles/kernel/kernel.h 2011-11-15 14:58:14 UTC (rev 
41881)
+++ trunk/blender/intern/cycles/kernel/kernel.h 2011-11-15 15:13:38 UTC (rev 
41882)
@@ -38,9 +38,14 @@
 
 void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int 
*rng_state, int sample, int x, int y);
 void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int 
sample, int resolution, int x, int y);
-
 void kernel_cpu_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int 
i);
 
+#ifdef WITH_OPTIMIZED_KERNEL
+void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, 
unsigned int *rng_state, int sample, int x, int y);
+void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 
*buffer, int sample, int resolution, int x, int y);
+void kernel_cpu_optimized_displace(KernelGlobals *kg, uint4 *input, float3 
*offset, int i);
+#endif
+
 CCL_NAMESPACE_END
 
 #endif /* __KERNEL_H__ */

Copied: trunk/blender/intern/cycles/kernel/kernel_optimized.cpp (from rev 
41876, trunk/blender/intern/cycles/kernel/kernel.cpp)
===================================================================
--- trunk/blender/intern/cycles/kernel/kernel_optimized.cpp                     
        (rev 0)
+++ trunk/blender/intern/cycles/kernel/kernel_optimized.cpp     2011-11-15 
15:13:38 UTC (rev 41882)
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License

@@ Diff output truncated at 10240 characters. @@
_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to