Commit: a8cc0d707e82ac781f44bf6cd7ed1e8974d8ed39
Author: Brecht Van Lommel
Date:   Wed Aug 2 02:09:08 2017 +0200
Branches: master
https://developer.blender.org/rBa8cc0d707e82ac781f44bf6cd7ed1e8974d8ed39

Code refactor: split defines into separate header, changes to SSE type headers.

I need to use some macros defined in util_simd.h for float3/float4, to emulate
SSE4 instructions on SSE2. Due to issues with the order of header includes this
was not possible, so this commit does some refactoring to make it work.

Differential Revision: https://developer.blender.org/D2764

===================================================================

M       intern/cycles/device/device_cpu.cpp
M       intern/cycles/kernel/CMakeLists.txt
M       intern/cycles/util/CMakeLists.txt
A       intern/cycles/util/util_defines.h
M       intern/cycles/util/util_optimization.h
M       intern/cycles/util/util_simd.h
M       intern/cycles/util/util_sseb.h
M       intern/cycles/util/util_ssef.h
M       intern/cycles/util/util_ssei.h
M       intern/cycles/util/util_types.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 18112437b45..a00be3eeaab 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -48,6 +48,7 @@
 #include "util/util_logging.h"
 #include "util/util_map.h"
 #include "util/util_opengl.h"
+#include "util/util_optimization.h"
 #include "util/util_progress.h"
 #include "util/util_system.h"
 #include "util/util_thread.h"
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 23e9bd311c4..88c4c4e3282 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -233,6 +233,7 @@ set(SRC_FILTER_HEADERS
 set(SRC_UTIL_HEADERS
        ../util/util_atomic.h
        ../util/util_color.h
+       ../util/util_defines.h
        ../util/util_half.h
        ../util/util_hash.h
        ../util/util_math.h
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index 43f9a57d099..7f3747a0f58 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -38,6 +38,7 @@ set(SRC_HEADERS
        util_atomic.h
        util_boundbox.h
        util_debug.h
+       util_defines.h
        util_guarded_allocator.cpp
        util_foreach.h
        util_function.h
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_defines.h
similarity index 51%
copy from intern/cycles/util/util_types.h
copy to intern/cycles/util/util_defines.h
index a5d1d7152d5..d0d87e74332 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_defines.h
@@ -1,5 +1,6 @@
+
 /*
- * Copyright 2011-2013 Blender Foundation
+ * Copyright 2011-2017 Blender Foundation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,12 +15,8 @@
  * limitations under the License.
  */
 
-#ifndef __UTIL_TYPES_H__
-#define __UTIL_TYPES_H__
-
-#ifndef __KERNEL_OPENCL__
-#  include <stdlib.h>
-#endif
+#ifndef __UTIL_DEFINES_H__
+#define __UTIL_DEFINES_H__
 
 /* Bitness */
 
@@ -77,130 +74,6 @@
 #  endif
 #endif  /* __KERNEL_GPU__ */
 
-/* Standard Integer Types */
-
-#ifndef __KERNEL_GPU__
-/* int8_t, uint16_t, and friends */
-#  ifndef _WIN32
-#    include <stdint.h>
-#  endif
-/* SIMD Types */
-#  include "util/util_optimization.h"
-#endif  /* __KERNEL_GPU__ */
-
-CCL_NAMESPACE_BEGIN
-
-/* Types
- *
- * Define simpler unsigned type names, and integer with defined number of bits.
- * Also vector types, named to be compatible with OpenCL builtin types, while
- * working for CUDA and C++ too. */
-
-/* Shorter Unsigned Names */
-
-#ifndef __KERNEL_OPENCL__
-typedef unsigned char uchar;
-typedef unsigned int uint;
-typedef unsigned short ushort;
-#endif
-
-/* Fixed Bits Types */
-
-#ifdef __KERNEL_OPENCL__
-typedef ulong uint64_t;
-#endif
-
-#ifndef __KERNEL_GPU__
-#  ifdef _WIN32
-typedef signed char int8_t;
-typedef unsigned char uint8_t;
-
-typedef signed short int16_t;
-typedef unsigned short uint16_t;
-
-typedef signed int int32_t;
-typedef unsigned int uint32_t;
-
-typedef long long int64_t;
-typedef unsigned long long uint64_t;
-#    ifdef __KERNEL_64_BIT__
-typedef int64_t ssize_t;
-#    else
-typedef int32_t ssize_t;
-#    endif
-#  endif  /* _WIN32 */
-
-/* Generic Memory Pointer */
-
-typedef uint64_t device_ptr;
-#endif  /* __KERNEL_GPU__ */
-
-ccl_device_inline size_t align_up(size_t offset, size_t alignment)
-{
-       return (offset + alignment - 1) & ~(alignment - 1);
-}
-
-ccl_device_inline size_t divide_up(size_t x, size_t y)
-{
-       return (x + y - 1) / y;
-}
-
-ccl_device_inline size_t round_up(size_t x, size_t multiple)
-{
-       return ((x + multiple - 1) / multiple) * multiple;
-}
-
-ccl_device_inline size_t round_down(size_t x, size_t multiple)
-{
-       return (x / multiple) * multiple;
-}
-
-/* Interpolation types for textures
- * cuda also use texture space to store other objects */
-enum InterpolationType {
-       INTERPOLATION_NONE = -1,
-       INTERPOLATION_LINEAR = 0,
-       INTERPOLATION_CLOSEST = 1,
-       INTERPOLATION_CUBIC = 2,
-       INTERPOLATION_SMART = 3,
-
-       INTERPOLATION_NUM_TYPES,
-};
-
-/* Texture types
- * Since we store the type in the lower bits of a flat index,
- * the shift and bit mask constant below need to be kept in sync.
- */
-
-enum ImageDataType {
-       IMAGE_DATA_TYPE_FLOAT4 = 0,
-       IMAGE_DATA_TYPE_BYTE4 = 1,
-       IMAGE_DATA_TYPE_HALF4 = 2,
-       IMAGE_DATA_TYPE_FLOAT = 3,
-       IMAGE_DATA_TYPE_BYTE = 4,
-       IMAGE_DATA_TYPE_HALF = 5,
-
-       IMAGE_DATA_NUM_TYPES
-};
-
-#define IMAGE_DATA_TYPE_SHIFT 3
-#define IMAGE_DATA_TYPE_MASK 0x7
-
-/* Extension types for textures.
- *
- * Defines how the image is extrapolated past its original bounds.
- */
-enum ExtensionType {
-       /* Cause the image to repeat horizontally and vertically. */
-       EXTENSION_REPEAT = 0,
-       /* Extend by repeating edge pixels of the image. */
-       EXTENSION_EXTEND = 1,
-       /* Clip to image size and set exterior pixels as transparent. */
-       EXTENSION_CLIP = 2,
-
-       EXTENSION_NUM_TYPES,
-};
-
 /* macros */
 
 /* hints for branch prediction, only use in code that runs a _lot_ */
@@ -250,9 +123,6 @@ template<typename T> static inline T decltype_helper(T x) { return x; }
 #define CHECK_TYPE_INLINE(val, type) \
        ((void)(((type)0) != (val)))
 
-
-CCL_NAMESPACE_END
-
 #ifndef __KERNEL_GPU__
 #  include <cassert>
 #  define util_assert(statement)  assert(statement)
@@ -260,43 +130,5 @@ CCL_NAMESPACE_END
 #  define util_assert(statement)
 #endif
 
-/* Vectorized types declaration. */
-#include "util/util_types_uchar2.h"
-#include "util/util_types_uchar3.h"
-#include "util/util_types_uchar4.h"
-
-#include "util/util_types_int2.h"
-#include "util/util_types_int3.h"
-#include "util/util_types_int4.h"
-
-#include "util/util_types_uint2.h"
-#include "util/util_types_uint3.h"
-#include "util/util_types_uint4.h"
-
-#include "util/util_types_float2.h"
-#include "util/util_types_float3.h"
-#include "util/util_types_float4.h"
-
-#include "util/util_types_vector3.h"
-
-/* Vectorized types implementation. */
-#include "util/util_types_uchar2_impl.h"
-#include "util/util_types_uchar3_impl.h"
-#include "util/util_types_uchar4_impl.h"
-
-#include "util/util_types_int2_impl.h"
-#include "util/util_types_int3_impl.h"
-#include "util/util_types_int4_impl.h"
-
-#include "util/util_types_uint2_impl.h"
-#include "util/util_types_uint3_impl.h"
-#include "util/util_types_uint4_impl.h"
-
-#include "util/util_types_float2_impl.h"
-#include "util/util_types_float3_impl.h"
-#include "util/util_types_float4_impl.h"
-
-#include "util/util_types_vector3_impl.h"
-
-#endif /* __UTIL_TYPES_H__ */
+#endif /* __UTIL_DEFINES_H__ */
 
diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h
index 6f70a474fe7..0382c0811dd 100644
--- a/intern/cycles/util/util_optimization.h
+++ b/intern/cycles/util/util_optimization.h
@@ -19,16 +19,6 @@
 
 #ifndef __KERNEL_GPU__
 
-/* quiet unused define warnings */
-#if defined(__KERNEL_SSE2__)  || \
-       defined(__KERNEL_SSE3__)  || \
-       defined(__KERNEL_SSSE3__) || \
-       defined(__KERNEL_SSE41__) || \
-       defined(__KERNEL_AVX__)   || \
-       defined(__KERNEL_AVX2__)
-       /* do nothing */
-#endif
-
 /* x86
  *
  * Compile a regular, SSE2 and SSE3 kernel. */
@@ -73,48 +63,6 @@
 
 #endif  /* defined(__x86_64__) || defined(_M_X64) */
 
-/* SSE Experiment
- *
- * This is disabled code for an experiment to use SSE types globally for types
- * such as float3 and float4. Currently this gives an overall slowdown. */
-
-#if 0
-#  define __KERNEL_SSE__
-#  ifndef __KERNEL_SSE2__
-#    define __KERNEL_SSE2__
-#  endif
-#  ifndef __KERNEL_SSE3__
-#    define __KERNEL_SSE3__
-#  endif
-#  ifndef __KERNEL_SSSE3__
-#    define __KERNEL_SSSE3__
-#  endif
-#  ifndef __KERNEL_SSE4__
-#    define __KERNEL_SSE4__
-#  endif
-#endif
-
-/* SSE Intrinsics includes
- *
- * We assume __KERNEL_SSEX__ flags to have been defined at this point */
-
-/* SSE intrinsics headers */
-#ifndef FREE_WINDOWS64
-
-#ifdef _MSC_VER
-#  include <intrin.h>
-#elif (defined(__x86_64__) || defined(__i386__))
-#  include <x86intrin.h>
-#endif
-
-#else
-
-/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
- * Since we can't avoid including <windows.h>, better only include that */
-#include "util/util_windows.h"
-
-#endif
-
 #endif
 
 #endif /* __UTIL_OPTIMIZATION_H__ */
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 587febe3e52..7d938a0fbca 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -18,19 +18,38 @@
 #ifndef __UTIL_SIMD_TYPES_H__
 #define __UTIL_SIMD_TYPES_H__
 
+#ifndef __KERNEL_GPU__
+
 #include <limits>
 
 #include "util/util_debug.h"
-#include "util/util_types.h"
+#include "util/util_defines.h"
+
+/* SSE Intrinsics includes
+ *
+ * We assume __KERNEL_SSEX__ flags to have been defined at this point */
+
+/* SSE intrinsics headers */
+#ifndef FREE_WINDOWS64
+
+#ifdef _MSC_VER
+#  include <intrin.h>
+#elif (defined(__x86_64__) || defined(__i386__))
+#  include <x86intrin.h>
+#endif
+
+#else
+
+/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
+ * Since we can't avoid including <windows.h>, better only include that */
+#include "util/util_windows.h"
+
+#endif
 
 CCL_NAMESPACE_BEGIN
 
 #ifdef __KERNEL_SSE2__
 
-struct sseb;
-struct ssei;
-struct ssef;
-
 extern const __m128 _mm_lookupmask_ps[16];
 
 /* Special Types */
@@ -496,13 +515,19 @@ ccl_device_inline int bitscan(int value)
 
 #endif /* __KERNEL_SSE2__ */
 
+/* quiet unused define warnings */
+#if defined(__KERNEL_SSE2__)  || \
+       defined(__KERNEL_SSE3__)  || \
+       defined(__KERNEL_SSSE3__) || \
+       defined(__KERNEL_SSE41__) || \
+       defined(__KERNEL_AVX__)   || \
+       defined(__KERNEL_AVX2__)
+       /* do nothing */
+#endif
+
 CCL_NAMESPACE_END
 
-#include "util/util_math.h"
-#include "util/util_sseb.h"
-#include "util/util_ssei.h"
-#include "util/util_ssef.h"
-#include "util/util_avxf.h"
+#endif /* __KERNEL_GPU__ */
 
 #endif /* __UTIL_SIMD_TYPES_H__ */
 
diff --git a/intern/cycles/util/util_sseb.h b/intern/cycles/util/util_sseb.h
index 6e669701f3b..93c22aafdcd 100644
--- a/intern/cycles/util/util_sseb.h
+++ b/intern/cycles/util/util_sseb.h
@@ -22,6 +22,9 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __KERNEL_SSE2__
 
+struct ssei;
+struct ssef;
+
 /*! 4-wide SSE 

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to