Commit: ef7d3af938514d4dfb7d7c2376c8fa6720c45279 Author: Thomas Dinges Date: Tue Aug 9 11:33:42 2016 +0200 Branches: soc-2016-cycles_images https://developer.blender.org/rBef7d3af938514d4dfb7d7c2376c8fa6720c45279
Half Floats: Initial support for CUDA. This is not working yet: rendering fails with an error at render time, which I assume occurs when reading the texture. If I render a scene with the half float texture outside of the camera view, it works. * Enable half floats for Kepler cards (1024 slots for half and 1024 for half4). * Use the proper half data type for CUDA instead of declaring our own. * Change __float2half_rn to __float2half, which should be the same according to http://stackoverflow.com/questions/35198856/half-precision-difference-between-float2half-vs-float2half-rn =================================================================== M intern/cycles/device/device_cuda.cpp M intern/cycles/kernel/kernel_compat_cuda.h M intern/cycles/kernel/svm/svm_image.h M intern/cycles/render/image.cpp M intern/cycles/util/util_half.h M intern/cycles/util/util_texture.h =================================================================== diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 6a511ea..331857a 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -576,6 +576,7 @@ public: case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break; case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break; case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break; + case TYPE_HALF: format = CU_AD_FORMAT_HALF; break; default: assert(0); return; } diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 063220b..d656fac 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -31,6 +31,7 @@ #endif #include <cuda.h> +#include <cuda_fp16.h> #include <float.h> /* Qualifier wrappers for different names on different devices */ diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index b6b90df..d1bfcaa 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -18,7 +18,7 @@ 
CCL_NAMESPACE_BEGIN /* Float4 textures on various devices. */ #if defined(__KERNEL_CPU__) -# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CPU +# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CPU #elif defined(__KERNEL_CUDA__) # if __CUDA_ARCH__ < 300 # define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CUDA @@ -277,8 +277,21 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, } # else CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id); - if(id < 2048) /* TODO(dingto): Make this a variable */ + + /* Float and Byte, 4 components */ + if(id < TEX_START_FLOAT_CUDA_KEPLER) r = kernel_tex_image_interp_float4(tex, x, y); + /* Float and Byte, 1 component */ + else if(id < TEX_START_HALF4_CUDA_KEPLER) { + float f = kernel_tex_image_interp_float(tex, x, y); + r = make_float4(f, f, f, 1.0); + } + /* Half Float, 4 components */ + else if (id < TEX_START_HALF_CUDA_KEPLER){ + /* TODO(dingto): proper tex call here. */ + r = kernel_tex_image_interp_float4(tex, x, y); + } + /* Half Float, 1 component */ else { float f = kernel_tex_image_interp_float(tex, x, y); r = make_float4(f, f, f, 1.0); @@ -465,7 +478,7 @@ ccl_device void svm_node_tex_environment(KernelGlobals *kg, ShaderData *sd, floa float2 uv; co = normalize(co); - + if(projection == 0) uv = direction_to_equirectangular(co); else diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 614620c..1adf510 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -216,7 +216,7 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen } /* We use a consecutive slot counting scheme on the devices, in order - * float4, byte4, float, byte. + * float4, byte4, float, byte, half4 and half. * These functions convert the slot ids from ImageManager "images" ones * to device ones and vice versa. 
*/ int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type) diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h index ae85ab3..7aa53e0 100644 --- a/intern/cycles/util/util_half.h +++ b/intern/cycles/util/util_half.h @@ -33,17 +33,20 @@ CCL_NAMESPACE_BEGIN #else +#ifndef __KERNEL_CUDA__ typedef unsigned short half; +#endif + struct half4 { half x, y, z, w; }; #ifdef __KERNEL_CUDA__ ccl_device_inline void float4_store_half(half *h, float4 f, float scale) { - h[0] = __float2half_rn(f.x * scale); - h[1] = __float2half_rn(f.y * scale); - h[2] = __float2half_rn(f.z * scale); - h[3] = __float2half_rn(f.w * scale); + h[0] = __float2half(f.x * scale); + h[1] = __float2half(f.y * scale); + h[2] = __float2half(f.z * scale); + h[3] = __float2half(f.w * scale); } #else diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h index 2ef4728..dee9c30 100644 --- a/intern/cycles/util/util_texture.h +++ b/intern/cycles/util/util_texture.h @@ -54,8 +54,8 @@ CCL_NAMESPACE_BEGIN #define TEX_NUM_BYTE4_CUDA_KEPLER 1024 #define TEX_NUM_FLOAT_CUDA_KEPLER 1024 #define TEX_NUM_BYTE_CUDA_KEPLER 1024 -#define TEX_NUM_HALF4_CUDA_KEPLER 0 -#define TEX_NUM_HALF_CUDA_KEPLER 0 +#define TEX_NUM_HALF4_CUDA_KEPLER 1024 +#define TEX_NUM_HALF_CUDA_KEPLER 1024 #define TEX_START_FLOAT4_CUDA_KEPLER 0 #define TEX_START_BYTE4_CUDA_KEPLER TEX_NUM_FLOAT4_CUDA_KEPLER #define TEX_START_FLOAT_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER) _______________________________________________ Bf-blender-cvs mailing list [email protected] https://lists.blender.org/mailman/listinfo/bf-blender-cvs
