Commit: 8905c5c87495ad8fd876de83a968d79ac54f0e17
Author: Hristo Gueorguiev
Date:   Sat Oct 22 23:38:42 2016 +0200
Branches: master
https://developer.blender.org/rB8905c5c87495ad8fd876de83a968d79ac54f0e17

Cycles: OpenCL 3d textures support.

Note that volume rendering is not supported yet; this is a step towards that.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D2299

===================================================================

M       intern/cycles/kernel/CMakeLists.txt
M       intern/cycles/kernel/geom/geom_volume.h
A       intern/cycles/kernel/kernel_image_opencl.h
M       intern/cycles/kernel/kernels/opencl/kernel.cl
M       intern/cycles/kernel/split/kernel_split_common.h
M       intern/cycles/kernel/svm/svm_image.h
M       intern/cycles/kernel/svm/svm_voxel.h
M       intern/cycles/render/image.cpp

===================================================================

diff --git a/intern/cycles/kernel/CMakeLists.txt 
b/intern/cycles/kernel/CMakeLists.txt
index e4341c8..694f19a 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -57,6 +57,7 @@ set(SRC_HEADERS
        kernel_emission.h
        kernel_film.h
        kernel_globals.h
+       kernel_image_opencl.h
        kernel_jitter.h
        kernel_light.h
        kernel_math.h
diff --git a/intern/cycles/kernel/geom/geom_volume.h 
b/intern/cycles/kernel/geom/geom_volume.h
index fd97a63..03724c95 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Return position normalized to 0..1 in mesh bounds */
 
-#if defined(__KERNEL_GPU__) && __CUDA_ARCH__ < 300
+#if defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300
 ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z)
 {
        float4 r;
@@ -42,7 +42,7 @@ ccl_device float4 volume_image_texture_3d(int id, float x, 
float y, float z)
        }
        return r;
 }
-#endif  /* __KERNEL_GPU__ */
+#endif  /* __KERNEL_CUDA__ */
 
 ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
                                                     const ShaderData *sd,
@@ -64,8 +64,8 @@ ccl_device_inline float3 
volume_normalized_position(KernelGlobals *kg,
 
 ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData 
*sd, const AttributeDescriptor desc, float *dx, float *dy)
 {
-       float3 P = volume_normalized_position(kg, sd, sd->P);
-#ifdef __KERNEL_GPU__
+       float3 P = volume_normalized_position(kg, sd, ccl_fetch(sd, P));
+#ifdef __KERNEL_CUDA__
 #  if __CUDA_ARCH__ >= 300
        CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset);
        float f = kernel_tex_image_interp_3d_float(tex, P.x, P.y, P.z);
@@ -73,6 +73,8 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, 
const ShaderData *sd,
 #  else
        float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z);
 #  endif
+#elif defined(__KERNEL_OPENCL__)
+       float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
 #else
        float4 r;
        if(sd->flag & SD_VOLUME_CUBIC)
@@ -89,14 +91,16 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, 
const ShaderData *sd,
 
 ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData 
*sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
 {
-       float3 P = volume_normalized_position(kg, sd, sd->P);
-#ifdef __KERNEL_GPU__
+       float3 P = volume_normalized_position(kg, sd, ccl_fetch(sd, P));
+#ifdef __KERNEL_CUDA__
 #  if __CUDA_ARCH__ >= 300
        CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset);
        float4 r = kernel_tex_image_interp_3d_float4(tex, P.x, P.y, P.z);
 #  else
        float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z);
 #  endif
+#elif defined(__KERNEL_OPENCL__)
+       float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z);
 #else
        float4 r;
        if(sd->flag & SD_VOLUME_CUBIC)
diff --git a/intern/cycles/kernel/kernel_image_opencl.h 
b/intern/cycles/kernel/kernel_image_opencl.h
new file mode 100644
index 0000000..f6c31b2
--- /dev/null
+++ b/intern/cycles/kernel/kernel_image_opencl.h
@@ -0,0 +1,227 @@
+/*
+ * Copyright 2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/* For OpenCL all images of the same data type are packed in a single array,
+ * and we do manual lookup and interpolation. */
+
+ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int 
offset)
+{  /* Reads the texel at `offset` from the packed array selected by `id` and returns it as a float4. */
+       /* Float4: ids below TEX_START_BYTE4_OPENCL are fetched and returned unchanged. */
+       if(id < TEX_START_BYTE4_OPENCL) {
+               return kernel_tex_fetch(__tex_image_float4_packed, offset);
+       }
+       /* Byte4: ids below TEX_START_FLOAT_OPENCL; each channel is scaled by 1/255. */
+       else if(id < TEX_START_FLOAT_OPENCL) {
+               uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset);
+               float f = 1.0f/255.0f;
+               return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
+       }
+       /* Float: ids below TEX_START_BYTE_OPENCL; the value fills x, y and z, and w is set to 1. */
+       else if(id < TEX_START_BYTE_OPENCL) {
+               float f = kernel_tex_fetch(__tex_image_float_packed, offset);
+               return make_float4(f, f, f, 1.0f);
+       }
+       /* Byte: all remaining ids; the value is scaled by 1/255, fills x, y and z, and w is set to 1. */
+       else {
+               uchar r = kernel_tex_fetch(__tex_image_byte_packed, offset);
+               float f = r * (1.0f/255.0f);
+               return make_float4(f, f, f, 1.0f);
+       }
+}
+
+ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
+{  /* Wraps x into 0 .. width-1 for repeating lookups; assumes width > 0 - TODO confirm callers never pass 0. */
+       x %= width;  /* The remainder may be negative when x is negative. */
+       if(x < 0)
+               x += width;  /* Shift a negative remainder back into the valid range. */
+       return x;
+}
+
+ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
+{  /* Limits x to the valid texel index range. */
+       return clamp(x, 0, width-1);  /* Result lies in 0 .. width-1. */
+}
+
+ccl_device_inline float svm_image_texture_frac(float x, int *ix)
+{  /* Splits x into an integer part, written to *ix, and the remainder x - *ix, which is returned. */
+       int i = float_to_int(x) - ((x < 0.0f)? 1: 0);  /* One is subtracted for negative x; presumably float_to_int truncates toward zero, so this rounds down - confirm. */
+       *ix = i;
+       return x - (float)i;
+}
+
+ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, 
float y)
+{  /* Looks up image `id` at coordinates (x, y) using the interpolation and extension flags stored with the image. */
+       uint4 info = kernel_tex_fetch(__tex_image_packed_info, id*2);  /* Per-image record; this function reads the entry at index id*2. */
+       uint width = info.x;
+       uint height = info.y;
+       uint offset = info.z;  /* Index of the image's first texel inside the packed array. */
+
+       /* Image options stored as bits of info.w: bit 0 = closest interpolation, bit 1 = repeat, bit 2 = extend; neither extension bit = clip. */
+       uint interpolation = (info.w & (1 << 0)) ? INTERPOLATION_CLOSEST : 
INTERPOLATION_LINEAR;
+       uint extension;
+       if(info.w & (1 << 1))
+               extension = EXTENSION_REPEAT;
+       else if(info.w & (1 << 2))
+               extension = EXTENSION_EXTEND;
+       else
+               extension = EXTENSION_CLIP;
+
+       float4 r;
+       int ix, iy, nix, niy;  /* ix/iy index the base texel; nix/niy index its +1 neighbour (linear path only). */
+       if(interpolation == INTERPOLATION_CLOSEST) {
+               svm_image_texture_frac(x*width, &ix);  /* Only the integer part is used; the returned remainder is discarded. */
+               svm_image_texture_frac(y*height, &iy);
+
+               if(extension == EXTENSION_REPEAT) {
+                       ix = svm_image_texture_wrap_periodic(ix, width);
+                       iy = svm_image_texture_wrap_periodic(iy, height);
+               }
+               else {
+                       if(extension == EXTENSION_CLIP) {
+                               if(x < 0.0f || y < 0.0f || x > 1.0f || y > 
1.0f) {
+                                       return make_float4(0.0f, 0.0f, 0.0f, 
0.0f);
+                               }
+                       }
+                       /* EXTENSION_CLIP lookups that were not rejected above are clamped too. */
+                       /* EXTENSION_EXTEND: clamp the indices to the image edge. */
+                       ix = svm_image_texture_wrap_clamp(ix, width);
+                       iy = svm_image_texture_wrap_clamp(iy, height);
+               }
+
+               r = svm_image_texture_read(kg, id, offset + ix + iy*width);  /* Texels are addressed row by row: offset + ix + iy*width. */
+       }
+       else { /* INTERPOLATION_LINEAR: weighted blend of the four surrounding texels. */
+               float tx = svm_image_texture_frac(x*width - 0.5f, &ix);  /* tx/ty are the blend weights; the 0.5 shift presumably aligns samples with texel centers - confirm. */
+               float ty = svm_image_texture_frac(y*height - 0.5f, &iy);
+
+               if(extension == EXTENSION_REPEAT) {
+                       ix = svm_image_texture_wrap_periodic(ix, width);
+                       iy = svm_image_texture_wrap_periodic(iy, height);
+
+                       nix = svm_image_texture_wrap_periodic(ix+1, width);
+                       niy = svm_image_texture_wrap_periodic(iy+1, height);
+               }
+               else {
+                       if(extension == EXTENSION_CLIP) {
+                               if(x < 0.0f || y < 0.0f || x > 1.0f || y > 
1.0f) {
+                                       return make_float4(0.0f, 0.0f, 0.0f, 
0.0f);
+                               }
+                       }
+                       nix = svm_image_texture_wrap_clamp(ix+1, width);  /* Neighbours are computed before ix/iy are clamped below, so they use the unclamped base index. */
+                       niy = svm_image_texture_wrap_clamp(iy+1, height);
+                       ix = svm_image_texture_wrap_clamp(ix, width);
+                       iy = svm_image_texture_wrap_clamp(iy, height);
+               }
+
+               r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, 
offset + ix + iy*width);
+               r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix 
+ iy*width);
+               r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix 
+ niy*width);
+               r += ty*tx*svm_image_texture_read(kg, id, offset + nix + 
niy*width);
+       }
+
+       return r;
+}
+
+
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float 
x, float y, float z)
+{
+       uint4 info = kernel_tex_fetch(__tex_image_packed_info, id*2);
+       uint width = info.x;
+       uint height = info.y;
+       uint offset = info.z;
+       uint depth = kernel_tex_fetch(__tex_image_packed_info, id*2+1).x;
+
+       /* Image Options */
+       uint interpolation = (info.w & (1 << 0)) ? INTERPOLATION_CLOSEST : 
INTERPOLATION_LINEAR;
+       uint extension;
+       if(info.w & (1 << 1))
+               extension = EXTENSION_REPEAT;
+       else if(info.w & (1 << 2))
+               extension = EXTENSION_EXTEND;
+       else
+               extension = EXTENSION_CLIP;
+
+       float4 r;
+       int ix, iy, iz, nix, niy, niz;
+       if(interpolation == INTERPOLATION_CLOSEST) {
+               svm_image_texture_frac(x*width, &ix);
+               svm_image_texture_frac(y*height, &iy);
+               svm_image_texture_frac(z*depth, &iz);
+
+               if (extension == EXTENSION_REPEAT) {
+                       ix = svm_image_texture_wrap_periodic(ix, width);
+                       iy = svm_image_texture_wrap_periodic(iy, height);
+                       iz = svm_image_texture_wrap_periodic(iz, depth);
+               }
+               else {
+                       if (extension == EXTENSION_CLIP) {
+                               if(x < 0.0f || y < 0.0f || z < 0.0f ||
+                                  x > 1.0f || y > 1.0f || z > 1.0f)
+                                       return make_float4(0.0f, 0.0f, 0.0f, 
0.0f);
+                       }
+                       /* Fall through. */
+                       /* EXTENSION_EXTEND */
+                       ix = svm_image_texture_wrap_clamp(ix, width);
+                       iy = svm_image_texture_wrap_clamp(iy, height);
+                       iz = svm_image_texture_wrap_clamp(iz, depth);
+               }
+               r = svm_image_texture_read(kg, id, offset + ix + iy*width + 
iz*width*height);
+       }
+       else { /* INTERPOLATION_LINEAR */
+               float tx = svm_image_texture_frac(x*(float)width - 0.5f, &ix);
+               float ty = svm_image_texture_frac(y*(float)height - 0.5f, &iy);
+               float tz = svm_image_texture_frac(z*(float)depth - 0.5f, &iz);
+
+               if(extension == EXTENSION_REPEAT) {
+                       ix = svm_image_texture_wrap_periodic(ix, width);
+                       iy = svm_image_texture_wrap_periodic(iy, height);
+                       iz = svm_image_texture_wrap_periodic(iz, depth);
+
+                       nix = svm_image_texture_wrap_periodic(ix+1, width);
+                       niy = svm_image_texture_wrap_periodic(iy+1, height);
+                       niz = svm_image_texture_wrap_periodic(iz+1, depth);
+               }
+               else {
+                       if (extension == EXTENSION_CLIP)
+                               if(x < 0.0f || y < 0.0f || z < 0.0f ||
+                                  x > 1.0f || y > 1.0f || z > 1.0f)
+                                       return make_float4(0.0f, 0.0f, 0.0f, 
0.0f);
+                       /* Fall through. */
+                       /*  EXTENSION_EXTEND */
+                       nix = svm_image_texture_wrap_clamp(ix+1, width);
+                       niy = svm_image_texture_wrap_clamp(iy+1, height);
+                       niz = svm_image_texture_wrap_clamp(iz+1, depth);
+
+                       ix = svm_image_texture_wrap_clamp(ix, width);
+                       iy = svm_image_texture_wrap_clamp(iy, height);
+                       iz = svm_image_texture_wrap_clamp(iz, depth);
+               }
+
+               r  = (1.0f - tz)*(1.0f - ty)*(1.0f - 
tx)*svm_image_texture_read(kg, id, offset + ix + iy*width + iz*width*height);
+               r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_t

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to