Revision: 36795 http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=36795 Author: blendix Date: 2011-05-20 12:26:01 +0000 (Fri, 20 May 2011) Log Message: ----------- Cycles: some steps to getting OpenCL backend to compile.
Modified Paths: -------------- branches/cycles/intern/cycles/device/device_opencl.cpp branches/cycles/intern/cycles/kernel/CMakeLists.txt branches/cycles/intern/cycles/kernel/kernel.cl branches/cycles/intern/cycles/kernel/kernel_compat_cuda.h branches/cycles/intern/cycles/kernel/kernel_compat_opencl.h branches/cycles/intern/cycles/kernel/kernel_globals.h branches/cycles/intern/cycles/kernel/kernel_light.h branches/cycles/intern/cycles/kernel/kernel_triangle.h branches/cycles/intern/cycles/kernel/kernel_types.h branches/cycles/intern/cycles/kernel/svm/bsdf_ashikhmin_velvet.h branches/cycles/intern/cycles/kernel/svm/bsdf_diffuse.h branches/cycles/intern/cycles/kernel/svm/bsdf_microfacet.h branches/cycles/intern/cycles/kernel/svm/bsdf_ward.h branches/cycles/intern/cycles/kernel/svm/bsdf_westin.h branches/cycles/intern/cycles/kernel/svm/svm_blend.h branches/cycles/intern/cycles/kernel/svm/svm_displace.h branches/cycles/intern/cycles/kernel/svm/svm_distorted_noise.h branches/cycles/intern/cycles/kernel/svm/svm_image.h branches/cycles/intern/cycles/kernel/svm/svm_mix.h branches/cycles/intern/cycles/kernel/svm/svm_sky.h branches/cycles/intern/cycles/kernel/svm/svm_texture.h branches/cycles/intern/cycles/kernel/svm/svm_types.h branches/cycles/intern/cycles/util/util_color.h branches/cycles/intern/cycles/util/util_math.h Added Paths: ----------- branches/cycles/intern/cycles/kernel/kernel_textures.h Modified: branches/cycles/intern/cycles/device/device_opencl.cpp =================================================================== --- branches/cycles/intern/cycles/device/device_opencl.cpp 2011-05-20 11:15:44 UTC (rev 36794) +++ branches/cycles/intern/cycles/device/device_opencl.cpp 2011-05-20 12:26:01 UTC (rev 36795) @@ -55,6 +55,7 @@ cl_int ciErr; map<string, device_vector<uchar>*> const_mem_map; map<string, device_memory*> mem_map; + device_ptr null_mem; const char *opencl_error_string(cl_int err) { @@ -125,10 +126,10 @@ ciErr = clGetPlatformIDs(1, &cpPlatform, NULL); opencl_assert(ciErr); - ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &cdDevice, NULL); + ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 1, &cdDevice, NULL); opencl_assert(ciErr); - cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL /*clLogMessagesToStdoutAPPLE */, NULL, &ciErr); + cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr); opencl_assert(ciErr); cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevice, 0, &ciErr); @@ -137,11 +138,17 @@ /* compile kernel */ string source = string_printf("#include \"kernel.cl\" // %lf\n", time_dt()); size_t source_len = source.size(); - string build_options = "-I ../kernel -I ../util -Werror -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END="; //" + path_get("kernel") + " -Werror"; - //printf("path %s\n", path_get("kernel").c_str()); - //clUnloadCompiler(); + string build_options = ""; + //string csource = "../blender/intern/cycles"; + //build_options += "-I " + csource + "/kernel -I " + csource + "/util"; + + build_options += " -I " + path_get("kernel"); /* todo: escape path */ + + build_options += " -Werror"; + build_options += " -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END="; + cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&source, &source_len, &ciErr); opencl_assert(ciErr); @@ -170,10 +177,15 @@ opencl_assert(ciErr); ckFilmConvertKernel = clCreateKernel(cpProgram, "kernel_ocl_tonemap", &ciErr); opencl_assert(ciErr); + + null_mem = (device_ptr)clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr); } ~OpenCLDevice() { + + clReleaseMemObject(CL_MEM_PTR(null_mem)); + map<string, device_vector<uchar>*>::iterator mt; for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) { mem_free(*(mt->second)); @@ -261,6 +273,7 @@ void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic) { mem_alloc(mem, MEM_READ_ONLY); + mem_copy_to(mem); mem_map[name] = &mem; } @@ -295,6 +308,11 @@ ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state); + +#define KERNEL_TEX(type, ttype, name) \ + ciErr |= set_kernel_arg_mem(ckPathTraceKernel, &narg, #name); +#include "kernel_textures.h" + ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_pass), (void*)&d_pass); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y); @@ -314,11 +332,21 @@ cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name) { - device_memory *mem = mem_map[name]; - cl_mem ptr = CL_MEM_PTR(mem->device_pointer); - cl_int size = mem->data_width; - cl_int err = 0; + cl_mem ptr; + cl_int size, err = 0; + + if(mem_map.find(name) != mem_map.end()) { + device_memory *mem = mem_map[name]; + ptr = CL_MEM_PTR(mem->device_pointer); + size = mem->data_width; + } + else { + /* work around NULL not working, even though the spec says otherwise */ + ptr = CL_MEM_PTR(null_mem); + size = 1; + } + err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr); opencl_assert(err); err |= clSetKernelArg(kernel, (*narg)++, sizeof(size), (void*)&size); @@ -347,9 +375,11 @@ ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer); - ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_R"); - ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_G"); - ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_B"); + +#define KERNEL_TEX(type, ttype, name) \ + ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name); +#include "kernel_textures.h" + ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_pass), (void*)&d_pass); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_resolution), (void*)&d_resolution); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x); Modified: branches/cycles/intern/cycles/kernel/CMakeLists.txt =================================================================== --- branches/cycles/intern/cycles/kernel/CMakeLists.txt 2011-05-20 11:15:44 UTC (rev 36794) +++ branches/cycles/intern/cycles/kernel/CMakeLists.txt 2011-05-20 12:26:01 UTC (rev 36795) @@ -25,8 +25,11 @@ kernel_qbvh.h kernel_random.h kernel_shader.h + kernel_textures.h kernel_triangle.h - kernel_types.h + kernel_types.h) + +SET(svm_headers svm/bsdf.h svm/bsdf_ashikhmin_velvet.h svm/bsdf_diffuse.h @@ -78,7 +81,7 @@ ENDIF() IF(WITH_CYCLES_CUDA) - SET(cuda_sources kernel.cu ${headers}) + SET(cuda_sources kernel.cu ${headers} ${svm_headers}) SET(cuda_cubins) FOREACH(arch ${CYCLES_CUDA_ARCH}) @@ -106,9 +109,23 @@ INCLUDE_DIRECTORIES(. ../util osl svm) -ADD_LIBRARY(cycles_kernel ${sources} ${headers}) +ADD_LIBRARY(cycles_kernel ${sources} ${headers} ${svm_headers}) IF(WITH_CYCLES_CUDA) ADD_DEPENDENCIES(cycles_kernel cycles_kernel_cuda) ENDIF() +# OPENCL kernel + +IF(WITH_CYCLES_OPENCL) + SET(util_headers + ../util/util_color.h + ../util/util_math.h + ../util/util_transform.h + ../util/util_types.h) + + INSTALL(FILES kernel.cl ${headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel) + INSTALL(FILES ${svm_headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel/svm) + INSTALL(FILES ${util_headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel) +ENDIF() + Modified: branches/cycles/intern/cycles/kernel/kernel.cl =================================================================== --- branches/cycles/intern/cycles/kernel/kernel.cl 2011-05-20 11:15:44 UTC (rev 36794) +++ branches/cycles/intern/cycles/kernel/kernel.cl 2011-05-20 12:26:01 UTC (rev 36795) @@ -23,72 +23,62 @@ #include "kernel_types.h" #include "kernel_globals.h" -typedef struct KernelGlobals { - __constant KernelData *data; - - __global float *__response_curve_R; - int __response_curve_R_width; - - __global float *__response_curve_G; - int __response_curve_G_width; - - __global float *__response_curve_B; - int __response_curve_B_width; -} KernelGlobals; - #include "kernel_film.h" -//#include "kernel_path.h" +#include "kernel_path.h" //#include "kernel_displace.h" -__kernel void kernel_ocl_path_trace(__constant KernelData *data, __global float4 *buffer, __global uint *rng_state, int pass, int sx, int sy, int sw, int sh) +__kernel void kernel_ocl_path_trace( + __constant KernelData *data, + __global float4 *buffer, + __global uint *rng_state, + +#define KERNEL_TEX(type, ttype, name) \ + __global type *name, \ + int name##_width, +#include "kernel_textures.h" + + int pass, + int sx, int sy, int sw, int sh) { KernelGlobals kglobals, *kg = &kglobals; + kg->data = data; - int x = get_global_id(0); - int y = get_global_id(1); +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; \ + kg->name##_width = name##_width; +#include "kernel_textures.h" + + int x = sx + get_global_id(0); + int y = sy + get_global_id(1); int w = kernel_data.cam.width; - if(x < sx + sw && y < sy + sh) { - if(pass == 0) { - buffer[x + w*y].x = 0.5f; - buffer[x + w*y].y = 0.5f; - buffer[x + w*y].z = 0.5f; - } - else { - buffer[x + w*y].x += 0.5f; - buffer[x + w*y].y += 0.5f; - buffer[x + w*y].z += 0.5f; - } - - //= make_float3(1.0f, 0.9f, 0.0f); - //kernel_path_trace(buffer, rng_state, pass, x, y); - } + if(x < sx + sw && y < sy + sh) + kernel_path_trace(kg, buffer, rng_state, pass, x, y); } __kernel void kernel_ocl_tonemap( __constant KernelData *data, __global uchar4 *rgba, __global float4 *buffer, - __global float *__response_curve_R, - int __response_curve_R_width, - __global float *__response_curve_G, - int __response_curve_G_width, - __global float *__response_curve_B, - int __response_curve_B_width, + +#define KERNEL_TEX(type, ttype, name) \ + __global type *name, \ + int name##_width, +#include "kernel_textures.h" + int pass, int resolution, int sx, int sy, int sw, int sh) { KernelGlobals kglobals, *kg = &kglobals; kg->data = data; - kg->__response_curve_R = __response_curve_R; - kg->__response_curve_R_width = __response_curve_R_width; - kg->__response_curve_G = __response_curve_G; - kg->__response_curve_G_width = __response_curve_G_width; - kg->__response_curve_B = __response_curve_B; - kg->__response_curve_B_width = __response_curve_B_width; +#define KERNEL_TEX(type, ttype, name) \ + kg->name = name; \ + kg->name##_width = name##_width; +#include "kernel_textures.h" + int x = sx + get_global_id(0); int y = sy + get_global_id(1); @@ -96,10 +86,10 @@ kernel_film_tonemap(kg, rgba, buffer, pass, resolution, x, y); } -__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx) +/*__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx) { int x = sx + get_global_id(0); kernel_displace(input, offset, x); -} +}*/ Modified: branches/cycles/intern/cycles/kernel/kernel_compat_cuda.h =================================================================== --- branches/cycles/intern/cycles/kernel/kernel_compat_cuda.h 2011-05-20 11:15:44 UTC (rev 36794) +++ branches/cycles/intern/cycles/kernel/kernel_compat_cuda.h 2011-05-20 12:26:01 UTC (rev 36795) @@ -35,7 +35,7 @@ #define __device_inline __device__ __inline__ #define __global #define __shared __shared__ -#define __constant __constant__ +#define __constant /* No assert supported for CUDA */ @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org http://lists.blender.org/mailman/listinfo/bf-blender-cvs