Revision: 57002
          http://sourceforge.net/p/brlcad/code/57002
Author:   ejno
Date:     2013-08-20 19:01:59 +0000 (Tue, 20 Aug 2013)
Log Message:
-----------
fix problems; work with opencl 1.1; align memory used with opencl

Modified Paths:
--------------
    brlcad/branches/opencl/src/librt/primitives/sph/sph.c
    brlcad/branches/opencl/src/librt/primitives/sph/sph_shot.cl

Modified: brlcad/branches/opencl/src/librt/primitives/sph/sph.c
===================================================================
--- brlcad/branches/opencl/src/librt/primitives/sph/sph.c       2013-08-20 
18:04:58 UTC (rev 57001)
+++ brlcad/branches/opencl/src/librt/primitives/sph/sph.c       2013-08-20 
19:01:59 UTC (rev 57002)
@@ -70,7 +70,6 @@
 
 
 #define CLT
-/* #define CLT_SINGLE_PRECISION */
 
 
 #ifdef CLT
@@ -90,6 +89,29 @@
 #endif
 
 
+struct AlignedPtr
+{
+    void *alloc;
+    void *ptr;
+};
+
+
+inline struct AlignedPtr
+aligned_malloc(size_t alignment, size_t size)
+{
+    struct AlignedPtr ap;
+    ap.alloc = bu_malloc(size + alignment - 1, "failed to allocate memory in 
aligned_malloc()");
+    ap.ptr = (void *)(((uintptr_t)ap.alloc + alignment - 1) / 
alignment*alignment);
+    return ap;
+}
+
+
+#define ALIGNED_SET(name, type, value) \
+        {(name) = aligned_malloc(sizeof(type), sizeof(type));\
+        *((type *)name.ptr) = (value);}
+
+
+
 const int clt_semaphore = 12; /* FIXME: for testing; this isn't our semaphore 
*/
 static int clt_initialized = 0;
 static cl_device_id clt_device;
@@ -227,30 +249,43 @@
 static cl_double3
 clt_shot(cl_double3 o, cl_double3 dir, cl_double3 V, cl_double radsq, size_t 
hypersample)
 {
+    const char * const bu_free_error = "failed bu_free() in clt_shot()";
     cl_int error;
     cl_mem output;
     cl_double3 result;
-    cl_event done_kernel;
+    struct AlignedPtr a_result, a_o, a_dir, a_V, a_radsq;
 
+    VSET(result.s, 0, 0, 0);
+    ALIGNED_SET(a_result, cl_double3, result);
+    ALIGNED_SET(a_o, cl_double3, o);
+    ALIGNED_SET(a_dir, cl_double3, dir);
+    ALIGNED_SET(a_V, cl_double3, V);
+    ALIGNED_SET(a_radsq, cl_double, radsq);
 
-    VSET(result.s, 0, 0, 0);
-    output = clCreateBuffer(clt_context, CL_MEM_USE_HOST_PTR | 
CL_MEM_WRITE_ONLY,
-           sizeof(cl_double3), &result, &error);
+    output = clCreateBuffer(clt_context, CL_MEM_COPY_HOST_PTR | 
CL_MEM_WRITE_ONLY,
+           sizeof(cl_double3), a_result.ptr, &error);
     if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL output buffer");
 
     bu_semaphore_acquire(clt_semaphore);
     error = clSetKernelArg(clt_kernel, 0, sizeof(cl_mem), &output);
-    error |= clSetKernelArg(clt_kernel, 1, sizeof(cl_double3), &o);
-    error |= clSetKernelArg(clt_kernel, 2, sizeof(cl_double3), &dir);
-    error |= clSetKernelArg(clt_kernel, 3, sizeof(cl_double3), &V);
-    error |= clSetKernelArg(clt_kernel, 4, sizeof(cl_double), &radsq);
+    error |= clSetKernelArg(clt_kernel, 1, sizeof(cl_double3), a_o.ptr);
+    error |= clSetKernelArg(clt_kernel, 2, sizeof(cl_double3), a_dir.ptr);
+    error |= clSetKernelArg(clt_kernel, 3, sizeof(cl_double3), a_V.ptr);
+    error |= clSetKernelArg(clt_kernel, 4, sizeof(cl_double), a_radsq.ptr);
     if (error != CL_SUCCESS) bu_bomb("failed to set OpenCL kernel arguments");
-    error = clEnqueueNDRangeKernel(clt_queue, clt_kernel, 1, NULL, 
&hypersample, NULL, 0, NULL, &done_kernel);
+    error = clEnqueueNDRangeKernel(clt_queue, clt_kernel, 1, NULL, 
&hypersample, NULL, 0, NULL, NULL);
     bu_semaphore_release(clt_semaphore);
     if (error != CL_SUCCESS) bu_bomb("failed to enqueue OpenCL kernel");
 
     if (clFinish(clt_queue) != CL_SUCCESS) bu_bomb("failure in clFinish()");
+    clEnqueueReadBuffer(clt_queue, output, CL_TRUE, 0, sizeof(cl_double3), 
&result, 0, NULL, NULL);
     clReleaseMemObject(output);
+    bu_free(a_result.alloc, bu_free_error);
+    bu_free(a_o.alloc, bu_free_error);
+    bu_free(a_dir.alloc, bu_free_error);
+    bu_free(a_V.alloc, bu_free_error);
+    bu_free(a_radsq.alloc, bu_free_error);
+
     return result;
 }
 #endif

Modified: brlcad/branches/opencl/src/librt/primitives/sph/sph_shot.cl
===================================================================
--- brlcad/branches/opencl/src/librt/primitives/sph/sph_shot.cl 2013-08-20 
18:04:58 UTC (rev 57001)
+++ brlcad/branches/opencl/src/librt/primitives/sph/sph_shot.cl 2013-08-20 
19:01:59 UTC (rev 57002)
@@ -34,9 +34,9 @@
 
     ov = V - o;
     magsq_ov = ov[0]*ov[0] + ov[1]*ov[1] + ov[2]*ov[2];
-    printf("TZ: ov: %0.30f\t%0.30f\t%0.30f\n", ov[0], ov[1], ov[2]);
+    //printf("TZ: ov: %0.30f\t%0.30f\t%0.30f\n", ov[0], ov[1], ov[2]);
     b = dot(dir, ov);
-    printf("TZ: b: %0.30f\n", b);
+    //printf("TZ: b: %0.30f\n", b);
 
     if (magsq_ov >= radsq) {
        // ray origin is outside of sphere

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Introducing Performance Central, a new site from SourceForge and 
AppDynamics. Performance Central is your source for news, insights, 
analysis and resources for efficient Application Performance Management. 
Visit us today!
http://pubads.g.doubleclick.net/gampad/clk?id=48897511&iu=/4140/ostg.clktrk
_______________________________________________
BRL-CAD Source Commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/brlcad-commits

Reply via email to