Revision: 57002
http://sourceforge.net/p/brlcad/code/57002
Author: ejno
Date: 2013-08-20 19:01:59 +0000 (Tue, 20 Aug 2013)
Log Message:
-----------
fix problems; work with opencl 1.1; align memory used with opencl
Modified Paths:
--------------
brlcad/branches/opencl/src/librt/primitives/sph/sph.c
brlcad/branches/opencl/src/librt/primitives/sph/sph_shot.cl
Modified: brlcad/branches/opencl/src/librt/primitives/sph/sph.c
===================================================================
--- brlcad/branches/opencl/src/librt/primitives/sph/sph.c 2013-08-20
18:04:58 UTC (rev 57001)
+++ brlcad/branches/opencl/src/librt/primitives/sph/sph.c 2013-08-20
19:01:59 UTC (rev 57002)
@@ -70,7 +70,6 @@
#define CLT
-/* #define CLT_SINGLE_PRECISION */
#ifdef CLT
@@ -90,6 +89,29 @@
#endif
+struct AlignedPtr
+{
+ void *alloc;
+ void *ptr;
+};
+
+
+inline struct AlignedPtr
+aligned_malloc(size_t alignment, size_t size)
+{
+ struct AlignedPtr ap;
+ ap.alloc = bu_malloc(size + alignment - 1, "failed to allocate memory in
aligned_malloc()");
+ ap.ptr = (void *)(((uintptr_t)ap.alloc + alignment - 1) /
alignment*alignment);
+ return ap;
+}
+
+
+#define ALIGNED_SET(name, type, value) \
+ {(name) = aligned_malloc(sizeof(type), sizeof(type));\
+ *((type *)name.ptr) = (value);}
+
+
+
const int clt_semaphore = 12; /* FIXME: for testing; this isn't our semaphore
*/
static int clt_initialized = 0;
static cl_device_id clt_device;
@@ -227,30 +249,43 @@
static cl_double3
clt_shot(cl_double3 o, cl_double3 dir, cl_double3 V, cl_double radsq, size_t
hypersample)
{
+ const char * const bu_free_error = "failed bu_free() in clt_shot()";
cl_int error;
cl_mem output;
cl_double3 result;
- cl_event done_kernel;
+ struct AlignedPtr a_result, a_o, a_dir, a_V, a_radsq;
+ VSET(result.s, 0, 0, 0);
+ ALIGNED_SET(a_result, cl_double3, result);
+ ALIGNED_SET(a_o, cl_double3, o);
+ ALIGNED_SET(a_dir, cl_double3, dir);
+ ALIGNED_SET(a_V, cl_double3, V);
+ ALIGNED_SET(a_radsq, cl_double, radsq);
- VSET(result.s, 0, 0, 0);
- output = clCreateBuffer(clt_context, CL_MEM_USE_HOST_PTR |
CL_MEM_WRITE_ONLY,
- sizeof(cl_double3), &result, &error);
+ output = clCreateBuffer(clt_context, CL_MEM_COPY_HOST_PTR |
CL_MEM_WRITE_ONLY,
+ sizeof(cl_double3), a_result.ptr, &error);
if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL output buffer");
bu_semaphore_acquire(clt_semaphore);
error = clSetKernelArg(clt_kernel, 0, sizeof(cl_mem), &output);
- error |= clSetKernelArg(clt_kernel, 1, sizeof(cl_double3), &o);
- error |= clSetKernelArg(clt_kernel, 2, sizeof(cl_double3), &dir);
- error |= clSetKernelArg(clt_kernel, 3, sizeof(cl_double3), &V);
- error |= clSetKernelArg(clt_kernel, 4, sizeof(cl_double), &radsq);
+ error |= clSetKernelArg(clt_kernel, 1, sizeof(cl_double3), a_o.ptr);
+ error |= clSetKernelArg(clt_kernel, 2, sizeof(cl_double3), a_dir.ptr);
+ error |= clSetKernelArg(clt_kernel, 3, sizeof(cl_double3), a_V.ptr);
+ error |= clSetKernelArg(clt_kernel, 4, sizeof(cl_double), a_radsq.ptr);
if (error != CL_SUCCESS) bu_bomb("failed to set OpenCL kernel arguments");
- error = clEnqueueNDRangeKernel(clt_queue, clt_kernel, 1, NULL,
&hypersample, NULL, 0, NULL, &done_kernel);
+ error = clEnqueueNDRangeKernel(clt_queue, clt_kernel, 1, NULL,
&hypersample, NULL, 0, NULL, NULL);
bu_semaphore_release(clt_semaphore);
if (error != CL_SUCCESS) bu_bomb("failed to enqueue OpenCL kernel");
if (clFinish(clt_queue) != CL_SUCCESS) bu_bomb("failure in clFinish()");
+ clEnqueueReadBuffer(clt_queue, output, CL_TRUE, 0, sizeof(cl_double3),
&result, 0, NULL, NULL);
clReleaseMemObject(output);
+ bu_free(a_result.alloc, bu_free_error);
+ bu_free(a_o.alloc, bu_free_error);
+ bu_free(a_dir.alloc, bu_free_error);
+ bu_free(a_V.alloc, bu_free_error);
+ bu_free(a_radsq.alloc, bu_free_error);
+
return result;
}
#endif
Modified: brlcad/branches/opencl/src/librt/primitives/sph/sph_shot.cl
===================================================================
--- brlcad/branches/opencl/src/librt/primitives/sph/sph_shot.cl 2013-08-20
18:04:58 UTC (rev 57001)
+++ brlcad/branches/opencl/src/librt/primitives/sph/sph_shot.cl 2013-08-20
19:01:59 UTC (rev 57002)
@@ -34,9 +34,9 @@
ov = V - o;
magsq_ov = ov[0]*ov[0] + ov[1]*ov[1] + ov[2]*ov[2];
- printf("TZ: ov: %0.30f\t%0.30f\t%0.30f\n", ov[0], ov[1], ov[2]);
+ //printf("TZ: ov: %0.30f\t%0.30f\t%0.30f\n", ov[0], ov[1], ov[2]);
b = dot(dir, ov);
- printf("TZ: b: %0.30f\n", b);
+ //printf("TZ: b: %0.30f\n", b);
if (magsq_ov >= radsq) {
// ray origin is outside of sphere
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Introducing Performance Central, a new site from SourceForge and
AppDynamics. Performance Central is your source for news, insights,
analysis and resources for efficient Application Performance Management.
Visit us today!
http://pubads.g.doubleclick.net/gampad/clk?id=48897511&iu=/4140/ostg.clktrk
_______________________________________________
BRL-CAD Source Commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/brlcad-commits