Revision: 56744
          http://sourceforge.net/p/brlcad/code/56744
Author:   ejno
Date:     2013-08-12 15:17:39 +0000 (Mon, 12 Aug 2013)
Log Message:
-----------
experiments with double-precision, memory alignment, memory reading methods

Modified Paths:
--------------
    brlcad/branches/opencl/src/librt/primitives/sph/sph.c

Added Paths:
-----------
    brlcad/branches/opencl/src/librt/primitives/sph/aligned_sph.c
    brlcad/branches/opencl/src/librt/primitives/sph/mem_sph.c

Added: brlcad/branches/opencl/src/librt/primitives/sph/aligned_sph.c
===================================================================
--- brlcad/branches/opencl/src/librt/primitives/sph/aligned_sph.c               
                (rev 0)
+++ brlcad/branches/opencl/src/librt/primitives/sph/aligned_sph.c       
2013-08-12 15:17:39 UTC (rev 56744)
@@ -0,0 +1,666 @@
+/*                           S P H . C
+ * BRL-CAD
+ *
+ * Copyright (c) 1985-2013 United States Government as represented by
+ * the U.S. Army Research Laboratory.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this file; see the file named COPYING for more
+ * information.
+ */
+/** @addtogroup primitives */
+/** @{ */
+/** @file primitives/sph/sph.c
+ *
+ * Intersect a ray with a Sphere.
+ * Special case of the Generalized Ellipsoid
+ *
+ */
+/** @} */
+
+#include "common.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#include <OpenCL/cl.h>
+#include "bio.h"
+
+#include "vmath.h"
+#include "rtgeom.h"
+#include "raytrace.h"
+
+
+/*
+ * Algorithm:
+ *
+ * Given V, A, where |A| = Radius, there is a set of points on this sphere
+ *
+ * { (x, y, z) | (x, y, z) is on sphere defined by V, A }
+ *
+ * To find the intersection of a line with the sphere, consider
+ * the parametric line L:
+ *
+ * L : { P(n) | P + t(n) . D }
+ *
+ * Call W the actual point of intersection between L and the sphere.
+ *
+ * NORMALS.  Given the point W on the sphere, what is the vector
+ * normal to the tangent plane at that point?
+ *
+ * N = (W - V) / |A|
+ */
+
+struct sph_specific {
+    vect_t sph_V;      /* Vector to center of sphere */
+    fastf_t sph_radsq; /* Radius squared */
+    fastf_t sph_invrad;        /* Inverse radius (for normal) */
+    fastf_t sph_rad;   /* Radius */
+    mat_t sph_SoR;     /* Rotate and scale for UV mapping */
+};
+
+
+#define OPENCL
+
+
+#ifdef OPENCL
+static int clt_initialized = 0;
+static cl_device_id clt_device;
+static cl_context clt_context;
+static cl_command_queue clt_queue;
+static cl_program clt_program;
+static cl_kernel clt_kernel;
+
+
+struct AlignedPtr
+{
+    void *alloc;
+    void *ptr;
+};
+
+
+inline struct AlignedPtr
+aligned_malloc(size_t alignment, size_t size)
+{
+    struct AlignedPtr ap;
+    ap.alloc = bu_malloc(size + alignment - 1, "failed to allocate memory in 
align()");
+    ap.ptr = (void *)(((uintptr_t)ap.alloc + alignment - 1) / 
alignment*alignment);
+    return ap;
+}
+
+
+#define ALIGNED_SET(name, type, value) \
+    {(name) = aligned_malloc(sizeof(type), sizeof(type));\
+    *((type *)name.ptr) = (value);}
+
+
+static void
+clt_cleanup()
+{
+    if (!clt_initialized) return;
+
+    clReleaseKernel(clt_kernel);
+    clReleaseCommandQueue(clt_queue);
+    clReleaseProgram(clt_program);
+    clReleaseContext(clt_context);
+
+    clt_initialized = 0;
+}
+
+
+const char * const clt_program_code = "\
+__kernel void ell_shot(__global float *output, float3 o, float3 l, float3 c, 
float r)\n\
+{\n\
+    float A = dot(l, l);\n\
+    float B = 2*dot(l, o-c);\n\
+    float C = dot(o-c, o-c) - r*r;\n\
+    float disc = B*B - 4*A*C;\n\
+\n\
+    if (disc <= 0) return;\n\
+\n\
+    float q = B < 0 ? (-B + sqrt(disc))/2 : (-B - sqrt(disc))/2;\n\
+    \n\
+    output[0] = q/A;\n\
+    output[1] = C/q;\n\
+}\n\
+\n\
+";
+
+
+/* for now, just get the first device from the first platform  */
+static cl_device_id
+clt_get_cl_device()
+{
+    cl_int error;
+    cl_platform_id platform;
+    cl_device_id device;
+
+    error = clGetPlatformIDs(1, &platform, NULL);
+    if (error != CL_SUCCESS) bu_bomb("failed to find an OpenCL platform");
+
+    error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
+    if (error == CL_DEVICE_NOT_FOUND)
+       error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL);
+    if (error != CL_SUCCESS) bu_bomb("failed to find an OpenCL device (using 
this method)");
+
+    return device;
+}
+
+
+static cl_program
+clt_get_program(cl_context context, cl_device_id device, const char *code)
+{
+    cl_int error;
+    cl_program program;
+    size_t code_size = strnlen(code, 1<<20);
+
+    program = clCreateProgramWithSource(context, 1, &code, &code_size, &error);
+    if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL program");
+
+    error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+    if (error != CL_SUCCESS) {
+       size_t log_size;
+       char *log_data;
+       clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, 
&log_size);
+       log_data = bu_malloc(log_size*sizeof(char), "failed to allocate memory 
for log");
+       clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 
log_size+1, log_data, NULL);
+       bu_log("BUILD LOG:\n%s\n", log_data);
+       bu_bomb("failed to build OpenCL program");
+    }
+
+    return program;
+}
+
+
+
+static void
+clt_init()
+{
+    cl_int error;
+
+    if (clt_initialized) return;
+    clt_initialized = 1;
+    atexit(clt_cleanup);
+
+    clt_device = clt_get_cl_device();
+
+    clt_context = clCreateContext(NULL, 1, &clt_device, NULL, NULL, &error);
+    if (error != CL_SUCCESS) bu_bomb("failed to create an OpenCL context");
+
+    clt_queue = clCreateCommandQueue(clt_context, clt_device, 0, &error);
+    if (error != CL_SUCCESS) bu_bomb("failed to create an OpenCL command 
queue");
+
+    clt_program = clt_get_program(clt_context, clt_device, clt_program_code);
+
+    clt_kernel = clCreateKernel(clt_program, "ell_shot", &error);
+    if (error != CL_SUCCESS) bu_bomb("failed to create an OpenCL kernel");
+
+    bu_log("initialized OpenCL");
+}
+
+
+static cl_float3
+clt_shot(cl_float3 o /* ray origin */, cl_float3 l /* ray direction (unit 
vector) */,
+cl_float3 c /* sphere center */, cl_float r /* sphere radius */)
+{
+    cl_int error;
+    cl_mem output;
+    const size_t global_size = 1;
+    cl_float3 vresult;
+    struct AlignedPtr result, a_o, a_l, a_c, a_r;
+
+    result = aligned_malloc(sizeof(cl_float3), sizeof(cl_float3));
+    VSET(((cl_float3 *)result.ptr)->s, 0, 0, 0);
+    ALIGNED_SET(a_o, cl_float3, o);
+    ALIGNED_SET(a_l, cl_float3, l);
+    ALIGNED_SET(a_c, cl_float3, c);
+    ALIGNED_SET(a_r, cl_float, r);
+
+    output = clCreateBuffer(clt_context, CL_MEM_USE_HOST_PTR | 
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
+           sizeof(cl_float3), result.ptr, &error);
+    if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL output buffer");
+
+    error = clSetKernelArg(clt_kernel, 0, sizeof(cl_mem), &output);
+    error |= clSetKernelArg(clt_kernel, 1, sizeof(cl_float3), a_o.ptr);
+    error |= clSetKernelArg(clt_kernel, 2, sizeof(cl_float3), a_l.ptr);
+    error |= clSetKernelArg(clt_kernel, 3, sizeof(cl_float3), a_c.ptr);
+    error |= clSetKernelArg(clt_kernel, 4, sizeof(cl_float), a_r.ptr);
+    if (error != CL_SUCCESS) bu_bomb("failed to set OpenCL kernel arguments");
+
+    error = clEnqueueNDRangeKernel(clt_queue, clt_kernel, 1, NULL, 
&global_size, NULL, 0, NULL, NULL);
+    if (error != CL_SUCCESS) bu_bomb("failed to enqueue OpenCL kernel");
+
+    if (clFinish(clt_queue) != CL_SUCCESS) bu_bomb("failure in clFinish");
+
+    VMOVE(vresult.s, ((cl_float3 *)result.ptr)->s);
+    free(result.alloc);
+    free(a_o.alloc);
+    free(a_l.alloc);
+    free(a_c.alloc);
+    free(a_r.alloc);
+    clReleaseMemObject(output);
+
+    return vresult;
+}
+#endif
+
+
+/**
+ * R T _ S P H _ P R E P
+ *
+ * Given a pointer to a GED database record, and a transformation matrix,
+ * determine if this is a valid sphere, and if so, precompute various
+ * terms of the formula.
+ *
+ * Returns -
+ * 0 SPH is OK
+ * !0 Error in description
+ *
+ * Implicit return -
+ * A struct sph_specific is created, and its address is stored in
+ * stp->st_specific for use by rt_sph_shot().
+ * If the ELL is really a SPH, stp->st_id is modified to ID_SPH.
+ */
+int
+rt_sph_prep(struct soltab *stp, struct rt_db_internal *ip, struct rt_i *rtip)
+{
+    register struct sph_specific *sph;
+    fastf_t magsq_a, magsq_b, magsq_c;
+    vect_t Au, Bu, Cu; /* A, B, C with unit length */
+    fastf_t f;
+    struct rt_ell_internal *eip;
+clt_init();
+    eip = (struct rt_ell_internal *)ip->idb_ptr;
+    RT_ELL_CK_MAGIC(eip);
+
+#ifdef OPENCL
+    clt_init();
+#endif
+
+    /* Validate that |A| > 0, |B| > 0, |C| > 0 */
+    magsq_a = MAGSQ(eip->a);
+    magsq_b = MAGSQ(eip->b);
+    magsq_c = MAGSQ(eip->c);
+    if (magsq_a < rtip->rti_tol.dist_sq || magsq_b < rtip->rti_tol.dist_sq || 
magsq_c < rtip->rti_tol.dist_sq) {
+       bu_log("rt_sph_prep():  sph(%s) zero length A(%g), B(%g), or C(%g) 
vector\n",
+              stp->st_name, magsq_a, magsq_b, magsq_c);
+       return 1;               /* BAD */
+    }
+
+    /* Validate that |A|, |B|, and |C| are nearly equal */
+    if (!NEAR_EQUAL(magsq_a, magsq_b, rtip->rti_tol.dist_sq)
+       || !NEAR_EQUAL(magsq_a, magsq_c, rtip->rti_tol.dist_sq)) {
+       return 1;               /* ELL, not SPH */
+    }
+
+    /* Create unit length versions of A, B, C */
+    f = 1.0/sqrt(magsq_a);
+    VSCALE(Au, eip->a, f);
+    f = 1.0/sqrt(magsq_b);
+    VSCALE(Bu, eip->b, f);
+    f = 1.0/sqrt(magsq_c);
+    VSCALE(Cu, eip->c, f);
+
+    /* Validate that A.B == 0, B.C == 0, A.C == 0 (check dir only) */
+    f = VDOT(Au, Bu);
+    if (! NEAR_ZERO(f, rtip->rti_tol.dist)) {
+       bu_log("rt_sph_prep():  sph(%s) A not perpendicular to B, f=%f\n", 
stp->st_name, f);
+       return 1;               /* BAD */
+    }
+    f = VDOT(Bu, Cu);
+    if (! NEAR_ZERO(f, rtip->rti_tol.dist)) {
+       bu_log("rt_sph_prep():  sph(%s) B not perpendicular to C, f=%f\n", 
stp->st_name, f);
+       return 1;               /* BAD */
+    }
+    f = VDOT(Au, Cu);
+    if (! NEAR_ZERO(f, rtip->rti_tol.dist)) {
+       bu_log("rt_sph_prep():  sph(%s) A not perpendicular to C, f=%f\n", 
stp->st_name, f);
+       return 1;               /* BAD */
+    }
+
+    /*
+     * This ELL is really an SPH
+     */
+    stp->st_id = ID_SPH;               /* "fix" soltab ID */
+    stp->st_meth = &OBJ[ID_SPH];
+
+    /* Solid is OK, compute constant terms now */
+    BU_GET(sph, struct sph_specific);
+    stp->st_specific = (genptr_t)sph;
+
+    VMOVE(sph->sph_V, eip->v);
+
+    sph->sph_radsq = magsq_a;
+    sph->sph_rad = sqrt(sph->sph_radsq);
+    sph->sph_invrad = 1.0 / sph->sph_rad;
+
+    /*
+     * Save the matrix which rotates our ABC to world
+     * XYZ respectively, and scales points on surface
+     * to unit length.  Used here in UV mapping.
+     * See ell.c for details.
+     */
+    MAT_IDN(sph->sph_SoR);
+    VSCALE(&sph->sph_SoR[0], eip->a, 1.0/magsq_a);
+    VSCALE(&sph->sph_SoR[4], eip->b, 1.0/magsq_b);
+    VSCALE(&sph->sph_SoR[8], eip->c, 1.0/magsq_c);
+
+    /* Compute bounding sphere */
+    VMOVE(stp->st_center, sph->sph_V);
+    stp->st_aradius = stp->st_bradius = sph->sph_rad;
+
+    /* Compute bounding RPP */
+    if (stp->st_meth->ft_bbox(ip, &(stp->st_min), &(stp->st_max), 
&(rtip->rti_tol))) return 1;
+    return 0;                  /* OK */
+}
+
+
+/**
+ * R T _ S P H _ P R I N T
+ */
+void
+rt_sph_print(register const struct soltab *stp)
+{
+    register const struct sph_specific *sph =
+       (struct sph_specific *)stp->st_specific;
+
+    VPRINT("V", sph->sph_V);
+    bu_log("Rad %g\n", sph->sph_rad);
+    bu_log("Radsq %g\n", sph->sph_radsq);
+    bu_log("Invrad %g\n", sph->sph_invrad);
+    bn_mat_print("S o R", sph->sph_SoR);
+}
+
+
+/**
+ * R T _ S P H _ S H O T
+ *
+ * Intersect a ray with a sphere.  If an intersection occurs, a struct
+ * seg will be acquired and filled in.
+ *
+ * Notes: In the quadratic equation, A is MAGSQ(r_dir) which is always
+ * equal to 1, so it does not appear.  The sign of B is reversed
+ * (vector is reversed) to save negation.  We have factored out the 2
+ * and 4 constants.
+ *
+ * Claim: The straight quadratic formula leads to precision problems
+ * if either A or C are small.  In our case A is always 1.  C is a
+ * radial distance of the ray origin from the sphere surface.  Thus if
+ * we are shooting from near the surface we may have problems.  XXX -
+ * investigate this.
+ *
+ * Returns -
+ * 0 MISS
+ * >0 HIT
+ */
+int
+rt_sph_shot(struct soltab *stp, register struct xray *rp, struct application 
*ap, struct seg *seghead)
+{
+#ifdef OPENCL
+    cl_float3 o; /* ray origin  */
+    cl_float3 l; /* ray direction (unit vector) */
+    cl_float3 c; /* sphere center  */
+    cl_float r; /* sphere radius */
+    cl_float3 result;
+    struct seg *segp;
+
+    (void)rp; (void)ap;
+
+    VMOVE(o.s, rp->r_pt);
+    VMOVE(l.s, rp->r_dir);
+    VMOVE(c.s, stp->st_center);
+    r = ((struct sph_specific *)stp->st_specific)->sph_rad;
+    result = clt_shot(o, l, c, r);
+
+    if (EQUAL(result.s[0], 0) && EQUAL(result.s[1], 0)) return 0; /* no hit  */
+
+    RT_GET_SEG(segp, ap->a_resource);
+    segp->seg_stp = stp;
+
+    segp->seg_in.hit_dist = result.s[1];
+    segp->seg_out.hit_dist = result.s[0];
+    segp->seg_in.hit_surfno = 0;
+    segp->seg_out.hit_surfno = 0;
+    BU_LIST_INSERT(&(seghead->l), &(segp->l));
+    return 2;                  /* HIT */
+
+#else
+    register struct sph_specific *sph =
+       (struct sph_specific *)stp->st_specific;
+    register struct seg *segp;
+
+    vect_t ov;         /* ray origin to center (V - P) */
+    fastf_t magsq_ov;  /* length squared of ov */
+    fastf_t b;         /* second term of quadratic eqn */
+    fastf_t root;              /* root of radical */
+
+    VSUB2(ov, sph->sph_V, rp->r_pt);
+    b = VDOT(rp->r_dir, ov);
+    magsq_ov = MAGSQ(ov);
+
+    if (magsq_ov >= sph->sph_radsq) {
+       /* ray origin is outside of sphere */
+       if (b < 0) {
+           /* ray direction is away from sphere */
+           return 0;           /* No hit */
+       }
+       root = b*b - magsq_ov + sph->sph_radsq;
+       if (root <= 0) {
+           /* no real roots */
+           return 0;           /* No hit */
+       }
+    } else {
+       root = b*b - magsq_ov + sph->sph_radsq;
+    }
+    root = sqrt(root);
+
+    RT_GET_SEG(segp, ap->a_resource);
+    segp->seg_stp = stp;
+
+    /* we know root is positive, so we know the smaller t */
+    segp->seg_in.hit_dist = b - root;
+    segp->seg_out.hit_dist = b + root;
+    segp->seg_in.hit_surfno = 0;
+    segp->seg_out.hit_surfno = 0;
+    BU_LIST_INSERT(&(seghead->l), &(segp->l));
+    return 2;                  /* HIT */
+#endif
+}
+
+
+#define RT_SPH_SEG_MISS(SEG)           (SEG).seg_stp=(struct soltab *) 0;
+/**
+ * R T _ S P H _ V S H O T
+ *
+ * This is the Becker vectorized version
+ */
+void
+rt_sph_vshot(struct soltab **stp, struct xray **rp, struct seg *segp, int n, 
struct application *ap)
+    /* An array of solid pointers */
+    /* An array of ray pointers */
+    /* array of segs (results returned) */
+    /* Number of ray/object pairs */
+
+{
+    register struct sph_specific *sph;
+    register int i;
+
+    vect_t ov;         /* ray origin to center (V - P) */
+    fastf_t magsq_ov;  /* length squared of ov */
+    fastf_t b;         /* second term of quadratic eqn */
+    fastf_t root;              /* root of radical */
+
+    if (ap) RT_CK_APPLICATION(ap);
+
+    /* for each ray/sphere pair */
+    for (i = 0; i < n; i++) {
+       if (stp[i] == 0) continue; /* stp[i] == 0 signals skip ray */
+
+       sph = (struct sph_specific *)stp[i]->st_specific;
+       VSUB2(ov, sph->sph_V, rp[i]->r_pt);
+       b = VDOT(rp[i]->r_dir, ov);
+       magsq_ov = MAGSQ(ov);
+
+       if (magsq_ov >= sph->sph_radsq) {
+           /* ray origin is outside of sphere */
+           if (b < 0) {
+               /* ray direction is away from sphere */
+               RT_SPH_SEG_MISS(segp[i]);               /* No hit */
+               continue;
+           }
+           root = b*b - magsq_ov + sph->sph_radsq;
+           if (root <= 0) {
+               /* no real roots */
+               RT_SPH_SEG_MISS(segp[i]);               /* No hit */
+               continue;
+           }
+       } else {
+           root = b*b - magsq_ov + sph->sph_radsq;
+       }
+       root = sqrt(root);
+
+       segp[i].seg_stp = stp[i];
+
+       /* we know root is positive, so we know the smaller t */
+       segp[i].seg_in.hit_dist = b - root;
+       segp[i].seg_out.hit_dist = b + root;
+       segp[i].seg_in.hit_surfno = 0;
+       segp[i].seg_out.hit_surfno = 0;
+    }
+}
+
+
+/**
+ * R T _ S P H _ N O R M
+ *
+ * Given ONE ray distance, return the normal and entry/exit point.
+ */
+void
+rt_sph_norm(register struct hit *hitp, struct soltab *stp, register struct 
xray *rp)
+{
+    register struct sph_specific *sph =
+       (struct sph_specific *)stp->st_specific;
+
+    VJOIN1(hitp->hit_point, rp->r_pt, hitp->hit_dist, rp->r_dir);
+    VSUB2(hitp->hit_normal, hitp->hit_point, sph->sph_V);
+    VSCALE(hitp->hit_normal, hitp->hit_normal, sph->sph_invrad);
+}
+
+
+/**
+ * R T _ S P H _ C U R V E
+ *
+ * Return the curvature of the sphere.
+ */
+void
+rt_sph_curve(register struct curvature *cvp, register struct hit *hitp, struct 
soltab *stp)
+{
+    register struct sph_specific *sph =
+       (struct sph_specific *)stp->st_specific;
+
+    cvp->crv_c1 = cvp->crv_c2 = - sph->sph_invrad;
+
+    /* any tangent direction */
+    bn_vec_ortho(cvp->crv_pdir, hitp->hit_normal);
+}
+
+
+/**
+ * R T _ S P H _ U V
+ *
+ * For a hit on the surface of an SPH, return the (u, v) coordinates
+ * of the hit point, 0 <= u, v <= 1.
+ *
+ * u = azimuth
+ * v = elevation
+ */
+void
+rt_sph_uv(struct application *ap, struct soltab *stp, register struct hit 
*hitp, register struct uvcoord *uvp)
+{
+    register struct sph_specific *sph =
+       (struct sph_specific *)stp->st_specific;
+    fastf_t r;
+    vect_t work;
+    vect_t pprime;
+
+    /* hit_point is on surface; project back to unit sphere, creating
+     * a vector from vertex to hit point which always has length=1.0
+     */
+    VSUB2(work, hitp->hit_point, sph->sph_V);
+    MAT4X3VEC(pprime, sph->sph_SoR, work);
+    /* Assert that pprime has unit length */
+
+    /* U is azimuth, atan() range: -pi to +pi */
+    uvp->uv_u = bn_atan2(pprime[Y], pprime[X]) * bn_inv2pi;
+    if (uvp->uv_u < 0)
+       uvp->uv_u += 1.0;
+    /*
+     * V is elevation, atan() range: -pi/2 to +pi/2, because sqrt()
+     * ensures that X parameter is always >0
+     */
+    uvp->uv_v = bn_atan2(pprime[Z],
+                        sqrt(pprime[X] * pprime[X] + pprime[Y] * pprime[Y])) *
+       bn_invpi + 0.5;
+
+    /* approximation: r / (circumference, 2 * pi * aradius) */
+    r = ap->a_rbeam + ap->a_diverge * hitp->hit_dist;
+    uvp->uv_du = uvp->uv_dv =
+       bn_inv2pi * r / stp->st_aradius;
+}
+
+
+/**
+ * R T _ S P H _ F R E E
+ */
+void
+rt_sph_free(register struct soltab *stp)
+{
+    register struct sph_specific *sph =
+       (struct sph_specific *)stp->st_specific;
+
+    BU_PUT(sph, struct sph_specific);
+}
+
+
+int
+rt_sph_class(void)
+{
+    return 0;
+}
+
+
+/**
+ * R T _ S P H _ P A R A M S
+ *
+ */
+int
+rt_sph_params(struct pc_pc_set *UNUSED(ps), const struct rt_db_internal *ip)
+{
+    if (ip) RT_CK_DB_INTERNAL(ip);
+
+    return 0;                  /* OK */
+}
+
+
+/* ELL versions are used for many of the callbacks */
+
+
+/*
+ * Local Variables:
+ * mode: C
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * c-file-style: "stroustrup"
+ * End:
+ * ex: shiftwidth=4 tabstop=8
+ */


Property changes on: 
brlcad/branches/opencl/src/librt/primitives/sph/aligned_sph.c
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: brlcad/branches/opencl/src/librt/primitives/sph/mem_sph.c
===================================================================
--- brlcad/branches/opencl/src/librt/primitives/sph/mem_sph.c                   
        (rev 0)
+++ brlcad/branches/opencl/src/librt/primitives/sph/mem_sph.c   2013-08-12 
15:17:39 UTC (rev 56744)
@@ -0,0 +1,633 @@
+/*                           S P H . C
+ * BRL-CAD
+ *
+ * Copyright (c) 1985-2013 United States Government as represented by
+ * the U.S. Army Research Laboratory.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this file; see the file named COPYING for more
+ * information.
+ */
+/** @addtogroup primitives */
+/** @{ */
+/** @file primitives/sph/sph.c
+ *
+ * Intersect a ray with a Sphere.
+ * Special case of the Generalized Ellipsoid
+ *
+ */
+/** @} */
+
+#include "common.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#include <OpenCL/cl.h>
+#include "bio.h"
+
+#include "vmath.h"
+#include "rtgeom.h"
+#include "raytrace.h"
+
+
+/*
+ * Algorithm:
+ *
+ * Given V, A, where |A| = Radius, there is a set of points on this sphere
+ *
+ * { (x, y, z) | (x, y, z) is on sphere defined by V, A }
+ *
+ * To find the intersection of a line with the sphere, consider
+ * the parametric line L:
+ *
+ * L : { P(n) | P + t(n) . D }
+ *
+ * Call W the actual point of intersection between L and the sphere.
+ *
+ * NORMALS.  Given the point W on the sphere, what is the vector
+ * normal to the tangent plane at that point?
+ *
+ * N = (W - V) / |A|
+ */
+
+struct sph_specific {
+    vect_t sph_V;      /* Vector to center of sphere */
+    fastf_t sph_radsq; /* Radius squared */
+    fastf_t sph_invrad;        /* Inverse radius (for normal) */
+    fastf_t sph_rad;   /* Radius */
+    mat_t sph_SoR;     /* Rotate and scale for UV mapping */
+};
+
+
+#define OPENCL
+
+
+#ifdef OPENCL
+static int clt_initialized = 0;
+static cl_device_id clt_device;
+static cl_context clt_context;
+static cl_command_queue clt_queue;
+static cl_program clt_program;
+static cl_kernel clt_kernel;
+
+
+static void
+clt_cleanup()
+{
+    if (!clt_initialized) return;
+
+    clReleaseKernel(clt_kernel);
+    clReleaseCommandQueue(clt_queue);
+    clReleaseProgram(clt_program);
+    clReleaseContext(clt_context);
+
+    clt_initialized = 0;
+}
+
+
+const char * const clt_program_code = "\
+__kernel void ell_shot(__global double3 *output, double3 o, double3 l, double3 
c, double r)\n\
+{\n\
+    double A = dot(l, l);\n\
+    double B = 2*dot(l, o-c);\n\
+    double C = dot(o-c, o-c) - r*r;\n\
+    double disc = B*B - 4*A*C;\n\
+\n\
+    if (disc <= 0) return;\n\
+\n\
+    double q = B < 0 ? (-B + sqrt(disc))/2 : (-B - sqrt(disc))/2;\n\
+    \n\
+    (*output)[0] = q/A;\n\
+    (*output)[1] = C/q;\n\
+}\n\
+\n\
+";
+
+
+/* for now, just get the first device from the first platform  */
+static cl_device_id
+clt_get_cl_device()
+{
+    cl_int error;
+    cl_platform_id platform;
+    cl_device_id device;
+
+    error = clGetPlatformIDs(1, &platform, NULL);
+    if (error != CL_SUCCESS) bu_bomb("failed to find an OpenCL platform");
+
+    error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
+    if (error == CL_DEVICE_NOT_FOUND)
+       error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL);
+    if (error != CL_SUCCESS) bu_bomb("failed to find an OpenCL device (using 
this method)");
+
+    return device;
+}
+
+
+static cl_program
+clt_get_program(cl_context context, cl_device_id device, const char *code)
+{
+    cl_int error;
+    cl_program program;
+    size_t code_size = strnlen(code, 1<<20);
+
+    program = clCreateProgramWithSource(context, 1, &code, &code_size, &error);
+    if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL program");
+
+    error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+    if (error != CL_SUCCESS) {
+       size_t log_size;
+       char *log_data;
+       clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, 
&log_size);
+       log_data = bu_malloc(log_size*sizeof(char), "failed to allocate memory 
for log");
+       clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 
log_size+1, log_data, NULL);
+       bu_log("BUILD LOG:\n%s\n", log_data);
+       bu_bomb("failed to build OpenCL program");
+    }
+
+    return program;
+}
+
+
+
+static void
+clt_init()
+{
+    cl_int error;
+
+    if (clt_initialized) return;
+    clt_initialized = 1;
+    atexit(clt_cleanup);
+
+    clt_device = clt_get_cl_device();
+
+    clt_context = clCreateContext(NULL, 1, &clt_device, NULL, NULL, &error);
+    if (error != CL_SUCCESS) bu_bomb("failed to create an OpenCL context");
+
+    clt_queue = clCreateCommandQueue(clt_context, clt_device, 0, &error);
+    if (error != CL_SUCCESS) bu_bomb("failed to create an OpenCL command 
queue");
+
+    clt_program = clt_get_program(clt_context, clt_device, clt_program_code);
+
+    clt_kernel = clCreateKernel(clt_program, "ell_shot", &error);
+    if (error != CL_SUCCESS) bu_bomb("failed to create an OpenCL kernel");
+
+    bu_log("initialized OpenCL");
+}
+
+
+static cl_double3
+clt_shot(cl_double3 o /* ray origin */, cl_double3 l /* ray direction (unit 
vector) */,
+cl_double3 c /* sphere center */, cl_double r /* sphere radius */)
+{
+    cl_int error;
+    cl_mem output;
+    cl_double3 result;
+    cl_double3 vres;
+    const size_t global_size = 1;
+
+    VSET(result.s, 0, 0, 0);
+    output = clCreateBuffer(clt_context, CL_MEM_COPY_HOST_PTR,
+           sizeof(cl_double3), &result, &error);
+    if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL output buffer");
+
+    error = clSetKernelArg(clt_kernel, 0, sizeof(cl_mem), &output);
+    error |= clSetKernelArg(clt_kernel, 1, sizeof(cl_double3), &o);
+    error |= clSetKernelArg(clt_kernel, 2, sizeof(cl_double3), &l);
+    error |= clSetKernelArg(clt_kernel, 3, sizeof(cl_double3), &c);
+    error |= clSetKernelArg(clt_kernel, 4, sizeof(cl_double), &r);
+    if (error != CL_SUCCESS) bu_bomb("failed to set OpenCL kernel arguments");
+
+    error = clEnqueueNDRangeKernel(clt_queue, clt_kernel, 1, NULL, 
&global_size, NULL, 0, NULL, NULL);
+    if (error != CL_SUCCESS) bu_bomb("failed to enqueue OpenCL kernel");
+
+    if (clFinish(clt_queue) != CL_SUCCESS) bu_bomb("failure in clFinish");
+
+    error = clEnqueueReadBuffer(clt_queue, output, CL_TRUE, 0, 
sizeof(cl_double3), &vres, 0, NULL, NULL);
+    if (error != CL_SUCCESS) bu_bomb("failure to read OpenCL output buffer");
+    clReleaseMemObject(output);
+    return vres;
+}
+#endif
+
+
+/**
+ * R T _ S P H _ P R E P
+ *
+ * Given a pointer to a GED database record, and a transformation matrix,
+ * determine if this is a valid sphere, and if so, precompute various
+ * terms of the formula.
+ *
+ * Returns -
+ * 0 SPH is OK
+ * !0 Error in description
+ *
+ * Implicit return -
+ * A struct sph_specific is created, and its address is stored in
+ * stp->st_specific for use by rt_sph_shot().
+ * If the ELL is really a SPH, stp->st_id is modified to ID_SPH.
+ */
+int
+rt_sph_prep(struct soltab *stp, struct rt_db_internal *ip, struct rt_i *rtip)
+{
+    register struct sph_specific *sph;
+    fastf_t magsq_a, magsq_b, magsq_c;
+    vect_t Au, Bu, Cu; /* A, B, C with unit length */
+    fastf_t f;
+    struct rt_ell_internal *eip;
+clt_init();
+    eip = (struct rt_ell_internal *)ip->idb_ptr;
+    RT_ELL_CK_MAGIC(eip);
+
+#ifdef OPENCL
+    clt_init();
+#endif
+
+    /* Validate that |A| > 0, |B| > 0, |C| > 0 */
+    magsq_a = MAGSQ(eip->a);
+    magsq_b = MAGSQ(eip->b);
+    magsq_c = MAGSQ(eip->c);
+    if (magsq_a < rtip->rti_tol.dist_sq || magsq_b < rtip->rti_tol.dist_sq || 
magsq_c < rtip->rti_tol.dist_sq) {
+       bu_log("rt_sph_prep():  sph(%s) zero length A(%g), B(%g), or C(%g) 
vector\n",
+              stp->st_name, magsq_a, magsq_b, magsq_c);
+       return 1;               /* BAD */
+    }
+
+    /* Validate that |A|, |B|, and |C| are nearly equal */
+    if (!NEAR_EQUAL(magsq_a, magsq_b, rtip->rti_tol.dist_sq)
+       || !NEAR_EQUAL(magsq_a, magsq_c, rtip->rti_tol.dist_sq)) {
+       return 1;               /* ELL, not SPH */
+    }
+
+    /* Create unit length versions of A, B, C */
+    f = 1.0/sqrt(magsq_a);
+    VSCALE(Au, eip->a, f);
+    f = 1.0/sqrt(magsq_b);
+    VSCALE(Bu, eip->b, f);
+    f = 1.0/sqrt(magsq_c);
+    VSCALE(Cu, eip->c, f);
+
+    /* Validate that A.B == 0, B.C == 0, A.C == 0 (check dir only) */
+    f = VDOT(Au, Bu);
+    if (! NEAR_ZERO(f, rtip->rti_tol.dist)) {
+       bu_log("rt_sph_prep():  sph(%s) A not perpendicular to B, f=%f\n", 
stp->st_name, f);
+       return 1;               /* BAD */
+    }
+    f = VDOT(Bu, Cu);
+    if (! NEAR_ZERO(f, rtip->rti_tol.dist)) {
+       bu_log("rt_sph_prep():  sph(%s) B not perpendicular to C, f=%f\n", 
stp->st_name, f);
+       return 1;               /* BAD */
+    }
+    f = VDOT(Au, Cu);
+    if (! NEAR_ZERO(f, rtip->rti_tol.dist)) {
+       bu_log("rt_sph_prep():  sph(%s) A not perpendicular to C, f=%f\n", 
stp->st_name, f);
+       return 1;               /* BAD */
+    }
+
+    /*
+     * This ELL is really an SPH
+     */
+    stp->st_id = ID_SPH;               /* "fix" soltab ID */
+    stp->st_meth = &OBJ[ID_SPH];
+
+    /* Solid is OK, compute constant terms now */
+    BU_GET(sph, struct sph_specific);
+    stp->st_specific = (genptr_t)sph;
+
+    VMOVE(sph->sph_V, eip->v);
+
+    sph->sph_radsq = magsq_a;
+    sph->sph_rad = sqrt(sph->sph_radsq);
+    sph->sph_invrad = 1.0 / sph->sph_rad;
+
+    /*
+     * Save the matrix which rotates our ABC to world
+     * XYZ respectively, and scales points on surface
+     * to unit length.  Used here in UV mapping.
+     * See ell.c for details.
+     */
+    MAT_IDN(sph->sph_SoR);
+    VSCALE(&sph->sph_SoR[0], eip->a, 1.0/magsq_a);
+    VSCALE(&sph->sph_SoR[4], eip->b, 1.0/magsq_b);
+    VSCALE(&sph->sph_SoR[8], eip->c, 1.0/magsq_c);
+
+    /* Compute bounding sphere */
+    VMOVE(stp->st_center, sph->sph_V);
+    stp->st_aradius = stp->st_bradius = sph->sph_rad;
+
+    /* Compute bounding RPP */
+    if (stp->st_meth->ft_bbox(ip, &(stp->st_min), &(stp->st_max), 
&(rtip->rti_tol))) return 1;
+    return 0;                  /* OK */
+}
+
+
+/**
+ * R T _ S P H _ P R I N T
+ */
+void
+rt_sph_print(register const struct soltab *stp)
+{
+    register const struct sph_specific *sph =
+       (struct sph_specific *)stp->st_specific;
+
+    VPRINT("V", sph->sph_V);
+    bu_log("Rad %g\n", sph->sph_rad);
+    bu_log("Radsq %g\n", sph->sph_radsq);
+    bu_log("Invrad %g\n", sph->sph_invrad);
+    bn_mat_print("S o R", sph->sph_SoR);
+}
+
+
+/**
+ * R T _ S P H _ S H O T
+ *
+ * Intersect a ray with a sphere.  If an intersection occurs, a struct
+ * seg will be acquired and filled in.
+ *
+ * Notes: In the quadratic equation, A is MAGSQ(r_dir) which is always
+ * equal to 1, so it does not appear.  The sign of B is reversed
+ * (vector is reversed) to save negation.  We have factored out the 2
+ * and 4 constants.
+ *
+ * Claim: The straight quadratic formula leads to precision problems
+ * if either A or C are small.  In our case A is always 1.  C is a
+ * radial distance of the ray origin from the sphere surface.  Thus if
+ * we are shooting from near the surface we may have problems.  XXX -
+ * investigate this.
+ *
+ * Returns -
+ * 0 MISS
+ * >0 HIT
+ */
+int
+rt_sph_shot(struct soltab *stp, register struct xray *rp, struct application 
*ap, struct seg *seghead)
+{
+#ifdef OPENCL
+    cl_double3 o; /* ray origin  */
+    cl_double3 l; /* ray direction (unit vector) */
+    cl_double3 c; /* sphere center  */
+    cl_double r; /* sphere radius */
+    cl_double3 result;
+    struct seg *segp;
+
+    (void)rp; (void)ap;
+
+    VMOVE(o.s, rp->r_pt);
+    VMOVE(l.s, rp->r_dir);
+    VMOVE(c.s, stp->st_center);
+    r = ((struct sph_specific *)stp->st_specific)->sph_rad;
+    result = clt_shot(o, l, c, r);
+
+    if (EQUAL(result.s[0], 0)) return 0; /* no hit  */
+
+    RT_GET_SEG(segp, ap->a_resource);
+    segp->seg_stp = stp;
+
+    segp->seg_in.hit_dist = result.s[1];
+    segp->seg_out.hit_dist = result.s[0];
+    segp->seg_in.hit_surfno = 0;
+    segp->seg_out.hit_surfno = 0;
+    BU_LIST_INSERT(&(seghead->l), &(segp->l));
+    return 2;                  /* HIT */
+
+#else
+    register struct sph_specific *sph =
+       (struct sph_specific *)stp->st_specific;
+    register struct seg *segp;
+
+    vect_t ov;         /* ray origin to center (V - P) */
+    fastf_t magsq_ov;  /* length squared of ov */
+    fastf_t b;         /* second term of quadratic eqn */
+    fastf_t root;              /* root of radical */
+
+    VSUB2(ov, sph->sph_V, rp->r_pt);
+    b = VDOT(rp->r_dir, ov);
+    magsq_ov = MAGSQ(ov);
+
+    if (magsq_ov >= sph->sph_radsq) {
+       /* ray origin is outside of sphere */
+       if (b < 0) {
+           /* ray direction is away from sphere */
+           return 0;           /* No hit */
+       }
+       root = b*b - magsq_ov + sph->sph_radsq;
+       if (root <= 0) {
+           /* no real roots */
+           return 0;           /* No hit */
+       }
+    } else {
+       root = b*b - magsq_ov + sph->sph_radsq;
+    }
+    root = sqrt(root);
+
+    RT_GET_SEG(segp, ap->a_resource);
+    segp->seg_stp = stp;
+
+    /* we know root is positive, so we know the smaller t */
+    segp->seg_in.hit_dist = b - root;
+    segp->seg_out.hit_dist = b + root;
+    segp->seg_in.hit_surfno = 0;
+    segp->seg_out.hit_surfno = 0;
+    BU_LIST_INSERT(&(seghead->l), &(segp->l));
+    return 2;                  /* HIT */
+#endif
+}
+
+
+#define RT_SPH_SEG_MISS(SEG)           (SEG).seg_stp=(struct soltab *) 0;
+/**
+ * R T _ S P H _ V S H O T
+ *
+ * This is the Becker vectorized version
+ */
+void
+rt_sph_vshot(struct soltab **stp, struct xray **rp, struct seg *segp, int n, 
struct application *ap)
+    /* An array of solid pointers */
+    /* An array of ray pointers */
+    /* array of segs (results returned) */
+    /* Number of ray/object pairs */
+
+{
+    register struct sph_specific *sph;
+    register int i;
+
+    vect_t ov;         /* ray origin to center (V - P) */
+    fastf_t magsq_ov;  /* length squared of ov */
+    fastf_t b;         /* second term of quadratic eqn */
+    fastf_t root;              /* root of radical */
+
+    if (ap) RT_CK_APPLICATION(ap);
+
+    /* for each ray/sphere pair */
+    for (i = 0; i < n; i++) {
+       if (stp[i] == 0) continue; /* stp[i] == 0 signals skip ray */
+
+       sph = (struct sph_specific *)stp[i]->st_specific;
+       VSUB2(ov, sph->sph_V, rp[i]->r_pt);
+       b = VDOT(rp[i]->r_dir, ov);
+       magsq_ov = MAGSQ(ov);
+
+       if (magsq_ov >= sph->sph_radsq) {
+           /* ray origin is outside of sphere */
+           if (b < 0) {
+               /* ray direction is away from sphere */
+               RT_SPH_SEG_MISS(segp[i]);               /* No hit */
+               continue;
+           }
+           root = b*b - magsq_ov + sph->sph_radsq;
+           if (root <= 0) {
+               /* no real roots */
+               RT_SPH_SEG_MISS(segp[i]);               /* No hit */
+               continue;
+           }
+       } else {
+           root = b*b - magsq_ov + sph->sph_radsq;
+       }
+       root = sqrt(root);
+
+       segp[i].seg_stp = stp[i];
+
+       /* we know root is positive, so we know the smaller t */
+       segp[i].seg_in.hit_dist = b - root;
+       segp[i].seg_out.hit_dist = b + root;
+       segp[i].seg_in.hit_surfno = 0;
+       segp[i].seg_out.hit_surfno = 0;
+    }
+}
+
+
+/**
+ * R T _ S P H _ N O R M
+ *
+ * Given ONE ray distance, return the normal and entry/exit point.
+ */
+void
+rt_sph_norm(register struct hit *hitp, struct soltab *stp, register struct 
xray *rp)
+{
+    register struct sph_specific *sph =
+       (struct sph_specific *)stp->st_specific;
+
+    VJOIN1(hitp->hit_point, rp->r_pt, hitp->hit_dist, rp->r_dir);
+    VSUB2(hitp->hit_normal, hitp->hit_point, sph->sph_V);
+    VSCALE(hitp->hit_normal, hitp->hit_normal, sph->sph_invrad);
+}
+
+
+/**
+ * R T _ S P H _ C U R V E
+ *
+ * Return the curvature of the sphere.
+ */
+void
+rt_sph_curve(register struct curvature *cvp, register struct hit *hitp, struct 
soltab *stp)
+{
+    register struct sph_specific *sph =
+       (struct sph_specific *)stp->st_specific;
+
+    cvp->crv_c1 = cvp->crv_c2 = - sph->sph_invrad;
+
+    /* any tangent direction */
+    bn_vec_ortho(cvp->crv_pdir, hitp->hit_normal);
+}
+
+
+/**
+ * R T _ S P H _ U V
+ *
+ * For a hit on the surface of an SPH, return the (u, v) coordinates
+ * of the hit point, 0 <= u, v <= 1.
+ *
+ * u = azimuth
+ * v = elevation
+ */
+void
+rt_sph_uv(struct application *ap, struct soltab *stp, register struct hit 
*hitp, register struct uvcoord *uvp)
+{
+    register struct sph_specific *sph =
+       (struct sph_specific *)stp->st_specific;
+    fastf_t r;
+    vect_t work;
+    vect_t pprime;
+
+    /* hit_point is on surface; project back to unit sphere, creating
+     * a vector from vertex to hit point which always has length=1.0
+     */
+    VSUB2(work, hitp->hit_point, sph->sph_V);
+    MAT4X3VEC(pprime, sph->sph_SoR, work);
+    /* Assert that pprime has unit length */
+
+    /* U is azimuth, atan() range: -pi to +pi */
+    uvp->uv_u = bn_atan2(pprime[Y], pprime[X]) * bn_inv2pi;
+    if (uvp->uv_u < 0)
+       uvp->uv_u += 1.0;
+    /*
+     * V is elevation, atan() range: -pi/2 to +pi/2, because sqrt()
+     * ensures that X parameter is always >0
+     */
+    uvp->uv_v = bn_atan2(pprime[Z],
+                        sqrt(pprime[X] * pprime[X] + pprime[Y] * pprime[Y])) *
+       bn_invpi + 0.5;
+
+    /* approximation: r / (circumference, 2 * pi * aradius) */
+    r = ap->a_rbeam + ap->a_diverge * hitp->hit_dist;
+    uvp->uv_du = uvp->uv_dv =
+       bn_inv2pi * r / stp->st_aradius;
+}
+
+
+/**
+ * R T _ S P H _ F R E E
+ */
+void
+rt_sph_free(register struct soltab *stp)
+{
+    register struct sph_specific *sph =
+       (struct sph_specific *)stp->st_specific;
+
+    BU_PUT(sph, struct sph_specific);
+}
+
+
+int
+rt_sph_class(void)
+{
+    return 0;
+}
+
+
+/**
+ * R T _ S P H _ P A R A M S
+ *
+ */
+int
+rt_sph_params(struct pc_pc_set *UNUSED(ps), const struct rt_db_internal *ip)
+{
+    if (ip) RT_CK_DB_INTERNAL(ip);
+
+    return 0;                  /* OK */
+}
+
+
+/* ELL versions are used for many of the callbacks */
+
+
+/*
+ * Local Variables:
+ * mode: C
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * c-file-style: "stroustrup"
+ * End:
+ * ex: shiftwidth=4 tabstop=8
+ */


Property changes on: brlcad/branches/opencl/src/librt/primitives/sph/mem_sph.c
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: brlcad/branches/opencl/src/librt/primitives/sph/sph.c
===================================================================
--- brlcad/branches/opencl/src/librt/primitives/sph/sph.c       2013-08-12 
12:07:40 UTC (rev 56743)
+++ brlcad/branches/opencl/src/librt/primitives/sph/sph.c       2013-08-12 
15:17:39 UTC (rev 56744)
@@ -97,19 +97,19 @@
 
 
 const char * const clt_program_code = "\
-__kernel void ell_shot(__global float *output, float3 o, float3 l, float3 c, 
float r)\n\
+__kernel void ell_shot(__global double3 *output, double3 o, double3 l, double3 
c, double r)\n\
 {\n\
-    float A = dot(l, l);\n\
-    float B = 2*dot(l, o-c);\n\
-    float C = dot(o-c, o-c) - r*r;\n\
-    float disc = B*B - 4*A*C;\n\
+    double A = dot(l, l);\n\
+    double B = 2*dot(l, o-c);\n\
+    double C = dot(o-c, o-c) - r*r;\n\
+    double disc = B*B - 4*A*C;\n\
 \n\
     if (disc <= 0) return;\n\
 \n\
-    float q = B < 0 ? (-B + sqrt(disc))/2 : (-B - sqrt(disc))/2;\n\
+    double q = B < 0 ? (-B + sqrt(disc))/2 : (-B - sqrt(disc))/2;\n\
     \n\
-    output[0] = q/A;\n\
-    output[1] = C/q;\n\
+    (*output)[0] = q/A;\n\
+    (*output)[1] = C/q;\n\
 }\n\
 \n\
 ";
@@ -187,25 +187,25 @@
 }
 
 
-static cl_float3
-clt_shot(cl_float3 o /* ray origin */, cl_float3 l /* ray direction (unit 
vector) */,
-cl_float3 c /* sphere center */, cl_float r /* sphere radius */)
+static cl_double3
+clt_shot(cl_double3 o /* ray origin */, cl_double3 l /* ray direction (unit 
vector) */,
+cl_double3 c /* sphere center */, cl_double r /* sphere radius */)
 {
     cl_int error;
     cl_mem output;
-    cl_float3 result;
+    cl_double3 result;
     const size_t global_size = 1;
 
     VSET(result.s, 0, 0, 0);
     output = clCreateBuffer(clt_context, CL_MEM_USE_HOST_PTR | 
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
-           sizeof(cl_float3), &result, &error);
+           sizeof(cl_double3), &result, &error);
     if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL output buffer");
 
     error = clSetKernelArg(clt_kernel, 0, sizeof(cl_mem), &output);
-    error |= clSetKernelArg(clt_kernel, 1, sizeof(cl_float3), &o);
-    error |= clSetKernelArg(clt_kernel, 2, sizeof(cl_float3), &l);
-    error |= clSetKernelArg(clt_kernel, 3, sizeof(cl_float3), &c);
-    error |= clSetKernelArg(clt_kernel, 4, sizeof(cl_float), &r);
+    error |= clSetKernelArg(clt_kernel, 1, sizeof(cl_double3), &o);
+    error |= clSetKernelArg(clt_kernel, 2, sizeof(cl_double3), &l);
+    error |= clSetKernelArg(clt_kernel, 3, sizeof(cl_double3), &c);
+    error |= clSetKernelArg(clt_kernel, 4, sizeof(cl_double), &r);
     if (error != CL_SUCCESS) bu_bomb("failed to set OpenCL kernel arguments");
 
     error = clEnqueueNDRangeKernel(clt_queue, clt_kernel, 1, NULL, 
&global_size, NULL, 0, NULL, NULL);
@@ -370,11 +370,11 @@
 rt_sph_shot(struct soltab *stp, register struct xray *rp, struct application 
*ap, struct seg *seghead)
 {
 #ifdef OPENCL
-    cl_float3 o; /* ray origin  */
-    cl_float3 l; /* ray direction (unit vector) */
-    cl_float3 c; /* sphere center  */
-    cl_float r; /* sphere radius */
-    cl_float3 result;
+    cl_double3 o; /* ray origin  */
+    cl_double3 l; /* ray direction (unit vector) */
+    cl_double3 c; /* sphere center  */
+    cl_double r; /* sphere radius */
+    cl_double3 result;
     struct seg *segp;
 
     (void)rp; (void)ap;

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Get 100% visibility into Java/.NET code with AppDynamics Lite!
It's a free troubleshooting tool designed for production.
Get down to code-level detail for bottlenecks, with <2% overhead. 
Download for free and get started troubleshooting in minutes. 
http://pubads.g.doubleclick.net/gampad/clk?id=48897031&iu=/4140/ostg.clktrk
_______________________________________________
BRL-CAD Source Commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/brlcad-commits

Reply via email to