Hi,

I tried to call the rt_boolweave() and rt_boolfinal() routines on segments
generated on the GPU. I'm fairly sure the weaving step works, but
rt_boolfinal() does not: it returns an empty partition list for every
pixel, which I confirmed with bu_log() statements.
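
In simplified form, the check I used looks like this (final_part is the
partition list head that rt_boolfinal() is supposed to fill in; the head
pointing back to itself means the list is empty):

    /* after rt_boolfinal() returns */
    if (final_part.pt_forw == &final_part)
        bu_log("pixel %zu: empty partition list\n", i);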

I've included the patch below. Besides the new weaving code in
primitive_util.c, it wraps a few checks in rt_boolweave() in #ifndef
USE_OPENCL, since the GPU segments don't carry the ray and soltab pointers
those checks dereference. It'd be great if someone could spot where I'm
going wrong.
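
For context, the relevant flow in clt_frame(), right after the store-segs
kernel is enqueued, is:

    /* read the GPU segment buffer back and weave/evaluate it on the CPU */
    clEnqueueReadBuffer(clt_queue, psegs, CL_TRUE, 0, sz_segs, segs,
                        0, NULL, NULL);
    clt_boolweave_and_boolfinal(segs, ap, h, width, height);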

Thanks.
Param


On Wed, Jun 8, 2016 at 4:32 PM Vasco Alexandre da Silva Costa <
vasco.co...@gmail.com> wrote:

> On Wed, Jun 8, 2016 at 8:04 AM, Param Hanji <param.catchch...@gmail.com>
> wrote:
>
>> Another thing I realized is that while attempting to call boolweave() and
>> boolfinal(), I'm replicating a lot of the code written in librt/shoot.c.
>> This is because the structures used for GPU and CPU storage of segments
>> are a little different (the CPU storage, of course, is more elaborate).
>> The soltab pointer in the CPU seg structure is proving especially
>> troublesome. Is it vital that I replicate everything? And is there a more
>> elegant way of achieving this?
>>
>
> Only replicate what's necessary to get the boolean weaving to work. We
> don't want to reimplement the whole rendering pipeline in a summer. Believe
> me, I tried. :-)
>
> --
> Vasco Alexandre da Silva Costa
> PhD in Computer Engineering (Computer Graphics)
> Instituto Superior Técnico/University of Lisbon, Portugal
>
Index: src/librt/primitives/primitive_util.c
===================================================================
--- src/librt/primitives/primitive_util.c	(revision 68060)
+++ src/librt/primitives/primitive_util.c	(working copy)
@@ -29,6 +29,7 @@
 #include "optical.h"
 #include "../librt_private.h"
 #include "optical/plastic.h"
+#include <stdbool.h>
 
 /**
  * If only the absolute tolerance is valid (positive), it is returned.
@@ -812,10 +813,10 @@
 
 void
 clt_frame(void *pixels, uint8_t o[3], int cur_pixel, int last_pixel,
-	  int width, int ibackground[3], int inonbackground[3],
+	  int width, int height, int ibackground[3], int inonbackground[3],
 	  double airdensity, double haze[3], fastf_t gamma,
           mat_t view2model, fastf_t cell_width, fastf_t cell_height,
-          fastf_t aspect, int lightmodel)
+          fastf_t aspect, int lightmodel, struct application *ap)
 {
     const size_t npix = last_pixel-cur_pixel+1;
 
@@ -882,6 +883,7 @@
 	    size_t sz_segs;
 	    cl_mem psegs;
 	    size_t snpix = swxh[0]*swxh[1];
+	    struct cl_seg *segs;
 
 	    sz_counts = sizeof(cl_int)*npix;
 	    pcounts = clCreateBuffer(clt_context, CL_MEM_WRITE_ONLY|CL_MEM_HOST_READ_ONLY, sz_counts, NULL, &error);
@@ -914,10 +916,12 @@
 	    sz_h = sizeof(cl_uint)*(npix+1);
 	    h = (cl_uint*)bu_calloc(1, sz_h, "h");
 	    h[0] = 0;
+
 	    for (i=1; i<=npix; i++) {
 		BU_ASSERT((counts[i-1] % 2) == 0);
 		h[i] = h[i-1] + counts[i-1]/2;	/* number of segs is half the number of hits */
 	    }
+
 	    bu_free(counts, "counts");
 
 	    ph = clCreateBuffer(clt_context, CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY|CL_MEM_COPY_HOST_PTR, sz_h, h, &error);
@@ -924,7 +928,7 @@
 	    if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL offs buffer");
 
 	    sz_segs = sizeof(struct cl_seg)*h[npix];
-	    bu_free(h, "h");
+	    segs = (struct cl_seg*)bu_malloc(sz_segs, "segments on gpu");
 
 	    if (sz_segs != 0) {
 		psegs = clCreateBuffer(clt_context, CL_MEM_READ_WRITE|CL_MEM_HOST_NO_ACCESS, sz_segs, NULL, &error);
@@ -948,10 +952,13 @@
 		if (error != CL_SUCCESS) bu_bomb("failed to set OpenCL kernel arguments");
 		error = clEnqueueNDRangeKernel(clt_queue, clt_store_segs_kernel, 2, NULL, wxh,
 			swxh, 0, NULL, NULL);
+		clEnqueueReadBuffer(clt_queue, psegs, CL_TRUE, 0, sz_segs, segs, 0, NULL, NULL);
+		clt_boolweave_and_boolfinal(segs, ap, h, width, height);
 		bu_semaphore_release(clt_semaphore);
             } else {
 		psegs = NULL;
             }
+	    bu_free(h, "h");
 
 	    bu_semaphore_acquire(clt_semaphore);
 	    error = clSetKernelArg(clt_shade_segs_kernel, 0, sizeof(cl_mem), &ppixels);
@@ -1024,6 +1031,179 @@
     clEnqueueReadBuffer(clt_queue, ppixels, CL_TRUE, 0, sz_pixels, pixels, 0, NULL, NULL);
     clReleaseMemObject(ppixels);
 }
+
+void
+copy_to_CPU(struct seg *cpu_seg, struct cl_seg *gpu_seg)
+{
+    cpu_seg->seg_in.hit_normal[X] = gpu_seg->seg_in.hit_normal.x;
+    cpu_seg->seg_in.hit_normal[Y] = gpu_seg->seg_in.hit_normal.y;
+    cpu_seg->seg_in.hit_normal[Z] = gpu_seg->seg_in.hit_normal.z;
+
+    cpu_seg->seg_in.hit_point[X] = gpu_seg->seg_in.hit_point.x;
+    cpu_seg->seg_in.hit_point[Y] = gpu_seg->seg_in.hit_point.y;
+    cpu_seg->seg_in.hit_point[Z] = gpu_seg->seg_in.hit_point.z;
+
+    cpu_seg->seg_in.hit_vpriv[X] = gpu_seg->seg_in.hit_vpriv.x;
+    cpu_seg->seg_in.hit_vpriv[Y] = gpu_seg->seg_in.hit_vpriv.y;
+    cpu_seg->seg_in.hit_vpriv[Z] = gpu_seg->seg_in.hit_vpriv.z;
+
+    cpu_seg->seg_in.hit_dist = gpu_seg->seg_in.hit_dist;
+    cpu_seg->seg_in.hit_surfno = gpu_seg->seg_in.hit_surfno;
+
+
+    cpu_seg->seg_out.hit_normal[X] = gpu_seg->seg_out.hit_normal.x;
+    cpu_seg->seg_out.hit_normal[Y] = gpu_seg->seg_out.hit_normal.y;
+    cpu_seg->seg_out.hit_normal[Z] = gpu_seg->seg_out.hit_normal.z;
+
+    cpu_seg->seg_out.hit_point[X] = gpu_seg->seg_out.hit_point.x;
+    cpu_seg->seg_out.hit_point[Y] = gpu_seg->seg_out.hit_point.y;
+    cpu_seg->seg_out.hit_point[Z] = gpu_seg->seg_out.hit_point.z;
+
+    cpu_seg->seg_out.hit_vpriv[X] = gpu_seg->seg_out.hit_vpriv.x;
+    cpu_seg->seg_out.hit_vpriv[Y] = gpu_seg->seg_out.hit_vpriv.y;
+    cpu_seg->seg_out.hit_vpriv[Z] = gpu_seg->seg_out.hit_vpriv.z;
+
+    cpu_seg->seg_out.hit_dist = gpu_seg->seg_out.hit_dist;
+    cpu_seg->seg_out.hit_surfno = gpu_seg->seg_out.hit_surfno;
+
+    if (gpu_seg->seg_sti)
+	cpu_seg->seg_stp->st_id = gpu_seg->seg_sti;
+}
+
+void
+copy_to_GPU(struct cl_seg *gpu_seg, struct partition *cpu_part)
+{
+    gpu_seg->seg_in.hit_normal.x = cpu_part->pt_inhit->hit_normal[X];
+    gpu_seg->seg_in.hit_normal.y = cpu_part->pt_inhit->hit_normal[Y];
+    gpu_seg->seg_in.hit_normal.z = cpu_part->pt_inhit->hit_normal[Z];
+
+    gpu_seg->seg_in.hit_point.x = cpu_part->pt_inhit->hit_point[X];
+    gpu_seg->seg_in.hit_point.y = cpu_part->pt_inhit->hit_point[Y];
+    gpu_seg->seg_in.hit_point.z = cpu_part->pt_inhit->hit_point[Z];
+
+    gpu_seg->seg_in.hit_vpriv.x = cpu_part->pt_inhit->hit_vpriv[X];
+    gpu_seg->seg_in.hit_vpriv.y = cpu_part->pt_inhit->hit_vpriv[Y];
+    gpu_seg->seg_in.hit_vpriv.z = cpu_part->pt_inhit->hit_vpriv[Z];
+
+    gpu_seg->seg_in.hit_dist = cpu_part->pt_inhit->hit_dist;
+    gpu_seg->seg_in.hit_surfno = cpu_part->pt_inhit->hit_surfno;
+
+
+    gpu_seg->seg_out.hit_normal.x = cpu_part->pt_outhit->hit_normal[X];
+    gpu_seg->seg_out.hit_normal.y = cpu_part->pt_outhit->hit_normal[Y];
+    gpu_seg->seg_out.hit_normal.z = cpu_part->pt_outhit->hit_normal[Z];
+
+    gpu_seg->seg_out.hit_point.x = cpu_part->pt_outhit->hit_point[X];
+    gpu_seg->seg_out.hit_point.y = cpu_part->pt_outhit->hit_point[Y];
+    gpu_seg->seg_out.hit_point.z = cpu_part->pt_outhit->hit_point[Z];
+
+    gpu_seg->seg_out.hit_vpriv.x = cpu_part->pt_outhit->hit_vpriv[X];
+    gpu_seg->seg_out.hit_vpriv.y = cpu_part->pt_outhit->hit_vpriv[Y];
+    gpu_seg->seg_out.hit_vpriv.z = cpu_part->pt_outhit->hit_vpriv[Z];
+
+    gpu_seg->seg_out.hit_dist = cpu_part->pt_outhit->hit_dist;
+    gpu_seg->seg_out.hit_surfno = cpu_part->pt_outhit->hit_surfno;
+}
+
+void
+clt_boolweave_and_boolfinal(struct cl_seg *segs, struct application *ap,
+			    cl_uint *h, int width, int height)
+{
+    size_t i, j;
+    size_t x = 0;
+
+    for (i = 0; i < (size_t)(width * height); i++) {
+	struct seg waiting_segs;
+	struct seg finished_segs;
+	struct seg *temp_seg;
+	struct partition initial_part;
+	struct partition final_part;
+	struct bu_ptbl *regionbits;
+	struct resource *resp;
+	struct bu_bitv *solidbits;
+	struct rt_i *rtip;
+	struct partition *pp;
+	bool weave = false;
+
+	RT_AP_CHECK(ap);
+	if (ap->a_magic) {
+	    RT_CK_AP(ap);
+	}
+	else {
+	    ap->a_magic = RT_AP_MAGIC;
+	}
+	if (ap->a_ray.magic) {
+	    RT_CK_RAY(&(ap->a_ray));
+	}
+	else {
+	    ap->a_ray.magic = RT_RAY_MAGIC;
+	}
+	if (ap->a_resource == RESOURCE_NULL) {
+	    ap->a_resource = &rt_uniresource;
+	    if (rt_uniresource.re_magic == 0)
+		rt_init_resource(&rt_uniresource, 0, ap->a_rt_i);
+	}
+
+	resp = ap->a_resource;
+	RT_CK_RESOURCE(resp);
+
+	rtip = ap->a_rt_i;
+	if (rtip->needprep)
+	    rt_prep_parallel(rtip, 1);	/* Stay on our CPU */
+
+	if (!resp->re_pieces && rtip->rti_nsolids_with_pieces > 0) {
+	    /* Initialize this processor's 'solid pieces' state */
+	    rt_res_pieces_init(resp, rtip);
+	}
+
+	solidbits = rt_get_solidbitv(ap->a_rt_i->nsolids, resp);
+
+	initial_part.pt_forw = initial_part.pt_back = &initial_part;
+	initial_part.pt_magic = PT_HD_MAGIC;
+	final_part.pt_forw = final_part.pt_back = &final_part;
+	final_part.pt_magic = PT_HD_MAGIC;
+	ap->a_Final_Part_hdp = &final_part;
+
+	BU_LIST_INIT(&waiting_segs.l);
+	BU_LIST_INIT(&finished_segs.l);
+
+	ap->a_finished_segs_hdp = &finished_segs;
+
+	if (BU_LIST_IS_EMPTY(&resp->re_region_ptbl)) {
+	    BU_ALLOC(regionbits, struct bu_ptbl);
+	    bu_ptbl_init(regionbits, 7, "rt_shootray() regionbits ptbl");
+	} else {
+	    regionbits = BU_LIST_FIRST(bu_ptbl, &resp->re_region_ptbl);
+	    BU_LIST_DEQUEUE(&regionbits->l);
+	    BU_CK_PTBL(regionbits);
+	}
+
+	for (j = h[i]; j < h[i+1]; j++) {
+	    RT_GET_SEG(temp_seg, ap->a_resource);
+	    copy_to_CPU(temp_seg, &segs[j]);
+	    BU_LIST_INSERT(&(waiting_segs.l), &(temp_seg->l));
+	}
+
+	if (BU_LIST_NON_EMPTY(&(waiting_segs.l))) {
+	    rt_boolweave(&finished_segs, &waiting_segs, &initial_part, ap);
+	    weave = true;
+	}
+
+	if (weave)
+	    (void)rt_boolfinal(&initial_part, &final_part, BACKING_DIST,
+			       INFINITY, regionbits, ap, solidbits);
+
+	for (BU_LIST_FOR(pp, partition, (struct bu_list *)&final_part)) {
+	    copy_to_GPU(&segs[x], pp);	/* This loop is never entered */
+	    h[x] = i;			/* Code to copy evaluated segments back to GPU */
+	    x++;
+	}
+
+	RT_FREE_PT_LIST(&initial_part, resp);
+	RT_FREE_SEG_LIST(&finished_segs, resp);
+	RT_FREE_PT_LIST(&final_part, resp);
+    }
+}
 #endif
 
 
Index: src/librt/bool.c
===================================================================
--- src/librt/bool.c	(revision 68060)
+++ src/librt/bool.c	(working copy)
@@ -174,9 +174,13 @@
 	segp = BU_LIST_FIRST(seg, &(in_hd->l));
 	RT_CHECK_SEG(segp);
 	RT_CK_HIT(&(segp->seg_in));
+#ifndef USE_OPENCL
 	RT_CK_RAY(segp->seg_in.hit_rayp);
+#endif
 	RT_CK_HIT(&(segp->seg_out));
+#ifndef USE_OPENCL
 	RT_CK_RAY(segp->seg_out.hit_rayp);
+#endif
 	if (RT_G_DEBUG&DEBUG_PARTITION) {
 	    point_t pt;
 
@@ -193,9 +197,10 @@
 	    VPRINT(" OPoint", pt);
 	    bu_log("***********\n");
 	}
+#ifndef USE_OPENCL
 	if ((size_t)segp->seg_stp->st_bit >= rtip->nsolids)
 	    bu_bomb("rt_boolweave: st_bit");
-
+#endif
 	BU_LIST_DEQUEUE(&(segp->l));
 	BU_LIST_INSERT(&(out_hd->l), &(segp->l));
 
@@ -209,6 +214,7 @@
 	if (segp->seg_out.hit_dist < -10.0)
 	    continue;
 
+#ifndef USE_OPENCL
 	if (segp->seg_stp->st_aradius < INFINITY &&
 	    !(segp->seg_in.hit_dist >= -INFINITY &&
 	      segp->seg_out.hit_dist <= INFINITY)) {
@@ -231,7 +237,7 @@
 		   segp->seg_out.hit_surfno);
 	    continue;
 	}
-
+#endif
 	diff = segp->seg_in.hit_dist - segp->seg_out.hit_dist;
 
 	/*