On Mon, Jul 4, 2016 at 9:52 PM Vasco Alexandre da Silva Costa <
vasco.co...@gmail.com> wrote:

> On Mon, Jul 4, 2016 at 12:30 PM, Param Hanji <param.catchch...@gmail.com>
> wrote:
>
>> Hi Vasco,
>>
>> I tried everything and still no luck. I'm pretty sure just copying inhits
>> and outhits is not enough. Going by Sean's explanation of segments and
>> partitions, it looks like I have no option but to implement partitions on
>> the GPU.
>>
>
> IIRC the code doesn't use partitions before calling rt_bool_*. Only
> segments. So I don't see how this is a problem.
> Have you examined the output lists in all stages?
>
>
>
Well to check if the bool functions are working as expected, I inserted
code to print number of finalPartitions (after weave and boolfinal) for
every pixel. I did this in shoot.c and primitive_util.c as well. Then I ran
rt in serial as well as OpenCL mode.

The results were very different. When boolean operations like subtraction
were used, number of final partitions was 0 for every pixel. In ANSI C,
this was not the case and partitions were indeed generated. This is why I
think I'm missing something or doing something wrong.

Here's a patch i just generated.
Index: include/rt/shoot.h
===================================================================
--- include/rt/shoot.h	(revision 68231)
+++ include/rt/shoot.h	(working copy)
@@ -178,10 +178,10 @@
 
 RT_EXPORT extern void
 clt_frame(void *pixels, uint8_t o[3], int cur_pixel, int last_pixel,
-	  int width, int ibackground[3], int inonbackground[3],
+	  int width, int height, int ibackground[3], int inonbackground[3],
 	  double airdensity, double haze[3], fastf_t gamma,
           mat_t view2model, fastf_t cell_width, fastf_t cell_height,
-          fastf_t aspect, int lightmodel);
+          fastf_t aspect, int lightmodel, struct application *ap);
 #endif
 
 
Index: src/librt/bool.c
===================================================================
--- src/librt/bool.c	(revision 68231)
+++ src/librt/bool.c	(working copy)
@@ -174,9 +174,13 @@
 	segp = BU_LIST_FIRST(seg, &(in_hd->l));
 	RT_CHECK_SEG(segp);
 	RT_CK_HIT(&(segp->seg_in));
+#ifndef USE_OPENCL
 	RT_CK_RAY(segp->seg_in.hit_rayp);
+#endif
 	RT_CK_HIT(&(segp->seg_out));
+#ifndef USE_OPENCL
 	RT_CK_RAY(segp->seg_out.hit_rayp);
+#endif
 	if (RT_G_DEBUG&DEBUG_PARTITION) {
 	    point_t pt;
 
Index: src/librt/primitives/primitive_util.c
===================================================================
--- src/librt/primitives/primitive_util.c	(revision 68231)
+++ src/librt/primitives/primitive_util.c	(working copy)
@@ -24,6 +24,8 @@
  * librt_private.h.
  */
 
+#include <stdbool.h>
+
 #include "bu/malloc.h"
 #include "bu/opt.h"
 #include "optical.h"
@@ -814,10 +816,10 @@
 
 void
 clt_frame(void *pixels, uint8_t o[3], int cur_pixel, int last_pixel,
-	  int width, int ibackground[3], int inonbackground[3],
+	  int width, int height, int ibackground[3], int inonbackground[3],
 	  double airdensity, double haze[3], fastf_t gamma,
           mat_t view2model, fastf_t cell_width, fastf_t cell_height,
-          fastf_t aspect, int lightmodel)
+          fastf_t aspect, int lightmodel, struct application *ap)
 {
     const size_t npix = last_pixel-cur_pixel+1;
 
@@ -884,6 +886,7 @@
 	    size_t sz_segs;
 	    cl_mem psegs;
 	    size_t snpix = swxh[0]*swxh[1];
+	    struct cl_seg *segs;
 
 	    sz_counts = sizeof(cl_int)*npix;
 	    pcounts = clCreateBuffer(clt_context, CL_MEM_WRITE_ONLY|CL_MEM_HOST_READ_ONLY, sz_counts, NULL, &error);
@@ -916,10 +919,12 @@
 	    sz_h = sizeof(cl_uint)*(npix+1);
 	    h = (cl_uint*)bu_calloc(1, sz_h, "h");
 	    h[0] = 0;
+
 	    for (i=1; i<=npix; i++) {
 		BU_ASSERT((counts[i-1] % 2) == 0);
 		h[i] = h[i-1] + counts[i-1]/2;	/* number of segs is half the number of hits */
 	    }
+
 	    bu_free(counts, "counts");
 
 	    ph = clCreateBuffer(clt_context, CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY|CL_MEM_COPY_HOST_PTR, sz_h, h, &error);
@@ -926,10 +931,10 @@
 	    if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL offs buffer");
 
 	    sz_segs = sizeof(struct cl_seg)*h[npix];
-	    bu_free(h, "h");
+	    segs = (struct cl_seg*)bu_malloc(sz_segs, "segments on gpu");
 
 	    if (sz_segs != 0) {
-		psegs = clCreateBuffer(clt_context, CL_MEM_READ_WRITE|CL_MEM_HOST_NO_ACCESS, sz_segs, NULL, &error);
+		psegs = clCreateBuffer(clt_context, CL_MEM_READ_WRITE, sz_segs, NULL, &error);
 		if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL segs buffer");
 
 		bu_semaphore_acquire(clt_semaphore);
@@ -950,10 +955,17 @@
 		if (error != CL_SUCCESS) bu_bomb("failed to set OpenCL kernel arguments");
 		error = clEnqueueNDRangeKernel(clt_queue, clt_store_segs_kernel, 2, NULL, wxh,
 			swxh, 0, NULL, NULL);
+		error = clEnqueueReadBuffer(clt_queue, psegs, CL_TRUE, 0, sz_segs, segs, 0, NULL, NULL);
+		if (error != CL_SUCCESS) bu_bomb("failed to read OpenCL segs buffer");
+		clt_boolweave_and_boolfinal(segs, ap, h, width, height);
+		error = clEnqueueWriteBuffer(clt_queue, psegs, CL_TRUE, 0, sz_segs, segs, 0, NULL, NULL);
+		if (error != CL_SUCCESS) bu_bomb("failed to write into OpenCL segs buffer");
+
 		bu_semaphore_release(clt_semaphore);
             } else {
 		psegs = NULL;
             }
+	    bu_free(h, "h");
 
 	    bu_semaphore_acquire(clt_semaphore);
 	    error = clSetKernelArg(clt_shade_segs_kernel, 0, sizeof(cl_mem), &ppixels);
@@ -1026,6 +1038,218 @@
     clEnqueueReadBuffer(clt_queue, ppixels, CL_TRUE, 0, sz_pixels, pixels, 0, NULL, NULL);
     clReleaseMemObject(ppixels);
 }
+
+void
+copy_to_CPU(struct seg *cpu_seg, struct cl_seg *gpu_seg, struct application a)
+{
+    struct soltab *stp;
+    unsigned int i = 0;
+
+    cpu_seg->seg_in.hit_normal[X] = gpu_seg->seg_in.hit_normal.x;
+    cpu_seg->seg_in.hit_normal[Y] = gpu_seg->seg_in.hit_normal.y;
+    cpu_seg->seg_in.hit_normal[Z] = gpu_seg->seg_in.hit_normal.z;
+
+    cpu_seg->seg_in.hit_point[X] = gpu_seg->seg_in.hit_point.x;
+    cpu_seg->seg_in.hit_point[Y] = gpu_seg->seg_in.hit_point.y;
+    cpu_seg->seg_in.hit_point[Z] = gpu_seg->seg_in.hit_point.z;
+
+    cpu_seg->seg_in.hit_vpriv[X] = gpu_seg->seg_in.hit_vpriv.x;
+    cpu_seg->seg_in.hit_vpriv[Y] = gpu_seg->seg_in.hit_vpriv.y;
+    cpu_seg->seg_in.hit_vpriv[Z] = gpu_seg->seg_in.hit_vpriv.z;
+
+    cpu_seg->seg_in.hit_dist = gpu_seg->seg_in.hit_dist;
+    cpu_seg->seg_in.hit_surfno = gpu_seg->seg_in.hit_surfno;
+
+
+    cpu_seg->seg_out.hit_normal[X] = gpu_seg->seg_out.hit_normal.x;
+    cpu_seg->seg_out.hit_normal[Y] = gpu_seg->seg_out.hit_normal.y;
+    cpu_seg->seg_out.hit_normal[Z] = gpu_seg->seg_out.hit_normal.z;
+
+    cpu_seg->seg_out.hit_point[X] = gpu_seg->seg_out.hit_point.x;
+    cpu_seg->seg_out.hit_point[Y] = gpu_seg->seg_out.hit_point.y;
+    cpu_seg->seg_out.hit_point[Z] = gpu_seg->seg_out.hit_point.z;
+
+    cpu_seg->seg_out.hit_vpriv[X] = gpu_seg->seg_out.hit_vpriv.x;
+    cpu_seg->seg_out.hit_vpriv[Y] = gpu_seg->seg_out.hit_vpriv.y;
+    cpu_seg->seg_out.hit_vpriv[Z] = gpu_seg->seg_out.hit_vpriv.z;
+
+    cpu_seg->seg_out.hit_dist = gpu_seg->seg_out.hit_dist;
+    cpu_seg->seg_out.hit_surfno = gpu_seg->seg_out.hit_surfno;
+
+    RT_VISIT_ALL_SOLTABS_START(stp, a.a_rt_i) {
+	/* Ignore "dead" solids in the list.  (They failed prep) */
+	if (stp->st_aradius <= 0) continue;
+	/* Infinite solids make the BVH construction explode. */
+	if (stp->st_aradius >= INFINITY) continue;
+
+	if (i == gpu_seg->seg_sti) {
+	    cpu_seg->seg_stp = stp;
+	    break;
+	}
+	i++;
+    } RT_VISIT_ALL_SOLTABS_END
+}
+
+void
+copy_to_GPU(struct cl_seg *gpu_seg, struct partition *cpu_part, struct application a)
+{
+    struct soltab *stp;
+    struct seg **segpp;
+    unsigned int i = 0;
+
+    gpu_seg->seg_in.hit_normal.x = cpu_part->pt_inhit->hit_normal[X];
+    gpu_seg->seg_in.hit_normal.y = cpu_part->pt_inhit->hit_normal[Y];
+    gpu_seg->seg_in.hit_normal.z = cpu_part->pt_inhit->hit_normal[Z];
+
+    gpu_seg->seg_in.hit_point.x = cpu_part->pt_inhit->hit_point[X];
+    gpu_seg->seg_in.hit_point.y = cpu_part->pt_inhit->hit_point[Y];
+    gpu_seg->seg_in.hit_point.z = cpu_part->pt_inhit->hit_point[Z];
+
+    gpu_seg->seg_in.hit_vpriv.x = cpu_part->pt_inhit->hit_vpriv[X];
+    gpu_seg->seg_in.hit_vpriv.y = cpu_part->pt_inhit->hit_vpriv[Y];
+    gpu_seg->seg_in.hit_vpriv.z = cpu_part->pt_inhit->hit_vpriv[Z];
+
+    gpu_seg->seg_in.hit_dist = cpu_part->pt_inhit->hit_dist;
+    gpu_seg->seg_in.hit_surfno = cpu_part->pt_inhit->hit_surfno;
+
+
+    gpu_seg->seg_out.hit_normal.x = cpu_part->pt_outhit->hit_normal[X];
+    gpu_seg->seg_out.hit_normal.y = cpu_part->pt_outhit->hit_normal[Y];
+    gpu_seg->seg_out.hit_normal.z = cpu_part->pt_outhit->hit_normal[Z];
+
+    gpu_seg->seg_out.hit_point.x = cpu_part->pt_outhit->hit_point[X];
+    gpu_seg->seg_out.hit_point.y = cpu_part->pt_outhit->hit_point[Y];
+    gpu_seg->seg_out.hit_point.z = cpu_part->pt_outhit->hit_point[Z];
+
+    gpu_seg->seg_out.hit_vpriv.x = cpu_part->pt_outhit->hit_vpriv[X];
+    gpu_seg->seg_out.hit_vpriv.y = cpu_part->pt_outhit->hit_vpriv[Y];
+    gpu_seg->seg_out.hit_vpriv.z = cpu_part->pt_outhit->hit_vpriv[Z];
+
+    gpu_seg->seg_out.hit_dist = cpu_part->pt_outhit->hit_dist;
+    gpu_seg->seg_out.hit_surfno = cpu_part->pt_outhit->hit_surfno;
+
+    RT_VISIT_ALL_SOLTABS_START(stp, a.a_rt_i) {
+	/* Ignore "dead" solids in the list.  (They failed prep) */
+	if (stp->st_aradius <= 0) continue;
+	/* Infinite solids make the BVH construction explode. */
+	if (stp->st_aradius >= INFINITY) continue;
+
+	for (BU_PTBL_FOR(segpp, (struct seg **), &cpu_part->pt_seglist))
+	    if ((*segpp)->seg_stp == stp) {
+		gpu_seg->seg_sti = i;
+		break;
+	    }
+	i++;
+    } RT_VISIT_ALL_SOLTABS_END
+}
+
+void
+clt_boolweave_and_boolfinal(struct cl_seg *segs, struct application *ap,
+			    cl_uint *h, int width, int height)
+{
+    size_t i, j;
+    size_t x, out = 0;
+    FILE *file;
+
+    for (i = 0; i < (size_t)(width * height); i++) {
+	struct seg waiting_segs;
+	struct seg finished_segs;
+	struct seg *temp_seg;
+	struct partition initial_part;
+	struct partition final_part;
+	struct bu_ptbl *regionbits;
+	struct resource *resp;
+	struct bu_bitv *solidbits;
+	struct rt_i *rtip;
+	struct partition *pp;
+	bool weave;
+
+	RT_AP_CHECK(ap);
+	if (ap->a_magic) {
+	    RT_CK_AP(ap);
+	}
+	else {
+	    ap->a_magic = RT_AP_MAGIC;
+	}
+	if (ap->a_ray.magic) {
+	    RT_CK_RAY(&(ap->a_ray));
+	}
+	else {
+	    ap->a_ray.magic = RT_RAY_MAGIC;
+	}
+	if (ap->a_resource == RESOURCE_NULL) {
+	    ap->a_resource = &rt_uniresource;
+	    if (rt_uniresource.re_magic == 0)
+		rt_init_resource(&rt_uniresource, 0, ap->a_rt_i);
+	}
+
+	resp = ap->a_resource;
+	RT_CK_RESOURCE(resp);
+
+	rtip = ap->a_rt_i;
+	if (rtip->needprep)
+	    rt_prep_parallel(rtip, 1);	/* Stay on our CPU */
+
+	if (!resp->re_pieces && rtip->rti_nsolids_with_pieces > 0) {
+	    /* Initialize this processors 'solid pieces' state */
+	    rt_res_pieces_init(resp, rtip);
+	}
+
+	solidbits = rt_get_solidbitv(ap->a_rt_i->nsolids, resp);
+
+	initial_part.pt_forw = initial_part.pt_back = &initial_part;
+	initial_part.pt_magic = PT_HD_MAGIC;
+	final_part.pt_forw = final_part.pt_back = &final_part;
+	final_part.pt_magic = PT_HD_MAGIC;
+	ap->a_Final_Part_hdp = &final_part;
+
+	BU_LIST_INIT(&waiting_segs.l);
+	BU_LIST_INIT(&finished_segs.l);
+
+	ap->a_finished_segs_hdp = &finished_segs;
+
+	if (BU_LIST_IS_EMPTY(&resp->re_region_ptbl)) {
+	    BU_ALLOC(regionbits, struct bu_ptbl);
+	    bu_ptbl_init(regionbits, 7, "rt_shootray() regionbits ptbl");
+	} else {
+	    regionbits = BU_LIST_FIRST(bu_ptbl, &resp->re_region_ptbl);
+	    BU_LIST_DEQUEUE(&regionbits->l);
+	    BU_CK_PTBL(regionbits);
+	}
+
+	for (j = h[i]; j < h[i+1]; j++) {
+	    RT_GET_SEG(temp_seg, ap->a_resource);
+    	    copy_to_CPU(temp_seg, &segs[j], *ap);
+	    BU_LIST_INSERT(&(waiting_segs.l), &(temp_seg->l));
+	}
+
+	weave = false;
+	if (BU_LIST_NON_EMPTY(&(waiting_segs.l))) {
+	    rt_boolweave(&finished_segs, &waiting_segs, &initial_part, ap);
+	    weave = true;
+	}
+
+	if (weave)
+	    (void)rt_boolfinal(&initial_part, &final_part, BACKING_DIST,
+			       INFINITY, regionbits, ap, solidbits);
+
+	x = 0;
+	for (BU_LIST_FOR(pp, partition, (struct bu_list *)&final_part))
+	    x++;
+    	file = fopen("gpu.txt", "a");
+	fprintf(file, "%lu\n", x);
+	fclose(file);
+
+	for (BU_LIST_FOR(pp, partition, (struct bu_list *)&final_part)) {
+	    copy_to_GPU(&segs[out], pp, *ap);
+	    out++;
+	}
+
+	RT_FREE_PT_LIST(&initial_part, resp);
+	RT_FREE_SEG_LIST(&finished_segs, resp);
+	RT_FREE_PT_LIST(&final_part, resp);
+    }
+}
 #endif
 
 
Index: src/librt/shoot.c
===================================================================
--- src/librt/shoot.c	(revision 68231)
+++ src/librt/shoot.c	(working copy)
@@ -858,6 +858,9 @@
     struct rt_i *rtip;
     const int debug_shoot = RT_G_DEBUG & DEBUG_SHOOT;
     fastf_t pending_hit = 0; /* dist of closest odd hit pending */
+    size_t x;
+    struct partition *pp;
+    FILE *file;
 
     RT_AP_CHECK(ap);
     if (ap->a_magic) {
@@ -1456,6 +1459,13 @@
 		       INFINITY,
 		       regionbits, ap, solidbits);
 
+    x = 0;
+    for (BU_LIST_FOR(pp, partition, (struct bu_list *)&FinalPart))
+	x++;
+    file = fopen("cpu.txt", "a");
+    fprintf(file, "%lu\n", x);
+    fclose(file);
+
     if (FinalPart.pt_forw == &FinalPart) {
 	if (ap->a_miss)
 	    ap->a_return = ap->a_miss(ap);
 
------------------------------------------------------------------------------
Attend Shape: An AT&T Tech Expo July 15-16. Meet us at AT&T Park in San
Francisco, CA to explore cutting-edge tech and listen to tech luminaries
present their vision of the future. This family event has something for
everyone, including kids. Get more information and register today.
http://sdm.link/attshape
_______________________________________________
BRL-CAD Developer mailing list
brlcad-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/brlcad-devel

Reply via email to