On Mon, Jul 4, 2016 at 9:33 PM, Param Hanji <param.catchch...@gmail.com>
wrote:

> On Mon, Jul 4, 2016 at 9:52 PM Vasco Alexandre da Silva Costa <
> vasco.co...@gmail.com> wrote:
>
>> On Mon, Jul 4, 2016 at 12:30 PM, Param Hanji <param.catchch...@gmail.com>
>> wrote:
>>
>>> Hi Vasco,
>>>
>>> I tried everything and still no luck. I'm pretty sure just copying
>>> inhits and outhits is not enough. Going by Sean's explanation of segments
>>> and partitions, it looks like I have no option but to implement partitions
>>> on the GPU.
>>>
>>
>> IIRC the code doesn't use partitions before calling rt_bool_*. Only
>> segments. So I don't see how this is a problem.
>> Have you examined the output lists in all stages?
>>
>>
>>
> Well to check if the bool functions are working as expected, I inserted
> code to print number of finalPartitions (after weave and boolfinal) for
> every pixel. I did this in shoot.c and primitive_util.c as well. Then I ran
> rt in serial as well as OpenCL mode.
>
> The results were very different. When boolean operations like subtraction
> were used, number of final partitions was 0 for every pixel. In ANSI C,
> this was not the case and partitions were indeed generated. This is why I
> think I'm missing something or doing something wrong.
>
> Here's a patch i just generated.
>

Hmmm. There seem to be a couple of issues here:

The boolean weaving code expects seg_in.hit_rayp and seg_out.hit_rayp to be
pointing to something. These values are mostly used for debug printfs in
bool.c with the exception of bool.c:bool_max_raynum. So I changed this to
point to a dummy struct xray.

The other issue is that the number of partitions per pixel is not
necessarily the same as the number of segments. IIRC it may be larger. I
changed the patch to only copy the first partition per pixel. This may not
be sufficient when doing multi-hit rendering.

There may still be issues with the regionbits and solidbits initialization.
Or, like you said, with a mismatch between the implementation of the
partitions and the segs that are currently used in the OpenCL shading code.


Get a sample .g file which demonstrates the issue with the subtraction
boolean op. Then we can compare the results from the OpenCL code with the
results from the ANSI C code. One simple way to do this is to dump the per
pixel partition length results to a grayscale image. e.g. you can use the
PPM P2 graymap image format which is simple to write:
https://en.wikipedia.org/wiki/Netpbm_format

This should make the result comparison easier than looking at a text dump.

-- 
Vasco Alexandre da Silva Costa
PhD in Computer Engineering (Computer Graphics)
Instituto Superior Técnico/University of Lisbon, Portugal
Index: include/rt/shoot.h
===================================================================
--- include/rt/shoot.h	(revision 68233)
+++ include/rt/shoot.h	(working copy)
@@ -178,10 +178,10 @@
 
 RT_EXPORT extern void
 clt_frame(void *pixels, uint8_t o[3], int cur_pixel, int last_pixel,
-	  int width, int ibackground[3], int inonbackground[3],
+	  int width, int height, int ibackground[3], int inonbackground[3],
 	  double airdensity, double haze[3], fastf_t gamma,
           mat_t view2model, fastf_t cell_width, fastf_t cell_height,
-          fastf_t aspect, int lightmodel);
+          fastf_t aspect, int lightmodel, struct application *ap);
 #endif
 
 
Index: src/librt/primitives/primitive_util.c
===================================================================
--- src/librt/primitives/primitive_util.c	(revision 68233)
+++ src/librt/primitives/primitive_util.c	(working copy)
@@ -24,6 +24,8 @@
  * librt_private.h.
  */
 
+#include <stdbool.h>
+
 #include "bu/malloc.h"
 #include "bu/opt.h"
 #include "optical.h"
@@ -812,12 +814,235 @@
     clt_db_nprims = 0;
 }
 
+static void
+copy_to_CPU(struct seg *cpu_seg, struct cl_seg *gpu_seg, struct application *ap)
+{
+    struct hit seg = RT_HIT_INIT_ZERO;
+    struct soltab *stp;
+    unsigned int i = 0;
+
+    cpu_seg->seg_in = seg;
+
+    cpu_seg->seg_in.hit_normal[X] = gpu_seg->seg_in.hit_normal.x;
+    cpu_seg->seg_in.hit_normal[Y] = gpu_seg->seg_in.hit_normal.y;
+    cpu_seg->seg_in.hit_normal[Z] = gpu_seg->seg_in.hit_normal.z;
+
+    cpu_seg->seg_in.hit_point[X] = gpu_seg->seg_in.hit_point.x;
+    cpu_seg->seg_in.hit_point[Y] = gpu_seg->seg_in.hit_point.y;
+    cpu_seg->seg_in.hit_point[Z] = gpu_seg->seg_in.hit_point.z;
+
+    cpu_seg->seg_in.hit_vpriv[X] = gpu_seg->seg_in.hit_vpriv.x;
+    cpu_seg->seg_in.hit_vpriv[Y] = gpu_seg->seg_in.hit_vpriv.y;
+    cpu_seg->seg_in.hit_vpriv[Z] = gpu_seg->seg_in.hit_vpriv.z;
+
+    cpu_seg->seg_in.hit_dist = gpu_seg->seg_in.hit_dist;
+    cpu_seg->seg_in.hit_surfno = gpu_seg->seg_in.hit_surfno;
+    cpu_seg->seg_in.hit_rayp = &ap->a_ray;
+
+
+    cpu_seg->seg_out = seg;
+
+    cpu_seg->seg_out.hit_normal[X] = gpu_seg->seg_out.hit_normal.x;
+    cpu_seg->seg_out.hit_normal[Y] = gpu_seg->seg_out.hit_normal.y;
+    cpu_seg->seg_out.hit_normal[Z] = gpu_seg->seg_out.hit_normal.z;
+
+    cpu_seg->seg_out.hit_point[X] = gpu_seg->seg_out.hit_point.x;
+    cpu_seg->seg_out.hit_point[Y] = gpu_seg->seg_out.hit_point.y;
+    cpu_seg->seg_out.hit_point[Z] = gpu_seg->seg_out.hit_point.z;
+
+    cpu_seg->seg_out.hit_vpriv[X] = gpu_seg->seg_out.hit_vpriv.x;
+    cpu_seg->seg_out.hit_vpriv[Y] = gpu_seg->seg_out.hit_vpriv.y;
+    cpu_seg->seg_out.hit_vpriv[Z] = gpu_seg->seg_out.hit_vpriv.z;
+
+    cpu_seg->seg_out.hit_dist = gpu_seg->seg_out.hit_dist;
+    cpu_seg->seg_out.hit_surfno = gpu_seg->seg_out.hit_surfno;
+    cpu_seg->seg_out.hit_rayp = &ap->a_ray;
+
+    cpu_seg->seg_stp = NULL;
+
+    RT_VISIT_ALL_SOLTABS_START(stp, ap->a_rt_i) {
+	/* Ignore "dead" solids in the list.  (They failed prep) */
+	if (stp->st_aradius <= 0) continue;
+	/* Infinite solids make the BVH construction explode. */
+	if (stp->st_aradius >= INFINITY) continue;
+
+	if (i == gpu_seg->seg_sti) {
+	    cpu_seg->seg_stp = stp;
+	    return;
+	}
+	i++;
+    } RT_VISIT_ALL_SOLTABS_END
+    printf("error\n");
+}
+
+static void
+copy_to_GPU(struct cl_seg *gpu_seg, struct partition *cpu_part, struct application *ap)
+{
+    struct soltab *stp;
+    struct seg **segpp;
+    unsigned int i = 0;
+
+    gpu_seg->seg_in.hit_normal.x = cpu_part->pt_inhit->hit_normal[X];
+    gpu_seg->seg_in.hit_normal.y = cpu_part->pt_inhit->hit_normal[Y];
+    gpu_seg->seg_in.hit_normal.z = cpu_part->pt_inhit->hit_normal[Z];
+
+    gpu_seg->seg_in.hit_point.x = cpu_part->pt_inhit->hit_point[X];
+    gpu_seg->seg_in.hit_point.y = cpu_part->pt_inhit->hit_point[Y];
+    gpu_seg->seg_in.hit_point.z = cpu_part->pt_inhit->hit_point[Z];
+
+    gpu_seg->seg_in.hit_vpriv.x = cpu_part->pt_inhit->hit_vpriv[X];
+    gpu_seg->seg_in.hit_vpriv.y = cpu_part->pt_inhit->hit_vpriv[Y];
+    gpu_seg->seg_in.hit_vpriv.z = cpu_part->pt_inhit->hit_vpriv[Z];
+
+    gpu_seg->seg_in.hit_dist = cpu_part->pt_inhit->hit_dist;
+    gpu_seg->seg_in.hit_surfno = cpu_part->pt_inhit->hit_surfno;
+
+
+    gpu_seg->seg_out.hit_normal.x = cpu_part->pt_outhit->hit_normal[X];
+    gpu_seg->seg_out.hit_normal.y = cpu_part->pt_outhit->hit_normal[Y];
+    gpu_seg->seg_out.hit_normal.z = cpu_part->pt_outhit->hit_normal[Z];
+
+    gpu_seg->seg_out.hit_point.x = cpu_part->pt_outhit->hit_point[X];
+    gpu_seg->seg_out.hit_point.y = cpu_part->pt_outhit->hit_point[Y];
+    gpu_seg->seg_out.hit_point.z = cpu_part->pt_outhit->hit_point[Z];
+
+    gpu_seg->seg_out.hit_vpriv.x = cpu_part->pt_outhit->hit_vpriv[X];
+    gpu_seg->seg_out.hit_vpriv.y = cpu_part->pt_outhit->hit_vpriv[Y];
+    gpu_seg->seg_out.hit_vpriv.z = cpu_part->pt_outhit->hit_vpriv[Z];
+
+    gpu_seg->seg_out.hit_dist = cpu_part->pt_outhit->hit_dist;
+    gpu_seg->seg_out.hit_surfno = cpu_part->pt_outhit->hit_surfno;
+
+    RT_VISIT_ALL_SOLTABS_START(stp, ap->a_rt_i) {
+	/* Ignore "dead" solids in the list.  (They failed prep) */
+	if (stp->st_aradius <= 0) continue;
+	/* Infinite solids make the BVH construction explode. */
+	if (stp->st_aradius >= INFINITY) continue;
+
+	for (BU_PTBL_FOR(segpp, (struct seg **), &cpu_part->pt_seglist))
+	    if ((*segpp)->seg_stp == stp) {
+		gpu_seg->seg_sti = i;
+		return;
+	    }
+	i++;
+    } RT_VISIT_ALL_SOLTABS_END
+    printf("error\n");
+}
+
+static void
+clt_boolweave_and_boolfinal(struct cl_seg *segs, struct application *ap,
+			    cl_uint *h, int width, int height)
+{
+    size_t i, j;
+    size_t x, out = 0;
+    FILE *file;
+
+    for (i = 0; i < (size_t)(width * height); i++) {
+	struct seg waiting_segs;
+	struct seg finished_segs;
+	struct seg *s2;
+	struct partition InitialPart;
+	struct partition FinalPart;
+	struct bu_ptbl *regionbits;
+	struct resource *resp;
+	struct bu_bitv *solidbits;
+	struct rt_i *rtip;
+	struct partition *pp;
+
+	RT_AP_CHECK(ap);
+	if (ap->a_magic) {
+	    RT_CK_AP(ap);
+	} else {
+	    ap->a_magic = RT_AP_MAGIC;
+	}
+	if (ap->a_ray.magic) {
+	    RT_CK_RAY(&(ap->a_ray));
+	} else {
+	    ap->a_ray.magic = RT_RAY_MAGIC;
+	}
+	if (ap->a_resource == RESOURCE_NULL) {
+	    ap->a_resource = &rt_uniresource;
+	    if (rt_uniresource.re_magic == 0)
+		rt_init_resource(&rt_uniresource, 0, ap->a_rt_i);
+	}
+
+	rtip = ap->a_rt_i;
+	RT_CK_RTI(rtip);
+	resp = ap->a_resource;
+	RT_CK_RESOURCE(resp);
+
+	if (rtip->needprep)
+	    rt_prep_parallel(rtip, 1);	/* Stay on our CPU */
+
+	solidbits = rt_get_solidbitv(ap->a_rt_i->nsolids, resp);
+
+	InitialPart.pt_forw = InitialPart.pt_back = &InitialPart;
+	InitialPart.pt_magic = PT_HD_MAGIC;
+	FinalPart.pt_forw = FinalPart.pt_back = &FinalPart;
+	FinalPart.pt_magic = PT_HD_MAGIC;
+	ap->a_Final_Part_hdp = &FinalPart;
+
+	BU_LIST_INIT(&waiting_segs.l);
+	BU_LIST_INIT(&finished_segs.l);
+
+	ap->a_finished_segs_hdp = &finished_segs;
+
+	if (BU_LIST_IS_EMPTY(&resp->re_region_ptbl)) {
+	    BU_ALLOC(regionbits, struct bu_ptbl);
+	    bu_ptbl_init(regionbits, 7, "rt_shootray() regionbits ptbl");
+	} else {
+	    regionbits = BU_LIST_FIRST(bu_ptbl, &resp->re_region_ptbl);
+	    BU_LIST_DEQUEUE(&regionbits->l);
+	    BU_CK_PTBL(regionbits);
+	}
+
+	if (!resp->re_pieces && rtip->rti_nsolids_with_pieces > 0) {
+	    /* Initialize this processors 'solid pieces' state */
+	    rt_res_pieces_init(resp, rtip);
+	}
+
+	for (j = h[i]; j < h[i+1]; j++) {
+	    RT_GET_SEG(s2, ap->a_resource);
+    	    copy_to_CPU(s2, &segs[j], ap);
+	    BU_LIST_INSERT(&(waiting_segs.l), &(s2->l));
+	}
+
+	if (BU_LIST_NON_EMPTY(&(waiting_segs.l))) {
+	    rt_boolweave(&finished_segs, &waiting_segs, &InitialPart, ap);
+	}
+
+	if (BU_LIST_NON_EMPTY(&(finished_segs.l))) {
+	    (void)rt_boolfinal(&InitialPart, &FinalPart, BACKING_DIST,
+			       INFINITY, regionbits, ap, solidbits);
+	}
+
+	x = 0;
+	for (BU_LIST_FOR(pp, partition, (struct bu_list *)&FinalPart))
+	    x++;
+    	file = fopen("gpu.txt", "a");
+	fprintf(file, "%lu\n", x);
+	fclose(file);
+
+	for (BU_LIST_FOR(pp, partition, (struct bu_list *)&FinalPart)) {
+	    copy_to_GPU(&segs[out], pp, ap);
+	    out++;
+	    break;
+	}
+
+	BU_CK_PTBL(regionbits);
+	BU_LIST_APPEND(&resp->re_region_ptbl, &regionbits->l);
+
+	RT_FREE_PT_LIST(&InitialPart, resp);
+	RT_FREE_SEG_LIST(&finished_segs, resp);
+	RT_FREE_PT_LIST(&FinalPart, resp);
+    }
+}
 void
 clt_frame(void *pixels, uint8_t o[3], int cur_pixel, int last_pixel,
-	  int width, int ibackground[3], int inonbackground[3],
+	  int width, int height, int ibackground[3], int inonbackground[3],
 	  double airdensity, double haze[3], fastf_t gamma,
           mat_t view2model, fastf_t cell_width, fastf_t cell_height,
-          fastf_t aspect, int lightmodel)
+          fastf_t aspect, int lightmodel, struct application *ap)
 {
     const size_t npix = last_pixel-cur_pixel+1;
 
@@ -858,6 +1083,9 @@
     p.lightmodel = lightmodel;
 
     sz_pixels = sizeof(cl_uchar)*o[2]*npix;
+
+    printf("w=%d, h=%d\n", width, height);
+
     ppixels = clCreateBuffer(clt_context, CL_MEM_WRITE_ONLY|CL_MEM_HOST_READ_ONLY, sz_pixels, NULL, &error);
     if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL pixels buffer");
 
@@ -884,6 +1112,7 @@
 	    size_t sz_segs;
 	    cl_mem psegs;
 	    size_t snpix = swxh[0]*swxh[1];
+	    struct cl_seg *segs;
 
 	    sz_counts = sizeof(cl_int)*npix;
 	    pcounts = clCreateBuffer(clt_context, CL_MEM_WRITE_ONLY|CL_MEM_HOST_READ_ONLY, sz_counts, NULL, &error);
@@ -916,20 +1145,20 @@
 	    sz_h = sizeof(cl_uint)*(npix+1);
 	    h = (cl_uint*)bu_calloc(1, sz_h, "h");
 	    h[0] = 0;
+
 	    for (i=1; i<=npix; i++) {
 		BU_ASSERT((counts[i-1] % 2) == 0);
 		h[i] = h[i-1] + counts[i-1]/2;	/* number of segs is half the number of hits */
 	    }
-	    bu_free(counts, "counts");
 
 	    ph = clCreateBuffer(clt_context, CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY|CL_MEM_COPY_HOST_PTR, sz_h, h, &error);
 	    if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL offs buffer");
 
 	    sz_segs = sizeof(struct cl_seg)*h[npix];
-	    bu_free(h, "h");
+	    segs = (struct cl_seg*)bu_malloc(sz_segs, "segments on gpu");
 
 	    if (sz_segs != 0) {
-		psegs = clCreateBuffer(clt_context, CL_MEM_READ_WRITE|CL_MEM_HOST_NO_ACCESS, sz_segs, NULL, &error);
+		psegs = clCreateBuffer(clt_context, CL_MEM_READ_WRITE, sz_segs, NULL, &error);
 		if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL segs buffer");
 
 		bu_semaphore_acquire(clt_semaphore);
@@ -950,10 +1179,34 @@
 		if (error != CL_SUCCESS) bu_bomb("failed to set OpenCL kernel arguments");
 		error = clEnqueueNDRangeKernel(clt_queue, clt_store_segs_kernel, 2, NULL, wxh,
 			swxh, 0, NULL, NULL);
+
+		error = clEnqueueReadBuffer(clt_queue, psegs, CL_TRUE, 0, sz_segs, segs, 0, NULL, NULL);
+		if (error != CL_SUCCESS) bu_bomb("failed to read OpenCL segs buffer");
 		bu_semaphore_release(clt_semaphore);
+
+		clt_boolweave_and_boolfinal(segs, ap, h, width, height);
+
+		h[0] = 0;
+
+		for (i=1; i<=npix; i++) {
+		    if (counts[i-1] != 0)
+			h[i] = h[i-1] + 1;
+		    else
+			h[i] = h[i-1];
+		}
+
+		bu_semaphore_acquire(clt_semaphore);
+		error = clEnqueueWriteBuffer(clt_queue, ph, CL_TRUE, 0, sz_h, h, 0, NULL, NULL);
+		if (error != CL_SUCCESS) bu_bomb("failed to write into OpenCL h buffer");
+		error = clEnqueueWriteBuffer(clt_queue, psegs, CL_TRUE, 0, sz_segs, segs, 0, NULL, NULL);
+		if (error != CL_SUCCESS) bu_bomb("failed to write into OpenCL segs buffer");
+		bu_semaphore_release(clt_semaphore);
+
+		bu_free(counts, "counts");
             } else {
 		psegs = NULL;
             }
+	    bu_free(h, "h");
 
 	    bu_semaphore_acquire(clt_semaphore);
 	    error = clSetKernelArg(clt_shade_segs_kernel, 0, sizeof(cl_mem), &ppixels);
Index: src/librt/shoot.c
===================================================================
--- src/librt/shoot.c	(revision 68233)
+++ src/librt/shoot.c	(working copy)
@@ -858,6 +858,9 @@
     struct rt_i *rtip;
     const int debug_shoot = RT_G_DEBUG & DEBUG_SHOOT;
     fastf_t pending_hit = 0; /* dist of closest odd hit pending */
+    size_t x;
+    struct partition *pp;
+    FILE *file;
 
     RT_AP_CHECK(ap);
     if (ap->a_magic) {
@@ -1456,6 +1459,13 @@
 		       INFINITY,
 		       regionbits, ap, solidbits);
 
+    x = 0;
+    for (BU_LIST_FOR(pp, partition, (struct bu_list *)&FinalPart))
+	x++;
+    file = fopen("cpu.txt", "a");
+    fprintf(file, "%lu\n", x);
+    fclose(file);
+
     if (FinalPart.pt_forw == &FinalPart) {
 	if (ap->a_miss)
 	    ap->a_return = ap->a_miss(ap);
Index: src/rt/do.c
===================================================================
--- src/rt/do.c	(revision 68233)
+++ src/rt/do.c	(working copy)
@@ -593,10 +593,10 @@
 
     pixels = (uint8_t*)bu_calloc(size, sizeof(uint8_t), "image buffer");
 
-    clt_frame(pixels, clt_o, cur_pixel, last_pixel, width,
+    clt_frame(pixels, clt_o, cur_pixel, last_pixel, width, height,
               ibackground, inonbackground,
 	      airdensity, haze, gamma_corr, view2model, cell_width,
-              cell_height, aspect, lightmodel);
+              cell_height, aspect, lightmodel, &a);
 
     pixelp = pixels + cur_pixel*clt_o[2];
 
------------------------------------------------------------------------------
Attend Shape: An AT&T Tech Expo July 15-16. Meet us at AT&T Park in San
Francisco, CA to explore cutting-edge tech and listen to tech luminaries
present their vision of the future. This family event has something for
everyone, including kids. Get more information and register today.
http://sdm.link/attshape
_______________________________________________
BRL-CAD Developer mailing list
brlcad-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/brlcad-devel

Reply via email to