On Mon, Jul 4, 2016 at 9:52 PM Vasco Alexandre da Silva Costa <
vasco.co...@gmail.com> wrote:
> On Mon, Jul 4, 2016 at 12:30 PM, Param Hanji <param.catchch...@gmail.com>
> wrote:
>
>> Hi Vasco,
>>
>> I tried everything and still no luck. I'm pretty sure just copying inhits
>> and outhits is not enough. Going by Sean's explanation of segments and
>> partitions, it looks like I have no option but to implement partitions on
>> the GPU.
>>
>
> IIRC the code doesn't use partitions before calling rt_bool_*. Only
> segments. So I don't see how this is a problem.
> Have you examined the output lists in all stages?
>
>
>
Well to check if the bool functions are working as expected, I inserted
code to print number of finalPartitions (after weave and boolfinal) for
every pixel. I did this in shoot.c and primitive_util.c as well. Then I ran
rt in serial as well as OpenCL mode.
The results were very different. When boolean operations like subtraction
were used, number of final partitions was 0 for every pixel. In ANSI C,
this was not the case and partitions were indeed generated. This is why I
think I'm missing something or doing something wrong.
Here's a patch i just generated.
Index: include/rt/shoot.h
===================================================================
--- include/rt/shoot.h (revision 68231)
+++ include/rt/shoot.h (working copy)
@@ -178,10 +178,10 @@
RT_EXPORT extern void
clt_frame(void *pixels, uint8_t o[3], int cur_pixel, int last_pixel,
- int width, int ibackground[3], int inonbackground[3],
+ int width, int height, int ibackground[3], int inonbackground[3],
double airdensity, double haze[3], fastf_t gamma,
mat_t view2model, fastf_t cell_width, fastf_t cell_height,
- fastf_t aspect, int lightmodel);
+ fastf_t aspect, int lightmodel, struct application *ap);
#endif
Index: src/librt/bool.c
===================================================================
--- src/librt/bool.c (revision 68231)
+++ src/librt/bool.c (working copy)
@@ -174,9 +174,13 @@
segp = BU_LIST_FIRST(seg, &(in_hd->l));
RT_CHECK_SEG(segp);
RT_CK_HIT(&(segp->seg_in));
+#ifndef USE_OPENCL
RT_CK_RAY(segp->seg_in.hit_rayp);
+#endif
RT_CK_HIT(&(segp->seg_out));
+#ifndef USE_OPENCL
RT_CK_RAY(segp->seg_out.hit_rayp);
+#endif
if (RT_G_DEBUG&DEBUG_PARTITION) {
point_t pt;
Index: src/librt/primitives/primitive_util.c
===================================================================
--- src/librt/primitives/primitive_util.c (revision 68231)
+++ src/librt/primitives/primitive_util.c (working copy)
@@ -24,6 +24,8 @@
* librt_private.h.
*/
+#include <stdbool.h>
+
#include "bu/malloc.h"
#include "bu/opt.h"
#include "optical.h"
@@ -814,10 +816,10 @@
void
clt_frame(void *pixels, uint8_t o[3], int cur_pixel, int last_pixel,
- int width, int ibackground[3], int inonbackground[3],
+ int width, int height, int ibackground[3], int inonbackground[3],
double airdensity, double haze[3], fastf_t gamma,
mat_t view2model, fastf_t cell_width, fastf_t cell_height,
- fastf_t aspect, int lightmodel)
+ fastf_t aspect, int lightmodel, struct application *ap)
{
const size_t npix = last_pixel-cur_pixel+1;
@@ -884,6 +886,7 @@
size_t sz_segs;
cl_mem psegs;
size_t snpix = swxh[0]*swxh[1];
+ struct cl_seg *segs;
sz_counts = sizeof(cl_int)*npix;
pcounts = clCreateBuffer(clt_context, CL_MEM_WRITE_ONLY|CL_MEM_HOST_READ_ONLY, sz_counts, NULL, &error);
@@ -916,10 +919,12 @@
sz_h = sizeof(cl_uint)*(npix+1);
h = (cl_uint*)bu_calloc(1, sz_h, "h");
h[0] = 0;
+
for (i=1; i<=npix; i++) {
BU_ASSERT((counts[i-1] % 2) == 0);
h[i] = h[i-1] + counts[i-1]/2; /* number of segs is half the number of hits */
}
+
bu_free(counts, "counts");
ph = clCreateBuffer(clt_context, CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY|CL_MEM_COPY_HOST_PTR, sz_h, h, &error);
@@ -926,10 +931,10 @@
if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL offs buffer");
sz_segs = sizeof(struct cl_seg)*h[npix];
- bu_free(h, "h");
+ segs = (struct cl_seg*)bu_malloc(sz_segs, "segments on gpu");
if (sz_segs != 0) {
- psegs = clCreateBuffer(clt_context, CL_MEM_READ_WRITE|CL_MEM_HOST_NO_ACCESS, sz_segs, NULL, &error);
+ psegs = clCreateBuffer(clt_context, CL_MEM_READ_WRITE, sz_segs, NULL, &error);
if (error != CL_SUCCESS) bu_bomb("failed to create OpenCL segs buffer");
bu_semaphore_acquire(clt_semaphore);
@@ -950,10 +955,17 @@
if (error != CL_SUCCESS) bu_bomb("failed to set OpenCL kernel arguments");
error = clEnqueueNDRangeKernel(clt_queue, clt_store_segs_kernel, 2, NULL, wxh,
swxh, 0, NULL, NULL);
+ error = clEnqueueReadBuffer(clt_queue, psegs, CL_TRUE, 0, sz_segs, segs, 0, NULL, NULL);
+ if (error != CL_SUCCESS) bu_bomb("failed to read OpenCL segs buffer");
+ clt_boolweave_and_boolfinal(segs, ap, h, width, height);
+ error = clEnqueueWriteBuffer(clt_queue, psegs, CL_TRUE, 0, sz_segs, segs, 0, NULL, NULL);
+ if (error != CL_SUCCESS) bu_bomb("failed to write into OpenCL segs buffer");
+
bu_semaphore_release(clt_semaphore);
} else {
psegs = NULL;
}
+ bu_free(h, "h");
bu_semaphore_acquire(clt_semaphore);
error = clSetKernelArg(clt_shade_segs_kernel, 0, sizeof(cl_mem), &ppixels);
@@ -1026,6 +1038,218 @@
clEnqueueReadBuffer(clt_queue, ppixels, CL_TRUE, 0, sz_pixels, pixels, 0, NULL, NULL);
clReleaseMemObject(ppixels);
}
+
+void
+copy_to_CPU(struct seg *cpu_seg, struct cl_seg *gpu_seg, struct application a)
+{
+ struct soltab *stp;
+ unsigned int i = 0;
+
+ cpu_seg->seg_in.hit_normal[X] = gpu_seg->seg_in.hit_normal.x;
+ cpu_seg->seg_in.hit_normal[Y] = gpu_seg->seg_in.hit_normal.y;
+ cpu_seg->seg_in.hit_normal[Z] = gpu_seg->seg_in.hit_normal.z;
+
+ cpu_seg->seg_in.hit_point[X] = gpu_seg->seg_in.hit_point.x;
+ cpu_seg->seg_in.hit_point[Y] = gpu_seg->seg_in.hit_point.y;
+ cpu_seg->seg_in.hit_point[Z] = gpu_seg->seg_in.hit_point.z;
+
+ cpu_seg->seg_in.hit_vpriv[X] = gpu_seg->seg_in.hit_vpriv.x;
+ cpu_seg->seg_in.hit_vpriv[Y] = gpu_seg->seg_in.hit_vpriv.y;
+ cpu_seg->seg_in.hit_vpriv[Z] = gpu_seg->seg_in.hit_vpriv.z;
+
+ cpu_seg->seg_in.hit_dist = gpu_seg->seg_in.hit_dist;
+ cpu_seg->seg_in.hit_surfno = gpu_seg->seg_in.hit_surfno;
+
+
+ cpu_seg->seg_out.hit_normal[X] = gpu_seg->seg_out.hit_normal.x;
+ cpu_seg->seg_out.hit_normal[Y] = gpu_seg->seg_out.hit_normal.y;
+ cpu_seg->seg_out.hit_normal[Z] = gpu_seg->seg_out.hit_normal.z;
+
+ cpu_seg->seg_out.hit_point[X] = gpu_seg->seg_out.hit_point.x;
+ cpu_seg->seg_out.hit_point[Y] = gpu_seg->seg_out.hit_point.y;
+ cpu_seg->seg_out.hit_point[Z] = gpu_seg->seg_out.hit_point.z;
+
+ cpu_seg->seg_out.hit_vpriv[X] = gpu_seg->seg_out.hit_vpriv.x;
+ cpu_seg->seg_out.hit_vpriv[Y] = gpu_seg->seg_out.hit_vpriv.y;
+ cpu_seg->seg_out.hit_vpriv[Z] = gpu_seg->seg_out.hit_vpriv.z;
+
+ cpu_seg->seg_out.hit_dist = gpu_seg->seg_out.hit_dist;
+ cpu_seg->seg_out.hit_surfno = gpu_seg->seg_out.hit_surfno;
+
+ RT_VISIT_ALL_SOLTABS_START(stp, a.a_rt_i) {
+ /* Ignore "dead" solids in the list. (They failed prep) */
+ if (stp->st_aradius <= 0) continue;
+ /* Infinite solids make the BVH construction explode. */
+ if (stp->st_aradius >= INFINITY) continue;
+
+ if (i == gpu_seg->seg_sti) {
+ cpu_seg->seg_stp = stp;
+ break;
+ }
+ i++;
+ } RT_VISIT_ALL_SOLTABS_END
+}
+
+void
+copy_to_GPU(struct cl_seg *gpu_seg, struct partition *cpu_part, struct application a)
+{
+ struct soltab *stp;
+ struct seg **segpp;
+ unsigned int i = 0;
+
+ gpu_seg->seg_in.hit_normal.x = cpu_part->pt_inhit->hit_normal[X];
+ gpu_seg->seg_in.hit_normal.y = cpu_part->pt_inhit->hit_normal[Y];
+ gpu_seg->seg_in.hit_normal.z = cpu_part->pt_inhit->hit_normal[Z];
+
+ gpu_seg->seg_in.hit_point.x = cpu_part->pt_inhit->hit_point[X];
+ gpu_seg->seg_in.hit_point.y = cpu_part->pt_inhit->hit_point[Y];
+ gpu_seg->seg_in.hit_point.z = cpu_part->pt_inhit->hit_point[Z];
+
+ gpu_seg->seg_in.hit_vpriv.x = cpu_part->pt_inhit->hit_vpriv[X];
+ gpu_seg->seg_in.hit_vpriv.y = cpu_part->pt_inhit->hit_vpriv[Y];
+ gpu_seg->seg_in.hit_vpriv.z = cpu_part->pt_inhit->hit_vpriv[Z];
+
+ gpu_seg->seg_in.hit_dist = cpu_part->pt_inhit->hit_dist;
+ gpu_seg->seg_in.hit_surfno = cpu_part->pt_inhit->hit_surfno;
+
+
+ gpu_seg->seg_out.hit_normal.x = cpu_part->pt_outhit->hit_normal[X];
+ gpu_seg->seg_out.hit_normal.y = cpu_part->pt_outhit->hit_normal[Y];
+ gpu_seg->seg_out.hit_normal.z = cpu_part->pt_outhit->hit_normal[Z];
+
+ gpu_seg->seg_out.hit_point.x = cpu_part->pt_outhit->hit_point[X];
+ gpu_seg->seg_out.hit_point.y = cpu_part->pt_outhit->hit_point[Y];
+ gpu_seg->seg_out.hit_point.z = cpu_part->pt_outhit->hit_point[Z];
+
+ gpu_seg->seg_out.hit_vpriv.x = cpu_part->pt_outhit->hit_vpriv[X];
+ gpu_seg->seg_out.hit_vpriv.y = cpu_part->pt_outhit->hit_vpriv[Y];
+ gpu_seg->seg_out.hit_vpriv.z = cpu_part->pt_outhit->hit_vpriv[Z];
+
+ gpu_seg->seg_out.hit_dist = cpu_part->pt_outhit->hit_dist;
+ gpu_seg->seg_out.hit_surfno = cpu_part->pt_outhit->hit_surfno;
+
+ RT_VISIT_ALL_SOLTABS_START(stp, a.a_rt_i) {
+ /* Ignore "dead" solids in the list. (They failed prep) */
+ if (stp->st_aradius <= 0) continue;
+ /* Infinite solids make the BVH construction explode. */
+ if (stp->st_aradius >= INFINITY) continue;
+
+ for (BU_PTBL_FOR(segpp, (struct seg **), &cpu_part->pt_seglist))
+ if ((*segpp)->seg_stp == stp) {
+ gpu_seg->seg_sti = i;
+ break;
+ }
+ i++;
+ } RT_VISIT_ALL_SOLTABS_END
+}
+
+void
+clt_boolweave_and_boolfinal(struct cl_seg *segs, struct application *ap,
+ cl_uint *h, int width, int height)
+{
+ size_t i, j;
+ size_t x, out = 0;
+ FILE *file;
+
+ for (i = 0; i < (size_t)(width * height); i++) {
+ struct seg waiting_segs;
+ struct seg finished_segs;
+ struct seg *temp_seg;
+ struct partition initial_part;
+ struct partition final_part;
+ struct bu_ptbl *regionbits;
+ struct resource *resp;
+ struct bu_bitv *solidbits;
+ struct rt_i *rtip;
+ struct partition *pp;
+ bool weave;
+
+ RT_AP_CHECK(ap);
+ if (ap->a_magic) {
+ RT_CK_AP(ap);
+ }
+ else {
+ ap->a_magic = RT_AP_MAGIC;
+ }
+ if (ap->a_ray.magic) {
+ RT_CK_RAY(&(ap->a_ray));
+ }
+ else {
+ ap->a_ray.magic = RT_RAY_MAGIC;
+ }
+ if (ap->a_resource == RESOURCE_NULL) {
+ ap->a_resource = &rt_uniresource;
+ if (rt_uniresource.re_magic == 0)
+ rt_init_resource(&rt_uniresource, 0, ap->a_rt_i);
+ }
+
+ resp = ap->a_resource;
+ RT_CK_RESOURCE(resp);
+
+ rtip = ap->a_rt_i;
+ if (rtip->needprep)
+ rt_prep_parallel(rtip, 1); /* Stay on our CPU */
+
+ if (!resp->re_pieces && rtip->rti_nsolids_with_pieces > 0) {
+ /* Initialize this processors 'solid pieces' state */
+ rt_res_pieces_init(resp, rtip);
+ }
+
+ solidbits = rt_get_solidbitv(ap->a_rt_i->nsolids, resp);
+
+ initial_part.pt_forw = initial_part.pt_back = &initial_part;
+ initial_part.pt_magic = PT_HD_MAGIC;
+ final_part.pt_forw = final_part.pt_back = &final_part;
+ final_part.pt_magic = PT_HD_MAGIC;
+ ap->a_Final_Part_hdp = &final_part;
+
+ BU_LIST_INIT(&waiting_segs.l);
+ BU_LIST_INIT(&finished_segs.l);
+
+ ap->a_finished_segs_hdp = &finished_segs;
+
+ if (BU_LIST_IS_EMPTY(&resp->re_region_ptbl)) {
+ BU_ALLOC(regionbits, struct bu_ptbl);
+ bu_ptbl_init(regionbits, 7, "rt_shootray() regionbits ptbl");
+ } else {
+ regionbits = BU_LIST_FIRST(bu_ptbl, &resp->re_region_ptbl);
+ BU_LIST_DEQUEUE(®ionbits->l);
+ BU_CK_PTBL(regionbits);
+ }
+
+ for (j = h[i]; j < h[i+1]; j++) {
+ RT_GET_SEG(temp_seg, ap->a_resource);
+ copy_to_CPU(temp_seg, &segs[j], *ap);
+ BU_LIST_INSERT(&(waiting_segs.l), &(temp_seg->l));
+ }
+
+ weave = false;
+ if (BU_LIST_NON_EMPTY(&(waiting_segs.l))) {
+ rt_boolweave(&finished_segs, &waiting_segs, &initial_part, ap);
+ weave = true;
+ }
+
+ if (weave)
+ (void)rt_boolfinal(&initial_part, &final_part, BACKING_DIST,
+ INFINITY, regionbits, ap, solidbits);
+
+ x = 0;
+ for (BU_LIST_FOR(pp, partition, (struct bu_list *)&final_part))
+ x++;
+ file = fopen("gpu.txt", "a");
+ fprintf(file, "%lu\n", x);
+ fclose(file);
+
+ for (BU_LIST_FOR(pp, partition, (struct bu_list *)&final_part)) {
+ copy_to_GPU(&segs[out], pp, *ap);
+ out++;
+ }
+
+ RT_FREE_PT_LIST(&initial_part, resp);
+ RT_FREE_SEG_LIST(&finished_segs, resp);
+ RT_FREE_PT_LIST(&final_part, resp);
+ }
+}
#endif
Index: src/librt/shoot.c
===================================================================
--- src/librt/shoot.c (revision 68231)
+++ src/librt/shoot.c (working copy)
@@ -858,6 +858,9 @@
struct rt_i *rtip;
const int debug_shoot = RT_G_DEBUG & DEBUG_SHOOT;
fastf_t pending_hit = 0; /* dist of closest odd hit pending */
+ size_t x;
+ struct partition *pp;
+ FILE *file;
RT_AP_CHECK(ap);
if (ap->a_magic) {
@@ -1456,6 +1459,13 @@
INFINITY,
regionbits, ap, solidbits);
+ x = 0;
+ for (BU_LIST_FOR(pp, partition, (struct bu_list *)&FinalPart))
+ x++;
+ file = fopen("cpu.txt", "a");
+ fprintf(file, "%lu\n", x);
+ fclose(file);
+
if (FinalPart.pt_forw == &FinalPart) {
if (ap->a_miss)
ap->a_return = ap->a_miss(ap);
------------------------------------------------------------------------------
Attend Shape: An AT&T Tech Expo July 15-16. Meet us at AT&T Park in San
Francisco, CA to explore cutting-edge tech and listen to tech luminaries
present their vision of the future. This family event has something for
everyone, including kids. Get more information and register today.
http://sdm.link/attshape
_______________________________________________
BRL-CAD Developer mailing list
brlcad-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/brlcad-devel