Use OpenMP to spawn rendering threads.

The code has some bugs in it. I think its improper initialization of the
resource pools which leads to a spurious double free. But I haven't been
able to find out exactly where the bugs are.

On Wed, Jun 10, 2015 at 1:27 AM, Vasco Alexandre da Silva Costa <
vasco.co...@gmail.com> wrote:

> - Added the main boolean weaving code. Eliminated gotos in boolfinal.
> - Made the code more thread-safe by reducing amount of direct uses of the
> global app pointer.
>
> This was a good exercise to see the amount of effort that will be required
> to do a full reimplementation of the rendering pipeline in OpenCL.
>
> The main obstacle to an OpenCL port is, as I expected, the boolean code
> which is branch and goto heavy and uses dynamic linked lists.
>
> What I did not expect was that the total LOC count in that boolean weaving
> code dwarfs everything else put together (traversal, ray generation, shade,
> writing to the frame buffer).
>
> The current code already supports threaded execution with some heavy app
> context data structures. These probably have too large of a memory
> footprint per thread for the kind of fine grained parallelism with
> thousands of threads in flight that we desire (see 'struct resource').
>
>
> As discussed with Sean a couple of days ago I am going to stop working on
> this angle and start working on the grid spatial partitioning in OpenCL
> next.
>
>
> On Fri, Jun 5, 2015 at 12:16 AM, Vasco Alexandre da Silva Costa <
> vasco.co...@gmail.com> wrote:
>
>> Simplify the shading code some more.
>>
>> On Thu, Jun 4, 2015 at 11:10 PM, Vasco Alexandre da Silva Costa <
>> vasco.co...@gmail.com> wrote:
>>
>>> Phong shading with a default material.
>>>
>>> The idea here is to create a simplified self-contained rendering
>>> pipeline that we can use as a basis for a port to OpenCL later.
>>>
>>> Things to be done:
>>>  - rip the acceleration structure out and replace it with the simplified
>>> grid we want to use.
>>>  - cleanly separate the shots from the boolean weaving with minimal
>>> context between stages.
>>>  - cleanly mark all data in/out on each stage and minimize CPU<->GPU
>>> data transfers.
>>>
>>> On Thu, Jun 4, 2015 at 5:04 PM, Vasco Alexandre da Silva Costa <
>>> vasco.co...@gmail.com> wrote:
>>>
>>>> Now with the actual patch attached...
>>>>
>>>>
>>>> On Thu, Jun 4, 2015 at 5:03 PM, Vasco Alexandre da Silva Costa <
>>>> vasco.co...@gmail.com> wrote:
>>>>
>>>>> Simplified code for:
>>>>> - ray generation
>>>>> - writing the color output to the frame buffer
>>>>>
>>>>> On Tue, Jun 2, 2015 at 11:55 PM, Vasco Alexandre da Silva Costa <
>>>>> vasco.co...@gmail.com> wrote:
>>>>>
>>>>>> Hello,
>>>>>> I've been trying to make a really simple bare bones rendering loop in
>>>>>> C without branches, recursion, etc that we can try to parallelize later.
>>>>>>
>>>>>> For now I managed to do this for the ray generation part (patch
>>>>>> attached). Still need to work on the ray traversal and color computation
>>>>>> proper.
>>>>>>
>>>>>> Regards,
>>>>>>
>>>>>> --
>>>>>> Vasco Alexandre da Silva Costa
>>>>>> PhD Student at Department of Information Systems and Computer Science
>>>>>> Instituto Superior Técnico/University of Lisbon, Portugal
>>>>>>
>>>>>
>>>>>
>>>>>
>>>>> --
>>>>> Vasco Alexandre da Silva Costa
>>>>> PhD Student at Department of Information Systems and Computer Science
>>>>> Instituto Superior Técnico/University of Lisbon, Portugal
>>>>>
>>>>
>>>>
>>>>
>>>> --
>>>> Vasco Alexandre da Silva Costa
>>>> PhD Student at Department of Information Systems and Computer Science
>>>> Instituto Superior Técnico/University of Lisbon, Portugal
>>>>
>>>
>>>
>>>
>>> --
>>> Vasco Alexandre da Silva Costa
>>> PhD Student at Department of Information Systems and Computer Science
>>> Instituto Superior Técnico/University of Lisbon, Portugal
>>>
>>
>>
>>
>> --
>> Vasco Alexandre da Silva Costa
>> PhD Student at Department of Information Systems and Computer Science
>> Instituto Superior Técnico/University of Lisbon, Portugal
>>
>
>
>
> --
> Vasco Alexandre da Silva Costa
> PhD Student at Department of Information Systems and Computer Science
> Instituto Superior Técnico/University of Lisbon, Portugal
>



-- 
Vasco Alexandre da Silva Costa
PhD Student at Department of Information Systems and Computer Science
Instituto Superior Técnico/University of Lisbon, Portugal
Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt	(revision 65323)
+++ CMakeLists.txt	(working copy)
@@ -1505,6 +1505,15 @@
 # CHECK_C_FLAG(msse2)
 CHECK_C_FLAG(msse3 BUILD_TYPES Debug)
 
+# OpenMP.
+find_package(OpenMP)
+if (OPENMP_FOUND)
+  CHECK_CXX_FLAG(fopenmp)
+  CHECK_C_FLAG(fopenmp)
+#  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+#  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+endif()
+
 # Check for c90 support with gnu extensions if we're not building for
 # a release so we get more broad portability testing.  Since the
 # default is debug, it will be the more difficult to keep working
Index: src/libbu/parallel.c
===================================================================
--- src/libbu/parallel.c	(revision 65323)
+++ src/libbu/parallel.c	(working copy)
@@ -113,6 +113,8 @@
 #  define rt_thread_t HANDLE
 #endif
 
+#include <omp.h>
+
 #include "bio.h"
 
 #include "bu/debug.h"
@@ -314,6 +316,11 @@
     }
 #  endif
 
+#  if defined(_OPENMP)
+    if (ncpu < 0) {
+        ncpu = omp_get_num_threads();
+    }
+#  endif
 
 #endif /* PARALLEL */
 
Index: src/rt/do.c
===================================================================
--- src/rt/do.c	(revision 65323)
+++ src/rt/do.c	(working copy)
@@ -50,7 +50,11 @@
 #include "./rtuif.h"
 #include "./ext.h"
 
+#include "scanline.h"
 
+#include <omp.h>
+
+
 /***** Variables shared with viewing model *** */
 extern FILE *outfp;			/* optional pixel output file */
 extern mat_t view2model;
@@ -541,8 +545,868 @@
     }
 }
 
+static int ibackground[3] = {0, 0, 50};         /* integer 0..255 version */
+static int inonbackground[3] = {0, 0, 51};	/* integer non-background */
+static size_t pwidth = 3;                       /* Width of each pixel (in bytes) */
+extern fb *fbp;                                 /* Framebuffer handle */
+static struct scanline scanline;
 
+fastf_t
+rt_find_backing_dist(struct rt_shootray_status *ss, struct bu_bitv *backbits);
+const union cutter *
+rt_advance_to_next_cell(register struct rt_shootray_status *ssp);
+
+
+int
+my_shootray(struct resource *resp, struct xray *a_ray, struct rt_i *rtip, register struct application *ap, struct partition *pFinalPart, struct seg *pfinished_segs)
+{
+    struct rt_shootray_status ss;
+    struct seg new_segs;	/* from solid intersections */
+    struct seg waiting_segs;	/* awaiting rt_boolweave() */
+    fastf_t last_bool_start;
+    struct bu_bitv *solidbits;	/* bits for all solids shot so far */
+    struct bu_bitv *backbits=NULL;	/* bits for all solids using pieces that need to be intersected behind
+					   the ray start point */
+    struct bu_ptbl *regionbits;	/* table of all involved regions */
+    struct partition InitialPart;	/* Head of Initial Partitions */
+    struct soltab **stpp;
+    register const union cutter *cutp;
+    fastf_t pending_hit = 0; /* dist of closest odd hit pending */
+
+    ss.ap = ap;
+    ss.resp = resp;
+
+    InitialPart.pt_forw = InitialPart.pt_back = &InitialPart;
+    InitialPart.pt_magic = PT_HD_MAGIC;
+
+    BU_LIST_INIT(&new_segs.l);
+    BU_LIST_INIT(&waiting_segs.l);
+
+    if (!BU_LIST_IS_INITIALIZED(&resp->re_parthead)) {
+	/*
+	 * We've been handed a mostly un-initialized resource struct,
+	 * with only a magic number and a cpu number filled in.  Init
+	 * it and add it to the table.  This is how
+	 * application-provided resource structures are remembered for
+	 * later cleanup by the library.
+	 */
+	rt_init_resource(resp, resp->re_cpu, rtip);
+    }
+
+    solidbits = rt_get_solidbitv(rtip->nsolids, resp);
+
+    if (BU_LIST_IS_EMPTY(&resp->re_region_ptbl)) {
+	BU_ALLOC(regionbits, struct bu_ptbl);
+	bu_ptbl_init(regionbits, 7, "rt_shootray() regionbits ptbl");
+    } else {
+	regionbits = BU_LIST_FIRST(bu_ptbl, &resp->re_region_ptbl);
+	BU_LIST_DEQUEUE(&regionbits->l);
+	BU_CK_PTBL(regionbits);
+    }
+
+    if (!resp->re_pieces && rtip->rti_nsolids_with_pieces > 0) {
+	/* Initialize this processors 'solid pieces' state */
+	rt_res_pieces_init(resp, rtip);
+    }
+    if (UNLIKELY(!BU_LIST_MAGIC_EQUAL(&resp->re_pieces_pending.l, BU_PTBL_MAGIC))) {
+	/* only happens first time through */
+	bu_ptbl_init(&resp->re_pieces_pending, 100, "re_pieces_pending");
+    }
+    bu_ptbl_reset(&resp->re_pieces_pending);
+
+    /*
+     * Record essential statistics in per-processor data structure.
+     */
+    resp->re_nshootray++;
+
+    /* Compute the inverse of the direction cosines */
+    if (a_ray->r_dir[X] < -SQRT_SMALL_FASTF) {
+	ss.abs_inv_dir[X] = -(ss.inv_dir[X]=1.0/a_ray->r_dir[X]);
+	ss.rstep[X] = -1;
+    } else if (a_ray->r_dir[X] > SQRT_SMALL_FASTF) {
+	ss.abs_inv_dir[X] =  (ss.inv_dir[X]=1.0/a_ray->r_dir[X]);
+	ss.rstep[X] = 1;
+    } else {
+	a_ray->r_dir[X] = 0.0;
+	ss.abs_inv_dir[X] = ss.inv_dir[X] = INFINITY;
+	ss.rstep[X] = 0;
+    }
+    if (a_ray->r_dir[Y] < -SQRT_SMALL_FASTF) {
+	ss.abs_inv_dir[Y] = -(ss.inv_dir[Y]=1.0/a_ray->r_dir[Y]);
+	ss.rstep[Y] = -1;
+    } else if (a_ray->r_dir[Y] > SQRT_SMALL_FASTF) {
+	ss.abs_inv_dir[Y] =  (ss.inv_dir[Y]=1.0/a_ray->r_dir[Y]);
+	ss.rstep[Y] = 1;
+    } else {
+	a_ray->r_dir[Y] = 0.0;
+	ss.abs_inv_dir[Y] = ss.inv_dir[Y] = INFINITY;
+	ss.rstep[Y] = 0;
+    }
+    if (a_ray->r_dir[Z] < -SQRT_SMALL_FASTF) {
+	ss.abs_inv_dir[Z] = -(ss.inv_dir[Z]=1.0/a_ray->r_dir[Z]);
+	ss.rstep[Z] = -1;
+    } else if (a_ray->r_dir[Z] > SQRT_SMALL_FASTF) {
+	ss.abs_inv_dir[Z] =  (ss.inv_dir[Z]=1.0/a_ray->r_dir[Z]);
+	ss.rstep[Z] = 1;
+    } else {
+	a_ray->r_dir[Z] = 0.0;
+	ss.abs_inv_dir[Z] = ss.inv_dir[Z] = INFINITY;
+	ss.rstep[Z] = 0;
+    }
+    VMOVE(ap->a_inv_dir, ss.inv_dir);
+
+    /*
+     * If ray does not enter the model RPP, skip on.  If ray ends
+     * exactly at the model RPP, trace it.
+     */
+    if (!rt_in_rpp(a_ray, ss.inv_dir, rtip->mdl_min, rtip->mdl_max)  ||
+	a_ray->r_max < 0.0) {
+	cutp = &rtip->rti_inf_box;
+	if (cutp->bn.bn_len > 0) {
+	    /* Model has infinite solids, need to fire at them. */
+	    ss.box_start = BACKING_DIST;
+	    ss.model_start = 0;
+	    ss.box_end = ss.model_end = INFINITY;
+	    ss.lastcut = CUTTER_NULL;
+	    ss.old_status = (struct rt_shootray_status *)NULL;
+	    ss.curcut = cutp;
+	    ss.lastcell = ss.curcut;
+	    VMOVE(ss.curmin, rtip->mdl_min);
+	    VMOVE(ss.curmax, rtip->mdl_max);
+	    last_bool_start = BACKING_DIST;
+	    ss.newray = *a_ray;		/* struct copy */
+	    ss.odist_corr = ss.obox_start = ss.obox_end = -99;
+	    ss.dist_corr = 0.0;
+	    goto start_cell;
+	}
+	resp->re_nmiss_model++;
+	if (ap->a_miss)
+	    ap->a_return = ap->a_miss(ap);
+	else
+	    ap->a_return = 0;
+	goto out;
+    }
+
+    /*
+     * The interesting part of the ray starts at distance 0.  If the
+     * ray enters the model at a negative distance, (i.e., the ray
+     * starts within the model RPP), we only look at little bit behind
+     * (BACKING_DIST) to see if we are just coming out of something,
+     * but never further back than the intersection with the model
+     * RPP.  If the ray enters the model at a positive distance, we
+     * always start there.  It is vital that we never pick a start
+     * point outside the model RPP, or the space partitioning tree
+     * will pick the wrong box and the ray will miss it.
+     *
+     * BACKING_DIST should probably be determined by floating point
+     * noise factor due to model RPP size -vs- number of bits of
+     * floating point mantissa significance, rather than a constant,
+     * but that is too hideous to think about here.  Also note that
+     * applications that really depend on knowing what region they are
+     * leaving from should probably back their own start-point up,
+     * rather than depending on it here, but it isn't much trouble
+     * here.
+     *
+     * Modification by JRA for pieces methodology:
+     *
+     * The original algorithm here assumed that if we encountered any
+     * primitive along the positive direction of the ray, ALL its
+     * intersections would be calculated.  With pieces, we may see
+     * only an exit hit if the entrance piece is in a space partition
+     * cell that is more than "BACKING_DIST" behind the ray start
+     * point (leading to incorrect results).  I have modified the
+     * setting of "ss.box_start" (when pieces are present and the ray
+     * start point is inside the model bounding box) as follows (see
+     * rt_find_backing_dist()):
+     *
+     * - The ray is traced through the space partitioning tree.
+     *
+     * - The ray is intersected with the bounding box of each
+     * primitive using pieces in each cell
+     *
+     * - The minimum of all these intersections is set as the initial
+     * "ss.box_start".
+     *
+     * - The "backbits" bit vector has a bit set for each of the
+     * primitives using pieces that have bounding boxes that extend
+     * behind the ray start point
+     *
+     * Further below (in the "pieces" loop), I have added code to
+     * ignore primitives that do not have a bit set in the backbits
+     * vector when we are behind the ray start point.
+     */
+
+    /* these two values set the point where the ray tracing actually
+     * begins and ends
+     */
+    ss.box_start = ss.model_start = a_ray->r_min;
+    ss.box_end = ss.model_end = a_ray->r_max;
+
+    if (rtip->rti_nsolids_with_pieces > 0) {
+	/* pieces are present */
+	if (ss.box_start < BACKING_DIST) {
+	    /* the first ray intersection with the model bounding box
+	     * is more than BACKING_DIST behind the ray start point
+	     */
+
+	    /* get a bit vector to keep track of which primitives need
+	     * to be intersected behind the ray start point (those
+	     * having bounding boxes extending behind the ray start
+	     * point and using pieces)
+	     */
+	    backbits = rt_get_solidbitv(rtip->nsolids, resp);
+
+	    /* call "rt_find_backing_dist()" to calculate the required
+	     * start point for calculation, and to fill in the
+	     * "backbits" bit vector
+	     */
+	    ss.box_start = rt_find_backing_dist(&ss, backbits);
+	}
+    } else {
+	/* no pieces present, use the old scheme */
+	if (ss.box_start < BACKING_DIST)
+	    ss.box_start = BACKING_DIST; /* Only look a little bit behind */
+    }
+
+    ss.lastcut = CUTTER_NULL;
+    ss.old_status = (struct rt_shootray_status *)NULL;
+    ss.curcut = &rtip->rti_CutHead;
+    if (ss.curcut->cut_type == CUT_NUGRIDNODE) {
+	ss.lastcell = CUTTER_NULL;
+	VSET(ss.curmin, ss.curcut->nugn.nu_axis[X][0].nu_spos,
+	     ss.curcut->nugn.nu_axis[Y][0].nu_spos,
+	     ss.curcut->nugn.nu_axis[Z][0].nu_spos);
+	VSET(ss.curmax, ss.curcut->nugn.nu_axis[X][ss.curcut->nugn.nu_cells_per_axis[X]-1].nu_epos,
+	     ss.curcut->nugn.nu_axis[Y][ss.curcut->nugn.nu_cells_per_axis[Y]-1].nu_epos,
+	     ss.curcut->nugn.nu_axis[Z][ss.curcut->nugn.nu_cells_per_axis[Z]-1].nu_epos);
+    } else if (ss.curcut->cut_type == CUT_CUTNODE ||
+	       ss.curcut->cut_type == CUT_BOXNODE) {
+	ss.lastcell = ss.curcut;
+	VMOVE(ss.curmin, rtip->mdl_min);
+	VMOVE(ss.curmax, rtip->mdl_max);
+    }
+
+    last_bool_start = BACKING_DIST;
+    ss.newray = *a_ray;		/* struct copy */
+    ss.odist_corr = ss.obox_start = ss.obox_end = -99;
+    ss.dist_corr = 0.0;
+    ss.box_num = 0;
+
+    /*
+     * While the ray remains inside model space, push from box to box
+     * until ray emerges from model space again (or first hit is
+     * found, if user is impatient).  It is vitally important to
+     * always stay within the model RPP, or the space partitioning tree
+     * will pick wrong boxes & miss them.
+     */
+    while ((cutp = rt_advance_to_next_cell(&ss)) != CUTTER_NULL) {
+    start_cell:
+	if (cutp->bn.bn_len <= 0 && cutp->bn.bn_piecelen <= 0) {
+	    /* Push ray onwards to next box */
+	    ss.box_start = ss.box_end;
+	    resp->re_nempty_cells++;
+	    continue;
+	}
+
+	/* Consider all "pieces" of all solids within the box */
+	pending_hit = ss.box_end;
+	if (cutp->bn.bn_piecelen > 0) {
+	    register struct rt_piecelist *plp;
+
+	    plp = &(cutp->bn.bn_piecelist[cutp->bn.bn_piecelen-1]);
+	    for (; plp >= cutp->bn.bn_piecelist; plp--) {
+		struct rt_piecestate *psp;
+		struct soltab *stp;
+		int ret;
+		int had_hits_before;
+
+		RT_CK_PIECELIST(plp);
+
+		/* Consider all pieces of this one solid in this
+		 * cell.
+		 */
+		stp = plp->stp;
+		RT_CK_SOLTAB(stp);
+
+		if (backbits && ss.box_end < BACKING_DIST && BU_BITTEST(backbits, stp->st_bit) == 0) {
+		    /* we are behind the ray start point and this
+		     * primitive is not one that we need to intersect
+		     * back here.
+		     */
+		    continue;
+		}
+
+		psp = &(resp->re_pieces[stp->st_piecestate_num]);
+		RT_CK_PIECESTATE(psp);
+		if (psp->ray_seqno != resp->re_nshootray) {
+		    /* state is from an earlier ray, scrub */
+		    BU_BITV_ZEROALL(psp->shot);
+		    psp->ray_seqno = resp->re_nshootray;
+		    rt_htbl_reset(&psp->htab);
+
+		    /* Compute ray entry and exit to entire solid's
+		     * bounding box.
+		     */
+		    if (!rt_in_rpp(&ss.newray, ss.inv_dir,
+				   stp->st_min, stp->st_max)) {
+			resp->re_prune_solrpp++;
+			BU_BITSET(solidbits, stp->st_bit);
+			continue;	/* MISS */
+		    }
+		    psp->mindist = ss.newray.r_min + ss.dist_corr;
+		    psp->maxdist = ss.newray.r_max + ss.dist_corr;
+		    had_hits_before = 0;
+		} else {
+		    if (BU_BITTEST(solidbits, stp->st_bit)) {
+			/* we missed the solid RPP in an earlier cell */
+			resp->re_ndup++;
+			continue;	/* already shot */
+		    }
+		    had_hits_before = psp->htab.end;
+		}
+
+		/*
+		 * Allow this solid to shoot at all of its 'pieces' in
+		 * this cell, all at once.  'newray' has been
+		 * transformed to be near to this cell, and
+		 * 'dist_corr' is the additive correction factor that
+		 * ft_piece_shot() must apply to hits calculated using
+		 * 'newray'.
+		 */
+		resp->re_piece_shots++;
+		psp->cutp = cutp;
+
+		ret = -1;
+		if (stp->st_meth->ft_piece_shot) {
+		    ret = stp->st_meth->ft_piece_shot(psp, plp, ss.dist_corr, &ss.newray, ap, &waiting_segs);
+		}
+		if (ret <= 0) {
+		    /* No hits at all */
+		    resp->re_piece_shot_miss++;
+		} else {
+		    resp->re_piece_shot_hit++;
+		}
+
+		/* See if this solid has been fully processed yet.  If
+		 * ray has passed through bounding volume, we're done.
+		 * ft_piece_hitsegs() will only be called once per
+		 * ray.
+		 */
+		if (ss.box_end > psp->maxdist && psp->htab.end > 0) {
+		    /* Convert hits into segs */
+		    /* Distance correction was handled in ft_piece_shot */
+		    if (stp->st_meth->ft_piece_hitsegs)
+			stp->st_meth->ft_piece_hitsegs(psp, &waiting_segs, ap);
+		    rt_htbl_reset(&psp->htab);
+		    BU_BITSET(solidbits, stp->st_bit);
+
+		    if (had_hits_before)
+			bu_ptbl_rm(&resp->re_pieces_pending, (long *)psp);
+		} else {
+		    if (!had_hits_before)
+			bu_ptbl_ins_unique(&resp->re_pieces_pending, (long *)psp);
+		}
+	    }
+	}
+
+	/* Consider all solids within the box */
+	if (cutp->bn.bn_len > 0 && ss.box_end >= BACKING_DIST) {
+	    stpp = &(cutp->bn.bn_list[cutp->bn.bn_len-1]);
+	    for (; stpp >= cutp->bn.bn_list; stpp--) {
+		register struct soltab *stp = *stpp;
+		int ret;
+
+		if (BU_BITTEST(solidbits, stp->st_bit)) {
+		    resp->re_ndup++;
+		    continue;	/* already shot */
+		}
+
+		/* Shoot a ray */
+		BU_BITSET(solidbits, stp->st_bit);
+
+		/* Check against bounding RPP, if desired by solid */
+		if (stp->st_meth->ft_use_rpp) {
+		    if (!rt_in_rpp(&ss.newray, ss.inv_dir,
+				   stp->st_min, stp->st_max)) {
+			resp->re_prune_solrpp++;
+			continue;	/* MISS */
+		    }
+		    if (ss.dist_corr + ss.newray.r_max < BACKING_DIST) {
+			resp->re_prune_solrpp++;
+			continue;	/* MISS */
+		    }
+		}
+
+		resp->re_shots++;
+		BU_LIST_INIT(&(new_segs.l));
+
+		ret = -1;
+
+		if (stp->st_meth->ft_shot) {
+		    ret = stp->st_meth->ft_shot(stp, &ss.newray, ap, &new_segs);
+		}
+		if (ret <= 0) {
+		    resp->re_shot_miss++;
+		    continue;	/* MISS */
+		}
+
+		/* Add seg chain to list awaiting rt_boolweave() */
+		{
+		    register struct seg *s2;
+		    while (BU_LIST_WHILE(s2, seg, &(new_segs.l))) {
+			BU_LIST_DEQUEUE(&(s2->l));
+			/* Restore to original distance */
+			s2->seg_in.hit_dist += ss.dist_corr;
+			s2->seg_out.hit_dist += ss.dist_corr;
+			s2->seg_in.hit_rayp = s2->seg_out.hit_rayp = a_ray;
+			BU_LIST_INSERT(&(waiting_segs.l), &(s2->l));
+		    }
+		}
+		resp->re_shot_hit++;
+	    }
+	}
+
+	/*
+	 * If a_onehit == 0 and a_ray_length <= 0, then the ray is
+	 * traced to +infinity.
+	 *
+	 * If a_onehit != 0, then it indicates how many hit points
+	 * (which are greater than the ray start point of 0.0) the
+	 * application requires, i.e., partitions with inhit >= 0.  (If
+	 * negative, indicates number of non-air hits needed).  If
+	 * this box yielded additional segments, immediately weave
+	 * them into the partition list, and perform final boolean
+	 * evaluation.  If this results in the required number of
+	 * final partitions, then cease ray-tracing and hand the
+	 * partitions over to the application.  All partitions will
+	 * have valid in and out distances.  a_ray_length is treated
+	 * similarly to a_onehit.
+	 */
+	if (BU_LIST_NON_EMPTY(&(waiting_segs.l))) {
+	    if (ap->a_onehit != 0) {
+		int done;
+
+		/* Weave these segments into partition list */
+		rt_boolweave(pfinished_segs, &waiting_segs, &InitialPart, ap);
+
+		if (BU_PTBL_LEN(&resp->re_pieces_pending) > 0) {
+
+		    /* Find the lowest pending mindist, that's as far
+		     * as boolfinal can progress to.
+		     */
+		    struct rt_piecestate **psp;
+		    for (BU_PTBL_FOR(psp, (struct rt_piecestate **), &resp->re_pieces_pending)) {
+			register fastf_t dist;
+
+			dist = (*psp)->mindist;
+			if (dist < pending_hit) {
+			    pending_hit = dist;
+			}
+		    }
+		}
+
+		/* Evaluate regions up to end of good segs */
+		if (ss.box_end < pending_hit) pending_hit = ss.box_end;
+		done = rt_boolfinal(&InitialPart, pFinalPart, last_bool_start, pending_hit, regionbits, ap, solidbits);
+		last_bool_start = pending_hit;
+
+		/* See if enough partitions have been acquired */
+		if (done > 0) goto hitit;
+	    }
+	}
+
+	if (ap->a_ray_length > 0.0 &&
+	    ss.box_end >= ap->a_ray_length &&
+	    ap->a_ray_length < pending_hit)
+	    goto weave;
+
+	/* Push ray onwards to next box */
+	ss.box_start = ss.box_end;
+    }
+
+    /*
+     * Ray has finally left known space -- Weave any remaining
+     * segments into the partition list.
+     */
+weave:
+    /* Process any pending hits into segs */
+    if (BU_PTBL_LEN(&resp->re_pieces_pending) > 0) {
+	struct rt_piecestate **psp;
+	for (BU_PTBL_FOR(psp, (struct rt_piecestate **), &resp->re_pieces_pending)) {
+	    if ((*psp)->htab.end > 0) {
+		/* Convert any pending hits into segs */
+		/* Distance correction was handled in ft_piece_shot */
+		(*psp)->stp->st_meth->ft_piece_hitsegs(*psp, &waiting_segs, ap);
+		rt_htbl_reset(&(*psp)->htab);
+	    }
+	    *psp = NULL;
+	}
+	bu_ptbl_reset(&resp->re_pieces_pending);
+    }
+
+    if (BU_LIST_NON_EMPTY(&(waiting_segs.l))) {
+	rt_boolweave(pfinished_segs, &waiting_segs, &InitialPart, ap);
+    }
+
+    /* finished_segs chain now has all segments hit by this ray */
+    if (BU_LIST_IS_EMPTY(&(pfinished_segs->l))) {
+        ap->a_return = 0;
+	goto out;
+    }
+
+    /*
+     * All intersections of the ray with the model have been computed.
+     * Evaluate the boolean trees over each partition.
+     */
+    (void)rt_boolfinal(&InitialPart, pFinalPart, BACKING_DIST, INFINITY, regionbits, ap, solidbits);
+
+    if (pFinalPart->pt_forw == pFinalPart) {
+        ap->a_return = 0;
+	RT_FREE_PT_LIST(&InitialPart, resp);
+	goto out;
+    }
+
+    /*
+     * Ray/model intersections exist.  Pass the list to the user's
+     * a_hit() routine.  Note that only the hit_dist elements of
+     * pt_inhit and pt_outhit have been computed yet.  To compute both
+     * hit_point and hit_normal, use the
+     *
+     * RT_HIT_NORMAL(NULL, hitp, stp, rayp, 0);
+     *
+     * macro.  To compute just hit_point, use
+     *
+     * VJOIN1(hitp->hit_point, rp->r_pt, hitp->hit_dist, rp->r_dir);
+     */
+hitit:
+    /*
+     * Before recursing, release storage for unused Initial
+     * partitions.  finished_segs can not be released yet, because
+     * FinalPart partitions will point to hits in those segments.
+     */
+    RT_FREE_PT_LIST(&InitialPart, resp);
+
+    /*
+     * finished_segs is only used by special hit routines which don't
+     * follow the traditional solid modeling paradigm.
+     */
+    ap->a_return = 1;
+
+    /*
+     * Processing of this ray is complete.
+     */
+out:
+    /* Return dynamic resources to their freelists.  */
+    BU_CK_BITV(solidbits);
+    BU_LIST_APPEND(&resp->re_solid_bitv, &solidbits->l);
+    if (backbits) {
+	BU_CK_BITV(backbits);
+	BU_LIST_APPEND(&resp->re_solid_bitv, &backbits->l);
+    }
+    BU_CK_PTBL(regionbits);
+    BU_LIST_APPEND(&resp->re_region_ptbl, &regionbits->l);
+
+    /* Clean up any pending hits */
+    if (BU_PTBL_LEN(&resp->re_pieces_pending) > 0) {
+	struct rt_piecestate **psp;
+	for (BU_PTBL_FOR(psp, (struct rt_piecestate **), &resp->re_pieces_pending)) {
+	    if ((*psp)->htab.end > 0)
+		rt_htbl_reset(&(*psp)->htab);
+	}
+	bu_ptbl_reset(&resp->re_pieces_pending);
+    }
+
+    /* Terminate any logging */
+    return ap->a_return;
+}
+
 /**
+ * Arrange to have the pixel output.  a_uptr has region pointer, for
+ * reference.
+ */
+void
+my_view_pixel(int a_x, int a_y, int a_user, point_t a_color)
+{
+    int r, g, b;
+    unsigned char *pixelp;
+    struct scanline *slp;
+
+    if (a_user == 0) {
+	/* Shot missed the model, don't dither */
+	r = ibackground[0];
+	g = ibackground[1];
+	b = ibackground[2];
+	VSETALL(a_color, -1e-20);	/* background flag */
+    } else {
+        r = a_color[0]*255;
+        g = a_color[1]*255;
+        b = a_color[2]*255;
+	if (r > 255) r = 255;
+	else if (r < 0) r = 0;
+	if (g > 255) g = 255;
+	else if (g < 0) g = 0;
+	if (b > 255) b = 255;
+	else if (b < 0) b = 0;
+	if (r == ibackground[0] && g == ibackground[1] &&  b == ibackground[2]) {
+	    r = inonbackground[0];
+	    g = inonbackground[1];
+	    b = inonbackground[2];
+	}
+
+	/* Make sure it's never perfect black */
+	if (r==0 && g==0 && b==0)
+	    b = 1;
+    }
+
+    slp = &scanline;
+    if (slp->sl_buf == (unsigned char *)0) {
+        slp->sl_buf = (unsigned char *)bu_calloc(width, pwidth, "sl_buf scanline buffer");
+        slp->sl_left = width;
+    }
+    pixelp = slp->sl_buf+(a_x*pwidth);
+    *pixelp++ = r;
+    *pixelp++ = g;
+    *pixelp++ = b;
+    if (--(slp->sl_left) <= 0) {
+        /* do_eol */
+        if (fbp != FB_NULL) {
+            size_t npix;
+            bu_semaphore_acquire(BU_SEM_SYSCALL);
+            npix = fb_write(fbp, 0, a_y, (unsigned char *)slp->sl_buf, width);
+            bu_semaphore_release(BU_SEM_SYSCALL);
+            if (npix < width)
+                bu_exit(EXIT_FAILURE, "scanline fb_write error");
+        }
+        bu_free(slp->sl_buf, "sl_buf scanline buffer");
+        slp->sl_buf = (unsigned char *)0;
+    }
+}
+
+/**
+ * Call the material-specific shading function, after making certain
+ * that all shadework fields desired have been provided.
+ *
+ * Returns -
+ * 0 on failure
+ * 1 on success
+ *
+ * But of course, nobody cares what this returns.  Everyone calls us
+ * as (void)viewshade()
+ */
+int
+my_viewshade(struct xray *a_ray, double perp, const struct partition *pp, struct hit *sw_hit, point_t *color)
+{
+    fastf_t cosine;
+    point_t matcolor;		/* Material color */
+
+    /* Default color is white (uncolored) */
+    VSETALL(matcolor, 1);
+
+    if (sw_hit->hit_dist < 0.0)
+	sw_hit->hit_dist = 0.0;	/* Eye inside solid */
+
+    /* If optional inputs are required, have them computed */
+    VJOIN1(sw_hit->hit_point, a_ray->r_pt, sw_hit->hit_dist, a_ray->r_dir);
+
+    /* shade inputs */
+    if (sw_hit->hit_dist < 0.0) {
+        /* Eye inside solid, orthoview */
+        VREVERSE(sw_hit->hit_normal, a_ray->r_dir);
+    } else {
+        fastf_t f;
+        /* Get surface normal for hit point */
+        if (pp->pt_inseg->seg_stp->st_meth->ft_norm) {
+            pp->pt_inseg->seg_stp->st_meth->ft_norm(sw_hit, pp->pt_inseg->seg_stp, a_ray);
+        }
+        if (pp->pt_inflip) {
+            VREVERSE(sw_hit->hit_normal, sw_hit->hit_normal);
+        } else {
+            VMOVE(sw_hit->hit_normal, sw_hit->hit_normal);
+        }
+
+        /* Check to make sure normals are OK */
+        f = VDOT(a_ray->r_dir, sw_hit->hit_normal);
+        if (f > 0.0 && !(((f) < 0) ? ((-f)<=perp) : (f <= perp))) {
+            /* reverse the normal so it's lit */
+            VREVERSE(sw_hit->hit_normal, sw_hit->hit_normal);
+        }
+    }
+    
+    /* Invoke the actual shader (diffuse) */
+    /* Diffuse reflectance from "Ambient" light source (at eye) */
+    if ((cosine = -VDOT(sw_hit->hit_normal, a_ray->r_dir)) > 0.0) {
+        if (cosine > 1.00001) {
+            cosine = 1;
+        }
+        cosine *= AmbientIntensity;
+        VSCALE(*color, matcolor, cosine);
+    } else {
+        VSETALL(*color, 0);
+    }
+    return 1;
+}
+
+/**
+ * Manage the coloring of whatever it was we just hit.  This can be a
+ * recursive procedure.
+ */
+int
+my_colorview(struct application *ap, struct partition *PartHeadp, point_t *color)
+{
+    struct partition *pp;
+    struct hit *hitp;
+
+    for (pp = PartHeadp->pt_forw; pp != PartHeadp; pp = pp->pt_forw)
+	if (pp->pt_outhit->hit_dist >= 0.0) break;
+
+    if (pp == PartHeadp) {
+	return 0;
+    }
+
+    hitp = pp->pt_inhit;
+    ap->a_uptr = (void *)pp->pt_regionp;	/* note which region was shaded */
+
+    /* individual shaders must handle reflection & refraction */
+    (void)my_viewshade(&ap->a_ray, ap->a_rt_i->rti_tol.perp, pp, pp->pt_inhit, color);
+
+    /* XXX This is always negative when eye is inside air solid */
+    ap->a_dist = hitp->hit_dist;
+    return 1;
+}
+
+
+void
+my_pixel(point_t *buf, struct resource *resp, int pixelnum)
+{
+    struct application a;
+    vect_t point;		/* Ref point on eye or view plane */
+
+    struct seg finished_segs;	/* processed by rt_boolweave() */
+    struct partition FinalPart;	/* Head of Final Partitions */
+    int a_y, a_x;
+#if 0
+    int a_user;
+#endif
+    point_t a_color;
+
+    int hit;
+    point_t color;
+
+    /* Obtain fresh copy of global application struct */
+    a = APP;				/* struct copy */
+    a.a_magic = RT_AP_MAGIC;
+    a.a_ray.magic = RT_RAY_MAGIC;
+    a.a_resource = resp;
+
+    a_y = (int)(pixelnum/width);
+    a_x = (int)(pixelnum - (a_y * width));
+
+    /* our starting point, used for non-jitter */
+    VJOIN2 (point, viewbase_model, a_x, dx_model, a_y, dy_model);
+
+    /* not tracing the corners of a prism by default */
+    a.a_pixelext=NULL;
+
+    /* LOOP BELOW IS UNROLLED ONE SAMPLE SINCE THAT'S THE COMMON CASE.
+     *
+     * XXX - If you edit the unrolled or non-unrolled section, be sure
+     * to edit the other section.
+     */
+    /* not hypersampling, so just do it */
+
+    VMOVE(a.a_ray.r_pt, point);
+    VMOVE(a.a_ray.r_dir, APP.a_ray.r_dir);
+    
+    a.a_level = 0;		/* recursion level */
+    a.a_purpose = "main ray";
+
+    BU_LIST_INIT(&finished_segs.l);
+
+    FinalPart.pt_forw = FinalPart.pt_back = &FinalPart;
+    FinalPart.pt_magic = PT_HD_MAGIC;
+    a.a_Final_Part_hdp = &FinalPart;
+    a.a_finished_segs_hdp = &finished_segs;
+    
+    hit = my_shootray(resp, &a.a_ray, a.a_rt_i, &a, &FinalPart, &finished_segs);
+
+    if (hit) {
+        /* hit */
+#if 0
+        a_user = 1;   		/* Signal view_pixel:  HIT */
+#endif
+        a.a_return = my_colorview(&a, &FinalPart, &color);
+        VMOVE(a_color, color);
+    } else {
+        /* miss */
+#if 0
+        a_user = 0;             	/* Signal view_pixel:  MISS */
+#endif
+        a.a_return = 0;
+        VMOVE(a_color, background);	/* In case someone looks */
+    }
+
+    RT_FREE_SEG_LIST(&finished_segs, resp);
+    RT_FREE_PT_LIST(&FinalPart, resp);
+    
+    VMOVE(buf[a_x*width+a_y], a_color);
+    return;
+}
+
+void
+my_run(int cur_pixel, int last_pixel)
+{
+    int per_processor_chunk = 0;	/* how many pixels to do at once */
+
+    int pixelnum;
+
+    point_t *buf;
+    
+    /* The more CPUs at work, the bigger the bites we take */
+    if (per_processor_chunk <= 0) per_processor_chunk = npsw;
+
+    /*bu_semaphore_acquire(RT_SEM_WORKER);*/
+
+    buf = (point_t *)bu_calloc(sizeof(point_t), width*height, "buf scanline buffer");
+
+#pragma omp parallel for
+    for (pixelnum = cur_pixel; pixelnum <= last_pixel; pixelnum++) {
+        int cpu;
+        struct resource *resp;
+
+        cpu = omp_get_thread_num();
+        resp = &resource[cpu];
+        RT_CK_RESOURCE(resp);
+
+        my_pixel(buf, resp, pixelnum);
+    }
+
+    for (pixelnum = cur_pixel; pixelnum <= last_pixel; pixelnum++) {
+        int a_y, a_x;
+
+        a_y = (int)(pixelnum/width);
+        a_x = (int)(pixelnum - (a_y * width));
+        my_view_pixel(a_x, a_y, 1, buf[a_x*width+a_y]);
+    }
+
+    bu_free(buf, "buf scanline buffer");
+    
+    /*bu_semaphore_release(RT_SEM_WORKER);*/
+
+    /* Tally up the statistics */
+    {
+        size_t cpu;
+        for (cpu=0; cpu < npsw; cpu++) {
+            if (resource[cpu].re_magic != RESOURCE_MAGIC) {
+                bu_log("ERROR: CPU %d resources corrupted, statistics bad\n", cpu);
+                continue;
+            }
+            rt_add_res_stats(APP.a_rt_i, &resource[cpu]);
+        }
+    }
+    return;
+}
+
+/**
  * Do all the actual work to run a frame.
  *
  * Returns -1 on error, 0 if OK.
@@ -810,7 +1674,7 @@
      * It may prove desirable to do this in chunks
      */
     rt_prep_timer();
-
+#if 0
     if (incr_mode) {
 	for (incr_level = 1; incr_level <= incr_nlevel; incr_level++) {
 	    if (incr_level > 1)
@@ -835,6 +1699,15 @@
 	pix_start = 0;
 	pix_end = (int)(height*width - 1);
     }
+#endif
+    printf("pix_start: %d, pix_end: %d\n", pix_start, pix_end);
+    my_run(pix_start, pix_end);
+
+    /* Reset values to full size, for next frame (if any) */
+    pix_start = 0;
+    pix_end = (int)(height*width - 1);
+    /**/
+    
     utime = rt_get_timer(&times, &wallclock);
 
     /*
------------------------------------------------------------------------------
_______________________________________________
BRL-CAD Developer mailing list
brlcad-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/brlcad-devel

Reply via email to