Hello, I've been trying to write a really simple, bare-bones rendering loop in C, without branches, recursion, etc., that we can try to parallelize later.
So far I have managed to do this for the ray generation part (patch attached). I still need to work on the ray traversal and the color computation proper. Regards, -- Vasco Alexandre da Silva Costa PhD Student at Department of Information Systems and Computer Science Instituto Superior Técnico/University of Lisbon, Portugal
Index: src/rt/do.c =================================================================== --- src/rt/do.c (revision 65157) +++ src/rt/do.c (working copy) @@ -542,6 +542,89 @@ } +void +my_pixel(int cpu, int pixelnum) +{ + struct application a; + vect_t point; /* Ref point on eye or view plane */ + vect_t colorsum = {(fastf_t)0.0, (fastf_t)0.0, (fastf_t)0.0}; + + + /* Obtain fresh copy of global application struct */ + a = APP; /* struct copy */ + a.a_resource = &resource[cpu]; + + a.a_y = (int)(pixelnum/width); + a.a_x = (int)(pixelnum - (a.a_y * width)); + + /* our starting point, used for non-jitter */ + VJOIN2 (point, viewbase_model, a.a_x, dx_model, a.a_y, dy_model); + + /* not tracing the corners of a prism by default */ + a.a_pixelext=(struct pixel_ext *)NULL; + + /* LOOP BELOW IS UNROLLED ONE SAMPLE SINCE THAT'S THE COMMON CASE. + * + * XXX - If you edit the unrolled or non-unrolled section, be sure + * to edit the other section. + */ + /* not hypersampling, so just do it */ + + VMOVE(a.a_ray.r_pt, point); + VMOVE(a.a_ray.r_dir, APP.a_ray.r_dir); + + a.a_level = 0; /* recursion level */ + a.a_purpose = "main ray"; + (void)rt_shootray(&a); + VADD2(colorsum, colorsum, a.a_color); + + view_pixel(&a); + return; +} + +void +my_run(int a, int b) +{ + size_t cpu = 0; + int per_processor_chunk = 0; /* how many pixels to do at once */ + + int cur_pixel; /* current pixel number, 0..last_pixel */ + int last_pixel; /* last pixel number */ + + int pixelnum; + + cur_pixel = a; + last_pixel = b; + + /* + * SERIAL case -- one CPU does all the work. 
+ */ + npsw = 1; + + /* The more CPUs at work, the bigger the bites we take */ + if (per_processor_chunk <= 0) per_processor_chunk = npsw; + + RT_CK_RESOURCE(&resource[cpu]); + + bu_semaphore_acquire(RT_SEM_WORKER); + + for (pixelnum = cur_pixel; pixelnum <= last_pixel; pixelnum++) { + my_pixel(cpu, pixelnum); + } + + bu_semaphore_release(RT_SEM_WORKER); + + /* Tally up the statistics */ + for (cpu=0; cpu < npsw; cpu++) { + if (resource[cpu].re_magic != RESOURCE_MAGIC) { + bu_log("ERROR: CPU %d resources corrupted, statistics bad\n", cpu); + continue; + } + rt_add_res_stats(APP.a_rt_i, &resource[cpu]); + } + return; +} + /** * Do all the actual work to run a frame. * @@ -810,7 +893,7 @@ * It may prove desirable to do this in chunks */ rt_prep_timer(); - +#if 0 if (incr_mode) { for (incr_level = 1; incr_level <= incr_nlevel; incr_level++) { if (incr_level > 1) @@ -835,6 +918,15 @@ pix_start = 0; pix_end = (int)(height*width - 1); } +#endif + printf("pix_start: %d, pix_end: %d\n", pix_start, pix_end); + my_run(pix_start, pix_end); + + /* Reset values to full size, for next frame (if any) */ + pix_start = 0; + pix_end = (int)(height*width - 1); + /**/ + utime = rt_get_timer(&times, &wallclock); /*
------------------------------------------------------------------------------
_______________________________________________ BRL-CAD Developer mailing list brlcad-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/brlcad-devel