Commit: 8ca3cf816627c5361492abb9f700ce547e3f1196
Author: Sergey Sharybin
Date: Thu Jun 16 11:25:47 2016 +0200
Branches: cycles_bvh
https://developer.blender.org/rB8ca3cf816627c5361492abb9f700ce547e3f1196
Cycles: Hook up proper node intersection function to regular BVH camera rays traversal
===================================================================
M intern/cycles/kernel/geom/geom_bvh_traversal.h
===================================================================
diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h
index 65c4255..add8fce 100644
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h
@@ -49,7 +49,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
* - likely and unlikely for if() statements
* - test restrict attribute for pointers
*/
-
+
/* traversal stack in CUDA thread-local memory */
int traversalStack[BVH_STACK_SIZE];
traversalStack[0] = ENTRYPOINT_SENTINEL;
@@ -79,9 +79,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if defined(__KERNEL_SSE2__)
const shuffle_swap_t shuf_identity = shuffle_swap_identity();
const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
+
const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
ssef Psplat[3], idirsplat[3];
+ ssef tnear(0.0f), tfar(isect->t);
shuffle_swap_t shufflexyz[3];
Psplat[0] = ssef(P.x);
@@ -101,121 +102,59 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
/* traverse internal nodes */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL)
{
- bool traverseChild0, traverseChild1;
- int nodeAddrChild1;
-
-#if !defined(__KERNEL_SSE2__)
- /* Intersect two child bounding boxes, non-SSE
version */
- float t = isect->t;
-
- /* fetch node data */
+ int nodeAddrChild1, traverse_mask;
+ float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes,
nodeAddr+0);
- float4 node0 = kernel_tex_fetch(__bvh_nodes,
nodeAddr+1);
- float4 node1 = kernel_tex_fetch(__bvh_nodes,
nodeAddr+2);
- float4 node2 = kernel_tex_fetch(__bvh_nodes,
nodeAddr+3);
-
- /* intersect ray against child nodes */
- NO_EXTENDED_PRECISION float c0lox = (node0.x -
P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0hix = (node0.z -
P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0loy = (node1.x -
P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0hiy = (node1.z -
P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0loz = (node2.x -
P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0hiz = (node2.z -
P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0min =
max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
- NO_EXTENDED_PRECISION float c0max =
min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
-
- NO_EXTENDED_PRECISION float c1lox = (node0.y -
P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1hix = (node0.w -
P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1loy = (node1.y -
P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1hiy = (node1.w -
P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1loz = (node2.y -
P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1hiz = (node2.w -
P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1min =
max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
- NO_EXTENDED_PRECISION float c1max =
min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
+#if !defined(__KERNEL_SSE2__)
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
- float hdiff = 1.0f + difl;
- float ldiff = 1.0f - difl;
- if(__float_as_int(cnodes.x) &
PATH_RAY_CURVE) {
- c0min = max(ldiff * c0min,
c0min - extmax);
- c0max = min(hdiff * c0max,
c0max + extmax);
- }
- if(__float_as_int(cnodes.y) &
PATH_RAY_CURVE) {
- c1min = max(ldiff * c1min,
c1min - extmax);
- c1max = min(hdiff * c1max,
c1max + extmax);
- }
}
+ else
# endif
-
- /* decide which nodes to traverse next */
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance
hit, how to solve? */
- traverseChild0 = (c0max >= c0min) &&
(__float_as_uint(cnodes.x) & visibility);
- traverseChild1 = (c1max >= c1min) &&
(__float_as_uint(cnodes.y) & visibility);
-# else
- traverseChild0 = (c0max >= c0min);
- traverseChild1 = (c1max >= c1min);
-# endif
-
+ {
+ }
#else // __KERNEL_SSE2__
- /* Intersect two child bounding boxes, SSE3
version adapted from Embree */
-
- /* fetch node data */
- const ssef *bvh_nodes =
(ssef*)kg->__bvh_nodes.data + nodeAddr;
- const float4 cnodes = ((float4*)bvh_nodes)[0];
-
- /* intersect ray against child nodes */
- const ssef tminmaxx =
(shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
- const ssef tminmaxy =
(shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
- const ssef tminmaxz =
(shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
- /* calculate { c0min, c1min, -c0max, -c1max} */
- ssef minmax = max(max(tminmaxx, tminmaxy),
max(tminmaxz, tsplat));
- const ssef tminmax = minmax ^ pn;
-
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
- float4 *tminmaxview = (float4*)&tminmax;
- float &c0min = tminmaxview->x, &c1min =
tminmaxview->y;
- float &c0max = tminmaxview->z, &c1max =
tminmaxview->w;
-
- float hdiff = 1.0f + difl;
- float ldiff = 1.0f - difl;
- if(__float_as_int(cnodes.x) &
PATH_RAY_CURVE) {
- c0min = max(ldiff * c0min,
c0min - extmax);
- c0max = min(hdiff * c0max,
c0max + extmax);
- }
- if(__float_as_int(cnodes.y) &
PATH_RAY_CURVE) {
- c1min = max(ldiff * c1min,
c1min - extmax);
- c1max = min(hdiff * c1max,
c1max + extmax);
- }
+ traverse_mask =
bvh_node_intersect_robust(kg,
+
P,
+
dir,
+
tnear,
+
tfar,
+
tsplat,
+
Psplat,
+
idirsplat,
+
shufflexyz,
+
difl,
+
visibility,
+
nodeAddr,
+
dist);
}
+ else
# endif
-
- const sseb lrhit = tminmax <= shuffle<2, 3, 0,
1>(tminmax);
-
- /* decide which nodes to traverse next */
-# ifdef __VISIBILITY_FLAG__
- /* this visibility test gives a 5% performance
hit, how to solve? */
- traverseChild0 = (movemask(lrhit) & 1) &&
(__float_as_uint(cnodes.x) & visibility);
- traverseChild1 = (movemask(lrhit) & 2) &&
(__float_as_uint(cnodes.y) & visibility);
-# else
- traverseChild0 = (movemask(lrhit) & 1);
- traverseChild1 = (movemask(lrhit) & 2);
-# endif
+ {
+ traverse_mask = bvh_node_intersect(kg,
+ P,
+ dir,
+
tnear,
+ tfar,
+
tsplat,
+
Psplat,
+
idirsplat,
+
shufflexyz,
+
visibility,
+
nodeAddr,
+
dist);
+ }
#endif // __KERNEL_SSE2__
nodeAddr = __float_as_int(cnodes.z);
nodeAddrChild1 = __float_as_int(cnodes.w);
- if(traverseChild0 && traverseChild1) {
- /* both children were intersected, push
the farther one */
-#if !defined(__KERNEL_SSE2__)
- bool closestChild1 = (c1min < c0min);
-#else
- bool closestChild1 = tminmax[1] <
tminmax[0];
-#endif
+ if(traverse_mask == 3) {
+ /* Both children were intersected, push
the farther one. */
+ bool closestChild1 = (dist[1] <
dist[0]);
if(closestChild1) {
int tmp = nodeAddr;
@@ -228,12 +167,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals
*kg,
traversalStack[stackPtr] =
nodeAddrChild1;
}
else {
- /* one child was intersected */
- if(traverseChild1) {
+ /* One child was intersected. */
+ if(traverse_mask == 2) {
nodeAddr = nodeAddrChild1;
}
- else if(!traverseChild0) {
- /* neither child was
intersected */
+ else if(traverse_mask == 0) {
+ /* Neither child was
intersected. */
nodeAddr =
traversalStack[stackPtr];
--stackPtr;
}
@@ -268,6 +207,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals
*kg,
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
tsplat
= ssef(0.0f, 0.0f, -isect->t, -isect->t);
+ tfar =
ssef(isect->t);
#else
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
@@ -287,6 +227,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals
*kg,
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
tsplat
= ssef(0.0f, 0.0f, -isect->t, -isect->t);
+ tfar =
ssef(isect->t);
# else
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
@@ -313,6 +254,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals
*kg,
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
tsplat
= ssef(0.0f, 0.0f, -isect->t, -isect->t);
+ tfar =
ssef(isect->t);
# else
if(visibility == PATH_RAY_SHADOW_OPAQUE)
return true;
@@ -342,6 +284,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals
*kg,
Psplat[2] = ssef(P.z);
tsplat = ssef(0.0f, 0.0f, -isect->t,
-isect->t);
+ tfar = ssef(isect->t);
gen_idirsplat_swap(pn, shuf_identity,
shuf_swap, idir, idirsplat, shufflexyz);
# endif
@@ -376,6 +319,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals
*kg,
Psplat[2] = ssef(P.z);
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
+ tfar = ssef(isect->t);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir,
idirsplat, shufflexyz);
# endif
_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs