Commit: ce51c895af82244ec0bd79cf0ec94dc9aa7c9797
Author: Sergey Sharybin
Date:   Fri Jun 17 13:06:59 2016 +0200
Branches: cycles_bvh
https://developer.blender.org/rBce51c895af82244ec0bd79cf0ec94dc9aa7c9797

Cycles: Fixes for regular BVH traversal and unaligned nodes

- Support proper packing
- Fix array offsets in intersectors

===================================================================

M       intern/cycles/bvh/bvh.cpp
M       intern/cycles/bvh/bvh_node.cpp
M       intern/cycles/bvh/bvh_node.h
M       intern/cycles/kernel/geom/geom_bvh_nodes.h

===================================================================

diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index 7c144a4..37d72fc 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -410,6 +410,13 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
 
 /* Regular BVH */
 
+static bool node_bvh_is_unaligned(const BVHNode *node)
+{
+       const BVHNode *node0 = node->get_child(0),
+                     *node1 = node->get_child(1);
+       return node0->is_unaligned() || node1->is_unaligned();
+}
+
 RegularBVH::RegularBVH(const BVHParams& params_, const vector<Object*>& 
objects_)
 : BVH(params_, objects_)
 {
@@ -522,22 +529,30 @@ void RegularBVH::pack_unaligned_node(int idx,
 
 void RegularBVH::pack_nodes(const BVHNode *root)
 {
-       size_t tot_node_size = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
-       size_t leaf_node_size = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
-       size_t node_size = tot_node_size - leaf_node_size;
-
-       /* resize arrays */
+       const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
+       const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
+       assert(num_leaf_nodes <= num_nodes);
+       const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
+       size_t node_size;
+       if(params.use_unaligned_nodes) {
+               const size_t num_unaligned_nodes =
+                       root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
+               node_size = (num_unaligned_nodes * BVH_UNALIGNED_NODE_SIZE) +
+                           (num_inner_nodes - num_unaligned_nodes) * 
BVH_NODE_SIZE;
+       }
+       else {
+               node_size = num_inner_nodes * BVH_NODE_SIZE;
+       }
+       /* Resize arrays */
        pack.nodes.clear();
-
-       /* for top level BVH, first merge existing BVH's so we know the offsets 
*/
-       const int nsize = params.use_unaligned_nodes? BVH_UNALIGNED_NODE_SIZE: 
BVH_NODE_SIZE;
+       pack.leaf_nodes.clear();
+       /* For top level BVH, first merge existing BVH's so we know the 
offsets. */
        if(params.top_level) {
-               pack_instances(node_size*nsize,
-                              leaf_node_size*BVH_NODE_LEAF_SIZE);
+               pack_instances(node_size, num_leaf_nodes*BVH_NODE_LEAF_SIZE);
        }
        else {
-               pack.nodes.resize(node_size*nsize);
-               pack.leaf_nodes.resize(leaf_node_size*BVH_NODE_LEAF_SIZE);
+               pack.nodes.resize(node_size);
+               pack.leaf_nodes.resize(num_leaf_nodes*BVH_NODE_LEAF_SIZE);
        }
 
        int nextNodeIdx = 0, nextLeafNodeIdx = 0;
@@ -549,7 +564,9 @@ void RegularBVH::pack_nodes(const BVHNode *root)
        }
        else {
                stack.push_back(BVHStackEntry(root, nextNodeIdx));
-               nextNodeIdx += nsize;
+               nextNodeIdx += node_bvh_is_unaligned(root)
+                                      ? BVH_UNALIGNED_NODE_SIZE
+                                      : BVH_NODE_SIZE;
        }
 
        while(stack.size()) {
@@ -570,7 +587,9 @@ void RegularBVH::pack_nodes(const BVHNode *root)
                                }
                                else {
                                        idx[i] = nextNodeIdx;
-                                       nextNodeIdx += nsize;
+                                       nextNodeIdx += 
node_bvh_is_unaligned(e.node->get_child(i))
+                                                              ? 
BVH_UNALIGNED_NODE_SIZE
+                                                              : BVH_NODE_SIZE;
                                }
                        }
 
@@ -580,7 +599,7 @@ void RegularBVH::pack_nodes(const BVHNode *root)
                        pack_inner(e, stack[stack.size()-2], 
stack[stack.size()-1]);
                }
        }
-
+       assert(node_size == nextNodeIdx);
        /* root index to start traversal at, to handle case of single leaf node 
*/
        pack.root_index = (root->is_leaf())? -1: 0;
 }
diff --git a/intern/cycles/bvh/bvh_node.cpp b/intern/cycles/bvh/bvh_node.cpp
index 41e372f..f5cd699 100644
--- a/intern/cycles/bvh/bvh_node.cpp
+++ b/intern/cycles/bvh/bvh_node.cpp
@@ -71,6 +71,24 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
                                cnt = 1;
                        }
                        break;
+               case BVH_STAT_ALIGNED_INNER_COUNT:
+                       if(!is_leaf()) {
+                               bool has_unaligned = false;
+                               for(int j = 0; j < num_children(); j++) {
+                                       has_unaligned |= 
get_child(j)->is_unaligned();
+                               }
+                               cnt += has_unaligned? 0: 1;
+                       }
+                       break;
+               case BVH_STAT_UNALIGNED_INNER_COUNT:
+                       if(!is_leaf()) {
+                               bool has_unaligned = false;
+                               for(int j = 0; j < num_children(); j++) {
+                                       has_unaligned |= 
get_child(j)->is_unaligned();
+                               }
+                               cnt += has_unaligned? 1: 0;
+                       }
+                       break;
                case BVH_STAT_ALIGNED_INNER_QNODE_COUNT:
                        {
                                bool has_unaligned = false;
diff --git a/intern/cycles/bvh/bvh_node.h b/intern/cycles/bvh/bvh_node.h
index b078cb9..53f85f7 100644
--- a/intern/cycles/bvh/bvh_node.h
+++ b/intern/cycles/bvh/bvh_node.h
@@ -33,6 +33,8 @@ enum BVH_STAT {
        BVH_STAT_QNODE_COUNT,
        BVH_STAT_ALIGNED_COUNT,
        BVH_STAT_UNALIGNED_COUNT,
+       BVH_STAT_ALIGNED_INNER_COUNT,
+       BVH_STAT_UNALIGNED_INNER_COUNT,
        BVH_STAT_ALIGNED_INNER_QNODE_COUNT,
        BVH_STAT_UNALIGNED_INNER_QNODE_COUNT,
        BVH_STAT_ALIGNED_LEAF_COUNT,
diff --git a/intern/cycles/kernel/geom/geom_bvh_nodes.h b/intern/cycles/kernel/geom/geom_bvh_nodes.h
index cb34dd4..f7b9cec 100644
--- a/intern/cycles/kernel/geom/geom_bvh_nodes.h
+++ b/intern/cycles/kernel/geom/geom_bvh_nodes.h
@@ -20,16 +20,16 @@ ccl_device_inline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
 {
        Transform space;
        if(child == 0) {
-               space.x = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
-               space.y = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
-               space.z = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
-               space.w = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
+               space.x = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
+               space.y = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
+               space.z = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
+               space.w = kernel_tex_fetch(__bvh_nodes, nodeAddr+4);
        }
        else {
-               space.x = kernel_tex_fetch(__bvh_nodes, nodeAddr+4);
-               space.y = kernel_tex_fetch(__bvh_nodes, nodeAddr+5);
-               space.z = kernel_tex_fetch(__bvh_nodes, nodeAddr+6);
-               space.w = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
+               space.x = kernel_tex_fetch(__bvh_nodes, nodeAddr+5);
+               space.y = kernel_tex_fetch(__bvh_nodes, nodeAddr+6);
+               space.z = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
+               space.w = kernel_tex_fetch(__bvh_nodes, nodeAddr+8);
        }
        return space;
 }
@@ -45,10 +45,10 @@ ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg,
 {
 
        /* fetch node data */
-       float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
-       float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
-       float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
-       float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+8);
+       float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
+       float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
+       float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
+       float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
 
        /* intersect ray against child nodes */
        NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
@@ -74,8 +74,8 @@ ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg,
 
 #ifdef __VISIBILITY_FLAG__
        /* this visibility test gives a 5% performance hit, how to solve? */
-       return (((c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility))? 
1: 0) |
-              (((c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility))? 
2: 0);
+       return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 
1: 0) |
+              (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 
2: 0);
 #else
        return ((c0max >= c0min)? 1: 0) |
               ((c1max >= c1min)? 2: 0);
@@ -93,10 +93,10 @@ ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
 {
 
        /* fetch node data */
-       float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
-       float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
-       float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
-       float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+8);
+       float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
+       float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
+       float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
+       float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
 
        /* intersect ray against child nodes */
        NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
@@ -135,8 +135,8 @@ ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
 
 #ifdef __VISIBILITY_FLAG__
        /* this visibility test gives a 5% performance hit, how to solve? */
-       return (((c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility))? 
1: 0) |
-              (((c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility))? 
2: 0);
+       return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 
1: 0) |
+              (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 
2: 0);
 #else
        return ((c0max >= c0min)? 1: 0) |
               ((c1max >= c1min)? 2: 0);
@@ -222,10 +222,10 @@ ccl_device_inline int bvh_unaligned_node_intersect(KernelGlobals *kg,
                                                    float *dist)
 {
        int mask = 0;
-       float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+8);
+       float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
        if(bvh_unaligned_node_intersect_child(kg, P, dir, t, nodeAddr, 0, 
&dist[0])) {
 #ifdef __VISIBILITY_FLAG__
-               if((__float_as_uint(cnodes.z) & visibility))
+               if((__float_as_uint(cnodes.x) & visibility))
 #endif
                {
                        mask |= 1;
@@ -233,7 +233,7 @@ ccl_device_inline int bvh_unaligned_node_intersect(KernelGlobals *kg,
        }
        if(bvh_unaligned_node_intersect_child(kg, P, dir, t, nodeAddr, 1, 
&dist[1])) {
 #ifdef __VISIBILITY_FLAG__
-               if((__float_as_uint(cnodes.w) & visibility))
+               if((__float_as_uint(cnodes.y) & visibility))
 #endif
                {
                        mask |= 2;
@@ -253,10 +253,10 @@ ccl_device_inline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
                                                           float *dist)
 {
        int mask = 0;
-       float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+8);
+       float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
        if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, 
nodeAddr, 0, &dist[0])) {
 #ifdef __VISIBILITY_FLAG__
-               if((__float_as_uint(cnodes.z) & visibility))
+               if((__float_as_uint(cnodes.x) & visibility))
 #endif
                {
                        mask |= 1;
@@ -264,7 +264,7 @@ ccl_device_inline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
        }
        if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, 
nodeAddr, 1, &dist[1])) {
 #ifdef __VISIBILITY_FLAG__
-               if((__float_as_uint(cnodes.w) & visibility))
+               if((__float_as_uint(cnodes.y) & visibility))
 #endif
                {
 

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to