[Bf-blender-cvs] [e013a24d072] soc-2019-embree-gpu: Add a visibility flag for time node

MATILLAT Quentin Sat, 13 Jul 2019 03:53:22 -0700

Commit: e013a24d0725ea7aa64faec11c2b4873e1622c73
Author: MATILLAT Quentin
Date:   Sat Jul 13 12:51:37 2019 +0200
Branches: soc-2019-embree-gpu
https://developer.blender.org/rBe013a24d0725ea7aa64faec11c2b4873e1622c73


Add a visibility flag for time node

This should help reduce the computation overhead for nodes that have no time limits.

===================================================================

M       intern/cycles/bvh/bvh2.cpp
M       intern/cycles/bvh/bvh_embree.cpp
M       intern/cycles/bvh/bvh_embree_converter.cpp
M       intern/cycles/bvh/bvh_node.cpp
M       intern/cycles/bvh/bvh_node.h
M       intern/cycles/kernel/bvh/bvh_nodes.h
M       intern/cycles/kernel/bvh/bvh_traversal.h
M       intern/cycles/kernel/kernel_types.h

===================================================================

diff --git a/intern/cycles/bvh/bvh2.cpp b/intern/cycles/bvh/bvh2.cpp
index 0475ac33887..ed355f16efa 100644
--- a/intern/cycles/bvh/bvh2.cpp
+++ b/intern/cycles/bvh/bvh2.cpp
@@ -96,6 +96,16 @@ void BVH2::pack_aligned_node(int idx,
   assert(c0 < 0 || c0 < pack.nodes.size());
   assert(c1 < 0 || c1 < pack.nodes.size());
 
+  if(time0.x > 0 || time0.y < 1)
+      visibility0 |= PATH_RAY_NODE_4D;
+  else
+      visibility0 &= ~PATH_RAY_NODE_4D;
+
+  if(time1.x > 0 || time1.y < 1)
+      visibility1 |= PATH_RAY_NODE_4D;
+  else
+      visibility1 &= ~PATH_RAY_NODE_4D;
+
   int4 data[BVH_NODE_SIZE] = {
       make_int4(
           visibility0 & ~PATH_RAY_NODE_UNALIGNED, visibility1 & 
~PATH_RAY_NODE_UNALIGNED, c0, c1),
diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp
index 2be512d38c8..7a2a4b2d984 100644
--- a/intern/cycles/bvh/bvh_embree.cpp
+++ b/intern/cycles/bvh/bvh_embree.cpp
@@ -496,7 +496,9 @@ void BVHEmbree::build(Progress &progress, Stats *stats_)
   progress.set_substatus("Packing geometry");
   if(this->bvh_layout == BVH_LAYOUT_EMBREE_CONVERTED) {
     BVHEmbreeConverter conv(scene, objects, this->params);
-    pack_nodes(conv.getBVH2());
+    BVHNode *root = conv.getBVH2();
+    BVH2::pack_nodes(root);
+    root->deleteSubtree();
   } else {
     pack_nodes(NULL);
   }
diff --git a/intern/cycles/bvh/bvh_embree_converter.cpp 
b/intern/cycles/bvh/bvh_embree_converter.cpp
index b93b6039c54..fc20f4062a8 100644
--- a/intern/cycles/bvh/bvh_embree_converter.cpp
+++ b/intern/cycles/bvh/bvh_embree_converter.cpp
@@ -83,7 +83,7 @@ BVHNode *bvh_shrink(BVHNode *root) {
     if(root->num_children() == 3) {
         node->children[1] = bvh_shrink(node->children[2]);
     } else {
-       node->children[1] = new InnerNode(merge(node->children[2]->bounds, 
node->children[3]->bounds), bvh_shrink(node->children[2]), 
bvh_shrink(node->children[3]));
+        node->children[1] = new InnerNode(merge(node->children[2]->bounds, 
node->children[3]->bounds), bvh_shrink(node->children[2]), 
bvh_shrink(node->children[3]));
     }
     node->num_children_ = 2;
     return node;
@@ -114,7 +114,6 @@ std::deque<BVHNode*> 
BVHEmbreeConverter::handleLeaf<embree::Triangle4i>(const em
             /* TODO Compute local boundbox
              * BoundBox bb = RTCBoundBoxToCCL(prims[i].linearBounds(s, 
embree::BBox1f(0, 1)).bounds());
              */
-
             ids.push_back(RangeInput(prim_offset + prim_id, obj->visibility, 
bb));
         }
     }
@@ -298,7 +297,7 @@ BVHNode* BVHEmbreeConverter::getBVH4() {
             embree::BVH4::NodeRef root = bvh->root;
             BVHNode *rootNode = nullptr;
             if(bvh->primTy == &embree::Triangle4v::type) {
-                rootNode = this->nodeEmbreeToCcl<embree::Triangle4v>(root, 
RTCBoundBoxToCCL(bvh->bounds.bounds()));
+                rootNode = nodeEmbreeToCcl<embree::Triangle4v>(root, 
RTCBoundBoxToCCL(bvh->bounds.bounds()));
             } else if(bvh->primTy == &embree::InstancePrimitive::type) {
                 rootNode = nodeEmbreeToCcl<embree::InstancePrimitive>(root, 
RTCBoundBoxToCCL(bvh->bounds.bounds()));
             } else if(bvh->primTy == &embree::Triangle4i::type) {
@@ -325,6 +324,23 @@ BVHNode* BVHEmbreeConverter::getBVH4() {
     return new InnerNode(bb, nodes.data(), nodes.size());
 }
 
+BoundBox bvh_tighten(BVHNode *root) {
+    if(root->is_leaf())
+        return root->bounds;
+
+    assert(root->num_children() == 2);
+    BoundBox bb = BoundBox::empty;
+    for(int i = 0; i < root->num_children(); i++) {
+        bb.grow(bvh_tighten(root->get_child(i)));
+    }
+
+    if(std::abs(root->bounds.area() - bb.area()) > .05f) {
+        std::cout << "Area " << root->bounds.area() << "\t" << bb.area() << 
std::endl;
+    }
+    root->bounds.intersect(bb);
+    return root->bounds;
+}
+
 BVHNode* BVHEmbreeConverter::getBVH2() {
     BVHNode *root = this->getBVH4();
     std::cout << root->getSubtreeSize(BVH_STAT_TIMELIMIT_NODE) << " times 
nodes" << std::endl;
@@ -332,6 +348,8 @@ BVHNode* BVHEmbreeConverter::getBVH2() {
     root = bvh_shrink(root);
     std::cout << root->getSubtreeSize(BVH_STAT_TIMELIMIT_NODE) << " times 
nodes" << std::endl;
     std::cout << "BVH2 SAH is " << root->computeSubtreeSAHCost(this->params) 
<< std::endl;
+    bvh_tighten(root);
+    std::cout << "BVH² SAH is " << root->computeSubtreeSAHCost(this->params) 
<< std::endl;
     return root;
 }
 
diff --git a/intern/cycles/bvh/bvh_node.cpp b/intern/cycles/bvh/bvh_node.cpp
index 413f5befcca..baea4d4365e 100644
--- a/intern/cycles/bvh/bvh_node.cpp
+++ b/intern/cycles/bvh/bvh_node.cpp
@@ -92,8 +92,8 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
       }
       return cnt;
     case BVH_STAT_TIMELIMIT_NODE:
-      if(this->time_from > 0 || this->time_to < 1)
-          cnt = 1;
+      if(this->has_time_limits())
+        cnt = 1;
       break;
     default:
       assert(0); /* unknown mode */
diff --git a/intern/cycles/bvh/bvh_node.h b/intern/cycles/bvh/bvh_node.h
index ce3ea24fc71..e82bf6712e0 100644
--- a/intern/cycles/bvh/bvh_node.h
+++ b/intern/cycles/bvh/bvh_node.h
@@ -89,6 +89,10 @@ class BVHNode {
     return false;
   }
 
+  inline bool has_time_limits() const {
+    return this->time_from > 0 || this->time_to < 1;
+  }
+
   // Subtree functions
   int getSubtreeSize(BVH_STAT stat = BVH_STAT_NODE_COUNT) const;
   float computeSubtreeSAHCost(const BVHParams &p, float probability = 1.0f) 
const;
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h 
b/intern/cycles/kernel/bvh/bvh_nodes.h
index 0fc6ecad5bc..8a299012aa7 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -29,14 +29,14 @@ ccl_device_forceinline Transform 
bvh_unaligned_node_fetch_space(KernelGlobals *k
 }
 
 #if !defined(__KERNEL_SSE2__)
-ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
-                                                      const float3 P,
-                                                      const float3 idir,
-                                                      const float t,
-                                                      const int node_addr,
-                                                      const uint visibility,
-                                                      const float rayTime,
-                                                      float dist[2])
+ccl_device_forceinline int bvh_4D_aligned_node_intersect(KernelGlobals *kg,
+                                                         const float3 P,
+                                                         const float3 idir,
+                                                         const float t,
+                                                         const int node_addr,
+                                                         const uint visibility,
+                                                         const float rayTime,
+                                                         float dist[2])
 {
 
   /* fetch node data */
@@ -44,7 +44,6 @@ ccl_device_forceinline int 
bvh_aligned_node_intersect(KernelGlobals *kg,
   float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
   float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
   float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
-  float4 timeLimits = kernel_tex_fetch(__bvh_nodes, node_addr + 4);
 
   /* intersect ray against child nodes */
   float c0lox = (node0.x - P.x) * idir.x;
@@ -68,6 +67,7 @@ ccl_device_forceinline int 
bvh_aligned_node_intersect(KernelGlobals *kg,
   dist[0] = c0min;
   dist[1] = c1min;
 
+  float4 timeLimits = kernel_tex_fetch(__bvh_nodes, node_addr + 4);
   if(timeLimits.x > rayTime || timeLimits.z < rayTime)
       c0max = -1.0f;
 
@@ -83,6 +83,56 @@ ccl_device_forceinline int 
bvh_aligned_node_intersect(KernelGlobals *kg,
 #  endif
 }
 
+ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
+                                                      const float3 P,
+                                                      const float3 idir,
+                                                      const float t,
+                                                      const int node_addr,
+                                                      const uint visibility,
+                                                     const float rayTime,
+                                                      float dist[2])
+{
+  /* fetch node data */
+  float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+  float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
+  float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
+  float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
+
+  if (__float_as_int(cnodes.x) & PATH_RAY_NODE_4D) {
+    return bvh_4D_aligned_node_intersect(kg, P, idir, t, node_addr, 
visibility, rayTime, dist);
+  }
+
+  /* intersect ray against child nodes */
+  float c0lox = (node0.x - P.x) * idir.x;
+  float c0hix = (node0.z - P.x) * idir.x;
+  float c0loy = (node1.x - P.y) * idir.y;
+  float c0hiy = (node1.z - P.y) * idir.y;
+  float c0loz = (node2.x - P.z) * idir.z;
+  float c0hiz = (node2.z - P.z) * idir.z;
+  float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, 
c0hiz));
+  float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, 
c0hiz));
+
+  float c1lox = (node0.y - P.x) * idir.x;
+  float c1hix = (node0.w - P.x) * idir.x;
+  float c1loy = (node1.y - P.y) * idir.y;
+  float c1hiy = (node1.w - P.y) * idir.y;
+  float c1loz = (node2.y - P.z) * idir.z;
+  float c1hiz = (node2.w - P.z) * idir.z;
+  float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, 
c1hiz));
+  float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, 
c1hiz));
+
+  dist[0] = c0min;
+  dist[1] = c1min;
+
+#  ifdef __VISIBILITY_FLAG__
+  /* this visibility test gives a 5% performance hit, how to solve? */
+  return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 
0) |
+         (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 
0);
+#  else
+  return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
+#  endif
+}
+
 ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals 
*kg,
           

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [e013a24d072] soc-2019-embree-gpu: Add a visibility flag for time node

Reply via email to