Commit: e013a24d0725ea7aa64faec11c2b4873e1622c73
Author: MATILLAT Quentin
Date: Sat Jul 13 12:51:37 2019 +0200
Branches: soc-2019-embree-gpu
https://developer.blender.org/rBe013a24d0725ea7aa64faec11c2b4873e1622c73
Add a visibility flag for time nodes
This should help reduce the computation overhead for non-time nodes
===================================================================
M intern/cycles/bvh/bvh2.cpp
M intern/cycles/bvh/bvh_embree.cpp
M intern/cycles/bvh/bvh_embree_converter.cpp
M intern/cycles/bvh/bvh_node.cpp
M intern/cycles/bvh/bvh_node.h
M intern/cycles/kernel/bvh/bvh_nodes.h
M intern/cycles/kernel/bvh/bvh_traversal.h
M intern/cycles/kernel/kernel_types.h
===================================================================
diff --git a/intern/cycles/bvh/bvh2.cpp b/intern/cycles/bvh/bvh2.cpp
index 0475ac33887..ed355f16efa 100644
--- a/intern/cycles/bvh/bvh2.cpp
+++ b/intern/cycles/bvh/bvh2.cpp
@@ -96,6 +96,16 @@ void BVH2::pack_aligned_node(int idx,
assert(c0 < 0 || c0 < pack.nodes.size());
assert(c1 < 0 || c1 < pack.nodes.size());
+ if(time0.x > 0 || time0.y < 1)
+ visibility0 |= PATH_RAY_NODE_4D;
+ else
+ visibility0 &= ~PATH_RAY_NODE_4D;
+
+ if(time1.x > 0 || time1.y < 1)
+ visibility1 |= PATH_RAY_NODE_4D;
+ else
+ visibility1 &= ~PATH_RAY_NODE_4D;
+
int4 data[BVH_NODE_SIZE] = {
make_int4(
visibility0 & ~PATH_RAY_NODE_UNALIGNED, visibility1 &
~PATH_RAY_NODE_UNALIGNED, c0, c1),
diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp
index 2be512d38c8..7a2a4b2d984 100644
--- a/intern/cycles/bvh/bvh_embree.cpp
+++ b/intern/cycles/bvh/bvh_embree.cpp
@@ -496,7 +496,9 @@ void BVHEmbree::build(Progress &progress, Stats *stats_)
progress.set_substatus("Packing geometry");
if(this->bvh_layout == BVH_LAYOUT_EMBREE_CONVERTED) {
BVHEmbreeConverter conv(scene, objects, this->params);
- pack_nodes(conv.getBVH2());
+ BVHNode *root = conv.getBVH2();
+ BVH2::pack_nodes(root);
+ root->deleteSubtree();
} else {
pack_nodes(NULL);
}
diff --git a/intern/cycles/bvh/bvh_embree_converter.cpp
b/intern/cycles/bvh/bvh_embree_converter.cpp
index b93b6039c54..fc20f4062a8 100644
--- a/intern/cycles/bvh/bvh_embree_converter.cpp
+++ b/intern/cycles/bvh/bvh_embree_converter.cpp
@@ -83,7 +83,7 @@ BVHNode *bvh_shrink(BVHNode *root) {
if(root->num_children() == 3) {
node->children[1] = bvh_shrink(node->children[2]);
} else {
- node->children[1] = new InnerNode(merge(node->children[2]->bounds,
node->children[3]->bounds), bvh_shrink(node->children[2]),
bvh_shrink(node->children[3]));
+ node->children[1] = new InnerNode(merge(node->children[2]->bounds,
node->children[3]->bounds), bvh_shrink(node->children[2]),
bvh_shrink(node->children[3]));
}
node->num_children_ = 2;
return node;
@@ -114,7 +114,6 @@ std::deque<BVHNode*>
BVHEmbreeConverter::handleLeaf<embree::Triangle4i>(const em
/* TODO Compute local boundbox
* BoundBox bb = RTCBoundBoxToCCL(prims[i].linearBounds(s,
embree::BBox1f(0, 1)).bounds());
*/
-
ids.push_back(RangeInput(prim_offset + prim_id, obj->visibility,
bb));
}
}
@@ -298,7 +297,7 @@ BVHNode* BVHEmbreeConverter::getBVH4() {
embree::BVH4::NodeRef root = bvh->root;
BVHNode *rootNode = nullptr;
if(bvh->primTy == &embree::Triangle4v::type) {
- rootNode = this->nodeEmbreeToCcl<embree::Triangle4v>(root,
RTCBoundBoxToCCL(bvh->bounds.bounds()));
+ rootNode = nodeEmbreeToCcl<embree::Triangle4v>(root,
RTCBoundBoxToCCL(bvh->bounds.bounds()));
} else if(bvh->primTy == &embree::InstancePrimitive::type) {
rootNode = nodeEmbreeToCcl<embree::InstancePrimitive>(root,
RTCBoundBoxToCCL(bvh->bounds.bounds()));
} else if(bvh->primTy == &embree::Triangle4i::type) {
@@ -325,6 +324,23 @@ BVHNode* BVHEmbreeConverter::getBVH4() {
return new InnerNode(bb, nodes.data(), nodes.size());
}
+BoundBox bvh_tighten(BVHNode *root) { /* Recursively recomputes inner-node bounds from the children; returns the resulting bounds of `root`. */
+ if(root->is_leaf())
+ return root->bounds; /* leaves are returned as-is */
+
+ assert(root->num_children() == 2); /* only binary inner nodes are expected here */
+ BoundBox bb = BoundBox::empty; /* accumulates the union of the children's bounds */
+ for(int i = 0; i < root->num_children(); i++) {
+ bb.grow(bvh_tighten(root->get_child(i)));
+ }
+
+ if(std::abs(root->bounds.area() - bb.area()) > .05f) { /* debug print when stored and recomputed areas differ noticeably */
+ std::cout << "Area " << root->bounds.area() << "\t" << bb.area() <<
std::endl;
+ }
+ root->bounds.intersect(bb); /* presumably clips root->bounds to bb in place -- TODO confirm BoundBox::intersect semantics */
+ return root->bounds;
+}
+
BVHNode* BVHEmbreeConverter::getBVH2() {
BVHNode *root = this->getBVH4();
std::cout << root->getSubtreeSize(BVH_STAT_TIMELIMIT_NODE) << " times
nodes" << std::endl;
@@ -332,6 +348,8 @@ BVHNode* BVHEmbreeConverter::getBVH2() {
root = bvh_shrink(root);
std::cout << root->getSubtreeSize(BVH_STAT_TIMELIMIT_NODE) << " times
nodes" << std::endl;
std::cout << "BVH2 SAH is " << root->computeSubtreeSAHCost(this->params)
<< std::endl;
+ bvh_tighten(root);
+ std::cout << "BVH² SAH is " << root->computeSubtreeSAHCost(this->params)
<< std::endl;
return root;
}
diff --git a/intern/cycles/bvh/bvh_node.cpp b/intern/cycles/bvh/bvh_node.cpp
index 413f5befcca..baea4d4365e 100644
--- a/intern/cycles/bvh/bvh_node.cpp
+++ b/intern/cycles/bvh/bvh_node.cpp
@@ -92,8 +92,8 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
}
return cnt;
case BVH_STAT_TIMELIMIT_NODE:
- if(this->time_from > 0 || this->time_to < 1)
- cnt = 1;
+ if(this->has_time_limits())
+ cnt = 1;
break;
default:
assert(0); /* unknown mode */
diff --git a/intern/cycles/bvh/bvh_node.h b/intern/cycles/bvh/bvh_node.h
index ce3ea24fc71..e82bf6712e0 100644
--- a/intern/cycles/bvh/bvh_node.h
+++ b/intern/cycles/bvh/bvh_node.h
@@ -89,6 +89,10 @@ class BVHNode {
return false;
}
+ inline bool has_time_limits() const { /* true when time_from > 0 or time_to < 1, i.e. the node does not span the whole [0, 1] range */
+ return this->time_from > 0 || this->time_to < 1;
+ }
+
// Subtree functions
int getSubtreeSize(BVH_STAT stat = BVH_STAT_NODE_COUNT) const;
float computeSubtreeSAHCost(const BVHParams &p, float probability = 1.0f)
const;
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h
b/intern/cycles/kernel/bvh/bvh_nodes.h
index 0fc6ecad5bc..8a299012aa7 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -29,14 +29,14 @@ ccl_device_forceinline Transform
bvh_unaligned_node_fetch_space(KernelGlobals *k
}
#if !defined(__KERNEL_SSE2__)
-ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
- const float3 P,
- const float3 idir,
- const float t,
- const int node_addr,
- const uint visibility,
- const float rayTime,
- float dist[2])
+ccl_device_forceinline int bvh_4D_aligned_node_intersect(KernelGlobals *kg,
+ const float3 P,
+ const float3 idir,
+ const float t,
+ const int node_addr,
+ const uint visibility,
+ const float rayTime,
+ float dist[2])
{
/* fetch node data */
@@ -44,7 +44,6 @@ ccl_device_forceinline int
bvh_aligned_node_intersect(KernelGlobals *kg,
float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
- float4 timeLimits = kernel_tex_fetch(__bvh_nodes, node_addr + 4);
/* intersect ray against child nodes */
float c0lox = (node0.x - P.x) * idir.x;
@@ -68,6 +67,7 @@ ccl_device_forceinline int
bvh_aligned_node_intersect(KernelGlobals *kg,
dist[0] = c0min;
dist[1] = c1min;
+ float4 timeLimits = kernel_tex_fetch(__bvh_nodes, node_addr + 4);
if(timeLimits.x > rayTime || timeLimits.z < rayTime)
c0max = -1.0f;
@@ -83,6 +83,56 @@ ccl_device_forceinline int
bvh_aligned_node_intersect(KernelGlobals *kg,
# endif
}
+ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
+ const float3 P, /* point subtracted from the slab coordinates (ray origin) */
+ const float3 idir, /* per-axis factor applied to the slab offsets; presumably the inverse ray direction */
+ const float t, /* upper bound applied to the exit distances */
+ const int node_addr, /* index of the node's first float4 in __bvh_nodes */
+ const uint visibility, /* ray visibility mask tested against each child's flags */
+ const float rayTime, /* only forwarded to the 4D variant */
+ float dist[2]) /* out: entry distance for child 0 and child 1 */
+{
+ /* Slab-test the ray against both children of an aligned node; the result has bit 0 set for a hit on child 0 and bit 1 set for a hit on child 1. */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); /* .x / .y hold the per-child visibility flags */
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
+ /* BVH2::pack_aligned_node sets PATH_RAY_NODE_4D on each child independently, so both children must be tested; checking only cnodes.x would skip the time cull when just child 1 is time-limited. */
+ if ((__float_as_int(cnodes.x) | __float_as_int(cnodes.y)) & PATH_RAY_NODE_4D) {
+ return bvh_4D_aligned_node_intersect(kg, P, idir, t, node_addr,
visibility, rayTime, dist);
+ }
+
+ /* intersect ray against child nodes */
+ float c0lox = (node0.x - P.x) * idir.x;
+ float c0hix = (node0.z - P.x) * idir.x;
+ float c0loy = (node1.x - P.y) * idir.y;
+ float c0hiy = (node1.z - P.y) * idir.y;
+ float c0loz = (node2.x - P.z) * idir.z;
+ float c0hiz = (node2.z - P.z) * idir.z;
+ float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz,
c0hiz));
+ float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz,
c0hiz));
+
+ float c1lox = (node0.y - P.x) * idir.x;
+ float c1hix = (node0.w - P.x) * idir.x;
+ float c1loy = (node1.y - P.y) * idir.y;
+ float c1hiy = (node1.w - P.y) * idir.y;
+ float c1loz = (node2.y - P.z) * idir.z;
+ float c1hiz = (node2.w - P.z) * idir.z;
+ float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz,
c1hiz));
+ float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz,
c1hiz));
+
+ dist[0] = c0min;
+ dist[1] = c1min;
+
+# ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 :
0) |
+ (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 :
0);
+# else
+ return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
+# endif
+}
+
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals
*kg,
@@ Diff output truncated at 10240 characters. @@
_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs