Commit: e7c2578576380288befcd77e88edd8ae508ed01a
Author: Sv. Lockal
Date:   Thu Apr 3 22:08:53 2014 +0400
https://developer.blender.org/rBe7c2578576380288befcd77e88edd8ae508ed01a

Cycles: avoid 1.0f/(1.0f/x) divisions, which only MSVC fails to optimize away.

This makes the BMW scene 6% faster in MSVC 12 builds.
It also gives a minor speedup for SSE hair with all compilers.

===================================================================

M       intern/cycles/kernel/geom/geom_bvh_subsurface.h
M       intern/cycles/kernel/geom/geom_bvh_traversal.h
M       intern/cycles/kernel/geom/geom_curve.h
M       intern/cycles/kernel/geom/geom_motion_triangle.h
M       intern/cycles/kernel/geom/geom_object.h
M       intern/cycles/kernel/geom/geom_triangle.h

===================================================================

diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h 
b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index ae4641d..6b71ffc 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -50,7 +50,8 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const 
Ray *ray, Intersectio
        /* ray parameters in registers */
        const float tmax = ray->t;
        float3 P = ray->P;
-       float3 idir = bvh_inverse_direction(ray->D);
+       float3 dir = bvh_clamp_direction(ray->D);
+       float3 idir = bvh_inverse_direction(dir);
        int object = OBJECT_NONE;
        float isect_t = tmax;
 
@@ -215,11 +216,11 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, 
const Ray *ray, Intersectio
 
                                                switch(type & PRIMITIVE_ALL) {
                                                        case 
PRIMITIVE_TRIANGLE: {
-                                                               
triangle_intersect_subsurface(kg, isect_array, P, idir, object, primAddr, 
isect_t, &num_hits, lcg_state, max_hits);
+                                                               
triangle_intersect_subsurface(kg, isect_array, P, dir, object, primAddr, 
isect_t, &num_hits, lcg_state, max_hits);
                                                                break;
                                                        }
                                                        case 
PRIMITIVE_MOTION_TRIANGLE: {
-                                                               
motion_triangle_intersect_subsurface(kg, isect_array, P, idir, ray->time, 
object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+                                                               
motion_triangle_intersect_subsurface(kg, isect_array, P, dir, ray->time, 
object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
                                                                break;
                                                        }
                                                        default: {
@@ -235,9 +236,9 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const 
Ray *ray, Intersectio
                                                object = subsurface_object;
 
 #if FEATURE(BVH_MOTION)
-                                               bvh_instance_motion_push(kg, 
object, ray, &P, &idir, &isect_t, &ob_tfm, tmax);
+                                               bvh_instance_motion_push(kg, 
object, ray, &P, &dir, &idir, &isect_t, &ob_tfm, tmax);
 #else
-                                               bvh_instance_push(kg, object, 
ray, &P, &idir, &isect_t, tmax);
+                                               bvh_instance_push(kg, object, 
ray, &P, &dir, &idir, &isect_t, tmax);
 #endif
 
 #if defined(__KERNEL_SSE2__)
@@ -271,9 +272,9 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const 
Ray *ray, Intersectio
 
                        /* instance pop */
 #if FEATURE(BVH_MOTION)
-                       bvh_instance_motion_pop(kg, object, ray, &P, &idir, 
&isect_t, &ob_tfm, tmax);
+                       bvh_instance_motion_pop(kg, object, ray, &P, &dir, 
&idir, &isect_t, &ob_tfm, tmax);
 #else
-                       bvh_instance_pop(kg, object, ray, &P, &idir, &isect_t, 
tmax);
+                       bvh_instance_pop(kg, object, ray, &P, &dir, &idir, 
&isect_t, tmax);
 #endif
 
 #if defined(__KERNEL_SSE2__)
diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h 
b/intern/cycles/kernel/geom/geom_bvh_traversal.h
index 153efe3..566aa42 100644
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h
@@ -55,7 +55,8 @@ ccl_device bool BVH_FUNCTION_NAME
        /* ray parameters in registers */
        const float tmax = ray->t;
        float3 P = ray->P;
-       float3 idir = bvh_inverse_direction(ray->D);
+       float3 dir = bvh_clamp_direction(ray->D);
+       float3 idir = bvh_inverse_direction(dir);
        int object = OBJECT_NONE;
 
 #if FEATURE(BVH_MOTION)
@@ -253,11 +254,11 @@ ccl_device bool BVH_FUNCTION_NAME
 
                                                switch(type & PRIMITIVE_ALL) {
                                                        case 
PRIMITIVE_TRIANGLE: {
-                                                               hit = 
triangle_intersect(kg, isect, P, idir, visibility, object, primAddr);
+                                                               hit = 
triangle_intersect(kg, isect, P, dir, visibility, object, primAddr);
                                                                break;
                                                        }
                                                        case 
PRIMITIVE_MOTION_TRIANGLE: {
-                                                               hit = 
motion_triangle_intersect(kg, isect, P, idir, ray->time, visibility, object, 
primAddr);
+                                                               hit = 
motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, 
primAddr);
                                                                break;
                                                        }
 #if FEATURE(BVH_HAIR)
@@ -265,14 +266,14 @@ ccl_device bool BVH_FUNCTION_NAME
                                                        case 
PRIMITIVE_MOTION_CURVE: {
 #if FEATURE(BVH_HAIR_MINIMUM_WIDTH)
                                                                
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) 
-                                                                       hit = 
bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, 
ray->time, type, lcg_state, difl, extmax);
+                                                                       hit = 
bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, 
ray->time, type, lcg_state, difl, extmax);
                                                                else
-                                                                       hit = 
bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, 
ray->time, type, lcg_state, difl, extmax);
+                                                                       hit = 
bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, 
type, lcg_state, difl, extmax);
 #else
                                                                
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) 
-                                                                       hit = 
bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, 
ray->time, type);
+                                                                       hit = 
bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, 
ray->time, type);
                                                                else
-                                                                       hit = 
bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, 
ray->time, type);
+                                                                       hit = 
bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, 
type);
 #endif
 
                                                                break;
@@ -306,9 +307,9 @@ ccl_device bool BVH_FUNCTION_NAME
                                        object = 
kernel_tex_fetch(__prim_object, -primAddr-1);
 
 #if FEATURE(BVH_MOTION)
-                                       bvh_instance_motion_push(kg, object, 
ray, &P, &idir, &isect->t, &ob_tfm, tmax);
+                                       bvh_instance_motion_push(kg, object, 
ray, &P, &dir, &idir, &isect->t, &ob_tfm, tmax);
 #else
-                                       bvh_instance_push(kg, object, ray, &P, 
&idir, &isect->t, tmax);
+                                       bvh_instance_push(kg, object, ray, &P, 
&dir, &idir, &isect->t, tmax);
 #endif
 
 #if defined(__KERNEL_SSE2__)
@@ -336,9 +337,9 @@ ccl_device bool BVH_FUNCTION_NAME
 
                        /* instance pop */
 #if FEATURE(BVH_MOTION)
-                       bvh_instance_motion_pop(kg, object, ray, &P, &idir, 
&isect->t, &ob_tfm, tmax);
+                       bvh_instance_motion_pop(kg, object, ray, &P, &dir, 
&idir, &isect->t, &ob_tfm, tmax);
 #else
-                       bvh_instance_pop(kg, object, ray, &P, &idir, &isect->t, 
tmax);
+                       bvh_instance_pop(kg, object, ray, &P, &dir, &idir, 
&isect->t, tmax);
 #endif
 
 #if defined(__KERNEL_SSE2__)
diff --git a/intern/cycles/kernel/geom/geom_curve.h 
b/intern/cycles/kernel/geom/geom_curve.h
index b508f50..e57bcd8 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -205,12 +205,12 @@ ccl_device_inline __m128 transform_point_T3(const __m128 
t[3], const __m128 &a)
 #endif
 
 #ifdef __KERNEL_SSE2__
-/* Pass P and idir by reference to aligned vector */
+/* Pass P and dir by reference to aligned vector */
 ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, 
Intersection *isect,
-       const float3 &P, const float3 &idir, uint visibility, int object, int 
curveAddr, float time, int type, uint *lcg_state, float difl, float extmax)
+       const float3 &P, const float3 &dir, uint visibility, int object, int 
curveAddr, float time, int type, uint *lcg_state, float difl, float extmax)
 #else
 ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, 
Intersection *isect,
-       float3 P, float3 idir, uint visibility, int object, int curveAddr, 
float time,int type, uint *lcg_state, float difl, float extmax)
+       float3 P, float3 dir, uint visibility, int object, int curveAddr, float 
time,int type, uint *lcg_state, float difl, float extmax)
 #endif
 {
        int segment = PRIMITIVE_UNPACK_SEGMENT(type);
@@ -222,7 +222,7 @@ ccl_device_inline bool 
bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
        int prim = kernel_tex_fetch(__prim_index, curveAddr);
 
 #ifdef __KERNEL_SSE2__
-       __m128 vdir = _mm_div_ps(_mm_set1_ps(1.0f), load_m128(idir));
+       __m128 vdir = load_m128(dir);
        __m128 vcurve_coef[4];
        const float3 *curve_coef = (float3 *)vcurve_coef;
        
@@ -285,8 +285,6 @@ ccl_device_inline bool 
bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
        float3 curve_coef[4];
 
        /* curve Intersection check */
-       float3 dir = 1.0f/idir;
-
        /* obtain curve parameters */
        {
                /* ray transform created - this should be created at beginning 
of intersection loop */
@@ -597,7 +595,7 @@ ccl_device_inline bool 
bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersect
 }
 
 ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection 
*isect,
-       float3 P, float3 idir, uint visibility, int object, int curveAddr, 
float time, int type, uint *lcg_state, float difl, float extmax)
+       float3 P, float3 direction, uint visibility, int object, int curveAddr, 
float time, int type, uint *lcg_state, float difl, float extmax)
 {
        /* define few macros to minimize code duplication for SSE */
 #ifndef __KERNEL_SSE2__
@@ -647,9 +645,9 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals 
*kg, Intersection *isec
        }
        /* --- */
 
-       float3 dir = 1.0f / idir;
        float3 p21_diff = p2 - p1;
        float3 sphere_dif1 = (dif + dif_second) * 0.5f;
+       float3 dir = direction;
        float sphere_b_tmp = dot3(dir, sphere_dif1);
        float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir;
 #else
@@ -680,9 +678,9 @@ ccl_device_inline bool bvh_curve_intersect(KernelGlobals 
*kg, Intersection *isec
        float or1 = _mm_cvtss_f32(or12), or2 = 
_mm_cvtss_f32(broadcast<2>(or12));
        float r1 = _mm_cvtss_f32(r12), r2 = _mm_cvtss_f32(broadcast<2>(r12));
 
-       const __m128 dir = _mm_div_ps(_mm_set1_ps(1.0f), load_m128(idir));
        const __m128 p21_diff = _mm_sub_ps(P_curve[1], P_curve[0]);
        const __m128 sphere_dif1 = _mm_mul_ps(_mm_add_ps(dif, dif_second), 
_mm_set1_ps(0.5f));
+       const __m128 dir = load_m128(direction);
        const __m128 sphere_b_tmp = dot3_splat(dir, sphere_dif1);
        const __m128 sphere_dif2 = fnma(sphere_b_tmp, dir, sphere_dif1);
 #endif
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h 
b/intern/cycles/kernel/geom/geom_motion_triangle.h
index c5eb097..73338bb 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -313,7 +313,7 @@ ccl_device_noinline void 
motion_triangle_shader_setup(KernelGlobals *kg, ShaderD
  * time and do a ray intersection with the resulting triangle */
 
 ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, 
Intersection *isect,
-       float3 P, float3 idir, float time, uint visibility, int object, int 
triAddr)
+       float3 P, float3 dir, float time, uint visibility, int object, int 
triAddr)
 {
        /* primitive index for vertex location lookup */
        int prim = kernel_tex_fetch(__prim_index, triAddr);
@@ -324,

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
http://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to