Commit: 157067acbde7e65bb5bef2023effca8a5fc657f3
Author: Sergey Sharybin
Date:   Mon Nov 10 09:54:55 2014 +0100
Branches: master
https://developer.blender.org/rB157067acbde7e65bb5bef2023effca8a5fc657f3

Cycles: Speedup for homogeneous volumes in decoupled volume sampling

The idea is to avoid memory allocation when only one segment step is to be 
allocated.
This gives some speedup which is difficult to measure on this trashcan from
hell, but it's about 7% to 10% in the extreme case of a single volume filling
the whole of the viewport. This seems to depend on the phase of the
bug-o-meter in the studio.

On the Linux boxes the speedup is not that spectacular: it's about 2% on my
laptop and about 3% on the studio desktop. This is likely because of the
awesomeness of jemalloc.

===================================================================

M       intern/cycles/kernel/kernel_volume.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_volume.h 
b/intern/cycles/kernel/kernel_volume.h
index cba95ab..99caff0 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -595,6 +595,7 @@ typedef struct VolumeStep {
 } VolumeStep;
 
 typedef struct VolumeSegment {
+       VolumeStep stack_step;      /* stack storage for homogenous step, to 
avoid malloc */
        VolumeStep *steps;                      /* recorded steps */
        int numsteps;                           /* number of steps */
        int closure_flag;                       /* accumulated closure flags 
from all steps */
@@ -627,11 +628,13 @@ ccl_device void 
kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
 
                /* compute exact steps in advance for malloc */
                max_steps = max((int)ceilf(ray->t/step_size), 1);
+               segment->steps = 
(VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
        }
        else {
                max_steps = 1;
                step_size = ray->t;
                random_jitter_offset = 0.0f;
+               segment->steps = &segment->stack_step;
        }
        
        /* init accumulation variables */
@@ -640,10 +643,8 @@ ccl_device void 
kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
        float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
        float t = 0.0f;
 
-       segment->closure_flag = 0;
        segment->numsteps = 0;
-
-       segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
+       segment->closure_flag = 0;
 
        VolumeStep *step = segment->steps;
 
@@ -729,7 +730,8 @@ ccl_device void 
kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta
 
 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment 
*segment)
 {
-       free(segment->steps);
+       if(segment->steps != &segment->stack_step)
+               free(segment->steps);
 }
 
 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to