Commit: 8a72be7697f8fbfc8cb6cc9f3df049104e41d4a6
Author: Brecht Van Lommel
Date:   Wed Nov 1 21:02:28 2017 +0100
Branches: master
https://developer.blender.org/rB8a72be7697f8fbfc8cb6cc9f3df049104e41d4a6

Cycles: reduce closure memory usage for emission/shadow shader data.

With a Titan Xp, reduces path trace local memory from 1092MB to 840MB.
Benchmark performance was within 1% with both RX 480 and Titan Xp.

Original patch was implemented by Sergey.

Differential Revision: https://developer.blender.org/D2249

===================================================================

M       intern/cycles/kernel/closure/alloc.h
M       intern/cycles/kernel/kernel_bake.h
M       intern/cycles/kernel/kernel_emission.h
M       intern/cycles/kernel/kernel_path.h
M       intern/cycles/kernel/kernel_path_branched.h
M       intern/cycles/kernel/kernel_shader.h
M       intern/cycles/kernel/kernel_shadow.h
M       intern/cycles/kernel/kernel_subsurface.h
M       intern/cycles/kernel/kernel_types.h
M       intern/cycles/kernel/kernel_volume.h
M       intern/cycles/kernel/split/kernel_buffer_update.h
M       intern/cycles/kernel/split/kernel_direct_lighting.h
M       intern/cycles/kernel/split/kernel_do_volume.h
M       intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M       intern/cycles/kernel/split/kernel_path_init.h
M       intern/cycles/kernel/split/kernel_shader_eval.h
M       intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
M       intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
M       intern/cycles/kernel/split/kernel_split_data_types.h
M       intern/cycles/kernel/split/kernel_subsurface_scatter.h

===================================================================

diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h
index e799855a65e..48a60405b5a 100644
--- a/intern/cycles/kernel/closure/alloc.h
+++ b/intern/cycles/kernel/closure/alloc.h
@@ -20,17 +20,16 @@ ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int 
size, ClosureType ty
 {
        kernel_assert(size <= sizeof(ShaderClosure));
 
-       int num_closure = sd->num_closure;
-       int num_closure_extra = sd->num_closure_extra;
-       if(num_closure + num_closure_extra >= MAX_CLOSURE)
+       if(sd->num_closure_left == 0)
                return NULL;
 
-       ShaderClosure *sc = &sd->closure[num_closure];
+       ShaderClosure *sc = &sd->closure[sd->num_closure];
 
        sc->type = type;
        sc->weight = weight;
 
        sd->num_closure++;
+       sd->num_closure_left--;
 
        return sc;
 }
@@ -44,18 +43,16 @@ ccl_device ccl_addr_space void 
*closure_alloc_extra(ShaderData *sd, int size)
         * This lets us keep the same fast array iteration over closures, as we
         * found linked list iteration and iteration with skipping to be 
slower. */
        int num_extra = ((size + sizeof(ShaderClosure) - 1) / 
sizeof(ShaderClosure));
-       int num_closure = sd->num_closure;
-       int num_closure_extra = sd->num_closure_extra + num_extra;
 
-       if(num_closure + num_closure_extra > MAX_CLOSURE) {
+       if(num_extra > sd->num_closure_left) {
                /* Remove previous closure. */
                sd->num_closure--;
-               sd->num_closure_extra++;
+               sd->num_closure_left++;
                return NULL;
        }
 
-       sd->num_closure_extra = num_closure_extra;
-       return (ccl_addr_space void*)(sd->closure + MAX_CLOSURE - 
num_closure_extra);
+       sd->num_closure_left -= num_extra;
+       return (ccl_addr_space void*)(sd->closure + sd->num_closure + 
sd->num_closure_left);
 }
 
 ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 
weight)
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 84d8d84d486..9ce10358b81 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -51,7 +51,7 @@ ccl_device_inline void compute_light_pass(KernelGlobals *kg,
        path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL);
 
        /* evaluate surface shader */
-       shader_eval_surface(kg, sd, &state, state.flag);
+       shader_eval_surface(kg, sd, &state, state.flag, MAX_CLOSURE);
 
        /* TODO, disable more closures we don't need besides transparent */
        shader_bsdf_disable_transparency(kg, sd);
@@ -239,12 +239,12 @@ ccl_device float3 
kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
                }
                else {
                        /* surface color of the pass only */
-                       shader_eval_surface(kg, sd, state, 0);
+                       shader_eval_surface(kg, sd, state, 0, MAX_CLOSURE);
                        return kernel_bake_shader_bsdf(kg, sd, type);
                }
        }
        else {
-               shader_eval_surface(kg, sd, state, 0);
+               shader_eval_surface(kg, sd, state, 0, MAX_CLOSURE);
                color = kernel_bake_shader_bsdf(kg, sd, type);
        }
 
@@ -337,7 +337,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, 
ccl_global uint4 *input,
                {
                        float3 N = sd.N;
                        if((sd.flag & SD_HAS_BUMP)) {
-                               shader_eval_surface(kg, &sd, &state, 0);
+                               shader_eval_surface(kg, &sd, &state, 0, 
MAX_CLOSURE);
                                N = shader_bsdf_average_normal(kg, &sd);
                        }
 
@@ -352,7 +352,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, 
ccl_global uint4 *input,
                }
                case SHADER_EVAL_EMISSION:
                {
-                       shader_eval_surface(kg, &sd, &state, 0);
+                       shader_eval_surface(kg, &sd, &state, 0, 0);
                        out = shader_emissive_eval(kg, &sd);
                        break;
                }
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 45b8c6311e1..94b0a37ce62 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -70,14 +70,11 @@ ccl_device_noinline float3 
direct_emissive_eval(KernelGlobals *kg,
                /* no path flag, we're evaluating this for all closures. that's 
weak but
                 * we'd have to do multiple evaluations otherwise */
                path_state_modify_bounce(state, true);
-               shader_eval_surface(kg, emission_sd, state, 0);
+               shader_eval_surface(kg, emission_sd, state, 0, 0);
                path_state_modify_bounce(state, false);
 
                /* evaluate emissive closure */
-               if(emission_sd->flag & SD_EMISSION)
-                       eval = shader_emissive_eval(kg, emission_sd);
-               else
-                       eval = make_float3(0.0f, 0.0f, 0.0f);
+               eval = shader_emissive_eval(kg, emission_sd);
        }
        
        eval *= ls->eval_fac;
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 1099064038b..8519e0682e1 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -443,7 +443,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
                                      sd,
                                      &isect,
                                      ray);
-               shader_eval_surface(kg, sd, state, state->flag);
+               shader_eval_surface(kg, sd, state, state->flag, MAX_CLOSURE);
                shader_prepare_closures(sd, state);
 
                /* Apply shadow catcher, holdout, emission. */
@@ -561,7 +561,7 @@ ccl_device_forceinline void kernel_path_integrate(
                bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, 
L);
 
                /* Find intersection with lamps and compute emission for MIS. */
-               kernel_path_lamp_emission(kg, state, ray, throughput, &isect, 
emission_sd, L);
+               kernel_path_lamp_emission(kg, state, ray, throughput, &isect, 
&sd, L);
 
 #ifdef __VOLUME__
                /* Volume integration. */
@@ -585,7 +585,7 @@ ccl_device_forceinline void kernel_path_integrate(
 
                /* Shade background. */
                if(!hit) {
-                       kernel_path_background(kg, state, ray, throughput, 
emission_sd, L);
+                       kernel_path_background(kg, state, ray, throughput, &sd, 
L);
                        break;
                }
                else if(path_state_ao_bounce(kg, state)) {
@@ -594,7 +594,7 @@ ccl_device_forceinline void kernel_path_integrate(
 
                /* Setup and evaluate shader. */
                shader_setup_from_ray(kg, &sd, &isect, ray);
-               shader_eval_surface(kg, &sd, state, state->flag);
+               shader_eval_surface(kg, &sd, state, state->flag, MAX_CLOSURE);
                shader_prepare_closures(&sd, state);
 
                /* Apply shadow catcher, holdout, emission. */
@@ -706,9 +706,11 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
        PathRadiance L;
        path_radiance_init(&L, kernel_data.film.use_light_pass);
 
-       ShaderData emission_sd;
+       ShaderDataTinyStorage emission_sd_storage;
+       ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+
        PathState state;
-       path_state_init(kg, &emission_sd, &state, rng_hash, sample, &ray);
+       path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
 
        /* Integrate. */
        kernel_path_integrate(kg,
@@ -717,7 +719,7 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
                              &ray,
                              &L,
                              buffer,
-                             &emission_sd);
+                             emission_sd);
 
        kernel_write_result(kg, buffer, sample, &L);
 }
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 3877e4f0058..f93366eade1 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -436,10 +436,12 @@ ccl_device void 
kernel_branched_path_integrate(KernelGlobals *kg,
        /* shader data memory used for both volumes and surfaces, saves stack 
space */
        ShaderData sd;
        /* shader data used by emission, shadows, volume stacks, indirect path 
*/
-       ShaderData emission_sd, indirect_sd;
+       ShaderDataTinyStorage emission_sd_storage;
+       ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+       ShaderData indirect_sd;
 
        PathState state;
-       path_state_init(kg, &emission_sd, &state, rng_hash, sample, &ray);
+       path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
 
        /* Main Loop
         * Here we only handle transparency intersections from the camera ray.
@@ -460,7 +462,7 @@ ccl_device void 
kernel_branched_path_integrate(KernelGlobals *kg,
                                            &isect,
                                            hit,
                                            &indirect_sd,
-                                           &emission_sd,
+                                           emission_sd,
                                            L);
 #endif  /* __VOLUME__ */
 
@@ -472,7 +474,7 @@ ccl_device void 
kernel_branched_path_integrate(KernelGlobals *kg,
 
                /* Setup and evaluate shader. */
                shader_setup_from_ray(kg, &sd, &isect, &ray);
-               shader_eval_surface(kg, &sd, &state, state.flag);
+               shader_eval_surface(kg, &sd, &state, state.flag, MAX_CLOSURE);
                shader_merge_closures(&sd);
 
                /* Apply shadow catcher, holdout, emission. */
@@ -481,7 +483,7 @@ ccl_device void 
kernel_branched_path_integrate(KernelGlobals *kg,
                                             &state,
                                             &ray,
                                             throughput,
-                                            &emission_sd,
+                                            emission_sd,
                                             L,
                                             buffer))
                {
@@ -513,14 +515,14 @@ ccl_device void 
kernel_branched_path_integrate(KernelGlobals *kg,
 #ifdef __AO__
                /* ambient occlusion */
                if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & 
SD_AO)) {
-                       kernel_branched_path_ao(kg, &sd, &emission_sd, L, 
&state, throughput);
+                       kernel_branched_path_ao(kg, &sd, emission_sd, L, 
&state, throughput);
                }
 #endif  /* __AO__ */
 
 #ifdef __SUBSURFACE__
                /* bssrdf scatter to a different location on the same object */
                if(sd.flag & SD_BSSRDF) {
-                       kernel_branched_path_subsurface_scatter(kg, &sd, 
&indirect_sd, &emission_sd,
+                       kernel_branched_path_subsurface_scatter(kg, &sd, 
&indirect_sd, emission_sd,
                                                                L, &state, 
&ray, throughput);
                }
 #endif  /* __SUBSURFACE__ */
@@ -534,13 +536,13 @@ ccl_device void 
kernel_branched_path_integrate(KernelGlobals *kg,
                                int all = 
(kernel_data.integrator.sample_all_lights_direct) ||
                                          (state.flag & 
PATH_RAY_SHADOW_CATCHER);
                                kernel_branched_path_surface_connect_light(kg,
-                                       &sd, &emission_sd, &hit_state, 
throughput, 1.0f, L, all);
+                                       &sd, emission_sd, &hit_state, 
throughput, 1.0f, L, all);
                        }
 #endif  /* __EMISSION__ */
 
                        /* indirect light */
                        kernel_branched_path_surface_indirect_light(kg,
-                               &sd, &indirect_sd, &emission_sd, throughput, 
1.0f, &hit_state, L);
+                               &sd, &indirect_sd, emissio

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to