Commit: f8549205ad7d21b391d9da2986c1fbfa0e2f699e
Author: Brecht Van Lommel
Date:   Sun May 22 22:35:47 2016 +0200
Branches: compositor-2016
https://developer.blender.org/rBf8549205ad7d21b391d9da2986c1fbfa0e2f699e

Cycles CUDA: reduce stack memory by reusing ShaderData.

Stack memory usage is 57% lower for path and 48% lower for branched path.

===================================================================

M       intern/cycles/kernel/kernel_bake.h
M       intern/cycles/kernel/kernel_emission.h
M       intern/cycles/kernel/kernel_path.h
M       intern/cycles/kernel/kernel_path_branched.h
M       intern/cycles/kernel/kernel_path_state.h
M       intern/cycles/kernel/kernel_path_surface.h
M       intern/cycles/kernel/kernel_path_volume.h
M       intern/cycles/kernel/kernel_shadow.h
M       intern/cycles/kernel/kernel_volume.h
M       intern/cycles/kernel/split/kernel_background_buffer_update.h
M       intern/cycles/kernel/split/kernel_data_init.h
M       intern/cycles/kernel/split/kernel_direct_lighting.h
M       intern/cycles/kernel/split/kernel_lamp_emission.h
M       intern/cycles/kernel/split/kernel_shadow_blocked.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 392cff9..77982ee 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -30,6 +30,9 @@ ccl_device void compute_light_pass(KernelGlobals *kg, 
ShaderData *sd, PathRadian
        Ray ray;
        float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
 
+       /* emission shader data memory used by various functions */
+       ShaderData emission_sd;
+
        ray.P = sd->P + sd->Ng;
        ray.D = -sd->Ng;
        ray.t = FLT_MAX;
@@ -41,7 +44,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, 
ShaderData *sd, PathRadian
        path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
 
        /* init path state */
-       path_state_init(kg, &state, &rng, sample, NULL);
+       path_state_init(kg, &emission_sd, &state, &rng, sample, NULL);
 
        /* evaluate surface shader */
        float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
@@ -56,7 +59,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, 
ShaderData *sd, PathRadian
 
                /* sample ambient occlusion */
                if(pass_filter & BAKE_FILTER_AO) {
-                       kernel_path_ao(kg, sd, &L_sample, &state, &rng, 
throughput);
+                       kernel_path_ao(kg, sd, &emission_sd, &L_sample, &state, 
&rng, throughput);
                }
 
                /* sample emission */
@@ -75,6 +78,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, 
ShaderData *sd, PathRadian
                        kernel_path_subsurface_init_indirect(&ss_indirect);
                        if(kernel_path_subsurface_scatter(kg,
                                                          sd,
+                                                         &emission_sd,
                                                          &L_sample,
                                                          &state,
                                                          &rng,
@@ -90,6 +94,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, 
ShaderData *sd, PathRadian
                                                                              
&L_sample,
                                                                              
&throughput);
                                        kernel_path_indirect(kg,
+                                                            &emission_sd,
                                                             &rng,
                                                             &ray,
                                                             throughput,
@@ -105,14 +110,14 @@ ccl_device void compute_light_pass(KernelGlobals *kg, 
ShaderData *sd, PathRadian
 
                /* sample light and BSDF */
                if(!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | 
BAKE_FILTER_INDIRECT))) {
-                       kernel_path_surface_connect_light(kg, &rng, sd, 
throughput, &state, &L_sample);
+                       kernel_path_surface_connect_light(kg, &rng, sd, 
&emission_sd, throughput, &state, &L_sample);
 
                        if(kernel_path_surface_bounce(kg, &rng, sd, 
&throughput, &state, &L_sample, &ray)) {
 #ifdef __LAMP_MIS__
                                state.ray_t = 0.0f;
 #endif
                                /* compute indirect light */
-                               kernel_path_indirect(kg, &rng, &ray, 
throughput, 1, &state, &L_sample);
+                               kernel_path_indirect(kg, &emission_sd, &rng, 
&ray, throughput, 1, &state, &L_sample);
 
                                /* sum and reset indirect light pass variables 
for the next samples */
                                path_radiance_sum_indirect(&L_sample);
@@ -126,7 +131,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, 
ShaderData *sd, PathRadian
 
                /* sample ambient occlusion */
                if(pass_filter & BAKE_FILTER_AO) {
-                       kernel_branched_path_ao(kg, sd, &L_sample, &state, 
&rng, throughput);
+                       kernel_branched_path_ao(kg, sd, &emission_sd, 
&L_sample, &state, &rng, throughput);
                }
 
                /* sample emission */
@@ -139,7 +144,7 @@ ccl_device void compute_light_pass(KernelGlobals *kg, 
ShaderData *sd, PathRadian
                /* sample subsurface scattering */
                if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & 
SD_BSSRDF)) {
                        /* when mixing BSSRDF and BSDF closures we should skip 
BSDF lighting if scattering was successful */
-                       kernel_branched_path_subsurface_scatter(kg, sd, 
&L_sample, &state, &rng, &ray, throughput);
+                       kernel_branched_path_subsurface_scatter(kg, sd, 
&emission_sd, &L_sample, &state, &rng, &ray, throughput);
                }
 #endif
 
@@ -150,13 +155,13 @@ ccl_device void compute_light_pass(KernelGlobals *kg, 
ShaderData *sd, PathRadian
                        if(kernel_data.integrator.use_direct_light) {
                                int all = 
kernel_data.integrator.sample_all_lights_direct;
                                kernel_branched_path_surface_connect_light(kg, 
&rng,
-                                       sd, &state, throughput, 1.0f, 
&L_sample, all);
+                                       sd, &emission_sd, &state, throughput, 
1.0f, &L_sample, all);
                        }
 #endif
 
                        /* indirect light */
                        kernel_branched_path_surface_indirect_light(kg, &rng,
-                               sd, throughput, 1.0f, &state, &L_sample);
+                               sd, &emission_sd, throughput, 1.0f, &state, 
&L_sample);
                }
        }
 #endif
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 5cf52f9..4de8e0f 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -18,6 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Direction Emission */
 ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
+                                                ShaderData *emission_sd,
                                                 LightSample *ls,
                                                 ccl_addr_space PathState 
*state,
                                                 float3 I,
@@ -26,12 +27,6 @@ ccl_device_noinline float3 
direct_emissive_eval(KernelGlobals *kg,
                                                 float time)
 {
        /* setup shading at emitter */
-#ifdef __SPLIT_KERNEL__
-       ShaderData *sd = kg->sd_input;
-#else
-       ShaderData sd_object;
-       ShaderData *sd = &sd_object;
-#endif
        float3 eval;
 
 #ifdef __BACKGROUND_MIS__
@@ -46,28 +41,28 @@ ccl_device_noinline float3 
direct_emissive_eval(KernelGlobals *kg,
                ray.dP = differential3_zero();
                ray.dD = dI;
 
-               shader_setup_from_background(kg, sd, &ray);
+               shader_setup_from_background(kg, emission_sd, &ray);
 
                path_state_modify_bounce(state, true);
-               eval = shader_eval_background(kg, sd, state, 0, 
SHADER_CONTEXT_EMISSION);
+               eval = shader_eval_background(kg, emission_sd, state, 0, 
SHADER_CONTEXT_EMISSION);
                path_state_modify_bounce(state, false);
        }
        else
 #endif
        {
-               shader_setup_from_sample(kg, sd, ls->P, ls->Ng, I, ls->shader, 
ls->object, ls->prim, ls->u, ls->v, t, time);
+               shader_setup_from_sample(kg, emission_sd, ls->P, ls->Ng, I, 
ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time);
 
-               ls->Ng = ccl_fetch(sd, Ng);
+               ls->Ng = ccl_fetch(emission_sd, Ng);
 
                /* no path flag, we're evaluating this for all closures. that's 
weak but
                 * we'd have to do multiple evaluations otherwise */
                path_state_modify_bounce(state, true);
-               shader_eval_surface(kg, sd, state, 0.0f, 0, 
SHADER_CONTEXT_EMISSION);
+               shader_eval_surface(kg, emission_sd, state, 0.0f, 0, 
SHADER_CONTEXT_EMISSION);
                path_state_modify_bounce(state, false);
 
                /* evaluate emissive closure */
-               if(ccl_fetch(sd, flag) & SD_EMISSION)
-                       eval = shader_emissive_eval(kg, sd);
+               if(ccl_fetch(emission_sd, flag) & SD_EMISSION)
+                       eval = shader_emissive_eval(kg, emission_sd);
                else
                        eval = make_float3(0.0f, 0.0f, 0.0f);
        }
@@ -79,6 +74,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals 
*kg,
 
 ccl_device_noinline bool direct_emission(KernelGlobals *kg,
                                          ShaderData *sd,
+                                         ShaderData *emission_sd,
                                          LightSample *ls,
                                          ccl_addr_space PathState *state,
                                          Ray *ray,
@@ -94,6 +90,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
        /* evaluate closure */
 
        float3 light_eval = direct_emissive_eval(kg,
+                                                emission_sd,
                                                 ls,
                                                 state,
                                                 -ls->D,
@@ -198,6 +195,7 @@ ccl_device_noinline float3 
indirect_primitive_emission(KernelGlobals *kg, Shader
 /* Indirect Lamp Emission */
 
 ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
+                                                ShaderData *emission_sd,
                                                 ccl_addr_space PathState 
*state,
                                                 Ray *ray,
                                                 float3 *emission)
@@ -225,6 +223,7 @@ ccl_device_noinline bool 
indirect_lamp_emission(KernelGlobals *kg,
 #endif
 
                float3 L = direct_emissive_eval(kg,
+                                               emission_sd,
                                                &ls,
                                                state,
                                                -ray->D,
@@ -238,7 +237,7 @@ ccl_device_noinline bool 
indirect_lamp_emission(KernelGlobals *kg,
                        Ray volume_ray = *ray;
                        volume_ray.t = ls.t;
                        float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f);
-                       kernel_volume_shadow(kg, state, &volume_ray, 
&volume_tp);
+                       kernel_volume_shadow(kg, emission_sd, state, 
&volume_ray, &volume_tp);
                        L *= volume_tp;
                }
 #endif
@@ -260,6 +259,7 @@ ccl_device_noinline bool 
indirect_lamp_emission(KernelGlobals *kg,
 /* Indirect Background */
 
 ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
+                                               ShaderData *emission_sd,
                                                ccl_addr_space PathState *state,
                                                ccl_addr_space Ray *ray)
 {
@@ -280,19 +280,14 @@ ccl_device_noinline float3 
indirect_background(KernelGlobals *kg,
        /* evaluate background closure */
 #  ifdef __SPLIT_KERNEL__
        Ray priv_ray = *ray;
-       shader_setup_from_background(kg, kg->sd_input, &priv_ray);
-
-       path_state_modify_bounce(state, true);
-       float3 L = shader_eval_background(kg, kg->sd_input, state, state->flag, 
SHADER_CONTEXT_EMISSION);
-       path_state_modify_bounce(state, false);
+       shader_setup_from_background(kg, emission_sd, &priv_ray);
 #  else
-       ShaderData sd;
-       shader_setup_from_background(kg, &sd, ray);
+       shader_setup_from_background(kg, emission_sd, ray);
+#  endif
 
        path_state_modify_bounce(state, true);
-       float3 L = shader_eval_background(kg, &sd, state, state->flag, 
SHADER_CONTEXT_EMISSION);
+       float3 L = shader_eval_background(kg, emission_sd, state, state->flag, 
SHADER_CONTEXT_EMISSION);
        path_state_modify_bounce(state, false);
-#  endif
 
 #ifdef __BA

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to