Commit: 3a243ad83fb5f485ecc6fdc2bcc65a93a9f5ea53
Author: Clément Foucault
Date:   Fri Jun 23 02:52:15 2017 +0200
Branches: blender2.8
https://developer.blender.org/rB3a243ad83fb5f485ecc6fdc2bcc65a93a9f5ea53

Eevee: Attempt to optimize GTAO shader.

Unroll horizon search loop. Use fast version of acos.
On nvidia linux, unrolling the 2nd loop is giving very high compilation time.

===================================================================

M       source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl
M       source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl

===================================================================

diff --git a/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl b/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl
index 65ff09c4eae..b044cacf1b1 100644
--- a/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl
@@ -5,28 +5,21 @@
 
 #define MAX_PHI_STEP 32
 /* NOTICE : this is multiplied by 2 */
-#define MAX_THETA_STEP 6.0
+#define MAX_THETA_STEP 12
 
 uniform sampler2D minMaxDepthTex;
 uniform float aoDistance;
 uniform float aoSamples;
 uniform float aoFactor;
 
-float sample_depth(vec2 co, int level)
+float get_max_horizon(vec2 co, vec3 x, float h, float lod)
 {
-       return textureLod(minMaxDepthTex, co, float(level)).g;
-}
-
-float get_max_horizon(vec2 co, vec3 x, float h, float step)
-{
-       if (co.x > 1.0 || co.x < 0.0 || co.y > 1.0 || co.y < 0.0)
-               return h;
-
-       float depth = sample_depth(co, int(step));
+       float depth = textureLod(minMaxDepthTex, co, floor(lod)).g;
 
        /* Background case */
-       if (depth == 1.0)
-               return h;
+       /* this is really slow and is only a problem
+        * if the far clip plane is near enough to notice */
+       // depth += step(1.0, depth) * 1e20;
 
        vec3 s = get_view_space_from_depth(co, depth); /* s View coordinate */
        vec3 omega_s = s - x;
@@ -39,6 +32,124 @@ float get_max_horizon(vec2 co, vec3 x, float h, float step)
        return mix(h, max_h, blend);
 }
 
+void search_step(
+        vec2 t_phi, vec3 x, vec2 x_, float rand, vec2 pixel_ratio,
+        inout float j, inout float ofs, inout float h1, inout float h2)
+{
+       ofs += ofs; /* Step size is doubled each iteration */
+
+       vec2 s_ = t_phi * ofs * rand * pixel_ratio; /* s^ Screen coordinate */
+       vec2 co;
+
+       co = x_ + s_;
+       h1 = get_max_horizon(co, x, h1, j);
+
+       co = x_ - s_;
+       h2 = get_max_horizon(co, x, h2, j);
+
+       j += 0.5;
+}
+
+void search_horizon(
+        vec2 t_phi, vec3 x, vec2 x_, float rand,
+        float max_dist, vec2 pixel_ratio, float pixel_len,
+        inout float h1, inout float h2)
+{
+       float ofs = 1.5 * pixel_len;
+       float j = 0.0;
+
+#if 0 /* manually unrolled bellow */
+       for (int i = 0; i < MAX_THETA_STEP; i++) {
+               search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+               if (ofs > max_dist)
+                       return;
+       }
+#endif
+       search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+       if (ofs > max_dist)     return;
+
+       search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+       if (ofs > max_dist)     return;
+
+       search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+       if (ofs > max_dist)     return;
+
+       search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+       if (ofs > max_dist)     return;
+
+       search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+       if (ofs > max_dist)     return;
+
+       search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+       if (ofs > max_dist)     return;
+
+       search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+       if (ofs > max_dist)     return;
+
+       search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+       if (ofs > max_dist)     return;
+
+       search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+       if (ofs > max_dist)     return;
+
+       search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+       if (ofs > max_dist)     return;
+
+       search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+       if (ofs > max_dist)     return;
+
+       search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+}
+
+void integrate_slice(
+        float iter, vec3 x, vec3 normal, vec2 x_, vec2 noise,
+        float max_dist, vec2 pixel_ratio, float pixel_len,
+        inout float visibility, inout vec3 bent_normal)
+{
+       float phi = M_PI * ((noise.r + iter) / aoSamples);
+
+       /* Rotate with random direction to get jittered result. */
+       vec2 t_phi = vec2(cos(phi), sin(phi)); /* Screen space direction */
+
+       /* Search maximum horizon angles h1 and h2 */
+       float h1 = -1.0, h2 = -1.0; /* init at cos(pi) */
+       search_horizon(t_phi, x, x_, noise.g, max_dist, pixel_ratio, pixel_len, h1, h2);
+
+       /* (Slide 54) */
+       h1 = -fast_acos(h1);
+       h2 = fast_acos(h2);
+
+       /* Projecting Normal to Plane P defined by t_phi and omega_o */
+       vec3 h = vec3(t_phi.y, -t_phi.x, 0.0); /* Normal vector to Integration plane */
+       vec3 t = vec3(-t_phi, 0.0);
+       vec3 n_proj = normal - h * dot(h, normal);
+       float n_proj_len = max(1e-16, length(n_proj));
+
+       /* Clamping thetas (slide 58) */
+       float cos_n = clamp(n_proj.z / n_proj_len, -1.0, 1.0);
+       float n = sign(dot(n_proj, t)) * fast_acos(cos_n); /* Angle between view vec and normal */
+       h1 = n + max(h1 - n, -M_PI_2);
+       h2 = n + min(h2 - n, M_PI_2);
+
+       /* Solving inner integral */
+       float sin_n = sin(n);
+       float h1_2 = 2.0 * h1;
+       float h2_2 = 2.0 * h2;
+       float vd = (-cos(h1_2 - n) + cos_n + h1_2 * sin_n) + (-cos(h2_2 - n) + cos_n + h2_2 * sin_n);
+       vd *= 0.25 * n_proj_len;
+       visibility += vd;
+
+#ifdef USE_BENT_NORMAL
+       /* Finding Bent normal */
+       float b_angle = (h1 + h2) / 2.0;
+       /* The 0.5 factor below is here to equilibrate the accumulated vectors.
+        * (sin(b_angle) * -t_phi) will accumulate to (phi_step * result_nor.xy * 0.5).
+        * (cos(b_angle) * 0.5) will accumulate to (phi_step * result_nor.z * 0.5). */
+       /* Weight sample by vd */
+       bent_normal += vec3(sin(b_angle) * -t_phi, cos(b_angle) * 0.5) * vd;
+#endif
+}
+
 void gtao(vec3 normal, vec3 position, vec2 noise, out float visibility
 #ifdef USE_BENT_NORMAL
        , out vec3 bent_normal
@@ -66,62 +177,12 @@ void gtao(vec3 normal, vec3 position, vec2 noise, out float visibility
        visibility = 0.0;
 #ifdef USE_BENT_NORMAL
        bent_normal = vec3(0.0);
+#else
+       vec3 bent_normal = vec3(0.0);
 #endif
-       for (float i = 0.0; i < aoSamples && i < MAX_PHI_STEP; i++) {
-               float phi = M_PI * ((noise.r + i) / aoSamples);
-
-               /* Rotate with random direction to get jittered result. */
-               vec2 t_phi = vec2(cos(phi), sin(phi)); /* Screen space direction */
-
-               /* Search maximum horizon angles h1 and h2 */
-               float h1 = -1.0, h2 = -1.0; /* init at cos(pi) */
-               float ofs = 1.5 * pixel_len;
-               for (float j = 0.0; ofs < max_dist && j < MAX_THETA_STEP; j += 0.5) {
-                       ofs += ofs; /* Step size is doubled each iteration */
-
-                       vec2 s_ = t_phi * ofs * noise.g * pixel_ratio; /* s^ Screen coordinate */
-                       vec2 co;
-
-                       co = x_ + s_;
-                       h1 = get_max_horizon(co, x, h1, j);
-
-                       co = x_ - s_;
-                       h2 = get_max_horizon(co, x, h2, j);
-               }
-
-               /* (Slide 54) */
-               h1 = -acos(h1);
-               h2 = acos(h2);
-
-               /* Projecting Normal to Plane P defined by t_phi and omega_o */
-               vec3 h = vec3(t_phi.y, -t_phi.x, 0.0); /* Normal vector to Integration plane */
-               vec3 t = vec3(-t_phi, 0.0);
-               vec3 n_proj = normal - h * dot(h, normal);
-               float n_proj_len = max(1e-16, length(n_proj));
-
-               /* Clamping thetas (slide 58) */
-               float cos_n = clamp(n_proj.z / n_proj_len, -1.0, 1.0);
-               float n = sign(dot(n_proj, t)) * acos(cos_n); /* Angle between view vec and normal */
-               h1 = n + max(h1 - n, -M_PI_2);
-               h2 = n + min(h2 - n, M_PI_2);
-
-               /* Solving inner integral */
-               float sin_n = sin(n);
-               float h1_2 = 2.0 * h1;
-               float h2_2 = 2.0 * h2;
-               float vd = (-cos(h1_2 - n) + cos_n + h1_2 * sin_n) + (-cos(h2_2 - n) + cos_n + h2_2 * sin_n);
-               vd *= 0.25 * n_proj_len;
-               visibility += vd;
-
-#ifdef USE_BENT_NORMAL
-               /* Finding Bent normal */
-               float b_angle = (h1 + h2) / 2.0;
-               /* The 0.5 factor below is here to equilibrate the accumulated vectors.
-                * (sin(b_angle) * -t_phi) will accumulate to (phi_step * result_nor.xy * 0.5).
-                * (cos(b_angle) * 0.5) will accumulate to (phi_step * result_nor.z * 0.5). */
-               /* Weight sample by vd */
-               bent_normal += vec3(sin(b_angle) * -t_phi, cos(b_angle) * 0.5) * vd;
-#endif
+       for (float i = 0.0; i < MAX_PHI_STEP; i++) {
+               if (i >= aoSamples) break;
+               integrate_slice(i, x, normal, x_, noise, max_dist, pixel_ratio, pixel_len, visibility, bent_normal);
        }
 
        visibility = clamp(visibility / aoSamples, 1e-8, 1.0);
diff --git a/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl b/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl
index 6ce4b2f7501..d4f1781ae6c 100644
--- a/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl
@@ -146,6 +146,22 @@ float distance_squared(vec3 a, vec3 b) { a -= b; return dot(a, a); }
 
 float inverse_distance(vec3 V) { return max( 1 / length(V), 1e-8); }
 
+/* ------- Fast Math ------- */
+
+/* [Drobot2014a] Low Level Optimizations for GCN */
+float fast_sqrt(float x)
+{
+       return intBitsToFloat(0x1fbd1df5 + (floatBitsToInt(x) >> 1));
+}
+
+/* [Eberly2014] GPGPU Programming for Games and Science */
+float fast_acos(float x)
+{
+       float res = -0.156583 * abs(x) + M_PI_2;
+       res *= fast_sqrt(1.0 - abs(x));
+       return (x >= 0) ? res : M_PI - res;
+}
+
 float line_plane_intersect_dist(vec3 lineorigin, vec3 linedirection, vec3 planeorigin, vec3 planenormal)
 {
        return dot(planenormal, planeorigin - lineorigin) / dot(planenormal, linedirection);

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to