Commit: b332fc8f23c9995923abbf526e67517f1a26f073
Author: lazydodo
Date:   Fri Apr 7 10:25:54 2017 -0600
Branches: master
https://developer.blender.org/rBb332fc8f23c9995923abbf526e67517f1a26f073

[Cycles/msvc] Get cycles_kernel compile time under control.

Ever since we merged the extra texture types (half etc.) and the split kernel,
the compile time for cycles_kernel has been going out of control.

It's currently sitting at a cool 1295.762 seconds with our standard compiler
(MSVC 2013, x64, release).

I'm not entirely sure why MSVC gets upset with it, but the inlining of the
matrix multiplications near the bottom of the tri-cubic 3D interpolator is the
source of the issue; this patch excludes that interpolator from being inlined.

This patch brings it back down to a manageable 186 seconds (7x faster!!).

With the attached bzzt.blend that @sergey kindly provided, I got the following
results with builds with identical hashes:

58:51.73 buildbot
58:04.23 Patched

It's really close. The slight speedup could be explained by the switch replacing
multiple ifs (switches do generate more optimal code than a chain of
if/else/if/else statements), but in all honesty it might just have been pure
luck (dev box, very polluted, bad for benchmarks). Regardless, this patch
doesn't seem to slow down anything in my limited testing.

{F532336}

{F532337}

Reviewers: brecht, lukasstockner97, juicyfruit, dingto, sergey

Reviewed By: brecht, dingto, sergey

Subscribers: InsigMathK, sergey

Tags: #cycles

Differential Revision: https://developer.blender.org/D2595

===================================================================

M       intern/cycles/kernel/kernel_compat_cpu.h
M       intern/cycles/util/util_types.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_compat_cpu.h 
b/intern/cycles/kernel/kernel_compat_cpu.h
index ae7c9b836c4..cad5f4d2959 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -316,184 +316,203 @@ template<typename T> struct texture_image  {
                return interp_3d_ex(x, y, z, interpolation);
        }
 
-       ccl_always_inline float4 interp_3d_ex(float x, float y, float z,
-                                             int interpolation = 
INTERPOLATION_LINEAR)
+       ccl_always_inline float4 interp_3d_ex_closest(float x, float y, float z)
        {
-               if(UNLIKELY(!data))
-                       return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
-               int ix, iy, iz, nix, niy, niz;
-
-               if(interpolation == INTERPOLATION_CLOSEST) {
-                       frac(x*(float)width, &ix);
-                       frac(y*(float)height, &iy);
-                       frac(z*(float)depth, &iz);
-
-                       switch(extension) {
-                               case EXTENSION_REPEAT:
-                                       ix = wrap_periodic(ix, width);
-                                       iy = wrap_periodic(iy, height);
-                                       iz = wrap_periodic(iz, depth);
-                                       break;
-                               case EXTENSION_CLIP:
-                                       if(x < 0.0f || y < 0.0f || z < 0.0f ||
-                                          x > 1.0f || y > 1.0f || z > 1.0f)
-                                       {
-                                               return make_float4(0.0f, 0.0f, 
0.0f, 0.0f);
-                                       }
-                                       /* Fall through. */
-                               case EXTENSION_EXTEND:
-                                       ix = wrap_clamp(ix, width);
-                                       iy = wrap_clamp(iy, height);
-                                       iz = wrap_clamp(iz, depth);
-                                       break;
-                               default:
-                                       kernel_assert(0);
+               int ix, iy, iz;
+               frac(x*(float)width, &ix);
+               frac(y*(float)height, &iy);
+               frac(z*(float)depth, &iz);
+
+               switch(extension) {
+                       case EXTENSION_REPEAT:
+                               ix = wrap_periodic(ix, width);
+                               iy = wrap_periodic(iy, height);
+                               iz = wrap_periodic(iz, depth);
+                               break;
+                       case EXTENSION_CLIP:
+                               if(x < 0.0f || y < 0.0f || z < 0.0f ||
+                                  x > 1.0f || y > 1.0f || z > 1.0f)
+                               {
                                        return make_float4(0.0f, 0.0f, 0.0f, 
0.0f);
-                       }
-
-                       return read(data[ix + iy*width + iz*width*height]);
+                               }
+                               /* Fall through. */
+                       case EXTENSION_EXTEND:
+                               ix = wrap_clamp(ix, width);
+                               iy = wrap_clamp(iy, height);
+                               iz = wrap_clamp(iz, depth);
+                               break;
+                       default:
+                               kernel_assert(0);
+                               return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
                }
-               else if(interpolation == INTERPOLATION_LINEAR) {
-                       float tx = frac(x*(float)width - 0.5f, &ix);
-                       float ty = frac(y*(float)height - 0.5f, &iy);
-                       float tz = frac(z*(float)depth - 0.5f, &iz);
 
-                       switch(extension) {
-                               case EXTENSION_REPEAT:
-                                       ix = wrap_periodic(ix, width);
-                                       iy = wrap_periodic(iy, height);
-                                       iz = wrap_periodic(iz, depth);
-
-                                       nix = wrap_periodic(ix+1, width);
-                                       niy = wrap_periodic(iy+1, height);
-                                       niz = wrap_periodic(iz+1, depth);
-                                       break;
-                               case EXTENSION_CLIP:
-                                       if(x < 0.0f || y < 0.0f || z < 0.0f ||
-                                          x > 1.0f || y > 1.0f || z > 1.0f)
-                                       {
-                                               return make_float4(0.0f, 0.0f, 
0.0f, 0.0f);
-                                       }
-                                       /* Fall through. */
-                               case EXTENSION_EXTEND:
-                                       nix = wrap_clamp(ix+1, width);
-                                       niy = wrap_clamp(iy+1, height);
-                                       niz = wrap_clamp(iz+1, depth);
+               return read(data[ix + iy*width + iz*width*height]);
+       }
 
-                                       ix = wrap_clamp(ix, width);
-                                       iy = wrap_clamp(iy, height);
-                                       iz = wrap_clamp(iz, depth);
-                                       break;
-                               default:
-                                       kernel_assert(0);
+       ccl_always_inline float4 interp_3d_ex_linear(float x, float y, float z)
+       {
+               int ix, iy, iz;
+               int nix, niy, niz;
+               
+               float tx = frac(x*(float)width - 0.5f, &ix);
+               float ty = frac(y*(float)height - 0.5f, &iy);
+               float tz = frac(z*(float)depth - 0.5f, &iz);
+
+               switch(extension) {
+                       case EXTENSION_REPEAT:
+                               ix = wrap_periodic(ix, width);
+                               iy = wrap_periodic(iy, height);
+                               iz = wrap_periodic(iz, depth);
+
+                               nix = wrap_periodic(ix+1, width);
+                               niy = wrap_periodic(iy+1, height);
+                               niz = wrap_periodic(iz+1, depth);
+                               break;
+                       case EXTENSION_CLIP:
+                               if(x < 0.0f || y < 0.0f || z < 0.0f ||
+                                  x > 1.0f || y > 1.0f || z > 1.0f)
+                               {
                                        return make_float4(0.0f, 0.0f, 0.0f, 
0.0f);
-                       }
-
-                       float4 r;
-
-                       r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + 
iy*width + iz*width*height]);
-                       r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + 
iy*width + iz*width*height]);
-                       r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + 
niy*width + iz*width*height]);
-                       r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + 
iz*width*height]);
-
-                       r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width 
+ niz*width*height]);
-                       r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + 
niz*width*height]);
-                       r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + 
niz*width*height]);
-                       r += tz*ty*tx*read(data[nix + niy*width + 
niz*width*height]);
-
-                       return r;
+                               }
+                               /* Fall through. */
+                       case EXTENSION_EXTEND:
+                               nix = wrap_clamp(ix+1, width);
+                               niy = wrap_clamp(iy+1, height);
+                               niz = wrap_clamp(iz+1, depth);
+
+                               ix = wrap_clamp(ix, width);
+                               iy = wrap_clamp(iy, height);
+                               iz = wrap_clamp(iz, depth);
+                               break;
+                       default:
+                               kernel_assert(0);
+                               return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
                }
-               else {
-                       /* Tricubic b-spline interpolation. */
-                       const float tx = frac(x*(float)width - 0.5f, &ix);
-                       const float ty = frac(y*(float)height - 0.5f, &iy);
-                       const float tz = frac(z*(float)depth - 0.5f, &iz);
-                       int pix, piy, piz, nnix, nniy, nniz;
-
-                       switch(extension) {
-                               case EXTENSION_REPEAT:
-                                       ix = wrap_periodic(ix, width);
-                                       iy = wrap_periodic(iy, height);
-                                       iz = wrap_periodic(iz, depth);
 
-                                       pix = wrap_periodic(ix-1, width);
-                                       piy = wrap_periodic(iy-1, height);
-                                       piz = wrap_periodic(iz-1, depth);
+               float4 r;
 
-                                       nix = wrap_periodic(ix+1, width);
-                                       niy = wrap_periodic(iy+1, height);
-                                       niz = wrap_periodic(iz+1, depth);
-
-                                       nnix = wrap_periodic(ix+2, width);
-                                       nniy = wrap_periodic(iy+2, height);
-                                       nniz = wrap_periodic(iz+2, depth);
-                                       break;
-                               case EXTENSION_CLIP:
-                                       if(x < 0.0f || y < 0.0f || z < 0.0f ||
-                                          x > 1.0f || y > 1.0f || z > 1.0f)
-                                       {
-                                               return make_float4(0.0f, 0.0f, 
0.0f, 0.0f);
-                                       }
-                                       /* Fall through. */
-                               case EXTENSION_EXTEND:
-                                       pix = wrap_clamp(ix-1, width);
-                                       piy = wrap_clamp(iy-1, height);
-                                       piz = wrap_clamp(iz-1, depth);
+               r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + 
iy*width + iz*width*height]);
+               r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + 
iz*width*height]);
+               r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + 
iz*width*height]);
+               r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + 
iz*width*height]);
 
-                                       nix = wrap_clamp(ix+1, width);
-                                       niy = wrap_clamp(iy+1, height);
-                                       niz = wrap_clamp(iz+1, depth);
+               r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + 
niz*width*height]);
+               r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + 
niz*width*height]);
+               r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + 
niz*width*height]);
+               r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]);
 
-                                       nnix = wrap_clamp(ix+2, width);
-                                       nniy = wrap_clamp(iy+2, height);
-                                       nniz = wrap_clamp(iz+2, depth);
+               return r;
+       }
 
-                                       ix = wrap_clamp(ix, width);
-                                       iy = wrap_clamp(iy, height);
-                                       iz = wrap_clamp(iz, depth);
-                                       break;
-                               default:
-                                       kernel_assert(0);
+       ccl_never_inline float4 interp_3d_ex_tricubic(float x, float y, float z)
+       {
+               int ix, iy, iz;
+               int nix, niy, niz;
+               /* Tricubic b-spline interpolation. */
+               const float tx = frac(x*(float)width - 0.5f, &ix);
+               const float ty = frac(y*(float)height - 0.5f, &iy);
+               const float tz = frac(z*(float)depth - 0.5f, &iz);
+               int pix, piy, piz, nnix, nniy, nniz;
+
+               switch(extension) {
+                       case EXTENSION_REPEAT:
+                               ix = wrap_periodic(ix, width);
+                               iy = wrap_periodic(iy, height);
+                               iz = wrap_periodic(iz, depth);
+
+                               pix = wrap_periodic(ix-1, width);
+                               piy = wrap_periodic(iy-1, height);
+                               piz = wrap_periodic(iz-1, depth);
+
+                               nix = wrap_periodic(ix+1, width);
+                               niy = wrap_periodic(iy+1, height);
+                               niz = wrap_periodic(iz+1, depth);
+
+                               nnix = wrap_periodic(ix+2, width);
+                               nniy = wrap_periodic(iy+2, height);
+                               nniz = wrap_periodic(iz+2, depth);
+                               break;
+                       case EXTENSION_CLIP:
+                               if(x < 0.0f || y < 0.0f || z < 0.0f ||
+                                  x > 1.0f || y > 1.0f || z > 1.0f)
+                               {
                                        return make_float4(0.0f, 0.0f, 0.0f, 
0.0f);
-                       }
-
-                       const int xc[4] = {pix, ix, nix, nnix};
-                       const int yc[4] = {width * piy,
-                                          width * iy,
-                                          width * niy,
-                                          width * nniy};
-                       const int zc[4] = {width * height * piz,
-                                          width * height * iz,
-                                          width * height * niz,
-                                          width * height * nniz};
-                       float u[4], v[4], w[4];
+                               }
+                               /* Fall through. */
+                       case EXTENSION_EXTEND:
+                               pix = wrap_clamp(ix-1, width);
+                               piy = wrap_clamp(iy-1, height);
+                               piz = wrap_clamp(iz-1, depth);
+
+                               nix = wrap_clamp(ix+1, width);
+                               niy = wrap_clamp(iy+1, height);
+                               niz = wrap_clamp(iz+1, depth);
+
+                               nnix = wrap_clamp(ix+2, width);
+                               nniy = wrap_clamp(iy+2, height);
+                               nniz = wrap_clamp(iz+2, depth);
+
+                               ix = wrap_clamp(ix, width);
+                               iy = wrap_clamp(iy, height);
+                               iz = wrap_clamp(iz, depth);
+                               break;
+                       default:
+                               kernel_assert(0);
+                               return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+               }
 
-                       /* Some helper macro to keep code reasonable size,
-                        * let compiler to inline all the matrix 
multiplications.
-                        */
+               const int xc[4] = {pix, ix, nix, nnix};
+               const int yc[4] = {width * piy,
+                                                  width * iy,
+                                                  width * niy,
+                                                  width * nniy};
+               const int zc[4] = {width * height * piz,
+                                                  width * height * iz,
+                                                  width * height * niz,
+                                                  width * height * nniz};
+               float u[4], v[4], w[4];
+
+               /* Some helper macro to keep code reasonable size,
+                * let compiler to inline all the matrix multiplications.
+                */
 #define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
 #define COL_TERM(col, row) \
-                       (v[col] * (u[0] * DATA(0, col, row) + \
-                                  u[1] * DATA(1, col, row) + \
-                                  u[2] * DATA(2, col, row) + \
-                                  u[3] * DATA(3, col, row)))
+               (v[col] * (u[0] * DATA(0, col, row) + \
+                                  u[1] * DATA(1, col, row) + \
+                                  u[2] * DATA(2, col, row) + \
+                                  u[3] * DATA(3, c

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to