[Bf-blender-cvs] [1e4d993] master: Cycles: Use more accurate implementation of erf() and erfinv()

Sergey Sharybin Fri, 03 Oct 2014 05:33:51 -0700

Commit: 1e4d99368bba53edb6f903036b49edda92f27e31
Author: Sergey Sharybin
Date:   Fri Oct 3 18:27:08 2014 +0600
Branches: master
https://developer.blender.org/rB1e4d99368bba53edb6f903036b49edda92f27e31


Cycles: Use more accurate implementation of erf() and erfinv()

This functions are  orders of magnitude more accurate than the old ones,
and they're around the same complexity to compute.

===================================================================

M       intern/cycles/kernel/closure/bsdf_microfacet.h

===================================================================

diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h 
b/intern/cycles/kernel/closure/bsdf_microfacet.h
index a0c59e6..c6aed12 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -35,139 +35,38 @@
 
 CCL_NAMESPACE_BEGIN
 
-/* Approximate erf and erfinv implementations
+/* Approximate erf and erfinv implementations.
+ * Implementation comes stright from the wikipedia:
  *
- * Adapted from code (C) Copyright John Maddock 2006.
- * Use, modification and distribution are subject to the
- * Boost Software License, Version 1.0. (See accompanying file
- * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */
-
-ccl_device float approx_erff_impl(float z)
-{
-       float result;
-
-       if(z < 0.5f) {
-               if(z < 1e-10f) {
-                       if(z == 0) {
-                               result = 0;
-                       }
-                       else {
-                               float c = 0.0033791670f;
-                               result = z * 1.125f + z * c;
-                       }
-               }
-               else {
-                       float Y = 1.044948577f;
-
-                       float zz = z * z;
-                       float num = (((-0.007727583f * zz) + -0.050999073f)*zz 
+ -0.338165134f)*zz + 0.083430589f;
-                       float denom = (((0.000370900f * zz) + 0.008585719f)*zz 
+ 0.087522260f)*zz + 0.455004033f;
-                       result = z * (Y + num / denom);
-               }
-       }
-       else if(z < 2.5f) {
-               if(z < 1.5f) {
-                       float Y = 0.4059357643f;
-                       float fz = z - 0.5f;
-
-                       float num = (((0.088890036f * fz) + 0.191003695f)*fz + 
0.178114665f)*fz + -0.098090592f;
-                       float denom = (((0.123850974f * fz) + 0.578052804f)*fz 
+ 1.426280048f)*fz + 1.847590709f;
-
-                       result = Y + num / denom;
-                       result *= expf(-z * z) / z;
-               }
-               else  {
-                       float Y = 0.506728172f;
-                       float fz = z - 1.5f;
-                       float num = (((0.017567943f * fz) + 0.043948189f)*fz + 
0.038654037f)*fz + -0.024350047f;
-                       float denom = (((0.325732924f * fz) + 0.982403709f)*fz 
+ 1.539914949f)*fz + 1;
-
-                       result = Y + num / denom;
-                       result *= expf(-z * z) / z;
-               }
-
-               result = 1 - result;
-       }
-       else {
-               result = 1;
-       }
-
-       return result;
-}
+ *   http://en.wikipedia.org/wiki/Error_function
+ *
+ * Some constants are baked into the code.
+ */
 
-ccl_device float approx_erff(float z)
+ccl_device_inline float approx_erff(float x)
 {
        float s = 1.0f;
-
-       if(z < 0.0f) {
+       if(x < 0.0f) {
                s = -1.0f;
-               z = -z;
-       }
-
-       return s * approx_erff_impl(z);
-}
-
-ccl_device float approx_erfinvf_impl(float p, float q)
-{
-       float result = 0;
-
-       if(p <= 0.5f) {
-               float Y = 0.089131474f;
-               float g = p * (p + 10);
-               float num = (((-0.012692614f * p) + 0.033480662f)*p + 
-0.008368748f)*p + -0.000508781f;
-               float denom = (((1.562215583f * p) + -1.565745582f)*p + 
-0.970005043f)*p + 1.0f;
-               float r = num / denom;
-               result = g * Y + g * r;
+               x = -x;
        }
-       else if(q >= 0.25f) {
-               float Y = 2.249481201f;
-               float g = sqrtf(-2 * logf(q));
-               float xs = q - 0.25f;
-               float num = (((17.644729840f * xs) + 8.370503283f)*xs + 
0.105264680f)*xs + -0.202433508f;
-               float denom = (((-28.660818049f * xs) + 3.971343795f)*xs + 
6.242641248f)*xs + 1.0f;
-               float r = num / denom;
-               result = g / (Y + r);
+       /* Such a clamp doesn't give much distortion to the output value
+        * and gives quite a few of the speedup.
+        */
+       if(x > 3.0f) {
+               return s;
        }
-       else {
-               float x = sqrtf(-logf(q));
-
-               if(x < 3) {
-                       float Y = 0.807220458f;
-                       float xs = x - 1.125f;
-                       float num = (((0.387079738f * xs) + 0.117030156f)*xs + 
-0.163794047f)*xs + -0.131102781f;
-                       float denom = (((4.778465929f * xs) + 5.381683457f)*xs 
+ 3.466254072f)*xs + 1.0f;
-                       float R = num / denom;
-                       result = Y * x + R * x;
-               }
-               else {
-                       float Y = 0.939955711f;
-                       float xs = x - 3;
-                       float num = (((0.009508047f * xs) + 0.018557330f)*xs + 
-0.002224265f)*xs + -0.035035378f;
-                       float denom = (((0.220091105f * xs) + 0.762059164f)*xs 
+ 1.365334981f)*xs + 1.0f;
-                       float R = num / denom;
-                       result = Y * x + R * x;
-               }
-       }
-
-       return result;
+       float t = 1.0f / (1.0f + 0.47047f*x);
+       return s * (1.0f -
+                   t*(0.3480242f + t*(-0.0958798f + t*0.7478556f)) * 
expf(-x*x));
 }
 
-ccl_device float approx_erfinvf(float z)
+ccl_device_inline float approx_erfinvf(float x)
 {
-       float p, q, s;
-
-       if(z < 0) {
-         p = -z;
-         q = 1 - p;
-         s = -1;
-       }
-       else {
-         p = z;
-         q = 1 - z;
-         s = 1;
-       }
-
-       return s * approx_erfinvf_impl(p, q);
+       float ln1_x2 = logf(1.0f - x*x);
+       float term = 4.546884979448f + ln1_x2 * 0.5f;
+       return copysignf(1.0f, x) *
+              sqrtf(sqrtf(term*term - ln1_x2 * 7.142230224076f) - term);
 }
 
 /* Beckmann and GGX microfacet importance sampling from:

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [1e4d993] master: Cycles: Use more accurate implementation of erf() and erfinv()

Reply via email to