mseeger closed pull request #9982: Unary ops logcdf_normal, derivlogcdf_normal 
[MXNET-39]
URL: https://github.com/apache/incubator-mxnet/pull/9982
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/docs/api/python/ndarray/ndarray.md 
b/docs/api/python/ndarray/ndarray.md
index 59ca4a612e6..d7c87807a42 100644
--- a/docs/api/python/ndarray/ndarray.md
+++ b/docs/api/python/ndarray/ndarray.md
@@ -606,6 +606,8 @@ The `ndarray` package provides several classes:
     sign
     gamma
     gammaln
+    logcdf_normal
+    derivlogcdf_normal
 ```
 
 ## Neural network
diff --git a/docs/api/python/symbol/symbol.md b/docs/api/python/symbol/symbol.md
index e383597236d..bd96948ca85 100644
--- a/docs/api/python/symbol/symbol.md
+++ b/docs/api/python/symbol/symbol.md
@@ -607,6 +607,8 @@ Composite multiple symbols into a new one by an operator.
     sign
     gamma
     gammaln
+    logcdf_normal
+    derivlogcdf_normal
 ```
 
 ## Neural network
diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h
index 1d4284e1ac2..b469eb9fa80 100644
--- a/src/operator/mshadow_op.h
+++ b/src/operator/mshadow_op.h
@@ -621,6 +621,72 @@ MSHADOW_XINLINE double gammaln_grad::Map<double>(double a) 
{
   return special_functions::cephes::psi<double>(a);
 }
 
+/***** logcdf_normal ******/
+
+struct logcdf_normal : public mxnet_op::tunable {
+  template<typename DType>
+  MSHADOW_XINLINE static DType Map(DType a) {
+    // Default implementation using floating precision
+    float af(static_cast<float>(a));
+    return DType(special_functions::apbsint::logCdfNormal<float>(af));
+  }
+};
+
+template<>
+MSHADOW_XINLINE double logcdf_normal::Map<double>(double a) {
+  return special_functions::apbsint::logCdfNormal<double>(a);
+}
+
+struct logcdf_normal_grad : public mxnet_op::tunable {
+  template<typename DType>
+  MSHADOW_XINLINE static DType Map(DType a) {
+    // Default implementation using floating precision
+    float af(static_cast<float>(a));
+    return DType(special_functions::apbsint::derivLogCdfNormal<float>(af));
+  }
+};
+
+template<>
+MSHADOW_XINLINE double logcdf_normal_grad::Map<double>(double a) {
+  return special_functions::apbsint::derivLogCdfNormal<double>(a);
+}
+
+/***** derivlogcdf_normal ******/
+
+struct derivlogcdf_normal : public mxnet_op::tunable {
+  template<typename DType>
+  MSHADOW_XINLINE static DType Map(DType a) {
+    // Default implementation using floating precision
+    float af(static_cast<float>(a));
+    return DType(special_functions::apbsint::derivLogCdfNormal<float>(af));
+  }
+};
+
+template<>
+MSHADOW_XINLINE double derivlogcdf_normal::Map<double>(double a) {
+  return special_functions::apbsint::derivLogCdfNormal<double>(a);
+}
+
+// NOTE: This grad would best be computed as ElemwiseGradUseInOut, with a and 
da as
+// input. Here, we recompute da, because ElemwiseGradUseInOut is not properly 
supported
+// for basic unary functions.
+struct derivlogcdf_normal_grad : public mxnet_op::tunable {
+  template<typename DType>
+  MSHADOW_XINLINE static DType Map(DType a) {
+    // Default implementation using floating precision
+    float af(static_cast<float>(a));
+    float daf(special_functions::apbsint::derivLogCdfNormal<float>(af));
+    return DType(-daf * (af + daf));
+  }
+};
+
+template<>
+MSHADOW_XINLINE double derivlogcdf_normal_grad::Map<double>(double a) {
+  double da(special_functions::apbsint::derivLogCdfNormal<double>(a));
+  return -da * (a + da);
+}
+
+
 /* Smooth L1 Loss is a loss specific for R-CNN franchise training
  * Smooth L1 Loss function:
  * f(x) = 0.5 * (sigma * x) ^ 2,     |x| < 1 / sigma^2
diff --git a/src/operator/operator_tune.cc b/src/operator/operator_tune.cc
index c13f1ac2fae..4dd012ddfeb 100644
--- a/src/operator/operator_tune.cc
+++ b/src/operator/operator_tune.cc
@@ -277,6 +277,10 @@ 
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::gamma);  // NOLINT()
 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::gamma_grad);  // NOLINT()
 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::gammaln);  // NOLINT()
 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::gammaln_grad);  // NOLINT()
+IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::logcdf_normal);  // 
NOLINT()
+IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::logcdf_normal_grad);  // 
NOLINT()
+IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::derivlogcdf_normal);  // 
NOLINT()
+IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::derivlogcdf_normal_grad);  
// NOLINT()
 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::ceil);  // NOLINT()
 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::degrees);  // NOLINT()
 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::degrees_grad);  // NOLINT()
diff --git a/src/operator/special_functions-inl.h 
b/src/operator/special_functions-inl.h
index 743391e0fce..aab234c7bcc 100644
--- a/src/operator/special_functions-inl.h
+++ b/src/operator/special_functions-inl.h
@@ -9,6 +9,8 @@
 #ifndef MXNET_OPERATOR_SPECIAL_FUNCTIONS_INL_H_
 #define MXNET_OPERATOR_SPECIAL_FUNCTIONS_INL_H_
 
+#include "math_functions-inl.h"
+
 namespace mxnet {
 namespace op {
 
@@ -86,7 +88,7 @@ struct cephes {
 
   /*
    *
-   *   Psi (digamma) function
+   *    Psi (digamma) function
    *
    *
    * SYNOPSIS:
@@ -241,6 +243,213 @@ MSHADOW_XINLINE float cephes::psi_helper<float>(float s) {
     return 0.0;
   }
 }
+
+
+// This is code extracted from ApBsInT, available at
+//    https://github.com/mseeger/apbsint
+// The author of ApBsInT, Matthias Seeger ([email protected]) is the same one
+// who ported the code to MXNet.
+//
+// NOTE: Instantiate this with DType in {float, double}, nothing else will 
work!
+
+// Some constants used in apbsint struct below
+template<typename DType>
+struct apbsint_const {
+  MSHADOW_XINLINE static DType m_ln2pi() {
+    return DType(1.83787706640934533908193770913);
+  }
+  MSHADOW_XINLINE static DType m_ln2() {
+    return DType(0.69314718055994530941723212146);
+  }
+  MSHADOW_XINLINE static DType m_sqrtpi() {
+    return DType(1.77245385090551602729816748334);
+  }
+  MSHADOW_XINLINE static DType m_sqrt2() {
+    return DType(1.41421356237309504880168872421);
+  }
+  MSHADOW_XINLINE static DType erf_cody_limit1() {
+    return DType(0.6629);
+  }
+  MSHADOW_XINLINE static DType erf_cody_limit2() {
+    return DType(5.6569);
+  }
+};
+
+struct apbsint {
+  // Internal helpers
+
+  /**
+   * For x >= erf_cody_limit1(), define Q(x) by
+   *   1 - Phi(x) approx N(x) x^{-1} Q(x).
+   * We compute Q(x) according to
+   *   Cody
+   *   Rational Chebyshev approximation to the error function
+   * This is done differently for x >= erf_cody_limit2() and
+   * erf_cody_limit1() <= x < erf_cody_limit2().
+   * NOTE: Q(x) -> 1 for x->infty.
+   */
+  template<typename DType>
+  MSHADOW_XINLINE static DType erfRationalHelper(DType x) {
+    int i;
+    DType res, den, y;
+
+    // MYASS(x>0.0);
+    if (x >= apbsint_const<DType>::erf_cody_limit2()) {
+      // x/sqrt(2) >= 4
+      // Q(x)   = 1 + sqrt(pi) y R_1(y),
+      // R_1(y) = poly(p_j,y) / poly(q_j,y),   y = 2/x^2
+      // Ordering of arrays: 4,3,2,1,0,5 (only for numerator p_j; q_5=1)
+      // ATTENTION: The p_j are negative of the entries here
+      DType p[] = {3.05326634961232344e-1, 3.60344899949804439e-1,
+                   1.25781726111229246e-1, 1.60837851487422766e-2,
+                   6.58749161529837803e-4, 1.63153871373020978e-2};
+      DType q[] = {2.56852019228982242,    1.87295284992346047,
+                   5.27905102951428412e-1, 6.05183413124413191e-2,
+                   2.33520497626869185e-3};
+      y = 2.0/x/x;
+      res = y*p[5];
+      den = y;
+      for (i = 0; i < 4; i++) {
+        res = (res + p[i])*y;
+        den = (den + q[i])*y;
+      }
+      // Minus, because p[j] values have to be negated
+      res = 1.0 - apbsint_const<DType>::m_sqrtpi()*y*(res + p[4])/(den + q[4]);
+    } else {
+      // x/sqrt(2) < 4, x/sqrt(2) >= 0.469
+      // Q(x)   = sqrt(pi) y R_2(y),
+      // R_2(y) = poly(p_j,y) / poly(q_j,y),   y = x/sqrt(2)
+      // Ordering of arrays: 7,6,5,4,3,2,1,0,8 (only p_8; q_8=1)
+      DType p[] = {5.64188496988670089e-1, 8.88314979438837594,
+                   6.61191906371416295e+1, 2.98635138197400131e+2,
+                   8.81952221241769090e+2, 1.71204761263407058e+3,
+                   2.05107837782607147e+3, 1.23033935479799725e+3,
+                   2.15311535474403846e-8};
+      DType q[] = {1.57449261107098347e+1, 1.17693950891312499e+2,
+                   5.37181101862009858e+2, 1.62138957456669019e+3,
+                   3.29079923573345963e+3, 4.36261909014324716e+3,
+                   3.43936767414372164e+3, 1.23033935480374942e+3};
+      y = x/apbsint_const<DType>::m_sqrt2();
+      res = y*p[8];
+      den = y;
+      for (i = 0; i < 7; i++) {
+        res = (res + p[i])*y;
+        den = (den + q[i])*y;
+      }
+      res = apbsint_const<DType>::m_sqrtpi()*y*(res + p[7])/(den + q[7]);
+    }
+
+    return res;
+  }
+
+  /**
+   * Implements rational function R_3(y),  y = x^2/2,
+   * which is used if 0 <= x < erf_cody_limit1(). In this range:
+   *   Phi(x) approx (1 + (x/sqrt(2)) R_3(x^2/2))/2
+   * See
+   *   Cody
+   *   Rational Chebyshev approximation to the error function
+   */
+  template<typename DType>
+  MSHADOW_XINLINE static DType erfRationalHelperR3(DType y) {
+    int i;
+    DType nom, den;
+
+    // MYASS(y>=0.0);
+    // R_3(y) = poly(p_j,y) / poly(q_j,y)
+    // Ordering of arrays: 3,2,1,0,4 (only for p_5; q_5=1)
+    DType p[] = {3.16112374387056560,    1.13864154151050156e+2,
+                 3.77485237685302021e+2, 3.20937758913846947e+3,
+                 1.85777706184603153e-1};
+    DType q[] = {2.36012909523441209e+1, 2.44024637934444173e+2,
+                 1.28261652607737228e+3, 2.84423683343917062e+3};
+    nom = y*p[4];
+    den = y;
+    for (i = 0; i < 3; i++) {
+      nom = (nom + p[i])*y;
+      den = (den + q[i])*y;
+    }
+
+    return (nom + p[3])/(den + q[3]);
+  }
+
+  // Exported functions
+
+  /**
+   * @param z Argument
+   * @return  log N(z|0,1)
+   */
+  template<typename DType>
+  MSHADOW_XINLINE static DType logPdfNormal(DType z) {
+    return -0.5 * (apbsint_const<DType>::m_ln2pi() + z*z);
+  }
+
+  /**
+   * If Phi(z) denotes the c.d.f. of N(0,1), this method computes
+   * log Phi(z).
+   *
+   * @param z Argument
+   * @return  log Phi(z)
+   */
+  template<typename DType>
+  MSHADOW_XINLINE static DType logCdfNormal(DType z) {
+    DType res;
+
+    if (math::fabs(z) < apbsint_const<DType>::erf_cody_limit1()) {
+      // Part 3 approximation:
+      // Phi(z) approx (1 + y R_3(y^2))/2, y = z/sqrt(2)
+      res = 
math::log1p((z/apbsint_const<DType>::m_sqrt2())*erfRationalHelperR3(0.5*z*z))
+        - apbsint_const<DType>::m_ln2();
+    } else {
+      // Part 1 or 2 approximation:
+      // Phi(z) approx N(z) Q(-z)/(-z), z < 0
+      // NOTE: The case z >= erf_cody_limit1() is uncritical, we could even use
+      // a cheaper approximation then
+      if (z < 0.0)
+        res = logPdfNormal(z) - math::log(-z) + 
math::log(erfRationalHelper(-z));
+      else
+        res = math::log1p(-math::exp(logPdfNormal(z))*erfRationalHelper(z)/z);
+    }
+
+    return res;
+  }
+
+  /**
+   * If Phi(z) denotes the c.d.f. of N(0,1), this method computes
+   *   f(z) = (d/dz) log Phi(z) = N(z)/Phi(z).
+   * NOTE: The technical report defines the hazard function
+   *   h(x) = N(x)/(1 - Phi(x)).
+   * This method computes h(-z).
+   *
+   * @param z Argument
+   * @return  (d/dz) log Phi(z)
+   */
+  template<typename DType>
+  MSHADOW_XINLINE static DType derivLogCdfNormal(DType z) {
+    DType res;
+
+    if (math::fabs(z) < apbsint_const<DType>::erf_cody_limit1()) {
+      // Part 3 approximation:
+      // Phi(z) approx (1 + y R_3(y^2))/2, y = z/sqrt(2)
+      res = 2.0 * math::exp(logPdfNormal(z)) /
+        (1.0 + 
(z/apbsint_const<DType>::m_sqrt2())*erfRationalHelperR3(0.5*z*z));
+    } else {
+      // Part 1 or 2:
+      // Phi(z) approx N(z) Q(-z)/(-z), z<0
+      // NOTE: The case z >= erf_cody_limit1() is uncritical, we could even use
+      // a cheaper approximation then
+      if (z < 0.0) {
+        res = -z/erfRationalHelper(-z);
+      } else {
+        DType temp = math::exp(logPdfNormal(z));
+        res = temp / (1.0 - temp*erfRationalHelper(z)/z);
+      }
+    }
+
+    return res;
+  }
+};
+
 }  // namespace special_functions
 }  // namespace op
 }  // namespace mxnet
diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc 
b/src/operator/tensor/elemwise_unary_op_basic.cc
index acd8f7b23ff..07b8bd93ed0 100644
--- a/src/operator/tensor/elemwise_unary_op_basic.cc
+++ b/src/operator/tensor/elemwise_unary_op_basic.cc
@@ -826,5 +826,42 @@ The storage type of ``gammaln`` output is always dense
 MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_gammaln,
                                                   
unary_bwd<mshadow_op::gammaln_grad>);
 
+// logcdf_normal
+MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(logcdf_normal, cpu, 
mshadow_op::logcdf_normal)
+MXNET_ADD_SPARSE_OP_ALIAS(logcdf_normal)
+.describe(R"code(Returns ``log`` of cumulative distribution function of 
standard normal, \
+computed element-wise on the input array.
+
+The standard normal distribution has mean 0, variance 1.
+
+The storage type of ``logcdf_normal`` output is always dense.
+
+)code")
+.set_attr<nnvm::FGradient>("FGradient", 
ElemwiseGradUseIn{"_backward_logcdf_normal"});
+
+MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_logcdf_normal,
+                                                  
unary_bwd<mshadow_op::logcdf_normal_grad>);
+
+// derivlogcdf_normal
+MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(derivlogcdf_normal, cpu,
+                                             mshadow_op::derivlogcdf_normal)
+MXNET_ADD_SPARSE_OP_ALIAS(derivlogcdf_normal)
+.describe(R"code(Returns derivative of ``log`` of cumulative distribution 
function of \
+standard normal, computed element-wise on the input array.
+
+.. math::
+   y = pdf_normal(x) / cdf_normal(x)
+
+The standard normal distribution has mean 0, variance 1. ``pdf_normal`` 
denotes its PDF, \
+``cdf_normal`` its CDF. The expression is also the derivative of 
``logcdf_normal(x)``.
+
+The storage type of ``derivlogcdf_normal`` output is always dense.
+
+)code")
+.set_attr<nnvm::FGradient>("FGradient", 
ElemwiseGradUseIn{"_backward_derivlogcdf_normal"});
+
+MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_derivlogcdf_normal,
+                                                  
unary_bwd<mshadow_op::derivlogcdf_normal_grad>);
+
 }  // namespace op
 }  // namespace mxnet
diff --git a/src/operator/tensor/elemwise_unary_op_basic.cu 
b/src/operator/tensor/elemwise_unary_op_basic.cu
index 8dfa9af74ce..906a085d44d 100644
--- a/src/operator/tensor/elemwise_unary_op_basic.cu
+++ b/src/operator/tensor/elemwise_unary_op_basic.cu
@@ -246,5 +246,21 @@ NNVM_REGISTER_OP(_backward_gammaln)
 .set_attr<FCompute>("FCompute<gpu>", ElemwiseBinaryOp::Compute<
   gpu, unary_bwd<mshadow_op::gammaln_grad> >);
 
+// logcdf_normal
+NNVM_REGISTER_OP(logcdf_normal)
+.set_attr<FCompute>("FCompute<gpu>", UnaryOp::Compute<gpu, 
mshadow_op::logcdf_normal>);
+
+NNVM_REGISTER_OP(_backward_logcdf_normal)
+.set_attr<FCompute>("FCompute<gpu>", ElemwiseBinaryOp::Compute<
+  gpu, unary_bwd<mshadow_op::logcdf_normal_grad> >);
+
+// derivlogcdf_normal
+NNVM_REGISTER_OP(derivlogcdf_normal)
+.set_attr<FCompute>("FCompute<gpu>", UnaryOp::Compute<gpu, 
mshadow_op::derivlogcdf_normal>);
+
+NNVM_REGISTER_OP(_backward_derivlogcdf_normal)
+.set_attr<FCompute>("FCompute<gpu>", ElemwiseBinaryOp::Compute<
+  gpu, unary_bwd<mshadow_op::derivlogcdf_normal_grad> >);
+
 }  // namespace op
 }  // namespace mxnet
diff --git a/tests/python/unittest/test_operator.py 
b/tests/python/unittest/test_operator.py
index 7889e084f74..01c551437aa 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -4325,7 +4325,6 @@ def test_laop_2():
     # Tests for linalg.syrk
     mnalpha_lst = [(2, 3, 1.), (5, 3, -2.), (1, 6, 5.), (3, 3, 0.5), (4, 1, 
10.), (1, 1, 1.)]
     for m, n, alpha in mnalpha_lst:
-        #print('syrk: m={}, n={}, alpha={}'.format(m, n, alpha))
         data_in1 = np.random.uniform(1, 10, (m, n))
         res_syrk1 = alpha * np.dot(data_in1, data_in1.T)
         test_syrk1 = mx.sym.linalg.syrk(data1, transpose=False, alpha=alpha)
@@ -4359,7 +4358,6 @@ def test_laop_2():
     test_gelqf_l = _gelqf_second_output(data1)  # Output L (Q is not dangling)
     mn_lst = [(4, 4), (1, 1), (5, 20), (1, 10), (15, 50)]
     for m, n in mn_lst:
-        #print('gelqf: m={}, n={}'.format(m, n))
         data_in1 = np.random.normal(0., 10., (m, n))
         res_eye = np.eye(m)
         res_a = data_in1
@@ -4463,7 +4461,6 @@ def test_laop_3():
     test_syevd_l_4 = _syevd_second_output(data1_s4)
     n_lst = [4, 1, 2, 10, 14]
     for n in n_lst:
-        #print('\n** syevd: n={}'.format(n))
         data_in1 = np.random.normal(0., 10., (n, n))
         data_in1 = 0.5 * (data_in1 + data_in1.T)
         res_eye = np.eye(n)
@@ -4514,10 +4511,8 @@ def test_laop_4():
     l_np = np.array([0., 5.])
     test_syevd = mx.sym.linalg.syevd(data1)
     # float64
-    #print('float64')
     check_fw(test_syevd, [a_np], [u_np, l_np], np.float64)
     # float32
-    #print('float32')
     check_fw(test_syevd, [a_np], [u_np, l_np], np.float32)
 
 
@@ -4676,6 +4671,7 @@ def check(data, idx):
             idx = mx.nd.array([[0, 0, 0, 0]], dtype='int32')
             assert (mx.nd._internal._backward_gather_nd(data, idx, 
shape=(1,)).asscalar() == data.asnumpy().sum())
 
+
 def compare_forw_backw_unary_op(
         name, forward_mxnet_call, forward_numpy_call,
         backward_numpy_call, shape, input_low, input_high, rtol, atol,
@@ -4716,6 +4712,54 @@ def finite_diff_unary_op(
         name=op_name)
     check_grad(op_ex, [data_np])
 
+# Here, we compare forward and backward results for unary ops called with
+# different dtype.
+def compare_forw_backw_unary_op_dtypes(
+        name, forward_mxnet_call, shape, input_low, input_high, rtols, atols,
+        dtype_ref=np.float64, dtypes_cmp=[np.float32, np.float16]):
+    ctx = default_context()
+    data_np = np.random.uniform(input_low, input_high, shape)
+    out_grad = np.random.uniform(-2.0, 2.0, shape)
+    # Compute results for dtype_ref
+    op_name = 'unary_op={}, dtype={}'.format(name, dtype_ref)
+    data_name = op_name + '_data'
+    data_ref = mx.symbol.Variable(data_name, dtype=dtype_ref)
+    sym_ref = mx.sym.broadcast_add(
+        forward_mxnet_call(data_ref), mx.sym.zeros_like(data_ref),
+        name=op_name)
+    args_ref = {
+        data_name: mx.nd.array(data_np.astype(dtype_ref), ctx=ctx, 
dtype=dtype_ref)
+    }
+    args_grad_ref = {
+        data_name: mx.nd.empty(shape, ctx=ctx, dtype=dtype_ref)
+    }
+    ex_ref = sym_ref.bind(
+        ctx=ctx, grad_req='write', args=args_ref, args_grad=args_grad_ref)
+    ex_ref.forward(is_train=True)
+    res_forw_ref = [x.asnumpy() for x in ex_ref.outputs]
+    ex_ref.backward(mx.nd.array(
+        out_grad.astype(dtype_ref), ctx=ctx, dtype=dtype_ref))
+    res_grad_ref = [x.asnumpy() for x in args_grad_ref.values()]
+    # Loop over dtypes_cmp
+    for ind in range(len(dtypes_cmp)):
+        dtype = dtypes_cmp[ind]
+        _data_np = data_np.astype(dtype)
+        _out_grad = out_grad.astype(dtype)
+        # Compare forward
+        _res_forw_ref = [x.astype(dtype) for x in res_forw_ref]
+        op_name = 'unary_op={}, dtype={}'.format(name, dtype)
+        data = mx.symbol.Variable(op_name + '_data', dtype=dtype)
+        sym = mx.sym.broadcast_add(
+            forward_mxnet_call(data), mx.sym.zeros_like(data), name=op_name)
+        check_symbolic_forward(
+            sym, [_data_np], _res_forw_ref, rtol=rtols[ind], atol=atols[ind],
+            dtype=dtype)
+        # Compare backward
+        _res_grad_ref = [x.astype(dtype) for x in res_grad_ref]
+        check_symbolic_backward(
+            sym, [_data_np], [_out_grad], _res_grad_ref, rtol=rtols[ind],
+            atol=atols[ind], dtype=dtype)
+
 def np_smooth_l1(x, sigma):
     issq = 1. / sigma / sigma
     absx = np.abs(x)
@@ -4726,25 +4770,40 @@ def np_smooth_l1_grad(x, sigma):
     ssq = sigma * sigma
     return np.where(np.abs(x) < 1. / ssq, x * ssq, np.sign(x))
 
+# This needs scipy
+def np_derivlogcdf_normal(x):
+    from scipy.stats import norm
+    temp = np.square(x) + np.log(2.0 * np.pi)
+    return np.exp(-0.5 * temp - norm.logcdf(x))
+
+# This needs scipy
+def np_derivlogcdf_normal_grad(x):
+    y = np_derivlogcdf_normal(x)
+    return -y * (x + y)
+
 # Tests for unary operators (basic mathematical functions):
-# - Forward: Comparison to NumPy (several dtype)
-# - Backward: Comparison to NumPy (several dtype)
-# - Finite difference tests (only dtype = float64)
+# - Forward, backward: Comparison to NumPy (dtype = float64)
+# - Forward, backward: Comparison float64 against {float32, float16}
+# - Finite difference tests (dtype = float64)
 # Seed set because the test is not robust enough to operate on random data
 @with_seed(192837465)
 def test_unary_math_operators():
     have_scipy = True
     try:
         from scipy import special as scipy_special
+        from scipy import stats as scipy_stats
     except:
         print("Could not import scipy. Skipping unit tests for special 
functions")
         have_scipy = False
     shape=(9, 10)
-    dtype_l = [np.float64, np.float32, np.float16]
-    rtol_l = [1e-7, 1e-6, 1e-2]
-    rtol_less_l = [1e-6, 1e-5, 1e-2]
-    atol_l = [1e-7, 1e-6, 1e-2]
-    atol_less_l = [1e-6, 1e-5, 1e-2]
+    dtype_ref = np.float64
+    dtypes_cmp = [np.float32, np.float16]
+    rtol_np = 1e-7
+    atol_np = 1e-7
+    rtol_np_less = 1e-6
+    atol_np_less = 1e-6
+    rtol_l = [1e-5, 1e-2]
+    atol_l = [1e-5, 1e-2]
     rtol_fd = 1e-5
     atol_fd = 1e-6
     num_eps = 1e-6
@@ -4871,20 +4930,31 @@ def test_unary_math_operators():
                                 lambda x: scipy_special.gammaln(x),
                                 lambda x: scipy_special.psi(x),
                                 0.01, 20.0]
+        unary_ops['logcdf_normal'] = [lambda x: mx.sym.logcdf_normal(x),
+                                      lambda x: scipy_stats.norm.logcdf(x),
+                                      lambda x: np_derivlogcdf_normal(x),
+                                      -10.0, 5.0]
+        unary_ops['derivlogcdf_normal'] = [lambda x: 
mx.sym.derivlogcdf_normal(x),
+                                           lambda x: np_derivlogcdf_normal(x),
+                                           lambda x: 
np_derivlogcdf_normal_grad(x),
+                                           -10.0, 5.0]
     # Loop over operators
     for name, op in unary_ops.items():
-        # Loop over dtype's
-        for ind in range(len(dtype_l)):
-            dtype = dtype_l[ind]
-            if name == 'gammaln' or name == 'gamma':
-                rtol = rtol_less_l[ind]
-                atol = atol_less_l[ind]
-            else:
-                rtol = rtol_l[ind]
-                atol = atol_l[ind]
-            compare_forw_backw_unary_op(
-                name, op[0], op[1], op[2], shape, op[3], op[4], rtol, atol,
-                dtype)
+        # Compare to NumPy (float64)
+        if name == 'gammaln' or name == 'gamma' or name == 'logcdf_normal' \
+                or name == 'derivlogcdf_normal':
+            rtol = rtol_np_less
+            atol = atol_np_less
+        else:
+            rtol = rtol_np
+            atol = atol_np
+        compare_forw_backw_unary_op(
+            name, op[0], op[1], op[2], shape, op[3], op[4], rtol, atol,
+            dtype=np.float64)
+        # Compare float64 (reference) to other dtypes
+        compare_forw_backw_unary_op_dtypes(
+            name, op[0], shape, op[3], op[4], rtol_l, atol_l, dtype_ref,
+            dtypes_cmp)
         # Finite difference testing
         finite_diff_unary_op(
             name, op[0], shape, op[3], op[4], rtol_fd, atol_fd, num_eps)
@@ -4934,13 +5004,110 @@ def finite_diff_binary_op(
         name=op_name)
     check_grad(op_ex, [data1_np, data2_np])
 
-# Tests for unary operators (basic mathematical functions):
-# - Forward: Comparison to NumPy (several dtype)
-# - Backward: Comparison to NumPy (several dtype)
-# - Finite difference tests (only dtype = float64)
+# Here, we compare forward and backward results for binary ops called with
+# different dtype.
+def compare_forw_backw_binary_op_dtypes(
+        name, forward_mxnet_call, shape, input1_low, input1_high, input2_low,
+        input2_high, rtols, atols, dtype_ref=np.float64,
+        dtypes_cmp=[np.float32, np.float16]):
+    ctx = default_context()
+    data1_np = np.random.uniform(input1_low, input1_high, shape)
+    data2_np = np.random.uniform(input2_low, input2_high, shape)
+    out_grad = np.random.uniform(-2.0, 2.0, shape)
+    # Compute results for dtype_ref
+    op_name = 'binary_op={}, dtype={}'.format(name, dtype_ref)
+    data1_name = op_name + '_data1'
+    data1_ref = mx.symbol.Variable(data1_name, dtype=dtype_ref)
+    data2_name = op_name + '_data2'
+    data2_ref = mx.symbol.Variable(data2_name, dtype=dtype_ref)
+    sym_ref = mx.sym.broadcast_add(
+        forward_mxnet_call(data1_ref, data2_ref), mx.sym.zeros_like(data1_ref),
+        name=op_name)
+    args_ref = {
+        data1_name: mx.nd.array(data1_np.astype(dtype_ref), ctx=ctx, 
dtype=dtype_ref),
+        data2_name: mx.nd.array(data2_np.astype(dtype_ref), ctx=ctx, 
dtype=dtype_ref)
+    }
+    args_grad_ref = {
+        data1_name: mx.nd.empty(shape, ctx=ctx, dtype=dtype_ref),
+        data2_name: mx.nd.empty(shape, ctx=ctx, dtype=dtype_ref)
+    }
+    ex_ref = sym_ref.bind(
+        ctx=ctx, grad_req='write', args=args_ref, args_grad=args_grad_ref)
+    ex_ref.forward(is_train=True)
+    res_forw_ref = [x.asnumpy() for x in ex_ref.outputs]
+    ex_ref.backward(mx.nd.array(
+        out_grad.astype(dtype_ref), ctx=ctx, dtype=dtype_ref))
+    res_grad_ref = [args_grad_ref[data1_name].asnumpy(),
+                    args_grad_ref[data2_name].asnumpy()]
+    # Loop over dtypes_cmp
+    for ind in range(len(dtypes_cmp)):
+        dtype = dtypes_cmp[ind]
+        _data1_np = data1_np.astype(dtype)
+        _data2_np = data2_np.astype(dtype)
+        _out_grad = out_grad.astype(dtype)
+        # Compare forward
+        _res_forw_ref = [x.astype(dtype) for x in res_forw_ref]
+        op_name = 'binary_op={}, dtype={}'.format(name, dtype)
+        data1 = mx.symbol.Variable(op_name + '_data1', dtype=dtype)
+        data2 = mx.symbol.Variable(op_name + '_data2', dtype=dtype)
+        sym = mx.sym.broadcast_add(
+            forward_mxnet_call(data1, data2), mx.sym.zeros_like(data1),
+            name=op_name)
+        check_symbolic_forward(
+            sym, [_data1_np, _data2_np], _res_forw_ref, rtol=rtols[ind],
+            atol=atols[ind], dtype=dtype)
+        # Compare backward
+        _res_grad_ref = [x.astype(dtype) for x in res_grad_ref]
+        check_symbolic_backward(
+            sym, [_data1_np, _data2_np], [_out_grad], _res_grad_ref,
+            rtol=rtols[ind], atol=atols[ind], dtype=dtype)
+
+# Tests for binary operators (basic mathematical functions):
+# - Forward, backward: Comparison to NumPy (dtype = float64)
+# - Forward, backward: Comparison float64 against {float32, float16}
+# - Finite difference tests (dtype = float64)
 # Seed set because the test is not robust enough to operate on random data
 @with_seed(192837465)
 def test_binary_math_operators():
+    shape=(9, 10)
+    dtype_ref = np.float64
+    dtypes_cmp = [np.float32, np.float16]
+    rtol_np = 1e-7
+    atol_np = 1e-7
+    rtol_l = [1e-5, 1e-2]
+    atol_l = [1e-5, 1e-2]
+    rtol_fd = 1e-5
+    atol_fd = 1e-6
+    num_eps = 1e-6
+    binary_ops = {
+        'hypot' : [lambda x, y: mx.sym.hypot(x, y),
+                   lambda x, y: np.hypot(x, y),
+                   lambda x, y: x / np.hypot(x, y),
+                   lambda x, y: y / np.hypot(x, y),
+                    -5.0, 5.0, -5.0, 5.0],
+        'pow': [lambda x, y: mx.sym.pow(x, y),
+                lambda x, y: np.power(x, y),
+                lambda x, y: np.power(x, y - 1.) * y,
+                lambda x, y: np.power(x, y) * np.log(x),
+                0.2, 5.0, -4.0, 4.0]
+    }
+    # Loop over operators
+    for name, op in binary_ops.items():
+        # Compare to NumPy (float64)
+        compare_forw_backw_binary_op(
+            name, op[0], op[1], op[2], op[3], shape, op[4], op[5], op[6],
+            op[7], rtol_np, atol_np, dtype=np.float64)
+        # Compare float64 (reference) to other dtypes
+        compare_forw_backw_binary_op_dtypes(
+            name, op[0], shape, op[4], op[5], op[6], op[7], rtol_l, atol_l,
+            dtype_ref, dtypes_cmp)
+        # Finite difference testing
+        finite_diff_binary_op(
+            name, op[0], shape, op[4], op[5], op[6], op[7], rtol_fd, atol_fd,
+            num_eps)
+
+@with_seed(192837465)
+def test_binary_math_operators_2():
     shape=(9, 10)
     dtype_l = [np.float64, np.float32, np.float16]
     rtol_l = [1e-7, 1e-6, 1e-2]
diff --git a/tests/python/unittest/test_sparse_operator.py 
b/tests/python/unittest/test_sparse_operator.py
index 4e5d3546280..c9ab6ad0222 100644
--- a/tests/python/unittest/test_sparse_operator.py
+++ b/tests/python/unittest/test_sparse_operator.py
@@ -746,6 +746,17 @@ def check_binary_op_with_scalar(stype,
                                        force_overlap=force_overlap,
                                        verbose=False)
 
+    # This needs scipy
+    def np_derivlogcdf_normal(x):
+        from scipy.stats import norm
+        temp = np.square(x) + np.log(2.0 * np.pi)
+        return np.exp(-0.5 * temp - norm.logcdf(x))
+
+    # This needs scipy
+    def np_derivlogcdf_normal_grad(x):
+        y = np_derivlogcdf_normal(x)
+        return -y * (x + y)
+
     # Check many basic unary operators
     def check_mathematical_core(stype, output_grad_stype=None,
                                 input_grad_stype=None, force_overlap=False,
@@ -1035,6 +1046,7 @@ def check_mathematical_core(stype, output_grad_stype=None,
 
             try:
                 from scipy import special as scipy_special
+                from scipy import stats as scipy_stats
                 import_succeeded = True
                 # gamma
                 check_sparse_mathematical_core("gamma", stype,
@@ -1054,6 +1066,24 @@ def check_mathematical_core(stype, 
output_grad_stype=None,
                                                
input_grad_stype=input_grad_stype,
                                                force_overlap=force_overlap,
                                                density=density, 
ograd_density=ograd_density)
+                # logcdf_normal
+                check_sparse_mathematical_core("logcdf_normal", stype,
+                                               lambda x: 
mx.sym.logcdf_normal(x),
+                                               lambda x: 
scipy_stats.norm.logcdf(x),
+                                               lambda x: 
np_derivlogcdf_normal(x),
+                                               
output_grad_stype=output_grad_stype,
+                                               
input_grad_stype=input_grad_stype,
+                                               force_overlap=force_overlap,
+                                               density=density, 
ograd_density=ograd_density)
+                # derivlogcdf_normal
+                check_sparse_mathematical_core("derivlogcdf_normal", stype,
+                                               lambda x: 
mx.sym.derivlogcdf_normal(x),
+                                               lambda x: 
np_derivlogcdf_normal(x),
+                                               lambda x: 
np_derivlogcdf_normal_grad(x),
+                                               
output_grad_stype=output_grad_stype,
+                                               
input_grad_stype=input_grad_stype,
+                                               force_overlap=force_overlap,
+                                               density=density, 
ograd_density=ograd_density)
 
             except:
                 if import_succeeded == False:


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to