This is an automated email from the ASF dual-hosted git repository.

haibin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new b4ce4e7  improve layernorm CPU performance (#15313)
b4ce4e7 is described below

commit b4ce4e714a59f9f34d3db751e102609a34a9ce4e
Author: pengxin99 <[email protected]>
AuthorDate: Sat Jun 22 07:30:33 2019 +0800

    improve layernorm CPU performance (#15313)
---
 src/operator/mkl_functions-inl.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/operator/mkl_functions-inl.h b/src/operator/mkl_functions-inl.h
index 6080347..ae23b1e 100644
--- a/src/operator/mkl_functions-inl.h
+++ b/src/operator/mkl_functions-inl.h
@@ -137,23 +137,24 @@ MSHADOW_XINLINE static void LayerNormLastDim(index_t m,
   for (index_t i = 0; i < m; i++) {
     DType* in_offset = a + i * n;
     DType* out_offset = b + i * n;
+    DType x_sum = 0.0f;
+    DType x_square_sum = 0.0f;
 
-    sum_(n, in_offset, &(mean[i]));
-    mean[i] /= n;
-    var[i] = 0.0f;
 #if !defined(_MSC_VER)
 #pragma omp simd
 #endif
     for (index_t j = 0; j < n; j++) {
-      out_offset[j] = in_offset[j] - mean[i];
-      var[i] += out_offset[j] * out_offset[j];
+      x_sum += in_offset[j];
+      x_square_sum += in_offset[j] * in_offset[j];
     }
-    var[i] = math::sqrt(var[i] / n + eps);
+    mean[i] = x_sum / n;
+    var[i] = math::sqrt(x_square_sum / n - mean[i] * mean[i] + eps);
+
 #if !defined(_MSC_VER)
 #pragma omp simd
 #endif
     for (index_t j = 0; j < n; j++) {
-      out_offset[j] = out_offset[j] * gamma[j] / var[i] + beta[j];
+      out_offset[j] = (in_offset[j] - mean[i]) * gamma[j] / var[i] + beta[j];
     }
   }
 }

Reply via email to