This is an automated email from the ASF dual-hosted git repository.
haibin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new b4ce4e7 improve layernorm CPU performance (#15313)
b4ce4e7 is described below
commit b4ce4e714a59f9f34d3db751e102609a34a9ce4e
Author: pengxin99 <[email protected]>
AuthorDate: Sat Jun 22 07:30:33 2019 +0800
improve layernorm CPU performance (#15313)
---
src/operator/mkl_functions-inl.h | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/src/operator/mkl_functions-inl.h b/src/operator/mkl_functions-inl.h
index 6080347..ae23b1e 100644
--- a/src/operator/mkl_functions-inl.h
+++ b/src/operator/mkl_functions-inl.h
@@ -137,23 +137,24 @@ MSHADOW_XINLINE static void LayerNormLastDim(index_t m,
for (index_t i = 0; i < m; i++) {
DType* in_offset = a + i * n;
DType* out_offset = b + i * n;
+ DType x_sum = 0.0f;
+ DType x_square_sum = 0.0f;
- sum_(n, in_offset, &(mean[i]));
- mean[i] /= n;
- var[i] = 0.0f;
#if !defined(_MSC_VER)
#pragma omp simd
#endif
for (index_t j = 0; j < n; j++) {
- out_offset[j] = in_offset[j] - mean[i];
- var[i] += out_offset[j] * out_offset[j];
+ x_sum += in_offset[j];
+ x_square_sum += in_offset[j] * in_offset[j];
}
- var[i] = math::sqrt(var[i] / n + eps);
+ mean[i] = x_sum / n;
+ var[i] = math::sqrt(x_square_sum / n - mean[i] * mean[i] + eps);
+
#if !defined(_MSC_VER)
#pragma omp simd
#endif
for (index_t j = 0; j < n; j++) {
- out_offset[j] = out_offset[j] * gamma[j] / var[i] + beta[j];
+ out_offset[j] = (in_offset[j] - mean[i]) * gamma[j] / var[i] + beta[j];
}
}
}