cjolivier01 closed pull request #9002: Misc performance changes
URL: https://github.com/apache/incubator-mxnet/pull/9002
 
 
   

This is a PR merged from a forked repository.
Because GitHub hides the original diff once a pull request from a fork
is merged, the diff is reproduced below for the sake of provenance:

diff --git a/python/mxnet/io.py b/python/mxnet/io.py
index 25a95be787..295c8db146 100644
--- a/python/mxnet/io.py
+++ b/python/mxnet/io.py
@@ -34,7 +34,7 @@
 from .base import mx_real_t
 from .base import check_call, build_param_doc as _build_param_doc
 from .ndarray import NDArray
-from .ndarray.sparse import CSRNDArray
+from .ndarray.sparse import CSRNDArray, RowSparseNDArray
 from .ndarray.sparse import array as sparse_array
 from .ndarray import _ndarray_cls
 from .ndarray import array
@@ -537,8 +537,10 @@ def _shuffle(data, idx):
             shuffle_data.append((k, v))
         elif isinstance(v, CSRNDArray):
             shuffle_data.append((k, sparse_array(v.asscipy()[idx], v.context)))
-        else:
+        elif isinstance(v, RowSparseNDArray):
             shuffle_data.append((k, array(v.asnumpy()[idx], v.context)))
+        else:
+            shuffle_data.append((k, array(v[idx], v.context)))
 
     return shuffle_data
 
diff --git a/src/operator/random/sampler.h b/src/operator/random/sampler.h
index d544aec88d..d0030b9163 100644
--- a/src/operator/random/sampler.h
+++ b/src/operator/random/sampler.h
@@ -25,6 +25,8 @@
 #ifndef MXNET_OPERATOR_RANDOM_SAMPLER_H_
 #define MXNET_OPERATOR_RANDOM_SAMPLER_H_
 
+#include "../../engine/openmp.h"
+
 #ifdef __CUDACC__
 #include <curand.h>
 #include <curand_kernel.h>
@@ -89,8 +91,8 @@ class RandGenerator<gpu, double> {
 template<typename xpu>
 MSHADOW_XINLINE index_t OptSampleSeedNum(index_t N);
 template<>
-MSHADOW_XINLINE index_t OptSampleSeedNum<cpu>(index_t N) {
-  return omp_get_num_threads();
+MSHADOW_CINLINE index_t OptSampleSeedNum<cpu>(index_t N) {
+  return engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
 }
 template<>
 MSHADOW_XINLINE index_t OptSampleSeedNum<gpu>(index_t N) {
diff --git a/src/operator/tensor/init_op.h b/src/operator/tensor/init_op.h
index 3f5014d8ca..9218a9cd18 100644
--- a/src/operator/tensor/init_op.h
+++ b/src/operator/tensor/init_op.h
@@ -209,7 +209,7 @@ void Fill(mshadow::Stream<xpu> *s, const TBlob& b, const OpReqType req, ValueTyp
     const size_t size = b.Size();
     if (val == 0) {
       if (req != kAddTo) {
-        if (b.dev_mask() == cpu::kDevMask) {
+        if (b.dev_mask() == cpu::kDevMask && size < 50000) {
           MSHADOW_TYPE_SWITCH(b.type_flag_, DType, {
             memset(b.dptr_, 0, size * sizeof(DType));
           });
diff --git a/src/storage/storage.cc b/src/storage/storage.cc
index ce40daa904..dd2bdfe625 100644
--- a/src/storage/storage.cc
+++ b/src/storage/storage.cc
@@ -47,7 +47,6 @@ class StorageImpl : public Storage {
 
  private:
   static constexpr size_t kMaxNumberOfDevices = Context::kMaxDevType + 1;
-  static constexpr size_t kMaxNumberOfDeviceIDs = Context::kMaxDevID + 1;
 #if MXNET_USE_CUDA
   static int num_gpu_device;
 #endif  // MXNET_USE_CUDA
diff --git a/tests/cpp/misc/memory_test.cc b/tests/cpp/misc/memory_test.cc
index 8f4e8c25e8..7c83e461bd 100644
--- a/tests/cpp/misc/memory_test.cc
+++ b/tests/cpp/misc/memory_test.cc
@@ -64,40 +64,40 @@ TEST(MEMORY_TEST, MemsetAndMemcopyPerformance) {
     std::cout << "====================================" << std::endl
              << "Data size: " << test::pretty_num(test_size) << std::endl << std::flush;
 
-    std::unique_ptr<uint8_t> buffer_1(new uint8_t[test_size]), buffer_2(new uint8_t[test_size]);
-    uint8_t *src = buffer_1.get(), *dest = buffer_2.get();
+    std::unique_ptr<float> buffer_1(new float[test_size]), buffer_2(new float[test_size]);
+    float *src = buffer_1.get(), *dest = buffer_2.get();
 
     for (size_t x = 0; x < 5; ++x) {
       // Init memory with different values
-      memset(src, 3, test_size);
-      memset(dest, 255, test_size);  // wipe out some/all of src cache
+      memset(src, 3, test_size * sizeof(float));
+      memset(dest, 255, test_size * sizeof(float));  // wipe out some/all of src cache
 
       // memset
-      uint64_t start = test::perf::getNannoTickCount();
-      memset(src, 123, test_size);
-      const uint64_t memset_time = test::perf::getNannoTickCount() - start;
+      uint64_t start = mxnet::test::perf::getNannoTickCount();
+      memset(src, 0, test_size * sizeof(float));
+      const uint64_t memset_time = mxnet::test::perf::getNannoTickCount() - start;
 
-      start = test::perf::getNannoTickCount();
+      start = mxnet::test::perf::getNannoTickCount();
       #pragma omp parallel for num_threads(GetOMPThreadCount())
       for (int i = 0; i < static_cast<int>(test_size); ++i) {
-        src[i] = 42;
+        src[i] = 42.0f;
       }
-      const uint64_t omp_set_time = test::perf::getNannoTickCount() - start;
+      const uint64_t omp_set_time = mxnet::test::perf::getNannoTickCount() - start;
 
-      start = test::perf::getNannoTickCount();
-      memcpy(dest, src, test_size);
-      const uint64_t memcpy_time = test::perf::getNannoTickCount() - start;
+      start = mxnet::test::perf::getNannoTickCount();
+      memcpy(dest, src, test_size * sizeof(float));
+      const uint64_t memcpy_time = mxnet::test::perf::getNannoTickCount() - start;
 
       // bounce the cache and dirty logic
-      memset(src, 6, test_size);
-      memset(dest, 200, test_size);
+      memset(src, 6, test_size * sizeof(float));
+      memset(dest, 200, test_size * sizeof(float));
 
-      start = test::perf::getNannoTickCount();
+      start = mxnet::test::perf::getNannoTickCount();
       #pragma omp parallel for num_threads(GetOMPThreadCount())
       for (int i = 0; i < static_cast<int>(test_size); ++i) {
         dest[i] = src[i];
       }
-      const uint64_t omp_copy_time = test::perf::getNannoTickCount() - start;
+      const uint64_t omp_copy_time = mxnet::test::perf::getNannoTickCount() - start;
 
       memset_times.push_back(memset_time);
       omp_set_times.push_back(omp_set_time);


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to