This is an automated email from the ASF dual-hosted git repository.
haibin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 0f63659 add a compiler flag to use int64 as tensor size (#14570)
0f63659 is described below
commit 0f63659be5070af218095a6a460427d2a1b67aba
Author: Lin Yuan <[email protected]>
AuthorDate: Tue Apr 23 14:47:10 2019 -0700
add a compiler flag to use int64 as tensor size (#14570)
* use a compile flag to use int64 tensor size
* use personal mshadow repo
* update data type
* update make config
* change size_t to index_t and add documentation
* update mshadow submodule to master
* fix compilation warning
* fix compiler warning
* fix compiler warning
* fix compiler warning
* fix compiler warning
* fix compiler error
* change nnvm::Tuple to mxnet::Tuple
* fix compiler warning
* fix compiler warning
* fix compiler warning
* fix compiler warning
* fix compiler warning
* fix lint
* update CI runtime_functions
* update runtime function
* correct runtime_functions
* update runtime functions
* add nightly test for large tensor
* update Jenkins files to test new compiler flag
* fix CI
* add runtime feature detect for the compiler flag
* change build from make to cmake
* fix CI
* move tests to nightly
---
3rdparty/mshadow | 2 +-
CMakeLists.txt | 8 ++++
Makefile | 5 +++
ci/docker/runtime_functions.sh | 54 +++++++++++++++++++++++++++
ci/jenkins/Jenkins_steps.groovy | 28 ++++++++++++++
ci/jenkins/Jenkinsfile_unix_cpu | 3 +-
ci/jenkins/Jenkinsfile_unix_gpu | 1 +
include/mxnet/libinfo.h | 6 ++-
include/mxnet/tensor_blob.h | 13 ++++---
include/mxnet/tuple.h | 16 ++++----
make/config.mk | 6 +++
make/crosscompile.jetson.mk | 6 +++
make/osx.mk | 6 +++
src/common/serialization.h | 12 +++---
src/imperative/cached_op.h | 8 ++--
src/io/image_det_aug_default.cc | 4 +-
src/io/image_io.cc | 2 +-
src/io/iter_batchloader.h | 2 +-
src/io/iter_sparse_batchloader.h | 2 +-
src/libinfo.cc | 3 ++
src/operator/contrib/dgl_graph.cc | 4 +-
src/operator/contrib/multi_proposal-inl.h | 16 ++++----
src/operator/contrib/multibox_detection-inl.h | 2 +-
src/operator/contrib/multibox_detection.cc | 2 +-
src/operator/contrib/multibox_detection.cu | 2 +-
src/operator/contrib/multibox_prior-inl.h | 8 ++--
src/operator/contrib/multibox_target-inl.h | 2 +-
src/operator/contrib/multibox_target.cc | 2 +-
src/operator/contrib/multibox_target.cu | 2 +-
src/operator/contrib/proposal-inl.h | 16 ++++----
src/operator/convolution_v1-inl.h | 2 +-
src/operator/image/image_random-inl.h | 14 +++----
src/operator/nn/deconvolution-inl.h | 2 +-
src/operator/optimizer_op-inl.h | 8 ++--
src/operator/swapaxis-inl.h | 4 +-
src/operator/tensor/indexing_op.h | 4 +-
src/operator/tensor/matrix_op-inl.h | 28 +++++++-------
src/operator/tensor/sparse_retain-inl.h | 2 +-
src/operator/tensor/square_sum-inl.h | 9 +++--
tests/nightly/JenkinsfileForBinaries | 35 +++++++++++++++++
tests/nightly/test_large_array.py | 28 ++++++++++++--
41 files changed, 282 insertions(+), 97 deletions(-)
diff --git a/3rdparty/mshadow b/3rdparty/mshadow
index 95ebe0f..6e94643 160000
--- a/3rdparty/mshadow
+++ b/3rdparty/mshadow
@@ -1 +1 @@
-Subproject commit 95ebe0f109ae021d0d66e2a627ccfc55c3253b55
+Subproject commit 6e94643bdf1d51a505b147f28c358fb71070b8fd
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9cd68e1..09a52be 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,6 +50,7 @@ mxnet_option(USE_SIGNAL_HANDLER "Print stack traces on
segfaults." OFF)
mxnet_option(USE_TENSORRT "Enable infeference optimization with
TensorRT." OFF)
mxnet_option(USE_ASAN "Enable Clang/GCC ASAN sanitizers." OFF)
mxnet_option(ENABLE_TESTCOVERAGE "Enable compilation with test coverage
metric output" OFF)
+mxnet_option(USE_INT64_TENSOR_SIZE "Use int64_t to represent the total number of elements in a tensor" OFF)
message(STATUS "CMAKE_CROSSCOMPILING ${CMAKE_CROSSCOMPILING}")
message(STATUS "CMAKE_HOST_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR}")
@@ -295,6 +296,13 @@ else()
add_definitions(-DMXNET_USE_NCCL=0)
endif()
+if (USE_INT64_TENSOR_SIZE)
+ message(STATUS "Using 64-bit integer for tensor size")
+ add_definitions(-DMSHADOW_INT64_TENSOR_SIZE=1)
+else()
+ add_definitions(-DMSHADOW_INT64_TENSOR_SIZE=0)
+endif()
+
include(cmake/ChooseBlas.cmake)
if(USE_CUDA AND FIRST_CUDA)
include(3rdparty/mshadow/cmake/Utils.cmake)
diff --git a/Makefile b/Makefile
index 53998ac..29cfd57 100644
--- a/Makefile
+++ b/Makefile
@@ -189,6 +189,11 @@ ifeq ($(USE_OPERATOR_TUNING), 1)
CFLAGS += -DMXNET_USE_OPERATOR_TUNING=1
endif
+ifeq ($(USE_INT64_TENSOR_SIZE), 1)
+ CFLAGS += -DMSHADOW_INT64_TENSOR_SIZE=1
+else
+ CFLAGS += -DMSHADOW_INT64_TENSOR_SIZE=0
+endif
# verify existence of separate lapack library when using blas/openblas/atlas
# switch off lapack support in case it can't be found
# issue covered with this
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index ac8033c..c3610d2 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -755,6 +755,53 @@ build_ubuntu_gpu_cmake() {
ninja -v
}
+build_ubuntu_cpu_large_tensor() {
+ set -ex
+ cd /work/build
+ build_ccache_wrappers
+ cmake \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
+ -DUSE_SIGNAL_HANDLER=ON \
+ -DENABLE_TESTCOVERAGE=ON \
+ -DUSE_CUDA=OFF \
+ -DUSE_CUDNN=OFF \
+ -DUSE_MKLDNN=OFF \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DUSE_INT64_TENSOR_SIZE=ON \
+ -G Ninja \
+ /work/mxnet
+
+ ninja -v
+}
+
+build_ubuntu_gpu_large_tensor() {
+ set -ex
+ cd /work/build
+ build_ccache_wrappers
+ cmake \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
+ -DUSE_SIGNAL_HANDLER=ON \
+ -DENABLE_TESTCOVERAGE=ON \
+ -DUSE_CUDA=ON \
+ -DUSE_CUDNN=ON \
+ -DUSE_MKL_IF_AVAILABLE=OFF \
+ -DUSE_MKLML_MKL=OFF \
+ -DUSE_MKLDNN=OFF \
+ -DUSE_DIST_KVSTORE=ON \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DCUDA_ARCH_NAME=Manual \
+ -DCUDA_ARCH_BIN=$CI_CMAKE_CUDA_ARCH_BIN \
+ -DUSE_INT64_TENSOR_SIZE=ON \
+ -G Ninja \
+ /work/mxnet
+
+ ninja -v
+}
+
build_ubuntu_blc() {
echo "pass"
}
@@ -1183,6 +1230,13 @@ nightly_test_KVStore_singleNode() {
python tests/nightly/test_kvstore.py
}
+#Test Large Tensor Size
+nightly_test_large_tensor() {
+ set -ex
+ export PYTHONPATH=./python/
+ nosetests-3.4 tests/nightly/test_large_array.py
+}
+
#Tests Amalgamation Build with 5 different sets of flags
nightly_test_amalgamation() {
set -ex
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index ac1579a..23230ac 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -119,6 +119,34 @@ def compile_unix_openblas_debug_cpu() {
}]
}
+def compile_unix_int64_cpu() {
+ return ['CPU: USE_INT64_TENSOR_SIZE': {
+ node(NODE_LINUX_CPU) {
+ ws('workspace/build-cpu-int64') {
+ timeout(time: max_time, unit: 'MINUTES') {
+ utils.init_git()
+ utils.docker_run('ubuntu_cpu', 'build_ubuntu_cpu_large_tensor', false)
+ utils.pack_lib('ubuntu_cpu_int64', mx_cmake_lib, true)
+ }
+ }
+ }
+ }]
+}
+
+def compile_unix_int64_gpu() {
+ return ['GPU: USE_INT64_TENSOR_SIZE': {
+ node(NODE_LINUX_GPU) {
+ ws('workspace/build-gpu-int64') {
+ timeout(time: max_time, unit: 'MINUTES') {
+ utils.init_git()
+ utils.docker_run('ubuntu_gpu', 'build_ubuntu_gpu_large_tensor', false)
+ utils.pack_lib('ubuntu_gpu_int64', mx_cmake_lib, true)
+ }
+ }
+ }
+ }]
+}
+
def compile_unix_mkl_cpu() {
return ['CPU: MKL': {
node(NODE_LINUX_CPU) {
diff --git a/ci/jenkins/Jenkinsfile_unix_cpu b/ci/jenkins/Jenkinsfile_unix_cpu
index 919381e..fa09429 100644
--- a/ci/jenkins/Jenkinsfile_unix_cpu
+++ b/ci/jenkins/Jenkinsfile_unix_cpu
@@ -38,7 +38,8 @@ core_logic: {
custom_steps.compile_unix_openblas_debug_cpu(),
custom_steps.compile_unix_mkl_cpu(),
custom_steps.compile_unix_mkldnn_cpu(),
- custom_steps.compile_unix_mkldnn_mkl_cpu()
+ custom_steps.compile_unix_mkldnn_mkl_cpu(),
+ custom_steps.compile_unix_int64_cpu()
])
utils.parallel_stage('Tests', [
diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu
index 6018e18..e2a089b 100644
--- a/ci/jenkins/Jenkinsfile_unix_gpu
+++ b/ci/jenkins/Jenkinsfile_unix_gpu
@@ -40,6 +40,7 @@ core_logic: {
custom_steps.compile_unix_cmake_mkldnn_gpu(),
custom_steps.compile_unix_cmake_gpu(),
custom_steps.compile_unix_tensorrt_gpu(),
+ custom_steps.compile_unix_int64_gpu()
])
utils.parallel_stage('Tests', [
diff --git a/include/mxnet/libinfo.h b/include/mxnet/libinfo.h
index f35d41a..8b58a39 100644
--- a/include/mxnet/libinfo.h
+++ b/include/mxnet/libinfo.h
@@ -123,7 +123,9 @@
#define MXNET_USE_SIGNAL_HANDLER 0
#endif
-
+#ifndef MXNET_USE_INT64_TENSOR_SIZE
+#define MXNET_USE_INT64_TENSOR_SIZE MSHADOW_INT64_TENSOR_SIZE
+#endif
namespace mxnet {
namespace features {
@@ -177,6 +179,8 @@ enum : unsigned {
PROFILER,
DIST_KVSTORE,
CXX14,
+ INT64_TENSOR_SIZE,
+
// Signal handler to print stack traces on exceptions
SIGNAL_HANDLER,
DEBUG,
diff --git a/include/mxnet/tensor_blob.h b/include/mxnet/tensor_blob.h
index a7a5726..a08dab1 100755
--- a/include/mxnet/tensor_blob.h
+++ b/include/mxnet/tensor_blob.h
@@ -218,15 +218,16 @@ class TBlob {
return shape_.ndim();
}
/*!
- * \brief return size of i-th dimension, start counting from highest dimension
+ * \brief return size of i-th dimension, start counting from highest dimension.
+ * return type needs to be a signed integer.
* \param idx the dimension count from the highest dimensin
- * \return the size
+ * \return the size. -1 means unknown size to support zero-size tensor.
*/
inline index_t size(index_t idx) const {
return shape_[idx];
}
/*! \brief total number of elements in the tensor */
- inline index_t Size(void) const {
+ inline size_t Size(void) const {
return shape_.Size();
}
/*! \brief get pointer in dtype */
@@ -443,7 +444,7 @@ class FieldEntry<mxnet::TShape>
throw dmlc::ParamError(os.str());
}
if (enforce_nonzero_) {
- for (mxnet::index_t i = 0; i < v.ndim(); ++i) {
+ for (int i = 0; i < v.ndim(); ++i) {
if (v[i] == 0U) {
std::ostringstream os;
os << "value " << v << "for Parameter " << this->key_
@@ -457,7 +458,7 @@ class FieldEntry<mxnet::TShape>
this->enforce_nonzero_ = true;
return this->self();
}
- inline FieldEntry<mxnet::TShape> &set_expect_ndim(mxnet::index_t ndim) {
+ inline FieldEntry<mxnet::TShape> &set_expect_ndim(int ndim) {
expect_ndim_ = ndim;
return this->self();
}
@@ -466,7 +467,7 @@ class FieldEntry<mxnet::TShape>
// whether all the entries need to be nonzero
bool enforce_nonzero_;
// expected number of dimension, default = 0 means no restriction.
- mxnet::index_t expect_ndim_;
+ int expect_ndim_;
};
} // namespace parameter
diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h
index 8431bbb..bc630f1 100644
--- a/include/mxnet/tuple.h
+++ b/include/mxnet/tuple.h
@@ -569,7 +569,7 @@ class TShape : public Tuple<dim_t> {
* \param axis_end The ending axis specified.
* \return the flat 3d shape
*/
- inline mshadow::Shape<3> FlatTo3D(size_t axis_begin, size_t axis_end) const {
+ inline mshadow::Shape<3> FlatTo3D(int axis_begin, int axis_end) const {
CHECK(axis_end >= axis_begin);
mshadow::Shape<3> s;
CHECK(ndim_is_known(ndim())) << "shape must have a valid ndim";
@@ -579,10 +579,10 @@ class TShape : public Tuple<dim_t> {
s.shape_[1] = 1;
s.shape_[2] = 1;
- for (size_t i = 0; i < axis_begin; ++i) {
+ for (int i = 0; i < axis_begin; ++i) {
s.shape_[0] *= d[i];
}
- for (size_t i = axis_begin; i <= axis_end; ++i) {
+ for (int i = axis_begin; i <= axis_end; ++i) {
s.shape_[1] *= d[i];
}
for (int i = axis_end + 1; i < ndim(); ++i) {
@@ -595,7 +595,7 @@ class TShape : public Tuple<dim_t> {
* \param axis The axis specified.
* \return the flat 3d shape
*/
- inline mshadow::Shape<3> FlatTo3D(size_t axis) const {
+ inline mshadow::Shape<3> FlatTo3D(int axis) const {
return FlatTo3D(axis, axis);
}
inline bool operator==(const TShape &s) const {
@@ -712,8 +712,8 @@ template<typename T>
struct hash<mxnet::Tuple<T> > {
/*! \brief hash a Tuple into unsigned int */
size_t operator()(const mxnet::Tuple<T>& val) const {
- std::hash<uint32_t> hash_uint;
- size_t res = hash_uint(val.ndim());
+ std::hash<int> hash_int;
+ size_t res = hash_int(val.ndim());
for (int i = 0; i < val.ndim(); ++i) {
res = dmlc::HashCombine(res, val[i]);
}
@@ -726,8 +726,8 @@ template<>
struct hash<mxnet::TShape> {
/*! \brief hash a TShape into unsigned int */
size_t operator()(const mxnet::TShape& val) const {
- std::hash<uint32_t> hash_uint;
- size_t res = hash_uint(val.ndim());
+ std::hash<int> hash_int;
+ size_t res = hash_int(val.ndim());
for (int i = 0; i < val.ndim(); ++i) {
res = dmlc::HashCombine(res, val[i]);
}
diff --git a/make/config.mk b/make/config.mk
index d4431a9..2083467 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -215,6 +215,12 @@ EXTRA_OPERATORS =
# Create C++ interface package
USE_CPP_PACKAGE = 0
+# Use int64_t type to represent the total number of elements in a tensor
+# This will cause performance degradation reported in issue #14496
+# Set to 1 for large tensor with tensor size greater than INT32_MAX i.e. 2147483647
+# Note: the size of each dimension is still bounded by INT32_MAX
+USE_INT64_TENSOR_SIZE = 0
+
#----------------------------
# plugins
#----------------------------
diff --git a/make/crosscompile.jetson.mk b/make/crosscompile.jetson.mk
index f0c89d6..880e2cf 100644
--- a/make/crosscompile.jetson.mk
+++ b/make/crosscompile.jetson.mk
@@ -192,6 +192,12 @@ EXTRA_OPERATORS =
# Create C++ interface package
USE_CPP_PACKAGE = 0
+# Use int64_t type to represent the total number of elements in the tensor
+# This will cause performance degradation reported in issue #14496
+# Set to 1 for large tensor with tensor size greater than INT32_MAX i.e. 2147483647
+# Note: the size of each dimension is still bounded by INT32_MAX
+USE_INT64_TENSOR_SIZE = 0
+
#----------------------------
# plugins
#----------------------------
diff --git a/make/osx.mk b/make/osx.mk
index 7e32d81..0b5842e 100644
--- a/make/osx.mk
+++ b/make/osx.mk
@@ -135,6 +135,12 @@ EXTRA_OPERATORS =
# Create C++ interface package
USE_CPP_PACKAGE = 0
+# Use int64_t type to represent the total number of elements in a tensor
+# This will cause performance degradation reported in issue #14496
+# Set to 1 for large tensor with tensor size greater than INT32_MAX i.e. 2147483647
+# Note: the size of each dimension is still bounded by INT32_MAX
+USE_INT64_TENSOR_SIZE = 0
+
#----------------------------
# plugins
#----------------------------
diff --git a/src/common/serialization.h b/src/common/serialization.h
index 8192ee2..c22d8bc 100644
--- a/src/common/serialization.h
+++ b/src/common/serialization.h
@@ -49,7 +49,7 @@ template<typename T>
inline size_t SerializedSize(const T &obj);
template<typename T>
-inline size_t SerializedSize(const nnvm::Tuple <T> &obj);
+inline size_t SerializedSize(const mxnet::Tuple <T> &obj);
template<typename K, typename V>
inline size_t SerializedSize(const std::map <K, V> &obj);
@@ -64,7 +64,7 @@ template<typename T>
inline void Serialize(const T &obj, char **buffer);
template<typename T>
-inline void Serialize(const nnvm::Tuple <T> &obj, char **buffer);
+inline void Serialize(const mxnet::Tuple <T> &obj, char **buffer);
template<typename K, typename V>
inline void Serialize(const std::map <K, V> &obj, char **buffer);
@@ -79,7 +79,7 @@ template<typename T>
inline void Deserialize(T *obj, const std::string &buffer, size_t *curr_pos);
template<typename T>
-inline void Deserialize(nnvm::Tuple <T> *obj, const std::string &buffer, size_t *curr_pos);
+inline void Deserialize(mxnet::Tuple <T> *obj, const std::string &buffer, size_t *curr_pos);
template<typename K, typename V>
inline void Deserialize(std::map <K, V> *obj, const std::string &buffer,
size_t *curr_pos);
@@ -102,7 +102,7 @@ inline size_t SerializedSize(const T &obj) {
}
template<typename T>
-inline size_t SerializedSize(const nnvm::Tuple <T> &obj) {
+inline size_t SerializedSize(const mxnet::Tuple <T> &obj) {
if (is_container<T>::value) {
size_t sum_val = 4;
for (const auto& el : obj) {
@@ -180,7 +180,7 @@ inline void Serialize(const T &obj, char **buffer) {
}
template<typename T>
-inline void Serialize(const nnvm::Tuple <T> &obj, char **buffer) {
+inline void Serialize(const mxnet::Tuple <T> &obj, char **buffer) {
uint32_t size = obj.ndim();
std::memcpy(*buffer, &size, 4);
*buffer += 4;
@@ -244,7 +244,7 @@ inline void Deserialize(T *obj, const std::string &buffer,
size_t *curr_pos) {
}
template<typename T>
-inline void Deserialize(nnvm::Tuple <T> *obj, const std::string &buffer, size_t *curr_pos) {
+inline void Deserialize(mxnet::Tuple <T> *obj, const std::string &buffer, size_t *curr_pos) {
uint32_t size = obj->ndim();
std::memcpy(&size, &buffer[*curr_pos], 4);
*curr_pos += 4;
diff --git a/src/imperative/cached_op.h b/src/imperative/cached_op.h
index b3192dc..14b373e 100644
--- a/src/imperative/cached_op.h
+++ b/src/imperative/cached_op.h
@@ -36,8 +36,8 @@ struct CachedOpConfig : public
dmlc::Parameter<CachedOpConfig> {
bool static_alloc;
bool static_shape;
bool is_dynamic;
- nnvm::Tuple<uint32_t> data_indices;
- nnvm::Tuple<uint32_t> param_indices;
+ mxnet::Tuple<uint32_t> data_indices;
+ mxnet::Tuple<uint32_t> param_indices;
std::string subgraph;
DMLC_DECLARE_PARAMETER(CachedOpConfig) {
DMLC_DECLARE_FIELD(static_alloc)
@@ -59,10 +59,10 @@ struct CachedOpConfig : public
dmlc::Parameter<CachedOpConfig> {
.set_default(Imperative::BulkExecMaxNodeTrainBwd())
.describe("Segment size of bulk execution during backward pass.");
DMLC_DECLARE_FIELD(data_indices)
- .set_default(nnvm::Tuple<uint32_t>())
+ .set_default(mxnet::Tuple<uint32_t>())
.describe("Position of argument variables.");
DMLC_DECLARE_FIELD(param_indices)
- .set_default(nnvm::Tuple<uint32_t>())
+ .set_default(mxnet::Tuple<uint32_t>())
.describe("Position of parameters.");
DMLC_DECLARE_FIELD(subgraph)
.set_default(std::string(""))
diff --git a/src/io/image_det_aug_default.cc b/src/io/image_det_aug_default.cc
index 74e51b5..3bd3720 100644
--- a/src/io/image_det_aug_default.cc
+++ b/src/io/image_det_aug_default.cc
@@ -34,7 +34,7 @@
namespace mxnet {
namespace io {
-using nnvm::Tuple;
+using mxnet::Tuple;
namespace image_det_aug_default_enum {
enum ImageDetAugDefaultCropEmitMode {kCenter, kOverlap};
@@ -462,7 +462,7 @@ class DefaultImageDetAugmenter : public ImageAugmenter {
/*! \brief Check number of crop samplers and given parameters */
template<typename DType>
- void ValidateCropParameters(nnvm::Tuple<DType> *param, const int num_sampler) {
+ void ValidateCropParameters(mxnet::Tuple<DType> *param, const int num_sampler) {
if (num_sampler == 1) {
CHECK_EQ(param->ndim(), 1);
} else if (num_sampler > 1) {
diff --git a/src/io/image_io.cc b/src/io/image_io.cc
index 965078c..c035799 100644
--- a/src/io/image_io.cc
+++ b/src/io/image_io.cc
@@ -295,7 +295,7 @@ struct MakeBorderParam : public
dmlc::Parameter<MakeBorderParam> {
int top, bot, left, right;
int type;
double value;
- nnvm::Tuple<double> values;
+ mxnet::Tuple<double> values;
DMLC_DECLARE_PARAMETER(MakeBorderParam) {
DMLC_DECLARE_FIELD(top)
.describe("Top margin.");
diff --git a/src/io/iter_batchloader.h b/src/io/iter_batchloader.h
index 69eb05f..279690b 100644
--- a/src/io/iter_batchloader.h
+++ b/src/io/iter_batchloader.h
@@ -78,7 +78,7 @@ class BatchLoader : public IIterator<TBlobBatch> {
// if overflow from previous round, directly return false, until before
first is called
if (num_overflow_ != 0) return false;
- index_t top = 0;
+ size_t top = 0;
while (base_->Next()) {
const DataInst& d = base_->Value();
diff --git a/src/io/iter_sparse_batchloader.h b/src/io/iter_sparse_batchloader.h
index 17c509a..c0d856d 100644
--- a/src/io/iter_sparse_batchloader.h
+++ b/src/io/iter_sparse_batchloader.h
@@ -67,7 +67,7 @@ class SparseBatchLoader : public BatchLoader, public
SparseIIterator<TBlobBatch>
this->head_ = 0;
// if overflown from previous round, directly return false, until before
first is called
if (num_overflow_ != 0) return false;
- index_t top = 0;
+ size_t top = 0;
offsets_.clear();
while (sparse_base_->Next()) {
const DataInst& inst = sparse_base_->Value();
diff --git a/src/libinfo.cc b/src/libinfo.cc
index 2af61ea..f67b45e 100644
--- a/src/libinfo.cc
+++ b/src/libinfo.cc
@@ -86,7 +86,9 @@ class FeatureSet {
// Misc
feature_bits.set(CAFFE, MXNET_USE_CAFFE);
feature_bits.set(DIST_KVSTORE, MXNET_USE_DIST_KVSTORE);
+ feature_bits.set(INT64_TENSOR_SIZE, MXNET_USE_INT64_TENSOR_SIZE);
feature_bits.set(SIGNAL_HANDLER, MXNET_USE_SIGNAL_HANDLER);
+
#ifndef NDEBUG
feature_bits.set(DEBUG);
#endif
@@ -154,6 +156,7 @@ const std::vector<std::string> EnumNames::names = {
"PROFILER",
"DIST_KVSTORE",
"CXX14",
+ "INT64_TENSOR_SIZE",
"SIGNAL_HANDLER",
"DEBUG",
};
diff --git a/src/operator/contrib/dgl_graph.cc
b/src/operator/contrib/dgl_graph.cc
index 313b855..4288997 100644
--- a/src/operator/contrib/dgl_graph.cc
+++ b/src/operator/contrib/dgl_graph.cc
@@ -1251,7 +1251,7 @@ void EdgeIDForwardCsrImpl(const OpContext& ctx,
CHECK_EQ(req, kWriteTo) << "EdgeID with CSR only supports kWriteTo";
Stream<xpu> *s = ctx.get_stream<xpu>();
const NDArray& u = inputs[1];
- const nnvm::dim_t out_elems = u.shape().Size();
+ const dim_t out_elems = u.shape().Size();
if (!inputs[0].storage_initialized()) {
MSHADOW_TYPE_SWITCH(output.dtype(), DType, {
Kernel<mxnet_op::op_with_req<mshadow_op::identity, kWriteTo>,
xpu>::Launch(
@@ -1408,7 +1408,7 @@ Example:
struct SubgraphCompactParam : public dmlc::Parameter<SubgraphCompactParam> {
int num_args;
bool return_mapping;
- nnvm::Tuple<nnvm::dim_t> graph_sizes;
+ mxnet::Tuple<dim_t> graph_sizes;
DMLC_DECLARE_PARAMETER(SubgraphCompactParam) {
DMLC_DECLARE_FIELD(num_args).set_lower_bound(2)
.describe("Number of input arguments.");
diff --git a/src/operator/contrib/multi_proposal-inl.h
b/src/operator/contrib/multi_proposal-inl.h
index 4d278fb..7010dad 100644
--- a/src/operator/contrib/multi_proposal-inl.h
+++ b/src/operator/contrib/multi_proposal-inl.h
@@ -56,8 +56,8 @@ struct MultiProposalParam : public
dmlc::Parameter<MultiProposalParam> {
int rpn_post_nms_top_n;
float threshold;
int rpn_min_size;
- nnvm::Tuple<float> scales;
- nnvm::Tuple<float> ratios;
+ mxnet::Tuple<float> scales;
+ mxnet::Tuple<float> ratios;
int feature_stride;
bool output_score;
bool iou_loss;
@@ -73,10 +73,10 @@ struct MultiProposalParam : public
dmlc::Parameter<MultiProposalParam> {
DMLC_DECLARE_FIELD(rpn_min_size).set_default(16)
.describe("Minimum height or width in proposal");
tmp[0] = 4.0f; tmp[1] = 8.0f; tmp[2] = 16.0f; tmp[3] = 32.0f;
- DMLC_DECLARE_FIELD(scales).set_default(nnvm::Tuple<float>(tmp, tmp + 4))
+ DMLC_DECLARE_FIELD(scales).set_default(mxnet::Tuple<float>(tmp, tmp + 4))
.describe("Used to generate anchor windows by enumerating scales");
tmp[0] = 0.5f; tmp[1] = 1.0f; tmp[2] = 2.0f;
- DMLC_DECLARE_FIELD(ratios).set_default(nnvm::Tuple<float>(tmp, tmp + 3))
+ DMLC_DECLARE_FIELD(ratios).set_default(mxnet::Tuple<float>(tmp, tmp + 3))
.describe("Used to generate anchor windows by enumerating ratios");
DMLC_DECLARE_FIELD(feature_stride).set_default(16)
.describe("The size of the receptive field each unit in the convolution
layer of the rpn,"
@@ -214,11 +214,11 @@ inline void _Transform(float scale,
// out_anchors must have shape (n, 5), where n is ratios.size() * scales.size()
inline void GenerateAnchors(const std::vector<float>& base_anchor,
- const nnvm::Tuple<float>& ratios,
- const nnvm::Tuple<float>& scales,
+ const mxnet::Tuple<float>& ratios,
+ const mxnet::Tuple<float>& scales,
std::vector<float> *out_anchors) {
- for (size_t j = 0; j < ratios.ndim(); ++j) {
- for (size_t k = 0; k < scales.ndim(); ++k) {
+ for (int j = 0; j < ratios.ndim(); ++j) {
+ for (int k = 0; k < scales.ndim(); ++k) {
_Transform(scales[k], ratios[j], base_anchor, out_anchors);
}
}
diff --git a/src/operator/contrib/multibox_detection-inl.h
b/src/operator/contrib/multibox_detection-inl.h
index 1ac14e2..34ad447 100644
--- a/src/operator/contrib/multibox_detection-inl.h
+++ b/src/operator/contrib/multibox_detection-inl.h
@@ -52,7 +52,7 @@ struct MultiBoxDetectionParam : public
dmlc::Parameter<MultiBoxDetectionParam> {
bool force_suppress;
int keep_topk;
int nms_topk;
- nnvm::Tuple<float> variances;
+ mxnet::Tuple<float> variances;
DMLC_DECLARE_PARAMETER(MultiBoxDetectionParam) {
DMLC_DECLARE_FIELD(clip).set_default(true)
.describe("Clip out-of-boundary boxes.");
diff --git a/src/operator/contrib/multibox_detection.cc
b/src/operator/contrib/multibox_detection.cc
index 8d10829..65fe5f1 100644
--- a/src/operator/contrib/multibox_detection.cc
+++ b/src/operator/contrib/multibox_detection.cc
@@ -87,7 +87,7 @@ inline void MultiBoxDetectionForward(const Tensor<cpu, 3,
DType> &out,
const Tensor<cpu, 3, DType> &temp_space,
const float threshold,
const bool clip,
- const nnvm::Tuple<float> &variances,
+ const mxnet::Tuple<float> &variances,
const float nms_threshold,
const bool force_suppress,
const int nms_topk) {
diff --git a/src/operator/contrib/multibox_detection.cu
b/src/operator/contrib/multibox_detection.cu
index 98151f8..51b2aa7 100644
--- a/src/operator/contrib/multibox_detection.cu
+++ b/src/operator/contrib/multibox_detection.cu
@@ -213,7 +213,7 @@ inline void MultiBoxDetectionForward(const Tensor<gpu, 3,
DType> &out,
const Tensor<gpu, 3, DType> &temp_space,
const float threshold,
const bool clip,
- const nnvm::Tuple<float> &variances,
+ const mxnet::Tuple<float> &variances,
const float nms_threshold,
const bool force_suppress,
const int nms_topk) {
diff --git a/src/operator/contrib/multibox_prior-inl.h
b/src/operator/contrib/multibox_prior-inl.h
index d8929f3..bfc244f 100644
--- a/src/operator/contrib/multibox_prior-inl.h
+++ b/src/operator/contrib/multibox_prior-inl.h
@@ -57,11 +57,11 @@ enum MultiBoxPriorOpOutputs {kOut};
} // namespace mboxprior_enum
struct MultiBoxPriorParam : public dmlc::Parameter<MultiBoxPriorParam> {
- nnvm::Tuple<float> sizes;
- nnvm::Tuple<float> ratios;
+ mxnet::Tuple<float> sizes;
+ mxnet::Tuple<float> ratios;
bool clip;
- nnvm::Tuple<float> steps;
- nnvm::Tuple<float> offsets;
+ mxnet::Tuple<float> steps;
+ mxnet::Tuple<float> offsets;
DMLC_DECLARE_PARAMETER(MultiBoxPriorParam) {
DMLC_DECLARE_FIELD(sizes).set_default({1.0f})
.describe("List of sizes of generated MultiBoxPriores.");
diff --git a/src/operator/contrib/multibox_target-inl.h
b/src/operator/contrib/multibox_target-inl.h
index f7a9288..6034f13 100644
--- a/src/operator/contrib/multibox_target-inl.h
+++ b/src/operator/contrib/multibox_target-inl.h
@@ -62,7 +62,7 @@ struct MultiBoxTargetParam : public
dmlc::Parameter<MultiBoxTargetParam> {
float negative_mining_ratio;
float negative_mining_thresh;
int minimum_negative_samples;
- nnvm::Tuple<float> variances;
+ mxnet::Tuple<float> variances;
DMLC_DECLARE_PARAMETER(MultiBoxTargetParam) {
DMLC_DECLARE_FIELD(overlap_threshold).set_default(0.5f)
.describe("Anchor-GT overlap threshold to be regarded as a positive
match.");
diff --git a/src/operator/contrib/multibox_target.cc
b/src/operator/contrib/multibox_target.cc
index a1f2aac..a1808c5 100644
--- a/src/operator/contrib/multibox_target.cc
+++ b/src/operator/contrib/multibox_target.cc
@@ -81,7 +81,7 @@ inline void MultiBoxTargetForward(const Tensor<cpu, 2, DType>
&loc_target,
const float negative_mining_ratio,
const float negative_mining_thresh,
const int minimum_negative_samples,
- const nnvm::Tuple<float> &variances) {
+ const mxnet::Tuple<float> &variances) {
const DType *p_anchor = anchors.dptr_;
const int num_batches = labels.size(0);
const int num_labels = labels.size(1);
diff --git a/src/operator/contrib/multibox_target.cu
b/src/operator/contrib/multibox_target.cu
index ca04283..a44c08b 100644
--- a/src/operator/contrib/multibox_target.cu
+++ b/src/operator/contrib/multibox_target.cu
@@ -349,7 +349,7 @@ inline void MultiBoxTargetForward(const Tensor<gpu, 2,
DType> &loc_target,
const float negative_mining_ratio,
const float negative_mining_thresh,
const int minimum_negative_samples,
- const nnvm::Tuple<float> &variances) {
+ const mxnet::Tuple<float> &variances) {
const int num_batches = labels.size(0);
const int num_labels = labels.size(1);
const int label_width = labels.size(2);
diff --git a/src/operator/contrib/proposal-inl.h
b/src/operator/contrib/proposal-inl.h
index 21e9fe1..10f1f86 100644
--- a/src/operator/contrib/proposal-inl.h
+++ b/src/operator/contrib/proposal-inl.h
@@ -54,8 +54,8 @@ struct ProposalParam : public dmlc::Parameter<ProposalParam> {
int rpn_post_nms_top_n;
float threshold;
int rpn_min_size;
- nnvm::Tuple<float> scales;
- nnvm::Tuple<float> ratios;
+ mxnet::Tuple<float> scales;
+ mxnet::Tuple<float> ratios;
int feature_stride;
bool output_score;
bool iou_loss;
@@ -71,10 +71,10 @@ struct ProposalParam : public
dmlc::Parameter<ProposalParam> {
DMLC_DECLARE_FIELD(rpn_min_size).set_default(16)
.describe("Minimum height or width in proposal");
tmp[0] = 4.0f; tmp[1] = 8.0f; tmp[2] = 16.0f; tmp[3] = 32.0f;
- DMLC_DECLARE_FIELD(scales).set_default(nnvm::Tuple<float>(tmp, tmp + 4))
+ DMLC_DECLARE_FIELD(scales).set_default(mxnet::Tuple<float>(tmp, tmp + 4))
.describe("Used to generate anchor windows by enumerating scales");
tmp[0] = 0.5f; tmp[1] = 1.0f; tmp[2] = 2.0f;
- DMLC_DECLARE_FIELD(ratios).set_default(nnvm::Tuple<float>(tmp, tmp + 3))
+ DMLC_DECLARE_FIELD(ratios).set_default(mxnet::Tuple<float>(tmp, tmp + 3))
.describe("Used to generate anchor windows by enumerating ratios");
DMLC_DECLARE_FIELD(feature_stride).set_default(16)
.describe("The size of the receptive field each unit in the convolution
layer of the rpn,"
@@ -212,11 +212,11 @@ inline void _Transform(float scale,
// out_anchors must have shape (n, 5), where n is ratios.size() * scales.size()
inline void GenerateAnchors(const std::vector<float>& base_anchor,
- const nnvm::Tuple<float>& ratios,
- const nnvm::Tuple<float>& scales,
+ const mxnet::Tuple<float>& ratios,
+ const mxnet::Tuple<float>& scales,
std::vector<float> *out_anchors) {
- for (size_t j = 0; j < ratios.ndim(); ++j) {
- for (size_t k = 0; k < scales.ndim(); ++k) {
+ for (int j = 0; j < ratios.ndim(); ++j) {
+ for (int k = 0; k < scales.ndim(); ++k) {
_Transform(scales[k], ratios[j], base_anchor, out_anchors);
}
}
diff --git a/src/operator/convolution_v1-inl.h
b/src/operator/convolution_v1-inl.h
index 080c718..d2126bd 100644
--- a/src/operator/convolution_v1-inl.h
+++ b/src/operator/convolution_v1-inl.h
@@ -336,7 +336,7 @@ class ConvolutionV1Op : public Operator {
// param_.workspace is in elements of sizeof(DType)
// if param_.workspace is set to zero the nstep_ equals ishape[0] (batch)
nstep_ = std::max<index_t>(
- std::min(static_cast<index_t>(param_.workspace) /
+ std::min<index_t>(param_.workspace /
(shape_colunit_.Size() + shape_dstunit_.Size()), ishape[0]),
1);
diff --git a/src/operator/image/image_random-inl.h
b/src/operator/image/image_random-inl.h
index 182cd68..aeb189f 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -215,16 +215,16 @@ void ToTensorOpForward(const nnvm::NodeAttrs &attrs,
}
struct NormalizeParam : public dmlc::Parameter<NormalizeParam> {
- nnvm::Tuple<float> mean;
- nnvm::Tuple<float> std;
+ mxnet::Tuple<float> mean;
+ mxnet::Tuple<float> std;
DMLC_DECLARE_PARAMETER(NormalizeParam) {
DMLC_DECLARE_FIELD(mean)
- .set_default(nnvm::Tuple<float> {0.0f, 0.0f, 0.0f, 0.0f})
+ .set_default(mxnet::Tuple<float> {0.0f, 0.0f, 0.0f, 0.0f})
.describe("Sequence of means for each channel. "
"Default value is 0.");
DMLC_DECLARE_FIELD(std)
- .set_default(nnvm::Tuple<float> {1.0f, 1.0f, 1.0f, 1.0f})
+ .set_default(mxnet::Tuple<float> {1.0f, 1.0f, 1.0f, 1.0f})
.describe("Sequence of standard deviations for each channel. "
"Default value is 1.");
}
@@ -245,7 +245,7 @@ inline bool NormalizeOpShape(const nnvm::NodeAttrs& attrs,
<< "Input tensor must have shape (channels, height, width), or "
<< "(N, channels, height, width), but got " << dshape;
- uint32_t nchannels;
+ int nchannels = 0;
if (dshape.ndim() == 3) {
nchannels = dshape[0];
CHECK(nchannels == 3 || nchannels == 1)
@@ -981,7 +981,7 @@ inline void RandomColorJitter(const nnvm::NodeAttrs &attrs,
}
struct AdjustLightingParam : public dmlc::Parameter<AdjustLightingParam> {
- nnvm::Tuple<float> alpha;
+ mxnet::Tuple<float> alpha;
DMLC_DECLARE_PARAMETER(AdjustLightingParam) {
DMLC_DECLARE_FIELD(alpha)
.describe("The lighting alphas for the R, G, B channels.");
@@ -997,7 +997,7 @@ struct RandomLightingParam : public dmlc::Parameter<RandomLightingParam> {
}
};
-inline void AdjustLightingImpl(const nnvm::Tuple<float>& alpha,
+inline void AdjustLightingImpl(const mxnet::Tuple<float>& alpha,
const OpContext &ctx,
const std::vector<TBlob> &inputs,
const std::vector<OpReqType> &req,
diff --git a/src/operator/nn/deconvolution-inl.h b/src/operator/nn/deconvolution-inl.h
index 1eeccb0..58f9be7 100644
--- a/src/operator/nn/deconvolution-inl.h
+++ b/src/operator/nn/deconvolution-inl.h
@@ -462,7 +462,7 @@ class DeconvolutionOp {
oshape[2] * oshape[3]);
// See convolution for workspace calculations. nstep_ will be the effective batch size
nstep_ = std::max<index_t>(
- std::min(static_cast<index_t>(param_.workspace) /
+ std::min<index_t>(param_.workspace /
(shape_colunit_.Size() + shape_dstunit_.Size()), ishape[0]),
1);
diff --git a/src/operator/optimizer_op-inl.h b/src/operator/optimizer_op-inl.h
index 49eb96b..bd923ae 100644
--- a/src/operator/optimizer_op-inl.h
+++ b/src/operator/optimizer_op-inl.h
@@ -83,8 +83,8 @@ struct SGDParam : public dmlc::Parameter<SGDParam> {
};
struct MultiSGDParam : public dmlc::Parameter<MultiSGDParam> {
- nnvm::Tuple<float> lrs;
- nnvm::Tuple<float> wds;
+ mxnet::Tuple<float> lrs;
+ mxnet::Tuple<float> wds;
float rescale_grad;
float clip_gradient;
int num_weights;
@@ -110,8 +110,8 @@ struct MultiSGDParam : public dmlc::Parameter<MultiSGDParam> {
};
struct MultiSGDMomParam : public dmlc::Parameter<MultiSGDMomParam> {
- nnvm::Tuple<float> lrs;
- nnvm::Tuple<float> wds;
+ mxnet::Tuple<float> lrs;
+ mxnet::Tuple<float> wds;
float momentum;
float rescale_grad;
float clip_gradient;
diff --git a/src/operator/swapaxis-inl.h b/src/operator/swapaxis-inl.h
index 7335daa..b17a81f 100644
--- a/src/operator/swapaxis-inl.h
+++ b/src/operator/swapaxis-inl.h
@@ -106,8 +106,8 @@ class SwapAxisOp : public Operator {
const std::vector<OpReqType> &req) {
using namespace mshadow;
using namespace mshadow::expr;
- uint32_t dim1 = param_.dim1;
- uint32_t dim2 = param_.dim2;
+ int dim1 = param_.dim1;
+ int dim2 = param_.dim2;
TBlob data_in = in_data[swapaxisenum::kData];
TBlob data_out = out_data[swapaxisenum::kData];
diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index 6469aae..e8c5e88 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -1273,7 +1273,9 @@ inline bool GatherNDShape(const nnvm::NodeAttrs& attrs,
mxnet::TShape oshape(ishape.ndim() - 1 + dshape.ndim() - ishape[0], -1);
- for (int i = 0; i < ishape.ndim() - 1; ++i) oshape[i] = ishape[i+1];
+ for (int i = 0; i < ishape.ndim() - 1; ++i) {
+ oshape[i] = ishape[i+1];
+ }
for (int i = 0; i < dshape.ndim() - ishape[0]; ++i) {
oshape[ishape.ndim()-1+i] = dshape[ishape[0] + i];
}
diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h
index 0e7f662..e99741b 100644
--- a/src/operator/tensor/matrix_op-inl.h
+++ b/src/operator/tensor/matrix_op-inl.h
@@ -662,7 +662,7 @@ inline void GetIndexRange(const mxnet::TShape& dshape,
<< "step and begin must have the same length";
}
- for (index_t i = 0; i < param_begin.ndim(); ++i) {
+ for (int i = 0; i < param_begin.ndim(); ++i) {
index_t s = param_step.ndim() != 0U && param_step[i].has_value() ?
param_step[i].value() : 1;
CHECK_NE(s, 0) << "slice op step[" << i << "] cannot be 0";
@@ -736,11 +736,11 @@ inline bool SliceOpShape(const nnvm::NodeAttrs& attrs,
MXNET_NDIM_SWITCH(dshape.ndim(), ndim, {
common::StaticArray<index_t, ndim> begin, end, step;
GetIndexRange(dshape, param.begin, param.end, param.step, &begin, &end,
&step);
- for (index_t i = 0; i < param.begin.ndim(); ++i) {
+ for (int i = 0; i < param.begin.ndim(); ++i) {
const int b = begin[i], e = end[i], s = step[i];
SetSliceOpOutputDimSize(i, b, e, s, &oshape);
}
- });
+ })
SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
return shape_is_known(oshape);
@@ -953,7 +953,7 @@ inline bool SliceAssignOpShape(const nnvm::NodeAttrs& attrs,
const int b = begin[i], e = end[i], s = step[i];
SetSliceOpOutputDimSize(i, b, e, s, &vshape);
}
- });
+ })
SHAPE_ASSIGN_CHECK(*in_attrs, 1, vshape);
SHAPE_ASSIGN_CHECK(*out_attrs, 0, dshape);
return true;
@@ -1169,7 +1169,7 @@ inline bool SliceAxisShape(const nnvm::NodeAttrs& attrs,
}
mxnet::TShape shape(ishape.ndim(), -1);
for (int i = 0; i < ishape.ndim(); ++i) {
- if (static_cast<int>(i) == axis) {
+ if (i == axis) {
shape[i] = static_cast<index_t>(end - begin);
} else {
shape[i] = ishape[i];
@@ -1227,7 +1227,7 @@ void SliceAxisGrad_(const nnvm::NodeAttrs& attrs,
int axis;
index_t begin, end;
GetSliceAxisParams(param, outputs[0].shape_, &axis, &begin, &end);
- int ndim = static_cast<int>(outputs[0].shape_.ndim());
+ int ndim = outputs[0].shape_.ndim();
if (axis + 1 == ndim) {
MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
@@ -1293,12 +1293,12 @@ inline bool SliceLikeShape(const nnvm::NodeAttrs& attrs,
} else {
mxnet::TShape shape(ishape);
for (int i = 0; i < param.axes.ndim(); ++i) {
- int axis = static_cast<int>(param.axes[i]);
+ int axis = param.axes[i];
if (axis < 0) {
- axis += static_cast<int>(ishape.ndim());
+ axis += ishape.ndim();
}
CHECK_GE(axis, 0)
- << "Slice axis: " << static_cast<int>(param.axes[i]) << " too small";
+ << "Slice axis: " << param.axes[i] << " too small";
CHECK_GT(ishape.ndim(), axis)
<< "Slice axis: " << axis << " exceeds first input: " << ishape.ndim();
CHECK_GT(from_shape.ndim(), axis)
@@ -1330,15 +1330,15 @@ inline void SliceLikeInferRanges(const mxnet::TShape& dshape,
}
} else {
for (int i = 0; i < axes.ndim(); ++i) {
- int axis = static_cast<int>(axes[i]);
+ int axis = axes[i];
if (axis < 0) {
- axis += static_cast<int>(dshape.ndim());
+ axis += dshape.ndim();
}
CHECK_GE(axis, 0)
- << "Slice axis: " << static_cast<int>(axes[i]) << " too small";
- CHECK_LT(axis, static_cast<int>(dshape.ndim()))
+ << "Slice axis: " << axes[i] << " too small";
+ CHECK_LT(axis, dshape.ndim())
<< "Slice axis: " << axis << " exceeds first input: " << dshape.ndim();
- CHECK_LT(axis, static_cast<int>(fshape.ndim()))
+ CHECK_LT(axis, fshape.ndim())
<< "Slice axis: " << axis << " exceeds first input: " << fshape.ndim();
pb[axis] = 0;
pe[axis] = fshape[axis];
diff --git a/src/operator/tensor/sparse_retain-inl.h b/src/operator/tensor/sparse_retain-inl.h
index 951bf80..04860e6 100644
--- a/src/operator/tensor/sparse_retain-inl.h
+++ b/src/operator/tensor/sparse_retain-inl.h
@@ -290,7 +290,7 @@ void SparseRetainOpForwardRspImpl(mshadow::Stream<xpu> *s,
Kernel<set_zero, xpu>::Launch(s, output_data.Size(),
output_data.dptr<DType>());
MSHADOW_IDX_TYPE_SWITCH(output_idx.type_flag_, RType, { // row index data
type
MSHADOW_TYPE_SWITCH(idx_data.type_flag_, IType, { // index array data
type
- if (input_idx.Size() == input_nd.shape()[0]) { // input rsp is dense
+ if (input_idx.Size() == static_cast<size_t>(input_nd.shape()[0])) {
// input rsp is dense
using namespace mshadow;
// copy indices
Tensor<xpu, 1, RType> output_idx_tensor = output_idx.FlatTo1D<xpu,
RType>(s);
diff --git a/src/operator/tensor/square_sum-inl.h b/src/operator/tensor/square_sum-inl.h
index 016b383..c2e3182 100644
--- a/src/operator/tensor/square_sum-inl.h
+++ b/src/operator/tensor/square_sum-inl.h
@@ -434,14 +434,16 @@ void SquareSumRspGradImpl(const nnvm::NodeAttrs& attrs,
" when ograd_stype = kRowSparseStorage";
CHECK_EQ(ograd.shape().ndim(), 2U);
const TBlob ograd_row_idx = ograd.aux_data(rowsparse::kIdx);
- CHECK(ograd_row_idx.Size() == in_row_idx.Size() || in_row_idx.Size() ==
in_data.shape_[0]);
+ CHECK(ograd_row_idx.Size() == in_row_idx.Size() ||
+ in_row_idx.Size() == static_cast<size_t>(in_data.shape_[0]));
igrad->CheckAndAlloc({ograd.aux_shape(rowsparse::kIdx)});
const TBlob& igrad_data = igrad->data();
const TBlob igrad_row_idx = igrad->aux_data(rowsparse::kIdx);
MSHADOW_IDX_TYPE_SWITCH(igrad_row_idx.type_flag_, IType, {
// when ograd_row_idx and in_row_idx have the same size and input is not a full rsp
// ograd_row_idx and in_row_idx are expected to have the same elements
- if (in_row_idx.Size() != input.shape()[0]) { // if input data is not a
full rsp
+ if (in_row_idx.Size() != static_cast<size_t>(input.shape()[0])) {
+ // if input data is not a full rsp
CHECK_EQ(ograd_row_idx.Size(), in_row_idx.Size()) <<
"SquareSumRspGradImpl only supports"
" equal
ograd_row_idx and"
" input_row_idx
when ograd and"
@@ -452,7 +454,8 @@ void SquareSumRspGradImpl(const nnvm::NodeAttrs& attrs,
}
MSHADOW_TYPE_SWITCH(igrad_data.type_flag_, DType, {
MXNET_ASSIGN_REQ_SWITCH(req, req_type, {
- if (in_row_idx.Size() != input.shape()[0]) { // input data is not a
full rsp
+ if (in_row_idx.Size() != static_cast<size_t>(input.shape()[0])) {
+ // input data is not a full rsp
Kernel<SquareSumRspGradKernel<req_type, 1, kRowSparseStorage,
false>, xpu>::Launch(
s, igrad_data.Size(), igrad_row_idx.dptr<IType>(),
igrad_data.dptr<DType>(), ograd_row_idx.dptr<IType>(),
diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries
index 53e1c30..13bb50e 100755
--- a/tests/nightly/JenkinsfileForBinaries
+++ b/tests/nightly/JenkinsfileForBinaries
@@ -19,6 +19,7 @@
//This is a Jenkinsfile for nightly tests. The format and some functions have been picked up from the top-level Jenkinsfile
mx_lib = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a'
+mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so' // artifact list passed to utils.pack_lib / utils.unpack_and_init by the int64 stages
node('utility') {
// Loading the utilities requires a node context unfortunately
@@ -39,6 +40,24 @@ core_logic: {
utils.pack_lib('gpu', mx_lib)
}
}
+ },
+ 'CPU: USE_INT64_TENSOR_SIZE': {  // nightly build stage for the int64 tensor-size flag, on a CPU node
+   node(NODE_LINUX_CPU) {
+     ws('workspace/build-cpu-int64') {
+       utils.init_git()
+       utils.docker_run('ubuntu_nightly_cpu', 'build_ubuntu_cpu_large_tensor', false)
+       utils.pack_lib('ubuntu_cpu_int64', mx_cmake_lib, true)  // packs the mx_cmake_lib artifacts under the name 'ubuntu_cpu_int64'
+     }
+   }
+ },
+ 'GPU: USE_INT64_TENSOR_SIZE': {  // nightly build stage for the int64 tensor-size flag, on a GPU node
+   node(NODE_LINUX_GPU) {
+     ws('workspace/build-gpu-int64') {
+       utils.init_git()
+       utils.docker_run('ubuntu_nightly_gpu', 'build_ubuntu_gpu_large_tensor', true)
+       utils.pack_lib('ubuntu_gpu_int64', mx_cmake_lib, true)  // packs the mx_cmake_lib artifacts under the name 'ubuntu_gpu_int64'
+     }
+   }
}
}
@@ -59,6 +78,22 @@ core_logic: {
}
}
},
+ 'Test Large Tensor Size: CPU': {  // nightly large-tensor test stage, on a CPU node
+   node(NODE_LINUX_CPU) {
+     ws('workspace/large_tensor-cpu') {
+       utils.unpack_and_init('ubuntu_cpu_int64', mx_cmake_lib)  // name must equal the one given to utils.pack_lib in 'CPU: USE_INT64_TENSOR_SIZE'
+       utils.docker_run('ubuntu_nightly_cpu', 'nightly_test_large_tensor', false)
+     }
+   }
+ },
+ 'Test Large Tensor Size: GPU': {  // nightly large-tensor test stage, on a GPU node
+   node(NODE_LINUX_GPU) {
+     ws('workspace/large_tensor-gpu') {
+       utils.unpack_and_init('ubuntu_gpu_int64', mx_cmake_lib)  // name must equal the one given to utils.pack_lib in 'GPU: USE_INT64_TENSOR_SIZE'
+       utils.docker_run('ubuntu_nightly_gpu', 'nightly_test_large_tensor', true)
+     }
+   }
+ },
'StraightDope: Python2 Single-GPU': {
node(NODE_LINUX_GPU_P3) {
ws('workspace/straight_dope-single_gpu') {
diff --git a/tests/nightly/test_large_array.py b/tests/nightly/test_large_array.py
index a627467..1b7dad4 100644
--- a/tests/nightly/test_large_array.py
+++ b/tests/nightly/test_large_array.py
@@ -27,6 +27,7 @@ LARGE_Y = 50000000
SMALL_Y = 50
LARGE_SIZE = LARGE_X * SMALL_Y
+
def test_gluon_embedding():
m = gluon.nn.Embedding(SMALL_Y, MEDIUM_X)
m.initialize()
@@ -35,22 +36,26 @@ def test_gluon_embedding():
assert b.shape == (MEDIUM_X, SMALL_Y, MEDIUM_X)
assert b.asnumpy().size == LARGE_SIZE
+
def test_ndarray_zeros():
a = nd.zeros(shape=(LARGE_X, SMALL_Y))
assert a[-1][0] == 0
assert a.shape == (LARGE_X, SMALL_Y)
assert a.size == LARGE_SIZE
+
def test_ndarray_ones():
a = nd.ones(shape=(LARGE_X, SMALL_Y))
assert a[-1][0] == 1
assert nd.sum(a).asnumpy() == LARGE_SIZE
+
@with_seed()
def test_ndarray_random_uniform():
a = nd.random.uniform(shape=(LARGE_X, SMALL_Y))
assert a[-1][0] != 0
+
@with_seed()
def test_ndarray_random_randint():
a = nd.random.randint(100, 10000, shape=(LARGE_X, SMALL_Y))
@@ -59,14 +64,16 @@ def test_ndarray_random_randint():
low_large_value = 2**32
high_large_value = 2**34
a = nd.random.randint(low_large_value,high_large_value)
- low = mx.nd.array([low_large_value],dtype='int64')
- high = mx.nd.array([high_large_value],dtype='int64')
+ low = mx.nd.array([low_large_value], dtype='int64')
+ high = mx.nd.array([high_large_value], dtype='int64')
assert a.__gt__(low) & a.__lt__(high)
+
def test_ndarray_empty():
a = nd.empty((LARGE_X, SMALL_Y))
assert a.shape == (LARGE_X, SMALL_Y)
+
def test_elementwise():
a = nd.ones(shape=(LARGE_X, SMALL_Y))
b = nd.ones(shape=(LARGE_X, SMALL_Y))
@@ -77,22 +84,26 @@ def test_elementwise():
res = nd.sqrt(a + 3)
assert np.sum(res[-1].asnumpy() == 2) == a.shape[1]
+
def test_reduce():
a = nd.ones(shape=(LARGE_X, SMALL_Y))
assert nd.sum(a).asnumpy() == a.shape[0] * a.shape[1]
+
def test_dot():
a = nd.ones(shape=(LARGE_X, SMALL_Y))
b = nd.ones(shape=(SMALL_Y, SMALL_Y))
res = nd.dot(a, b)
assert np.sum(res[-1].asnumpy() == SMALL_Y) == b.shape[1]
+
def test_FullyConnected():
a = nd.ones(shape=(LARGE_X, SMALL_Y))
b = nd.ones(shape=(SMALL_Y, SMALL_Y))
res = nd.FullyConnected(a, b, num_hidden=b.shape[1], no_bias=True)
assert np.sum(res[-1].asnumpy() == SMALL_Y) == b.shape[1]
+
def test_broadcast():
a = nd.ones(shape=(LARGE_X, SMALL_Y))
b = nd.arange(0, LARGE_X).reshape(LARGE_X, 1)
@@ -101,45 +112,53 @@ def test_broadcast():
res = mx.nd.broadcast_like(b, a)
assert np.sum(res[-1].asnumpy() == LARGE_X) == a.shape[1]
+
def test_clip():
a = nd.arange(0, LARGE_X).reshape(LARGE_X, 1)
b = nd.broadcast_to(a, shape=(a.shape[0], SMALL_Y))
res = nd.clip(b, a_min=100, a_max=1000)
assert np.sum(res[-1].asnumpy() == 1000) == b.shape[1]
+
def test_take():
a = nd.ones(shape=(LARGE_X, SMALL_Y))
idx = nd.arange(LARGE_X-1000, LARGE_X)
res = nd.take(a, idx)
assert np.sum(res[-1].asnumpy() == 1) == res.shape[1]
+
def test_slice():
a = nd.ones(shape=(LARGE_X, SMALL_Y))
res = nd.slice(a, begin=(LARGE_X-1000, 1), end=(LARGE_X, SMALL_Y))
assert np.sum(res[-1].asnumpy() == 1) == res.shape[1]
+
def test_slice_assign():
a = nd.ones(shape=(LARGE_X, SMALL_Y))
a[LARGE_X-1:LARGE_X] = 1000
assert np.sum(a[-1].asnumpy() == 1000) == a.shape[1]
-
+
+
def test_expand_dims():
a = nd.ones(shape=(LARGE_X, SMALL_Y))
res = nd.expand_dims(a, axis=1)
assert res.shape == (a.shape[0], 1, a.shape[1])
+
def test_squeeze():
a = nd.ones(shape=(LARGE_X, SMALL_Y))
data = nd.expand_dims(a, axis=1)
res = nd.squeeze(data)
assert res.shape == a.shape
+
def test_broadcast_div():
a = nd.ones(shape=(LARGE_X, SMALL_Y))
b = nd.ones(shape=(LARGE_X, 1)) * 2
res = a / b
assert np.sum(res[-1].asnumpy() == 0.5) == a.shape[1]
+
def test_Dense(ctx=mx.cpu(0)):
data = mx.nd.ones(shape=(50*1000*1000, 100))
linear = gluon.nn.Dense(100)
@@ -148,6 +167,7 @@ def test_Dense(ctx=mx.cpu(0)):
res.wait_to_read()
assert res.shape == (50000000, 100)
+
def test_where():
a = nd.ones(shape=(LARGE_X, SMALL_Y))
b = nd.arange(0, LARGE_X).reshape(LARGE_X, 1)
@@ -159,12 +179,14 @@ def test_where():
res = nd.sparse.where(csr_cond, a, b)
assert np.sum(res[0].asnumpy() == 1) == b.shape[1]
+
def test_pick():
a = mx.nd.ones(shape=(256*35, 1024*1024))
b = mx.nd.ones(shape=(256*35,))
res = mx.nd.pick(a,b)
assert res.shape == b.shape
+
if __name__ == '__main__':
import nose
nose.runmodule()