from:"\"jxie\""

[incubator-mxnet] branch master updated: Fall back when sparse arrays are passed to MKLDNN-enabled operators (#11664)

2018-08-24 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 15e43c0  Fall back when sparse arrays are passed to MKLDNN-enabled 
operators (#11664)
15e43c0 is described below

commit 15e43c096a54329006e5e44c3723b60694ff1239
Author: Luobao 
AuthorDate: Sat Aug 25 07:50:35 2018 +0800

Fall back when sparse arrays are passed to MKLDNN-enabled operators (#11664)

* softmax_fallback

* Fallback Amend
This is the final fix for the fallback problem (function calls)

* Lint amend

* test_try

* Patch for test fail

* Pooling amend

* Delete non_rectified_operation_test

* fallback_normal

* Fixed_dispatch

* activation-amend

* activation second

* activation backward

* activate_try

* activation_debug

* Act change.

* test_random

* mkldnn choice

* format_modify

* rebase
---
 src/operator/nn/activation.cc |  57 
 src/operator/nn/batch_norm.cc |   3 +-
 src/operator/nn/convolution.cc|  43 +---
 src/operator/nn/deconvolution.cc  |  36 --
 src/operator/nn/lrn.cc|  36 --
 src/operator/nn/mkldnn/mkldnn_base.cc |   4 +-
 src/operator/nn/pooling.cc|  42 
 src/operator/nn/softmax.cc|  21 ++
 tests/python/mkl/test_mkldnn.py   | 121 +-
 9 files changed, 198 insertions(+), 165 deletions(-)

diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc
index 277ca8e..b8c2045 100644
--- a/src/operator/nn/activation.cc
+++ b/src/operator/nn/activation.cc
@@ -31,6 +31,8 @@
 #include "./mkldnn/mkldnn_base-inl.h"
 #include "./mkldnn/mkldnn_ops-inl.h"
 #endif  // MXNET_USE_MKLDNN
+#include "../operator_common.h"
+#include "../../common/utils.h"
 
 namespace mxnet {
 namespace op {
@@ -101,6 +103,7 @@ void ActivationGradComputeExCPU(const nnvm::NodeAttrs& 
attrs,
 }
 #endif
 
+#if MXNET_USE_MKLDNN == 1
 inline static bool ActivationStorageType(const nnvm::NodeAttrs& attrs,
  const int dev_mask,
  DispatchMode* dispatch_mode,
@@ -108,20 +111,9 @@ inline static bool ActivationStorageType(const 
nnvm::NodeAttrs& attrs,
  std::vector *out_attrs) {
   CHECK_EQ(in_attrs->size(), 1);
   CHECK_EQ(out_attrs->size(), 1);
-  bool ret = ElemwiseStorageType<1, 1, false, false, false>(attrs, dev_mask,
-dispatch_mode,
-in_attrs, 
out_attrs);
-#if MXNET_USE_MKLDNN == 1
   const ActivationParam& param = nnvm::get(attrs.parsed);
-  if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNAct(param)) {
-*dispatch_mode = DispatchMode::kFComputeEx;
-  }
-  if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet()) {
-*dispatch_mode = DispatchMode::kFComputeFallback;
-return ret;
-  }
-#endif
-  return ret;
+  return MKLDNNStorageType(attrs, dev_mask, SupportMKLDNNAct(param),
+   dispatch_mode, in_attrs, out_attrs);
 }
 
 inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs,
@@ -129,46 +121,17 @@ inline static bool BackwardActStorageType(const 
nnvm::NodeAttrs& attrs,
   DispatchMode* dispatch_mode,
   std::vector *in_attrs,
   std::vector *out_attrs) {
-  bool ret = false;
   const ActivationParam& param = nnvm::get(attrs.parsed);
-#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
   if (param.act_type != activation::kReLU) {
 CHECK_EQ(in_attrs->size(), 3U);
-ret = ElemwiseStorageType<3, 1, false, false, false>(attrs, dev_mask,
- dispatch_mode,
- in_attrs, out_attrs);
   } else {
 // for ReLU activation, the backward pass only needs ograd and output
 CHECK_EQ(in_attrs->size(), 2U);
-ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask,
- dispatch_mode,
- in_attrs, out_attrs);
-  }
-#else
-  if (param.act_type == activation::kSoftSign) {
-CHECK_EQ(in_attrs->size(), 3U);
-ret = ElemwiseStorageType<3, 1, false, false, false>(attrs, dev_mask,
- dispatch_mode,
-

[incubator-mxnet] branch master updated: fix potential floating number overflow, enable float16 (#12118)

2018-08-20 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new c479eb2  fix potential floating number overflow, enable float16 
(#12118)
c479eb2 is described below

commit c479eb24eaab8857dca254ea76c1179b0f6fe36f
Author: Joshua Z. Zhang 
AuthorDate: Mon Aug 20 14:11:01 2018 -0700

fix potential floating number overflow, enable float16 (#12118)

* fix potential floating number overflow, enable float16

* fix cuda impl

* fix cuda impl

* fix template substitution for windows

* half_f substantiate operand + fix

* remove ambiguous operand + for mshadow half_T

* fix con't

* use int32_t as indices

* use overload

* try remove ambiguous function overloading

* thrust version limit

* change sizeof cast from floor to ceil  when allocating buffers

* cleaner

* fix alignment of pointers
---
 src/operator/contrib/bounding_box-inl.cuh  |   4 +-
 src/operator/contrib/bounding_box-inl.h|  86 +---
 src/operator/tensor/sort_op-inl.cuh| 135 ++---
 tests/python/unittest/test_contrib_operator.py |  25 ++---
 4 files changed, 184 insertions(+), 66 deletions(-)

diff --git a/src/operator/contrib/bounding_box-inl.cuh 
b/src/operator/contrib/bounding_box-inl.cuh
index fb1dacc..fd5e30b 100644
--- a/src/operator/contrib/bounding_box-inl.cuh
+++ b/src/operator/contrib/bounding_box-inl.cuh
@@ -45,9 +45,9 @@ struct valid_score {
 
 template
 int FilterScores(mshadow::Tensor out_scores,
- mshadow::Tensor out_sorted_index,
+ mshadow::Tensor out_sorted_index,
  mshadow::Tensor scores,
- mshadow::Tensor sorted_index,
+ mshadow::Tensor sorted_index,
  float valid_thresh) {
   valid_score pred(static_cast(valid_thresh));
   DType * end_scores = thrust::copy_if(thrust::device, scores.dptr_, 
scores.dptr_ + scores.MSize(),
diff --git a/src/operator/contrib/bounding_box-inl.h 
b/src/operator/contrib/bounding_box-inl.h
index f739dbc..8e96346 100644
--- a/src/operator/contrib/bounding_box-inl.h
+++ b/src/operator/contrib/bounding_box-inl.h
@@ -150,9 +150,9 @@ inline uint32_t BoxNMSNumVisibleOutputs(const NodeAttrs& 
attrs) {
 
 template
 int FilterScores(mshadow::Tensor out_scores,
- mshadow::Tensor out_sorted_index,
+ mshadow::Tensor out_sorted_index,
  mshadow::Tensor scores,
- mshadow::Tensor sorted_index,
+ mshadow::Tensor sorted_index,
  float valid_thresh) {
   index_t j = 0;
   for (index_t i = 0; i < scores.size(0); i++) {
@@ -230,7 +230,7 @@ MSHADOW_XINLINE DType BoxArea(const DType *box, int encode) 
{
 
 /*!
  * \brief compute areas specialized for nms to reduce computation
- * 
+ *
  * \param i the launched thread index (total thread num_batch * topk)
  * \param out 1d array for areas (size num_batch * num_elem)
  * \param in 1st coordinate of 1st box (buffer + coord_start)
@@ -243,7 +243,7 @@ MSHADOW_XINLINE DType BoxArea(const DType *box, int encode) 
{
 struct compute_area {
   template
   MSHADOW_XINLINE static void Map(int i, DType *out, const DType *in,
-  const DType *indices, const DType 
*batch_start,
+  const int32_t *indices, const int32_t 
*batch_start,
   int topk, int num_elem, int stride, int 
encode) {
 int b = i / topk;
 int k = i % topk;
@@ -302,7 +302,7 @@ MSHADOW_XINLINE DType Intersect(const DType *a, const DType 
*b, int encode) {
*/
 struct nms_impl {
   template
-  MSHADOW_XINLINE static void Map(int i, DType *index, const DType 
*batch_start,
+  MSHADOW_XINLINE static void Map(int i, int32_t *index, const int32_t 
*batch_start,
   const DType *input, const DType *areas,
   int k, int ref, int num,
   int stride, int offset_box, int offset_id,
@@ -326,8 +326,7 @@ struct nms_impl {
 intersect *= Intersect(input + ref_offset + 1, input + pos_offset + 1, 
encode);
 int ref_area_offset = static_cast(index[ref]);
 int pos_area_offset = static_cast(index[pos]);
-DType iou = intersect / (areas[ref_area_offset] + areas[pos_area_offset] -
-  intersect);
+DType iou = intersect / (areas[ref_area_offset] + areas[pos_area_offset] - 
intersect);
 if (iou > thresh) {
   index[pos] = -1;
 }
@@ -336,7 +335,7 @@ struct nms_impl {
 
 /*!
* \brief Assign output of nms by indexing input
-   * 
+   *
* \param i the launched thread index (total num_batch)
* \param out output array [cls, conf, b0, b1, b2, b3]

[incubator-mxnet] branch master updated: Fix a bug in CachedOP. (#12184)

2018-08-17 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new cd9f9c8  Fix a bug in CachedOP. (#12184)
cd9f9c8 is described below

commit cd9f9c87a65caf2d3ac451066e05fb40f3db5e96
Author: Da Zheng 
AuthorDate: Fri Aug 17 12:20:27 2018 -0700

Fix a bug in CachedOP. (#12184)

* fix a bug.

* address comments.

* retrigger

* address comments.
---
 src/imperative/cached_op.cc | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc
index 1e7f8e0..0c4c1e6 100644
--- a/src/imperative/cached_op.cc
+++ b/src/imperative/cached_op.cc
@@ -821,11 +821,12 @@ OpStatePtr CachedOp::DynamicForward(
 
   const auto& dispatch_modes = g.GetAttr("dispatch_mode");
 
-  // If we are already recording, we don't need RunGraph to record all
-  // computation again.
+  // If CachedOp is running in the inline mode, it uses RunGraph to record
+  // computation; otherwise, CachedOp records computation itself.
+  // So if it's not the inline mode, we disable recording.
   RunGraph(false, idx, arrays, 0, idx.num_nodes(), std::move(array_reqs),
std::move(ref_count), &states, dispatch_modes,
-   !recording || inlining_);
+   recording && inlining_);
 
   return op_state;
 }

[incubator-mxnet] branch master updated: Fix a minor bug in deformable_im2col.cuh (#12060)

2018-08-15 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 0fbdff8  Fix a minor bug in deformable_im2col.cuh (#12060)
0fbdff8 is described below

commit 0fbdff8bfbe8da6c1f1761abd4db30e7f8876c55
Author: 刘斌 
AuthorDate: Thu Aug 16 07:45:24 2018 +0800

Fix a minor bug in deformable_im2col.cuh (#12060)

Function `deformable_col2im_coord` called
deformable_col2im_coord_gpu_kernel but checked deformable_col2im_gpu_kernel.
---
 src/operator/contrib/nn/deformable_im2col.cuh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/operator/contrib/nn/deformable_im2col.cuh 
b/src/operator/contrib/nn/deformable_im2col.cuh
index d874276..5914184 100644
--- a/src/operator/contrib/nn/deformable_im2col.cuh
+++ b/src/operator/contrib/nn/deformable_im2col.cuh
@@ -510,7 +510,7 @@ inline void deformable_col2im_coord(mshadow::Stream* s,
 num_kernels, data_col, data_im, data_offset, im_shape[1], im_shape[2], 
im_shape[3],
 kernel_shape[0], kernel_shape[1], pad[0], pad[1], stride[0], stride[1],
 dilation[0], dilation[1], channel_per_deformable_group, col_shape[1], 
col_shape[2], grad_offset, req);
-MSHADOW_CUDA_POST_KERNEL_CHECK(deformable_col2im_gpu_kernel);
+MSHADOW_CUDA_POST_KERNEL_CHECK(deformable_col2im_coord_gpu_kernel);
 break;
   default:
 LOG(FATAL) << "col2im_nd_gpu does not support computation with "

[incubator-mxnet] branch master updated: [MXNET-684] Add `cond` operator (#11760)

2018-07-23 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 4bb141d  [MXNET-684] Add `cond` operator (#11760)
4bb141d is described below

commit 4bb141d6826b60e6b3dca25c007c36f6d4585c33
Author: Junru Shao 
AuthorDate: Mon Jul 23 21:18:17 2018 -0700

[MXNET-684] Add `cond` operator (#11760)

* Initial commit for `Ifelse`

* Address comments

* Rename ifelse to condition

* API change

* Trigger CI

* Rename condition to cond

* Fix lint
---
 benchmark/python/control_flow/foreach_rnn.py   | 195 
 benchmark/python/control_flow/while_loop_rnn.py| 213 
 docs/api/python/ndarray/contrib.md |   1 +
 docs/api/python/symbol/contrib.md  |   1 +
 python/mxnet/ndarray/contrib.py|  89 +++-
 python/mxnet/symbol/contrib.py | 146 +-
 src/operator/control_flow.cc   | 538 +
 src/operator/subgraph_op_common.cc |  28 ++
 src/operator/subgraph_op_common.h  |  62 +++
 tests/python/unittest/test_contrib_control_flow.py | 159 +-
 10 files changed, 916 insertions(+), 516 deletions(-)

diff --git a/benchmark/python/control_flow/foreach_rnn.py 
b/benchmark/python/control_flow/foreach_rnn.py
deleted file mode 100644
index 4ce7a42..000
--- a/benchmark/python/control_flow/foreach_rnn.py
+++ /dev/null
@@ -1,195 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import subprocess
-import mxnet as mx
-from mxnet import gluon
-import time
-import copy
-
-def get_gpus():
-"""
-return a list of GPUs
-"""
-try:
-re = subprocess.check_output(["nvidia-smi", "-L"], 
universal_newlines=True)
-except OSError:
-return []
-return range(len([i for i in re.split('\n') if 'GPU' in i]))
-
-class TestRNNLayer(gluon.HybridBlock):
-def __init__(self, cell, prefix=None, params=None):
-super(TestRNNLayer, self).__init__(prefix=prefix, params=params)
-self.cell = cell
-
-def hybrid_forward(self, F, inputs, states):
-out, states = F.contrib.foreach(self.cell, inputs, states)
-return out
-
-def benchmark_rnn(cell, rnn_data, states):
-ctx = rnn_data.context
-num_batches = 20
-
-# Imperative
-cell0 = copy.deepcopy(cell)
-layer0 = TestRNNLayer(cell0)
-layer0.initialize(ctx=ctx)
-
-# Hybridize
-cell1 = copy.deepcopy(cell)
-cell1.hybridize()
-layer1 = TestRNNLayer(cell1)
-layer1.initialize(ctx=ctx)
-
-# Hybridize
-cell2 = copy.deepcopy(cell)
-layer2 = TestRNNLayer(cell2)
-layer2.initialize(ctx=ctx)
-layer2.hybridize()
-layer2(rnn_data, states)
-
-# Hybridize
-cell3 = copy.deepcopy(cell)
-cell3.hybridize(static_alloc=True)
-layer3 = TestRNNLayer(cell3)
-layer3.initialize(ctx=ctx)
-
-tic = time.time()
-for i in range(num_batches):
-res0 = layer0(rnn_data, states)
-mx.nd.waitall()
-print("Imperative inference takes " + str(time.time() - tic))
-
-tic = time.time()
-for i in range(num_batches):
-res1 = layer1(rnn_data, states)
-mx.nd.waitall()
-print("Hybrid-cell inference takes " + str(time.time() - tic))
-
-tic = time.time()
-for i in range(num_batches):
-res3 = layer3(rnn_data, states)
-mx.nd.waitall()
-print("Static-hybrid-cell inference takes " + str(time.time() - tic))
-
-tic = time.time()
-for i in range(num_batches):
-res2 = layer2(rnn_data, states)
-mx.nd.waitall()
-print("Hybrid inference takes " + str(time.time() - tic))
-
-layer2.export("foreach_rnn")
-symnet = mx.symbol.load('foreach_rnn-symbol.json')
-args1 = {}
-params = layer2.collect_params()
-for key in params.keys():
-args1[key] = params[key].data()
-args1['data0'

[incubator-mxnet] branch master updated: [MXNET-323] Improve performance of broadcast ops backward pass (#11252)

2018-07-12 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 32d298b  [MXNET-323] Improve performance of broadcast ops backward 
pass (#11252)
32d298b is described below

commit 32d298bd35a1b8513c51fbe505c2ee878480cb0f
Author: Anirudh Subramanian 
AuthorDate: Thu Jul 12 22:27:30 2018 -0700

[MXNET-323] Improve performance of broadcast ops backward pass (#11252)

* Fix cached broadcast

* Fix

* Use seq_reduce_compute logic for stable sum

* Fix lint

* Add declarations

* Add elemwise binary broadcast op cuh file

* Add license for elemwise_binary_broadcast_op-inl.cuh

* Fix broadcast

* Fix indentation

* Use cpu and gpu instead of xpu
---
 src/operator/tensor/broadcast_reduce-inl.cuh   |  4 ++
 src/operator/tensor/broadcast_reduce-inl.h | 48 +++-
 .../tensor/elemwise_binary_broadcast_op-inl.cuh| 64 ++
 src/operator/tensor/elemwise_binary_broadcast_op.h | 35 +++-
 4 files changed, 135 insertions(+), 16 deletions(-)

diff --git a/src/operator/tensor/broadcast_reduce-inl.cuh 
b/src/operator/tensor/broadcast_reduce-inl.cuh
index 5c9b45f..be3d1f9 100644
--- a/src/operator/tensor/broadcast_reduce-inl.cuh
+++ b/src/operator/tensor/broadcast_reduce-inl.cuh
@@ -613,6 +613,10 @@ void Reduce(Stream *s, const TBlob& small, const 
OpReqType req,
   ReduceImpl(stream, small, req, big, workspace, 
config);
 }
 
+template 
+void ReduceWithExtraMem(Stream* s, const TBlob& small, const OpReqType 
req,
+const Tensor& workspace, const TBlob& 
big) {};
+
 template
 void Reduce(Stream *s, const TBlob& small, const OpReqType req,
 const Tensor& workspace, const TBlob& big,
diff --git a/src/operator/tensor/broadcast_reduce-inl.h 
b/src/operator/tensor/broadcast_reduce-inl.h
index 713e3f1..39c68bd 100644
--- a/src/operator/tensor/broadcast_reduce-inl.h
+++ b/src/operator/tensor/broadcast_reduce-inl.h
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include "../mshadow_op.h"
+#include "../operator_common.h"
 
 namespace mxnet {
 namespace op {
@@ -205,16 +206,57 @@ void seq_reduce_compute(const int N, const int M, const 
bool addto,
   }
 }
 
-template
-void Reduce(Stream *s, const TBlob& small, const OpReqType req,
+template 
+void seq_reduce_compute_extra_mem(const int N, const int M, const bool addto,
+  const DType* big, DType* small,
+  const Shape bshape,
+  const Shape sshape,
+  const Shape rshape,
+  const Shape rstride,
+  const index_t* ws_dptr) {
+  #pragma omp parallel for 
num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
+  for (int idx = 0; idx < N; ++idx) {
+Shape coord = unravel(idx, sshape);
+int j = ravel(coord, bshape);
+DType val, residual;
+Reducer::SetInitValue(val, residual);
+for (int k = 0; k < M; ++k) {
+  Reducer::Reduce(val, OP::Map(big[j + ws_dptr[k]]), residual);
+}
+assign(&small[idx], addto, val);
+  }
+}
+
+template 
+void Reduce(Stream* s, const TBlob& small, const OpReqType req,
 const Tensor& workspace, const TBlob& big) {
   if (req == kNullOp) return;
   Shape rshape, rstride;
   diff(small.shape_.get(), big.shape_.get(), &rshape, &rstride);
   int N = small.shape_.Size(), M = rshape.Size();
   seq_reduce_compute(
+N, M, req == kAddTo, big.dptr(), small.dptr(),
+big.shape_.get(), small.shape_.get(), rshape, rstride);
+}
+
+template 
+void ReduceWithExtraMem(Stream* s, const TBlob& small, const OpReqType 
req,
+const Tensor& workspace, const TBlob& 
big) {
+  using namespace mxnet_op;
+  if (req == kNullOp) return;
+  Shape rshape, rstride;
+  diff(small.shape_.get(), big.shape_.get(), &rshape, &rstride);
+  index_t* ws_dptr = reinterpret_cast(workspace.dptr_);
+  int N = small.shape_.Size(), M = rshape.Size();
+  #pragma omp parallel for 
num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
+  for (int k = 0; k < M; k++) {
+Shape coord = unravel(k, rshape);
+ws_dptr[k] = dot(coord, rstride);
+  }
+
+  seq_reduce_compute_extra_mem(
 N, M, req == kAddTo, big.dptr(), small.dptr(), 
big.shape_.get(),
-small.shape_.get(), rshape, rstride);
+small.shape_.get(), rshape, rstride, ws_dptr);
 }
 
 template
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op-inl.cuh 
b/src/operator/tensor/elemwise_binary_broadcast_op-inl.cuh
new file mode 100644
index 000..1998576
--- /dev/null
+++ b/src/operator/tenso

[incubator-mxnet] branch master updated: [MXNET-432] Add Foreach (#11531)

2018-07-02 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 030fbc3  [MXNET-432] Add Foreach (#11531)
030fbc3 is described below

commit 030fbc3c7baec4a0f0cce78a45aa1da00eea0b48
Author: Da Zheng 
AuthorDate: Mon Jul 2 15:26:51 2018 -0700

[MXNET-432] Add Foreach (#11531)

* Test input a graph.

* Update foreach to execute the subgraph.

* print inputs/outputs in foreach.

* Remove print.

* add test code for foreach.

* exec foreach outside the engine.

* Implements forward of foreach.

* Add support for variable numbers of inputs and outputs.

* Add a python wrapper for foreach.

* Fix the order of inputs.

* add test with lstm.

* hide C version of foreach.

* fix a bug temporarily.

* Test free variables.

* change for the new interface of InputGraph attribute.

* Add attribute to the subgraph.

* Handle free variables.

* Get all input symbols of a subgraph.

* Fix shape, dtype and storage inference.

* reorganize the output of foreach.

* Add a gluon RNN unroll with symbol foreach.

* print unnecessary print.

* have imperative and symbolic foreach.

* Fix an error after moving foreach.

* Fix imperative foreach

* Fix a minor problem.

* Use CachedOp to execute subgraph.

* update TODO.

* make foreach op use FStatefulComputeEx.

TODO we need to change stateful executor to handle subgraph.

* Add backward.

* Fix bugs.

* enable backward test in lstm.

* Fix a bug in foreach backward for free variables.

* change for the new CachedOp.

* Detect the backward computation.

* Fix bugs in foreach.

* fix tests.

* update tests.

* check state shape.

* enable nested foreach.

* remove print.

* fix a bug in test.

* handle infer storage type for backward.

* address comments.

* address comments.

* move some common functions out.

* address comments.

* fix lint.

* Fix lint.

* add doc.

* undo modification in imperative.h

* add doc and remove example code.

* fix lint.

* fix lint.

* Fix lint.

* make nd.foreach and sym.foreach consistent.

* fix compile error.

* address comments.

* update.

* check for loop only works for dense arrays.

* move control flow op out of nn/

* fix include.

* add a test in gluon.

* work for GPU.

* small fix.

* remove subgraph_name

* create loop state for reuse in the future.

* move code.

* Revert "remove subgraph_name"

This reverts commit 977f5624ad0b0dedb9dcb8629f975afc56bb1e1a.

* cut graph.

* rename new var nodes.

* Fix tests.

* Fix bugs caused by ctypes (#29)

* Add save/load json in testcases for foreach (#30)

* support subgraph in stateful executor.

* Fix compilation.

* fix a bug when a subgraph has variable nodes.

* Fix a bug of getting symbols.

* copy var nodes.

* Fix getting op states.

* fix lint error.

* address comments.

* fix lint error.

* simplify the execution of subgraph in the main thread.

* fix lint error.

* avoid waiting for computation in each iteration.

* reuse cached op for inference.

* share memory across mini-batches.

* reuse memory.

reuse memory between iterations in inference.
reuse memory between mini-batches in training.

* add tests for multiple batches.

* remove entry.

* add benchmark for foreach.

* benchmark large batch size.

* Fix the benchmark for GPU.

* address comments.

* update shape/dtype/storage inference.

* update contrib API docs.

* support nested foreach.

* use a single CachedOp for all iterations.

* use large dim.

* update benchmark.

* update benchmark.

* update benchmark.

* update benchmark.

* return symbol arrays correctly in MXSymbolCutSubgraph.

* return symbol arrays in MXSymbolGetInputSymbols.

* fix lint error.

* use cachedop to infer storage in backward.

* fix scala API.

* update comments.

* fix scala.

* fix test.

* fix attribute name.

* move benchmark.

* fix the mapping of operator inputs/outputs and subgraph inputs/outputs.

[incubator-mxnet] branch master updated: handle the case that inputs and outputs of a graph share NDArrays (#11436)

2018-07-02 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new d6a8ca7  handle the case that inputs and outputs of a graph share 
NDArrays (#11436)
d6a8ca7 is described below

commit d6a8ca79b7dd8e253fe3b81a345e8f375b6a0b38
Author: Da Zheng 
AuthorDate: Mon Jul 2 10:42:33 2018 -0700

handle the case that inputs and outputs of a graph share NDArrays (#11436)

* handle the case that inputs and outputs of a graph share NDArrays

* add test.

* test multiple times.

* don't change the state's array list.

* retrigger
---
 src/imperative/cached_op.cc | 65 +
 src/imperative/cached_op.h  |  1 +
 tests/python/unittest/test_gluon.py | 50 
 3 files changed, 96 insertions(+), 20 deletions(-)

diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc
index 5a3d44c..2181c5c 100644
--- a/src/imperative/cached_op.cc
+++ b/src/imperative/cached_op.cc
@@ -591,6 +591,7 @@ void CachedOp::StaticRunOps(
 const Context& default_ctx,
 const nnvm::Graph& g,
 const OpStatePtr& state_ptr,
+const std::vector &state_arrays,
 size_t start_nid,
 size_t end_nid) {
   static auto& createop = nnvm::Op::GetAttr("FCreateOpState");
@@ -624,7 +625,7 @@ void CachedOp::StaticRunOps(
   ndinputs.clear();
   ndinputs.reserve(node.inputs.size());
   for (const auto& j : node.inputs) {
-ndinputs.emplace_back(state.arrays[idx.entry_id(j)]);
+ndinputs.emplace_back(state_arrays[idx.entry_id(j)]);
 CHECK(!ndinputs.back()->is_none());
   }
   ndoutputs.clear();
@@ -633,7 +634,7 @@ void CachedOp::StaticRunOps(
   req.reserve(num_outputs);
   for (size_t j = 0; j < num_outputs; ++j) {
 size_t eid = idx.entry_id(i, j);
-ndoutputs.emplace_back(state.arrays[eid]);
+ndoutputs.emplace_back(state_arrays[eid]);
 req.push_back(state.array_reqs[eid]);
 CHECK(req.back() == kNullOp || !ndoutputs.back()->is_none());
   }
@@ -688,25 +689,29 @@ OpStatePtr CachedOp::StaticForward(
 StaticAllocMemory(state_ptr, recording, false);
   }
 
+  // We are going to add input and output arrays to the array list.
+  // The input and output arrays should only be valid for this run,
+  // so we shouldn't modify the state's array list.
+  auto arrays = state.arrays;
   if (config_.static_shape) {
 for (auto i : config_.param_indices) {
   auto nid = idx.input_nodes()[i];
-  if (!state.arrays[idx.entry_id(nid, 0)]->IsSame(*inputs[i])) {
+  if (!arrays[idx.entry_id(nid, 0)]->IsSame(*inputs[i])) {
 match = false;
 auto ptr = &state.buff[idx.entry_id(nid, 0)];
-CHECK_EQ(state.arrays[idx.entry_id(nid, 0)], ptr);
-*state.arrays[idx.entry_id(nid, 0)] = *inputs[i];
+CHECK_EQ(arrays[idx.entry_id(nid, 0)], ptr);
+*arrays[idx.entry_id(nid, 0)] = *inputs[i];
 state.dynamic_entries[idx.entry_id(nid, 0)] = false;
   }
 }
 for (auto i : config_.data_indices) {
   auto eid = idx.entry_id(idx.input_nodes()[i], 0);
-  state.arrays[eid] = inputs[i];
+  arrays[eid] = inputs[i];
 }
   } else {
 for (size_t i = 0; i < num_inputs(); ++i) {
   auto nid = idx.input_nodes()[i];
-  state.arrays[idx.entry_id(nid, 0)] = inputs[i];
+  arrays[idx.entry_id(nid, 0)] = inputs[i];
 }
   }
 
@@ -720,13 +725,16 @@ OpStatePtr CachedOp::StaticForward(
 
   for (size_t i = 0; i < outputs.size(); ++i) {
 auto eid = idx.entry_id(idx.outputs()[i]);
-state.arrays[eid] = outputs[i];
+// An input and an output may share the same array.
+if (!arrays[eid]->is_none())
+  *outputs[i] = arrays[eid]->Detach();
+arrays[eid] = outputs[i];
 if (!outputs[i]->is_none()) continue;
 *outputs[i] = NDArray(static_cast(stypes[eid]),
   shapes[eid], default_ctx, true, dtypes[eid]);
   }
 
-  StaticRunOps(default_ctx, g, state_ptr, 0, idx.num_nodes());
+  StaticRunOps(default_ctx, g, state_ptr, arrays, 0, idx.num_nodes());
 
   return recording ? state_ptr : OpStatePtr();
 }
@@ -891,7 +899,11 @@ void CachedOp::DynamicBackward(
   }
   for (size_t i = 0, j = num_forward_outputs; i < reqs.size(); ++i) {
 if (reqs[i] == kNullOp) continue;
-arrays[idx.entry_id(idx.outputs()[j++])] = outputs[i];
+auto eid = idx.entry_id(idx.outputs()[j++]);
+// An input and an output may share the same array.
+if (!arrays[eid]->is_none())
+  *outputs[i] = arrays[eid]->Detach();
+arrays[eid] = outputs[i];
   }
 
   // Allocate NDArrays
@@ -952,6 +964,15 @@ void CachedOp::StaticBackward(
 StaticAllocMemory(state_ptr, t

[incubator-mxnet] branch master updated: [MXNET-551] Test CreateMKLDNNMem/CommitOutput (#11308)

2018-06-26 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new e4bf646  [MXNET-551] Test CreateMKLDNNMem/CommitOutput (#11308)
e4bf646 is described below

commit e4bf6465cf352507538a29e2aeb05d4772df9690
Author: Alexander Zai 
AuthorDate: Tue Jun 26 11:00:47 2018 -0700

[MXNET-551] Test CreateMKLDNNMem/CommitOutput (#11308)

* refactor copyfrom

* add boilerplate

* rename to MKLDNNCopy

* write to temp memory

* reorder mkldnn / views

* return memory from GetMKLDNNData

* add kaddto to unit test

* move orig output before creatingnewmem

* coerce memory if shape does not fit

* use MKLDNNCopy in commit

* uncomment addto test

* switch order of mkldnnsum params

* improving logging

* wait to read after copying arr

* remove extra white spaces

* remove extra white space

* remove unused var

* reorder output

* do not write to views

* remove shape check in test

* use input pdesc

* remove unused var

* fix merge

* put inplace in separate loop

* use two mem

* use sum_pd when calling CreateMKLDNNData

* reorder sum shapes if needed

* comment out getsumpd

* use MKLDNNCopy helper to reshape mem

* remove getsumpd

* use output mem for createmem

* remove todo

* waittoread output

* do not attempt to shape output

* use correct arr as input

* revert commit change to ps-lite

* revert change to tvm

* fix lint

* add comment to test

* reduce calls to get_primitive_desc

* skip tests that reorder2default

* push_back to inputs

* skip if view/mkldnn

* add noop test

* pass input ptr for write in place

* allow empty
---
 src/ndarray/ndarray.cc   |  72 +---
 src/operator/nn/mkldnn/mkldnn_base-inl.h |   1 +
 src/operator/nn/mkldnn/mkldnn_base.cc| 103 +++---
 src/operator/nn/mkldnn/mkldnn_sum.cc |  20 -
 tests/cpp/operator/mkldnn.cc | 142 +--
 5 files changed, 242 insertions(+), 96 deletions(-)

diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 94d3d90..e90fb63 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -482,7 +482,7 @@ const mkldnn::memory *NDArray::GetMKLDNNData(
   if (mem->get_primitive_desc() == desc
   || (desc1.data.format == GetDefaultFormat(desc1)
 && desc2.data.format == GetDefaultFormat(desc2))) {
-return GetMKLDNNExact(ptr_->mkl_mem_->GetRaw(), desc);
+return GetMKLDNNExact(mem, desc);
   } else {
 return nullptr;
   }
@@ -638,7 +638,6 @@ void NDArray::CopyFrom(const mkldnn::memory &mem) {
 
   CHECK(mem.get_primitive_desc().get_size() == shape().Size() * 
GetTypeSize(dtype_))
   << "The size of NDArray doesn't match the requested MKLDNN memory desc";
-  MKLDNNStream *stream = MKLDNNStream::Get();
   // If this array uses MKLDNN layout, we have to make sure it's not a view.
   // Otherwise, we'll have to change the layout inside the array.
 
@@ -646,74 +645,7 @@ void NDArray::CopyFrom(const mkldnn::memory &mem) {
 ptr_->Reorder2Default();
 
   const mkldnn::memory *this_mem = GetMKLDNNData();
-  mkldnn::memory::primitive_desc from_pd = mem.get_primitive_desc();
-  mkldnn::memory::desc from_desc = from_pd.desc();
-  mkldnn::memory::primitive_desc this_pd = this_mem->get_primitive_desc();
-  mkldnn::memory::desc this_desc = this_pd.desc();
-  mkldnn_memory_format_t from_def_format = GetDefaultFormat(from_desc);
-  mkldnn_memory_format_t this_def_format = GetDefaultFormat(this_desc);
-  if (IsView()) {
-// Sliced array must use the default layout.
-CHECK_EQ(GetDefaultFormat(this_desc), this_desc.data.format);
-  }
-  // It's possible that the memory and the NDArray don't have the same shape.
-  if (!same_shape(this_desc, from_desc)
-  // If the source memory uses the default layout, we can reshape directly.
-  && from_def_format == from_desc.data.format) {
-// In this case, we can simply create a new MKLDNN memory for the required
-// shape.
-mkldnn::memory::dims dims(this_desc.data.dims,
-  this_desc.data.dims + this_desc.data.ndims);
-auto this_dtype = 
static_cast(this_desc.data.data_type);
-auto this_format = 
static_cast(GetDefaultFormat(this_desc));
-mkldnn::memory::desc data_md(dims, this_dtype, this_format);
-mkldnn::memory::primitive_desc pd(data_md, from_pd.get_engine());
-mkldnn_mem_ptr tmp_mem(

[incubator-mxnet] branch master updated: add vRNN and dropout (#11399)

2018-06-26 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 0538ad9  add vRNN and dropout (#11399)
0538ad9 is described below

commit 0538ad9115e0856c2f45fcff479a9af431b31f76
Author: Hao Li 
AuthorDate: Wed Jun 27 01:43:57 2018 +0800

add vRNN and dropout (#11399)
---
 example/rnn/bucketing/cudnn_rnn_bucketing.py |  16 +-
 src/operator/rnn-inl.h   |  74 ++-
 src/operator/rnn_impl.h  | 947 ++-
 tests/python/unittest/test_operator.py   | 116 +++-
 4 files changed, 1099 insertions(+), 54 deletions(-)

diff --git a/example/rnn/bucketing/cudnn_rnn_bucketing.py 
b/example/rnn/bucketing/cudnn_rnn_bucketing.py
index 29a66a8..5825290 100644
--- a/example/rnn/bucketing/cudnn_rnn_bucketing.py
+++ b/example/rnn/bucketing/cudnn_rnn_bucketing.py
@@ -66,7 +66,7 @@ parser.add_argument('--stack-rnn', default=False,
 parser.add_argument('--dropout', type=float, default='0.0',
 help='dropout probability (1.0 - keep probability)')
 parser.add_argument('--rnntype', type=str, default='lstm',
-help='rnn type: gru and lstm are supported')
+help='rnn type: gru, lstm, rnn_tanh and rnn_relu are 
supported')
 
 #buckets = [32]
 buckets = [10, 20, 30, 40, 50, 60]
@@ -188,6 +188,20 @@ def test(args):
 cell,
 mx.rnn.GRUCell(num_hidden=args.num_hidden, 
prefix='%s_%dr0_'%(args.rnntype,i)),
 output_prefix='bi_%s_%d'%(args.rnntype,i))
+elif args.rnntype == 'rnn_tanh':
+cell = mx.rnn.RNNCell(num_hidden=args.num_hidden, 
activation='tanh', prefix='%s_%dl0_'%(args.rnntype,i))
+if args.bidirectional:
+cell = mx.rnn.BidirectionalCell(
+cell,
+mx.rnn.RNNCell(num_hidden=args.num_hidden, 
activation='tanh', prefix='%s_%dr0_'%(args.rnntype,i)),
+output_prefix='bi_%s_%d'%(args.rnntype,i))
+elif args.rnntype == 'rnn_relu':
+cell = mx.rnn.RNNCell(num_hidden=args.num_hidden, 
activation='relu', prefix='%s_%dl0_'%(args.rnntype,i))
+if args.bidirectional:
+cell = mx.rnn.BidirectionalCell(
+cell,
+mx.rnn.RNNCell(num_hidden=args.num_hidden, 
activation='relu', prefix='%s_%dr0_'%(args.rnntype,i)),
+output_prefix='bi_%s_%d'%(args.rnntype,i))
 
 stack.add(cell)
 
diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h
index 9953173..1f905ed 100644
--- a/src/operator/rnn-inl.h
+++ b/src/operator/rnn-inl.h
@@ -99,10 +99,6 @@ inline size_t GetRNNWorkspaceSize(int seq_length,
   int mode) {
   size_t size = 0;
   switch (mode) {
-case rnn_enum::kRnnRelu:
-case rnn_enum::kRnnTanh:
-  LOG(FATAL) << "Only LSTM and GRU are supported at the moment";
-  break;
 case rnn_enum::kLstm:
   size = (seq_length + 1) * batch_size * hidden_size * 4 + batch_size * 
hidden_size * 2
  + seq_length * batch_size * hidden_size * direction + hidden_size 
* seq_length * 8;
@@ -110,6 +106,10 @@ inline size_t GetRNNWorkspaceSize(int seq_length,
 case rnn_enum::kGru:
   size = seq_length * batch_size * hidden_size * direction * 4 + 
batch_size * hidden_size * 8;
   break;
+case rnn_enum::kRnnRelu:
+case rnn_enum::kRnnTanh:
+  size = seq_length * batch_size * hidden_size * direction * 2 + 
batch_size * hidden_size * 4;
+  break;
 default:
   LOG(FATAL) << "unknown RNN mode " << mode;
   break;
@@ -125,18 +125,20 @@ inline size_t GetRNNReserveSpaceSize(int num_layer,
  int mode) {
   size_t size = 0;
   switch (mode) {
-case rnn_enum::kRnnRelu:
-case rnn_enum::kRnnTanh:
-  LOG(FATAL) << "Only LSTM and GRU are supported at the moment";
-  break;
 case rnn_enum::kLstm:
-  size = num_layer * direction * seq_length * batch_size * hidden_size * 6;
+  size = direction * seq_length * batch_size * hidden_size * (num_layer * 
7 - 1);
   break;
 case rnn_enum::kGru:
-  size = seq_length * batch_size * hidden_size * direction * num_layer * 8 
+
+  size = seq_length * batch_size * hidden_size * direction * (num_layer * 
9 - 1) +
   batch_size * hidden_size * direction * 9 + hidden_size * seq_length 
* 6 +
   seq_lengt

[incubator-mxnet] branch master updated: [MXNET-349] Histogram Operator (#10931)

2018-06-25 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new ed7e360  [MXNET-349] Histogram Operator (#10931)
ed7e360 is described below

commit ed7e3602a8046646582c0c681b70d9556f5fa0a4
Author: Hao Jin 
AuthorDate: Mon Jun 25 16:45:32 2018 -0700

[MXNET-349] Histogram Operator (#10931)

* implementation of histogram operator

* address code reviews and code re-design

* add exception for invalid inputs

* address code reviews

* add symbol and symbolic forward check for histogram
---
 python/mxnet/ndarray/ndarray.py  |  35 +-
 python/mxnet/symbol/symbol.py|  30 -
 src/common/cuda_utils.h  |  30 +
 src/operator/tensor/histogram-inl.h  | 172 +++
 src/operator/tensor/histogram.cc | 159 +
 src/operator/tensor/histogram.cu | 111 +
 src/operator/tensor/util/tensor_util-inl.cuh |   4 +-
 tests/python/unittest/test_operator.py   |  34 ++
 8 files changed, 571 insertions(+), 4 deletions(-)

diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py
index f017d7e..002ce3e 100644
--- a/python/mxnet/ndarray/ndarray.py
+++ b/python/mxnet/ndarray/ndarray.py
@@ -46,7 +46,7 @@ __all__ = ["NDArray", "concatenate", "_DTYPE_NP_TO_MX", 
"_DTYPE_MX_TO_NP", "_GRA
"ones", "add", "arange", "eye", "divide", "equal", "full", 
"greater", "greater_equal",
"imdecode", "lesser", "lesser_equal", "logical_and", "logical_or", 
"logical_xor",
"maximum", "minimum", "moveaxis", "modulo", "multiply", 
"not_equal", "onehot_encode",
-   "power", "subtract", "true_divide", "waitall", "_new_empty_handle"]
+   "power", "subtract", "true_divide", "waitall", "_new_empty_handle", 
"histogram"]
 
 _STORAGE_TYPE_UNDEFINED = -1
 _STORAGE_TYPE_DEFAULT = 0
@@ -3740,3 +3740,36 @@ def empty(shape, ctx=None, dtype=None):
 if dtype is None:
 dtype = mx_real_t
 return NDArray(handle=_new_alloc_handle(shape, ctx, False, dtype))
+
+
+# pylint: disable= redefined-builtin
+def histogram(a, bins=10, range=None):
+"""Compute the histogram of the input data.
+
+Parameters
+----------
+a : NDArray
+Input data. The histogram is computed over the flattened array.
+bins : int or sequence of scalars
+If bins is an int, it defines the number of equal-width bins in the
+given range (10, by default). If bins is a sequence, it defines the 
bin edges,
+including the rightmost edge, allowing for non-uniform bin widths.
+range : (float, float), optional
+The lower and upper range of the bins. If not provided, range is 
simply (a.min(), a.max()).
+Values outside the range are ignored. The first element of the range 
must be less than or
+equal to the second. range affects the automatic bin computation as 
well, the range will
+be equally divided by the number of bins.
+"""
+
+# pylint: disable= no-member, protected-access
+if isinstance(bins, NDArray):
+return _internal._histogram(data=a, bins=bins)
+elif isinstance(bins, integer_types):
+if range is None:
+warnings.warn("range is not specified, using numpy's result "
+  "to ensure consistency with numpy")
+res, bin_bounds = np.histogram(a.asnumpy(), bins=bins)
+return array(res), array(bin_bounds)
+return _internal._histogram(data=a, bin_cnt=bins, range=range)
+raise ValueError("bins argument should be either an integer or an NDArray")
+# pylint: enable= no-member, protected-access, redefined-builtin
diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py
index 7e5b527..c5e2f5c 100644
--- a/python/mxnet/symbol/symbol.py
+++ b/python/mxnet/symbol/symbol.py
@@ -34,7 +34,7 @@ import numpy as _numpy
 
 from ..attribute import AttrScope
 from ..base import _LIB, numeric_types, c_array, c_array_buf, c_str, 
c_str_array, c_handle_array
-from ..base import mx_uint, py_str, string_types
+from ..base import mx_uint, py_str, string_types, integer_types
 from ..base import NDArrayHandle, ExecutorHandle, SymbolHandle
 from ..base import check_call, MXNetError, NotImplementedForSymbol
 from ..context import Context,

[incubator-mxnet] branch master updated: [MXNET-555] Add subgraph storage type inference to CachedOp (#11306)

2018-06-20 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 7a83a75  [MXNET-555] Add subgraph storage type inference to CachedOp  
(#11306)
7a83a75 is described below

commit 7a83a7589e46cae1ec98d43c71d001797ad57b08
Author: Haibin Lin 
AuthorDate: Wed Jun 20 17:31:35 2018 -0700

[MXNET-555] Add subgraph storage type inference to CachedOp  (#11306)

* copy paste

* pass unit test

* remove lock

* save all inputs and outputs

* add one more test

* update test

* update backward stype inference

* + fwd inference
---
 src/imperative/cached_op.cc | 147 +---
 src/imperative/cached_op.h  |  24 --
 src/imperative/imperative_utils.h   |   1 -
 src/operator/operator_common.h  |   4 +-
 tests/python/unittest/test_gluon.py |  57 ++
 5 files changed, 197 insertions(+), 36 deletions(-)

diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc
index c0e5e83..5a3d44c 100644
--- a/src/imperative/cached_op.cc
+++ b/src/imperative/cached_op.cc
@@ -22,6 +22,7 @@
 #include "./cached_op.h"
 #include "../executor/exec_pass.h"
 #include "../profiler/profiler.h"
+#include "../operator/operator_common.h"
 
 
 namespace mxnet {
@@ -95,7 +96,6 @@ CachedOp::CachedOp(
   using namespace imperative;
   static const std::vector zero_ops{Op::Get("zeros_like"), 
Op::Get("_zeros")};
   static const auto _copy = Op::Get("_copy");
-
   config_.Init(flags);
 
   if (config_.static_shape) {
@@ -204,26 +204,17 @@ CachedOp::CachedOp(
 size_t num_forward_outputs = num_outputs();
 for (uint32_t i = 0; i < ograd_entries_.size(); ++i) {
   if (!idx.exist(ograd_entries_[i].node.get())) continue;
-  auto eid = idx.entry_id(ograd_entries_[i]);
-  if (ref_count[eid] > 0) {
-bwd_ograd_dep_.push_back(i);
-  }
+  bwd_ograd_dep_.push_back(i);
 }
 save_inputs_.resize(num_forward_inputs, false);
 for (uint32_t i = 0; i < num_forward_inputs; ++i) {
-  auto eid = idx.entry_id(idx.input_nodes()[i], 0);
-  if (ref_count[eid] > 0) {
-save_inputs_[i] = true;
-bwd_in_dep_.push_back(i);
-  }
+  save_inputs_[i] = true;
+  bwd_in_dep_.push_back(i);
 }
 save_outputs_.resize(idx.outputs().size(), false);
 for (uint32_t i = 0; i < num_forward_outputs; ++i) {
-  auto eid = idx.entry_id(idx.outputs()[i]);
-  if (ref_count[eid] > 0) {
-save_outputs_[i] = true;
-bwd_out_dep_.push_back(i);
-  }
+  save_outputs_[i] = true;
+  bwd_out_dep_.push_back(i);
 }
   }
 }
@@ -233,7 +224,7 @@ CachedOp::~CachedOp() {
 
 std::vector CachedOp::Gradient(
 const nnvm::NodePtr& node,
-const std::vector& ograds) {
+const std::vector& ograds) const {
   using namespace nnvm;
   static const auto _backward_CachedOp = Op::Get("_backward_CachedOp");
   static const auto _NoGrad = Op::Get("_NoGradient");
@@ -328,6 +319,27 @@ bool CachedOp::SetForwardGraph(
   return false;
 }
 
+// Utility function to set backward input eids
+void SetBackwardInputEid(const std::vector& bwd_in_dep,
+ const std::vector& bwd_out_dep,
+ const std::vector& bwd_ograd_dep,
+ const std::vector& ograd_entries,
+ const nnvm::IndexedGraph& idx,
+ std::vector *bwd_input_eid) {
+  for (const auto& i : bwd_ograd_dep) {
+auto eid = idx.entry_id(ograd_entries[i]);
+bwd_input_eid->push_back(eid);
+  }
+  for (const auto& i : bwd_in_dep) {
+auto eid = idx.entry_id(idx.input_nodes()[i], 0);
+bwd_input_eid->push_back(eid);
+  }
+  for (const auto& i : bwd_out_dep) {
+auto eid = idx.entry_id(idx.outputs()[i]);
+bwd_input_eid->push_back(eid);
+  }
+}
+
 bool CachedOp::SetBackwardGraph(
 GraphInfo* info,
 const std::vector& reqs,
@@ -356,18 +368,8 @@ bool CachedOp::SetBackwardGraph(
 
   if (info->bwd_input_eid.size() != inputs.size()) {
 info->bwd_input_eid.clear();
-for (const auto& i : bwd_ograd_dep_) {
-  auto eid = idx.entry_id(ograd_entries_[i]);
-  info->bwd_input_eid.push_back(eid);
-}
-for (const auto& i : bwd_in_dep_) {
-  auto eid = idx.entry_id(idx.input_nodes()[i], 0);
-  info->bwd_input_eid.push_back(eid);
-}
-for (const auto& i : bwd_out_dep_) {
-  auto eid = idx.entry_id(idx.outputs()[i]);
-  info->bwd_input_eid.push_back(eid);
-}
+SetBackwardInputEid(bwd_in_dep_, bwd_out_dep_, bwd_ograd_dep_,
+ograd_entri

[incubator-mxnet] branch master updated: Fix save load doc (#11345)

2018-06-20 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 7aa8a0c  Fix save load doc (#11345)
7aa8a0c is described below

commit 7aa8a0c32dd6656cfc822c1fd4866f1bbe917160
Author: Anirudh Subramanian 
AuthorDate: Wed Jun 20 14:18:04 2018 -0700

Fix save load doc (#11345)
---
 python/mxnet/gluon/block.py | 34 +-
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py
index 0845669..3dd7e94 100644
--- a/python/mxnet/gluon/block.py
+++ b/python/mxnet/gluon/block.py
@@ -310,23 +310,21 @@ class Block(object):
 
 def save_parameters(self, filename):
 """Save parameters to file.
-This function is to be used to save parameters of a Gluon model, note 
that
-the saved parameters are not meant to be loaded in a different 
language binding for now.
-Saving parameters using `.save_parameters()` is different than
-`.collect_params().save()` and `.save_params()`, which are deprecated 
ways
-to save the parameters of a model and should be avoided.
-
-If your model is hybridizable and you want to export a serialized 
version of the
-structure of the model as well as its parameters please refer to
-:py:meth:`HybridBlock.export`. Such model can then be loaded back in 
any language binding
-or even in Gluon using a :py:class:`SymbolBlock`.
-Refer to this tutorial for a complete overview of saving/loading 
models with
-MXNet: 
https://mxnet.incubator.apache.org/tutorials/gluon/save_load_params.html
+
+Saved parameters can only be loaded with `load_parameters`. Note that 
this
+method only saves parameters, not model structure. If you want to save
+model structures, please use :py:meth:`HybridBlock.export`.
 
 Parameters
 --
 filename : str
 Path to file.
+
+References
+--
+`Saving and Loading Gluon Models
+
+
<https://mxnet.incubator.apache.org/tutorials/gluon/save_load_params.html>`_
 """
 params = self._collect_params_with_prefix()
 arg_dict = {key : val._reduce() for key, val in params.items()}
@@ -349,11 +347,7 @@ class Block(object):
 
 def load_parameters(self, filename, ctx=None, allow_missing=False,
 ignore_extra=False):
-"""Load parameters from file.
-This function is to be used to load parameters of a Gluon model that 
were
-saved using the `.save_parameters()` function. Any other use is 
undefined behaviour.
-Refer to this tutorial for a complete overview of saving/loading 
models with
-MXNet: 
https://mxnet.incubator.apache.org/tutorials/gluon/save_load_params.html
+"""Load parameters from file previously saved by `save_parameters`.
 
 Parameters
 --
@@ -366,6 +360,12 @@ class Block(object):
 ignore_extra : bool, default False
 Whether to silently ignore parameters from the file that are not
 present in this Block.
+
+References
+--
+`Saving and Loading Gluon Models
+
+
<https://mxnet.incubator.apache.org/tutorials/gluon/save_load_params.html>`_
 """
 loaded = ndarray.load(filename)
 params = self._collect_params_with_prefix()

[incubator-mxnet] branch master updated: [MXNET-498] Test MKLDNN backward operators (#11232)

2018-06-20 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 12cb4b9  [MXNET-498] Test MKLDNN backward operators  (#11232)
12cb4b9 is described below

commit 12cb4b9cee5e1c4c39dff18d728dabf70b747c49
Author: Alexander Zai 
AuthorDate: Wed Jun 20 10:57:50 2018 -0700

[MXNET-498] Test MKLDNN backward operators  (#11232)

* add act backwards test

* use only verifyfn template

* fix param name

* update number of inputs

* fix assertion for act backwards

* limit rand num range

* change to assert

* wait to read on correct vector

* add writeinplace test

* fix params

* add copy backwards test

* add missing fixture

* fix lint

* add sum backwards verify

* use correct num of inputs for sum backwards

* switch input / output

* wait for both outputs

* limit input/output

* limit input/outputs for relu/sum

* fix var source

* reorder backwards if view

* add another entry to reqs in ttest

* uncomment write in place sumbackwards

* refactor testunary and testbinary into testop

* remove special testbackwardsop and use testop

* fill reqs vector with num of outputs

* change req size to num outputs

* create multiple output ndarrays

* wait for all outputs

* remove unused comments

* remove redundant VerifyCopyResult method

* remove redundant VerifySumResult

* remove unused var

* use only InitDefaultArray

* move MKLDNNSum near copy test

* use fallback compute for backwards sum

* fix verifydefmem test

* fix lint

* move MKLDNNSum test back to bottom
---
 src/operator/nn/mkldnn/mkldnn_act.cc|  16 +-
 src/operator/tensor/elemwise_binary_op_basic.cc |   5 +
 tests/cpp/operator/mkldnn.cc| 298 +---
 3 files changed, 174 insertions(+), 145 deletions(-)

diff --git a/src/operator/nn/mkldnn/mkldnn_act.cc 
b/src/operator/nn/mkldnn/mkldnn_act.cc
index fae72bd..b21d123 100644
--- a/src/operator/nn/mkldnn/mkldnn_act.cc
+++ b/src/operator/nn/mkldnn/mkldnn_act.cc
@@ -184,14 +184,22 @@ void MKLDNNActivationBackward(const nnvm::NodeAttrs& 
attrs, const OpContext &ctx
 return;
   }
 
+  NDArray out_buffer = out_grad;
+  if (out_grad.IsView() && out_grad.IsMKLDNNData())
+out_buffer = out_grad.Reorder2Default();
+
+  NDArray in_buffer = in_data;
+  if (in_data.IsView() && in_data.IsMKLDNNData())
+in_buffer = in_data.Reorder2Default();
+
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
   TmpMemMgr::Get()->Init(ctx.requested[activation::kTempSpace]);
-  auto diff_dst_memory = out_grad.GetMKLDNNData();
-  auto input_mem = in_data.GetMKLDNNData();
+  auto diff_dst_memory = out_buffer.GetMKLDNNData();
+  auto input_mem = in_buffer.GetMKLDNNData();
   // We need to make sure the two inputs to eltwise_backward has the same 
memory
   // descriptor. Otherwise, the perf will suffer.
   if (input_mem->get_primitive_desc() != diff_dst_memory->get_primitive_desc())
-input_mem = 
in_data.GetMKLDNNDataReorder(diff_dst_memory->get_primitive_desc());
+input_mem = 
in_buffer.GetMKLDNNDataReorder(diff_dst_memory->get_primitive_desc());
   mkldnn::memory::primitive_desc data_mpd = input_mem->get_primitive_desc();
   mkldnn::memory::desc data_md = data_mpd.desc();
   mkldnn::memory::desc diff_md = diff_dst_memory->get_primitive_desc().desc();
@@ -201,7 +209,7 @@ void MKLDNNActivationBackward(const nnvm::NodeAttrs& attrs, 
const OpContext &ctx
   auto alg = GetMKLDNNActAlgo(param);
   mkldnn_output_t diff_src_memory;
 
-  MSHADOW_REAL_TYPE_SWITCH(in_data.dtype(), DType, {
+  MSHADOW_REAL_TYPE_SWITCH(in_buffer.dtype(), DType, {
 DType alpha = 0;
 mkldnn::eltwise_forward::desc fw_desc(mkldnn::prop_kind::forward_training,
   alg, data_md, alpha);
diff --git a/src/operator/tensor/elemwise_binary_op_basic.cc 
b/src/operator/tensor/elemwise_binary_op_basic.cc
index 9b5b9d3..6fc1ebb 100644
--- a/src/operator/tensor/elemwise_binary_op_basic.cc
+++ b/src/operator/tensor/elemwise_binary_op_basic.cc
@@ -111,6 +111,11 @@ static void _backward_ElemwiseAddEx(const nnvm::NodeAttrs& 
attrs,
 MKLDNNCopy(attrs, ctx, inputs[0], req[0], outputs[0]);
 MKLDNNCopy(attrs, ctx, inputs[0], req[1], outputs[1]);
 return;
+  } else if (common::ContainsOnlyStorage(inputs, kDefaultStorage)) {
+FallBackCompute(
+ElemwiseBinaryOp::BackwardUseNone,
+attrs, ctx, inputs, req, outputs);
+return;
   }
 #endif
   ElemwiseBinaryOp::BackwardUse

[incubator-mxnet] branch master updated: Add standard ResNet data augmentation for ImageRecordIter (#11027)

2018-06-19 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new ccee176  Add standard ResNet data augmentation for ImageRecordIter 
(#11027)
ccee176 is described below

commit ccee17672b23fa864f5c2e67d6bcea5ccff2979e
Author: Tong He 
AuthorDate: Tue Jun 19 15:23:19 2018 -0700

Add standard ResNet data augmentation for ImageRecordIter (#11027)

* add resnet augmentation

* add test

* fix scope

* fix warning

* fix lint

* fix lint

* add color jitter and pca noise

* fix center crop

* merge

* fix lint

* Trigger CI

* fix

* fix augmentation implementation

* add checks for parameters

* modify training script

* fix compile error

* Trigger CI

* Trigger CI

* modify error message

* Trigger CI

* Trigger CI

* Trigger CI

* improve script in example

* fix script

* clear code

* Trigger CI

* set min_aspect_ratio to optional, move rotation and pad before random 
resized crop

* fix

* Trigger CI

* Trigger CI

* Trigger CI

* fix default values

* Trigger CI
---
 example/image-classification/common/data.py|  48 +++--
 example/image-classification/train_imagenet.py |   4 +-
 src/io/image_aug_default.cc| 241 +++--
 tests/python/train/test_resnet_aug.py  | 173 ++
 4 files changed, 435 insertions(+), 31 deletions(-)

diff --git a/example/image-classification/common/data.py 
b/example/image-classification/common/data.py
index 05f5ddc..bfaadb3 100755
--- a/example/image-classification/common/data.py
+++ b/example/image-classification/common/data.py
@@ -43,9 +43,9 @@ def add_data_args(parser):
 def add_data_aug_args(parser):
 aug = parser.add_argument_group(
 'Image augmentations', 'implemented in src/io/image_aug_default.cc')
-aug.add_argument('--random-crop', type=int, default=1,
+aug.add_argument('--random-crop', type=int, default=0,
  help='if or not randomly crop the image')
-aug.add_argument('--random-mirror', type=int, default=1,
+aug.add_argument('--random-mirror', type=int, default=0,
  help='if or not randomly flip horizontally')
 aug.add_argument('--max-random-h', type=int, default=0,
  help='max change of hue, whose range is [0, 180]')
@@ -53,8 +53,13 @@ def add_data_aug_args(parser):
  help='max change of saturation, whose range is [0, 255]')
 aug.add_argument('--max-random-l', type=int, default=0,
  help='max change of intensity, whose range is [0, 255]')
+aug.add_argument('--min-random-aspect-ratio', type=float, default=None,
+ help='min value of aspect ratio, whose value is either 
None or a positive value.')
 aug.add_argument('--max-random-aspect-ratio', type=float, default=0,
- help='max change of aspect ratio, whose range is [0, 1]')
+ help='max value of aspect ratio. If 
min_random_aspect_ratio is None, '
+  'the aspect ratio range is 
[1-max_random_aspect_ratio, '
+  '1+max_random_aspect_ratio], otherwise it is '
+  '[min_random_aspect_ratio, 
max_random_aspect_ratio].')
 aug.add_argument('--max-random-rotate-angle', type=int, default=0,
  help='max angle to rotate, whose range is [0, 360]')
 aug.add_argument('--max-random-shear-ratio', type=float, default=0,
@@ -63,16 +68,28 @@ def add_data_aug_args(parser):
  help='max ratio to scale')
 aug.add_argument('--min-random-scale', type=float, default=1,
  help='min ratio to scale, should >= img_size/input_shape. 
otherwise use --pad-size')
+aug.add_argument('--max-random-area', type=float, default=1,
+ help='max area to crop in random resized crop, whose 
range is [0, 1]')
+aug.add_argument('--min-random-area', type=float, default=1,
+ help='min area to crop in random resized crop, whose 
range is [0, 1]')
+aug.add_argument('--brightness', type=float, default=0,
+ help='brightness jittering, whose range is [0, 1]')
+aug.add_argument('--contrast', type=f

[incubator-mxnet] branch master updated: [MXNET-514] Add clip_global_norm(row_sparse_grad). Fix row_sparse_param.save(). Fix trainer init_kvstore (#11266)

2018-06-18 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 9226d91  [MXNET-514] Add clip_global_norm(row_sparse_grad). Fix 
row_sparse_param.save(). Fix trainer init_kvstore (#11266)
9226d91 is described below

commit 9226d9159259dc6462031f04bf3e6a3d1884905a
Author: Haibin Lin 
AuthorDate: Mon Jun 18 11:59:43 2018 -0700

[MXNET-514] Add clip_global_norm(row_sparse_grad). Fix 
row_sparse_param.save(). Fix trainer init_kvstore (#11266)

* clip sparse grad. fix _reduce for rowsparse param

* fix kvstore init for local kv

* trigger
---
 python/mxnet/gluon/parameter.py | 10 +++---
 python/mxnet/gluon/trainer.py   |  2 +-
 python/mxnet/gluon/utils.py |  8 +++--
 tests/python/unittest/test_gluon.py | 47 +++-
 tests/python/unittest/test_gluon_trainer.py | 48 -
 5 files changed, 66 insertions(+), 49 deletions(-)

diff --git a/python/mxnet/gluon/parameter.py b/python/mxnet/gluon/parameter.py
index 73fca60..0c6aae9 100644
--- a/python/mxnet/gluon/parameter.py
+++ b/python/mxnet/gluon/parameter.py
@@ -310,14 +310,16 @@ class Parameter(object):
 self._grad, self.grad_req)
 
 def _reduce(self):
-"""Reduce data from multiple context."""
+"""Reduce data from multiple context to cpu."""
+ctx = context.cpu()
 if self._stype == 'default':
 block = self.list_data()
-data = ndarray.add_n(*(w.copyto(context.cpu()) for w in block)) / 
len(block)
+data = ndarray.add_n(*(w.copyto(ctx) for w in block)) / len(block)
 else:
 # fetch all rows for 'row_sparse' param
-all_row_ids = ndarray.arange(0, self.shape[0], dtype='int64', 
ctx=context.cpu())
-data = self.row_sparse_data(all_row_ids)
+all_row_ids = ndarray.arange(0, self.shape[0], dtype='int64', 
ctx=ctx)
+data = ndarray.zeros(self.shape, stype='row_sparse', ctx=ctx)
+self._trainer._row_sparse_pull(self, data, all_row_ids)
 return data
 
 def initialize(self, init=None, ctx=None, 
default_init=initializer.Uniform(),
diff --git a/python/mxnet/gluon/trainer.py b/python/mxnet/gluon/trainer.py
index ef20109..02d68f0 100644
--- a/python/mxnet/gluon/trainer.py
+++ b/python/mxnet/gluon/trainer.py
@@ -152,7 +152,6 @@ class Trainer(object):
 
 def _init_kvstore(self):
 """Create kvstore."""
-arg_arrays = {}
 config = self._kvstore_params
 if self._contains_sparse:
 kvstore, update_on_kvstore = 
_create_sparse_kvstore(config['kvstore'])
@@ -162,6 +161,7 @@ class Trainer(object):
"gradients and/or sparse weights are 
present for "
"Parameter '%s'."%param.name)
 else:
+arg_arrays = {param.name: param.data(self._contexts[0]) for param 
in self._params}
 kvstore, update_on_kvstore = _create_kvstore(config['kvstore'], 
len(self._contexts),
  arg_arrays)
 if config['update_on_kvstore'] is not None:
diff --git a/python/mxnet/gluon/utils.py b/python/mxnet/gluon/utils.py
index 06b91fa..fcb7c97 100644
--- a/python/mxnet/gluon/utils.py
+++ b/python/mxnet/gluon/utils.py
@@ -118,10 +118,14 @@ def split_and_load(data, ctx_list, batch_axis=0, 
even_split=True):
 def clip_global_norm(arrays, max_norm):
 """Rescales NDArrays so that the sum of their 2-norm is smaller than 
`max_norm`.
 """
+def _norm(array):
+if array.stype == 'default':
+x = array.reshape((-1,))
+return ndarray.dot(x, x)
+return array.norm().square()
 assert len(arrays) > 0
 ctx = arrays[0].context
-total_norm = ndarray.add_n(*[ndarray.dot(x, x).as_in_context(ctx)
- for x in (arr.reshape((-1,)) for arr in 
arrays)])
+total_norm = ndarray.add_n(*[_norm(arr).as_in_context(ctx) for arr in 
arrays])
 total_norm = ndarray.sqrt(total_norm).asscalar()
 if not np.isfinite(total_norm):
 warnings.warn(UserWarning('nan or inf is detected. Clipping results 
will be undefined.'),
diff --git a/tests/python/unittest/test_gluon.py 
b/tests/python/unittest/test_gluon.py
index e9259fd..e540657 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -90,15 +90,16 @@ def test_parameter_invalid_access():
 
 @with_seed()

[incubator-mxnet] branch master updated: [MXNET-542] Fix mkldnn performance regression + improve test logging (#11262)

2018-06-18 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 92fde19  [MXNET-542] Fix mkldnn performance regression + improve test 
logging (#11262)
92fde19 is described below

commit 92fde19924fcbb18582f7662d9ae1e8c2b0bcf24
Author: Alexander Zai 
AuthorDate: Mon Jun 18 10:18:42 2018 -0700

[MXNET-542] Fix mkldnn performance regression + improve test logging 
(#11262)

* do not create tmp memory during act

* fix order of alloc memory

* fix conditional

* fix order

* do not pass nullptr to commit

* fix comment

* do not create tmp mem unless shapes diff

* fix params

* always return in CreateMKLDNNMem

* add boilerplate for CreateMKLDNNMem test

* refactor copyfrom

* use copyfrom helper in tests

* add logs

* missing semi

* improve print msg

* target out_mem

* test copy from

* reuse verify copy

* add inplace test / use sum for test

* use assert in sum verify

* lint

* remove unused var

* fix test message

* out_mem can be null

* Revert "refactor copyfrom"

This reverts commit 4ab131ee41832eefb32971f82a58440736ca3417.

* add back missing var

* writeinplace explicitly returns same memory

* refactor

* only writeinplace if add and pdesc are eq

* fix comparison

* add second CreateMKLDNNMemory

* CreateMKLDNNMem accepts input

* refactor WriteTo criteria into separate method

* fix lint

* copyfrom test back

* update mldnnsum test to have diff inputs for write in place

* test in place sum with diff arrs

* revert CreateMKLDNNMem extra param change

* pass input arr param for act_forward

* remove extra header

* fix indent

* add check for writeto

* canwriteto uses ref instead of ptr

* update comments for CreateMKLDNNData

* compare input and output desc with op pdesc

* check CreateMKLDNNData does not return null
---
 src/operator/nn/mkldnn/mkldnn_act.cc |  6 ++--
 src/operator/nn/mkldnn/mkldnn_base-inl.h |  9 +++--
 src/operator/nn/mkldnn/mkldnn_base.cc| 43 +++-
 src/operator/nn/mkldnn/mkldnn_sum.cc | 31 +++---
 tests/cpp/operator/mkldnn.cc | 56 ++--
 5 files changed, 88 insertions(+), 57 deletions(-)

diff --git a/src/operator/nn/mkldnn/mkldnn_act.cc 
b/src/operator/nn/mkldnn/mkldnn_act.cc
index a278456..fae72bd 100644
--- a/src/operator/nn/mkldnn/mkldnn_act.cc
+++ b/src/operator/nn/mkldnn/mkldnn_act.cc
@@ -161,15 +161,15 @@ void MKLDNNActivationForward(const nnvm::NodeAttrs& 
attrs, const OpContext &ctx,
   const ActivationParam& param = nnvm::get(attrs.parsed);
 
   NDArray in_buffer = in_data;
+  MKLDNNStream *stream = MKLDNNStream::Get();
+
   if (in_data.IsView() && in_data.IsMKLDNNData())
 in_buffer = in_data.Reorder2Default();
 
   auto input_mem = in_buffer.GetMKLDNNData();
   MKLDNNActForward &fwd = GetActForward(param, ctx, in_buffer, *input_mem);
-  auto out_mem = CreateMKLDNNMem(out_data, fwd.fwd_pd.dst_primitive_desc(),
- req);
+  auto out_mem = CreateMKLDNNMem(out_data, fwd.fwd_pd.dst_primitive_desc(), 
req, &in_buffer);
   fwd.SetNewMem(*input_mem, *out_mem.second);
-  MKLDNNStream *stream = MKLDNNStream::Get();
   stream->RegisterPrim(fwd.GetFwd());
   CommitOutput(out_data, out_mem);
   stream->Submit();
diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h 
b/src/operator/nn/mkldnn/mkldnn_base-inl.h
index bd2faf5..6a7c58f 100644
--- a/src/operator/nn/mkldnn/mkldnn_base-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h
@@ -324,13 +324,16 @@ typedef std::pair 
mkldnn_output_t;
  * The difference is that the first function can create MKLDNN memory with
  * special layouts in an NDArray, while the second one can only create MKLDNN
  * memory with default layouts.
+ * Also an optional in_arr parameter can be passed in the first function with
+ * the kWriteInPlace req to validate if mkldnn can support write in place;
+ * otherwise new memory will be written to and copied back onto out_arr.
  * If these two functions are used, we have to call CommitOutput to write
  * the output back to the output NDArray.
  */
-mkldnn_output_t CreateMKLDNNMem(const NDArray &arr,
+mkldnn_output_t CreateMKLDNNMem(const NDArray &out_arr,
 const mkldnn::memory::primitive_desc &desc,
-OpReqType req);
-mkldnn_output_t CreateMKLDNNWeightGrad(const NDArray &arr,
+

[incubator-mxnet] branch piiswrong-patch-2 updated (12c73ae -> fcdca35)

2018-06-15 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a change to branch piiswrong-patch-2
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.


from 12c73ae  Update cached_op.cc
 add fcdca35  Update cached_op.cc

No new revisions were added by this update.

Summary of changes:
 src/imperative/cached_op.cc | 1 +
 1 file changed, 1 insertion(+)

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: leaky relu speed (#11012)

2018-06-15 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new fb084cc  leaky relu speed (#11012)
fb084cc is described below

commit fb084cc7bb97176e24db7c426b39cf504d440adb
Author: Sheng Zha 
AuthorDate: Fri Jun 15 13:45:40 2018 -0400

leaky relu speed (#11012)

* leaky relu forward speed

* leaky relu backward speed

* fix infer shape

* fix shape
---
 src/operator/leaky_relu-inl.h  | 132 +++--
 src/operator/mshadow_op.h  |   2 +
 src/operator/operator_tune.cc  |   1 +
 tests/python/unittest/test_operator.py |  11 ++-
 4 files changed, 104 insertions(+), 42 deletions(-)

diff --git a/src/operator/leaky_relu-inl.h b/src/operator/leaky_relu-inl.h
index c99280a..8b93e83 100644
--- a/src/operator/leaky_relu-inl.h
+++ b/src/operator/leaky_relu-inl.h
@@ -39,6 +39,7 @@
 #include "./mshadow_op.h"
 #include "./random/sampler.h"
 #include "./random/sample_op.h"
+#include "./tensor/elemwise_binary_broadcast_op.h"
 
 namespace mxnet {
 namespace op {
@@ -72,12 +73,6 @@ struct LeakyReLUParam : public 
dmlc::Parameter {
   }
 };
 
-struct prelu_grad {
-  MSHADOW_XINLINE static real_t Map(real_t a) {
-return a > 0.0f ? 0.0f : a;
-  }
-};
-
 template
 class LeakyReLUOp : public Operator {
  public:
@@ -98,28 +93,51 @@ class LeakyReLUOp : public Operator {
 Tensor data;
 Tensor out;
 Tensor mask;
-Tensor weight;
 int n = in_data[leakyrelu::kData].shape_[0];
 int k = in_data[leakyrelu::kData].shape_[1];
 Shape<3> dshape = Shape3(n, k, in_data[leakyrelu::kData].Size()/n/k);
 data = in_data[leakyrelu::kData].get_with_shape(dshape, s);
 out = out_data[leakyrelu::kOut].get_with_shape(dshape, s);
+if (req[leakyrelu::kOut] == kNullOp) {
+  return;
+}
 switch (param_.act_type) {
   case leakyrelu::kLeakyReLU: {
 MXNET_ASSIGN_REQ_SWITCH(req[leakyrelu::kOut], Req, {
-  mxnet_op::Kernel, xpu>::Launch(
+  mxnet_op::Kernel, 
xpu>::Launch(
 s, out.size(0) * out.size(1) * out.size(2), out.dptr_, data.dptr_, 
DType(param_.slope));
 });
 break;
   }
   case leakyrelu::kPReLU: {
-weight = in_data[leakyrelu::kGamma].get(s);
-if (weight.shape_.Size() == 1) {
-  Assign(out, req[leakyrelu::kOut],
- F(data, 
mshadow::expr::broadcast_scalar(weight, out.shape_)));
+TShape gshape = expand_shape(in_data[leakyrelu::kGamma].shape_,
+ in_data[leakyrelu::kData].shape_);
+TShape new_lshape, new_rshape, new_oshape;
+const int ndim = 
op::BinaryBroadcastShapeCompact(in_data[leakyrelu::kData].shape_,
+ gshape,
+ 
out_data[leakyrelu::kOut].shape_,
+ &new_lshape, 
&new_rshape, &new_oshape);
+if (!ndim) {
+  MXNET_ASSIGN_REQ_SWITCH(req[leakyrelu::kOut], Req, {
+const size_t size = (minthree(out_data[leakyrelu::kOut].Size(),
+  in_data[leakyrelu::kData].Size(),
+  in_data[leakyrelu::kGamma].Size())
++ DataType::kLanes - 1) / DataType::kLanes;
+mxnet_op::Kernel, 
xpu>::Launch(
+s, size, out_data[leakyrelu::kOut].dptr(),
+in_data[leakyrelu::kData].dptr(), 
in_data[leakyrelu::kGamma].dptr());
+  });
 } else {
-  Assign(out, req[leakyrelu::kOut],
- F(data, mshadow::expr::broadcast<1>(weight, 
out.shape_)));
+  BROADCAST_NDIM_SWITCH(ndim, NDim, {
+mshadow::Shape oshape = new_oshape.get();
+mshadow::Shape lstride = 
mxnet_op::calc_stride(new_lshape.get());
+mshadow::Shape rstride = 
mxnet_op::calc_stride(new_rshape.get());
+mxnet_op::Kernel, xpu>::
+template LaunchEx(s, new_oshape.Size(), req[leakyrelu::kOut], 
lstride, rstride, oshape,
+in_data[leakyrelu::kData].dptr(), 
in_data[leakyrelu::kGamma].dptr(),
+out_data[leakyrelu::kOut].dptr());
+  });
 }
 break;
   }
@@ -134,23 +152,23 @@ class LeakyReLUOp : public Operator {
   Tensor out = mask.FlatTo1D();
   sampler.Sample(low, high, out, pgen, s);
   MXNET_ASSIGN_REQ_SWITCH(req[leakyrelu::kMask], Req, {
-mxnet_op::Kernel, xpu>::Launch(
+mxnet_op::Kernel, 
xpu>::Launch(
   s, mask.size(0) * mask.size(1) * mask.size(2), mask.dptr_, 
mask.dptr_,
   DType(param_.upper_bound - param_.

[incubator-mxnet] branch master updated: Improve data transform for gluon data loader (#11183)

2018-06-14 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new b819fd9  Improve data transform for gluon data loader (#11183)
b819fd9 is described below

commit b819fd9dc2435a582831ffad3b1668e58664ee5d
Author: Tong He 
AuthorDate: Thu Jun 14 14:50:39 2018 -0700

Improve data transform for gluon data loader (#11183)

* improve transforms.Resize

* fix

* Trigger CI

* Trigger CI

* improve

* Trigger CI

* Trigger CI

* fix unittest

* keep_ratio is false by default, to keep consistency
---
 python/mxnet/gluon/data/vision/transforms.py| 31 +++--
 tests/python/unittest/test_gluon_data_vision.py | 25 ++--
 2 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/python/mxnet/gluon/data/vision/transforms.py 
b/python/mxnet/gluon/data/vision/transforms.py
index 7ec1c32..2e35a40 100644
--- a/python/mxnet/gluon/data/vision/transforms.py
+++ b/python/mxnet/gluon/data/vision/transforms.py
@@ -196,7 +196,7 @@ class RandomResizedCrop(Block):
 - **out**: output tensor with (H x W x C) shape.
 """
 def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0/4.0, 4.0/3.0),
- interpolation=2):
+ interpolation=1):
 super(RandomResizedCrop, self).__init__()
 if isinstance(size, numeric_types):
 size = (size, size)
@@ -233,7 +233,7 @@ class CenterCrop(Block):
 >>> transformer(image)
 
 """
-def __init__(self, size, interpolation=2):
+def __init__(self, size, interpolation=1):
 super(CenterCrop, self).__init__()
 if isinstance(size, numeric_types):
 size = (size, size)
@@ -250,6 +250,9 @@ class Resize(Block):
 --
 size : int or tuple of (W, H)
 Size of output image.
+keep_ratio : bool
+Whether to resize the short edge or both edges to `size`,
+if size is given as an integer.
 interpolation : int
 Interpolation method for resizing. By default uses bilinear
 interpolation. See OpenCV's resize function for available choices.
@@ -268,14 +271,28 @@ class Resize(Block):
 >>> transformer(image)
 
 """
-def __init__(self, size, interpolation=2):
+def __init__(self, size, keep_ratio=False, interpolation=1):
 super(Resize, self).__init__()
-if isinstance(size, numeric_types):
-size = (size, size)
-self._args = tuple(size) + (interpolation,)
+self._keep = keep_ratio
+self._size = size
+self._interpolation = interpolation
 
 def forward(self, x):
-return image.imresize(x, *self._args)
+if isinstance(self._size, numeric_types):
+if not self._keep:
+wsize = self._size
+hsize = self._size
+else:
+h, w, _ = x.shape
+if h > w:
+wsize = self._size
+hsize = int(h * wsize / w)
+else:
+hsize = self._size
+wsize = int(w * hsize / h)
+else:
+wsize, hsize = self._size
+return image.imresize(x, wsize, hsize, self._interpolation)
 
 
 class RandomFlipLeftRight(HybridBlock):
diff --git a/tests/python/unittest/test_gluon_data_vision.py 
b/tests/python/unittest/test_gluon_data_vision.py
index a15a7e9..2ff9c5c 100644
--- a/tests/python/unittest/test_gluon_data_vision.py
+++ b/tests/python/unittest/test_gluon_data_vision.py
@@ -66,18 +66,19 @@ def test_transformer():
 from mxnet.gluon.data.vision import transforms
 
 transform = transforms.Compose([
-   transforms.Resize(300),
-   transforms.CenterCrop(256),
-   transforms.RandomResizedCrop(224),
-   transforms.RandomFlipLeftRight(),
-   transforms.RandomColorJitter(0.1, 0.1, 0.1, 0.1),
-   transforms.RandomBrightness(0.1),
-   transforms.RandomContrast(0.1),
-   transforms.RandomSaturation(0.1),
-   transforms.RandomHue(0.1),
-   transforms.RandomLighting(0.1),
-   transforms.ToTensor(),
-   transforms.Normalize([0, 0, 0], [1, 1, 1])])
+transforms.Resize(300),
+transforms.Resize(300, keep_ratio=True),
+transforms.CenterCrop(256),
+transforms.RandomResizedCrop(224),
+transforms.RandomFlipLeftRight(),
+transforms.RandomColorJitter(0.1, 0.1, 0.1, 0.1),
+transforms.RandomBrightness(0.1),
+transforms.RandomContrast(0.1),
+transforms.RandomSaturation(0.1),
+transforms.RandomHue(0.1),
+transforms.RandomLight

[incubator-mxnet] branch master updated: Enable shared ccache and fix test (#11269)

2018-06-14 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 2eadfa2  Enable shared ccache and fix test (#11269)
2eadfa2 is described below

commit 2eadfa29c5cdb39bf30af28fb3265b5243984d91
Author: Marco de Abreu 
AuthorDate: Thu Jun 14 14:46:02 2018 -0700

Enable shared ccache and fix test (#11269)

* Enable shared ccache

* Increase cache size

* Don't fail if EFS ccache is not available
---
 ci/build.py | 12 
 ci/test_docker_cache.py |  3 +++
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/ci/build.py b/ci/build.py
index 35f8b47..b6821b3 100755
--- a/ci/build.py
+++ b/ci/build.py
@@ -40,7 +40,7 @@ from itertools import chain
 from subprocess import call, check_call
 from typing import *
 
-CCACHE_MAXSIZE = '10G'
+CCACHE_MAXSIZE = '500G'
 
 def get_platforms(path: Optional[str] = "docker"):
 """Get a list of architectures given our dockerfiles"""
@@ -124,9 +124,12 @@ def buildir() -> str:
 def default_ccache_dir() -> str:
 # Share ccache across containers
 if 'CCACHE_DIR' in os.environ:
-ccache_dir = os.path.realpath(os.environ['CCACHE_DIR'])
-os.makedirs(ccache_dir, exist_ok=True)
-return ccache_dir
+try:
+ccache_dir = os.path.realpath(os.environ['CCACHE_DIR'])
+os.makedirs(ccache_dir, exist_ok=True)
+return ccache_dir
+except PermissionError:
+logging.info('Unable to make dirs at %s, falling back to local 
temp dir', ccache_dir)
 # In osx tmpdir is not mountable by default
 if platform.system() == 'Darwin':
 ccache_dir = "/tmp/_mxnet_ccache"
@@ -157,6 +160,7 @@ def container_run(platform: str,
'-v', "{}:/work/ccache".format(local_ccache_dir),
'-u', '{}:{}'.format(os.getuid(), os.getgid()),
'-e', 'CCACHE_MAXSIZE={}'.format(CCACHE_MAXSIZE),
+   '-e', 'CCACHE_TEMPDIR=/tmp/ccache',  # temp dir should be local 
and not shared
'-e', "CCACHE_DIR=/work/ccache",  # this path is inside the 
container as /work/ccache is mounted
tag]
 runlist.extend(command)
diff --git a/ci/test_docker_cache.py b/ci/test_docker_cache.py
index fa8833f..3f471db 100644
--- a/ci/test_docker_cache.py
+++ b/ci/test_docker_cache.py
@@ -28,6 +28,7 @@ import os
 import logging
 import subprocess
 import sys
+from unittest.mock import MagicMock
 
 sys.path.append(os.path.dirname(__file__))
 import docker_cache
@@ -87,6 +88,8 @@ class TestDockerCache(unittest.TestCase):
 base = os.path.split(os.path.realpath(__file__))[0]
 os.chdir(base)
 
+docker_cache._login_dockerhub = MagicMock()  # Override login
+
 # Stop in case previous execution was dirty
 try:
 self._stop_local_docker_registry()

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: [MXNET-546] Add unit test for MKLDNNSum (#11272)

2018-06-14 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new a40055e  [MXNET-546] Add unit test for MKLDNNSum (#11272)
a40055e is described below

commit a40055eeb98326120642039e29732d2ca39a7610
Author: Alexander Zai 
AuthorDate: Thu Jun 14 10:26:46 2018 -0700

[MXNET-546] Add unit test for MKLDNNSum (#11272)

* add test for mkldnnsum

* add extra comment

* fix verify

* only run if mkldnn supported

* divide size by size of type

* filter out regular arrays

* fix cond

* add verify mes

* filter views

* different outputs arrays

* move print message

* add in place test

* update copy fn

* refactor copyfrom

* use arr.copy instead of tmpmemmg

* use InitMKLDNNArray helper

* fix params

* pass correct type to copyfrom

* add print message for inplace sum

* remove copyfrom refactor

* remove redundant header

* remove extra line

* fix lint

* retrigger
---
 tests/cpp/operator/mkldnn.cc | 46 
 1 file changed, 46 insertions(+)

diff --git a/tests/cpp/operator/mkldnn.cc b/tests/cpp/operator/mkldnn.cc
index 76872d5..a7a1187 100644
--- a/tests/cpp/operator/mkldnn.cc
+++ b/tests/cpp/operator/mkldnn.cc
@@ -30,6 +30,7 @@
 #include "gtest/gtest.h"
 #include "mxnet/imperative.h"
 #include "../../src/operator/nn/mkldnn/mkldnn_base-inl.h"
+#include "../../src/operator/nn/mkldnn/mkldnn_ops-inl.h"
 
 using namespace mxnet;
 
@@ -736,4 +737,49 @@ TEST(IMPERATIVE, BinaryOp) {
   TestBinaryOp(attrs, VerifySumResult);
 }
 
+void VerifySumMemory(mkldnn::memory in_mem1, mkldnn::memory in_mem2, 
mkldnn::memory out_mem) {
+  float *in1 = static_cast(in_mem1.get_data_handle());
+  float *in2 = static_cast(in_mem2.get_data_handle());
+  float *out = static_cast(out_mem.get_data_handle());
+  for (size_t i = 0; i < in_mem1.get_primitive_desc().get_size() / 
sizeof(float); i++) {
+ASSERT_EQ(in1[i] + in2[i], out[i]);
+  }
+}
+
+TEST(MKLDNN_BASE, MKLDNNSum) {
+  std::vector in_arrs = GetTestInputArrays(InitDefaultArray);
+  TestArrayShapes tas = GetTestArrayShapes();
+  std::vector pds = tas.pds;
+
+  for (auto in_arr : in_arrs) {
+std::vector out_arrs = 
GetTestOutputArrays(in_arr.arr.shape(), pds,
+ InitDefaultArray);
+if (!SupportMKLDNN(in_arr.arr) || !in_arr.arr.IsMKLDNNData() || 
in_arr.arr.IsView())
+  continue;
+
+for (auto out_arr : out_arrs) {
+  auto in_mem1 = in_arr.arr.GetMKLDNNData();
+  auto in_mem2 = in_arr.arr.GetMKLDNNData();
+  auto out_mem = out_arr.arr.GetMKLDNNData(in_mem1->get_primitive_desc());
+  if (out_mem == nullptr)
+continue;
+  PrintVerifyMsg(in_arr, in_arr);
+  op::MKLDNNSum(*in_mem1, *in_mem2, *out_mem);
+  MKLDNNStream::Get()->Submit();
+  VerifySumMemory(*in_mem1, *in_mem2, *out_mem);
+}
+
+// in place
+auto input_mem = in_arr.arr.GetMKLDNNData();
+NDArrayAttrs orig_arr(in_arr.arr.Copy(in_arr.arr.ctx()), "In Place Copy");
+PrintVerifyMsg(orig_arr, in_arr);
+InitMKLDNNArray(&orig_arr.arr, input_mem->get_primitive_desc(), 
InitDefaultArray);
+orig_arr.arr.CopyFrom(*input_mem);
+auto old_mem = orig_arr.arr.GetMKLDNNData();
+op::MKLDNNSum(*input_mem, *input_mem, *input_mem);
+MKLDNNStream::Get()->Submit();
+VerifySumMemory(*old_mem, *old_mem, *input_mem);
+  }
+}
+
 #endif

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: add import_ for SymbolBlock (#11127)

2018-06-13 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 66ab27e  add import_ for SymbolBlock (#11127)
66ab27e is described below

commit 66ab27e67a70b1164364a8a52ebbe0def45dc327
Author: Eric Junyuan Xie 
AuthorDate: Wed Jun 13 22:01:40 2018 -0700

add import_ for SymbolBlock (#11127)

* add import_ for SymbolBlock

* fix

* Update block.py

* add save_parameters

* fix

* fix lint

* fix

* fix

* fix

* fix

* fix

* Update save_load_params.md
---
 docs/tutorials/gluon/hybrid.md|  2 +-
 docs/tutorials/gluon/naming.md|  6 +-
 docs/tutorials/gluon/save_load_params.md  | 16 +---
 example/gluon/dcgan.py|  8 +-
 example/gluon/embedding_learning/train.py |  2 +-
 example/gluon/image_classification.py |  8 +-
 example/gluon/mnist.py|  2 +-
 example/gluon/style_transfer/main.py  |  8 +-
 example/gluon/super_resolution.py |  4 +-
 example/gluon/tree_lstm/main.py   |  2 +-
 example/gluon/word_language_model/train.py|  4 +-
 python/mxnet/gluon/block.py   | 90 +--
 python/mxnet/gluon/model_zoo/vision/alexnet.py|  2 +-
 python/mxnet/gluon/model_zoo/vision/densenet.py   |  2 +-
 python/mxnet/gluon/model_zoo/vision/inception.py  |  2 +-
 python/mxnet/gluon/model_zoo/vision/mobilenet.py  |  4 +-
 python/mxnet/gluon/model_zoo/vision/resnet.py |  4 +-
 python/mxnet/gluon/model_zoo/vision/squeezenet.py |  2 +-
 python/mxnet/gluon/model_zoo/vision/vgg.py|  4 +-
 tests/python/unittest/test_gluon.py   | 54 +++---
 20 files changed, 164 insertions(+), 62 deletions(-)

diff --git a/docs/tutorials/gluon/hybrid.md b/docs/tutorials/gluon/hybrid.md
index 3554a15..5c8372a 100644
--- a/docs/tutorials/gluon/hybrid.md
+++ b/docs/tutorials/gluon/hybrid.md
@@ -117,7 +117,7 @@ x = mx.sym.var('data')
 y = net(x)
 print(y)
 y.save('model.json')
-net.save_params('model.params')
+net.save_parameters('model.params')
 ```
 
 If your network outputs more than one value, you can use `mx.sym.Group` to
diff --git a/docs/tutorials/gluon/naming.md b/docs/tutorials/gluon/naming.md
index 37b63fa..3606a03 100644
--- a/docs/tutorials/gluon/naming.md
+++ b/docs/tutorials/gluon/naming.md
@@ -203,12 +203,12 @@ except Exception as e:
 Parameter 'model1_dense0_weight' is missing in file 'model.params', which 
contains parameters: 'model0_mydense_weight', 'model0_dense1_bias', 
'model0_dense1_weight', 'model0_dense0_weight', 'model0_dense0_bias', 
'model0_mydense_bias'. Please make sure source and target networks have the 
same prefix.
 
 
-To solve this problem, we use `save_params`/`load_params` instead of 
`collect_params` and `save`/`load`. `save_params` uses model structure, instead 
of parameter name, to match parameters.
+To solve this problem, we use `save_parameters`/`load_parameters` instead of 
`collect_params` and `save`/`load`. `save_parameters` uses model structure, 
instead of parameter name, to match parameters.
 
 
 ```python
-model0.save_params('model.params')
-model1.load_params('model.params')
+model0.save_parameters('model.params')
+model1.load_parameters('model.params')
 print(mx.nd.load('model.params').keys())
 ```
 
diff --git a/docs/tutorials/gluon/save_load_params.md 
b/docs/tutorials/gluon/save_load_params.md
index cd87680..f5f4812 100644
--- a/docs/tutorials/gluon/save_load_params.md
+++ b/docs/tutorials/gluon/save_load_params.md
@@ -10,7 +10,7 @@ Parameters of any Gluon model can be saved using the 
`save_params` and `load_par
 
 **2. Save/load model parameters AND architecture**
 
-The Model architecture of `Hybrid` models stays static and don't change during 
execution. Therefore both model parameters AND architecture can be saved and 
loaded using `export`, `load_checkpoint` and `load` methods.
+The Model architecture of `Hybrid` models stays static and doesn't change during 
execution. Therefore both model parameters AND architecture can be saved and 
loaded using `export`, `imports` methods.
 
 Let's look at the above methods in more detail. Let's start by importing the 
modules we'll need.
 
@@ -61,7 +61,7 @@ def build_lenet(net):
 net.add(gluon.nn.Dense(512, activation="relu"))
 # Second fully connected layer with as many neurons as the number of 
classes
 net.add(gluon.nn.Dense(num_outputs))
-
+
 return net
 
 # Train a given model using

[incubator-mxnet] branch v1.2.0 updated: [MXNET-491] Use depthwise convolution by cuDNNv7 if available, updated version (#11076) (#11233)

2018-06-12 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch v1.2.0
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/v1.2.0 by this push:
 new 546a233  [MXNET-491] Use depthwise convolution by cuDNNv7 if 
available, updated version (#11076) (#11233)
546a233 is described below

commit 546a285592e8df24a0b85072ac40c558b7c2
Author: Anirudh Subramanian 
AuthorDate: Tue Jun 12 10:19:35 2018 -0700

[MXNET-491] Use depthwise convolution by cuDNNv7 if available, updated 
version (#11076) (#11233)

* Use group convolution by cuDNNv7 if available

* Fix coding style

* ident-- for #if statements

* more ident--

* more ident--

* prefer cudnnv7 depthwise convolution
---
 src/operator/nn/convolution.cu| 10 ++-
 src/operator/nn/cudnn/cudnn_convolution-inl.h | 92 +++
 2 files changed, 100 insertions(+), 2 deletions(-)

diff --git a/src/operator/nn/convolution.cu b/src/operator/nn/convolution.cu
index 045e570..65a320d 100644
--- a/src/operator/nn/convolution.cu
+++ b/src/operator/nn/convolution.cu
@@ -97,7 +97,9 @@ void ConvolutionCompute(const nnvm::NodeAttrs& attrs,
   op.Forward(ctx, inputs, req, outputs);
 })
 return;
-  } else if (param.num_filter == param.num_group &&
+  }
+#if MXNET_USE_CUDNN == 0 || CUDNN_MAJOR < 7
+  if (param.num_filter == param.num_group &&
   param.layout.value() == mshadow::kNCHW &&
   param.num_filter == inputs[conv::kData].shape_[1] &&
   param.kernel.ndim() == 2 &&
@@ -112,6 +114,7 @@ void ConvolutionCompute(const nnvm::NodeAttrs& attrs,
 op.Forward(ctx, inputs, req, outputs);
 return;
   }
+#endif
 
 #if MXNET_USE_CUDNN == 1
   // On fp16-I/O instances, use fp32 compute (i.e. pseudo-fp16).
@@ -167,7 +170,9 @@ void ConvolutionGradCompute(const nnvm::NodeAttrs& 
attrs,
   op.Backward(ctx, std::vector{out_grad}, in_data, req, in_grad);
 })
 return;
-  } else if (param.num_filter == param.num_group &&
+  }
+#if MXNET_USE_CUDNN == 0 || CUDNN_MAJOR < 7
+  if (param.num_filter == param.num_group &&
   param.layout.value() == mshadow::kNCHW &&
   param.num_filter == in_data[conv::kData].shape_[1] &&
   param.kernel.ndim() == 2 &&
@@ -183,6 +188,7 @@ void ConvolutionGradCompute(const nnvm::NodeAttrs& 
attrs,
 op.Backward(ctx, std::vector{out_grad}, in_data, req, in_grad);
 return;
   }
+#endif
 
 #if MXNET_USE_CUDNN == 1
   // On fp16-I/O instances, use fp32 compute (i.e. pseudo-fp16).
diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h 
b/src/operator/nn/cudnn/cudnn_convolution-inl.h
index ca60c99..4b1cbbe 100644
--- a/src/operator/nn/cudnn/cudnn_convolution-inl.h
+++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h
@@ -137,6 +137,35 @@ class CuDNNConvolutionOp {
 DType *wmat_ptr = GetNdPtr(in_data[conv::kWeight], param_.kernel.ndim() + 
2, s);
 DType *out_ptr = GetNdPtr(out_data[conv::kOut], param_.kernel.ndim() + 2, 
s);
 
+#if CUDNN_MAJOR >= 7
+typename DataType::ScaleType alpha = 1.0f;
+typename DataType::ScaleType beta = 0.0f;
+typename DataType::ScaleType beta_add = 1.0f;
+CUDNN_CALL(cudnnConvolutionForward(s->dnn_handle_,
+&alpha,
+in_desc_,
+data_ptr,
+filter_desc_,
+wmat_ptr,
+forward_conv_desc_,
+forward_algo_.AlgoNumber(),
+workspace.dptr_,
+workspace_size,
+req[conv::kOut] == kAddTo? &beta_add : &beta,
+out_desc_,
+  out_ptr));
+
+if (!param_.no_bias) {
+  Tensor bias = in_data[conv::kBias].get(s);
+  CUDNN_CALL(cudnnAddTensor(s->dnn_handle_,
+  &alpha,
+  bias_desc_,
+  bias.dptr_,
+  &beta_add,
+  out_desc_,
+  out_ptr));
+}
+#else
 for (uint32_t g = 0; g < param_.num_group; ++g) {
   typename DataType::ScaleType alpha = 1.0f;
   typename DataType::ScaleType beta = 0.0f;
@@ -177,6 +206,7 @@ class CuDNNConvolutionOp {
 #endif
   }
 }
+#endif  // CUDNN_MAJOR >= 7
   }
 
   void Backward(const OpContext &ctx,
@@ -202,6 +232,51 @@ class CuDNNConvolutionOp {
 GetTempSize(ctx);
 Tensor workspace = AllocateTempWorkspace(ctx, 
backward_workspace_byte_);
 size_t workspace_size = TensorSizeBytes(workspace);
+#if CUDNN_MAJOR >= 7
+typename DataType::ScaleType alpha = 1.0f;
+typename DataType::ScaleType beta = 0.0f;
+typename DataType::ScaleType beta_a

[incubator-mxnet] branch master updated: fix loading params if ignore_extra is set (#11235)

2018-06-12 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 2d761c9  fix loading params if ignore_extra is set (#11235)
2d761c9 is described below

commit 2d761c9ac722b9ba5b98687b906513658bc0add5
Author: Joshua Z. Zhang 
AuthorDate: Tue Jun 12 10:16:43 2018 -0700

fix loading params if ignore_extra is set (#11235)
---
 python/mxnet/gluon/block.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py
index f107da3..7406a5d 100644
--- a/python/mxnet/gluon/block.py
+++ b/python/mxnet/gluon/block.py
@@ -355,7 +355,8 @@ class Block(object):
 "Parameter '%s' loaded from file '%s' is not present in 
ParameterDict, " \
 "which contains parameters %s. Set ignore_extra=True to 
ignore. "%(
 name, filename, 
_brief_print_list(self._params.keys(
-params[name]._load_init(loaded[name], ctx)
+if name in params:
+params[name]._load_init(loaded[name], ctx)
 
 def register_child(self, block, name=None):
 """Registers block as a child of self. :py:class:`Block` s assigned to 
self as

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: [WIP] Gluon sparse block and sparse embedding (#11197)

2018-06-11 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 715457d  [WIP] Gluon sparse block and sparse embedding (#11197)
715457d is described below

commit 715457d94ebf8935e34dd6bd445b3ba3950fe9d4
Author: Haibin Lin 
AuthorDate: Mon Jun 11 10:43:40 2018 -0700

[WIP] Gluon sparse block and sparse embedding (#11197)

* add sparse block

* add sparse embedding

* add doc

* lint

* remove sparseblock
---
 docs/api/python/gluon/contrib.md  |  2 ++
 python/mxnet/gluon/contrib/nn/basic_layers.py | 45 +--
 tests/python/unittest/test_gluon_contrib.py   | 16 --
 3 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/docs/api/python/gluon/contrib.md b/docs/api/python/gluon/contrib.md
index bc3089f..877a294 100644
--- a/docs/api/python/gluon/contrib.md
+++ b/docs/api/python/gluon/contrib.md
@@ -35,6 +35,7 @@ In the rest of this document, we list routines provided by 
the `gluon.contrib` p
 Concurrent
 HybridConcurrent
 Identity
+SparseEmbedding
 ```
 
 ### Recurrent neural network
@@ -55,6 +56,7 @@ In the rest of this document, we list routines provided by 
the `gluon.contrib` p
 Conv1DGRUCell
 Conv2DGRUCell
 Conv3DGRUCell
+LSTMPCell
 ```
 
 ### Data
diff --git a/python/mxnet/gluon/contrib/nn/basic_layers.py 
b/python/mxnet/gluon/contrib/nn/basic_layers.py
index eccdf18..1edef14 100644
--- a/python/mxnet/gluon/contrib/nn/basic_layers.py
+++ b/python/mxnet/gluon/contrib/nn/basic_layers.py
@@ -18,10 +18,10 @@
 # coding: utf-8
 # pylint: disable= arguments-differ
 """Custom neural network layers in model_zoo."""
-__all__ = ['Concurrent', 'HybridConcurrent', 'Identity']
+__all__ = ['Concurrent', 'HybridConcurrent', 'Identity', 'SparseEmbedding']
 
 from .... import nd
-from ...block import HybridBlock
+from ...block import HybridBlock, Block
 from ...nn import Sequential, HybridSequential
 
 class Concurrent(Sequential):
@@ -110,3 +110,44 @@ class Identity(HybridBlock):
 
 def hybrid_forward(self, F, x):
 return x
+
+class SparseEmbedding(Block):
+r"""Turns non-negative integers (indexes/tokens) into dense vectors
+of fixed size. eg. [4, 20] -> [[0.25, 0.1], [0.6, -0.2]]
+
+This SparseBlock is designed for distributed training with extremely large
+input dimension. Both weight and gradient w.r.t. weight are 
`RowSparseNDArray`.
+
+Parameters
+--
+input_dim : int
+Size of the vocabulary, i.e. maximum integer index + 1.
+output_dim : int
+Dimension of the dense embedding.
+dtype : str or np.dtype, default 'float32'
+Data type of output embeddings.
+weight_initializer : Initializer
+Initializer for the `embeddings` matrix.
+
+Inputs:
+- **data**: (N-1)-D tensor with shape: `(x1, x2, ..., xN-1)`.
+Output:
+- **out**: N-D tensor with shape: `(x1, x2, ..., xN-1, output_dim)`.
+"""
+def __init__(self, input_dim, output_dim, dtype='float32',
+ weight_initializer=None, **kwargs):
+super(SparseEmbedding, self).__init__(**kwargs)
+self._kwargs = {'input_dim': input_dim, 'output_dim': output_dim,
+'dtype': dtype, 'sparse_grad': True}
+self.weight = self.params.get('weight', shape=(input_dim, output_dim),
+  init=weight_initializer, dtype=dtype,
+  grad_stype='row_sparse', 
stype='row_sparse')
+
+def forward(self, x):
+weight = self.weight.row_sparse_data(x)
+return nd.Embedding(x, weight, name='fwd', **self._kwargs)
+
+def __repr__(self):
+s = '{block_name}({input_dim} -> {output_dim}, {dtype})'
+return s.format(block_name=self.__class__.__name__,
+**self._kwargs)
diff --git a/tests/python/unittest/test_gluon_contrib.py 
b/tests/python/unittest/test_gluon_contrib.py
index 729ec84..264ff1f 100644
--- a/tests/python/unittest/test_gluon_contrib.py
+++ b/tests/python/unittest/test_gluon_contrib.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 import mxnet as mx
 from mxnet.gluon import contrib
 from mxnet.gluon import nn
-from mxnet.gluon.contrib.nn import Concurrent, HybridConcurrent, Identity
+from mxnet.gluon.contrib.nn import Concurrent, HybridConcurrent, Identity, 
SparseEmbedding
 from mxnet.test_utils import almost_equal
 from common import setup_module, with_seed
 import numpy as np
@@ -185,13 +18

[incubator-mxnet] branch master updated: Removing tutorial tests (#11170)

2018-06-06 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 7511045  Removing tutorial tests (#11170)
7511045 is described below

commit 751104554046dc30f02889fe72fbab05bde0cb5c
Author: Thomas Delteil 
AuthorDate: Wed Jun 6 13:55:22 2018 -0700

Removing tutorial tests (#11170)

* removing tutorial tests

Removing tutorial tests for now until we figure out why they started 
failing so much

* extend sleep time to > 1s
---
 Jenkinsfile   | 22 --
 tests/tutorials/test_tutorials.py |  5 -
 2 files changed, 4 insertions(+), 23 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 288f9a4..28edda0 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -824,28 +824,6 @@ try {
   }
 }
   }
-},
-'tutorial tests Python 2 GPU': {
-  node('mxnetlinux-gpu') {
-ws('workspace/it-tutorials-py2') {
-  timeout(time: max_time, unit: 'MINUTES') {
-init_git()
-unpack_lib('gpu')
-docker_run('ubuntu_gpu', 'tutorialtest_ubuntu_python2_gpu', true, 
'3g')
-  }
-}
-  }
-},
-'tutorial tests Python 3 GPU': {
-  node('mxnetlinux-gpu') {
-ws('workspace/it-tutorials-py3') {
-  timeout(time: max_time, unit: 'MINUTES') {
-init_git()
-unpack_lib('gpu')
-docker_run('ubuntu_gpu', 'tutorialtest_ubuntu_python3_gpu', true, 
'3g')
-  }
-}
-  }
 }
   }
 
diff --git a/tests/tutorials/test_tutorials.py 
b/tests/tutorials/test_tutorials.py
index 4c19a8e..5070364 100644
--- a/tests/tutorials/test_tutorials.py
+++ b/tests/tutorials/test_tutorials.py
@@ -79,7 +79,10 @@ def _test_tutorial_nb(tutorial):
 os.makedirs(working_dir)
 try:
 notebook = nbformat.read(tutorial_path + '.ipynb', 
as_version=IPYTHON_VERSION)
-time.sleep(0.5) # Adding a small delay to allow time for sockets to be 
freed
+# Adding a small delay to allow time for sockets to be freed
+# stop-gap measure to battle the 1000ms linger of socket hard coded
+# in the kernel API code
+time.sleep(1.1) 
 if kernel is not None:
 eprocessor = ExecutePreprocessor(timeout=TIME_OUT, 
kernel_name=kernel)
 else:

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: [MXNET-107]Fused GRU implementation for CPU (#10311)

2018-06-06 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 069026a  [MXNET-107]Fused GRU implementation for CPU (#10311)
069026a is described below

commit 069026ab1a9924fd870a625558e000b19b9b9507
Author: Hao Li 
AuthorDate: Thu Jun 7 02:38:03 2018 +0800

[MXNET-107]Fused GRU implementation for CPU (#10311)

* Add GRU Support and Test Case

* skip the gpu test case that has nothing to do with RNN GRU

* fix robust bug for gru backward

* fix bug for unifying weight parameter

* add GRU multiple layer and bidirection support with test case

* fix test case bug

* fix test case bug

* fix bug for memory issue

* fix bug for bidirection

* rebase code and fix bug for memory corruption issue

* fix gpu compile issue

* fix bug and enable some test cases

* fix robust bug

* trigger the build to check if quantize-gpu case is covered

* trigger the build to check if MKLDNN+GPU case is covered

* disable failed gpu test case of MKLDNN_UTIL_FUNC-MemFormat because it has 
nothing to do with this PR and will recover it once the issue is passed

* skip failed test_reduce test case temporarily as it has nothing to do 
with RNN

* enable several test cases

* retrigger the build

* rebase code from lstm

* rebase code for resolve conflict

* add gru code after resolve conflict

* fix bug for resolve conflict

* add Fused GRU code with test case

* retrigger the build

* add GetRecommendedOMPThreadCount for omp

* fix conflict issue

* add gru relate code

* fix bug for code

* update code for gru

* retrigger the build

* fix code about gru condition

* enhance test case to test gradient weights and bias

* fix bug for test case

* fix bug for test case

* fix bug about dropout condition and test case

* fix bug for test case

* fix bug for test case

* retrigger the build

* rebase code

* add gru code

* fix issues about namespace, removing define and memcpy

* retrigger the build

* fix issues and add cudnn_gru_bucketing.py test case

* retrigger the build

* update cudnn_rnn_bucketing.py test case

* update cudnn_rnn_bucketing.py test case

* update cudnn_rnn_bucketing.py test case

* add check for req[kParams] and kAddTo from cudnn_rnn-inl.h

* retrigger the build

* retrigger the build

* retrigger the build

* add kNullOp check

* retrigger the build

* update kNullOp support and test case for both GRU and LSTM

* update kAddToOp support for both GRU and LSTM
---
 ...nn_lstm_bucketing.py => cudnn_rnn_bucketing.py} |  33 +-
 python/mxnet/gluon/rnn/rnn_layer.py|   2 +-
 src/operator/rnn-inl.h |  57 +-
 src/operator/rnn_impl.h| 955 -
 tests/python/unittest/test_operator.py |  63 +-
 5 files changed, 1060 insertions(+), 50 deletions(-)

diff --git a/example/rnn/bucketing/cudnn_lstm_bucketing.py 
b/example/rnn/bucketing/cudnn_rnn_bucketing.py
similarity index 87%
rename from example/rnn/bucketing/cudnn_lstm_bucketing.py
rename to example/rnn/bucketing/cudnn_rnn_bucketing.py
index 84cfc9d..29a66a8 100644
--- a/example/rnn/bucketing/cudnn_lstm_bucketing.py
+++ b/example/rnn/bucketing/cudnn_rnn_bucketing.py
@@ -65,6 +65,8 @@ parser.add_argument('--stack-rnn', default=False,
 help='stack fused RNN cells to reduce communication 
overhead')
 parser.add_argument('--dropout', type=float, default='0.0',
 help='dropout probability (1.0 - keep probability)')
+parser.add_argument('--rnntype', type=str, default='lstm',
+help='rnn type: gru and lstm are supported')
 
 #buckets = [32]
 buckets = [10, 20, 30, 40, 50, 60]
@@ -97,13 +99,13 @@ def train(args):
 cell = mx.rnn.SequentialRNNCell()
 for i in range(args.num_layers):
 cell.add(mx.rnn.FusedRNNCell(args.num_hidden, num_layers=1,
- mode='lstm', prefix='lstm_l%d'%i,
+ mode=args.rnntype, 
prefix='%s_l%d'%(args.rnntype,i),
  bidirectional=args.bidirectional))
-if args.dropout > 0 and i < args.num_layers - 1:
-cell.add(mx.rnn.DropoutCell(args.dropout, prefix='lstm_d%d'%i))
+

[incubator-mxnet] branch master updated: add input argument in warpctc layer (#11167)

2018-06-06 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new aafbd17  add input argument in warpctc layer (#11167)
aafbd17 is described below

commit aafbd17460da31a31414a501f99a3e4a2346efe9
Author: Wang Jiajun 
AuthorDate: Thu Jun 7 01:57:50 2018 +0800

add input argument in warpctc layer (#11167)
---
 plugin/warpctc/warpctc.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/plugin/warpctc/warpctc.cc b/plugin/warpctc/warpctc.cc
index 055a6d6..aac36a3 100644
--- a/plugin/warpctc/warpctc.cc
+++ b/plugin/warpctc/warpctc.cc
@@ -41,6 +41,8 @@ Operator *WarpCTCProp::CreateOperator(Context ctx) const {
 DMLC_REGISTER_PARAMETER(WarpCTCParam);
 
 MXNET_REGISTER_OP_PROPERTY(WarpCTC, WarpCTCProp)
+.add_argument("data", "NDArray-or-Symbol", "Input data.")
+.add_argument("label", "NDArray-or-Symbol", "Input label.")
 .describe("warp ctc.")
 .add_arguments(WarpCTCParam::__FIELDS__());
 

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: fix shared_storage free (#11159)

2018-06-05 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 318c689  fix shared_storage free (#11159)
318c689 is described below

commit 318c6899031d024acb28a4937678b85f141f98d8
Author: Joshua Z. Zhang 
AuthorDate: Tue Jun 5 22:57:51 2018 -0700

fix shared_storage free (#11159)

* fix shared_storage free

* fix bracket

* make local ref

* cpplint

* fix tests

* fix tests
---
 python/mxnet/gluon/data/dataloader.py|  2 ++
 src/storage/cpu_shared_storage_manager.h | 10 --
 tests/python/unittest/test_gluon_data.py | 18 ++
 tests/python/unittest/test_ndarray.py|  1 -
 4 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/python/mxnet/gluon/data/dataloader.py 
b/python/mxnet/gluon/data/dataloader.py
index 151b49d..29b9b81 100644
--- a/python/mxnet/gluon/data/dataloader.py
+++ b/python/mxnet/gluon/data/dataloader.py
@@ -57,6 +57,8 @@ else:
 
 def reduce_ndarray(data):
 """Reduce ndarray to shared memory handle"""
+# keep a local ref before duplicating fd
+data = data.as_in_context(context.Context('cpu_shared', 0))
 pid, fd, shape, dtype = data._to_shared_mem()
 if sys.version_info[0] == 2:
 fd = multiprocessing.reduction.reduce_handle(fd)
diff --git a/src/storage/cpu_shared_storage_manager.h 
b/src/storage/cpu_shared_storage_manager.h
index 85c6a35..a52d779 100644
--- a/src/storage/cpu_shared_storage_manager.h
+++ b/src/storage/cpu_shared_storage_manager.h
@@ -174,8 +174,12 @@ void CPUSharedStorageManager::Alloc(Storage::Handle* 
handle) {
   }
 
   if (fid == -1) {
-LOG(FATAL) << "Failed to open shared memory. shm_open failed with error "
-   << strerror(errno);
+if (is_new) {
+  LOG(FATAL) << "Failed to open shared memory. shm_open failed with error "
+ << strerror(errno);
+} else {
+  LOG(FATAL) << "Invalid file descriptor from shared array.";
+}
   }
 
   if (is_new) CHECK_EQ(ftruncate(fid, size), 0);
@@ -216,9 +220,11 @@ void CPUSharedStorageManager::FreeImpl(const 
Storage::Handle& handle) {
   << strerror(errno);
 
 #ifdef __linux__
+  if (handle.shared_id != -1) {
   CHECK_EQ(close(handle.shared_id), 0)
   << "Failed to close shared memory. close failed with error "
   << strerror(errno);
+  }
 #else
   if (count == 0) {
 auto filename = SharedHandleToString(handle.shared_pid, handle.shared_id);
diff --git a/tests/python/unittest/test_gluon_data.py 
b/tests/python/unittest/test_gluon_data.py
index 93160aa..751886b 100644
--- a/tests/python/unittest/test_gluon_data.py
+++ b/tests/python/unittest/test_gluon_data.py
@@ -140,6 +140,16 @@ def test_multi_worker_forked_data_loader():
 def __len__(self):
 return 50
 
+def batchify_list(self, data):
+"""
+return list of ndarray without stack/concat/pad
+"""
+if isinstance(data, (tuple, list)):
+return list(data)
+if isinstance(data, mx.nd.NDArray):
+return [data]
+return data
+
 def batchify(self, data):
 """
 Collate data into batch. Use shared memory for stacking.
@@ -194,6 +204,14 @@ def test_multi_worker_forked_data_loader():
 print(data)
 print('{}:{}'.format(epoch, i))
 
+data = Dummy(True)
+loader = DataLoader(data, batch_size=40, 
batchify_fn=data.batchify_list, num_workers=2)
+for epoch in range(1):
+for i, data in enumerate(loader):
+if i % 100 == 0:
+print(data)
+print('{}:{}'.format(epoch, i))
+
 if __name__ == '__main__':
 import nose
 nose.runmodule()
diff --git a/tests/python/unittest/test_ndarray.py 
b/tests/python/unittest/test_ndarray.py
index 496f80f..a060465 100644
--- a/tests/python/unittest/test_ndarray.py
+++ b/tests/python/unittest/test_ndarray.py
@@ -1304,7 +1304,6 @@ def test_norm(ctx=default_context()):
 assert arr1.shape == arr2.shape
 mx.test_utils.assert_almost_equal(arr1, arr2.asnumpy())
 
-
 if __name__ == '__main__':
 import nose
 nose.runmodule()

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated (b93f90f -> 21997a2)

2018-06-04 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.


from b93f90f  fix doc of sym.RNN (#11135)
 add 21997a2  allow int shape in parameter (#11104)

No new revisions were added by this update.

Summary of changes:
 python/mxnet/gluon/parameter.py | 4 +++-
 tests/python/unittest/test_gluon.py | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: [MXNET-502] Fixing broken feature_extract cpp example (#11114)

2018-06-02 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 39b6bba  [MXNET-502] Fixing broken feature_extract cpp example (#11114)
39b6bba is described below

commit 39b6bbab5e1cabfb22b275ab6c6ece0dda25b2f8
Author: Thomas Delteil 
AuthorDate: Sat Jun 2 14:41:33 2018 -0700

[MXNET-502] Fixing broken feature_extract cpp example (#11114)

* Fix cpp example

* Fix Makefile

* Fix the openCV prep file

* Update run.sh

* Update run.sh

* Update README.md

* update the files

* Update run.sh

* Trigger build

* Trigger build

* Trigger build

* Trigger build

* Trigger build
---
 cpp-package/example/feature_extract/Makefile   | 10 --
 cpp-package/example/feature_extract/README.md  | 10 +++---
 .../example/feature_extract/feature_extract.cpp|  8 
 .../feature_extract/prepare_data_with_opencv.cpp   |  4 ++--
 cpp-package/example/feature_extract/run.sh | 22 +-
 5 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/cpp-package/example/feature_extract/Makefile 
b/cpp-package/example/feature_extract/Makefile
index cc76d05..f598183 100644
--- a/cpp-package/example/feature_extract/Makefile
+++ b/cpp-package/example/feature_extract/Makefile
@@ -16,15 +16,13 @@
 # under the License.
 
 CXX=g++
-BLAS=-L /opt/openblas/lib -lopenblas -DMSHADOW_USE_CBLAS=1 -DMSHADOW_USE_MKL=0 
+BLAS=-L /opt/openblas/lib -lopenblas -DMSHADOW_USE_CBLAS=1 -DMSHADOW_USE_MKL=0
 CUDA=-DMSHADOW_USE_CUDA=1
 OPENCV_CFLAGS=`pkg-config --cflags opencv`
 OPENCV_LDFLAGS=`pkg-config --libs opencv`
 
-#COMMFLAGS=-static -static-libgcc -static-libstdc++
-
-CFLAGS=$(COMMFLAGS) -I ../../include -Wall -O3 -msse3 -funroll-loops 
-Wno-unused-parameter -Wno-unknown-pragmas -fopenmp 
-LDFLAGS=$(COMMFLAGS) -L ../../lib/linux -lmxnet $(BLAS) $(CUDA) -lgomp -pthread
+CFLAGS=$(COMMFLAGS) -I../../../3rdparty/nnvm/include 
-I../../../3rdparty/dmlc-core/include -I ../../include -I ../../../include 
-Wall -O3 -msse3 -funroll-loops -Wno-unused-parameter -Wno-unknown-pragmas 
-fopenmp
+LDFLAGS=$(COMMFLAGS) -L ../../../lib -lmxnet $(BLAS) $(CUDA) -lgomp -pthread
 
 all: feature_extract prepare_data_with_opencv
 
@@ -34,7 +32,7 @@ feature_extract: ./feature_extract.cpp
-rm -f $(basename $@).o
 
 prepare_data_with_opencv: ./prepare_data_with_opencv.cpp
-   $(CXX) -c -std=c++0x $(OPENCV_CFLAGS) $^ 
+   $(CXX) -c -std=c++0x $(OPENCV_CFLAGS) $^
$(CXX) $(basename $@).o -o $@ $(OPENCV_LDFLAGS)
-rm -f $(basename $@).o
 
diff --git a/cpp-package/example/feature_extract/README.md 
b/cpp-package/example/feature_extract/README.md
index 4367a0c..87cfcb4 100644
--- a/cpp-package/example/feature_extract/README.md
+++ b/cpp-package/example/feature_extract/README.md
@@ -1,8 +1,12 @@
 This example shows how to extract features with a pretrained model.
 
-You can first get a pretrained model from 
<https://github.com/dmlc/mxnet-model-gallery/blob/master/imagenet-1k-inception-bn.md>,
-then prepare 2 pictures 1.jpg and 2.jpg to extract by executing `run.sh`.
+Execute `run.sh` to:
+- Download a pretrained model
+- Download sample pictures (`dog.jpg` and `cat.jpg`)
+- Compile the files
+- Execute the featurization on `dog.jpg` and `cat.jpg`
+
 
 Note:
 1. The filename of network parameters may vary, line 67 in 
`feature_extract.cpp` should be updated accordingly.
-2. As the build system has changed a lot, to build this example, you need to 
put the compiled library `libmxnet.so` in `../lib/linux`.
+2. You need to build MXNet from source to get access to the `lib/libmxnet.so` 
or point `LD_LIBRARY_PATH` to where it is installed in your system
diff --git a/cpp-package/example/feature_extract/feature_extract.cpp 
b/cpp-package/example/feature_extract/feature_extract.cpp
index 1886c57..c23623e 100644
--- a/cpp-package/example/feature_extract/feature_extract.cpp
+++ b/cpp-package/example/feature_extract/feature_extract.cpp
@@ -58,13 +58,13 @@ class FeatureExtractor {
   LG< paramters;
-NDArray::Load("./model/Inception_BN-0039.params", 0, &paramters);
+NDArray::Load("./model/Inception-BN-0126.params", 0, &paramters);
 for (const auto &k : paramters) {
   if (k.first.substr(0, 4) == "aux:") {
 auto name = k.first.substr(4, k.first.size() - 4);
@@ -99,7 +99,7 @@ class FeatureExtractor {
 data.Slice(0, 1) -= mean_img;
 data.Slice(1, 2) -= mean_img;
 args_map["data"] = data;
-/*bind the excutor*/
+/*bind the executor*/
 executor = net.SimpleBind(global_ctx, args_map, map(),
   map(), aux_map);
 executor->Forward(false);
@@ -117,7 +117,7 @@ NDArray Data2NDArray() {

[incubator-mxnet] branch piiswrong-patch-1 updated (a9a527c -> 54ed408)

2018-06-01 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a change to branch piiswrong-patch-1
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.


from a9a527c  Update cached_op.cc
 add 54ed408  Update cached_op.cc

No new revisions were added by this update.

Summary of changes:
 src/imperative/cached_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch piiswrong-patch-1 updated (74824e0 -> a9a527c)

2018-05-31 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a change to branch piiswrong-patch-1
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.


from 74824e0  Update cached_op.cc
 add a9a527c  Update cached_op.cc

No new revisions were added by this update.

Summary of changes:
 src/imperative/cached_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: added ravel/unravel operators (#11025)

2018-05-30 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 5109b00  added ravel/unravel operators (#11025)
5109b00 is described below

commit 5109b00b2473fa26de036ed21775b214e09d1bbc
Author: moin 
AuthorDate: Wed May 30 20:59:16 2018 +0200

added ravel/unravel operators (#11025)
---
 docs/api/python/ndarray/ndarray.md |   2 +
 docs/api/python/symbol/symbol.md   |   2 +
 src/operator/tensor/ravel.cc   |  81 
 src/operator/tensor/ravel.cu   |  36 +++
 src/operator/tensor/ravel.h| 166 +
 tests/python/unittest/test_operator.py |  15 +++
 6 files changed, 302 insertions(+)

diff --git a/docs/api/python/ndarray/ndarray.md 
b/docs/api/python/ndarray/ndarray.md
index 5bc3c52..323344d 100644
--- a/docs/api/python/ndarray/ndarray.md
+++ b/docs/api/python/ndarray/ndarray.md
@@ -430,6 +430,8 @@ The `ndarray` package provides several classes:
 one_hot
 pick
 where
+ravel_multi_index
+unravel_index
 ```
 
 ## Mathematical functions
diff --git a/docs/api/python/symbol/symbol.md b/docs/api/python/symbol/symbol.md
index f1e90a0..cc63e13 100644
--- a/docs/api/python/symbol/symbol.md
+++ b/docs/api/python/symbol/symbol.md
@@ -291,6 +291,8 @@ Composite multiple symbols into a new one by an operator.
 Symbol.take
 Symbol.one_hot
 Symbol.pick
+Symbol.ravel_multi_index
+Symbol.unravel_index
 ```
 
 ### Get internal and output symbol
diff --git a/src/operator/tensor/ravel.cc b/src/operator/tensor/ravel.cc
new file mode 100644
index 000..94e3894
--- /dev/null
+++ b/src/operator/tensor/ravel.cc
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file ravel.cc
+ * \brief CPU-operators for ravel/unravel.
+ */
+#include "./ravel.h"
+
+namespace mxnet {
+namespace op {
+
+DMLC_REGISTER_PARAMETER(RavelParam);
+
+NNVM_REGISTER_OP(_ravel_multi_index)
+.add_alias("ravel_multi_index")
+.describe(R"code(Converts a batch of index arrays into an array of flat 
indices. The operator follows numpy conventions so a single multi index is 
given by a column of the input matrix. 
+
+Examples::
+   
+   A = [[3,6,6],[4,5,1]]
+   ravel(A, shape=(7,6)) = [22,41,37]
+
+)code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser)
+.set_attr("FResourceRequest", [](const NodeAttrs& attrs)
+  { return std::vector{ResourceRequest::kTempSpace}; })
+.set_attr("FListInputNames", [](const NodeAttrs& attrs)
+  { return std::vector{"data"}; } )
+.set_attr("FInferShape", RavelOpShape)
+.set_attr("FInferType", ElemwiseType<1, 1>)
+.set_attr("FCompute", RavelForward)
+.set_attr("FGradient", MakeZeroGradNodes)
+.add_argument("data", "NDArray-or-Symbol", "Batch of multi-indices")
+.add_arguments(RavelParam::__FIELDS__());
+
+NNVM_REGISTER_OP(_unravel_index)
+.add_alias("unravel_index")
+.describe(R"code(Converts an array of flat indices into a batch of index 
arrays. The operator follows numpy conventions so a single multi index is given 
by a column of the output matrix.
+
+Examples::
+
+   A = [22,41,37]
+   unravel(A, shape=(7,6)) = [[3,6,6],[4,5,1]]
+
+)code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser)
+.set_attr("FResourceRequest", [](const NodeAttrs& attrs)
+  { return std::vector{ResourceRequest::kTempSpace}; })
+.set_attr("FListInputNames", [](const NodeAttrs& attrs)
+  { return std::vector{"data"}; } )
+.set_attr("FInferShape", UnravelOpShape)
+.set_attr("FInferType", ElemwiseType<1, 1>)
+.set_attr("FCompute", UnravelForward)
+.set_attr("FGradient", MakeZeroGradNodes)
+.add_argument("data", "NDArray-or-Symbol", "Array of flat indices")
+.a

[incubator-mxnet] branch master updated: setup.sh and fix visualization in dqn_run_test.py (#11051)

2018-05-30 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 005f677  setup.sh and fix visualization in dqn_run_test.py (#11051)
005f677 is described below

commit 005f67759fac7bcf451e31b42c30b6c6ca24586a
Author: Pedro Larroy <928489+lar...@users.noreply.github.com>
AuthorDate: Thu May 31 03:36:29 2018 +0900

setup.sh and fix visualization in dqn_run_test.py (#11051)

fix type error: type of action needs to be int
---
 example/reinforcement-learning/dqn/README.md   | Bin 2146 -> 2230 bytes
 example/reinforcement-learning/dqn/dqn_run_test.py |   8 +---
 example/reinforcement-learning/dqn/setup.sh|   7 ++-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/example/reinforcement-learning/dqn/README.md 
b/example/reinforcement-learning/dqn/README.md
index fd32667..4547904 100644
Binary files a/example/reinforcement-learning/dqn/README.md and 
b/example/reinforcement-learning/dqn/README.md differ
diff --git a/example/reinforcement-learning/dqn/dqn_run_test.py 
b/example/reinforcement-learning/dqn/dqn_run_test.py
old mode 100644
new mode 100755
index 2abf273..e8f36b9
--- a/example/reinforcement-learning/dqn/dqn_run_test.py
+++ b/example/reinforcement-learning/dqn/dqn_run_test.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -89,8 +91,8 @@ def calculate_avg_reward(game, qnet, test_steps=125000, 
exploartion=0.05):
 current_state = game.current_state()
 state = nd.array(current_state.reshape((1,) + 
current_state.shape),
  ctx=qnet.ctx) / float(255.0)
-action = nd.argmax_channel(
-qnet.forward(is_train=False, data=state)[0]).asscalar()
+action = int(nd.argmax_channel(
+qnet.forward(is_train=False, 
data=state)[0]).asscalar())
 else:
 action = npy_rng.randint(action_num)
 
@@ -120,7 +122,7 @@ def main():
 help='Running Context. E.g `-c gpu` or `-c gpu1` or 
`-c cpu`')
 parser.add_argument('-e', '--epoch-range', required=False, type=str, 
default='22',
 help='Epochs to run testing. E.g `-e 0,80`, `-e 
0,80,2`')
-parser.add_argument('-v', '--visualization', required=False, type=int, 
default=0,
+parser.add_argument('-v', '--visualization', action='store_true',
 help='Visualize the runs.')
 parser.add_argument('--symbol', required=False, type=str, default="nature",
 help='type of network, nature or nips')
diff --git a/example/reinforcement-learning/dqn/setup.sh 
b/example/reinforcement-learning/dqn/setup.sh
index 3fcfacb..3069fef 100755
--- a/example/reinforcement-learning/dqn/setup.sh
+++ b/example/reinforcement-learning/dqn/setup.sh
@@ -22,9 +22,14 @@ set -x
 
 pip install opencv-python
 pip install scipy
+pip install pygame
 
 # Install arcade learning environment
-sudo apt-get install libsdl1.2-dev libsdl-gfx1.2-dev libsdl-image1.2-dev cmake
+if [[ "$OSTYPE" == "linux-gnu" ]]; then
+sudo apt-get install libsdl1.2-dev libsdl-gfx1.2-dev libsdl-image1.2-dev 
cmake
+elif [[ "$OSTYPE" == "darwin"* ]]; then
+brew install sdl sdl_image sdl_mixer sdl_ttf portmidi
+fi
 git clone git@github.com:mgbellemare/Arcade-Learning-Environment.git || true
 pushd .
 cd Arcade-Learning-Environment

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: Test/mkl dnn act (#11026)

2018-05-29 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 92286c9  Test/mkl dnn act (#11026)
92286c9 is described below

commit 92286c9106dd63d2bfd062f9abb0e53b071a46e4
Author: Alexander Zai 
AuthorDate: Tue May 29 17:48:33 2018 -0700

Test/mkl dnn act (#11026)

* add mkl act unit test

* fix operator name

* use custom ndarray init

* func missing param

* add init fn for act operator test

* remove extra white space

* fix fetch relu operator

* fix get  relu operator name

* add assert abs in verify fn

* remove unused operator

* cast blob ptr to float

* use parsed param

* use attr_parser

* fix header order

* update test fn name

* use relu fn

* add kFComputeEx dispatch

* init posneg mklarray

* fix generating rnd pos neg ints

* output arrays are rnd generated

* test that getinputarrays creates view and mkldnn arrays

* add more output types

* fix typo

* fix gettestput test

* create arrattr struct to display arr info

* refactor initarray

* print arr description in verify fn

* use long int string interpolation

* fix alias params

* iterate over dims

* print c_str

* print output info

* improve print message

* improve print

* fix new lines in output

* refactor print messages

* fix typos

* fix lint issues

* fix rebase

* pass ndarray as ptr

* store copy of ndarray in attrs obj

* fix rem inits

* fix dispatch size

* move print earlier

* use createmkldnnmem helper fun

* fix lint

* refactor if else statement

* use buffer ndarray

* fix spacing

* fix refactor

* revert sum refactor

* use fallback compute

* fix typo

* fix lint

* use fallbackcompute fn for act operator

* convert activation impl funcs to fxcompute std

* remove unused var

* move unused variable

* fix indent
---
 src/operator/nn/activation-inl.h |  44 +++---
 src/operator/nn/activation.cc|   6 +-
 src/operator/nn/mkldnn/mkldnn_act.cc |  16 ++-
 tests/cpp/operator/mkldnn.cc | 267 +++
 4 files changed, 213 insertions(+), 120 deletions(-)

diff --git a/src/operator/nn/activation-inl.h b/src/operator/nn/activation-inl.h
index a9f6dbe..e6f8915 100644
--- a/src/operator/nn/activation-inl.h
+++ b/src/operator/nn/activation-inl.h
@@ -120,59 +120,62 @@ void ActivationBackward(const OpContext &ctx, const TBlob 
&out_grad,
 }
 
 template
-void ActivationComputeImpl(const ActivationParam &param, const OpContext &ctx,
-   const TBlob &input, OpReqType req, const TBlob 
&output) {
+void ActivationComputeImpl(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
+   const std::vector& inputs, const 
std::vector& req,
+   const std::vector& outputs) {
+  const ActivationParam& param = nnvm::get(attrs.parsed);
   switch (param.act_type) {
 case activation::kReLU:
   ActivationForward(
-  ctx, input, req, output);
+  ctx, inputs[0], req[0], outputs[0]);
   break;
 case activation::kSigmoid:
   ActivationForward(
-  ctx, input, req, output);
+  ctx, inputs[0], req[0], outputs[0]);
   break;
 case activation::kTanh:
   ActivationForward(
-  ctx, input, req, output);
+  ctx, inputs[0], req[0], outputs[0]);
   break;
 case activation::kSoftReLU:
   ActivationForward(
-  ctx, input, req, output);
+  ctx, inputs[0], req[0], outputs[0]);
   break;
 case activation::kSoftSign:
   ActivationForward(
-  ctx, input, req, output);
-  break;
+  ctx, inputs[0], req[0], outputs[0]);
+  break;
 default:
   LOG(FATAL) << "unknown activation type";
   }
 }
 
 template
-void ActivationGradComputeImpl(const ActivationParam &param, const OpContext 
&ctx,
-   const TBlob &out_grad, const TBlob &out_data,
-   OpReqType req, const TBlob &output) {
+void ActivationGradComputeImpl(const nnvm::NodeAttrs& attrs, const OpContext 
&ctx,
+   const std::vector& inputs, const 
std::vector& req,
+   const std::vector& outputs) {
+  const ActivationParam& param = nnvm::get(attrs.parsed);
   switch (param.act_type) {

[incubator-mxnet] branch master updated: Fixing the xml markup (#11068)

2018-05-29 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 9514a1e  Fixing the xml markup (#11068)
9514a1e is described below

commit 9514a1e39f8356f8fee6202cd86c8f20fbf301b6
Author: kpmurali <37911926+kpmur...@users.noreply.github.com>
AuthorDate: Tue May 29 17:36:35 2018 -0700

Fixing the xml markup (#11068)
---
 docs/tutorials/scala/mxnet_scala_on_intellij.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/tutorials/scala/mxnet_scala_on_intellij.md 
b/docs/tutorials/scala/mxnet_scala_on_intellij.md
index 8cdf924..2520b90 100644
--- a/docs/tutorials/scala/mxnet_scala_on_intellij.md
+++ b/docs/tutorials/scala/mxnet_scala_on_intellij.md
@@ -145,7 +145,7 @@ The project's `pom.xml` will be open for editing.
 
   - Specify project profiles and platforms in `pom.xml` by pasting the 
following content below the closing `properties` tag:
 
-```xml
+```
 
 
 osx-x86_64-cpu
@@ -172,7 +172,7 @@ The project's `pom.xml` will be open for editing.
 
   - Specify project dependencies in `pom.xml` adding the dependencies listed 
below. Place them inside the `dependencies` tag:
 
-```xml
+```
 
   
   

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: fix parameters name inconsistent for Proposal OP and Multi Proposal OP (#10242)

2018-05-29 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 33de266  fix parameters name inconsistent for Proposal OP and Multi 
Proposal OP (#10242)
33de266 is described below

commit 33de266342a210cbeaee53107bf766c3d1ceb0d5
Author: JackieWu 
AuthorDate: Wed May 30 06:38:16 2018 +0800

fix parameters name inconsistent for Proposal OP and Multi Proposal OP 
(#10242)
---
 src/operator/contrib/multi_proposal.cc | 2 +-
 src/operator/contrib/proposal.cc   | 2 +-
 tests/python/gpu/test_operator_gpu.py  | 4 ++--
 tests/python/unittest/test_operator.py | 6 +++---
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/operator/contrib/multi_proposal.cc 
b/src/operator/contrib/multi_proposal.cc
index 0c52b9b..3793f27 100644
--- a/src/operator/contrib/multi_proposal.cc
+++ b/src/operator/contrib/multi_proposal.cc
@@ -497,7 +497,7 @@ DMLC_REGISTER_PARAMETER(MultiProposalParam);
 
 MXNET_REGISTER_OP_PROPERTY(_contrib_MultiProposal, MultiProposalProp)
 .describe("Generate region proposals via RPN")
-.add_argument("cls_score", "NDArray-or-Symbol", "Score of how likely proposal 
is object.")
+.add_argument("cls_prob", "NDArray-or-Symbol", "Score of how likely proposal 
is object.")
 .add_argument("bbox_pred", "NDArray-or-Symbol", "BBox Predicted deltas from 
anchors for proposals")
 .add_argument("im_info", "NDArray-or-Symbol", "Image size and scale.")
 .add_arguments(MultiProposalParam::__FIELDS__());
diff --git a/src/operator/contrib/proposal.cc b/src/operator/contrib/proposal.cc
index fa28c26..c582fb0 100644
--- a/src/operator/contrib/proposal.cc
+++ b/src/operator/contrib/proposal.cc
@@ -459,7 +459,7 @@ DMLC_REGISTER_PARAMETER(ProposalParam);
 
 MXNET_REGISTER_OP_PROPERTY(_contrib_Proposal, ProposalProp)
 .describe("Generate region proposals via RPN")
-.add_argument("cls_score", "NDArray-or-Symbol", "Score of how likely proposal 
is object.")
+.add_argument("cls_prob", "NDArray-or-Symbol", "Score of how likely proposal 
is object.")
 .add_argument("bbox_pred", "NDArray-or-Symbol", "BBox Predicted deltas from 
anchors for proposals")
 .add_argument("im_info", "NDArray-or-Symbol", "Image size and scale.")
 .add_arguments(ProposalParam::__FIELDS__());
diff --git a/tests/python/gpu/test_operator_gpu.py 
b/tests/python/gpu/test_operator_gpu.py
index d5e0262..7c3d670 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -1748,7 +1748,7 @@ def test_multi_proposal_op():
 '''
 cls_prob, bbox_pred, im_info = get_new_data(batch_size, mx.cpu(0))
 rois_cpu, score_cpu = op(
-cls_score = cls_prob,
+cls_prob = cls_prob,
 bbox_pred = bbox_pred,
 im_info = im_info,
 feature_stride = feature_stride,
@@ -1767,7 +1767,7 @@ def test_multi_proposal_op():
 im_info_gpu = im_info.as_in_context(gpu_ctx)
 
 rois_gpu, score_gpu = op(
-cls_score = cls_prob_gpu,
+cls_prob = cls_prob_gpu,
 bbox_pred = bbox_pred_gpu,
 im_info = im_info_gpu,
 feature_stride = feature_stride,
diff --git a/tests/python/unittest/test_operator.py 
b/tests/python/unittest/test_operator.py
index 923a453..69516bc 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -5818,7 +5818,7 @@ def test_multi_proposal_op():
 rpn_min_size = 16
 
 batch_size = 20
-feat_len = 14
+feat_len = (1000 + 15) // 16
 H, W = feat_len, feat_len
 num_anchors = len(scales) * len(ratios)
 count_anchors = H * W * num_anchors
@@ -5852,7 +5852,7 @@ def test_multi_proposal_op():
 single_score = []
 for i in range(batch_size):
 rois, score = mx.nd.contrib.Proposal(
-cls_score = get_sub(cls_prob, i),
+cls_prob = get_sub(cls_prob, i),
 bbox_pred = get_sub(bbox_pred, i),
 im_info = get_sub(im_info, i),
 feature_stride = feature_stride,
@@ -5866,7 +5866,7 @@ def test_multi_proposal_op():
 single_score.append(score)
 
 multi_proposal, multi_score = mx.nd.contrib.MultiProposal(
-cls_score = cls_prob,
+cls_prob = cls_prob,
 bbox_pred = bbox_pred,
 im_info = im_info,
 feature_stride = feature_stride,

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: [MXNET-491] Use depthwise convolution by cuDNNv7 if available, updated version (#11076)

2018-05-29 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 805a71a  [MXNET-491] Use depthwise convolution by cuDNNv7 if 
available, updated version (#11076)
805a71a is described below

commit 805a71acf46e60e6c1f289d3d0c80cefaf1b
Author: nihui 
AuthorDate: Wed May 30 02:35:45 2018 +0800

[MXNET-491] Use depthwise convolution by cuDNNv7 if available, updated 
version (#11076)

* Use group convolution by cuDNNv7 if available

* Fix coding style

* ident-- for #if statements

* more ident--

* more ident--

* prefer cudnnv7 depthwise convolution
---
 src/operator/nn/convolution.cu| 10 ++-
 src/operator/nn/cudnn/cudnn_convolution-inl.h | 92 +++
 2 files changed, 100 insertions(+), 2 deletions(-)

diff --git a/src/operator/nn/convolution.cu b/src/operator/nn/convolution.cu
index 045e570..65a320d 100644
--- a/src/operator/nn/convolution.cu
+++ b/src/operator/nn/convolution.cu
@@ -97,7 +97,9 @@ void ConvolutionCompute(const nnvm::NodeAttrs& attrs,
   op.Forward(ctx, inputs, req, outputs);
 })
 return;
-  } else if (param.num_filter == param.num_group &&
+  }
+#if MXNET_USE_CUDNN == 0 || CUDNN_MAJOR < 7
+  if (param.num_filter == param.num_group &&
   param.layout.value() == mshadow::kNCHW &&
   param.num_filter == inputs[conv::kData].shape_[1] &&
   param.kernel.ndim() == 2 &&
@@ -112,6 +114,7 @@ void ConvolutionCompute(const nnvm::NodeAttrs& attrs,
 op.Forward(ctx, inputs, req, outputs);
 return;
   }
+#endif
 
 #if MXNET_USE_CUDNN == 1
   // On fp16-I/O instances, use fp32 compute (i.e. pseudo-fp16).
@@ -167,7 +170,9 @@ void ConvolutionGradCompute(const nnvm::NodeAttrs& 
attrs,
   op.Backward(ctx, std::vector{out_grad}, in_data, req, in_grad);
 })
 return;
-  } else if (param.num_filter == param.num_group &&
+  }
+#if MXNET_USE_CUDNN == 0 || CUDNN_MAJOR < 7
+  if (param.num_filter == param.num_group &&
   param.layout.value() == mshadow::kNCHW &&
   param.num_filter == in_data[conv::kData].shape_[1] &&
   param.kernel.ndim() == 2 &&
@@ -183,6 +188,7 @@ void ConvolutionGradCompute(const nnvm::NodeAttrs& 
attrs,
 op.Backward(ctx, std::vector{out_grad}, in_data, req, in_grad);
 return;
   }
+#endif
 
 #if MXNET_USE_CUDNN == 1
   // On fp16-I/O instances, use fp32 compute (i.e. pseudo-fp16).
diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h 
b/src/operator/nn/cudnn/cudnn_convolution-inl.h
index ca60c99..4b1cbbe 100644
--- a/src/operator/nn/cudnn/cudnn_convolution-inl.h
+++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h
@@ -137,6 +137,35 @@ class CuDNNConvolutionOp {
 DType *wmat_ptr = GetNdPtr(in_data[conv::kWeight], param_.kernel.ndim() + 
2, s);
 DType *out_ptr = GetNdPtr(out_data[conv::kOut], param_.kernel.ndim() + 2, 
s);
 
+#if CUDNN_MAJOR >= 7
+typename DataType::ScaleType alpha = 1.0f;
+typename DataType::ScaleType beta = 0.0f;
+typename DataType::ScaleType beta_add = 1.0f;
+CUDNN_CALL(cudnnConvolutionForward(s->dnn_handle_,
+&alpha,
+in_desc_,
+data_ptr,
+filter_desc_,
+wmat_ptr,
+forward_conv_desc_,
+forward_algo_.AlgoNumber(),
+workspace.dptr_,
+workspace_size,
+req[conv::kOut] == kAddTo? &beta_add : &beta,
+out_desc_,
+  out_ptr));
+
+if (!param_.no_bias) {
+  Tensor bias = in_data[conv::kBias].get(s);
+  CUDNN_CALL(cudnnAddTensor(s->dnn_handle_,
+  &alpha,
+  bias_desc_,
+  bias.dptr_,
+  &beta_add,
+  out_desc_,
+  out_ptr));
+}
+#else
 for (uint32_t g = 0; g < param_.num_group; ++g) {
   typename DataType::ScaleType alpha = 1.0f;
   typename DataType::ScaleType beta = 0.0f;
@@ -177,6 +206,7 @@ class CuDNNConvolutionOp {
 #endif
   }
 }
+#endif  // CUDNN_MAJOR >= 7
   }
 
   void Backward(const OpContext &ctx,
@@ -202,6 +232,51 @@ class CuDNNConvolutionOp {
 GetTempSize(ctx);
 Tensor workspace = AllocateTempWorkspace(ctx, 
backward_workspace_byte_);
 size_t workspace_size = TensorSizeBytes(workspace);
+#if CUDNN_MAJOR >= 7
+typename DataType::ScaleType alpha = 1.0f;
+typename DataType::ScaleType beta = 0.0f;
+typename DataType::ScaleType beta_add = 1.0f;
+if (!param_.no_bias &&

[incubator-mxnet] branch master updated: Support for axis parameter in linalg.gemm (#10864)

2018-05-29 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 4ac76c8  Support for axis parameter in linalg.gemm (#10864)
4ac76c8 is described below

commit 4ac76c89da6d4d8feef629949dc0f9534b216e3d
Author: moin 
AuthorDate: Tue May 29 20:15:11 2018 +0200

Support for axis parameter in linalg.gemm (#10864)
---
 src/operator/linalg.h  |   7 +
 src/operator/linalg_impl.h | 276 ++---
 src/operator/tensor/la_op.cc   |  39 -
 src/operator/tensor/la_op.cu   |   8 +-
 src/operator/tensor/la_op.h| 206 +---
 src/operator/tensor/la_op_inline.h |  66 
 tests/python/unittest/test_operator.py |  33 +++-
 7 files changed, 447 insertions(+), 188 deletions(-)

diff --git a/src/operator/linalg.h b/src/operator/linalg.h
index aee67d7..dc59400 100644
--- a/src/operator/linalg.h
+++ b/src/operator/linalg.h
@@ -64,6 +64,13 @@ void linalg_batch_gemm(const Tensor& A, const 
Tensor& C, DType alpha, DType beta,
bool tA, bool tB, Stream *s = 0);
 
+// Version of batch gemmm where rows are indexed at axis 1 and columns at axis 
3.
+template
+void linalg_batch_gemm(const Tensor& A, const Tensor& B,
+   const Tensor& C, DType alpha, DType beta,
+   bool tA, bool tB, Stream *s = 0);
+
+
 template
 inline void linalg_gemm(const Tensor& A,
 const Tensor& B,
diff --git a/src/operator/linalg_impl.h b/src/operator/linalg_impl.h
index 151db60..08d2add 100644
--- a/src/operator/linalg_impl.h
+++ b/src/operator/linalg_impl.h
@@ -56,6 +56,11 @@ inline void check_gemm(const Tensor& A, const 
Tensor
+void linalg_gemm_axis(const Tensor& A, const Tensor& B,
+  const Tensor& C, DType alpha, DType beta,
+  bool tA, bool tB, Stream *s = 0);
+
 #if (MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1)
 
 #define LINALG_CPU_GEMM(fname, DType) \
@@ -80,6 +85,38 @@ void linalg_batch_gemm(const Tensor& A, const Tensor<
   } \
 }
 
+// Batched gemm where the batch coordinate is given by the second axis.
+#define LINALG_CPU_GEMM_AXIS(fname, DType) \
+template<> inline \
+void linalg_gemm_axis(const Tensor& A, const 
Tensor& B, \
+  const Tensor& C, DType alpha, 
DType beta, \
+  bool tA, bool tB, Stream *s) { \
+  linalg_check_batch_size(A.size(1), B.size(1), C.size(1)); \
+  for (index_t i = 0; i < A.size(1); ++i) { \
+ cblas_##fname(CblasRowMajor, (tA ? CblasTrans : CblasNoTrans), \
+   (tB ? CblasTrans : CblasNoTrans), \
+   C.size(0), C.size(2), (tA ? A.size(0) : A.size(2)), alpha, \
+   A.dptr_+i*A.stride_, A.size(1)*A.stride_, \
+   B.dptr_+i*B.stride_, B.size(1)*B.stride_, beta, \
+   C.dptr_+i*C.stride_, C.size(1)*C.stride_); \
+  } \
+}
+
+LINALG_CPU_GEMM_AXIS(sgemm, float)
+LINALG_CPU_GEMM_AXIS(dgemm, double)
+
+// Version where matrix rows are given by the second axis.
+#define LINALG_XPU_BATCH_GEMM_AXIS(xpu, DType) \
+template<> inline \
+void linalg_batch_gemm(const Tensor& A, const 
Tensor& B, \
+   const Tensor& C, DType 
alpha, DType beta, \
+   bool tA, bool tB, Stream *s) { \
+  linalg_check_batch_size(A.size(0), B.size(0), C.size(0)); \
+  for (index_t i = 0; i < A.size(0); ++i) { \
+linalg_gemm_axis(A[i], B[i], C[i], alpha, beta, tA, tB, s); \
+  } \
+}
+
 #else
 
 #define LINALG_CPU_GEMM(fname, DType) \
@@ -98,6 +135,14 @@ void linalg_batch_gemm(const Tensor& A, const Tensor<
   LOG(FATAL) << "linalg_batch_gemm not implemented by mxnet for cpu, needs 
cblas!"; \
 }
 
+#define LINALG_XPU_BATCH_GEMM_AXIS(xpu, DType) \
+template<> inline \
+void linalg_batch_gemm(const Tensor& A, const 
Tensor& B, \
+   const Tensor& C, DType 
alpha, DType beta, \
+   bool tA, bool tB, Stream *s) { \
+  LOG(FATAL) << "linalg_batch_gemm not implemented by mxnet for cpu, needs 
cblas!"; \
+}
+
 #endif  // MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1
 
 LINALG_CPU_GEMM(sgemm, float)
@@ -106,6 +151,9 @@ LINALG_CPU_GEMM(dgemm, double)
 LINALG_XPU_BATCH_GEMM(cpu, float)
 LINALG_XPU_BATCH_GEMM(cpu, double)
 
+LINALG_XPU_BATCH_GEMM_AXIS(cpu, float)
+LINALG_XPU_BATCH_GEMM_AXIS(cpu, double)
+
 // Specialization of linalg_gemm for DType=mshadow::half::half_t.
 template<> inline
 void linalg_gemm(const Tensor& A,
@@ -140,6 +188,28 @@ void linalg_gemm(const Tensor&

[incubator-mxnet] branch master updated: Update conv_layers.py documentation (#11082)

2018-05-29 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 9ab0d2b  Update conv_layers.py documentation (#11082)
9ab0d2b is described below

commit 9ab0d2bebea7f1b65e395f427bd3388c568de934
Author: Thomas Delteil 
AuthorDate: Tue May 29 10:26:39 2018 -0700

Update conv_layers.py documentation (#11082)

* Update conv_layers.py

* Update conv_layers.py

* Update conv_layers.py
---
 python/mxnet/gluon/nn/conv_layers.py | 36 
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/python/mxnet/gluon/nn/conv_layers.py 
b/python/mxnet/gluon/nn/conv_layers.py
index 7b4a6be..2fbf7d8 100644
--- a/python/mxnet/gluon/nn/conv_layers.py
+++ b/python/mxnet/gluon/nn/conv_layers.py
@@ -420,15 +420,19 @@ class Conv1DTranspose(_Conv):
 channels : int
 The dimensionality of the output space, i.e. the number of output
 channels (filters) in the convolution.
-kernel_size :int or tuple/list of 3 int
+kernel_size :int or tuple/list of 1 int
 Specifies the dimensions of the convolution window.
-strides : int or tuple/list of 3 int,
+strides : int or tuple/list of 1 int
 Specify the strides of the convolution.
-padding : int or a tuple/list of 3 int,
+padding : int or a tuple/list of 1 int,
 If padding is non-zero, then the input is implicitly zero-padded
 on both sides for padding number of points
-dilation : int or tuple/list of 3 int
-Specifies the dilation rate to use for dilated convolution.
+output_padding: int or a tuple/list of 1 int
+Controls the amount of implicit zero-paddings on both sides of the
+output for output_padding number of points for each dimension.
+dilation : int or tuple/list of 1 int
+Controls the spacing between the kernel points; also known as the
+a trous algorithm
 groups : int
 Controls the connections between inputs and outputs.
 At groups=1, all inputs are convolved to all outputs.
@@ -503,15 +507,19 @@ class Conv2DTranspose(_Conv):
 channels : int
 The dimensionality of the output space, i.e. the number of output
 channels (filters) in the convolution.
-kernel_size :int or tuple/list of 3 int
+kernel_size :int or tuple/list of 2 int
 Specifies the dimensions of the convolution window.
-strides : int or tuple/list of 3 int,
+strides : int or tuple/list of 2 int
 Specify the strides of the convolution.
-padding : int or a tuple/list of 3 int,
+padding : int or a tuple/list of 2 int,
 If padding is non-zero, then the input is implicitly zero-padded
 on both sides for padding number of points
-dilation : int or tuple/list of 3 int
-Specifies the dilation rate to use for dilated convolution.
+output_padding: int or a tuple/list of 2 int
+Controls the amount of implicit zero-paddings on both sides of the
+output for output_padding number of points for each dimension.
+dilation : int or tuple/list of 2 int
+Controls the spacing between the kernel points; also known as the
+a trous algorithm
 groups : int
 Controls the connections between inputs and outputs.
 At groups=1, all inputs are convolved to all outputs.
@@ -593,13 +601,17 @@ class Conv3DTranspose(_Conv):
 channels (filters) in the convolution.
 kernel_size :int or tuple/list of 3 int
 Specifies the dimensions of the convolution window.
-strides : int or tuple/list of 3 int,
+strides : int or tuple/list of 3 int
 Specify the strides of the convolution.
 padding : int or a tuple/list of 3 int,
 If padding is non-zero, then the input is implicitly zero-padded
 on both sides for padding number of points
+output_padding: int or a tuple/list of 3 int
+Controls the amount of implicit zero-paddings on both sides of the
+output for output_padding number of points for each dimension.
 dilation : int or tuple/list of 3 int
-Specifies the dilation rate to use for dilated convolution.
+Controls the spacing between the kernel points; also known as the
+a trous algorithm.
 groups : int
 Controls the connections between inputs and outputs.
 At groups=1, all inputs are convolved to all outputs.

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: [MXNET-374] handle row_sparse weight in parameter and trainer (#11001)

2018-05-29 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 482e50b  [MXNET-374] handle row_sparse weight in parameter and trainer 
(#11001)
482e50b is described below

commit 482e50bbbc429409a792ac4664127f34a226cea3
Author: Haibin Lin 
AuthorDate: Tue May 29 10:21:39 2018 -0700

[MXNET-374] handle row_sparse weight in parameter and trainer (#11001)

* + rsp parameter

* draft

* Fix optimizer pickle

* refactor and document

* add test for save load with cast_stype

* refactor trainer tests

* add test

* add back test

* raise error for load params

* add comment

* remove print

* fix doc

* CR comments

* CR comments

* change error

* remove cast stype

* fix test

* add reset kvstore to trainer

* lint

* add test to CI

* add more checks
---
 ci/docker/runtime_functions.sh  |   1 +
 python/mxnet/gluon/block.py |   9 ++
 python/mxnet/gluon/parameter.py | 123 ++--
 python/mxnet/gluon/trainer.py   | 118 +++
 python/mxnet/model.py   |  19 +++
 src/operator/tensor/indexing_op.h   |   6 +
 tests/nightly/dist_sync_kvstore.py  |  27 +++-
 tests/python/unittest/test_gluon.py | 220 
 tests/python/unittest/test_gluon_trainer.py | 200 +
 9 files changed, 585 insertions(+), 138 deletions(-)

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 7abe767..10bca17 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -544,6 +544,7 @@ integrationtest_ubuntu_gpu_dist_kvstore() {
 ../../tools/launch.py -n 7 --launcher local python dist_sync_kvstore.py
 ../../tools/launch.py -n 7 --launcher local python dist_sync_kvstore.py 
--no-multiprecision
 ../../tools/launch.py -n 7 --launcher local python 
dist_device_sync_kvstore.py
+../../tools/launch.py -n 7 --launcher local python dist_sync_kvstore.py 
--type=gluon
 }
 
 test_ubuntu_cpu_python2() {
diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py
index dbe3c5e..4b37f43 100644
--- a/python/mxnet/gluon/block.py
+++ b/python/mxnet/gluon/block.py
@@ -606,6 +606,7 @@ class HybridBlock(Block):
 
 Refer `Hybrid tutorial <http://mxnet.io/tutorials/gluon/hybrid.html>`_ to 
see
 the end-to-end usage.
+
 """
 def __init__(self, prefix=None, params=None):
 super(HybridBlock, self).__init__(prefix=prefix, params=params)
@@ -879,6 +880,14 @@ class SymbolBlock(HybridBlock):
 "Input symbols must be variable, but %s is an output of 
operators"%str(i)
 input_names.add(i.name)
 
+# check if any symbol is row_sparse
+row_sparse_storage = 
ndarray.ndarray._STORAGE_TYPE_STR_TO_ID['row_sparse']
+for i in out:
+for j in i.get_internals():
+assert(j.attr("__storage_type__") != str(row_sparse_storage)), 
\
+"SymbolBlock doesn't support Parameter '%s' because its 
storage " \
+"type is 'row_sparse'." % j.name
+
 for i in out.list_arguments():
 if i not in input_names:
 self.params.get(i, allow_deferred_init=True)
diff --git a/python/mxnet/gluon/parameter.py b/python/mxnet/gluon/parameter.py
index c7cbccc..3265fef 100644
--- a/python/mxnet/gluon/parameter.py
+++ b/python/mxnet/gluon/parameter.py
@@ -81,6 +81,8 @@ class Parameter(object):
 Weight decay multiplier (L2 regularizer coefficient). Works similar to 
lr_mult.
 init : Initializer, default None
 Initializer of this parameter. Will use the global initializer by 
default.
+stype: {'default', 'row_sparse', 'csr'}, defaults to 'default'.
+The storage type of the parameter.
 grad_stype: {'default', 'row_sparse', 'csr'}, defaults to 'default'.
 The storage type of the parameter's gradient.
 
@@ -99,12 +101,13 @@ class Parameter(object):
 """
 def __init__(self, name, grad_req='write', shape=None, dtype=mx_real_t,
  lr_mult=1.0, wd_mult=1.0, init=None, 
allow_deferred_init=False,
- differentiable=True, grad_stype='default'):
+ differentiable=True, stype='default', grad_stype='default'):
 self._var = None
 self._data = None
 self._grad = None
 self._ctx_list = None

[incubator-mxnet] branch master updated: Fix bugs in MKLDNN. (#10979)

2018-05-25 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new d497b37  Fix bugs in MKLDNN. (#10979)
d497b37 is described below

commit d497b37876ffb5d9bc01812bf7f295039bfe35f6
Author: Da Zheng 
AuthorDate: Fri May 25 10:11:45 2018 -0700

Fix bugs in MKLDNN. (#10979)

* Fix bugs in MKLDNN.

* add more test cases.

* Fix CopyFrom when it's the view of an NDArray.

* add test.

* check same shape correctly.

* add unit test for CopyFrom.

* Fix warning.

* Add test sum.

* fix sum.

* Fix fallback.

* Fix fallback of sum.

* add tests.

* Update mkldnn.cc
---
 src/ndarray/ndarray.cc  | 111 +---
 src/operator/nn/mkldnn/mkldnn_base.cc   |   5 +-
 src/operator/nn/mkldnn/mkldnn_sum.cc|  22 +++-
 src/operator/tensor/elemwise_binary_op_basic.cc |  12 +-
 tests/cpp/operator/mkldnn.cc| 165 +---
 5 files changed, 235 insertions(+), 80 deletions(-)

diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index d87e8bc..94d3d90 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -200,6 +200,7 @@ NDArray NDArray::MKLDNNDataReshape(const TShape &shape) 
const {
 ret.ptr_->delay_alloc = false;
 ret.ptr_->static_data = true;
 ret.byte_offset_ = byte_offset_;
+ret.reuse_ = false;
 return ret;
   }
 }
@@ -217,6 +218,7 @@ NDArray NDArray::Reshape(const TShape &shape) const {
   // Otherwise, reshape only works on the default layout.
   CHECK_EQ(storage_type(), kDefaultStorage);
   ret.shape_ = shape;
+  ret.reuse_ = false;
   return ret;
 }
 
@@ -249,6 +251,7 @@ NDArray NDArray::Slice(index_t begin, index_t end) const {
   MSHADOW_TYPE_SWITCH(ret.dtype(), DType, {
 ret.byte_offset_ += begin * length * sizeof(DType);
   });
+  ret.reuse_ = false;
   ret.shape_[0] = end - begin;
   return ret;
 }
@@ -555,6 +558,7 @@ NDArray NDArray::Reorder2Default() const {
   // reshape as needed
   ret.shape_ = shape_;
   ret.byte_offset_ = byte_offset_;
+  ret.reuse_ = false;
   return ret;
 }
 
@@ -584,39 +588,39 @@ void NDArray::MKLDNNDataReorderAsync(const 
mkldnn::memory::primitive_desc &desc)
 
 const mkldnn::memory *NDArray::GetMKLDNNData() const {
   CHECK(storage_type() == kDefaultStorage);
+  bool is_view = IsView();
   if (IsMKLDNNData()) {
 // If this array uses MKLDNN layout, we have to make sure it's not a view.
 // Otherwise, we'll have to change the layout inside the array.
-CHECK(!IsView());
+CHECK(!is_view);
 MKLDNNStream::Get()->RegisterMem(ptr_->mkl_mem_->GetMem());
 // If this array uses MKLDNN format, we should return now. Otherwise,
 // SetMKLMem may mess up mkl_mem_.
 return ptr_->mkl_mem_->GetRaw();
-  }
-  ptr_->SetMKLMem(IsView() ? ptr_->storage_shape : shape_, dtype_);
-  MKLDNNStream::Get()->RegisterMem(ptr_->mkl_mem_->GetMem());
-  if (IsView()) {
-mkldnn::memory::primitive_desc pd = ptr_->mkl_mem_->GetPrimitiveDesc();
-// Sliced array must use the default layout.
-CHECK_EQ(GetDefaultFormat(pd.desc()), pd.desc().data.format);
-void *off_addr = static_cast(ptr_->mkl_mem_->GetDataHandle())
-+ byte_offset_;
-
+  } else if (is_view) {
+// If this is a view, we can't create a MKLDNN memory for the chunk
+// because we don't have the complete data type and shape information for
+// the chunk.
+void *off_addr = static_cast(ptr_->shandle.dptr) + byte_offset_;
 // Create the primitive desc for the new mkldnn memory.
 mkldnn::memory::dims dims(shape().ndim());
 for (size_t i = 0; i < dims.size(); i++)
   dims[i] = shape()[i];
 mkldnn::memory::format cpp_format = static_cast(
 GetDefaultFormat(shape().ndim()));
-mkldnn::memory::data_type cpp_type = 
static_cast(
-pd.desc().data.data_type);
+mkldnn::memory::data_type cpp_type = get_mkldnn_type(dtype_);
 mkldnn::memory::desc data_md(dims, cpp_type, cpp_format);
-mkldnn::memory::primitive_desc new_pd(data_md, pd.get_engine());
+mkldnn::memory::primitive_desc new_pd(data_md,
+  CpuEngine::Get()->get_engine());
 
 std::shared_ptr ret(new mkldnn::memory(new_pd, off_addr));
 MKLDNNStream::Get()->RegisterMem(ret);
 return ret.get();
   } else {
+// If this isn't a view, we can create a MKLDNN memory and store it in the
+// chunk.
+ptr_->SetMKLMem(shape_, dtype_);
+MKLDNNStream::Get()->RegisterMem(ptr_->mkl_mem_->GetMem());
 return ptr_->mkl_mem_->GetRaw();
   }
 }
@@ -637,20 +641,23 @@ void NDArray::CopyFrom(const mkldnn::memory

[incubator-mxnet] branch master updated: [MXNET-414] Tutorial on visualizing CNN decisions using Grad-CAM (#10900)

2018-05-24 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 3f346aa  [MXNET-414] Tutorial on visualizing CNN decisions using 
Grad-CAM (#10900)
3f346aa is described below

commit 3f346aa83cadd8bef60f5e627dfff16540daec0f
Author: Indu Bharathi 
AuthorDate: Thu May 24 11:00:48 2018 -0700

[MXNET-414] Tutorial on visualizing CNN decisions using Grad-CAM (#10900)

* Add gradcam

* Fix imports

* Add Apache license

* Add some documentation for ReluOp. Set guided_backprop to false by default

* Add documentation for Conv2D. Edit doc for ReluOp

* Document _get_grad. Add set_guided_backprop method

* Add doc for get_conv_out_grad

* Add doc for get_image_grad

* Add doc for get_cam and get_guided_grad_cam

* Document to_grayscale and get_img_heatmap

* Minor bug fix and method rename

* Minor

* Images for demo.

* 1. Add first version of gradcam cli demo. 2. Add VGG network for demo.

* Bug fix

* Add comments. Use the image path received in command line

* Create output images in the form %s_%s where former is the image name and 
later is an appropriate suffix like 'gradcam', 'saliency', etc

* Add license

* Print predicted category and output file names. Add more doc.

* Add markdown for CNN visualization tutorial

* Insert source download buttons

* Minor fix

* Download imaged from dmlc/web-data

* Fix the image at the top

* Create README.md

* Remove images from examples folder

* Add license header

* Minor change in how images are displayed

* Add the tutorial to tests

* Minor edit

* Remove nonascii chars

* Add instruction to do visualization from terminal

* Download synset file. We are not placing the file in repo.

* Add tutorial to tutorials index

* Language corrections

* Remove cv2 dependencies

* Simplify and document 'get_vgg'

* Simplify vgg16. Show name of last conv layer in markdown.

* Change colormap to cv2.COLORMAP_COOL
---
 docs/tutorials/index.md|   1 +
 docs/tutorials/vision/cnn_visualization.md | 245 +++
 example/cnn_visualization/README.md|  17 ++
 example/cnn_visualization/gradcam.py   | 263 +
 example/cnn_visualization/gradcam_demo.py  | 110 
 example/cnn_visualization/vgg.py   |  84 +
 tests/tutorials/test_tutorials.py  |   3 +
 7 files changed, 723 insertions(+)

diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md
index f2d61ca..fce3315 100644
--- a/docs/tutorials/index.md
+++ b/docs/tutorials/index.md
@@ -41,6 +41,7 @@ Select API: 
 * [Checkpointing and Model Serialization (a.k.a. saving and 
loading)](http://gluon.mxnet.io/chapter03_deep-neural-networks/serialization.html)
 <img src="https://upload.wikimedia.org/wikipedia/commons/6/6a/External_link_font_awesome.svg"
 alt="External link" height="15px" style="margin: 0px 0px 3px 3px;"/>
 * [Inference using an ONNX 
model](/tutorials/onnx/inference_on_onnx_model.html)
 * [Fine-tuning an ONNX model on 
Gluon](/tutorials/onnx/fine_tuning_gluon.html)
+* [Visualizing Decisions of Convolutional Neural 
Networks](/tutorials/vision/cnn_visualization.html)
 * API Guides
 * Core APIs
 * NDArray
diff --git a/docs/tutorials/vision/cnn_visualization.md 
b/docs/tutorials/vision/cnn_visualization.md
new file mode 100644
index 0000000..ea027df
--- /dev/null
+++ b/docs/tutorials/vision/cnn_visualization.md
@@ -0,0 +1,245 @@
+# Visualizing Decisions of Convolutional Neural Networks
+
+Convolutional Neural Networks have made a lot of progress in Computer Vision. 
Their accuracy is as good as humans in some tasks. However it remains hard to 
explain the predictions of convolutional neural networks, as they lack the 
interpretability offered by other models, for example decision trees.
+
+It is often helpful to be able to explain why a model made the prediction it 
made. For example when a model misclassifies an image, it is hard to say why 
without visualizing the network's decision.
+
+<img src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/example/cnn_visualization/volcano_barn_spider.png"
 alt="Explaining the misclassification of volcano as spider" width=500px/>
+
+Visualizations also help build confidence about the predictions of a model. 
For example, even if a model correctly predicts birds as birds, we would want 
to confirm that the model bases its decision on the features of bird and not

[incubator-mxnet] branch master updated: Update dmlc-core (#11034)

2018-05-24 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 0d09449  Update dmlc-core (#11034)
0d09449 is described below

commit 0d0944904b57d38724ea22fcd2c004fd935f8856
Author: Pedro Larroy <928489+lar...@users.noreply.github.com>
AuthorDate: Thu May 24 19:59:32 2018 +0200

Update dmlc-core (#11034)
---
 3rdparty/dmlc-core | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/3rdparty/dmlc-core b/3rdparty/dmlc-core
index d26d9e7..dadcd97 160000
--- a/3rdparty/dmlc-core
+++ b/3rdparty/dmlc-core
@@ -1 +1 @@
-Subproject commit d26d9e7982b233d4aa105ae084fbecc500d254ff
+Subproject commit dadcd97fdceb5f395e963b2a637f6ed377f59fc4

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: NEWS and README update to master (#11017)

2018-05-23 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 30ca4e3  NEWS and README update to master (#11017)
30ca4e3 is described below

commit 30ca4e319cc5c2635c76fd718a01f6c521bd6922
Author: Anirudh Subramanian 
AuthorDate: Wed May 23 13:52:09 2018 -0700

NEWS and README update to master (#11017)

* Add NEWS and README

* mark MKLDNN experimental. (#10661)

* Mark ONNX-MXNet experimental (#10677)

* Mark ONNX-MXNet experimental

* change wording.

* space nit

* Add known issue to NEWS.md

* Add known issue to README (#10700)

* Add known issue to README

* Add scala example to known issue

* Fix NEWS and README

* Trigger CI

* Add experimental for quantization

* Trigger CI

* Trigger CI
---
 NEWS.md   | 130 ++
 README.md |   1 +
 2 files changed, 131 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index a51b514..461bb6d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,135 @@
 MXNet Change Log
 
+## 1.2.0
+### New Features - Added Scala Inference APIs
+- Implemented new [Scala Inference 
APIs](https://cwiki.apache.org/confluence/display/MXNET/MXNetScalaInferenceAPI) 
which offer an easy-to-use, Scala Idiomatic and thread-safe high level APIs for 
performing predictions with deep learning models trained with MXNet (#9678). 
Implemented a new ImageClassifier class which provides APIs for classification 
tasks on a Java BufferedImage using a pre-trained model you provide (#10054). 
Implemented a new ObjectDetector class which provides APIs for  [...]
+
+### New Features - Added a Module to Import ONNX models into MXNet
+- Implemented a new ONNX module in MXNet which offers an easy to use API to 
import ONNX models into MXNet's symbolic interface (#9963). Checkout the 
[example](https://github.com/apache/incubator-mxnet/blob/master/example/onnx/super_resolution.py)
 on how you could use this 
[API](https://cwiki.apache.org/confluence/display/MXNET/ONNX-MXNet+API+Design) 
to import ONNX models and perform inference on MXNet. Currently, the ONNX-MXNet 
Import module is still experimental. Please use it with caution.
+
+### New Features - Added Support for Model Quantization with Calibration
+- Implemented model quantization by adopting the [TensorFlow 
approach](https://www.tensorflow.org/performance/quantization) with calibration 
by borrowing the idea from Nvidia's 
[TensorRT](http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf).
 The focus of this work is on keeping quantized models (ConvNets for now) 
inference accuracy loss under control when compared to their corresponding FP32 
models. Please see the [example](https://github.com/ap [...]
+
+### New Features - MKL-DNN Integration
+- MXNet now integrates with Intel MKL-DNN to accelerate neural network 
operators: Convolution, Deconvolution, FullyConnected, Pooling, Batch 
Normalization, Activation, LRN, Softmax, as well as some common operators: sum 
and concat (#9677). This integration allows NDArray to contain data with 
MKL-DNN layouts and reduces data layout conversion to get the maximal 
performance from MKL-DNN. Currently, the MKL-DNN integration is still 
experimental. Please use it with caution.
+
+### New Features - Added Exception Handling Support for Operators
+- Implemented [Exception Handling Support for 
Operators](https://cwiki.apache.org/confluence/display/MXNET/Improved+exception+handling+in+MXNet)
 in MXNet. MXNet now transports backend C++ exceptions to the different 
language front-ends and prevents crashes when exceptions are thrown during 
operator execution (#9681).
+
+### New Features - Enhanced FP16 support
+- Added support for distributed mixed precision training with FP16. It 
supports storing of master copy of weights in float32 with the multi_precision 
mode of optimizers (#10183). Improved speed of float16 operations on x86 CPU by 
8 times through F16C instruction set. Added support for more operators to work 
with FP16 inputs (#10125, #10078, #10169). Added a tutorial on using mixed 
precision with FP16 (#10391).
+
+### New Features - Added Profiling Enhancements
+- Enhanced built-in profiler to support native Intel:registered: VTune:tm: 
Amplifier objects such as Task, Frame, Event, Counter and Marker from both C++ 
and Python -- which is also visible in the Chrome tracing view(#8972). Added 
Runtime tracking of symbolic and imperative operators as well as memory and API 
calls. Added Tracking and dumping of aggregate profiling data. Profiler also no 
longer affects runtime performance when not in use. 
+
+### Breaking Changes
+- Changed Namespace for MXNet scala from `ml.dmlc

[incubator-mxnet] branch master updated: add cpu_pinned in __init__ (#11030)

2018-05-23 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new f7e5e91  add cpu_pinned in __init__ (#11030)
f7e5e91 is described below

commit f7e5e91d58aa878d74304afa72b115358286b6dc
Author: solin319 
AuthorDate: Thu May 24 02:38:14 2018 +0800

add cpu_pinned in __init__ (#11030)

This makes mx.cpu_pinned available for use.
---
 python/mxnet/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py
index 58b8bd8..e960829 100644
--- a/python/mxnet/__init__.py
+++ b/python/mxnet/__init__.py
@@ -21,7 +21,7 @@
 """MXNet: a concise, fast and flexible framework for deep learning."""
 from __future__ import absolute_import
 
-from .context import Context, current_context, cpu, gpu
+from .context import Context, current_context, cpu, gpu, cpu_pinned
 from . import engine
 from .base import MXNetError
 from . import base

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: added more detailed error messages for potri/potrf (#11035)

2018-05-23 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new ab08ee8  added more detailed error messages for potri/potrf (#11035)
ab08ee8 is described below

commit ab08ee8293c5fde57c4c91e9a0af38c43751d132
Author: moin 
AuthorDate: Wed May 23 20:37:21 2018 +0200

added more detailed error messages for potri/potrf (#11035)
---
 src/operator/linalg_impl.h | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/operator/linalg_impl.h b/src/operator/linalg_impl.h
index d128617..151db60 100644
--- a/src/operator/linalg_impl.h
+++ b/src/operator/linalg_impl.h
@@ -494,6 +494,9 @@ LINALG_XPU_BATCH_TRMM(gpu, double)
 // for further information about the function and its parameters.
 // Note that this is A = potrf(A), so A is input and output parameter.
 
+static const char *potrf_errstr
+  = "This may happen when the input matrix is either not symmetric or not 
positive definite.";
+
 template
 inline void check_potrf(const Tensor& A, bool lower) {
   // Any checking that helps user debug potential problems.
@@ -507,7 +510,7 @@ void linalg_potrf(const Tensor& 
A, bool lower, Stream
   check_potrf(A, lower); \
   int ret(MXNET_LAPACK_##fname(MXNET_LAPACK_ROW_MAJOR, (lower ? 'L' : 'U'), 
A.size(0),  \
   A.dptr_ , A.stride_)); \
-  CHECK_EQ(ret, 0) << #fname << " failed in lapack on cpu."; \
+  CHECK_EQ(ret, 0) << #fname << " failed in lapack on cpu. " << potrf_errstr; \
 }
 LINALG_CPU_POTRF(spotrf, float)
 LINALG_CPU_POTRF(dpotrf, double)
@@ -589,6 +592,10 @@ LINALG_GPU_BATCH_POTRF(DnDpotrf, double)
 // for further information about the function and its parameters.
 // Note that this is A = potri(A), so A is input and output parameter.
 
+static const char *potri_errstr
+  = "This may happen when the input matrix is not a Cholesky factorization 
obtained"
+" by a prior call of the potrf-operator.";
+
 template
 inline void check_potri(const Tensor& A, bool lower) {
   // Any checking that helps user debug potential problems.
@@ -601,7 +608,7 @@ void linalg_potri(const Tensor& 
A, bool lower, Stream
   check_potri(A, lower); \
   int ret(MXNET_LAPACK_##fname(MXNET_LAPACK_ROW_MAJOR, (lower ? 'L' : 'U'), 
A.size(0),  \
   A.dptr_ , A.stride_)); \
-  CHECK_EQ(ret, 0) << #fname << " failed in lapack on cpu."; \
+  CHECK_EQ(ret, 0) << #fname << " failed in lapack on cpu. " << potri_errstr; \
 }
 LINALG_CPU_POTRI(spotri, float)
 LINALG_CPU_POTRI(dpotri, double)

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: [MXNET-411] Add ROI Align (#10852)

2018-05-23 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 98e671e  [MXNET-411] Add ROI Align (#10852)
98e671e is described below

commit 98e671e2ac27260b44ea1c70869d346cc66b0c00
Author: Hang Zhang <8041160+zhanghang1...@users.noreply.github.com>
AuthorDate: Wed May 23 11:30:36 2018 -0700

[MXNET-411] Add ROI Align (#10852)

* add roi align

* lint

* cpu gpu forward consistent

* roi align from caffe2

* rois and unit-test

* for cpplint

* use pointer instead of reference for lint

* fix

* add docs

* fix vector

* more unit test

* using mshadow

* omp

* omp on channels

* remove omp due to no cpu atomic add

* use lambda func for grads

* knullop return
---
 CONTRIBUTORS.md|   2 +-
 docs/api/python/ndarray/contrib.md |   1 +
 docs/api/python/symbol/contrib.md  |   1 +
 src/operator/contrib/roi_align-inl.h   |  63 
 src/operator/contrib/roi_align.cc  | 584 +
 src/operator/contrib/roi_align.cu  | 484 +++
 tests/python/unittest/test_operator.py | 152 +
 7 files changed, 1286 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 5f5302a..4bfafb6 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -170,4 +170,4 @@ List of Contributors
 * [Sina Afrooze](https://github.com/safrooze)
 * [Sergey Sokolov](https://github.com/Ishitori)
 * [Thomas Delteil](https://github.com/ThomasDelteil)
-
+* [Hang Zhang](http://hangzh.com)
diff --git a/docs/api/python/ndarray/contrib.md 
b/docs/api/python/ndarray/contrib.md
index 25cabed..b017c60 100644
--- a/docs/api/python/ndarray/contrib.md
+++ b/docs/api/python/ndarray/contrib.md
@@ -45,6 +45,7 @@ In the rest of this document, we list routines provided by 
the `ndarray.contrib`
 MultiProposal
 PSROIPooling
 Proposal
+ROIAlign
 count_sketch
 ctc_loss
 dequantize
diff --git a/docs/api/python/symbol/contrib.md 
b/docs/api/python/symbol/contrib.md
index 1af18bb..f2bb3f1 100644
--- a/docs/api/python/symbol/contrib.md
+++ b/docs/api/python/symbol/contrib.md
@@ -45,6 +45,7 @@ In the rest of this document, we list routines provided by 
the `symbol.contrib`
 MultiProposal
 PSROIPooling
 Proposal
+ROIAlign
 count_sketch
 ctc_loss
 dequantize
diff --git a/src/operator/contrib/roi_align-inl.h 
b/src/operator/contrib/roi_align-inl.h
new file mode 100644
index 0000000..5ac420c
--- /dev/null
+++ b/src/operator/contrib/roi_align-inl.h
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file roi_align-inl.h
+ * \brief roi align operator and symbol
+ * \author Hang Zhang
+ * modified from Caffe2
+*/
+#ifndef MXNET_OPERATOR_CONTRIB_ROI_ALIGN_INL_H_
+#define MXNET_OPERATOR_CONTRIB_ROI_ALIGN_INL_H_
+
+#include 
+#include 
+#include "../mshadow_op.h"
+#include "../tensor/init_op.h"
+
+
+namespace mxnet {
+namespace op {
+
+
+// Declare enumeration of input order to make code more intuitive.
+// These enums are only visible within this header
+namespace roialign {
+enum ROIAlignOpInputs {kData, kBox};
+enum ROIAlignOpOutputs {kOut};
+}  // roialign
+
+
+struct ROIAlignParam : public dmlc::Parameter {
+  TShape pooled_size;
+  float spatial_scale;
+  DMLC_DECLARE_PARAMETER(ROIAlignParam) {
+DMLC_DECLARE_FIELD(pooled_size)
+.set_expect_ndim(2).enforce_nonzero()
+.describe("ROI Align output roi feature map height and width: (h, w)");
+DMLC_DECLARE_FIELD(spatial_scale).set_range(0.0, 1.0)
+.describe("Ratio of input feature map height (or w) to raw image height 
(or w). "
+"Equals the reciprocal of total stride in convolutional layers");
+  }
+};
+
+}  // namespace op
+}  // namespace mxnet
+
+#endif  // MXNET_OPERATOR_CONTRIB_ROI_ALIGN_INL_H_
diff --

[incubator-mxnet] branch master updated: add gluon model summary (#10989)

2018-05-22 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 022f238  add gluon model summary (#10989)
022f238 is described below

commit 022f23885bcd90b69448f25edf507bd89cd46caf
Author: Sheng Zha 
AuthorDate: Tue May 22 10:57:41 2018 -0700

add gluon model summary (#10989)

* add hook api

* add block.summary

* remove count
---
 python/mxnet/gluon/block.py | 172 +++-
 python/mxnet/gluon/rnn/rnn_layer.py |   4 +
 python/mxnet/gluon/utils.py |  37 
 tests/python/unittest/test_gluon.py |  77 
 4 files changed, 287 insertions(+), 3 deletions(-)

diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py
index 4779484..dbe3c5e 100644
--- a/python/mxnet/gluon/block.py
+++ b/python/mxnet/gluon/block.py
@@ -31,7 +31,7 @@ from ..symbol import Symbol
 from ..ndarray import NDArray
 from .. import name as _name
 from .parameter import Parameter, ParameterDict, DeferredInitializationError
-from .utils import _indent, _brief_print_list
+from .utils import _indent, _brief_print_list, HookHandle
 
 
 class _BlockScope(object):
@@ -173,6 +173,8 @@ class Block(object):
 self._scope = _BlockScope(self)
 self._children = OrderedDict()
 self._reg_params = {}
+self._forward_hooks = OrderedDict()
+self._forward_pre_hooks = OrderedDict()
 
 def __repr__(self):
 s = '{name}(\n{modstr}\n)'
@@ -355,7 +357,6 @@ class Block(object):
 name, filename, 
_brief_print_list(self._params.keys(
 params[name]._load_init(loaded[name], ctx)
 
-
 def register_child(self, block, name=None):
 """Registers block as a child of self. :py:class:`Block` s assigned to 
self as
 attributes will be registered automatically."""
@@ -363,6 +364,61 @@ class Block(object):
 name = str(len(self._children))
 self._children[name] = block
 
+def register_forward_pre_hook(self, hook):
+r"""Registers a forward pre-hook on the block.
+
+The hook function is called immediately before :func:`forward`.
+It should not modify the input or output.
+
+Parameters
+--
+hook : callable
+The forward hook function of form `hook(block, input) -> None`.
+
+Returns
+---
+:class:`mxnet.gluon.utils.HookHandle`
+"""
+handle = HookHandle()
+handle.attach(self._forward_pre_hooks, hook)
+return handle
+
+def register_forward_hook(self, hook):
+r"""Registers a forward hook on the block.
+
+The hook function is called immediately after :func:`forward`.
+It should not modify the input or output.
+
+Parameters
+--
+hook : callable
+The forward hook function of form `hook(block, input, output) -> 
None`.
+
+Returns
+---
+:class:`mxnet.gluon.utils.HookHandle`
+"""
+handle = HookHandle()
+handle.attach(self._forward_hooks, hook)
+return handle
+
+def apply(self, fn):
+r"""Applies ``fn`` recursively to every child block as well as self.
+
+Parameters
+--
+fn : callable
+Function to be applied to each submodule, of form `fn(block)`.
+
+Returns
+---
+this block
+"""
+for cld in self._children.values():
+cld.apply(fn)
+fn(self)
+return self
+
 def initialize(self, init=initializer.Uniform(), ctx=None, verbose=False,
force_reinit=False):
 """Initializes :py:class:`Parameter` s of this :py:class:`Block` and 
its children.
@@ -411,7 +467,15 @@ class Block(object):
 
 def __call__(self, *args):
 """Calls forward. Only accepts positional arguments."""
-return self.forward(*args)
+for hook in self._forward_pre_hooks.values():
+hook(self, args)
+
+out = self.forward(*args)
+
+for hook in self._forward_hooks.values():
+hook(self, args, out)
+
+return out
 
 def forward(self, *args):
 """Overrides to implement forward computation using 
:py:class:`NDArray`. Only
@@ -425,6 +489,105 @@ class Block(object):
 # pylint: disable= invalid-name
 raise NotImplementedError
 
+def summary(self, *inputs):
+"""Print the summary of the model's output and parameters.
+
+The network must have been initialized, and must not have been 
hy

[incubator-mxnet] branch master updated: unlink memory shared file immediately on linux (#10992)

2018-05-18 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new bea5fd1  unlink memory shared file immediately on linux (#10992)
bea5fd1 is described below

commit bea5fd13c5445647a1aee3c5be4406d8fb9c
Author: Eric Junyuan Xie 
AuthorDate: Fri May 18 14:07:32 2018 -0700

unlink memory shared file immediately on linux (#10992)
---
 python/mxnet/gluon/data/dataloader.py| 48 +++-
 src/storage/cpu_shared_storage_manager.h | 34 ++
 2 files changed, 63 insertions(+), 19 deletions(-)

diff --git a/python/mxnet/gluon/data/dataloader.py 
b/python/mxnet/gluon/data/dataloader.py
index d80a6bf..151b49d 100644
--- a/python/mxnet/gluon/data/dataloader.py
+++ b/python/mxnet/gluon/data/dataloader.py
@@ -20,27 +20,49 @@
 """Dataset generator."""
 __all__ = ['DataLoader']
 
-import multiprocessing
-import multiprocessing.queues
-from multiprocessing.reduction import ForkingPickler
 import pickle
 import io
 import sys
+import multiprocessing
+import multiprocessing.queues
+from multiprocessing.reduction import ForkingPickler
 import numpy as np
 
+try:
+import multiprocessing.resource_sharer
+except ImportError:
+pass
+
 from . import sampler as _sampler
 from ... import nd, context
 
-
-def rebuild_ndarray(*args):
-"""Rebuild ndarray from pickled shared memory"""
-# pylint: disable=no-value-for-parameter
-return nd.NDArray(nd.ndarray._new_from_shared_mem(*args))
-
-
-def reduce_ndarray(data):
-"""Reduce ndarray to shared memory handle"""
-return rebuild_ndarray, data._to_shared_mem()
+if sys.platform == 'darwin' or sys.platform == 'win32':
+def rebuild_ndarray(*args):
+"""Rebuild ndarray from pickled shared memory"""
+# pylint: disable=no-value-for-parameter
+return nd.NDArray(nd.ndarray._new_from_shared_mem(*args))
+
+def reduce_ndarray(data):
+"""Reduce ndarray to shared memory handle"""
+return rebuild_ndarray, data._to_shared_mem()
+else:
+def rebuild_ndarray(pid, fd, shape, dtype):
+"""Rebuild ndarray from pickled shared memory"""
+# pylint: disable=no-value-for-parameter
+if sys.version_info[0] == 2:
+fd = multiprocessing.reduction.rebuild_handle(fd)
+else:
+fd = fd.detach()
+return nd.NDArray(nd.ndarray._new_from_shared_mem(pid, fd, shape, 
dtype))
+
+def reduce_ndarray(data):
+"""Reduce ndarray to shared memory handle"""
+pid, fd, shape, dtype = data._to_shared_mem()
+if sys.version_info[0] == 2:
+fd = multiprocessing.reduction.reduce_handle(fd)
+else:
+fd = multiprocessing.reduction.DupFd(fd)
+return rebuild_ndarray, (pid, fd, shape, dtype)
 
 ForkingPickler.register(nd.NDArray, reduce_ndarray)
 
diff --git a/src/storage/cpu_shared_storage_manager.h 
b/src/storage/cpu_shared_storage_manager.h
index e2de30d..85c6a35 100644
--- a/src/storage/cpu_shared_storage_manager.h
+++ b/src/storage/cpu_shared_storage_manager.h
@@ -118,10 +118,11 @@ void CPUSharedStorageManager::Alloc(Storage::Handle* 
handle) {
   std::lock_guard lock(mutex_);
   std::uniform_int_distribution<> dis(0, std::numeric_limits::max());
   int fid = -1;
+  std::string filename;
   bool is_new = false;
   size_t size = handle->size + alignment_;
   void *ptr = nullptr;
-  #ifdef _WIN32
+#ifdef _WIN32
   CheckAndRealFree();
   HANDLE map_handle = nullptr;
   uint32_t error = 0;
@@ -130,7 +131,7 @@ void CPUSharedStorageManager::Alloc(Storage::Handle* 
handle) {
 handle->shared_pid = _getpid();
 for (int i = 0; i < 10; ++i) {
   handle->shared_id = dis(rand_gen_);
-  auto filename = SharedHandleToString(handle->shared_pid, 
handle->shared_id);
+  filename = SharedHandleToString(handle->shared_pid, handle->shared_id);
   map_handle = CreateFileMapping(INVALID_HANDLE_VALUE,
  NULL, PAGE_READWRITE, 0, size, 
filename.c_str());
   if ((error = GetLastError()) == ERROR_SUCCESS) {
@@ -138,7 +139,7 @@ void CPUSharedStorageManager::Alloc(Storage::Handle* 
handle) {
   }
 }
   } else {
-auto filename = SharedHandleToString(handle->shared_pid, 
handle->shared_id);
+filename = SharedHandleToString(handle->shared_pid, handle->shared_id);
 map_handle = OpenFileMapping(FILE_MAP_READ | FILE_MAP_WRITE,
  FALSE, filename.c_str());
 error = GetLastError();
@@ -159,13 +160,17 @@ void CPUSharedStorageManager::Alloc(Sto

[incubator-mxnet] branch master updated: fix rnn layer kernel forward (#10982)

2018-05-18 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 5cddc2d  fix rnn layer kernel forward (#10982)
5cddc2d is described below

commit 5cddc2dad8b27985546cbba51ad98fff3d22a879
Author: Sheng Zha 
AuthorDate: Fri May 18 10:04:16 2018 -0700

fix rnn layer kernel forward (#10982)
---
 python/mxnet/gluon/rnn/rnn_layer.py | 4 ++--
 tests/python/unittest/test_gluon_rnn.py | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/mxnet/gluon/rnn/rnn_layer.py 
b/python/mxnet/gluon/rnn/rnn_layer.py
index 89224cf..2beae96 100644
--- a/python/mxnet/gluon/rnn/rnn_layer.py
+++ b/python/mxnet/gluon/rnn/rnn_layer.py
@@ -23,7 +23,7 @@
 from __future__ import print_function
 __all__ = ['RNN', 'LSTM', 'GRU']
 
-from ... import ndarray, autograd
+from ... import ndarray
 from .. import Block
 from . import rnn_cell
 
@@ -186,7 +186,7 @@ class _RNNLayer(Block):
 self.i2h_weight[i].shape = (self._gates*self._hidden_size, 
inputs.shape[2])
 self.i2h_weight[i]._finish_deferred_init()
 if inputs.context.device_type == 'gpu' or \
-   self._mode == 'lstm' and not (self._dropout and 
autograd.is_training()):
+   self._mode == 'lstm' and not self._dropout:
 out = self._forward_kernel(inputs, states)
 else:
 out = self._forward(inputs, states)
diff --git a/tests/python/unittest/test_gluon_rnn.py 
b/tests/python/unittest/test_gluon_rnn.py
index 24d5a93..9dbcb3b 100644
--- a/tests/python/unittest/test_gluon_rnn.py
+++ b/tests/python/unittest/test_gluon_rnn.py
@@ -268,6 +268,8 @@ def check_rnn_layer_forward(layer, inputs, states=None, 
run_only=False):
 assert isinstance(out, mx.nd.NDArray)
 out.backward()
 
+layer(inputs, states) # test is_training = false
+
 if not run_only:
 mx.test_utils.assert_almost_equal(np_out, out.asnumpy(), rtol=1e-3, 
atol=1e-5)
 mx.test_utils.assert_almost_equal(np_dx, inputs.grad.asnumpy(), 
rtol=1e-3, atol=1e-5)

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: Add Apachev2 License for contrib (#10938)

2018-05-17 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 034f24f  Add Apachev2 License for contrib (#10938)
034f24f is described below

commit 034f24f7b3a03deba063ff09ac8a5a2000a8e410
Author: Anirudh Subramanian 
AuthorDate: Thu May 17 10:52:16 2018 -0700

Add Apachev2 License for contrib (#10938)

* Add Apachev2 License for contrib

* Add copyright back
---
 src/operator/contrib/ctc_include/detail/cpu_ctc.h| 19 +++
 src/operator/contrib/ctc_include/detail/ctc_helper.h | 19 +++
 src/operator/contrib/ctc_include/detail/gpu_ctc.h| 19 +++
 .../contrib/ctc_include/detail/gpu_ctc_kernels.h | 19 +++
 src/operator/contrib/ctc_include/detail/hostdevice.h | 20 
 src/operator/contrib/psroi_pooling-inl.h | 20 +++-
 tests/nightly/apache_rat_license_check/.rat-excludes |  1 -
 7 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/src/operator/contrib/ctc_include/detail/cpu_ctc.h 
b/src/operator/contrib/ctc_include/detail/cpu_ctc.h
index ba8bbc5..005b956 100644
--- a/src/operator/contrib/ctc_include/detail/cpu_ctc.h
+++ b/src/operator/contrib/ctc_include/detail/cpu_ctc.h
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 #pragma once
 
 #include 
diff --git a/src/operator/contrib/ctc_include/detail/ctc_helper.h 
b/src/operator/contrib/ctc_include/detail/ctc_helper.h
index 35b7a96..250188c 100644
--- a/src/operator/contrib/ctc_include/detail/ctc_helper.h
+++ b/src/operator/contrib/ctc_include/detail/ctc_helper.h
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 #pragma once
 
 #include 
diff --git a/src/operator/contrib/ctc_include/detail/gpu_ctc.h 
b/src/operator/contrib/ctc_include/detail/gpu_ctc.h
index c249046..8015b39 100644
--- a/src/operator/contrib/ctc_include/detail/gpu_ctc.h
+++ b/src/operator/contrib/ctc_include/detail/gpu_ctc.h
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 #pragma once
 
 
diff --git a/src/operator/contrib/ctc_include/detail/gpu_ctc_kernels.h 
b/src/operator/contrib/ctc_include/detail/gpu_ctc_kernels.h
index 7f53232..c9bc202 100644
--- a/src/operator/contrib/ctc_include/detail/gpu_ctc_kernels.h
+++ b/src/operator/contrib/ctc_include/detail/gpu_ctc_kernels.h
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one

[incubator-mxnet] branch master updated: Minor doc fixes (#10963)

2018-05-17 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new ed846c9  Minor doc fixes (#10963)
ed846c9 is described below

commit ed846c963e17b3dd9b6ac58011235e0f89772232
Author: Haibin Lin 
AuthorDate: Thu May 17 10:48:48 2018 -0700

Minor doc fixes (#10963)

* Update dot.cc

* Update cast_storage.cc

* Update dot.cc

* Update init_op.cc

* Update dot.cc
---
 src/operator/tensor/cast_storage.cc | 2 +-
 src/operator/tensor/dot.cc  | 3 ++-
 src/operator/tensor/init_op.cc  | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/operator/tensor/cast_storage.cc 
b/src/operator/tensor/cast_storage.cc
index f77a50a..afea9b8 100644
--- a/src/operator/tensor/cast_storage.cc
+++ b/src/operator/tensor/cast_storage.cc
@@ -31,7 +31,7 @@ namespace op {
 
 DMLC_REGISTER_PARAMETER(CastStorageParam);
 NNVM_REGISTER_OP(cast_storage)
-.add_alias("_sparse_cast_storage")
+MXNET_ADD_SPARSE_OP_ALIAS(cast_storage)
 .describe(R"code(Casts tensor storage type to the new type.
 
 When an NDArray with default storage type is cast to csr or row_sparse storage,
diff --git a/src/operator/tensor/dot.cc b/src/operator/tensor/dot.cc
index 11a8b27..2f44f53 100644
--- a/src/operator/tensor/dot.cc
+++ b/src/operator/tensor/dot.cc
@@ -29,7 +29,7 @@ namespace op {
 DMLC_REGISTER_PARAMETER(DotParam);
 
 NNVM_REGISTER_OP(dot)
-.add_alias("_sparse_dot")  // alias for op registration under 
mxnet.ndarray.sparse
+MXNET_ADD_SPARSE_OP_ALIAS(dot)
 .describe(R"doc(Dot product of two arrays.
 
 ``dot``'s behavior depends on the input array dimensions:
@@ -57,6 +57,7 @@ forward_stype option for output storage type. Implemented 
sparse operations incl
 - dot(default, default, transpose_a=True/False, transpose_b=True/False) = 
default
 - dot(csr, default, transpose_a=True) = default
 - dot(csr, default, transpose_a=True) = row_sparse
+- dot(csr, default) = default
 - dot(csr, row_sparse) = default
 - dot(default, csr) = csr (CPU only)
 - dot(default, csr, forward_stype='default') = default (GPU only)
diff --git a/src/operator/tensor/init_op.cc b/src/operator/tensor/init_op.cc
index 52cb9f2..bb23f5d 100644
--- a/src/operator/tensor/init_op.cc
+++ b/src/operator/tensor/init_op.cc
@@ -87,7 +87,7 @@ NNVM_REGISTER_OP(_arange)
 .add_arguments(RangeParam::__FIELDS__());
 
 NNVM_REGISTER_OP(zeros_like)
-.add_alias("_sparse_zeros_like")
+MXNET_ADD_SPARSE_OP_ALIAS(zeros_like)
 .describe(R"code(Return an array of zeros with the same shape, type and 
storage type
 as the input array.
 

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: Fix test_sparse_mathematical_core sensitivity to scipy v1.1 (#10961)

2018-05-16 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new b845147  Fix test_sparse_mathematical_core sensitivity to scipy v1.1 
(#10961)
b845147 is described below

commit b84514758d9aefb60dbb90a941c3948d2854c2d3
Author: Dick Carter 
AuthorDate: Wed May 16 10:06:42 2018 -0700

Fix test_sparse_mathematical_core sensitivity to scipy v1.1 (#10961)
---
 tests/python/unittest/test_sparse_operator.py | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/tests/python/unittest/test_sparse_operator.py 
b/tests/python/unittest/test_sparse_operator.py
index 1b3c128..226db70 100644
--- a/tests/python/unittest/test_sparse_operator.py
+++ b/tests/python/unittest/test_sparse_operator.py
@@ -1044,12 +1044,16 @@ def test_sparse_mathematical_core():
 
 try:
 from scipy import special as scipy_special
-import_succeeded = True
+# On scipy v1.0, psi([0, -1, -2, -3, ...]) = [ inf, inf, inf, 
inf, ...]
+# On scipy v1.1, psi([0, -1, -2, -3, ...]) = [-inf, nan, nan, 
nan, ...]
+# Map the behavior of v1.1 psi() to that of v1.0 for ints <= 0 
for consistency
+scipy_psi = np.vectorize(lambda x: np.inf if 
float(x).is_integer() and x <= 0 else
+ scipy_special.psi(x))
 # gamma
 check_sparse_mathematical_core("gamma", stype,
lambda x: 
mx.sym.sparse.gamma(x),
lambda x: 
scipy_special.gamma(x),
-   lambda x: 
scipy_special.gamma(x) * scipy_special.psi(x),
+   lambda x: 
scipy_special.gamma(x) * scipy_psi(x),

output_grad_stype=output_grad_stype,

input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
@@ -1058,17 +1062,14 @@ def test_sparse_mathematical_core():
 check_sparse_mathematical_core("gammaln", stype,
lambda x: 
mx.sym.sparse.gammaln(x),
lambda x: 
scipy_special.gammaln(x),
-   lambda x: scipy_special.psi(x),
+   lambda x: scipy_psi(x),

output_grad_stype=output_grad_stype,

input_grad_stype=input_grad_stype,
force_overlap=force_overlap,
density=density, 
ograd_density=ograd_density)
 
-except:
-if import_succeeded == False:
-print("Could not import scipy. Skipping unit tests for 
special functions")
-else:
-raise
+except ImportError:
+print("Could not import scipy. Skipping unit tests for special 
functions")
 
 for i in range(1):
 print("pass", i)

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: [Sparse-Gluon] embedding with sparse grad (#10924)

2018-05-15 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new b2ccd34  [Sparse-Gluon] embedding with sparse grad (#10924)
b2ccd34 is described below

commit b2ccd34ad2801b6c87c957c28ad718562a4c5b6e
Author: Haibin Lin 
AuthorDate: Tue May 15 17:09:59 2018 -0700

[Sparse-Gluon] embedding with sparse grad (#10924)

* draft

* update test

*  fix kvstore

* fix lint

* fix test

* add proper error msg

* CR comment
---
 python/mxnet/gluon/nn/basic_layers.py | 10 ---
 python/mxnet/gluon/parameter.py   | 15 ---
 python/mxnet/gluon/trainer.py | 20 +++---
 src/kvstore/kvstore_local.h   |  2 +-
 src/operator/tensor/init_op.h |  4 +--
 tests/python/unittest/test_gluon.py   | 50 ---
 6 files changed, 79 insertions(+), 22 deletions(-)

diff --git a/python/mxnet/gluon/nn/basic_layers.py 
b/python/mxnet/gluon/nn/basic_layers.py
index d86c3e6..abde51b 100644
--- a/python/mxnet/gluon/nn/basic_layers.py
+++ b/python/mxnet/gluon/nn/basic_layers.py
@@ -381,7 +381,8 @@ class Embedding(HybridBlock):
 Data type of output embeddings.
 weight_initializer : Initializer
 Initializer for the `embeddings` matrix.
-
+sparse_grad: bool
+If True, gradient w.r.t. weight will be a 'row_sparse' NDArray.
 
 Inputs:
 - **data**: (N-1)-D tensor with shape: `(x1, x2, ..., xN-1)`.
@@ -390,13 +391,14 @@ class Embedding(HybridBlock):
 - **out**: N-D tensor with shape: `(x1, x2, ..., xN-1, output_dim)`.
 """
 def __init__(self, input_dim, output_dim, dtype='float32',
- weight_initializer=None, **kwargs):
+ weight_initializer=None, sparse_grad=False, **kwargs):
 super(Embedding, self).__init__(**kwargs)
+grad_stype = 'row_sparse' if sparse_grad else 'default'
 self._kwargs = {'input_dim': input_dim, 'output_dim': output_dim,
-'dtype': dtype}
+'dtype': dtype, 'sparse_grad': sparse_grad}
 self.weight = self.params.get('weight', shape=(input_dim, output_dim),
   init=weight_initializer, dtype=dtype,
-  allow_deferred_init=True)
+  allow_deferred_init=True, 
grad_stype=grad_stype)
 
 def hybrid_forward(self, F, x, weight):
 return F.Embedding(x, weight, name='fwd', **self._kwargs)
diff --git a/python/mxnet/gluon/parameter.py b/python/mxnet/gluon/parameter.py
index 320b376..c7cbccc 100644
--- a/python/mxnet/gluon/parameter.py
+++ b/python/mxnet/gluon/parameter.py
@@ -81,6 +81,8 @@ class Parameter(object):
 Weight decay multiplier (L2 regularizer coefficient). Works similar to 
lr_mult.
 init : Initializer, default None
 Initializer of this parameter. Will use the global initializer by 
default.
+grad_stype: {'default', 'row_sparse', 'csr'}, defaults to 'default'.
+The storage type of the parameter's gradient.
 
 Attributes
 --
@@ -97,7 +99,7 @@ class Parameter(object):
 """
 def __init__(self, name, grad_req='write', shape=None, dtype=mx_real_t,
  lr_mult=1.0, wd_mult=1.0, init=None, 
allow_deferred_init=False,
- differentiable=True):
+ differentiable=True, grad_stype='default'):
 self._var = None
 self._data = None
 self._grad = None
@@ -114,6 +116,11 @@ class Parameter(object):
 self.wd_mult = wd_mult
 self.grad_req = grad_req
 self.init = init
+assert grad_stype in ['default', 'row_sparse', 'csr'], \
+"grad_stype for Parameter '%s' must be one of 'default', 
'row_sparse', or 'csr'," \
+" but got '%s'" % (name, grad_stype)
+self._grad_stype = grad_stype
+
 
 def __repr__(self):
 s = 'Parameter {name} (shape={shape}, dtype={dtype})'
@@ -261,7 +268,9 @@ class Parameter(object):
 self._grad = None
 return
 
-self._grad = [ndarray.zeros_like(i) for i in self._data]
+self._grad = [ndarray.zeros(shape=i.shape, dtype=i.dtype, 
ctx=i.context,
+stype=self._grad_stype) for i in 
self._data]
+
 autograd.mark_variables(self.list_data(), self.list_grad(), 
self.grad_req)
 
 def _reduce(self):
@@ -431,7 +440,7 @@ class Parameter(object):
 if

[incubator-mxnet] branch master updated: handle fallback correctly for write inplace when the array is MKLDNN. (#10651)

2018-05-15 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new bae1332  handle fallback correctly for write inplace when the array is 
MKLDNN. (#10651)
bae1332 is described below

commit bae13329d0c1d510223b3f453e44ec2394323540
Author: Da Zheng 
AuthorDate: Tue May 15 15:44:23 2018 -0700

handle fallback correctly for write inplace when the array is MKLDNN. 
(#10651)

* handle writeinplace correctly for mkldnn arrays.

* Add unit tests.

* Fix a bug in mkldnn copy.

* Fix a bug in ndarray copy.

* Verify results.
---
 src/common/exec_utils.h   |  12 ++-
 src/executor/attach_op_execs_pass.cc  |   7 +-
 src/imperative/imperative_utils.h |  10 +-
 src/ndarray/ndarray.cc|   5 +-
 src/operator/nn/mkldnn/mkldnn_copy.cc |   8 +-
 tests/cpp/operator/mkldnn.cc  | 192 +-
 6 files changed, 217 insertions(+), 17 deletions(-)

diff --git a/src/common/exec_utils.h b/src/common/exec_utils.h
index 4881e0f..731d03d 100644
--- a/src/common/exec_utils.h
+++ b/src/common/exec_utils.h
@@ -78,8 +78,8 @@ inline bool SetupDefaultBlobsIn(const std::vector& 
src,
 }
 
 inline bool SetupDefaultBlobsOut(const std::vector& src,
- const std::vector &req,
  const std::vector *bufs,
+ std::vector *req,
  std::vector *blobs,
  std::vector *temp_src,
  std::vector *temp_dst) {
@@ -88,6 +88,12 @@ inline bool SetupDefaultBlobsOut(const std::vector& 
src,
 auto& nd = src[i];
 bool is_default = nd.storage_type() == kDefaultStorage;
 #if MXNET_USE_MKLDNN == 1
+if (req->at(i) == kWriteInplace && nd.IsMKLDNNData())
+  // If it's write inplace and the output array doesn't use the default
+  // layout, we'll generate a temporary output array below, which means
+  // the input array and the output array are no longer the same array.
+  // we should change the request type.
+  req->at(i) = kWriteTo;
 // We have to make sure it's default storage and default layout.
 is_default = nd.IsDefaultData();
 #endif
@@ -117,9 +123,9 @@ inline bool SetupDefaultBlobsOut(const 
std::vector& src,
  */
 inline void SetupDefaultBlobsInOut(const std::vector &ndinputs,
const std::vector &ndoutputs,
-   const std::vector &req,
const std::vector *in_bufs,
const std::vector *out_bufs,
+   std::vector *req,
std::vector *input_blobs,
std::vector *output_blobs,
std::vector *pre_temp_src,
@@ -132,7 +138,7 @@ inline void SetupDefaultBlobsInOut(const 
std::vector &ndinputs,
   SetupDefaultBlobsIn(ndinputs, in_bufs, input_blobs, pre_temp_src, 
pre_temp_dst,
   in_temp_idx_map);
   // populate output blobs
-  SetupDefaultBlobsOut(ndoutputs, req, out_bufs, output_blobs, post_temp_dst,
+  SetupDefaultBlobsOut(ndoutputs, out_bufs, req, output_blobs, post_temp_dst,
post_temp_src);
   // add mutable inputs to post temp list
   for (const auto idx : mutate_idx) {
diff --git a/src/executor/attach_op_execs_pass.cc 
b/src/executor/attach_op_execs_pass.cc
index f7ac772..697e486 100644
--- a/src/executor/attach_op_execs_pass.cc
+++ b/src/executor/attach_op_execs_pass.cc
@@ -78,7 +78,8 @@ class StorageFallbackOpExecutor : public OpExecutor {
 pre_temp_src_.clear(); pre_temp_dst_.clear();
 post_temp_src_.clear(); post_temp_dst_.clear();
 in_temp_idx_map_.clear();
-SetupDefaultBlobsInOut(in_array, out_array, req, &pre_temp_buf_, 
&post_temp_buf_,
+tmp_req = req;
+SetupDefaultBlobsInOut(in_array, out_array, &pre_temp_buf_, 
&post_temp_buf_, &req,
&in_data_, &out_data_,
&pre_temp_src_, &pre_temp_dst_,
&post_temp_src_, &post_temp_dst_,
@@ -89,8 +90,12 @@ class StorageFallbackOpExecutor : public OpExecutor {
   // storage fallback after fcompute is completed
   void PostFCompute(bool is_gpu) {
 common::CastNonDefaultStorage(post_temp_src_, post_temp_dst_, op_ctx, 
is_gpu);
+req = tmp_req;
   }
 
+  // output requirement on each output array.
+  // This temporarily saves the original output requirements.
+  std::vector tmp_req;
   // default storage tensor blobs for fcompute
   std::vector in_data_, out_dat

[incubator-mxnet] branch master updated: fix rnn (#10954)

2018-05-15 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 65061dc  fix rnn (#10954)
65061dc is described below

commit 65061dc93710afce92edfe548aa3352473da3cdb
Author: Sheng Zha 
AuthorDate: Tue May 15 14:30:08 2018 -0700

fix rnn (#10954)
---
 python/mxnet/gluon/rnn/rnn_layer.py |  5 +++--
 tests/python/unittest/test_gluon_rnn.py | 29 ++---
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/python/mxnet/gluon/rnn/rnn_layer.py 
b/python/mxnet/gluon/rnn/rnn_layer.py
index 34ad05d..89224cf 100644
--- a/python/mxnet/gluon/rnn/rnn_layer.py
+++ b/python/mxnet/gluon/rnn/rnn_layer.py
@@ -23,7 +23,7 @@
 from __future__ import print_function
 __all__ = ['RNN', 'LSTM', 'GRU']
 
-from ... import ndarray
+from ... import ndarray, autograd
 from .. import Block
 from . import rnn_cell
 
@@ -185,7 +185,8 @@ class _RNNLayer(Block):
 for i in range(self._dir):
 self.i2h_weight[i].shape = (self._gates*self._hidden_size, 
inputs.shape[2])
 self.i2h_weight[i]._finish_deferred_init()
-if inputs.context.device_type == 'gpu' or self._mode == 'lstm':
+if inputs.context.device_type == 'gpu' or \
+   self._mode == 'lstm' and not (self._dropout and 
autograd.is_training()):
 out = self._forward_kernel(inputs, states)
 else:
 out = self._forward(inputs, states)
diff --git a/tests/python/unittest/test_gluon_rnn.py 
b/tests/python/unittest/test_gluon_rnn.py
index f22b13d..24d5a93 100644
--- a/tests/python/unittest/test_gluon_rnn.py
+++ b/tests/python/unittest/test_gluon_rnn.py
@@ -80,7 +80,7 @@ def test_lstm_cpu_inference():
 
 mx.test_utils.assert_almost_equal(y, EXPECTED_LSTM_OUTPUT,
   rtol=1e-3, atol=1e-5)
-
+
 
 def test_gru():
 cell = gluon.rnn.GRUCell(100, prefix='rnn_')
@@ -242,7 +242,7 @@ def test_rnn_cells():
 net.add(gluon.rnn.GRUCell(100, input_size=100))
 check_rnn_forward(net, mx.nd.ones((8, 3, 200)))
 
-def check_rnn_layer_forward(layer, inputs, states=None):
+def check_rnn_layer_forward(layer, inputs, states=None, run_only=False):
 layer.collect_params().initialize()
 inputs.attach_grad()
 with mx.autograd.record():
@@ -268,17 +268,32 @@ def check_rnn_layer_forward(layer, inputs, states=None):
 assert isinstance(out, mx.nd.NDArray)
 out.backward()
 
-mx.test_utils.assert_almost_equal(np_out, out.asnumpy(), rtol=1e-3, 
atol=1e-5)
-mx.test_utils.assert_almost_equal(np_dx, inputs.grad.asnumpy(), rtol=1e-3, 
atol=1e-5)
+if not run_only:
+mx.test_utils.assert_almost_equal(np_out, out.asnumpy(), rtol=1e-3, 
atol=1e-5)
+mx.test_utils.assert_almost_equal(np_dx, inputs.grad.asnumpy(), 
rtol=1e-3, atol=1e-5)
 
 
 def test_rnn_layers():
 check_rnn_layer_forward(gluon.rnn.RNN(10, 2), mx.nd.ones((8, 3, 20)))
-check_rnn_layer_forward(gluon.rnn.RNN(10, 2), mx.nd.ones((8, 3, 20)), 
mx.nd.ones((2, 3, 10)))
+check_rnn_layer_forward(gluon.rnn.RNN(10, 2, bidirectional=True), 
mx.nd.ones((8, 3, 20)), mx.nd.ones((4, 3, 10)))
 check_rnn_layer_forward(gluon.rnn.LSTM(10, 2), mx.nd.ones((8, 3, 20)))
-check_rnn_layer_forward(gluon.rnn.LSTM(10, 2), mx.nd.ones((8, 3, 20)), 
[mx.nd.ones((2, 3, 10)), mx.nd.ones((2, 3, 10))])
+check_rnn_layer_forward(gluon.rnn.LSTM(10, 2, bidirectional=True), 
mx.nd.ones((8, 3, 20)), [mx.nd.ones((4, 3, 10)), mx.nd.ones((4, 3, 10))])
 check_rnn_layer_forward(gluon.rnn.GRU(10, 2), mx.nd.ones((8, 3, 20)))
-check_rnn_layer_forward(gluon.rnn.GRU(10, 2), mx.nd.ones((8, 3, 20)), 
mx.nd.ones((2, 3, 10)))
+check_rnn_layer_forward(gluon.rnn.GRU(10, 2, bidirectional=True), 
mx.nd.ones((8, 3, 20)), mx.nd.ones((4, 3, 10)))
+
+check_rnn_layer_forward(gluon.rnn.RNN(10, 2, dropout=0.5), mx.nd.ones((8, 
3, 20)),
+run_only=True)
+check_rnn_layer_forward(gluon.rnn.RNN(10, 2, bidirectional=True, 
dropout=0.5),
+mx.nd.ones((8, 3, 20)), mx.nd.ones((4, 3, 10)), 
run_only=True)
+check_rnn_layer_forward(gluon.rnn.LSTM(10, 2, dropout=0.5), mx.nd.ones((8, 
3, 20)),
+run_only=True)
+check_rnn_layer_forward(gluon.rnn.LSTM(10, 2, bidirectional=True, 
dropout=0.5),
+mx.nd.ones((8, 3, 20)),
+[mx.nd.ones((4, 3, 10)), mx.nd.ones((4, 3, 10))], 
run_only=True)
+check_rnn_layer_forward(gluon.rnn.GRU(10, 2, dropout=0.5), mx.nd.ones((8, 
3, 20)),
+run_only=True)
+check_rnn_layer_forward(gluon.rnn.GRU(10, 2, bidirectional=True, 
dropout=0.5),
+mx.nd.ones((8, 3

[incubator-mxnet] branch master updated: Expose the number of GPUs. (#10354)

2018-05-15 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 1214205  Expose the number of GPUs. (#10354)
1214205 is described below

commit 12142056152d1331d4c519f77eb75bad89b4f3eb
Author: Tobias Domhan 
AuthorDate: Tue May 15 20:39:02 2018 +0200

Expose the number of GPUs. (#10354)

* Expose the number of GPUs.

* Added GPU test.

* Removed trailing whitespace.

* making the compiler happy

* Reverted CPU only logic and added CPU test.

* Updated python docs.

* Removing break from test.

* no longer assert on 0 gpus
---
 include/mxnet/base.h   | 19 +++
 include/mxnet/c_api.h  |  7 +++
 python/mxnet/context.py| 21 +
 src/c_api/c_api.cc |  6 ++
 tests/python/gpu/test_operator_gpu.py  |  3 +++
 tests/python/unittest/test_operator.py | 12 
 6 files changed, 68 insertions(+)

diff --git a/include/mxnet/base.h b/include/mxnet/base.h
index 7cabfe5..bff2ab4 100644
--- a/include/mxnet/base.h
+++ b/include/mxnet/base.h
@@ -218,6 +218,11 @@ struct Context {
*/
   inline static Context GPU(int32_t dev_id = -1);
   /*!
+   * Get the number of GPUs available.
+   * \return The number of GPUs that are available.
+   */
+  inline static int32_t GetGPUCount();
+  /*!
* Create a pinned CPU context.
* \param dev_id the device id for corresponding GPU.
* \return Pinned CPU context. -1 for current GPU.
@@ -307,6 +312,20 @@ inline Context Context::GPU(int32_t dev_id) {
   return Create(kGPU, dev_id);
 }
 
+inline int32_t Context::GetGPUCount() {
+#if MXNET_USE_CUDA
+  int32_t count;
+  cudaError_t e = cudaGetDeviceCount(&count);
+  if (e == cudaErrorNoDevice) {
+return 0;
+  }
+  CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e);
+  return count;
+#else
+  return 0;
+#endif
+}
+
 inline Context Context::FromString(const std::string& str) {
   Context ret;
   try {
diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h
index 9ac90d6..06e39bf 100644
--- a/include/mxnet/c_api.h
+++ b/include/mxnet/c_api.h
@@ -384,6 +384,13 @@ MXNET_DLL int MXSetNumOMPThreads(int thread_num);
 MXNET_DLL int MXEngineSetBulkSize(int bulk_size, int* prev_bulk_size);
 
 /*!
+ * \brief Get the number of GPUs.
+ * \param out pointer to int that will hold the number of GPUs available.
+ * \return 0 when success, -1 when failure happens.
+ */
+MXNET_DLL int MXGetGPUCount(int* out);
+
+/*!
  * \brief get the MXNet library version as an integer
  * \param pointer to the integer holding the version number
  * \return 0 when success, -1 when failure happens
diff --git a/python/mxnet/context.py b/python/mxnet/context.py
index 5861890..61b7053 100644
--- a/python/mxnet/context.py
+++ b/python/mxnet/context.py
@@ -20,7 +20,11 @@
 from __future__ import absolute_import
 import threading
 import warnings
+import ctypes
 from .base import classproperty, with_metaclass, _MXClassPropertyMetaClass
+from .base import _LIB
+from .base import check_call
+
 
 class Context(with_metaclass(_MXClassPropertyMetaClass, object)):
 """Constructs a context.
@@ -237,6 +241,23 @@ def gpu(device_id=0):
 return Context('gpu', device_id)
 
 
+def num_gpus():
+"""Query CUDA for the number of GPUs present.
+
+Raises
+--
+Will raise an exception on any CUDA error.
+
+Returns
+---
+count : int
+The number of GPUs.
+
+"""
+count = ctypes.c_int()
+check_call(_LIB.MXGetGPUCount(ctypes.byref(count)))
+return count.value
+
 def current_context():
 """Returns the current context.
 
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc
index b3dcd6a..467118b 100644
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -116,6 +116,12 @@ int MXEngineSetBulkSize(int bulk_size, int* 
prev_bulk_size) {
   API_END();
 }
 
+int MXGetGPUCount(int* out) {
+  API_BEGIN();
+  *out = Context::GetGPUCount();
+  API_END();
+}
+
 int MXGetVersion(int *out) {
   API_BEGIN();
   *out = static_cast(MXNET_VERSION);
diff --git a/tests/python/gpu/test_operator_gpu.py 
b/tests/python/gpu/test_operator_gpu.py
index 090773c..b9f2b67 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -1853,6 +1853,9 @@ def test_softmax_activation():
 assert_almost_equal(cpu_a.grad.asnumpy(), gpu_a.grad.asnumpy(),
 atol = 1e-3, rtol = 1e-3)
 
+def test_context_num_gpus():
+# Test that num_gpus reports at least one GPU, as the test is run on a GPU 
host.
+assert mx.context.num_gpus() > 0
 
 if __name__ == '__main__':
 import nose
diff --git a/tests/python/uni

[incubator-mxnet] branch master updated: Update performance page. (#10876)

2018-05-15 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new db4de44  Update performance page. (#10876)
db4de44 is described below

commit db4de44f170df08a1500feabccb55a63266d7ae2
Author: Da Zheng 
AuthorDate: Tue May 15 11:23:32 2018 -0700

Update performance page. (#10876)

* Update perf.

* add float16.

* update header.
---
 docs/faq/perf.md | 56 
 1 file changed, 40 insertions(+), 16 deletions(-)

diff --git a/docs/faq/perf.md b/docs/faq/perf.md
index ce74391..ad81b5d 100644
--- a/docs/faq/perf.md
+++ b/docs/faq/perf.md
@@ -36,7 +36,7 @@ to measure the performance on different AWS EC2 machines.
 
 AWS EC2 C5.18xlarge:
 
-| Batch | Alexnet | VGG| Inception-BN | Inception-v3 | Resnet 50 | Resnet 
152 |
+| Batch | Alexnet | VGG 16| Inception-BN | Inception-v3 | Resnet 50 | 
Resnet 152 |
 
|---|-||--|--|---||
 | 1 | 390.53  | 81.57  | 124.13   | 62.26| 76.22 | 32.92   
   |
 | 2 | 596.45  | 100.84 | 206.58   | 93.36| 119.55| 46.80   
   |
@@ -48,7 +48,7 @@ AWS EC2 C5.18xlarge:
 
 AWS EC2 C5.9xlarge:
 
-| Batch | Alexnet | VGG   | Inception-BN | Inception-v3 | Resnet 50 | Resnet 
152 |
+| Batch | Alexnet | VGG 16   | Inception-BN | Inception-v3 | Resnet 50 | 
Resnet 152 |
 
|---|-|---|--|--|---||
 | 1 | 257.77  | 50.61 | 130.99   | 66.95| 75.38 | 32.33
  |
 | 2 | 410.60  | 63.02 | 195.14   | 87.84| 102.67| 41.57
  |
@@ -60,7 +60,7 @@ AWS EC2 C5.9xlarge:
 
 AWS EC2 C5.4xlarge:
 
-| Batch | Alexnet | VGG   | Inception-BN | Inception-v3 | Resnet 50 | Resnet 
152 |
+| Batch | Alexnet | VGG 16   | Inception-BN | Inception-v3 | Resnet 50 | 
Resnet 152 |
 
|---|-|---|--|--|---||
 | 1 | 214.15  | 29.32 | 114.97   | 47.96| 61.01 | 23.92
  |
 | 2 | 310.04  | 34.81 | 150.09   | 60.89| 71.16 | 27.92
  |
@@ -72,7 +72,7 @@ AWS EC2 C5.4xlarge:
 
 AWS EC2 C5.2xlarge:
 
-| Batch | Alexnet | VGG   | Inception-BN | Inception-v3 | Resnet 50 | Resnet 
152 |
+| Batch | Alexnet | VGG 16   | Inception-BN | Inception-v3 | Resnet 50 | 
Resnet 152 |
 
|---|-|---|--|--|---||
 | 1 | 131.01  | 15.67 | 78.75| 31.12| 37.30 | 14.75
  |
 | 2 | 182.29  | 18.01 | 98.59| 39.13| 45.98 | 17.84
  |
@@ -84,7 +84,7 @@ AWS EC2 C5.2xlarge:
 
 AWS EC2 C5.xlarge:
 
-| Batch | Alexnet | VGG  | Inception-BN | Inception-v3 | Resnet 50 | Resnet 
152 |
+| Batch | Alexnet | VGG 16  | Inception-BN | Inception-v3 | Resnet 50 | Resnet 
152 |
 
|---|-|--|--|--|---||
 | 1 | 36.64   | 3.93 | 27.06| 10.09| 12.98 | 5.06  
 |
 | 2 | 49.21   | 4.49 | 29.67| 10.80| 12.94 | 5.14  
 |
@@ -117,7 +117,7 @@ and  
[MXNet-1.2.0.rc1](https://github.com/apache/incubator-mxnet/releases/downlo
 
 - K80 (single GPU)
 
-| Batch | Alexnet | VGG| Inception-BN | Inception-v3 | Resnet 50 | Resnet 
152 |
+| Batch | Alexnet | VGG 16| Inception-BN | Inception-v3 | Resnet 50 | 
Resnet 152 |
 
|---|-||--|--|---||
 | 1 | 243.93  | 43.59  | 68.62| 35.52| 67.41 | 23.65   
   |
 | 2 | 338.16  | 49.14  | 113.41   | 56.29| 93.35 | 33.88   
   |
@@ -125,10 +125,13 @@ and  
[MXNet-1.2.0.rc1](https://github.com/apache/incubator-mxnet/releases/downlo
 | 8 | 683.52  | 70.50  | 190.49   | 86.23| 131.32| 50.54   
   |
 | 16| 1004.66 | 109.01 | 254.20   | 105.70   | 155.40| 62.55   
   |
 | 32| 1238.55 | 114.98 | 285.49   | 116.79   | 159.42| 64.99   
   |
+| 64 | 1346.72 | 123.56 | 308.73 | 122.21 | 167.58 | 70.21 |
+| 128 | 1416.91 | OOM | 320.98 | 123.11 | 171.55 | 71.85 |
+| 256 | 1462.97 | OOM | 329.16 | 127.53 | 153.01 | 57.23 |
 
 - M60
 
-| Batch | Alexnet | VGG| Inception-BN | Inception-v3 | Resnet 50 | Resnet 
152 |
+| Batch | Alexnet | VGG 16| Inception-BN | Inception-v3 | Resnet 50 | 
Resnet 152 |
 
|---|-||--|--|---||
 | 1 | 243.49  | 59.95  | 101.97   | 48.30| 95.46 | 39.29   
   |
 | 2 | 491.04  | 69.14  | 170.35   | 80.27| 142.61| 60.17   
   |
@@ -136,19 +139,38 @@ and  
[MXNet-1.2.0.rc1](https://github.com/apache/incubator-mxnet/releases/downlo
 | 8 | 1077.73 | 109.34 | 343.42

[incubator-mxnet] branch master updated: Fix engine stop/start (#10911)

2018-05-15 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 57c8ca1  Fix engine stop/start  (#10911)
57c8ca1 is described below

commit 57c8ca1a0a6dae36dc27a9f054041ecce652e4c8
Author: Joshua Z. Zhang 
AuthorDate: Tue May 15 11:22:39 2018 -0700

Fix engine stop/start  (#10911)

* fix engine start/stop

* add tests

* fix test

* fix

* fix tests
---
 python/mxnet/gluon/data/dataloader.py   |  2 +-
 src/engine/naive_engine.cc  |  6 +++
 src/engine/threaded_engine_pooled.cc| 57 +
 tests/cpp/engine/threaded_engine_test.cc| 17 +
 tests/python/unittest/test_engine_import.py | 44 ++
 5 files changed, 109 insertions(+), 17 deletions(-)

diff --git a/python/mxnet/gluon/data/dataloader.py 
b/python/mxnet/gluon/data/dataloader.py
index 7ef18bd..d80a6bf 100644
--- a/python/mxnet/gluon/data/dataloader.py
+++ b/python/mxnet/gluon/data/dataloader.py
@@ -143,7 +143,7 @@ class _MultiWorkerIter(object):
 self._batchify_fn = batchify_fn
 self._batch_sampler = batch_sampler
 self._key_queue = Queue()
-self._data_queue = SimpleQueue()
+self._data_queue = Queue() if sys.version_info[0] <= 2 else 
SimpleQueue()
 self._data_buffer = {}
 self._rcvd_idx = 0
 self._sent_idx = 0
diff --git a/src/engine/naive_engine.cc b/src/engine/naive_engine.cc
index 1fa5306..8196af2 100644
--- a/src/engine/naive_engine.cc
+++ b/src/engine/naive_engine.cc
@@ -63,6 +63,12 @@ class NaiveEngine final : public Engine {
 #endif
   }
 
+  void Stop() override {
+  }
+
+  void Start() override {
+  }
+
   // new variables
   VarHandle NewVariable() override {
 size_t v = ++counter_;
diff --git a/src/engine/threaded_engine_pooled.cc 
b/src/engine/threaded_engine_pooled.cc
index 074ea4e..574e832 100644
--- a/src/engine/threaded_engine_pooled.cc
+++ b/src/engine/threaded_engine_pooled.cc
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "./threaded_engine.h"
 #include "./thread_pool.h"
 #include "./stream_manager.h"
@@ -42,14 +43,38 @@ namespace engine {
  */
 class ThreadedEnginePooled : public ThreadedEngine {
  public:
-  ThreadedEnginePooled() :
-  thread_pool_(kNumWorkingThreads, [this]() { ThreadWorker(&task_queue_); 
}),
-  io_thread_pool_(1, [this]() { ThreadWorker(&io_task_queue_); }) {}
+  ThreadedEnginePooled() {
+this->Start();
+  }
 
   ~ThreadedEnginePooled() noexcept(false) {
-streams_.Finalize();
-task_queue_.SignalForKill();
-io_task_queue_.SignalForKill();
+StopNoWait();
+  }
+
+  void StopNoWait() {
+streams_->Finalize();
+task_queue_->SignalForKill();
+io_task_queue_->SignalForKill();
+task_queue_ = nullptr;
+io_task_queue_ = nullptr;
+thread_pool_ = nullptr;
+io_thread_pool_ = nullptr;
+streams_ = nullptr;
+  }
+
+  void Stop() override {
+WaitForAll();
+StopNoWait();
+  }
+
+  void Start() override {
+streams_.reset(new StreamManager());
+task_queue_.reset(new dmlc::ConcurrentBlockingQueue());
+io_task_queue_.reset(new dmlc::ConcurrentBlockingQueue());
+thread_pool_.reset(new ThreadPool(kNumWorkingThreads, [this]() {
+  ThreadWorker(task_queue_); }));
+io_thread_pool_.reset(new ThreadPool(1, [this]() {
+  ThreadWorker(io_task_queue_); }));
   }
 
  protected:
@@ -71,24 +96,24 @@ class ThreadedEnginePooled : public ThreadedEngine {
   /*!
* \brief Streams.
*/
-  StreamManager streams_;
+  std::unique_ptr> streams_;
   /*!
* \brief Task queues.
*/
-  dmlc::ConcurrentBlockingQueue task_queue_;
-  dmlc::ConcurrentBlockingQueue io_task_queue_;
+  std::shared_ptr> task_queue_;
+  std::shared_ptr> io_task_queue_;
   /*!
* \brief Thread pools.
*/
-  ThreadPool thread_pool_;
-  ThreadPool io_thread_pool_;
+  std::unique_ptr thread_pool_;
+  std::unique_ptr io_thread_pool_;
   /*!
* \brief Worker.
* \param task_queue Queue to work on.
*
* The method to pass to thread pool to parallelize.
*/
-  void ThreadWorker(dmlc::ConcurrentBlockingQueue* task_queue) {
+  void ThreadWorker(std::shared_ptr> 
task_queue) {
 OprBlock* opr_block;
 while (task_queue->Pop(&opr_block)) {
   DoExecute(opr_block);
@@ -110,8 +135,8 @@ class ThreadedEnginePooled : public ThreadedEngine {
 bool is_copy = (opr_block->opr->prop == FnProperty::kCopyFromGPU ||
 opr_block->opr->prop == FnProperty::kCopyToGPU);
 auto&& rctx = is_copy
-? streams_.GetIORunContext(opr_block->ctx)
-: streams_.GetRunContext(opr_block->ctx);
+? streams_->GetIORunContext(opr_block

[incubator-mxnet] branch master updated: [MXNET-9704] An assertion check for invalid layout (#10915)

2018-05-15 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 0805e4d  [MXNET-9704] An assertion check for invalid layout (#10915)
0805e4d is described below

commit 0805e4de3a1070763349fb245e00845af4ca8d49
Author: Yan Li 
AuthorDate: Wed May 16 02:20:51 2018 +0800

[MXNET-9704] An assertion check for invalid layout (#10915)

* add assertion and update documentations for gluon conv and pooling layers

* Update conv_layers.py

fix pylint

* fix for pylint
---
 python/mxnet/gluon/nn/conv_layers.py | 104 +++
 1 file changed, 57 insertions(+), 47 deletions(-)

diff --git a/python/mxnet/gluon/nn/conv_layers.py 
b/python/mxnet/gluon/nn/conv_layers.py
index 87a62bc..7b4a6be 100644
--- a/python/mxnet/gluon/nn/conv_layers.py
+++ b/python/mxnet/gluon/nn/conv_layers.py
@@ -196,7 +196,7 @@ class Conv1D(_Conv):
 layers side by side, each seeing half the input channels, and producing
 half the output channels, and both subsequently concatenated.
 layout: str, default 'NCW'
-Dimension ordering of data and weight. Can be 'NCW', 'NWC', etc.
+Dimension ordering of data and weight. Only supports 'NCW' layout for 
now.
 'N', 'C', 'W' stands for batch, channel, and width (time) dimensions
 respectively. Convolution is applied on the 'W' dimension.
 in_channels : int, default 0
@@ -229,6 +229,7 @@ class Conv1D(_Conv):
  groups=1, layout='NCW', activation=None, use_bias=True,
  weight_initializer=None, bias_initializer='zeros',
  in_channels=0, **kwargs):
+assert layout == 'NCW', "Only supports 'NCW' layout for now"
 if isinstance(kernel_size, numeric_types):
 kernel_size = (kernel_size,)
 assert len(kernel_size) == 1, "kernel_size must be a number or a list 
of 1 ints"
@@ -271,9 +272,9 @@ class Conv2D(_Conv):
 layers side by side, each seeing half the input channels, and producing
 half the output channels, and both subsequently concatenated.
 layout : str, default 'NCHW'
-Dimension ordering of data and weight. Can be 'NCHW', 'NHWC', etc.
-'N', 'C', 'H', 'W' stands for batch, channel, height, and width
-dimensions respectively. Convolution is applied on the 'H' and
+Dimension ordering of data and weight. Only supports 'NCHW' and 'NHWC'
+layout for now. 'N', 'C', 'H', 'W' stands for batch, channel, height,
+and width dimensions respectively. Convolution is applied on the 'H' 
and
 'W' dimensions.
 in_channels : int, default 0
 The number of input channels to this layer. If not specified,
@@ -293,12 +294,12 @@ class Conv2D(_Conv):
 
 Inputs:
 - **data**: 4D input tensor with shape
-  `(batch_size, in_channels, height, width)` when `layout` is `NCW`.
+  `(batch_size, in_channels, height, width)` when `layout` is `NCHW`.
   For other layouts shape is permuted accordingly.
 
 Outputs:
 - **out**: 4D output tensor with shape
-  `(batch_size, channels, out_height, out_width)` when `layout` is 
`NCW`.
+  `(batch_size, channels, out_height, out_width)` when `layout` is 
`NCHW`.
   out_height and out_width are calculated as::
 
   out_height = 
floor((height+2*padding[0]-dilation[0]*(kernel_size[0]-1)-1)/stride[0])+1
@@ -308,6 +309,8 @@ class Conv2D(_Conv):
  dilation=(1, 1), groups=1, layout='NCHW',
  activation=None, use_bias=True, weight_initializer=None,
  bias_initializer='zeros', in_channels=0, **kwargs):
+assert layout == 'NCHW' or layout == 'NHWC', \
+"Only supports 'NCHW' and 'NHWC' layout for now"
 if isinstance(kernel_size, numeric_types):
 kernel_size = (kernel_size,)*2
 assert len(kernel_size) == 2, "kernel_size must be a number or a list 
of 2 ints"
@@ -350,9 +353,9 @@ class Conv3D(_Conv):
 layers side by side, each seeing half the input channels, and producing
 half the output channels, and both subsequently concatenated.
 layout : str, default 'NCDHW'
-Dimension ordering of data and weight. Can be 'NCDHW', 'NDHWC', etc.
-'N', 'C', 'H', 'W', 'D' stands for batch, channel,

[incubator-mxnet] branch master updated: Correction of gluon documentation in nn.md (#10944)

2018-05-15 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new e103ea5  Correction of gluon documentation in nn.md (#10944)
e103ea5 is described below

commit e103ea59e1ca810e4fcb9790b843ca8a058727f3
Author: f.diakogiannis 
AuthorDate: Wed May 16 02:05:36 2018 +0800

Correction of gluon documentation in nn.md (#10944)

* Correction of gluon documentation in nn.md

Added ```HybridLambda``` in the Basic Layers  list as suggested in #10930

* Update nn.md

Added Lambda too
---
 docs/api/python/gluon/nn.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/api/python/gluon/nn.md b/docs/api/python/gluon/nn.md
index 1001f20..1791faf 100644
--- a/docs/api/python/gluon/nn.md
+++ b/docs/api/python/gluon/nn.md
@@ -23,6 +23,8 @@ This document lists the neural network blocks in Gluon:
 LayerNorm
 Embedding
 Flatten
+Lambda
+HybridLambda
 ```
 
 

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: fix symbolblock save_params (#10748)

2018-05-14 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 0fb57ff  fix symbolblock save_params (#10748)
0fb57ff is described below

commit 0fb57ff31ef5caa32edf973213bde8a8faba85e5
Author: Eric Junyuan Xie 
AuthorDate: Mon May 14 22:31:05 2018 -0700

fix symbolblock save_params (#10748)

* fix symbolblock save_params

* fix
---
 python/mxnet/gluon/block.py | 14 ++
 tests/python/unittest/test_gluon.py | 27 +++
 2 files changed, 41 insertions(+)

diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py
index 7e41272..4779484 100644
--- a/python/mxnet/gluon/block.py
+++ b/python/mxnet/gluon/block.py
@@ -649,6 +649,18 @@ class HybridBlock(Block):
 # pylint: disable= invalid-name
 raise NotImplementedError
 
+def _common_prefix(names):
+"""Get the common prefix for all names"""
+if not names:
+return ''
+prefix = names[0]
+for name in names:
+i = 0
+while i < len(prefix) and i < len(name) and prefix[i] == name[i]:
+i += 1
+prefix = prefix[:i]
+return prefix
+
 
 class SymbolBlock(HybridBlock):
 """Construct block from symbol. This is useful for using pre-trained models
@@ -710,6 +722,8 @@ class SymbolBlock(HybridBlock):
 self.params.get(i, grad_req='null', allow_deferred_init=True)
 
 self._cached_graph = syms, out
+len_prefix = len(_common_prefix(list(self._params.keys(
+self._reg_params = {key[len_prefix:]: val for key, val in 
self._params.items()}
 
 def forward(self, x, *args):
 if isinstance(x, NDArray):
diff --git a/tests/python/unittest/test_gluon.py 
b/tests/python/unittest/test_gluon.py
index b054aa6..fb73e53 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -986,6 +986,33 @@ def test_save_load():
 
 net.load_params('test.params')
 
+def test_symbol_block_save_load():
+class Net(gluon.HybridBlock):
+def __init__(self):
+super(Net, self).__init__()
+with self.name_scope():
+backbone = gluon.model_zoo.vision.resnet18_v1()
+data = mx.sym.var('data')
+featnames = ['stage1_activation0', 'stage2_activation0', 
'stage3_activation0']
+out_names = ['_'.join([backbone.name, featname, 'output']) for 
featname in featnames]
+internals = backbone(data).get_internals()
+outs = [internals[out_name] for out_name in out_names]
+self.backbone = gluon.SymbolBlock(outs, data, 
params=backbone.collect_params())
+self.body = nn.Conv2D(3, 1)
+
+def hybrid_forward(self, F, x):
+x = self.body(x)
+return self.backbone(x)
+
+net1 = Net()
+net1.initialize(mx.init.Normal())
+net1.hybridize()
+net1(mx.nd.random.normal(shape=(1, 3, 32, 32)))
+net1.save_params('./test.params')
+
+net2 = Net()
+net2.load_params('./test.params', ctx=mx.cpu())
+
 
 def test_hybrid_multi_context():
 net = mx.gluon.model_zoo.vision.get_resnet(1, 18)

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: [WIP][MXNET-107] Fused LSTM implementation for CPU (#10104)

2018-05-14 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 275378a  [WIP][MXNET-107] Fused LSTM implementation for CPU (#10104)
275378a is described below

commit 275378a49a6035fd5bdead4a74ac36b6070295a7
Author: Zhang-Shu 
AuthorDate: Tue May 15 03:29:52 2018 +0800

[WIP][MXNET-107] Fused LSTM implementation for CPU (#10104)

* register RNN fused-API with nnvm, finish single-layer && unidirectional LSTM 
forward function

* fix coding style and lint complaints

* add single-layer && unidirectional LSTM backward function

* make interface universal for other RNN mode

* share intermediate result between forward and backward in a trick way

* add comments for important parameters

* modify testcase

* Fix coding style and error message

* fix openmp collapse error

* fix const

* remove rnn.cu and skip related testcases temporarily for building on GPU

* support multi-layer and bidirectional for lstm inference

* remove some testcases in test_gluon_rnn.py to build on GPU

* remove testcase between fp32 and fp64 temporarily

* retrigger ci

* fix some logs

* use a better way to share memory

* fix cudnn registration

* fix invariant calculations and enable some gpu testcases

* add thread local cache for cudnn rnn op

* add thread local cache for rnn op

* fix bugs

* remove some testcases to check segmentation fault

* remove cudnn registration to check segmentation fault

* support multi-layer for LSTM Training

* modify lstm testcase

* add bidirectional support for lstm

* fix gluon and coding style

* fix bugs

* remove nnvm registration

* enable gpu testcases

* add detailed descriptions

* add dropout check

* fix workspace size

* dropout is not supported, add unit test for it

* fix review comments
---
 python/mxnet/gluon/rnn/rnn_layer.py|   4 +-
 src/operator/cudnn_rnn-inl.h   |   3 +-
 src/operator/rnn-inl.h | 624 +
 src/operator/rnn.cc|  48 ++-
 src/operator/rnn_impl.h| 457 
 tests/python/gpu/test_operator_gpu.py  |  17 -
 tests/python/unittest/test_operator.py |  83 +
 7 files changed, 991 insertions(+), 245 deletions(-)

diff --git a/python/mxnet/gluon/rnn/rnn_layer.py 
b/python/mxnet/gluon/rnn/rnn_layer.py
index 59dd747..34ad05d 100644
--- a/python/mxnet/gluon/rnn/rnn_layer.py
+++ b/python/mxnet/gluon/rnn/rnn_layer.py
@@ -23,7 +23,6 @@
 from __future__ import print_function
 __all__ = ['RNN', 'LSTM', 'GRU']
 
-from ...autograd import is_training
 from ... import ndarray
 from .. import Block
 from . import rnn_cell
@@ -186,8 +185,7 @@ class _RNNLayer(Block):
 for i in range(self._dir):
 self.i2h_weight[i].shape = (self._gates*self._hidden_size, 
inputs.shape[2])
 self.i2h_weight[i]._finish_deferred_init()
-if inputs.context.device_type == 'gpu' or \
-(not is_training() and self._mode == 'lstm'):
+if inputs.context.device_type == 'gpu' or self._mode == 'lstm':
 out = self._forward_kernel(inputs, states)
 else:
 out = self._forward(inputs, states)
diff --git a/src/operator/cudnn_rnn-inl.h b/src/operator/cudnn_rnn-inl.h
index 1a54b73..033d30e 100644
--- a/src/operator/cudnn_rnn-inl.h
+++ b/src/operator/cudnn_rnn-inl.h
@@ -38,7 +38,7 @@ namespace mxnet {
 namespace op {
 #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5
 template
-class CuDNNRNNOp : public Operator {
+class CuDNNRNNOp : public Operator{
  public:
   explicit CuDNNRNNOp(RNNParam param) {
 this->param_ = param;
@@ -101,6 +101,7 @@ class CuDNNRNNOp : public Operator {
   CUDNN_CALL(cudnnDestroyDropoutDescriptor(dropout_desc_));
   Storage::Get()->Free(dropout_states_);
   Storage::Get()->Free(reserve_space_);
+  init_cudnn_ = false;
 }
   }
 
diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h
index 13c077d..eded6ae 100644
--- a/src/operator/rnn-inl.h
+++ b/src/operator/rnn-inl.h
@@ -21,7 +21,7 @@
  * Copyright (c) 2015 by Contributors
  * \file rnn-inl.h
  * \brief
- * \author Sebastian Bodenstein
+ * \author Sebastian Bodenstein, Shu Zhang
 */
 #ifndef MXNET_OPERATOR_RNN_INL_H_
 #define MXNET_OPERATOR_RNN_INL_H_
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -37,8 +38,7 @@
 #include "./math.h"
 #include "

[incubator-mxnet] branch master updated: split trainer.step into allreduce and update (#10861)

2018-05-14 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new ab232c1  split trainer.step into allreduce and update (#10861)
ab232c1 is described below

commit ab232c1414372f9ca23554b36de0a17a95e7e716
Author: Sheng Zha 
AuthorDate: Mon May 14 11:18:53 2018 -0700

split trainer.step into allreduce and update (#10861)

* split trainer.step into allreduce and update

* fix test

* address comments

* update doc

* update

* update docs
---
 python/mxnet/gluon/trainer.py   | 96 -
 tests/python/unittest/test_gluon.py | 19 +++-
 2 files changed, 101 insertions(+), 14 deletions(-)

diff --git a/python/mxnet/gluon/trainer.py b/python/mxnet/gluon/trainer.py
index 5ae0e46..da67fc0 100644
--- a/python/mxnet/gluon/trainer.py
+++ b/python/mxnet/gluon/trainer.py
@@ -49,6 +49,9 @@ class Trainer(object):
 on the type of compression being used. For example, 2bit compression 
requires a threshold.
 Arguments would then be {'type':'2bit', 'threshold':0.5}
 See mxnet.KVStore.set_gradient_compression method for more details on 
gradient compression.
+update_on_kvstore : bool, default None
+Whether to perform parameter updates on kvstore. If None, then trainer 
will choose the more
+suitable option depending on the type of kvstore.
 
 Properties
 --
@@ -57,7 +60,7 @@ class Trainer(object):
 optimizer, its learning rate can be accessed as 
optimizer.learning_rate.
 """
 def __init__(self, params, optimizer, optimizer_params=None, 
kvstore='device',
- compression_params=None):
+ compression_params=None, update_on_kvstore=None):
 if isinstance(params, (dict, ParameterDict)):
 params = list(params.values())
 if not isinstance(params, (list, tuple)):
@@ -73,11 +76,12 @@ class Trainer(object):
 self._params.append(param)
 self._compression_params = compression_params
 optimizer_params = optimizer_params if optimizer_params else {}
-self._scale = optimizer_params.get('rescale_grad', 1.0)
+self._scale = float(optimizer_params.get('rescale_grad', 1.0))
 self._contexts = self._check_contexts()
 self._init_optimizer(optimizer, optimizer_params)
 self._kv_initialized = False
 self._kvstore = kvstore
+self._update_on_kvstore = update_on_kvstore
 
 def _check_contexts(self):
 contexts = None
@@ -109,6 +113,8 @@ class Trainer(object):
 arg_arrays = {param.name: param.data(self._contexts[0]) for param in 
self._params}
 kvstore, update_on_kvstore = _create_kvstore(self._kvstore, 
len(self._contexts),
  arg_arrays)
+update_on_kvstore = self._update_on_kvstore if self._update_on_kvstore 
is not None \
+else update_on_kvstore
 if kvstore:
 if self._compression_params:
 kvstore.set_gradient_compression(self._compression_params)
@@ -129,7 +135,6 @@ class Trainer(object):
 
 self._kv_initialized = True
 
-
 @property
 def learning_rate(self):
 if not isinstance(self._optimizer, opt.Optimizer):
@@ -138,7 +143,6 @@ class Trainer(object):
 else:
 return self._optimizer.learning_rate
 
-
 def set_learning_rate(self, lr):
 """Sets a new learning rate of the optimizer.
 
@@ -153,10 +157,73 @@ class Trainer(object):
 else:
 self._optimizer.set_learning_rate(lr)
 
-
 def step(self, batch_size, ignore_stale_grad=False):
 """Makes one step of parameter update. Should be called after
-`autograd.compute_gradient` and outside of `record()` scope.
+`autograd.backward()` and outside of `record()` scope.
+
+For normal parameter updates, `step()` should be used, which 
internally calls
+`allreduce_grads()` and then `update()`. However, if you need to get 
the reduced
+gradients to perform certain transformation, such as in gradient 
clipping, then
+you may want to manually call `allreduce_grads()` and `update()` 
separately.
+
+Parameters
+--
+batch_size : int
+Batch size of data processed. Gradient will be normalized by 
`1/batch_size`.
+Set this to 1 if you normalized loss manually with `loss = 
mean(loss)`.
+ignore_stale_grad : bool, optional, default=False
+If true, ignores Parameters with stale gradient (gradient that has 
not
+been updated by `backward` after last step) and skip upd

[incubator-mxnet] branch master updated: fix example in smooth l1 document (#10929)

2018-05-14 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 89ffab9  fix example in smooth l1 document (#10929)
89ffab9 is described below

commit 89ffab9e81ae608da0af536a720fa0aebc163fd6
Author: JackieWu 
AuthorDate: Tue May 15 01:37:31 2018 +0800

fix example in smooth l1 document (#10929)
---
 src/operator/tensor/elemwise_binary_scalar_op_extended.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/operator/tensor/elemwise_binary_scalar_op_extended.cc 
b/src/operator/tensor/elemwise_binary_scalar_op_extended.cc
index 0f00e48..9870342 100644
--- a/src/operator/tensor/elemwise_binary_scalar_op_extended.cc
+++ b/src/operator/tensor/elemwise_binary_scalar_op_extended.cc
@@ -98,7 +98,7 @@ where :math:`x` is an element of the tensor *lhs* and 
:math:`\sigma` is the scal
 
 Example::
 
-  smooth_l1([1, 2, 3, 4], sigma=1) = [0.5, 1.5, 2.5, 3.5]
+  smooth_l1([1, 2, 3, 4], scalar=1) = [0.5, 1.5, 2.5, 3.5]
 
 )code" ADD_FILELINE)
 .set_attr("FCompute", BinaryScalarOp::Compute<

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: Fix typo "weights" (#10934)

2018-05-14 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 1143e78  Fix typo "weights" (#10934)
1143e78 is described below

commit 1143e78c94568732c517c369d49cbb824e0996c4
Author: Di Yang 
AuthorDate: Tue May 15 03:25:00 2018 +1000

Fix typo "weights" (#10934)

There is a typo in mx.opt.get.updater: it is supposed to be weight rather 
than weights. This typo will yield a List Access Error in R
---
 R-package/R/optimizer.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-package/R/optimizer.R b/R-package/R/optimizer.R
index ff88531..3c503c2 100644
--- a/R-package/R/optimizer.R
+++ b/R-package/R/optimizer.R
@@ -401,7 +401,7 @@ mx.opt.get.updater <- function(optimizer, weights) {
   update <- optimizer$update
 
   update.closure <- function(weight, grad) {
-ulist <- lapply(seq_along(weights), function(i) {
+ulist <- lapply(seq_along(weight), function(i) {
   if (!is.null(grad[[i]])) {
 update(i, weight[[i]], grad[[i]], state.list[[i]])
   } else {

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: Fix eye doc (#10908)

2018-05-11 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 0fcf191  Fix eye doc (#10908)
0fcf191 is described below

commit 0fcf191760303fd000fd756baab30553d45e6ce8
Author: Ziyue Huang 
AuthorDate: Sat May 12 06:52:04 2018 +0800

Fix eye doc (#10908)

* fix eye doc

* fix
---
 python/mxnet/ndarray/ndarray.py | 3 +++
 python/mxnet/symbol/symbol.py   | 5 +++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py
index 007b3c8..f017d7e 100644
--- a/python/mxnet/ndarray/ndarray.py
+++ b/python/mxnet/ndarray/ndarray.py
@@ -3674,6 +3674,7 @@ def zeros(shape, ctx=None, dtype=None, **kwargs):
 
 def eye(N, M=0, k=0, ctx=None, dtype=None, **kwargs):
 """Return a 2-D array with ones on the diagonal and zeros elsewhere.
+
 Parameters
 --
 N: int
@@ -3688,10 +3689,12 @@ def eye(N, M=0, k=0, ctx=None, dtype=None, **kwargs):
 An optional device context (default is the current default context)
 dtype: str or numpy.dtype, optional
 An optional value type (default is `float32`)
+
 Returns
 ---
 NDArray
 A created array
+
 Examples
 
 >>> mx.nd.eye(2)
diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py
index 49023db..b113ddc 100644
--- a/python/mxnet/symbol/symbol.py
+++ b/python/mxnet/symbol/symbol.py
@@ -2746,8 +2746,8 @@ def hypot(left, right):
 raise TypeError('types (%s, %s) not supported' % (str(type(left)), 
str(type(right
 
 def eye(N, M=0, k=0, dtype=None, **kwargs):
-"""Returns a new symbol of 2-D shpae, filled with ones on the diagonal
-   and zeros elsewhere.
+"""Returns a new symbol of 2-D shpae, filled with ones on the diagonal and 
zeros elsewhere.
+
 Parameters
 --
 N: int
@@ -2760,6 +2760,7 @@ def eye(N, M=0, k=0, dtype=None, **kwargs):
 and a negative value to a lower diagonal.
 dtype : str or numpy.dtype, optional
 The value type of the inner value, default to ``np.float32``.
+
 Returns
 ---
 out : Symbol

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: Change class variables to thread local variables (#10833)

2018-05-11 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 86ee3e1  Change class variables to thread local variables (#10833)
86ee3e1 is described below

commit 86ee3e119a38b440350a847683313e23f9b3cc1e
Author: Anirudh Subramanian 
AuthorDate: Fri May 11 11:06:52 2018 -0700

Change class variables to thread local variables (#10833)

* Change to simpler implementation

* Add property

* Remove pdb

* Add support for setter and getter

* fix issues

* Add warnings

* Add thread local unittest and tlocal race condition

* Fix pylint

* Use current_context instead of _default_ctx

* Use current_context

* Fix race condition

* Fix thread local test

* Change to current_context
---
 python/mxnet/attribute.py  |  38 +--
 python/mxnet/base.py   |  64 +++-
 python/mxnet/context.py|  41 ++--
 python/mxnet/gluon/block.py|  13 +--
 python/mxnet/name.py   |  33 --
 python/mxnet/ndarray/ndarray.py|  14 +--
 python/mxnet/ndarray/sparse.py |  12 +--
 python/mxnet/symbol/register.py|   8 +-
 python/mxnet/symbol/symbol.py  |   6 +-
 python/mxnet/test_utils.py |   6 +-
 tests/nightly/test_tlocal_racecondition.py | 110 +++
 tests/python/unittest/test_contrib_operator.py |   2 +-
 tests/python/unittest/test_operator.py |   8 +-
 tests/python/unittest/test_thread_local.py | 139 +
 14 files changed, 437 insertions(+), 57 deletions(-)

diff --git a/python/mxnet/attribute.py b/python/mxnet/attribute.py
index 15d38f8..17044dd 100644
--- a/python/mxnet/attribute.py
+++ b/python/mxnet/attribute.py
@@ -18,10 +18,12 @@
 # coding: utf-8
 """Attribute scoping support for symbolic API."""
 from __future__ import absolute_import
+import threading
+import warnings
 
-from .base import string_types
+from .base import string_types, classproperty, with_metaclass, 
_MXClassPropertyMetaClass
 
-class AttrScope(object):
+class AttrScope(with_metaclass(_MXClassPropertyMetaClass, object)):
 """Attribute manager for scoping.
 
 User can also inherit this object to change naming behavior.
@@ -31,7 +33,7 @@ class AttrScope(object):
 kwargs
 The attributes to set for all symbol creations in the scope.
 """
-current = None
+_current = threading.local()
 
 def __init__(self, **kwargs):
 self._old_scope = None
@@ -64,15 +66,35 @@ class AttrScope(object):
 
 def __enter__(self):
 # pylint: disable=protected-access
-self._old_scope = AttrScope.current
-attr = AttrScope.current._attr.copy()
+if not hasattr(AttrScope._current, "value"):
+AttrScope._current.value = AttrScope()
+self._old_scope = AttrScope._current.value
+attr = AttrScope._current.value._attr.copy()
 attr.update(self._attr)
 self._attr = attr
-AttrScope.current = self
+AttrScope._current.value = self
 return self
 
 def __exit__(self, ptype, value, trace):
 assert self._old_scope
-AttrScope.current = self._old_scope
+AttrScope._current.value = self._old_scope
 
-AttrScope.current = AttrScope()
+#pylint: disable=no-self-argument
+@classproperty
+def current(cls):
+warnings.warn("AttrScope.current has been deprecated. "
+  "It is advised to use the `with` statement with 
AttrScope.",
+  DeprecationWarning)
+if not hasattr(AttrScope._current, "value"):
+cls._current.value = AttrScope()
+return cls._current.value
+
+@current.setter
+def current(cls, val):
+warnings.warn("AttrScope.current has been deprecated. "
+  "It is advised to use the `with` statement with 
AttrScope.",
+  DeprecationWarning)
+cls._current.value = val
+#pylint: enable=no-self-argument
+
+AttrScope._current.value = AttrScope()
diff --git a/python/mxnet/base.py b/python/mxnet/base.py
index 9790e09..0fb73b3 100644
--- a/python/mxnet/base.py
+++ b/python/mxnet/base.py
@@ -16,7 +16,7 @@
 # under the License.
 
 # coding: utf-8
-# pylint: disable=invalid-name, no-member, trailing-comma-tuple
+# pylint: disable=invalid-name, no-member, trailing-comma-tuple, 
bad-mcs-classmethod-argument
 """ctypes library of mxnet and helper functions."""
 from __future__ import absolute_import

[incubator-mxnet] branch master updated: Pin scipy to 1.0.1 (#10902)

2018-05-11 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 645b4c5  Pin scipy to 1.0.1 (#10902)
645b4c5 is described below

commit 645b4c5265726efec86c07f03fd4d3f2afef1449
Author: Marco de Abreu 
AuthorDate: Fri May 11 19:31:04 2018 +0200

Pin scipy to 1.0.1 (#10902)
---
 ci/docker/install/centos7_python.sh | 4 ++--
 ci/docker/install/ubuntu_python.sh  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ci/docker/install/centos7_python.sh 
b/ci/docker/install/centos7_python.sh
index 9e076b6..154e3b8 100755
--- a/ci/docker/install/centos7_python.sh
+++ b/ci/docker/install/centos7_python.sh
@@ -31,5 +31,5 @@ curl "https://bootstrap.pypa.io/get-pip.py"; -o "get-pip.py"
 python2.7 get-pip.py
 python3.6 get-pip.py
 
-pip2 install nose pylint numpy nose-timer requests h5py scipy
-pip3 install nose pylint numpy nose-timer requests h5py scipy
\ No newline at end of file
+pip2 install nose pylint numpy nose-timer requests h5py scipy==1.0.1
+pip3 install nose pylint numpy nose-timer requests h5py scipy==1.0.1
diff --git a/ci/docker/install/ubuntu_python.sh 
b/ci/docker/install/ubuntu_python.sh
index 554000d..da7c256 100755
--- a/ci/docker/install/ubuntu_python.sh
+++ b/ci/docker/install/ubuntu_python.sh
@@ -29,5 +29,5 @@ wget -nv https://bootstrap.pypa.io/get-pip.py
 python3 get-pip.py
 python2 get-pip.py
 
-pip2 install nose cpplint==1.3.0 pylint==1.8.3 'numpy<1.15.0,>=1.8.2' 
nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy
-pip3 install nose cpplint==1.3.0 pylint==1.8.3 'numpy<1.15.0,>=1.8.2' 
nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy
+pip2 install nose cpplint==1.3.0 pylint==1.8.3 'numpy<1.15.0,>=1.8.2' 
nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy==1.0.1
+pip3 install nose cpplint==1.3.0 pylint==1.8.3 'numpy<1.15.0,>=1.8.2' 
nose-timer 'requests<2.19.0,>=2.18.4' h5py==2.8.0rc1 scipy==1.0.1

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: Fixed divison by zero bug in DistanceWeightedSampling in gluon example (#10857)

2018-05-10 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 26c30af  Fixed divison by zero bug in DistanceWeightedSampling in 
gluon example (#10857)
26c30af is described below

commit 26c30affd0ecf77b2628a1b8ccbaccea7ce5245e
Author: Istvan Fehervari 
AuthorDate: Thu May 10 13:01:14 2018 -0700

Fixed divison by zero bug in DistanceWeightedSampling in gluon example 
(#10857)

* Fixed divison by zero bug in DistanceWeightedSampling in gluon example

Sample selection for training is based on a vector of computed probabilities 
that come from distance weights. In the current implementation these weights 
can become zero when the distance to all other neighbors is zero.
Zero weights lead to NaN bugs in the model; this commit is supposed to fix 
that so those zero weights are no longer divided by zero.

* Reworked how np.choice is invoked in DistanceWeightedSampling to avoid a 
try-except closure

* Added an early return to MarginLoss if loss becomes zero.

* Rewrote MarginLoss so it is hybridizable
---
 example/gluon/embedding_learning/model.py | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/example/gluon/embedding_learning/model.py 
b/example/gluon/embedding_learning/model.py
index 0f041bc..91f7735 100644
--- a/example/gluon/embedding_learning/model.py
+++ b/example/gluon/embedding_learning/model.py
@@ -110,7 +110,8 @@ class DistanceWeightedSampling(HybridBlock):
 mask[i:i+k, i:i+k] = 0
 
 weights = weights * F.array(mask) * (distance < 
self.nonzero_loss_cutoff)
-weights = weights / F.sum(weights, axis=1, keepdims=True)
+weights_sum = F.sum(weights, axis=1, keepdims=True)
+weights = weights / weights_sum
 
 a_indices = []
 p_indices = []
@@ -120,9 +121,10 @@ class DistanceWeightedSampling(HybridBlock):
 for i in range(n):
 block_idx = i // k
 
-try:
+if weights_sum[i] != 0:
 n_indices += np.random.choice(n, k-1, p=np_weights[i]).tolist()
-except:
+else:
+# all samples are above the cutoff so we sample uniformly
 n_indices += np.random.choice(n, k-1).tolist()
 for j in range(block_idx * k, (block_idx + 1) * k):
 if j != i:
@@ -217,8 +219,11 @@ class MarginLoss(gluon.loss.Loss):
 pos_loss = F.maximum(d_ap - beta + self._margin, 0.0)
 neg_loss = F.maximum(beta - d_an + self._margin, 0.0)
 
-pair_cnt = float(F.sum((pos_loss > 0.0) + (neg_loss > 0.0)).asscalar())
-
-# Normalize based on the number of pairs.
-loss = (F.sum(pos_loss + neg_loss) + beta_reg_loss) / pair_cnt
+pair_cnt = F.sum((pos_loss > 0.0) + (neg_loss > 0.0))
+if pair_cnt == 0.0:
+# When poss_loss and neg_loss is zero then total loss is zero as 
well
+loss = F.sum(pos_loss + neg_loss)
+else:
+# Normalize based on the number of pairs.
+loss = (F.sum(pos_loss + neg_loss) + beta_reg_loss) / pair_cnt
 return gluon.loss._apply_weighting(F, loss, self._weight, None)

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: prevent setting grad_req for Constant (#10869)

2018-05-10 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 1014c94  prevent setting grad_req for Constant (#10869)
1014c94 is described below

commit 1014c9479421d25b6122af9e58e8a978e2c17598
Author: Sheng Zha 
AuthorDate: Thu May 10 10:55:44 2018 -0700

prevent setting grad_req for Constant (#10869)

* prevent setting grad_req for Constant

* update

* add ignore new value
---
 python/mxnet/gluon/parameter.py | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/python/mxnet/gluon/parameter.py b/python/mxnet/gluon/parameter.py
index a3a1e32..320b376 100644
--- a/python/mxnet/gluon/parameter.py
+++ b/python/mxnet/gluon/parameter.py
@@ -503,6 +503,17 @@ class Constant(Parameter):
 s = 'Constant {name} (shape={shape}, dtype={dtype})'
 return s.format(name=self.name, shape=self.shape, dtype=self.dtype)
 
+@property
+def grad_req(self):
+return 'null'
+
+@grad_req.setter
+def grad_req(self, req):
+if req != 'null':
+warnings.warn('Constant parameter "{}" does not support '
+  'grad_req other than "null", and new value "{}" '
+  'is ignored.'.format(self.name, req))
+
 
 class ParameterDict(object):
 """A dictionary managing a set of parameters.

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: add mobilenetv2 pretrained models (#10879)

2018-05-09 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 5088ca9  add mobilenetv2 pretrained models (#10879)
5088ca9 is described below

commit 5088ca9a65641ddf905b60deae00fa6006f5e431
Author: Tong He 
AuthorDate: Wed May 9 21:40:42 2018 -0700

add mobilenetv2 pretrained models (#10879)

* add mobilenetv2 pretrained models

* improve docs
---
 docs/api/python/gluon/model_zoo.md  | 12 +---
 python/mxnet/gluon/model_zoo/model_store.py |  5 -
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/docs/api/python/gluon/model_zoo.md 
b/docs/api/python/gluon/model_zoo.md
index 950e2c0..453fe8d 100644
--- a/docs/api/python/gluon/model_zoo.md
+++ b/docs/api/python/gluon/model_zoo.md
@@ -42,9 +42,12 @@ The following table summarizes the available models.
 | mobilenet0.5  | [MobileNet 0.5](https://arxiv.org/abs/1704.04861)
 | 1,342,536| 0.6307 | 0.8475 | 
Trained with 
[script](https://github.com/apache/incubator-mxnet/blob/master/example/gluon/image_classification.py)
  |
 | mobilenet0.75 | [MobileNet 0.75](https://arxiv.org/abs/1704.04861)   
 | 2,601,976| 0.6738 | 0.8782 | 
Trained with 
[script](https://github.com/apache/incubator-mxnet/blob/master/example/gluon/image_classification.py)
  |
 | mobilenet1.0  | [MobileNet 1.0](https://arxiv.org/abs/1704.04861)
 | 4,253,864| 0.7105 | 0.9006 | 
Trained with 
[script](https://github.com/apache/incubator-mxnet/blob/master/example/gluon/image_classification.py)
  |
-| mobilenetv2_1.0 | [MobileNetV2 1.0](https://arxiv.org/abs/1801.04381)
 | 3,539,136| 0.7159 | 0.9047 | 
Trained with 
[script](https://github.com/dmlc/gluon-cv/blob/15ed8a4c71d411b878f0d71d1c7afdce6710c913/scripts/classification/imagenet/train_imagenet.py)
 |
-| resnet18_v1   | [ResNet-18 V1](http://arxiv.org/abs/1512.03385)  
 | 11,699,112   | 0.7039 | 0.8959 | 
Trained with 
[script](https://github.com/dmlc/gluon-cv/blob/15ed8a4c71d411b878f0d71d1c7afdce6710c913/scripts/classification/imagenet/train_imagenet.py)
 |
-| resnet34_v1   | [ResNet-34 V1](http://arxiv.org/abs/1512.03385)  
 | 21,814,696   | 0.7411 | 0.9184 | 
Trained with 
[script](https://github.com/dmlc/gluon-cv/blob/15ed8a4c71d411b878f0d71d1c7afdce6710c913/scripts/classification/imagenet/train_imagenet.py)
 |
+| mobilenetv2_1.0  | [MobileNetV2 1.0](https://arxiv.org/abs/1801.04381)   
 | 3,539,136| 0.7192 | 0.9056 | 
Trained with 
[script](https://gluon-cv.mxnet.io/model_zoo/index.html#image-classification)   
   |
+| mobilenetv2_0.75 | [MobileNetV2 0.75](https://arxiv.org/abs/1801.04381)  
 | 2,653,864| 0.6961 | 0.8895 | 
Trained with 
[script](https://gluon-cv.mxnet.io/model_zoo/index.html#image-classification)   
   |
+| mobilenetv2_0.5  | [MobileNetV2 0.5](https://arxiv.org/abs/1801.04381)   
 | 1,983,104| 0.6449 | 0.8547 | 
Trained with 
[script](https://gluon-cv.mxnet.io/model_zoo/index.html#image-classification)   
   |
+| mobilenetv2_0.25 | [MobileNetV2 0.25](https://arxiv.org/abs/1801.04381)  
 | 1,526,856| 0.5074 | 0.7456 | 
Trained with 
[script](https://gluon-cv.mxnet.io/model_zoo/index.html#image-classification)   
   |
+| resnet18_v1   | [ResNet-18 V1](http://arxiv.org/abs/1512.03385)  
 | 11,699,112   | 0.7039 | 0.8959 | 
Trained with 
[script](https://gluon-cv.mxnet.io/model_zoo/index.html#image-classification)   
   |
+| resnet34_v1   | [ResNet-34 V1](http://arxiv.org/abs/1512.03385)  
 | 21,814,696   | 0.7411 | 0.9184 | 
Trained with 
[script](https://gluon-cv.mxnet.io/model_zoo/index.html#image-classification)   
   |
 | resnet50_v1   | [ResNet-50 V1](http://arxiv.org/abs/1512.03385)  
 | 25,629,032   | 0.7540 | 0.9266 | 
Trained with 
[script](https://github.com/apache/incubator-mxnet/blob/master/example/gluon/image_classification.py)
  |
 | resnet101_v1  | [ResNet-101 V1](http://arxiv.org/abs/1512.03385) 
 | 44,695,144   | 0.7693 | 0.9334

[incubator-mxnet] branch master updated: Update expected result in osx python install script (#10842)

2018-05-09 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new c3242f7  Update expected result in osx python install script (#10842)
c3242f7 is described below

commit c3242f7a3a67770ad548c13e67736dd96accb3a5
Author: nskool 
AuthorDate: Wed May 9 21:37:38 2018 -0700

Update expected result in osx python install script (#10842)

* Change the expected value written to file mxnet_test.expected
---
 setup-utils/install-mxnet-osx-python.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup-utils/install-mxnet-osx-python.sh 
b/setup-utils/install-mxnet-osx-python.sh
index 56f6998..b24650a 100755
--- a/setup-utils/install-mxnet-osx-python.sh
+++ b/setup-utils/install-mxnet-osx-python.sh
@@ -506,8 +506,8 @@ print ((a*2).asnumpy());
 END
rm -f mxnet_test.expected
cat << END > mxnet_test.expected
-[[ 2.  2.  2.]
- [ 2.  2.  2.]]
+[[2. 2. 2.]
+ [2. 2. 2.]]
 END
diff mxnet_test.log mxnet_test.expected
if [[ $? = 0 ]]; then

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: Op for scaled attention (#10795)

2018-05-09 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 61c00ee  Op for scaled attention (#10795)
61c00ee is described below

commit 61c00eed4496eb0c49e9060ea91d1e9d78f3e289
Author: Xingjian Shi 
AuthorDate: Thu May 10 03:09:44 2018 +0800

Op for scaled attention (#10795)

* add test

* add
---
 src/operator/contrib/transformer-inl.h | 55 ++
 src/operator/contrib/transformer.cc| 43 ++
 src/operator/contrib/transformer.cu| 36 ++
 tests/python/unittest/test_operator.py | 10 +++
 4 files changed, 144 insertions(+)

diff --git a/src/operator/contrib/transformer-inl.h 
b/src/operator/contrib/transformer-inl.h
new file mode 100644
index 000..01faf24
--- /dev/null
+++ b/src/operator/contrib/transformer-inl.h
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file transformer-inl.h
+ * \brief Function used in cc and cu
+ */
+#ifndef MXNET_OPERATOR_CONTRIB_TRANSFORMER_INL_H_
+#define MXNET_OPERATOR_CONTRIB_TRANSFORMER_INL_H_
+
+#include 
+#include 
+#include "../mxnet_op.h"
+#include "../mshadow_op.h"
+
+
+namespace mxnet {
+namespace op {
+
+template
+static void DivSqrtDimForward_(const nnvm::NodeAttrs& attrs,
+  const OpContext& ctx,
+  const std::vector& inputs,
+  const std::vector& req,
+  const std::vector& outputs) {
+  mshadow::Stream *s = ctx.get_stream();
+  double sqrt_dim = 
std::sqrt(static_cast(inputs[0].shape_[inputs[0].ndim() - 1]));
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
+  mxnet_op::Kernel, 
xpu>::Launch(
+s, inputs[0].Size(), outputs[0].dptr(), 
inputs[0].dptr(), DType(sqrt_dim));
+});
+  });
+}
+
+}  // namespace op
+}  // namespace mxnet
+#endif  // MXNET_OPERATOR_CONTRIB_TRANSFORMER_INL_H_
diff --git a/src/operator/contrib/transformer.cc 
b/src/operator/contrib/transformer.cc
new file mode 100644
index 000..00085c0
--- /dev/null
+++ b/src/operator/contrib/transformer.cc
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file transformer.cc
+ * \brief CPU implementation of the operators used in Transformer
+ */
+#include 
+#include "./transformer-inl.h"
+#include "../tensor/elemwise_unary_op.h"
+
+namespace mxnet {
+namespace op {
+
+// relu
+MXNET_OPERATOR_REGISTER_UNARY(_contrib_div_sqrt_dim)
+.describe(R"code(Rescale the input by the square root of the channel dimension.
+
+   out = data / sqrt(data.shape[-1])
+
+)code" ADD_FILELINE)
+.set_attr("FCompute", DivSqrtDimForward_)
+.set_attr("FGradient", 
ElemwiseGradUseNone{"_contrib_div_sqrt_dim"});
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/contrib/transformer.cu 
b/src/operator/contrib/transformer.cu
new file mode 100644
index 000..6ed073d
--- /dev/null
+++ b/src/operator/contrib/transformer.cu
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Founda

[incubator-mxnet] branch master updated: Adding max_area to random_size_crop (#10825)

2018-05-09 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 36f8477  Adding max_area to random_size_crop (#10825)
36f8477 is described below

commit 36f8477a6bfee1ec5cf58cb92cf041cc932e8237
Author: Tong He 
AuthorDate: Wed May 9 11:01:47 2018 -0700

Adding max_area to random_size_crop (#10825)

* add mobilenet v2 1.0 pretrained model

* add auto summary

* fix random_size_crop with maximized area

* change as requested

* add deprecation warnings

* fix lint

* improve compatibility, add assertion
---
 .../tutorials/python/types_of_data_augmentation.md |  2 +-
 python/mxnet/gluon/data/vision/transforms.py   |  2 +-
 python/mxnet/image/image.py| 40 --
 3 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/docs/tutorials/python/types_of_data_augmentation.md 
b/docs/tutorials/python/types_of_data_augmentation.md
index 4ec461d..4308932 100644
--- a/docs/tutorials/python/types_of_data_augmentation.md
+++ b/docs/tutorials/python/types_of_data_augmentation.md
@@ -302,7 +302,7 @@ Some shortcut functions are provided to perform multiple 
augmentation in a singl
 ```python
 # A random crop, with a random resizing, and random aspect ratio jitter
 example_image_copy = example_image.copy()
-aug = mx.image.RandomSizedCropAug(size=(100, 100), min_area=0.1, ratio=(1.0, 
1.5))
+aug = mx.image.RandomSizedCropAug(size=(100, 100), area=0.1, ratio=(1.0, 1.5))
 aug_image = aug(example_image_copy)
 plot_mx_array(aug_image)
 
diff --git a/python/mxnet/gluon/data/vision/transforms.py 
b/python/mxnet/gluon/data/vision/transforms.py
index 5e65715..7ec1c32 100644
--- a/python/mxnet/gluon/data/vision/transforms.py
+++ b/python/mxnet/gluon/data/vision/transforms.py
@@ -200,7 +200,7 @@ class RandomResizedCrop(Block):
 super(RandomResizedCrop, self).__init__()
 if isinstance(size, numeric_types):
 size = (size, size)
-self._args = (size, scale[0], ratio, interpolation)
+self._args = (size, scale, ratio, interpolation)
 
 def forward(self, x):
 return image.random_size_crop(x, *self._args)[0]
diff --git a/python/mxnet/image/image.py b/python/mxnet/image/image.py
index ace9cb1..b4b9cc2 100644
--- a/python/mxnet/image/image.py
+++ b/python/mxnet/image/image.py
@@ -25,6 +25,7 @@ import os
 import random
 import logging
 import json
+import warnings
 import numpy as np
 
 try:
@@ -432,7 +433,7 @@ def color_normalize(src, mean, std=None):
 return src
 
 
-def random_size_crop(src, size, min_area, ratio, interp=2):
+def random_size_crop(src, size, area, ratio, interp=2, **kwargs):
 """Randomly crop src with size. Randomize area and aspect ratio.
 
 Parameters
@@ -441,8 +442,9 @@ def random_size_crop(src, size, min_area, ratio, interp=2):
 Input image
 size : tuple of (int, int)
 Size of the crop formatted as (width, height).
-min_area : int
-Minimum area to be maintained after cropping
+area : float in (0, 1] or tuple of (float, float)
+If tuple, minimum area and maximum area to be maintained after cropping
+If float, minimum area to be maintained after cropping, maximum area 
is set to 1.0
 ratio : tuple of (float, float)
 Aspect ratio range as (min_aspect_ratio, max_aspect_ratio)
 interp: int, optional, default=2
@@ -457,9 +459,18 @@ def random_size_crop(src, size, min_area, ratio, interp=2):
 
 """
 h, w, _ = src.shape
-area = h * w
+src_area = h * w
+
+if 'min_area' in kwargs:
+warnings.warn('`min_area` is deprecated. Please use `area` instead.',
+  DeprecationWarning)
+area = kwargs.pop('min_area')
+assert not kwargs, "unexpected keyword arguments for `random_size_crop`."
+
+if isinstance(area, numeric_types):
+area = (area, 1.0)
 for _ in range(10):
-target_area = random.uniform(min_area, 1.0) * area
+target_area = random.uniform(area[0], area[1]) * src_area
 new_ratio = random.uniform(*ratio)
 
 new_w = int(round(np.sqrt(target_area * new_ratio)))
@@ -596,24 +607,31 @@ class RandomSizedCropAug(Augmenter):
 --
 size : tuple of (int, int)
 Size of the crop formatted as (width, height).
-min_area : int
-Minimum area to be maintained after cropping
+area : float in (0, 1] or tuple of (float, float)
+If tuple, minimum area and maximum area to be maintained after cropping
+If float, minimum area to be maintained after cropping, maximum area 
is set to 1.0
 ratio : tuple of (float, float)
 Aspect ratio range as (min_aspect_ratio, max_aspect_ratio)

[incubator-mxnet] branch master updated: [MXNET-407] Better error handling of NDArray setitem autograd (#10844)

2018-05-09 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 333e7fe  [MXNET-407] Better error handling of NDArray setitem autograd 
(#10844)
333e7fe is described below

commit 333e7fef64033572d1b02017309d97e7b91a9343
Author: reminisce 
AuthorDate: Wed May 9 10:58:31 2018 -0700

[MXNET-407] Better error handling of NDArray setitem autograd (#10844)

* Initial commit

* More fix
---
 python/mxnet/ndarray/ndarray.py   | 30 ++
 src/imperative/imperative.cc  |  2 +-
 src/operator/tensor/indexing_op.cc| 57 ++-
 src/operator/tensor/indexing_op.h |  7 +++--
 tests/python/unittest/test_ndarray.py | 15 -
 5 files changed, 87 insertions(+), 24 deletions(-)

diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py
index 2411932..7bfb3c7 100644
--- a/python/mxnet/ndarray/ndarray.py
+++ b/python/mxnet/ndarray/ndarray.py
@@ -682,17 +682,20 @@ fixed-size items.
 on the values of slices' steps."""
 shape = self.shape
 if isinstance(key, integer_types):
-sliced_arr = self._at(key)
-sliced_arr[:] = value
-return
-elif isinstance(key, py_slice):
-if key.step is None or key.step == 1:  # trivial step
-if key.start is not None or key.stop is not None:
-sliced_arr = self._slice(key.start, key.stop)
-sliced_arr[:] = value
-return
-# assign value to the whole NDArray
-# may need to broadcast first
+if key < 0:
+key += shape[0]
+if key < 0 or key >= shape[0]:
+if key < 0:
+key -= shape[0]
+raise IndexError('index %d is out of bounds for axis 0 with 
size %d'
+ % (key, shape[0]))
+key = py_slice(key, key+1)  # key must be >= 0 here
+
+if isinstance(key, py_slice):
+assign_to_self = key.step is None or key.step == 1
+assign_to_self &= key.start is None or key.start == 0
+assign_to_self &= key.stop is None or key.stop == shape[0]
+if assign_to_self:  # trivial case, assign value to self
 if isinstance(value, NDArray):
 if value.handle is not self.handle:
 if value.shape != shape:
@@ -709,7 +712,7 @@ fixed-size items.
 value_nd = self._prepare_value_nd(value, shape)
 value_nd.copyto(self)
 return
-else:  # non-trivial step, use _slice_assign or 
_slice_assign_scalar
+else:  # non-trivial case, use _slice_assign or 
_slice_assign_scalar
 key = (key,)
 
 assert isinstance(key, tuple), "key=%s must be a tuple of slices and 
integers" % str(key)
@@ -762,7 +765,8 @@ fixed-size items.
 indices = self._get_index_nd(key)
 vshape = _get_oshape_of_gather_nd_op(self.shape, indices.shape)
 value_nd = self._prepare_value_nd(value, vshape)
-_internal._scatter_set_nd(data=value_nd, indices=indices, 
shape=self.shape, out=self)
+_internal._scatter_set_nd(lhs=self, rhs=value_nd, indices=indices,
+  shape=self.shape, out=self)
 
 def _get_nd_basic_indexing(self, key):
 """This function is called when key is a slice, or an integer,
diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc
index c5a4740..7caf305 100644
--- a/src/imperative/imperative.cc
+++ b/src/imperative/imperative.cc
@@ -194,7 +194,7 @@ void Imperative::RecordOp(
   << "will cause undefined behavior when evaluating gradients. "
   << "Please call backward first to clear the graph or do this out side of 
"
   << "a record section. Also note that you cannot use inplace operations "
-  << "like +=, *=, relu(x, out=x), etc inside a record section.";
+  << "like +=, *=, relu(x, out=x), y[idx]=x, etc inside a record section.";
   }
 
   bool need_grad = false;
diff --git a/src/operator/tensor/indexing_op.cc 
b/src/operator/tensor/indexing_op.cc
index 6f0f468..fbb94b2 100644
--- a/src/operator/tensor/indexing_op.cc
+++ b/src/operator/tensor/indexing_op.cc
@@ -668,7 +668,9 @@ Examples::
 NNVM_REGISTER_OP(_scatter_set_nd)
 .describe(R"code(This operator has the same functionality as scatter_nd
 except that it does not reset the elements not indexed by the input
-index `NDArray` in the input data `NDArray`.
+index `NDArray` in the input data `NDArray`. output should

[incubator-mxnet] branch master updated: Add Util Function for Memory Plan Inspection (#10859)

2018-05-09 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new c5958f0  Add Util Function for Memory Plan Inspection (#10859)
c5958f0 is described below

commit c5958f0ade579487ecdbd6c1bcb67538f531c62a
Author: Haibin Lin 
AuthorDate: Wed May 9 10:45:02 2018 -0700

Add Util Function for Memory Plan Inspection (#10859)

* add inplace option for bn backward

* Add util code for debugging memory plan for the graph

* update var name

* fix bug

* fix lint

* add example
---
 src/common/exec_utils.h   | 124 ++
 src/executor/graph_executor.cc|   7 ++
 src/executor/infer_graph_attr_pass.cc |  31 +
 3 files changed, 133 insertions(+), 29 deletions(-)

diff --git a/src/common/exec_utils.h b/src/common/exec_utils.h
index 3ac86fb..4881e0f 100644
--- a/src/common/exec_utils.h
+++ b/src/common/exec_utils.h
@@ -25,6 +25,8 @@
 #define MXNET_COMMON_EXEC_UTILS_H_
 
 #include 
+#include 
+#include 
 #include "../common/utils.h"
 
 namespace mxnet {
@@ -226,7 +228,129 @@ inline bool DefaultStorageType(const nnvm::NodeAttrs& 
attrs,
   return true;
 }
 
+// string representation of storage id
+inline std::string storage_str(int storage_id) {
+  std::string str;
+  if (storage_id == -1) {
+str = "var (-1)";
+  } else if (storage_id == -2) {
+str = "external storage (-2)";
+  } else {
+str = "group " + std::to_string(storage_id);
+  }
+  return str;
+}
+
+/* log the static memory plan of the graph. Example:
+   node 0 var
+   node 1 _copy
+input 0: [80,3,224,224] (47040 KB) -> var storage (-1)
+output 1: [80,3,224,224] (47040 KB) -> group 0
+   node 2 var
+   node 3 var
+   node 4 var
+   node 5 var
+   node 6 BatchNorm
+input 1: [80,3,224,224] (47040 KB) -> group 0
+input 2: [3] (0 KB) -> var storage (-1)
+input 3: [3] (0 KB) -> var storage (-1)
+input 4: [3] (0 KB) -> var storage (-1)
+input 5: [3] (0 KB) -> var storage (-1)
+output 6: [80,3,224,224] (47040 KB) -> group 1
+output 7: [3] (0 KB) -> group 3
+output 8: [3] (0 KB) -> group 2
+   ...
+ */
+inline void LogMemoryPlan(const nnvm::Graph& g) {
+  const auto &idx = g.indexed_graph();
+  const auto& vshape = g.GetAttr("shape");
+  const auto& vtype = g.GetAttr("dtype");
+  const auto& vstorage = g.GetAttr("storage_id");
+  // find node range
+  uint32_t node_start = 0, node_end = idx.num_nodes();
+  if (g.attrs.count("node_range")) {
+const auto& range = g.GetAttr 
>("node_range");
+node_start = range.first;
+node_end = range.second;
+  }
+  for (uint32_t nid = node_start; nid < node_end; ++nid) {
+const auto& inode = idx[nid];
+if (inode.source->is_variable()) {
+  LOG(INFO) << "node " << nid << " var";
+} else {
+  LOG(INFO) << "node " << nid << " " << inode.source->attrs.op->name;
+  for (const auto& e : inode.inputs) {
+auto eid = idx.entry_id(e);
+size_t kilo_bytes = vshape[eid].Size() * 
mshadow::mshadow_sizeof(vtype[eid]) / 1024;
+LOG(INFO) << "\t\tinput " << eid << ": " << vshape[eid] << " ("
+  << kilo_bytes << " KB) -> " << storage_str(vstorage[eid]);
+  }
+  for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
+uint32_t eid = idx.entry_id(nid, index);
+size_t kilo_bytes = vshape[eid].Size() * 
mshadow::mshadow_sizeof(vtype[eid]) / 1024;
+LOG(INFO) << "\t\toutput " << eid << ": " << vshape[eid] << " ("
+  << kilo_bytes << " KB) -> " << storage_str(vstorage[eid]);
+  }
+}
+  }
+}
+
+/* log the static memory plan of the graph. Example:
+node 0 var
+node 1 _copy: fcompute
+input 0: default
+output 1: default
+node 2 var
+node 3 Convolution: fcompute
+input 1: default
+input 2: default
+output 3: default
+node 4 var
+node 5 var
+node 6 var
+node 7 var
+node 8 BatchNorm: fcompute
+input 3: default
+input 4: default
+input 5: default
+input 6: default
+input 7: default
+output 8: default
+output 9: default
+

[incubator-mxnet] branch master updated: [MXNET-408] [WIP] inplace ReLU activation (#10847)

2018-05-08 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new b2ec05b  [MXNET-408] [WIP] inplace ReLU activation (#10847)
b2ec05b is described below

commit b2ec05b5ec2164c5d47c040d4bebbe4ff6a1cb8f
Author: Haibin Lin 
AuthorDate: Tue May 8 15:10:36 2018 -0700

[MXNET-408] [WIP] inplace ReLU activation (#10847)

* inplace version of activation(relu)

* inplace relu

* add comments

* add comment

* comments

* fix compilation error

* add check_numerical_grad test
---
 src/operator/nn/activation-inl.h   | 148 -
 src/operator/nn/activation.cc  |  50 ++---
 src/operator/nn/activation.cu  |  39 ---
 src/operator/nn/cudnn/cudnn_activation-inl.h   |   3 +
 src/operator/nn/mkldnn/mkldnn_act.cc   |   2 +
 src/operator/tensor/elemwise_unary_op_basic.cc |   2 +-
 tests/python/gpu/test_operator_gpu.py  |  19 ++--
 tests/python/unittest/test_operator.py |  42 +++
 8 files changed, 187 insertions(+), 118 deletions(-)

diff --git a/src/operator/nn/activation-inl.h b/src/operator/nn/activation-inl.h
index 32a7a5a..a9f6dbe 100644
--- a/src/operator/nn/activation-inl.h
+++ b/src/operator/nn/activation-inl.h
@@ -83,7 +83,7 @@ struct hash {
 namespace mxnet {
 namespace op {
 
-template
+template
 void ActivationForward(const OpContext &ctx, const TBlob &in_data,
const OpReqType &req, const TBlob &out_data) {
   using namespace mshadow;
@@ -91,16 +91,16 @@ void ActivationForward(const OpContext &ctx, const TBlob 
&in_data,
   Stream *s = ctx.get_stream();
   const size_t sz = in_data.shape_.Size();
   if (sz) {
-MXNET_ASSIGN_REQ_SWITCH(req, Req, {
-  mxnet_op::Kernel, xpu>::Launch(
-s, sz,
-out_data.dptr(),
-in_data.dptr());
+MSHADOW_REAL_TYPE_SWITCH(in_data.type_flag_, DType, {
+  MXNET_ASSIGN_REQ_SWITCH(req, Req, {
+mxnet_op::Kernel, xpu>::Launch(
+  s, sz, out_data.dptr(), in_data.dptr());
+  });
 });
   }
 }
 
-template
+template
 void ActivationBackward(const OpContext &ctx, const TBlob &out_grad,
 const TBlob &out_data, const OpReqType &req,
 const TBlob &in_grad) {
@@ -109,13 +109,12 @@ void ActivationBackward(const OpContext &ctx, const TBlob 
&out_grad,
   Stream *s = ctx.get_stream();
   const size_t sz = out_data.shape_.Size();
   if (sz) {
-MXNET_ASSIGN_REQ_SWITCH(req, Req, {
-  mxnet_op::Kernel, Req>, 
xpu>::Launch(
-s, sz,
-in_grad.dptr(),
-out_grad.dptr(),
-out_data.dptr());
+MSHADOW_REAL_TYPE_SWITCH(out_grad.type_flag_, DType, {
+  MXNET_ASSIGN_REQ_SWITCH(req, Req, {
+mxnet_op::Kernel, Req>, xpu>::Launch(
+s, sz, in_grad.dptr(), out_grad.dptr(), 
out_data.dptr());
+  });
 });
   }
 }
@@ -123,72 +122,68 @@ void ActivationBackward(const OpContext &ctx, const TBlob 
&out_grad,
 template
 void ActivationComputeImpl(const ActivationParam ¶m, const OpContext &ctx,
const TBlob &input, OpReqType req, const TBlob 
&output) {
-  MSHADOW_REAL_TYPE_SWITCH(input.type_flag_, DType, {
-switch (param.act_type) {
-  case activation::kReLU:
-ActivationForward(
-ctx, input, req, output);
-break;
-  case activation::kSigmoid:
-ActivationForward(
-ctx, input, req, output);
-break;
-  case activation::kTanh:
-ActivationForward(
-ctx, input, req, output);
-break;
-  case activation::kSoftReLU:
-ActivationForward(
-ctx, input, req, output);
-break;
-  case activation::kSoftSign:
-ActivationForward(
-ctx, input, req, output);
-break;
-  default:
-LOG(FATAL) << "unknown activation type";
-}
-  });
+  switch (param.act_type) {
+case activation::kReLU:
+  ActivationForward(
+  ctx, input, req, output);
+  break;
+case activation::kSigmoid:
+  ActivationForward(
+  ctx, input, req, output);
+  break;
+case activation::kTanh:
+  ActivationForward(
+  ctx, input, req, output);
+  break;
+case activation::kSoftReLU:
+  ActivationForward(
+  ctx, input, req, output);
+  break;
+case activation::kSoftSign:
+  ActivationForward(
+  ctx, input, req, output);
+  break;
+default:
+  LOG(FATAL) << "unknown activation type";
+  }
 }
 
 template
 void ActivationGradComputeImpl(const ActivationParam ¶m, const OpContext 
&ctx,

[incubator-mxnet] branch master updated: fix a bug in prepare_mkldnn.sh (#10843)

2018-05-08 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new e573097  fix a bug in prepare_mkldnn.sh (#10843)
e573097 is described below

commit e57309736935f0b092aa6e27a12a7617fefa778c
Author: Da Zheng 
AuthorDate: Tue May 8 10:07:57 2018 -0700

fix a bug in prepare_mkldnn.sh (#10843)
---
 prepare_mkldnn.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prepare_mkldnn.sh b/prepare_mkldnn.sh
index 828cfe1..9b11b4a 100755
--- a/prepare_mkldnn.sh
+++ b/prepare_mkldnn.sh
@@ -72,7 +72,7 @@ if [ ! -z "$HOME_MKLDNN" ]; then
   fi
 fi
 
-if [ $OSTYPE == "darwin16" ]; then
+if [ $(uname) == "Darwin" ]; then
   OMP_LIBFILE="$MKLDNN_INSTALLDIR/lib/libiomp5.dylib"
   MKLML_LIBFILE="$MKLDNN_INSTALLDIR/lib/libmklml.dylib"
   MKLDNN_LIBFILE="$MKLDNN_INSTALLDIR/lib/libmkldnn.0.dylib"

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: Finish prerequisites on MXNetTutorialTemplate (#10851)

2018-05-08 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new b64b9a0  Finish prerequisites on MXNetTutorialTemplate (#10851)
b64b9a0 is described below

commit b64b9a00d57a8a29a1d7ce1f988e03c16b2e9730
Author: Pigeon <32315294+luckypig...@users.noreply.github.com>
AuthorDate: Wed May 9 00:59:35 2018 +0800

Finish prerequisites on MXNetTutorialTemplate (#10851)

* Update MXNetTutorialTemplate.ipynb

*  Add prerequisites URL

give hyper link to each of MXNet, Language, Tool, Familiarity with concept 
or tool.

* Update MXNetTutorialTemplate.ipynb

* Try the symbol title number

use "&lt;" and "&gt;" instead of "<" and ">"

* try make title number in markdown

use " ` " to solve the " < " disappear problem.

* solve the symbol title number disappear

find all "< >" and replace to "`< >`"

* change some start sign to title number sign

Change some symbols, which sentence is more appropriate to use title number

* change title number

change some title number to star
---
 example/MXNetTutorialTemplate.ipynb | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/example/MXNetTutorialTemplate.ipynb 
b/example/MXNetTutorialTemplate.ipynb
index 2ec9b85..851a87f 100644
--- a/example/MXNetTutorialTemplate.ipynb
+++ b/example/MXNetTutorialTemplate.ipynb
@@ -32,7 +32,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"A brief explanation of how the reader can use the tutorial. Can the 
reader copy each code snippet into a Python or other environment? Or can the 
reader run  before or after reading through the explanations to 
understand how the code works?"
+"A brief explanation of how the reader can use the tutorial. Can the 
reader copy each code snippet into a Python or other environment? Or can the 
reader run `` before or after reading through the explanations to 
understand how the code works?"
]
   },
   {
@@ -70,10 +70,10 @@
"source": [
 "To complete this tutorial, you need:\n",
 "\n",
-"- [MXNet](//http://mxnet.io/get_started/setup.html#overview)\n",
-"- [Language](http://)\n",
-"- [Tool](http://)\n",
-"- Familiarity with concept or tool"
+"- [MXNet](https://mxnet.incubator.apache.org/install/#overview)\n",
+"- [Language](https://mxnet.incubator.apache.org/tutorials/)\n",
+"- [Tool](https://mxnet.incubator.apache.org/api/python/index.html)\n",
+"- [Familiarity with concept or tool](https://gluon.mxnet.io/)\n"
]
   },
   {
@@ -96,10 +96,9 @@
"source": [
 "You can download the data used in this tutorial from the [Site 
Name](http://) site. To download the data:\n",
 "\n",
-"1. At the  prompt, type:\n",
-"\n",
-"``command``\n",
+"1. At the `` prompt, type:\n",
 "\n",
+"``\n",
 "2. Second task.\n",
 "\n",
 "3. Last task."
@@ -109,7 +108,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"Briefly describe key aspects of the data. If there are two or more 
aspects of the data that require involved discussion, use subheads (### 
). To include a graphic, introduce it with a 
brief description and use the image linking tool to include it. Store the 
graphic in GitHub and use the following format: https://cloud.githubusercontent.com/assets/5545640/15089697/d6f4fca0-13d7-11e6-
 [...]
+"Briefly describe key aspects of the data. If there are two or more 
aspects of the data that require involved discussion, use subheads (### 
``). To include a graphic, introduce it with a 
brief description and use the image linking tool to include it. Store the 
graphic in GitHub and use the following format: https://cloud.githubusercontent.com/assets/5545640/15089697/d6f4fca0-13d7-11e
 [...]
]
   },
   {
@@ -343,7 +342,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"To *perform the task*, *provide explanation here.*"
+"To *fperform the task*, *provide explanation here.*"
]
   },
   {

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: hide kvstore api from api index (#10806)

2018-05-07 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 7464e33  hide kvstore api from api index (#10806)
7464e33 is described below

commit 7464e33b35b609f0d192133608f2d98e24470fd6
Author: Haibin Lin 
AuthorDate: Mon May 7 20:35:03 2018 -0700

hide kvstore api from api index (#10806)

* remove kvstore api from top level idx

* Update kvstore.md

* Update executor.md

* Update executor.md

* Update executor.md

* Update kvstore.md
---
 docs/api/python/executor/executor.md | 2 ++
 docs/api/python/index.md | 9 -
 docs/api/python/kvstore/kvstore.md   | 2 ++
 3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/docs/api/python/executor/executor.md 
b/docs/api/python/executor/executor.md
index ce920ff..65245a4 100644
--- a/docs/api/python/executor/executor.md
+++ b/docs/api/python/executor/executor.md
@@ -3,6 +3,8 @@
 The executor and executor manager are internal classes for managing symbolic
 graph execution. This document is only intended for reference for advanced 
users.
 
+.. note:: Direct interactions with executor and executor manager are dangerous 
and not recommended.
+
 ## Executor
 
 ```eval_rst
diff --git a/docs/api/python/index.md b/docs/api/python/index.md
index 54aaef1..420f4c9 100644
--- a/docs/api/python/index.md
+++ b/docs/api/python/index.md
@@ -86,15 +86,6 @@ Code examples are placed throughout the API documentation 
and these can be run a
gluon/contrib.md
 ```
 
-## KVStore API
-
-```eval_rst
-.. toctree::
-   :maxdepth: 1
-
-   kvstore/kvstore.md
-```
-
 ## IO API
 
 ```eval_rst
diff --git a/docs/api/python/kvstore/kvstore.md 
b/docs/api/python/kvstore/kvstore.md
index 28297fa..efd34bc 100644
--- a/docs/api/python/kvstore/kvstore.md
+++ b/docs/api/python/kvstore/kvstore.md
@@ -1,5 +1,7 @@
 # KVStore API
 
+.. note:: Direct interactions with ``KVStore`` are dangerous and not 
recommended.
+
 ## Basic Push and Pull
 
 Provides basic operation over multiple devices (GPUs) on a single device.

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: [MXNET-343]fix Mkldnn with msvc (#10629)

2018-05-07 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 31016c4  [MXNET-343]fix Mkldnn with msvc (#10629)
31016c4 is described below

commit 31016c4ddd807d209b5ac252ca84b1af5ff8e262
Author: Hu Shiwen 
AuthorDate: Tue May 8 02:16:11 2018 +0800

[MXNET-343]fix Mkldnn with msvc (#10629)

* fix mkldnn with msvc
add linux

* fix sum name

* jk ip

* add test

* fixed missing depend

* fix tools url
use arc to all
remove redundant test

* updata mkldnn to 0.14

* change mkldnn to v0.14

* close "-mtune=generic" with msvc, turn off test example

* up mkldnn

* change mkldnn to good v0.14 tag

* fix mklml download

* fix  MKLDNN_UTIL_FUNC.MemFormat
---
 3rdparty/mkldnn |  2 +-
 3rdparty/mshadow|  2 +-
 CMakeLists.txt  | 44 ---
 Jenkinsfile | 63 +++--
 cmake/FirstClassLangCuda.cmake  |  2 +-
 cmake/MklDnn.cmake  | 44 +++
 python/mxnet/libinfo.py |  7 +++-
 src/operator/nn/mkldnn/mkldnn_base.cc   |  8 -
 src/operator/nn/mkldnn/mkldnn_copy.cc   |  2 +-
 src/operator/nn/mkldnn/mkldnn_ops-inl.h |  2 +-
 src/operator/nn/mkldnn/mkldnn_sum.cc|  2 +-
 tests/CMakeLists.txt|  2 +-
 tests/cpp/operator/mkldnn.cc|  4 +--
 13 files changed, 142 insertions(+), 42 deletions(-)

diff --git a/3rdparty/mkldnn b/3rdparty/mkldnn
index b4137df..0e7ca73 160000
--- a/3rdparty/mkldnn
+++ b/3rdparty/mkldnn
@@ -1 +1 @@
-Subproject commit b4137dfc88e3bf5c6b62e833121802eb8c6696da
+Subproject commit 0e7ca738866d22cc700aa33b8de120b938f910d0
diff --git a/3rdparty/mshadow b/3rdparty/mshadow
index a8c650c..0b4cedd 160000
--- a/3rdparty/mshadow
+++ b/3rdparty/mshadow
@@ -1 +1 @@
-Subproject commit a8c650ce8a708608a282c4d1e251c57873a8db25
+Subproject commit 0b4cedd7015cc69191f8338a8feaacda90697758
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ed96a6c..246ae99 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,8 +19,8 @@ mxnet_option(USE_SSE  "Build with x86 SSE 
instruction support" ON)
 mxnet_option(USE_F16C "Build with x86 F16C instruction support" 
ON) # autodetects support if ON
 mxnet_option(USE_LAPACK   "Build with lapack support" ON IF NOT MSVC)
 mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON)
-mxnet_option(USE_MKLML_MKL"Use MKLDNN variant of MKL (if MKL found)" 
ON IF USE_MKL_IF_AVAILABLE AND UNIX AND (NOT APPLE))
-mxnet_option(USE_MKLDNN   "Use MKLDNN variant of MKL (if MKL found)" 
ON IF USE_MKL_IF_AVAILABLE AND UNIX AND (NOT APPLE))
+mxnet_option(USE_MKLML_MKL"Use MKLDNN variant of MKL (if MKL found)" 
ON IF USE_MKL_IF_AVAILABLE AND (NOT APPLE))
+mxnet_option(USE_MKLDNN   "Use MKLDNN variant of MKL (if MKL found)" 
ON IF USE_MKL_IF_AVAILABLE AND (NOT APPLE))
 mxnet_option(USE_OPERATOR_TUNING  "Enable auto-tuning of operators" ON IF NOT 
MSVC)
 mxnet_option(USE_GPERFTOOLS   "Build with GPerfTools support (if found)" 
ON)
 mxnet_option(USE_JEMALLOC "Build with Jemalloc support"   ON)
@@ -87,7 +87,6 @@ if(MSVC)
   add_definitions(-DNNVM_EXPORTS)
   add_definitions(-DDMLC_STRICT_CXX11)
   add_definitions(-DNOMINMAX)
-  set(SUPPORT_F16C FALSE)
   if(USE_F16C)
 message("F16C instruction set is not yet supported for MSVC")
   endif()
@@ -185,34 +184,21 @@ if(USE_VTUNE)
   list(APPEND mxnet_LINKER_LIBS dl)
 endif()
 
-if(USE_MKL_IF_AVAILABLE)
-  if(USE_MKLDNN)
-# We need to use generic archtecture. Otherwise, MKLDNN compiled in one
-# CPU architecture (e.g., C5) can't run on another architecture (e.g., g3).
+if(USE_MKLDNN)
+  include(cmake/MklDnn.cmake)
+  # CPU architecture (e.g., C5) can't run on another architecture (e.g., g3).
+  if(NOT MSVC)
 set(ARCH_OPT_FLAGS "-mtune=generic")
-add_subdirectory(3rdparty/mkldnn)
-include_directories(3rdparty/mkldnn/include)
-add_definitions(-DMXNET_USE_MKLDNN=1)
-list(APPEND mxnet_LINKER_LIBS mkldnn)
-  endif()
-  find_package(MKL)
-
-  if(MKL_FOUND)
-include_directories(${MKL_INCLUDE_DIR})
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/operator/mkl)
-
-add_definitions(-DUSE_MKL=1)
-add_definitions(-DCUB_MKL=1)
-list(APPEND mxnet_LINKER_LIBS ${MKL_LIBRARIES})
-
-if(NOT MSVC)
-  list(APPEND mxnet_LINKER_LIBS dl)
-endif()
-# If using MKL, use the Intel OMP libraries
-list(APPEND mxnet_LINKER_LIBS iomp5)
-  else()
-mess

[incubator-mxnet] branch master updated: fix the latex formula for LRN operator (#10802)

2018-05-07 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 2e935b9  fix the latex formula for LRN operator (#10802)
2e935b9 is described below

commit 2e935b94326124082e364f441bc4e4fe16e1ab1d
Author: JackieWu 
AuthorDate: Tue May 8 02:08:19 2018 +0800

fix the latex formula for LRN operator (#10802)

* fix formula for LRN operator

* retrigger test
---
 src/operator/nn/lrn.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/operator/nn/lrn.cc b/src/operator/nn/lrn.cc
index 68d3261..e86c471 100644
--- a/src/operator/nn/lrn.cc
+++ b/src/operator/nn/lrn.cc
@@ -167,7 +167,7 @@ If :math:`a_{x,y}^{i}` is the activity of a neuron computed 
by applying kernel :
 activity :math:`b_{x,y}^{i}` is given by the expression:
 
 .. math::
-   b_{x,y}^{i} = \frac{a_{x,y}^{i}}{\Bigg({k + \alpha \sum_{j=max(0, 
i-\frac{n}{2})}^{min(N-1, i+\frac{n}{2})} (a_{x,y}^{j})^{2}}\Bigg)^{\beta}}
+   b_{x,y}^{i} = \frac{a_{x,y}^{i}}{\Bigg({k + \frac{\alpha}{n} \sum_{j=max(0, 
i-\frac{n}{2})}^{min(N-1, i+\frac{n}{2})} (a_{x,y}^{j})^{2}}\Bigg)^{\beta}}
 
 where the sum runs over :math:`n` "adjacent" kernel maps at the same spatial 
position, and :math:`N` is the total
 number of kernels in the layer.

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: [WIP] fix csv iter (#10829)

2018-05-07 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new aea0fc3  [WIP] fix csv iter (#10829)
aea0fc3 is described below

commit aea0fc34374892771250647baf5fc9855ccb2eae
Author: Haibin Lin 
AuthorDate: Mon May 7 10:35:35 2018 -0700

[WIP] fix csv iter (#10829)

* trigger

* remove skip

* trigger

* 4th trigger.

* remove the loop of testing 1 times
---
 tests/python/unittest/test_io.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/python/unittest/test_io.py b/tests/python/unittest/test_io.py
index e986ae7..a54cb92 100644
--- a/tests/python/unittest/test_io.py
+++ b/tests/python/unittest/test_io.py
@@ -292,7 +292,6 @@ def test_DataBatch():
 ok_(re.match('DataBatch: data shapes: \[\(2L?, 3L?\), \(7L?, 8L?\)\] label 
shapes: \[\(4L?, 5L?\)\]', str(batch)))
 
 
-@unittest.skip("test fails intermittently. temporarily disabled till it gets 
fixed. tracked at https://github.com/apache/incubator-mxnet/issues/7826")
 def test_CSVIter():
 def check_CSVIter_synthetic():
 cwd = os.getcwd()

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: fix creating cpu sparse array from gpu data (#10830)

2018-05-06 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 1968bba  fix creating cpu sparse array from gpu data (#10830)
1968bba is described below

commit 1968bba659bfb6fcc461132aac1729a4f33d5c41
Author: Haibin Lin 
AuthorDate: Sun May 6 13:57:15 2018 -0700

fix creating cpu sparse array from gpu data (#10830)
---
 src/ndarray/ndarray.cc|  8 
 tests/python/gpu/test_operator_gpu.py | 18 ++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index a643da1..67b4c06 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -1911,7 +1911,7 @@ void NDArray::SyncCopyFromNDArray(const NDArray& src, int 
i, int j) {
 if (src_dev_mask == cpu::kDevMask && dst_dev_mask == gpu::kDevMask) {
   Engine::Get()->PushAsync(
 [&](RunContext rctx, Engine::CallbackOnComplete on_complete) {
-  const TBlob src_data = (i >= 0? src.aux_data(i) : src.data());
+  const TBlob src_data = (i >= 0 ? src.aux_data(i) : src.data());
   TBlob dst_data = get_dst_data(src_data.shape_);
   ndarray::Copy(src_data, &dst_data, src.ctx(), this->ctx(), 
rctx);
   rctx.get_stream()->Wait();
@@ -1921,17 +1921,17 @@ void NDArray::SyncCopyFromNDArray(const NDArray& src, 
int i, int j) {
 } else if (src_dev_mask == gpu::kDevMask && dst_dev_mask == cpu::kDevMask) 
{
   Engine::Get()->PushAsync(
 [&](RunContext rctx, Engine::CallbackOnComplete on_complete) {
-  const TBlob src_data = (i >= 0? src.aux_data(i) : src.data());
+  const TBlob src_data = (i >= 0 ? src.aux_data(i) : src.data());
   TBlob dst_data = get_dst_data(src_data.shape_);
   ndarray::Copy(src_data, &dst_data, src.ctx(), this->ctx(), 
rctx);
   rctx.get_stream()->Wait();
   on_complete();
-}, this->ctx(), const_vars, {this->var()},
+}, src.ctx(), const_vars, {this->var()},
 FnProperty::kCopyFromGPU, 0, "SyncCopyFromNDArrayGPU2CPU");
 } else if (src_dev_mask == gpu::kDevMask && dst_dev_mask == gpu::kDevMask) 
{
   Engine::Get()->PushAsync(
 [&](RunContext rctx, Engine::CallbackOnComplete on_complete) {
-  const TBlob src_data = (i >= 0? src.aux_data(i) : src.data());
+  const TBlob src_data = (i >= 0 ? src.aux_data(i) : src.data());
   TBlob dst_data = get_dst_data(src_data.shape_);
   ndarray::Copy(src_data, &dst_data, src.ctx(), this->ctx(), 
rctx);
   rctx.get_stream()->Wait();
diff --git a/tests/python/gpu/test_operator_gpu.py 
b/tests/python/gpu/test_operator_gpu.py
index 08c749e..313730c 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -1834,6 +1834,24 @@ def test_batchnorm_backwards_notrain():
 loss=y.square().sum()
 loss.backward(train_mode=False)
 
+@with_seed()
+def test_create_sparse_ndarray_gpu_to_cpu():
+dim0 = 10
+dim1 = 5
+densities = [0, 0.5, 1]
+for density in densities:
+shape = rand_shape_2d(dim0, dim1)
+matrix = rand_ndarray(shape, 'row_sparse', density)
+data = matrix.data
+indices = matrix.indices
+rsp_created = mx.nd.sparse.row_sparse_array((data, indices), 
shape=shape, ctx=mx.cpu())
+assert rsp_created.stype == 'row_sparse'
+assert same(rsp_created.data.asnumpy(), data.asnumpy())
+assert same(rsp_created.indices.asnumpy(), indices.asnumpy())
+rsp_copy = mx.nd.array(rsp_created)
+assert(same(rsp_copy.asnumpy(), rsp_created.asnumpy()))
+
+
 if __name__ == '__main__':
 import nose
 nose.runmodule()

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: fix (#10814)

2018-05-04 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 3bba4c8  fix (#10814)
3bba4c8 is described below

commit 3bba4c8f6362df8b3355404002eab6a6c88123d6
Author: Eric Junyuan Xie 
AuthorDate: Fri May 4 15:59:20 2018 -0700

fix (#10814)
---
 python/mxnet/gluon/parameter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/mxnet/gluon/parameter.py b/python/mxnet/gluon/parameter.py
index 04694df..a3a1e32 100644
--- a/python/mxnet/gluon/parameter.py
+++ b/python/mxnet/gluon/parameter.py
@@ -366,7 +366,7 @@ class Parameter(object):
 self.shape = data.shape
 
 if self._data is None:
-assert self._deferred_init is not None, \
+assert self._deferred_init, \
 "Parameter '%s' has not been initialized"%self.name
 self._deferred_init = self._deferred_init[:3] + (data,)
 return

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: Reorder2Default: return directly for default format (#10810)

2018-05-04 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new ab6a25e  Reorder2Default: return directly for default format (#10810)
ab6a25e is described below

commit ab6a25ea2f184998cc472a98f1b0f4808c89211a
Author: Tao Lv 
AuthorDate: Sat May 5 05:20:44 2018 +0800

Reorder2Default: return directly for default format (#10810)
---
 src/ndarray/ndarray.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index a28a907..a643da1 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -348,7 +348,8 @@ void NDArray::Chunk::Reorder2Default() {
 return;
 
   mkldnn_memory_format_t format = mkl_mem_->GetDefaultFormat();
-  CHECK_NE(format, mkl_mem_->GetFormat());
+  if (format ==  mkl_mem_->GetFormat())
+return;
 
   mkldnn::memory::primitive_desc def_pd = mkl_mem_->GetPrimitiveDesc(format);
   mkldnn_mem_ptr def_mem(new mkldnn::memory(def_pd));

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: Fix a mem error. (#10812)

2018-05-04 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new e49fdae  Fix a mem error. (#10812)
e49fdae is described below

commit e49fdaefd7017005aaed968f66413a0e2ef4a3b9
Author: Da Zheng 
AuthorDate: Fri May 4 13:44:26 2018 -0700

Fix a mem error. (#10812)
---
 include/mxnet/ndarray.h | 5 +
 src/ndarray/ndarray.cc  | 6 ++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h
index 6fda8c3..e243eb7 100644
--- a/include/mxnet/ndarray.h
+++ b/include/mxnet/ndarray.h
@@ -678,10 +678,7 @@ class NDArray {
*/
   NDArray Reorder2Default() const;
 
-  void InvalidateMKLDNNData() {
-// Removing mkl_mem_ means the NDArray will store data in the default 
format.
-ptr_->mkl_mem_ = nullptr;
-  }
+  void InvalidateMKLDNNData();
 
   /*
* This function is used inside operators to reshape an array.
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 82de094..a28a907 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -620,6 +620,12 @@ const mkldnn::memory *NDArray::GetMKLDNNData() const {
   }
 }
 
+void NDArray::InvalidateMKLDNNData() {
+  // Removing mkl_mem_ means the NDArray will store data in the default format.
+  if (ptr_->mkl_mem_ && ptr_->mkl_mem_->IsMKLDNN())
+ptr_->mkl_mem_ = nullptr;
+}
+
 void NDArray::CopyFrom(const mkldnn::memory &mem) {
   CHECK(ptr_ != nullptr) << "The NDArray hasn't been initialized";
   if (ptr_->mkl_mem_ && ptr_->mkl_mem_->GetRaw() == &mem)

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: Update index.md (#10800)

2018-05-04 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new aabaab4  Update index.md (#10800)
aabaab4 is described below

commit aabaab418eab303c64042cf068d2f47965f55086
Author: Eric Junyuan Xie 
AuthorDate: Fri May 4 13:42:54 2018 -0700

Update index.md (#10800)

* Update index.md

* Update index.md
---
 docs/api/python/index.md | 9 +
 1 file changed, 9 insertions(+)

diff --git a/docs/api/python/index.md b/docs/api/python/index.md
index 88e8031..54aaef1 100644
--- a/docs/api/python/index.md
+++ b/docs/api/python/index.md
@@ -140,6 +140,15 @@ Code examples are placed throughout the API documentation 
and these can be run a
metric/metric.md
 ```
 
+## Profiler API
+
+```eval_rst
+.. toctree::
+   :maxdepth: 1
+
+   profiler/profiler.md
+```
+
 ## Run-Time Compilation API
 
 ```eval_rst

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch piiswrong-patch-2 updated (0d1927c -> f6f67e8)

2018-05-04 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a change to branch piiswrong-patch-2
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.


from 0d1927c  Update index.md
 add f6f67e8  Update index.md

No new revisions were added by this update.

Summary of changes:
 docs/api/python/index.md | 1 -
 1 file changed, 1 deletion(-)

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] 01/01: Update index.md

2018-05-03 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch piiswrong-patch-2
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 0d1927cd915eb91d99c56cf67915d22a2343a62b
Author: Eric Junyuan Xie 
AuthorDate: Thu May 3 14:54:41 2018 -0700

Update index.md
---
 docs/api/python/index.md | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/docs/api/python/index.md b/docs/api/python/index.md
index 88e8031..aa1adef 100644
--- a/docs/api/python/index.md
+++ b/docs/api/python/index.md
@@ -140,6 +140,16 @@ Code examples are placed throughout the API documentation 
and these can be run a
metric/metric.md
 ```
 
+## Profiler API
+
+```eval_rst
+.. toctree::
+   :maxdepth: 1
+
+   profiler/profiler.md
+```
+
+
 ## Run-Time Compilation API
 
 ```eval_rst

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch piiswrong-patch-2 created (now 0d1927c)

2018-05-03 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a change to branch piiswrong-patch-2
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.


  at 0d1927c  Update index.md

This branch includes the following new commits:

 new 0d1927c  Update index.md

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: add reverse option to ndarray inplace reshape (#10767)

2018-05-03 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 66b2944  add reverse option to ndarray inplace reshape (#10767)
66b2944 is described below

commit 66b294434aeffa9ed3f1cf01416345549139bf23
Author: Sheng Zha 
AuthorDate: Thu May 3 10:16:48 2018 -0700

add reverse option to ndarray inplace reshape (#10767)

* add reverse option to ndarray inplace reshape

* update check
---
 include/mxnet/c_api.h |  1 +
 python/mxnet/ndarray/ndarray.py   | 26 +--
 src/c_api/c_api.cc|  3 ++-
 tests/python/unittest/test_ndarray.py | 39 ++-
 4 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h
index 3f04051..9ac90d6 100644
--- a/include/mxnet/c_api.h
+++ b/include/mxnet/c_api.h
@@ -663,6 +663,7 @@ MXNET_DLL int MXNDArrayReshape(NDArrayHandle handle,
 MXNET_DLL int MXNDArrayReshape64(NDArrayHandle handle,
  int ndim,
  dim_t *dims,
+ bool reverse,
  NDArrayHandle *out);
 /*!
  * \brief get the shape of the array
diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py
index 6b2ff23..2411932 100644
--- a/python/mxnet/ndarray/ndarray.py
+++ b/python/mxnet/ndarray/ndarray.py
@@ -989,6 +989,19 @@ fixed-size items.
   - input shape = (2,3,4), shape = (-4,1,2,-2), output shape 
=(1,2,3,4)
   - input shape = (2,3,4), shape = (2,-4,-1,3,-2), output shape = 
(2,1,3,4)
 
+- If the argument `reverse` is set to 1, then the special values 
are inferred from right
+  to left.
+
+  Example::
+
+  - without reverse=1, for input shape = (10,5,4), shape = (-1,0), 
output shape would be
+(40,5).
+  - with reverse=1, output shape will be (50,4).
+
+reverse : bool, default False
+If true then the special values are inferred from right to left. 
Only supported as
+keyword argument.
+
 
 Returns
 ---
@@ -1029,18 +1042,19 @@ fixed-size items.
 elif not shape:
 shape = kwargs.get('shape')
 assert shape, "Shape must be provided."
-if len(kwargs) != 1:
-raise TypeError("Only 'shape' is supported as keyword 
argument. Got: {}."
-.format(', '.join(kwargs.keys())))
-else:
-assert not kwargs,\
-"Specifying both positional and keyword arguments is not 
allowed in reshape."
+if not all(k in ['shape', 'reverse'] for k in kwargs):
+raise TypeError(
+"Got unknown keywords in reshape: {}. " \
+"Accepted keyword arguments are 'shape' and 'reverse'.".format(
+', '.join([k for k in kwargs if k not in ['shape', 
'reverse']])))
+reverse = kwargs.get('reverse', False)
 handle = NDArrayHandle()
 
 # Actual reshape
 check_call(_LIB.MXNDArrayReshape64(self.handle,
len(shape),
c_array(ctypes.c_int64, shape),
+   reverse,
ctypes.byref(handle)))
 return NDArray(handle=handle, writable=self.writable)
 
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc
index 34b4fd2..b3dcd6a 100644
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -431,12 +431,13 @@ MXNET_DLL int MXNDArrayReshape(NDArrayHandle handle,
 MXNET_DLL int MXNDArrayReshape64(NDArrayHandle handle,
  int ndim,
  dim_t *dims,
+ bool reverse,
  NDArrayHandle *out) {
   NDArray *ptr = new NDArray();
   API_BEGIN();
   NDArray *arr = static_cast<NDArray*>(handle);
   nnvm::Tuple<dim_t> shape(dims, dims+ndim);
-  TShape new_shape = mxnet::op::InferReshapeShape(shape, arr->shape(), false);
+  TShape new_shape = mxnet::op::InferReshapeShape(shape, arr->shape(), 
reverse);
   *ptr = arr->ReshapeWithRecord(new_shape);
   *out = ptr;
   API_END_HANDLE_ERROR(delete ptr);
diff --git a/tests/python/unittest/test_ndarray.py 
b/tests/python/unittest/test_ndarray.py
index 030816e..9ff2f1a 100644
--- a/tests/python/unittest/test_ndarray.py
+++ b/tests/python/unittest/test_ndarray.py
@@ -154,30 +154,23 @@ def test_ndarray_negate():
 
 @with_seed()
 def test_ndarray_reshape():
-te

[incubator-mxnet] branch master updated: [MXNET-359] fix checks on convolution parameters in MKLDNN. (#10666)

2018-05-02 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 1420697  [MXNET-359] fix checks on convolution parameters in MKLDNN. 
(#10666)
1420697 is described below

commit 14206978f461364c53aaf1c787e2f268e2a94b00
Author: Da Zheng 
AuthorDate: Wed May 2 13:20:13 2018 -0700

[MXNET-359] fix checks on convolution parameters in MKLDNN. (#10666)

* fix check on tuples of conv.

* check params in (de)conv.

* rename.

* add messages.
---
 src/operator/nn/convolution.cc | 18 -
 src/operator/nn/deconvolution.cc   | 22 +-
 src/operator/nn/mkldnn/mkldnn_base-inl.h   | 10 +--
 src/operator/nn/mkldnn/mkldnn_convolution.cc   | 69 +-
 src/operator/nn/mkldnn/mkldnn_deconvolution.cc | 99 +-
 5 files changed, 107 insertions(+), 111 deletions(-)

diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc
index 7fd8bbb..0e8a929 100644
--- a/src/operator/nn/convolution.cc
+++ b/src/operator/nn/convolution.cc
@@ -54,7 +54,8 @@ static void ConvolutionComputeExCPU(const nnvm::NodeAttrs& 
attrs,
 const std::vector<NDArray>& inputs,
 const std::vector<OpReqType>& req,
 const std::vector<NDArray>& outputs) {
-  if (SupportMKLDNNConv(inputs[0])) {
+  const ConvolutionParam& params = nnvm::get<ConvolutionParam>(attrs.parsed);
+  if (SupportMKLDNNConv(params, inputs[0])) {
 MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs);
 MKLDNNConvolutionForward(attrs, ctx, inputs, req, outputs);
 MKLDNN_OPCHECK_RUN(ConvolutionCompute, attrs, ctx, inputs, req, 
outputs);
@@ -68,7 +69,8 @@ static void ConvolutionGradComputeExCPU(const 
nnvm::NodeAttrs& attrs,
 const std::vector<NDArray>& inputs,
 const std::vector<OpReqType>& req,
 const std::vector<NDArray>& outputs) {
-  if (SupportMKLDNNConv(inputs[0])) {
+  const ConvolutionParam& params = nnvm::get<ConvolutionParam>(attrs.parsed);
+  if (SupportMKLDNNConv(params, inputs[0])) {
 MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
 MKLDNNConvolutionBackward(attrs, ctx, inputs, req, outputs);
 MKLDNN_OPCHECK_RUN(ConvolutionGradCompute, attrs, ctx, inputs, req, 
outputs);
@@ -363,6 +365,18 @@ static void ConvolutionParamParser(nnvm::NodeAttrs* attrs) 
{
 if (param_.dilate.ndim() == 0) param_.dilate = Shape3(1, 1, 1);
 if (param_.pad.ndim() == 0) param_.pad = Shape3(0, 0, 0);
   }
+  CHECK_EQ(param_.kernel.ndim(), param_.stride.ndim())
+<< "Stride must have the same number of dimensions with kernel_size,"
+<< "but kernel_size is set to " << param_.kernel << " while stride is "
+<< param_.stride;
+  CHECK_EQ(param_.kernel.ndim(), param_.dilate.ndim())
+<< "Dilate must have the same number of dimensions with kernel_size,"
+<< "but kernel_size is set to " << param_.kernel << " while dilate is "
+<< param_.dilate;
+  CHECK_EQ(param_.kernel.ndim(), param_.pad.ndim())
+<< "Padding must have the same number of dimensions with kernel_size,"
+<< "but kernel_size is set to " << param_.kernel << " while padding is "
+<< param_.pad;
   attrs->parsed = std::move(param_);
 }
 
diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc
index 0d1b391..13fc757 100644
--- a/src/operator/nn/deconvolution.cc
+++ b/src/operator/nn/deconvolution.cc
@@ -304,7 +304,8 @@ static void DeconvolutionComputeExCPU(const 
nnvm::NodeAttrs& attrs,
   const std::vector<NDArray>& inputs,
   const std::vector<OpReqType>& req,
   const std::vector<NDArray>& outputs) {
-  if (SupportMKLDNNConv(inputs[0])) {
+  const DeconvolutionParam& param = 
nnvm::get<DeconvolutionParam>(attrs.parsed);
+  if (SupportMKLDNNDeconv(param, inputs[0])) {
 MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs);
 MKLDNNDeconvolutionForward(attrs, ctx, inputs, req, outputs);
 MKLDNN_OPCHECK_RUN(DeconvolutionCompute, attrs, ctx, inputs, req,
@@ -320,7 +321,8 @@ static void DeconvolutionGradComputeExCPU(const 
nnvm::NodeAttrs& attrs,
   const std::vector& inputs,
   const std::vector& req,
   const std::vector& outputs) 
{
-  if (SupportMKLDNNConv(inputs[0])) {
+  const DeconvolutionParam& param = 
nnvm::get(attrs.parsed);
+  if (SupportMKLDNN

[incubator-mxnet] branch master updated: update perf. (#10761)

2018-05-02 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new ebd8a6b  update perf. (#10761)
ebd8a6b is described below

commit ebd8a6bd35a34af427d8ee100788447eb564bcf1
Author: Da Zheng 
AuthorDate: Wed May 2 10:40:59 2018 -0700

update perf. (#10761)
---
 docs/faq/perf.md | 231 ---
 1 file changed, 119 insertions(+), 112 deletions(-)

diff --git a/docs/faq/perf.md b/docs/faq/perf.md
index b5d73f6..ce74391 100644
--- a/docs/faq/perf.md
+++ b/docs/faq/perf.md
@@ -29,65 +29,70 @@ Note that _MXNet_ treats all CPUs on a single machine as a 
single device.
 So whether you specify `cpu(0)` or `cpu()`, _MXNet_ will use all CPU cores on 
the machine.
 
 ### Scoring results
-The following table shows performance,
+The following table shows performance of 
[MXNet-1.2.0.rc1](https://github.com/apache/incubator-mxnet/releases/download/1.2.0.rc1/apache-mxnet-src-1.2.0.rc1-incubating.tar.gz),
 namely number of images that can be predicted per second.
 We used 
[example/image-classification/benchmark_score.py](https://github.com/dmlc/mxnet/blob/master/example/image-classification/benchmark_score.py)
 to measure the performance on different AWS EC2 machines.
 
-AWS EC2 C4.8xlarge:
-
-| Batch | Alexnet | VGG | Inception-BN | Inception-v3 | Resnet 50 | Resnet 152 
|
-| --- | --- | --- | --- | --- | --- | --- |
-|   1 |  119.57 | 34.23 |  111.36 |  54.42 |  42.83 | 19.51 |
-|   2 | 210.58 | 51.63 |  137.10 |  67.30 |  57.54 | 23.56 |
-|   4 | 318.54 | 70.00 |  187.21 |  76.53 |  63.64 | 25.80 |
-|   8 | 389.34 | 77.39 |  211.90 |  84.26 |  63.89 | 28.11 |
-|  16 | 489.12 | 85.26 |  220.52 |  82.00 |  63.93 | 27.08 |
-|  32 | 564.04 | 87.15 |  208.21 |  83.05 |  62.19 | 25.76 |
-
-AWS EC2 C4.4xlarge:
-
-| Batch | Alexnet | VGG | Inception-BN | Inception-v3 | Resnet 50 | Resnet 152 
|
-| --- | --- | --- | --- | --- | --- | --- |
-|   1 |  109.96 | 23.00 |  71.82 |  28.10 |  30.66 | 11.81 |
-|   2 | 124.56 | 24.86 |  81.61 |  31.32 |  32.73 | 12.82 |
-|   4 | 157.01 | 26.60 |  86.77 |  32.94 |  33.32 | 13.16 |
-|   8 | 178.40 | 30.67 |  88.58 |  33.52 |  33.32 | 13.32 |
-|  16 | 189.52 | 35.61 |  90.36 |  33.63 |  32.94 | 13.18 |
-|  32 | 196.61 | 38.98 |  105.27 |  33.77 |  32.65 | 13.00 |
-
-AWS EC2 C4.2xlarge:
-
-| Batch | Alexnet | VGG | Inception-BN | Inception-v3 | Resnet 50 | Resnet 152 
|
-| --- | --- | --- | --- | --- | --- | --- |
-|   1 |  70.75 | 12.87 |  42.86 |  16.53 |  18.14 | 7.01 |
-|   2 | 71.53 | 13.08 |  45.66 |  17.38 |  18.53 | 7.18 |
-|   4 | 84.72 | 15.38 |  47.50 |  17.80 |  18.96 | 7.35 |
-|   8 | 93.44 | 18.33 |  48.08 |  17.93 |  18.99 | 7.40 |
-|  16 | 97.03 | 20.12 |  55.73 |  18.00 |  18.91 | 7.36 |
-|  32 | 113.90 | 21.10 |  62.54 |  17.98 |  18.80 | 7.33 |
-
-AWS EC2 C4.xlarge:
-
-| Batch | Alexnet | VGG | Inception-BN | Inception-v3 | Resnet 50 | Resnet 152 
|
-| --- | --- | --- | --- | --- | --- | --- |
-|   1 |  37.92 | 6.57 |  23.09 |  8.79 |  9.65 | 3.73 |
-|   2 | 36.77 | 7.31 |  24.00 |  9.00 |  9.84 | 3.78 |
-|   4 | 43.18 | 8.94 |  24.42 |  9.12 |  9.91 | 3.83 |
-|   8 | 47.05 | 10.01 |  28.32 |  9.13 |  9.88 | 3.83 |
-|  16 | 55.74 | 10.61 |  31.96 |  9.14 |  9.86 | 3.80 |
-|  32 | 65.05 | 10.91 |  33.86 |  9.34 |  10.31 | 3.86 |
-
-AWS EC2 C4.large:
-
-| Batch | Alexnet | VGG | Inception-BN | Inception-v3 | Resnet 50 | Resnet 152 
|
-| --- | --- | --- | --- | --- | --- | --- |
-|   1 |  19.86 | 3.67 |  12.20 |  4.59 |  5.11 | 1.97 |
-|   2 | 19.37 | 4.24 |  12.41 |  4.64 |  5.15 | 1.98 |
-|   4 | 22.64 | 4.89 |  14.34 |  4.66 |  5.16 | 2.00 |
-|   8 | 27.19 | 5.25 |  16.17 |  4.66 |  5.16 | 1.99 |
-|  16 | 31.82 | 5.46 |  17.24 |  4.76 |  5.35 | OOM |
-|  32 | 34.67 | 5.55 |  17.64 |  4.88 |  OOM | OOM |
+AWS EC2 C5.18xlarge:
+
+| Batch | Alexnet | VGG| Inception-BN | Inception-v3 | Resnet 50 | Resnet 
152 |
+|-------|---------|--------|--------------|--------------|-----------|------------|
+| 1 | 390.53  | 81.57  | 124.13   | 62.26| 76.22 | 32.92   
   |
+| 2 | 596.45  | 100.84 | 206.58   | 93.36| 119.55| 46.80   
   |
+| 4 | 710.77  | 119.04 | 275.55   | 127.86   | 148.62| 59.36   
   |
+| 8 | 921.40  | 120.38 | 380.82   | 157.11   | 167.95| 70.78   
   |
+| 16| 1018.43 | 115.30 | 411.67   | 168.71   | 178.54| 75.13   
   |
+| 32| 1290.31 | 107.19 | 483.34   | 179.38   | 193.47| 85.86   
   |
+
+
+AWS EC2 C5.9xlarge:
+
+| Batch | Alexnet | VGG   | Inception-BN | Inception-v3 | Resnet 50 | Resnet 
152 |
+|-------|---------|-------|--------------|--------------|-----------|------------|
+| 1 | 257.77  | 50.61 | 130.99   | 66.95| 75.38 | 32.33
  |
+| 2 | 410.60  | 63.02 | 195.14   | 87.84

[incubator-mxnet] branch master updated: Use numpy.arange in RandomSampler (#10768)

2018-05-01 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 23934cf  Use numpy.arange in RandomSampler (#10768)
23934cf is described below

commit 23934cf62ea92bf12876ac27fb63bbc1bf4a9490
Author: Leonard Lausen 
AuthorDate: Tue May 1 23:51:47 2018 -0700

Use numpy.arange in RandomSampler (#10768)

Significant speedup for large datasets:

In [2]: %timeit current_sample(1529*8192)
12.3 s ± 721 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

In [3]: %timeit np_sample(1529*8192)
641 ms ± 6.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
---
 python/mxnet/gluon/data/sampler.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/mxnet/gluon/data/sampler.py 
b/python/mxnet/gluon/data/sampler.py
index 66d6cfb..2f827c8 100644
--- a/python/mxnet/gluon/data/sampler.py
+++ b/python/mxnet/gluon/data/sampler.py
@@ -20,7 +20,7 @@
 """Dataset sampler."""
 __all__ = ['Sampler', 'SequentialSampler', 'RandomSampler', 'BatchSampler']
 
-import random
+import numpy as np
 
 class Sampler(object):
 """Base class for samplers.
@@ -65,8 +65,8 @@ class RandomSampler(Sampler):
 self._length = length
 
 def __iter__(self):
-indices = list(range(self._length))
-random.shuffle(indices)
+indices = np.arange(self._length)
+np.random.shuffle(indices)
 return iter(indices)
 
 def __len__(self):

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

[incubator-mxnet] branch master updated: API calls are not consistent in the example (#10763)

2018-05-01 Thread jxie

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
 new 97da5e3  API calls are not consistent in the example (#10763)
97da5e3 is described below

commit 97da5e3b35b0725d10deffef0032f203df51d271
Author: James 
AuthorDate: Tue May 1 10:54:46 2018 -0700

API calls are not consistent in the example (#10763)

* Make api call consistent

* Make API call consistent
---
 example/sparse/factorization_machine/model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/example/sparse/factorization_machine/model.py 
b/example/sparse/factorization_machine/model.py
index f0af2e6..98967ed 100644
--- a/example/sparse/factorization_machine/model.py
+++ b/example/sparse/factorization_machine/model.py
@@ -27,10 +27,10 @@ def factorization_machine_model(factor_size, num_features,
 v = mx.symbol.Variable("v", shape=(num_features, factor_size), 
stype='row_sparse',
init=init_config['v'], lr_mult=lr_mult_config['v'],
wd_mult=wd_mult_config['v'])
-w = mx.symbol.var('w', shape=(num_features, 1), stype='row_sparse',
+w = mx.symbol.Variable('w', shape=(num_features, 1), stype='row_sparse',
   init=init_config['w'], lr_mult=lr_mult_config['w'],
   wd_mult=wd_mult_config['w'])
-w0 = mx.symbol.var('w0', shape=(1,), init=init_config['w0'],
+w0 = mx.symbol.Variable('w0', shape=(1,), init=init_config['w0'],
lr_mult=lr_mult_config['w0'], 
wd_mult=wd_mult_config['w0'])
 w1 = mx.symbol.broadcast_add(mx.symbol.dot(x, w), w0)
 

-- 
To stop receiving notification emails like this one, please contact
j...@apache.org.

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 970 matches

Mail list logo