This is an automated email from the ASF dual-hosted git repository.

haibin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 3062122  [MXNET-329] support SparseEmbedding with dense weight (#10585)
3062122 is described below

commit 306212289888562617bf1dae1199695daea2b054
Author: Haibin Lin <linhaibin.e...@gmail.com>
AuthorDate: Thu Apr 19 08:04:40 2018 -0700

    [MXNET-329] support SparseEmbedding with dense weight (#10585)
    
    * add sparseembedding(dense_weight)
    
    * update test
    
    * Update test_sparse_operator.py
---
 src/operator/tensor/indexing_op.cc            |  8 +++-----
 src/operator/tensor/indexing_op.h             | 12 +++++++++---
 tests/python/unittest/test_sparse_operator.py | 20 ++++++++++++++------
 3 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc
index bb65419..6f0f468 100644
--- a/src/operator/tensor/indexing_op.cc
+++ b/src/operator/tensor/indexing_op.cc
@@ -263,8 +263,7 @@ All the input values should be integers in the range [0, input_dim).
 If the input_dim is ip0 and output_dim is op0, then shape of the embedding weight matrix must be
 (ip0, op0).
 
-The storage type of weight must be `row_sparse`, and the gradient of the weight will be of
-`row_sparse` storage type, too.
+The storage type of the gradient will be `row_sparse`.
 
 .. Note::
 
@@ -272,9 +271,8 @@ The storage type of weight must be `row_sparse`, and the gradient of the weight
     The operator is available on both CPU and GPU.
     When `deterministic` is set to `True`, the accumulation of gradients follows a
     deterministic order if a feature appears multiple times in the input. However, the
-    accumulation is usually slower when the order is enforced.
-    When the operator is used in recurrent neural network models on the GPU,
-    the recommended value for `deterministic` is `True`.
+    accumulation is usually slower when the order is enforced on GPU.
+    When the operator is used on the GPU, the recommended value for `deterministic` is `True`.
 
 Examples::
 
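For illustration, a minimal sketch of what this change enables, mirroring the updated unit test further down this message; the variable names and shapes here are illustrative, not part of the commit:

    import mxnet as mx

    data = mx.sym.Variable('data')
    # the embedding weight may now use dense ('default') storage as well as 'row_sparse'
    weight = mx.sym.Variable('embed_weight', stype='default')
    embed = mx.sym.contrib.SparseEmbedding(data=data, weight=weight,
                                           input_dim=50, output_dim=3,
                                           name='embed')
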
diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index 2d17798..0f65066 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -21,7 +21,7 @@
  * Copyright (c) 2017 by Contributors
  * \file indexing_op.h
  * \brief
- * \author Bing Xu, Siyi Li, Chi Zhang
+ * \author Bing Xu, Siyi Li, Chi Zhang, Haibin Lin
 */
 #ifndef MXNET_OPERATOR_TENSOR_INDEXING_OP_H_
 #define MXNET_OPERATOR_TENSOR_INDEXING_OP_H_
@@ -209,8 +209,8 @@ inline bool SparseEmbeddingOpForwardStorageType(const nnvm::NodeAttrs& attrs,
   int& out_stype = out_attrs->at(embedding::kOut);
   bool dispatched = false;
   if (!dispatched && data_stype == kDefaultStorage &&
-      weight_stype == kRowSparseStorage) {
-    // dns, rsp -> dns
+      (weight_stype == kRowSparseStorage || weight_stype == kDefaultStorage)) {
+    // dns, rsp/dns -> dns
     dispatched = storage_type_assign(&out_stype, kDefaultStorage,
                                      dispatch_mode, DispatchMode::kFComputeEx);
   }
@@ -423,7 +423,13 @@ void SparseEmbeddingOpForwardEx(const nnvm::NodeAttrs& attrs,
   const auto out_stype = out.storage_type();
   if (data_stype == kDefaultStorage && weight_stype == kRowSparseStorage &&
       out_stype == kDefaultStorage) {
+    // dns, rsp -> dns
     SparseEmbeddingOpForwardRspImpl<xpu>(ctx, data.data(), weight, req[0], out.data());
+  } else if (data_stype == kDefaultStorage && weight_stype == kDefaultStorage &&
+             out_stype == kDefaultStorage) {
+    // dns, dns -> dns
+    EmbeddingOpForwardDnsImpl<xpu>(ctx.get_stream<xpu>(), data.data(), weight.data(),
+                                   req[0], out.data());
   } else {
     LogUnimplementedOp(attrs, ctx, inputs, req, outputs);
   }
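
For illustration, a small sketch of the two dispatch paths added above; it assumes the imperative `mx.nd.contrib` namespace mirrors the symbolic operator, which is an assumption rather than something shown in this diff. A `row_sparse` weight goes through SparseEmbeddingOpForwardRspImpl, a dense weight now goes through EmbeddingOpForwardDnsImpl, and both produce a dense output:

    import mxnet as mx

    data = mx.nd.array([1., 3., 0., 2.])
    dense_w = mx.nd.random.uniform(shape=(50, 3))    # 'default' storage
    sparse_w = dense_w.tostype('row_sparse')         # 'row_sparse' storage
    for w in (dense_w, sparse_w):
        out = mx.nd.contrib.SparseEmbedding(data=data, weight=w,
                                            input_dim=50, output_dim=3)
        # dns, rsp -> dns and dns, dns -> dns: the output is dense either way
        print(w.stype, '->', out.stype)
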
diff --git a/tests/python/unittest/test_sparse_operator.py b/tests/python/unittest/test_sparse_operator.py
index 3479486..31f2e49 100644
--- a/tests/python/unittest/test_sparse_operator.py
+++ b/tests/python/unittest/test_sparse_operator.py
@@ -1638,10 +1638,10 @@ def test_sparse_elementwise_sum():
 @with_seed()
 def test_sparse_embedding():
     ''' test sparse embedding operator '''
-    def check_sparse_embedding(in_dim, out_dim, batch, densities, deterministic):
+    def check_sparse_embedding(in_dim, out_dim, batch, densities, deterministic, weight_stype):
         # init executor
         data = mx.sym.Variable("data")
-        weight = mx.sym.Variable("embed_weight", stype='row_sparse')
+        weight = mx.sym.Variable("embed_weight", stype=weight_stype)
         embed = mx.sym.contrib.SparseEmbedding(data=data, weight=weight, input_dim=in_dim,
                                                output_dim=out_dim, deterministic=deterministic,
                                                name="embed")
@@ -1662,21 +1662,29 @@ def test_sparse_embedding():
         weight = arg_map["embed_weight"]
         for density in densities:
             # update weight based on density
-            weight[:] = rand_ndarray(weight.shape, 'row_sparse', density=density)
+            weight[:] = rand_ndarray(weight.shape, weight_stype, density=density)
             # check forward
             exe_test.forward(is_train=True)
             assert_almost_equal(exe_test.outputs[0].asnumpy(), np.dot(np_onehot, weight.asnumpy()), atol=1e-4)
             # check backward
             exe_test.backward([grad])
             assert_almost_equal(grad_map["embed_weight"].asnumpy(), np.dot(np_onehot.T, grad.asnumpy()), atol=1e-4)
+            # run twice to check if the result is deterministic when passing "deterministic=True" to SparseEmbedding
+            if deterministic:
+                grad_ref = grad_map["embed_weight"].asnumpy()
+                exe_test.backward([grad])
+                assert_almost_equal(grad_map["embed_weight"].asnumpy(), grad_ref, atol=0, rtol=0)
 
     densities = [0, 0.5, 1]
     in_dim = 50
     out_dim = 3
     batch = 8
-    check_sparse_embedding(in_dim, out_dim, batch, densities, True)
-    check_sparse_embedding(in_dim, out_dim, batch, densities, False)
-
+    stypes = ['default', 'row_sparse']
+    deterministics = [True, False]
+    for stype in stypes:
+        for deterministic in deterministics:
+            check_sparse_embedding(in_dim, out_dim, batch, densities, deterministic, stype)
+            check_sparse_embedding(in_dim, out_dim, batch, densities, deterministic, stype)
 
 @with_seed()
 def test_sparse_broadcast_mul_div():

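One consequence worth spelling out: per the revised docstring, the weight gradient keeps `row_sparse` storage even when the weight itself is dense. A self-contained sketch of checking that, assuming the executor API of this MXNet version (the shapes and `grad_req` setup follow the unit test above, and the expected stype follows the docstring rather than anything demonstrated in this diff):

    import mxnet as mx

    data = mx.sym.Variable('data')
    weight = mx.sym.Variable('embed_weight', stype='default')   # dense weight
    embed = mx.sym.contrib.SparseEmbedding(data=data, weight=weight,
                                           input_dim=50, output_dim=3)
    exe = embed.simple_bind(mx.cpu(), data=(8,),
                            grad_req={'data': 'null', 'embed_weight': 'write'})
    exe.arg_dict['data'][:] = mx.nd.array([0, 1, 2, 3, 4, 5, 6, 7])
    exe.arg_dict['embed_weight'][:] = mx.nd.random.uniform(shape=(50, 3))
    exe.forward(is_train=True)
    exe.backward([mx.nd.ones(exe.outputs[0].shape)])
    print(exe.grad_dict['embed_weight'].stype)   # expected: 'row_sparse'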