This is an automated email from the ASF dual-hosted git repository.

haibin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
     new 3062122  [MXNET-329] support SparseEmbedding with dense weight (#10585)
3062122 is described below

commit 306212289888562617bf1dae1199695daea2b054
Author: Haibin Lin <linhaibin.e...@gmail.com>
AuthorDate: Thu Apr 19 08:04:40 2018 -0700

    [MXNET-329] support SparseEmbedding with dense weight (#10585)

    * add sparseembedding(dense_weight)

    * update test

    * Update test_sparse_operator.py
---
 src/operator/tensor/indexing_op.cc            |  8 +++-----
 src/operator/tensor/indexing_op.h             | 12 +++++++++---
 tests/python/unittest/test_sparse_operator.py | 20 ++++++++++++++------
 3 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc
index bb65419..6f0f468 100644
--- a/src/operator/tensor/indexing_op.cc
+++ b/src/operator/tensor/indexing_op.cc
@@ -263,8 +263,7 @@ All the input values should be integers in the range [0, input_dim).
 If the input_dim is ip0 and output_dim is op0, then shape of the embedding weight
 matrix must be (ip0, op0).
 
-The storage type of weight must be `row_sparse`, and the gradient of the weight will be of
-`row_sparse` storage type, too.
+The storage type of the gradient will be `row_sparse`.
 
 .. Note::
 
@@ -272,9 +271,8 @@ The storage type of weight must be `row_sparse`, and the gradient of the weight
 
     The operator is available on both CPU and GPU.
     When `deterministic` is set to `True`, the accumulation of gradients follows a
     deterministic order if a feature appears multiple times in the input. However, the
-    accumulation is usually slower when the order is enforced.
-    When the operator is used in recurrent neural network models on the GPU,
-    the recommended value for `deterministic` is `True`.
+    accumulation is usually slower when the order is enforced on GPU.
+    When the operator is used on the GPU, the recommended value for `deterministic` is `True`.
 
 Examples::
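For context, a minimal sketch of the documented behavior (not part of the
commit; it assumes a build that includes this change and uses the imperative
`mx.nd.contrib.SparseEmbedding` entry point for the same operator)::

    # Hedged sketch: the weight may now use 'default' (dense) storage as well
    # as 'row_sparse'; either way the operator produces a dense output.
    import mxnet as mx

    data = mx.nd.array([0, 2, 1])                       # indices in [0, input_dim)
    dense_weight = mx.nd.ones((3, 4))                   # 'default' storage, newly accepted
    sparse_weight = dense_weight.tostype('row_sparse')  # previously the only option

    for weight in (dense_weight, sparse_weight):
        out = mx.nd.contrib.SparseEmbedding(data=data, weight=weight,
                                            input_dim=3, output_dim=4)
        print('%s -> %s' % (weight.stype, out.stype))   # both end in '-> default'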
diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index 2d17798..0f65066 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -21,7 +21,7 @@
  * Copyright (c) 2017 by Contributors
  * \file indexing_op.h
  * \brief
- * \author Bing Xu, Siyi Li, Chi Zhang
+ * \author Bing Xu, Siyi Li, Chi Zhang, Haibin Lin
 */
 #ifndef MXNET_OPERATOR_TENSOR_INDEXING_OP_H_
 #define MXNET_OPERATOR_TENSOR_INDEXING_OP_H_
@@ -209,8 +209,8 @@ inline bool SparseEmbeddingOpForwardStorageType(const nnvm::NodeAttrs& attrs,
   int& out_stype = out_attrs->at(embedding::kOut);
   bool dispatched = false;
   if (!dispatched && data_stype == kDefaultStorage &&
-      weight_stype == kRowSparseStorage) {
-    // dns, rsp -> dns
+      (weight_stype == kRowSparseStorage || weight_stype == kDefaultStorage)) {
+    // dns, rsp/dns -> dns
     dispatched = storage_type_assign(&out_stype, kDefaultStorage,
                                      dispatch_mode, DispatchMode::kFComputeEx);
   }
@@ -423,7 +423,13 @@ void SparseEmbeddingOpForwardEx(const nnvm::NodeAttrs& attrs,
   const auto out_stype = out.storage_type();
   if (data_stype == kDefaultStorage && weight_stype == kRowSparseStorage &&
       out_stype == kDefaultStorage) {
+    // dns, rsp -> dns
     SparseEmbeddingOpForwardRspImpl<xpu>(ctx, data.data(), weight, req[0], out.data());
+  } else if (data_stype == kDefaultStorage && weight_stype == kDefaultStorage &&
+             out_stype == kDefaultStorage) {
+    // dns, dns -> dns
+    EmbeddingOpForwardDnsImpl<xpu>(ctx.get_stream<xpu>(), data.data(), weight.data(),
+                                   req[0], out.data());
   } else {
     LogUnimplementedOp(attrs, ctx, inputs, req, outputs);
   }
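The two header changes above work together: storage-type inference now accepts
a dense weight and still dispatches to FComputeEx, and the forward pass routes
the new "dns, dns -> dns" case to the dense embedding kernel. A hedged
symbol-level illustration (not part of the commit; the shapes and variable
names are illustrative)::

    import mxnet as mx

    data = mx.sym.Variable('data')
    # 'default' weight storage was rejected by this inference function before
    weight = mx.sym.Variable('embed_weight', stype='default')
    embed = mx.sym.contrib.SparseEmbedding(data=data, weight=weight,
                                           input_dim=50, output_dim=3)
    exe = embed.simple_bind(mx.cpu(), data=(8,))
    print([o.stype for o in exe.outputs])  # ['default']: the dns, dns -> dns path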
diff --git a/tests/python/unittest/test_sparse_operator.py b/tests/python/unittest/test_sparse_operator.py
index 3479486..31f2e49 100644
--- a/tests/python/unittest/test_sparse_operator.py
+++ b/tests/python/unittest/test_sparse_operator.py
@@ -1638,10 +1638,10 @@ def test_sparse_elementwise_sum():
 @with_seed()
 def test_sparse_embedding():
     ''' test sparse embedding operator '''
-    def check_sparse_embedding(in_dim, out_dim, batch, densities, deterministic):
+    def check_sparse_embedding(in_dim, out_dim, batch, densities, deterministic, weight_stype):
         # init executor
         data = mx.sym.Variable("data")
-        weight = mx.sym.Variable("embed_weight", stype='row_sparse')
+        weight = mx.sym.Variable("embed_weight", stype=weight_stype)
         embed = mx.sym.contrib.SparseEmbedding(data=data, weight=weight, input_dim=in_dim,
                                                output_dim=out_dim, deterministic=deterministic,
                                                name="embed")
@@ -1662,21 +1662,29 @@ def test_sparse_embedding():
         weight = arg_map["embed_weight"]
         for density in densities:
             # update weight based on density
-            weight[:] = rand_ndarray(weight.shape, 'row_sparse', density=density)
+            weight[:] = rand_ndarray(weight.shape, weight_stype, density=density)
             # check forward
             exe_test.forward(is_train=True)
             assert_almost_equal(exe_test.outputs[0].asnumpy(), np.dot(np_onehot, weight.asnumpy()), atol=1e-4)
             # check backward
             exe_test.backward([grad])
             assert_almost_equal(grad_map["embed_weight"].asnumpy(), np.dot(np_onehot.T, grad.asnumpy()), atol=1e-4)
+            # run twice to check if the result is deterministic when passing "deterministic=True" to SparseEmbedding
+            if deterministic:
+                grad_ref = grad_map["embed_weight"].asnumpy()
+                exe_test.backward([grad])
+                assert_almost_equal(grad_map["embed_weight"].asnumpy(), grad_ref, atol=0, rtol=0)
 
     densities = [0, 0.5, 1]
     in_dim = 50
     out_dim = 3
     batch = 8
-    check_sparse_embedding(in_dim, out_dim, batch, densities, True)
-    check_sparse_embedding(in_dim, out_dim, batch, densities, False)
-
+    stypes = ['default', 'row_sparse']
+    deterministics = [True, False]
+    for stype in stypes:
+        for deterministic in deterministics:
+            check_sparse_embedding(in_dim, out_dim, batch, densities, deterministic, stype)
+            check_sparse_embedding(in_dim, out_dim, batch, densities, deterministic, stype)
 
 @with_seed()
 def test_sparse_broadcast_mul_div():
-- 
To stop receiving notification emails like this one, please contact
hai...@apache.org.
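As a postscript on the updated test: a hedged sketch of the determinism
property it asserts (not part of the commit; this version drives the operator
imperatively with autograd, while the committed test uses a symbolic
executor)::

    import mxnet as mx

    data = mx.nd.array([1, 1, 4])            # feature id 1 appears twice
    weight = mx.nd.ones((5, 2))
    weight.attach_grad(stype='row_sparse')   # the weight gradient is row_sparse

    def one_backward():
        # forward/backward with deterministic gradient accumulation
        with mx.autograd.record():
            out = mx.nd.contrib.SparseEmbedding(data=data, weight=weight,
                                                input_dim=5, output_dim=2,
                                                deterministic=True)
        out.backward()
        return weight.grad.asnumpy()

    grad_ref = one_backward()
    # a second identical pass must match bit-for-bit (rtol=0, atol=0 above)
    assert (one_backward() == grad_ref).all()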