http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd08f413/test/singa/test_cudnn_softmax.cc ---------------------------------------------------------------------- diff --cc test/singa/test_cudnn_softmax.cc index e11be87,53ecb2b..d715b33 --- a/test/singa/test_cudnn_softmax.cc +++ b/test/singa/test_cudnn_softmax.cc @@@ -33,75 -35,133 +35,129 @@@ TEST(CudnnSoftmax, Setup) singa::LayerConf conf; singa::SoftmaxConf* softmaxconf = conf.mutable_softmax_conf(); - softmaxconf->set_axis(2); - - sft.Setup(conf); - sft.InitCudnn(1, singa::kFloat32); - EXPECT_EQ(2, sft.Axis()); + softmaxconf->set_algorithm("fast"); + sft.Setup(Shape{1}, conf); + EXPECT_EQ(CUDNN_SOFTMAX_FAST, sft.Algorithm()); } - TEST(CudnnSoftmax, Forward) { - const float x[] = {1.0f, 2.0f, 0.0f, -2.0f, -3.0f, -1.0}; + TEST(CudnnSoftmax, Forward1D) { + const float x[] = {1.f, 2.f, 0.f, -2.f, -3.f, -1.f}; size_t n = sizeof(x) / sizeof(float); -- singa::CudaGPU cuda(0, 1); - singa::Tensor in(singa::Shape{n}, &cuda); ++ auto cuda = std::make_shared<singa::CudaGPU>(0, 1); + singa::Shape shape = {n}; - singa::Tensor in(shape, &cuda); ++ singa::Tensor in(shape, cuda); in.CopyDataFromHostPtr<float>(x, n); - int axis = 1; CudnnSoftmax sft; singa::LayerConf conf; singa::SoftmaxConf* softmaxconf = conf.mutable_softmax_conf(); - softmaxconf->set_axis(axis); - sft.Setup(conf); - sft.InitCudnn(n, singa::kFloat32); - + softmaxconf->set_algorithm("accurate"); + sft.Setup(Shape{1}, conf); singa::Tensor out = sft.Forward(singa::kTrain, in); -- singa::CppCPU host(0, 1); -- out.ToDevice(&host); - const float* yptr = out.data<const float*>(); ++ out.ToHost(); + const float* yptr = out.data<float>(); EXPECT_EQ(n, out.Size()); float* y = new float[n]; float sigma = 0.f; for (size_t i = 0; i < n; i++) sigma += exp(x[i]); for (size_t i = 0; i < n; i++) y[i] = exp(x[i]) / sigma; - EXPECT_FLOAT_EQ(y[0], yptr[0]); - EXPECT_FLOAT_EQ(y[4], yptr[4]); - EXPECT_FLOAT_EQ(y[5], yptr[5]); + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(y[i], yptr[i]); } - TEST(CudnnSoftmax, Backward) { - const float x[] = {1.0f, 2.0f, 3.0f, -2.0f, -3.0f, -1.0}; + TEST(CudnnSoftmax, Backward1D) { + const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -1.f}; size_t n = sizeof(x) / sizeof(float); -- singa::CudaGPU cuda(0, 1); - singa::Tensor in(singa::Shape{n}, &cuda); + singa::Shape shape = {n}; - singa::Tensor in(shape, &cuda); ++ auto cuda = std::make_shared<singa::CudaGPU>(0, 1); ++ singa::Tensor in(shape, cuda); in.CopyDataFromHostPtr<float>(x, n); - int axis = 1; CudnnSoftmax sft; singa::LayerConf conf; singa::SoftmaxConf* softmaxconf = conf.mutable_softmax_conf(); - softmaxconf->set_axis(axis); - sft.Setup(conf); + softmaxconf->set_algorithm("accurate"); + sft.Setup(Shape{1}, conf); + singa::Tensor out = sft.Forward(singa::kTrain, in); -- singa::CppCPU host(0, 1); -- out.ToDevice(&host); - const float* yptr = out.data<const float*>(); ++ out.ToHost(); + const float* yptr = out.data<float>(); - const float grad[] = {2.0f, -3.0f, 1.0f, 3.0f, -1.0f, -2.0}; - singa::Tensor out_diff(singa::Shape{n}, &cuda); + const float grad[] = {2.f, -3.f, 1.f, 3.f, -1.f, -2.f}; - singa::Tensor out_diff(shape, &cuda); ++ singa::Tensor out_diff(shape, cuda); out_diff.CopyDataFromHostPtr<float>(grad, n); const auto ret = sft.Backward(singa::kTrain, out_diff); singa::Tensor in_diff = ret.first; -- in_diff.ToDevice(&host); - const float* xptr = in_diff.data<const float*>(); ++ in_diff.ToHost(); + const float* xptr = in_diff.data<float>(); float* dx = new float[n]; float sigma = 0.f; for (size_t i = 0; 
i < n; i++) sigma += grad[i] * yptr[i]; for (size_t i = 0; i < n; i++) dx[i] = (grad[i] - sigma) * yptr[i]; - EXPECT_FLOAT_EQ(dx[0], xptr[0]); - EXPECT_FLOAT_EQ(dx[4], xptr[4]); - EXPECT_FLOAT_EQ(dx[5], xptr[5]); + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(dx[i], xptr[i]); + } + + TEST(CudnnSoftmax, Forward2D) { + const float x[] = {1.f, 2.f, 0.f, -2.f, -3.f, -1.f}; + size_t n = sizeof(x) / sizeof(float); + size_t batch = 2, c = 3; - singa::CudaGPU cuda(0, 1); + singa::Shape shape = {batch, c}; - singa::Tensor in(shape, &cuda); ++ auto cuda = std::make_shared<singa::CudaGPU>(0, 1); ++ singa::Tensor in(shape, cuda); + in.CopyDataFromHostPtr<float>(x, n); + + CudnnSoftmax sft; + singa::LayerConf conf; + singa::SoftmaxConf* softmaxconf = conf.mutable_softmax_conf(); + softmaxconf->set_algorithm("accurate"); + sft.Setup(Shape{c}, conf); + + singa::Tensor out = sft.Forward(singa::kTrain, in); - singa::CppCPU host(0, 1); - out.ToDevice(&host); ++ out.ToHost(); + const float* yptr = out.data<float>(); + EXPECT_EQ(n, out.Size()); + + float* y = new float[n]; + float* sigma = new float[batch]; + for (size_t i = 0; i < batch; i++) sigma[i] = 0.f; + for (size_t i = 0; i < n; i++) sigma[i / c] += exp(x[i]); + for (size_t i = 0; i < n; i++) y[i] = exp(x[i]) / sigma[i / c]; + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(y[i], yptr[i]); + } + + TEST(CudnnSoftmax, Backward2D) { + const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -1.f}; + size_t n = sizeof(x) / sizeof(float); + size_t batch = 2, c = 3; - singa::CudaGPU cuda(0, 1); ++ auto cuda = std::make_shared<singa::CudaGPU>(0, 1); + singa::Shape shape = {batch, c}; - singa::Tensor in(shape, &cuda); ++ singa::Tensor in(shape, cuda); + in.CopyDataFromHostPtr<float>(x, n); + + CudnnSoftmax sft; + singa::LayerConf conf; + singa::SoftmaxConf* softmaxconf = conf.mutable_softmax_conf(); + softmaxconf->set_algorithm("accurate"); + sft.Setup(Shape{c}, conf); + + singa::Tensor out = sft.Forward(singa::kTrain, in); - singa::CppCPU host(0, 1); - out.ToDevice(&host); ++ out.ToHost(); + const float* yptr = out.data<float>(); + + const float grad[] = {2.f, -3.f, 1.f, 3.f, -1.f, -2.f}; - singa::Tensor out_diff(shape, &cuda); ++ singa::Tensor out_diff(shape, cuda); + out_diff.CopyDataFromHostPtr<float>(grad, n); + const auto ret = sft.Backward(singa::kTrain, out_diff); + singa::Tensor in_diff = ret.first; - in_diff.ToDevice(&host); ++ in_diff.ToHost(); + const float* xptr = in_diff.data<float>(); + + float* dx = new float[n]; + float* sigma = new float[batch]; + for (size_t i = 0; i < batch; i++) sigma[i] = 0.f; + for (size_t i = 0; i < n; i++) sigma[i / c] += grad[i] * yptr[i]; + for (size_t i = 0; i < n; i++) dx[i] = (grad[i] - sigma[i / c]) * yptr[i]; + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(dx[i], xptr[i]); } #endif // USE_CUDNN
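
For reference, the quantities the CudnnSoftmax Forward/Backward tests above compare against can be written as a small host-side sketch. This is only an illustration of the math the tests encode (softmax and its Jacobian-vector product), not part of the patch; the function name softmax_reference and the use of std::vector are assumptions for the example.

#include <cmath>
#include <cstddef>
#include <vector>

// Host-side reference used conceptually by the tests:
//   y[i]  = exp(x[i]) / sum_j exp(x[j])
//   dx[i] = (g[i] - sum_j g[j] * y[j]) * y[i]
void softmax_reference(const std::vector<float>& x, const std::vector<float>& g,
                       std::vector<float>* y, std::vector<float>* dx) {
  const size_t n = x.size();
  y->resize(n);
  dx->resize(n);
  float denom = 0.f;
  for (size_t i = 0; i < n; i++) denom += std::exp(x[i]);
  for (size_t i = 0; i < n; i++) (*y)[i] = std::exp(x[i]) / denom;
  float sigma = 0.f;  // sum_j g[j] * y[j]
  for (size_t i = 0; i < n; i++) sigma += g[i] * (*y)[i];
  for (size_t i = 0; i < n; i++) (*dx)[i] = (g[i] - sigma) * (*y)[i];
}

The 2D variants apply the same formulas independently to each row of a {batch, c} tensor, which is why the tests accumulate a separate sigma per batch index.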
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd08f413/test/singa/test_dense.cc ---------------------------------------------------------------------- diff --cc test/singa/test_dense.cc index 7ed4d33,a5fd960..363fb6e --- a/test/singa/test_dense.cc +++ b/test/singa/test_dense.cc @@@ -1,242 -1,245 +1,238 @@@ --/************************************************************ --* --* Licensed to the Apache Software Foundation (ASF) under one --* or more contributor license agreements. See the NOTICE file --* distributed with this work for additional information --* regarding copyright ownership. The ASF licenses this file --* to you under the Apache License, Version 2.0 (the --* "License"); you may not use this file except in compliance --* with the License. You may obtain a copy of the License at --* --* http://www.apache.org/licenses/LICENSE-2.0 --* --* Unless required by applicable law or agreed to in writing, --* software distributed under the License is distributed on an --* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --* KIND, either express or implied. See the License for the --* specific language governing permissions and limitations --* under the License. --* --*************************************************************/ --#include "../src/model/layer/dense.h" --#include "gtest/gtest.h" - #include "singa/singa_config.h" -#include "singa_config.h" -- --using singa::Dense; -using singa::Shape; --TEST(Dense, Setup) { -- Dense dense; -- EXPECT_EQ("Dense", dense.layer_type()); -- -- singa::LayerConf conf; -- singa::DenseConf *denseconf = conf.mutable_dense_conf(); - denseconf->set_num_input(2); -- denseconf->set_num_output(3); -- denseconf->set_transpose(false); - dense.Setup(conf); - dense.Setup(Shape{2}, conf); -- -- EXPECT_EQ(3u, dense.num_output()); -- EXPECT_EQ(2u, dense.num_input()); --} --#ifdef USE_CBLAS --TEST(Dense, ForwardCpp) { -- Dense dense; -- -- singa::LayerConf conf; -- singa::DenseConf *denseconf = conf.mutable_dense_conf(); - denseconf->set_num_input(2); -- denseconf->set_num_output(3); -- denseconf->set_transpose(false); - dense.Setup(conf); - dense.Setup(Shape{2}, conf); -- -- const size_t batchsize = 3, vdim = 2, hdim = 3; -- const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; -- singa::Tensor in(singa::Shape{batchsize, vdim}); -- in.CopyDataFromHostPtr(x, batchsize * vdim); -- -- // set weight -- const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f}; -- singa::Tensor weight(singa::Shape{hdim, vdim}); -- weight.CopyDataFromHostPtr(we, hdim * vdim); -- -- const float bia[hdim] = {1.0f, 1.0f, 1.0f}; -- singa::Tensor bias(singa::Shape{hdim}); -- bias.CopyDataFromHostPtr(bia, hdim); -- -- dense.set_weight(weight); -- dense.set_bias(bias); -- -- singa::Tensor out1 = dense.Forward(singa::kTrain, in); - const float *outptr1 = out1.data<const float *>(); - singa::CppCPU host(0, 1); - const float *outptr1 = out1.data<float>(); -- EXPECT_EQ(9u, out1.Size()); -- for (int i = 0; i < 3; i++) -- for (int j = 0; j < 3; j++) -- EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] + -- x[i * 2 + 1] * we[j * 2 + 1] + bia[j]), -- outptr1[i * 3 + j]); --} --#endif // USE_CBLAS -#ifdef USE_CUDA --TEST(Dense, BackwardCpp) { -- Dense dense; -- -- singa::LayerConf conf; -- singa::DenseConf *denseconf = conf.mutable_dense_conf(); - denseconf->set_num_input(2); -- denseconf->set_num_output(3); -- denseconf->set_transpose(false); - dense.Setup(conf); - dense.Setup(Shape{2}, conf); -- -- const size_t batchsize = 3, vdim = 2, hdim = 3; -- const 
float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - singa::CudaGPU cuda(0, 1); -- singa::Tensor in(singa::Shape{batchsize, vdim}); -- in.CopyDataFromHostPtr(x, batchsize * vdim); -- -- // set weight -- const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f}; -- singa::Tensor weight(singa::Shape{hdim, vdim}); -- weight.CopyDataFromHostPtr(we, hdim * vdim); -- -- const float bia[hdim] = {1.0f, 1.0f, 1.0f}; -- singa::Tensor bias(singa::Shape{hdim}); -- bias.CopyDataFromHostPtr(bia, hdim); -- -- dense.set_weight(weight); -- dense.set_bias(bias); -- -- singa::Tensor out1 = dense.Forward(singa::kTrain, in); -- -- // grad -- const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, -- 2.0f, 3.0f, 3.0f, 3.0f}; -- singa::Tensor grad(singa::Shape{batchsize, hdim}); -- grad.CopyDataFromHostPtr(dy, batchsize * hdim); -- -- const auto ret = dense.Backward(singa::kTrain, grad); - singa::CppCPU host(0, 1); -- singa::Tensor in_grad = ret.first; -- singa::Tensor dweight = ret.second.at(0); -- singa::Tensor dbias = ret.second.at(1); - const float *dx = in_grad.data<const float *>(); - const float *dx = in_grad.data<float>(); -- EXPECT_EQ(6u, in_grad.Size()); -- for (int i = 0; i < 3; i++) -- for (int j = 0; j < 2; j++) -- EXPECT_FLOAT_EQ( -- (dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] + -- dy[i * 3 + 2] * we[2 * 2 + j]), -- dx[i * 2 + j]); - const float *dweightx = dweight.data<const float *>(); - const float *dweightx = dweight.data<float>(); -- EXPECT_EQ(6u, dweight.Size()); -- for (int i = 0; i < 3; i++) -- for (int j = 0; j < 2; j++) -- EXPECT_FLOAT_EQ( -- (dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] + -- dy[2 * 3 + i] * x[2 * 2 + j]), -- dweightx[i * 2 + j]); - const float *dbiasx = dbias.data<const float *>(); - const float *dbiasx = dbias.data<float>(); -- EXPECT_EQ(3u, dbias.Size()); -- for (int i = 0; i < 3; i++) -- EXPECT_FLOAT_EQ((dy[0 * 3 + i] + dy[1 * 3 + i] + dy[2 * 3 + i]), dbiasx[i]); --} -#endif -- --#ifdef USE_CUDA --TEST(Dense, ForwardCuda) { -- Dense dense; -- -- singa::LayerConf conf; -- singa::DenseConf *denseconf = conf.mutable_dense_conf(); - denseconf->set_num_input(2); -- denseconf->set_num_output(3); -- denseconf->set_transpose(false); - dense.Setup(conf); - dense.Setup(Shape{2}, conf); -- -- const size_t batchsize = 3, vdim = 2, hdim = 3; -- const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - auto cuda = std::make_shared<singa::CudaGPU>(0, 1); - singa::Tensor in(singa::Shape{batchsize, vdim}, cuda); - singa::CudaGPU cuda(0, 1); - singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda); -- in.CopyDataFromHostPtr(x, batchsize * vdim); -- -- // set weight -- const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f}; - singa::Tensor weight(singa::Shape{hdim, vdim}, cuda); - singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda); -- weight.CopyDataFromHostPtr(we, hdim * vdim); -- -- const float bia[hdim] = {1.0f, 1.0f, 1.0f}; - singa::Tensor bias(singa::Shape{hdim}, cuda); - singa::Tensor bias(singa::Shape{hdim}, &cuda); -- bias.CopyDataFromHostPtr(bia, hdim); -- -- dense.set_weight(weight); -- dense.set_bias(bias); -- -- singa::Tensor out1 = dense.Forward(singa::kTrain, in); - out1.ToHost(); - const float *outptr1 = out1.data<const float *>(); - singa::CppCPU host(0, 1); - out1.ToDevice(&host); - const float *outptr1 = out1.data<float>(); -- EXPECT_EQ(9u, out1.Size()); -- for (int i = 0; i < 3; i++) -- for (int j = 0; j < 3; j++) -- EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] + -- x[i 
* 2 + 1] * we[j * 2 + 1] + bia[j]), -- outptr1[i * 3 + j]); --} --TEST(Dense, BackwardCuda) { -- Dense dense; -- -- singa::LayerConf conf; -- singa::DenseConf *denseconf = conf.mutable_dense_conf(); - denseconf->set_num_input(2); -- denseconf->set_num_output(3); -- denseconf->set_transpose(false); - dense.Setup(conf); - dense.Setup(Shape{2}, conf); -- -- const size_t batchsize = 3, vdim = 2, hdim = 3; -- const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - auto cuda = std::make_shared<singa::CudaGPU>(0, 1); - singa::Tensor in(singa::Shape{batchsize, vdim}, cuda); - singa::CudaGPU cuda(0, 1); - singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda); -- in.CopyDataFromHostPtr(x, batchsize * vdim); -- -- // set weight -- const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f}; - singa::Tensor weight(singa::Shape{hdim, vdim}, cuda); - singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda); -- weight.CopyDataFromHostPtr(we, hdim * vdim); -- -- const float bia[hdim] = {1.0f, 1.0f, 1.0f}; - singa::Tensor bias(singa::Shape{hdim}, cuda); - singa::Tensor bias(singa::Shape{hdim}, &cuda); -- bias.CopyDataFromHostPtr(bia, hdim); -- -- dense.set_weight(weight); -- dense.set_bias(bias); -- -- singa::Tensor out1 = dense.Forward(singa::kTrain, in); -- -- // grad -- const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, -- 2.0f, 3.0f, 3.0f, 3.0f}; - singa::Tensor grad(singa::Shape{batchsize, hdim}, cuda); - singa::Tensor grad(singa::Shape{batchsize, hdim}, &cuda); -- grad.CopyDataFromHostPtr(dy, batchsize * hdim); -- -- const auto ret = dense.Backward(singa::kTrain, grad); - singa::CppCPU host(0, 1); -- singa::Tensor in_grad = ret.first; -- singa::Tensor dweight = ret.second.at(0); -- singa::Tensor dbias = ret.second.at(1); - in_grad.ToHost(); - const float *dx = in_grad.data<const float *>(); - in_grad.ToDevice(&host); - const float *dx = in_grad.data<float>(); -- EXPECT_EQ(6u, in_grad.Size()); -- for (int i = 0; i < 3; i++) -- for (int j = 0; j < 2; j++) -- EXPECT_FLOAT_EQ( -- (dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] + -- dy[i * 3 + 2] * we[2 * 2 + j]), -- dx[i * 2 + j]); - dweight.ToHost(); - const float *dweightx = dweight.data<const float *>(); - dweight.ToDevice(&host); - const float *dweightx = dweight.data<float>(); -- EXPECT_EQ(6u, dweight.Size()); -- for (int i = 0; i < 3; i++) -- for (int j = 0; j < 2; j++) -- EXPECT_FLOAT_EQ( -- (dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] + -- dy[2 * 3 + i] * x[2 * 2 + j]), -- dweightx[i * 2 + j]); - dbias.ToHost(); - const float *dbiasx = dbias.data<const float *>(); - dbias.ToDevice(&host); - const float *dbiasx = dbias.data<float>(); -- EXPECT_EQ(3u, dbias.Size()); -- for (int i = 0; i < 3; i++) -- EXPECT_FLOAT_EQ((dy[0 * 3 + i] + dy[1 * 3 + i] + dy[2 * 3 + i]), dbiasx[i]); --} --#endif ++/************************************************************ ++* ++* Licensed to the Apache Software Foundation (ASF) under one ++* or more contributor license agreements. See the NOTICE file ++* distributed with this work for additional information ++* regarding copyright ownership. The ASF licenses this file ++* to you under the Apache License, Version 2.0 (the ++* "License"); you may not use this file except in compliance ++* with the License. 
You may obtain a copy of the License at ++* ++* http://www.apache.org/licenses/LICENSE-2.0 ++* ++* Unless required by applicable law or agreed to in writing, ++* software distributed under the License is distributed on an ++* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY ++* KIND, either express or implied. See the License for the ++* specific language governing permissions and limitations ++* under the License. ++* ++*************************************************************/ ++#include "../src/model/layer/dense.h" ++#include "gtest/gtest.h" ++#include "singa/singa_config.h" ++ ++using singa::Dense; ++using singa::Shape; ++TEST(Dense, Setup) { ++ Dense dense; ++ EXPECT_EQ("Dense", dense.layer_type()); ++ ++ singa::LayerConf conf; ++ singa::DenseConf *denseconf = conf.mutable_dense_conf(); ++ denseconf->set_num_output(3); ++ denseconf->set_transpose(false); ++ dense.Setup(Shape{2}, conf); ++ ++ EXPECT_EQ(3u, dense.num_output()); ++ EXPECT_EQ(2u, dense.num_input()); ++} ++#ifdef USE_CBLAS ++TEST(Dense, ForwardCpp) { ++ Dense dense; ++ ++ singa::LayerConf conf; ++ singa::DenseConf *denseconf = conf.mutable_dense_conf(); ++ denseconf->set_num_output(3); ++ denseconf->set_transpose(false); ++ dense.Setup(Shape{2}, conf); ++ ++ const size_t batchsize = 3, vdim = 2, hdim = 3; ++ const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; ++ singa::Tensor in(singa::Shape{batchsize, vdim}); ++ in.CopyDataFromHostPtr(x, batchsize * vdim); ++ ++ // set weight ++ const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f}; ++ singa::Tensor weight(singa::Shape{hdim, vdim}); ++ weight.CopyDataFromHostPtr(we, hdim * vdim); ++ ++ const float bia[hdim] = {1.0f, 1.0f, 1.0f}; ++ singa::Tensor bias(singa::Shape{hdim}); ++ bias.CopyDataFromHostPtr(bia, hdim); ++ ++ dense.set_weight(weight); ++ dense.set_bias(bias); ++ ++ singa::Tensor out1 = dense.Forward(singa::kTrain, in); ++ const float *outptr1 = out1.data<float>(); ++ EXPECT_EQ(9u, out1.Size()); ++ for (int i = 0; i < 3; i++) ++ for (int j = 0; j < 3; j++) ++ EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] + ++ x[i * 2 + 1] * we[j * 2 + 1] + bia[j]), ++ outptr1[i * 3 + j]); ++} ++TEST(Dense, BackwardCpp) { ++ Dense dense; ++ ++ singa::LayerConf conf; ++ singa::DenseConf *denseconf = conf.mutable_dense_conf(); ++ denseconf->set_num_output(3); ++ denseconf->set_transpose(false); ++ dense.Setup(Shape{2}, conf); ++ ++ const size_t batchsize = 3, vdim = 2, hdim = 3; ++ const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; ++ singa::Tensor in(singa::Shape{batchsize, vdim}); ++ in.CopyDataFromHostPtr(x, batchsize * vdim); ++ ++ // set weight ++ const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f}; ++ singa::Tensor weight(singa::Shape{hdim, vdim}); ++ weight.CopyDataFromHostPtr(we, hdim * vdim); ++ ++ const float bia[hdim] = {1.0f, 1.0f, 1.0f}; ++ singa::Tensor bias(singa::Shape{hdim}); ++ bias.CopyDataFromHostPtr(bia, hdim); ++ ++ dense.set_weight(weight); ++ dense.set_bias(bias); ++ ++ singa::Tensor out1 = dense.Forward(singa::kTrain, in); ++ ++ // grad ++ const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, ++ 2.0f, 3.0f, 3.0f, 3.0f}; ++ singa::Tensor grad(singa::Shape{batchsize, hdim}); ++ grad.CopyDataFromHostPtr(dy, batchsize * hdim); ++ ++ const auto ret = dense.Backward(singa::kTrain, grad); ++ singa::Tensor in_grad = ret.first; ++ singa::Tensor dweight = ret.second.at(0); ++ singa::Tensor dbias = ret.second.at(1); ++ const float *dx = in_grad.data<float>(); ++ EXPECT_EQ(6u, 
in_grad.Size()); ++ for (int i = 0; i < 3; i++) ++ for (int j = 0; j < 2; j++) ++ EXPECT_FLOAT_EQ( ++ (dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] + ++ dy[i * 3 + 2] * we[2 * 2 + j]), ++ dx[i * 2 + j]); ++ const float *dweightx = dweight.data<float>(); ++ EXPECT_EQ(6u, dweight.Size()); ++ for (int i = 0; i < 3; i++) ++ for (int j = 0; j < 2; j++) ++ EXPECT_FLOAT_EQ( ++ (dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] + ++ dy[2 * 3 + i] * x[2 * 2 + j]), ++ dweightx[i * 2 + j]); ++ const float *dbiasx = dbias.data<float>(); ++ EXPECT_EQ(3u, dbias.Size()); ++ for (int i = 0; i < 3; i++) ++ EXPECT_FLOAT_EQ((dy[0 * 3 + i] + dy[1 * 3 + i] + dy[2 * 3 + i]), dbiasx[i]); ++} ++#endif // USE_CBLAS ++ ++#ifdef USE_CUDA ++TEST(Dense, ForwardCuda) { ++ Dense dense; ++ ++ singa::LayerConf conf; ++ singa::DenseConf *denseconf = conf.mutable_dense_conf(); ++ denseconf->set_num_output(3); ++ denseconf->set_transpose(false); ++ dense.Setup(Shape{2}, conf); ++ ++ const size_t batchsize = 3, vdim = 2, hdim = 3; ++ const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; ++ auto cuda = std::make_shared<singa::CudaGPU>(0, 1); ++ singa::Tensor in(singa::Shape{batchsize, vdim}, cuda); ++ in.CopyDataFromHostPtr(x, batchsize * vdim); ++ ++ // set weight ++ const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f}; ++ singa::Tensor weight(singa::Shape{hdim, vdim}, cuda); ++ weight.CopyDataFromHostPtr(we, hdim * vdim); ++ ++ const float bia[hdim] = {1.0f, 1.0f, 1.0f}; ++ singa::Tensor bias(singa::Shape{hdim}, cuda); ++ bias.CopyDataFromHostPtr(bia, hdim); ++ ++ dense.set_weight(weight); ++ dense.set_bias(bias); ++ ++ singa::Tensor out1 = dense.Forward(singa::kTrain, in); ++ out1.ToHost(); ++ const float *outptr1 = out1.data<float>(); ++ EXPECT_EQ(9u, out1.Size()); ++ for (int i = 0; i < 3; i++) ++ for (int j = 0; j < 3; j++) ++ EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] + ++ x[i * 2 + 1] * we[j * 2 + 1] + bia[j]), ++ outptr1[i * 3 + j]); ++} ++TEST(Dense, BackwardCuda) { ++ Dense dense; ++ ++ singa::LayerConf conf; ++ singa::DenseConf *denseconf = conf.mutable_dense_conf(); ++ denseconf->set_num_output(3); ++ denseconf->set_transpose(false); ++ dense.Setup(Shape{2}, conf); ++ ++ const size_t batchsize = 3, vdim = 2, hdim = 3; ++ const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; ++ auto cuda = std::make_shared<singa::CudaGPU>(0, 1); ++ singa::Tensor in(singa::Shape{batchsize, vdim}, cuda); ++ in.CopyDataFromHostPtr(x, batchsize * vdim); ++ ++ // set weight ++ const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f}; ++ singa::Tensor weight(singa::Shape{hdim, vdim}, cuda); ++ weight.CopyDataFromHostPtr(we, hdim * vdim); ++ ++ const float bia[hdim] = {1.0f, 1.0f, 1.0f}; ++ singa::Tensor bias(singa::Shape{hdim}, cuda); ++ bias.CopyDataFromHostPtr(bia, hdim); ++ ++ dense.set_weight(weight); ++ dense.set_bias(bias); ++ ++ singa::Tensor out1 = dense.Forward(singa::kTrain, in); ++ ++ // grad ++ const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, ++ 2.0f, 3.0f, 3.0f, 3.0f}; ++ singa::Tensor grad(singa::Shape{batchsize, hdim}, cuda); ++ grad.CopyDataFromHostPtr(dy, batchsize * hdim); ++ ++ const auto ret = dense.Backward(singa::kTrain, grad); ++ singa::Tensor in_grad = ret.first; ++ singa::Tensor dweight = ret.second.at(0); ++ singa::Tensor dbias = ret.second.at(1); ++ in_grad.ToHost(); ++ const float *dx = in_grad.data<float>(); ++ EXPECT_EQ(6u, in_grad.Size()); ++ for (int i = 0; i < 3; i++) ++ for (int j = 0; j < 2; j++) 
++ EXPECT_FLOAT_EQ( ++ (dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] + ++ dy[i * 3 + 2] * we[2 * 2 + j]), ++ dx[i * 2 + j]); ++ dweight.ToHost(); ++ const float *dweightx = dweight.data<float>(); ++ EXPECT_EQ(6u, dweight.Size()); ++ for (int i = 0; i < 3; i++) ++ for (int j = 0; j < 2; j++) ++ EXPECT_FLOAT_EQ( ++ (dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] + ++ dy[2 * 3 + i] * x[2 * 2 + j]), ++ dweightx[i * 2 + j]); ++ dbias.ToHost(); ++ const float *dbiasx = dbias.data<float>(); ++ EXPECT_EQ(3u, dbias.Size()); ++ for (int i = 0; i < 3; i++) ++ EXPECT_FLOAT_EQ((dy[0 * 3 + i] + dy[1 * 3 + i] + dy[2 * 3 + i]), dbiasx[i]); ++} ++#endif http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd08f413/test/singa/test_flatten.cc ---------------------------------------------------------------------- diff --cc test/singa/test_flatten.cc index 0000000,2a77272..25e00c4 mode 000000,100644..100644 --- a/test/singa/test_flatten.cc +++ b/test/singa/test_flatten.cc @@@ -1,0 -1,145 +1,143 @@@ + /************************************************************ + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + *************************************************************/ + + #include "../src/model/layer/flatten.h" + #include "gtest/gtest.h" + + using singa::Flatten; + using singa::Shape; + TEST(Flatten, Setup) { + Flatten flt; + EXPECT_EQ("Flatten", flt.layer_type()); + + singa::LayerConf conf; + singa::FlattenConf *flattenconf = conf.mutable_flatten_conf(); + flattenconf->set_axis(1); + + flt.Setup(Shape{2}, conf); + EXPECT_EQ(1, flt.Axis()); + } + + TEST(Flatten, ForwardCPU) { + const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -4.f, + 1.5f, -1.5f, 0.f, -0.5f, -2.f, -1.f}; + size_t n = sizeof(x) / sizeof(float); + singa::Shape s = {2, 1, 3, 2}; + singa::Tensor in(s); + in.CopyDataFromHostPtr<float>(x, n); + + int axis = 3; + Flatten flt; + singa::LayerConf conf; + singa::FlattenConf *flattenconf = conf.mutable_flatten_conf(); + flattenconf->set_axis(axis); + flt.Setup(Shape{1, 3, 2}, conf); + + singa::Tensor out = flt.Forward(singa::kTrain, in); + EXPECT_EQ(n, out.Size()); + EXPECT_EQ(6u, out.shape(0)); + EXPECT_EQ(2u, out.shape(1)); + const float *yptr = out.data<float>(); + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(x[i], yptr[i]); + } + + TEST(Flatten, BackwardCPU) { + // directly use input as the output_grad for backward + // note that only the shape of input really matters + const float dy[] = {1.f, 2.f, 3.f, -2.f, -3.f, -4.f, + 1.5f, -1.5f, 0.f, -0.5f, -2.f, -1.f}; + size_t n = sizeof(dy) / sizeof(float); + singa::Tensor in(singa::Shape{2, 1, 3, 2}); + in.CopyDataFromHostPtr<float>(dy, n); + + int axis = 2; + Flatten flt; + singa::LayerConf conf; + singa::FlattenConf *flattenconf = conf.mutable_flatten_conf(); + flattenconf->set_axis(axis); + flt.Setup(Shape{1, 3, 2}, conf); + + singa::Tensor temp = flt.Forward(singa::kTrain, in); + const auto out = flt.Backward(singa::kTrain, temp); + const float *xptr = out.first.data<float>(); + EXPECT_EQ(n, out.first.Size()); + EXPECT_EQ(2u, out.first.shape(0)); + EXPECT_EQ(1u, out.first.shape(1)); + EXPECT_EQ(3u, out.first.shape(2)); + EXPECT_EQ(2u, out.first.shape(3)); + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(dy[i], xptr[i]); + } + + #ifdef USE_CUDA + TEST(Flatten, ForwardGPU) { + const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -4.f, + 1.5f, -1.5f, 0.f, -0.5f, -2.f, -1.f}; + size_t n = sizeof(x) / sizeof(float); - singa::CudaGPU cuda(0, 1); - singa::Tensor in(singa::Shape{2, 1, 3, 2}, &cuda); ++ auto cuda = std::make_shared<singa::CudaGPU>(); ++ singa::Tensor in(singa::Shape{2, 1, 3, 2}, cuda); + in.CopyDataFromHostPtr<float>(x, n); + + int axis = 3; + Flatten flt; + singa::LayerConf conf; + singa::FlattenConf *flattenconf = conf.mutable_flatten_conf(); + flattenconf->set_axis(axis); + flt.Setup(Shape{1, 3, 2}, conf); + + singa::Tensor out = flt.Forward(singa::kTrain, in); - singa::CppCPU host(0, 1); - out.ToDevice(&host); ++ out.ToHost(); + EXPECT_EQ(n, out.Size()); + EXPECT_EQ(6u, out.shape(0)); + EXPECT_EQ(2u, out.shape(1)); + const float *yptr = out.data<float>(); + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(x[i], yptr[i]); + } + + TEST(Flatten, BackwardGPU) { + // directly use input as the output_grad for backward + // note that only the shape of input really matters + const float dy[] = {1.f, 2.f, 3.f, -2.f, -3.f, -4.f, + 1.5f, -1.5f, 0.f, -0.5f, -2.f, -1.f}; + size_t n = sizeof(dy) / sizeof(float); - singa::CudaGPU cuda(0, 1); - singa::Tensor in(singa::Shape{2, 1, 3, 2}, &cuda); ++ auto cuda = std::make_shared<singa::CudaGPU>(); ++ singa::Tensor in(singa::Shape{2, 1, 3, 2}, cuda); + in.CopyDataFromHostPtr<float>(dy, n); + + 
int axis = 2; + Flatten flt; + singa::LayerConf conf; + singa::FlattenConf *flattenconf = conf.mutable_flatten_conf(); + flattenconf->set_axis(axis); + flt.Setup(Shape{1, 3, 2}, conf); + + singa::Tensor out = flt.Forward(singa::kTrain, in); + const auto ret = flt.Backward(singa::kTrain, out); - singa::CppCPU host(0, 1); + singa::Tensor in_diff = ret.first; - in_diff.ToDevice(&host); ++ in_diff.ToHost(); + const float *xptr = in_diff.data<float>(); + EXPECT_EQ(n, in_diff.Size()); + EXPECT_EQ(2u, in_diff.shape(0)); + EXPECT_EQ(1u, in_diff.shape(1)); + EXPECT_EQ(3u, in_diff.shape(2)); + EXPECT_EQ(2u, in_diff.shape(3)); + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(dy[i], xptr[i]); + } + #endif // USE_CUDA http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd08f413/test/singa/test_initializer.cc ---------------------------------------------------------------------- diff --cc test/singa/test_initializer.cc index 0000000,e99cd79..4631af2 mode 000000,100644..100644 --- a/test/singa/test_initializer.cc +++ b/test/singa/test_initializer.cc @@@ -1,0 -1,148 +1,148 @@@ + /** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + #include "singa/model/initializer.h" + #include "gtest/gtest.h" + + TEST(Initializer, Constant) { + singa::init::Constant x; + size_t n = 10; + singa::Tensor t(singa::Shape{n}); + singa::FillerConf conf; + conf.set_value(3.1f); + x.Setup(conf); + x.Fill(&t); + const float* xPtr = t.data<float>(); + for (size_t i = 0; i < n; i++) + EXPECT_FLOAT_EQ(xPtr[i], 3.1f); + } + + + TEST(Initializer, Gaussian) { + singa::init::Gaussian x; + size_t n = 1000; + singa::Tensor t(singa::Shape{n}); + singa::FillerConf conf; + conf.set_mean(0.11f); + conf.set_std(0.01f); + x.Setup(conf); + x.Fill(&t); + const float* xPtr = t.data<float>(); + float mean = 0.0f, std = 0.0f; + for (size_t i = 0; i < n; i++) + mean += xPtr[i]; + mean /= n; + EXPECT_NEAR(mean, 0.11f, 1e-3); + for (size_t i = 0; i < n; i++) + std += (xPtr[i] - mean) * (xPtr[i] - mean); + std /= n; + std = sqrt(std); + EXPECT_NEAR(std, 0.01f, 1e-3); + } + + #ifdef USE_CUDA + TEST(Initializer, ConstantCUDA) { + singa::init::Constant x; - singa::CudaGPU dev; ++ auto dev = std::make_shared<singa::CudaGPU>(); + size_t n = 10; - singa::Tensor t(singa::Shape{n}, &dev); ++ singa::Tensor t(singa::Shape{n}, dev); + singa::FillerConf conf; + conf.set_value(3.1f); + x.Setup(conf); + x.Fill(&t); + t.ToHost(); + const float* xPtr = t.data<float>(); + for (size_t i = 0; i < n; i++) + EXPECT_FLOAT_EQ(xPtr[i], 3.1f); + + + singa::init::Constant y(-0.1f); - singa::Tensor s(singa::Shape{n}, &dev); ++ singa::Tensor s(singa::Shape{n}, dev); + y.Fill(&s); + s.ToHost(); + const float* sPtr = s.data<float>(); + for (size_t i = 0; i < n; i++) + EXPECT_FLOAT_EQ(sPtr[i], -0.1f); + } + + + TEST(Initializer, GaussianCUDA) { + singa::init::Gaussian x; - singa::CudaGPU dev; ++ auto dev = std::make_shared<singa::CudaGPU>(); + size_t n = 1000; - singa::Tensor t(singa::Shape{n}, &dev); ++ singa::Tensor t(singa::Shape{n}, dev); + singa::FillerConf conf; + conf.set_mean(0.11f); + conf.set_std(0.01f); + x.Setup(conf); + x.Fill(&t); + t.ToHost(); + const float* tPtr = t.data<float>(); + float mean = 0.0f, std = 0.0f; + for (size_t i = 0; i < n; i++) + mean += tPtr[i]; + mean /= n; + EXPECT_NEAR(mean, 0.11f, 1e-2); + for (size_t i = 0; i < n; i++) + std += (tPtr[i] - mean) * (tPtr[i] - mean); + std /= n; + std = sqrt(std); + EXPECT_NEAR(std, 0.01f, 1e-2); + + + singa::init::Gaussian y(1.5f, 0.1f); - singa::Tensor s(singa::Shape{n}, &dev); ++ singa::Tensor s(singa::Shape{n}, dev); + y.Fill(&s); + s.ToHost(); + const float* sPtr = s.data<float>(); + for (size_t i = 0; i < n; i++) + mean += sPtr[i]; + mean /= n; + EXPECT_NEAR(mean, 1.5f, 0.1f); + for (size_t i = 0; i < n; i++) + std += (sPtr[i] - mean) * (sPtr[i] - mean); + std /= n; + std = sqrt(std); + EXPECT_NEAR(std, 0.1f, 0.1f); + } + + TEST(Initializer, XavierCUDA) { + singa::init::Constant x; - singa::CudaGPU dev; ++ auto dev = std::make_shared<singa::CudaGPU>(); + size_t m = 30, n=40; - singa::Tensor t(singa::Shape{m, n}, &dev); ++ singa::Tensor t(singa::Shape{m, n}, dev); + x.Fill(&t); + t.ToHost(); + const float* xPtr = t.data<float>(); + float mean = 0.0f; + float high = -100.0f, low = 100.0f; + for (size_t i = 0; i < n; i++) { + mean += xPtr[i]; + if (high < xPtr[i]) + high = xPtr[i]; + if (low > xPtr[i]) + low = xPtr[i]; + } + mean /= m * n; + EXPECT_NEAR(mean, 0, 1e-2); + float scale = sqrt(6.0f / (m + n)); + EXPECT_LT(high, scale); + EXPECT_GT(low, -scale); + } + + #endif http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd08f413/test/singa/test_memory.cc 
---------------------------------------------------------------------- diff --cc test/singa/test_memory.cc index 90fc99a,0000000..b0df226 mode 100644,000000..100644 --- a/test/singa/test_memory.cc +++ b/test/singa/test_memory.cc @@@ -1,111 -1,0 +1,104 @@@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at - * ++* +* http://www.apache.org/licenses/LICENSE-2.0 - * ++* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#include "gtest/gtest.h" +#include "singa/utils/logging.h" +#include "singa/core/memory.h" +#include "singa/singa_config.h" ++#include "singa/utils/timer.h" +#include <sys/time.h> + +#ifdef USE_CUDA +TEST(CnmemPool, PoolInit) { - singa::CnMemPool pool; - pool.InitPool(); ++ singa::CnMemPool pool; ++ pool.InitPool(); +} + +TEST(CnmemPool, PoolInitAll) { - singa::CnMemPool pool; - int nDevices; - cudaGetDeviceCount(&nDevices); - CHECK_GE(nDevices,1); - pool.InitPool(nDevices,1000000U,0); ++ singa::CnMemPool pool; ++ int nDevices; ++ cudaGetDeviceCount(&nDevices); ++ CHECK_GE(nDevices, 1); ++ pool.InitPool(nDevices, 32, 0); +} + +TEST(CnmemPool, UsePool) { - singa::CnMemPool pool; - pool.InitPool(); - int numOfTests = 10; - int numOfWriteVsRead = 3; - int allocSize = 1000000U; - for(int i = 0; i < numOfTests; i++) { - int** memPtrs = new int*[numOfWriteVsRead]; - for(int j = 0; j < numOfWriteVsRead; j++) { - pool.Malloc((void**)(&memPtrs[j]), allocSize); - } - pool.Free(memPtrs[0]); - delete[] memPtrs; - } ++ singa::CnMemPool pool; ++ pool.InitPool(); ++ int numOfTests = 10; ++ int numOfWriteVsRead = 3; ++ int allocSize = 32; ++ for (int i = 0; i < numOfTests; i++) { ++ int** memPtrs = new int* [numOfWriteVsRead]; ++ for (int j = 0; j < numOfWriteVsRead; j++) { ++ pool.Malloc((void**)(&memPtrs[j]), allocSize); ++ } ++ pool.Free(memPtrs[0]); ++ delete[] memPtrs; ++ } +} + +TEST(CudaMemPool, UsePool) { - singa::CudaMemPool pool; - int numOfTests = 10; - int numOfWriteVsRead = 3; - int allocSize = 1000000U; - for(int i = 0; i < numOfTests; i++) { - int** memPtrs = new int*[numOfWriteVsRead]; - for(int j = 0; j < numOfWriteVsRead; j++) { - pool.Malloc((void**)(&memPtrs[j]), allocSize); - } - pool.Free(memPtrs[0]); - delete[] memPtrs; - } ++ singa::CudaMemPool pool; ++ int numOfTests = 10; ++ int numOfWriteVsRead = 3; ++ int allocSize = 32; ++ for (int i = 0; i < numOfTests; i++) { ++ int** memPtrs = new int* [numOfWriteVsRead]; ++ for (int j = 0; j < numOfWriteVsRead; j++) { ++ pool.Malloc((void**)(&memPtrs[j]), allocSize); ++ } ++ pool.Free(memPtrs[0]); ++ delete[] memPtrs; ++ } +} + +TEST(MemPool, CompareCudaCnmem) { - singa::CudaMemPool cudaPool; - singa::CnMemPool cnPool; - cnPool.InitPool(); ++ singa::CudaMemPool cudaPool; ++ singa::CnMemPool cnPool; ++ cnPool.InitPool(); ++ ++ int numOfTests = 5000; ++ int allocSize 
= 32; + - int numOfTests = 5000; - int allocSize = 1000000U; - struct timeval start,end; - double t1,t2; ++ singa::DeviceMemPool* pool = NULL; ++ pool = &cnPool; + - singa::DeviceMemPool* pool = NULL; - pool = &cnPool; - - gettimeofday(&start,NULL); - for(int i = 0; i < numOfTests; i++) { - int* memPtrs = NULL; - pool->Malloc((void**)&memPtrs, allocSize); - pool->Free(memPtrs); - } - gettimeofday(&end,NULL); - - t1 = start.tv_sec * 1000 + start.tv_usec/1000; - t2 = end.tv_sec * 1000 + end.tv_usec/1000; - LOG(INFO) << "cnmem memory time: " << t2-t1 << " ms" << std::endl; ++ singa::Timer tick; ++ for (int i = 0; i < numOfTests; i++) { ++ int* memPtrs = NULL; ++ pool->Malloc((void**)&memPtrs, allocSize); ++ pool->Free(memPtrs); ++ } ++ tick.Tick(); ++ int cn_time = tick.Elapsed(); + - pool = &cudaPool; - gettimeofday(&start,NULL); - for(int i = 0; i < numOfTests; i++) { - int* memPtrs = NULL; - pool->Malloc((void**)&memPtrs, allocSize); - pool->Free(memPtrs); - } - gettimeofday(&end,NULL); - - t1 = start.tv_sec * 1000 + start.tv_usec/1000; - t2 = end.tv_sec * 1000 + end.tv_usec/1000; - LOG(INFO) << "cuda memory time: " << t2-t1 << " ms" << std::endl; ++ pool = &cudaPool; ++ for (int i = 0; i < numOfTests; i++) { ++ int* memPtrs = NULL; ++ pool->Malloc((void**)&memPtrs, allocSize); ++ pool->Free(memPtrs); ++ } ++ tick.Tick(); ++ int cuda_time = tick.Elapsed(); ++ EXPECT_GE(cuda_time, cn_time); +} - #endif // USE_CUDA ++#endif // USE_CUDA http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd08f413/test/singa/test_mse.cc ---------------------------------------------------------------------- diff --cc test/singa/test_mse.cc index d2c5125,928be9d..788652f --- a/test/singa/test_mse.cc +++ b/test/singa/test_mse.cc @@@ -22,8 -22,9 +22,8 @@@ #include "gtest/gtest.h" #include "singa/core/tensor.h" #include "singa/core/device.h" - #include "../src/model/loss/mse.h" - #include "singa/singa_config.h" + #include "singa/model/loss.h" -#include "singa_config.h" + using singa::Tensor; class TestMSE : public ::testing::Test { protected: @@@ -68,14 -69,14 +68,14 @@@ TEST_F(TestMSE, CppBackward) #endif #ifdef USE_CUDA TEST_F(TestMSE, CudaForward) { - singa::MSE mse; - singa::CudaGPU dev; - p.ToDevice(&dev); - t.ToDevice(&dev); - Tensor loss = mse.Forward(p, t); + singa::MSE* mse = new singa::MSE(); + auto dev = std::make_shared<singa::CudaGPU>(); + p.ToDevice(dev); + t.ToDevice(dev); + Tensor loss = mse->Forward(p, t); loss.ToHost(); - auto ldat = loss.data<const float*>(); + auto ldat = loss.data<float>(); for (size_t i = 0, k = 0; i < loss.Size(); i++) { float l = 0.f; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd08f413/test/singa/test_nesterov.cc ---------------------------------------------------------------------- diff --cc test/singa/test_nesterov.cc index 0000000,35b2b4d..73f69f4 mode 000000,100644..100644 --- a/test/singa/test_nesterov.cc +++ b/test/singa/test_nesterov.cc @@@ -1,0 -1,101 +1,101 @@@ + /************************************************************ + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + *************************************************************/ + + #include "gtest/gtest.h" + #include "singa/model/optimizer.h" -#include "singa_config.h" ++#include "singa/singa_config.h" + + TEST(Nesterov, ApplyCPU) { + singa::Nesterov nesterov; + float lr = 0.1f; + auto func = [](int step) { return step <= 5 ? 0.5f : 0.9f; }; + nesterov.SetMomentumGenerator(func); + const float v[4] = {0.1, 0.2, 0.3, 0.4}; + const float g[4] = {0.01, 0.02, 0.03, 0.04}; + + singa::Tensor value(singa::Shape{4}), grad(singa::Shape{4}); + value.CopyDataFromHostPtr(v, 4); + grad.CopyDataFromHostPtr(g, 4); + + nesterov.Apply(0, lr, "xx", grad, &value); + + singa::Tensor v1 = value.Clone(); + const float* newv1 = v1.data<float>(); + float history[4], tmp[4]; + for (int i = 0; i < 4; ++i) { + history[i] = g[i] * lr; + tmp[i] = history[i] * (1 + func(0)); + } + for (int i = 0; i < 4; ++i) EXPECT_FLOAT_EQ(newv1[i], v[i] - tmp[i]); + + grad.CopyDataFromHostPtr(g, 4); + nesterov.Apply(1, lr, "xx", grad, &value); + singa::Tensor v2 = value.Clone(); + const float* newv2 = v2.data<float>(); + for (int i = 0; i < 4; ++i) { + tmp[i] = history[i]; + history[i] = history[i] * func(1) + g[i] * lr; + tmp[i] = history[i] * (1 + func(1)) - tmp[i] * func(1); + } + + for (int i = 0; i < 4; ++i) EXPECT_FLOAT_EQ(newv2[i], newv1[i] - tmp[i]); + } + + #ifdef USE_CUDA + TEST(Nesterov, ApplyCUDA) { + singa::Nesterov nesterov; + float lr = 0.1f; + auto func = [](int step) { return step <= 5 ? 
0.5f : 0.9f; }; + nesterov.SetMomentumGenerator(func); + const float v[4] = {0.1, 0.2, 0.3, 0.4}; + const float g[4] = {0.01, 0.02, 0.03, 0.04}; + - singa::CudaGPU dev; - singa::Tensor value(singa::Shape{4}, &dev), grad(singa::Shape{4}, &dev); ++ auto dev = std::make_shared<singa::CudaGPU>(); ++ singa::Tensor value(singa::Shape{4}, dev), grad(singa::Shape{4}, dev); + value.CopyDataFromHostPtr(v, 4); + grad.CopyDataFromHostPtr(g, 4); + + nesterov.Apply(0, lr, "xx", grad, &value); + + singa::Tensor v1 = value.Clone(); + v1.ToHost(); + const float* newv1 = v1.data<float>(); + float history[4], tmp[4]; + for (int i = 0; i < 4; ++i) { + history[i] = g[i] * lr; + tmp[i] = history[i] * (1 + func(0)); + } + for (int i = 0; i < 4; ++i) EXPECT_FLOAT_EQ(newv1[i], v[i] - tmp[i]); + + grad.CopyDataFromHostPtr(g, 4); + nesterov.Apply(1, lr, "xx", grad, &value); + singa::Tensor v2 = value.Clone(); + v2.ToHost(); + const float* newv2 = v2.data<float>(); + for (int i = 0; i < 4; ++i) { + tmp[i] = history[i]; + history[i] = history[i] * func(1) + g[i] * lr; + tmp[i] = history[i] * (1 + func(1)) - tmp[i] * func(1); + } + + for (int i = 0; i < 4; ++i) EXPECT_FLOAT_EQ(newv2[i], newv1[i] - tmp[i]); + } + #endif http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd08f413/test/singa/test_prelu.cc ---------------------------------------------------------------------- diff --cc test/singa/test_prelu.cc index 0000000,fee7c5b..dbf5ca6 mode 000000,100644..100644 --- a/test/singa/test_prelu.cc +++ b/test/singa/test_prelu.cc @@@ -1,0 -1,247 +1,245 @@@ + /************************************************************ + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + *************************************************************/ + + #include "../src/model/layer/prelu.h" + #include "gtest/gtest.h" -#include "singa_config.h" ++#include "singa/singa_config.h" + + using singa::PReLU; + using singa::Shape; + TEST(PReLU, Setup) { + PReLU prelu; + EXPECT_EQ("PReLU", prelu.layer_type()); + + singa::LayerConf conf; + singa::PReLUConf *preluconf = conf.mutable_prelu_conf(); + preluconf->set_channel_shared(true); + preluconf->set_format("NHWC"); + + prelu.Setup(Shape{4}, conf); + EXPECT_EQ(true, prelu.Channel_shared()); + EXPECT_EQ("NHWC", prelu.Format()); + } + + TEST(PReLU, ForwardCPU) { + const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -1.f, + -1.f, 2.f, -1.f, -2.f, -2.f, -1.f}; + size_t n = sizeof(x) / sizeof(float); + size_t batchsize = 2, c = 3, h = 2, w = 1; + singa::Tensor in(singa::Shape{batchsize, h, w, c}); + in.CopyDataFromHostPtr<float>(x, n); + + PReLU prelu; + singa::LayerConf conf; + singa::PReLUConf *preluconf = conf.mutable_prelu_conf(); + preluconf->set_channel_shared(false); + preluconf->set_format("NHWC"); + prelu.Setup(Shape{h, w, c}, conf); + + const float neg_slope[] = {0.25f, 0.5f, 0.75f}; + singa::Tensor a(singa::Shape{c}); + a.CopyDataFromHostPtr<float>(neg_slope, c); + prelu.Set_a(a); + + singa::Tensor out = prelu.Forward(singa::kTrain, in); + const float *yptr = out.data<float>(); + EXPECT_EQ(n, out.Size()); + + float *y = new float[n]; + size_t div_factor = prelu.Channel_shared() ? c : 1; + if (prelu.Format() == "NCHW") { + for (size_t i = 0; i < n; i++) { + size_t pos = i / (h * w) % c / div_factor; + y[i] = std::max(x[i], 0.f) + neg_slope[pos] * std::min(x[i], 0.f); + } + } else if (prelu.Format() == "NHWC") { + for (size_t i = 0; i < n; i++) { + size_t pos = i % c / div_factor; + y[i] = std::max(x[i], 0.f) + neg_slope[pos] * std::min(x[i], 0.f); + } + } + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(y[i], yptr[i]); + } + + TEST(PReLU, BackwardCPU) { + const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -1.f, + -1.f, 2.f, -1.f, -2.f, -2.f, -1.f}; + size_t n = sizeof(x) / sizeof(float); + size_t batchsize = 2, c = 3, h = 2, w = 1; + singa::Tensor in(singa::Shape{batchsize, c, h, w}); + in.CopyDataFromHostPtr<float>(x, n); + + PReLU prelu; + singa::LayerConf conf; + singa::PReLUConf *preluconf = conf.mutable_prelu_conf(); + preluconf->set_channel_shared(false); + preluconf->set_format("NCHW"); + prelu.Setup(Shape{c, h, w}, conf); + + const float neg_slope[] = {0.25f, 0.5f, 0.75f}; + singa::Tensor a(singa::Shape{c}); + a.CopyDataFromHostPtr<float>(neg_slope, c); + prelu.Set_a(a); + + singa::Tensor out = prelu.Forward(singa::kTrain, in); + + const float grad[] = {1.f, 2.f, -2.f, -1.f, -1.f, -3.f, + 2.f, -2.f, 1.f, 1.f, -2.f, 0.f}; + singa::Tensor out_diff(singa::Shape{batchsize, c, h, w}); + out_diff.CopyDataFromHostPtr<float>(grad, n); + const auto ret = prelu.Backward(singa::kTrain, out_diff); + const float *xptr = ret.first.data<float>(); + const float *aptr = ret.second.at(0).data<float>(); + float *dx = new float[n]; + size_t div_factor = prelu.Channel_shared() ? c : 1; + size_t params = prelu.Channel_shared() ? 
1 : c; + float da[] = {0.f, 0.f, 0.f}; + if (prelu.Format() == "NCHW") { + for (size_t i = 0; i < n; i++) { + size_t pos = i / (h * w) % c / div_factor; + dx[i] = grad[i] * + (std::max(x[i], 0.f) + neg_slope[pos] * std::min(x[i], 0.f)); + } + for (size_t i = 0; i < n; i++) { + size_t pos = i / (h * w) % c / div_factor; + da[pos] += grad[i] * std::min(x[i], 0.f); + } + } else if (prelu.Format() == "NHWC") { + for (size_t i = 0; i < n; i++) { + size_t pos = i % c / div_factor; + dx[i] = grad[i] * + (std::max(x[i], 0.f) + neg_slope[pos] * std::min(x[i], 0.f)); + } + for (size_t i = 0; i < n; i++) { + size_t pos = i % c / div_factor; + da[pos] += grad[i] * std::min(x[i], 0.f); + } + } + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(dx[i], xptr[i]); + for (size_t i = 0; i < params; i++) EXPECT_FLOAT_EQ(da[i], aptr[i]); + } + + #ifdef USE_CUDA + TEST(PReLU, ForwardGPU) { + const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -1.f, + -1.f, 2.f, -1.f, -2.f, -2.f, -1.f}; + size_t n = sizeof(x) / sizeof(float); + size_t batchsize = 2, c = 3, h = 2, w = 1; - singa::CudaGPU cuda(0, 1); - singa::Tensor in(singa::Shape{batchsize, h, w, c}, &cuda); ++ auto cuda = std::make_shared<singa::CudaGPU>(); ++ singa::Tensor in(singa::Shape{batchsize, h, w, c}, cuda); + in.CopyDataFromHostPtr<float>(x, n); + + PReLU prelu; + singa::LayerConf conf; + singa::PReLUConf *preluconf = conf.mutable_prelu_conf(); + preluconf->set_channel_shared(false); + preluconf->set_format("NHWC"); + prelu.Setup(Shape{h, w, c}, conf); + + const float neg_slope[] = {0.25f, 0.5f, 0.75f}; - singa::Tensor a(singa::Shape{c}, &cuda); ++ singa::Tensor a(singa::Shape{c}, cuda); + a.CopyDataFromHostPtr<float>(neg_slope, c); + prelu.Set_a(a); + + singa::Tensor out = prelu.Forward(singa::kTrain, in); - singa::CppCPU host(0, 1); - out.ToDevice(&host); ++ out.ToHost(); + const float *yptr = out.data<float>(); + EXPECT_EQ(n, out.Size()); + + float *y = new float[n]; + size_t div_factor = prelu.Channel_shared() ? 
c : 1; + if (prelu.Format() == "NCHW") { + for (size_t i = 0; i < n; i++) { + size_t pos = i / (h * w) % c / div_factor; + y[i] = std::max(x[i], 0.f) + neg_slope[pos] * std::min(x[i], 0.f); + } + } else if (prelu.Format() == "NHWC") { + for (size_t i = 0; i < n; i++) { + size_t pos = i % c / div_factor; + y[i] = std::max(x[i], 0.f) + neg_slope[pos] * std::min(x[i], 0.f); + } + } + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(y[i], yptr[i]); + } + + TEST(PReLU, BackwardGPU) { + const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -1.f, + -1.f, 2.f, -1.f, -2.f, -2.f, -1.f}; + size_t n = sizeof(x) / sizeof(float); + size_t batchsize = 2, c = 3, h = 2, w = 1; - singa::CudaGPU cuda(0, 1); - singa::Tensor in(singa::Shape{batchsize, c, h, w}, &cuda); ++ auto cuda = std::make_shared<singa::CudaGPU>(); ++ singa::Tensor in(singa::Shape{batchsize, c, h, w}, cuda); + in.CopyDataFromHostPtr<float>(x, n); + + PReLU prelu; + singa::LayerConf conf; + singa::PReLUConf *preluconf = conf.mutable_prelu_conf(); + preluconf->set_channel_shared(false); + preluconf->set_format("NCHW"); + prelu.Setup(Shape{c, h, w}, conf); + + const float neg_slope[] = {0.25f, 0.5f, 0.75f}; - singa::Tensor a(singa::Shape{c}, &cuda); ++ singa::Tensor a(singa::Shape{c}, cuda); + a.CopyDataFromHostPtr<float>(neg_slope, c); + prelu.Set_a(a); + + singa::Tensor out = prelu.Forward(singa::kTrain, in); + const float grad[] = {1.f, 2.f, -2.f, -1.f, -1.f, -3.f, + 2.f, -2.f, 1.f, 1.f, -2.f, 0.f}; - singa::Tensor out_diff(singa::Shape{batchsize, c, h, w}, &cuda); ++ singa::Tensor out_diff(singa::Shape{batchsize, c, h, w}, cuda); + out_diff.CopyDataFromHostPtr<float>(grad, n); + const auto ret = prelu.Backward(singa::kTrain, out_diff); + + singa::Tensor in_diff = ret.first; - singa::CppCPU host(0, 1); - in_diff.ToDevice(&host); ++ in_diff.ToHost(); + const float *xptr = in_diff.data<float>(); + singa::Tensor a_diff = ret.second.at(0); - a_diff.ToDevice(&host); ++ a_diff.ToHost(); + const float *aptr = a_diff.data<float>(); + float *dx = new float[n]; + size_t div_factor = prelu.Channel_shared() ? c : 1; + size_t params = prelu.Channel_shared() ? 1 : c; + float da[] = {0.f, 0.f, 0.f}; + if (prelu.Format() == "NCHW") { + for (size_t i = 0; i < n; i++) { + size_t pos = i / (h * w) % c / div_factor; + dx[i] = grad[i] * + (std::max(x[i], 0.f) + neg_slope[pos] * std::min(x[i], 0.f)); + } + for (size_t i = 0; i < n; i++) { + size_t pos = i / (h * w) % c / div_factor; + da[pos] += grad[i] * std::min(x[i], 0.f); + } + } else if (prelu.Format() == "NHWC") { + for (size_t i = 0; i < n; i++) { + size_t pos = i % c / div_factor; + dx[i] = grad[i] * + (std::max(x[i], 0.f) + neg_slope[pos] * std::min(x[i], 0.f)); + } + for (size_t i = 0; i < n; i++) { + size_t pos = i % c / div_factor; + da[pos] += grad[i] * std::min(x[i], 0.f); + } + } + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(dx[i], xptr[i]); + for (size_t i = 0; i < params; i++) EXPECT_FLOAT_EQ(da[i], aptr[i]); + } + #endif http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd08f413/test/singa/test_rmsprop.cc ---------------------------------------------------------------------- diff --cc test/singa/test_rmsprop.cc index 0000000,004a9b6..18de9c3 mode 000000,100644..100644 --- a/test/singa/test_rmsprop.cc +++ b/test/singa/test_rmsprop.cc @@@ -1,0 -1,106 +1,105 @@@ + /************************************************************ + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + *************************************************************/ + + #include "gtest/gtest.h" + #include "singa/model/optimizer.h" -#include "singa_config.h" + #include <cmath> + + TEST(RMSProp, ApplyCPU) { + singa::RMSProp rmsprop; + float lr = 0.1f; + float rho = 0.9; + const float v[4] = {0.1, 0.2, 0.3, 0.4}; + const float g[4] = {0.01, 0.02, 0.03, 0.04}; + + singa::OptimizerConf conf; + conf.set_rho(rho); + conf.set_delta(1E-8); + + singa::Tensor value(singa::Shape{4}), grad(singa::Shape{4}); + value.CopyDataFromHostPtr(v, 4); + grad.CopyDataFromHostPtr(g, 4); + + rmsprop.Setup(conf); + rmsprop.Apply(0, lr, "xx", grad, &value); + + singa::Tensor v1 = value.Clone(); + const float* newv1 = v1.data<float>(); + float history[4]; + for (int i = 0; i < 4; ++i) history[i] = g[i] * g[i] * (1 - rho); + for (int i = 0; i < 4; ++i) + EXPECT_NEAR(newv1[i], v[i] - g[i] * lr / sqrt(history[i] + (float)1E-8), + 1e-5); + + grad.CopyDataFromHostPtr(g, 4); + rmsprop.Apply(1, lr, "xx", grad, &value); + singa::Tensor v2 = value.Clone(); + const float* newv2 = v2.data<float>(); + for (int i = 0; i < 4; ++i) + history[i] = history[i] * rho + g[i] * g[i] * (1 - rho); + + for (int i = 0; i < 4; ++i) + EXPECT_NEAR(newv2[i], newv1[i] - lr * g[i] / sqrt(history[i] + (float)1E-8), + 1e-5); + } + + #ifdef USE_CUDA + TEST(RMSProp, ApplyCUDA) { + singa::RMSProp rmsprop; + float lr = 0.1f; + float rho = 0.02; + const float v[4] = {0.1, 0.2, 0.3, 0.4}; + const float g[4] = {0.01, 0.02, 0.03, 0.04}; + + singa::OptimizerConf conf; + conf.set_rho(rho); + conf.set_delta(1e-8); + - singa::CudaGPU dev; - singa::Tensor value(singa::Shape{4}, &dev), grad(singa::Shape{4}, &dev); ++ auto dev = std::make_shared<singa::CudaGPU>(); ++ singa::Tensor value(singa::Shape{4}, dev), grad(singa::Shape{4}, dev); + value.CopyDataFromHostPtr(v, 4); + grad.CopyDataFromHostPtr(g, 4); + + rmsprop.Setup(conf); + rmsprop.Apply(0, lr, "xx", grad, &value); + + singa::Tensor v1 = value.Clone(); + v1.ToHost(); + const float* newv1 = v1.data<float>(); + float history[4]; + for (int i = 0; i < 4; ++i) history[i] = g[i] * g[i] * (1 - rho); + for (int i = 0; i < 4; ++i) + EXPECT_NEAR(newv1[i], v[i] - lr * g[i] / sqrt(history[i] + conf.delta()), + 1e-5); + + grad.CopyDataFromHostPtr(g, 4); + rmsprop.Apply(1, lr, "xx", grad, &value); + singa::Tensor v2 = value.Clone(); + v2.ToHost(); + const float* newv2 = v2.data<float>(); + for (int i = 0; i < 4; ++i) + history[i] = history[i] * rho + g[i] * g[i] * (1 - rho); + + for (int i = 0; i < 4; ++i) + EXPECT_NEAR(newv2[i], + newv1[i] - lr * g[i] / sqrt(history[i] + conf.delta()), 1e-5); + } + #endif http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd08f413/test/singa/test_sgd.cc ---------------------------------------------------------------------- 
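
The same mechanical change recurs in every CUDA test touched by this commit: devices are created as std::shared_ptr<singa::CudaGPU> and passed to Tensor by value, and results are copied back with Tensor::ToHost() instead of constructing a CppCPU host and calling ToDevice(&host). A minimal sketch of the new pattern, assuming the constructor arguments (device id, number of executors) and calls shown elsewhere in this diff; the function name new_device_pattern is only for illustration.

#include <memory>
#include "singa/core/device.h"
#include "singa/core/tensor.h"

void new_device_pattern() {
  // Device is now a shared_ptr, e.g. id 0 with 1 executor as in the tests.
  auto cuda = std::make_shared<singa::CudaGPU>(0, 1);
  singa::Tensor in(singa::Shape{4}, cuda);   // device passed as shared_ptr, not &dev
  const float x[4] = {1.f, 2.f, 3.f, 4.f};
  in.CopyDataFromHostPtr(x, 4);

  singa::Tensor out = in.Clone();
  out.ToHost();                              // replaces out.ToDevice(&host)
  const float* ptr = out.data<float>();      // replaces out.data<const float*>()
  (void)ptr;
}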
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd08f413/test/singa/test_tensor.cc
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd08f413/test/singa/test_tensor_math.cc
----------------------------------------------------------------------

diff --cc test/singa/test_tensor_math.cc
index 0f998c0,a40a848..f8d0351
--- a/test/singa/test_tensor_math.cc
+++ b/test/singa/test_tensor_math.cc
@@@ -253,12 -507,21 +507,21 @@@ TEST_F(TestTensorMath, SumColumnsCpp)
  }
  #endif
  #ifdef USE_CUDA
+ TEST_F(TestTensorMath, L2Cuda) {
-   singa::CudaGPU dev;
-   Tensor t(Shape{3, 2}, &dev);
++  auto dev = std::make_shared<singa::CudaGPU>();
++  Tensor t(Shape{3, 2}, dev);
+   t.CopyDataFromHostPtr(dat1, 6);
+   float l2 = t.L2();
+   float target = 0.0f;
+   for (size_t i = 0; i < t.Size(); i++) target += dat1[i] * dat1[i];
+   EXPECT_FLOAT_EQ(l2, sqrt(target));
+ }
  TEST_F(TestTensorMath, MultCuda) {
    const float x[4] = {1.0f, 2.0f, 3.0f, 4.0f};
-   singa::CudaGPU dev;
-   Tensor t(Shape{2, 2}, &dev);
+   auto dev = std::make_shared<singa::CudaGPU>();
+   Tensor t(Shape{2, 2}, dev);
    t.CopyDataFromHostPtr(x, 4);
-   d.ToDevice(&dev);
+   d.ToDevice(dev);
    d.CopyDataFromHostPtr(dat1, 6);
    Tensor C = Mult(d, t);
    C.ToHost();
@@@ -302,20 -565,18 +565,20 @@@
        EXPECT_FLOAT_EQ(oPtr[i * 4 + j], x[i]);
      }
    }
-   d.ToHost();
-   p.ToHost();
++  d.ToHost();
++  p.ToHost();
  }
  TEST_F(TestTensorMath, AddColumnCuda) {
    const float x[3] = {1.0f, 2.0f, 3.0f};
-   auto dev = std::make_shared<singa::CudaGPU>();
-   singa::CudaGPU dev;
-   Tensor t(Shape{3}, &dev);
++  auto dev = std::make_shared<singa::CudaGPU>();
+   Tensor t(Shape{3}, dev);
    t.CopyDataFromHostPtr(x, 3);
    d.CopyDataFromHostPtr(dat1, 6);
-   d.ToDevice(&dev);
+   d.ToDevice(dev);
    AddColumn(t, &d);
    d.ToHost();
-   const float *xptr = d.data<const float *>();
+   const float *xptr = d.data<float>();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 2; j++) {
        EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] + x[i]);
@@@ -323,17 -584,16 +586,16 @@@
    }
  }
-
  TEST_F(TestTensorMath, SubColumnCuda) {
    const float x[3] = {1.0f, 2.0f, 3.0f};
-   auto dev = std::make_shared<singa::CudaGPU>();
-   singa::CudaGPU dev;
-   Tensor t(Shape{3}, &dev);
++  auto dev = std::make_shared<singa::CudaGPU>();
+   Tensor t(Shape{3}, dev);
    t.CopyDataFromHostPtr(x, 3);
    d.CopyDataFromHostPtr(dat1, 6);
-   d.ToDevice(&dev);
+   d.ToDevice(dev);
    SubColumn(t, &d);
    d.ToHost();
-   const float *xptr = d.data<const float *>();
+   const float *xptr = d.data<float>();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 2; j++) {
        EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] - x[i]);
@@@ -357,14 -617,14 +619,14 @@@ TEST_F(TestTensorMath, MultColumnCpp)
  #ifdef USE_CUDA
  TEST_F(TestTensorMath, MultColumnCuda) {
    const float x[3] = {1.0f, 2.0f, 3.0f};
-   auto dev = std::make_shared<singa::CudaGPU>();
-   singa::CudaGPU dev;
-   Tensor t(Shape{3}, &dev);
++  auto dev = std::make_shared<singa::CudaGPU>();
+   Tensor t(Shape{3}, dev);
    t.CopyDataFromHostPtr(x, 3);
    d.CopyDataFromHostPtr(dat1, 6);
-   d.ToDevice(&dev);
+   d.ToDevice(dev);
    MultColumn(t, &d);
    d.ToHost();
-   const float *xptr = d.data<const float *>();
+   const float *xptr = d.data<float>();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 2; j++) {
        EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] * x[i]);
@@@ -373,14 -633,14 +635,14 @@@
  }
  TEST_F(TestTensorMath, DivColumnCuda) {
    const float x[3] = {1.0f, 2.0f, 3.0f};
-   auto dev = std::make_shared<singa::CudaGPU>();
-   singa::CudaGPU dev;
-   Tensor t(Shape{3}, &dev);
++  auto dev = std::make_shared<singa::CudaGPU>();
+   Tensor t(Shape{3}, dev);
    t.CopyDataFromHostPtr(x, 3);
    d.CopyDataFromHostPtr(dat1, 6);
-   d.ToDevice(&dev);
+   d.ToDevice(dev);
    DivColumn(t, &d);
    d.ToHost();
-   const float *xptr = d.data<const float *>();
+   const float *xptr = d.data<float>();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 2; j++) {
        EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] / x[i]);
@@@ -389,14 -649,14 +651,14 @@@
  }
  TEST_F(TestTensorMath, AddRowCuda) {
    const float x[2] = {1.1f, 2.1f};
-   auto dev = std::make_shared<singa::CudaGPU>();
-   singa::CudaGPU dev;
-   Tensor t(Shape{2}, &dev);
++  auto dev = std::make_shared<singa::CudaGPU>();
+   Tensor t(Shape{2}, dev);
    t.CopyDataFromHostPtr(x, 2);
    d.CopyDataFromHostPtr(dat1, 6);
-   d.ToDevice(&dev);
+   d.ToDevice(dev);
    AddRow(t, &d);
    d.ToHost();
-   const float *xptr = d.data<const float *>();
+   const float *xptr = d.data<float>();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 2; j++) {
        EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] + x[j]);
@@@ -405,14 -665,14 +667,14 @@@
  }
  TEST_F(TestTensorMath, SubRowCuda) {
    const float x[2] = {1.1f, 2.1f};
-   auto dev = std::make_shared<singa::CudaGPU>();
-   singa::CudaGPU dev;
-   Tensor t(Shape{2}, &dev);
++  auto dev = std::make_shared<singa::CudaGPU>();
+   Tensor t(Shape{2}, dev);
    t.CopyDataFromHostPtr(x, 2);
    d.CopyDataFromHostPtr(dat1, 6);
-   d.ToDevice(&dev);
+   d.ToDevice(dev);
    SubRow(t, &d);
    d.ToHost();
-   const float *xptr = d.data<const float *>();
+   const float *xptr = d.data<float>();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 2; j++) {
        EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] - x[j]);
@@@ -421,14 -681,14 +683,14 @@@
  }
  TEST_F(TestTensorMath, MultRowCuda) {
    const float x[2] = {1.1f, 2.1f};
-   auto dev = std::make_shared<singa::CudaGPU>();
-   singa::CudaGPU dev;
-   Tensor t(Shape{2}, &dev);
++  auto dev = std::make_shared<singa::CudaGPU>();
+   Tensor t(Shape{2}, dev);
    t.CopyDataFromHostPtr(x, 2);
    d.CopyDataFromHostPtr(dat1, 6);
-   d.ToDevice(&dev);
+   d.ToDevice(dev);
    MultRow(t, &d);
    d.ToHost();
-   const float *xptr = d.data<const float *>();
+   const float *xptr = d.data<float>();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 2; j++) {
        EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] * x[j]);
@@@ -452,14 -712,14 +714,14 @@@ TEST_F(TestTensorMath, DivRowCpp)
  #ifdef USE_CUDA
  TEST_F(TestTensorMath, DivRowCuda) {
    const float x[2] = {1.1f, 2.1f};
-   auto dev = std::make_shared<singa::CudaGPU>();
-   singa::CudaGPU dev;
-   Tensor t(Shape{2}, &dev);
++  auto dev = std::make_shared<singa::CudaGPU>();
+   Tensor t(Shape{2}, dev);
    t.CopyDataFromHostPtr(x, 2);
    d.CopyDataFromHostPtr(dat1, 6);
-   d.ToDevice(&dev);
+   d.ToDevice(dev);
    DivRow(t, &d);
    d.ToHost();
-   const float *xptr = d.data<const float *>();
+   const float *xptr = d.data<float>();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 2; j++) {
        EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] / x[j]);
@@@ -467,13 -727,13 +729,13 @@@
    }
  }
  TEST_F(TestTensorMath, SumRowsCuda) {
-   auto dev = std::make_shared<singa::CudaGPU>();
-   singa::CudaGPU dev;
-   Tensor t(Shape{2}, &dev);
++  auto dev = std::make_shared<singa::CudaGPU>();
+   Tensor t(Shape{2}, dev);
    d.CopyDataFromHostPtr(dat1, 6);
-   d.ToDevice(&dev);
+   d.ToDevice(dev);
    SumRows(d, &t);
    t.ToHost();
-   const float *tptr = t.data<const float *>();
+   const float *tptr = t.data<float>();
    for (int i = 0; i < 2; i++) {
      float tmp = 0;
      for (int j = 0; j < 3; j++) {
@@@ -481,16 -741,15 +743,16 @@@
      }
      EXPECT_FLOAT_EQ(tptr[i], tmp);
    }
-   d.ToHost();
++  d.ToHost();
  }
  TEST_F(TestTensorMath, SumColumnCuda) {
-   auto dev = std::make_shared<singa::CudaGPU>();
-   singa::CudaGPU dev;
-   Tensor t(Shape{3}, &dev);
++  auto dev = std::make_shared<singa::CudaGPU>();
+   Tensor t(Shape{3}, dev);
    d.CopyDataFromHostPtr(dat1, 6);
-   d.ToDevice(&dev);
+   d.ToDevice(dev);
    SumColumns(d, &t);
    t.ToHost();
-   const float *tptr = t.data<const float *>();
+   const float *tptr = t.data<float>();
    for (int i = 0; i < 3; i++) {
      float tmp = 0;
      for (int j = 0; j < 2; j++) {
@@@ -498,6 -757,120 +760,121 @@@
      }
      EXPECT_FLOAT_EQ(tptr[i], tmp);
    }
-   d.ToHost();
++  d.ToHost();
  }
+
+ #endif
+
+ TEST_F(TestTensorMath, ConcatenateRowsCpp) {
+   d.CopyDataFromHostPtr<float>(dat1, 6);
+   e.CopyDataFromHostPtr<float>(dat2, 6);
+   const auto ret = singa::ConcatenateRows(vector<Tensor>{d, e});
+   EXPECT_EQ(ret.shape(0), d.shape(0) + e.shape(0));
+   EXPECT_EQ(ret.shape(1), d.shape(1));
+   const float *retPtr = ret.data<float>();
+   for (int i = 0; i < 6; i++) EXPECT_FLOAT_EQ(retPtr[i], dat1[i]);
+   for (int i = 0; i < 6; i++) EXPECT_FLOAT_EQ(retPtr[i + 6], dat2[i]);
+ }
+
+ TEST_F(TestTensorMath, ConcatenateColumnsCpp) {
+   d.CopyDataFromHostPtr<float>(dat1, 6);
+   e.CopyDataFromHostPtr<float>(dat2, 6);
+   const auto ret = singa::ConcatenateColumns(vector<Tensor>{d, e});
+   EXPECT_EQ(ret.shape(0), d.shape(0));
+   EXPECT_EQ(ret.shape(1), d.shape(1) + e.shape(1));
+
+   const float *retPtr = ret.data<float>();
+   for (int i = 0; i < 3; i++) {
+     for (int j = 0; j < 2; j++)
+       EXPECT_FLOAT_EQ(retPtr[i * 4 + j], dat1[i * 2 + j]);
+     for (int j = 0; j < 2; j++)
+       EXPECT_FLOAT_EQ(retPtr[i * 4 + 2 + j], dat2[i * 2 + j]);
+   }
+ }
+
+ TEST_F(TestTensorMath, CopyRowsCpp) {
+   const auto ret = singa::CopyRows(e, 1, 2);
+   EXPECT_EQ(ret.shape(0), 1u);
+   EXPECT_EQ(ret.shape(1), e.shape(1));
+   const float *retPtr = ret.data<float>();
+   for (size_t i = 0; i < ret.Size(); i++)
+     EXPECT_FLOAT_EQ(retPtr[i], dat1[1 * 2 + i]);
+ }
+
+ TEST_F(TestTensorMath, CopyColumnsCpp) {
+   a.Reshape(Shape{2, 3});
+   const auto ret = singa::CopyColumns(a, 1, 3);
+   EXPECT_EQ(ret.shape(0), a.shape(0));
+   EXPECT_EQ(ret.shape(1), 2u);
+   const float *retPtr = ret.data<float>();
+   for (size_t i = 0; i < ret.shape(0); i++)
+     for (size_t j = 0; j < ret.shape(1); j++)
+       EXPECT_FLOAT_EQ(retPtr[i * ret.shape(1) + j],
+                       dat1[i * a.shape(1) + j + 1]);
+ }
+
+ #ifdef USE_CUDA
+
+ TEST_F(TestTensorMath, ConcatenateRowsCuda) {
-   singa::CudaGPU dev;
-   d.ToDevice(&dev);
-   e.ToDevice(&dev);
++  auto dev = std::make_shared<singa::CudaGPU>();
++  d.ToDevice(dev);
++  e.ToDevice(dev);
+   d.CopyDataFromHostPtr<float>(dat1, 6);
+   e.CopyDataFromHostPtr<float>(dat2, 6);
+   auto ret = singa::ConcatenateRows(vector<Tensor>{d, e});
+   EXPECT_EQ(ret.shape(0), d.shape(0) + e.shape(0));
+   EXPECT_EQ(ret.shape(1), d.shape(1));
+   ret.ToHost();
+   const float *retPtr = ret.data<float>();
+   for (int i = 0; i < 6; i++) EXPECT_FLOAT_EQ(retPtr[i], dat1[i]);
+   for (int i = 0; i < 6; i++) EXPECT_FLOAT_EQ(retPtr[i + 6], dat2[i]);
+ }
+
+ TEST_F(TestTensorMath, ConcatenateColumnsCuda) {
-   singa::CudaGPU dev;
-   d.ToDevice(&dev);
-   e.ToDevice(&dev);
++  auto dev = std::make_shared<singa::CudaGPU>();
++  d.ToDevice(dev);
++  e.ToDevice(dev);
+   d.CopyDataFromHostPtr<float>(dat1, 6);
+   e.CopyDataFromHostPtr<float>(dat2, 6);
+   auto ret = singa::ConcatenateColumns(vector<Tensor>{d, e});
+   ret.ToHost();
+   EXPECT_EQ(ret.shape(0), d.shape(0));
+   EXPECT_EQ(ret.shape(1), d.shape(1) + e.shape(1));
+
+   const float *retPtr = ret.data<float>();
+   for (int i = 0; i < 3; i++) {
+     for (int j = 0; j < 2; j++)
+       EXPECT_FLOAT_EQ(retPtr[i * 4 + j], dat1[i * 2 + j]);
+     for (int j = 0; j < 2; j++)
+       EXPECT_FLOAT_EQ(retPtr[i * 4 + 2 + j], dat2[i * 2 + j]);
+   }
+ }
+
+ TEST_F(TestTensorMath, CopyRowsCuda) {
-   singa::CudaGPU dev;
-   e.ToDevice(&dev);
++  auto dev = std::make_shared<singa::CudaGPU>();
++  e.ToDevice(dev);
+   auto ret = singa::CopyRows(e, 1, 2);
+   ret.ToHost();
+   EXPECT_EQ(ret.shape(0), 1u);
+   EXPECT_EQ(ret.shape(1), e.shape(1));
+   const float *retPtr = ret.data<float>();
+   for (size_t i = 0; i < ret.Size(); i++)
+     EXPECT_FLOAT_EQ(retPtr[i], dat1[1 * 2 + i]);
+ }
+
+ TEST_F(TestTensorMath, CopyColumnsCuda) {
-   singa::CudaGPU dev;
++  auto dev = std::make_shared<singa::CudaGPU>();
+   a.Reshape(Shape{2, 3});
-   a.ToDevice(&dev);
++  a.ToDevice(dev);
+   auto ret = singa::CopyColumns(a, 1, 3);
+   EXPECT_EQ(ret.shape(0), a.shape(0));
+   EXPECT_EQ(ret.shape(1), 2u);
+   ret.ToHost();
+   const float *retPtr = ret.data<float>();
+   for (size_t i = 0; i < ret.shape(0); i++)
+     for (size_t j = 0; j < ret.shape(1); j++)
+       EXPECT_FLOAT_EQ(retPtr[i * ret.shape(1) + j],
+                       dat1[i * a.shape(1) + j + 1]);
+ }
+ #endif
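Taken together, the hunks above repeat one mechanical migration: a stack-allocated singa::CudaGPU passed by raw pointer becomes a device held in a std::shared_ptr created with std::make_shared, results come back via ToHost() instead of ToDevice(&host), and host-side reads use data<float>() rather than data<const float *>(). Below is a minimal sketch of the new pattern; the header paths and the test name are my assumptions, while every call in the body appears verbatim in the diffs above and requires a build with USE_CUDA.

    // Assumed include paths; the calls themselves are the ones the tests use.
    #include <memory>
    #include "gtest/gtest.h"
    #include "singa/core/device.h"   // singa::CudaGPU (assumed path)
    #include "singa/core/tensor.h"   // singa::Tensor, singa::Shape (assumed path)

    TEST(DeviceMigration, SharedPtrRoundTrip) {
      const float x[4] = {1.f, 2.f, 3.f, 4.f};
      // New style: device owned by a shared_ptr, not a stack object + raw pointer.
      auto dev = std::make_shared<singa::CudaGPU>();
      singa::Tensor t(singa::Shape{4}, dev);
      t.CopyDataFromHostPtr(x, 4);
      t.ToHost();                          // replaces t.ToDevice(&host)
      const float* p = t.data<float>();    // replaces data<const float *>()
      for (int i = 0; i < 4; i++) EXPECT_FLOAT_EQ(p[i], x[i]);
    }

Holding the device in a shared_ptr lets a tensor keep its device alive beyond the scope that created it, which is presumably why the Tensor constructors and ToDevice now take the shared pointer directly.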
