Repository: incubator-singa
Updated Branches:
  refs/heads/dev 4d596dde8 -> f488070e3
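This changeset renames the memory abstraction Blob to Block throughout the CUDA tensor-math backend, the cuDNN layers, and the tests: Tensor::blob() becomes Tensor::block(), Device::NewBlob/FreeBlob become NewBlock/FreeBlock, and every kernel wrapper now takes const Block* / Block* arguments and reaches raw memory through data() / mutable_data(). As a quick orientation, a minimal sketch of the post-rename signature pattern, copied from the Add hunk in tensor_math_cuda.h below (cuda::add and Context are existing SINGA helpers):

    // out[i] = in[i] + x, CUDA backend, after the Blob -> Block rename
    template <>
    void Add<float, lang::Cuda>(const size_t num, const Block* in, const float x,
                                Block* out, Context* ctx) {
      const float* inPtr = static_cast<const float*>(in->data());
      float* outPtr = static_cast<float*>(out->mutable_data());
      cuda::add(num, inPtr, x, outPtr, ctx->stream);  // enqueue on the context's CUDA stream
    }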
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/core/tensor/tensor_math_cuda.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor_math_cuda.h b/src/core/tensor/tensor_math_cuda.h index 9a8839e..3488b55 100644 --- a/src/core/tensor/tensor_math_cuda.h +++ b/src/core/tensor/tensor_math_cuda.h @@ -32,7 +32,7 @@ namespace singa { /// out[i] = |in[i]| template <> -void Abs<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, +void Abs<float, lang::Cuda>(const size_t num, const Block* in, Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -40,16 +40,16 @@ void Abs<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, } /// out = in + x template <> -void Add<float, lang::Cuda>(const size_t num, const Blob* in, const float x, - Blob* out, Context* ctx) { +void Add<float, lang::Cuda>(const size_t num, const Block* in, const float x, + Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); cuda::add(num, inPtr, x, outPtr, ctx->stream); } /// out = in1 + in2 template <> -void Add<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2, - Blob* out, Context* ctx) { +void Add<float, lang::Cuda>(const size_t num, const Block* in1, + const Block* in2, Block* out, Context* ctx) { const float* inPtr1 = static_cast<const float*>(in1->data()); const float* inPtr2 = static_cast<const float*>(in2->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -59,7 +59,7 @@ void Add<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2, /// if x>high, then x=high; if x<low, then x=low. 
template <> void Clamp<float, lang::Cuda>(const size_t num, const float low, - const float high, const Blob* in, Blob* out, + const float high, const Block* in, Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -67,8 +67,8 @@ void Clamp<float, lang::Cuda>(const size_t num, const float low, } /// out = in1 / in2 template <> -void Div<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2, - Blob* out, Context* ctx) { +void Div<float, lang::Cuda>(const size_t num, const Block* in1, + const Block* in2, Block* out, Context* ctx) { const float* inPtr1 = static_cast<const float*>(in1->data()); const float* inPtr2 = static_cast<const float*>(in2->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -76,8 +76,8 @@ void Div<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2, } template <> -void Div<float, lang::Cuda>(const size_t num, const float x, const Blob* in, - Blob* out, Context* ctx) { +void Div<float, lang::Cuda>(const size_t num, const float x, const Block* in, + Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); cuda::div(num, x, inPtr, outPtr, ctx->stream); @@ -85,16 +85,17 @@ void Div<float, lang::Cuda>(const size_t num, const float x, const Blob* in, /// out = in * x template <> -void EltwiseMult<float, lang::Cuda>(const size_t num, const Blob* in, - const float x, Blob* out, Context* ctx) { +void EltwiseMult<float, lang::Cuda>(const size_t num, const Block* in, + const float x, Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); cuda::mult(num, inPtr, x, outPtr, ctx->stream); } /// out = in1 * in2 template <> -void EltwiseMult<float, lang::Cuda>(const size_t num, const Blob* in1, - const Blob* in2, Blob* out, Context* ctx) { +void EltwiseMult<float, lang::Cuda>(const size_t num, const Block* in1, + const Block* in2, Block* out, + Context* ctx) { const float* inPtr1 = static_cast<const float*>(in1->data()); const float* inPtr2 = static_cast<const float*>(in2->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -102,7 +103,7 @@ void EltwiseMult<float, lang::Cuda>(const size_t num, const Blob* in1, } /// Base is e. 
out[i]=e^in[i] template <> -void Exp<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, +void Exp<float, lang::Cuda>(const size_t num, const Block* in, Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -110,24 +111,24 @@ void Exp<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, } template <> -void GE<float, lang::Cuda>(const size_t num, const Blob* in, const float x, - Blob* out, Context* ctx) { +void GE<float, lang::Cuda>(const size_t num, const Block* in, const float x, + Block* out, Context* ctx) { float* outPtr = static_cast<float*>(out->mutable_data()); const float* inPtr = static_cast<const float*>(in->data()); cuda::ge(num, inPtr, x, outPtr, ctx->stream); } template <> -void GT<float, lang::Cuda>(const size_t num, const Blob* in, const float x, - Blob* out, Context* ctx) { +void GT<float, lang::Cuda>(const size_t num, const Block* in, const float x, + Block* out, Context* ctx) { float* outPtr = static_cast<float*>(out->mutable_data()); const float* inPtr = static_cast<const float*>(in->data()); cuda::gt(num, inPtr, x, outPtr, ctx->stream); } template <> -void LE<float, lang::Cuda>(const size_t num, const Blob* in, const float x, - Blob* out, Context* ctx) { +void LE<float, lang::Cuda>(const size_t num, const Block* in, const float x, + Block* out, Context* ctx) { float* outPtr = static_cast<float*>(out->mutable_data()); const float* inPtr = static_cast<const float*>(in->data()); cuda::le(num, inPtr, x, outPtr, ctx->stream); @@ -135,15 +136,15 @@ void LE<float, lang::Cuda>(const size_t num, const Blob* in, const float x, /// Natual logarithm, the base is e, Neper number out[i]=ln(in[i]). template <> -void Log<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, +void Log<float, lang::Cuda>(const size_t num, const Block* in, Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); cuda::log(num, inPtr, outPtr, ctx->stream); } template <> -void LT<float, lang::Cuda>(const size_t num, const Blob* in, const float x, - Blob* out, Context* ctx) { +void LT<float, lang::Cuda>(const size_t num, const Block* in, const float x, + Block* out, Context* ctx) { float* outPtr = static_cast<float*>(out->mutable_data()); const float* inPtr = static_cast<const float*>(in->data()); cuda::lt(num, inPtr, x, outPtr, ctx->stream); @@ -151,16 +152,16 @@ void LT<float, lang::Cuda>(const size_t num, const Blob* in, const float x, /// Element-wise operation, out[i] = in[i]^x template <> -void Pow<float, lang::Cuda>(const size_t num, const Blob* in, const float x, - Blob* out, Context* ctx) { +void Pow<float, lang::Cuda>(const size_t num, const Block* in, const float x, + Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); cuda::pow(num, inPtr, x, outPtr, ctx->stream); } /// Element-wise operation, out[i] = in1[i]^in2[i] template <> -void Pow<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2, - Blob* out, Context* ctx) { +void Pow<float, lang::Cuda>(const size_t num, const Block* in1, + const Block* in2, Block* out, Context* ctx) { const float* inPtr1 = static_cast<const float*>(in1->data()); const float* inPtr2 = static_cast<const float*>(in2->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -169,7 +170,7 @@ void Pow<float, lang::Cuda>(const size_t num, 
const Blob* in1, const Blob* in2, /// Element-wise operation, out[i]=max(0, in[i]) template <> -void ReLU<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, +void ReLU<float, lang::Cuda>(const size_t num, const Block* in, Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -178,14 +179,14 @@ void ReLU<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, /// out[i] = x template <> -void Set<float, lang::Cuda>(const size_t num, const float x, Blob* out, +void Set<float, lang::Cuda>(const size_t num, const float x, Block* out, Context* ctx) { float* outPtr = static_cast<float*>(out->mutable_data()); cuda::set(num, x, outPtr, ctx->stream); } /// Element-wise operation, out[i]=sigmoid([in[i]) template <> -void Sigmoid<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, +void Sigmoid<float, lang::Cuda>(const size_t num, const Block* in, Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -193,7 +194,7 @@ void Sigmoid<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, } // out[i] = sign(in[i]) template <> -void Sign<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, +void Sign<float, lang::Cuda>(const size_t num, const Block* in, Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -202,7 +203,7 @@ void Sign<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, /// Element-wise operation, out[i]=sqrt([in[i]) template <> -void Sqrt<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, +void Sqrt<float, lang::Cuda>(const size_t num, const Block* in, Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -211,7 +212,7 @@ void Sqrt<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, /// Element-wise operation, out[i]=in[i]^2 template <> -void Square<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, +void Square<float, lang::Cuda>(const size_t num, const Block* in, Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -219,8 +220,8 @@ void Square<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, } /// out = in1 - in2 template <> -void Sub<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2, - Blob* out, Context* ctx) { +void Sub<float, lang::Cuda>(const size_t num, const Block* in1, + const Block* in2, Block* out, Context* ctx) { const float* inPtr1 = static_cast<const float*>(in1->data()); const float* inPtr2 = static_cast<const float*>(in2->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -229,7 +230,7 @@ void Sub<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2, /// sum all elements of input into out template <> -void Sum<float, lang::Cuda>(const size_t num, const Blob* in, float* out, +void Sum<float, lang::Cuda>(const size_t num, const Block* in, float* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); cuda::sum(num, inPtr, out, ctx->stream); @@ -237,7 +238,7 @@ void Sum<float, lang::Cuda>(const size_t num, const Blob* in, float* out, /// Element-wise operation, out[i]=tanh([in[i]) template <> -void Tanh<float, 
lang::Cuda>(const size_t num, const Blob* in, Blob* out, +void Tanh<float, lang::Cuda>(const size_t num, const Block* in, Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -249,7 +250,7 @@ void Tanh<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out, // Get the random generator from 'ctx' // If DType is not float, then convert the threshold to DType template <> -void Bernoulli<float, lang::Cuda>(const size_t num, const float p, Blob* out, +void Bernoulli<float, lang::Cuda>(const size_t num, const float p, Block* out, Context* ctx) { auto rgen = ctx->curand_generator; float* outPtr = static_cast<float*>(out->mutable_data()); @@ -261,7 +262,7 @@ void Bernoulli<float, lang::Cuda>(const size_t num, const float p, Blob* out, // If DType is not float, then convert the low and high to DType template <> void Uniform<float, lang::Cuda>(const size_t num, const float low, - const float high, Blob* out, Context* ctx) { + const float high, Block* out, Context* ctx) { auto rgen = ctx->curand_generator; float* outPtr = static_cast<float*>(out->mutable_data()); CURAND_CHECK(curandGenerateUniform(rgen, outPtr, num)); @@ -273,7 +274,7 @@ void Uniform<float, lang::Cuda>(const size_t num, const float low, // If DType is not float, then convert the mean and delta to DType template <> void Gaussian<float, lang::Cuda>(const size_t num, const float mean, - const float std, Blob* out, Context* ctx) { + const float std, Block* out, Context* ctx) { auto rgen = ctx->curand_generator; float* outPtr = static_cast<float*>(out->mutable_data()); CURAND_CHECK(curandGenerateNormal(rgen, outPtr, num, mean, std)); @@ -282,7 +283,7 @@ void Gaussian<float, lang::Cuda>(const size_t num, const float mean, // =========================Blas operations================================== // ref to http://docs.nvidia.com/cuda/cublas template <> -void Amax<float, lang::Cuda>(const size_t num, const Blob* in, size_t* out, +void Amax<float, lang::Cuda>(const size_t num, const Block* in, size_t* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream @@ -293,7 +294,7 @@ void Amax<float, lang::Cuda>(const size_t num, const Blob* in, size_t* out, /// return the index of the element with the min value. 
template <> -void Amin<float, lang::Cuda>(const size_t num, const Blob* in, size_t* out, +void Amin<float, lang::Cuda>(const size_t num, const Block* in, size_t* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream @@ -304,7 +305,7 @@ void Amin<float, lang::Cuda>(const size_t num, const Blob* in, size_t* out, /// out = sum |x| for all x in in template <> -void Asum<float, lang::Cuda>(const size_t num, const Blob* in, float* out, +void Asum<float, lang::Cuda>(const size_t num, const Block* in, float* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream @@ -314,7 +315,7 @@ void Asum<float, lang::Cuda>(const size_t num, const Blob* in, float* out, /// out = alpha * in + out template <> void Axpy<float, lang::Cuda>(const size_t num, const float alpha, - const Blob* in, Blob* out, Context* ctx) { + const Block* in, Block* out, Context* ctx) { const float* inPtr = static_cast<const float*>(in->data()); float* outPtr = static_cast<float*>(out->mutable_data()); auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream @@ -323,22 +324,22 @@ void Axpy<float, lang::Cuda>(const size_t num, const float alpha, /// out = \sum_i in1[i] * in2[i] template <> -void Dot<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2, - float* out, Context* ctx) { +void Dot<float, lang::Cuda>(const size_t num, const Block* in1, + const Block* in2, float* out, Context* ctx) { const float* inPtr1 = static_cast<const float*>(in1->data()); const float* inPtr2 = static_cast<const float*>(in2->data()); auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream CUBLAS_CHECK(cublasSdot(handle, num, inPtr1, 1, inPtr2, 1, out)); } template <> -void Nrm2<float, lang::Cuda>(const size_t num, const Blob* in, float* out, +void Nrm2<float, lang::Cuda>(const size_t num, const Block* in, float* out, Context* ctx) { auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream const float* inPtr = static_cast<const float*>(in->data()); cublasSnrm2(handle, num, inPtr, 1, out); } template <> -void Scale<float, lang::Cuda>(const size_t num, const float x, Blob* out, +void Scale<float, lang::Cuda>(const size_t num, const float x, Block* out, Context* ctx) { auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream float* outPtr = static_cast<float*>(out->mutable_data()); @@ -348,8 +349,8 @@ void Scale<float, lang::Cuda>(const size_t num, const float x, Blob* out, // http://peterwittek.com/cublas-matrix-c-style.html template <> void DGMM<float, lang::Cuda>(const bool side_right, const size_t nrow, - const size_t ncol, const Blob* M, const Blob* v, - Blob* out, Context* ctx) { + const size_t ncol, const Block* M, const Block* v, + Block* out, Context* ctx) { auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream const float* MPtr = static_cast<const float*>(M->data()); const float* vPtr = static_cast<const float*>(v->data()); @@ -364,8 +365,8 @@ void DGMM<float, lang::Cuda>(const bool side_right, const size_t nrow, } template <> void GEMV<float, lang::Cuda>(bool trans, const size_t m, const size_t n, - const float alpha, const Blob* A, const Blob* v, - const float beta, Blob* out, Context* ctx) { + const float alpha, const Block* A, const Block* v, + const float beta, Block* out, Context* ctx) { const float* APtr = static_cast<const float*>(A->data()); const float* vPtr = static_cast<const 
float*>(v->data()); float* outPtr = static_cast<float*>(out->mutable_data()); @@ -383,8 +384,8 @@ template <> void GEMM<float, lang::Cuda>(const bool transA, const bool transB, const size_t nrowA, const size_t ncolB, const size_t ncolA, const float alpha, - const Blob* A, const Blob* B, const float beta, - Blob* C, Context* ctx) { + const Block* A, const Block* B, const float beta, + Block* C, Context* ctx) { auto transa = transA ? CUBLAS_OP_T : CUBLAS_OP_N; auto transb = transB ? CUBLAS_OP_T : CUBLAS_OP_N; int lda = transA ? nrowA : ncolA; @@ -400,23 +401,23 @@ void GEMM<float, lang::Cuda>(const bool transA, const bool transB, template <> void ComputeCrossEntropy<float, lang::Cuda>(const size_t batchsize, - const size_t dim, const Blob *p, - const Blob *t, Blob *loss, - Context *ctx) { - const float *pPtr = static_cast<const float *>(p->data()); - const int *tPtr = static_cast<const int *>(t->data()); - float *lossPtr = static_cast<float *>(loss->mutable_data()); + const size_t dim, const Block* p, + const Block* t, Block* loss, + Context* ctx) { + const float* pPtr = static_cast<const float*>(p->data()); + const int* tPtr = static_cast<const int*>(t->data()); + float* lossPtr = static_cast<float*>(loss->mutable_data()); cuda::ComputeCrossEntropy(batchsize, dim, pPtr, tPtr, lossPtr, ctx->stream); } template <> void SoftmaxCrossEntropyBwd<float, lang::Cuda>(const size_t batchsize, - const size_t dim, const Blob *p, - const Blob *t, Blob *grad, - Context *ctx) { + const size_t dim, const Block* p, + const Block* t, Block* grad, + Context* ctx) { CHECK_EQ(p, grad) << "Use the same pointer to optimize performance"; - const float *pPtr = static_cast<const float *>(p->data()); - const int *tPtr = static_cast<const int *>(t->data()); - float *gradPtr = static_cast<float *>(grad->mutable_data()); + const float* pPtr = static_cast<const float*>(p->data()); + const int* tPtr = static_cast<const int*>(t->data()); + float* gradPtr = static_cast<float*>(grad->mutable_data()); cuda::SoftmaxCrossEntropyBwd(batchsize, dim, pPtr, tPtr, gradPtr, ctx->stream); } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/model/layer/cudnn_activation.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_activation.cc b/src/model/layer/cudnn_activation.cc index 8ecbbc7..98a5758 100644 --- a/src/model/layer/cudnn_activation.cc +++ b/src/model/layer/cudnn_activation.cc @@ -63,18 +63,18 @@ const Tensor CudnnActivation::Forward(int flag, const Tensor& input) { Tensor output; output.ResetLike(input); output.device()->Exec([input, output, this](Context* ctx) { - Blob* inblob = input.blob(), * outblob = output.blob(); + Block* inblock = input.block(), * outblock = output.block(); float alpha = 1.0f, beta = 0.0f; #if CUDNN_VERSION_MAJOR == 5 CUDNN_CHECK(cudnnActivationForward( ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_, - inblob->data(), &beta, this->desc_, outblob->mutable_data())); + inblock->data(), &beta, this->desc_, outblock->mutable_data())); #elif CUDNN_VERSION_MAJOR == 4 CUDNN_CHECK(cudnnActivationForward_v4( ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_, - inblob->data(), &beta, this->desc_, outblob->mutable_data())); + inblock->data(), &beta, this->desc_, outblock->mutable_data())); #endif - }, {input.blob()}, {output.blob()}); + }, {input.block()}, {output.block()}); if (flag & kTrain) { if (cudnn_mode_ == CUDNN_ACTIVATION_SIGMOID || cudnn_mode_ == CUDNN_ACTIVATION_TANH) { @@ -97,21 +97,21 @@ const 
std::pair<Tensor, vector<Tensor>> CudnnActivation::Backward( buf_.pop(); dx.ResetLike(grad); dx.device()->Exec([dx, grad, inout, this](Context* ctx) { - Blob* dyblob = grad.blob(), * dxblob = dx.blob(), * yblob = inout.blob(), - * xblob = inout.blob(); + Block* dyblock = grad.block(), * dxblock = dx.block(), + * yblock = inout.block(), * xblock = inout.block(); float alpha = 1.0f, beta = 0.0f; #if CUDNN_VERSION_MAJOR == 5 CUDNN_CHECK(cudnnActivationBackward( - ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_, yblob->data(), - this->desc_, dyblob->data(), this->desc_, xblob->data(), &beta, - this->desc_, dxblob->mutable_data())); + ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_, + yblock->data(), this->desc_, dyblock->data(), this->desc_, + xblock->data(), &beta, this->desc_, dxblock->mutable_data())); #elif CUDNN_VERSION_MAJOR == 4 CUDNN_CHECK(cudnnActivationBackward_v4( - ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_, yblob->data(), - this->desc_, dyblob->data(), this->desc_, xblob->data(), &beta, - this->desc_, dxblob->mutable_data())); + ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_, yblock->data(), + this->desc_, dyblock->data(), this->desc_, xblock->data(), &beta, + this->desc_, dxblock->mutable_data())); #endif - }, {grad.blob(), inout.blob()}, {dx.blob()}); + }, {grad.block(), inout.block()}, {dx.block()}); return std::make_pair(dx, param_grad); } } // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/model/layer/cudnn_batchnorm.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_batchnorm.cc b/src/model/layer/cudnn_batchnorm.cc index 8288a41..1393916 100644 --- a/src/model/layer/cudnn_batchnorm.cc +++ b/src/model/layer/cudnn_batchnorm.cc @@ -81,13 +81,13 @@ const Tensor CudnnBatchNorm::Forward(int flag, const Tensor& input) { if ((flag & kTrain) == kTrain) { output.device()->Exec( [=](Context* ctx) { - Blob *inBlob = input.blob(), *outBlob = output.blob(), - *saveMeanBlob = resultSaveMean_.blob(), - *saveVarBlob = resultSaveVariance_.blob(), - *runningMeanBlob = runningMean_.blob(), - *runningVarBlob = runningVariance_.blob(), - *bnScaleBlob = bnScale_.blob(), - *bnBiasBlob = bnBias_.blob(); + Block *inBlock = input.block(), *outBlock = output.block(), + *saveMeanBlock = resultSaveMean_.block(), + *saveVarBlock = resultSaveVariance_.block(), + *runningMeanBlock = runningMean_.block(), + *runningVarBlock = runningVariance_.block(), + *bnScaleBlock = bnScale_.block(), + *bnBiasBlock = bnBias_.block(); const float alpha = 1.0f, beta = 0.0f; double epsilon = CUDNN_BN_MIN_EPSILON; CUDNN_CHECK(cudnnBatchNormalizationForwardTraining( @@ -96,36 +96,36 @@ const Tensor CudnnBatchNorm::Forward(int flag, const Tensor& input) { &alpha, &beta, shape_desc_, - inBlob->data(), + inBlock->data(), shape_desc_, - outBlob->mutable_data(), + outBlock->mutable_data(), param_desc_, - bnScaleBlob->data(), - bnBiasBlob->data(), + bnScaleBlock->data(), + bnBiasBlock->data(), factor_, - runningMeanBlob->mutable_data(), - runningVarBlob->mutable_data(), + runningMeanBlock->mutable_data(), + runningVarBlock->mutable_data(), epsilon, - saveMeanBlob->mutable_data(), - saveVarBlob->mutable_data())); + saveMeanBlock->mutable_data(), + saveVarBlock->mutable_data())); }, - {input.blob(), - bnScale_.blob(), - bnBias_.blob()}, - {output.blob(), - runningMean_.blob(), - runningVariance_.blob(), - resultSaveMean_.blob(), - resultSaveVariance_.blob()}); + {input.block(), + 
bnScale_.block(), + bnBias_.block()}, + {output.block(), + runningMean_.block(), + runningVariance_.block(), + resultSaveMean_.block(), + resultSaveVariance_.block()}); buf_.push(input); } else { output.device()->Exec( [=](Context* ctx) { - Blob *inBlob = input.blob(), *outBlob = output.blob(), - *runningMeanBlob = runningMean_.blob(), - *runningVarBlob = runningVariance_.blob(), - *bnScaleBlob = bnScale_.blob(), - *bnBiasBlob = bnBias_.blob(); + Block *inBlock = input.block(), *outBlock = output.block(), + *runningMeanBlock = runningMean_.block(), + *runningVarBlock = runningVariance_.block(), + *bnScaleBlock = bnScale_.block(), + *bnBiasBlock = bnBias_.block(); const float alpha = 1.0f, beta = 0.0f; double epsilon = CUDNN_BN_MIN_EPSILON; CUDNN_CHECK(cudnnBatchNormalizationForwardInference( @@ -134,22 +134,22 @@ const Tensor CudnnBatchNorm::Forward(int flag, const Tensor& input) { &alpha, &beta, shape_desc_, - inBlob->data(), + inBlock->data(), shape_desc_, - outBlob->mutable_data(), + outBlock->mutable_data(), param_desc_, - bnScaleBlob->data(), - bnBiasBlob->data(), - runningMeanBlob->data(), - runningVarBlob->data(), + bnScaleBlock->data(), + bnBiasBlock->data(), + runningMeanBlock->data(), + runningVarBlock->data(), epsilon)); }, - {input.blob(), - bnScale_.blob(), - bnBias_.blob(), - runningMean_.blob(), - runningVariance_.blob()}, - {output.blob()}); + {input.block(), + bnScale_.block(), + bnBias_.block(), + runningMean_.block(), + runningVariance_.block()}, + {output.block()}); } return output; } @@ -164,13 +164,13 @@ const std::pair<Tensor, vector<Tensor>> CudnnBatchNorm::Backward( dx.ResetLike(grad); dx.device()->Exec( [=](Context* ctx) { - Blob *dyblob = grad.blob(), *dxblob = dx.blob(), - *xblob = input.blob(), - *bnScaleBlob = bnScale_.blob(), - *dbnScaleBlob = dbnScale_.blob(), - *dbnBiasBlob = dbnBias_.blob(), - *saveMeanBlob = resultSaveMean_.blob(), - *saveVarBlob = resultSaveVariance_.blob(); + Block *dyblock = grad.block(), *dxblock = dx.block(), + *xblock = input.block(), + *bnScaleBlock = bnScale_.block(), + *dbnScaleBlock = dbnScale_.block(), + *dbnBiasBlock = dbnBias_.block(), + *saveMeanBlock = resultSaveMean_.block(), + *saveVarBlock = resultSaveVariance_.block(); const float alpha = 1.0f, beta = .0f; double epsilon = CUDNN_BN_MIN_EPSILON; CUDNN_CHECK(cudnnBatchNormalizationBackward(ctx->cudnn_handle, @@ -180,28 +180,28 @@ const std::pair<Tensor, vector<Tensor>> CudnnBatchNorm::Backward( &alpha, &beta, shape_desc_, - xblob->data(), + xblock->data(), shape_desc_, - dyblob->data(), + dyblock->data(), shape_desc_, - dxblob->mutable_data(), + dxblock->mutable_data(), param_desc_, - bnScaleBlob->data(), - dbnScaleBlob->mutable_data(), - dbnBiasBlob->mutable_data(), + bnScaleBlock->data(), + dbnScaleBlock->mutable_data(), + dbnBiasBlock->mutable_data(), epsilon, - saveMeanBlob->data(), - saveVarBlob->data())); + saveMeanBlock->data(), + saveVarBlock->data())); }, - {dx.blob(), - grad.blob(), - bnScale_.blob(), - resultSaveMean_.blob(), - resultSaveVariance_.blob()}, - {dx.blob(), - dbnScale_.blob(), - dbnBias_.blob()}); + {dx.block(), + grad.block(), + bnScale_.block(), + resultSaveMean_.block(), + resultSaveVariance_.block()}, + {dx.block(), + dbnScale_.block(), + dbnBias_.block()}); } else { LOG(ERROR) << "Do not call backward for evaluation phase"; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/model/layer/cudnn_convolution.cc ---------------------------------------------------------------------- diff --git 
a/src/model/layer/cudnn_convolution.cc b/src/model/layer/cudnn_convolution.cc index b80c3bd..efc7f88 100644 --- a/src/model/layer/cudnn_convolution.cc +++ b/src/model/layer/cudnn_convolution.cc @@ -167,30 +167,26 @@ const Tensor CudnnConvolution::Forward(int flag, const Tensor &input) { Shape shape{batchsize, num_filters_, conv_height_, conv_width_}; Tensor output(shape, dev, dtype); - output.device()->Exec( - [input, output, this](Context *ctx) { - Blob *inblob = input.blob(), *outblob = output.blob(), - *wblob = this->weight_.blob(); - float alpha = 1.f, beta = 0.f; - cudnnConvolutionForward(ctx->cudnn_handle, &alpha, this->x_desc_, - inblob->data(), this->filter_desc_, - wblob->data(), this->conv_desc_, this->fp_alg_, - this->workspace_.blob()->mutable_data(), - this->workspace_count_ * sizeof(float), &beta, - this->y_desc_, outblob->mutable_data()); - }, - {input.blob(), weight_.blob()}, {output.blob()}, workspace_.blob()); + output.device()->Exec([input, output, this](Context *ctx) { + Block *inblock = input.block(), *outblock = output.block(), + *wblock = this->weight_.block(); + float alpha = 1.f, beta = 0.f; + cudnnConvolutionForward(ctx->cudnn_handle, &alpha, this->x_desc_, + inblock->data(), this->filter_desc_, wblock->data(), + this->conv_desc_, this->fp_alg_, + this->workspace_.block()->mutable_data(), + this->workspace_count_ * sizeof(float), &beta, + this->y_desc_, outblock->mutable_data()); + }, {input.block(), weight_.block()}, {output.block()}, workspace_.block()); if (bias_term_) { - output.device()->Exec( - [output, this](Context *ctx) { - float beta = 1.f, alpha = 1.0f; - Blob *outblob = output.blob(), *bblob = this->bias_.blob(); - cudnnAddTensor(ctx->cudnn_handle, &alpha, this->bias_desc_, - bblob->data(), &beta, this->y_desc_, - outblob->mutable_data()); - }, - {output.blob(), bias_.blob()}, {output.blob()}); + output.device()->Exec([output, this](Context *ctx) { + float beta = 1.f, alpha = 1.0f; + Block *outblock = output.block(), *bblock = this->bias_.block(); + cudnnAddTensor(ctx->cudnn_handle, &alpha, this->bias_desc_, + bblock->data(), &beta, this->y_desc_, + outblock->mutable_data()); + }, {output.block(), bias_.block()}, {output.block()}); } return output; } @@ -212,45 +208,39 @@ const std::pair<Tensor, vector<Tensor>> CudnnConvolution::Backward( // LOG(ERROR) << "backward bias"; if (bias_term_) { - dx.device()->Exec( - [grad, db, this](Context *ctx) { - Blob *dyblob = grad.blob(), *dbblob = db.blob(); - float alpha = 1.f, beta = 0.f; - cudnnConvolutionBackwardBias(ctx->cudnn_handle, &alpha, this->y_desc_, - dyblob->data(), &beta, this->bias_desc_, - dbblob->mutable_data()); - }, - {grad.blob()}, {db.blob()}); + dx.device()->Exec([grad, db, this](Context *ctx) { + Block *dyblock = grad.block(), *dbblock = db.block(); + float alpha = 1.f, beta = 0.f; + cudnnConvolutionBackwardBias(ctx->cudnn_handle, &alpha, this->y_desc_, + dyblock->data(), &beta, this->bias_desc_, + dbblock->mutable_data()); + }, {grad.block()}, {db.block()}); } // LOG(ERROR) << "backward w"; - dx.device()->Exec( - [grad, dw, src_data, this](Context *ctx) { - Blob *inblob = src_data.blob(), *dyblob = grad.blob(), - *dwblob = dw.blob(); - float alpha = 1.f, beta = 0.f; - cudnnConvolutionBackwardFilter( - ctx->cudnn_handle, &alpha, this->x_desc_, inblob->data(), - this->y_desc_, dyblob->data(), this->conv_desc_, - this->bp_filter_alg_, this->workspace_.blob()->mutable_data(), - this->workspace_count_ * sizeof(float), &beta, this->filter_desc_, - dwblob->mutable_data()); - }, - {grad.blob(), 
src_data.blob()}, {dw.blob(), workspace_.blob()}); + dx.device()->Exec([grad, dw, src_data, this](Context *ctx) { + Block *inblock = src_data.block(), *dyblock = grad.block(), + *dwblock = dw.block(); + float alpha = 1.f, beta = 0.f; + cudnnConvolutionBackwardFilter( + ctx->cudnn_handle, &alpha, this->x_desc_, inblock->data(), + this->y_desc_, dyblock->data(), this->conv_desc_, this->bp_filter_alg_, + this->workspace_.block()->mutable_data(), + this->workspace_count_ * sizeof(float), &beta, this->filter_desc_, + dwblock->mutable_data()); + }, {grad.block(), src_data.block()}, {dw.block(), workspace_.block()}); // LOG(ERROR) << "backward src"; - dx.device()->Exec( - [dx, grad, this](Context *ctx) { - Blob *wblob = this->weight_.blob(), *dyblob = grad.blob(), - *dxblob = dx.blob(); - float alpha = 1.f, beta = 0.f; - cudnnConvolutionBackwardData( - ctx->cudnn_handle, &alpha, this->filter_desc_, wblob->data(), - this->y_desc_, dyblob->data(), this->conv_desc_, this->bp_data_alg_, - this->workspace_.blob()->mutable_data(), - this->workspace_count_ * sizeof(float), &beta, this->x_desc_, - dxblob->mutable_data()); - }, - {grad.blob(), weight_.blob()}, {dx.blob(), workspace_.blob()}); + dx.device()->Exec([dx, grad, this](Context *ctx) { + Block *wblock = this->weight_.block(), *dyblock = grad.block(), + *dxblock = dx.block(); + float alpha = 1.f, beta = 0.f; + cudnnConvolutionBackwardData(ctx->cudnn_handle, &alpha, this->filter_desc_, + wblock->data(), this->y_desc_, dyblock->data(), + this->conv_desc_, this->bp_data_alg_, + this->workspace_.block()->mutable_data(), + this->workspace_count_ * sizeof(float), &beta, + this->x_desc_, dxblock->mutable_data()); + }, {grad.block(), weight_.block()}, {dx.block(), workspace_.block()}); param_grad.push_back(dw); param_grad.push_back(db); return std::make_pair(dx, param_grad); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/model/layer/cudnn_dropout.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_dropout.cc b/src/model/layer/cudnn_dropout.cc index 64a581b..877dd12 100644 --- a/src/model/layer/cudnn_dropout.cc +++ b/src/model/layer/cudnn_dropout.cc @@ -57,7 +57,7 @@ void CudnnDropout::InitCudnn(int size, DataType dtype, Device* dev, // TODO(wangwei) get seed from ctx or user config? 
auto seed = std::chrono::system_clock::now().time_since_epoch().count(); cudnnSetDropoutDescriptor(drop_desc_, ctx->cudnn_handle, 1 - dropout_ratio_, - state_.blob()->mutable_data(), state_size_, seed); + state_.block()->mutable_data(), state_size_, seed); has_init_cudnn_ = true; } @@ -67,24 +67,20 @@ const Tensor CudnnDropout::Forward(int flag, const Tensor& input) { DataType dtype = input.data_type(); Device* dev = input.device(); if (!has_init_cudnn_) { - input.device()->Exec( - [size, dtype, this, dev](Context* ctx) { - this->InitCudnn(size, dtype, dev, ctx); - }, - {}, {this->state_.blob()}); + input.device()->Exec([size, dtype, this, dev](Context* ctx) { + this->InitCudnn(size, dtype, dev, ctx); + }, {}, {this->state_.block()}); } Tensor output; output.ResetLike(input); - output.device()->Exec( - [input, output, this](Context* ctx) { - Blob *inblob = input.blob(), *outblob = output.blob(), - *mblob = mask_.blob(); - cudnnDropoutForward(ctx->cudnn_handle, this->drop_desc_, - this->x_desc_, inblob->data(), this->y_desc_, - outblob->mutable_data(), mblob->mutable_data(), - this->reserve_size_); - }, - {input.blob()}, {output.blob(), mask_.blob()}); + output.device()->Exec([input, output, this](Context* ctx) { + Block* inblock = input.block(), * outblock = output.block(), + * mblock = mask_.block(); + cudnnDropoutForward(ctx->cudnn_handle, this->drop_desc_, this->x_desc_, + inblock->data(), this->y_desc_, + outblock->mutable_data(), mblock->mutable_data(), + this->reserve_size_); + }, {input.block()}, {output.block(), mask_.block()}); return output; } else { return input; @@ -97,16 +93,14 @@ const std::pair<Tensor, vector<Tensor>> CudnnDropout::Backward( Tensor dx; if (flag & kTrain) { dx.ResetLike(grad); - dx.device()->Exec( - [dx, grad, this](Context* ctx) { - Blob *dyblob = grad.blob(), *dxblob = dx.blob(), - *mblob = this->mask_.blob(); - cudnnDropoutBackward(ctx->cudnn_handle, this->drop_desc_, - this->y_desc_, dyblob->data(), this->x_desc_, - dxblob->mutable_data(), mblob->mutable_data(), - this->reserve_size_); - }, - {grad.blob(), mask_.blob()}, {dx.blob()}); + dx.device()->Exec([dx, grad, this](Context* ctx) { + Block* dyblock = grad.block(), * dxblock = dx.block(), + * mblock = this->mask_.block(); + cudnnDropoutBackward(ctx->cudnn_handle, this->drop_desc_, this->y_desc_, + dyblock->data(), this->x_desc_, + dxblock->mutable_data(), mblock->mutable_data(), + this->reserve_size_); + }, {grad.block(), mask_.block()}, {dx.block()}); } else { LOG(ERROR) << "Do not call backward for evaluation phase"; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/model/layer/cudnn_lrn.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_lrn.cc b/src/model/layer/cudnn_lrn.cc index 1012813..4dbf426 100644 --- a/src/model/layer/cudnn_lrn.cc +++ b/src/model/layer/cudnn_lrn.cc @@ -29,47 +29,32 @@ CudnnLRN::~CudnnLRN() { CUDNN_CHECK(cudnnDestroyTensorDescriptor(shape_desc_)); } } -void CudnnLRN::InitCudnn(const Shape& shape , DataType dtype) { +void CudnnLRN::InitCudnn(const Shape& shape, DataType dtype) { CHECK(!has_init_cudnn_); mode_ = CUDNN_LRN_CROSS_CHANNEL_DIM1; CUDNN_CHECK(cudnnCreateTensorDescriptor(&shape_desc_)); CHECK_EQ(shape.size(), 4u); - CUDNN_CHECK(cudnnSetTensor4dDescriptor(shape_desc_, - CUDNN_TENSOR_NCHW, - GetCudnnDataType(dtype), - shape[0], - shape[1], - shape[2], - shape[3])); + CUDNN_CHECK(cudnnSetTensor4dDescriptor(shape_desc_, CUDNN_TENSOR_NCHW, + GetCudnnDataType(dtype), shape[0], + shape[1], 
shape[2], shape[3])); CUDNN_CHECK(cudnnCreateLRNDescriptor(&lrn_desc_)); - CUDNN_CHECK(cudnnSetLRNDescriptor(lrn_desc_, - local_size_, - alpha_, - beta_, - k_)); + CUDNN_CHECK(cudnnSetLRNDescriptor(lrn_desc_, local_size_, alpha_, beta_, k_)); has_init_cudnn_ = true; } const Tensor CudnnLRN::Forward(int flag, const Tensor& input) { auto shape = input.shape(); auto dtype = input.data_type(); - if (!has_init_cudnn_) - InitCudnn(shape, dtype); + if (!has_init_cudnn_) InitCudnn(shape, dtype); Tensor output; output.ResetLike(input); - output.device()->Exec( - [=](Context* ctx) { - Blob *inblob = input.blob(), *outblob = output.blob(); - const float alpha = 1.0f, beta = 0.0f; - CUDNN_CHECK(cudnnLRNCrossChannelForward(ctx->cudnn_handle, - this->lrn_desc_, - this->mode_, - &alpha, - this->shape_desc_, - inblob->data(), - &beta, - this->shape_desc_, - outblob->mutable_data())); - }, {input.blob()}, {output.blob()}); + output.device()->Exec([=](Context* ctx) { + Block* inblock = input.block(), * outblock = output.block(); + const float alpha = 1.0f, beta = 0.0f; + CUDNN_CHECK(cudnnLRNCrossChannelForward( + ctx->cudnn_handle, this->lrn_desc_, this->mode_, &alpha, + this->shape_desc_, inblock->data(), &beta, this->shape_desc_, + outblock->mutable_data())); + }, {input.block()}, {output.block()}); if (flag & kTrain) { buf_.push(input); @@ -78,9 +63,9 @@ const Tensor CudnnLRN::Forward(int flag, const Tensor& input) { return output; } -const std::pair<Tensor, vector<Tensor>> CudnnLRN::Backward( - int flag, const Tensor& grad) { - vector <Tensor> param_grad; +const std::pair<Tensor, vector<Tensor>> CudnnLRN::Backward(int flag, + const Tensor& grad) { + vector<Tensor> param_grad; Tensor dx; CHECK(!buf_.empty()); Tensor output = buf_.top(); @@ -89,25 +74,16 @@ const std::pair<Tensor, vector<Tensor>> CudnnLRN::Backward( buf_.pop(); if ((flag & kTrain) == kTrain) { dx.ResetLike(grad); - dx.device()->Exec( - [=](Context *ctx) { - Blob *dyblob = grad.blob(), *dxblob = dx.blob(); - Blob *yblob = output.blob(), *xblob = input.blob(); - float alpha = 1.0f, beta = 0.0f; - CUDNN_CHECK(cudnnLRNCrossChannelBackward(ctx->cudnn_handle, - this->lrn_desc_, - this->mode_, - &alpha, - this->shape_desc_, - yblob->data(), - this->shape_desc_, - dyblob->data(), - this->shape_desc_, - xblob->data(), - &beta, - this->shape_desc_, - dxblob->mutable_data())); - }, {output.blob(), grad.blob(), input.blob()}, {dx.blob()}); + dx.device()->Exec([=](Context* ctx) { + Block* dyblock = grad.block(), * dxblock = dx.block(); + Block* yblock = output.block(), * xblock = input.block(); + float alpha = 1.0f, beta = 0.0f; + CUDNN_CHECK(cudnnLRNCrossChannelBackward( + ctx->cudnn_handle, this->lrn_desc_, this->mode_, &alpha, + this->shape_desc_, yblock->data(), this->shape_desc_, dyblock->data(), + this->shape_desc_, xblock->data(), &beta, this->shape_desc_, + dxblock->mutable_data())); + }, {output.block(), grad.block(), input.block()}, {dx.block()}); } else { LOG(ERROR) << "Do not call backward for evaluation phase"; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/model/layer/cudnn_pooling.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_pooling.cc b/src/model/layer/cudnn_pooling.cc index 842685d..fb8256a 100644 --- a/src/model/layer/cudnn_pooling.cc +++ b/src/model/layer/cudnn_pooling.cc @@ -41,7 +41,7 @@ void CudnnPooling::Setup(const LayerConf &conf) { nan_prop_ = CUDNN_NOT_PROPAGATE_NAN; } -void CudnnPooling::InitCudnn(const Tensor& input) { +void 
CudnnPooling::InitCudnn(const Tensor &input) { CHECK(!has_init_cudnn_); DataType dtype = input.data_type(); size_t batchsize = input.shape(0); @@ -53,8 +53,8 @@ void CudnnPooling::InitCudnn(const Tensor& input) { GetCudnnDataType(dtype), batchsize, channels_, height_, width_)); CUDNN_CHECK(cudnnSetTensor4dDescriptor( - y_desc_, CUDNN_TENSOR_NCHW, GetCudnnDataType(dtype), batchsize, - channels_, pooled_height_, pooled_width_)); + y_desc_, CUDNN_TENSOR_NCHW, GetCudnnDataType(dtype), batchsize, channels_, + pooled_height_, pooled_width_)); auto pool_method = CUDNN_POOLING_MAX; if (pool_ == PoolingConf_PoolMethod_MAX) pool_method = CUDNN_POOLING_MAX; @@ -87,15 +87,13 @@ const Tensor CudnnPooling::Forward(int flag, const Tensor &input) { Shape shape{batchsize, channels_, pooled_height_, pooled_width_}; Tensor output = Tensor(shape, dev, dtype); - output.device()->Exec( - [input, output, this](Context *ctx) { - Blob *inblob = input.blob(), *outblob = output.blob(); - float alpha = 1.0f, beta = 0.0f; - cudnnPoolingForward(ctx->cudnn_handle, this->pool_desc_, &alpha, - this->x_desc_, inblob->data(), &beta, this->y_desc_, - outblob->mutable_data()); - }, - {input.blob()}, {output.blob()}); + output.device()->Exec([input, output, this](Context *ctx) { + Block *inblock = input.block(), *outblock = output.block(); + float alpha = 1.0f, beta = 0.0f; + cudnnPoolingForward(ctx->cudnn_handle, this->pool_desc_, &alpha, + this->x_desc_, inblock->data(), &beta, this->y_desc_, + outblock->mutable_data()); + }, {input.block()}, {output.block()}); if (flag & kTrain) { buf_.push(input); buf_.push(output); @@ -116,17 +114,15 @@ const std::pair<Tensor, vector<Tensor>> CudnnPooling::Backward( Tensor dx; dx.ResetLike(x); - dx.device()->Exec( - [dx, grad, x, y, this](Context *ctx) { - Blob *dyblob = grad.blob(), *dxblob = dx.blob(), *yblob = y.blob(), - *xblob = x.blob(); - float alpha = 1.0f, beta = 0.0f; - cudnnPoolingBackward(ctx->cudnn_handle, this->pool_desc_, &alpha, - this->y_desc_, yblob->data(), this->y_desc_, - dyblob->data(), this->x_desc_, xblob->data(), - &beta, this->x_desc_, dxblob->mutable_data()); - }, - {grad.blob(), y.blob(), x.blob()}, {dx.blob()}); + dx.device()->Exec([dx, grad, x, y, this](Context *ctx) { + Block *dyblock = grad.block(), *dxblock = dx.block(), *yblock = y.block(), + *xblock = x.block(); + float alpha = 1.0f, beta = 0.0f; + cudnnPoolingBackward(ctx->cudnn_handle, this->pool_desc_, &alpha, + this->y_desc_, yblock->data(), this->y_desc_, + dyblock->data(), this->x_desc_, xblock->data(), &beta, + this->x_desc_, dxblock->mutable_data()); + }, {grad.block(), y.block(), x.block()}, {dx.block()}); return std::make_pair(dx, param_grad); } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/model/layer/cudnn_softmax.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_softmax.cc b/src/model/layer/cudnn_softmax.cc index 85b0c3d..16d4022 100644 --- a/src/model/layer/cudnn_softmax.cc +++ b/src/model/layer/cudnn_softmax.cc @@ -47,14 +47,13 @@ const Tensor CudnnSoftmax::Forward(int flag, const Tensor& input) { Tensor output; output.ResetLike(input); output.device()->Exec([input, output, this](Context* ctx) { - Blob* inblob = input.blob(), * outblob = output.blob(); + Block* inblock = input.block(), * outblock = output.block(); float alpha = 1.0f, beta = 0.0f; cudnnSoftmaxForward(ctx->cudnn_handle, this->algorithm_, this->mode_, - &alpha, this->desc_, inblob->data(), &beta, this->desc_, - outblob->mutable_data()); - }, 
{input.blob()}, {output.blob()}); - if (flag & kTrain) - buf_.push(output); + &alpha, this->desc_, inblock->data(), &beta, + this->desc_, outblock->mutable_data()); + }, {input.block()}, {output.block()}); + if (flag & kTrain) buf_.push(output); return output; } @@ -66,13 +65,14 @@ const std::pair<Tensor, vector<Tensor>> CudnnSoftmax::Backward( buf_.pop(); dx.ResetLike(grad); dx.device()->Exec([dx, grad, output, this](Context* ctx) { - Blob* dyblob = grad.blob(), * dxblob = dx.blob(), * yblob = output.blob(); + Block* dyblock = grad.block(), * dxblock = dx.block(), + * yblock = output.block(); float alpha = 1.0f, beta = 0.0f; cudnnSoftmaxBackward(ctx->cudnn_handle, this->algorithm_, this->mode_, - &alpha, this->desc_, yblob->data(), this->desc_, - dyblob->data(), &beta, this->desc_, - dxblob->mutable_data()); - }, {grad.blob(), output.blob()}, {dx.blob()}); + &alpha, this->desc_, yblock->data(), this->desc_, + dyblock->data(), &beta, this->desc_, + dxblock->mutable_data()); + }, {grad.block(), output.block()}, {dx.block()}); return std::make_pair(dx, param_grad); } } // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/test/singa/test_cpp_cpu.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_cpp_cpu.cc b/test/singa/test_cpp_cpu.cc index 86654e1..ec5c7e1 100644 --- a/test/singa/test_cpp_cpu.cc +++ b/test/singa/test_cpp_cpu.cc @@ -24,7 +24,7 @@ #include "singa/proto/core.pb.h" using singa::CppCPU; -using singa::Blob; +using singa::Block; TEST(CppCPU, Constructor) { CppCPU dev(0, 1); EXPECT_EQ(0, dev.id()); @@ -32,15 +32,15 @@ TEST(CppCPU, Constructor) { TEST(CppCPU, MemoryMallocFree) { CppCPU dev(0, 1); - Blob* b = dev.NewBlob(4); + Block* b = dev.NewBlock(4); EXPECT_NE(nullptr, b); EXPECT_EQ(4u, b->size()); - dev.FreeBlob(b); + dev.FreeBlock(b); } TEST(CppCPU, Exec) { CppCPU dev(0, 1); - Blob* b = dev.NewBlob(4); + Block* b = dev.NewBlock(4); int x = 1, y =3, z = 0; dev.Exec([x, y, &z](singa::Context *ctx) { z = x + y; @@ -50,7 +50,7 @@ TEST(CppCPU, Exec) { TEST(CppCPU, CopyData) { CppCPU dev(0, 1); - Blob* b = dev.NewBlob(4); + Block* b = dev.NewBlock(4); char s[] = {'a', 'b', 'c', 'x'}; dev.CopyDataFromHostPtr(b, s, 4); const char* bstr = static_cast<const char*>(b->data()); @@ -58,14 +58,14 @@ TEST(CppCPU, CopyData) { EXPECT_EQ('b', bstr[1]); EXPECT_EQ('x', bstr[3]); - Blob* c = dev.NewBlob(4); + Block* c = dev.NewBlock(4); dev.CopyDataToFrom(c, b, 4, singa::kHostToHost, 0, 0); const char* cstr = static_cast<const char*>(c->data()); EXPECT_EQ('a', cstr[0]); EXPECT_EQ('b', cstr[1]); EXPECT_EQ('x', cstr[3]); - dev.FreeBlob(b); - dev.FreeBlob(c); + dev.FreeBlock(b); + dev.FreeBlock(c); } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/test/singa/test_tensor.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_tensor.cc b/test/singa/test_tensor.cc index bd039ad..2cce336 100644 --- a/test/singa/test_tensor.cc +++ b/test/singa/test_tensor.cc @@ -18,17 +18,17 @@ TEST(TensorTest, TestConstructor) { singa::Tensor float16_t(Shape{2,3}, singa::kFloat16); EXPECT_EQ(singa::kFloat16, float16_t.data_type()); EXPECT_EQ(6u, float16_t.Size()); - EXPECT_EQ(12u, float16_t.blob()->size()); + EXPECT_EQ(12u, float16_t.block()->size()); singa::Tensor x(float16_t); EXPECT_EQ(float16_t.Size(), x.Size()); - EXPECT_EQ(float16_t.blob(), x.blob()); + EXPECT_EQ(float16_t.block(), x.block()); EXPECT_EQ(float16_t.data_type(), x.data_type()); 
EXPECT_EQ(float16_t.device(), x.device()); singa::Tensor y = float16_t; EXPECT_EQ(float16_t.Size(), x.Size()); - EXPECT_EQ(float16_t.blob(), x.blob()); + EXPECT_EQ(float16_t.block(), x.block()); EXPECT_EQ(float16_t.data_type(), x.data_type()); EXPECT_EQ(float16_t.device(), x.device()); } @@ -69,7 +69,7 @@ TEST(TensorClass, CopyDataFromHostPtr) { float data[] = {1.0f, 2.0f, 3.0f}; Tensor t(Shape{3}); t.CopyDataFromHostPtr(data, 3); - const float* dptr = static_cast<const float*>(t.blob()->data()); + const float* dptr = static_cast<const float*>(t.block()->data()); EXPECT_FLOAT_EQ(1.0f, dptr[0]); EXPECT_FLOAT_EQ(2.0f, dptr[1]); EXPECT_FLOAT_EQ(3.0f, dptr[2]); @@ -82,7 +82,7 @@ TEST(TensorClass, CopyData) { Tensor o(Shape{3}); o.CopyData(t); - const float* dptr = static_cast<const float*>(o.blob()->data()); + const float* dptr = static_cast<const float*>(o.block()->data()); EXPECT_FLOAT_EQ(1.0f, dptr[0]); EXPECT_FLOAT_EQ(2.0f, dptr[1]); EXPECT_FLOAT_EQ(3.0f, dptr[2]); @@ -94,7 +94,7 @@ TEST(TensorClass, Clone) { t.CopyDataFromHostPtr(data, 3); Tensor o = t.Clone(); - const float* dptr = static_cast<const float*>(o.blob()->data()); + const float* dptr = static_cast<const float*>(o.block()->data()); EXPECT_FLOAT_EQ(1.0f, dptr[0]); EXPECT_FLOAT_EQ(2.0f, dptr[1]); EXPECT_FLOAT_EQ(3.0f, dptr[2]); @@ -105,7 +105,7 @@ TEST(TensorClass, T) { EXPECT_FALSE(t.transpose()); Tensor o = t.T(); EXPECT_EQ(true, o.transpose()); - EXPECT_EQ(t.blob(), o.blob()); + EXPECT_EQ(t.block(), o.block()); EXPECT_EQ(t.data_type(), o.data_type()); EXPECT_EQ(t.shape()[0], o.shape()[1]); EXPECT_EQ(t.shape()[1], o.shape()[0]);
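On the layer side, the same rename flows into the read/write dependency lists passed to Device::Exec: the lambdas capture Tensors, fetch raw memory through block()->data() / block()->mutable_data(), and the two trailing argument lists name the Blocks the call reads and writes. A minimal sketch of that scheduling pattern, mirroring the CudnnActivation::Forward hunk above (acti_desc_ and desc_ are the layer's cuDNN descriptors; only the cuDNN v5 branch is shown):

    Tensor output;
    output.ResetLike(input);
    output.device()->Exec([input, output, this](Context* ctx) {
      Block* inblock = input.block(), * outblock = output.block();
      float alpha = 1.0f, beta = 0.0f;
      // cuDNN 5 API; the v4 branch calls cudnnActivationForward_v4 with the same arguments
      CUDNN_CHECK(cudnnActivationForward(
          ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_,
          inblock->data(), &beta, this->desc_, outblock->mutable_data()));
    }, {input.block()}, {output.block()});  // read set, then write set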
