Repository: incubator-singa
Updated Branches:
  refs/heads/dev 4d596dde8 -> f488070e3


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/core/tensor/tensor_math_cuda.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cuda.h b/src/core/tensor/tensor_math_cuda.h
index 9a8839e..3488b55 100644
--- a/src/core/tensor/tensor_math_cuda.h
+++ b/src/core/tensor/tensor_math_cuda.h
@@ -32,7 +32,7 @@ namespace singa {
 
 /// out[i] = |in[i]|
 template <>
-void Abs<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
+void Abs<float, lang::Cuda>(const size_t num, const Block* in, Block* out,
                             Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -40,16 +40,16 @@ void Abs<float, lang::Cuda>(const size_t num, const Blob* 
in, Blob* out,
 }
 /// out = in + x
 template <>
-void Add<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
-                            Blob* out, Context* ctx) {
+void Add<float, lang::Cuda>(const size_t num, const Block* in, const float x,
+                            Block* out, Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
   cuda::add(num, inPtr, x, outPtr, ctx->stream);
 }
 /// out = in1 + in2
 template <>
-void Add<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2,
-                            Blob* out, Context* ctx) {
+void Add<float, lang::Cuda>(const size_t num, const Block* in1,
+                            const Block* in2, Block* out, Context* ctx) {
   const float* inPtr1 = static_cast<const float*>(in1->data());
   const float* inPtr2 = static_cast<const float*>(in2->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -59,7 +59,7 @@ void Add<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2,
 /// if x>high, then x=high; if x<low, then x=low.
 template <>
 void Clamp<float, lang::Cuda>(const size_t num, const float low,
-                              const float high, const Blob* in, Blob* out,
+                              const float high, const Block* in, Block* out,
                               Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -67,8 +67,8 @@ void Clamp<float, lang::Cuda>(const size_t num, const float low,
 }
 /// out = in1 / in2
 template <>
-void Div<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2,
-                            Blob* out, Context* ctx) {
+void Div<float, lang::Cuda>(const size_t num, const Block* in1,
+                            const Block* in2, Block* out, Context* ctx) {
   const float* inPtr1 = static_cast<const float*>(in1->data());
   const float* inPtr2 = static_cast<const float*>(in2->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -76,8 +76,8 @@ void Div<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2,
 }
 
 template <>
-void Div<float, lang::Cuda>(const size_t num, const float x, const Blob* in,
-                            Blob* out, Context* ctx) {
+void Div<float, lang::Cuda>(const size_t num, const float x, const Block* in,
+                            Block* out, Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
   cuda::div(num, x, inPtr, outPtr, ctx->stream);
@@ -85,16 +85,17 @@ void Div<float, lang::Cuda>(const size_t num, const float x, const Blob* in,
 
 /// out = in * x
 template <>
-void EltwiseMult<float, lang::Cuda>(const size_t num, const Blob* in,
-                                    const float x, Blob* out, Context* ctx) {
+void EltwiseMult<float, lang::Cuda>(const size_t num, const Block* in,
+                                    const float x, Block* out, Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
   cuda::mult(num, inPtr, x, outPtr, ctx->stream);
 }
 /// out = in1 * in2
 template <>
-void EltwiseMult<float, lang::Cuda>(const size_t num, const Blob* in1,
-                                    const Blob* in2, Blob* out, Context* ctx) {
+void EltwiseMult<float, lang::Cuda>(const size_t num, const Block* in1,
+                                    const Block* in2, Block* out,
+                                    Context* ctx) {
   const float* inPtr1 = static_cast<const float*>(in1->data());
   const float* inPtr2 = static_cast<const float*>(in2->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -102,7 +103,7 @@ void EltwiseMult<float, lang::Cuda>(const size_t num, const Blob* in1,
 }
 /// Base is e. out[i]=e^in[i]
 template <>
-void Exp<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
+void Exp<float, lang::Cuda>(const size_t num, const Block* in, Block* out,
                             Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -110,24 +111,24 @@ void Exp<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
 }
 
 template <>
-void GE<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
-                           Blob* out, Context* ctx) {
+void GE<float, lang::Cuda>(const size_t num, const Block* in, const float x,
+                           Block* out, Context* ctx) {
   float* outPtr = static_cast<float*>(out->mutable_data());
   const float* inPtr = static_cast<const float*>(in->data());
   cuda::ge(num, inPtr, x, outPtr, ctx->stream);
 }
 
 template <>
-void GT<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
-                           Blob* out, Context* ctx) {
+void GT<float, lang::Cuda>(const size_t num, const Block* in, const float x,
+                           Block* out, Context* ctx) {
   float* outPtr = static_cast<float*>(out->mutable_data());
   const float* inPtr = static_cast<const float*>(in->data());
   cuda::gt(num, inPtr, x, outPtr, ctx->stream);
 }
 
 template <>
-void LE<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
-                           Blob* out, Context* ctx) {
+void LE<float, lang::Cuda>(const size_t num, const Block* in, const float x,
+                           Block* out, Context* ctx) {
   float* outPtr = static_cast<float*>(out->mutable_data());
   const float* inPtr = static_cast<const float*>(in->data());
   cuda::le(num, inPtr, x, outPtr, ctx->stream);
@@ -135,15 +136,15 @@ void LE<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
 
 /// Natual logarithm, the base is e, Neper number out[i]=ln(in[i]).
 template <>
-void Log<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
+void Log<float, lang::Cuda>(const size_t num, const Block* in, Block* out,
                             Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
   cuda::log(num, inPtr, outPtr, ctx->stream);
 }
 template <>
-void LT<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
-                           Blob* out, Context* ctx) {
+void LT<float, lang::Cuda>(const size_t num, const Block* in, const float x,
+                           Block* out, Context* ctx) {
   float* outPtr = static_cast<float*>(out->mutable_data());
   const float* inPtr = static_cast<const float*>(in->data());
   cuda::lt(num, inPtr, x, outPtr, ctx->stream);
@@ -151,16 +152,16 @@ void LT<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
 
 /// Element-wise operation, out[i] = in[i]^x
 template <>
-void Pow<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
-                            Blob* out, Context* ctx) {
+void Pow<float, lang::Cuda>(const size_t num, const Block* in, const float x,
+                            Block* out, Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
   cuda::pow(num, inPtr, x, outPtr, ctx->stream);
 }
 /// Element-wise operation, out[i] = in1[i]^in2[i]
 template <>
-void Pow<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2,
-                            Blob* out, Context* ctx) {
+void Pow<float, lang::Cuda>(const size_t num, const Block* in1,
+                            const Block* in2, Block* out, Context* ctx) {
   const float* inPtr1 = static_cast<const float*>(in1->data());
   const float* inPtr2 = static_cast<const float*>(in2->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -169,7 +170,7 @@ void Pow<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2,
 
 /// Element-wise operation, out[i]=max(0, in[i])
 template <>
-void ReLU<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
+void ReLU<float, lang::Cuda>(const size_t num, const Block* in, Block* out,
                              Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -178,14 +179,14 @@ void ReLU<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
 
 /// out[i] = x
 template <>
-void Set<float, lang::Cuda>(const size_t num, const float x, Blob* out,
+void Set<float, lang::Cuda>(const size_t num, const float x, Block* out,
                             Context* ctx) {
   float* outPtr = static_cast<float*>(out->mutable_data());
   cuda::set(num, x, outPtr, ctx->stream);
 }
 /// Element-wise operation, out[i]=sigmoid([in[i])
 template <>
-void Sigmoid<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
+void Sigmoid<float, lang::Cuda>(const size_t num, const Block* in, Block* out,
                                 Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -193,7 +194,7 @@ void Sigmoid<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
 }
 // out[i] = sign(in[i])
 template <>
-void Sign<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
+void Sign<float, lang::Cuda>(const size_t num, const Block* in, Block* out,
                              Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -202,7 +203,7 @@ void Sign<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
 
 /// Element-wise operation, out[i]=sqrt([in[i])
 template <>
-void Sqrt<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
+void Sqrt<float, lang::Cuda>(const size_t num, const Block* in, Block* out,
                              Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -211,7 +212,7 @@ void Sqrt<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
 
 /// Element-wise operation, out[i]=in[i]^2
 template <>
-void Square<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
+void Square<float, lang::Cuda>(const size_t num, const Block* in, Block* out,
                                Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -219,8 +220,8 @@ void Square<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
 }
 /// out = in1 - in2
 template <>
-void Sub<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2,
-                            Blob* out, Context* ctx) {
+void Sub<float, lang::Cuda>(const size_t num, const Block* in1,
+                            const Block* in2, Block* out, Context* ctx) {
   const float* inPtr1 = static_cast<const float*>(in1->data());
   const float* inPtr2 = static_cast<const float*>(in2->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -229,7 +230,7 @@ void Sub<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2,
 
 /// sum all elements of input into out
 template <>
-void Sum<float, lang::Cuda>(const size_t num, const Blob* in, float* out,
+void Sum<float, lang::Cuda>(const size_t num, const Block* in, float* out,
                             Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   cuda::sum(num, inPtr, out, ctx->stream);
@@ -237,7 +238,7 @@ void Sum<float, lang::Cuda>(const size_t num, const Blob* in, float* out,
 
 /// Element-wise operation, out[i]=tanh([in[i])
 template <>
-void Tanh<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
+void Tanh<float, lang::Cuda>(const size_t num, const Block* in, Block* out,
                              Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -249,7 +250,7 @@ void Tanh<float, lang::Cuda>(const size_t num, const Blob* in, Blob* out,
 // Get the random generator from 'ctx'
 // If DType is not float, then convert the threshold to DType
 template <>
-void Bernoulli<float, lang::Cuda>(const size_t num, const float p, Blob* out,
+void Bernoulli<float, lang::Cuda>(const size_t num, const float p, Block* out,
                                   Context* ctx) {
   auto rgen = ctx->curand_generator;
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -261,7 +262,7 @@ void Bernoulli<float, lang::Cuda>(const size_t num, const float p, Blob* out,
 // If DType is not float, then convert the low and high to DType
 template <>
 void Uniform<float, lang::Cuda>(const size_t num, const float low,
-                                const float high, Blob* out, Context* ctx) {
+                                const float high, Block* out, Context* ctx) {
   auto rgen = ctx->curand_generator;
   float* outPtr = static_cast<float*>(out->mutable_data());
   CURAND_CHECK(curandGenerateUniform(rgen, outPtr, num));
@@ -273,7 +274,7 @@ void Uniform<float, lang::Cuda>(const size_t num, const float low,
 // If DType is not float, then convert the mean and delta to DType
 template <>
 void Gaussian<float, lang::Cuda>(const size_t num, const float mean,
-                                 const float std, Blob* out, Context* ctx) {
+                                 const float std, Block* out, Context* ctx) {
   auto rgen = ctx->curand_generator;
   float* outPtr = static_cast<float*>(out->mutable_data());
   CURAND_CHECK(curandGenerateNormal(rgen, outPtr, num, mean, std));
@@ -282,7 +283,7 @@ void Gaussian<float, lang::Cuda>(const size_t num, const float mean,
 // =========================Blas operations==================================
 // ref to http://docs.nvidia.com/cuda/cublas
 template <>
-void Amax<float, lang::Cuda>(const size_t num, const Blob* in, size_t* out,
+void Amax<float, lang::Cuda>(const size_t num, const Block* in, size_t* out,
                              Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   auto handle = ctx->cublas_handle;  // TODO(wangwei) set cudastream
@@ -293,7 +294,7 @@ void Amax<float, lang::Cuda>(const size_t num, const Blob* in, size_t* out,
 
 /// return the index of the element with the min value.
 template <>
-void Amin<float, lang::Cuda>(const size_t num, const Blob* in, size_t* out,
+void Amin<float, lang::Cuda>(const size_t num, const Block* in, size_t* out,
                              Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   auto handle = ctx->cublas_handle;  // TODO(wangwei) set cudastream
@@ -304,7 +305,7 @@ void Amin<float, lang::Cuda>(const size_t num, const Blob* in, size_t* out,
 
 /// out = sum |x| for all x in in
 template <>
-void Asum<float, lang::Cuda>(const size_t num, const Blob* in, float* out,
+void Asum<float, lang::Cuda>(const size_t num, const Block* in, float* out,
                              Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   auto handle = ctx->cublas_handle;  // TODO(wangwei) set cudastream
@@ -314,7 +315,7 @@ void Asum<float, lang::Cuda>(const size_t num, const Blob* in, float* out,
 /// out = alpha * in + out
 template <>
 void Axpy<float, lang::Cuda>(const size_t num, const float alpha,
-                             const Blob* in, Blob* out, Context* ctx) {
+                             const Block* in, Block* out, Context* ctx) {
   const float* inPtr = static_cast<const float*>(in->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
   auto handle = ctx->cublas_handle;  // TODO(wangwei) set cudastream
@@ -323,22 +324,22 @@ void Axpy<float, lang::Cuda>(const size_t num, const float alpha,
 
 /// out = \sum_i in1[i] * in2[i]
 template <>
-void Dot<float, lang::Cuda>(const size_t num, const Blob* in1, const Blob* in2,
-                            float* out, Context* ctx) {
+void Dot<float, lang::Cuda>(const size_t num, const Block* in1,
+                            const Block* in2, float* out, Context* ctx) {
   const float* inPtr1 = static_cast<const float*>(in1->data());
   const float* inPtr2 = static_cast<const float*>(in2->data());
   auto handle = ctx->cublas_handle;  // TODO(wangwei) set cudastream
   CUBLAS_CHECK(cublasSdot(handle, num, inPtr1, 1, inPtr2, 1, out));
 }
 template <>
-void Nrm2<float, lang::Cuda>(const size_t num, const Blob* in, float* out,
+void Nrm2<float, lang::Cuda>(const size_t num, const Block* in, float* out,
                              Context* ctx) {
   auto handle = ctx->cublas_handle;  // TODO(wangwei) set cudastream
   const float* inPtr = static_cast<const float*>(in->data());
   cublasSnrm2(handle, num, inPtr, 1, out);
 }
 template <>
-void Scale<float, lang::Cuda>(const size_t num, const float x, Blob* out,
+void Scale<float, lang::Cuda>(const size_t num, const float x, Block* out,
                               Context* ctx) {
   auto handle = ctx->cublas_handle;  // TODO(wangwei) set cudastream
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -348,8 +349,8 @@ void Scale<float, lang::Cuda>(const size_t num, const float x, Blob* out,
 // http://peterwittek.com/cublas-matrix-c-style.html
 template <>
 void DGMM<float, lang::Cuda>(const bool side_right, const size_t nrow,
-                             const size_t ncol, const Blob* M, const Blob* v,
-                             Blob* out, Context* ctx) {
+                             const size_t ncol, const Block* M, const Block* v,
+                             Block* out, Context* ctx) {
   auto handle = ctx->cublas_handle;  // TODO(wangwei) set cudastream
   const float* MPtr = static_cast<const float*>(M->data());
   const float* vPtr = static_cast<const float*>(v->data());
@@ -364,8 +365,8 @@ void DGMM<float, lang::Cuda>(const bool side_right, const size_t nrow,
 }
 template <>
 void GEMV<float, lang::Cuda>(bool trans, const size_t m, const size_t n,
-                             const float alpha, const Blob* A, const Blob* v,
-                             const float beta, Blob* out, Context* ctx) {
+                             const float alpha, const Block* A, const Block* v,
+                             const float beta, Block* out, Context* ctx) {
   const float* APtr = static_cast<const float*>(A->data());
   const float* vPtr = static_cast<const float*>(v->data());
   float* outPtr = static_cast<float*>(out->mutable_data());
@@ -383,8 +384,8 @@ template <>
 void GEMM<float, lang::Cuda>(const bool transA, const bool transB,
                              const size_t nrowA, const size_t ncolB,
                              const size_t ncolA, const float alpha,
-                             const Blob* A, const Blob* B, const float beta,
-                             Blob* C, Context* ctx) {
+                             const Block* A, const Block* B, const float beta,
+                             Block* C, Context* ctx) {
   auto transa = transA ? CUBLAS_OP_T : CUBLAS_OP_N;
   auto transb = transB ? CUBLAS_OP_T : CUBLAS_OP_N;
   int lda = transA ? nrowA : ncolA;
@@ -400,23 +401,23 @@ void GEMM<float, lang::Cuda>(const bool transA, const bool transB,
 
 template <>
 void ComputeCrossEntropy<float, lang::Cuda>(const size_t batchsize,
-                                            const size_t dim, const Blob *p,
-                                            const Blob *t, Blob *loss,
-                                            Context *ctx) {
-  const float *pPtr = static_cast<const float *>(p->data());
-  const int *tPtr = static_cast<const int *>(t->data());
-  float *lossPtr = static_cast<float *>(loss->mutable_data());
+                                            const size_t dim, const Block* p,
+                                            const Block* t, Block* loss,
+                                            Context* ctx) {
+  const float* pPtr = static_cast<const float*>(p->data());
+  const int* tPtr = static_cast<const int*>(t->data());
+  float* lossPtr = static_cast<float*>(loss->mutable_data());
   cuda::ComputeCrossEntropy(batchsize, dim, pPtr, tPtr, lossPtr, ctx->stream);
 }
 template <>
 void SoftmaxCrossEntropyBwd<float, lang::Cuda>(const size_t batchsize,
-                                               const size_t dim, const Blob *p,
-                                               const Blob *t, Blob *grad,
-                                               Context *ctx) {
+                                               const size_t dim, const Block* p,
+                                               const Block* t, Block* grad,
+                                               Context* ctx) {
   CHECK_EQ(p, grad) << "Use the same pointer to optimize performance";
-  const float *pPtr = static_cast<const float *>(p->data());
-  const int *tPtr = static_cast<const int *>(t->data());
-  float *gradPtr = static_cast<float *>(grad->mutable_data());
+  const float* pPtr = static_cast<const float*>(p->data());
+  const int* tPtr = static_cast<const int*>(t->data());
+  float* gradPtr = static_cast<float*>(grad->mutable_data());
   cuda::SoftmaxCrossEntropyBwd(batchsize, dim, pPtr, tPtr, gradPtr,
                                ctx->stream);
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/model/layer/cudnn_activation.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_activation.cc b/src/model/layer/cudnn_activation.cc
index 8ecbbc7..98a5758 100644
--- a/src/model/layer/cudnn_activation.cc
+++ b/src/model/layer/cudnn_activation.cc
@@ -63,18 +63,18 @@ const Tensor CudnnActivation::Forward(int flag, const Tensor& input) {
   Tensor output;
   output.ResetLike(input);
   output.device()->Exec([input, output, this](Context* ctx) {
-    Blob* inblob = input.blob(), * outblob = output.blob();
+    Block* inblock = input.block(), * outblock = output.block();
     float alpha = 1.0f, beta = 0.0f;
 #if CUDNN_VERSION_MAJOR == 5
     CUDNN_CHECK(cudnnActivationForward(
         ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_,
-        inblob->data(), &beta, this->desc_, outblob->mutable_data()));
+        inblock->data(), &beta, this->desc_, outblock->mutable_data()));
 #elif CUDNN_VERSION_MAJOR == 4
     CUDNN_CHECK(cudnnActivationForward_v4(
         ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_,
-        inblob->data(), &beta, this->desc_, outblob->mutable_data()));
+        inblock->data(), &beta, this->desc_, outblock->mutable_data()));
 #endif
-  }, {input.blob()}, {output.blob()});
+  }, {input.block()}, {output.block()});
   if (flag & kTrain) {
     if (cudnn_mode_ == CUDNN_ACTIVATION_SIGMOID ||
         cudnn_mode_ == CUDNN_ACTIVATION_TANH) {
@@ -97,21 +97,21 @@ const std::pair<Tensor, vector<Tensor>> CudnnActivation::Backward(
   buf_.pop();
   dx.ResetLike(grad);
   dx.device()->Exec([dx, grad, inout, this](Context* ctx) {
-    Blob* dyblob = grad.blob(), * dxblob = dx.blob(), * yblob = inout.blob(),
-          * xblob = inout.blob();
+    Block* dyblock = grad.block(), * dxblock = dx.block(),
+           * yblock = inout.block(), * xblock = inout.block();
     float alpha = 1.0f, beta = 0.0f;
 #if CUDNN_VERSION_MAJOR == 5
     CUDNN_CHECK(cudnnActivationBackward(
-        ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_, yblob->data(),
-        this->desc_, dyblob->data(), this->desc_, xblob->data(), &beta,
-        this->desc_, dxblob->mutable_data()));
+        ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_,
+        yblock->data(), this->desc_, dyblock->data(), this->desc_,
+        xblock->data(), &beta, this->desc_, dxblock->mutable_data()));
 #elif CUDNN_VERSION_MAJOR == 4
     CUDNN_CHECK(cudnnActivationBackward_v4(
-        ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_, yblob->data(),
-        this->desc_, dyblob->data(), this->desc_, xblob->data(), &beta,
-        this->desc_, dxblob->mutable_data()));
+        ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_, yblock->data(),
+        this->desc_, dyblock->data(), this->desc_, xblock->data(), &beta,
+        this->desc_, dxblock->mutable_data()));
 #endif
-  }, {grad.blob(), inout.blob()}, {dx.blob()});
+  }, {grad.block(), inout.block()}, {dx.block()});
   return std::make_pair(dx, param_grad);
 }
 }  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/model/layer/cudnn_batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_batchnorm.cc b/src/model/layer/cudnn_batchnorm.cc
index 8288a41..1393916 100644
--- a/src/model/layer/cudnn_batchnorm.cc
+++ b/src/model/layer/cudnn_batchnorm.cc
@@ -81,13 +81,13 @@ const Tensor CudnnBatchNorm::Forward(int flag, const Tensor& input) {
   if ((flag & kTrain) == kTrain) {
     output.device()->Exec(
         [=](Context* ctx) {
-          Blob *inBlob = input.blob(), *outBlob = output.blob(),
-            *saveMeanBlob = resultSaveMean_.blob(),
-            *saveVarBlob = resultSaveVariance_.blob(),
-            *runningMeanBlob = runningMean_.blob(),
-            *runningVarBlob = runningVariance_.blob(),
-            *bnScaleBlob = bnScale_.blob(),
-            *bnBiasBlob = bnBias_.blob();
+          Block *inBlock = input.block(), *outBlock = output.block(),
+            *saveMeanBlock = resultSaveMean_.block(),
+            *saveVarBlock = resultSaveVariance_.block(),
+            *runningMeanBlock = runningMean_.block(),
+            *runningVarBlock = runningVariance_.block(),
+            *bnScaleBlock = bnScale_.block(),
+            *bnBiasBlock = bnBias_.block();
           const float alpha = 1.0f, beta = 0.0f;
           double epsilon = CUDNN_BN_MIN_EPSILON;
           CUDNN_CHECK(cudnnBatchNormalizationForwardTraining(
@@ -96,36 +96,36 @@ const Tensor CudnnBatchNorm::Forward(int flag, const Tensor& input) {
               &alpha,
               &beta,
               shape_desc_,
-              inBlob->data(),
+              inBlock->data(),
               shape_desc_,
-              outBlob->mutable_data(),
+              outBlock->mutable_data(),
               param_desc_,
-              bnScaleBlob->data(),
-              bnBiasBlob->data(),
+              bnScaleBlock->data(),
+              bnBiasBlock->data(),
               factor_,
-              runningMeanBlob->mutable_data(),
-              runningVarBlob->mutable_data(),
+              runningMeanBlock->mutable_data(),
+              runningVarBlock->mutable_data(),
               epsilon,
-              saveMeanBlob->mutable_data(),
-              saveVarBlob->mutable_data()));
+              saveMeanBlock->mutable_data(),
+              saveVarBlock->mutable_data()));
         },
-        {input.blob(),
-         bnScale_.blob(),
-         bnBias_.blob()},
-        {output.blob(),
-         runningMean_.blob(),
-         runningVariance_.blob(),
-         resultSaveMean_.blob(),
-         resultSaveVariance_.blob()});
+        {input.block(),
+         bnScale_.block(),
+         bnBias_.block()},
+        {output.block(),
+         runningMean_.block(),
+         runningVariance_.block(),
+         resultSaveMean_.block(),
+         resultSaveVariance_.block()});
     buf_.push(input);
   } else {
     output.device()->Exec(
         [=](Context* ctx) {
-          Blob *inBlob = input.blob(), *outBlob = output.blob(),
-            *runningMeanBlob = runningMean_.blob(),
-            *runningVarBlob = runningVariance_.blob(),
-            *bnScaleBlob = bnScale_.blob(),
-            *bnBiasBlob = bnBias_.blob();
+          Block *inBlock = input.block(), *outBlock = output.block(),
+            *runningMeanBlock = runningMean_.block(),
+            *runningVarBlock = runningVariance_.block(),
+            *bnScaleBlock = bnScale_.block(),
+            *bnBiasBlock = bnBias_.block();
           const float alpha = 1.0f, beta = 0.0f;
           double epsilon = CUDNN_BN_MIN_EPSILON;
           CUDNN_CHECK(cudnnBatchNormalizationForwardInference(
@@ -134,22 +134,22 @@ const Tensor CudnnBatchNorm::Forward(int flag, const Tensor& input) {
               &alpha,
               &beta,
               shape_desc_,
-              inBlob->data(),
+              inBlock->data(),
               shape_desc_,
-              outBlob->mutable_data(),
+              outBlock->mutable_data(),
               param_desc_,
-              bnScaleBlob->data(),
-              bnBiasBlob->data(),
-              runningMeanBlob->data(),
-              runningVarBlob->data(),
+              bnScaleBlock->data(),
+              bnBiasBlock->data(),
+              runningMeanBlock->data(),
+              runningVarBlock->data(),
               epsilon));
         },
-        {input.blob(),
-         bnScale_.blob(),
-         bnBias_.blob(),
-         runningMean_.blob(),
-         runningVariance_.blob()},
-        {output.blob()});
+        {input.block(),
+         bnScale_.block(),
+         bnBias_.block(),
+         runningMean_.block(),
+         runningVariance_.block()},
+        {output.block()});
   }
   return output;
 }
@@ -164,13 +164,13 @@ const std::pair<Tensor, vector<Tensor>> CudnnBatchNorm::Backward(
     dx.ResetLike(grad);
     dx.device()->Exec(
         [=](Context* ctx) {
-          Blob *dyblob = grad.blob(), *dxblob = dx.blob(),
-            *xblob = input.blob(),
-            *bnScaleBlob = bnScale_.blob(),
-            *dbnScaleBlob = dbnScale_.blob(),
-            *dbnBiasBlob = dbnBias_.blob(),
-            *saveMeanBlob = resultSaveMean_.blob(),
-            *saveVarBlob = resultSaveVariance_.blob();
+          Block *dyblock = grad.block(), *dxblock = dx.block(),
+            *xblock = input.block(),
+            *bnScaleBlock = bnScale_.block(),
+            *dbnScaleBlock = dbnScale_.block(),
+            *dbnBiasBlock = dbnBias_.block(),
+            *saveMeanBlock = resultSaveMean_.block(),
+            *saveVarBlock = resultSaveVariance_.block();
           const float alpha = 1.0f, beta = .0f;
           double epsilon = CUDNN_BN_MIN_EPSILON;
           CUDNN_CHECK(cudnnBatchNormalizationBackward(ctx->cudnn_handle,
@@ -180,28 +180,28 @@ const std::pair<Tensor, vector<Tensor>> 
CudnnBatchNorm::Backward(
               &alpha,
               &beta,
               shape_desc_,
-              xblob->data(),
+              xblock->data(),
               shape_desc_,
-              dyblob->data(),
+              dyblock->data(),
               shape_desc_,
-              dxblob->mutable_data(),
+              dxblock->mutable_data(),
               param_desc_,
-              bnScaleBlob->data(),
-              dbnScaleBlob->mutable_data(),
-              dbnBiasBlob->mutable_data(),
+              bnScaleBlock->data(),
+              dbnScaleBlock->mutable_data(),
+              dbnBiasBlock->mutable_data(),
               epsilon,
-              saveMeanBlob->data(),
-              saveVarBlob->data()));
+              saveMeanBlock->data(),
+              saveVarBlock->data()));
 
         },
-        {dx.blob(),
-         grad.blob(),
-         bnScale_.blob(),
-         resultSaveMean_.blob(),
-         resultSaveVariance_.blob()},
-        {dx.blob(),
-         dbnScale_.blob(),
-         dbnBias_.blob()});
+        {dx.block(),
+         grad.block(),
+         bnScale_.block(),
+         resultSaveMean_.block(),
+         resultSaveVariance_.block()},
+        {dx.block(),
+         dbnScale_.block(),
+         dbnBias_.block()});
   } else {
     LOG(ERROR) << "Do not call backward for evaluation phase";
   }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/model/layer/cudnn_convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_convolution.cc 
b/src/model/layer/cudnn_convolution.cc
index b80c3bd..efc7f88 100644
--- a/src/model/layer/cudnn_convolution.cc
+++ b/src/model/layer/cudnn_convolution.cc
@@ -167,30 +167,26 @@ const Tensor CudnnConvolution::Forward(int flag, const 
Tensor &input) {
 
   Shape shape{batchsize, num_filters_, conv_height_, conv_width_};
   Tensor output(shape, dev, dtype);
-  output.device()->Exec(
-      [input, output, this](Context *ctx) {
-        Blob *inblob = input.blob(), *outblob = output.blob(),
-             *wblob = this->weight_.blob();
-        float alpha = 1.f, beta = 0.f;
-        cudnnConvolutionForward(ctx->cudnn_handle, &alpha, this->x_desc_,
-                                inblob->data(), this->filter_desc_,
-                                wblob->data(), this->conv_desc_, this->fp_alg_,
-                                this->workspace_.blob()->mutable_data(),
-                                this->workspace_count_ * sizeof(float), &beta,
-                                this->y_desc_, outblob->mutable_data());
-      },
-      {input.blob(), weight_.blob()}, {output.blob()}, workspace_.blob());
+  output.device()->Exec([input, output, this](Context *ctx) {
+    Block *inblock = input.block(), *outblock = output.block(),
+          *wblock = this->weight_.block();
+    float alpha = 1.f, beta = 0.f;
+    cudnnConvolutionForward(ctx->cudnn_handle, &alpha, this->x_desc_,
+                            inblock->data(), this->filter_desc_, 
wblock->data(),
+                            this->conv_desc_, this->fp_alg_,
+                            this->workspace_.block()->mutable_data(),
+                            this->workspace_count_ * sizeof(float), &beta,
+                            this->y_desc_, outblock->mutable_data());
+  }, {input.block(), weight_.block()}, {output.block()}, workspace_.block());
 
   if (bias_term_) {
-    output.device()->Exec(
-        [output, this](Context *ctx) {
-          float beta = 1.f, alpha = 1.0f;
-          Blob *outblob = output.blob(), *bblob = this->bias_.blob();
-          cudnnAddTensor(ctx->cudnn_handle, &alpha, this->bias_desc_,
-                         bblob->data(), &beta, this->y_desc_,
-                         outblob->mutable_data());
-        },
-        {output.blob(), bias_.blob()}, {output.blob()});
+    output.device()->Exec([output, this](Context *ctx) {
+      float beta = 1.f, alpha = 1.0f;
+      Block *outblock = output.block(), *bblock = this->bias_.block();
+      cudnnAddTensor(ctx->cudnn_handle, &alpha, this->bias_desc_,
+                     bblock->data(), &beta, this->y_desc_,
+                     outblock->mutable_data());
+    }, {output.block(), bias_.block()}, {output.block()});
   }
   return output;
 }
@@ -212,45 +208,39 @@ const std::pair<Tensor, vector<Tensor>> 
CudnnConvolution::Backward(
 
   // LOG(ERROR) << "backward bias";
   if (bias_term_) {
-    dx.device()->Exec(
-        [grad, db, this](Context *ctx) {
-          Blob *dyblob = grad.blob(), *dbblob = db.blob();
-          float alpha = 1.f, beta = 0.f;
-          cudnnConvolutionBackwardBias(ctx->cudnn_handle, &alpha, 
this->y_desc_,
-                                       dyblob->data(), &beta, this->bias_desc_,
-                                       dbblob->mutable_data());
-        },
-        {grad.blob()}, {db.blob()});
+    dx.device()->Exec([grad, db, this](Context *ctx) {
+      Block *dyblock = grad.block(), *dbblock = db.block();
+      float alpha = 1.f, beta = 0.f;
+      cudnnConvolutionBackwardBias(ctx->cudnn_handle, &alpha, this->y_desc_,
+                                   dyblock->data(), &beta, this->bias_desc_,
+                                   dbblock->mutable_data());
+    }, {grad.block()}, {db.block()});
   }
   // LOG(ERROR) << "backward w";
-  dx.device()->Exec(
-      [grad, dw, src_data, this](Context *ctx) {
-        Blob *inblob = src_data.blob(), *dyblob = grad.blob(),
-             *dwblob = dw.blob();
-        float alpha = 1.f, beta = 0.f;
-        cudnnConvolutionBackwardFilter(
-            ctx->cudnn_handle, &alpha, this->x_desc_, inblob->data(),
-            this->y_desc_, dyblob->data(), this->conv_desc_,
-            this->bp_filter_alg_, this->workspace_.blob()->mutable_data(),
-            this->workspace_count_ * sizeof(float), &beta, this->filter_desc_,
-            dwblob->mutable_data());
-      },
-      {grad.blob(), src_data.blob()}, {dw.blob(), workspace_.blob()});
+  dx.device()->Exec([grad, dw, src_data, this](Context *ctx) {
+    Block *inblock = src_data.block(), *dyblock = grad.block(),
+          *dwblock = dw.block();
+    float alpha = 1.f, beta = 0.f;
+    cudnnConvolutionBackwardFilter(
+        ctx->cudnn_handle, &alpha, this->x_desc_, inblock->data(),
+        this->y_desc_, dyblock->data(), this->conv_desc_, this->bp_filter_alg_,
+        this->workspace_.block()->mutable_data(),
+        this->workspace_count_ * sizeof(float), &beta, this->filter_desc_,
+        dwblock->mutable_data());
+  }, {grad.block(), src_data.block()}, {dw.block(), workspace_.block()});
 
   // LOG(ERROR) << "backward src";
-  dx.device()->Exec(
-      [dx, grad, this](Context *ctx) {
-        Blob *wblob = this->weight_.blob(), *dyblob = grad.blob(),
-             *dxblob = dx.blob();
-        float alpha = 1.f, beta = 0.f;
-        cudnnConvolutionBackwardData(
-            ctx->cudnn_handle, &alpha, this->filter_desc_, wblob->data(),
-            this->y_desc_, dyblob->data(), this->conv_desc_, 
this->bp_data_alg_,
-            this->workspace_.blob()->mutable_data(),
-            this->workspace_count_ * sizeof(float), &beta, this->x_desc_,
-            dxblob->mutable_data());
-      },
-      {grad.blob(), weight_.blob()}, {dx.blob(), workspace_.blob()});
+  dx.device()->Exec([dx, grad, this](Context *ctx) {
+    Block *wblock = this->weight_.block(), *dyblock = grad.block(),
+          *dxblock = dx.block();
+    float alpha = 1.f, beta = 0.f;
+    cudnnConvolutionBackwardData(ctx->cudnn_handle, &alpha, this->filter_desc_,
+                                 wblock->data(), this->y_desc_, 
dyblock->data(),
+                                 this->conv_desc_, this->bp_data_alg_,
+                                 this->workspace_.block()->mutable_data(),
+                                 this->workspace_count_ * sizeof(float), &beta,
+                                 this->x_desc_, dxblock->mutable_data());
+  }, {grad.block(), weight_.block()}, {dx.block(), workspace_.block()});
   param_grad.push_back(dw);
   param_grad.push_back(db);
   return std::make_pair(dx, param_grad);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/model/layer/cudnn_dropout.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_dropout.cc b/src/model/layer/cudnn_dropout.cc
index 64a581b..877dd12 100644
--- a/src/model/layer/cudnn_dropout.cc
+++ b/src/model/layer/cudnn_dropout.cc
@@ -57,7 +57,7 @@ void CudnnDropout::InitCudnn(int size, DataType dtype, 
Device* dev,
   // TODO(wangwei) get seed from ctx or user config?
   auto seed = std::chrono::system_clock::now().time_since_epoch().count();
   cudnnSetDropoutDescriptor(drop_desc_, ctx->cudnn_handle, 1 - dropout_ratio_,
-                            state_.blob()->mutable_data(), state_size_, seed);
+                            state_.block()->mutable_data(), state_size_, seed);
   has_init_cudnn_ = true;
 }
 
@@ -67,24 +67,20 @@ const Tensor CudnnDropout::Forward(int flag, const Tensor& 
input) {
     DataType dtype = input.data_type();
     Device* dev = input.device();
     if (!has_init_cudnn_) {
-      input.device()->Exec(
-          [size, dtype, this, dev](Context* ctx) {
-            this->InitCudnn(size, dtype, dev, ctx);
-          },
-          {}, {this->state_.blob()});
+      input.device()->Exec([size, dtype, this, dev](Context* ctx) {
+        this->InitCudnn(size, dtype, dev, ctx);
+      }, {}, {this->state_.block()});
     }
     Tensor output;
     output.ResetLike(input);
-    output.device()->Exec(
-        [input, output, this](Context* ctx) {
-          Blob *inblob = input.blob(), *outblob = output.blob(),
-               *mblob = mask_.blob();
-          cudnnDropoutForward(ctx->cudnn_handle, this->drop_desc_,
-                              this->x_desc_, inblob->data(), this->y_desc_,
-                              outblob->mutable_data(), mblob->mutable_data(),
-                              this->reserve_size_);
-        },
-        {input.blob()}, {output.blob(), mask_.blob()});
+    output.device()->Exec([input, output, this](Context* ctx) {
+      Block* inblock = input.block(), * outblock = output.block(),
+             * mblock = mask_.block();
+      cudnnDropoutForward(ctx->cudnn_handle, this->drop_desc_, this->x_desc_,
+                          inblock->data(), this->y_desc_,
+                          outblock->mutable_data(), mblock->mutable_data(),
+                          this->reserve_size_);
+    }, {input.block()}, {output.block(), mask_.block()});
     return output;
   } else {
     return input;
@@ -97,16 +93,14 @@ const std::pair<Tensor, vector<Tensor>> 
CudnnDropout::Backward(
   Tensor dx;
   if (flag & kTrain) {
     dx.ResetLike(grad);
-    dx.device()->Exec(
-        [dx, grad, this](Context* ctx) {
-          Blob *dyblob = grad.blob(), *dxblob = dx.blob(),
-               *mblob = this->mask_.blob();
-          cudnnDropoutBackward(ctx->cudnn_handle, this->drop_desc_,
-                               this->y_desc_, dyblob->data(), this->x_desc_,
-                               dxblob->mutable_data(), mblob->mutable_data(),
-                               this->reserve_size_);
-        },
-        {grad.blob(), mask_.blob()}, {dx.blob()});
+    dx.device()->Exec([dx, grad, this](Context* ctx) {
+      Block* dyblock = grad.block(), * dxblock = dx.block(),
+             * mblock = this->mask_.block();
+      cudnnDropoutBackward(ctx->cudnn_handle, this->drop_desc_, this->y_desc_,
+                           dyblock->data(), this->x_desc_,
+                           dxblock->mutable_data(), mblock->mutable_data(),
+                           this->reserve_size_);
+    }, {grad.block(), mask_.block()}, {dx.block()});
   } else {
     LOG(ERROR) << "Do not call backward for evaluation phase";
   }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/model/layer/cudnn_lrn.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_lrn.cc b/src/model/layer/cudnn_lrn.cc
index 1012813..4dbf426 100644
--- a/src/model/layer/cudnn_lrn.cc
+++ b/src/model/layer/cudnn_lrn.cc
@@ -29,47 +29,32 @@ CudnnLRN::~CudnnLRN() {
     CUDNN_CHECK(cudnnDestroyTensorDescriptor(shape_desc_));
   }
 }
-void CudnnLRN::InitCudnn(const Shape& shape , DataType dtype) {
+void CudnnLRN::InitCudnn(const Shape& shape, DataType dtype) {
   CHECK(!has_init_cudnn_);
   mode_ = CUDNN_LRN_CROSS_CHANNEL_DIM1;
   CUDNN_CHECK(cudnnCreateTensorDescriptor(&shape_desc_));
   CHECK_EQ(shape.size(), 4u);
-  CUDNN_CHECK(cudnnSetTensor4dDescriptor(shape_desc_,
-      CUDNN_TENSOR_NCHW,
-      GetCudnnDataType(dtype),
-      shape[0],
-      shape[1],
-      shape[2],
-      shape[3]));
+  CUDNN_CHECK(cudnnSetTensor4dDescriptor(shape_desc_, CUDNN_TENSOR_NCHW,
+                                         GetCudnnDataType(dtype), shape[0],
+                                         shape[1], shape[2], shape[3]));
   CUDNN_CHECK(cudnnCreateLRNDescriptor(&lrn_desc_));
-  CUDNN_CHECK(cudnnSetLRNDescriptor(lrn_desc_,
-        local_size_,
-        alpha_,
-        beta_,
-        k_));
+  CUDNN_CHECK(cudnnSetLRNDescriptor(lrn_desc_, local_size_, alpha_, beta_, 
k_));
   has_init_cudnn_ = true;
 }
 const Tensor CudnnLRN::Forward(int flag, const Tensor& input) {
   auto shape = input.shape();
   auto dtype = input.data_type();
-  if (!has_init_cudnn_)
-    InitCudnn(shape, dtype);
+  if (!has_init_cudnn_) InitCudnn(shape, dtype);
   Tensor output;
   output.ResetLike(input);
-  output.device()->Exec(
-      [=](Context* ctx) {
-      Blob *inblob = input.blob(), *outblob = output.blob();
-      const float alpha = 1.0f, beta = 0.0f;
-      CUDNN_CHECK(cudnnLRNCrossChannelForward(ctx->cudnn_handle,
-            this->lrn_desc_,
-            this->mode_,
-            &alpha,
-            this->shape_desc_,
-            inblob->data(),
-            &beta,
-            this->shape_desc_,
-            outblob->mutable_data()));
-      }, {input.blob()}, {output.blob()});
+  output.device()->Exec([=](Context* ctx) {
+    Block* inblock = input.block(), * outblock = output.block();
+    const float alpha = 1.0f, beta = 0.0f;
+    CUDNN_CHECK(cudnnLRNCrossChannelForward(
+        ctx->cudnn_handle, this->lrn_desc_, this->mode_, &alpha,
+        this->shape_desc_, inblock->data(), &beta, this->shape_desc_,
+        outblock->mutable_data()));
+  }, {input.block()}, {output.block()});
 
   if (flag & kTrain) {
     buf_.push(input);
@@ -78,9 +63,9 @@ const Tensor CudnnLRN::Forward(int flag, const Tensor& input) 
{
   return output;
 }
 
-const std::pair<Tensor, vector<Tensor>> CudnnLRN::Backward(
-    int flag, const Tensor& grad) {
-  vector <Tensor> param_grad;
+const std::pair<Tensor, vector<Tensor>> CudnnLRN::Backward(int flag,
+                                                           const Tensor& grad) 
{
+  vector<Tensor> param_grad;
   Tensor dx;
   CHECK(!buf_.empty());
   Tensor output = buf_.top();
@@ -89,25 +74,16 @@ const std::pair<Tensor, vector<Tensor>> CudnnLRN::Backward(
   buf_.pop();
   if ((flag & kTrain) == kTrain) {
     dx.ResetLike(grad);
-    dx.device()->Exec(
-        [=](Context *ctx) {
-          Blob *dyblob = grad.blob(), *dxblob = dx.blob();
-          Blob *yblob = output.blob(), *xblob = input.blob();
-          float alpha = 1.0f, beta = 0.0f;
-          CUDNN_CHECK(cudnnLRNCrossChannelBackward(ctx->cudnn_handle,
-              this->lrn_desc_,
-              this->mode_,
-              &alpha,
-              this->shape_desc_,
-              yblob->data(),
-              this->shape_desc_,
-              dyblob->data(),
-              this->shape_desc_,
-              xblob->data(),
-              &beta,
-              this->shape_desc_,
-              dxblob->mutable_data()));
-        }, {output.blob(), grad.blob(), input.blob()}, {dx.blob()});
+    dx.device()->Exec([=](Context* ctx) {
+      Block* dyblock = grad.block(), * dxblock = dx.block();
+      Block* yblock = output.block(), * xblock = input.block();
+      float alpha = 1.0f, beta = 0.0f;
+      CUDNN_CHECK(cudnnLRNCrossChannelBackward(
+          ctx->cudnn_handle, this->lrn_desc_, this->mode_, &alpha,
+          this->shape_desc_, yblock->data(), this->shape_desc_, 
dyblock->data(),
+          this->shape_desc_, xblock->data(), &beta, this->shape_desc_,
+          dxblock->mutable_data()));
+    }, {output.block(), grad.block(), input.block()}, {dx.block()});
   } else {
     LOG(ERROR) << "Do not call backward for evaluation phase";
   }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/model/layer/cudnn_pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_pooling.cc b/src/model/layer/cudnn_pooling.cc
index 842685d..fb8256a 100644
--- a/src/model/layer/cudnn_pooling.cc
+++ b/src/model/layer/cudnn_pooling.cc
@@ -41,7 +41,7 @@ void CudnnPooling::Setup(const LayerConf &conf) {
     nan_prop_ = CUDNN_NOT_PROPAGATE_NAN;
 }
 
-void CudnnPooling::InitCudnn(const Tensor& input) {
+void CudnnPooling::InitCudnn(const Tensor &input) {
   CHECK(!has_init_cudnn_);
   DataType dtype = input.data_type();
   size_t batchsize = input.shape(0);
@@ -53,8 +53,8 @@ void CudnnPooling::InitCudnn(const Tensor& input) {
                                          GetCudnnDataType(dtype), batchsize,
                                          channels_, height_, width_));
   CUDNN_CHECK(cudnnSetTensor4dDescriptor(
-      y_desc_, CUDNN_TENSOR_NCHW, GetCudnnDataType(dtype), batchsize,
-      channels_, pooled_height_, pooled_width_));
+      y_desc_, CUDNN_TENSOR_NCHW, GetCudnnDataType(dtype), batchsize, 
channels_,
+      pooled_height_, pooled_width_));
   auto pool_method = CUDNN_POOLING_MAX;
   if (pool_ == PoolingConf_PoolMethod_MAX)
     pool_method = CUDNN_POOLING_MAX;
@@ -87,15 +87,13 @@ const Tensor CudnnPooling::Forward(int flag, const Tensor 
&input) {
 
   Shape shape{batchsize, channels_, pooled_height_, pooled_width_};
   Tensor output = Tensor(shape, dev, dtype);
-  output.device()->Exec(
-      [input, output, this](Context *ctx) {
-        Blob *inblob = input.blob(), *outblob = output.blob();
-        float alpha = 1.0f, beta = 0.0f;
-        cudnnPoolingForward(ctx->cudnn_handle, this->pool_desc_, &alpha,
-                            this->x_desc_, inblob->data(), &beta, 
this->y_desc_,
-                            outblob->mutable_data());
-      },
-      {input.blob()}, {output.blob()});
+  output.device()->Exec([input, output, this](Context *ctx) {
+    Block *inblock = input.block(), *outblock = output.block();
+    float alpha = 1.0f, beta = 0.0f;
+    cudnnPoolingForward(ctx->cudnn_handle, this->pool_desc_, &alpha,
+                        this->x_desc_, inblock->data(), &beta, this->y_desc_,
+                        outblock->mutable_data());
+  }, {input.block()}, {output.block()});
   if (flag & kTrain) {
     buf_.push(input);
     buf_.push(output);
@@ -116,17 +114,15 @@ const std::pair<Tensor, vector<Tensor>> 
CudnnPooling::Backward(
   Tensor dx;
   dx.ResetLike(x);
 
-  dx.device()->Exec(
-      [dx, grad, x, y, this](Context *ctx) {
-        Blob *dyblob = grad.blob(), *dxblob = dx.blob(), *yblob = y.blob(),
-             *xblob = x.blob();
-        float alpha = 1.0f, beta = 0.0f;
-        cudnnPoolingBackward(ctx->cudnn_handle, this->pool_desc_, &alpha,
-                             this->y_desc_, yblob->data(), this->y_desc_,
-                             dyblob->data(), this->x_desc_, xblob->data(),
-                             &beta, this->x_desc_, dxblob->mutable_data());
-      },
-      {grad.blob(), y.blob(), x.blob()}, {dx.blob()});
+  dx.device()->Exec([dx, grad, x, y, this](Context *ctx) {
+    Block *dyblock = grad.block(), *dxblock = dx.block(), *yblock = y.block(),
+          *xblock = x.block();
+    float alpha = 1.0f, beta = 0.0f;
+    cudnnPoolingBackward(ctx->cudnn_handle, this->pool_desc_, &alpha,
+                         this->y_desc_, yblock->data(), this->y_desc_,
+                         dyblock->data(), this->x_desc_, xblock->data(), &beta,
+                         this->x_desc_, dxblock->mutable_data());
+  }, {grad.block(), y.block(), x.block()}, {dx.block()});
 
   return std::make_pair(dx, param_grad);
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/src/model/layer/cudnn_softmax.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_softmax.cc b/src/model/layer/cudnn_softmax.cc
index 85b0c3d..16d4022 100644
--- a/src/model/layer/cudnn_softmax.cc
+++ b/src/model/layer/cudnn_softmax.cc
@@ -47,14 +47,13 @@ const Tensor CudnnSoftmax::Forward(int flag, const Tensor& 
input) {
   Tensor output;
   output.ResetLike(input);
   output.device()->Exec([input, output, this](Context* ctx) {
-    Blob* inblob = input.blob(), * outblob = output.blob();
+    Block* inblock = input.block(), * outblock = output.block();
     float alpha = 1.0f, beta = 0.0f;
     cudnnSoftmaxForward(ctx->cudnn_handle, this->algorithm_, this->mode_,
-                        &alpha, this->desc_, inblob->data(), &beta, 
this->desc_,
-                        outblob->mutable_data());
-  }, {input.blob()}, {output.blob()});
-  if (flag & kTrain)
-    buf_.push(output);
+                        &alpha, this->desc_, inblock->data(), &beta,
+                        this->desc_, outblock->mutable_data());
+  }, {input.block()}, {output.block()});
+  if (flag & kTrain) buf_.push(output);
   return output;
 }
 
@@ -66,13 +65,14 @@ const std::pair<Tensor, vector<Tensor>> 
CudnnSoftmax::Backward(
   buf_.pop();
   dx.ResetLike(grad);
   dx.device()->Exec([dx, grad, output, this](Context* ctx) {
-    Blob* dyblob = grad.blob(), * dxblob = dx.blob(), * yblob = output.blob();
+    Block* dyblock = grad.block(), * dxblock = dx.block(),
+           * yblock = output.block();
     float alpha = 1.0f, beta = 0.0f;
     cudnnSoftmaxBackward(ctx->cudnn_handle, this->algorithm_, this->mode_,
-                         &alpha, this->desc_, yblob->data(), this->desc_,
-                         dyblob->data(), &beta, this->desc_,
-                         dxblob->mutable_data());
-  }, {grad.blob(), output.blob()}, {dx.blob()});
+                         &alpha, this->desc_, yblock->data(), this->desc_,
+                         dyblock->data(), &beta, this->desc_,
+                         dxblock->mutable_data());
+  }, {grad.block(), output.block()}, {dx.block()});
   return std::make_pair(dx, param_grad);
 }
 }  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/test/singa/test_cpp_cpu.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cpp_cpu.cc b/test/singa/test_cpp_cpu.cc
index 86654e1..ec5c7e1 100644
--- a/test/singa/test_cpp_cpu.cc
+++ b/test/singa/test_cpp_cpu.cc
@@ -24,7 +24,7 @@
 #include "singa/proto/core.pb.h"
 
 using singa::CppCPU;
-using singa::Blob;
+using singa::Block;
 TEST(CppCPU, Constructor) {
   CppCPU dev(0, 1);
   EXPECT_EQ(0, dev.id());
@@ -32,15 +32,15 @@ TEST(CppCPU, Constructor) {
 
 TEST(CppCPU, MemoryMallocFree) {
   CppCPU dev(0, 1);
-  Blob* b = dev.NewBlob(4);
+  Block* b = dev.NewBlock(4);
   EXPECT_NE(nullptr, b);
   EXPECT_EQ(4u, b->size());
-  dev.FreeBlob(b);
+  dev.FreeBlock(b);
 }
 
 TEST(CppCPU, Exec) {
   CppCPU dev(0, 1);
-  Blob* b = dev.NewBlob(4);
+  Block* b = dev.NewBlock(4);
   int x = 1, y =3, z = 0;
   dev.Exec([x, y, &z](singa::Context *ctx) {
       z = x + y;
@@ -50,7 +50,7 @@ TEST(CppCPU, Exec) {
 
 TEST(CppCPU, CopyData) {
   CppCPU dev(0, 1);
-  Blob* b = dev.NewBlob(4);
+  Block* b = dev.NewBlock(4);
   char s[] = {'a', 'b', 'c', 'x'};
   dev.CopyDataFromHostPtr(b, s, 4);
   const char* bstr = static_cast<const char*>(b->data());
@@ -58,14 +58,14 @@ TEST(CppCPU, CopyData) {
   EXPECT_EQ('b', bstr[1]);
   EXPECT_EQ('x', bstr[3]);
 
-  Blob* c = dev.NewBlob(4);
+  Block* c = dev.NewBlock(4);
   dev.CopyDataToFrom(c, b, 4, singa::kHostToHost, 0, 0);
   const char* cstr = static_cast<const char*>(c->data());
 
   EXPECT_EQ('a', cstr[0]);
   EXPECT_EQ('b', cstr[1]);
   EXPECT_EQ('x', cstr[3]);
-  dev.FreeBlob(b);
-  dev.FreeBlob(c);
+  dev.FreeBlock(b);
+  dev.FreeBlock(c);
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f488070e/test/singa/test_tensor.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_tensor.cc b/test/singa/test_tensor.cc
index bd039ad..2cce336 100644
--- a/test/singa/test_tensor.cc
+++ b/test/singa/test_tensor.cc
@@ -18,17 +18,17 @@ TEST(TensorTest, TestConstructor) {
   singa::Tensor float16_t(Shape{2,3}, singa::kFloat16);
   EXPECT_EQ(singa::kFloat16, float16_t.data_type());
   EXPECT_EQ(6u, float16_t.Size());
-  EXPECT_EQ(12u, float16_t.blob()->size());
+  EXPECT_EQ(12u, float16_t.block()->size());
 
   singa::Tensor x(float16_t);
   EXPECT_EQ(float16_t.Size(), x.Size());
-  EXPECT_EQ(float16_t.blob(), x.blob());
+  EXPECT_EQ(float16_t.block(), x.block());
   EXPECT_EQ(float16_t.data_type(), x.data_type());
   EXPECT_EQ(float16_t.device(), x.device());
 
   singa::Tensor y = float16_t;
   EXPECT_EQ(float16_t.Size(), x.Size());
-  EXPECT_EQ(float16_t.blob(), x.blob());
+  EXPECT_EQ(float16_t.block(), x.block());
   EXPECT_EQ(float16_t.data_type(), x.data_type());
   EXPECT_EQ(float16_t.device(), x.device());
 }
@@ -69,7 +69,7 @@ TEST(TensorClass, CopyDataFromHostPtr) {
   float data[] = {1.0f, 2.0f, 3.0f};
   Tensor t(Shape{3});
   t.CopyDataFromHostPtr(data, 3);
-  const float* dptr = static_cast<const float*>(t.blob()->data());
+  const float* dptr = static_cast<const float*>(t.block()->data());
   EXPECT_FLOAT_EQ(1.0f, dptr[0]);
   EXPECT_FLOAT_EQ(2.0f, dptr[1]);
   EXPECT_FLOAT_EQ(3.0f, dptr[2]);
@@ -82,7 +82,7 @@ TEST(TensorClass, CopyData) {
 
   Tensor o(Shape{3});
   o.CopyData(t);
-  const float* dptr = static_cast<const float*>(o.blob()->data());
+  const float* dptr = static_cast<const float*>(o.block()->data());
   EXPECT_FLOAT_EQ(1.0f, dptr[0]);
   EXPECT_FLOAT_EQ(2.0f, dptr[1]);
   EXPECT_FLOAT_EQ(3.0f, dptr[2]);
@@ -94,7 +94,7 @@ TEST(TensorClass, Clone) {
   t.CopyDataFromHostPtr(data, 3);
 
   Tensor o = t.Clone();
-  const float* dptr = static_cast<const float*>(o.blob()->data());
+  const float* dptr = static_cast<const float*>(o.block()->data());
   EXPECT_FLOAT_EQ(1.0f, dptr[0]);
   EXPECT_FLOAT_EQ(2.0f, dptr[1]);
   EXPECT_FLOAT_EQ(3.0f, dptr[2]);
@@ -105,7 +105,7 @@ TEST(TensorClass, T) {
   EXPECT_FALSE(t.transpose());
   Tensor o = t.T();
   EXPECT_EQ(true, o.transpose());
-  EXPECT_EQ(t.blob(), o.blob());
+  EXPECT_EQ(t.block(), o.block());
   EXPECT_EQ(t.data_type(), o.data_type());
   EXPECT_EQ(t.shape()[0],  o.shape()[1]);
   EXPECT_EQ(t.shape()[1],  o.shape()[0]);

Reply via email to