http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/bm.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/bm.cc b/src/neuralnet/neuron_layer/bm.cc
deleted file mode 100644
index 66e303c..0000000
--- a/src/neuralnet/neuron_layer/bm.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include <glog/logging.h>
-#include "singa/neuralnet/neuron_layer.h"
-#include "singa/utils/singleton.h"
-
-namespace singa {
-
-using std::vector;
-
-void BMLayer::Setup(const LayerProto& conf,
-    const vector<Layer*>& srclayers) {
-  Layer::Setup(conf, srclayers);
-  data_.ReshapeLike(srclayers[0]->data(this));
-  grad_.ReshapeLike(srclayers[0]->grad(this));
-
-  const vector<int>& srcshape = srclayers[0]->data(this).shape();
-
-  batchsize_ = srcshape[0];
-  channels_ = srcshape[1];
-  height_ = srcshape[2];
-  width_ = srcshape[3];
-
-  bnScale_ = Param::Create(conf.param(0));
-  bnScale_->Setup(vector<int>{1, channels_, 1, 1});
-
-  bnBias_ = Param::Create(conf.param(1));
-  bnBias_->Setup(vector<int>{1, channels_, 1, 1});
-
-  resultRunningMean_ = Param::Create(conf.param(2));
-  resultRunningMean_->Setup(vector<int>{1, channels_, 1, 1});
-
-  resultRunningInvVariance_ = Param::Create(conf.param(3));
-  resultRunningInvVariance_->Setup(vector<int>{1, channels_, 1, 1});
-}
-
-void BMLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  // Todo
-}
-
-void BMLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  // Todo
-}
-
-}  //  namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/convolution.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/convolution.cc b/src/neuralnet/neuron_layer/convolution.cc
deleted file mode 100644
index e77e9ca..0000000
--- a/src/neuralnet/neuron_layer/convolution.cc
+++ /dev/null
@@ -1,192 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include <glog/logging.h>
-#include "singa/neuralnet/neuron_layer.h"
-#include "singa/utils/singleton.h"
-
-namespace singa {
-using std::vector;
-
-/************ Implementation for ConvolutionLayer*************************/
-ConvolutionLayer::~ConvolutionLayer() {
-  delete weight_;
-  delete bias_;
-}
-void ConvolutionLayer::Setup(const LayerProto& conf,
-    const vector<Layer*>& srclayers) {
-  CHECK_EQ(srclayers.size(), 1);
-  Layer::Setup(conf, srclayers);
-  ConvolutionProto conv_conf = conf.convolution_conf();
-  if (conv_conf.has_kernel()) {
-    kernel_x_ = kernel_y_ = conv_conf.kernel();
-  } else {
-    kernel_x_ = conv_conf.kernel_x();
-    kernel_y_ = conv_conf.kernel_y();
-  }
-  CHECK_NE(kernel_x_, 0);
-  CHECK_NE(kernel_y_, 0);
-
-  if (conv_conf.has_pad()) {
-    pad_x_ = pad_y_ = conv_conf.pad();
-  } else {
-    pad_x_ = conv_conf.pad_x();
-    pad_y_ = conv_conf.pad_y();
-  }
-
-  if (conv_conf.has_stride()) {
-    stride_x_ = stride_y_ = conv_conf.stride();
-  } else {
-    stride_x_ = conv_conf.stride_x();
-    stride_y_ = conv_conf.stride_y();
-  }
-
-  num_filters_ = conv_conf.num_filters();
-  // partition filters
-  if (partition_dim() > 0)
-    num_filters_ /= srclayers.at(0)->num_partitions();
-
-  const vector<int>& srcshape = srclayers[0]->data(this).shape();
-  batchsize_ = srcshape[0];
-  int dim = srcshape.size();
-  CHECK_GT(dim, 2);
-  width_ = srcshape[dim - 1];
-  height_ = srcshape[dim - 2];
-  if (dim > 3)
-    channels_ = srcshape[dim - 3];
-  else if (dim > 2)
-    channels_ = 1;
-
-  conv_height_ = (height_ + 2 * pad_y_ - kernel_y_) / stride_y_ + 1;
-  conv_width_ = (width_ + 2 * pad_x_ - kernel_x_) / stride_x_ + 1;
-  col_height_ = channels_ * kernel_x_ * kernel_y_;
-  col_width_ = conv_height_ * conv_width_;
-  vector<int> shape{batchsize_, num_filters_, conv_height_, conv_width_};
-  data_.Reshape(shape);
-  grad_.Reshape(shape);
-  col_data_.Reshape(vector<int>{col_height_, col_width_});
-  col_grad_.Reshape(vector<int>{col_height_, col_width_});
-  weight_ = Param::Create(conf.param(0));
-  weight_->Setup(vector<int>{num_filters_, col_height_});
-  if (conf.param_size() > 1) {
-    bias_ = Param::Create(conf.param(1));
-    bias_->Setup(vector<int>{num_filters_});
-  }
-}
-
-// TODO(wangwei) remove mshadow's functions
-void ConvolutionLayer::ComputeFeature(int flag,
-    const vector<Layer*>& srclayers) {
-  auto src = Tensor4(srclayers[0]->mutable_data(this));
-  auto data = Tensor3(&data_);
-  auto col = Tensor2(&col_data_);
-  auto weight = Tensor2(weight_->mutable_data());
-  auto bias = Tensor1(bias_->mutable_data());
-  for (int n = 0; n < batchsize_; n++) {
-    if (pad_x_ > 0)
-      col = expr::unpack_patch2col(pad(src[n], pad_x_), kernel_x_, stride_x_);
-    else
-      col = expr::unpack_patch2col(src[n], kernel_x_, stride_x_);
-    data[n] = dot(weight, col);
-  }
-  data += expr::broadcast<1>(bias, data.shape);
-}
-
-void ConvolutionLayer::ComputeGradient(int flag,
-    const vector<Layer*>& srclayers) {
-  auto src = Tensor4(srclayers[0]->mutable_data(this));
-  auto col = Tensor2(&col_data_);
-  auto weight = Tensor2(weight_->mutable_data());
-  auto grad = Tensor3(&grad_);
-  auto gcol = Tensor2(&col_grad_);
-  auto gweight = Tensor2(weight_->mutable_grad());
-  auto gbias = Tensor1(bias_->mutable_grad());
-  Blob<float>* gsrcblob = srclayers[0]->mutable_grad(this);
-  Tensor<cpu, 4> gsrc(nullptr, Shape4(batchsize_, channels_, height_, width_));
-  if (gsrcblob != nullptr)
-    gsrc.dptr = gsrcblob->mutable_cpu_data();
-  gbias = expr::sumall_except_dim<1>(grad);
-  gweight = 0.0f;
-  Shape<3> padshp(gsrc.shape.SubShape());
-  padshp[0] += 2 * pad_y_;
-  padshp[1] += 2 * pad_x_;
-  Shape<2> imgshp = Shape2(height_, width_);
-  for (int n = 0; n < batchsize_; n++) {
-    if (pad_x_ > 0)
-      col = expr::unpack_patch2col(pad(src[n], pad_x_), kernel_x_, stride_x_);
-    else
-      col = expr::unpack_patch2col(src[n], kernel_x_, stride_x_);
-    gweight += dot(grad[n], col.T());
-    if (gsrcblob != nullptr) {
-      gcol = dot(weight.T(), grad[n]);
-      gsrc[n] = crop(expr::pack_col2patch(gcol, padshp, kernel_x_, stride_x_),
-          imgshp);
-    }
-  }
-}
-
-/******************* Implementation for CConvolutionLayer *********/
-void CConvolutionLayer::ComputeFeature(int flag,
-    const vector<Layer*>& srclayers) {
-  auto src = Tensor4(srclayers[0]->mutable_data(this));
-  auto data = Tensor3(&data_);
-  auto col = Tensor2(&col_data_);
-  auto weight = Tensor2(weight_->mutable_data());
-  auto bias = Tensor1(bias_->mutable_data());
-
-  for (int n = 0; n < batchsize_; n++) {
-    Im2col(src[n].dptr, channels_, height_, width_,
-        kernel_y_, kernel_x_, pad_y_, pad_x_, stride_y_, stride_x_, col.dptr);
-    data[n] = dot(weight, col);
-  }
-  data += expr::broadcast<1>(bias, data.shape);
-}
-
-void CConvolutionLayer::ComputeGradient(int flag,
-    const vector<Layer*>& srclayers) {
-  auto src = Tensor4(srclayers[0]->mutable_data(this));
-  auto col = Tensor2(&col_data_);
-  auto weight = Tensor2(weight_->mutable_data());
-
-  auto grad = Tensor3(&grad_);
-  auto gcol = Tensor2(&col_grad_);
-  auto gweight = Tensor2(weight_->mutable_grad());
-  auto gbias = Tensor1(bias_->mutable_grad());
-  gweight = 0.f;
-  Blob<float>* gsrcblob = srclayers[0]->mutable_grad(this);
-  Tensor<cpu, 4> gsrc(nullptr, Shape4(batchsize_, channels_, height_, width_));
-  if (gsrcblob != nullptr)
-    gsrc.dptr = gsrcblob->mutable_cpu_data();
-  gbias = expr::sumall_except_dim<1>(grad);
-  for (int n = 0; n < batchsize_; n++) {
-    Im2col(src[n].dptr, channels_, height_, width_,
-        kernel_y_, kernel_x_, pad_y_, pad_x_, stride_y_, stride_x_, col.dptr);
-    gweight += dot(grad[n], col.T());
-    if (gsrcblob != nullptr) {
-      gcol = dot(weight.T(), grad[n]);
-      Col2im(gcol.dptr, channels_, height_, width_,
-          kernel_y_, kernel_x_, pad_y_, pad_x_, stride_y_, stride_x_,
-          gsrc[n].dptr);
-    }
-  }
-}
-
-}  // namespace singa
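
Both convolution implementations above reduce convolution to matrix multiplication: every input patch is unrolled into one column of a (channels * kernel_y * kernel_x) x (conv_height * conv_width) matrix (unpack_patch2col / Im2col), so the forward pass becomes a single weight-by-column GEMM per image, and the backward pass reverses the unrolling (pack_col2patch / Col2im). A minimal standalone CPU sketch of the unrolling step, in plain C++ with made-up sizes and none of SINGA's Blob/Tensor types:

    #include <cstdio>
    #include <vector>

    // Unroll a (channels x height x width) image into a
    // (channels*kh*kw) x (out_h*out_w) column matrix so that
    // conv == weight(num_filters x channels*kh*kw) * col.
    void im2col(const float* img, int channels, int height, int width,
                int kh, int kw, int pad, int stride, float* col) {
      int out_h = (height + 2 * pad - kh) / stride + 1;
      int out_w = (width + 2 * pad - kw) / stride + 1;
      for (int c = 0; c < channels * kh * kw; ++c) {
        int w_off = c % kw, h_off = (c / kw) % kh, ch = c / (kw * kh);
        for (int y = 0; y < out_h; ++y) {
          for (int x = 0; x < out_w; ++x) {
            int in_y = y * stride - pad + h_off;
            int in_x = x * stride - pad + w_off;
            // zero-pad out-of-range positions
            col[(c * out_h + y) * out_w + x] =
                (in_y >= 0 && in_y < height && in_x >= 0 && in_x < width)
                    ? img[(ch * height + in_y) * width + in_x] : 0.0f;
          }
        }
      }
    }

    int main() {
      std::vector<float> img(1 * 4 * 4, 1.0f);    // 1 channel, 4x4 input
      std::vector<float> col(1 * 3 * 3 * 2 * 2);  // 3x3 kernel, stride 1, no pad
      im2col(img.data(), 1, 4, 4, 3, 3, 0, 1, col.data());
      std::printf("col[0] = %f\n", col[0]);
      return 0;
    }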

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/cudnn_activation.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/cudnn_activation.cc b/src/neuralnet/neuron_layer/cudnn_activation.cc
deleted file mode 100644
index 12b3d48..0000000
--- a/src/neuralnet/neuron_layer/cudnn_activation.cc
+++ /dev/null
@@ -1,108 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include "singa/neuralnet/neuron_layer.h"
-
-namespace singa {
-
-void CudnnActivationLayer::InitCudnn() {
-  CudnnBase::InitCudnn();
-
-  // TODO(wangwei) make the mode case insensitive
-  if (layer_conf_.activation_conf().type() == SIGMOID)
-    mode_ = CUDNN_ACTIVATION_SIGMOID;
-  else if (layer_conf_.activation_conf().type() == TANH)
-    mode_ = CUDNN_ACTIVATION_TANH;
-  else if (layer_conf_.activation_conf().type() == RELU)
-    mode_ = CUDNN_ACTIVATION_RELU;
-  else
-    LOG(FATAL) << "Unkown activation: " << layer_conf_.activation_conf().type();
-
-  const auto& shape = data_.shape();
-  CHECK_GT(shape.size(), 0);
-  // TODO(wangwei) cudnnSetTensorNdDescriptor reports error if nbdim is < 4.
-  const int nbdim = 4;
-  // size of each dimension
-  int* sdim = new int[nbdim];
-  int* stride = new int[nbdim];
-  int i = shape.size() - 1;
-  sdim[i] = shape[i];
-  stride[i] = 1;
-  // LOG(ERROR) << "layer " << name();
-  // LOG(ERROR) << sdim[i] << " " << stride[i];
-  for (--i; i >= 0; i--) {
-    sdim[i] = shape[i];
-    stride[i] = shape[i + 1] * stride[i + 1];
-    // LOG(ERROR) << sdim[i] << " " << stride[i];
-  }
-  // padding sdim and stride to 4 dimensions
-  for (i = shape.size(); i < nbdim; i++) {
-    sdim[i] = 1;
-    stride[i] = 1;
-  }
-  CHECK_CUDNN(cudnnSetTensorNdDescriptor(src_desc_,
-        CUDNN_DATA_FLOAT,
-        nbdim,
-        sdim,
-        stride));
-  CHECK_CUDNN(cudnnSetTensorNdDescriptor(my_desc_,
-        CUDNN_DATA_FLOAT,
-        nbdim,
-        sdim,
-        stride));
-  delete[] sdim;
-  delete[] stride;
-}
-
-void CudnnActivationLayer::ComputeFeature(int flag,
-    const vector<Layer*>& srclayers) {
-  if (!has_init_cudnn_)
-    InitCudnn();
-  float alpha = 1.0f, beta = 0.0f;
-  // currently only consider single src layer
-  CHECK_EQ(srclayers.size(), 1);
-  CHECK_CUDNN(cudnnActivationForward(handle_,
-        mode_,
-        &alpha,
-        src_desc_,
-        srclayers[0]->data(this).gpu_data(),
-        &beta,
-        my_desc_,
-        data_.mutable_gpu_data()));
-}
-
-void CudnnActivationLayer::ComputeGradient(int flag,
-    const vector<Layer*>& srclayers) {
-  float alpha = 1.0f, beta = 0.0f;
-  CHECK_CUDNN(cudnnActivationBackward(handle_,
-        mode_,
-        &alpha,
-        my_desc_,
-        data_.gpu_data(),
-        my_desc_,
-        grad_.gpu_data(),
-        src_desc_,
-        srclayers[0]->data(this).gpu_data(),
-        &beta,
-        src_desc_,
-        srclayers[0]->mutable_grad(this)->mutable_gpu_data()));
-}
-}   // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/cudnn_bm.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/cudnn_bm.cc b/src/neuralnet/neuron_layer/cudnn_bm.cc
deleted file mode 100644
index ca90007..0000000
--- a/src/neuralnet/neuron_layer/cudnn_bm.cc
+++ /dev/null
@@ -1,149 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-#include "singa/neuralnet/neuron_layer.h"
-
-#if CUDNN_MAJOR == 4
-namespace singa {
-
-CudnnBMLayer::~CudnnBMLayer() {
-  if (has_init_cudnn_) {
-    CHECK_CUDNN(cudnnDestroyTensorDescriptor(bnScaleBiasMeanVar_desc_));
-    CHECK_CUDNN(cudnnDestroyTensorDescriptor(bnScaleBiasDiff_desc_));
-  }
-}
-
-void CudnnBMLayer::InitCudnn() {
-  CudnnBase::InitCudnn();
-
-  CHECK_CUDNN(cudnnCreateTensorDescriptor(&bnScaleBiasMeanVar_desc_));
-  CHECK_CUDNN(cudnnCreateTensorDescriptor(&bnScaleBiasDiff_desc_));
-
-  CHECK_CUDNN(cudnnSetTensor4dDescriptor(src_desc_,
-        CUDNN_TENSOR_NCHW,
-        CUDNN_DATA_FLOAT,
-        batchsize_,
-        channels_,
-        height_,
-        width_));
-  CHECK_CUDNN(cudnnSetTensor4dDescriptor(my_desc_,
-        CUDNN_TENSOR_NCHW,
-        CUDNN_DATA_FLOAT,
-        batchsize_,
-        channels_,
-        height_,
-        width_));
-  CHECK_CUDNN(cudnnSetTensor4dDescriptor(bnScaleBiasMeanVar_desc_,
-        CUDNN_TENSOR_NCHW,
-        CUDNN_DATA_FLOAT,
-        1,
-        channels_,
-        1,
-        1));
-  CHECK_CUDNN(cudnnSetTensor4dDescriptor(bnScaleBiasDiff_desc_,
-        CUDNN_TENSOR_NCHW,
-        CUDNN_DATA_FLOAT,
-        1,
-        channels_,
-        1,
-        1));
-
-  vector<int> shape{1, channels_, 1, 1};
-
-  resultSaveMean_.Reshape(shape);
-  resultSaveInvVariance_.Reshape(shape);
-
-  mode_ = CUDNN_BATCHNORM_SPATIAL;
-}
-
-void CudnnBMLayer::ComputeFeature(int flag,
-    const vector<Layer*>& srclayers) {
-  if (!has_init_cudnn_)
-    InitCudnn();
-
-  const float alpha = 1.0f, beta = 0.0f;
-  double exponentialAverageFactor = 1.0;
-  double epsilon = CUDNN_BN_MIN_EPSILON;
-
-  // check training
-  if ((flag & kTrain) != kTrain) {
-    CHECK_CUDNN(cudnnBatchNormalizationForwardInference(handle_,
-          mode_,
-          &alpha,
-          &beta,
-          src_desc_,
-          srclayers.at(0)->data(this).gpu_data(),
-          my_desc_,
-          data_.mutable_gpu_data(),
-          bnScaleBiasMeanVar_desc_,
-          bnScale_->data().gpu_data(),
-          bnBias_->data().gpu_data(),
-          resultRunningMean_->data().gpu_data(),
-          resultRunningInvVariance_->data().gpu_data(),
-          epsilon));
-  } else {
-    CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(handle_,
-          mode_,
-          &alpha,
-          &beta,
-          src_desc_,
-          srclayers.at(0)->data(this).gpu_data(),
-          my_desc_,
-          data_.mutable_gpu_data(),
-          bnScaleBiasMeanVar_desc_,
-          bnScale_->data().gpu_data(),
-          bnBias_->data().gpu_data(),
-          exponentialAverageFactor,
-          resultRunningMean_->mutable_data()->mutable_gpu_data(),
-          resultRunningInvVariance_->mutable_data()->mutable_gpu_data(),
-          epsilon,
-          resultSaveMean_.mutable_gpu_data(),
-          resultSaveInvVariance_.mutable_gpu_data()));
-  }
-}
-
-void CudnnBMLayer::ComputeGradient(int flag,
-    const vector<Layer*>& srclayers) {
-
-  const float alpha = 1.0f, beta = 0.0f, alphaDiff = 1.0f, betaDiff = 0.0f;
-  double epsilon = CUDNN_BN_MIN_EPSILON;
-
-  CHECK_CUDNN(cudnnBatchNormalizationBackward(handle_,
-      mode_,
-      &alpha,
-      &beta,
-      &alphaDiff,
-      &betaDiff,
-      src_desc_,
-      srclayers.at(0)->data(this).gpu_data(),
-      my_desc_,
-      grad_.gpu_data(),
-      src_desc_,
-      srclayers.at(0)->mutable_grad(this)->mutable_gpu_data(),
-      bnScaleBiasDiff_desc_,
-      bnScale_->data().gpu_data(),
-      bnScale_->mutable_grad()->mutable_gpu_data(),
-      bnBias_->mutable_grad()->mutable_gpu_data(),
-      epsilon,
-      resultSaveMean_.gpu_data(),
-      resultSaveInvVariance_.gpu_data()));
-}
-}  // namespace singa
-#endif
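
For reference, the transformation the cuDNN batch-normalization calls above compute in CUDNN_BATCHNORM_SPATIAL mode is the standard per-channel normalization (this is the textbook formulation, not taken from the SINGA sources), with gamma = bnScale, beta = bnBias and eps = CUDNN_BN_MIN_EPSILON:

    \mu_c = \frac{1}{m}\sum_i x_{i,c}, \qquad
    \sigma_c^2 = \frac{1}{m}\sum_i (x_{i,c} - \mu_c)^2, \qquad
    y_{i,c} = \gamma_c\,\frac{x_{i,c} - \mu_c}{\sqrt{\sigma_c^2 + \epsilon}} + \beta_c

where m runs over the batch and spatial positions of channel c. During training the running statistics are updated with exponentialAverageFactor; ForwardInference substitutes those stored statistics for the batch statistics.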

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/cudnn_convolution.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/cudnn_convolution.cc b/src/neuralnet/neuron_layer/cudnn_convolution.cc
deleted file mode 100644
index 560ee63..0000000
--- a/src/neuralnet/neuron_layer/cudnn_convolution.cc
+++ /dev/null
@@ -1,221 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include "singa/neuralnet/neuron_layer.h"
-
-namespace singa {
-
-CudnnConvLayer::~CudnnConvLayer() {
-  if (has_init_cudnn_) {
-    CHECK_CUDNN(cudnnDestroyTensorDescriptor(bias_desc_));
-    CHECK_CUDNN(cudnnDestroyFilterDescriptor(filter_desc_));
-    CHECK_CUDNN(cudnnDestroyConvolutionDescriptor(conv_desc_));
-  }
-}
-
-void CudnnConvLayer::InitCudnn() {
-  CudnnBase::InitCudnn();
-  // convert MB to bytes
-  workspace_byte_limit_
-    = layer_conf_.convolution_conf().workspace_byte_limit() << 20;
-
-  CHECK_CUDNN(cudnnCreateTensorDescriptor(&bias_desc_));
-  CHECK_CUDNN(cudnnCreateFilterDescriptor(&filter_desc_));
-  CHECK_CUDNN(cudnnCreateConvolutionDescriptor(&conv_desc_));
-
-  CHECK_CUDNN(cudnnSetConvolution2dDescriptor(conv_desc_,
-        pad_y_,
-        pad_x_,
-        stride_y_,
-        stride_x_,
-        1,
-        1,
-        CUDNN_CROSS_CORRELATION));
-  CHECK_CUDNN(cudnnSetFilter4dDescriptor(filter_desc_,
-        CUDNN_DATA_FLOAT,
-        num_filters_,
-        channels_,
-        kernel_y_,
-        kernel_x_));
-  if (bias_) {
-    CHECK_CUDNN(cudnnSetTensor4dDescriptor(bias_desc_,
-          CUDNN_TENSOR_NCHW,
-          CUDNN_DATA_FLOAT,
-          1,
-          num_filters_,
-          1,
-          1));
-  }
-  CHECK_CUDNN(cudnnSetTensor4dDescriptor(src_desc_,
-        CUDNN_TENSOR_NCHW,
-        CUDNN_DATA_FLOAT,
-        batchsize_,
-        channels_,
-        height_,
-        width_));
-  CHECK_CUDNN(cudnnSetTensor4dDescriptor(my_desc_,
-        CUDNN_TENSOR_NCHW,
-        CUDNN_DATA_FLOAT,
-        batchsize_,
-        num_filters_,
-        conv_height_,
-        conv_width_));
-
-  CHECK_CUDNN(cudnnGetConvolutionForwardAlgorithm(handle_,
-        src_desc_,
-        filter_desc_,
-        conv_desc_,
-        my_desc_,
-        CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
-        workspace_byte_limit_,
-        &fp_alg_));
-
-  CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithm(handle_,
-        src_desc_,
-        my_desc_,
-        conv_desc_,
-        filter_desc_,
-        CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
-        workspace_byte_limit_,
-        &bp_filter_alg_));
-  CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithm(handle_,
-        filter_desc_,
-        my_desc_,
-        conv_desc_,
-        src_desc_,
-        CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
-        workspace_byte_limit_,
-        &bp_data_alg_));
-
-  size_t fp_byte, bp_data_byte, bp_filter_byte;
-  CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(handle_,
-        src_desc_,
-        filter_desc_,
-        conv_desc_,
-        my_desc_,
-        fp_alg_,
-        &fp_byte));
-  CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(handle_,
-        filter_desc_,
-        my_desc_,
-        conv_desc_,
-        src_desc_,
-        bp_data_alg_,
-        &bp_data_byte));
-  CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize(handle_,
-        src_desc_,
-        my_desc_,
-        conv_desc_,
-        filter_desc_,
-        bp_filter_alg_,
-        &bp_filter_byte));
-  workspace_count_ = std::max(std::max(fp_byte, bp_data_byte), bp_filter_byte)
-    / sizeof(float) + 1;
-}
-
-void CudnnConvLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  if (!has_init_cudnn_)
-    InitCudnn();
-  float alpha = 1.f, beta = 0.f;
-  Blob<float> workspace(vector<int>{static_cast<int>(workspace_count_)});
-  CHECK_CUDNN(cudnnConvolutionForward(handle_,
-        &alpha,
-        src_desc_,
-        srclayers[0]->data(this).gpu_data(),
-        filter_desc_,
-        weight_->data().gpu_data(),
-        conv_desc_,
-        fp_alg_,
-        workspace.mutable_gpu_data(),
-        workspace_count_ * sizeof(float),
-        &beta,
-        my_desc_,
-        data_.mutable_gpu_data()));
-  if (bias_) {
-    beta = 1.f;
-
-#if CUDNN_MAJOR == 4
-    CHECK_CUDNN(cudnnAddTensor(handle_,
-          &alpha,
-          bias_desc_,
-          bias_->data().gpu_data(),
-          &beta,
-          my_desc_,
-          data_.mutable_gpu_data()));
-#else
-    CHECK_CUDNN(cudnnAddTensor(handle_,
-          CUDNN_ADD_SAME_C,
-          &alpha,
-          bias_desc_,
-          bias_->data().gpu_data(),
-          &beta,
-          my_desc_,
-          data_.mutable_gpu_data()));
-#endif
-  }
-}
-
-void
-CudnnConvLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  float alpha = 1.f, beta = 0.f;
-  Blob<float> workspace(vector<int>{static_cast<int>(workspace_count_)});
-  // LOG(ERROR) << "backward bias";
-  if (bias_) {
-    CHECK_CUDNN(cudnnConvolutionBackwardBias(handle_,
-          &alpha,
-          my_desc_,
-          grad_.gpu_data(),
-          &beta,
-          bias_desc_,
-          bias_->mutable_grad()->mutable_gpu_data()));
-  }
-  // LOG(ERROR) << "backward w";
-  CHECK_CUDNN(cudnnConvolutionBackwardFilter_v3(handle_,
-        &alpha,
-        src_desc_,
-        srclayers[0]->data(this).gpu_data(),
-        my_desc_,
-        grad_.gpu_data(),
-        conv_desc_,
-        bp_filter_alg_,
-        workspace.mutable_gpu_data(),
-        workspace_count_ * sizeof(float),
-        &beta,
-        filter_desc_,
-        weight_->mutable_grad()->mutable_gpu_data()));
-  // LOG(ERROR) << "backward src";
-  if (srclayers[0]->mutable_grad(this) != nullptr) {
-    CHECK_CUDNN(cudnnConvolutionBackwardData_v3(handle_,
-          &alpha,
-          filter_desc_,
-          weight_->data().gpu_data(),
-          my_desc_,
-          grad_.gpu_data(),
-          conv_desc_,
-          bp_data_alg_,
-          workspace.mutable_gpu_data(),
-          workspace_count_ * sizeof(float),
-          &beta,
-          src_desc_,
-          srclayers[0]->mutable_grad(this)->mutable_gpu_data()));
-  }
-}
-}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/cudnn_lrn.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/cudnn_lrn.cc b/src/neuralnet/neuron_layer/cudnn_lrn.cc
deleted file mode 100644
index fb8e476..0000000
--- a/src/neuralnet/neuron_layer/cudnn_lrn.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include "singa/neuralnet/neuron_layer.h"
-
-namespace singa {
-CudnnLRNLayer::~CudnnLRNLayer() {
-  if (has_init_cudnn_) {
-    cudnnDestroyLRNDescriptor(norm_desc_);
-  }
-}
-
-void CudnnLRNLayer::InitCudnn() {
-  mode_ = CUDNN_LRN_CROSS_CHANNEL_DIM1;
-  CudnnBase::InitCudnn();
-  CHECK_CUDNN(cudnnCreateLRNDescriptor(&norm_desc_));
-  CHECK_CUDNN(cudnnSetLRNDescriptor(norm_desc_,
-        lsize_,
-        alpha_,
-        beta_,
-        knorm_));
-  CHECK_CUDNN(cudnnSetTensor4dDescriptor(src_desc_,
-      CUDNN_TENSOR_NCHW,
-      CUDNN_DATA_FLOAT,
-      batchsize_,
-      channels_,
-      height_,
-      width_));
-  CHECK_CUDNN(cudnnSetTensor4dDescriptor(my_desc_,
-      CUDNN_TENSOR_NCHW,
-      CUDNN_DATA_FLOAT,
-      batchsize_,
-      channels_,
-      height_,
-      width_));
-}
-void CudnnLRNLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  if (!has_init_cudnn_)
-    InitCudnn();
-  float alpha = 1.0f, beta = 0.0f;
-  CHECK_CUDNN(cudnnLRNCrossChannelForward(handle_,
-      norm_desc_,
-      mode_,
-      &alpha,
-      src_desc_,
-      srclayers[0]->data(this).gpu_data(),
-      &beta,
-      my_desc_,
-      data_.mutable_gpu_data()));
-}
-void CudnnLRNLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  float alpha = 1.0f, beta = 0.0f;
-  CHECK_CUDNN(cudnnLRNCrossChannelBackward(handle_,
-        norm_desc_,
-        mode_,
-        &alpha,
-        my_desc_,
-        data_.gpu_data(),
-        my_desc_,
-        grad_.gpu_data(),
-        src_desc_,
-        srclayers[0]->data(this).gpu_data(),
-        &beta,
-        src_desc_,
-        srclayers[0]->mutable_grad(this)->mutable_gpu_data()));
-}
-
-
-}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/cudnn_pooling.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/cudnn_pooling.cc b/src/neuralnet/neuron_layer/cudnn_pooling.cc
deleted file mode 100644
index 4c4c038..0000000
--- a/src/neuralnet/neuron_layer/cudnn_pooling.cc
+++ /dev/null
@@ -1,95 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include "singa/neuralnet/neuron_layer.h"
-
-namespace singa {
-
-CudnnPoolLayer::~CudnnPoolLayer() {
-  if (has_init_cudnn_) {
-    CHECK_CUDNN(cudnnDestroyPoolingDescriptor(pool_desc_));
-  }
-}
-
-void CudnnPoolLayer::InitCudnn() {
-  CudnnBase::InitCudnn();
-  CHECK_CUDNN(cudnnCreatePoolingDescriptor(&pool_desc_));
-  CHECK_CUDNN(cudnnSetTensor4dDescriptor(src_desc_,
-        CUDNN_TENSOR_NCHW,
-        CUDNN_DATA_FLOAT,
-        batchsize_,
-        channels_,
-        height_,
-        width_));
-  CHECK_CUDNN(cudnnSetTensor4dDescriptor(my_desc_,
-        CUDNN_TENSOR_NCHW,
-        CUDNN_DATA_FLOAT,
-        batchsize_,
-        channels_,
-        pooled_height_,
-        pooled_width_));
-  auto pool_method = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
-  if (pool_ == PoolingProto_PoolMethod_MAX)
-    pool_method = CUDNN_POOLING_MAX;
-  CHECK_CUDNN(cudnnSetPooling2dDescriptor(pool_desc_,
-        pool_method,
-        kernel_y_,
-        kernel_x_,
-        pad_y_,
-        pad_x_,
-        stride_y_,
-        stride_x_));
-}
-
-void CudnnPoolLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  if (!has_init_cudnn_)
-    InitCudnn();
-  float alpha = 1.0f, beta = 0.0f;
-  // currently only consider single src layer
-  CHECK_EQ(srclayers.size(), 1);
-  CHECK_CUDNN(cudnnPoolingForward(handle_,
-        pool_desc_,
-        &alpha,
-        src_desc_,
-        srclayers[0]->data(this).gpu_data(),
-        &beta,
-        my_desc_,
-        data_.mutable_gpu_data()));
-}
-
-void
-CudnnPoolLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  float alpha = 1.0f, beta = 0.0f;
-  CHECK_CUDNN(cudnnPoolingBackward(handle_,
-        pool_desc_,
-        &alpha,
-        my_desc_,
-        data_.gpu_data(),
-        my_desc_,
-        grad_.gpu_data(),
-        src_desc_,
-        srclayers[0]->data(this).gpu_data(),
-        &beta,
-        src_desc_,
-        srclayers[0]->mutable_grad(this)->mutable_gpu_data()));
-}
-}  // namespace singa
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/cudnn_softmax.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/cudnn_softmax.cc b/src/neuralnet/neuron_layer/cudnn_softmax.cc
deleted file mode 100644
index bf5a8d3..0000000
--- a/src/neuralnet/neuron_layer/cudnn_softmax.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include "singa/neuralnet/neuron_layer.h"
-
-namespace singa {
-
-void CudnnSoftmaxLayer::InitCudnn() {
-  CudnnBase::InitCudnn();
-  CHECK_CUDNN(cudnnSetTensor4dDescriptor(src_desc_,
-        CUDNN_TENSOR_NCHW,
-        CUDNN_DATA_FLOAT,
-        batchsize_,
-        dim_,
-        1,
-        1));
-  CHECK_CUDNN(cudnnSetTensor4dDescriptor(my_desc_,
-        CUDNN_TENSOR_NCHW,
-        CUDNN_DATA_FLOAT,
-        batchsize_,
-        dim_,
-        1,
-        1));
-}
-
-void CudnnSoftmaxLayer::ComputeFeature(int flag,
-    const vector<Layer*>& srclayers) {
-  if (!has_init_cudnn_)
-    InitCudnn();
-  const float alpha = 1.0f, beta = 0.0f;
-  CHECK_EQ(srclayers.at(0)->data(this).shape().size(), 2);
-  CHECK_CUDNN(cudnnSoftmaxForward(handle_,
-        CUDNN_SOFTMAX_ACCURATE,
-        CUDNN_SOFTMAX_MODE_INSTANCE,
-        &alpha,
-        src_desc_,
-        srclayers.at(0)->data(this).gpu_data(),
-        &beta,
-        my_desc_,
-        data_.mutable_gpu_data()));
-}
-
-void CudnnSoftmaxLayer::ComputeGradient(int flag,
-    const vector<Layer*>& srclayers) {
-  const float alpha = 1.f, beta = 0.f;
-  CHECK_CUDNN(cudnnSoftmaxBackward(handle_,
-        CUDNN_SOFTMAX_ACCURATE,
-        CUDNN_SOFTMAX_MODE_INSTANCE,
-        &alpha,
-        my_desc_,
-        data_.gpu_data(),
-        my_desc_,
-        grad_.gpu_data(),
-        &beta,
-        src_desc_,
-        srclayers.at(0)->mutable_grad(this)->mutable_gpu_data()));
-}
-}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/dropout.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/dropout.cc b/src/neuralnet/neuron_layer/dropout.cc
deleted file mode 100644
index 706b999..0000000
--- a/src/neuralnet/neuron_layer/dropout.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include <glog/logging.h>
-#include "singa/neuralnet/neuron_layer.h"
-#include "singa/utils/singleton.h"
-#include "singa/utils/singa_op.h"
-#include "singa/utils/math_blob.h"
-
-namespace singa {
-using std::vector;
-
-void DropoutLayer::Setup(const LayerProto& conf,
-    const vector<Layer*>& srclayers) {
-  Layer::Setup(conf, srclayers);
-  data_.ReshapeLike(srclayers[0]->data(this));
-  grad_.ReshapeLike(*srclayers[0]->mutable_grad(this));
-  mask_.Reshape(srclayers[0]->data(this).shape());
-  pdrop_ = conf.dropout_conf().dropout_ratio();
-}
-
-void DropoutLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  // check training
-  if ((flag & kTrain) != kTrain) {
-    data_.CopyFrom(srclayers[0]->data(this));
-    return;
-  }
-
-  float pkeep = 1 - pdrop_;
-  Blob<float> rand(data_.count());
-  SampleUniform(0.0f, 1.0f, &rand);
-  Map<op::Threshold<float>, float>(pkeep, rand, &mask_);
-  // scale the mask to avoid scaling in ComputeGradient
-  Scale(1.0f / pkeep, &mask_);
-  Mult(srclayers[0]->data(this), mask_, &data_);
-}
-
-void DropoutLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  Mult(grad_, mask_, srclayers[0]->mutable_grad(this));
-  // no need to mult scale as mask is scaled already.
-}
-
-}  // namespace singa
-
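
The layer above implements inverted dropout: the Bernoulli(pkeep) mask is divided by pkeep at training time, so the test-time forward pass is a plain copy and ComputeGradient only multiplies by the already-scaled mask. A self-contained sketch of that idea in plain C++ (hypothetical sizes, no SINGA types):

    #include <cstdio>
    #include <random>
    #include <vector>

    int main() {
      const float pdrop = 0.5f, pkeep = 1.0f - pdrop;
      std::vector<float> data(8, 1.0f), mask(8), out(8);
      std::mt19937 gen(123);
      std::uniform_real_distribution<float> uni(0.0f, 1.0f);
      // Build the mask once per forward pass; dividing by pkeep keeps the
      // expected activation unchanged, so inference needs no extra scaling.
      for (size_t i = 0; i < mask.size(); ++i)
        mask[i] = (uni(gen) < pkeep ? 1.0f : 0.0f) / pkeep;
      for (size_t i = 0; i < out.size(); ++i)
        out[i] = data[i] * mask[i];   // forward: data * mask
      // backward would reuse the same mask: gsrc[i] = gout[i] * mask[i]
      std::printf("out[0] = %f\n", out[0]);
      return 0;
    }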

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/dummy.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/dummy.cc b/src/neuralnet/neuron_layer/dummy.cc
deleted file mode 100644
index 9796407..0000000
--- a/src/neuralnet/neuron_layer/dummy.cc
+++ /dev/null
@@ -1,102 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include <glog/logging.h>
-#include "singa/neuralnet/neuron_layer.h"
-#include "singa/utils/math_blob.h"
-#include "singa/utils/context.h"
-#include "singa/utils/singleton.h"
-
-namespace singa {
-
-void DummyLayer::Setup(const std::string str,
-                       const vector<Layer*>& srclayers) {
-  LayerProto conf;
-  conf.ParseFromString(str);
-  DummyLayer::Setup(conf, srclayers);
-}
-
-void DummyLayer::Setup(const LayerProto& proto,
-                       const vector<Layer*>& srclayers) {
-  NeuronLayer::Setup(proto, srclayers);
-  if (proto.dummy_conf().input()) {  // use as input layer
-    CHECK_EQ(srclayers.size(), 0);
-    input_ = true;
-    vector<int> shape;
-    for (int s : proto.dummy_conf().shape()) shape.push_back(s);
-    data_.Reshape(shape);
-    grad_.ReshapeLike(data_);
-  } else {
-    CHECK_EQ(srclayers.size(), 1);
-    data_.ReshapeLike(srclayers[0]->data(this));
-    grad_.ReshapeLike(srclayers[0]->grad(this));
-  }
-  if (proto.dummy_conf().output()) {  // use as output layer
-    output_ = true;
-  }
-}
-
-void DummyLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  std::uniform_real_distribution<float> dis(0, 1);
-  auto gen = Singleton<Context>::Instance()->rand_generator();
-  if (input_) {
-    // randomly init data with [0,1] values
-    for (int i = 0; i < data_.count(); ++i)
-      data_.mutable_cpu_data()[i] = dis(*gen);
-  }
-  if (srclayers.size() > 0)
-    Copy(srclayers[0]->data(this), &data_);
-}
-
-void DummyLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  std::uniform_real_distribution<float> dis(0, 1);
-  auto gen = Singleton<Context>::Instance()->rand_generator();
-  if (output_) {
-    // randomly init data with [0,1] values
-    for (int i = 0; i < data_.count(); ++i)
-      grad_.mutable_cpu_data()[i] = dis(*gen);
-  }
-  if (srclayers.size() > 0)
-    Copy(grad_, srclayers[0]->mutable_grad(this));
-}
-
-void DummyLayer::Feed(int batchsize, vector<float>& data, vector<int>& aux_data){
-
-    batchsize_ = batchsize;
-    // input data
-    if (data.size() > 0) {
-      int size = data.size();
-      float* ptr = data_.mutable_cpu_data();
-      for (int i = 0; i< size; i++) { 
-          ptr[i] = data.at(i);
-      }
-    }
-    // auxiliary data, e.g., label
-    if (aux_data.size() > 0) {
-      aux_data_.resize(batchsize_);
-      for (int i = 0; i< batchsize_; i++) {
-          aux_data_[i] = static_cast<int>(aux_data.at(i));
-      }
-    }
-    return;
-}
-
-}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/embedding.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/embedding.cc b/src/neuralnet/neuron_layer/embedding.cc
deleted file mode 100644
index c980c54..0000000
--- a/src/neuralnet/neuron_layer/embedding.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include "singa/neuralnet/neuron_layer.h"
-#include "singa/utils/math_addr.h"
-#include "singa/utils/math_blob.h"
-#include "singa/utils/singleton.h"
-#include "singa/utils/context.h"
-
-namespace singa {
-
-void EmbeddingLayer::Setup(const LayerProto& conf,
-    const vector<Layer*>& srclayers) {
-  NeuronLayer::Setup(conf, srclayers);
-  vocab_size_ = conf.embedding_conf().vocab_size();
-  feature_dim_ = conf.embedding_conf().feature_dim();
-  vocab_ = Param::Create(conf.param(0));
-  vocab_->Setup(vector<int>{vocab_size_, feature_dim_});
-  batchsize_ = srclayers.at(0)->data(unroll_index()).shape(0);
-  data_.Reshape(batchsize_, feature_dim_);
-  grad_.ReshapeLike(data_);
-}
-
-void EmbeddingLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  const float* word_idx = srclayers.at(0)->data(unroll_index()).cpu_data();
-  int device = Singleton<Context>::Instance()->device_id();
-  if (device == -1) {
-    const float* src = vocab_->data().cpu_data();
-    float* dst = data_.mutable_cpu_data();
-    for (int i = 0; i < batchsize_; i++) {
-      memcpy(dst + i * feature_dim_,
-          src + static_cast<int>(word_idx[i]) * feature_dim_,
-          feature_dim_ * sizeof(float));
-    }
-  } else {
-#ifdef USE_GPU
-    const float* src = vocab_->data().gpu_data();
-    float* dst = data_.mutable_gpu_data();
-    for (int i = 0; i < batchsize_; i++) {
-      cudaMemcpy(dst + i * feature_dim_,
-          src + static_cast<int>(word_idx[i]) * feature_dim_,
-          feature_dim_ * sizeof(float), cudaMemcpyDefault);
-    }
-#else
-    LOG(FATAL) << "Not implemented";
-#endif
-  }
-}
-
-void EmbeddingLayer::ComputeGradient(int flag,
-    const vector<Layer*>& srclayers) {
-  const float* word_idx = srclayers.at(0)->data(unroll_index()).cpu_data();
-  auto context = Singleton<Context>::Instance();
-  if ((flag & kAggGrad) == 0)
-    Zero(vocab_->mutable_grad());
-
-  if (context->device_id() == -1) {
-    const float* src = grad_.cpu_data();
-    float* dst = vocab_->mutable_grad()->mutable_cpu_data();
-    memset(dst, 0 , sizeof(float) * grad_.count());
-    for (int i = 0; i < batchsize_; i++) {
-      cpu_axpy(feature_dim_, 1.0f, src + i * feature_dim_,
-          dst + static_cast<int>(word_idx[i]) * feature_dim_);
-    }
-  } else {
-#ifdef USE_GPU
-    const float* src = grad_.gpu_data();
-    float* dst = vocab_->mutable_grad()->mutable_gpu_data();
-    for (int i = 0; i < batchsize_; i++) {
-      gpu_axpy(context->cublas_handle(), grad_.count(), 1.0f,
-          src + i * feature_dim_,
-          dst + static_cast<int>(word_idx[i]) * feature_dim_);
-    }
-#else
-    LOG(FATAL) << "Not implemented";
-#endif
-  }
-}
-
-}  // namespace singa
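
In the embedding layer above, the forward pass is a row gather from the vocab_ table (one memcpy/cudaMemcpy per word) and the backward pass is a scatter-add of each output row's gradient into the corresponding word's row (the cpu_axpy/gpu_axpy calls). A CPU-only sketch with made-up dimensions:

    #include <cstdio>
    #include <cstring>
    #include <vector>

    int main() {
      const int vocab = 5, dim = 3, batch = 2;
      std::vector<float> table(vocab * dim, 0.1f);   // embedding table
      std::vector<float> out(batch * dim);           // forward output
      std::vector<float> gout(batch * dim, 1.0f);    // incoming gradient
      std::vector<float> gtable(vocab * dim, 0.0f);  // table gradient
      const int idx[] = {4, 1};                      // word ids per example
      for (int i = 0; i < batch; ++i)                // gather rows
        std::memcpy(&out[i * dim], &table[idx[i] * dim], dim * sizeof(float));
      for (int i = 0; i < batch; ++i)                // scatter-add gradients
        for (int d = 0; d < dim; ++d)
          gtable[idx[i] * dim + d] += gout[i * dim + d];
      std::printf("gtable row 4, col 0 = %f\n", gtable[4 * dim]);
      return 0;
    }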

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/gru.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/gru.cc b/src/neuralnet/neuron_layer/gru.cc
deleted file mode 100644
index 440da91..0000000
--- a/src/neuralnet/neuron_layer/gru.cc
+++ /dev/null
@@ -1,258 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include <glog/logging.h>
-#include "singa/neuralnet/neuron_layer.h"
-#include "singa/utils/singleton.h"
-#include "singa/utils/math_blob.h"
-#include "singa/utils/singa_op.h"
-
-using namespace std;
-
-namespace singa {
-
-using std::vector;
-
-GRULayer::~GRULayer() {
-  delete weight_z_hx_;
-  delete weight_z_hh_;
-  delete bias_z_;
-
-  delete weight_r_hx_;
-  delete weight_r_hh_;
-  delete bias_r_;
-
-  delete weight_c_hx_;
-  delete weight_c_hh_;
-  delete bias_c_;
-
-  delete update_gate_;
-  delete reset_gate_;
-  delete new_memory_;
-  // delete reset_context_;
-}
-
-void GRULayer::Setup(const LayerProto& conf,
-    const vector<Layer*>& srclayers) {
-  Layer::Setup(conf, srclayers);
-  CHECK_LE(srclayers.size(), 2);
-  const auto& src = srclayers[0]->data(this);
-
-  batchsize_ = src.shape()[0];  // size of batch
-  vdim_ = src.count() / (batchsize_);  // dimension of input
-
-  hdim_ = layer_conf_.gru_conf().dim_hidden();  // dimension of hidden state
-
-  data_.Reshape(vector<int>{batchsize_, hdim_});
-  grad_.ReshapeLike(data_);
-  // one for grad from dst GRU, one for grad from upper layer
-  gradvec_.push_back(new Blob<float>(grad_.shape()));
-
-  // Initialize the parameters
-  weight_z_hx_ = Param::Create(conf.param(0));
-  weight_r_hx_ = Param::Create(conf.param(1));
-  weight_c_hx_ = Param::Create(conf.param(2));
-
-  weight_z_hh_ = Param::Create(conf.param(3));
-  weight_r_hh_ = Param::Create(conf.param(4));
-  weight_c_hh_ = Param::Create(conf.param(5));
-
-  if (conf.param_size() > 6) {
-    bias_z_ = Param::Create(conf.param(6));
-    bias_r_ = Param::Create(conf.param(7));
-    bias_c_ = Param::Create(conf.param(8));
-  }
-
-  weight_z_hx_->Setup(vector<int>{hdim_, vdim_});
-  weight_r_hx_->Setup(vector<int>{hdim_, vdim_});
-  weight_c_hx_->Setup(vector<int>{hdim_, vdim_});
-
-  weight_z_hh_->Setup(vector<int>{hdim_, hdim_});
-  weight_r_hh_->Setup(vector<int>{hdim_, hdim_});
-  weight_c_hh_->Setup(vector<int>{hdim_, hdim_});
-
-  if (conf.param_size() > 6) {
-    bias_z_->Setup(vector<int>{hdim_});
-    bias_r_->Setup(vector<int>{hdim_});
-    bias_c_->Setup(vector<int>{hdim_});
-  }
-
-  update_gate_ = new Blob<float>(batchsize_, hdim_);
-  reset_gate_ = new Blob<float>(batchsize_, hdim_);
-  new_memory_ = new Blob<float>(batchsize_, hdim_);
-}
-
-void GRULayer::ComputeFeature(int flag,
-    const vector<Layer*>& srclayers) {
-  CHECK_LE(srclayers.size(), 2);
-
-  // Do transpose
-  Blob<float> *w_z_hx_t = Transpose(weight_z_hx_->data());
-  Blob<float> *w_z_hh_t = Transpose(weight_z_hh_->data());
-  Blob<float> *w_r_hx_t = Transpose(weight_r_hx_->data());
-  Blob<float> *w_r_hh_t = Transpose(weight_r_hh_->data());
-  Blob<float> *w_c_hx_t = Transpose(weight_c_hx_->data());
-  Blob<float> *w_c_hh_t = Transpose(weight_c_hh_->data());
-
-  // Prepare the data input and the context
-  const auto& src = srclayers[0]->data(this);
-  const Blob<float> *context;
-  if (srclayers.size() == 1) {  // only have data input
-    context = new Blob<float>(batchsize_, hdim_);
-  } else {  // have data input & context
-    context = &srclayers[1]->data(this);
-  }
-
-  // Compute the update gate
-  GEMM(1.0f, 0.0f, src, *w_z_hx_t, update_gate_);
-  if (bias_z_ != nullptr)
-    MVAddRow(1.0f, 1.0f, bias_z_->data(), update_gate_);
-  GEMM(1.0f, 1.0f, *context, *w_z_hh_t, update_gate_);
-  Map<op::Sigmoid<float>, float>(*update_gate_, update_gate_);
-  // LOG(ERROR) << "Update Gate: " << update_gate_->cpu_data()[0];
-  // Compute the reset gate
-  GEMM(1.0f, 0.0f, src, *w_r_hx_t, reset_gate_);
-  if (bias_r_ != nullptr)
-    MVAddRow(1.0f, 1.0f, bias_r_->data(), reset_gate_);
-  GEMM(1.0f, 1.0f, *context, *w_r_hh_t, reset_gate_);
-  Map<op::Sigmoid<float>, float>(*reset_gate_, reset_gate_);
-  // LOG(ERROR) << "Reset Gate: " << reset_gate_->cpu_data()[0];
-  // Compute the new memory
-  GEMM(1.0f, 0.0f, *context, *w_c_hh_t, new_memory_);
-  Mult<float>(*reset_gate_, *new_memory_, new_memory_);
-  GEMM(1.0f, 1.0f, src, *w_c_hx_t, new_memory_);
-  if (bias_c_ != nullptr)
-    MVAddRow(1.0f, 1.0f, bias_c_->data(), new_memory_);
-  Map<op::Tanh<float>, float>(*new_memory_, new_memory_);
-
-  Sub(*context, *new_memory_, &data_);
-  Mult(data_, *update_gate_, &data_);
-  Add(data_, *new_memory_, &data_);
-
-  // delete the pointers
-  if (srclayers.size() == 1)
-    delete context;
-
-  delete w_z_hx_t;
-  delete w_z_hh_t;
-  delete w_r_hx_t;
-  delete w_r_hh_t;
-  delete w_c_hx_t;
-  delete w_c_hh_t;
-}
-
-void GRULayer::ComputeGradient(int flag,
-    const vector<Layer*>& srclayers) {
-  CHECK_LE(srclayers.size(), 2);
-  // agg grad from two dst layers, gradvec_[0] is grad_
-  AXPY(1.0f, *gradvec_[1], &grad_);
-  float beta = 1.0f;  // agg param gradients
-
-  Layer* ilayer = srclayers[0];  // input layer
-  Layer* clayer = nullptr;  // context layer
-  // Prepare the data input and the context
-  const Blob<float>& src = ilayer->data(this);
-  const Blob<float> *context;
-  if (srclayers.size() == 1) {  // only have data input
-    context = new Blob<float>(batchsize_, hdim_);
-  } else {  // have data input & context
-    clayer = srclayers[1];
-    context = &(clayer->data(this));
-  }
-
-  // Compute intermediate gradients which are used for other computations
-  Blob<float> dugatedz(batchsize_, hdim_);
-  Map<singa::op::SigmoidGrad<float>, float>(*update_gate_, &dugatedz);
-  Blob<float> drgatedr(batchsize_, hdim_);
-  Map<singa::op::SigmoidGrad<float>, float>(*reset_gate_, &drgatedr);
-  Blob<float> dnewmdc(batchsize_, hdim_);
-  Map<singa::op::TanhGrad<float>, float>(*new_memory_, &dnewmdc);
-
-  Blob<float> dLdz(batchsize_, hdim_);
-  Sub<float>(*context, *new_memory_, &dLdz);
-  Mult<float>(dLdz, grad_, &dLdz);
-  Mult<float>(dLdz, dugatedz, &dLdz);
-
-  Blob<float> dLdc(batchsize_, hdim_);
-  Blob<float> z1(batchsize_, hdim_);
-  z1.SetValue(1.0f);
-  AXPY<float>(-1.0f, *update_gate_, &z1);
-  Mult(grad_, z1, &dLdc);
-  Mult(dLdc, dnewmdc, &dLdc);
-
-  Blob<float> reset_dLdc(batchsize_, hdim_);
-  Mult(dLdc, *reset_gate_, &reset_dLdc);
-
-  Blob<float> dLdr(batchsize_, hdim_);
-  Blob<float> cprev(batchsize_, hdim_);
-  GEMM(1.0f, 0.0f, *context, weight_c_hh_->data().T(), &cprev);
-  Mult(dLdc, cprev, &dLdr);
-  Mult(dLdr, drgatedr, &dLdr);
-
-  // Compute gradients for parameters of update gate
-  Blob<float> *dLdz_t = Transpose(dLdz);
-  GEMM(1.0f, beta, *dLdz_t, src, weight_z_hx_->mutable_grad());
-  GEMM(1.0f, beta, *dLdz_t, *context, weight_z_hh_->mutable_grad());
-  if (bias_z_ != nullptr)
-    MVSumRow<float>(1.0f, beta, dLdz, bias_z_->mutable_grad());
-  delete dLdz_t;
-
-  // Compute gradients for parameters of reset gate
-  Blob<float> *dLdr_t = Transpose(dLdr);
-  GEMM(1.0f, beta, *dLdr_t, src, weight_r_hx_->mutable_grad());
-  GEMM(1.0f, beta, *dLdr_t, *context, weight_r_hh_->mutable_grad());
-  if (bias_r_ != nullptr)
-    MVSumRow(1.0f, beta, dLdr, bias_r_->mutable_grad());
-  delete dLdr_t;
-
-  // Compute gradients for parameters of new memory
-  Blob<float> *dLdc_t = Transpose(dLdc);
-  GEMM(1.0f, beta, *dLdc_t, src, weight_c_hx_->mutable_grad());
-  if (bias_c_ != nullptr)
-    MVSumRow(1.0f, beta, dLdc, bias_c_->mutable_grad());
-  delete dLdc_t;
-
-  Blob<float> *reset_dLdc_t = Transpose(reset_dLdc);
-  GEMM(1.0f, beta, *reset_dLdc_t, *context, weight_c_hh_->mutable_grad());
-  delete reset_dLdc_t;
-
-  // Compute gradients for data input layer
-  if (srclayers[0]->mutable_grad(this) != nullptr) {
-    GEMM(1.0f, 0.0f, dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this));
-    GEMM(1.0f, 1.0f, dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this));
-    GEMM(1.0f, 1.0f, dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this));
-  }
-
-  if (clayer != nullptr && clayer->mutable_grad(this) != nullptr) {
-    // Compute gradients for context layer
-    GEMM(1.0f, 0.0f, reset_dLdc, weight_c_hh_->data(),
-        clayer->mutable_grad(this));
-    GEMM(1.0f, 1.0f, dLdr, weight_r_hh_->data(), clayer->mutable_grad(this));
-    GEMM(1.0f, 1.0f, dLdz, weight_z_hh_->data(), clayer->mutable_grad(this));
-    Add(clayer->grad(this), *update_gate_, clayer->mutable_grad(this));
-    // LOG(ERROR) << "grad to prev gru " << Asum(clayer->grad(this));
-  }
-
-  if (srclayers.size() == 1)
-    delete context;
-}
-
-}  // namespace singa
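
Reading the forward pass above back into equations (x_t is the input from srclayers[0], h_{t-1} the context, \odot element-wise multiplication, and W_{zx}, W_{zh}, ... the weight_z_hx_, weight_z_hh_, ... parameters), the layer computes

    z_t = \sigma(W_{zx} x_t + W_{zh} h_{t-1} + b_z)
    r_t = \sigma(W_{rx} x_t + W_{rh} h_{t-1} + b_r)
    \tilde{c}_t = \tanh(W_{cx} x_t + r_t \odot (W_{ch} h_{t-1}) + b_c)
    h_t = z_t \odot h_{t-1} + (1 - z_t) \odot \tilde{c}_t

i.e. the reset gate is applied to the recurrent contribution of the candidate state before the tanh, and the update gate interpolates between the previous hidden state and the candidate. ComputeGradient differentiates exactly this composition, aggregating the gradient from the next unrolled GRU (gradvec_[1]) with the gradient from the upper layer.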

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/inner_product.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/inner_product.cc b/src/neuralnet/neuron_layer/inner_product.cc
deleted file mode 100644
index a7378a2..0000000
--- a/src/neuralnet/neuron_layer/inner_product.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include <glog/logging.h>
-#include "singa/neuralnet/neuron_layer.h"
-#include "singa/utils/singleton.h"
-#include "singa/utils/math_blob.h"
-
-namespace singa {
-
-using std::vector;
-
-InnerProductLayer::~InnerProductLayer() {
-  delete weight_;
-  delete bias_;
-}
-
-void InnerProductLayer::Setup(const LayerProto& conf,
-    const vector<Layer*>& srclayers) {
-  Layer::Setup(conf, srclayers);
-  CHECK_EQ(srclayers.size(), 1);
-  const auto& src = srclayers[0]->data(this);
-  batchsize_ = src.shape()[0];
-  vdim_ = src.count() / batchsize_;
-  hdim_ = layer_conf_.innerproduct_conf().num_output();
-  transpose_ = conf.innerproduct_conf().transpose();
-  if (partition_dim() > 0)
-    hdim_ /= srclayers.at(0)->num_partitions();
-  data_.Reshape(vector<int>{batchsize_, hdim_});
-  grad_.ReshapeLike(data_);
-  weight_ = Param::Create(conf.param(0));
-  bias_ = Param::Create(conf.param(1));
-  if (transpose_)
-    weight_->Setup(vector<int>{vdim_, hdim_});
-  else
-    weight_->Setup(vector<int>{hdim_, vdim_});
-  bias_->Setup(vector<int>{hdim_});
-}
-
-void InnerProductLayer::ComputeFeature(int flag,
-    const vector<Layer*>& srclayers) {
-  if (transpose_)
-    MMDot(srclayers[0]->data(this), weight_->data(), &data_);
-  else
-    MMDot(srclayers[0]->data(this), weight_->data().T(), &data_);
-  MVAddRow(bias_->data(), &data_);
-}
-
-void InnerProductLayer::ComputeGradient(int flag,
-    const vector<Layer*>& srclayers) {
-  float beta = 0.0f;
-  if (flag & kAggGrad)
-    beta = 1.0f;
-  MVSumRow(1.0f, beta, grad_, bias_->mutable_grad());
-  if (transpose_)
-    GEMM(1.0f, beta, srclayers[0]->data(this).T(), grad_,
-        weight_->mutable_grad());
-  else
-    GEMM(1.0f, beta, grad_.T(), srclayers[0]->data(this),
-        weight_->mutable_grad());
-
-  if (srclayers[0]->mutable_grad(this) != nullptr) {
-    if (transpose_)
-      MMDot(grad_, weight_->data().T(), srclayers[0]->mutable_grad(this));
-    else
-      MMDot(grad_, weight_->data(), srclayers[0]->mutable_grad(this));
-  }
-  //clee auto w = weight_->mutable_cpu_data();
-  //LOG(ERROR) << srclayers[0]->name() << " " << w[0];
-}
-}  // namespace singa
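
For reference, the non-transposed branch above computes y = x * W^T + b with W stored as hdim x vdim, so the backward pass is dW = dy^T * x, db = the column sums of dy over the batch, and dx = dy * W. A minimal sketch of those formulas with plain loops, assuming small dense row-major matrices (the helper name is illustrative, not SINGA's API):

#include <vector>

// Fully connected backward pass for y(BxH) = x(BxV) * W^T + b, with W stored HxV.
// dW(HxV) = dy^T * x, db(H) = sum over the batch of dy, dx(BxV) = dy * W.
void InnerProductBackward(const std::vector<float>& x, const std::vector<float>& W,
                          const std::vector<float>& dy, int B, int V, int H,
                          std::vector<float>* dW, std::vector<float>* db,
                          std::vector<float>* dx) {
  dW->assign(H * V, 0.0f);
  db->assign(H, 0.0f);
  dx->assign(B * V, 0.0f);
  for (int n = 0; n < B; ++n)
    for (int h = 0; h < H; ++h) {
      (*db)[h] += dy[n * H + h];
      for (int v = 0; v < V; ++v) {
        (*dW)[h * V + v] += dy[n * H + h] * x[n * V + v];
        (*dx)[n * V + v] += dy[n * H + h] * W[h * V + v];
      }
    }
}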

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/lrn.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/lrn.cc b/src/neuralnet/neuron_layer/lrn.cc
deleted file mode 100644
index b199b9a..0000000
--- a/src/neuralnet/neuron_layer/lrn.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include <glog/logging.h>
-#include "singa/neuralnet/neuron_layer.h"
-#include "singa/utils/singleton.h"
-
-
-namespace singa {
-
-using std::vector;
-
-void LRNLayer::Setup(const LayerProto& conf, const vector<Layer*>& srclayers) {
-  Layer::Setup(conf, srclayers);
-  CHECK_EQ(srclayers.size(), 1);
-  lsize_ = conf.lrn_conf().local_size();
-  CHECK_EQ(lsize_ % 2, 1) << "LRN only supports odd values for local_size";
-  knorm_ = conf.lrn_conf().knorm();
-  alpha_ = conf.lrn_conf().alpha();
-  beta_ = conf.lrn_conf().beta();
-  const vector<int>& s = srclayers[0]->data(this).shape();
-  data_.Reshape(s);
-  grad_.Reshape(s);
-  norm_.Reshape(s);
-  batchsize_ = s[0];
-  channels_ = s[1];
-  height_ = s[2];
-  width_ = s[3];
-}
-
-void LRNLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  const float salpha = alpha_ / lsize_;
-  auto src = Tensor4(srclayers[0]->mutable_data(this));
-  auto data = Tensor4(&data_);
-  auto norm = Tensor4(&norm_);
-  // stores normalizer without power
-  norm = expr::chpool<red::sum>(expr::F<op::square>(src), lsize_) * salpha
-    + knorm_;
-  data = src * expr::F<op::power>(norm, -beta_);
-}
-
-void LRNLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  const float salpha = alpha_ / lsize_;
-  auto src = Tensor4(srclayers[0]->mutable_data(this));
-  auto norm = Tensor4(&norm_);
-  auto grad = Tensor4(&grad_);
-  auto gsrc = Tensor4(srclayers[0]->mutable_grad(this));
-
-  gsrc = grad * expr::F<op::power>(norm, -beta_);
-  Tensor<cpu, 4> tmp(gsrc.shape);
-  AllocSpace(tmp);
-  tmp = gsrc * src / norm;
-  gsrc += (- 2.0f * beta_ * salpha) * expr::chpool<red::sum>(tmp, lsize_) * src;
-  FreeSpace(tmp);
-}
-
-}  // namespace singa
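
The forward pass above implements cross-channel local response normalization: for each activation, norm = knorm + (alpha / lsize) * sum of x^2 over a window of lsize neighbouring channels, and the output is x * norm^(-beta). A standalone sketch of that formula for a single spatial position, assuming a channel-centred window clipped at the boundaries (plain C++, independent of mshadow):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Cross-channel LRN at one (n, y, x) position; x holds one value per channel.
std::vector<float> LrnForward(const std::vector<float>& x, int lsize,
                              float alpha, float beta, float knorm) {
  const int C = static_cast<int>(x.size());
  const int half = lsize / 2;          // lsize is required to be odd
  const float salpha = alpha / lsize;
  std::vector<float> y(C);
  for (int c = 0; c < C; ++c) {
    float norm = knorm;
    for (int j = std::max(0, c - half); j <= std::min(C - 1, c + half); ++j)
      norm += salpha * x[j] * x[j];
    y[c] = x[c] * std::pow(norm, -beta);
  }
  return y;
}

int main() {
  std::vector<float> x{1.0f, 2.0f, 3.0f, 4.0f};
  std::vector<float> y = LrnForward(x, 3, 1e-4f, 0.75f, 1.0f);
  std::printf("y[0] = %f\n", y[0]);
  return 0;
}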

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/pooling.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/pooling.cc b/src/neuralnet/neuron_layer/pooling.cc
deleted file mode 100644
index 4eda2e4..0000000
--- a/src/neuralnet/neuron_layer/pooling.cc
+++ /dev/null
@@ -1,146 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include <glog/logging.h>
-#include "singa/neuralnet/neuron_layer.h"
-#include "singa/utils/singleton.h"
-
-
-namespace singa {
-
-using std::vector;
-
-/******************** Implementation for PoolingLayer******************/
-void PoolingLayer::Setup(const LayerProto& conf,
-    const vector<Layer*>& srclayers) {
-  Layer::Setup(conf, srclayers);
-  CHECK_EQ(srclayers.size(), 1);
-  PoolingProto pool_conf = conf.pooling_conf();
-  if (pool_conf.has_kernel()) {
-    kernel_x_ = kernel_y_ = pool_conf.kernel();
-  } else {
-    kernel_x_ = pool_conf.kernel_x();
-    kernel_y_ = pool_conf.kernel_y();
-  }
-  CHECK_NE(kernel_x_, 0);
-  CHECK_NE(kernel_y_, 0);
-
-  if (pool_conf.has_pad()) {
-    pad_x_ = pad_y_ = pool_conf.pad();
-  } else {
-    pad_x_ = pool_conf.pad_x();
-    pad_y_ = pool_conf.pad_y();
-  }
-
-  if (pool_conf.has_stride()) {
-    stride_x_ = stride_y_ = pool_conf.stride();
-  } else {
-    stride_x_ = pool_conf.stride_x();
-    stride_y_ = pool_conf.stride_y();
-  }
-
-  pool_ = conf.pooling_conf().pool();
-  CHECK(pool_ == PoolingProto_PoolMethod_AVG
-        || pool_ == PoolingProto_PoolMethod_MAX)
-        << "Padding implemented only for average and max pooling.";
-  const auto& srcshape = srclayers[0]->data(this).shape();
-  int dim = srcshape.size();
-  CHECK_GT(dim, 2);
-  width_ = srcshape[dim - 1];
-  height_ = srcshape[dim - 2];
-  if (dim > 3)
-    channels_ = srcshape[dim-3];
-  else
-    channels_ = 1;
-  batchsize_ = srcshape[0];
-  pooled_height_ = static_cast<int>(
-      (height_ + 2 * pad_y_ - kernel_y_) / stride_y_) + 1;
-  pooled_width_ = static_cast<int>(
-      (width_ + 2 * pad_x_ - kernel_x_) / stride_x_) + 1;
-  data_.Reshape(vector<int>{batchsize_, channels_, pooled_height_,
-                            pooled_width_});
-  grad_.ReshapeLike(data_);
-}
-
-void PoolingLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  auto src = Tensor4(srclayers[0]->mutable_data(this));
-  auto data = Tensor4(&data_);
-  if (pool_ == PoolingProto_PoolMethod_MAX)
-    data = expr::pool<red::maximum>(src, kernel_x_, stride_x_);
-  else if (pool_ == PoolingProto_PoolMethod_AVG)
-    data = expr::pool<red::sum>(src, kernel_x_, stride_x_)
-      * (1.0f / (kernel_x_ * kernel_x_));
-}
-
-/*
- * partition only on num/channel dim
- * assume grad and data have the same partition
- */
-void PoolingLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  auto src = Tensor4(srclayers[0]->mutable_data(this));
-  auto gsrc = Tensor4(srclayers[0]->mutable_grad(this));
-  auto data = Tensor4(&data_);
-  auto grad = Tensor4(&grad_);
-  if (pool_ == PoolingProto_PoolMethod_MAX)
-    gsrc = expr::unpool<red::maximum>(src, data, grad, kernel_x_, stride_x_);
-  else if (pool_ == PoolingProto_PoolMethod_AVG)
-    gsrc = expr::unpool<red::sum>(src, data, grad, kernel_x_, stride_x_)
-           * (1.0f / (kernel_x_ * kernel_x_));
-}
-
-/***************** Implementation of CPoolingLayer ***************/
-
-void CPoolingLayer::Setup(const LayerProto& conf,
-    const vector<Layer*>& srclayers) {
-  PoolingLayer::Setup(conf, srclayers);
-  if (pool_ == PoolingProto_PoolMethod_MAX)
-      mask_.ReshapeLike(data_);
-}
-void CPoolingLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  if (pool_ == PoolingProto_PoolMethod_MAX)
-    ForwardMaxPooling(srclayers[0]->mutable_data(this)->mutable_cpu_data(),
-        batchsize_, channels_, height_, width_, kernel_y_, kernel_x_,
-        pad_y_, pad_x_, stride_y_, stride_x_,
-        data_.mutable_cpu_data(), mask_.mutable_cpu_data());
-  else if (pool_ == PoolingProto_PoolMethod_AVG)
-    ForwardAvgPooling(srclayers[0]->mutable_data(this)->mutable_cpu_data(),
-        batchsize_, channels_, height_, width_, kernel_y_, kernel_x_,
-        pad_y_, pad_x_, stride_y_, stride_x_, data_.mutable_cpu_data());
-  else
-    LOG(FATAL) << "unknow pooling method";
-}
-
-void CPoolingLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  if (pool_ == PoolingProto_PoolMethod_MAX)
-    BackwardMaxPooling(grad_.cpu_data(), mask_.cpu_data(), batchsize_,
-        channels_, height_, width_, kernel_y_, kernel_x_, pad_y_, pad_x_,
-        stride_y_, stride_x_,
-        srclayers[0]->mutable_grad(this)->mutable_cpu_data());
-  else if (pool_ == PoolingProto_PoolMethod_AVG)
-    BackwardAvgPooling(grad_.cpu_data(), batchsize_,
-        channels_, height_, width_, kernel_y_, kernel_x_, pad_y_, pad_x_,
-        stride_y_, stride_x_,
-        srclayers[0]->mutable_grad(this)->mutable_cpu_data());
-  else
-    LOG(FATAL) << "unknow pooling method";
-}
-
-}  //  namespace singa
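
CPoolingLayer's max-pooling path records, for every output element, which input position won the max in mask_, so the backward pass only routes each gradient to that position. A single-channel sketch of the idea, assuming zero padding outside the image and the layout used above (illustrative only, not the actual ForwardMaxPooling/BackwardMaxPooling signatures):

#include <algorithm>
#include <cfloat>
#include <vector>

// Max pooling over one HxW map; mask stores the argmax index for each output.
void MaxPoolForward(const std::vector<float>& in, int H, int W, int kernel,
                    int stride, int pad, std::vector<float>* out,
                    std::vector<int>* mask) {
  const int ph = (H + 2 * pad - kernel) / stride + 1;
  const int pw = (W + 2 * pad - kernel) / stride + 1;
  out->assign(ph * pw, -FLT_MAX);
  mask->assign(ph * pw, -1);
  for (int oy = 0; oy < ph; ++oy)
    for (int ox = 0; ox < pw; ++ox) {
      const int y0 = std::max(0, oy * stride - pad);
      const int x0 = std::max(0, ox * stride - pad);
      const int y1 = std::min(H, oy * stride - pad + kernel);
      const int x1 = std::min(W, ox * stride - pad + kernel);
      for (int y = y0; y < y1; ++y)
        for (int x = x0; x < x1; ++x)
          if (in[y * W + x] > (*out)[oy * pw + ox]) {
            (*out)[oy * pw + ox] = in[y * W + x];
            (*mask)[oy * pw + ox] = y * W + x;
          }
    }
}

// Backward: each output gradient flows only to its recorded argmax input.
void MaxPoolBackward(const std::vector<float>& gout, const std::vector<int>& mask,
                     int in_count, std::vector<float>* gin) {
  gin->assign(in_count, 0.0f);
  for (size_t i = 0; i < gout.size(); ++i)
    if (mask[i] >= 0) (*gin)[mask[i]] += gout[i];
}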

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/rbm.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/rbm.cc b/src/neuralnet/neuron_layer/rbm.cc
deleted file mode 100644
index 67d0922..0000000
--- a/src/neuralnet/neuron_layer/rbm.cc
+++ /dev/null
@@ -1,200 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include <glog/logging.h>
-#include "singa/neuralnet/neuron_layer.h"
-#include "singa/utils/singleton.h"
-
-namespace singa {
-
-using std::vector;
-
-/**************** Implementation for RBMLayer********************/
-Blob<float>* RBMLayer::Sample(int flag) {
-  Tensor<cpu, 2> sample, data;
-  if ((flag & kPositive) == kPositive || first_gibbs_) {
-    data = Tensor2(&pos_data_);
-    sample = Tensor2(&pos_sample_);
-  } else {
-    data = Tensor2(&neg_data_);
-    sample = Tensor2(&neg_sample_);
-  }
-  auto random = TSingleton<Random<cpu>>::Instance();
-  if (gaussian_) {
-    random->SampleGaussian(sample, 0.0f, 1.0f);
-    sample += data;
-  } else {
-    random->SampleBinary(sample, data);
-  }
-  return (flag & kPositive) == kPositive || first_gibbs_ ?
-    &pos_sample_ : &neg_sample_;
-}
-void RBMLayer::Setup(const LayerProto& conf, const vector<Layer*>& srclayers) {
-  Layer::Setup(conf, srclayers);
-  hdim_ = conf.rbm_conf().hdim();
-  gaussian_ = conf.rbm_conf().gaussian();
-  first_gibbs_ = true;
-  datavec_.clear();
-  datavec_.push_back(&pos_data_);
-  datavec_.push_back(&neg_data_);
-  datavec_.push_back(&neg_sample_);
-  datavec_.push_back(&pos_sample_);
-  gradvec_.resize(4);
-}
-/**************** Implementation for RBMVisLayer********************/
-RBMVisLayer::~RBMVisLayer() {
-  delete weight_;
-  delete bias_;
-}
-
-void RBMVisLayer::Setup(const LayerProto& conf,
-    const vector<Layer*>& srclayers) {
-  CHECK_EQ(srclayers.size(), 2);
-  RBMLayer::Setup(conf, srclayers);
-  CHECK_EQ(srclayers.size(), 2);
-  hid_layer_ = nullptr;
-  for (auto src : srclayers) {
-    if (typeid(*src) == typeid(RBMHidLayer)) {
-      // note the hid layer may not have been set up yet.
-      CHECK(hid_layer_ == nullptr);
-      hid_layer_ = dynamic_cast<RBMHidLayer*>(src);
-    }
-  }
-  input_layer_ = srclayers[0] != hid_layer_ ? srclayers[0]: srclayers[1];
-  const auto& src = input_layer_->data(this);
-  batchsize_ = src.shape()[0];
-  pos_data_.ReshapeLike(src);
-  neg_data_.ReshapeLike(pos_data_);
-  neg_sample_.ReshapeLike(pos_data_);
-  vdim_ = src.count() / batchsize_;
-  weight_ = Param::Create(conf.param(0));
-  weight_ ->Setup(vector<int>{hdim_, vdim_});
-  bias_ = Param::Create(conf.param(1));
-  bias_->Setup(vector<int>{vdim_});
-}
-
-void RBMVisLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  if ((flag & kPositive) == kPositive) {
-    pos_data_.CopyFrom(input_layer_->data(this), true);
-    first_gibbs_ = true;
-  } else if ((flag & kNegative) == kNegative) {
-    // fetch sampling results from hidden layer
-    auto hid_sample = Tensor2(hid_layer_->Sample(flag));
-    auto data = Tensor2(&neg_data_);
-    auto weight = Tensor2(weight_->mutable_data());
-    auto bias = Tensor1(bias_->mutable_data());
-    data = dot(hid_sample, weight);
-    data += expr::repmat(bias, batchsize_);
-    data = expr::F<op::sigmoid>(data);
-    if ((flag & kTest) == kTest) {
-      const float *dptr = pos_data_.cpu_data(), *rcns = neg_data_.cpu_data();
-      float err = 0.f;
-      for (int i = 0; i < pos_data_.count(); i++) {
-        err += (dptr[i] - rcns[i]) * (dptr[i] - rcns[i]);
-      }
-      error_ += err / batchsize_;
-    }
-    first_gibbs_ = false;
-  }
-  counter_ += 1;
-}
-
-void RBMVisLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  auto vis_pos = Tensor2(&pos_data_);
-  auto vis_neg = Tensor2(&neg_data_);
-  auto hid_pos = Tensor2(hid_layer_->mutable_data(0));
-  auto hid_neg = Tensor2(hid_layer_->mutable_data(1));
-
-  auto gbias = Tensor1(bias_->mutable_grad());
-  gbias = expr::sum_rows(vis_neg);
-  gbias -= expr::sum_rows(vis_pos);
-  gbias /= batchsize_;
-
-  auto gweight = Tensor2(weight_->mutable_grad());
-  gweight = dot(hid_neg.T(), vis_neg);
-  gweight -= dot(hid_pos.T(), vis_pos);
-  gweight /= batchsize_;
-}
-const std::string RBMVisLayer::ToString(bool debug, int flag) {
-  if (debug)
-    return Layer::ToString(debug, flag);
-
-  string disp = "Squared Error = " + std::to_string(error_ / counter_);
-  counter_ = 0;
-  error_ = 0;
-  return disp;
-}
-/**************** Implementation for RBMHidLayer********************/
-RBMHidLayer::~RBMHidLayer() {
-  delete weight_;
-  delete bias_;
-}
-
-void RBMHidLayer::Setup(const LayerProto& conf,
-      const vector<Layer*>& srclayers) {
-  RBMLayer::Setup(conf, srclayers);
-  CHECK_EQ(srclayers.size(), 1);
-  const auto& src_data = srclayers[0]->data(0);
-  batchsize_ = src_data.shape()[0];
-  vdim_ = src_data.count() / batchsize_;
-  pos_data_.Reshape(vector<int>{batchsize_, hdim_});
-  neg_data_.ReshapeLike(pos_data_);
-  pos_sample_.ReshapeLike(pos_data_);
-  neg_sample_.ReshapeLike(pos_data_);
-  weight_ = Param::Create(conf.param(0));
-  weight_->Setup(vector<int>{hdim_, vdim_});
-  bias_ = Param::Create(conf.param(1));
-  bias_->Setup(vector<int>{hdim_});
-  vis_layer_ = dynamic_cast<RBMVisLayer*> (srclayers[0]);
-}
-
-void RBMHidLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  auto weight = Tensor2(weight_->mutable_data());
-  auto bias = Tensor1(bias_->mutable_data());
-
-  Tensor<cpu, 2> data, src;
-  if ((flag & kPositive) == kPositive) {
-    data = Tensor2(&pos_data_);
-    src = Tensor2(vis_layer_->mutable_data(0));
-    first_gibbs_ = true;
-  } else {
-    data = Tensor2(&neg_data_);
-    // Hinton's Science paper does not sample the vis layer
-    src = Tensor2(vis_layer_->mutable_data(1));
-    first_gibbs_ = false;
-  }
-  data = dot(src, weight.T());
-  data += expr::repmat(bias, batchsize_);
-
-  if (!gaussian_)
-    data = expr::F<op::sigmoid>(data);
-}
-
-void RBMHidLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  auto hid_pos = Tensor2(&pos_data_);
-  auto hid_neg = Tensor2(&neg_data_);
-  auto gbias = Tensor1(bias_->mutable_grad());
-  gbias = expr::sum_rows(hid_neg);
-  gbias -= expr::sum_rows(hid_pos);
-  gbias /= batchsize_;
-}
-
-}  // namespace singa
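
The RBM gradients above follow one-step contrastive divergence: the weight gradient is (h_neg^T * v_neg - h_pos^T * v_pos) / batchsize and the bias gradients are the matching column-mean differences, which is what the dot() and sum_rows() expressions compute. A minimal sketch of the weight gradient with plain loops (shapes only; not SINGA's API):

#include <vector>

// CD-1 weight gradient: gW(HxV) = (h_neg^T * v_neg - h_pos^T * v_pos) / B.
void CdWeightGrad(const std::vector<float>& v_pos, const std::vector<float>& h_pos,
                  const std::vector<float>& v_neg, const std::vector<float>& h_neg,
                  int B, int V, int H, std::vector<float>* gW) {
  gW->assign(H * V, 0.0f);
  for (int n = 0; n < B; ++n)
    for (int h = 0; h < H; ++h)
      for (int v = 0; v < V; ++v)
        (*gW)[h * V + v] += (h_neg[n * H + h] * v_neg[n * V + v] -
                             h_pos[n * H + h] * v_pos[n * V + v]) / B;
}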

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/relu.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/relu.cc b/src/neuralnet/neuron_layer/relu.cc
deleted file mode 100644
index 5d4d954..0000000
--- a/src/neuralnet/neuron_layer/relu.cc
+++ /dev/null
@@ -1,51 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include <glog/logging.h>
-#include "singa/neuralnet/neuron_layer.h"
-#include "singa/utils/singleton.h"
-
-
-namespace singa {
-
-using std::vector;
-
-void ReLULayer::Setup(const LayerProto& conf,
-    const vector<Layer*>& srclayers) {
-  Layer::Setup(conf, srclayers);
-  data_.ReshapeLike(srclayers[0]->data(this));
-  grad_.ReshapeLike(*(srclayers[0]->mutable_grad(this)));
-}
-
-void ReLULayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  auto data = Tensor1(&data_);
-  auto src = Tensor1(srclayers[0]->mutable_data(this));
-  data = expr::F<op::relu>(src);
-}
-
-void ReLULayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  auto data = Tensor1(&data_);
-  auto grad = Tensor1(&grad_);
-  auto gsrc = Tensor1(srclayers[0]->mutable_grad(this));
-  gsrc = expr::F<op::relu_grad>(data)*grad;
-}
-
-}  //  namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/sigmoid.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/sigmoid.cc b/src/neuralnet/neuron_layer/sigmoid.cc
deleted file mode 100644
index 9348011..0000000
--- a/src/neuralnet/neuron_layer/sigmoid.cc
+++ /dev/null
@@ -1,51 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include <glog/logging.h>
-#include "singa/neuralnet/neuron_layer.h"
-#include "singa/utils/singleton.h"
-
-
-namespace singa {
-
-using std::vector;
-
-void SigmoidLayer::Setup(const LayerProto& conf,
-    const vector<Layer*>& srclayers) {
-  Layer::Setup(conf, srclayers);
-  data_.ReshapeLike(srclayers[0]->data(this));
-  grad_.ReshapeLike(srclayers[0]->grad(this));
-}
-
-void SigmoidLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  auto data = Tensor1(&data_);
-  auto src = Tensor1(srclayers[0]->mutable_data(this));
-  data = expr::F<op::sigmoid>(src);
-}
-
-void SigmoidLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  auto data = Tensor1(&data_);
-  auto grad = Tensor1(&grad_);
-  auto gsrc = Tensor1(srclayers[0]->mutable_grad(this));
-  gsrc = expr::F<op::sigmoid_grad>(data) * grad;
-}
-
-}  //  namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/softmax.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/softmax.cc b/src/neuralnet/neuron_layer/softmax.cc
deleted file mode 100644
index 4a09241..0000000
--- a/src/neuralnet/neuron_layer/softmax.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include "singa/neuralnet/neuron_layer.h"
-
-namespace singa {
-
-using namespace mshadow;
-using mshadow::cpu;
-
-using mshadow::Shape;
-using mshadow::Shape1;
-using mshadow::Shape2;
-using mshadow::Tensor;
-
-using std::vector;
-
-void SoftmaxLayer::Setup(const LayerProto& proto,
-    const vector<Layer*>& srclayers) {
-  CHECK_EQ(srclayers.size(), 1);
-  NeuronLayer::Setup(proto, srclayers);
-  const auto& srcdata = srclayers[0]->data(this);
-  batchsize_ = srcdata.shape()[0];
-  dim_ = srcdata.count() / batchsize_;
-  /*
-  num_softmax_per_instance_ = proto.softmax_conf().num_softmax_per_instance();
-  count_per_softmax_ = srcdata.count() / batchsize_ / num_softmax_per_instance_;
-  */
-  data_.Reshape(batchsize_, dim_);
-  grad_.ReshapeLike(data_);
-}
-
-void SoftmaxLayer::ComputeFeature(int flag,
-    const vector<Layer*>& srclayers) {
-  int dim = data_.count() / batchsize_;
-  Shape<2> s = Shape2(batchsize_, dim);
-  Tensor<cpu, 2> prob(data_.mutable_cpu_data(), s);
-  Tensor<cpu, 2> src(srclayers[0]->mutable_data(this)->mutable_cpu_data(), s);
-  Softmax(prob, src);
-}
-
-void SoftmaxLayer::ComputeGradient(int flag,
-    const vector<Layer*>& srclayers) {
-  int batchsize = data_.shape()[0];
-  LOG(FATAL) << "not implemented";
-  for (int n = 0; n < batchsize; n++) {
-    // TODO(wangwei) finish the code using new math API
-    // gxi=[(gyi+gyi*yi)-\sum_k(gyk*yk)]*yi
-  }
-}
-
-}  // namespace singa
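
ComputeGradient above is left unimplemented. For y = softmax(x) applied row-wise, the standard backward pass is dx_i = (dy_i - sum_k dy_k * y_k) * y_i, which differs slightly from the in-code comment. A hedged sketch of what the missing loop could look like, written with plain buffers rather than the new math API the TODO refers to:

#include <vector>

// Row-wise softmax backward: dx_i = (dy_i - sum_k dy_k * y_k) * y_i.
void SoftmaxBackward(const std::vector<float>& y, const std::vector<float>& dy,
                     int batchsize, int dim, std::vector<float>* dx) {
  dx->assign(batchsize * dim, 0.0f);
  for (int n = 0; n < batchsize; ++n) {
    const float* yrow = &y[n * dim];
    const float* gyrow = &dy[n * dim];
    float dot = 0.0f;
    for (int k = 0; k < dim; ++k)
      dot += gyrow[k] * yrow[k];
    for (int i = 0; i < dim; ++i)
      (*dx)[n * dim + i] = (gyrow[i] - dot) * yrow[i];
  }
}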

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/neuron_layer/stanh.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/stanh.cc b/src/neuralnet/neuron_layer/stanh.cc
deleted file mode 100644
index 70b9cd1..0000000
--- a/src/neuralnet/neuron_layer/stanh.cc
+++ /dev/null
@@ -1,48 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include "singa/neuralnet/neuron_layer.h"
-
-namespace singa {
-
-using std::vector;
-
-void STanhLayer::Setup(const LayerProto& conf,
-    const vector<Layer*>& srclayers) {
-  Layer::Setup(conf, srclayers);
-  data_.ReshapeLike(srclayers[0]->data(this));
-  grad_.ReshapeLike(srclayers[0]->grad(this));
-}
-
-void STanhLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  auto data = Tensor1(&data_);
-  auto src = Tensor1(srclayers[0]->mutable_data(this));
-  data = expr::F<op::stanh>(src);
-}
-
-void STanhLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  auto data = Tensor1(&data_);
-  auto grad = Tensor1(&grad_);
-  auto gsrc = Tensor1(srclayers[0]->mutable_grad(this));
-  gsrc = expr::F<op::stanh_grad>(data) * grad;
-}
-
-}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/output_layer/accuracy.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/output_layer/accuracy.cc b/src/neuralnet/output_layer/accuracy.cc
deleted file mode 100644
index 53a9406..0000000
--- a/src/neuralnet/output_layer/accuracy.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include <algorithm>
-#include "singa/neuralnet/output_layer.h"
-
-namespace singa {
-
-void AccuracyLayer::Setup(const LayerProto& proto,
-    const vector<Layer*>& srclayers) {
-  CHECK_EQ(srclayers.size(), 2);
-  ArgSortLayer::Setup(proto, vector<Layer*>{srclayers.at(0)});
-}
-
-void AccuracyLayer::ComputeFeature(int flag,
-    const vector<Layer*>& srclayers) {
-  ArgSortLayer::ComputeFeature(flag, vector<Layer*>{srclayers.at(0)});
-  const auto& label = srclayers[1]->aux_data(this);
-  int ncorrect = 0;
-  for (int n = 0; n < batchsize_; n++) {
-    const float* pos = data_.cpu_data() + topk_ * n;
-    // check if true label is in top k predictions
-    for (int k = 0; k < topk_; k++) {
-      if (pos[k] == label[n]) {
-        ncorrect++;
-        break;
-      }
-    }
-  }
-  accuracy_ += ncorrect * 1.0f / batchsize_;
-  counter_++;
-}
-
-const std::string AccuracyLayer::ToString(bool debug, int flag) {
-  if (debug)
-    return Layer::ToString(debug, flag);
-
-  string disp = "accuracy = " + std::to_string(accuracy_ / counter_);
-  counter_ = 0;
-  accuracy_ = 0;
-  return disp;
-}
-}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/src/neuralnet/output_layer/argsort.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/output_layer/argsort.cc b/src/neuralnet/output_layer/argsort.cc
deleted file mode 100644
index 869bc65..0000000
--- a/src/neuralnet/output_layer/argsort.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include <algorithm>
-#include "singa/neuralnet/output_layer.h"
-
-namespace singa {
-
-void ArgSortLayer::Setup(const LayerProto& proto,
-    const vector<Layer*>& srclayers) {
-  CHECK_EQ(srclayers.size(), 1);
-  OutputLayer::Setup(proto, srclayers);
-  batchsize_ = srclayers[0]->data(this).shape()[0];
-  dim_ = srclayers[0]->data(this).count() / batchsize_;
-  topk_ = proto.argsort_conf().topk();
-  data_.Reshape(vector<int>{batchsize_, topk_});
-}
-
-void ArgSortLayer::ComputeFeature(int flag,
-    const vector<Layer*>& srclayers) {
-  // TODO(wangwei) check flag to ensure it is not called in training phase
-  const float* srcptr = srclayers.at(0)->data(this).cpu_data();
-  float* ptr = data_.mutable_cpu_data();
-  for (int n = 0; n < batchsize_; n++) {
-    vector<std::pair<float, int> > vec;
-    for (int j = 0; j < dim_; ++j)
-      vec.push_back(std::make_pair(srcptr[j], j));
-    std::partial_sort(vec.begin(), vec.begin() + topk_, vec.end(),
-                      std::greater<std::pair<float, int> >());
-
-    for (int j = 0; j < topk_; ++j)
-      ptr[j] = static_cast<float> (vec.at(j).second);
-    ptr += topk_;
-    srcptr += dim_;
-  }
-}
-
-}  // namespace singa
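
The top-k selection above relies on std::partial_sort over (score, index) pairs with std::greater, which leaves only the first topk entries sorted in descending score order. A small standalone usage example of that pattern:

#include <algorithm>
#include <cstdio>
#include <functional>
#include <utility>
#include <vector>

int main() {
  std::vector<float> scores{0.1f, 0.7f, 0.05f, 0.15f};
  const int topk = 2;
  std::vector<std::pair<float, int>> vec;
  for (int j = 0; j < static_cast<int>(scores.size()); ++j)
    vec.push_back(std::make_pair(scores[j], j));
  // Only the first topk elements end up sorted (largest scores first).
  std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
                    std::greater<std::pair<float, int>>());
  for (int j = 0; j < topk; ++j)
    std::printf("rank %d -> class %d (score %.2f)\n", j, vec[j].second,
                vec[j].first);
  return 0;
}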

