SINGA-21 Code review 4: categorize all layers into 5 types: input, loss, output, neuron, and connection layers.
remove all is_xxlayer functions. Worker::TrainOneBatch() and Worker::TestOneBatch() have to check the layer type using other methods (e.g., typeinfo from typeid()). A function like type_id() may be added to return the type ID of a specific layer (which can be overridden by users). tested with example cnn, mlp, and rbm models. Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/53de92b7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/53de92b7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/53de92b7 Branch: refs/heads/master Commit: 53de92b7fdf7e6eac58fe86663738377bedfbd36 Parents: 134c891 Author: Wei Wang <[email protected]> Authored: Thu Aug 27 22:01:39 2015 +0800 Committer: wangwei <[email protected]> Committed: Wed Sep 2 18:42:23 2015 +0800 ---------------------------------------------------------------------- examples/cifar10/job.conf | 1 - examples/mnist/job.conf | 35 +- include/neuralnet/base_layer.h | 445 ------------- include/neuralnet/connection_layer.h | 125 ++++ include/neuralnet/input_layer.h | 170 +++++ include/neuralnet/layer.h | 490 +++++--------- include/neuralnet/loss_layer.h | 46 ++ include/neuralnet/neuralnet.h | 3 + include/neuralnet/neuron_layer.h | 206 ++++++ include/neuralnet/optional_layer.h | 34 - include/neuralnet/output_layer.h | 4 + include/utils/data_shard.h | 18 +- src/driver.cc | 2 +- src/neuralnet/base_layer.cc | 128 ---- src/neuralnet/connection_layer.cc | 107 +++ src/neuralnet/input_layer.cc | 346 ++++++++++ src/neuralnet/layer.cc | 1033 +---------------------------- src/neuralnet/loss_layer.cc | 103 +++ src/neuralnet/neuralnet.cc | 2 + src/neuralnet/neuron_layer.cc | 540 +++++++++++++++ src/neuralnet/optional_layer.cc | 112 ---- src/neuralnet/output_layer.cc | 6 + src/proto/job.proto | 12 +- src/trainer/server.cc | 2 +- src/trainer/worker.cc | 45 +- src/utils/param.cc | 4 - 26 files changed, 1893 insertions(+), 2126 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/examples/cifar10/job.conf ---------------------------------------------------------------------- diff --git a/examples/cifar10/job.conf b/examples/cifar10/job.conf index 9d25904..0fdd244 100644 --- a/examples/cifar10/job.conf +++ b/examples/cifar10/job.conf @@ -3,7 +3,6 @@ train_steps: 1000 test_steps: 100 test_freq:300 disp_freq:30 -debug: true train_one_batch { alg: kBP } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/examples/mnist/job.conf ---------------------------------------------------------------------- diff --git a/examples/mnist/job.conf b/examples/mnist/job.conf index b8d14e8..6d02561 100644 --- a/examples/mnist/job.conf +++ b/examples/mnist/job.conf @@ -83,11 +83,7 @@ neuralnet { layer{ name: "tanh1" - type: kTanh - tanh_conf { - outer_scale: 1.7159047 - inner_scale: 0.6666667 - } + type: kSTanh srclayers:"fc1" } layer{ @@ -117,12 +113,7 @@ neuralnet { layer{ name: "tanh2" - type: kTanh - tanh_conf { - outer_scale: 1.7159047 - inner_scale: 0.6666667 - } - + type: kSTanh srclayers:"fc2" } layer{ @@ -153,12 +144,7 @@ neuralnet { layer{ name: "tanh3" - type: kTanh - tanh_conf { - outer_scale: 1.7159047 - inner_scale: 0.6666667 - } - + type: kSTanh srclayers:"fc3" } layer{ @@ -189,12 +175,7 @@ neuralnet { layer{ name: "tanh4" - type: kTanh - tanh_conf { - outer_scale: 1.7159047 - inner_scale: 0.6666667 - } - + type: kSTanh srclayers:"fc4" } 
layer{ @@ -220,17 +201,11 @@ neuralnet { high:0.05 } } - } layer{ name: "tanh5" - type: kTanh - tanh_conf { - outer_scale: 1.7159047 - inner_scale: 0.6666667 - } - + type: kSTanh srclayers:"fc5" } layer{ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/include/neuralnet/base_layer.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/base_layer.h b/include/neuralnet/base_layer.h deleted file mode 100644 index 5498cd0..0000000 --- a/include/neuralnet/base_layer.h +++ /dev/null @@ -1,445 +0,0 @@ -#ifndef SINGA_NEURALNET_BASE_LAYER_H_ -#define SINGA_NEURALNET_BASE_LAYER_H_ - -#include <map> -#include <string> -#include <thread> -#include <vector> - -#include "proto/common.pb.h" -#include "proto/job.pb.h" -#include "utils/common.h" -#include "utils/blob.h" -#include "utils/param.h" - -namespace singa { - -/** - * Base layer class. - * - * Children should implement at least - * Layer::ComputeFeature() and Layer::ComputGradient() - * functions for contrastive-divergence/back-propagation algorithm. - */ -class Layer { - public: - static Layer* Create(const LayerProto& proto); - - Layer() {} - virtual ~Layer() {} - /** - * Setup layer properties. - * - * Setup the shapes for data and parameters, also setup some properties - * based on the layer configuration and connected layers. - * - * @param proto layer configuration. - * @param npartitions num of total partitions of the original layer. This - * layer should be setup as one partition. - */ - virtual void Setup(const LayerProto& proto, int npartitions = 1) { - CHECK_GE(npartitions, 1); - layer_proto_ = proto; - } - /** - * Compute features of this layer based on connected layers. - * - * @param flag kTrain, kTest, kPositive, etc. - */ - virtual void ComputeFeature(int flag, Metric* perf) = 0; - /** - * Compute gradients for parameters and connected layers. - * - * @param flag kTrain, kTest, kPositive, etc. - */ - virtual void ComputeGradient(int flag, Metric* perf) = 0; - virtual void ComputeGradient(int flag, Metric* perf) = 0; - /** - * For print debug info about each layer, e.g., norm of feature vector, - * norm of parameters. - * - * @param step training/test/validation step - * @param flag forward/backward/positive/negative... - * @return debug info about this layer. - */ - const string DebugString(int step, int flag); - /** - * Layers that have paramters must override this function. - * - * @return parameters associated with this layer - */ - virtual const std::vector<Param*> GetParams() const { - return std::vector<Param*> {}; - } - /** - * Return the connection type between one neuron of this layer and - * its source layer. - * Currently support two connection types: kOneToOne, and kOneToAll. - * kOneToOne indicates the neuron depends on only one neuron from src layer. - * kOneToAll indicates the neuron depends on all neurons from src layer. - * TODO(wangwei) support kOneToMany. - * - * @param k index of source layer (current only support k = 0. - * @param connection type. - */ - virtual ConnectionType src_neuron_connection(int k) const { - // CHECK_LT(k, srclayers_.size()); - return kOneToOne; - } - /** - * Return the connection type of this layer and all dst layers. - * - * Currently support two connection types: kOneToOne, and kOneToMany. - * kOneToOne indicates the users implement the ComputeFeature and - * ComputeGradient function considering only one dest layer. 
In this case, - * a SplitLayer will be added automatically to connect this layer with all - * dest layer. - * kOneToMany indicates the users has already considered multiple dest layers - * in the implementation. - * @return connection type default is kOneToOne. - */ - virtual ConnectionType dst_layer_connection() const { - return kOneToOne; - } - /** - * For print debug info about each layer, e.g., norm of feature vector, - * norm of parameters. - * - * @param step training/test/validation step - * @param phase forward/backward/positive/negative... - * @return debug info about this layer. - */ - virtual const std::string DebugString(int step, Phase phase); - /** - * @return partition dimension of this layer. - * -1 for no partition; - * 0 for partition the mini-batch into sub-mini-batch. - * 1 for partition the layer feature vector into sub-vector. - */ - inline int partition_dim() const { - CHECK_LE(layer_proto_.partition_dim(), 1); - return layer_proto_.partition_dim(); - } - inline int partition_id() const { return layer_proto_.partition_id(); } - inline int type() const { return layer_proto_.type(); } - /** - * Return name of this layer - */ - inline const std::string &name() const { return layer_proto_.name(); } - /** - * @return name of src data blob, used by prefetch layer to locate the data - * blob in parser layers; The default value is "unknown"; If the - * src layer is the prefetch layer and there are more than one parser layers, - * this value be set. - const std::string &datablob() const { - return layer_proto_.datablob(); - } - */ - /** - * @return a const ref for Blob storing neuron values of this layer for BP - */ - virtual const Blob<float>& data(const Layer* from) const { - return data_; - } - virtual Blob<float>* mutable_data(const Layer* from) { - return &data_; - } - virtual const Blob<float>& grad(const Layer* from) const { - return grad_; - } - /** - * @return a pointer to storing neuron grads of this layer for BP - */ - virtual Blob<float>* mutable_grad(const Layer* from) { - return &grad_; - } - /** - * return LayerS that connected to this layer - */ - inline const std::vector<Layer*> srclayers() const { return srclayers_; } - /** - * return LayerS that this layer connected to - */ - inline const std::vector<Layer*> dstlayers() const { return dstlayers_; } - inline int srclayers_size() const { return srclayers_.size(); } - inline int dstlayers_size() const { return dstlayers_.size(); } - inline void clear_dstlayers() { dstlayers_.clear(); } - inline void clear_srclayers() { srclayers_.clear(); } - inline void add_srclayer(Layer* src) { srclayers_.push_back(src); } - inline void add_dstlayer(Layer* dst) { dstlayers_.push_back(dst); } - virtual bool is_datalayer() const { - return false; - } - virtual bool is_parserlayer() const { - return false; - } - virtual bool is_losslayer() const { - return false; - } - virtual bool is_bridgesrclayer() const { - return false; - } - virtual bool is_bridgedstlayer() const { - return false; - } - virtual bool is_bridgelayer() const { - return false; - } - virtual bool is_vislayer() const { - return false; - } - virtual bool is_hidlayer() const { - return false; - } - - protected: - LayerProto layer_proto_; -<<<<<<< HEAD - Blob<float> data_, grad_; - vector<Layer*> srclayers_, dstlayers_; -}; - -class BridgeLayer : public Layer { - public: - void set_ready(bool a) { - ready_ = a; - } - bool ready() const { - return ready_; - } - bool is_bridgelayer() const override { - return true; - } - - protected: - //!< true if received grad 
from BridgeDstLayer - bool ready_; -}; -/** - * For sending data to layer on other threads which may resident on other nodes - * due to layer/data partition. - */ -class BridgeSrcLayer: public BridgeLayer { - public: - using Layer::ComputeFeature; - using Layer::ComputeGradient; - - void ComputeFeature(int flag, Metric* perf) override {} - void ComputeGradient(int flag) override { - ready_ = false; - } - - const Blob<float>& data(const Layer* from) const override { - return srclayers_[0]->data(this); - } - Blob<float>* mutable_data(const Layer* from) override { - return srclayers_[0]->mutable_data(this); - } - const Blob<float>& grad(const Layer* from) const override { - return srclayers_[0]->grad(this); - } - Blob<float>* mutable_grad(const Layer* from) override { - return srclayers_[0]->mutable_grad(this); - } - - bool is_bridgesrclayer() const override { - return true; - } -}; -/** - * For recv data from layer on other threads which may resident on other nodes - * due to layer/data partiton - */ -class BridgeDstLayer: public BridgeLayer { - public: - using Layer::ComputeFeature; - using Layer::ComputeGradient; - - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(int flag, Metric* perf) override { - // reset ready_ for next iteration. - ready_ = false; - } - void ComputeGradient(int flag) override {} - bool is_bridgedstlayer() const { - return true; - } -}; - -/** - * Concate src layers on one dimension - */ -class ConcateLayer: public Layer { - public: - using Layer::ComputeFeature; - using Layer::ComputeGradient; - - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(int flag, Metric* perf) override; - void ComputeGradient(int flag) override; -}; - -/** - * Base layer for reading records from local Shard, HDFS, lmdb, etc. - */ -class DataLayer: public Layer { - public: - void ComputeGradient(int flag, Metric* perf) override {} - bool is_datalayer() const override { - return true; - } - Blob<float>* mutable_data(const Layer* layer) override { - return nullptr; - } - Blob<float>* mutable_grad(const Layer* layer) override { - return nullptr; - } - ConnectionType dst_layer_connection() const override { - return kOneToMany; - } - inline int batchsize() const { return batchsize_; } - virtual const Record& sample() const { - return sample_; - } - /** - * @return the loaded records - */ - virtual const std::vector<Record>& records() const { - return records_; - } - - protected: - int random_skip_; - int batchsize_; - Record sample_; - std::vector<Record> records_; -}; - -/** - * Base layer for parsing the input records into Blobs. - */ -class ParserLayer : public Layer { - public: - void ComputeFeature(Phase phase, Metric* perf) override; - void ComputeGradient(Phase phase, Metric* perf) override {} - /** - * Parse records from DataLayer into blob. - */ - virtual void ParseRecords(Phase phase, const std::vector<Record>& records, - Blob<float>* blob) = 0; - bool is_parserlayer() const override { - return true; - } - Blob<float>* mutable_grad(const Layer* layer) override { - return nullptr; - } - const Blob<float>& grad(const Layer* from) const override { - CHECK(false) << "Parser layer has not gradient blob"; - return grad_; - } -}; - -class NeuronLayer : public Layer { - // defined as a layer category -}; - -/** - * Base layer for calculating loss and other metrics, e.g., precison. 
- */ -class LossLayer: public Layer { - public: - Blob<float>* mutable_grad(const Layer* layer) override { - return nullptr; - } - const Blob<float>& grad(const Layer* from) const override { - LOG(FATAL) << "Loss layer has no gradient blob"; - return grad_; - } - bool is_losslayer() const override { - return true; - } - - protected: - Blob<float> metric_; -}; - -/** - * Base layer for sending/waiting remote messages. - */ -class BridgeLayer : public Layer { - public: - inline void set_ready(bool a) { ready_ = a; } - inline bool ready() const { return ready_; } - bool is_bridgelayer() const override { return true; } - - protected: - //!< true if received grad from BridgeDstLayer - bool ready_; -}; - -/** - * Base layer for connecting layers when neural net is partitioned. - */ -class ConnectionLayer : public Layer { - // defined as a layer category -}; - -/** - * Layer for prefetching data records and parsing them. - * - * The data loading and parsing work is done by internal DataLayer and - * ParserLayer respectively. This layer controls the prefetching thread, i.e., - * creating and joining the prefetching thread. - */ -class PrefetchLayer : public Layer { - public: - ~PrefetchLayer(); - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(Phase phase, Metric* perf) override; - void ComputeGradient(Phase phase, Metric* perf) override {} - const Blob<float>& data(const Layer* from, Phase phase) const override; - void Prefetch(Phase phase); - Blob<float>* mutable_data(const Layer* layer, Phase phase) override; - Blob<float>* mutable_grad(const Layer* layer) override { - return nullptr; - } - const Blob<float>& grad(const Layer* from) const override { - CHECK(false) << "Loss layer has not gradient blob"; - return grad_; - } - - protected: - std::vector<Layer*> sublayers_; - std::map<std::string, Blob<float>> datablobs_; - std::thread thread_; -}; - -class RBMLayer: public Layer { - public: - const Blob<float>& neg_data(const Layer* layer) { - return neg_data_; - } - Blob<float>* mutable_neg_data(const Layer* layer) { - return &neg_data_; - } - const vector<Param*> GetParams() const override { - vector<Param*> params{weight_, bias_}; - return params; - } - virtual Blob<float>* Sample(int flat) = 0; - - protected: - //! dimension of the hidden layer - int hdim_; - //! dimension of the visible layer - int vdim_; - int batchsize_; - Param* weight_, *bias_; - - Blob<float> neg_data_; - Blob<float> neg_sample_; - Blob<float> sample_; -}; -} // namespace singa - -#endif // SINGA_NEURALNET_BASE_LAYER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/include/neuralnet/connection_layer.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/connection_layer.h b/include/neuralnet/connection_layer.h new file mode 100644 index 0000000..e44f4f3 --- /dev/null +++ b/include/neuralnet/connection_layer.h @@ -0,0 +1,125 @@ +#ifndef SINGA_NEURALNET_CONNECTION_LAYER_H_ +#define SINGA_NEURALNET_CONNECTION_LAYER_H_ +#include "neuralnet/layer.h" + +/** + * \file this file includes the declarations of layers that inherit the + * base ConnectionLayer. 
+ */ +namespace singa { +class BridgeLayer : public ConnectionLayer { + public: + void set_ready(bool a) { + ready_ = a; + } + bool ready() const { + return ready_; + } + virtual bool is_bridgesrclayer() const { + return false; + } + virtual bool is_bridgedstlayer() const { + return false; + } + + protected: + //!< true if received grad from BridgeDstLayer + bool ready_; +}; + +/** + * For recv data from layer on other threads which may resident on other nodes + * due to layer/data partiton + */ +class BridgeDstLayer : public BridgeLayer { + public: + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric* perf) override { + // reset ready_ for next iteration. + ready_ = false; + } + void ComputeGradient(int flag, Metric* perf) override {} + bool is_bridgedstlayer() const { + return true; + } +}; + +/** + * For sending data to layer on other threads which may resident on other nodes + * due to layer/data partition. + */ +class BridgeSrcLayer : public BridgeLayer { + public: + void ComputeFeature(int flag, Metric* perf) override {} + void ComputeGradient(int flag, Metric* perf) override { + ready_ = false; + } + const Blob<float>& data(const Layer* from) const override { + return srclayers_[0]->data(this); + } + Blob<float>* mutable_data(const Layer* from) override { + return srclayers_[0]->mutable_data(this); + } + const Blob<float>& grad(const Layer* from) const override { + return srclayers_[0]->grad(this); + } + Blob<float>* mutable_grad(const Layer* from) override { + return srclayers_[0]->mutable_grad(this); + } + bool is_bridgesrclayer() const override { + return true; + } +}; + + +/** + * Connect multiple (src) layers with a single (dst) layer. + * + * It concates feature Blobs (i.e., matrix) of src layers on one dimension. + * The concated feature Blob will be fed into the dst layer. + */ +class ConcateLayer : public ConnectionLayer { + public: + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric* perf) override; + void ComputeGradient(int flag, Metric* perf) override; +}; + +/** + * Connect a single (src) layer with multiple (dst) layers. + * + * It slices the feature Blob (i.e., matrix) of the src layer on one dimension. + * The sliced feature Blobs will be fed into dst layers. + */ +class SliceLayer : public ConnectionLayer { + public: + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric *perf) override; + void ComputeGradient(int flag, Metric* perf) override; + + private: + std::vector<Blob<float>> datavec_; + std::vector<Blob<float>> gradvec_; + int slice_dim_; + int slice_num_; +}; + +/** + * Connect a single (src) layer with multiple dst layers. + * + * It replicates the feature Blob of the src layer. + * Each replicated feature Blob will be fed into one dst layer. + * It aggregates gradients set by all dst layers and set it to the src layer. 
+ */ +class SplitLayer : public ConnectionLayer { + public: + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric* perf) override; + void ComputeGradient(int flag, Metric* perf) override; + + protected: + Blob<float> grads_; +}; +} +// namespace singa +#endif // SINGA_NEURALNET_CONNECTION_LAYER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/include/neuralnet/input_layer.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/input_layer.h b/include/neuralnet/input_layer.h new file mode 100644 index 0000000..62595c6 --- /dev/null +++ b/include/neuralnet/input_layer.h @@ -0,0 +1,170 @@ +#ifndef SINGA_NEURALNET_INPUT_LAYER_H_ +#define SINGA_NEURALNET_INPUT_LAYER_H_ + +#include <vector> +#include "neuralnet/layer.h" +#include "utils/data_shard.h" +/** + * \file this file includes the declarations of input layers that inherit the + * base InputLayer to load input features. + * + * The feature loading phase can be implemented using a single layer or + * separated into DataLayer (for loading features as records) and ParserLayer + * (for parsing features from records). SINGA has provided some built-in layers + * for DataLayer and ParserLayer. + * + * Data prefetching can be implemented as a sub-class of InputLayer. + * SINGA provides a built-in PrefetchLayer which embeds DataLayer and + * ParserLayer. + */ +namespace singa { +/** + * Base layer for reading records from local Shard, HDFS, lmdb, etc. + */ +class DataLayer: public InputLayer { + public: + void ComputeGradient(int flag, Metric* perf) override {} + Blob<float>* mutable_data(const Layer* layer) override { + return nullptr; + } + Blob<float>* mutable_grad(const Layer* layer) override { + return nullptr; + } + ConnectionType dst_layer_connection() const override { + return kOneToMany; + } + inline int batchsize() const { return batchsize_; } + virtual const Record& sample() const { + return sample_; + } + /** + * @return the loaded records + */ + virtual const std::vector<Record>& records() const { + return records_; + } + + protected: + int random_skip_; + int batchsize_; + Record sample_; + std::vector<Record> records_; +}; +/** + * Layer for loading Record from DataShard. + * + * It is derived from DataLayer. + */ +class ShardDataLayer : public DataLayer { + public: + ~ShardDataLayer(); + + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric *perf) override; + + private: + DataShard* shard_; +}; + +#ifdef USE_LMDB +#include <lmdb.h> +class LMDBDataLayer : public DataLayer { + public: + ~LMDBDataLayer(); + + void Setup(const LayerProto& proto, int npartitions) override; + void OpenLMDB(const std::string& path); + void ComputeFeature(int flag, Metric *perf) override; + void ConvertCaffeDatumToRecord(const CaffeDatum& datum, + SingleLabelImageRecord* record); + + private: + MDB_env* mdb_env_; + MDB_dbi mdb_dbi_; + MDB_txn* mdb_txn_; + MDB_cursor* mdb_cursor_; + MDB_val mdb_key_, mdb_value_; +}; +#endif + +/** + * Base layer for parsing the input records into Blobs. + */ +class ParserLayer : public InputLayer { + public: + void ComputeFeature(int flag, Metric* perf) override; + void ComputeGradient(int flag, Metric* perf) override {} + ConnectionType dst_layer_connection() const override { + return kOneToMany; + } + /** + * Parse records from DataLayer into blob. 
+ */ + virtual void ParseRecords(int flag, const std::vector<Record>& records, + Blob<float>* blob) = 0; + Blob<float>* mutable_grad(const Layer* layer) override { + return nullptr; + } + const Blob<float>& grad(const Layer* from) const override { + CHECK(false) << "Parser layer has not gradient blob"; + return grad_; + } +}; + +/** + * Derived from ParserLayer to parse label from SingaleLabelImageRecord. + */ +class LabelLayer : public ParserLayer { + public: + void Setup(const LayerProto& proto, int npartitions) override; + void ParseRecords(int flag, const std::vector<Record>& records, + Blob<float>* blob) override; +}; + +/** + * Derived from ParserLayer to parse MNIST feature from SingaleLabelImageRecord. + */ +class MnistLayer : public ParserLayer { + public: + void Setup(const LayerProto& proto, int npartitions) override; + void ParseRecords(int flag, const std::vector<Record>& records, + Blob<float>* blob) override; + + protected: + float norm_a_, norm_b_; +}; +/** + * Derived from ParserLayer to parse RGB image feature from + * SingaleLabelImageRecord. + */ +class RGBImageLayer : public ParserLayer { + public: + void Setup(const LayerProto& proto, int npartitions) override; + void ParseRecords(int flag, const std::vector<Record>& records, + Blob<float>* blob) override; + + private: + float scale_; + int cropsize_; + bool mirror_; + Blob<float> mean_; +}; +/** + * Layer for prefetching data records and parsing them. + * + * The data loading and parsing work is done by internal DataLayer and + * ParserLayer respectively. This layer controls the prefetching thread, i.e., + * creating and joining the prefetching thread. + */ +class PrefetchLayer : public Layer { + public: + ~PrefetchLayer(); + void ComputeFeature(int flag, Metric* perf) override; + void ComputeGradient(int flag, Metric* perf) override {} + + protected: + std::thread thread_; +}; +} // namespace singa + +#endif // SINGA_NEURALNET_INPUT_LAYER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/include/neuralnet/layer.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/layer.h b/include/neuralnet/layer.h index c64cee9..56201f5 100644 --- a/include/neuralnet/layer.h +++ b/include/neuralnet/layer.h @@ -1,372 +1,216 @@ #ifndef SINGA_NEURALNET_LAYER_H_ #define SINGA_NEURALNET_LAYER_H_ +#include <map> +#include <string> +#include <thread> #include <vector> -#include "neuralnet/base_layer.h" + +#include "proto/common.pb.h" #include "proto/job.pb.h" -#include "utils/data_shard.h" +#include "utils/common.h" +#include "utils/blob.h" +#include "utils/param.h" -/** - * \file this file includes the declarations neuron layer classes that conduct - * the transformation of features. 
- */ namespace singa { - -/********** Derived from DataLayer **********/ - -class ShardDataLayer : public DataLayer { - public: - ~ShardDataLayer(); - - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(Phase phase, Metric *perf) override; - - private: - DataShard* shard_; -}; - -/********** Derived from ParserLayer **********/ - -class LabelLayer : public ParserLayer { - public: - void Setup(const LayerProto& proto, int npartitions) override; - void ParseRecords(Phase phase, const std::vector<Record>& records, - Blob<float>* blob) override; -}; - -class MnistLayer : public ParserLayer { - public: - void Setup(const LayerProto& proto, int npartitions) override; - void ParseRecords(Phase phase, const std::vector<Record>& records, - Blob<float>* blob) override; - - protected: - // height and width of the image after deformation - // kernel size for elastic distortion - // n^2 images are processed as a batch for elastic distortion - // conv height and conv width - // gauss kernel values, displacements, column image and tmp buffer - // float* gauss_, *displacementx_, *displacementy_, *colimg_, *tmpimg_; - float gamma_, beta_, sigma_, kernel_, alpha_, norm_a_, norm_b_; - int resize_, elastic_freq_; -}; - -class RGBImageLayer : public ParserLayer { - public: - void Setup(const LayerProto& proto, int npartitions) override; - void ParseRecords(Phase phase, const std::vector<Record>& records, - Blob<float>* blob) override; - - private: - float scale_; - int cropsize_; - bool mirror_; - Blob<float> mean_; -}; - -/********** Derived from NeuronLayer **********/ +using std::vector; +using std::string; /** - * Convolution layer. + * Base layer class. + * + * Children should implement at least + * Layer::ComputeFeature() and Layer::ComputGradient() + * functions for contrastive-divergence/back-propagation algorithm. */ -class ConvolutionLayer : public NeuronLayer { +class Layer { public: - ~ConvolutionLayer(); + static Layer* Create(const LayerProto& proto); - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(int flag, Metric *perf) override; - void ComputeGradient(int flag) override; - const std::vector<Param*> GetParams() const override { - std::vector<Param*> params{weight_, bias_}; - return params; + Layer() {} + virtual ~Layer() {} + /** + * Setup layer properties. + * + * Setup the shapes for data and parameters, also setup some properties + * based on the layer configuration and connected layers. + * + * @param proto layer configuration. + * @param npartitions num of total partitions of the original layer. This + * layer should be setup as one partition. + */ + virtual void Setup(const LayerProto& proto, int npartitions = 1) { + CHECK_GE(npartitions, 1); + layer_proto_ = proto; } - ConnectionType src_neuron_connection(int k) const override { - // CHECK_LT(k, srclayers_.size()); - return kOneToAll; + /** + * Compute features of this layer based on connected layers. + * + * @param perf pointer to Metric obj for collect and aggregate performance + */ + virtual void ComputeFeature(int flag, Metric* perf) = 0; + /** + * Compute gradients for parameters and connected layers. + * @param flag used to get the calling phase, e.g., forward of training + * (kForward | kTrain) + * @param flag used to get the calling phase, e.g., forward of training + */ + virtual void ComputeGradient(int flag, Metric* perf) = 0; + /** + * Layers that have paramters must override this function. 
+ * @param flag used to get the calling phase, e.g., forward of training + * (kForward | kTrain) + * @return parameters associated with this layer + */ + virtual const std::vector<Param*> GetParams() const { + return std::vector<Param*> {}; } - - protected: - int kernel_, pad_, stride_; - int batchsize_, channels_, height_, width_; - int col_height_, col_width_, conv_height_, conv_width_, num_filters_; - Param* weight_, *bias_; - Blob<float> col_data_, col_grad_; -}; - -class DropoutLayer : public NeuronLayer { - public: - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(int flag, Metric* perf) override; - void ComputeGradient(int flag, Metric* perf) override; - protected: - // drop probability - float pdrop_; - /* record which neuron is dropped, required for back propagating gradients, - * if mask[i]=0, then the i-th neuron is dropped. + /** + * Return the connection type between one neuron of this layer and + * its source layer. + * Currently support two connection types: kOneToOne, and kOneToAll. + * kOneToOne indicates the neuron depends on only one neuron from src layer. + * kOneToAll indicates the neuron depends on all neurons from src layer. + * TODO(wangwei) support kOneToMany. + * + * @param k index of source layer (current only support k = 0. + * @param connection type. */ - Blob<float> mask_; -}; - -/** - * RBM visible layer - */ -class RBMVisLayer: public RBMLayer { - public: - using Layer::ComputeFeature; - using Layer::ComputeGradient; - - ~RBMVisLayer(); - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(int flag, Metric *perf) override; - void ComputeGradient(int flag) override; - Blob<float>* Sample(int flat) override; - - private: - RBMLayer* hid_layer_; - Layer* input_layer_; -}; - -class LRNLayer : public NeuronLayer { -/** - * Local Response Normalization edge - * b_i=a_i/x_i^beta - * x_i=knorm+alpha*\sum_{j=max(0,i-n/2}^{min(N,i+n/2}(a_j)^2 - * n is size of local response area. - * a_i, the activation (after ReLU) of a neuron convolved with the i-th kernel. - * b_i, the neuron after normalization, N is the total num of kernels - */ - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(Phase phase, Metric *perf) override; - void ComputeGradient(Phase phase, Metric* perf) override; - - protected: - //! shape of the bottom layer feature - int batchsize_, channels_, height_, width_; - //! size local response (neighbor) area - int lsize_; - //! 
hyper-parameter - float alpha_, beta_, knorm_; - Blob<float> norm_; -}; - -class PoolingLayer : public NeuronLayer { - public: - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(Phase phase, Metric *perf) override; - void ComputeGradient(Phase phase, Metric* perf) override; - - protected: - int kernel_, pad_, stride_; - int batchsize_, channels_, height_, width_, pooled_height_, pooled_width_; - PoolingProto_PoolMethod pool_; -}; - -class ReLULayer : public NeuronLayer { - public: - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(Phase phase, Metric *perf) override; - void ComputeGradient(Phase phase, Metric* perf) override; -}; - -/** - * RBM hidden layer - */ -class RBMHidLayer: public RBMLayer { - public: - ~RBMHidLayer(); - - ~RBMHidLayer(); - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(int flag, Metric* perf) override; - void ComputeGradient(int flag, Metric* perf) override; - Blob<float>* Sample(int flat) override; - private: - // whether use gaussian sampling - bool gaussian_; - RBMLayer *vis_layer_; -}; - -/** - * RBM visible layer - */ -class RBMVisLayer : public NeuronLayer { - public: - ~RBMVisLayer(); - - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(int flag, Metric* perf) override; - void ComputeGradient(int flag, Metric* perf) override; - - ConnectionType src_neuron_connection(int k) const override { + virtual ConnectionType src_neuron_connection(int k) const { // CHECK_LT(k, srclayers_.size()); - return kOneToAll; - } - const Blob<float>& data(const Layer* from, Phase phase) const override { - return (phase == kPositive) ? data_ : vis_sample_; + return kOneToOne; } - const std::vector<Param*> GetParams() const override { - std::vector<Param*> params{weight_, bias_}; - return params; + /** + * Return the connection type of this layer and all dst layers. + * + * Currently support two connection types: kOneToOne, and kOneToMany. + * kOneToOne indicates the users implement the ComputeFeature and + * ComputeGradient function considering only one dest layer. In this case, + * a SplitLayer will be added automatically to connect this layer with all + * dest layer. + * kOneToMany indicates the users has already considered multiple dest layers + * in the implementation. + * @return connection type default is kOneToOne. + */ + virtual ConnectionType dst_layer_connection() const { + return kOneToOne; } - - private: - //! dimension of the hidden layer - int hdim_; - //! dimension of the visible layer - int vdim_; - int batchsize_; - bool transpose_; - Param* weight_, *bias_; - // data to store sampling result - Blob<float> vis_sample_; - // in order to implement Persistent Contrastive Divergence, -}; - -/** - * This layer apply Tan function to neuron activations. - * f(x)=A tanh(Bx) - * f'(x)=B/A (A*A-f(x)*f(x)) - */ -class TanhLayer : public NeuronLayer { - public: - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(Phase phase, Metric *perf) override; - void ComputeGradient(Phase phase, Metric* perf) override; - - private: - float outer_scale_, inner_scale_; -}; - -/********** Derived from LossLayer **********/ - -class SoftmaxLossLayer : public LossLayer { - /* - * connected from the label layer and the last fc layer + /** + * For print debug info about each layer, e.g., norm of feature vector, + * norm of parameters. 
+ * + * @param step training/test/validation step + * @param flag used to get the calling phase, e.g., forward of training + * (kForward | kTrain) + * @return debug info about this layer. */ - public: - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(int flag, Metric* perf) override; - void ComputeGradient(int flag, Metric* perf) override; - + virtual const std::string DebugString(int step, int flag); /** - * softmax is not recommendeded for partition because it requires the whole - * src layer for normalization. + * @return partition dimension of this layer. + * -1 for no partition; + * 0 for partition the mini-batch into sub-mini-batch. + * 1 for partition the layer feature vector into sub-vector. */ - ConnectionType src_neuron_connection(int k) const override { - // CHECK_LT(k, srclayers_.size()); - return kOneToAll; - } - - private: - int batchsize_; - int dim_; - float scale_; - int topk_; -}; - -/********** Derived from BridgeLayer **********/ - -/** - * For recv data from layer on other threads which may resident on other nodes - * due to layer/data partiton - */ -class BridgeDstLayer : public BridgeLayer { - public: - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(int flag, Metric* perf) override { - // reset ready_ for next iteration. - ready_ = false; + inline int partition_dim() const { + CHECK_LE(layer_proto_.partition_dim(), 1); + return layer_proto_.partition_dim(); } - void ComputeGradient(int flag, Metric* perf) override {} - bool is_bridgedstlayer() const { - return true; - } -}; - -/** - * For sending data to layer on other threads which may resident on other nodes - * due to layer/data partition. - */ -class BridgeSrcLayer : public BridgeLayer { - public: - void ComputeFeature(Phase phase, Metric* perf) override {} - void ComputeGradient(Phase phase, Metric* perf) override { - ready_ = false; - } - const Blob<float>& data(const Layer* from, Phase phase) const override { - return srclayers_[0]->data(this); + inline int partition_id() const { return layer_proto_.partition_id(); } + inline int type() const { return layer_proto_.type(); } + /** + * Return name of this layer + */ + inline const std::string &name() const { return layer_proto_.name(); } + /** + * @return name of src data blob, used by prefetch layer to locate the data + * blob in parser layers; The default value is "unknown"; If the + * src layer is the prefetch layer and there are more than one parser layers, + * this value be set. 
+ const std::string &datablob() const { + return layer_proto_.datablob(); } - Blob<float>* mutable_data(const Layer* from, Phase phase) override { - return srclayers_[0]->mutable_data(this); + */ + /** + * @return a const ref for Blob storing neuron values of this layer for BP + */ + virtual const Blob<float>& data(const Layer* from) const { + return data_; } - const Blob<float>& grad(const Layer* from) const override { - return srclayers_[0]->grad(this); + virtual Blob<float>* mutable_data(const Layer* from) { + return &data_; } - Blob<float>* mutable_grad(const Layer* from) override { - return srclayers_[0]->mutable_grad(this); + virtual const Blob<float>& grad(const Layer* from) const { + return grad_; } - bool is_bridgesrclayer() const override { - return true; + /** + * @return a pointer to storing neuron grads of this layer for BP + */ + virtual Blob<float>* mutable_grad(const Layer* from) { + return &grad_; } -}; + /** + * return LayerS that connected to this layer + */ + inline const std::vector<Layer*> srclayers() const { return srclayers_; } + /** + * return LayerS that this layer connected to + */ + inline const std::vector<Layer*> dstlayers() const { return dstlayers_; } + inline int srclayers_size() const { return srclayers_.size(); } + inline int dstlayers_size() const { return dstlayers_.size(); } + inline void clear_dstlayers() { dstlayers_.clear(); } + inline void clear_srclayers() { srclayers_.clear(); } + inline void add_srclayer(Layer* src) { srclayers_.push_back(src); } + inline void add_dstlayer(Layer* dst) { dstlayers_.push_back(dst); } -/********** Derived from ConnectionLayer **********/ + protected: + LayerProto layer_proto_; + Blob<float> data_, grad_; + std::vector<Layer*> srclayers_, dstlayers_; +}; /** - * Concate src layers on one dimension + * Base layer for connecting layers when neural net is partitioned. */ -class ConcateLayer : public ConnectionLayer { - public: - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(Phase phase, Metric* perf) override; - void ComputeGradient(Phase phase, Metric* perf) override; +class ConnectionLayer : public Layer { + // defined as a layer category }; /** - * Slice the source layer into multiple dst layers on one dimension + * Base layer for getting input data. May include layers for loading records, + * parsing records. */ -class SliceLayer : public ConnectionLayer { - public: - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(int flag, Metric *perf) override; - - private: - std::vector<Blob<float>> datavec_; - std::vector<Blob<float>> gradvec_; - int slice_dim_; - int slice_num_; +class InputLayer : public Layer { + // defined as a layer category }; -/** - * This layer apply Sigmoid function to neuron activations. - * f(x)=1/(1+exp(-x)) - * f'(x)=f(x)*(1-f(x)) - */ -class SigmoidLayer: public Layer { - public: - using Layer::ComputeFeature; - using Layer::ComputeGradient; - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(int flag, Metric* perf) override; - void ComputeGradient(int flag, Metric* perf) override; +class NeuronLayer : public Layer { + // defined as a layer category }; /** - * Connect the source layer with multiple dst layers. - * Pass source layer's data blob directly to dst layers. - * Aggregate dst layer's gradients into source layer's gradient. + * Base layer for calculating loss and other metrics, e.g., precison. 
*/ -class SplitLayer : public ConnectionLayer { +class LossLayer : public Layer { public: - void Setup(const LayerProto& proto, int npartitions) override; - void ComputeFeature(int flag, Metric* perf) override; - void ComputeGradient(int flag, Metric* perf) override; + Blob<float>* mutable_grad(const Layer* layer) override { + return nullptr; + } + const Blob<float>& grad(const Layer* from) const override { + LOG(FATAL) << "Loss layer has no gradient blob"; + return grad_; + } protected: - Blob<float> grads_; + Blob<float> metric_; }; } // namespace singa +#include "neuralnet/connection_layer.h" +#include "neuralnet/input_layer.h" +#include "neuralnet/loss_layer.h" +#include "neuralnet/neuron_layer.h" +#include "neuralnet/output_layer.h" -#endif // SINGA_NEURALNET_LAYER_H_ +#endif // SINGA_NEURALNET_BASE_LAYER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/include/neuralnet/loss_layer.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/loss_layer.h b/include/neuralnet/loss_layer.h new file mode 100644 index 0000000..8358bd6 --- /dev/null +++ b/include/neuralnet/loss_layer.h @@ -0,0 +1,46 @@ +#ifndef SINGA_NEURALNET_LOSS_LAYER_H_ +#define SINGA_NEURALNET_LOSS_LAYER_H_ + +#include "neuralnet/layer.h" + +/** + * \file this file includes the declarations of layers that inherit the base + * LossLayer for measuring the objective training loss. + */ +namespace singa { +/** + * Squared Euclidean loss as 0.5 ||predict - ground_truth||^2. + */ +class EuclideanLossLayer : public LossLayer { + public: + void ComputeFeature(int flag, Metric* perf) override; + void ComputeGradient(int flag, Metric* perf) override; +}; + +/** + * Cross-entropy loss applied to the probabilities after Softmax. + */ +class SoftmaxLossLayer : public LossLayer { + public: + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric* perf) override; + void ComputeGradient(int flag, Metric* perf) override; + + /** + * softmax is not recommendeded for partition because it requires the whole + * src layer for normalization. 
+ */ + ConnectionType src_neuron_connection(int k) const override { + // CHECK_LT(k, srclayers_.size()); + return kOneToAll; + } + + private: + int batchsize_; + int dim_; + float scale_; + int topk_; +}; +} +// namespace singa +#endif // SINGA_NEURALNET_LOSS_LAYER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/include/neuralnet/neuralnet.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/neuralnet.h b/include/neuralnet/neuralnet.h index 99f4a5c..bcd102c 100644 --- a/include/neuralnet/neuralnet.h +++ b/include/neuralnet/neuralnet.h @@ -60,12 +60,15 @@ class NeuralNet { return layers_; } const std::vector<ParserLayer*>& parserlayers() const { + LOG(FATAL)<< " not implemented"; return parserlayers_; } const std::vector<LossLayer*>& losslayers() const { + LOG(FATAL)<< " not implemented"; return losslayers_; } const std::vector<DataLayer*>& datalayers() const { + LOG(FATAL)<< " not implemented"; return datalayers_; } const std::vector<Param*>& params() const { http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/include/neuralnet/neuron_layer.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/neuron_layer.h b/include/neuralnet/neuron_layer.h new file mode 100644 index 0000000..ce88ced --- /dev/null +++ b/include/neuralnet/neuron_layer.h @@ -0,0 +1,206 @@ +#ifndef SINGA_NEURALNET_NEURON_LAYER_H_ +#define SINGA_NEURALNET_NEURON_LAYER_H_ +#include <vector> + +#include "neuralnet/layer.h" +#include "proto/job.pb.h" +/** + * \file this file includes the declarations neuron layer classes that conduct + * the transformation of features. + */ +namespace singa { +/** + * Convolution layer. + */ +class ConvolutionLayer : public NeuronLayer { + public: + ~ConvolutionLayer(); + + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric* perf) override; + void ComputeGradient(int flag, Metric* perf) override; + const std::vector<Param*> GetParams() const override { + std::vector<Param*> params{weight_, bias_}; + return params; + } + ConnectionType src_neuron_connection(int k) const override { + // CHECK_LT(k, srclayers_.size()); + return kOneToAll; + } + + protected: + int kernel_, pad_, stride_; + int batchsize_, channels_, height_, width_; + int col_height_, col_width_, conv_height_, conv_width_, num_filters_; + Param* weight_, *bias_; + Blob<float> col_data_, col_grad_; +}; + +class DropoutLayer : public NeuronLayer { + public: + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric* perf) override; + void ComputeGradient(int flag, Metric* perf) override; + protected: + // drop probability + float pdrop_; + /* record which neuron is dropped, required for back propagating gradients, + * if mask[i]=0, then the i-th neuron is dropped. + */ + Blob<float> mask_; +}; +/** + * Local Response Normalization edge + * + * b_i=a_i/x_i^beta + * x_i=knorm+alpha*\sum_{j=max(0,i-n/2}^{min(N,i+n/2}(a_j)^2 + * n is size of local response area. + * a_i, the activation (after ReLU) of a neuron convolved with the i-th kernel. + * b_i, the neuron after normalization, N is the total num of kernels + */ +class LRNLayer : public NeuronLayer { + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric *perf) override; + void ComputeGradient(int flag, Metric* perf) override; + + protected: + //! 
shape of the bottom layer feature + int batchsize_, channels_, height_, width_; + //! size local response (neighbor) area + int lsize_; + //! hyper-parameter + float alpha_, beta_, knorm_; + Blob<float> norm_; +}; + +class PoolingLayer : public NeuronLayer { + public: + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric *perf) override; + void ComputeGradient(int flag, Metric* perf) override; + + protected: + int kernel_, pad_, stride_; + int batchsize_, channels_, height_, width_, pooled_height_, pooled_width_; + PoolingProto_PoolMethod pool_; +}; + +class ReLULayer : public NeuronLayer { + public: + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric *perf) override; + void ComputeGradient(int flag, Metric* perf) override; +}; + +class InnerProductLayer : public NeuronLayer { + public: + ~InnerProductLayer(); + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric* perf) override; + void ComputeGradient(int flag, Metric* perf) override; + const std::vector<Param*> GetParams() const override { + std::vector<Param*> params{weight_, bias_}; + return params; + } + + private: + int batchsize_; + int vdim_, hdim_; + bool transpose_; + Param *weight_, *bias_; +}; + +/** + * This layer apply scaled Tan function to neuron activations. + * f(x)=1.7159047 tanh(0.66666667 x) + */ +class STanhLayer : public NeuronLayer { + public: + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric *perf) override; + void ComputeGradient(int flag, Metric* perf) override; +}; + +/** + * This layer apply Sigmoid function to neuron activations. + * f(x)=1/(1+exp(-x)) + * f'(x)=f(x)*(1-f(x)) + */ +class SigmoidLayer: public Layer { + public: + using Layer::ComputeFeature; + using Layer::ComputeGradient; + + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric* perf) override; + void ComputeGradient(int flag, Metric* perf) override; +}; + + +/** + * Base layer for RBM models. + */ +class RBMLayer: public Layer { + public: + virtual ~RBMLayer() {} + const Blob<float>& neg_data(const Layer* layer) { + return neg_data_; + } + Blob<float>* mutable_neg_data(const Layer* layer) { + return &neg_data_; + } + const std::vector<Param*> GetParams() const override { + std::vector<Param*> params{weight_, bias_}; + return params; + } + virtual Blob<float>* Sample(int flat) = 0; + + protected: + //! dimension of the hidden layer + int hdim_; + //! 
dimension of the visible layer + int vdim_; + int batchsize_; + Param* weight_, *bias_; + + Blob<float> neg_data_; + Blob<float> neg_sample_; + Blob<float> sample_; +}; + +/** + * RBM visible layer + */ +class RBMVisLayer: public RBMLayer { + public: + ~RBMVisLayer(); + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric* perf) override; + void ComputeGradient(int flag, Metric* perf) override; + Blob<float>* Sample(int flat) override; + + private: + RBMLayer* hid_layer_; + Layer* input_layer_; +}; +/** + * RBM hidden layer + */ +class RBMHidLayer: public RBMLayer { + public: + ~RBMHidLayer(); + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(int flag, Metric* perf) override; + void ComputeGradient(int flag, Metric* perf) override; + Blob<float>* Sample(int flat) override; + + private: + // whether use gaussian sampling + bool gaussian_; + RBMLayer *vis_layer_; +}; + + +} // namespace singa + +#endif // SINGA_NEURALNET_NEURON_LAYER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/include/neuralnet/optional_layer.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/optional_layer.h b/include/neuralnet/optional_layer.h deleted file mode 100644 index 8f64ab4..0000000 --- a/include/neuralnet/optional_layer.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef SINGA_NEURALNET_OPTIONAL_LAYER_H_ -#define SINGA_NEURALNET_OPTIONAL_LAYER_H_ - -#ifdef USE_LMDB -#include <lmdb.h> -#endif -#include <string> -#include "neuralnet/base_layer.h" - -namespace singa { - -#ifdef USE_LMDB -class LMDBDataLayer : public DataLayer { - public: - ~LMDBDataLayer(); - - void Setup(const LayerProto& proto, int npartitions) override; - void OpenLMDB(const std::string& path); - void ComputeFeature(Phase phase, Metric *perf) override; - void ConvertCaffeDatumToRecord(const CaffeDatum& datum, - SingleLabelImageRecord* record); - - private: - MDB_env* mdb_env_; - MDB_dbi mdb_dbi_; - MDB_txn* mdb_txn_; - MDB_cursor* mdb_cursor_; - MDB_val mdb_key_, mdb_value_; -}; -#endif - -} // namespace singa - -#endif // SINGA_NEURALNET_OPTIONAL_LAYER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/include/neuralnet/output_layer.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/output_layer.h b/include/neuralnet/output_layer.h new file mode 100644 index 0000000..c507e1c --- /dev/null +++ b/include/neuralnet/output_layer.h @@ -0,0 +1,4 @@ +#ifndef SINGA_NEURALNET_OUTPUT_LAYER_H_ +#define SINGA_NEURALNET_OUTPUT_LAYER_H_ +// currently no output sub-classes are defined +#endif // SINGA_NEURALNET_OUTPUT_LAYER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/include/utils/data_shard.h ---------------------------------------------------------------------- diff --git a/include/utils/data_shard.h b/include/utils/data_shard.h index 4156ab6..7e86da1 100644 --- a/include/utils/data_shard.h +++ b/include/utils/data_shard.h @@ -23,7 +23,7 @@ namespace singa { * and key size do not match because the last write of tuple crashed. * * TODO - * 1. split one shard into multile shards. + * 1. split one shard into multiple shards. * 2. add threading to prefetch and parse records * */ @@ -52,7 +52,7 @@ class DataShard { /** * read next tuple from the shard. 
- * + * * @param key Tuple key * @param val Record of type Message * @return false if read unsuccess, e.g., the tuple was not inserted @@ -61,7 +61,7 @@ class DataShard { bool Next(std::string* key, google::protobuf::Message* val); /** * read next tuple from the shard. - * + * * @param key Tuple key * @param val Record of type string * @return false if read unsuccess, e.g., the tuple was not inserted @@ -70,7 +70,7 @@ class DataShard { bool Next(std::string* key, std::string* val); /** * Append one tuple to the shard. - * + * * @param key e.g., image path * @param val * @return false if unsucess, e.g., inserted before @@ -78,7 +78,7 @@ class DataShard { bool Insert(const std::string& key, const google::protobuf::Message& tuple); /** * Append one tuple to the shard. - * + * * @param key e.g., image path * @param val * @return false if unsucess, e.g., inserted before @@ -96,7 +96,7 @@ class DataShard { void Flush(); /** * Iterate through all tuples to get the num of all tuples. - * + * * @return num of tuples */ int Count(); @@ -108,7 +108,7 @@ class DataShard { protected: /** * Read the next key and prepare buffer for reading value. - * + * * @param key * @return length (i.e., bytes) of value field. */ @@ -116,14 +116,14 @@ class DataShard { /** * Setup the disk pointer to the right position for append in case that * the pervious write crashes. - * + * * @param path shard path. * @return offset (end pos) of the last success written record. */ int PrepareForAppend(const std::string& path); /** * Read data from disk if the current data in the buffer is not a full field. - * + * * @param size size of the next field. */ bool PrepareNextField(int size); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/src/driver.cc ---------------------------------------------------------------------- diff --git a/src/driver.cc b/src/driver.cc index 9fa4b86..f017f45 100644 --- a/src/driver.cc +++ b/src/driver.cc @@ -50,7 +50,7 @@ void Driver::Init(int argc, char **argv) { RegisterLayer<SliceLayer, int>(kSlice); RegisterLayer<SoftmaxLossLayer, int>(kSoftmaxLoss); RegisterLayer<SplitLayer, int>(kSplit); - RegisterLayer<TanhLayer, int>(kTanh); + RegisterLayer<STanhLayer, int>(kSTanh); #ifdef USE_LMDB RegisterLayer<LMDBDataLayer, int>(kLMDBData); #endif http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/src/neuralnet/base_layer.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/base_layer.cc b/src/neuralnet/base_layer.cc deleted file mode 100644 index 7d94a75..0000000 --- a/src/neuralnet/base_layer.cc +++ /dev/null @@ -1,128 +0,0 @@ -#include "neuralnet/base_layer.h" - -#include <cblas.h> -#include <glog/logging.h> -#include <math.h> -#include <opencv2/highgui/highgui.hpp> -#include <opencv2/imgproc/imgproc.hpp> -#include <cfloat> -#include "utils/factory.h" -#include "utils/singleton.h" - -namespace singa { - -using std::string; -using std::vector; - -Layer* Layer::Create(const LayerProto& proto) { - auto* factory = Singleton<Factory<Layer>>::Instance(); - Layer* layer = nullptr; - if (proto.has_user_type()) - layer = factory->Create(proto.user_type()); - else - layer = factory->Create(proto.type()); - return layer; -} - -const string Layer::DebugString(int step, Phase phase) { - string ret = StringPrintf("Layer %10s ", name().c_str()); - if (data_.count() != 0) - return ret; - if (phase == kForward) { - ret += StringPrintf("data %10s data norm1 %13.9f", data_.asum_data()); - } else if (phase == kBackward) { - ret += 
StringPrintf("grad norm1 %13.9f\n", grad_.asum_data()); - for (Param* p : GetParams()) { - ret += StringPrintf( - "param id %2d, name %10s, value norm1 %13.9f, grad norm1 %13.9f\n", - p->id(), p->name().c_str(), p->data().asum_data(), - p->grad().asum_data()); - } - } - return ret; -} - -/************* Implementation for ParserLayer ***********/ -void ParserLayer::ComputeFeature(Phase phase, Metric *perf) { - CHECK_EQ(srclayers_.size(), 1); - auto datalayer = static_cast<DataLayer*>(*srclayers_.begin()); - ParseRecords(phase, datalayer->records(), &data_); -} - -/************* Implementation for PrefetchLayer ***********/ -PrefetchLayer::~PrefetchLayer() { - if (thread_.joinable()) - thread_.join(); - for (auto layer : sublayers_) - delete layer; -} - -void PrefetchLayer::Setup(const LayerProto& proto, int npartitions) { - Layer::Setup(proto, npartitions); - // CHECK_EQ(npartitions, 1); - Factory<Layer>* factory = Singleton<Factory<Layer>>::Instance(); - const auto& sublayers = proto.prefetch_conf().sublayers(); - CHECK_GE(sublayers.size(), 1); - std::map<string, Layer*> layers; - for (auto const &p : sublayers) { - auto layer = factory->Create(p.type()); - sublayers_.push_back(layer); - layers[p.name()] = layer; - } - // TODO(wangwei) topology sort layers - auto layer = sublayers_.begin(); - for (auto const &p : sublayers) { - std::vector<Layer*> src; - for (auto const &srcname : p.srclayers()) { - src.push_back(layers[srcname]); - (*layer)->add_srclayer(layers[srcname]); - } - (*layer)->Setup(p); - layer++; - } - for (auto layer : sublayers_) - if (layer->is_parserlayer()) - datablobs_[layer->name()] = Blob<float>(layer->data(this).shape()); -} - -void PrefetchLayer::ComputeFeature(Phase phase, Metric* perf) { - if (thread_.joinable()) - thread_.join(); - else - Prefetch(phase); - for (auto layer : sublayers_) { - if (layer->is_parserlayer()) - // TODO(wangwei) replace CopyFrom with Swap? 
- datablobs_.at(layer->name()).CopyFrom(layer->data(this)); - } - thread_ = std::thread(&PrefetchLayer::Prefetch, this, phase); -} - -void PrefetchLayer::Prefetch(Phase phase) { - // clock_t s=clock(); - for (auto layer : sublayers_) - layer->ComputeFeature(phase, nullptr); - // LOG(ERROR)<<(clock()-s)*1.0/CLOCKS_PER_SEC; -} - -const Blob<float>& PrefetchLayer::data(const Layer* from) const { - LOG(FATAL) << " needs update"; - if (from != nullptr) { - return datablobs_.at(""); - } else { - // CHECK_EQ(datablobs_.size(),1); - return datablobs_.begin()->second; - } -} - -Blob<float>* PrefetchLayer::mutable_data(const Layer* from) { - LOG(FATAL) << " needs update"; - if (from != nullptr) { - return &(datablobs_.at("")); - } else { - // CHECK_EQ(datablobs_.size(),1); - return &(datablobs_.begin()->second); - } -} - -} // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/src/neuralnet/connection_layer.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/connection_layer.cc b/src/neuralnet/connection_layer.cc new file mode 100644 index 0000000..a3d0a75 --- /dev/null +++ b/src/neuralnet/connection_layer.cc @@ -0,0 +1,107 @@ +#include <vector> +#include "neuralnet/layer.h" + +namespace singa { +/************* Implementation for ConcateLayer ***********/ +void ConcateLayer::Setup(const LayerProto& proto, int npartitions) { + // CHECK_EQ(npartitions, 1); + Layer::Setup(proto, npartitions); + size_t concate_dim = proto.concate_conf().concate_dim(); + CHECK_GE(concate_dim, 0); + CHECK_GT(srclayers_.size(), 1); + vector<int> shape = srclayers_[0]->data(this).shape(); + for (size_t i = 1; i < srclayers_.size(); i++) { + const vector<int>& srcshape = srclayers_[i]->data(this).shape(); + for (size_t j = 0; j < shape.size(); j++) + if (j == concate_dim) + shape[j] += srcshape[j]; + else + CHECK_EQ(shape[j], srcshape[j]); + } + data_.Reshape(shape); + grad_.Reshape(shape); +} + +void ConcateLayer::ComputeFeature(int flag, Metric *perf) { + LOG(FATAL) << "Not implemented for Concate Layer"; +} + +void ConcateLayer::ComputeGradient(int flag, Metric* perf) { + LOG(FATAL) << "Not implemented for Concate Layer"; +} + +/************* Implementation for SliceLayer****************/ +void SliceLayer::Setup(const LayerProto& proto, int npartitions) { + /* + Layer::Setup(proto, npartitions); + slice_dim_ = proto.slice_conf().slice_dim(); + slice_num_ = npartitions; + CHECK_GE(slice_dim_, 0); + CHECK_EQ(slice_num_, dstlayers_.size()); + data_.Reshape(srclayers_[0]->data(this).shape()); + grad_.ReshapeLike(data_); + datavec_.resize(slice_num_); + gradvec_.resize(slice_num_); + CHECK_EQ(data_.count() % slice_num_, 0); // restrict equal slicing + // LOG(ERROR)<<"slice dim "<<slice_dim<<" slice num "<<slice_num; + for (int i = 0; i < slice_num_; i++) { + vector<int> newshape(data_.shape()); + newshape[slice_dim_] = newshape[slice_dim_] / slice_num_ + + ((i == slice_num_ - 1) ? 
newshape[slice_dim_] % slice_num_ : 0); + datavec_[i].Reshape(newshape); + gradvec_[i].Reshape(newshape); + // LOG(ERROR)<<"slice "<<IntVecToString(newshape); + } + */ + LOG(FATAL) << "Not implemented"; +} + +void SliceLayer::ComputeFeature(int flag, Metric *perf) { + /* + CHECK_EQ(srclayers_.size(), 1); + if (slice_dim_ == 0) { + const auto& blob = srclayers_.at(0)->data(this); + int size = blob.count() / slice_num_; + for (int i = 0; i < slice_num_; i++) { + float* dst = datavec_[i].mutable_cpu_data(); + const float* src = blob.cpu_data() + i * size; + memcpy(dst, src, size*sizeof(float)); + } + } + */ + LOG(FATAL) << "Not implemented"; +} + +void SliceLayer::ComputeGradient(int flag, Metric* perf) { + LOG(FATAL) << "Not implemented"; +} + +/* +int SliceLayer::SliceID(const Layer* layer) const { + CHECK(layer != nullptr); + for (size_t i = 0; i < datavec_.size(); i++) { + // LOG(ERROR)<<"get slice "<<IntVecToString(shapes_[i]); + if (dstlayers_[i] == layer) + return i; + } + CHECK(false); + return -1; +}*/ + +/************* Implementation for SplitLayer****************/ +void SplitLayer::Setup(const LayerProto& proto, int npartitions) { + // CHECK_EQ(npartitions, 1); + Layer::Setup(proto, npartitions); + CHECK_EQ(srclayers_.size(), 1); + data_.Reshape(srclayers_[0]->data(this).shape()); + grad_.Reshape(srclayers_[0]->data(this).shape()); +} + +void SplitLayer::ComputeFeature(int flag, Metric *perf) { + LOG(FATAL) << "Not implemented"; +} + +void SplitLayer::ComputeGradient(int flag, Metric* perf) { + LOG(FATAL) << "Not implemented"; +} +} // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53de92b7/src/neuralnet/input_layer.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/input_layer.cc b/src/neuralnet/input_layer.cc new file mode 100644 index 0000000..b1c6986 --- /dev/null +++ b/src/neuralnet/input_layer.cc @@ -0,0 +1,346 @@ +#include <vector> +#include <string> + +#include "neuralnet/layer.h" +#include "mshadow/tensor.h" +namespace singa { + +using namespace mshadow; +using mshadow::cpu; +using mshadow::Shape4; +using mshadow::Tensor; + + +/************* Implementation for ParserLayer ***********/ +void ParserLayer::ComputeFeature(int flag, Metric *perf) { + CHECK_EQ(srclayers_.size(), 1); + auto datalayer = static_cast<DataLayer*>(*srclayers_.begin()); + ParseRecords(flag, datalayer->records(), &data_); +} + +#ifdef USE_LMDB +/*********************LMDBDataLayer**********************************/ +LMDBDataLayer::~LMDBDataLayer() { + mdb_cursor_close(mdb_cursor_); + mdb_txn_abort(mdb_txn_); + mdb_cursor_ = nullptr; +} + +void LMDBDataLayer::Setup(const LayerProto& proto, int npartitions) { + Layer::Setup(proto, npartitions); + OpenLMDB(proto.lmdbdata_conf().path()); + CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_NEXT), + MDB_SUCCESS); + mdb_cursor_close(mdb_cursor_); + mdb_txn_abort(mdb_txn_); + mdb_cursor_ = nullptr; + CaffeDatum datum; + datum.ParseFromArray(mdb_value_.mv_data, mdb_value_.mv_size); + SingleLabelImageRecord* record = sample_.mutable_image(); + ConvertCaffeDatumToRecord(datum, record); + batchsize_ = proto.lmdbdata_conf().batchsize(); + if (partition_dim() == 0) + batchsize_ /= npartitions; + records_.resize(batchsize_); + random_skip_ = proto.lmdbdata_conf().random_skip(); +} + +void LMDBDataLayer::OpenLMDB(const std::string& path) { + CHECK_EQ(mdb_env_create(&mdb_env_), MDB_SUCCESS) << "mdb_env_create failed"; + CHECK_EQ(mdb_env_set_mapsize(mdb_env_, 1099511627776), 
MDB_SUCCESS); // 1TB + CHECK_EQ(mdb_env_open(mdb_env_, path.c_str(), + MDB_RDONLY, 0664), MDB_SUCCESS) << "cannot open lmdb " << path; + CHECK_EQ(mdb_txn_begin(mdb_env_, NULL, MDB_RDONLY, &mdb_txn_), MDB_SUCCESS) + << "mdb_txn_begin failed"; + CHECK_EQ(mdb_open(mdb_txn_, NULL, 0, &mdb_dbi_), MDB_SUCCESS) + << "mdb_open failed"; + CHECK_EQ(mdb_cursor_open(mdb_txn_, mdb_dbi_, &mdb_cursor_), MDB_SUCCESS) + << "mdb_cursor_open failed"; + LOG(INFO) << "Opening lmdb " << path; + CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_FIRST), + MDB_SUCCESS) << "mdb_cursor_get failed"; +} + +void LMDBDataLayer::ComputeFeature(int flag, Metric* perf) { + if (mdb_cursor_ == nullptr) + OpenLMDB(layer_proto_.lmdbdata_conf().path()); + if (random_skip_) { + int nskip = rand() % random_skip_; + int n = 0; + CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, + &mdb_value_, MDB_FIRST), MDB_SUCCESS); + while (mdb_cursor_get(mdb_cursor_, &mdb_key_, + &mdb_value_, MDB_NEXT) == MDB_SUCCESS) + n++; + LOG(INFO) << "Random Skip " << nskip << " records of total " + << n << "records"; + // We have reached the end. Restart from the first. + CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, + &mdb_value_, MDB_FIRST), MDB_SUCCESS); + for (int i = 0; i < nskip; i++) { + if (mdb_cursor_get(mdb_cursor_, &mdb_key_, + &mdb_value_, MDB_NEXT) != MDB_SUCCESS) { + // We have reached the end. Restart from the first. + DLOG(INFO) << "Restarting data prefetching from start."; + CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, + &mdb_value_, MDB_FIRST), MDB_SUCCESS); + } + } + random_skip_ = 0; + } + CaffeDatum datum; + for (auto& record : records_) { + SingleLabelImageRecord* image = record.mutable_image(); + CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, + &mdb_value_, MDB_GET_CURRENT), MDB_SUCCESS); + datum.ParseFromArray(mdb_value_.mv_data, mdb_value_.mv_size); + ConvertCaffeDatumToRecord(datum, image); + if (mdb_cursor_get(mdb_cursor_, &mdb_key_, + &mdb_value_, MDB_NEXT) != MDB_SUCCESS) { + // We have reached the end. Restart from the first. 
+ DLOG(INFO) << "Restarting data prefetching from start."; + CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, + &mdb_value_, MDB_FIRST), MDB_SUCCESS); + } + } +} + +void LMDBDataLayer::ConvertCaffeDatumToRecord(const CaffeDatum& datum, + SingleLabelImageRecord* record) { + record->set_label(datum.label()); + record->clear_shape(); + if (datum.has_channels()) + record->add_shape(datum.channels()); + if (datum.has_height()) + record->add_shape(datum.height()); + if (datum.has_width()) + record->add_shape(datum.width()); + if (datum.has_data()) + record->set_pixel(datum.data()); + if (datum.float_data_size()) { + record->clear_data(); + for (float x : datum.float_data()) + record->add_data(x); + } +} +#endif + + +/***************Implementation for ShardDataLayer**************************/ +ShardDataLayer::~ShardDataLayer() { + if (shard_ != nullptr) + delete shard_; + shard_ = nullptr; +} + +void ShardDataLayer::Setup(const LayerProto& proto, int npartitions) { + Layer::Setup(proto, npartitions); + shard_ = new DataShard(proto.sharddata_conf().path(), DataShard::kRead); + string key; + shard_->Next(&key, &sample_); + delete shard_; + shard_ = nullptr; + batchsize_ = proto.sharddata_conf().batchsize(); + if (partition_dim() == 0) + batchsize_ /= npartitions; + records_.resize(batchsize_); + random_skip_ = proto.sharddata_conf().random_skip(); +} + +void ShardDataLayer::ComputeFeature(int flag, Metric* perf) { + if (shard_ == nullptr) + shard_ = new DataShard(layer_proto_.sharddata_conf().path(), + DataShard::kRead); + if (random_skip_) { + int nskip = rand() % random_skip_; + LOG(INFO) << "Random Skip " << nskip << " records, there are " + << shard_->Count() << " records in total"; + string key; + for (int i = 0; i < nskip; i++) { + shard_->Next(&key, &sample_); + } + random_skip_ = 0; + } + for (auto& record : records_) { + string key; + if (!shard_->Next(&key, &record)) { + shard_->SeekToFirst(); + CHECK(shard_->Next(&key, &record)); + } + } +} + +/********* Implementation for LabelLayer **************/ +void LabelLayer::Setup(const LayerProto& proto, int npartitions) { + Layer::Setup(proto, npartitions); + CHECK_EQ(srclayers_.size(), 1); + int batchsize = static_cast<DataLayer*>(srclayers_[0])->batchsize(); + data_.Reshape(vector<int>{batchsize}); +} + +void LabelLayer::ParseRecords(int flag, const vector<Record>& records, + Blob<float>* blob) { + int rid = 0; + float *label = blob->mutable_cpu_data(); + for (const Record& record : records) { + label[rid++] = record.image().label(); + // CHECK_LT(record.image().label(),10); + } + CHECK_EQ(rid, blob->shape()[0]); +} + +/**************** Implementation for MnistLayer ******************/ +void MnistLayer::ParseRecords(int flag, + const vector<Record>& records, Blob<float>* blob) { + LOG_IF(ERROR, records.size() == 0) << "Empty records to parse"; + int ndim = records.at(0).image().shape_size(); + int inputsize = records.at(0).image().shape(ndim-1); + CHECK_EQ(inputsize, blob->shape()[2]); + + float* dptr = blob->mutable_cpu_data(); + for (const Record& record : records) { + const SingleLabelImageRecord& imagerecord = record.image(); + if (imagerecord.pixel().size()) { + string pixel = imagerecord.pixel(); + for (int i = 0, k = 0; i < inputsize; i++) { + for (int j = 0; j < inputsize; j++) { + // NOTE!!! must cast pixel to uint8_t then to float!!! 
waste a lot of + // time to debug this + float x = static_cast<float>(static_cast<uint8_t>(pixel[k++])); + x = x / norm_a_-norm_b_; + *dptr = x; + dptr++; + } + } + } else { + for (int i = 0, k = 0; i < inputsize; i++) { + for (int j = 0; j < inputsize; j++) { + *dptr = imagerecord.data(k++) / norm_a_ - norm_b_; + dptr++; + } + } + } + } + CHECK_EQ(dptr, blob->mutable_cpu_data() + blob->count()); +} + +void MnistLayer::Setup(const LayerProto& proto, int npartitions) { + Layer::Setup(proto, npartitions); + CHECK_EQ(srclayers_.size(), 1); + int batchsize = static_cast<DataLayer*>(srclayers_[0])->batchsize(); + Record sample = static_cast<DataLayer*>(srclayers_[0])->sample(); + norm_a_ = proto.mnist_conf().norm_a(); + norm_b_ = proto.mnist_conf().norm_b(); + int ndim = sample.image().shape_size(); + CHECK_GE(ndim, 2); + int s = sample.image().shape(ndim - 1); + CHECK_EQ(s, sample.image().shape(ndim - 2)); + data_.Reshape(vector<int>{batchsize, 1, s, s}); +} + +/*************** Implementation for RGBImageLayer *************************/ +void RGBImageLayer::ParseRecords(int flag, + const vector<Record>& records, Blob<float>* blob) { + const vector<int>& s = blob->shape(); + Tensor<cpu, 4> images(data_.mutable_cpu_data(), + Shape4(s[0], s[1], s[2], s[3])); + const SingleLabelImageRecord& r = records.at(0).image(); + Tensor<cpu, 3> raw_image(Shape3(r.shape(0), r.shape(1), r.shape(2))); + AllocSpace(raw_image); + Tensor<cpu, 3> croped_image(nullptr, Shape3(s[1], s[2], s[3])); + if (cropsize_) + AllocSpace(croped_image); + int rid = 0; + const float* meandptr = mean_.cpu_data(); + for (const Record& record : records) { + auto image = images[rid]; + bool do_crop = cropsize_> 0 && ((flag & kTrain) == kTrain); + bool do_mirror = mirror_ && rand() % 2 && ((flag & kTrain) == kTrain); + float* dptr = nullptr; + if (do_crop || do_mirror) + dptr = raw_image.dptr; + else + dptr = image.dptr; + if (record.image().pixel().size()) { + string pixel = record.image().pixel(); + for (size_t i = 0; i < pixel.size(); i++) + dptr[i] = static_cast<float>(static_cast<uint8_t>(pixel[i])); + } else { + memcpy(dptr, record.image().data().data(), + sizeof(float) * record.image().data_size()); + } + for (int i = 0; i < mean_.count(); i++) + dptr[i] -= meandptr[i]; + if (do_crop) { + int hoff = rand() % (r.shape(1) - cropsize_); + int woff = rand() % (r.shape(2) - cropsize_); + Shape<2> cropshape = Shape2(cropsize_, cropsize_); + if (do_mirror) { + croped_image = expr::crop(raw_image, cropshape, hoff, woff); + image = expr::mirror(croped_image); + } else { + image = expr::crop(raw_image, cropshape, hoff, woff); + } + } else if (do_mirror) { + image = expr::mirror(raw_image); + } + rid++; + } + if (scale_) + images = images * scale_; + FreeSpace(raw_image); + if (cropsize_) + FreeSpace(croped_image); +} + +void RGBImageLayer::Setup(const LayerProto& proto, int npartitions) { + ParserLayer::Setup(proto, npartitions); + CHECK_EQ(srclayers_.size(), 1); + scale_ = proto.rgbimage_conf().scale(); + cropsize_ = proto.rgbimage_conf().cropsize(); + mirror_ = proto.rgbimage_conf().mirror(); + int batchsize = static_cast<DataLayer*>(srclayers_[0])->batchsize(); + Record sample = static_cast<DataLayer*>(srclayers_[0])->sample(); + vector<int> shape; + shape.push_back(batchsize); + for (int x : sample.image().shape()) { + shape.push_back(x); + } + CHECK_EQ(shape.size(), 4); + if (cropsize_) { + shape[2] = cropsize_; + shape[3] = cropsize_; + } + data_.Reshape(shape); + mean_.Reshape({shape[1], shape[2], shape[3]}); + if 
(proto.rgbimage_conf().has_meanfile()) { + if (proto.rgbimage_conf().meanfile().find("binaryproto") != string::npos) { + CaffeBlob mean; + ReadProtoFromBinaryFile(proto.rgbimage_conf().meanfile().c_str(), &mean); + CHECK_EQ(mean_.count(), mean.data_size()); + memcpy(mean_.mutable_cpu_data(), mean.data().data(), + sizeof(float)*mean.data_size()); + } else { + SingleLabelImageRecord mean; + ReadProtoFromBinaryFile(proto.rgbimage_conf().meanfile().c_str(), &mean); + CHECK_EQ(mean_.count(), mean.data_size()); + memcpy(mean_.mutable_cpu_data(), mean.data().data(), + sizeof(float)*mean.data_size()); + } + } else { + memset(mean_.mutable_cpu_data(), 0, sizeof(float) * mean_.count()); + } +} + +/************* Implementation for PrefetchLayer ***********/ +PrefetchLayer::~PrefetchLayer() { + if (thread_.joinable()) + thread_.join(); +} + + +void PrefetchLayer::ComputeFeature(int flag, Metric* perf) { + LOG(FATAL) << "Not implemented"; +} + +} // namespace singa
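----------------------------------------------------------------------
For reference, a minimal sketch (not part of this commit) of walking a shard through the DataShard interface declared in include/utils/data_shard.h above, in the same way ShardDataLayer::ComputeFeature does; the CountTuples helper and its path argument are hypothetical.

#include <string>
#include "utils/data_shard.h"

// Count the tuples in a shard by iterating with Next(); the std::string
// overload returns the raw serialized value, so no protobuf type is needed.
int CountTuples(const std::string& path) {
  singa::DataShard shard(path, singa::DataShard::kRead);
  std::string key, val;
  int n = 0;
  while (shard.Next(&key, &val))  // returns false once the last tuple is read
    ++n;
  return n;  // should agree with shard.Count(), which performs the same walk
}

ShardDataLayer above follows the same pattern, except that it parses each value into a Record and calls SeekToFirst() to wrap around once the shard is exhausted.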

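----------------------------------------------------------------------
LabelLayer, MnistLayer and RGBImageLayer in src/neuralnet/input_layer.cc all follow the same two-step ParserLayer contract: Setup() shapes data_ from the source DataLayer, and ParseRecords() fills data_ from the prefetched records (ParserLayer::ComputeFeature dispatches to it). Below is a hedged sketch of a user-defined parser in the same style; BinaryLabelLayer is hypothetical, and it assumes ParseRecords is declared as a virtual method with this signature in include/neuralnet/input_layer.h.

#include <vector>
#include <glog/logging.h>
#include "neuralnet/input_layer.h"

namespace singa {

// Hypothetical parser that binarizes image labels: label > 0 -> 1, else 0.
class BinaryLabelLayer : public ParserLayer {
 public:
  void Setup(const LayerProto& proto, int npartitions) override {
    Layer::Setup(proto, npartitions);
    CHECK_EQ(srclayers_.size(), 1);
    int batchsize = static_cast<DataLayer*>(srclayers_[0])->batchsize();
    data_.Reshape(std::vector<int>{batchsize});
  }
  void ParseRecords(int flag, const std::vector<Record>& records,
                    Blob<float>* blob) override {
    float* label = blob->mutable_cpu_data();
    int rid = 0;
    for (const Record& record : records)
      label[rid++] = record.image().label() > 0 ? 1.0f : 0.0f;
    CHECK_EQ(rid, blob->shape()[0]);
  }
};

}  // namespace singa

The registration call that would expose such a layer to job.conf is omitted here, since it depends on Driver's user-type API rather than anything shown in this diff.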